This repository has been archived on 2024-06-23. You can view files and clone it, but cannot push or open issues or pull requests.
mathdeptv2/工具/分年级专用工具/小闲平台数据导入_2025届.ipynb

141 lines
5.3 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2025届高一11班\n",
"2025届高一11班 有效\n",
"2025届高一12班\n",
"2025届高一12班 有效\n",
"2025届高一01班\n",
"2025届高一01班 有效\n",
"2025届高一02班\n",
"2025届高一02班 有效\n",
"2025届高一06班\n",
"2025届高一06班 有效\n"
]
}
],
"source": [
"import zipfile,os,re,time\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"\n",
"#设置工作目录, 要求工作目录中恰有一个.tex文件和一个.zip文件其余不论\n",
"filepath = \"数据导入作业文件\"\n",
"\n",
"#生成文件名tex_file和zip_file\n",
"files = [os.path.join(filepath,f) for f in os.listdir(filepath)]\n",
"tex_file = [f for f in files if \".tex\" in f or \".txt\" in f][0]\n",
"zip_file = [f for f in files if \".zip\" in f][0]\n",
"\n",
"#获得题号数据保存在problems_list中\n",
"with open(tex_file,\"r\",encoding = \"utf8\") as f:\n",
" tex_data = f.read()\n",
"problems_list = [re.sub(r\"\\s+\",\"\\t\",l.strip()) for l in tex_data.split(\"\\n\") if l.strip() != \"\"]\n",
"date = problems_list.pop(0)\n",
"\n",
"\n",
"#在zip文件中找到包含正确率数据的文件\n",
"zf = zipfile.ZipFile(zip_file)\n",
"statfiles = [f.filename for f in zf.filelist if \"试题分析\" in f.filename]\n",
"handinfiles = [f.filename for f in zf.filelist if \"学生成绩\" in f.filename]\n",
"\n",
"#生成答题纸区域编号与题目ID的对应\n",
"correspondence_dict = {}\n",
"if \"statsfile.xlsx\" in os.listdir(\"临时文件\"):\n",
" os.remove(\"临时文件/statsfile.xlsx\")\n",
"extractedpath = Path(zf.extract(statfiles[0]))\n",
"extractedpath.rename(\"临时文件/statsfile.xlsx\")\n",
"df = pd.read_excel(\"临时文件/statsfile.xlsx\")\n",
"problems_indexes = list(df[df.columns[0]][2:])\n",
"\n",
"for pind in problems_indexes:\n",
" for p in problems_list:\n",
" if len(re.findall(\"^\"+pind+\"\\t\",p))>0:\n",
" correspondence_dict[pind] = p[p.index(\"\\t\")+1:]\n",
"\n",
"outputstr = \"usages\\n\\n\"\n",
"\n",
"for sf in statfiles:\n",
" #读取文件生成区域列表与难度列表\n",
" classname = \"2025届高一\"+re.findall(r\"高一([\\d]*?)班\",sf)[0].zfill(2)+\"班\"\n",
" print(classname)\n",
" if \"statsfile.xlsx\" in os.listdir(\"临时文件\"):\n",
" os.remove(\"临时文件/statsfile.xlsx\")\n",
" extractedpath = Path(zf.extract(sf))\n",
" extractedpath.rename(\"临时文件/statsfile.xlsx\")\n",
" df = pd.read_excel(\"临时文件/statsfile.xlsx\")\n",
" if \"handinfile.xlsx\" in os.listdir(\"临时文件\"):\n",
" os.remove(\"临时文件/handinfile.xlsx\")\n",
" extractedpath = Path(zf.extract(sf[:sf.find(\"数学\")]+\"学生成绩.xlsx\"))\n",
" extractedpath.rename(\"临时文件/handinfile.xlsx\")\n",
" handindf = pd.read_excel(\"临时文件/handinfile.xlsx\")\n",
" if str(handindf.iloc[int(len(handindf)*0.66)][\"Unnamed: 4\"]) != \"缺考\":\n",
" difficulties = list(df[df.columns[9]][2:])\n",
" problems_indexes = list(df[df.columns[0]][2:])\n",
" #生成该班级题目ID对应难度列表组成的字典\n",
" class_difficulty = {}\n",
" for i in range(len(difficulties)):\n",
" if problems_indexes[i] in correspondence_dict:\n",
" if not correspondence_dict[problems_indexes[i]] in class_difficulty:\n",
" class_difficulty[correspondence_dict[problems_indexes[i]]] = [difficulties[i]]\n",
" else:\n",
" class_difficulty[correspondence_dict[problems_indexes[i]]].append(difficulties[i])\n",
" #添加到输出字符串中\n",
" for id in class_difficulty:\n",
" outputstr += id + \"\\n\" + date + \"\\t\" + classname + \"\\t\" + \"\\t\".join([(\"%.3f\" %float(v)) for v in class_difficulty[id]]) + \"\\n\\n\"\n",
" print(classname, \"有效\")\n",
" else:\n",
" print(classname, \"无效\")\n",
"\n",
"with open(\"临时文件/自动转换结果.txt\",\"w\",encoding = \"utf8\") as f:\n",
" f.write(outputstr)\n",
"with open(\"../文本文件/metadata.txt\",\"w\",encoding = \"utf8\") as f:\n",
" f.write(outputstr)\n",
"zf.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "mathdept",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "42dd566da87765ddbe9b5c5b483063747fec4aacc5469ad554706e4b742e67b2"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}