141 lines
5.3 KiB
Plaintext
141 lines
5.3 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2025届高一11班\n",
|
||
"2025届高一11班 有效\n",
|
||
"2025届高一12班\n",
|
||
"2025届高一12班 有效\n",
|
||
"2025届高一01班\n",
|
||
"2025届高一01班 有效\n",
|
||
"2025届高一02班\n",
|
||
"2025届高一02班 有效\n",
|
||
"2025届高一06班\n",
|
||
"2025届高一06班 有效\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import zipfile,os,re,time\n",
|
||
"import pandas as pd\n",
|
||
"from pathlib import Path\n",
|
||
"\n",
"#Working directory: it must hold exactly one .tex (or .txt) file and exactly one .zip file; anything else is ignored\n",
"filepath = \"数据导入作业文件\"\n",
"#Scratch directory for the extracted workbooks and the converted result\n",
"tempdir = Path(\"临时文件\")\n",
"tempdir.mkdir(exist_ok = True)\n",
"#Relative position in the score sheet of the student row sampled to decide whether a class is valid\n",
"SAMPLE_POSITION = 0.66\n",
"\n",
"def read_zip_workbook(archive, member, target):\n",
"    \"\"\"Write one zip member to target (overwriting it) and load it with pandas.\n",
"\n",
"    The bytes are copied directly instead of calling ZipFile.extract(), which\n",
"    unpacks into the current directory and can leave the member's folders behind.\n",
"    \"\"\"\n",
"    Path(target).write_bytes(archive.read(member))\n",
"    return pd.read_excel(target)\n",
"\n",
"#Pick tex_file and zip_file by their real extension, not by a substring of the whole path\n",
"files = [os.path.join(filepath,f) for f in os.listdir(filepath)]\n",
"tex_candidates = [f for f in files if Path(f).suffix.lower() in (\".tex\",\".txt\")]\n",
"zip_candidates = [f for f in files if Path(f).suffix.lower() == \".zip\"]\n",
"if not tex_candidates or not zip_candidates:\n",
"    raise FileNotFoundError(filepath + \" must contain one .tex/.txt file and one .zip file\")\n",
"tex_file = tex_candidates[0]\n",
"zip_file = zip_candidates[0]\n",
"\n",
"#Load the problem list into problems_list; whitespace runs become single tabs and the first non-empty line is kept as date\n",
"with open(tex_file,\"r\",encoding = \"utf8\") as f:\n",
"    tex_data = f.read()\n",
"problems_list = [re.sub(r\"\\s+\",\"\\t\",l.strip()) for l in tex_data.split(\"\\n\") if l.strip() != \"\"]\n",
"if not problems_list:\n",
"    raise ValueError(tex_file + \" is empty\")\n",
"date = problems_list.pop(0)\n",
"#Text before the first tab of each line -> rest of that line (a repeated label keeps its last line)\n",
"label_to_id = dict(p.split(\"\\t\",1) for p in problems_list if \"\\t\" in p)\n",
"\n",
"#The with-block closes the archive even when a workbook fails to load\n",
"with zipfile.ZipFile(zip_file) as zf:\n",
"    #Find the archive members holding the accuracy statistics and the student scores\n",
"    statfiles = [f.filename for f in zf.filelist if \"试题分析\" in f.filename]\n",
"    handinfiles = [f.filename for f in zf.filelist if \"学生成绩\" in f.filename]\n",
"    if not statfiles:\n",
"        raise FileNotFoundError(\"no member containing 试题分析 in \" + zip_file)\n",
"\n",
"    #Map the answer-sheet area labels listed in the first statistics workbook to problem IDs\n",
"    df = read_zip_workbook(zf, statfiles[0], tempdir / \"statsfile.xlsx\")\n",
"    problems_indexes = list(df[df.columns[0]][2:])\n",
"    #Labels are compared literally as text: \"17(1)\" or \"3.2\" are not regular expressions, and a numeric cell is not a str\n",
"    correspondence_dict = {pind: label_to_id[str(pind)] for pind in problems_indexes if str(pind) in label_to_id}\n",
"\n",
"    outputstr = \"usages\\n\\n\"\n",
"\n",
"    for sf in statfiles:\n",
"        #Class name with the class number padded to two digits\n",
"        class_match = re.search(r\"高一([\\d]*?)班\",sf)\n",
"        if class_match is None:\n",
"            raise ValueError(\"no class number in member name \" + sf)\n",
"        classname = \"2025届高一\"+class_match.group(1).zfill(2)+\"班\"\n",
"        print(classname)\n",
"        df = read_zip_workbook(zf, sf, tempdir / \"statsfile.xlsx\")\n",
"\n",
"        #The score sheet of the same class is named <prefix before 数学> + 学生成绩.xlsx\n",
"        subject_pos = sf.find(\"数学\")\n",
"        handin_name = sf[:subject_pos]+\"学生成绩.xlsx\"\n",
"        if subject_pos < 0 or handin_name not in handinfiles:\n",
"            raise FileNotFoundError(\"no 学生成绩 workbook matching \" + sf)\n",
"        handindf = read_zip_workbook(zf, handin_name, tempdir / \"handinfile.xlsx\")\n",
"\n",
"        #A class counts as valid unless the sampled student's \"Unnamed: 4\" cell reads 缺考; an empty sheet is invalid\n",
"        #NOTE(review): presumably rows are ordered so that absentees come last - confirm\n",
"        sample_row = int(len(handindf)*SAMPLE_POSITION)\n",
"        if len(handindf) > 0 and str(handindf.iloc[sample_row][\"Unnamed: 4\"]) != \"缺考\":\n",
"            #Columns 0 and 9 from the third row on: area labels and the values this script treats as difficulties\n",
"            problems_indexes = list(df[df.columns[0]][2:])\n",
"            difficulties = list(df[df.columns[9]][2:])\n",
"            #Problem ID -> list of difficulty values of all areas mapped to it\n",
"            class_difficulty = {}\n",
"            for pind, difficulty in zip(problems_indexes, difficulties):\n",
"                if pind in correspondence_dict:\n",
"                    class_difficulty.setdefault(correspondence_dict[pind],[]).append(difficulty)\n",
"            #Append one record per problem ID to the output text\n",
"            for problem_id, values in class_difficulty.items():\n",
"                outputstr += problem_id + \"\\n\" + date + \"\\t\" + classname + \"\\t\" + \"\\t\".join([(\"%.3f\" %float(v)) for v in values]) + \"\\n\\n\"\n",
"            print(classname, \"有效\")\n",
"        else:\n",
"            print(classname, \"无效\")\n",
"\n",
"#The same text is written to the scratch directory and to the metadata file\n",
"with open(tempdir / \"自动转换结果.txt\",\"w\",encoding = \"utf8\") as f:\n",
"    f.write(outputstr)\n",
"with open(\"../文本文件/metadata.txt\",\"w\",encoding = \"utf8\") as f:\n",
"    f.write(outputstr)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "mathdept",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.11"
|
||
},
|
||
"orig_nbformat": 4,
|
||
"vscode": {
|
||
"interpreter": {
|
||
"hash": "42dd566da87765ddbe9b5c5b483063747fec4aacc5469ad554706e4b742e67b2"
|
||
}
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|