修改2023届作业测验数据导入工具, 使其支持一次导入多次作业(每次作业的zip文件须与对应的txt(tex)文件同名)

This commit is contained in:
weiye.wang 2023-05-25 21:36:18 +08:00
parent 84477bd0de
commit 34d0e46b6f
1 changed file with 93 additions and 44 deletions

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -16,58 +16,67 @@
"# date = str(time.localtime().tm_year)+str(time.localtime().tm_mon).zfill(2)+str(time.localtime().tm_mday).zfill(2)\n",
"date = \"20230523\"\n",
"\n",
"#生成文件名tex_file和zip_file\n",
"#生成文件列表\n",
"files = [os.path.join(filepath,f) for f in os.listdir(filepath)]\n",
"tex_file = [f for f in files if \".tex\" in f or \".txt\" in f][0]\n",
"zip_file = [f for f in files if \".zip\" in f][0]\n",
"file_pairs = []\n",
"for zf in [f for f in files if \".zip\" in f]:\n",
" tf = [f for f in files if (\".tex\" in f or \".txt\" in f) and zf[:-4] in f][0]\n",
" file_pairs.append((zf,tf))\n",
"\n",
"#获得题号数据保存在problems_list中\n",
"with open(tex_file,\"r\",encoding = \"utf8\") as f:\n",
" tex_data = f.read()\n",
"problems_list = re.findall(r\"\\(([\\d]{6})\\)\",tex_data)\n",
"\n",
"#在zip文件中找到包含正确率数据的文件\n",
"zf = zipfile.ZipFile(zip_file)\n",
"statfiles = [f.filename for f in zf.filelist if \"试题分析\" in f.filename]\n",
"\n",
"#生成答题纸区域编号与题目ID的对应\n",
"correspondence_dict = {}\n",
"if \"statsfile.xlsx\" in os.listdir(\"临时文件\"):\n",
" os.remove(\"临时文件/statsfile.xlsx\")\n",
"extractedpath = Path(zf.extract(statfiles[0]))\n",
"extractedpath.rename(\"临时文件/statsfile.xlsx\")\n",
"df = pd.read_excel(\"临时文件/statsfile.xlsx\")\n",
"problems_indexes = list(df[df.columns[0]][2:])\n",
"for p in problems_indexes:\n",
" t = input(\"答题纸区域\"+p+\"的题号为(1-\"+str(len(problems_list))+\", a为自动对应点前的题号):\")\n",
" if t.upper() == \"A\":\n",
" correspondence_dict[p] = problems_list[int(re.findall(r\"([\\d]+?)\\.\",p)[0])-1]\n",
" elif not (int(t)<1 or int(t)>len(problems_list)):\n",
" correspondence_dict[p] = problems_list[int(t)-1] \n",
"\n",
"outputstr = \"usages\\n\\n\"\n",
"for fp in file_pairs:\n",
" #生成文件名tex_file和zip_file\n",
" zip_file, tex_file = fp\n",
"\n",
"for sf in statfiles:\n",
" #读取文件生成区域列表与难度列表\n",
" classname = \"2023届高三\"+re.findall(r\"高三([\\d]*?)班\",sf)[0].zfill(2)+\"班\"\n",
"\n",
" #获得题号数据保存在problems_list中\n",
" with open(tex_file,\"r\",encoding = \"utf8\") as f:\n",
" tex_data = f.read()\n",
" problems_list = re.findall(r\"\\(([\\d]{6})\\)\",tex_data)\n",
"\n",
" #在zip文件中找到包含正确率数据的文件\n",
" zf = zipfile.ZipFile(zip_file)\n",
" statfiles = [f.filename for f in zf.filelist if \"试题分析\" in f.filename]\n",
"\n",
" #生成答题纸区域编号与题目ID的对应\n",
" correspondence_dict = {}\n",
" if \"statsfile.xlsx\" in os.listdir(\"临时文件\"):\n",
" os.remove(\"临时文件/statsfile.xlsx\")\n",
" extractedpath = Path(zf.extract(sf))\n",
" extractedpath = Path(zf.extract(statfiles[0]))\n",
" extractedpath.rename(\"临时文件/statsfile.xlsx\")\n",
" df = pd.read_excel(\"临时文件/statsfile.xlsx\")\n",
" difficulties = list(df[df.columns[9]][2:])\n",
" problems_indexes = list(df[df.columns[0]][2:])\n",
" #生成该班级题目ID对应难度列表组成的字典\n",
" class_difficulty = {}\n",
" for i in range(len(difficulties)):\n",
" if problems_indexes[i] in correspondence_dict:\n",
" if not correspondence_dict[problems_indexes[i]] in class_difficulty:\n",
" class_difficulty[correspondence_dict[problems_indexes[i]]] = [difficulties[i]]\n",
" else:\n",
" class_difficulty[correspondence_dict[problems_indexes[i]]].append(difficulties[i])\n",
" #添加到输出字符串中\n",
" for id in class_difficulty:\n",
" outputstr += id + \"\\n\" + date + \"\\t\" + classname + \"\\t\" + \"\\t\".join([(\"%.3f\" %float(v)) for v in class_difficulty[id]]) + \"\\n\\n\"\n",
" for p in problems_indexes:\n",
" t = input(os.path.split(zip_file)[-1][:4]+\"答题纸区域\"+p+\"的题号为(1-\"+str(len(problems_list))+\", a为自动对应点前的题号):\")\n",
" if t.upper() == \"A\":\n",
" correspondence_dict[p] = problems_list[int(re.findall(r\"([\\d]+?)\\.\",p)[0])-1]\n",
" elif not (int(t)<1 or int(t)>len(problems_list)):\n",
" correspondence_dict[p] = problems_list[int(t)-1] \n",
"\n",
"\n",
"\n",
" for sf in statfiles:\n",
" #读取文件生成区域列表与难度列表\n",
" classname = \"2023届高三\"+re.findall(r\"高三([\\d]*?)班\",sf)[0].zfill(2)+\"班\"\n",
" if \"statsfile.xlsx\" in os.listdir(\"临时文件\"):\n",
" os.remove(\"临时文件/statsfile.xlsx\")\n",
" extractedpath = Path(zf.extract(sf))\n",
" extractedpath.rename(\"临时文件/statsfile.xlsx\")\n",
" df = pd.read_excel(\"临时文件/statsfile.xlsx\")\n",
" difficulties = list(df[df.columns[9]][2:])\n",
" problems_indexes = list(df[df.columns[0]][2:])\n",
" #生成该班级题目ID对应难度列表组成的字典\n",
" class_difficulty = {}\n",
" for i in range(len(difficulties)):\n",
" if problems_indexes[i] in correspondence_dict:\n",
" if not correspondence_dict[problems_indexes[i]] in class_difficulty:\n",
" class_difficulty[correspondence_dict[problems_indexes[i]]] = [difficulties[i]]\n",
" else:\n",
" class_difficulty[correspondence_dict[problems_indexes[i]]].append(difficulties[i])\n",
" #添加到输出字符串中\n",
" for id in class_difficulty:\n",
" outputstr += id + \"\\n\" + date + \"\\t\" + classname + \"\\t\" + \"\\t\".join([(\"%.3f\" %float(v)) for v in class_difficulty[id]]) + \"\\n\\n\"\n",
"\n",
"with open(\"临时文件/自动转换结果.txt\",\"w\",encoding = \"utf8\") as f:\n",
" f.write(outputstr)\n",
@@ -77,6 +86,26 @@
"\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"''"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -84,6 +113,26 @@
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('数据导入作业文件', 'sj02.zip')"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.path.split(zip_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -115,7 +164,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.15"
"version": "3.9.15"
},
"orig_nbformat": 4,
"vscode": {