This repository has been archived on 2024-06-23. You can view files and clone it, but cannot push or open issues or pull requests.
mathdeptv2/工具/分年级专用工具/小闲平台作业测验数据导入_2023届.ipynb

137 lines
5.7 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import zipfile,os,re,time\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"\n",
"# Working directory: every .zip archive in it must have a .tex (or .txt) file\n",
"# with the same base name; other files are ignored.\n",
"filepath = \"数据导入作业文件\"\n",
"\n",
"# Default usage date (YYYYMMDD) offered for the first archive.\n",
"# To start from today's date instead, use:\n",
"# date = str(time.localtime().tm_year)+str(time.localtime().tm_mon).zfill(2)+str(time.localtime().tm_mday).zfill(2)\n",
"date = \"20230523\"\n",
"\n",
"# Every statistics workbook is moved to this path before it is read.\n",
"stats_copy = \"临时文件/statsfile.xlsx\"\n",
"\n",
"\n",
"def load_stats(archive, member):\n",
"    \"\"\"Extract *member* from the open zip *archive* and load it as a DataFrame.\"\"\"\n",
"    # Path.replace overwrites the copy left behind by a previous workbook.\n",
"    Path(archive.extract(member)).replace(stats_copy)\n",
"    return pd.read_excel(stats_copy)\n",
"\n",
"\n",
"def auto_problem_id(region, problems_list):\n",
"    \"\"\"Return the problem ID at the 1-based position given by the number before the first dot in *region*.\"\"\"\n",
"    return problems_list[int(re.findall(r\"([\\d]+?)\\.\",region)[0])-1]\n",
"\n",
"\n",
"# Pair each .zip archive with its problem-list file. Sorting keeps the order\n",
"# stable, which matters because the date typed for one archive becomes the\n",
"# default for the next one.\n",
"files = sorted(os.path.join(filepath,f) for f in os.listdir(filepath))\n",
"text_files = [f for f in files if os.path.splitext(f)[1].lower() in (\".tex\",\".txt\")]\n",
"file_pairs = []\n",
"for zip_path in files:\n",
"    stem, ext = os.path.splitext(zip_path)\n",
"    if ext.lower() != \".zip\":\n",
"        continue\n",
"    # Prefer an exact base-name match so that e.g. hw1.zip is not paired with\n",
"    # hw10.tex; fall back to the former substring match otherwise.\n",
"    matches = [f for f in text_files if os.path.splitext(f)[0] == stem]\n",
"    if not matches:\n",
"        matches = [f for f in text_files if stem in f]\n",
"    if not matches:\n",
"        raise FileNotFoundError(\"未找到与\"+zip_path+\"对应的.tex或.txt文件\")\n",
"    file_pairs.append((zip_path,matches[0]))\n",
"\n",
"\n",
"output_parts = [\"usages\\n\\n\"]\n",
"for zip_file, tex_file in file_pairs:\n",
"    zip_name = os.path.split(zip_file)[-1]\n",
"\n",
"    # Ask for the usage date. Only the trailing digits that differ from the\n",
"    # current date have to be typed; they overwrite the end of the 8-digit date.\n",
"    newdate = input(zip_name[:-4]+\"使用日期(\"+date+\", 不变按回车, 要变化仅需输入最后几位改变的数字):\")\n",
"    newdate = \"\".join(re.findall(r\"\\d\",newdate))\n",
"    change_len = len(newdate)\n",
"    date = date[:8-change_len]+newdate\n",
"\n",
"    # Collect the six-digit problem IDs written in parentheses, in file order.\n",
"    with open(tex_file,\"r\",encoding = \"utf8\") as f:\n",
"        tex_data = f.read()\n",
"    problems_list = re.findall(r\"\\(([\\d]{6})\\)\",tex_data)\n",
"\n",
"    # The context manager closes each archive as soon as it has been processed.\n",
"    with zipfile.ZipFile(zip_file) as zf:\n",
"        # Workbooks holding the per-problem statistics are found by name.\n",
"        statfiles = [f.filename for f in zf.filelist if \"试题分析\" in f.filename]\n",
"        if not statfiles:\n",
"            print(zip_name+\"中没有试题分析文件, 已跳过\")\n",
"            continue\n",
"\n",
"        # Map answer-sheet region labels to problem IDs, using the first workbook.\n",
"        correspondence_dict = {}\n",
"        df = load_stats(zf,statfiles[0])\n",
"        # Region labels are read from the first column, skipping the first two rows.\n",
"        problems_indexes = list(df[df.columns[0]][2:])\n",
"        autofill = False\n",
"        for p in problems_indexes:\n",
"            # Once \"u\" has been chosen every remaining region is mapped automatically.\n",
"            if autofill:\n",
"                correspondence_dict[p] = auto_problem_id(p,problems_list)\n",
"                continue\n",
"            while True:\n",
"                # NOTE(review): [:4] shows only the first four characters of the file\n",
"                # name, while the date prompt uses [:-4]; confirm whether that is intended.\n",
"                t = input(zip_name[:4]+\"答题纸区域\"+p+\"的题号为(1-\"+str(len(problems_list))+\", a为自动对应点前的题号, u为之后完全自动对应):\").strip()\n",
"                if t.upper() in (\"A\",\"U\"):\n",
"                    correspondence_dict[p] = auto_problem_id(p,problems_list)\n",
"                    autofill = t.upper() == \"U\"\n",
"                    break\n",
"                try:\n",
"                    number = int(t)\n",
"                except ValueError:\n",
"                    # Ask again instead of aborting and losing everything typed so far.\n",
"                    print(\"输入无效, 请输入题号, a或u\")\n",
"                    continue\n",
"                # A number outside 1..len(problems_list) leaves the region unmapped;\n",
"                # unmapped regions are ignored when the statistics are collected.\n",
"                if 1 <= number <= len(problems_list):\n",
"                    correspondence_dict[p] = problems_list[number-1]\n",
"                break\n",
"\n",
"        for sf in statfiles:\n",
"            # The class number comes from the workbook name and is zero-padded to two digits.\n",
"            classname = \"2023届高三\"+re.findall(r\"高三([\\d]*?)班\",sf)[0].zfill(2)+\"班\"\n",
"            df = load_stats(zf,sf)\n",
"            # Values are read from the tenth column and region labels from the first,\n",
"            # both skipping the first two rows.\n",
"            difficulties = list(df[df.columns[9]][2:])\n",
"            problems_indexes = list(df[df.columns[0]][2:])\n",
"            # Group this class's values by problem ID; several regions may share one ID.\n",
"            class_difficulty = {}\n",
"            for region, difficulty in zip(problems_indexes,difficulties):\n",
"                if region in correspondence_dict:\n",
"                    class_difficulty.setdefault(correspondence_dict[region],[]).append(difficulty)\n",
"            # One record per problem: ID line, then date, class and tab-separated values.\n",
"            for problem_id, values in class_difficulty.items():\n",
"                output_parts.append(problem_id + \"\\n\" + date + \"\\t\" + classname + \"\\t\" + \"\\t\".join([(\"%.3f\" %float(v)) for v in values]) + \"\\n\\n\")\n",
"\n",
"# Write the same text to the scratch result file and to metadata.txt.\n",
"outputstr = \"\".join(output_parts)\n",
"with open(\"临时文件/自动转换结果.txt\",\"w\",encoding = \"utf8\") as f:\n",
"    f.write(outputstr)\n",
"with open(\"../文本文件/metadata.txt\",\"w\",encoding = \"utf8\") as f:\n",
"    f.write(outputstr)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "mathdept",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.15"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "42dd566da87765ddbe9b5c5b483063747fec4aacc5469ad554706e4b742e67b2"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}