修改2023届作业测验数据导入工具为可支持多次作业, zip和txt(tex)文件同名
This commit is contained in:
parent
84477bd0de
commit
34d0e46b6f
|
|
@ -2,7 +2,7 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
|
@ -16,58 +16,67 @@
|
|||
"# date = str(time.localtime().tm_year)+str(time.localtime().tm_mon).zfill(2)+str(time.localtime().tm_mday).zfill(2)\n",
|
||||
"date = \"20230523\"\n",
|
||||
"\n",
|
||||
"#生成文件名tex_file和zip_file\n",
|
||||
"#生成文件列表\n",
|
||||
"files = [os.path.join(filepath,f) for f in os.listdir(filepath)]\n",
|
||||
"tex_file = [f for f in files if \".tex\" in f or \".txt\" in f][0]\n",
|
||||
"zip_file = [f for f in files if \".zip\" in f][0]\n",
|
||||
"file_pairs = []\n",
|
||||
"for zf in [f for f in files if \".zip\" in f]:\n",
|
||||
" tf = [f for f in files if (\".tex\" in f or \".txt\" in f) and zf[:-4] in f][0]\n",
|
||||
" file_pairs.append((zf,tf))\n",
|
||||
"\n",
|
||||
"#获得题号数据,保存在problems_list中\n",
|
||||
"with open(tex_file,\"r\",encoding = \"utf8\") as f:\n",
|
||||
" tex_data = f.read()\n",
|
||||
"problems_list = re.findall(r\"\\(([\\d]{6})\\)\",tex_data)\n",
|
||||
"\n",
|
||||
"#在zip文件中找到包含正确率数据的文件\n",
|
||||
"zf = zipfile.ZipFile(zip_file)\n",
|
||||
"statfiles = [f.filename for f in zf.filelist if \"试题分析\" in f.filename]\n",
|
||||
"\n",
|
||||
"#生成答题纸区域编号与题目ID的对应\n",
|
||||
"correspondence_dict = {}\n",
|
||||
"if \"statsfile.xlsx\" in os.listdir(\"临时文件\"):\n",
|
||||
" os.remove(\"临时文件/statsfile.xlsx\")\n",
|
||||
"extractedpath = Path(zf.extract(statfiles[0]))\n",
|
||||
"extractedpath.rename(\"临时文件/statsfile.xlsx\")\n",
|
||||
"df = pd.read_excel(\"临时文件/statsfile.xlsx\")\n",
|
||||
"problems_indexes = list(df[df.columns[0]][2:])\n",
|
||||
"for p in problems_indexes:\n",
|
||||
" t = input(\"答题纸区域\"+p+\"的题号为(1-\"+str(len(problems_list))+\", a为自动对应点前的题号):\")\n",
|
||||
" if t.upper() == \"A\":\n",
|
||||
" correspondence_dict[p] = problems_list[int(re.findall(r\"([\\d]+?)\\.\",p)[0])-1]\n",
|
||||
" elif not (int(t)<1 or int(t)>len(problems_list)):\n",
|
||||
" correspondence_dict[p] = problems_list[int(t)-1] \n",
|
||||
"\n",
|
||||
"outputstr = \"usages\\n\\n\"\n",
|
||||
"for fp in file_pairs:\n",
|
||||
" #生成文件名tex_file和zip_file\n",
|
||||
" zip_file, tex_file = fp\n",
|
||||
"\n",
|
||||
"for sf in statfiles:\n",
|
||||
" #读取文件生成区域列表与难度列表\n",
|
||||
" classname = \"2023届高三\"+re.findall(r\"高三([\\d]*?)班\",sf)[0].zfill(2)+\"班\"\n",
|
||||
"\n",
|
||||
" #获得题号数据,保存在problems_list中\n",
|
||||
" with open(tex_file,\"r\",encoding = \"utf8\") as f:\n",
|
||||
" tex_data = f.read()\n",
|
||||
" problems_list = re.findall(r\"\\(([\\d]{6})\\)\",tex_data)\n",
|
||||
"\n",
|
||||
" #在zip文件中找到包含正确率数据的文件\n",
|
||||
" zf = zipfile.ZipFile(zip_file)\n",
|
||||
" statfiles = [f.filename for f in zf.filelist if \"试题分析\" in f.filename]\n",
|
||||
"\n",
|
||||
" #生成答题纸区域编号与题目ID的对应\n",
|
||||
" correspondence_dict = {}\n",
|
||||
" if \"statsfile.xlsx\" in os.listdir(\"临时文件\"):\n",
|
||||
" os.remove(\"临时文件/statsfile.xlsx\")\n",
|
||||
" extractedpath = Path(zf.extract(sf))\n",
|
||||
" extractedpath = Path(zf.extract(statfiles[0]))\n",
|
||||
" extractedpath.rename(\"临时文件/statsfile.xlsx\")\n",
|
||||
" df = pd.read_excel(\"临时文件/statsfile.xlsx\")\n",
|
||||
" difficulties = list(df[df.columns[9]][2:])\n",
|
||||
" problems_indexes = list(df[df.columns[0]][2:])\n",
|
||||
" #生成该班级题目ID对应难度列表组成的字典\n",
|
||||
" class_difficulty = {}\n",
|
||||
" for i in range(len(difficulties)):\n",
|
||||
" if problems_indexes[i] in correspondence_dict:\n",
|
||||
" if not correspondence_dict[problems_indexes[i]] in class_difficulty:\n",
|
||||
" class_difficulty[correspondence_dict[problems_indexes[i]]] = [difficulties[i]]\n",
|
||||
" else:\n",
|
||||
" class_difficulty[correspondence_dict[problems_indexes[i]]].append(difficulties[i])\n",
|
||||
" #添加到输出字符串中\n",
|
||||
" for id in class_difficulty:\n",
|
||||
" outputstr += id + \"\\n\" + date + \"\\t\" + classname + \"\\t\" + \"\\t\".join([(\"%.3f\" %float(v)) for v in class_difficulty[id]]) + \"\\n\\n\"\n",
|
||||
" for p in problems_indexes:\n",
|
||||
" t = input(os.path.split(zip_file)[-1][:4]+\"答题纸区域\"+p+\"的题号为(1-\"+str(len(problems_list))+\", a为自动对应点前的题号):\")\n",
|
||||
" if t.upper() == \"A\":\n",
|
||||
" correspondence_dict[p] = problems_list[int(re.findall(r\"([\\d]+?)\\.\",p)[0])-1]\n",
|
||||
" elif not (int(t)<1 or int(t)>len(problems_list)):\n",
|
||||
" correspondence_dict[p] = problems_list[int(t)-1] \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" for sf in statfiles:\n",
|
||||
" #读取文件生成区域列表与难度列表\n",
|
||||
" classname = \"2023届高三\"+re.findall(r\"高三([\\d]*?)班\",sf)[0].zfill(2)+\"班\"\n",
|
||||
" if \"statsfile.xlsx\" in os.listdir(\"临时文件\"):\n",
|
||||
" os.remove(\"临时文件/statsfile.xlsx\")\n",
|
||||
" extractedpath = Path(zf.extract(sf))\n",
|
||||
" extractedpath.rename(\"临时文件/statsfile.xlsx\")\n",
|
||||
" df = pd.read_excel(\"临时文件/statsfile.xlsx\")\n",
|
||||
" difficulties = list(df[df.columns[9]][2:])\n",
|
||||
" problems_indexes = list(df[df.columns[0]][2:])\n",
|
||||
" #生成该班级题目ID对应难度列表组成的字典\n",
|
||||
" class_difficulty = {}\n",
|
||||
" for i in range(len(difficulties)):\n",
|
||||
" if problems_indexes[i] in correspondence_dict:\n",
|
||||
" if not correspondence_dict[problems_indexes[i]] in class_difficulty:\n",
|
||||
" class_difficulty[correspondence_dict[problems_indexes[i]]] = [difficulties[i]]\n",
|
||||
" else:\n",
|
||||
" class_difficulty[correspondence_dict[problems_indexes[i]]].append(difficulties[i])\n",
|
||||
" #添加到输出字符串中\n",
|
||||
" for id in class_difficulty:\n",
|
||||
" outputstr += id + \"\\n\" + date + \"\\t\" + classname + \"\\t\" + \"\\t\".join([(\"%.3f\" %float(v)) for v in class_difficulty[id]]) + \"\\n\\n\"\n",
|
||||
"\n",
|
||||
"with open(\"临时文件/自动转换结果.txt\",\"w\",encoding = \"utf8\") as f:\n",
|
||||
" f.write(outputstr)\n",
|
||||
|
|
@ -77,6 +86,26 @@
|
|||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"''"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"t"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
|
@ -84,6 +113,26 @@
|
|||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"('数据导入作业文件', 'sj02.zip')"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"os.path.split(zip_file)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
|
@ -115,7 +164,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.15"
|
||||
"version": "3.9.15"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
|
|
|
|||
Reference in New Issue