{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import homework statistics\n",
    "\n",
    "Reads the six-digit problem IDs from the single `.tex`/`.txt` file and the per-class workbooks (zip members whose name contains `试题分析`) from the single `.zip` file in the working directory, asks which problem each answer-sheet region belongs to, and writes the resulting usage records to `临时文件/自动转换结果.txt` and `../文本文件/metadata.txt`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import time\n",
    "import zipfile\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Working directory: it must contain exactly one .tex (or .txt) file and one .zip file;\n",
    "# anything else in it is ignored.\n",
    "filepath = \"数据导入作业文件\"\n",
    "\n",
    "# Scratch directory that receives the extracted workbook and a copy of the result.\n",
    "TEMP_DIR = Path(\"临时文件\")\n",
    "STATS_COPY = TEMP_DIR / \"statsfile.xlsx\"\n",
    "RESULT_FILES = [TEMP_DIR / \"自动转换结果.txt\", Path(\"../文本文件/metadata.txt\")]\n",
    "\n",
    "# Text placed in front of the zero-padded class number when building the class label.\n",
    "CLASS_PREFIX = \"2023届高三\"\n",
    "\n",
    "# Positional layout of the statistics sheet, as relied upon by this notebook.\n",
    "REGION_COLUMN = 0      # answer-sheet region labels\n",
    "DIFFICULTY_COLUMN = 9  # difficulty values, written out with three decimals\n",
    "FIRST_DATA_ROW = 2     # rows before this position are skipped"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_input_file(directory, suffixes):\n",
    "    \"\"\"Return the first file (sorted by name) in `directory` with one of `suffixes`.\n",
    "\n",
    "    The real file extension is compared (case-insensitively) instead of looking\n",
    "    for a substring of the path, so a name such as \"a.tex.bak\" is not picked up.\n",
    "\n",
    "    Raises:\n",
    "        FileNotFoundError: no file in `directory` has a matching extension.\n",
    "    \"\"\"\n",
    "    candidates = sorted(\n",
    "        entry for entry in Path(directory).iterdir()\n",
    "        if entry.is_file() and entry.suffix.lower() in suffixes\n",
    "    )\n",
    "    if not candidates:\n",
    "        raise FileNotFoundError(\n",
    "            f\"no file with extension {' or '.join(suffixes)} in {str(directory)!r}\"\n",
    "        )\n",
    "    return candidates[0]\n",
    "\n",
    "\n",
    "def read_problem_ids(tex_path):\n",
    "    \"\"\"Return every six-digit ID written in parentheses in the text file, in order.\"\"\"\n",
    "    text = Path(tex_path).read_text(encoding=\"utf8\")\n",
    "    return re.findall(r\"\\(([\\d]{6})\\)\", text)\n",
    "\n",
    "\n",
    "def load_stats_frame(archive, member):\n",
    "    \"\"\"Copy one workbook out of the open zip archive and load it with pandas.\n",
    "\n",
    "    The member's bytes are written directly to STATS_COPY (replacing any\n",
    "    previous copy) rather than being extracted into the current directory and\n",
    "    renamed, so no stray folders are left behind and no rename can fail.\n",
    "    \"\"\"\n",
    "    TEMP_DIR.mkdir(parents=True, exist_ok=True)\n",
    "    STATS_COPY.write_bytes(archive.read(member))\n",
    "    return pd.read_excel(STATS_COPY)\n",
    "\n",
    "\n",
    "def region_labels(frame):\n",
    "    \"\"\"Return the answer-sheet region labels of a statistics sheet as a list.\"\"\"\n",
    "    return list(frame.iloc[FIRST_DATA_ROW:, REGION_COLUMN])\n",
    "\n",
    "\n",
    "def resolve_problem_number(answer, region, problem_count):\n",
    "    \"\"\"Translate one prompt answer into a 1-based problem number.\n",
    "\n",
    "    Returns:\n",
    "        An int in 1..problem_count, or None when the region is to be skipped\n",
    "        (blank answer, or an explicit number outside the range).\n",
    "\n",
    "    Raises:\n",
    "        ValueError: the answer is not a number, or automatic mode (\"a\") cannot\n",
    "            derive a usable number from the region label.\n",
    "    \"\"\"\n",
    "    answer = answer.strip()\n",
    "    if not answer:\n",
    "        return None\n",
    "    if answer.upper() == \"A\":\n",
    "        # Automatic mode: use the digits in front of the first dot of the label.\n",
    "        leading = re.findall(r\"([\\d]+?)\\.\", str(region))\n",
    "        if not leading:\n",
    "            raise ValueError(\"the region label has no number before a dot\")\n",
    "        number = int(leading[0])\n",
    "        # Reject 0 explicitly: index -1 would silently select the last problem.\n",
    "        if not 1 <= number <= problem_count:\n",
    "            raise ValueError(\"the number before the dot is outside the problem list\")\n",
    "        return number\n",
    "    number = int(answer)  # raises ValueError for non-numeric text\n",
    "    return number if 1 <= number <= problem_count else None\n",
    "\n",
    "\n",
    "def ask_region_mapping(regions, problem_ids):\n",
    "    \"\"\"Ask the user which problem each answer-sheet region belongs to.\n",
    "\n",
    "    Returns a dict mapping region label -> problem ID; skipped regions are left\n",
    "    out. An unusable answer is asked again instead of aborting the session.\n",
    "    \"\"\"\n",
    "    mapping = {}\n",
    "    for region in regions:\n",
    "        prompt = f\"答题纸区域{region}的题号为(1-{len(problem_ids)}, a为自动对应点前的题号):\"\n",
    "        while True:\n",
    "            try:\n",
    "                number = resolve_problem_number(input(prompt), region, len(problem_ids))\n",
    "            except ValueError as err:\n",
    "                print(f\"invalid answer ({err}); please try again\")\n",
    "                continue\n",
    "            break\n",
    "        if number is not None:\n",
    "            mapping[region] = problem_ids[number - 1]\n",
    "    return mapping\n",
    "\n",
    "\n",
    "def class_name_from(member):\n",
    "    \"\"\"Build the class label from the class number found in the zip member name.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: the member name does not contain the expected class pattern.\n",
    "    \"\"\"\n",
    "    found = re.search(r\"高三([\\d]*?)班\", member)\n",
    "    if found is None:\n",
    "        raise ValueError(f\"cannot find a class number in {member!r}\")\n",
    "    return CLASS_PREFIX + found.group(1).zfill(2) + \"班\"\n",
    "\n",
    "\n",
    "def usage_records(frame, mapping, date, classname):\n",
    "    \"\"\"Return the output text for one class.\n",
    "\n",
    "    The values of all regions mapped to the same problem ID are collected in\n",
    "    sheet order and written on one tab-separated line below that ID. Regions\n",
    "    without an entry in `mapping` are ignored.\n",
    "    \"\"\"\n",
    "    rows = frame.iloc[FIRST_DATA_ROW:]\n",
    "    by_problem = {}\n",
    "    for region, value in zip(rows.iloc[:, REGION_COLUMN], rows.iloc[:, DIFFICULTY_COLUMN]):\n",
    "        if region in mapping:\n",
    "            by_problem.setdefault(mapping[region], []).append(value)\n",
    "    return \"\".join(\n",
    "        problem_id + \"\\n\" + date + \"\\t\" + classname + \"\\t\"\n",
    "        + \"\\t\".join(\"%.3f\" % float(value) for value in values)\n",
    "        + \"\\n\\n\"\n",
    "        for problem_id, values in by_problem.items()\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run the import\n",
    "\n",
    "This cell is interactive: it prompts once for every answer-sheet region of the first workbook. A blank answer skips the region."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tex_file = find_input_file(filepath, (\".tex\", \".txt\"))\n",
    "zip_file = find_input_file(filepath, (\".zip\",))\n",
    "problems_list = read_problem_ids(tex_file)\n",
    "\n",
    "# One date stamp for the whole run, taken from a single clock reading.\n",
    "date = time.strftime(\"%Y%m%d\")\n",
    "\n",
    "# The context manager closes the archive even if a step below raises.\n",
    "with zipfile.ZipFile(zip_file) as zf:\n",
    "    statfiles = [\n",
    "        info.filename for info in zf.infolist()\n",
    "        if \"试题分析\" in info.filename and not info.is_dir()\n",
    "    ]\n",
    "    if not statfiles:\n",
    "        raise FileNotFoundError(f\"no member containing '试题分析' in {str(zip_file)!r}\")\n",
    "\n",
    "    # The region -> problem mapping is built from the regions of the first\n",
    "    # workbook and then reused for every class.\n",
    "    first_frame = load_stats_frame(zf, statfiles[0])\n",
    "    correspondence_dict = ask_region_mapping(region_labels(first_frame), problems_list)\n",
    "\n",
    "    outputstr = \"usages\\n\\n\"\n",
    "    for sf in statfiles:\n",
    "        outputstr += usage_records(\n",
    "            load_stats_frame(zf, sf), correspondence_dict, date, class_name_from(sf)\n",
    "        )\n",
    "\n",
    "for target in RESULT_FILES:\n",
    "    target.write_text(outputstr, encoding=\"utf8\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.8 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "d311ffef239beb3b8f3764271728f3972d7b090c974f8e972fcdeedf230299ac"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}