{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import homework statistics into `metadata.txt`\n",
    "\n",
    "Reads the working directory `数据导入作业文件`, which must contain exactly one problem list (`.tex` or `.txt`) and exactly one `.zip` export, and converts the per-class spreadsheet figures into `usages` records.\n",
    "\n",
    "- The first non-empty line of the problem list is the date; every later line is `<answer-sheet area> <problem ID>`.\n",
    "- Every `试题分析` workbook in the archive is one class. Its `学生成绩` workbook decides whether the class is exported (`有效`) or skipped (`无效`).\n",
    "- The result is written to `临时文件/自动转换结果.txt` and `../文本文件/metadata.txt`; both are overwritten."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import io\n",
    "import os\n",
    "import re\n",
    "import time\n",
    "import zipfile\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "Everything that depends on the export layout or on the school year lives here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Working directory: must hold exactly one .tex/.txt problem list and one .zip export.\n",
    "INPUT_DIR = Path(\"数据导入作业文件\")\n",
    "# Both files receive the same text and are overwritten on every run.\n",
    "OUTPUT_FILES = [Path(\"临时文件/自动转换结果.txt\"), Path(\"../文本文件/metadata.txt\")]\n",
    "\n",
    "PROBLEM_FILE_SUFFIXES = (\".tex\", \".txt\")\n",
    "ARCHIVE_SUFFIXES = (\".zip\",)\n",
    "\n",
    "# Archive members whose name contains this marker hold the per-problem statistics.\n",
    "STATS_MARKER = \"试题分析\"\n",
    "# The hand-in workbook of a class shares the statistics member's name up to\n",
    "# SUBJECT_MARKER and ends with HANDIN_SUFFIX.\n",
    "SUBJECT_MARKER = \"数学\"\n",
    "HANDIN_SUFFIX = \"学生成绩.xlsx\"\n",
    "\n",
    "# Class labels are CLASS_PREFIX + zero-padded class number + 班.\n",
    "CLASS_PREFIX = \"2025届高一\"\n",
    "CLASS_NUMBER_PATTERN = r\"高一([\\d]*?)班\"\n",
    "\n",
    "# Layout of a statistics sheet as read by pandas: data starts at row position 2,\n",
    "# column 0 holds the answer-sheet area label and column 9 the figure that is exported.\n",
    "# NOTE(review): column 9 was called `difficulties` in the original code; presumably\n",
    "# it is the class's score rate per problem - confirm against a real export.\n",
    "STATS_FIRST_DATA_ROW = 2\n",
    "STATS_AREA_COLUMN = 0\n",
    "STATS_VALUE_COLUMN = 9\n",
    "\n",
    "# A class is exported unless the row 66% of the way down its hand-in sheet is\n",
    "# marked absent in this column.\n",
    "# NOTE(review): presumably the sheet is sorted so that absentees come last, making\n",
    "# this a check for more than a third of the class missing - confirm.\n",
    "HANDIN_STATUS_COLUMN = \"Unnamed: 4\"\n",
    "HANDIN_PROBE_FRACTION = 0.66\n",
    "ABSENT_MARK = \"缺考\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers\n",
    "\n",
    "Small pure functions for the individual steps, followed by `build_usages`, which ties them together."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_single_file(directory, suffixes):\n",
    "    \"\"\"Return the only file in ``directory`` whose extension is in ``suffixes``.\n",
    "\n",
    "    Raises:\n",
    "        FileNotFoundError: no file has one of the extensions.\n",
    "        ValueError: several files match; choosing one would depend on the\n",
    "            order of the directory listing.\n",
    "    \"\"\"\n",
    "    matches = sorted(\n",
    "        path\n",
    "        for path in Path(directory).iterdir()\n",
    "        if path.is_file() and path.suffix.lower() in suffixes\n",
    "    )\n",
    "    wanted = \"/\".join(suffixes)\n",
    "    if not matches:\n",
    "        raise FileNotFoundError(f\"no {wanted} file in {directory}\")\n",
    "    if len(matches) > 1:\n",
    "        found = \", \".join(path.name for path in matches)\n",
    "        raise ValueError(f\"expected one {wanted} file in {directory}, found: {found}\")\n",
    "    return matches[0]\n",
    "\n",
    "\n",
    "def parse_problem_list(text):\n",
    "    \"\"\"Split the problem list into its date line and an area -> problem ID map.\n",
    "\n",
    "    Blank lines are ignored and every run of whitespace inside a line is\n",
    "    collapsed to one tab. The first remaining line is returned as the date.\n",
    "    Each later line is cut at its first tab into the answer-sheet area (key)\n",
    "    and the rest of the line (value). Lines without a tab are skipped and a\n",
    "    repeated area keeps its last entry.\n",
    "\n",
    "    Returns:\n",
    "        A ``(date, area_to_problem)`` tuple.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: ``text`` has no non-blank line.\n",
    "    \"\"\"\n",
    "    lines = [re.sub(r\"\\s+\", \"\\t\", raw.strip()) for raw in text.split(\"\\n\") if raw.strip()]\n",
    "    if not lines:\n",
    "        raise ValueError(\"the problem list is empty\")\n",
    "    date, entries = lines[0], lines[1:]\n",
    "    area_to_problem = {}\n",
    "    for entry in entries:\n",
    "        area, tab, problem_id = entry.partition(\"\\t\")\n",
    "        if tab:\n",
    "            area_to_problem[area] = problem_id\n",
    "    return date, area_to_problem\n",
    "\n",
    "\n",
    "def area_key(value):\n",
    "    \"\"\"Turn a spreadsheet cell into the text used as key in the problem list.\n",
    "\n",
    "    Returns ``None`` for an empty cell. A float with an integral value is\n",
    "    written without its fractional part so that a numeric cell 3.0 matches\n",
    "    the label 3; any other value is converted with ``str`` and stripped.\n",
    "    \"\"\"\n",
    "    if pd.isna(value):\n",
    "        return None\n",
    "    if isinstance(value, float) and value.is_integer():\n",
    "        return str(int(value))\n",
    "    return str(value).strip()\n",
    "\n",
    "\n",
    "def class_name_for(stat_member, prefix=CLASS_PREFIX):\n",
    "    \"\"\"Build the class label from the class number in an archive member name.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: the name does not match ``CLASS_NUMBER_PATTERN``.\n",
    "    \"\"\"\n",
    "    found = re.search(CLASS_NUMBER_PATTERN, stat_member)\n",
    "    if found is None:\n",
    "        raise ValueError(f\"cannot find a class number in archive member {stat_member!r}\")\n",
    "    return prefix + found.group(1).zfill(2) + \"班\"\n",
    "\n",
    "\n",
    "def handin_member_for(stat_member, subject=SUBJECT_MARKER):\n",
    "    \"\"\"Derive the hand-in workbook name that belongs to a statistics member.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: ``subject`` does not occur in ``stat_member``. Without this\n",
    "            check ``str.find`` would return -1 and silently drop the last character.\n",
    "    \"\"\"\n",
    "    cut = stat_member.find(subject)\n",
    "    if cut < 0:\n",
    "        raise ValueError(f\"{subject!r} does not occur in archive member {stat_member!r}\")\n",
    "    return stat_member[:cut] + HANDIN_SUFFIX\n",
    "\n",
    "\n",
    "def read_workbook(archive, member):\n",
    "    \"\"\"Load one archive member as a DataFrame without unpacking it to disk.\n",
    "\n",
    "    ``ZipFile.read`` raises ``KeyError`` when ``member`` is not in the archive.\n",
    "    \"\"\"\n",
    "    return pd.read_excel(io.BytesIO(archive.read(member)))\n",
    "\n",
    "\n",
    "def class_handed_in(handin):\n",
    "    \"\"\"Return whether a class's hand-in sheet counts as handed in.\n",
    "\n",
    "    Looks at the row ``HANDIN_PROBE_FRACTION`` of the way down the sheet and\n",
    "    returns ``False`` when its ``HANDIN_STATUS_COLUMN`` cell reads\n",
    "    ``ABSENT_MARK``. A sheet without rows counts as not handed in.\n",
    "    \"\"\"\n",
    "    if len(handin) == 0:\n",
    "        return False\n",
    "    probe = handin.iloc[int(len(handin) * HANDIN_PROBE_FRACTION)][HANDIN_STATUS_COLUMN]\n",
    "    return str(probe) != ABSENT_MARK\n",
    "\n",
    "\n",
    "def collect_values(stats, area_to_problem):\n",
    "    \"\"\"Group the exported column of a statistics sheet by problem ID.\n",
    "\n",
    "    Rows whose area label is not a key of ``area_to_problem`` are ignored.\n",
    "    Labels are compared as exact text, so characters such as parentheses or\n",
    "    dots in a label have no special meaning.\n",
    "\n",
    "    Returns:\n",
    "        A dict mapping each problem ID to its values in sheet order.\n",
    "    \"\"\"\n",
    "    rows = stats.iloc[STATS_FIRST_DATA_ROW:]\n",
    "    grouped = {}\n",
    "    for area, value in zip(rows.iloc[:, STATS_AREA_COLUMN], rows.iloc[:, STATS_VALUE_COLUMN]):\n",
    "        problem_id = area_to_problem.get(area_key(area))\n",
    "        if problem_id is not None:\n",
    "            grouped.setdefault(problem_id, []).append(value)\n",
    "    return grouped\n",
    "\n",
    "\n",
    "def format_usage(problem_id, date, classname, values):\n",
    "    \"\"\"Format one usages record: the ID line, then date, class and the values to 3 decimals.\"\"\"\n",
    "    figures = \"\\t\".join(\"%.3f\" % float(value) for value in values)\n",
    "    return problem_id + \"\\n\" + date + \"\\t\" + classname + \"\\t\" + figures + \"\\n\\n\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_usages(input_dir=INPUT_DIR):\n",
    "    \"\"\"Convert the export in ``input_dir`` into the text of the usages file.\n",
    "\n",
    "    Prints each class label when its workbook is reached, followed by the\n",
    "    label with 有效 (exported) or 无效 (skipped).\n",
    "\n",
    "    Raises:\n",
    "        ValueError: the archive holds no statistics workbook. Raising here keeps\n",
    "            the caller from overwriting the output files with an empty result.\n",
    "    \"\"\"\n",
    "    problem_file = find_single_file(input_dir, PROBLEM_FILE_SUFFIXES)\n",
    "    archive_file = find_single_file(input_dir, ARCHIVE_SUFFIXES)\n",
    "    date, area_to_problem = parse_problem_list(problem_file.read_text(encoding=\"utf8\"))\n",
    "\n",
    "    parts = [\"usages\\n\\n\"]\n",
    "    # The context manager closes the archive even when a workbook fails to load.\n",
    "    with zipfile.ZipFile(archive_file) as archive:\n",
    "        stat_members = [name for name in archive.namelist() if STATS_MARKER in name]\n",
    "        if not stat_members:\n",
    "            raise ValueError(f\"no {STATS_MARKER} workbook in {archive_file}\")\n",
    "        for stat_member in stat_members:\n",
    "            classname = class_name_for(stat_member)\n",
    "            print(classname)\n",
    "            handin = read_workbook(archive, handin_member_for(stat_member))\n",
    "            if not class_handed_in(handin):\n",
    "                print(classname, \"无效\")\n",
    "                continue\n",
    "            stats = read_workbook(archive, stat_member)\n",
    "            for problem_id, values in collect_values(stats, area_to_problem).items():\n",
    "                parts.append(format_usage(problem_id, date, classname, values))\n",
    "            print(classname, \"有效\")\n",
    "    return \"\".join(parts)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run\n",
    "\n",
    "Nothing is written unless every class was processed without an error."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The guard keeps the cells above importable (for example after nbconvert)\n",
    "# without touching the file system; inside the notebook it is always true.\n",
    "if __name__ == \"__main__\":\n",
    "    usage_text = build_usages(INPUT_DIR)\n",
    "    for target in OUTPUT_FILES:\n",
    "        target.write_text(usage_text, encoding=\"utf8\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mathdept",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "42dd566da87765ddbe9b5c5b483063747fec4aacc5469ad554706e4b742e67b2"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}