This repository has been archived on 2024-06-23. You can view files and clone it, but cannot push or open issues or pull requests.
mathdeptv2/工具/已用题号剔除.ipynb

138 lines
4.1 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"首行题目数量: 21\n",
"剩余题目数量: 21\n"
]
}
],
"source": [
"import os,re,json\n",
"\n",
"\"\"\"---设置题号列表所在文件, 仅第一行有用---\"\"\"\n",
"id_list_file = \"文本文件/题号筛选.txt\"\n",
"\"\"\"---设置题号列表文件结束---\"\"\"\n",
"\n",
"\"\"\"---设置要排除的题号所在的绝对路径---\"\"\"\n",
"mainpath = r\"C:/Users/Weiye/Documents/wwy sync/23届/\"\n",
"\n",
"used_path_list = [\n",
"mainpath + \"第一轮复习讲义/\",\n",
"mainpath + \"上学期测验卷/\",\n",
"mainpath + \"上学期周末卷/\",\n",
"mainpath + \"赋能/\",\n",
"mainpath + \"寒假作业/\",\n",
"mainpath + \"下学期测验卷/\",\n",
"mainpath + \"下学期周末卷/\",\n",
"mainpath + \"第二轮复习讲义/\",\n",
"# mainpath + \"简单题/\",\n",
"]\n",
"\"\"\"---路径设置完毕---\"\"\"\n",
"\n",
"# 从字符串中获取题号列表\n",
"def extract_idlist(string):\n",
" idlist = re.findall(r\"\\(([\\d]{6})\\)\",string)\n",
" return idlist\n",
"\n",
"# 生成题号列表\n",
"def generate_number_set(string,dict):\n",
" string = re.sub(r\"[\\n\\s]\",\"\",string)\n",
" string_list = string.split(\",\")\n",
" numbers_list = []\n",
" for s in string_list:\n",
" if not \":\" in s:\n",
" numbers_list.append(s.zfill(6))\n",
" else:\n",
" start,end = s.split(\":\")\n",
" for ind in range(int(start),int(end)+1):\n",
" numbers_list.append(str(ind).zfill(6))\n",
" return numbers_list\n",
"\n",
"#读取题库json文件并转化为字典\n",
"with open(r\"../题库0.3/Problems.json\",\"r\",encoding = \"utf8\") as f:\n",
" database = f.read()\n",
"pro_dict = json.loads(database)\n",
"\n",
"# 生成首行题号列表并输出数量\n",
"with open(id_list_file,\"r\",encoding = \"utf8\") as f:\n",
" data = f.read() + \"\\n\"\n",
"\n",
"# id_list = re.findall(r\"^([^\\n]*)\\n\",data)[0].split(\",\")\n",
"id_list = generate_number_set(data,pro_dict)\n",
"print(\"首行题目数量: \",len(id_list))\n",
"\n",
"# 生成已使用题号列表\n",
"used_id_list = []\n",
"\n",
"for path in used_path_list:\n",
" filelist = [f for f in os.listdir(path) if \".tex\" in f]\n",
" for f in filelist:\n",
" with open(path+f,\"r\",encoding = \"utf8\") as texfile:\n",
" texdata = texfile.read()\n",
" used_id_list += extract_idlist(texdata)\n",
"\n",
"# 去除已使用题号\n",
"checked_id_list = []\n",
"ripped_id_list = []\n",
"for id in id_list:\n",
" if not id in used_id_list:\n",
" checked_id_list.append(id)\n",
" else:\n",
" ripped_id_list.append(id)\n",
"\n",
"print(\"剩余题目数量: \",len(checked_id_list))\n",
"\n",
"# 写入文件\n",
"with open(id_list_file,\"a\",encoding = \"utf8\") as f:\n",
" f.write(\"\\n\\n\")\n",
" f.write(\"未使用题号:\\n\")\n",
" f.write(\",\".join(checked_id_list))\n",
" f.write(\"\\n\\n已使用题号:\\n\")\n",
" f.write(\",\".join(ripped_id_list))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "mathdept",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "ff3c292c316ba85de6f1ad75f19c731e79d694e741b6f515ec18f14996fe48dc"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}