{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "首行题目数量: 21\n",
      "剩余题目数量: 21\n"
     ]
    }
   ],
   "source": [
    "import os,re,json\n",
    "\n",
    "# --- Configuration: text file holding the ID list; only its first line is read ---\n",
    "id_list_file = \"文本文件/题号筛选.txt\"\n",
    "\n",
    "# --- Configuration: absolute paths of the folders whose .tex files hold the IDs to exclude ---\n",
    "mainpath = r\"C:/Users/Weiye/Documents/wwy sync/23届/\"\n",
    "\n",
    "used_path_list = [\n",
    "    mainpath + \"第一轮复习讲义/\",\n",
    "    mainpath + \"上学期测验卷/\",\n",
    "    mainpath + \"上学期周末卷/\",\n",
    "    mainpath + \"赋能/\",\n",
    "    mainpath + \"寒假作业/\",\n",
    "    mainpath + \"下学期测验卷/\",\n",
    "    mainpath + \"下学期周末卷/\",\n",
    "    mainpath + \"第二轮复习讲义/\",\n",
    "    # mainpath + \"简单题/\",  # currently not excluded\n",
    "]\n",
    "# --- End of configuration ---\n",
    "\n",
    "\n",
    "def extract_idlist(string):\n",
    "    \"\"\"Return every six-digit ID written in parentheses, e.g. (012345), in order of appearance.\"\"\"\n",
    "    return re.findall(r\"\\(([\\d]{6})\\)\",string)\n",
    "\n",
    "\n",
    "def generate_number_set(string,dict=None):\n",
    "    \"\"\"Expand a comma-separated ID specification into a list of zero-padded IDs.\n",
    "\n",
    "    Each item is either a single ID (\"123\") or an inclusive range\n",
    "    (\"120:125\"). All whitespace is removed before parsing, and empty items\n",
    "    (for example from a trailing comma or a blank line) are skipped so they\n",
    "    do not turn into a bogus \"000000\" entry.\n",
    "\n",
    "    Args:\n",
    "        string: the specification text.\n",
    "        dict: unused; kept (now optional) so existing two-argument calls still work.\n",
    "\n",
    "    Returns:\n",
    "        A list of six-character ID strings in input order; duplicates are kept.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if a range item does not have the form \"start:end\" with integer bounds.\n",
    "    \"\"\"\n",
    "    string = re.sub(r\"\\s\",\"\",string)\n",
    "    numbers_list = []\n",
    "    for item in string.split(\",\"):\n",
    "        if not item:\n",
    "            continue\n",
    "        if \":\" in item:\n",
    "            start,end = item.split(\":\")\n",
    "            numbers_list.extend(str(ind).zfill(6) for ind in range(int(start),int(end)+1))\n",
    "        else:\n",
    "            numbers_list.append(item.zfill(6))\n",
    "    return numbers_list\n",
    "\n",
    "\n",
    "# Load the problem database (JSON) into a dictionary.\n",
    "with open(r\"../题库0.3/Problems.json\",\"r\",encoding = \"utf8\") as f:\n",
    "    pro_dict = json.load(f)\n",
    "\n",
    "# Build the candidate ID list from the first line only. The results are appended\n",
    "# to this same file at the end of the cell, so parsing the whole file would fail\n",
    "# on those appended sections the next time the cell is run.\n",
    "with open(id_list_file,\"r\",encoding = \"utf8\") as f:\n",
    "    first_line = f.readline()\n",
    "\n",
    "id_list = generate_number_set(first_line,pro_dict)\n",
    "print(\"首行题目数量: \",len(id_list))\n",
    "\n",
    "# Collect every ID already used in the .tex files of the configured folders.\n",
    "# A set keeps the membership tests below cheap.\n",
    "used_id_set = set()\n",
    "for path in used_path_list:\n",
    "    for filename in os.listdir(path):\n",
    "        # Only real .tex sources; names such as \"a.tex.bak\" are ignored.\n",
    "        if not filename.endswith(\".tex\"):\n",
    "            continue\n",
    "        with open(os.path.join(path,filename),\"r\",encoding = \"utf8\") as texfile:\n",
    "            used_id_set.update(extract_idlist(texfile.read()))\n",
    "\n",
    "# Split the candidates into unused and already-used IDs, preserving input order.\n",
    "checked_id_list = [problem_id for problem_id in id_list if problem_id not in used_id_set]\n",
    "ripped_id_list = [problem_id for problem_id in id_list if problem_id in used_id_set]\n",
    "\n",
    "print(\"剩余题目数量: \",len(checked_id_list))\n",
    "\n",
    "# Append both lists to the ID list file.\n",
    "with open(id_list_file,\"a\",encoding = \"utf8\") as f:\n",
    "    f.write(\"\\n\\n\")\n",
    "    f.write(\"未使用题号:\\n\")\n",
    "    f.write(\",\".join(checked_id_list))\n",
    "    f.write(\"\\n\\n已使用题号:\\n\")\n",
    "    f.write(\",\".join(ripped_id_list))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mathdept",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.15"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "ff3c292c316ba85de6f1ad75f19c731e79d694e741b6f515ec18f14996fe48dc"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}