20230403 afternoon

2023-04-03 19:31:52 +08:00 · 2023-04-03 19:31:52 +08:00 · 69687a0601
parent 3a14be5815
commit 69687a0601
31 changed files with 2361 additions and 5310 deletions
--- a/工具/修改题目数据库.ipynb
+++ b/工具/修改题目数据库.ipynb
@ -19,7 +19,7 @@
   "source": [
    "import os,re,json\n",
    "\"\"\"这里编辑题号(列表)后将在vscode中打开窗口, 编辑后保存关闭, 随后运行第二个代码块\"\"\"\n",
-    "problems = \"31158:31196\"\n",
+    "problems = \"14085\"\n",
    "\n",
    "def generate_number_set(string,dict):\n",
    "    string = re.sub(r\"[\\n\\s]\",\"\",string)\n",
--- a/工具/修改题目数据库.py
+++ b/工具/修改题目数据库.py
@ -1,6 +1,6 @@
 import os,re,json
 """这里编辑题号(列表)后将在vscode中打开窗口, 编辑后保存关闭, 随后运行第二个代码块"""
-problems = "1"
+problems = "10000,10003"

 def generate_number_set(string,dict):
    string = re.sub(r"[\n\s]","",string)
--- a/工具/全局未标注相同题目检测.py
+++ b/工具/全局未标注相同题目检测.py
@ -0,0 +1,102 @@
+import os,re,difflib,Levenshtein,time,json
+
+# Similarity threshold: a pair scoring strictly above this value is reported as the same problem
+threshold = 0.99
+
+# Text file that receives the detected groups of identical problem ids
+outputfile = r"临时文件/相同题目列表.txt"
+
+def generate_number_set(string):
+    """Expand an id specification into a list of 6-digit, zero-padded id strings.
+
+    Commas separate segments; a segment is either a single id or a closed
+    integer range written ``start:end`` (both ends included). All whitespace
+    in the specification is ignored.
+
+    Empty segments (empty input, doubled or trailing commas) are skipped
+    instead of being turned into a bogus "000000" id.
+
+    Raises ValueError when a range segment is malformed (non-integer bound
+    or more than one colon).
+    """
+    string = re.sub(r"[\n\s]","",string)
+    numbers_list = []
+    for segment in string.split(","):
+        if not segment:
+            # Nothing between two commas (or empty input): no id to emit
+            continue
+        if ":" in segment:
+            start,end = segment.split(":")
+            numbers_list.extend(str(ind).zfill(6) for ind in range(int(start),int(end)+1))
+        else:
+            numbers_list.append(segment.zfill(6))
+    return numbers_list
+
+def pre_treating(string):
+    """Normalize a problem's source text before similarity comparison.
+
+    Removes, in this order: ``center`` environments together with their
+    content; the tokens ``bracket{n}``, ``blank{n}``, ``fourch``, ``twoch``
+    and ``onech``; whitespace, backslashes, braces, dollar signs, parentheses
+    and square brackets; the words ``displaystyle`` and ``overrightarrow``;
+    and the ASCII punctuation ``, . : ; ?``.
+    """
+    removal_patterns = (
+        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
+        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
+        r"[\s\\\{\}\$\(\)\[\]]",
+        r"[\n\t]",
+        r"(displaystyle)|(overrightarrow)",
+        r"[,\.:;?]",
+    )
+    # Order matters: macro names are matched before backslashes and braces disappear
+    for pattern in removal_patterns:
+        string = re.sub(pattern,"",string)
+    return string
+
+def difflab_get_equal_rate(str1, str2):
+    """Return the difflib ``SequenceMatcher`` ratio of the two strings.
+
+    The strings are compared exactly as given; no preprocessing happens here.
+    """
+    matcher = difflib.SequenceMatcher(None, str1, str2)
+    return matcher.ratio()
+
+def jaro_get_equal_rate(str1,str2):
+    """Return ``Levenshtein.jaro`` for the two strings, compared exactly as given."""
+    score = Levenshtein.jaro(str1,str2)
+    return score
+
+def Lev_get_equal_rate(str1,str2):
+    """Return ``Levenshtein.ratio`` for the two strings, compared exactly as given."""
+    score = Levenshtein.ratio(str1,str2)
+    return score
+
+
+
+
+# Similarity function used for the whole scan
+sim_test = jaro_get_equal_rate
+
+# Load the problem database
+with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
+    pro_dict = json.load(f)
+
+# Shallow copies of every problem whose "content" is normalized once up front
+pro_dict_treated = {}
+for pid, problem in pro_dict.items():
+    treated = problem.copy()
+    treated["content"] = pre_treating(treated["content"])
+    pro_dict_treated[pid] = treated
+
+
+print("题目数:",len(pro_dict))
+
+# Record the start time
+starttime = time.time()
+
+# One text chunk per detected group: "id,id,...\n\n"
+groups = []
+count = 0
+keys = list(pro_dict_treated)
+while len(keys) >= 2:
+    count += 1
+    if count % 500 == 0:
+        print(count)
+
+    # Compare the first remaining problem against all the ones after it
+    currentid = keys.pop(0)
+    content1 = pro_dict_treated[currentid]["content"]
+    marked_same = pro_dict[currentid]["same"]
+    marked_related = pro_dict[currentid]["related"]
+    # Only pairs not already recorded as same/related are tested
+    same = [
+        pid for pid in keys
+        if pid not in marked_same
+        and pid not in marked_related
+        and sim_test(content1,pro_dict_treated[pid]["content"]) > threshold
+    ]
+    if same:
+        # Matched ids leave the pool so they are not reported a second time
+        keys = [pid for pid in keys if pid not in same]
+        groups.append(currentid + "," + ",".join(same) + "\n\n")
+alike_problems = "".join(groups)
+
+endtime = time.time()
+print("耗时: %.3f秒" %(endtime-starttime))
+
+with open(outputfile,"w",encoding = "u8") as f:
+    f.write(alike_problems)
--- a/工具/关键字筛选题号.py
+++ b/工具/关键字筛选题号.py
@ -2,7 +2,7 @@ import os,re,json

 """---设置关键字, 同一field下不同选项为or关系, 同一字典中不同字段间为and关系, 不同字典间为or关系, _not表示列表中的关键字都不含, 同一字典中的数字用来供应同一字段不同的条件之间的and---"""
 keywords_dict_table = [
-    {"origin":["一模"]}
+    {"origin":["杨浦"]}
 ]
 """---关键字设置完毕---"""
 # 示例： keywords_dict_table = [
--- a/工具/分年级专用工具/小闲平台作业测验数据导入_2023届.ipynb
+++ b/工具/分年级专用工具/小闲平台作业测验数据导入_2023届.ipynb
@ -14,7 +14,7 @@
    "filepath = \"数据导入作业文件\"\n",
    "\n",
    "# date = str(time.localtime().tm_year)+str(time.localtime().tm_mon).zfill(2)+str(time.localtime().tm_mday).zfill(2)\n",
-    "date = \"20230329\"\n",
+    "date = \"20230331\"\n",
    "\n",
    "#生成文件名tex_file和zip_file\n",
    "files = [os.path.join(filepath,f) for f in os.listdir(filepath)]\n",
--- a/工具/剪贴板表格整理.ipynb
+++ b/工具/剪贴板表格整理.ipynb
@ -0,0 +1,116 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os,re\n",
+    "import win32clipboard as wc\n",
+    "import win32con\n",
+    "\n",
+    "# 获取剪切板内容\n",
+    "def getCopy():\n",
+    "    wc.OpenClipboard()\n",
+    "    t = wc.GetClipboardData(win32con.CF_UNICODETEXT)\n",
+    "    wc.CloseClipboard()\n",
+    "    return t\n",
+    "\n",
+    "# 写入剪切板内容\n",
+    "def setCopy(str):\n",
+    "    wc.OpenClipboard()\n",
+    "    wc.EmptyClipboard()\n",
+    "    wc.SetClipboardData(win32con.CF_UNICODETEXT, str)\n",
+    "    wc.CloseClipboard()\n",
+    "\n",
+    "def dollared(string):\n",
+    "    flag = True\n",
+    "    for c in string:\n",
+    "        if not c in \"1234567890.+-:[]()\":\n",
+    "            flag = False\n",
+    "            break\n",
+    "    if flag:\n",
+    "        string = \"$\" + string + \"$\"\n",
+    "    return string\n",
+    "\n",
+    "\n",
+    "\n",
+    "data = getCopy()\n",
+    "\n",
+    "data1 = \"\"\n",
+    "for c in data:\n",
+    "    if 65296 <= ord(c) < 65306:\n",
+    "        data1 += str(ord(c)-65296)\n",
+    "    else:\n",
+    "        data1 += c\n",
+    "data = data1\n",
+    "data = data.replace(\"．\",\".\").replace(\"：\",\":\")\n",
+    "elements = data.split(\"\\n\")\n",
+    "elements_per_line = int(elements.pop(-1)) #这里需要修改\n",
+    "contents = \"\\\\begin{center}\\n\\\\begin{tabular}{|\"\n",
+    "for i in range(elements_per_line):\n",
+    "    contents += \"c|\"\n",
+    "contents += \"}\\n\"\n",
+    "contents += r\"\\hline\"+\"\\n\"\n",
+    "col = 1\n",
+    "for element in elements:\n",
+    "    if col != 1:\n",
+    "        contents += \" & \"\n",
+    "    contents += dollared(element.strip())\n",
+    "    if col == elements_per_line:\n",
+    "        contents += r\" \\\\ \\hline\"+\"\\n\"\n",
+    "    col += 1\n",
+    "    if col > elements_per_line:\n",
+    "        col = 1\n",
+    "contents += \"\\\\end{tabular}\" + \"\\n\" + \"\\\\end{center}\"\n",
+    "\n",
+    "with open(\"临时文件/tablefile.txt\",\"w\",encoding = \"utf8\") as f:\n",
+    "    f.write(contents)\n",
+    "\n",
+    "setCopy(contents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pythontest",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.15"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "91219a98e0e9be72efb992f647fe78b593124968b75db0b865552d6787c8db93"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/工具/剪贴板表格整理.py
+++ b/工具/剪贴板表格整理.py
@ -0,0 +1,63 @@
+import os,re
+import win32clipboard as wc
+import win32con
+
+# Read the clipboard contents
+def getCopy():
+    """Return the clipboard's ``CF_UNICODETEXT`` data.
+
+    The clipboard is closed in a ``finally`` clause, so a failing read
+    (for example when the clipboard holds no text) cannot leave it open.
+    """
+    wc.OpenClipboard()
+    try:
+        return wc.GetClipboardData(win32con.CF_UNICODETEXT)
+    finally:
+        wc.CloseClipboard()
+
+# Write text to the clipboard
+def setCopy(str):
+    """Replace the clipboard contents with the text *str* (as ``CF_UNICODETEXT``).
+
+    The clipboard is closed in a ``finally`` clause, so a failure while
+    emptying or writing cannot leave it open.
+    """
+    wc.OpenClipboard()
+    try:
+        wc.EmptyClipboard()
+        wc.SetClipboardData(win32con.CF_UNICODETEXT, str)
+    finally:
+        wc.CloseClipboard()
+
+def dollared(string):
+    """Wrap *string* in ``$...$`` when it consists only of digits and ``.+-:[]()``.
+
+    A string containing any other character is returned unchanged. An empty
+    string is returned unchanged as well: wrapping it would emit ``$$``,
+    which LaTeX reads as a display-math delimiter rather than an empty cell.
+    """
+    math_chars = "1234567890.+-:[]()"
+    if string and all(c in math_chars for c in string):
+        return "$" + string + "$"
+    return string
+
+
+
+data = getCopy()
+
+# Map full-width digits (code points 65296-65305) to their ASCII counterparts
+data = "".join(str(ord(c)-65296) if 65296 <= ord(c) < 65306 else c for c in data)
+# Map the full-width period and colon to ASCII
+data = data.replace("．",".").replace("：",":")
+# The last line holds the number of columns. Trailing whitespace is removed
+# first so that a final newline in the clipboard text does not leave an empty
+# last element and make int() fail.
+elements = data.rstrip().split("\n")
+elements_per_line = int(elements.pop(-1)) # column count taken from the last line (original note: this may need changing)
+contents = "\\begin{center}\n\\begin{tabular}{|" + "c|" * elements_per_line + "}\n"
+contents += r"\hline"+"\n"
+# col is the 1-based column of the cell being written
+col = 1
+for element in elements:
+    if col != 1:
+        contents += " & "
+    contents += dollared(element.strip())
+    if col == elements_per_line:
+        # Row complete: terminate it and draw the rule below
+        contents += r" \\ \hline"+"\n"
+    col += 1
+    if col > elements_per_line:
+        col = 1
+contents += "\\end{tabular}" + "\n" + "\\end{center}"
+
+# Keep a copy on disk and put the table back on the clipboard
+with open("临时文件/tablefile.txt","w",encoding = "utf8") as f:
+    f.write(contents)
+
+setCopy(contents)
--- a/工具/局部相似题目检测.py
+++ b/工具/局部相似题目检测.py
@ -0,0 +1,129 @@
+import os,re,difflib,Levenshtein,time,json
+
+# IMPORTANT: id ranges of the new and the old problems (an id listed in both ranges counts as new)
+id_new_problems = "40000:41000"
+id_old_problems = "1:30000"
+# Similarity threshold: pairs scoring strictly above this value are reported
+threshold = 0.99
+
+def generate_number_set(string):
+    """Expand an id specification into a list of 6-digit, zero-padded id strings.
+
+    Commas separate segments; a segment is either a single id or a closed
+    integer range written ``start:end`` (both ends included). All whitespace
+    in the specification is ignored.
+
+    Empty segments (empty input, doubled or trailing commas) are skipped
+    instead of being turned into a bogus "000000" id.
+
+    Raises ValueError when a range segment is malformed (non-integer bound
+    or more than one colon).
+    """
+    string = re.sub(r"[\n\s]","",string)
+    numbers_list = []
+    for segment in string.split(","):
+        if not segment:
+            # Nothing between two commas (or empty input): no id to emit
+            continue
+        if ":" in segment:
+            start,end = segment.split(":")
+            numbers_list.extend(str(ind).zfill(6) for ind in range(int(start),int(end)+1))
+        else:
+            numbers_list.append(segment.zfill(6))
+    return numbers_list
+
+def pre_treating(string):
+    """Normalize a problem's source text before similarity comparison.
+
+    Removes, in this order: ``center`` environments together with their
+    content; the tokens ``bracket{n}``, ``blank{n}``, ``fourch``, ``twoch``
+    and ``onech``; whitespace, backslashes, braces, dollar signs, parentheses
+    and square brackets; the words ``displaystyle`` and ``overrightarrow``;
+    and the ASCII punctuation ``, . : ; ?``.
+    """
+    removal_patterns = (
+        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
+        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
+        r"[\s\\\{\}\$\(\)\[\]]",
+        r"[\n\t]",
+        r"(displaystyle)|(overrightarrow)",
+        r"[,\.:;?]",
+    )
+    # Order matters: macro names are matched before backslashes and braces disappear
+    for pattern in removal_patterns:
+        string = re.sub(pattern,"",string)
+    return string
+
+def difflab_get_equal_rate(str1, str2):
+    """Return the difflib ``SequenceMatcher`` ratio of the two strings.
+
+    The strings are compared exactly as given; no preprocessing happens here.
+    """
+    matcher = difflib.SequenceMatcher(None, str1, str2)
+    return matcher.ratio()
+
+def jaro_get_equal_rate(str1,str2):
+    """Return ``Levenshtein.jaro`` for the two strings, compared exactly as given."""
+    score = Levenshtein.jaro(str1,str2)
+    return score
+
+def Lev_get_equal_rate(str1,str2):
+    """Return ``Levenshtein.ratio`` for the two strings, compared exactly as given."""
+    score = Levenshtein.ratio(str1,str2)
+    return score
+
+
+
+
+# Similarity function used for every comparison below
+sim_test = jaro_get_equal_rate
+
+# Load the problem database
+with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
+    database = f.read()
+pro_dict = json.loads(database)
+
+# Build the old-problem and new-problem dictionaries.
+# The expanded id ranges are turned into sets so that each membership test is
+# O(1); testing against the raw lists scanned up to the whole range for every
+# id in the database.
+new_id_list_raw = generate_number_set(id_new_problems)
+old_id_list_raw = generate_number_set(id_old_problems)
+new_id_set = set(new_id_list_raw)
+old_id_set = set(old_id_list_raw)
+new_id_list = [pid for pid in pro_dict if pid in new_id_set]
+# An id present in both ranges is treated as new only
+old_id_list = [pid for pid in pro_dict if pid in old_id_set and pid not in new_id_set]
+# *_dict map id -> original record; *_dict_content map id -> normalized content
+new_problems_dict = {pid: pro_dict[pid] for pid in new_id_list}
+old_problems_dict = {pid: pro_dict[pid] for pid in old_id_list}
+new_problems_dict_content = {pid: pre_treating(pro_dict[pid]["content"]) for pid in new_id_list}
+old_problems_dict_content = {pid: pre_treating(pro_dict[pid]["content"]) for pid in old_id_list}
+print("旧题目数:",len(old_problems_dict),", 新题目数:",len(new_problems_dict))
+
+# Record the start time
+start_time = time.time()
+suspect_count = 0
+remarked = 0
+
+# Text chunks of the report, joined once at the end
+report_parts = []
+
+
+def _is_cross_marked(id_a,problem_a,id_b,problem_b):
+    """Return True when either problem already lists the other under "related" or "same"."""
+    return (id_a in problem_b["related"] or id_a in problem_b["same"]
+            or id_b in problem_a["related"] or id_b in problem_a["same"])
+
+
+def _report_entry(rate,id_a,content_a,id_b,content_b):
+    """Format one unmarked pair: the score, then both problems as "id content"."""
+    return ("%.4f" %rate) + "\n\n" + id_a + " " + content_a + "\n\n" + id_b + " " + content_b + "\n\n"
+
+
+# Compare every new problem against every old problem
+count = 0
+print("开始新题与旧题的比对")
+for id_new, new_problem in new_problems_dict.items():
+    count += 1
+    if count % 50 == 0:
+        print(count)
+    new_content = new_problems_dict_content[id_new]
+    for id_old, old_problem in old_problems_dict.items():
+        similar_rate = sim_test(new_content,old_problems_dict_content[id_old])
+        marked = _is_cross_marked(id_new,new_problem,id_old,old_problem)
+        if not (similar_rate > threshold or marked):
+            continue
+        suspect_count += 1
+        if marked:
+            # Already recorded in the database: only counted
+            remarked += 1
+        else:
+            report_parts.append(_report_entry(similar_rate,id_new,new_problem["content"],id_old,old_problem["content"]))
+
+# Compare the new problems with each other
+count = 0
+print("开始新题之间的比对")
+while len(new_problems_dict) >= 2:
+    count += 1
+    if count % 50 == 0:
+        print(count)
+    # Take the first remaining new problem out and compare it with the rest
+    current_problem = new_problems_dict.pop(next(iter(new_problems_dict)))
+    current_id = current_problem["id"]
+    current_problem_content = new_problems_dict_content[current_id]
+    for id_new, other_problem in new_problems_dict.items():
+        similar_rate = sim_test(new_problems_dict_content[id_new],current_problem_content)
+        marked = _is_cross_marked(id_new,other_problem,current_id,current_problem)
+        if not (similar_rate > threshold or marked):
+            continue
+        suspect_count += 1
+        if marked:
+            remarked += 1
+        else:
+            report_parts.append(_report_entry(similar_rate,id_new,other_problem["content"],current_id,current_problem["content"]))
+
+alike_problems = "".join(report_parts)
+
+# Record the end time and show the results
+end_time = time.time()
+print("总耗时:",end_time-start_time,"秒.")
+print("发现相似: ",suspect_count,", 其中已标注: ",remarked,".")
+
+with open("临时文件/相似题目.txt","w",encoding="utf8") as f:
+    f.write(alike_problems)
--- a/工具/工具面板.py
+++ b/工具/工具面板.py
@ -47,7 +47,7 @@ def run_command1():
        call(["python","试卷答案生成.py"])
    elif selectedtool == "单元标记转换":
        call(["python","单元标记转换.py"])
-    elif selectedtool == "目标清点":
+    elif selectedtool == "目标清点及清单生成":
        call(["python","单元课时目标题目数据清点.py"])
    elif selectedtool == "目标表体生成":
        call(["python","根据范围提取课时目标.py"])
@ -59,6 +59,28 @@ def run_command1():
        call(["python","讲义生成.py"])
    elif selectedtool == "课时目标划分信息汇总":
        call(["python","课时目标划分信息汇总.py"])
+    elif selectedtool == "识别题目类型":
+        call(["python","识别题目类型.py"])
+    elif selectedtool == "批量添加字段数据":
+        call(["python","批量添加字段数据.py"])
+    elif selectedtool == "目标挂钩清点":
+        call(["python","目标挂钩清点.py"])
+    elif selectedtool == "批量题号选题pdf生成":
+        call(["python","批量生成题目pdf.py"])  
+    elif selectedtool == "目标pdf生成":
+        call(["python","课时目标pdf生成.py"])
+    elif selectedtool == "剪贴板表格整理":
+        call(["python","剪贴板表格整理.py"])
+    elif selectedtool == "手动统计结果转换":
+        call(["python","手动统计结果转换.py"])
+    elif selectedtool == "新题相似相同比对":
+        call(["python","新题相似相同比对.py"])
+    elif selectedtool == "全局未标注相同题目检测":
+        call(["python","全局未标注相同题目检测.py"])
+    elif selectedtool == "局部相似题目检测":
+        call(["python","局部相似题目检测.py"])
+    elif selectedtool == "相同相似题目标注":
+        call(["python","相同相似题目标注.py"])
    LabelTool.config(text = selectedtool+"STEP1命令执行完毕")
    button1.place_forget()

@ -95,7 +117,12 @@ ImportMenu.add_command(label = "添加关联题目", command = lambda: SetButton
 # 设置 维护 菜单项
 MaintainenceMenu = Menu(menubar, tearoff = False)
 menubar.add_cascade(label = "维护", menu = MaintainenceMenu)
+MaintainenceMenu.add_command(label = "批量添加字段数据", command = lambda: SetButton("批量添加字段数据",1,["文本文件/metadata.txt","批量添加字段数据.py"]))
+MaintainenceMenu.add_command(label = "手动统计结果转换", command = lambda: SetButton("手动统计结果转换",1,["文本文件/metadata.txt","文本文件/手动统计结果.txt"]))
+MaintainenceMenu.add_separator()
 MaintainenceMenu.add_command(label = "修改题目", command = lambda: SetButton("修改题目数据库",2,["修改题目数据库.py"]))
+MaintainenceMenu.add_command(label = "识别题目类型", command = lambda: SetButton("识别题目类型",1,[]))
+

 # 设置 使用 菜单项
 UseMenu = Menu(menubar, tearoff = False)
@ -104,6 +131,7 @@ UseMenu.add_command(label = "关键字筛选题号", command = lambda: SetButton
 UseMenu.add_separator()
 UseMenu.add_command(label = "讲义试卷生成", command = lambda: SetButton("讲义生成",1,["讲义生成.py"]))
 UseMenu.add_command(label = "题号选题pdf生成", command = lambda: SetButton("题号选题pdf生成",1,["题号选题pdf生成.py"]))
+UseMenu.add_command(label = "批量题号选题pdf生成", command = lambda: SetButton("批量题号选题pdf生成",1,["批量生成题目pdf.py"]))
 UseMenu.add_command(label = "试卷答案生成", command = lambda: SetButton("试卷答案生成",1,["试卷答案生成.py"]))

 # 设置 目标及标签 菜单项
@ -111,16 +139,32 @@ ObjTagMenu = Menu(menubar, tearoff = False)
 menubar.add_cascade(label = "目标及标签", menu = ObjTagMenu)
 ObjTagMenu.add_command(label = "单元标记转换", command = lambda: SetButton("单元标记转换",1,["单元标记转换.py"]))
 ObjTagMenu.add_separator()
-ObjTagMenu.add_command(label = "目标清点", command = lambda: SetButton("目标清点",1,[]))
 ObjTagMenu.add_command(label = "目标表体生成", command = lambda: SetButton("目标表体生成",1,["根据范围提取课时目标.py"]))
+ObjTagMenu.add_command(label = "目标pdf生成", command = lambda: SetButton("目标pdf生成",1,["课时目标pdf生成.py"]))
 ObjTagMenu.add_command(label = "课时目标寻找题目", command = lambda: SetButton("课时目标寻找题目",1,["课时目标寻找题目.py"]))
 ObjTagMenu.add_command(label = "课时目标划分信息汇总", command = lambda: SetButton("课时目标划分信息汇总",1,[]))
+ObjTagMenu.add_separator()
+ObjTagMenu.add_command(label = "目标挂钩基础清点", command = lambda: SetButton("目标挂钩清点",1,[]))
+ObjTagMenu.add_command(label = "目标清点及清单生成", command = lambda: SetButton("目标清点及清单生成",1,[]))
+
+# 设置 相同相似比对 菜单项
+SimRelMenu = Menu(menubar, tearoff= False)
+menubar.add_cascade(label = "相同相似", menu = SimRelMenu)
+SimRelMenu.add_command(label = "新题相似相同比对", command = lambda: SetButton("新题相似相同比对",1,["新题相似相同比对.py"]))
+SimRelMenu.add_separator()
+SimRelMenu.add_command(label = "局部相似题目检测", command = lambda: SetButton("局部相似题目检测",1,["局部相似题目检测.py"]))
+SimRelMenu.add_command(label = "相同相似题目标注", command = lambda: SetButton("相同相似题目标注",1,["相同相似题目标注.py"]))
+SimRelMenu.add_separator()
+SimRelMenu.add_command(label = "全局未标注相同题目检测", command = lambda: SetButton("全局未标注相同题目检测",1,[]))
+
+

 # 设置 其他 菜单项
 OtherMenu = Menu(menubar, tearoff = False)
 menubar.add_cascade(label = "其他", menu = OtherMenu)
-OtherMenu.add_command(label = "mathpix预处理", command = lambda: SetButton("mathpix预处理",1,[]))
-OtherMenu.add_command(label = "带圈数字处理", command = lambda: SetButton("带圈数字处理",1,[]))
+OtherMenu.add_command(label = "剪贴板mathpix预处理", command = lambda: SetButton("mathpix预处理",1,[]))
+OtherMenu.add_command(label = "剪贴板带圈数字处理", command = lambda: SetButton("带圈数字处理",1,[]))
+OtherMenu.add_command(label = "剪贴板表格整理", command = lambda: SetButton("剪贴板表格整理",1,[]))


 menubar.add_command(label = "退出", command = root.destroy)
--- a/工具/手动统计结果转换.ipynb
+++ b/工具/手动统计结果转换.ipynb
@ -17,7 +17,7 @@
    "        numerals_list[i] = str_numeral\n",
    "    return \"\\t\".join(numerals_list)\n",
    "\n",
-    "with open(\"临时文件/统计结果.txt\",\"r\",encoding = \"utf8\") as f:\n",
+    "with open(\"文本文件/手动统计结果.txt\",\"r\",encoding = \"utf8\") as f:\n",
    "    data = f.read()\n",
    "\n",
    "blocks = re.findall(r\"\\[BEGIN\\]([\\s\\S]*?)\\[END\\]\",data)\n",
@ -43,7 +43,7 @@
    "    output_data += \"\\n\".join(results_dict[id])\n",
    "    output_data += \"\\n\\n\"\n",
    "\n",
-    "with open(\"临时文件/字段数据.txt\",\"w\",encoding = \"utf8\") as f:\n",
+    "with open(\"文本文件/metadata.txt\",\"w\",encoding = \"utf8\") as f:\n",
    "    f.write(output_data)\n"
   ]
  },
--- a/工具/手动统计结果转换.py
+++ b/工具/手动统计结果转换.py
@ -0,0 +1,39 @@
+import os,re,json
+
+def form_decimals(string):
+    """Reformat whitespace-separated numbers as tab-separated values with three decimals.
+
+    Raises ValueError when a token cannot be parsed by ``float``.
+    """
+    tab_separated = re.sub(r"[\s]+",r"\t",string)
+    tokens = [tok for tok in tab_separated.split("\t") if len(tok)>0]
+    return "\t".join("%.3f" %float(tok) for tok in tokens)
+
+with open("文本文件/手动统计结果.txt","r",encoding = "utf8") as f:
+    data = f.read()
+
+# Each group of records is delimited by [BEGIN] ... [END]
+blocks = re.findall(r"\[BEGIN\]([\s\S]*?)\[END\]",data)
+
+# Maps a 6-digit problem id to its list of "date<TAB>class<TAB>values" lines
+results_dict = {}
+for block in blocks:
+    temp_list = [l.strip() for l in block.split("\n") if l.strip() != ""]
+    for line in temp_list:
+        if line[:2] == "##":
+            # "##" line: date applying to the lines that follow
+            date = line[2:].strip()
+        elif line[:2] == "**":
+            # "**" line: class applying to the lines that follow
+            current_class = line[2:].strip()
+        else:
+            # Data line: problem id, whitespace, then the numeric values.
+            # NOTE: a data line placed before any "##"/"**" line still raises NameError.
+            separator = re.search(r"\s",line)
+            if separator is None:
+                # An id with no values cannot form a usage entry; report it instead of crashing
+                print("已跳过无数据的行:",line)
+                continue
+            separating_pos = separator.start()
+            problem_id = line[:separating_pos].zfill(6)
+            # form_decimals normalizes the whitespace between the values itself
+            usage = date + "\t" + current_class + "\t" + form_decimals(line[separating_pos:])
+            results_dict.setdefault(problem_id,[]).append(usage)
+
+# First line is the field name, then one blank-line-terminated record per problem
+output_data = "usages\n" + "".join(
+    problem_id + "\n" + "\n".join(usages) + "\n\n"
+    for problem_id, usages in results_dict.items()
+)
+
+with open("文本文件/metadata.txt","w",encoding = "utf8") as f:
+    f.write(output_data)
--- a/工具/批量添加字段数据.py
+++ b/工具/批量添加字段数据.py
@ -0,0 +1,83 @@
+import os,re,json
+
+"""---明确数据文件位置---"""
+# Location of the input data file
+datafile = "文本文件/metadata.txt"
+# Format: the first line is the field name; records are separated by blank lines; within a record, list items are separated by single newlines
+"""---文件位置结束---"""
+
+def trim(string):
+    """Return *string* without leading and trailing spaces, tabs and newlines."""
+    return string.strip(" \t\n")
+def FloatToInt(string):
+    """Parse *string* as a float; return the nearest int when it lies within 0.01 of it, else the float."""
+    value = float(string)
+    nearest = round(value)
+    if abs(value-nearest)<0.01:
+        return nearest
+    return value
+
+with open(datafile,"r",encoding="utf8") as f:
+    data = f.read().strip()
+# The first line names the target field; everything after it is record data
+pos = data.index("\n")
+field = data[:pos].strip()
+appending_data = data[pos:]
+
+with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
+    database = f.read()
+pro_dict = json.loads(database)
+with open(r"../题库0.3/LessonObj.json","r",encoding = "utf8") as f:
+    database = f.read()
+obj_dict = json.loads(database)
+
+# This field list may need to be updated
+fields = ["content","objs","tags","genre","ans","solution","duration","usages","origin","edit","same","related","remark","space"]
+
+if field in fields:
+    # The value type found in record "000001" decides how data is merged for every record
+    field_type = type(pro_dict["000001"][field])
+    datalist = [record.strip() for record in appending_data.split("\n\n") if len(trim(record)) > 0]
+    for record in datalist:
+        # A record starts with the problem id; the rest is the data to add
+        pid = re.findall(r"^[\d]{1,}",record)[0]
+        value = record[len(pid):].strip()
+        pid = pid.zfill(6)
+        if not pid in pro_dict:
+            # Unknown id: stop processing the remaining records (changes made so far are still saved below)
+            print("题号:",pid,"不在数据库中.")
+            break
+
+        # String fields
+        if field_type == str:
+            if value in pro_dict[pid][field]:
+                print("题号:",pid,", 字段:",field,"中已有该数据:",value)
+            elif field == "ans" or field == "space":
+                # "ans" and "space" are replaced rather than appended to.
+                # The previous condition `... and field == "ans" or field == "space"`
+                # bound the `or` last, so "space" took this branch whatever its type.
+                pro_dict[pid][field] = value
+                print("题号:",pid,", 字段:",field,"中已修改数据:",value)
+            else:
+                # Other string fields get the new data appended on a new line
+                origin_data = pro_dict[pid][field]
+                pro_dict[pid][field] = trim(origin_data + "\n" + value)
+                print("题号:",pid,", 字段:",field,"中已添加数据:",value)
+
+        # Numeric fields
+        elif field_type == int or field_type == float:
+            difference = abs(float(value) - pro_dict[pid][field])
+            if difference<0.01:
+                print("题号:",pid,", 字段:",field,"中已有该数据:",FloatToInt(value))
+            elif difference>=0.01:
+                pro_dict[pid][field] = FloatToInt(value)
+                print("题号:",pid,", 字段:",field,"中已修改数据:",FloatToInt(value))
+
+        # List fields: every line of the record is one item
+        elif field_type == list:
+            cell_data_list = [d.strip() for d in value.split("\n")]
+            for cell_data in cell_data_list:
+                if cell_data in pro_dict[pid][field]:
+                    print("题号:",pid,", 字段:",field,"中已有该数据:",cell_data)
+                elif not field == "objs":
+                    pro_dict[pid][field].append(cell_data)
+                    print("题号:",pid,", 字段:",field,"中已添加数据:",cell_data)
+                else:
+                    # "objs" items must be keys of LessonObj.json, or the literal KNONE (any case)
+                    if not cell_data in obj_dict and not cell_data.upper() == "KNONE":
+                        print("题号:",pid,", 字段:",field,"目标编号有误:",cell_data)
+                    else:
+                        pro_dict[pid][field].append(cell_data.upper())
+                        print("题号:",pid,", 字段:",field,"中已添加数据:",cell_data.upper())
+else:
+    print("字段名有误")
+
+with open(r"../题库0.3/Problems.json","w",encoding = "utf8") as f:
+    f.write(json.dumps(pro_dict,indent=4,ensure_ascii=False))
--- a/工具/批量添加题库字段数据.ipynb
+++ b/工具/批量添加题库字段数据.ipynb
@ -9,111 +9,126 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "题号: 040422 , 字段: ans 中已修改数据: $(-1,3)$\n",
-      "题号: 040423 , 字段: ans 中已修改数据: $(-\\infty,-1)$\n",
-      "题号: 040424 , 字段: ans 中已修改数据: $\\sqrt{5}$\n",
-      "题号: 040425 , 字段: ans 中已修改数据: $5$\n",
-      "题号: 040426 , 字段: ans 中已修改数据: $\\dfrac{24}{7}$\n",
-      "题号: 040427 , 字段: ans 中已修改数据: $0.12$\n",
-      "题号: 040428 , 字段: ans 中已修改数据: $52$\n",
-      "题号: 040429 , 字段: ans 中已修改数据: $2$\n",
-      "题号: 040430 , 字段: ans 中已修改数据: $60^{\\circ}$\n",
-      "题号: 040431 , 字段: ans 中已修改数据: $\\dfrac{4}{5}$\n",
-      "题号: 040432 , 字段: ans 中已修改数据: $\\dfrac{\\sqrt{10}}{10}$\n",
-      "题号: 040433 , 字段: ans 中已修改数据: $(-\\infty,-\\frac{2}{\\mathrm{e}}) \\cup(\\frac{2}{\\mathrm{e}},+\\infty)$\n",
-      "题号: 040434 , 字段: ans 中已修改数据: D\n",
-      "题号: 040435 , 字段: ans 中已修改数据: C\n",
-      "题号: 040436 , 字段: ans 中已修改数据: B\n",
-      "题号: 040437 , 字段: ans 中已修改数据: D\n",
-      "题号: 040438 , 字段: ans 中已修改数据: (1) 略; (2) $\\dfrac{\\sqrt{15}}{15}$\n",
-      "题号: 040439 , 字段: ans 中已修改数据: (1) $a_1+a_2=6$, 公差为$4$; (2) $S_n=\\begin{cases}n^2+n, & n=2 k, \\\\ n^2+n+2, & n=2 k-1\\end{cases}$, $k$为正整数\n",
-      "题号: 040440 , 字段: ans 中已修改数据: (1) $y=0.1 x-0.2$; (2) (i) $5900$万元; (ii) $[\\dfrac{1}{3}, \\dfrac{5}{8}]$\n",
-      "题号: 040441 , 字段: ans 中已修改数据: (1) $\\dfrac{x^2}{8}+\\dfrac{y^2}{4}=1$; (2) $y= \\pm \\dfrac{\\sqrt{30}}{10} x+1$; (3) $-\\dfrac{1}{2}$\n",
-      "题号: 040442 , 字段: ans 中已修改数据: (1) $y=(1+a) x$; (2) $-1$; (3) $(-\\infty,-1)$\n",
-      "题号: 040443 , 字段: ans 中已修改数据: $\\{1,2,3\\}$\n",
-      "题号: 040444 , 字段: ans 中已修改数据: $\\sqrt{10}$\n",
-      "题号: 040445 , 字段: ans 中已修改数据: $2 \\pi$\n",
-      "题号: 040446 , 字段: ans 中已修改数据: $-\\dfrac{1}{3}$\n",
-      "题号: 040447 , 字段: ans 中已修改数据: $24$\n",
-      "题号: 040448 , 字段: ans 中已修改数据: $-5$\n",
-      "题号: 040449 , 字段: ans 中已修改数据: $(\\dfrac{7}{2}, 0, \\dfrac{7}{2})$\n",
-      "题号: 040450 , 字段: ans 中已修改数据: $(-\\dfrac{1}{3}, 1)$\n",
-      "题号: 040451 , 字段: ans 中已修改数据: $17$或$18$\n",
-      "题号: 040452 , 字段: ans 中已修改数据: 支持\n",
-      "题号: 040453 , 字段: ans 中已修改数据: $\\sqrt{6}-\\sqrt{3}$\n",
-      "题号: 040454 , 字段: ans 中已修改数据: $\\{(4,-4),(4,1)\\}$\n",
-      "题号: 040455 , 字段: ans 中已修改数据: C\n",
-      "题号: 040456 , 字段: ans 中已修改数据: A\n",
-      "题号: 040457 , 字段: ans 中已修改数据: B\n",
-      "题号: 040458 , 字段: ans 中已修改数据: B\n",
-      "题号: 040459 , 字段: ans 中已修改数据: (1) $[-\\dfrac{\\pi}{2}+k \\pi, k \\pi]$, $k \\in \\mathbf{Z}$; (2) $\\dfrac{15 \\sqrt{3}}{4}$\n",
-      "题号: 040460 , 字段: ans 中已修改数据: (1) 略; (2) $\\dfrac{1}{2}$\n",
-      "题号: 040461 , 字段: ans 中已修改数据: (1) $\\dfrac{3}{256}$; (2) 选择A同学\n",
-      "题号: 040462 , 字段: ans 中已修改数据: (1) $\\sqrt{3}$; (2) 略; (3) 存在, $k=\\dfrac{\\sqrt{3}}{2}$, $P(-3 \\sqrt{3}, 0)$\n",
-      "题号: 040463 , 字段: ans 中已修改数据: (1) $y=2 x$; (2) $(-\\infty, 0] \\cup[1,+\\infty)$; (3) $(-\\infty,\\dfrac{2}{\\mathrm{e}})$\n",
-      "题号: 040464 , 字段: ans 中已修改数据: $(-2,1]$\n",
-      "题号: 040465 , 字段: ans 中已修改数据: $1$\n",
-      "题号: 040466 , 字段: ans 中已修改数据: $\\dfrac{2 \\pi}{3}$\n",
-      "题号: 040467 , 字段: ans 中已修改数据: $\\sqrt{3}$\n",
-      "题号: 040468 , 字段: ans 中已修改数据: $(-\\infty,-2] \\cup[0,2]$\n",
-      "题号: 040469 , 字段: ans 中已修改数据: $y^2=4 x$\n",
-      "题号: 040470 , 字段: ans 中已修改数据: $\\dfrac{14}{15}$\n",
-      "题号: 040471 , 字段: ans 中已修改数据: $5$\n",
-      "题号: 040472 , 字段: ans 中已修改数据: $\\dfrac{6+2 \\sqrt{3}}{5}$\n",
-      "题号: 040473 , 字段: ans 中已修改数据: $1-2^{2023}$\n",
-      "题号: 040474 , 字段: ans 中已修改数据: $\\dfrac{2 \\sqrt{3}}{3}$\n",
-      "题号: 040475 , 字段: ans 中已修改数据: $6+4 \\sqrt{2}$\n",
-      "题号: 040476 , 字段: ans 中已修改数据: B\n",
-      "题号: 040477 , 字段: ans 中已修改数据: D\n",
-      "题号: 040478 , 字段: ans 中已修改数据: C\n",
-      "题号: 040479 , 字段: ans 中已修改数据: B\n",
-      "题号: 040480 , 字段: ans 中已修改数据: (1) 略; (2) $\\arccos \\dfrac{2}{3}$\n",
-      "题号: 040481 , 字段: ans 中已修改数据: (1) $[k \\pi-\\dfrac{3 \\pi}{8}, k \\pi+\\dfrac{\\pi}{8}]$,  $k \\in \\mathbf{Z}$; (2) $\\{x | x=\\dfrac{k \\pi}{2}+\\dfrac{\\pi}{8},\\ k \\in \\mathbf{Z}\\}$\n",
-      "题号: 040482 , 字段: ans 中已修改数据: (1) $1.4$米; (2) $\\arcsin \\dfrac{5 \\sqrt{3}}{14}$\n",
-      "题号: 040483 , 字段: ans 中已修改数据: (1) $\\dfrac{6 \\sqrt{5}}{5}$; (2) 略; (3) $\\dfrac{3 \\sqrt{3}}{4}$\n",
-      "题号: 040484 , 字段: ans 中已修改数据: (1) 略; (2) $a_n=\\dfrac{(n+3)(n+4)}{2}$, $b_n=\\dfrac{(n+4)^2}{2}$; (3) $(-\\infty,1]$\n",
-      "题号: 040485 , 字段: ans 中已修改数据: $4$\n",
-      "题号: 040486 , 字段: ans 中已修改数据: 四\n",
-      "题号: 040487 , 字段: ans 中已修改数据: $2$\n",
-      "题号: 040488 , 字段: ans 中已修改数据: $\\dfrac 23$\n",
-      "题号: 040489 , 字段: ans 中已修改数据: $12$\n",
-      "题号: 040490 , 字段: ans 中已修改数据: $40$\n",
-      "题号: 040491 , 字段: ans 中已修改数据: $2$\n",
-      "题号: 040492 , 字段: ans 中已修改数据: $72$\n",
-      "题号: 040493 , 字段: ans 中已修改数据: $3$\n",
-      "题号: 040494 , 字段: ans 中已修改数据: $\\dfrac{4\\pi}3$\n",
-      "题号: 040495 , 字段: ans 中已修改数据: $\\sqrt{3}$\n",
-      "题号: 040496 , 字段: ans 中已修改数据: $-2+\\sqrt{7}$\n",
-      "题号: 040497 , 字段: ans 中已修改数据: B\n",
-      "题号: 040498 , 字段: ans 中已修改数据: C\n",
-      "题号: 040499 , 字段: ans 中已修改数据: D\n",
-      "题号: 040500 , 字段: ans 中已修改数据: D\n",
-      "题号: 040501 , 字段: ans 中已修改数据: (1) 证明略; (2) $\\dfrac{\\sqrt{14}}7$\n",
-      "题号: 040502 , 字段: ans 中已修改数据: (1) $y=0.072x-0.046$; (2) (i) $1060$万; (ii) $(\\dfrac 13,\\dfrac 58]$\n",
-      "题号: 040503 , 字段: ans 中已修改数据: (1) $[k\\pi,\\dfrac\\pi 4+k\\pi]$, $k\\in \\mathbf{Z}$; (2) $\\dfrac\\pi 4$\n",
-      "题号: 040504 , 字段: ans 中已修改数据: (1) $\\dfrac{x^2}{4}-y^2=1$; (2) 存在, $t=\\pm \\sqrt{3}$或$t=\\pm \\sqrt{\\dfrac{76}{15}}$; (3) $(-8-5\\sqrt{3},-\\dfrac 12)\\cup (\\dfrac 12,-8+5\\sqrt{3})$\n",
-      "题号: 040505 , 字段: ans 中已修改数据: (1) $f(g(x))$的导函数为$y=-a\\sin x$, $g(f(x))$的导函数为$y=-a\\sin (ax)$; (2) $(-\\infty,-1]$; (3) $[\\dfrac 32,3)$\n",
-      "题号: 040506 , 字段: ans 中已修改数据: $4$\n",
-      "题号: 040507 , 字段: ans 中已修改数据: $1$\n",
-      "题号: 040508 , 字段: ans 中已修改数据: $24$\n",
-      "题号: 040509 , 字段: ans 中已修改数据: $x^2+(y-1)^2=4$\n",
-      "题号: 040510 , 字段: ans 中已修改数据: $(-1,1]$\n",
-      "题号: 040511 , 字段: ans 中已修改数据: $300(4+\\sqrt{3})$\n",
-      "题号: 040512 , 字段: ans 中已修改数据: $\\dfrac{1}{2}$\n",
-      "题号: 040513 , 字段: ans 中已修改数据: $92$\n",
-      "题号: 040514 , 字段: ans 中已修改数据: $\\dfrac{1}{2}$\n",
-      "题号: 040515 , 字段: ans 中已修改数据: $\\dfrac{\\sqrt{21}}{6}$\n",
-      "题号: 040516 , 字段: ans 中已修改数据: $2 \\sqrt{2}$\n",
-      "题号: 040517 , 字段: ans 中已修改数据: $2$\n",
-      "题号: 040518 , 字段: ans 中已修改数据: C\n",
-      "题号: 040519 , 字段: ans 中已修改数据: A\n",
-      "题号: 040520 , 字段: ans 中已修改数据: A\n",
-      "题号: 040521 , 字段: ans 中已修改数据: C\n",
-      "题号: 040522 , 字段: ans 中已修改数据: (1) $2n$; (2) $n^2+n+\\dfrac{4^{n+1}}{3}-\\dfrac{4}{3}$\n",
-      "题号: 040523 , 字段: ans 中已修改数据: (1) $\\begin{pmatrix}0 & 1 & 2 \\\\ \\dfrac{7}{15} & \\dfrac{7}{15} & \\dfrac{1}{15}\\end{pmatrix}$, 期望为$\\dfrac{3}{5}$; (2) $\\chi^2 \\approx 0.794<3.841$, 不能认为有关\n",
-      "题号: 040524 , 字段: ans 中已修改数据: (1) 略; (2) $\\dfrac{\\sqrt{10}}{5}$\n",
-      "题号: 040525 , 字段: ans 中已修改数据: (1) $\\dfrac{x^2}{4}+y^2=1$; (2) 最大值为$3$; 最小值为$\\dfrac{\\sqrt{6}}{3}$; (3) $(0,3)$\n",
-      "题号: 040526 , 字段: ans 中已修改数据: (1) $y=x+1$; (2) 略; (3) $2$\n"
+      "题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
+      "题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
+      "题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t0.000\n",
+      "题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
+      "题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
+      "题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\n",
+      "题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
+      "题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
+      "题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\t1.000\n",
+      "题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\t0.000\n",
+      "题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t0.000\t1.000\n",
+      "题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\t0.000\n",
+      "题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.800\n",
+      "题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t1.000\n",
+      "题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.350\n",
+      "题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.850\n",
+      "题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.750\n",
+      "题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.950\t0.900\n",
+      "题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t1.000\n",
+      "题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t1.000\n",
+      "题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.950\t0.750\t0.850\n",
+      "题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.900\t0.650\t0.350\n",
+      "题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.650\t0.600\t0.600\n",
+      "题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.900\t0.600\t0.350\n",
+      "题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.842\n",
+      "题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t1.000\n",
+      "题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.105\n",
+      "题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.842\n",
+      "题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.421\n",
+      "题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\t0.895\n",
+      "题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\n",
+      "题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t1.000\n",
+      "题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t1.000\t0.789\t0.895\n",
+      "题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\t0.947\t0.000\n",
+      "题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.895\t0.842\t0.895\n",
+      "题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\t0.789\t0.842\n",
+      "题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.893\n",
+      "题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\n",
+      "题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.071\n",
+      "题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.893\n",
+      "题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.714\n",
+      "题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\t0.786\n",
+      "题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.929\n",
+      "题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\n",
+      "题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t1.000\t0.929\t0.821\n",
+      "题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.929\t0.929\t0.000\n",
+      "题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\t0.893\t0.821\n",
+      "题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.929\t0.750\t0.821\n",
+      "题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\n",
+      "题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\n",
+      "题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.391\n",
+      "题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.870\n",
+      "题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.652\n",
+      "题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\t0.870\n",
+      "题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\n",
+      "题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\n",
+      "题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\t1.000\t0.957\n",
+      "题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\t0.913\t0.000\n",
+      "题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.696\t0.783\t0.739\n",
+      "题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\t0.826\t0.696\n",
+      "题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.938\n",
+      "题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.875\n",
+      "题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.000\n",
+      "题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.875\n",
+      "题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.562\n",
+      "题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\t0.750\n",
+      "题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\n",
+      "题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\n",
+      "题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\t0.812\t0.812\n",
+      "题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\t0.938\t0.062\n",
+      "题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.812\t0.688\t0.750\n",
+      "题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.938\t0.812\t0.438\n",
+      "题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.865\n",
+      "题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.973\n",
+      "题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.135\n",
+      "题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.946\n",
+      "题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.730\n",
+      "题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t1.000\t1.000\n",
+      "题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.973\n",
+      "题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t1.000\n",
+      "题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.946\t0.838\t0.946\n",
+      "题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.351\t0.919\t0.378\n",
+      "题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.378\t0.513\t0.622\n",
+      "题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.892\t0.622\t0.513\n",
+      "题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\n",
+      "题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.846\n",
+      "题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.179\n",
+      "题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\n",
+      "题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.769\n",
+      "题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\t0.974\n",
+      "题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\n",
+      "题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t1.000\n",
+      "题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t1.000\t0.872\t0.974\n",
+      "题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t1.000\t1.000\t0.051\n",
+      "题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.692\t0.718\t0.795\n",
+      "题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.667\t0.872\t0.590\n",
+      "题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.889\n",
+      "题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.926\n",
+      "题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.037\n",
+      "题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.889\n",
+      "题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.667\n",
+      "题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.963\t0.926\n",
+      "题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.926\n",
+      "题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t1.000\n",
+      "题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.926\t0.852\t0.704\n",
+      "题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.963\t0.963\t0.037\n",
+      "题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.852\t0.778\t0.778\n",
+      "题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t1.000\t0.556\t0.518\n",
+      "题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.842\n",
+      "题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\n",
+      "题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.053\n",
+      "题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.895\n",
+      "题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.632\n",
+      "题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\t0.895\n",
+      "题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.947\n",
+      "题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\n",
+      "题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\t0.842\t0.947\n",
+      "题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.895\t0.895\t0.105\n",
+      "题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.789\t0.632\t0.526\n",
+      "题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.842\t0.421\t0.105\n"
     ]
    }
   ],
@ -196,6 +211,8 @@
    "                    else:\n",
    "                        pro_dict[id][field].append(cell_data.upper())\n",
    "                        print(\"题号:\",id,\", 字段:\",field,\"中已添加数据:\",cell_data.upper())\n",
+    "else:\n",
+    "    print(\"字段名有误\")\n",
    "\n",
    "with open(r\"../题库0.3/Problems.json\",\"w\",encoding = \"utf8\") as f:\n",
    "    f.write(json.dumps(pro_dict,indent=4,ensure_ascii=False))"
--- a/工具/批量生成题目pdf.py
+++ b/工具/批量生成题目pdf.py
@ -0,0 +1,197 @@
+import os,re,time,json,sys
+
+"""
+模板文件目录下 题目清单.txt 文件不能缺失
+"""
+
+"""---设置是否在学生版中提供答案---"""
+# When True, the student version prints each answer and omits the answer space.
+answered = True
+
+"""---设置文件保存路径---"""
+# Always use "/" as the separator between directories and files.
+# Directory that receives the generated .tex files and the xelatex output.
+directory = "临时文件/"
+# filename = "高三二模前易错题"
+# Base name of the output files; a role suffix and the date are appended later.
+filename = "测试"
+"""---设置文件名结束---"""
+
+"""---设置题目列表---"""
+#字典字段为文件名, 之后为内容的题号
+
+problems_dict =  {
+"函数1":"778,1253,1262,1325,1339,1352,2918,2968,3747,4009,4157,4435,4721,5123,5650,7939,10197,10938,11148,12179,12277,12543,12859,12902,13721,30060,30398,30406,10796,12104,30051,30327,30337,30356,31321",
+"函数2":"87,342,655,1277,1329,2831,2851,2859,2905,2959,3648,4153,4320,4359,11066,11079,11144,11999,12063,12064,12178,12192,12842,12856,12903,13747,13840,14191,30410,12549,12717,30377,30381,30411,30416,30420,30424,30426",
+"数列":"321,403,1781,1788,1810,3210,3219,3283,3309,3310,3312,4472,4476,6793,10777,10942,12067,12112,12239,12933,12979,13921,13925,30072,30473,30499",
+"解析几何":"248,282,363,625,960,2162,2252,2268,2278,2372,2418,3421,3437,3438,4246,8760,8866,8912,9784,10951,10958,12199,12213,12255,12548,13060,13063,13069,13097,13118,13134,13982,14505,31229,31233,31323",
+"概率统计":"332,340,401,412,654,2586,2605,2664,3574,3585,3640,4575,4584,7361,7423,7502,7630,10868,11993,14091,30227,30275,30495,30520,30540,31158,31196,31320"
+
+
+# "2024届高二下学期周末卷01":"40001:40017",
+# "2025届高一下学期周末卷01":"40018:40036",
+# "2024届高二下学期周末卷02":"40037:40056",
+# "2025届高一下学期周末卷02":"40057:40082",
+# "2025届高一下学期周末卷03":"40083:40104",
+# "2025届高一下学期周末卷03小测":"40105:40112",
+# "2025届高一下学期周末卷04旧版":"40113:40130",
+# "2025届高一下学期周末卷04小测":"40131:40139",
+# "2024届高二下学期周末卷03":"40140:40160",
+# "2024届高二上学期期末考试":"31267:31287",
+# "2025届高一上学期期末考试":"31288:31308",
+# "2024届高二下学期周末卷04":"40161:40180",
+# "2025届高一下学期周末卷04":"40181:40201",
+# "2024届高二下学期周末卷05":"40202:40225",
+# "2025届高一下学期周末卷05":"40226:40245",
+# "2024届空间向量校本作业":"22048:22083",
+# "2024届二项式定理校本作业":"22084:22105",
+# "2025届高一下学期周末卷05小测":"40246:40255",
+# "2025届高一下学期周末卷06":"40256:40273",
+# "2025届高一下学期周末卷06小测":"40274:40282",
+# "2025届高一下学期期中复习一(集合逻辑不等式)":"40283:40298",
+# "2024届高二下学期周末卷06":"40299:40316",
+# "2024届高二下学期周末卷07":"40317:40335",
+# "2025届高一下学期测验01":"40336:40349",
+# "2025届高一下学期测验02":"40350:40367",
+# "2025届高一下学期期中复习二(幂指对函数)":"40368:40386",
+# "2025届高一下学期周末卷02小测":"40387:40395",
+# "2025届高一下学期周末卷07":"40396:40413",
+# "2025届高一下学期周末卷07小测":"40414:40421"
+
+}
+
+"""---设置题目列表结束---"""
+
+
+# Make sure the directory ends with "/", because the output paths below are
+# built by plain string concatenation.
+# NOTE: an empty directory string raises IndexError on this check.
+if directory[-1] != "/":
+    directory += "/"
+
+
+#生成数码列表, 逗号分隔每个区块, 区块内部用:表示整数闭区间
+def generate_number_set(string,dict):
+    """Expand a problem-number specification into zero-padded ID strings.
+
+    The specification is a comma-separated list of blocks. A block is either
+    a single number ("87") or an inclusive integer range "start:end" ("3:5").
+    All whitespace is ignored. Empty blocks (empty input, doubled or trailing
+    commas) are skipped instead of producing a bogus "000000" ID.
+
+    Args:
+        string: The specification, e.g. "1,3:5".
+        dict: Unused; kept so existing callers that pass the problem
+            dictionary keep working.
+
+    Returns:
+        A list of ID strings, each zero-padded to at least 6 characters, in
+        the order given by the specification.
+
+    Raises:
+        ValueError: If a range block is not exactly two integers joined by ":".
+    """
+    # \s already matches newlines, so a single class is sufficient.
+    string = re.sub(r"\s","",string)
+    numbers_list = []
+    for s in string.split(","):
+        if not s:
+            continue
+        if ":" not in s:
+            numbers_list.append(s.zfill(6))
+        else:
+            start,end = s.split(":")
+            numbers_list.extend(str(ind).zfill(6) for ind in range(int(start),int(end)+1))
+    return numbers_list
+
+#将正确率转化为含颜色代码的字符串
+def get_color(value):
+    """Return the text "{r,g,0}" of an rgb triple encoding a rate.
+
+    Rates of 0.5 and above keep red at 1 and lower green linearly (0 at 1.0);
+    rates below 0.5 keep green at 1 and scale red as twice the rate.
+    Both components are printed with three decimals; blue is always 0.
+    """
+    rate = float(value)
+    if rate >= 0.5:
+        red, green = 1, 2-2*rate
+    else:
+        red, green = 2*rate, 1
+    return "{%.3f,%.3f,0}" % (red, green)
+
+
+def color_value(matchobj):
+    """Regex replacement callback that boxes the captured rate in colour.
+
+    Takes group 1 of the match as the rate text and returns a tab followed
+    by an \\fcolorbox with a black frame, the fill colour from get_color,
+    and the rate text as its content.
+    """
+    rate_text = matchobj.group(1)
+    pieces = ["\t", "\\fcolorbox[rgb]{0,0,0}", get_color(rate_text), "{", rate_text, "}"]
+    return "".join(pieces)
+
+
+# Load the problem database JSON file into a dictionary.
+with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as problems_file:
+    pro_dict = json.load(problems_file)
+
+# Load the lesson-objective database JSON file into a dictionary.
+with open(r"../题库0.3/LessonObj.json","r",encoding = "utf8") as objectives_file:
+    obj_dict = json.load(objectives_file)
+
+# Create the output directory, including missing parents. exist_ok tolerates an
+# already existing directory; other failures (e.g. permission errors) are no
+# longer silently swallowed as they were by the former bare "except: pass".
+os.makedirs(directory, exist_ok=True)
+
+# Date suffix "_YYYYMMDD" taken from the local system clock.
+current_time = time.localtime()
+time_string = "_%04d%02d%02d" % (current_time.tm_year, current_time.tm_mon, current_time.tm_mday)
+
+
+# LaTeX bodies of the two versions, filled in section by section below.
+data_teachers = ""
+data_students = ""
+
+# Output paths: <directory><filename>_<role>_<date>.tex
+teachers_latex_file = "".join([directory, filename, "_教师用", time_string, ".tex"])
+students_latex_file = "".join([directory, filename, "_学生用", time_string, ".tex"])
+
+for section_name in problems_dict:
+    problems = problems_dict[section_name]
+
+    # Each section starts on a new page with its own heading and restarts the
+    # enumerate counter; the header is identical in both versions.
+    section_header = r"\newpage" + "\n\n" + r"\section{" + section_name +"}\n\n" + r"\setcounter{enumi}{0}"+"\n\n"
+    data_teachers += section_header
+    data_students += section_header
+
+    # Expand the ID specification and keep only IDs present in the database.
+    problem_list = [problem_id for problem_id in generate_number_set(problems.strip(),pro_dict) if problem_id in pro_dict]
+
+    # Build the teacher and student LaTeX text of every problem.
+    # (problem_id instead of id, so the builtin is not shadowed.)
+    for problem_id in problem_list:
+        problemset = pro_dict[problem_id]
+        problem = problemset["content"]
+        solution = (problemset["solution"] if problemset["solution"] != "" else "暂无解答与提示")
+        # The answer is wrapped in red exactly once here and reused as-is below.
+        answer = "\\textcolor{red}{" + (problemset["ans"] if problemset["ans"] != "" else "暂无答案") + "}"
+        remarks = (problemset["remark"] if problemset["remark"] != "" else "暂无备注")
+        usages_list = problemset["usages"]
+        if len(usages_list) > 0:
+            # A rate is one digit, a dot and up to ten digits, preceded by a tab
+            # or a space. Raw string avoids the invalid "\d" / "\." escapes of a
+            # normal string literal; color_value re-emits a tab before each box.
+            usage = re.sub(r"[\t ](\d\.\d{0,10})",color_value,"\n\n".join(usages_list))
+        else:
+            usage = "暂无使用记录"
+        origin = (problemset["origin"] if problemset["origin"] != "" else "出处不详")
+        objects = problemset["objs"]
+        if len(objects) == 0:
+            objects = "暂未关联目标\n\n"
+        elif "KNONE" in [o.upper() for o in objects]:
+            objects = "该题的考查目标不在目前的集合中\n\n"
+        else:
+            objects_string = ""
+            for obj in objects:
+                if not obj in obj_dict:
+                    # An unknown objective ID replaces the whole list with an error note.
+                    objects_string = "目标" + obj + "有误\n\n"
+                    break
+                else:
+                    objects_string += "\\textcolor{blue}{" + obj + "|" + obj_dict[obj]["content"] + "}\n\n"
+            objects = objects_string
+        # Answer space is only left in the student version when answers are hidden.
+        space = ("" if problemset["space"] == "" or answered else r"\vspace*{"+problemset["space"]+"}\n")
+        # Fall back to the placeholder when the tag list itself is empty
+        # (the emptiness test used to look at the "origin" field by mistake).
+        tags = ("|".join(problemset["tags"]) if len(problemset["tags"])>0 else "暂无标签")
+        raw_string = "\\item " + "{\\tiny ("+problem_id+")} "+problem
+        teachers_string = raw_string.replace("\\tiny","")+"\n\n关联目标:\n\n"+ objects + "\n\n标签: " + tags + "\n\n答案: "+answer + "\n\n" + "解答或提示: " + solution + "\n\n使用记录:\n\n"+ usage + "\n" + "\n\n出处: "+origin + "\n\n"
+        students_string = raw_string + space + "\n\n"
+        if answered:
+            # answer already carries its \textcolor{red}{...} wrapper.
+            students_string += "答案: " + answer + "\n\n"
+        data_teachers += teachers_string
+        data_students += students_string
+
+# Remove the first "\newpage" together with its two newlines (10 characters).
+data_teachers = data_teachers[10:]
+data_students = data_students[10:]
+
+# Read the LaTeX template whose placeholder is replaced by the generated body.
+with open("模板文件/题目清单.txt","r",encoding = "utf8") as template_file:
+    latex_raw = template_file.read()
+# Outside Windows, switch to the fandol font set and comment out \setCJKmainfont.
+if not sys.platform == "win32":
+    latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
+    latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)
+
+# Teacher version first, then student version: write the .tex file and run
+# xelatex on it twice, printing only the exit status of the second run.
+for start_message, latex_file, latex_body in (
+    ("开始编译教师版本pdf文件: ", teachers_latex_file, data_teachers),
+    ("开始编译学生版本pdf文件: ", students_latex_file, data_students),
+):
+    with open(latex_file,"w",encoding = "utf8") as tex_file:
+        tex_file.write(latex_raw.replace("编译模板",latex_body))
+    print(start_message, latex_file)
+    compile_command = "xelatex -interaction=batchmode -output-directory=" + directory + " "+ latex_file
+    os.system(compile_command)
+    print(os.system(compile_command))
--- a/工具/文本文件/metadata.txt
+++ b/工具/文本文件/metadata.txt
@ -1,18 +1,235 @@
-tags
+usages
+001050
+20220906	2023届高三2班	0.871

-1
-第一单元
-第二单元
-第三单元
+009446
+20220906	2023届高三2班	0.355

+001072
+20220906	2023届高三2班	0.774

-2
-第三单元
+010060
+20220906	2023届高三2班	0.742

+000033
+20220906	2023届高三2班	0.903

-4
-第三单元
-第五单元
-第六单元
+001073
+20220906	2023届高三2班	0.935

+000023
+20220906	2023届高三2班	0.774
+
+002773
+20220906	2023届高三2班	0.742
+
+002775
+20220906	2023届高三2班	0.645
+
+002784
+20220906	2023届高三2班	0.903
+
+007991
+20220906	2023届高三2班	0.581
+
+005150
+20220906	2023届高三2班	0.903
+
+002790
+20220906	2023届高三2班	0.774
+
+002791
+20220906	2023届高三2班	0.774
+
+000757
+20220906	2023届高三2班	0.968
+
+002793
+20220906	2023届高三2班	0.968
+
+000389
+20220906	2023届高三2班	0.871
+
+002800
+20220906	2023届高三2班	0.903
+
+000778
+20220907	2023届高三2班	0.750
+
+001262
+20220907	2023届高三2班	0.656
+
+000342
+20220907	2023届高三2班	0.125
+
+001238
+20220907	2023届高三2班	0.906
+
+001239
+20220907	2023届高三2班	0.875
+
+001242
+20220907	2023届高三2班	0.844
+
+002831
+20220907	2023届高三2班	0.594	0.406
+
+002968
+20220907	2023届高三2班	0.531	0.063
+
+009508
+20220907	2023届高三2班	1.000
+
+003936
+20220907	2023届高三2班	0.906
+
+007984
+20220907	2023届高三2班	1.000
+
+009511
+20220907	2023届高三2班	0.844
+
+005508
+20220907	2023届高三2班	0.844
+
+000734
+20220907	2023届高三2班	0.469
+
+002856
+20220907	2023届高三2班	0.781	0.906
+
+000474
+20220907	2023届高三2班	0.813
+
+002847
+20220907	2023届高三2班	0.844	0.938
+
+002851
+20220907	2023届高三2班	0.500	0.281
+
+001286
+20220907	2023届高三2班	0.906
+
+001292
+20220907	2023届高三2班	0.906
+
+010110
+20220907	2023届高三2班	1.000	1.000	0.875	0.906
+
+000058
+20220907	2023届高三2班	1.000
+
+010114
+20220907	2023届高三2班	0.844
+
+001296
+20220907	2023届高三2班	1.000
+
+005610
+20220907	2023届高三2班	0.906
+
+001353
+20220907	2023届高三2班	0.813
+
+001305
+20220907	2023届高三2班	0.813	0.813	0.969	0.969	0.969
+
+010125
+20220907	2023届高三2班	0.969
+
+001309
+20220907	2023届高三2班	1.000	0.750
+
+005123
+20220907	2023届高三2班	0.563
+
+005678
+20220907	2023届高三2班	0.469
+
+001340
+20220908	2023届高三2班	0.788	0.818	0.879	0.727	0.788
+
+002925
+20220908	2023届高三2班	0.939
+
+002911
+20220908	2023届高三2班	0.879
+
+002918
+20220908	2023届高三2班	0.727
+
+003815
+20220908	2023届高三2班	0.939
+
+005568
+20220908	2023届高三2班	0.939
+
+000954
+20220908	2023届高三2班	1.000
+
+001324
+20220908	2023届高三2班	0.879
+
+001326
+20220908	2023届高三2班	0.939	0.970
+
+002871
+20220908	2023届高三2班	0.818
+
+002878
+20220908	2023届高三2班	0.818	0.818
+
+002898
+20220908	2023届高三2班	0.727
+
+000362
+20220908	2023届高三2班	0.879
+
+001351
+20220908	2023届高三2班	0.848
+
+003747
+20220908	2023届高三2班	0.727
+
+005199
+20220908	2023届高三2班	0.970
+
+009490
+20220908	2023届高三2班	0.879
+
+009517
+20220909	2023届高三2班	0.656
+
+007941
+20220909	2023届高三2班	0.813
+
+000092
+20220909	2023届高三2班	0.625
+
+001270
+20220909	2023届高三2班	1.000	1.000	0.969	0.781
+
+002888
+20220909	2023届高三2班	0.719
+
+001331
+20220909	2023届高三2班	0.156
+
+002894
+20220909	2023届高三2班	0.844	0.750
+
+001211
+20220909	2023届高三2班	0.969	0.906	0.875	0.844	0.750
+
+001218
+20220909	2023届高三2班	0.906	0.750	0.625
+
+002893
+20220909	2023届高三2班	0.813
+
+002895
+20220909	2023届高三2班	0.688
+
+009522
+20220909	2023届高三2班	0.563	0.250

--- a/工具/文本文件/手动统计结果.txt
+++ b/工具/文本文件/手动统计结果.txt
@ -0,0 +1,107 @@
+
+
+
+[BEGIN]
+## 20220906
+** 2023届高三2班
+1050	 0.871 
+9446	 0.355 
+1072	 0.774 
+10060	 0.742 
+33	 0.903 
+1073	 0.935 
+23	 0.774 
+2773	 0.742 
+2775	 0.645 
+2784	 0.903 
+7991	 0.581 
+5150	 0.903 
+2790	 0.774 
+2791	 0.774 
+757	 0.968 
+2793	 0.968 
+389	 0.871 
+2800	 0.903 
+[END]
+
+
+[BEGIN]
+## 20220907
+** 2023届高三2班
+778	 0.750 	
+1262	 0.656 	
+342	 0.125 	
+1238	 0.906 	
+1239	 0.875 	
+1242	 0.844 	
+2831	 0.594 	 0.406 
+2968	 0.531 	 0.063 
+9508	 1.000 	
+3936	 0.906 	
+7984	 1.000 	
+9511	 0.844 	
+5508	 0.844 	
+734	 0.469 	
+2856	 0.781 	 0.906 
+474	 0.813 	
+2847	 0.844 	 0.938 
+2851	 0.500 	 0.281 
+[END]
+
+[BEGIN]
+## 20220907
+** 2023届高三2班
+1286	 0.906 				
+1292	 0.906 				
+10110	 1.000 	 1.000 	 0.875 	 0.906 	
+58	 1.000 				
+10114	 0.844 				
+1296	 1.000 				
+5610	 0.906 				
+1353	 0.813 				
+1305	 0.813 	 0.813 	 0.969 	 0.969 	 0.969 
+10125	 0.969 				
+1309	 1.000 	 0.750 			
+5123	 0.563 				
+5678	 0.469 				
+[END]
+
+[BEGIN]
+## 20220908
+** 2023届高三2班
+1340	 0.788 	 0.818 	 0.879 	 0.727 	 0.788 
+2925	 0.939 				
+2911	 0.879 				
+2918	 0.727 				
+3815	 0.939 				
+5568	 0.939 				
+954	 1.000 				
+1324	 0.879 				
+1326	 0.939 	 0.970 			
+2871	 0.818 				
+2878	 0.818 	 0.818 			
+2898	 0.727 				
+362	 0.879 				
+1351	 0.848 				
+3747	 0.727 				
+5199	 0.970 				
+9490	 0.879 				
+[END]
+
+
+[BEGIN]
+## 20220909
+** 2023届高三2班
+9517	 0.656 				
+7941	 0.813 				
+92	 0.625 				
+1270	 1.000 	 1.000 	 0.969 	 0.781 	
+2888	 0.719 				
+1331	 0.156 				
+2894	 0.844 	 0.750 			
+1211	 0.969 	 0.906 	 0.875 	 0.844 	 0.750 
+1218	 0.906 	 0.750 	 0.625 		
+2893	 0.813 				
+2895	 0.688 				
+9522	 0.563 	 0.250 			
+[END]
--- a/工具/文本文件/题号筛选.txt
+++ b/工具/文本文件/题号筛选.txt
--- a/工具/新题比对.ipynb
+++ b/工具/新题比对.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
@ -13,61 +13,11 @@
      "0.805\t2\t006207\n",
      "0.812\t3\t006137\n",
      "0.798\t4\t012269\n",
-      "0.831\t5\t040063\n",
+      "0.817\t5\t010989\n",
      "0.888\t6\t021503\n",
-      "0.932\t7\t040057\n",
+      "0.815\t7\t021484\n",
      "0.959\t8\t003206\n",
-      "0.817\t9\t011090\n",
-      "0.796\t10\t003058\n",
-      "0.768\t11\t005984\n",
-      "0.744\t12\t021463\n",
-      "0.872\t13\t008169\n",
-      "0.843\t14\t021559\n",
-      "0.758\t15\t006230\n",
-      "0.888\t16\t008317\n",
-      "0.682\t17\t012005\n",
-      "0.630\t18\t006107\n",
-      "0.765\t19\t021581\n",
-      "0.763\t20\t014242\n",
-      "0.661\t21\t013849\n",
-      "0.716\t22\t008191\n",
-      "0.504\t23\t003537\n",
-      "0.934\t24\t000818\n",
-      "0.755\t25\t012596\n",
-      "0.893\t26\t012355\n",
-      "0.835\t27\t000479\n",
-      "0.950\t28\t001529\n",
-      "0.797\t29\t000577\n",
-      "0.737\t30\t003191\n",
-      "1.000\t31\t012594\n",
-      "0.824\t32\t003150\n",
-      "1.000\t33\t004066\n",
-      "0.707\t34\t003140\n",
-      "0.648\t35\t030294\n",
-      "0.752\t36\t014186\n",
-      "0.621\t37\t014223\n",
-      "0.700\t38\t004442\n",
-      "0.813\t39\t030808\n",
-      "0.669\t40\t013843\n",
-      "0.654\t41\t000738\n",
-      "0.682\t42\t013354\n",
-      "0.830\t43\t030042\n",
-      "0.977\t44\t020411\n",
-      "0.661\t45\t011133\n",
-      "0.681\t46\t004118\n",
-      "0.667\t47\t011943\n",
-      "0.683\t48\t005600\n",
-      "0.691\t49\t011386\n",
-      "0.711\t50\t002922\n",
-      "1.000\t51\t020452\n",
-      "0.736\t52\t020356\n",
-      "0.666\t53\t020430\n",
-      "0.685\t54\t020357\n",
-      "0.713\t55\t005540\n",
-      "0.643\t56\t012868\n",
-      "0.845\t57\t005598\n",
-      "0.860\t58\t002958\n",
-      "0.646\t59\t004184\n"
+      "0.839\t9\t011090\n"
     ]
    }
   ],
@ -75,7 +25,7 @@
    "import os,re,difflib,Levenshtein,time,json\n",
    "\n",
    "# 重要!!! 范围\n",
-    "old_problems_range = \"1:999999\"\n",
+    "old_problems_range = \"1:30000\"\n",
    "threshold = 0.85\n",
    "\n",
    "# 待比对的文件\n",
@ -117,11 +67,11 @@
    "def Lev_get_equal_rate(str1,str2):\n",
    "    return Levenshtein.ratio(str1,str2)\n",
    "\n",
-    "def GenerateProblemListFromString(data):\n",
+    "def GenerateProblemListFromString(problem_string):\n",
    "    try:\n",
-    "        data = re.findall(r\"\\\\begin\\{document\\}([\\s\\S]*?)\\\\end\\{document\\}\",problems_string)[0]\n",
+    "        data = re.findall(r\"\\\\begin\\{document\\}([\\s\\S]*?)\\\\end\\{document\\}\",problem_string)[0]\n",
    "    except:\n",
-    "        pass\n",
+    "        data = problem_string\n",
    "    data = re.sub(r\"\\n{2,}\",\"\\n\",data)\n",
    "    data = re.sub(r\"\\\\item\",r\"\\\\enditem\\\\item\",data)\n",
    "    data = re.sub(r\"\\\\end\\{enumerate\\}\",r\"\\\\enditem\",data)\n",
@ -220,7 +170,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.15"
+   "version": "3.8.15"
  },
  "orig_nbformat": 4,
  "vscode": {
--- a/工具/新题相似相同比对.py
+++ b/工具/新题相似相同比对.py
@ -0,0 +1,108 @@
+import os,re,difflib,Levenshtein,time,json
+
+# IMPORTANT: ID range of the existing problems that new problems are compared against.
+# Format: comma-separated chunks; "a:b" inside a chunk is the inclusive integer range a..b.
+old_problems_range = "1:30000"
+# NOTE(review): threshold is not referenced anywhere else in this script - confirm whether it is still needed.
+threshold = 0.85
+
+# File containing the new problems to compare (absolute, machine-specific path)
+filename = r"C:\Users\weiye\Documents\wwy sync\临时工作区\自拟题目9.tex"
+
+# Build the list of problem IDs: chunks are comma-separated, and "a:b" inside a chunk is an inclusive integer range
+def generate_number_set(string):
+    """Expand an ID specification into a list of zero-padded 6-character IDs.
+
+    All whitespace in ``string`` is removed first. Each comma-separated chunk
+    is either a single ID (left-padded with zeros to 6 characters) or a range
+    ``start:end`` that is expanded to every integer from start to end
+    inclusive. A range whose start is greater than its end contributes nothing.
+
+    Raises:
+        ValueError: if a range chunk has non-integer bounds or more than one colon.
+    """
+    string = re.sub(r"[\n\s]","",string)
+    numbers_list = []
+    for s in string.split(","):
+        if not s:
+            # An empty chunk (empty spec, doubled or trailing comma) used to
+            # produce the bogus ID "000000"; skip it instead.
+            continue
+        if ":" not in s:
+            numbers_list.append(s.zfill(6))
+        else:
+            start,end = s.split(":")
+            numbers_list.extend(str(ind).zfill(6) for ind in range(int(start),int(end)+1))
+    return numbers_list
+
+# String preprocessing applied before two problems are compared
+def pre_treating(string):
+    """Strip layout and markup noise from a problem's LaTeX source.
+
+    The removal passes run in the order listed, each on the result of the
+    previous one. The order matters: the macro-name pass relies on the braces
+    that the following pass deletes.
+    """
+    removal_passes = (
+        # whole center environments
+        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
+        # answer-slot / choice-layout macro names and font macro names
+        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)|(mathrm)|(text)",
+        # whitespace, backslashes, braces, dollar signs, parentheses, square brackets
+        r"[\s\\\{\}\$\(\)\[\]]",
+        # newlines and tabs
+        r"[\n\t]",
+        # remaining macro names
+        r"(displaystyle)|(overrightarrow)",
+        # ASCII punctuation
+        r"[,\.:;?]",
+    )
+    cleaned = string
+    for pattern in removal_passes:
+        cleaned = re.sub(pattern,"",cleaned)
+    return cleaned
+
+# difflib-based string comparison
+def difflab_get_equal_rate(str1, str2):
+    """Return the ``difflib.SequenceMatcher`` ratio of the two strings (no junk filter)."""
+    matcher = difflib.SequenceMatcher(None, str1, str2)
+    return matcher.ratio()
+
+# Jaro string comparison (Levenshtein package)
+def jaro_get_equal_rate(str1,str2):
+    """Return the value of ``Levenshtein.jaro`` for the two strings."""
+    similarity = Levenshtein.jaro(str1,str2)
+    return similarity
+
+# Levenshtein-ratio string comparison (Levenshtein package)
+def Lev_get_equal_rate(str1,str2):
+    """Return the value of ``Levenshtein.ratio`` for the two strings."""
+    similarity = Levenshtein.ratio(str1,str2)
+    return similarity
+
+def GenerateProblemListFromString(problem_string):
+    """Split LaTeX source into a list of ``(problem_text, marker)`` tuples.
+
+    If the source contains a ``document`` environment only its body is used,
+    otherwise the whole string is used. Every ``\\item`` starts a problem; the
+    problem ends at the next ``\\item`` or at ``\\end{enumerate}``.
+
+    ``marker`` is the text of the last line of the form ``%<letters/digits>``
+    found before the problem text, without the ``%``. It is ``""`` when there
+    is no such line.
+
+    Returns:
+        list of ``(str, str)`` tuples in order of appearance.
+    """
+    document = re.search(r"\\begin\{document\}([\s\S]*?)\\end\{document\}",problem_string)
+    data = document.group(1) if document else problem_string
+    data = re.sub(r"\n{2,}","\n",data)
+    # Put an explicit terminator before every \item and in place of
+    # \end{enumerate} so each problem is delimited by \item ... \enditem.
+    data = re.sub(r"\\item",r"\\enditem\\item",data)
+    data = re.sub(r"\\end\{enumerate\}",r"\\enditem",data)
+    ProblemsList = []
+    for item in re.finditer(r"\\item([\s\S]*?)\\enditem",data):
+        raw = item.group(1)
+        p = raw.strip()
+        # Use the real position of this match. Searching for the text with
+        # data.index(p) finds the FIRST occurrence, which assigns the wrong
+        # marker when the same text appears more than once.
+        startpos = item.start(1) + (len(raw) - len(raw.lstrip()))
+        suflist = re.findall(r"\n\%[\dA-Za-z]+",data[:startpos])
+        suffix = suflist[-1].replace("%","").strip() if suflist else ""
+        ProblemsList.append((p,suffix))
+    return ProblemsList
+
+
+# Select the similarity metric used for the comparison
+sim_test = jaro_get_equal_rate
+
+# Load the problem database
+with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
+    database = f.read()
+pro_dict = json.loads(database)
+
+# Read and split the file containing the new problems
+with open(filename,"r",encoding="u8") as f:
+    newdatabase = f.read()
+new_pro_list = GenerateProblemListFromString(newdatabase)
+
+# Preprocess the old problems whose ID lies in the configured range.
+# A set keeps the membership test O(1) per ID instead of scanning a long list.
+idrange_raw = set(generate_number_set(old_problems_range))
+pro_dict_treated = {
+    pid: pre_treating(pro_dict[pid]["content"])
+    for pid in pro_dict if pid in idrange_raw
+}
+
+# Preprocess the new problems, numbered from 1 in order of appearance
+new_dict_treated = {
+    number: pre_treating(problem[0])
+    for number, problem in enumerate(new_pro_list, 1)
+}
+
+# For every new problem, report the most similar old problem
+output_lines = []
+for i, new_p in new_dict_treated.items():
+    maxsim = 0
+    # Reset per new problem: without this, argmax is unbound (NameError) or
+    # stale from the previous problem when no similarity exceeds 0.
+    argmax = ""
+    for p, old_p in pro_dict_treated.items():
+        sim = sim_test(new_p,old_p)
+        if sim > maxsim:
+            maxsim = sim
+            argmax = p
+    line = "%.3f\t%d\t%s" %(maxsim,i,argmax)
+    print(line)
+    output_lines.append(line)
+    # Debug aid: print new_pro_list[i-1][0] and pro_dict[argmax]["content"] here to inspect a pair.
+
+output = "".join(line + "\n" for line in output_lines)
+with open("临时文件/新题相似相同.txt","w",encoding = "u8") as f:
+    f.write(output)
+
--- a/工具/生成使用记录excel.ipynb
+++ b/工具/生成使用记录excel.ipynb
@ -2,9 +2,26 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'openpyxl'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[1], line 63\u001b[0m\n\u001b[0;32m     60\u001b[0m                     \u001b[39mfor\u001b[39;00m t \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39mlen\u001b[39m(results)):\n\u001b[0;32m     61\u001b[0m                         df[\u001b[39mid\u001b[39m\u001b[39m+\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m_\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m+\u001b[39m\u001b[39mstr\u001b[39m(t\u001b[39m+\u001b[39m\u001b[39m1\u001b[39m)][c] \u001b[39m=\u001b[39m \u001b[39mfloat\u001b[39m(results[t])\n\u001b[1;32m---> 63\u001b[0m df\u001b[39m.\u001b[39;49mto_excel(outputfile,index \u001b[39m=\u001b[39;49m \u001b[39mFalse\u001b[39;49;00m)\n",
+      "File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    209\u001b[0m     \u001b[39melse\u001b[39;00m:\n\u001b[0;32m    210\u001b[0m         kwargs[new_arg_name] \u001b[39m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
+      "File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    209\u001b[0m     \u001b[39melse\u001b[39;00m:\n\u001b[0;32m    210\u001b[0m         kwargs[new_arg_name] \u001b[39m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
+      "File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\core\\generic.py:2374\u001b[0m, in \u001b[0;36mNDFrame.to_excel\u001b[1;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, encoding, inf_rep, verbose, freeze_panes, storage_options)\u001b[0m\n\u001b[0;32m   2361\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mio\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mformats\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mexcel\u001b[39;00m \u001b[39mimport\u001b[39;00m ExcelFormatter\n\u001b[0;32m   2363\u001b[0m formatter \u001b[39m=\u001b[39m ExcelFormatter(\n\u001b[0;32m   2364\u001b[0m     df,\n\u001b[0;32m   2365\u001b[0m     na_rep\u001b[39m=\u001b[39mna_rep,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   2372\u001b[0m     inf_rep\u001b[39m=\u001b[39minf_rep,\n\u001b[0;32m   2373\u001b[0m )\n\u001b[1;32m-> 2374\u001b[0m formatter\u001b[39m.\u001b[39;49mwrite(\n\u001b[0;32m   2375\u001b[0m     excel_writer,\n\u001b[0;32m   2376\u001b[0m     sheet_name\u001b[39m=\u001b[39;49msheet_name,\n\u001b[0;32m   2377\u001b[0m     startrow\u001b[39m=\u001b[39;49mstartrow,\n\u001b[0;32m   2378\u001b[0m     startcol\u001b[39m=\u001b[39;49mstartcol,\n\u001b[0;32m   2379\u001b[0m     freeze_panes\u001b[39m=\u001b[39;49mfreeze_panes,\n\u001b[0;32m   2380\u001b[0m     engine\u001b[39m=\u001b[39;49mengine,\n\u001b[0;32m   2381\u001b[0m     storage_options\u001b[39m=\u001b[39;49mstorage_options,\n\u001b[0;32m   2382\u001b[0m )\n",
+      "File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\io\\formats\\excel.py:918\u001b[0m, in \u001b[0;36mExcelFormatter.write\u001b[1;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options)\u001b[0m\n\u001b[0;32m    914\u001b[0m     need_save \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m    915\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m    916\u001b[0m     \u001b[39m# error: Cannot instantiate abstract class 'ExcelWriter' with abstract\u001b[39;00m\n\u001b[0;32m    917\u001b[0m     \u001b[39m# attributes 'engine', 'save', 'supported_extensions' and 'write_cells'\u001b[39;00m\n\u001b[1;32m--> 918\u001b[0m     writer \u001b[39m=\u001b[39m ExcelWriter(  \u001b[39m# type: ignore[abstract]\u001b[39;49;00m\n\u001b[0;32m    919\u001b[0m         writer, engine\u001b[39m=\u001b[39;49mengine, storage_options\u001b[39m=\u001b[39;49mstorage_options\n\u001b[0;32m    920\u001b[0m     )\n\u001b[0;32m    921\u001b[0m     need_save \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n\u001b[0;32m    923\u001b[0m \u001b[39mtry\u001b[39;00m:\n",
+      "File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\io\\excel\\_openpyxl.py:56\u001b[0m, in \u001b[0;36mOpenpyxlWriter.__init__\u001b[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001b[0m\n\u001b[0;32m     43\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\n\u001b[0;32m     44\u001b[0m     \u001b[39mself\u001b[39m,\n\u001b[0;32m     45\u001b[0m     path: FilePath \u001b[39m|\u001b[39m WriteExcelBuffer \u001b[39m|\u001b[39m ExcelWriter,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     54\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m     55\u001b[0m     \u001b[39m# Use the openpyxl module as the Excel writer.\u001b[39;00m\n\u001b[1;32m---> 56\u001b[0m     \u001b[39mfrom\u001b[39;00m \u001b[39mopenpyxl\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mworkbook\u001b[39;00m \u001b[39mimport\u001b[39;00m Workbook\n\u001b[0;32m     58\u001b[0m     engine_kwargs \u001b[39m=\u001b[39m combine_kwargs(engine_kwargs, kwargs)\n\u001b[0;32m     60\u001b[0m     \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\n\u001b[0;32m     61\u001b[0m         path,\n\u001b[0;32m     62\u001b[0m         mode\u001b[39m=\u001b[39mmode,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     65\u001b[0m         engine_kwargs\u001b[39m=\u001b[39mengine_kwargs,\n\u001b[0;32m     66\u001b[0m     )\n",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'openpyxl'"
+     ]
+    }
+   ],
   "source": [
    "import os,re,json\n",
    "import pandas as pd\n",
@ -71,26 +88,6 @@
    "df.to_excel(outputfile,index = False)"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.231"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "float(\"0.231\")"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -115,7 +112,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.15"
  },
  "orig_nbformat": 4,
  "vscode": {
--- a/工具/目标挂钩清点.py
+++ b/工具/目标挂钩清点.py
@ -0,0 +1,27 @@
+import json
+
+# Output file that receives the comma-separated IDs of problems without any tag
+filename = r"文本文件/题号筛选.txt"
+# Forward slashes and the "Problems.json" spelling match the other tool scripts;
+# the previous backslash path only resolved on Windows.
+with open(r"../题库0.3/Problems.json","r",encoding = "u8") as f:
+    database = f.read()
+pro_dict = json.loads(database)
+units = ["第一单元","第二单元","第三单元","第四单元","第五单元","第六单元","第七单元","第八单元","第九单元","暂无对应"]
+# count1[u]: problems whose tags mention units[u]
+# count2[u]: those of them that also have at least one entry in "objs"
+count1 = [0]*len(units)
+count2 = [0]*len(units)
+untagged = []
+for pid, problem in pro_dict.items():
+    # Join once per problem; a unit counts when its name occurs anywhere in the joined tags
+    tag_text = "".join(problem["tags"])
+    for u, unit in enumerate(units):
+        if unit in tag_text:
+            count1[u] += 1
+            if len(problem["objs"]) > 0:
+                count2[u] += 1
+    if len(problem["tags"]) == 0:
+        untagged.append(pid)
+# count3: number of problems with an empty tag list
+count3 = len(untagged)
+for u in range(len(units)):
+    print(units[u],". 总题数:",count1[u],", 完成对应题数:",count2[u])
+print("题库总题数: %d, 已有单元标签题数: %d, 未赋单元标签题数: %d."%(len(pro_dict),len(pro_dict)-count3,count3))
+with open (filename,"w",encoding = "u8") as f:
+    f.write(",".join(untagged))
+print("未赋标签的题号列表已保存至: %s" %filename)
--- a/工具/目标挂钩简要清点.ipynb
+++ b/工具/目标挂钩简要清点.ipynb
@ -2,70 +2,56 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json,os\n",
-    "with open(r\"..\\题库0.3\\problems.json\",\"r\",encoding = \"u8\") as f:\n",
-    "    database = f.read()\n",
-    "pro_dict = json.loads(database)\n",
-    "units = [\"第一单元\",\"第二单元\",\"第三单元\",\"第四单元\",\"第五单元\",\"第六单元\",\"第七单元\",\"第八单元\",\"第九单元\"]\n",
-    "count1 = [0]*9\n",
-    "count2 = [0]*9\n",
-    "for id in pro_dict:\n",
-    "    for u in range(9):\n",
-    "        unit = units[u]\n",
-    "        if unit in \"\".join(pro_dict[id][\"tags\"]):\n",
-    "            count1[u] += 1\n",
-    "            if len(pro_dict[id][\"objs\"]) > 0:\n",
-    "                count2[u] += 1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "第一单元 . 总题数: 2014 , 完成对应题数: 1400\n",
-      "第二单元 . 总题数: 3111 , 完成对应题数: 2127\n",
-      "第三单元 . 总题数: 2218 , 完成对应题数: 395\n",
-      "第四单元 . 总题数: 1444 , 完成对应题数: 1036\n",
-      "第五单元 . 总题数: 1433 , 完成对应题数: 306\n",
-      "第六单元 . 总题数: 1383 , 完成对应题数: 431\n",
-      "第七单元 . 总题数: 1712 , 完成对应题数: 957\n",
-      "第八单元 . 总题数: 1378 , 完成对应题数: 522\n",
-      "第九单元 . 总题数: 422 , 完成对应题数: 101\n"
+      "第一单元 . 总题数: 2213 , 完成对应题数: 1401\n",
+      "第二单元 . 总题数: 3472 , 完成对应题数: 2135\n",
+      "第三单元 . 总题数: 2426 , 完成对应题数: 395\n",
+      "第四单元 . 总题数: 1657 , 完成对应题数: 1037\n",
+      "第五单元 . 总题数: 1590 , 完成对应题数: 307\n",
+      "第六单元 . 总题数: 1555 , 完成对应题数: 431\n",
+      "第七单元 . 总题数: 1996 , 完成对应题数: 958\n",
+      "第八单元 . 总题数: 1526 , 完成对应题数: 525\n",
+      "第九单元 . 总题数: 473 , 完成对应题数: 101\n",
+      "暂无对应 . 总题数: 344 , 完成对应题数: 121\n",
+      "题库总题数: 18795, 已有单元标签题数: 16941, 未赋单元标签题数: 1854.\n",
+      "未赋标签的题号列表已保存至: 文本文件/题号筛选.txt\n"
     ]
    }
   ],
   "source": [
+    "import json,os\n",
+    "\n",
+    "filename = r\"文本文件/题号筛选.txt\"\n",
+    "with open(r\"..\\题库0.3\\problems.json\",\"r\",encoding = \"u8\") as f:\n",
+    "    database = f.read()\n",
+    "pro_dict = json.loads(database)\n",
+    "units = [\"第一单元\",\"第二单元\",\"第三单元\",\"第四单元\",\"第五单元\",\"第六单元\",\"第七单元\",\"第八单元\",\"第九单元\",\"暂无对应\"]\n",
+    "count1 = [0]*10\n",
+    "count2 = [0]*10\n",
+    "count3 = 0\n",
+    "untagged = []\n",
+    "for id in pro_dict:\n",
+    "    for u in range(10):\n",
+    "        unit = units[u]\n",
+    "        if unit in \"\".join(pro_dict[id][\"tags\"]):\n",
+    "            count1[u] += 1\n",
+    "            if len(pro_dict[id][\"objs\"]) > 0:\n",
+    "                count2[u] += 1\n",
+    "    if len(pro_dict[id][\"tags\"]) == 0:\n",
+    "            count3 += 1\n",
+    "            untagged.append(id)\n",
    "for u in range(len(units)):\n",
-    "    print(units[u],\". 总题数:\",count1[u],\", 完成对应题数:\",count2[u])\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(12684, 7269)"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "(sum(count1),sum(count2))"
+    "    print(units[u],\". 总题数:\",count1[u],\", 完成对应题数:\",count2[u])\n",
+    "print(\"题库总题数: %d, 已有单元标签题数: %d, 未赋单元标签题数: %d.\"%(len(pro_dict),len(pro_dict)-count3,count3))\n",
+    "with open (filename,\"w\",encoding = \"u8\") as f:\n",
+    "     f.write(\",\".join(untagged))\n",
+    "print(\"未赋标签的题号列表已保存至: %s\" %(filename))\n"
   ]
  },
  {
@ -92,7 +78,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.15 (main, Nov 24 2022, 14:39:17) [MSC v.1916 64 bit (AMD64)]"
+   "version": "3.9.15"
  },
  "orig_nbformat": 4,
  "vscode": {
--- a/工具/相似题目标注.ipynb
+++ b/工具/相似题目标注.ipynb
@ -2,12 +2,22 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "相同题目已标注: 000100 031239\n",
+      "关联题目已标注: 000466 030610\n"
+     ]
+    }
+   ],
   "source": [
    "import os,re,json\n",
    "\n",
+    "filename = \"临时文件/相似题目.txt\"\n",
    "\n",
    "# 读取题库数据并转换为字典\n",
    "with open(r\"../题库0.3/Problems.json\",\"r\",encoding = \"utf8\") as f:\n",
@ -15,7 +25,7 @@
    "pro_dict = json.loads(database)\n",
    "\n",
    "# 读取已分类的相似文件列表\n",
-    "with open(\"临时文件/相似题目.txt\",\"r\",encoding = \"utf8\") as f:\n",
+    "with open(filename,\"r\",encoding = \"utf8\") as f:\n",
    "    similar_text = \"\\n\"+f.read()\n",
    "\n",
    "similar_types = re.findall(r\"\\n[\\d]\\.[\\d]{4}[\\s]*([srSRnN ])[\\s]*\\n\",similar_text)\n",
@ -47,6 +57,13 @@
    "with open(r\"../题库0.3/Problems.json\",\"w\",encoding = \"utf8\") as f:\n",
    "    f.write(database)"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
@ -65,7 +82,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.8"
+   "version": "3.8.15"
  },
  "orig_nbformat": 4,
  "vscode": {
--- a/工具/相似题目检测.ipynb
+++ b/工具/相似题目检测.ipynb
@ -9,7 +9,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "旧题目数: 0 , 新题目数: 18100\n",
+      "旧题目数: 1907 , 新题目数: 500\n",
      "开始新题与旧题的比对\n",
      "50\n",
      "100\n",
@ -21,358 +21,6 @@
      "400\n",
      "450\n",
      "500\n",
-      "550\n",
-      "600\n",
-      "650\n",
-      "700\n",
-      "750\n",
-      "800\n",
-      "850\n",
-      "900\n",
-      "950\n",
-      "1000\n",
-      "1050\n",
-      "1100\n",
-      "1150\n",
-      "1200\n",
-      "1250\n",
-      "1300\n",
-      "1350\n",
-      "1400\n",
-      "1450\n",
-      "1500\n",
-      "1550\n",
-      "1600\n",
-      "1650\n",
-      "1700\n",
-      "1750\n",
-      "1800\n",
-      "1850\n",
-      "1900\n",
-      "1950\n",
-      "2000\n",
-      "2050\n",
-      "2100\n",
-      "2150\n",
-      "2200\n",
-      "2250\n",
-      "2300\n",
-      "2350\n",
-      "2400\n",
-      "2450\n",
-      "2500\n",
-      "2550\n",
-      "2600\n",
-      "2650\n",
-      "2700\n",
-      "2750\n",
-      "2800\n",
-      "2850\n",
-      "2900\n",
-      "2950\n",
-      "3000\n",
-      "3050\n",
-      "3100\n",
-      "3150\n",
-      "3200\n",
-      "3250\n",
-      "3300\n",
-      "3350\n",
-      "3400\n",
-      "3450\n",
-      "3500\n",
-      "3550\n",
-      "3600\n",
-      "3650\n",
-      "3700\n",
-      "3750\n",
-      "3800\n",
-      "3850\n",
-      "3900\n",
-      "3950\n",
-      "4000\n",
-      "4050\n",
-      "4100\n",
-      "4150\n",
-      "4200\n",
-      "4250\n",
-      "4300\n",
-      "4350\n",
-      "4400\n",
-      "4450\n",
-      "4500\n",
-      "4550\n",
-      "4600\n",
-      "4650\n",
-      "4700\n",
-      "4750\n",
-      "4800\n",
-      "4850\n",
-      "4900\n",
-      "4950\n",
-      "5000\n",
-      "5050\n",
-      "5100\n",
-      "5150\n",
-      "5200\n",
-      "5250\n",
-      "5300\n",
-      "5350\n",
-      "5400\n",
-      "5450\n",
-      "5500\n",
-      "5550\n",
-      "5600\n",
-      "5650\n",
-      "5700\n",
-      "5750\n",
-      "5800\n",
-      "5850\n",
-      "5900\n",
-      "5950\n",
-      "6000\n",
-      "6050\n",
-      "6100\n",
-      "6150\n",
-      "6200\n",
-      "6250\n",
-      "6300\n",
-      "6350\n",
-      "6400\n",
-      "6450\n",
-      "6500\n",
-      "6550\n",
-      "6600\n",
-      "6650\n",
-      "6700\n",
-      "6750\n",
-      "6800\n",
-      "6850\n",
-      "6900\n",
-      "6950\n",
-      "7000\n",
-      "7050\n",
-      "7100\n",
-      "7150\n",
-      "7200\n",
-      "7250\n",
-      "7300\n",
-      "7350\n",
-      "7400\n",
-      "7450\n",
-      "7500\n",
-      "7550\n",
-      "7600\n",
-      "7650\n",
-      "7700\n",
-      "7750\n",
-      "7800\n",
-      "7850\n",
-      "7900\n",
-      "7950\n",
-      "8000\n",
-      "8050\n",
-      "8100\n",
-      "8150\n",
-      "8200\n",
-      "8250\n",
-      "8300\n",
-      "8350\n",
-      "8400\n",
-      "8450\n",
-      "8500\n",
-      "8550\n",
-      "8600\n",
-      "8650\n",
-      "8700\n",
-      "8750\n",
-      "8800\n",
-      "8850\n",
-      "8900\n",
-      "8950\n",
-      "9000\n",
-      "9050\n",
-      "9100\n",
-      "9150\n",
-      "9200\n",
-      "9250\n",
-      "9300\n",
-      "9350\n",
-      "9400\n",
-      "9450\n",
-      "9500\n",
-      "9550\n",
-      "9600\n",
-      "9650\n",
-      "9700\n",
-      "9750\n",
-      "9800\n",
-      "9850\n",
-      "9900\n",
-      "9950\n",
-      "10000\n",
-      "10050\n",
-      "10100\n",
-      "10150\n",
-      "10200\n",
-      "10250\n",
-      "10300\n",
-      "10350\n",
-      "10400\n",
-      "10450\n",
-      "10500\n",
-      "10550\n",
-      "10600\n",
-      "10650\n",
-      "10700\n",
-      "10750\n",
-      "10800\n",
-      "10850\n",
-      "10900\n",
-      "10950\n",
-      "11000\n",
-      "11050\n",
-      "11100\n",
-      "11150\n",
-      "11200\n",
-      "11250\n",
-      "11300\n",
-      "11350\n",
-      "11400\n",
-      "11450\n",
-      "11500\n",
-      "11550\n",
-      "11600\n",
-      "11650\n",
-      "11700\n",
-      "11750\n",
-      "11800\n",
-      "11850\n",
-      "11900\n",
-      "11950\n",
-      "12000\n",
-      "12050\n",
-      "12100\n",
-      "12150\n",
-      "12200\n",
-      "12250\n",
-      "12300\n",
-      "12350\n",
-      "12400\n",
-      "12450\n",
-      "12500\n",
-      "12550\n",
-      "12600\n",
-      "12650\n",
-      "12700\n",
-      "12750\n",
-      "12800\n",
-      "12850\n",
-      "12900\n",
-      "12950\n",
-      "13000\n",
-      "13050\n",
-      "13100\n",
-      "13150\n",
-      "13200\n",
-      "13250\n",
-      "13300\n",
-      "13350\n",
-      "13400\n",
-      "13450\n",
-      "13500\n",
-      "13550\n",
-      "13600\n",
-      "13650\n",
-      "13700\n",
-      "13750\n",
-      "13800\n",
-      "13850\n",
-      "13900\n",
-      "13950\n",
-      "14000\n",
-      "14050\n",
-      "14100\n",
-      "14150\n",
-      "14200\n",
-      "14250\n",
-      "14300\n",
-      "14350\n",
-      "14400\n",
-      "14450\n",
-      "14500\n",
-      "14550\n",
-      "14600\n",
-      "14650\n",
-      "14700\n",
-      "14750\n",
-      "14800\n",
-      "14850\n",
-      "14900\n",
-      "14950\n",
-      "15000\n",
-      "15050\n",
-      "15100\n",
-      "15150\n",
-      "15200\n",
-      "15250\n",
-      "15300\n",
-      "15350\n",
-      "15400\n",
-      "15450\n",
-      "15500\n",
-      "15550\n",
-      "15600\n",
-      "15650\n",
-      "15700\n",
-      "15750\n",
-      "15800\n",
-      "15850\n",
-      "15900\n",
-      "15950\n",
-      "16000\n",
-      "16050\n",
-      "16100\n",
-      "16150\n",
-      "16200\n",
-      "16250\n",
-      "16300\n",
-      "16350\n",
-      "16400\n",
-      "16450\n",
-      "16500\n",
-      "16550\n",
-      "16600\n",
-      "16650\n",
-      "16700\n",
-      "16750\n",
-      "16800\n",
-      "16850\n",
-      "16900\n",
-      "16950\n",
-      "17000\n",
-      "17050\n",
-      "17100\n",
-      "17150\n",
-      "17200\n",
-      "17250\n",
-      "17300\n",
-      "17350\n",
-      "17400\n",
-      "17450\n",
-      "17500\n",
-      "17550\n",
-      "17600\n",
-      "17650\n",
-      "17700\n",
-      "17750\n",
-      "17800\n",
-      "17850\n",
-      "17900\n",
-      "17950\n",
-      "18000\n",
-      "18050\n",
-      "18100\n",
      "开始新题之间的比对\n",
      "50\n",
      "100\n",
@ -383,370 +31,18 @@
      "350\n",
      "400\n",
      "450\n",
-      "500\n",
-      "550\n",
-      "600\n",
-      "650\n",
-      "700\n",
-      "750\n",
-      "800\n",
-      "850\n",
-      "900\n",
-      "950\n",
-      "1000\n",
-      "1050\n",
-      "1100\n",
-      "1150\n",
-      "1200\n",
-      "1250\n",
-      "1300\n",
-      "1350\n",
-      "1400\n",
-      "1450\n",
-      "1500\n",
-      "1550\n",
-      "1600\n",
-      "1650\n",
-      "1700\n",
-      "1750\n",
-      "1800\n",
-      "1850\n",
-      "1900\n",
-      "1950\n",
-      "2000\n",
-      "2050\n",
-      "2100\n",
-      "2150\n",
-      "2200\n",
-      "2250\n",
-      "2300\n",
-      "2350\n",
-      "2400\n",
-      "2450\n",
-      "2500\n",
-      "2550\n",
-      "2600\n",
-      "2650\n",
-      "2700\n",
-      "2750\n",
-      "2800\n",
-      "2850\n",
-      "2900\n",
-      "2950\n",
-      "3000\n",
-      "3050\n",
-      "3100\n",
-      "3150\n",
-      "3200\n",
-      "3250\n",
-      "3300\n",
-      "3350\n",
-      "3400\n",
-      "3450\n",
-      "3500\n",
-      "3550\n",
-      "3600\n",
-      "3650\n",
-      "3700\n",
-      "3750\n",
-      "3800\n",
-      "3850\n",
-      "3900\n",
-      "3950\n",
-      "4000\n",
-      "4050\n",
-      "4100\n",
-      "4150\n",
-      "4200\n",
-      "4250\n",
-      "4300\n",
-      "4350\n",
-      "4400\n",
-      "4450\n",
-      "4500\n",
-      "4550\n",
-      "4600\n",
-      "4650\n",
-      "4700\n",
-      "4750\n",
-      "4800\n",
-      "4850\n",
-      "4900\n",
-      "4950\n",
-      "5000\n",
-      "5050\n",
-      "5100\n",
-      "5150\n",
-      "5200\n",
-      "5250\n",
-      "5300\n",
-      "5350\n",
-      "5400\n",
-      "5450\n",
-      "5500\n",
-      "5550\n",
-      "5600\n",
-      "5650\n",
-      "5700\n",
-      "5750\n",
-      "5800\n",
-      "5850\n",
-      "5900\n",
-      "5950\n",
-      "6000\n",
-      "6050\n",
-      "6100\n",
-      "6150\n",
-      "6200\n",
-      "6250\n",
-      "6300\n",
-      "6350\n",
-      "6400\n",
-      "6450\n",
-      "6500\n",
-      "6550\n",
-      "6600\n",
-      "6650\n",
-      "6700\n",
-      "6750\n",
-      "6800\n",
-      "6850\n",
-      "6900\n",
-      "6950\n",
-      "7000\n",
-      "7050\n",
-      "7100\n",
-      "7150\n",
-      "7200\n",
-      "7250\n",
-      "7300\n",
-      "7350\n",
-      "7400\n",
-      "7450\n",
-      "7500\n",
-      "7550\n",
-      "7600\n",
-      "7650\n",
-      "7700\n",
-      "7750\n",
-      "7800\n",
-      "7850\n",
-      "7900\n",
-      "7950\n",
-      "8000\n",
-      "8050\n",
-      "8100\n",
-      "8150\n",
-      "8200\n",
-      "8250\n",
-      "8300\n",
-      "8350\n",
-      "8400\n",
-      "8450\n",
-      "8500\n",
-      "8550\n",
-      "8600\n",
-      "8650\n",
-      "8700\n",
-      "8750\n",
-      "8800\n",
-      "8850\n",
-      "8900\n",
-      "8950\n",
-      "9000\n",
-      "9050\n",
-      "9100\n",
-      "9150\n",
-      "9200\n",
-      "9250\n",
-      "9300\n",
-      "9350\n",
-      "9400\n",
-      "9450\n",
-      "9500\n",
-      "9550\n",
-      "9600\n",
-      "9650\n",
-      "9700\n",
-      "9750\n",
-      "9800\n",
-      "9850\n",
-      "9900\n",
-      "9950\n",
-      "10000\n",
-      "10050\n",
-      "10100\n",
-      "10150\n",
-      "10200\n",
-      "10250\n",
-      "10300\n",
-      "10350\n",
-      "10400\n",
-      "10450\n",
-      "10500\n",
-      "10550\n",
-      "10600\n",
-      "10650\n",
-      "10700\n",
-      "10750\n",
-      "10800\n",
-      "10850\n",
-      "10900\n",
-      "10950\n",
-      "11000\n",
-      "11050\n",
-      "11100\n",
-      "11150\n",
-      "11200\n",
-      "11250\n",
-      "11300\n",
-      "11350\n",
-      "11400\n",
-      "11450\n",
-      "11500\n",
-      "11550\n",
-      "11600\n",
-      "11650\n",
-      "11700\n",
-      "11750\n",
-      "11800\n",
-      "11850\n",
-      "11900\n",
-      "11950\n",
-      "12000\n",
-      "12050\n",
-      "12100\n",
-      "12150\n",
-      "12200\n",
-      "12250\n",
-      "12300\n",
-      "12350\n",
-      "12400\n",
-      "12450\n",
-      "12500\n",
-      "12550\n",
-      "12600\n",
-      "12650\n",
-      "12700\n",
-      "12750\n",
-      "12800\n",
-      "12850\n",
-      "12900\n",
-      "12950\n",
-      "13000\n",
-      "13050\n",
-      "13100\n",
-      "13150\n",
-      "13200\n",
-      "13250\n",
-      "13300\n",
-      "13350\n",
-      "13400\n",
-      "13450\n",
-      "13500\n",
-      "13550\n",
-      "13600\n",
-      "13650\n",
-      "13700\n",
-      "13750\n",
-      "13800\n",
-      "13850\n",
-      "13900\n",
-      "13950\n",
-      "14000\n",
-      "14050\n",
-      "14100\n",
-      "14150\n",
-      "14200\n",
-      "14250\n",
-      "14300\n",
-      "14350\n",
-      "14400\n",
-      "14450\n",
-      "14500\n",
-      "14550\n",
-      "14600\n",
-      "14650\n",
-      "14700\n",
-      "14750\n",
-      "14800\n",
-      "14850\n",
-      "14900\n",
-      "14950\n",
-      "15000\n",
-      "15050\n",
-      "15100\n",
-      "15150\n",
-      "15200\n",
-      "15250\n",
-      "15300\n",
-      "15350\n",
-      "15400\n",
-      "15450\n",
-      "15500\n",
-      "15550\n",
-      "15600\n",
-      "15650\n",
-      "15700\n",
-      "15750\n",
-      "15800\n",
-      "15850\n",
-      "15900\n",
-      "15950\n",
-      "16000\n",
-      "16050\n",
-      "16100\n",
-      "16150\n",
-      "16200\n",
-      "16250\n",
-      "16300\n",
-      "16350\n",
-      "16400\n",
-      "16450\n",
-      "16500\n",
-      "16550\n",
-      "16600\n",
-      "16650\n",
-      "16700\n",
-      "16750\n",
-      "16800\n",
-      "16850\n",
-      "16900\n",
-      "16950\n",
-      "17000\n",
-      "17050\n",
-      "17100\n",
-      "17150\n",
-      "17200\n",
-      "17250\n",
-      "17300\n",
-      "17350\n",
-      "17400\n",
-      "17450\n",
-      "17500\n",
-      "17550\n",
-      "17600\n",
-      "17650\n",
-      "17700\n",
-      "17750\n",
-      "17800\n",
-      "17850\n",
-      "17900\n",
-      "17950\n",
-      "18000\n",
-      "18050\n",
-      "总耗时: 384.6457269191742 秒.\n",
-      "发现相似:  2800 , 其中已标注:  2223 .\n"
+      "总耗时: 3.233793258666992 秒.\n",
+      "发现相似:  30 , 其中已标注:  28 .\n"
     ]
    }
   ],
   "source": [
-    "# from hashlib import new\n",
    "import os,re,difflib,Levenshtein,time,json\n",
    "\n",
-    "# 重要!!! 新题目的范围\n",
-    "id_new_problems = \"1:50000\"\n",
-    "threshold = 0.99\n",
+    "# 重要!!! 新旧题目的范围(有重复默认为新题)\n",
+    "id_new_problems = \"1:500\"\n",
+    "id_old_problems = \"30000:50000\"\n",
+    "threshold = 0.95\n",
    "\n",
    "#生成数码列表, 逗号分隔每个区块, 区块内部用:表示整数闭区间\n",
    "def generate_number_set(string):\n",
@ -804,15 +100,16 @@
    "#生成旧题目数据库字典与新题目数据库字典\n",
    "new_id_list_raw = generate_number_set(id_new_problems)\n",
    "new_id_list = [id for id in pro_dict if id in new_id_list_raw]\n",
+    "old_id_list_raw = generate_number_set(id_old_problems)\n",
+    "old_id_list = [id for id in pro_dict if (id in old_id_list_raw and not id in new_id_list_raw)]\n",
    "old_problems_dict = {}\n",
    "new_problems_dict = {}\n",
    "old_problems_dict_content = {}\n",
    "new_problems_dict_content = {}\n",
-    "for id in pro_dict:\n",
-    "    if id in new_id_list:\n",
+    "for id in new_id_list:\n",
    "    new_problems_dict[id] = pro_dict[id]\n",
    "    new_problems_dict_content[id] = pre_treating(pro_dict[id][\"content\"])\n",
-    "    else:\n",
+    "for id in old_id_list:\n",
    "    old_problems_dict[id] = pro_dict[id]\n",
    "    old_problems_dict_content[id] = pre_treating(pro_dict[id][\"content\"])\n",
    "print(\"旧题目数:\",len(old_problems_dict),\", 新题目数:\",len(new_problems_dict))\n",
@ -876,7 +173,60 @@
   "execution_count": null,
   "metadata": {},
   "outputs": [],
-   "source": []
+   "source": [
+    "#记录起始时间\n",
+    "start_time = time.time()\n",
+    "suspect_count = 0\n",
+    "remarked = 0\n",
+    "\n",
+    "alike_problems = \"\"\n",
+    "\n",
+    "\n",
+    "\n",
+    "#开始新题与旧题的比对\n",
+    "count = 0\n",
+    "print(\"开始新题与旧题的比对\")\n",
+    "for id_new in new_problems_dict:\n",
+    "    count += 1\n",
+    "    if count % 50 == 0:\n",
+    "        print(count)\n",
+    "    for id_old in old_problems_dict:\n",
+    "        similar_rate = sim_test(new_problems_dict_content[id_new],old_problems_dict_content[id_old])\n",
+    "        if similar_rate > threshold or id_new in old_problems_dict[id_old][\"related\"] or id_new in old_problems_dict[id_old][\"same\"] or id_old in new_problems_dict[id_new][\"related\"] or id_old in new_problems_dict[id_new][\"same\"]:\n",
+    "            suspect_count += 1\n",
+    "            if not (id_new in old_problems_dict[id_old][\"related\"] or id_new in old_problems_dict[id_old][\"same\"] or id_old in new_problems_dict[id_new][\"related\"] or id_old in new_problems_dict[id_new][\"same\"]):\n",
+    "                alike_problems += (\"%.4f\" %similar_rate) + \"\\n\\n\" + id_new + \" \" + new_problems_dict[id_new][\"content\"] + \"\\n\\n\" + id_old + \" \" + old_problems_dict[id_old][\"content\"] + \"\\n\\n\"\n",
+    "            else:\n",
+    "                remarked += 1\n",
+    "\n",
+    "#开始新题之间的比对\n",
+    "count = 0\n",
+    "print(\"开始新题之间的比对\")\n",
+    "while len(new_problems_dict) >= 2:\n",
+    "    count += 1\n",
+    "    if count % 50 == 0:\n",
+    "        print(count)\n",
+    "    keys = list(new_problems_dict.keys())\n",
+    "    current_problem = new_problems_dict.pop(keys[0])\n",
+    "    current_problem_content = new_problems_dict_content[current_problem[\"id\"]]\n",
+    "    for id_new in new_problems_dict:\n",
+    "        similar_rate = sim_test(new_problems_dict_content[id_new],current_problem_content)\n",
+    "        if similar_rate > threshold or id_new in current_problem[\"related\"] or id_new in current_problem[\"same\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"related\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"same\"]:\n",
+    "            suspect_count += 1\n",
+    "            if not (id_new in current_problem[\"related\"] or id_new in current_problem[\"same\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"related\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"same\"]):\n",
+    "                alike_problems += (\"%.4f\" %similar_rate) + \"\\n\\n\" + id_new + \" \" + new_problems_dict[id_new][\"content\"] + \"\\n\\n\" + current_problem[\"id\"] + \" \" + current_problem[\"content\"] + \"\\n\\n\"\n",
+    "            else:\n",
+    "                remarked += 1\n",
+    "\n",
+    "\n",
+    "#记录终止时间及显示结果\n",
+    "end_time = time.time()\n",
+    "print(\"总耗时:\",end_time-start_time,\"秒.\")\n",
+    "print(\"发现相似: \",suspect_count,\", 其中已标注: \",remarked,\".\")\n",
+    "\n",
+    "with open(\"临时文件/相似题目.txt\",\"w\",encoding=\"utf8\") as f:\n",
+    "    f.write(alike_problems)"
+   ]
  }
 ],
 "metadata": {
@ -895,7 +245,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.15"
+   "version": "3.8.15"
  },
  "orig_nbformat": 4,
  "vscode": {
--- a/工具/相同相似题目标注.py
+++ b/工具/相同相似题目标注.py
@ -0,0 +1,41 @@
+import os,re,json
+
+# Hand-annotated similarity report: each entry is a score line (optionally
+# followed by a verdict letter) and two lines starting with a 6-digit problem id.
+filename = "临时文件/相似题目.txt"
+
+# Load the problem database (a JSON object keyed by problem id).
+with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
+    pro_dict = json.load(f)
+
+# Read the report; the leading newline lets the patterns below, which all
+# start with "\n", also match an entry sitting on the very first line.
+with open(filename,"r",encoding = "utf8") as f:
+    similar_text = "\n"+f.read()
+
+# One verdict character per annotated score line, and every 6-digit id that
+# starts a line and is followed by a space.
+similar_types = re.findall(r"\n[\d]\.[\d]{4}[\s]*([srSRnN ])[\s]*\n",similar_text)
+similar_problems = re.findall(r"\n([\d]{6}) ",similar_text)
+
+# Upper-cased verdict -> (database field to extend, message printed afterwards).
+# Any other verdict (e.g. "N" or a blank) leaves the database untouched.
+marks = {"S": ("same","相同题目已标注:"), "R": ("related","关联题目已标注:")}
+
+if len(similar_problems) != 2 * len(similar_types):
+    # Every verdict must be followed by exactly two problem ids.
+    print("相似程度数据:",len(similar_types),"个, 相似题目:",len(similar_problems),"题. 数据有问题, 请检查.")
+else:
+    # Drawing twice from the same iterator yields the ids as consecutive pairs.
+    id_stream = iter(similar_problems)
+    for verdict, id1, id2 in zip(similar_types, id_stream, id_stream):
+        if verdict.upper() not in marks:
+            continue
+        field, message = marks[verdict.upper()]
+        # Record the link in both directions, skipping ids already present.
+        for source, target in ((id1,id2),(id2,id1)):
+            if target not in pro_dict[source][field]:
+                pro_dict[source][field].append(target)
+        print(message,id1,id2)
+
+
+# Serialize the (possibly updated) database and write it back in place.
+database = json.dumps(pro_dict,indent=4,ensure_ascii=False)
+with open(r"../题库0.3/Problems.json","w",encoding = "utf8") as f:
+    f.write(database)
--- a/工具/相同题目检测.ipynb
+++ b/工具/相同题目检测.ipynb
--- a/工具/识别题库中尚未标注的题目类型.ipynb
+++ b/工具/识别题库中尚未标注的题目类型.ipynb
@ -2,22 +2,14 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "040387 填空题\n",
-      "040388 填空题\n",
-      "040389 填空题\n",
-      "040390 填空题\n",
-      "040391 填空题\n",
-      "040392 填空题\n",
-      "040393 填空题\n",
-      "040394 填空题\n",
-      "040395 解答题\n"
+      "均已确定题目类型\n"
     ]
    }
   ],
@ -29,9 +21,11 @@
    "    database = f.read()\n",
    "pro_dict = json.loads(database)\n",
    "\n",
+    "count = 0\n",
    "#根据特征字符识别题目类型\n",
    "for p in pro_dict:\n",
    "    if pro_dict[p][\"genre\"] == \"\":\n",
+    "        count += 1\n",
    "        if \"bracket\" in pro_dict[p][\"content\"]:\n",
    "            pro_dict[p][\"genre\"] = \"选择题\"\n",
    "            print(p,\"选择题\")\n",
@ -46,7 +40,11 @@
    "#将修改结果写入json数据库\n",
    "database = json.dumps(pro_dict,indent = 4, ensure_ascii= False)\n",
    "with open(r\"../题库0.3/Problems.json\",\"w\",encoding = \"utf8\") as f:\n",
-    "    f.write(database)"
+    "    f.write(database)\n",
+    "if count > 0:\n",
+    "    print(\"为 %d 道题目确定类型\" %count)\n",
+    "else:\n",
+    "    print(\"均已确定题目类型\")"
   ]
  },
  {
--- a/工具/识别题目类型.py
+++ b/工具/识别题目类型.py
@ -0,0 +1,31 @@
+import os,re,json
+
+# Load the problem database into a dict keyed by problem id.
+with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
+    pro_dict = json.load(f)
+
+# Number of problems whose genre was empty and gets filled in by this run.
+count = 0
+# Infer the genre of every untyped problem from marker words in its content.
+for p, problem in pro_dict.items():
+    if problem["genre"] != "":
+        continue
+    count += 1
+    if "bracket" in problem["content"]:
+        genre = "选择题"
+    elif "blank" in problem["content"]:
+        genre = "填空题"
+    else:
+        genre = "解答题"
+        # Only this fallback genre also gets its "space" field set.
+        problem["space"] = "12ex"
+    problem["genre"] = genre
+    print(p,genre)
+
+# Write the database back to the same JSON file.
+database = json.dumps(pro_dict,indent = 4, ensure_ascii= False)
+with open(r"../题库0.3/Problems.json","w",encoding = "utf8") as f:
+    f.write(database)
+
+# Summary line: how many problems were typed, or that none needed it.
+print("为 %d 道题目确定类型" %count if count > 0 else "均已确定题目类型")
--- a/工具/课时目标pdf生成.py
+++ b/工具/课时目标pdf生成.py
@ -0,0 +1,66 @@
+import os,re,json,sys
+
+# The range definition must be replaced before use.
+# obj_range is a comma-separated list of objective ids and/or "start:end"
+# intervals; the script later splits it on "," and tests ids against each item.
+# NOTE(review): the bare triple-quoted strings in this section are no-op
+# expression statements; they look like begin/end markers around the editable
+# values - confirm whether another tool rewrites the text between them.
+"""使用前替换范围定义"""
+obj_range = "K0810:K0817999"
+"""范围定义到此结束"""
+# File-name definition: index and title form the output file name and are
+# also substituted into the LaTeX template further down.
+"""规定文件名"""
+index = "38"
+title = "计数原理与排列组合"
+"""文件名到此结束"""
+
+# Base name "<index>_<title>" and the path of the generated .tex file.
+filename = index+"_"+title
+outputfile = r"临时文件/"+filename+".tex"
+
+# LaTeX template whose placeholder strings are replaced later in the script.
+template_file = r"模板文件/复习课目标模板.tex"
+
+
+# Check whether a string lies in the range described by a list of items,
+# each item being either a single value or a "start:end" interval.
+def within_range(string,list):
+    """Return True if ``string`` matches any item of ``list``.
+
+    Each item is either a literal value, matched by equality, or a closed
+    interval written ``start:end``, matched by lexicographic string
+    comparison (``start <= string <= end``).  Whitespace around an item and
+    around each interval endpoint is ignored, so ``" A : B "`` behaves like
+    ``"A:B"``.
+
+    The parameter name ``list`` shadows the builtin; it is kept unchanged so
+    existing keyword callers keep working.
+
+    Raises:
+        ValueError: if an item contains more than one ":".
+    """
+    for item in list:
+        item = item.strip()
+        if string == item:
+            return True
+        if ":" in item:
+            # Strip the endpoints as well; otherwise a stray space would
+            # take part in the string comparison and the interval would
+            # silently fail to match.
+            start, end = (part.strip() for part in item.split(":"))
+            if start <= string <= end:
+                return True
+    return False
+
+# Load the lesson-objective database (a JSON object keyed by objective id).
+with open(r"../题库0.3/LessonObj.json","r",encoding = "utf8") as f:
+    obj_dict = json.load(f)
+
+# Split the configured range into the items that within_range checks.
+obj_range_list = obj_range.split(",")
+
+# Build one table row per selected objective, in database order.
+output_string = "".join(
+    obj_id + " & " + obj_dict[obj_id]["content"] + " & " + r"\\ \hline" + "\n"
+    for obj_id in obj_dict
+    if within_range(obj_id,obj_range_list)
+)
+
+# Read the LaTeX template.
+with open(template_file,"r",encoding="utf8") as f:
+    latex_raw = f.read()
+
+# On any platform other than Windows, switch the font set to fandol and
+# comment out the \setCJKmainfont lines of the template.
+if sys.platform != "win32":
+    for pattern, replacement in (
+        (r"fontset[\s]*=[\s]*none","fontset = fandol"),
+        (r"\\setCJKmainfont",r"% \\setCJKmainfont"),
+    ):
+        latex_raw = re.sub(pattern,replacement,latex_raw)
+
+# Fill the template placeholders with the index, title and table body.
+for placeholder, value in (("编号待替换",index),("标题待替换",title),("内容待替换",output_string)):
+    latex_raw = latex_raw.replace(placeholder,value)
+
+# Write the finished .tex file into the temporary-files directory.
+with open(outputfile,"w",encoding = "utf8") as f:
+    f.write(latex_raw)
+print("开始编译目标pdf文件: ", outputfile)
+# xelatex is run twice; only the exit status of the second run is printed.
+# NOTE(review): presumably the second pass settles table layout/references - confirm.
+compile_command = "xelatex -interaction=batchmode -output-directory=" + "临时文件" + " "+ outputfile
+os.system(compile_command)
+print(os.system(compile_command))
--- a/文本处理工具/剪贴板表格整理.ipynb
+++ b/文本处理工具/剪贴板表格整理.ipynb
@ -0,0 +1,116 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os,re\n",
+    "import win32clipboard as wc\n",
+    "import win32con\n",
+    "\n",
+    "# 获取剪切板内容\n",
+    "def getCopy():\n",
+    "    wc.OpenClipboard()\n",
+    "    t = wc.GetClipboardData(win32con.CF_UNICODETEXT)\n",
+    "    wc.CloseClipboard()\n",
+    "    return t\n",
+    "\n",
+    "# 写入剪切板内容\n",
+    "def setCopy(str):\n",
+    "    wc.OpenClipboard()\n",
+    "    wc.EmptyClipboard()\n",
+    "    wc.SetClipboardData(win32con.CF_UNICODETEXT, str)\n",
+    "    wc.CloseClipboard()\n",
+    "\n",
+    "def dollared(string):\n",
+    "    flag = True\n",
+    "    for c in string:\n",
+    "        if not c in \"1234567890.+-:[]()\":\n",
+    "            flag = False\n",
+    "            break\n",
+    "    if flag:\n",
+    "        string = \"$\" + string + \"$\"\n",
+    "    return string\n",
+    "\n",
+    "\n",
+    "\n",
+    "data = getCopy()\n",
+    "\n",
+    "data1 = \"\"\n",
+    "for c in data:\n",
+    "    if 65296 <= ord(c) < 65306:\n",
+    "        data1 += str(ord(c)-65296)\n",
+    "    else:\n",
+    "        data1 += c\n",
+    "data = data1\n",
+    "data = data.replace(\"．\",\".\").replace(\"：\",\":\")\n",
+    "elements = data.split(\"\\n\")\n",
+    "elements_per_line = int(elements.pop(-1)) #这里需要修改\n",
+    "contents = \"\\\\begin{center}\\n\\\\begin{tabular}{|\"\n",
+    "for i in range(elements_per_line):\n",
+    "    contents += \"c|\"\n",
+    "contents += \"}\\n\"\n",
+    "contents += r\"\\hline\"+\"\\n\"\n",
+    "col = 1\n",
+    "for element in elements:\n",
+    "    if col != 1:\n",
+    "        contents += \" & \"\n",
+    "    contents += dollared(element)\n",
+    "    if col == elements_per_line:\n",
+    "        contents += r\" \\\\ \\hline\"+\"\\n\"\n",
+    "    col += 1\n",
+    "    if col > elements_per_line:\n",
+    "        col = 1\n",
+    "contents += \"\\\\end{tabular}\" + \"\\n\" + \"\\\\end{center}\"\n",
+    "\n",
+    "with open(\"临时文件/tablefile.txt\",\"w\",encoding = \"utf8\") as f:\n",
+    "    f.write(contents)\n",
+    "\n",
+    "setCopy(contents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pythontest",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.15"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "91219a98e0e9be72efb992f647fe78b593124968b75db0b865552d6787c8db93"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/题库0.3/Problems.json
+++ b/题库0.3/Problems.json