diff --git a/工具/相同相似题目标注.py b/工具/相同相似题目标注.py index 8bd70164..1ee7eac9 100644 --- a/工具/相同相似题目标注.py +++ b/工具/相同相似题目标注.py @@ -1,6 +1,7 @@ import os,re,json -filename = "临时文件/相似题目.txt" +# filename = "临时文件/相似题目.txt" +filename = "临时文件/相似1.txt" # 读取题库数据并转换为字典 with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f: @@ -12,7 +13,11 @@ with open(filename,"r",encoding = "utf8") as f: similar_text = "\n"+f.read() similar_types = re.findall(r"\n[\d]\.[\d]{4}[\s]*([srSRnN])*[\s]*\n",similar_text) -similar_problems = re.findall(r"\n([\d]{6}) ",similar_text) +similar_problems = re.findall(r"\n([\d]{6})\s",similar_text) + +print(similar_types) +print(similar_problems) +samecount ,relcount, unrelcount = 0,0,0 if len(similar_types) * 2 == len(similar_problems): for i in similar_types: @@ -21,19 +26,40 @@ if len(similar_types) * 2 == len(similar_problems): if i.upper() == "S": if not id2 in pro_dict[id1]["same"]: pro_dict[id1]["same"].append(id2) + samecount += 1 + print("相同题目已标注: %s <- %s"%(id1,id2)) if not id1 in pro_dict[id2]["same"]: pro_dict[id2]["same"].append(id1) - print("相同题目已标注:",id1,id2) + samecount += 1 + print("相同题目已标注: %s <- %s"%(id2,id1)) + + elif i.upper() == "R": if not id2 in pro_dict[id1]["related"]: pro_dict[id1]["related"].append(id2) + relcount += 1 + print("关联题目已标注: %s <- %s"%(id1,id2)) if not id1 in pro_dict[id2]["related"]: pro_dict[id2]["related"].append(id1) - print("关联题目已标注:",id1,id2) + relcount += 1 + print("关联题目已标注: %s <- %s"%(id2,id1)) + + elif i.upper() == "N": + if not id2 in pro_dict[id1]["unrelated"]: + pro_dict[id1]["unrelated"].append(id2) + unrelcount += 1 + print("无关题目已标注: %s <- %s"%(id1,id2)) + if not id1 in pro_dict[id2]["unrelated"]: + pro_dict[id2]["unrelated"].append(id1) + unrelcount += 1 + print("无关题目已标注: %s <- %s"%(id2,id1)) + + else: print("相似程度数据:",len(similar_types),"个, 相似题目:",len(similar_problems),"题. 数据有问题, 请检查.") +print("已标注相同题目 %d 项, 关联题目 %d 项, 无关题目 %d 项"%(samecount,relcount,unrelcount)) # 将题库字典转换为json文件并保存至原位 database = json.dumps(pro_dict,indent=4,ensure_ascii=False) diff --git a/题库0.3/Problems.json b/题库0.3/Problems.json index 0330dc2e..1ac2b2f2 100644 --- a/题库0.3/Problems.json +++ b/题库0.3/Problems.json @@ -132818,7 +132818,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012013" + ], "related": [ "004447", "005789", @@ -132846,7 +132848,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012014" + ], "related": [], "remark": "", "space": "", @@ -132873,7 +132877,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012015" + ], "related": [ "000808" ], @@ -132899,7 +132905,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012016" + ], "related": [], "remark": "", "space": "", @@ -132925,7 +132933,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012017" + ], "related": [], "remark": "", "space": "", @@ -132950,7 +132960,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012018" + ], "related": [], "remark": "", "space": "", @@ -132976,7 +132988,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012019" + ], "related": [], "remark": "", "space": "", @@ -133042,7 +133056,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012021" + ], "related": [], "remark": "", "space": "", @@ -133066,7 +133082,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012022" + ], "related": [ "003601" ], @@ -265665,7 +265683,10 @@ "related": [], "remark": "", "space": "4em", - "unrelated": [] + "unrelated": [ + "011798", + "011799" + ] }, "010055": { "id": "010055", @@ -309684,7 +309705,9 @@ ], "remark": "", "space": "4em", - "unrelated": [] + "unrelated": [ + "010054" + ] }, "011799": { "id": "011799", @@ -309708,7 +309731,9 @@ ], "remark": "", "space": "4em", - "unrelated": [] + "unrelated": [ + "010054" + ] }, "011800": { "id": "011800", @@ -314629,7 +314654,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004665" + ], "related": [], "remark": "", "space": "", @@ -314658,7 +314685,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004666" + ], "related": [], "remark": "", "space": "", @@ -314687,7 +314716,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004667" + ], "related": [], "remark": "", "space": "", @@ -314716,7 +314747,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004668" + ], "related": [], "remark": "", "space": "", @@ -314745,7 +314778,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004669" + ], "related": [], "remark": "", "space": "", @@ -314775,7 +314810,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004670" + ], "related": [], "remark": "", "space": "", @@ -314804,7 +314841,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004671" + ], "related": [], "remark": "", "space": "", @@ -314873,7 +314912,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004673" + ], "related": [], "remark": "", "space": "", @@ -314902,7 +314943,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004674" + ], "related": [], "remark": "", "space": "",