diff --git a/工具/局部相似题目检测.py b/工具/局部相似题目检测.py index e1e77252..98276dd6 100644 --- a/工具/局部相似题目检测.py +++ b/工具/局部相似题目检测.py @@ -1,9 +1,9 @@ import os,re,difflib,Levenshtein,time,json # 重要!!! 新旧题目的范围(有重复默认为新题) -id_new_problems = "15311:15331" +id_new_problems = "1:50000" id_old_problems = "1:50000" -threshold = 0.85 +threshold = 0.9999 #生成数码列表, 逗号分隔每个区块, 区块内部用:表示整数闭区间 def generate_number_set(string): diff --git a/工具/相同相似题目标注.py b/工具/相同相似题目标注.py index 4cb83f10..8bd70164 100644 --- a/工具/相同相似题目标注.py +++ b/工具/相同相似题目标注.py @@ -11,7 +11,7 @@ pro_dict = json.loads(database) with open(filename,"r",encoding = "utf8") as f: similar_text = "\n"+f.read() -similar_types = re.findall(r"\n[\d]\.[\d]{4}[\s]*([srSRnN ])[\s]*\n",similar_text) +similar_types = re.findall(r"\n[\d]\.[\d]{4}[\s]*([srSRnN])*[\s]*\n",similar_text) similar_problems = re.findall(r"\n([\d]{6}) ",similar_text) if len(similar_types) * 2 == len(similar_problems): diff --git a/题库0.3/Problems.json b/题库0.3/Problems.json index 2f206293..16b9487c 100644 --- a/题库0.3/Problems.json +++ b/题库0.3/Problems.json @@ -9181,7 +9181,9 @@ "edit": [ "20220624\t朱敏慧, 王伟叶" ], - "same": [], + "same": [ + "012422" + ], "related": [ "004644" ], @@ -9777,7 +9779,9 @@ "edit": [ "20220624\t朱敏慧, 王伟叶" ], - "same": [], + "same": [ + "010932" + ], "related": [], "remark": "", "space": "" @@ -11423,7 +11427,8 @@ "20220624\t朱敏慧, 王伟叶" ], "same": [ - "011459" + "011459", + "012078" ], "related": [ "000568", @@ -14011,7 +14016,9 @@ "edit": [ "20220624\t朱敏慧, 王伟叶" ], - "same": [], + "same": [ + "012332" + ], "related": [ "010987", "011411" @@ -16963,7 +16970,9 @@ "edit": [ "20220624\t朱敏慧, 王伟叶" ], - "same": [], + "same": [ + "012661" + ], "related": [], "remark": "", "space": "" @@ -18357,7 +18366,9 @@ "edit": [ "20220624\t朱敏慧, 王伟叶" ], - "same": [], + "same": [ + "013591" + ], "related": [], "remark": "", "space": "" @@ -18806,7 +18817,8 @@ "20220624\t朱敏慧, 王伟叶" ], "same": [ - "011603" + "011603", + "014703" ], "related": [], "remark": "", @@ -21338,7 +21350,9 @@ "edit": [ "20220624\t朱敏慧, 王伟叶" ], - "same": [], + "same": [ + "013304" + ], "related": [], "remark": "", "space": "" @@ -23432,7 +23446,9 @@ "edit": [ "20220624\t朱敏慧, 王伟叶" ], - "same": [], + "same": [ + "015018" + ], "related": [ "000507", "000378", @@ -24532,7 +24548,9 @@ "edit": [ "20220624\t朱敏慧, 王伟叶" ], - "same": [], + "same": [ + "011369" + ], "related": [], "remark": "", "space": "" @@ -26553,7 +26571,8 @@ "20220624\t朱敏慧, 王伟叶" ], "same": [ - "011328" + "011328", + "013764" ], "related": [], "remark": "", @@ -27353,7 +27372,8 @@ "20220624\t朱敏慧, 王伟叶" ], "same": [ - "004884" + "004884", + "013377" ], "related": [], "remark": "", @@ -38496,7 +38516,8 @@ "20220625\t王伟叶" ], "same": [ - "008074" + "008074", + "020393" ], "related": [], "remark": "", @@ -42812,7 +42833,9 @@ "edit": [ "20220625\t王伟叶" ], - "same": [], + "same": [ + "012081" + ], "related": [], "remark": "", "space": "" @@ -48693,7 +48716,9 @@ "edit": [ "20220625\t王伟叶" ], - "same": [], + "same": [ + "012942" + ], "related": [], "remark": "", "space": "" @@ -70703,7 +70728,9 @@ "edit": [ "20220625\t王伟叶" ], - "same": [], + "same": [ + "012084" + ], "related": [], "remark": "", "space": "" @@ -75787,7 +75814,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "013705" + ], "related": [], "remark": "", "space": "" @@ -76491,7 +76520,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "031300" + ], "related": [], "remark": "", "space": "" @@ -79279,7 +79310,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "012895" + ], "related": [], "remark": "", "space": "" @@ -79302,7 +79335,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "012905" + ], "related": [], "remark": "", "space": "" @@ -79696,7 +79731,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "013479" + ], "related": [], "remark": "", "space": "" @@ -80142,7 +80179,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "012893" + ], "related": [], "remark": "", "space": "" @@ -81515,7 +81554,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "012904" + ], "related": [], "remark": "", "space": "" @@ -91574,7 +91615,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "021748" + ], "related": [], "remark": "", "space": "" @@ -93258,7 +93301,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "013080" + ], "related": [], "remark": "", "space": "" @@ -94304,7 +94349,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "021316" + ], "related": [], "remark": "", "space": "" @@ -99983,7 +100030,8 @@ "20220701\t王伟叶" ], "same": [ - "003379" + "003379", + "013903" ], "related": [], "remark": "", @@ -100068,7 +100116,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "013569" + ], "related": [], "remark": "", "space": "" @@ -100232,7 +100282,8 @@ "20220701\t王伟叶" ], "same": [ - "011557" + "011557", + "014648" ], "related": [ "003610", @@ -100263,7 +100314,8 @@ "20220701\t王伟叶" ], "same": [ - "011558" + "011558", + "014707" ], "related": [], "remark": "", @@ -100363,7 +100415,8 @@ "20220701\t王伟叶" ], "same": [ - "011561" + "011561", + "013332" ], "related": [], "remark": "", @@ -101079,7 +101132,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "013934" + ], "related": [], "remark": "", "space": "" @@ -102464,7 +102519,9 @@ "edit": [ "20220701\t王伟叶" ], - "same": [], + "same": [ + "012929" + ], "related": [], "remark": "", "space": "" @@ -102571,7 +102628,8 @@ "20220701\t王伟叶" ], "same": [ - "003588" + "003588", + "013218" ], "related": [], "remark": "", @@ -110512,7 +110570,9 @@ "edit": [ "20220705\t王伟叶" ], - "same": [], + "same": [ + "030823" + ], "related": [], "remark": "", "space": "" @@ -118999,7 +119059,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012120" + ], "related": [], "remark": "", "space": "" @@ -119108,7 +119170,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012124" + ], "related": [], "remark": "", "space": "" @@ -119133,7 +119197,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012125" + ], "related": [ "004379", "005788", @@ -119303,7 +119369,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012131" + ], "related": [], "remark": "", "space": "" @@ -119330,7 +119398,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012132" + ], "related": [], "remark": "", "space": "" @@ -120408,7 +120478,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "031027" + ], "related": [], "remark": "", "space": "12ex" @@ -122354,7 +122426,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012364" + ], "related": [], "remark": "", "space": "" @@ -123095,7 +123169,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "030010" + ], "related": [], "remark": "", "space": "12ex" @@ -123743,7 +123819,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012203" + ], "related": [ "003673", "010923", @@ -123773,7 +123851,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012204" + ], "related": [ "000336", "000376", @@ -123812,7 +123892,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012205" + ], "related": [ "002793" ], @@ -123867,7 +123949,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012207" + ], "related": [], "remark": "", "space": "" @@ -123904,7 +123988,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012208" + ], "related": [ "000639" ], @@ -123932,7 +124018,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012209" + ], "related": [ "000483", "000539", @@ -123964,7 +124052,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012210" + ], "related": [], "remark": "", "space": "" @@ -124064,7 +124154,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012215" + ], "related": [ "004401" ], @@ -124186,7 +124278,9 @@ "edit": [ "20220709\t王伟叶" ], - "same": [], + "same": [ + "012220" + ], "related": [], "remark": "", "space": "12ex" @@ -126147,7 +126241,9 @@ "edit": [ "20220710\t王伟叶" ], - "same": [], + "same": [ + "015291" + ], "related": [], "remark": "", "space": "" @@ -126682,7 +126778,8 @@ ], "same": [ "004389", - "011432" + "011432", + "012009" ], "related": [ "002907", @@ -126712,7 +126809,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012010" + ], "related": [ "004884" ], @@ -126740,7 +126839,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012011" + ], "related": [], "remark": "", "space": "" @@ -126763,7 +126864,9 @@ "edit": [ "20220711\t王伟叶" ], - "same": [], + "same": [ + "012012" + ], "related": [], "remark": "", "space": "" @@ -274512,7 +274615,9 @@ "edit": [ "20220817\t王伟叶" ], - "same": [], + "same": [ + "000354" + ], "related": [], "remark": "", "space": "" @@ -285396,7 +285501,9 @@ "edit": [ "20220818\t王伟叶" ], - "same": [], + "same": [ + "000837" + ], "related": [], "remark": "", "space": "" @@ -299636,7 +299743,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004661" + ], "related": [], "remark": "", "space": "" @@ -299664,7 +299773,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004662" + ], "related": [], "remark": "", "space": "" @@ -299695,7 +299806,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004663" + ], "related": [], "remark": "", "space": "" @@ -299723,7 +299836,9 @@ "edit": [ "20221027\t王伟叶" ], - "same": [], + "same": [ + "004664" + ], "related": [], "remark": "", "space": "" @@ -301852,7 +301967,9 @@ "edit": [ "20221121\t王伟叶" ], - "same": [], + "same": [ + "000398" + ], "related": [], "remark": "", "space": "" @@ -301954,7 +302071,9 @@ "edit": [ "20221121\t王伟叶" ], - "same": [], + "same": [ + "001518" + ], "related": [], "remark": "", "space": "" @@ -302056,7 +302175,9 @@ "edit": [ "20221121\t王伟叶" ], - "same": [], + "same": [ + "002533" + ], "related": [], "remark": "", "space": "" @@ -303235,7 +303356,9 @@ "edit": [ "20221206\t王伟叶" ], - "same": [], + "same": [ + "004391" + ], "related": [], "remark": "", "space": "" @@ -303320,7 +303443,9 @@ "edit": [ "20221206\t王伟叶" ], - "same": [], + "same": [ + "004395" + ], "related": [], "remark": "", "space": "" @@ -303341,7 +303466,9 @@ "edit": [ "20221206\t王伟叶" ], - "same": [], + "same": [ + "004396" + ], "related": [], "remark": "", "space": "" @@ -303468,7 +303595,9 @@ "edit": [ "20221206\t王伟叶" ], - "same": [], + "same": [ + "004402" + ], "related": [], "remark": "", "space": "" @@ -303489,7 +303618,9 @@ "edit": [ "20221206\t王伟叶" ], - "same": [], + "same": [ + "004403" + ], "related": [], "remark": "", "space": "" @@ -305235,7 +305366,9 @@ "edit": [ "20221209\t王伟叶" ], - "same": [], + "same": [ + "004552" + ], "related": [], "remark": "", "space": "" @@ -305256,7 +305389,9 @@ "edit": [ "20221209\t王伟叶" ], - "same": [], + "same": [ + "004553" + ], "related": [], "remark": "", "space": "" @@ -305290,7 +305425,9 @@ "edit": [ "20221209\t王伟叶" ], - "same": [], + "same": [ + "004554" + ], "related": [], "remark": "", "space": "" @@ -305345,7 +305482,9 @@ "edit": [ "20221209\t王伟叶" ], - "same": [], + "same": [ + "004556" + ], "related": [], "remark": "", "space": "" @@ -305379,7 +305518,9 @@ "edit": [ "20221209\t王伟叶" ], - "same": [], + "same": [ + "004557" + ], "related": [], "remark": "", "space": "" @@ -305413,7 +305554,9 @@ "edit": [ "20221209\t王伟叶" ], - "same": [], + "same": [ + "004558" + ], "related": [], "remark": "", "space": "" @@ -305447,7 +305590,9 @@ "edit": [ "20221209\t王伟叶" ], - "same": [], + "same": [ + "004559" + ], "related": [ "031211" ], @@ -305622,7 +305767,9 @@ "edit": [ "20221209\t王伟叶" ], - "same": [], + "same": [ + "004563" + ], "related": [], "remark": "", "space": "" @@ -305794,7 +305941,9 @@ "edit": [ "20221209\t王伟叶" ], - "same": [], + "same": [ + "004568" + ], "related": [], "remark": "", "space": "12ex" @@ -308665,7 +308814,9 @@ "edit": [ "20221212\t王伟叶" ], - "same": [], + "same": [ + "000469" + ], "related": [], "remark": "", "space": "" @@ -309362,7 +309513,9 @@ "edit": [ "20221212\t王伟叶" ], - "same": [], + "same": [ + "004501" + ], "related": [], "remark": "", "space": "" @@ -310584,7 +310737,9 @@ "edit": [ "20221212\t王伟叶" ], - "same": [], + "same": [ + "000338" + ], "related": [], "remark": "", "space": "" @@ -316154,7 +316309,9 @@ "edit": [ "20221217\t王伟叶" ], - "same": [], + "same": [ + "000561" + ], "related": [], "remark": "", "space": "" @@ -322178,7 +322335,9 @@ "edit": [ "20230118\t王伟叶" ], - "same": [], + "same": [ + "002898" + ], "related": [], "remark": "", "space": "" @@ -322246,7 +322405,9 @@ "edit": [ "20230118\t王伟叶" ], - "same": [], + "same": [ + "002863" + ], "related": [], "remark": "", "space": "" @@ -322524,7 +322685,9 @@ "edit": [ "20230118\t王伟叶" ], - "same": [], + "same": [ + "002954" + ], "related": [], "remark": "", "space": "" @@ -322558,7 +322721,9 @@ "edit": [ "20230118\t王伟叶" ], - "same": [], + "same": [ + "002864" + ], "related": [], "remark": "", "space": "" @@ -323189,7 +323354,9 @@ "edit": [ "20230118\t王伟叶" ], - "same": [], + "same": [ + "003763" + ], "related": [], "remark": "", "space": "" @@ -323526,7 +323693,9 @@ "edit": [ "20230118\t王伟叶" ], - "same": [], + "same": [ + "001745" + ], "related": [], "remark": "", "space": "" @@ -327224,7 +327393,9 @@ "edit": [ "20230118\t王伟叶" ], - "same": [], + "same": [ + "003400" + ], "related": [], "remark": "", "space": "" @@ -330785,7 +330956,9 @@ "edit": [ "20230118\t王伟叶" ], - "same": [], + "same": [ + "003767" + ], "related": [], "remark": "", "space": "" @@ -332752,7 +332925,9 @@ "edit": [ "20230123\t王伟叶" ], - "same": [], + "same": [ + "000721" + ], "related": [], "remark": "", "space": "" @@ -333341,7 +333516,9 @@ "edit": [ "20230123\t王伟叶" ], - "same": [], + "same": [ + "003677" + ], "related": [], "remark": "", "space": "" @@ -334299,7 +334476,9 @@ "edit": [ "20230123\t王伟叶" ], - "same": [], + "same": [ + "000942" + ], "related": [], "remark": "", "space": "" @@ -336442,7 +336621,9 @@ "edit": [ "20230123\t王伟叶" ], - "same": [], + "same": [ + "002880" + ], "related": [], "remark": "", "space": "" @@ -338345,7 +338526,9 @@ "edit": [ "20230123\t王伟叶" ], - "same": [], + "same": [ + "003666" + ], "related": [], "remark": "", "space": "" @@ -338810,7 +338993,9 @@ "edit": [ "20230123\t王伟叶" ], - "same": [], + "same": [ + "000613" + ], "related": [], "remark": "", "space": "" @@ -341253,7 +341438,9 @@ "edit": [ "20230128\t王伟叶" ], - "same": [], + "same": [ + "002723" + ], "related": [], "remark": "", "space": "" @@ -342685,7 +342872,9 @@ "edit": [ "20230128\t王伟叶" ], - "same": [], + "same": [ + "000911" + ], "related": [], "remark": "", "space": "" @@ -345946,7 +346135,9 @@ "edit": [ "20230128\t王伟叶" ], - "same": [], + "same": [ + "003663" + ], "related": [], "remark": "", "space": "" @@ -346688,7 +346879,9 @@ "edit": [ "20230128\t王伟叶" ], - "same": [], + "same": [ + "003705" + ], "related": [], "remark": "", "space": "" @@ -364126,7 +364319,9 @@ "edit": [ "20230312\t王伟叶" ], - "same": [], + "same": [ + "003673" + ], "related": [], "remark": "", "space": "" @@ -365171,7 +365366,9 @@ "edit": [ "20230312\t王伟叶" ], - "same": [], + "same": [ + "000629" + ], "related": [], "remark": "", "space": "" @@ -365247,7 +365444,9 @@ "edit": [ "20230312\t王伟叶" ], - "same": [], + "same": [ + "003674" + ], "related": [], "remark": "", "space": "" @@ -372172,7 +372371,9 @@ "edit": [ "20230413\t王伟叶" ], - "same": [], + "same": [ + "000797" + ], "related": [], "remark": "", "space": "" @@ -378372,7 +378573,9 @@ "edit": [ "20230419\t王伟叶" ], - "same": [], + "same": [ + "004642" + ], "related": [], "remark": "", "space": "" @@ -388590,7 +388793,9 @@ "edit": [ "20221223\t王伟叶" ], - "same": [], + "same": [ + "001353" + ], "related": [], "remark": "", "space": "12ex" @@ -410638,7 +410843,9 @@ "edit": [ "20230101\t王伟叶" ], - "same": [], + "same": [ + "003441" + ], "related": [], "remark": "", "space": "" @@ -421240,7 +421447,9 @@ "edit": [ "20230209\t王伟叶" ], - "same": [], + "same": [ + "003337" + ], "related": [], "remark": "", "space": "" @@ -428256,7 +428465,9 @@ "edit": [ "20220823\t王伟叶" ], - "same": [], + "same": [ + "004528" + ], "related": [], "remark": "", "space": "12ex" @@ -449769,7 +449980,9 @@ "edit": [ "20230107\t王伟叶" ], - "same": [], + "same": [ + "004091" + ], "related": [], "remark": "", "space": "" @@ -454055,7 +454268,9 @@ "edit": [ "20230108\t王伟叶" ], - "same": [], + "same": [ + "004441" + ], "related": [], "remark": "", "space": "12ex" @@ -460926,7 +461141,9 @@ "edit": [ "20230303\t王伟叶" ], - "same": [], + "same": [ + "002751" + ], "related": [], "remark": "", "space": ""