diff --git a/工具v2/database_tools.py b/工具v2/database_tools.py index 85300af3..6e406680 100644 --- a/工具v2/database_tools.py +++ b/工具v2/database_tools.py @@ -61,7 +61,7 @@ def detectmaxsim(currentid,excludelist,adict): #检测与已知题目关联程 maxsim = -1 argmaxsim = "000000" for id in adict: - if not id in excludelist: + if not id in excludelist and not "OBSOLETE" in adict[id]["content"]: simrate = Levenshtein.jaro(adict[id]["content"],adict[currentid]["content"]) if simrate > maxsim: maxsim = simrate @@ -76,7 +76,7 @@ def generate_sim_group(startingids,prodict,max_size,threshold): #生成与已知 while continue_flag: appending_id_list = [] for oldid in id_list: - for newid in [id for id in prodict if not id in id_list and not id in output_list]: + for newid in [id for id in prodict if not id in id_list and not id in output_list and not "OBSOLETE" in treated_dict[id]["content"]]: simrate = Levenshtein.jaro(treated_dict[oldid]["content"],treated_dict[newid]["content"]) if simrate >= threshold: appending_id_list.append(newid)