From b0a90fcf758d13e3553849aa3bc162cd51d935a5 Mon Sep 17 00:00:00 2001 From: "weiye.wang" Date: Sun, 9 Jul 2023 16:13:27 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9database=5Ftools=E4=B8=AD?= =?UTF-8?q?=E7=9A=84=E4=B8=A4=E4=B8=AA=E5=87=BD=E6=95=B0,=20=E6=8E=92?= =?UTF-8?q?=E9=99=A4OBSOLETE=E7=9A=84=E9=A2=98=E7=9B=AE=E8=A2=AB=E9=80=89?= =?UTF-8?q?=E5=85=A5=E7=9A=84=E5=8F=AF=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 工具v2/database_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/工具v2/database_tools.py b/工具v2/database_tools.py index 85300af3..6e406680 100644 --- a/工具v2/database_tools.py +++ b/工具v2/database_tools.py @@ -61,7 +61,7 @@ def detectmaxsim(currentid,excludelist,adict): #检测与已知题目关联程 maxsim = -1 argmaxsim = "000000" for id in adict: - if not id in excludelist: + if not id in excludelist and not "OBSOLETE" in adict[id]["content"]: simrate = Levenshtein.jaro(adict[id]["content"],adict[currentid]["content"]) if simrate > maxsim: maxsim = simrate @@ -76,7 +76,7 @@ def generate_sim_group(startingids,prodict,max_size,threshold): #生成与已知 while continue_flag: appending_id_list = [] for oldid in id_list: - for newid in [id for id in prodict if not id in id_list and not id in output_list]: + for newid in [id for id in prodict if not id in id_list and not id in output_list and not "OBSOLETE" in treated_dict[id]["content"]]: simrate = Levenshtein.jaro(treated_dict[oldid]["content"],treated_dict[newid]["content"]) if simrate >= threshold: appending_id_list.append(newid)