def stringmaxsim(string,adict,listlength):
    """Return the ``listlength`` entries of ``adict`` most similar to ``string``.

    ``string`` is first normalised with ``pre_treating``; every entry whose
    ``"content"`` does not contain the marker ``"OBSOLETE"`` is then scored
    with ``Levenshtein.jaro(content, string)``.

    Args:
        string: Text to match against the stored entries.
        adict: Mapping of id -> record; each record must have a ``"content"``
            string. (Presumably the problem database keyed by problem id --
            confirm against callers.)
        listlength: Maximum number of matches to return.

    Returns:
        A list of ``(id, similarity)`` tuples ordered by descending
        similarity, holding at most ``listlength`` items. Entries with equal
        similarity keep their iteration order in ``adict``. An empty list is
        returned when ``listlength`` is not positive.
    """
    # Local import keeps this block self-contained; heapq is standard library.
    import heapq

    # The hand-rolled top-k list indexed maxsimlist[-1] on an empty list when
    # listlength was 0; asking for no results now simply yields no results.
    if listlength <= 0:
        return []

    string = pre_treating(string)

    # Score lazily so only the best `listlength` pairs are retained.
    scored = (
        (problem_id, Levenshtein.jaro(record["content"], string))
        for problem_id, record in adict.items()
        if "OBSOLETE" not in record["content"]
    )

    # nlargest is equivalent to sorted(..., reverse=True)[:listlength]; it
    # replaces the manual "compare against the last element" bookkeeping,
    # which is only correct if the list is re-sorted after every insertion.
    return heapq.nlargest(listlength, scored, key=lambda pair: pair[1])