database_tools中新增字符串与数据库中已有题目匹配功能

This commit is contained in:
wangweiye7840 2023-09-13 16:38:16 +08:00
parent edadebd5d7
commit 7a4fb0e696
1 changed files with 19 additions and 0 deletions

View File

@ -69,6 +69,25 @@ def detectmaxsim(currentid,excludelist,adict): #检测与已知题目关联程
argmaxsim = id
return (maxsim,argmaxsim) #返回最大关联系数与关联程度最大的题号
def stringmaxsim(string,adict,listlength):
    """Return the ``listlength`` entries of ``adict`` most similar to ``string``.

    ``string`` is first passed through ``pre_treating``. Every entry whose
    ``"content"`` does not contain ``"OBSOLETE"`` is then scored against it
    with ``Levenshtein.jaro``.

    Args:
        string: Text to match against the stored problems.
        adict: Mapping of problem id -> record; each record must provide a
            ``"content"`` string.
            NOTE(review): stored contents are compared as-is, without
            ``pre_treating`` -- presumably callers pass an already
            pre-treated dict; confirm against the call sites.
        listlength: Maximum number of matches to return.

    Returns:
        A list of ``(id, simrate)`` tuples ordered by descending ``simrate``
        and at most ``listlength`` long. Entries with equal scores keep the
        iteration order of ``adict``. The list is empty when ``listlength``
        is not positive.
    """
    if listlength <= 0:
        # A non-positive limit selects nothing; returning early also avoids
        # slicing with a negative bound below.
        return []
    string = pre_treating(string)
    scored = [
        (problem_id, Levenshtein.jaro(record["content"], string))
        for problem_id, record in adict.items()
        if "OBSOLETE" not in record["content"]
    ]
    # list.sort() is stable even with reverse=True, so ties stay in adict order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:listlength]  # the listlength best ids with their similarity scores
def generate_sim_group(startingids,prodict,max_size,threshold): #生成与已知题块startingids(冒号和逗号连接的字符串)关联程度超过threshold的题目组, 超过max_size即停止
id_list = generate_number_set(startingids)
output_list = []