database_tools中新增字符串与数据库中已有题目匹配功能
This commit is contained in:
parent
edadebd5d7
commit
7a4fb0e696
|
|
@ -69,6 +69,25 @@ def detectmaxsim(currentid,excludelist,adict): #检测与已知题目关联程
|
||||||
argmaxsim = id
|
argmaxsim = id
|
||||||
return (maxsim,argmaxsim) #返回最大关联系数与关联程度最大的题号
|
return (maxsim,argmaxsim) #返回最大关联系数与关联程度最大的题号
|
||||||
|
|
||||||
|
def stringmaxsim(string,adict,listlength):
    """Find the problems in adict whose content best matches a string.

    string is first passed through pre_treating, then compared against the
    "content" field of every entry of adict using Levenshtein.jaro.
    Entries whose content contains "OBSOLETE" are ignored.

    Args:
        string: the text to look up.
        adict: mapping from problem id to a record that has a "content" key.
        listlength: maximum number of matches to return.

    Returns:
        A list of at most listlength (id, simrate) tuples, ordered from the
        highest similarity to the lowest. Entries with equal similarity keep
        the iteration order of adict. The list is empty when listlength is
        not positive or when no entry qualifies.
    """
    # Imported locally because the file's import section is outside this block.
    import heapq

    string = pre_treating(string)
    if listlength <= 0:
        # Nothing was requested; indexing an empty result list used to raise here.
        return []

    scored = (
        (problem_id, Levenshtein.jaro(record["content"], string))
        for problem_id, record in adict.items()
        if "OBSOLETE" not in record["content"]
    )
    # nlargest is equivalent to sorted(scored, key=..., reverse=True)[:listlength]
    # but does not re-sort the running list for every candidate.
    return heapq.nlargest(listlength, scored, key=lambda pair: pair[1])
|
||||||
|
|
||||||
def generate_sim_group(startingids,prodict,max_size,threshold): #生成与已知题块startingids(冒号和逗号连接的字符串)关联程度超过threshold的题目组, 超过max_size即停止
|
def generate_sim_group(startingids,prodict,max_size,threshold): #生成与已知题块startingids(冒号和逗号连接的字符串)关联程度超过threshold的题目组, 超过max_size即停止
|
||||||
id_list = generate_number_set(startingids)
|
id_list = generate_number_set(startingids)
|
||||||
output_list = []
|
output_list = []
|
||||||
|
|
|
||||||
Reference in New Issue