56 lines
2.4 KiB
Python
56 lines
2.4 KiB
Python
import json,re,os,Levenshtein
|
|
|
|
def load_dict(filename):
    """Read the JSON database file ``filename`` and return the parsed data.

    The file is opened in text mode and decoded with the ``"u8"`` codec
    (an alias of UTF-8).

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The Python object produced by parsing the file's JSON content
        (a dictionary when the file's top-level value is a JSON object).
    """
    with open(filename, mode="r", encoding="u8") as source:
        return json.load(source)
|
|
|
|
def save_dict(adict, filename):
    """Serialize ``adict`` as JSON and write it to ``filename``.

    The output is indented by 4 spaces, keeps non-ASCII characters
    unescaped, and is encoded with the ``"u8"`` codec (an alias of UTF-8).

    Args:
        adict: The object to serialize.
        filename: Path of the file to (over)write.

    Returns:
        0 on success, 1 if serialization or writing failed.
    """
    try:
        # Serialize BEFORE opening the file: mode "w" truncates on open, so
        # a serialization error must not be allowed to wipe an existing file.
        payload = json.dumps(adict, indent=4, ensure_ascii=False)
        with open(filename, "w", encoding="u8") as f:
            f.write(payload)
    except (OSError, TypeError, ValueError, RecursionError):
        # OSError: file cannot be opened/written; TypeError/ValueError/
        # RecursionError: data cannot be serialized or encoded.
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        return 1
    return 0
|
|
|
|
|
|
def pre_treating(string):
    """Strip text that is irrelevant for comparison from ``string``.

    The substitutions below are applied one after another, each deleting
    every match of its pattern.

    Args:
        string: The raw text to normalize.

    Returns:
        The text with all matched fragments removed.
    """
    removal_patterns = (
        # Whole \begin{center} ... \end{center} environments (non-greedy).
        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
        # bracket{N} / blank{N} placeholders and the fourch/twoch/onech names.
        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
        # Whitespace, backslashes, braces, dollar signs, parentheses, brackets.
        r"[\s\\\{\}\$\(\)\[\]]",
        # Newlines and tabs (already covered by \s above; kept as in the
        # original sequence).
        r"[\n\t]",
        # The words displaystyle / overrightarrow / overline.
        r"(displaystyle)|(overrightarrow)|(overline)",
        # ASCII punctuation: comma, period, colon, semicolon, question mark.
        r"[,\.:;?]",
    )
    cleaned = string
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, "", cleaned)
    return cleaned
|
|
|
|
def treat_dict(p_dict):
    """Build a comparison-ready copy of the problem dictionary ``p_dict``.

    For every key of ``p_dict`` the result holds a new dictionary with two
    fields: ``"content"``, the entry's content passed through
    ``pre_treating``, and ``"same"``, the entry's ``"same"`` value taken over
    as-is (the same object, not a copy).

    Args:
        p_dict: Mapping from problem id to an entry that has ``"content"``
            and ``"same"`` keys.

    Returns:
        A new dictionary with the same keys and the two fields above.
    """
    return {
        problem_id: {
            "content": pre_treating(entry["content"]),
            "same": entry["same"],
        }
        for problem_id, entry in p_dict.items()
    }
|
|
|
|
def detectmaxsim(currentid, excludelist, adict):
    """Find the entry most similar to ``currentid`` outside ``excludelist``.

    Similarity is ``Levenshtein.jaro`` between the ``"content"`` fields of
    the candidate entry and of the ``currentid`` entry. On equal scores the
    candidate encountered first while iterating ``adict`` wins.

    Args:
        currentid: Id of the reference entry in ``adict``.
        excludelist: Ids that must not be considered as candidates.
        adict: Mapping from id to an entry with a ``"content"`` field.

    Returns:
        A tuple ``(best_score, best_id)``. When there is no candidate at all
        the tuple is ``(-1, "000000")``.
    """
    best_score = -1
    best_id = "000000"
    candidates = (pid for pid in adict if pid not in excludelist)
    for pid in candidates:
        score = Levenshtein.jaro(
            adict[pid]["content"], adict[currentid]["content"]
        )
        # Strict comparison: an equal score never replaces an earlier winner.
        if score > best_score:
            best_score, best_id = score, pid
    return (best_score, best_id)
|
|
|
|
def generate_problem_series(startingid, length, adict):
    """Chain problems by similarity, starting from ``startingid``.

    Each step asks ``detectmaxsim`` for the entry most similar to the
    previously chosen one, excluding every id already in the series.

    Args:
        startingid: Id of the first problem of the series.
        length: Maximum number of follow-up problems to append.
        adict: Mapping from id to an entry with a ``"content"`` field.

    Returns:
        The ids of the series, in order, joined with commas. The series
        holds at most ``length + 1`` ids; it is shorter when ``adict`` runs
        out of unused problems.
    """
    series = [startingid]
    currentid = startingid
    for _ in range(length):
        maxsim, nextid = detectmaxsim(currentid, series, adict)
        if maxsim < 0:
            # detectmaxsim reports a score of -1 (with a placeholder id) when
            # no candidate is left; stop instead of appending the placeholder
            # over and over.
            break
        currentid = nextid
        series.append(currentid)
    return ",".join(series)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # This module is a database helper meant to be imported; when run
    # directly it only prints a short notice saying so.
    print("数据库工具, import用.")