database_tools中添加新版从字符串生成题目列表的功能和添加题目的功能,能同时处理相同相关

This commit is contained in:
wangweiye7840 2024-01-23 13:28:42 +08:00
parent fe683bed20
commit 3bcc692c6b
1 changed files with 105 additions and 1 deletions

View File

@ -220,6 +220,59 @@ def GenerateProblemListFromString(data): #从来自.tex文件的字符串生成
return problem_list #返回一个列表, 每一项是一个由 题目内容 和 题目来源前缀 组成的元组
def _split_interaction_meta(content_raw):
    """Split a leading ``[...]`` interaction tag off a non-empty item body.

    Returns ``(content, meta)``:

    * no leading (closed) bracket tag -> ``(content_raw, {})``;
    * ``[rep3214]`` -> ``meta == {"rep": "003214"}`` (ID zero-padded to 6 digits);
    * any other tag, e.g. ``[s2521;r354;u10021,20024]`` ->
      ``meta == {"same": [...], "related": [...], "unrelated": [...]}``, where
      each list is whatever ``generate_number_set`` returns for the digits
      following ``s`` / ``r`` / ``u`` (empty list when the letter is absent).
    """
    rep_match = re.match(r"\[rep(\d+)\]", content_raw)
    if rep_match:
        return content_raw[rep_match.end():].strip(), {"rep": rep_match.group(1).zfill(6)}
    # Stop at the FIRST closing bracket on the first line. A greedy ".*" would
    # run up to the last "]" of the line and swallow problem text such as
    # "$[0,1]$" into the tag.
    # NOTE(review): a problem whose text itself begins with "[...]" is still
    # interpreted as a tag, as before.
    tag_match = re.match(r"\[([^\]\n]*)\]", content_raw)
    if tag_match is None:
        # No tag, or an unclosed "[": keep the text untouched, no meta.
        return content_raw, {}
    metaraw = tag_match.group(1)
    meta = {}
    for letter, field in (("s", "same"), ("r", "related"), ("u", "unrelated")):
        found = re.search(letter + r"([\d,:]*)", metaraw)
        meta[field] = generate_number_set(found.group(1)) if found else []
    return content_raw[tag_match.end():].strip(), meta


def GenerateProblemListFromString2024(data):
    """Build a problem list from the text of a .tex file.

    Every ``\\item`` is one problem. A comment line (``%...``) on its own line
    before an item is used as the source suffix of the items that follow it.
    An item may start with a bracketed interaction tag describing how it
    relates to the existing problem bank: ``[rep3214]`` means "do not add, use
    003214 instead"; ``[s2521;r354;u10021,20024]`` means same as 2521, related
    to 354, unrelated to 10021 and 20024.

    Args:
        data: contents of the .tex file as a string. If it contains a
            ``document`` environment only its body is parsed.

    Returns:
        A list of ``(content, suffix, meta)`` tuples, one per non-empty item,
        in document order. ``meta`` is ``{}``, ``{"rep": id}`` or a dict with
        the keys ``"same"``, ``"related"`` and ``"unrelated"``.
    """
    document = re.search(r"\\begin\{document\}([\s\S]*?)\\end\{document\}", data)
    if document is not None:
        data = document.group(1)
    # Strip irrelevant material and mark the end of every item explicitly.
    data = re.sub(r"\n{2,}", "\n", data)
    data = re.sub(r"\\begin\{tcolorbox\}[\s\S]*?\\end\{tcolorbox\}", "\n", data)
    data = re.sub(r"\\item", r"\\enditem\\item", data)
    data = re.sub(r"\\end\{enumerate\}", r"\\enditem", data)
    problem_list = []
    for item in re.finditer(r"\\item([\s\S]*?)\\enditem", data):
        content_raw = item.group(1).strip()
        content_raw = re.sub(r"\n\%[\s\S]*$", "", content_raw)  # drop a trailing comment block
        content_raw = re.sub(r"\\\\$", "", content_raw)  # drop a trailing "\\" line break
        content_raw = content_raw.strip()
        if not content_raw:
            # An empty \item carries no problem; skip it instead of failing.
            continue
        content, meta = _split_interaction_meta(content_raw)
        # The source suffix is the last stand-alone comment line before the item.
        suflist = re.findall(r"\n(\%\s{0,}[\S]+)\n", data[:item.start(1)])
        suffix = suflist[-1].replace("%", "").strip() if suflist else ""
        problem_list.append((content, suffix, meta))
    return problem_list
def CreateEmptyProblem(problem): # 根据已有的题目创建新的空题目
NewProblem = problem.copy()
for field in NewProblem:
@ -272,7 +325,58 @@ def AddProblemstoDict(startingid,raworigin,problems,editor,indexdescription,thed
print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p))
id += 1
return 0
def AddProblemstoDict2024(startingid, raworigin, problems, editor, indexdescription, thedict):
    """Add the problems produced by GenerateProblemListFromString2024 to ``thedict``.

    Args:
        startingid: first problem ID to assign (anything ``int()`` accepts).
        raworigin: origin prefix shared by all the problems.
        problems: list of ``(content, suffix, meta)`` tuples.
        editor: editor name, stored together with the current date.
        indexdescription: text placed before the running index inside the
            origin; when blank, no index is appended at all.
        thedict: problem dictionary keyed by 6-digit ID strings; modified in place.

    Returns:
        The list of IDs in problem order (for items whose meta has ``"rep"``
        the existing ID is listed instead of a new one), or ``1`` if the next
        ID to assign is already present in ``thedict``. Problems added before
        such a collision stay in ``thedict``.

    Raises:
        KeyError: if a same/related/unrelated ID in ``meta`` is not in
            ``thedict``. This is checked before the problem is created, so no
            half-linked problem is left behind.
    """
    idlist = []
    if not problems:
        return idlist
    nextid = int(startingid)
    description = indexdescription.strip()
    currentsuffix = problems[0][1]
    problemindex = 0
    for p, suffix, meta in problems:
        # The running index restarts whenever the source suffix changes;
        # replaced ("rep") items still occupy an index position.
        if suffix == currentsuffix:
            problemindex += 1
        else:
            problemindex = 1
            currentsuffix = suffix
        if "rep" in meta:
            # Already in the bank: no new ID is consumed, so no collision check.
            idlist.append(meta["rep"])
            print(f"该题 {idlist[-1]} {p} 已在题库中, 不必收录.")
            continue
        pid = str(nextid).zfill(6)
        if pid in thedict:
            print("ID %s 已被使用."%pid)
            return 1
        # Validate every linked ID before touching thedict, so a typo cannot
        # leave back-references to a problem that was never stored.
        missing = [
            sid
            for field in ("same", "related", "unrelated")
            for sid in meta.get(field, [])
            if sid not in thedict
        ]
        if missing:
            raise KeyError("linked problem id(s) not in dict: " + ", ".join(str(sid) for sid in missing))
        origin = raworigin + suffix + description + (str(problemindex) if description else "")
        newproblem = CreateNewProblem(pid, p.strip(), origin, thedict, GetDate() + "\t" + editor)
        # Guess the problem type from the macros used in its text.
        if "blank" in p:
            newproblem["genre"] = "填空题"
            newproblem["space"] = ""
        elif "bracket" in p:
            newproblem["genre"] = "选择题"
            newproblem["space"] = ""
        else:
            newproblem["genre"] = "解答题"
            newproblem["space"] = "4em"
        # Record each relation on both sides.
        for field in ("same", "related", "unrelated"):
            for sid in meta.get(field, []):
                thedict[sid][field].append(pid)
                newproblem[field].append(sid)
        thedict[pid] = newproblem
        maxsim, argmaxsim = detectmaxsim(pid, [pid], thedict)
        print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p))
        nextid += 1
        idlist.append(pid)
    return idlist
def CreateIDLinks(old_id_list,new_id_list,*thedict): #建立已有id和新id之间的联系, thedict为可选, 选中的话即为当前字典, 会从new_id_list中排除当前字典中有的项
if len(thedict) == 1 and type(thedict[0]) == dict: