database_tools中添加新版从字符串生成题目列表的功能和添加题目的功能,能同时处理相同相关
This commit is contained in:
parent
fe683bed20
commit
3bcc692c6b
|
|
@ -220,6 +220,59 @@ def GenerateProblemListFromString(data): #从来自.tex文件的字符串生成
|
|||
return problem_list #返回一个列表, 每一项是一个由 题目内容 和 题目来源前缀 组成的元组
|
||||
|
||||
|
||||
def _parse_relation_ids(tag, metaraw):
    """Return the id collection that follows ``tag`` ('s', 'r' or 'u') in ``metaraw``.

    The digits/commas/colons after the first occurrence of ``tag`` are handed to
    ``generate_number_set`` (defined elsewhere in this file; its exact output
    format is not visible here). An empty list is returned when ``tag`` is absent.
    """
    found = re.findall(tag + r"([\d,:]*)", metaraw)
    return generate_number_set(found[0]) if found else []


def GenerateProblemListFromString2024(data):
    """Build a problem list from the text of a .tex file.

    Every ``\\item`` is one problem. A line of the form ``%xxx`` that precedes an
    item is used as the origin infix of the problems that follow it. An optional
    square bracket right after ``\\item`` carries the interaction with the
    existing problem bank:

    * ``[rep3214]`` -- do not add the problem, reuse the existing id ``003214``;
    * ``[s2521;r354;u10021,20024]`` -- the problem is the same as 2521, related
      to 354 and unrelated to 10021 and 20024.

    Parameters:
        data: the raw string; when it contains a ``document`` environment only
            the body of that environment is processed.

    Returns:
        A list of ``(content, suffix, meta)`` tuples. ``meta`` is ``{}`` when no
        bracket is present, ``{"rep": <6-digit id>}`` for a replacement, and
        otherwise a dict with the keys ``"same"``, ``"related"``, ``"unrelated"``.
        Items whose content is empty are skipped.
    """
    # Keep only the document body when there is one; otherwise use the text as is.
    document = re.search(r"\\begin\{document\}([\s\S]*?)\\end\{document\}", data)
    if document is not None:
        data = document.group(1)
    data = re.sub(r"\n{2,}", "\n", data)
    data = re.sub(r"\\begin\{tcolorbox\}[\s\S]*?\\end\{tcolorbox\}", "\n", data)
    # Insert explicit terminators so that every item is delimited on both sides.
    data = re.sub(r"\\item", r"\\enditem\\item", data)
    data = re.sub(r"\\end\{enumerate\}", r"\\enditem", data)

    problem_list = []
    for item in re.finditer(r"\\item([\s\S]*?)\\enditem", data):
        start, end = item.span(1)
        content_raw = data[start:end].strip()
        content_raw = re.sub(r"\n\%[\s\S]*$", "", content_raw)  # drop trailing comment lines
        content_raw = re.sub(r"\\\\$", "", content_raw)  # drop a trailing "\\" line break
        content_raw = content_raw.strip()
        if not content_raw:
            # An empty \item carries no problem; indexing it used to raise IndexError.
            continue

        content = content_raw
        meta = {}
        if content_raw.startswith("["):
            rep = re.match(r"\[rep(\d+)\]", content_raw)
            # Stop at the FIRST closing bracket: a greedy match would swallow
            # problem text such as "$[0,1]$" appearing later on the same line.
            bracket = re.match(r"\[([^\]\n]*)\]", content_raw)
            if rep is not None:
                content = content_raw[rep.end():].strip()
                meta = {"rep": rep.group(1).zfill(6)}
            elif bracket is not None:
                content = content_raw[bracket.end():].strip()
                metaraw = bracket.group(1)
                meta = {
                    "same": _parse_relation_ids("s", metaraw),
                    "related": _parse_relation_ids("r", metaraw),
                    "unrelated": _parse_relation_ids("u", metaraw),
                }
            # An unclosed "[" is left in the content untouched, with no meta.

        # The origin infix is the last "%xxx" line found before this item.
        suflist = re.findall(r"\n(\%\s{0,}[\S]+)\n", data[:start])
        suffix = suflist[-1].replace("%", "").strip() if suflist else ""
        problem_list.append((content, suffix, meta))
    return problem_list
|
||||
|
||||
|
||||
def CreateEmptyProblem(problem): # 根据已有的题目创建新的空题目
|
||||
NewProblem = problem.copy()
|
||||
for field in NewProblem:
|
||||
|
|
@ -272,7 +325,58 @@ def AddProblemstoDict(startingid,raworigin,problems,editor,indexdescription,thed
|
|||
print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p))
|
||||
id += 1
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
def AddProblemstoDict2024(startingid, raworigin, problems, editor, indexdescription, thedict):
    """Add the problems produced by GenerateProblemListFromString2024 to ``thedict``.

    Parameters:
        startingid: first id to assign (anything accepted by ``int``).
        raworigin: leading part of the origin string of every new problem.
        problems: list of ``(content, suffix, meta)`` tuples.
        editor: editor name, stored after the date returned by ``GetDate()``.
        indexdescription: when non-blank, it is appended to the origin followed
            by the running index of the problem within its suffix group.
        thedict: the problem dictionary, keyed by 6-digit id; modified in place.

    Returns:
        The list of ids, one per input problem (for problems marked ``rep`` the
        existing id is reported and nothing is added), or ``1`` when an id that
        has to be assigned is already present in ``thedict``. Problems added
        before such a collision stay in ``thedict``.

    Raises:
        KeyError: when ``meta`` references an id that is not in ``thedict``.
            This is checked before anything is modified for that problem, so no
            existing problem ends up linked to an id that was never stored.
    """
    idlist = []
    next_id = int(startingid)
    if not problems:
        # Nothing to add; reading problems[0] below would raise IndexError.
        return idlist
    index_label = indexdescription.strip()
    currentsuffix = problems[0][1]
    problemindex = 0
    for p, suffix, meta in problems:
        # The running index counts every problem of the group, reused ones included.
        if suffix == currentsuffix:
            problemindex += 1
        else:
            problemindex = 1
            currentsuffix = suffix

        if "rep" in meta:
            # A reused problem consumes no new id, so no collision check is needed.
            idlist.append(meta["rep"])
            print(f"该题 {idlist[-1]} {p} 已在题库中, 不必收录.")
            continue

        pid = str(next_id).zfill(6)
        if pid in thedict:
            print("ID %s 已被使用."%pid)
            return 1

        links = {field: list(meta.get(field, ())) for field in ("same", "related", "unrelated")}
        missing = [sid for ids in links.values() for sid in ids if sid not in thedict]
        if missing:
            raise KeyError("题库中不存在关联题号: " + ", ".join(str(sid) for sid in missing))

        origin = raworigin + suffix + index_label + (str(problemindex) if index_label else "")
        newproblem = CreateNewProblem(pid, p.strip(), origin, thedict, GetDate() + "\t" + editor)
        # The genre is guessed from the macro names that occur in the content.
        if "blank" in p:
            newproblem["genre"] = "填空题"
            newproblem["space"] = ""
        elif "bracket" in p:
            newproblem["genre"] = "选择题"
            newproblem["space"] = ""
        else:
            newproblem["genre"] = "解答题"
            newproblem["space"] = "4em"
        # Record every relation on both sides.
        for field, ids in links.items():
            for sid in ids:
                thedict[sid][field].append(pid)
                newproblem[field].append(sid)
        thedict[pid] = newproblem
        maxsim, argmaxsim = detectmaxsim(pid, [pid], thedict)
        print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p))
        next_id += 1
        idlist.append(pid)
    return idlist
|
||||
|
||||
def CreateIDLinks(old_id_list,new_id_list,*thedict): #建立已有id和新id之间的联系, thedict为可选, 选中的话即为当前字典, 会从new_id_list中排除当前字典中有的项
|
||||
if len(thedict) == 1 and type(thedict[0]) == dict:
|
||||
|
|
|
|||
Reference in New Issue