# Reconstructed from a git format-patch whose line breaks had been collapsed.
# Commit 3bcc692c6becc666092166ab847782c50ebc5dbe, author wangweiye7840,
# dated Tue, 23 Jan 2024 13:28:42 +0800, target file 工具v2/database_tools.py.
# Subject (translated): add to database_tools a new version of "generate the
# problem list from a string" and of "add problems", able to handle the
# "same" and "related" links at the same time.
# Only the two functions the patch adds are reproduced here; the surrounding
# diff context belonged to definitions that are not fully visible.


def _ids_after(tag, metaraw):
    """Return the ID list that follows the one-letter *tag* in *metaraw*.

    The digits/commas/colons right after the first occurrence of *tag* are
    handed to ``generate_number_set`` (defined elsewhere in this module).
    An absent tag yields an empty list.
    """
    found = re.search(tag + r"([\d,:]*)", metaraw)
    if found is None:
        return []
    return generate_number_set(found.group(1))


def _parse_problem_meta(content_raw):
    """Split a stripped item into ``(content, meta)``.

    * no leading ``[``            -> ``meta == {}``
    * leading ``[rep<digits>]``   -> ``meta == {"rep": <6-digit id>}``
    * any other leading ``[...]`` -> ``meta`` has the keys ``same``,
      ``related`` and ``unrelated`` taken from the ``s``/``r``/``u`` fields.
    """
    # startswith() instead of content_raw[0]: an empty \item must not crash.
    if not content_raw.startswith("["):
        return content_raw, {}
    rep = re.match(r"\[rep(\d+)\]", content_raw)
    if rep:
        return content_raw[rep.end():].strip(), {"rep": rep.group(1).zfill(6)}
    # Stop at the FIRST closing bracket. A greedy ".*" would also swallow
    # brackets that belong to the problem text, e.g. "[s12] ... [0,1]".
    bracket = re.match(r"\[([^\]\n]*)\]", content_raw)
    if bracket is None:
        # A "[" that is never closed on the first line is ordinary content.
        return content_raw, {}
    metaraw = bracket.group(1)
    meta = {
        "same": _ids_after("s", metaraw),
        "related": _ids_after("r", metaraw),
        "unrelated": _ids_after("u", metaraw),
    }
    return content_raw[bracket.end():].strip(), meta


def GenerateProblemListFromString2024(data):
    """Build the problem list from the text of a .tex file.

    Every ``\\item`` is one problem. A line that starts with ``%`` on its own
    line acts as the source prefix for the items after it. Square brackets
    right after ``\\item`` carry the interaction with the problem database:
    ``[rep3214]`` means "do not add, use 003214 instead";
    ``[s2521;r354;u10021,20024]`` means same as 2521, related to 354 and
    unrelated to 10021 and 20024.

    Returns a list of ``(content, source_prefix, meta)`` tuples, where
    ``meta`` is the dictionary described in ``_parse_problem_meta``.
    """
    # Keep only the document body when there is one; otherwise use all of it.
    doc = re.search(r"\\begin\{document\}([\s\S]*?)\\end\{document\}", data)
    if doc:
        data = doc.group(1)
    data = re.sub(r"\n{2,}", "\n", data)
    data = re.sub(r"\\begin\{tcolorbox\}[\s\S]*?\\end\{tcolorbox\}", "\n", data)
    # Close every item explicitly so each one can be matched on its own.
    data = re.sub(r"\\item", r"\\enditem\\item", data)
    data = re.sub(r"\\end\{enumerate\}", r"\\enditem", data)

    problem_list = []
    for item in re.finditer(r"\\item([\s\S]*?)\\enditem", data):
        content_raw = item.group(1).strip()
        # Drop everything from the first "%" comment line onwards.
        content_raw = re.sub(r"\n\%[\s\S]*$", "", content_raw)
        # Drop a trailing "\\" line break at the very end of the problem.
        content_raw = re.sub(r"\\\\$", "", content_raw)
        content_raw = content_raw.strip()
        content, meta = _parse_problem_meta(content_raw)

        # The source prefix is the last "%..." line before this item.
        suflist = re.findall(r"\n(\%\s{0,}[\S]+)\n", data[:item.start(1)])
        suffix = suflist[-1].replace("%", "").strip() if suflist else ""
        problem_list.append((content, suffix, meta))
    return problem_list


def AddProblemstoDict2024(startingid, raworigin, problems, editor, indexdescription, thedict):
    """Add the problems from GenerateProblemListFromString2024 to *thedict*.

    Parameters:
        startingid: first ID to assign (anything ``int()`` accepts).
        raworigin: text placed in front of every problem's source prefix.
        problems: list of ``(content, source_prefix, meta)`` tuples.
        editor: editor name, stored together with ``GetDate()``.
        indexdescription: text placed in front of the running index inside
            one source prefix; when blank, no index is appended.
        thedict: the problem dictionary, modified in place.

    Returns the list of IDs in input order (for ``rep`` items, the existing
    ID that replaces them), or ``1`` when an ID that would have to be
    assigned is already taken.

    Raises KeyError when a ``same``/``related``/``unrelated`` target is not in
    *thedict*; no link is written in that case.
    """
    idlist = []
    if not problems:  # nothing to add; problems[0] would raise IndexError
        return idlist
    next_id = int(startingid)
    description = indexdescription.strip()
    currentsuffix = problems[0][1]
    problemindex = 0
    for p, suffix, meta in problems:
        # The running index restarts whenever the source prefix changes.
        if suffix == currentsuffix:
            problemindex += 1
        else:
            problemindex = 1
            currentsuffix = suffix

        if "rep" in meta:
            # Already in the database: no new ID is consumed, so an occupied
            # next ID is not an error here.
            idlist.append(meta["rep"])
            print(f"该题 {idlist[-1]} {p} 已在题库中, 不必收录.")
            continue

        pid = str(next_id).zfill(6)
        if pid in thedict:
            print("ID %s 已被使用."%pid)
            return 1
        origin = raworigin + suffix + description + ("" if description == "" else str(problemindex))
        newproblem = CreateNewProblem(pid, p.strip(), origin, thedict, GetDate() + "\t" + editor)
        if "blank" in p:
            newproblem["genre"] = "填空题"
            newproblem["space"] = ""
        elif "bracket" in p:
            newproblem["genre"] = "选择题"
            newproblem["space"] = ""
        else:
            newproblem["genre"] = "解答题"
            newproblem["space"] = "4em"

        # Check every link target before touching anything, so a bad ID
        # cannot leave half-written back-links behind.
        # NOTE(review): assumes generate_number_set yields keys of thedict.
        links = [(field, list(meta.get(field, []))) for field in ("same", "related", "unrelated")]
        missing = [sid for _, ids in links for sid in ids if sid not in thedict]
        if missing:
            raise KeyError("linked problem id(s) not found for %s: %s" % (pid, ", ".join(map(str, missing))))
        for field, ids in links:
            for sid in ids:
                thedict[sid][field].append(pid)
                newproblem[field].append(sid)

        thedict[pid] = newproblem
        maxsim, argmaxsim = detectmaxsim(pid, [pid], thedict)
        print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p))
        next_id += 1
        idlist.append(pid)
    return idlist