def _parse_interaction_tags(tag_text):
    """Turn the comma-separated text of a leading ``[...]`` group into a meta dict.

    Tags are case-insensitive and surrounding whitespace is ignored:
    ``rep<ID>`` -> ``{"rep": ID}`` (takes precedence over every other tag,
    the last ``rep`` wins), ``s<ID>`` -> "same", ``r<ID>`` -> "related",
    ``u<ID>`` -> "unrelated". IDs are left-padded with zeros to 6 characters.
    Tags with any other prefix are ignored.
    """
    key_by_prefix = {"S": "same", "R": "related", "U": "unrelated"}
    buckets = {"same": [], "related": [], "unrelated": []}
    replacement_id = None
    for raw_tag in tag_text.split(","):
        # Strip so that "[s12, r345]" does not lose the tag after the space.
        tag = raw_tag.strip().upper()
        if tag.startswith("REP"):  # must be tested before the "R" prefix
            replacement_id = tag[3:].zfill(6)
        elif tag[:1] in key_by_prefix:
            buckets[key_by_prefix[tag[:1]]].append(tag[1:].zfill(6))
    if replacement_id is not None:
        return {"rep": replacement_id}
    return buckets


def GenerateProblemListFromString202406(data):
    r"""Split LaTeX text into problems, one per ``\item``.

    Processing steps, in order:

    * If a ``\begin{document}...\end{document}`` pair exists, only its body
      is parsed; otherwise the whole string is parsed.
    * Every ``\item`` up to the next ``\item`` / ``\end{enumerate}`` is one
      problem.
    * Coloured "similar problem" boxes (``\definecolor ... \end{tcolorbox}``)
      that follow a problem are removed from its content.
      TODO: the interaction info written inside those boxes is not parsed
      yet (marked "to be modified" by the author, 2024-06-07); for now the
      interaction info is still read from a leading ``[...]`` group.
    * Everything from the first line starting with ``%`` to the end of the
      item is dropped, as is a trailing ``\\``.
    * A leading ``[tag,tag,...]`` group (letters, digits, commas, whitespace
      only) is removed from the content and parsed into the meta dict; see
      ``_parse_interaction_tags``. Without such a group the meta dict is
      empty.
    * The source tag is the last ``%``-comment line before the item that
      consists of a single whitespace-free token, with ``%`` removed; it is
      ``""`` when there is none.

    Items that are empty after cleaning are skipped.

    Args:
        data: LaTeX source as a string.

    Returns:
        A list of ``(content, source_tag, meta)`` tuples in document order.
    """
    document = re.search(r"\\begin\{document\}([\s\S]*?)\\end\{document\}", data)
    if document is not None:
        data = document.group(1)

    # The appended \end{enumerate} guarantees that the last item is
    # terminated even when the input has no closing tag of its own.
    data = data + "\n\\end{enumerate}"
    data = re.sub(r"\n{2,}", "\n", data)
    # Put an explicit \enditem marker in front of every item boundary so each
    # problem can be captured with a single non-greedy match.
    data = re.sub(r"\\item", r"\\enditem\\item", data)
    data = re.sub(r"\\end\{enumerate\}", r"\\enditem", data)

    problem_list = []
    for item in re.finditer(r"\\item([\s\S]*?)\\enditem", data):
        start, end = item.span(1)
        content_raw = data[start:end].strip()
        # Drop the coloured similar-problem boxes attached to this problem.
        content_raw = re.sub(
            r"\\definecolor[\s\S]*?\\end\{tcolorbox\}", "", content_raw
        )
        # Drop trailing comment lines (they belong to the following items).
        content_raw = re.sub(r"\n\%[\s\S]*$", "", content_raw)
        # Drop a line break command at the very end of the problem; rstrip
        # first so trailing blanks after the "\\" do not hide it.
        content_raw = re.sub(r"\\\\$", "", content_raw.rstrip()).strip()
        if not content_raw:
            # An empty \item carries no problem (and used to raise IndexError).
            continue

        content = content_raw
        meta = {}
        # One anchored match decides both what is stripped and what is
        # parsed, so the two can never disagree; a "[" without a valid
        # closing "]" is treated as ordinary problem text.
        bracket = re.match(r"\[([A-Za-z\d,\s]*?)\]", content_raw)
        if bracket is not None:
            content = content_raw[bracket.end():].strip()
            meta = _parse_interaction_tags(bracket.group(1))

        # The source tag is the last matching comment line before this item.
        tag_lines = re.findall(r"\n(\%\s{0,}[\S]+)\n", data[:start])
        suffix = tag_lines[-1].replace("%", "").strip() if tag_lines else ""

        problem_list.append((content, suffix, meta))
    return problem_list
{id}()\n" output += "\\end{Large}\\end{flushright}" content = pro_dict[id] output += f"\n{content}\n" @@ -89,9 +89,9 @@ class MyWindow_bdsl(QWidget,Ui_Form): else: Indexed = False idlistpath = "文本文件/新题收录列表.txt" - problems = GenerateProblemListFromString2024(data) + problems = GenerateProblemListFromString2024(data) #待修改(检测题目方式有变) # pro_dict = load_dict("../题库0.3/Problems.json") - nextspareid = NextSpareID(starting_id,self.database_name) + nextspareid = NextSpareID(starting_id,self.database_name) #待修改(检测空白块数大小) self.db = connect(hostname = db_host, port= db_port, username= db_user, pwd = db_pwd, db= self.database_name) self.cursor = self.db.cursor() rolled_back = False