From 1fafd8196a8ebffbb6cbc4d04158f3f2b8c650c6 Mon Sep 17 00:00:00 2001 From: "weiye.wang" Date: Sat, 16 Mar 2024 19:59:38 +0800 Subject: [PATCH] =?UTF-8?q?RefineMathpix=E4=B8=AD=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E5=8E=BB=E9=99=A4=E8=AF=95=E5=8D=B7=E4=B8=8A=E7=9A=84=E6=A0=87?= =?UTF-8?q?=E9=A2=98=E5=92=8C=E5=88=86=E6=95=B0=E7=AD=89=E5=8A=9F=E8=83=BD?= =?UTF-8?q?(=E6=B5=8B=E8=AF=95)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 工具v2/database_tools.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/工具v2/database_tools.py b/工具v2/database_tools.py index b02e0bb0..1144e6cc 100644 --- a/工具v2/database_tools.py +++ b/工具v2/database_tools.py @@ -1415,6 +1415,7 @@ def RefineMathpix(raw_string): # 进一步修改mathpix得到的字符串 string = re.sub(s,replacestrings[s],string) #修改部分LaTeX命令成为惯用的 for s in wrongrecog: string = re.sub(s,wrongrecog[s],string) #修改mathpix识别的一些常见错别字 + string = RipTestPaperDesc(string) string = re.sub(r"[\s]*(``|''|\}|\{) *",lambda matchobj: matchobj.group(1),string) #去除符号前后的空格 string = itemizeProblems(string) #将题号替换为\item string = re.sub(r"\$\$","$",string) #行间公式替换为行内公式 @@ -1599,6 +1600,13 @@ def RefineCasesEnv(string): # 美化cases环境 string = re.sub(r"&[,\s]&","& ",string) return string #返回处理后的字符串 +def RipTestPaperDesc(string): + string = re.sub(r"[一二三四五六][、\.\s]+(?:(?:填空)|(?:选择)|(?:解答))题","",string) #去除没有标分数的填空选择解答题描述 + string = re.sub(r"\(本[大]*题满分[^\)]*\d+\s*分\)","",string) #去除带括号的分数描述 + string = re.sub(r"本[大]*题[^\n]*?步骤\.{0,1}","",string) #去除解答题需要写出步骤提示 + string = re.sub(r"本[大]*题共有[^\n]*\d+\s*分\.{0,1}","",string) #去除解答题单题分数提示 + return string + def SubstringOccurence(regex,string): #生成regex在string中出现的所有位置 poslist = [item.start() for item in re.finditer(regex,string)] return poslist