RefineMathpix中新增去除试卷上的标题和分数等功能(测试)

This commit is contained in:
weiye.wang 2024-03-16 19:59:38 +08:00
parent 7c1b64985c
commit 1fafd8196a
1 changed files with 8 additions and 0 deletions

View File

@ -1415,6 +1415,7 @@ def RefineMathpix(raw_string): # 进一步修改mathpix得到的字符串
string = re.sub(s,replacestrings[s],string) #修改部分LaTeX命令成为惯用的
for s in wrongrecog:
string = re.sub(s,wrongrecog[s],string) #修改mathpix识别的一些常见错别字
string = RipTestPaperDesc(string)
string = re.sub(r"[\s]*(``|''|\}|\{) *",lambda matchobj: matchobj.group(1),string) #去除符号前后的空格
string = itemizeProblems(string) #将题号替换为\item
string = re.sub(r"\$\$","$",string) #行间公式替换为行内公式
@ -1599,6 +1600,13 @@ def RefineCasesEnv(string): # 美化cases环境
string = re.sub(r"&[,\s]&","& ",string)
return string #返回处理后的字符串
def RipTestPaperDesc(string):
    """Strip exam-paper boilerplate (section headings and score notes).

    Four regular-expression deletions are applied one after another; every
    match is replaced by the empty string and the result is fed to the next
    pattern, so the order of the patterns matters.

    Args:
        string: The text to clean.

    Returns:
        The text with all matches of the four patterns removed.
    """
    boilerplate_patterns = (
        # Section heading without a score: one numeral from 一 to 六, then
        # separators (、 . or whitespace), then 填空题 / 选择题 / 解答题.
        r"[一二三四五六][、\.\s]+(?:(?:填空)|(?:选择)|(?:解答))题",
        # Score note wrapped in half-width parentheses, e.g. "(本大题满分54分)".
        r"\(本[大]*题满分[^\)]*\d+\s*分\)",
        # Hint running from "本题"/"本大题" to the nearest "步骤" on the same
        # line (lazy match), plus an optional trailing period.
        r"本[大]*题[^\n]*?步骤\.{0,1}",
        # Score summary starting with "本题共有"/"本大题共有" and running to the
        # last "<digits>分" on the same line (greedy match), plus an optional
        # trailing period.
        r"本[大]*题共有[^\n]*\d+\s*分\.{0,1}",
    )
    cleaned = string
    for pattern in boilerplate_patterns:
        cleaned = re.sub(pattern, "", cleaned)
    return cleaned
def SubstringOccurence(regex, string):
    """List the start index of every match of ``regex`` in ``string``.

    Matches are the ones produced by ``re.finditer``, in the order it
    yields them.

    Args:
        regex: Pattern passed to ``re.finditer``.
        string: The text to scan.

    Returns:
        A list of integer start offsets; empty when nothing matches.
    """
    return [match.start() for match in re.finditer(regex, string)]