RefineMathpix中新增去除试卷上的标题和分数等功能(测试)
This commit is contained in:
parent
7c1b64985c
commit
1fafd8196a
|
|
@ -1415,6 +1415,7 @@ def RefineMathpix(raw_string): # 进一步修改mathpix得到的字符串
|
|||
string = re.sub(s,replacestrings[s],string) #修改部分LaTeX命令成为惯用的
|
||||
for s in wrongrecog:
|
||||
string = re.sub(s,wrongrecog[s],string) #修改mathpix识别的一些常见错别字
|
||||
string = RipTestPaperDesc(string)
|
||||
string = re.sub(r"[\s]*(``|''|\}|\{) *",lambda matchobj: matchobj.group(1),string) #去除符号前后的空格
|
||||
string = itemizeProblems(string) #将题号替换为\item
|
||||
string = re.sub(r"\$\$","$",string) #行间公式替换为行内公式
|
||||
|
|
@ -1599,6 +1600,13 @@ def RefineCasesEnv(string): # 美化cases环境
|
|||
string = re.sub(r"&[,\s]&","& ",string)
|
||||
return string #返回处理后的字符串
|
||||
|
||||
def RipTestPaperDesc(string):
    """Strip exam-paper boilerplate (section headings and score notes) from *string*.

    Four substitutions are applied in order; every match is deleted:

    1. Section headings that carry no score, e.g. ``一、填空题``.
    2. Parenthesised score notes, e.g. ``(本大题满分20分)``.
    3. "Write out your steps" hints, i.e. ``本题`` / ``本大题`` up to ``步骤``
       and an optional trailing period.
    4. Per-section score notes, e.g. ``本大题共有5题,满分76分.``.

    Rule 2 only recognises ASCII parentheses, and rules 3 and 4 never match
    across a newline.

    Args:
        string: The text to clean.

    Returns:
        The text with every matched description removed.
    """
    boilerplate_patterns = (
        # Section heading without a score: numeral, separator(s), question type.
        r"[一二三四五六][、\.\s]+(?:(?:填空)|(?:选择)|(?:解答))题",
        # Score note wrapped in ASCII parentheses.
        r"\(本[大]*题满分[^\)]*\d+\s*分\)",
        # Hint asking for worked steps; lazy so it stops at the first "步骤".
        r"本[大]*题[^\n]*?步骤\.?",
        # Per-section score note; greedy, so it extends to the last
        # "<digits>分" found on the same line.
        r"本[大]*题共有[^\n]*\d+\s*分\.?",
    )
    for pattern in boilerplate_patterns:
        string = re.sub(pattern, "", string)
    return string
|
||||
|
||||
def SubstringOccurence(regex, string):
    """Return the start offsets of every match of *regex* in *string*.

    Matches are the non-overlapping ones produced by ``re.finditer``, in
    left-to-right order.

    Args:
        regex: Pattern to search for (anything ``re.finditer`` accepts).
        string: The text to scan.

    Returns:
        A list of integer start positions; empty when nothing matches.
    """
    return [match.start() for match in re.finditer(regex, string)]
|
||||
|
|
|
|||
Reference in New Issue