修复mathpix预处理中 将表格中的首数字识别为题号的bug
This commit is contained in:
parent
a699f604b4
commit
c05e25ad23
|
|
@ -1356,6 +1356,17 @@ def setCopy(string): # 写入剪切板内容
|
|||
wc.SetClipboardData(win32con.CF_UNICODETEXT, string)
|
||||
wc.CloseClipboard()
|
||||
|
||||
def itemizeProblems(string):
    r"""Replace problem numbers with ``\item``, processing the text line by line.

    The input is split on newline characters and each line is handled
    independently:

    * a line containing ``&`` is passed through unchanged (presumably a
      table row, whose leading digits must not be read as a problem number);
    * in any other line, every match of the problem-number pattern -- a
      number at the start of the line, optionally preceded by "例", followed
      by ".", "、" or whitespace, or a bracketed "[例 N]" label anywhere in
      the line -- is replaced by a newline followed by ``\item ``.

    The processed lines are joined back together with newlines and returned.
    """
    number_pattern = re.compile(r"(?:(?:^|\n)+[例]*[\s]*[0-9]+[\s]*[\.、\s]+|\[[\s]*例[\s]*[0-9]*[\s]*\][\s]*)")
    # As a replacement template this expands to: newline, backslash, "item ".
    item_template = "\\n\\\\item "

    def convert(row):
        # Lines carrying "&" are left exactly as they are.
        if "&" in row:
            return row
        return number_pattern.sub(item_template, row)

    return "\n".join(convert(row) for row in string.split("\n"))
|
||||
|
||||
def RefineMathpix(raw_string): # 进一步修改mathpix得到的字符串
|
||||
puctuationsfulltosemi = {" ": " ","。": ". ",".": ". ",",": ", ",":": ": ",";": "; ","(": "(",")": ")","?": "? ","“": "``","”": "''", "【": "[", "】": "]"}
|
||||
replacestrings = {r"\\overparen": r"\\overset\\frown", "eqslant": "eq", r"\\vec": r"\\overrightarrow ", r"\\bar": r"\\overline", r"\\lim": r"\\displaystyle\\lim", r"\\sum":r"\\displaystyle\\sum", r"\\prod":r"\\displaystyle\\prod", r"\\mid":"|", r"\^\{\\prime\}":"'",r"e\^":r"\\mathrm{e}^",r"/\s*/":r"\\parallel "}
|
||||
|
|
@ -1373,7 +1384,7 @@ def RefineMathpix(raw_string): # 进一步修改mathpix得到的字符串
|
|||
for s in wrongrecog:
|
||||
string = re.sub(s,wrongrecog[s],string) #修改mathpix识别的一些常见错别字
|
||||
string = re.sub(r"[\s]*(``|''|\}|\{) *",lambda matchobj: matchobj.group(1),string) #去除符号前后的空格
|
||||
string = re.sub(r"(?:(?:^|\n)+[例]*[\s]*[0-9]+[\s]*[\.、\s]+|\[[\s]*例[\s]*[0-9]*[\s]*\][\s]*)","\\n\\\\item ",string) #将题号替换为\item
|
||||
string = itemizeProblems(string) #将题号替换为\item
|
||||
string = re.sub(r"\$\$","$",string) #行间公式替换为行内公式
|
||||
string = re.sub(r"\$\s+\$"," ",string) #删除多余的$符号
|
||||
string = re.sub(r"([,.:;?!])\$",lambda x:x.group(1)+" $",string) #标点和$符号分开
|
||||
|
|
|
|||
Reference in New Issue