def itemizeProblems(string):
    r"""Replace problem-number labels with LaTeX ``\item`` markers, line by line.

    The input is split on "\n" and each line is handled independently:

    * A line containing "&" is passed through unchanged.
      (Presumably such lines are table/alignment rows whose leading digits
      are data rather than problem numbers -- confirm with the caller.)
    * In any other line, every label is replaced by a newline followed by
      ``\item `` (with a trailing space). Two label forms are recognised:
        - at the start of the line: optional "例" characters, optional
          whitespace, one or more digits, then at least one of ".", "、"
          or whitespace (e.g. "1. ", "例2、", "3 ");
        - anywhere in the line: a bracketed example tag such as "[例 3]"
          (digits optional), together with any whitespace after it.

    The processed lines are joined back with "\n", so a replaced label at
    the start of a non-first line ends up preceded by an empty line.

    Args:
        string: The text to process.

    Returns:
        The text with labels replaced as described above.
    """
    # Compiled once up front instead of handing the raw pattern to re.sub on
    # every line. The pattern is kept exactly as it was; note that its "\n"
    # alternative can never match here because the lines come from
    # split("\n") and therefore contain no newline.
    label_pattern = re.compile(
        r"(?:(?:^|\n)+[例]*[\s]*[0-9]+[\s]*[\.、\s]+|\[[\s]*例[\s]*[0-9]*[\s]*\][\s]*)"
    )
    # re.sub template: "\n" expands to a newline and "\\" to one backslash,
    # so every match becomes "<newline>\item ".
    replacement = "\\n\\\\item "
    return "\n".join(
        line if "&" in line else label_pattern.sub(replacement, line)
        for line in string.split("\n")
    )