# Reconstructed from git patch 006c4cf2 (weiye.wang, 2023-07-16) against
# 工具v2/database_tools.py; subject: "update the Mathpix recognised-text
# preprocessing in database_tools".
# The names `re`, `wc` and `win32con` are used exactly as in the patch; their
# imports live outside the patched hunk.

# Full-width punctuation -> ASCII/LaTeX-friendly text, applied literally.
# NOTE(review): in the patch text these keys appear width-normalised to ASCII
# (e.g. "(" mapped to "("), which would be a no-op or an invalid regex. They are
# restored here as explicit full-width code points -- confirm against the
# repository copy of the file.
_FULLWIDTH_PUNCTUATION = str.maketrans({
    "\u3000": " ",    # ideographic space
    "\u3002": ". ",   # ideographic full stop
    "\uff0e": ". ",   # full-width full stop
    "\uff0c": ", ",   # full-width comma
    "\uff1a": ": ",   # full-width colon
    "\uff1b": "; ",   # full-width semicolon
    "\uff08": "(",    # full-width left parenthesis
    "\uff09": ")",    # full-width right parenthesis
    "\uff1f": "? ",   # full-width question mark
    "\u201c": "``",   # left double quotation mark
    "\u201d": "''",   # right double quotation mark
    "\u3010": "[",    # left black lenticular bracket
    "\u3011": "]",    # right black lenticular bracket
})

# A LaTeX command name ends where the letters end; without this guard a rule
# for \lim would also rewrite \limits, \right would eat \rightarrow, etc.
_END_OF_COMMAND = r"(?![A-Za-z])"

# Ordered (pattern, literal replacement) pairs turning Mathpix's LaTeX into the
# house style. Order is the same as in the original table.
_LATEX_REWRITES = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r"\\overparen" + _END_OF_COMMAND, "\\overset\\frown"),
        (r"eqslant", "eq"),  # \leqslant -> \leq, \geqslant -> \geq
        (r"\\vec" + _END_OF_COMMAND, "\\overrightarrow "),
        (r"\\bar" + _END_OF_COMMAND, "\\overline"),
        (r"\\lim" + _END_OF_COMMAND, "\\displaystyle\\lim"),
        (r"\\sum" + _END_OF_COMMAND, "\\displaystyle\\sum"),
        (r"\\prod" + _END_OF_COMMAND, "\\displaystyle\\prod"),
        (r"\\mid" + _END_OF_COMMAND, "|"),
        (r"\^\{\\prime\}", "'"),
        (r"e\^", "\\mathrm{e}^"),
    )
)

# Frequent Mathpix misrecognitions of Chinese characters (keys are regexes).
_OCR_TYPO_FIXES = {
    "雉": "锥",
    "[粗秿]圆": "椭圆",
    "投郑": "投掷",
    "抛郑": "抛掷",
    "范目": "范围",
    "揷": "插",
    "末见": "未见",
    "末成": "未成",
}


def setCopy(string):
    """Write *string* to the clipboard as Unicode text.

    Uses the module-level ``wc`` and ``win32con`` objects (presumably
    ``win32clipboard`` and ``win32con`` from pywin32 -- confirm against the
    file's imports).

    The clipboard is closed in a ``finally`` clause so that a failure while
    emptying or setting the data does not leave it open.
    """
    wc.OpenClipboard()
    try:
        wc.EmptyClipboard()
        wc.SetClipboardData(win32con.CF_UNICODETEXT, string)
    finally:
        wc.CloseClipboard()


def RefineMathpix(raw_string):
    """Post-process text recognised by Mathpix into the project's LaTeX style.

    The input is run through a fixed sequence of textual rewrites: delimiter
    sizing commands are dropped, full-width punctuation is converted, some
    LaTeX commands are replaced, common OCR typos are fixed, question numbers
    become ``\\item``, A-D options become ``\\fourch{..}{..}{..}{..}``, and
    ``\\blank{50}`` / ``\\bracket{20}`` placeholders are inserted.

    Args:
        raw_string: The text as produced by Mathpix.

    Returns:
        The rewritten string. The steps are order-dependent; later rules rely
        on the output of earlier ones.
    """
    string = raw_string

    # Drop \left / \right (and the invisible "\left." / "\right." forms) in
    # front of delimiters, but leave \leftarrow, \rightarrow, ... untouched.
    string = re.sub(r"\\(?:left|right)" + _END_OF_COMMAND + r"\.?", "", string)

    # Full-width punctuation -> half-width, as literal characters (no regex).
    string = string.translate(_FULLWIDTH_PUNCTUATION)

    # Replace some LaTeX commands with the customary ones.
    for pattern, replacement in _LATEX_REWRITES:
        string = pattern.sub(lambda _m, text=replacement: text, string)

    # Fix common Mathpix misrecognitions.
    for wrong, right in _OCR_TYPO_FIXES.items():
        string = re.sub(wrong, right, string)

    # Remove whitespace around quotes and braces.
    string = re.sub(r"[\s]*(``|''|\}|\{)[\s]*", lambda m: m.group(1), string)
    # Turn question numbers ("3.", "例 3、", "[例 3]") into \item.
    string = re.sub(
        r"(?:(?:^|\n)+[例]*[\s]*[0-9]+[\s]*[\.、\s]+|\[[\s]*例[\s]*[0-9]*[\s]*\][\s]*)",
        "\\n\\\\item ",
        string,
    )
    # Display math -> inline math.
    string = re.sub(r"\$\$", "$", string)
    # Separate punctuation from a following "$".
    string = re.sub(r"([,.:;?!])\$", lambda m: m.group(1) + " $", string)
    # \frac -> \dfrac.
    string = re.sub(r"\\frac", r"\\dfrac", string)
    # Collect the four options of a multiple-choice question into \fourch.
    string = re.sub(
        r"\n(?:A\.|\(A\))([\s\S]*?)(?:B\.|\(B\))([\s\S]*?)(?:C\.|\(C\))([\s\S]*?)(?:D\.|\(D\))([\s\S]*?)\n",
        lambda m: "\n\\fourch{%s}{%s}{%s}{%s}\n" % (
            m.group(1).strip(),
            m.group(2).strip(),
            m.group(3).strip(),
            m.group(4).strip(),
        ),
        string,
    )
    # Drop a trailing full stop / semicolon at the end of each option.
    string = re.sub(r"[\.;](\}\{|\}\n)", lambda m: m.group(1), string)
    # Collapse blank lines and leading whitespace after a newline.
    string = re.sub(r"\n\s+", "\n", string)
    # Remove \quad, \qquad, ...
    string = re.sub(r"\\q+uad", "", string)
    # Remove "~".
    string = re.sub(r"~", "", string)
    # Empty parentheses become the answer bracket; at end of line add a period.
    string = re.sub(r"\s*\([\s]{,10}\)", r"\\bracket{20}", string)
    string = re.sub(r"\s*\\bracket\{20\}\s*\n", r"\\bracket{20}.\n", string)

    # The next three rules run twice each: matches are non-overlapping, so a
    # second pass is needed for runs such as "A B C".
    for _ in range(2):
        # Drop braces around a single-character sub/superscript.
        string = re.sub(
            r"(\^|_)\{([0-9a-zA-Z])\}",
            lambda m: m.group(1) + m.group(2),
            string,
        )
    for _ in range(2):
        # Join digits / capitals separated by stray spaces in formulas.
        string = re.sub(
            r"([0-9A-Z])\s+([0-9A-Z])",
            lambda m: m.group(1) + m.group(2),
            string,
        )
    for _ in range(2):
        # Join Chinese characters separated by stray spaces.
        string = re.sub(
            r"([\u4e00-\u9fa5])\s+([\u4e00-\u9fa5])",
            lambda m: m.group(1) + m.group(2),
            string,
        )

    # Upright letter for permutation / combination numbers (C_n^k, P_n^k).
    string = re.sub(
        r"([CP])(_[^_\^]{,5}\^)",
        lambda m: r"\mathrm{" + m.group(1) + "}" + m.group(2),
        string,
    )
    # Inside sub/superscripts go back from \dfrac to \frac.
    string = re.sub(
        r"([\^_])\{([-]{0,1})\\dfrac",
        lambda m: m.group(1) + "{" + m.group(2) + "\\frac",
        string,
    )
    # ldots -> cdots.
    string = re.sub(r"ldots", r"cdots", string)
    # Piecewise functions: array environment -> cases environment.
    string = re.sub(
        r"[\\{]*\\(begin|end)\{array\}(?:\{[rcl]*\}){0,1}",
        lambda m: "\\" + m.group(1) + "{cases}",
        string,
    )
    # A question whose last line ends in Chinese text or a formula gets a blank.
    string = re.sub(
        r"([\u4e00-\u9fa5\$])[\s]*\n\\item",
        lambda m: m.group(1) + "\\blank{50}.\n\\item",
        string,
    )
    # Insert a blank after "是" / "为" / "=$" when followed by punctuation or a
    # newline (a period is added when the blank ends the line).
    string = re.sub(
        r"(是|为|(?:=\$))\s*([,.;\n])",
        lambda m: m.group(1)
        + "\\blank{50}"
        + ("." if m.group(2) == "\n" else "")
        + m.group(2),
        string,
    )
    # The stem line right before \fourch gets the multiple-choice bracket.
    string = re.sub(
        r"([\u4e00-\u9fa5\$])(?:\\bracket\{20\})*[\.]*[\s]*\n\\fourch",
        lambda m: m.group(1) + "\\bracket{20}.\n\\fourch",
        string,
    )
    # Comment lines must not receive \blank{50}.
    string = re.sub(
        r"(%[^\n]*)\\blank\{50\}\.",
        lambda m: m.group(1),
        string,
    )
    # A sub-question number "(1)" on a new line is preceded by a LaTeX "\\".
    string = re.sub(
        r"[\\\\]*\n(\(\d{1,2}\))(?:(?!\n)\s)*",
        lambda m: "\\\\\n" + m.group(1) + " ",
        string,
    )
    # Ideographic commas inside math mode.
    string = RefineChineseComma(string)
    return string


def RefineChineseComma(string):
    """Close and reopen math mode around every "、" that lies inside math.

    A comma is treated as being inside math when the number of "$" characters
    before it is odd. Such a comma is replaced by "$、$", and whitespace
    directly adjacent to it is removed.

    Args:
        string: Text in which "$" delimits inline math.

    Returns:
        The text with math-mode ideographic commas moved outside math.
    """
    comma_positions = [match.start() for match in re.finditer("、", string)]
    # Work from the end so earlier positions stay valid after each edit.
    for pos in reversed(comma_positions):
        if string[:pos].count("$") % 2 == 1:
            string = string[:pos].rstrip() + "$、$" + string[(pos + 1):].lstrip()
    return string