# Provenance: added by commit 95d183593ad93a4be7b4688064b5a492b414cae3
# (hunk @@ -1385,6 +1385,13 @@ of 工具v2/database_tools.py).
def RefinePuctuations(raw_string: str) -> str:
    """Convert selected full-width punctuation marks to half-width ones.

    Leading and trailing whitespace is stripped first; each character listed
    in the table below is then replaced by its ASCII/LaTeX-style counterpart.
    Several replacements end with a space, so the result may end with a
    space even though the input was stripped.

    The replacement is literal (``str.translate``), not regex based: passing
    these punctuation characters to ``re.sub`` as patterns is unsafe because
    characters such as ``.``, ``(`` and ``?`` are regex metacharacters.

    NOTE(review): the keys are written as explicit full-width code points
    because the function's purpose is full-width -> half-width conversion;
    confirm the exact code points against the upstream table used by
    ``RefineMathpix``.

    Args:
        raw_string: Text that may contain full-width punctuation.

    Returns:
        The stripped text with the listed punctuation converted.
    """
    puctuationsfulltosemi = {
        "\u3000": " ",    # IDEOGRAPHIC SPACE
        "\u3002": ". ",   # IDEOGRAPHIC FULL STOP
        "\uff0e": ". ",   # FULLWIDTH FULL STOP
        "\uff0c": ", ",   # FULLWIDTH COMMA
        "\uff1a": ": ",   # FULLWIDTH COLON
        "\uff1b": "; ",   # FULLWIDTH SEMICOLON
        "\uff08": "(",    # FULLWIDTH LEFT PARENTHESIS
        "\uff09": ")",    # FULLWIDTH RIGHT PARENTHESIS
        "\uff1f": "? ",   # FULLWIDTH QUESTION MARK
        "\u201c": "``",   # LEFT DOUBLE QUOTATION MARK -> LaTeX opening quotes
        "\u201d": "''",   # RIGHT DOUBLE QUOTATION MARK -> LaTeX closing quotes
        "\u3010": "[",    # LEFT BLACK LENTICULAR BRACKET
        "\u3011": "]",    # RIGHT BLACK LENTICULAR BRACKET
    }
    # Every key is a single non-ASCII character and every replacement is pure
    # ASCII, so one translate pass is equivalent to replacing key by key.
    return raw_string.strip().translate(str.maketrans(puctuationsfulltosemi))