# Functions added to database_tools.py by commit e0a15bd556648ca53be4e1dc546e5c852323342c
# (weiye.wang, Mon, 17 Jul 2023 21:29:59 +0800): "add a feature that beautifies
# the cases environment to the Mathpix text processing".
#
# The same commit also makes two one-line changes elsewhere in the file:
#   * RefineMathpix now calls `string = RefineCasesEnv(string)` right after
#     MergeMathComma and right before RefineChineseComma;
#   * MergeMathComma's merge branch uses `lmatter[ThePos[1]:]` instead of
#     `lmatter[ThePos[1]+1:]`.
#
# NOTE(review): the code below assumes `re` is imported at module level, as the
# surrounding functions of the file already use it.


def ReverseMatchBrackets(string):
    """Locate the last ")" of *string* and the "(" that opens it.

    Scanning starts at the last ")" and walks backwards, tracking bracket
    depth, until the matching "(" is found.

    Returns:
        ``-1`` when *string* contains no ")" at all (kept for backward
        compatibility with existing callers); otherwise a tuple
        ``(startpos, endpos)`` where ``endpos`` is the index of the last ")"
        and ``startpos`` is the index of its matching "(", or ``-1`` when no
        matching "(" exists.
    """
    if ")" not in string:
        return -1
    endpos = string.rfind(")")
    depth = 0  # number of "(" minus number of ")" seen in string[i:endpos]
    for i in range(endpos - 1, -1, -1):
        char = string[i]
        if char == ")":
            depth -= 1
        elif char == "(":
            depth += 1
            if depth == 1:
                # This "(" is the one closed by the ")" at endpos.
                return (i, endpos)
    return (-1, endpos)


def _RefineCasesRow(row):
    """Rewrite one (already stripped) row of a cases environment.

    A trailing bracket that states a range/condition, e.g. ``x^2 (x>0)``, is
    replaced by commas (``x^2 , x>0,``); afterwards every comma gets an ``&``
    column separator appended.
    """
    # endswith() instead of row[-1]: rows can be empty (empty environment or a
    # trailing row separator), and indexing an empty string raises IndexError.
    if row.endswith(")"):
        lbracket, rbracket = ReverseMatchBrackets(row)
        # Only brackets containing a relation are treated as range statements.
        if lbracket >= 0 and re.search(r"(?:=|<|>|\\ge|\\le|\\in)", row[lbracket:rbracket]):
            row = row[:lbracket] + ", " + row[lbracket + 1:rbracket] + ","
    return row.replace(",", ",&")


def RefineCasesEnv(string):
    """Beautify every ``\\begin{cases}...\\end{cases}`` environment in *string*.

    Each environment body is split into rows on the row separator (two
    backslashes); every row is stripped and rewritten by ``_RefineCasesRow``,
    and the rows are joined again with the row separator. An ``&`` that ends up
    directly in front of a row separator or at the very end of the body is
    removed again, because it would only open an empty column.

    Text outside cases environments is returned unchanged.
    """
    def rewrite(match):
        rows = [_RefineCasesRow(item.strip()) for item in match.group(2).split(r"\\")]
        body = r"\\".join(rows)
        # Drop a dangling "&" only in front of the two-backslash row separator;
        # matching a single backslash would also eat the "&" placed before any
        # LaTeX command such as \sin or \frac.
        body = re.sub(r",\s*&\s*\\\\", lambda _m: r",\\", body)
        # Drop a dangling "&" at the end of the last row.
        body = re.sub(r",\s*&\s*$", ",", body)
        return match.group(1) + body + match.group(3)

    return re.sub(
        r"(\\begin\{cases\})(.*?)(\\end\{cases\})",
        rewrite,
        string,
        flags=re.DOTALL,
    )