database tools的mathpix预处理中修改了一个$和括号的位置的小bug

This commit is contained in:
weiye.wang 2024-01-10 21:38:00 +08:00
parent 022ef924d3
commit 796aaed1ad
1 changed files with 27 additions and 0 deletions

View File

@ -1120,10 +1120,37 @@ def RefineMathpix(raw_string): # 进一步修改mathpix得到的字符串
string = MergeMathComma(string) #判断非数学环境中的","是否需要合并在一个数学环境中, 如果需要则执行合并
string = RefineCasesEnv(string) #美化cases环境
string = RefineChineseComma(string) #改顿号
string = RefineInterval(string) #改错误的dollars符号和括号的顺序
return string
def RefineInterval(string):
    r"""Move interval brackets that ended up outside ``$...$`` back inside it.

    Three malformed shapes are repaired:

    * ``[$a,+\infty$)`` / ``($a,+\infty$)``  ->  ``$[a,+\infty)$`` / ``$(a,+\infty)$``
    * ``($-\infty,b$]`` / ``($-\infty,b$)``  ->  ``$(-\infty,b]$`` / ``$(-\infty,b)$``
    * ``($-\infty$, $+\infty$)``             ->  ``$(-\infty, +\infty)$``

    Args:
        string: Text to clean up.

    Returns:
        The text with the misplaced dollar sign / bracket pairs swapped.
        Occurrences whose matching ``$`` or bracket cannot be found are left
        untouched.
    """
    newstring = string

    # Pass 1: intervals ending in "+\infty$)" -- fix the right end first.
    # Each swap keeps the string length unchanged, so the spans collected up
    # front stay valid; they are still processed from the back to the front.
    spans = [(m.start(), m.end())
             for m in re.finditer(r"\+\\infty\$\)", newstring)]
    for s, e in reversed(spans):
        leftdollar = newstring.rfind("$", 0, s)
        # A result of -1 (no opening "$") or 0 ("$" is the first character)
        # means there is no character in front of the "$"; without this guard
        # a negative index would wrap around to the end of the string.
        if leftdollar < 1:
            continue
        leftbracket = leftdollar - 1
        lb = newstring[leftbracket]
        if lb in ("[", "("):
            # Swap the opening bracket with "$" and the closing "$" with ")".
            newstring = (
                newstring[:leftbracket]
                + "$" + lb
                + newstring[leftdollar + 1:e - 2]
                + ")$"
                + newstring[e:]
            )

    # Pass 2: intervals starting with "($-\infty".
    spans = [(m.start(), m.end())
             for m in re.finditer(r"\(\$-\\infty", newstring)]
    for s, e in reversed(spans):
        rightdollar = newstring.find("$", e)
        rightbracket = rightdollar + 1
        # Skip when there is no closing "$" or nothing follows it.
        if rightdollar == -1 or rightbracket >= len(newstring):
            continue
        rb = newstring[rightbracket]
        if rb in ("]", ")"):
            # Swap "(" with the opening "$" and the closing "$" with the bracket.
            newstring = (
                newstring[:s]
                + "$("
                + newstring[s + 2:rightdollar]
                + rb + "$"
                + newstring[rightbracket + 1:]
            )

    # Pass 3: the whole real line split into two separate math environments.
    newstring = newstring.replace("($-\\infty$, $+\\infty$)", "$(-\\infty, +\\infty)$")
    return newstring
def RefineChineseComma(string): #顿号如果在数学环境中, 则在两侧加上$符号
CommaPositions = [match.start() for match in re.finditer("",string)]
CommaPositions.reverse()