"""Extract the red-highlighted answers from a LaTeX answer sheet.

The script reads a LaTeX file, keeps the body of its ``document``
environment, removes a few layout commands, splits the body into problem
chunks and, for every chunk, records the text that follows the last
``\\textcolor{red}`` marker under the chunk's six-digit ID.  The collected
``ID -> answer`` pairs are written to a plain-text file.
"""
import re

# Hard-coded locations used when the file is run as a script.
INPUT_PATH = r"D:\temp\g1ans.txt"
OUTPUT_PATH = "文本文件/metadata.txt"

# Body of the LaTeX document environment (non-greedy: first \end{document}).
DOCUMENT_RE = re.compile(r"\\begin\{document\}([\s\S]*?)\\end\{document\}")

# NOTE(review): this pattern has no delimiters around its lazy group, so it
# can only ever match empty strings; every chunk is therefore empty and ID
# lookup fails.  The delimiters look like they were lost at some point --
# restore them here (or pass ``problem_pattern`` explicitly) before relying
# on the output.  The pattern is kept verbatim because the intended
# delimiters cannot be determined from this file.
PROBLEM_PATTERN = r"([\s\S]*?)"

# A problem ID is exactly six digits wrapped in parentheses, e.g. "(000123)".
ID_RE = re.compile(r"\((\d{6})\)")

# Literal marker that precedes an answer.
ANSWER_MARKER = "\\textcolor{red}"

# Layout commands deleted during preprocessing, in the order they are removed.
STRIPPED_COMMANDS = ("\\newpage", "\\item", "\\end{enumerate}", "\\section")

# Answers containing this tag ("no answer yet") are not recorded.
NO_ANSWER_TAG = "暂无答案"


def extract_document_body(raw_data: str) -> str:
    """Return the text between ``\\begin{document}`` and ``\\end{document}``.

    Raises:
        ValueError: if *raw_data* contains no document environment.
    """
    match = DOCUMENT_RE.search(raw_data)
    if match is None:
        raise ValueError("no \\begin{document}...\\end{document} block found")
    return match.group(1)


def preprocess(data: str) -> str:
    """Delete the layout commands listed in ``STRIPPED_COMMANDS`` from *data*."""
    # Removal is sequential so the result matches a chain of str.replace calls.
    for command in STRIPPED_COMMANDS:
        data = data.replace(command, "")
    return data


def extract_answer(problem: str) -> str:
    """Return the answer text of one problem chunk.

    The answer is whatever follows the *last* ``\\textcolor{red}`` marker; if
    the marker is absent the whole chunk is used.  After stripping
    whitespace, the first character and the last two characters are dropped
    (presumably the opening brace and the closing brace plus one trailing
    delimiter -- TODO confirm against the input format), and the remainder
    is stripped again.
    """
    # rpartition yields the full string as the tail when the marker is missing.
    tail = problem.rpartition(ANSWER_MARKER)[2]
    return tail.strip()[1:-2].strip()


def parse_answers(data: str, problem_pattern: str = PROBLEM_PATTERN) -> dict:
    """Map each problem ID found in *data* to its answer.

    Args:
        data: preprocessed document body.
        problem_pattern: regex with one capture group that yields one problem
            chunk per match.

    Returns:
        A dict ``{problem_id: answer}`` in order of first appearance; answers
        containing ``NO_ANSWER_TAG`` are left out.  Every parsed pair is also
        printed to stdout.

    Raises:
        ValueError: if a chunk contains no six-digit ID.
    """
    answers = {}
    for chunk in re.findall(problem_pattern, data):
        id_match = ID_RE.search(chunk)
        if id_match is None:
            # Fail with a descriptive message instead of a bare IndexError.
            raise ValueError(
                "no six-digit problem ID found in chunk %r "
                "(check the problem pattern)" % chunk[:40]
            )
        problem_id = id_match.group(1)
        answer = extract_answer(chunk)
        print(problem_id, answer)
        if NO_ANSWER_TAG not in answer:
            answers[problem_id] = answer
    return answers


def format_output(answers: dict) -> str:
    """Render *answers* as the text written to the output file."""
    entries = (
        f"{problem_id}\n{answer}\n\n\n"
        for problem_id, answer in answers.items()
    )
    return "ans\n\n" + "".join(entries)


def main(input_path: str = INPUT_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Read *input_path*, extract the answers and write them to *output_path*."""
    with open(input_path, "r", encoding="u8") as f:
        raw_data = f.read()

    # Preprocessing
    data = preprocess(extract_document_body(raw_data))

    # Read the problems
    answers = parse_answers(data)

    with open(output_path, "w", encoding="u8") as f:
        f.write(format_output(answers))


if __name__ == "__main__":
    main()