def _extract_ids_from_tex(data):
    """Pair running ``enumerate`` item numbers with six-digit IDs in LaTeX text.

    Every ``\\begin{enumerate} ... \\end{enumerate}`` block in *data* is
    scanned. Inside a block, each ``\\setcounter{enumi}{N}`` restarts the
    running number at ``N``; every ``(dddddd)`` six-digit ID that follows
    increments the number by one and is paired with it.

    Args:
        data: Full LaTeX source as a single string.

    Returns:
        A list of ``(number, id)`` tuples, both elements being strings, in
        order of appearance.
    """
    idlist = []
    for raw_block in re.findall(
        r"\\begin\{enumerate\}([\s\S]*?)\\end\{enumerate\}", data
    ):
        block = raw_block.strip()
        markers = list(re.finditer(r"\\setcounter\{enumi\}\{(\d+)\}", block))

        # Split the block into (starting counter, text) segments.  Anything
        # ahead of the first explicit enumi counter is numbered from 0, even
        # when the block opens with a \setcounter for some *other* counter
        # (previously such leading items were silently dropped).
        segments = []
        first_start = markers[0].start() if markers else len(block)
        if first_start > 0:
            segments.append((0, block[:first_start]))
        for pos, marker in enumerate(markers):
            if pos + 1 < len(markers):
                seg_end = markers[pos + 1].start()
            else:
                seg_end = len(block)
            segments.append((int(marker.group(1)), block[marker.end():seg_end]))

        for counter, text in segments:
            # The first ID after a counter value N is item N + 1.
            numbered = enumerate(
                re.findall(r"\((\d{6})\)", text), start=counter + 1
            )
            for number, problem_id in numbered:
                idlist.append((str(number), problem_id))
    return idlist


def ExtractIDList(filepath):
    """Return the mapping between in-file problem numbers and six-digit IDs.

    For a ``.pdf`` path (suffix compared case-insensitively) the text obtained
    from ``parsePDF`` is searched for ``<number>. (<six-digit id>)`` pairs.
    Any other path is read with ``ReadTextFile`` and treated as LaTeX source:
    the numbers are reconstructed from the ``enumerate`` environments and
    their ``\\setcounter{enumi}{N}`` commands.

    Args:
        filepath: Path of the file to scan, as a string.

    Returns:
        A list of ``(number_in_file, six_digit_id)`` tuples of strings.
    """
    # NOTE(review): parsePDF and ReadTextFile are defined elsewhere in this
    # module; both are assumed to return the file content as one string.
    if filepath.lower().endswith(".pdf"):
        data = parsePDF(filepath)
        return re.findall(r"(\d+)\.[\s\n]*\((\d{6})\)", data)
    return _extract_ids_from_tex(ReadTextFile(filepath))