新增ExtractIDList功能
This commit is contained in:
parent
72e95a0c42
commit
34b4d8a327
|
|
@ -1975,5 +1975,28 @@ def MultiplechoicetoBlankFilling(string_raw): #把多选题的题干和选项转
|
||||||
output = headstring + output[:-2]+"."
|
output = headstring + output[:-2]+"."
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
def ExtractIDList(filepath):
    """Map the problem numbers found in a file to their six-digit problem IDs.

    Args:
        filepath: Path of the file to scan. A path ending in ".pdf" (any
            letter case) is read with parsePDF; anything else is read with
            ReadTextFile and scanned as LaTeX source.

    Returns:
        A list of (number, id) tuples of strings, in order of appearance:
        the problem number as it appears in the file and the six-digit ID.
    """
    # Case-insensitive so that "X.PDF" is not mistaken for a LaTeX source file.
    if filepath.lower().endswith(".pdf"):
        data = parsePDF(filepath)
        # In the PDF text a problem looks like "12. (004321)".
        return re.findall(r"(\d+)\.[\s\n]*\((\d{6})\)", data)

    data = ReadTextFile(filepath)
    idlist = []
    # NOTE(review): the non-greedy match pairs each \begin{enumerate} with the
    # nearest \end{enumerate}, so nested enumerate environments are not
    # handled specially -- confirm the sources never nest them.
    blocks = re.findall(r"\\begin\{enumerate\}([\s\S]*?)\\end\{enumerate\}", data)
    for block in blocks:
        # Cut the block into segments, each paired with the value the enumi
        # counter holds when the segment starts. An enumerate environment
        # starts counting from 0, so the text before the first explicit
        # \setcounter{enumi}{n} always forms a segment starting at 0.
        segments = []
        counter_value = 0
        segment_start = 0
        for marker in re.finditer(r"\\setcounter\{enumi\}\{(\d+)\}", block):
            segments.append((counter_value, block[segment_start:marker.start()]))
            counter_value = int(marker.group(1))
            segment_start = marker.end()
        segments.append((counter_value, block[segment_start:]))

        for first_value, text in segments:
            # Every "(dddddd)" in a segment is taken to be one problem; each
            # one advances the counter by one before being numbered.
            problem_ids = re.findall(r"\((\d{6})\)", text)
            idlist.extend(
                (str(number), problem_id)
                for number, problem_id in enumerate(problem_ids, first_value + 1)
            )
    return idlist
|
||||||
|
|
||||||
if __name__ == "__main__":
    # This module is meant to be imported; running it directly only prints a notice.
    print("数据库工具, import用.")
|
||||||
Reference in New Issue