丰富database tools
This commit is contained in:
parent
f83b9307d4
commit
1b93b7c314
|
|
@ -101,5 +101,45 @@ def parsePDF(filePath): #提取pdf文件中的字符
|
||||||
text += page.get_text() + "\n"
|
text += page.get_text() + "\n"
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
def extractIDs(filePath):
    """Extract problem ids from a .txt, .tex or .pdf file.

    The file's text is searched for ids written as exactly six digits
    enclosed in parentheses, e.g. ``(012345)``; the captured digit strings
    are then passed to ``generate_exp``.

    Args:
        filePath: Path of the file to scan. The (case-sensitive) extension
            decides how the text is obtained: ``.txt``/``.tex`` files are
            read as UTF-8 text, ``.pdf`` files go through ``parsePDF``.

    Returns:
        The result of ``generate_exp`` for the ids found (per the original
        author's note, an id string containing ":" or ","), or the literal
        string "格式不正确" when the extension is not supported.
    """
    if filePath.endswith((".txt", ".tex")):
        with open(filePath, "r", encoding="u8") as f:
            data = f.read()
    elif filePath.endswith(".pdf"):
        data = parsePDF(filePath)
    else:
        # Unsupported extension: signalled with a message string, not an exception.
        return "格式不正确"
    # Only the six digits inside the parentheses are captured.
    ids = re.findall(r"\((\d{6})\)", data)
    return generate_exp(ids)
|
||||||
|
|
||||||
|
|
||||||
|
def spareIDs(filename):
    """Return the free (unused) problem-id ranges of a database file.

    The ids already present are loaded with ``load_dict`` and condensed by
    ``generate_exp`` into a comma-separated expression; the gaps between
    consecutive groups, plus the gap after the last group up to 999999,
    are reported.

    Args:
        filename: Path handed to ``load_dict`` to load the problem dictionary.

    Returns:
        A multi-line string; each line describes one closed interval of
        free ids in the form "首个空闲id: <first>, 直至: <last>". The trailing
        interval is omitted when the last used id is already 999999, so no
        out-of-range seven-digit id is ever reported.
    """
    pro_dict = load_dict(filename)
    used_groups = generate_exp(list(pro_dict.keys())).split(",")
    # NOTE(review): assumes every group produced by generate_exp starts and
    # ends with a six-digit id (e.g. "000012" or "000012:000020") and that
    # the database is non-empty -- confirm against generate_exp.
    lines = []
    for current, following in zip(used_groups, used_groups[1:]):
        first_free = int(current[-6:]) + 1   # one past the end of this used group
        last_free = int(following[:6]) - 1   # one before the start of the next group
        lines.append(
            "首个空闲id: %s, 直至: %s"
            % (str(first_free).zfill(6), str(last_free).zfill(6))
        )
    tail_start = int(used_groups[-1][-6:]) + 1
    if tail_start <= 999999:
        # Everything after the last used id is free up to the largest six-digit id.
        lines.append(
            "首个空闲id: %s, 直至: %s" % (str(tail_start).zfill(6), "999999")
        )
    return "\n".join(lines)
|
||||||
|
|
||||||
|
def parse_usage(datastring):
    """Split a single usage record into its date, class name and difficulties.

    The record is split on runs of whitespace and each token is classified:

    * a token containing "高" or "班" is the class name;
    * otherwise, a token containing "." is a difficulty value;
    * any other token is the date.

    If several tokens qualify as date or class name, the last one wins.

    Args:
        datastring: One whitespace-separated usage entry.

    Returns:
        A dict with keys "date" (str), "classname" (str) and "difficulty"
        (list of the difficulty tokens, as strings, in input order). Fields
        that are absent from the record stay "" / [].
    """
    date = ""
    classname = ""
    diff = []
    # str.split() with no argument trims the ends and collapses whitespace runs.
    for item in datastring.split():
        if "高" in item or "班" in item:
            classname = item
        elif "." in item:
            diff.append(item)
        else:
            date = item
    return {"date": date, "classname": classname, "difficulty": diff}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# This module is a library of database helpers; running it directly only
# prints a reminder that it is meant to be imported.
if __name__ == "__main__":
    print("数据库工具, import用.")
|
||||||
Reference in New Issue