丰富database tools

This commit is contained in:
weiye.wang 2023-06-22 20:10:24 +08:00
parent f83b9307d4
commit 1b93b7c314
1 changed files with 40 additions and 0 deletions

View File

@ -101,5 +101,45 @@ def parsePDF(filePath): #提取pdf文件中的字符
text += page.get_text() + "\n"
return text
def extractIDs(filePath):
    """Collect the six-digit problem ids written in a .txt, .tex or .pdf file.

    Text files (.txt / .tex) are read as UTF-8; .pdf files are converted to
    text with ``parsePDF``. An id is recognised as exactly six digits
    enclosed in ASCII parentheses, e.g. ``(012345)``. All matches, in order
    of appearance, are passed to ``generate_exp`` and its result is returned
    (per the original comment: an id string containing ":" or ",").

    For any other file extension the literal string "格式不正确" is returned
    instead of raising, so callers must be prepared for that value.
    """
    if filePath.endswith(".pdf"):
        content = parsePDF(filePath)
    elif filePath.endswith((".txt", ".tex")):
        with open(filePath, "r", encoding="u8") as handle:
            content = handle.read()
    else:
        # Unsupported extension: reported through the return value.
        return "格式不正确"
    matched_ids = re.findall(r"\((\d{6})\)", content)
    return generate_exp(matched_ids)
def spareIDs(filename):
    """Describe the unused id intervals of the problem database in *filename*.

    The keys of ``load_dict(filename)`` are the ids in use. ``generate_exp``
    condenses them into one string, which is split on ","; the first six and
    last six characters of every piece are read as the lowest and highest id
    of that used run.

    Returns a multi-line string with one closed interval of free ids per
    line: one line for each gap between two consecutive pieces, and a final
    line running from just after the last used id up to "999999".
    """
    template = "首个空闲id: %s, 直至: %s"
    segments = generate_exp(list(load_dict(filename).keys())).split(",")

    report = []
    for current, following in zip(segments, segments[1:]):
        gap_start = str(int(current[-6:]) + 1).zfill(6)
        gap_end = str(int(following[:6]) - 1).zfill(6)
        report.append(template % (gap_start, gap_end))

    # Everything after the final used run is free, up to the largest 6-digit id.
    tail_start = str(int(segments[-1][-6:]) + 1).zfill(6)
    report.append(template % (tail_start, "999999"))
    return "\n".join(report)
def parse_usage(datastring, class_markers=("高", "班")):
    """Split one entry of a problem's usage records into its fields.

    The entry is trimmed and split on runs of whitespace. Each token is then
    classified:

    * a token containing any of *class_markers* is the class name;
    * otherwise a token containing "." is a difficulty value;
    * any other token is the date.

    If several tokens qualify as date or class name, the last one wins;
    difficulty values are collected in order.

    Args:
        datastring: Raw usage text, fields separated by whitespace.
        class_markers: Substrings that identify a class-name token. Empty
            markers are ignored, because the empty string is contained in
            every token and would swallow all fields.

    Returns:
        A dict with keys "date" (str), "classname" (str) and "difficulty"
        (list of str). Fields that were not found stay "" / [].
    """
    # NOTE(review): in the reviewed copy the marker literals were empty
    # strings (apparently lost in extraction), which made every token land in
    # "classname". "高"/"班" are the presumed original markers -- confirm
    # against the repository before relying on the defaults.
    markers = tuple(marker for marker in class_markers if marker)

    date = ""
    classname = ""
    diff = []
    for item in re.sub(r"\s+", "\t", datastring.strip()).split("\t"):
        if any(marker in item for marker in markers):
            classname = item
        elif "." in item:
            diff.append(item)
        else:
            date = item
    return {"date": date, "classname": classname, "difficulty": diff}
if __name__ == "__main__":
    # This file is meant to be imported; when executed directly it only
    # prints a short notice saying so.
    print("数据库工具, import用.")