丰富database tools
This commit is contained in:
parent
f83b9307d4
commit
1b93b7c314
|
|
@@ -101,5 +101,45 @@ def parsePDF(filePath): #提取pdf文件中的字符
|
|||
text += page.get_text() + "\n"
|
||||
return text
|
||||
|
||||
def extractIDs(filePath):
    """Extract problem IDs from a .txt, .tex or .pdf file.

    Every six-digit number wrapped in parentheses, e.g. ``(012345)``, is
    collected in order of appearance and passed to ``generate_exp``.

    Args:
        filePath: Path of the file to scan, as a string. The extension is
            matched case-insensitively, so ``.PDF`` or ``.TXT`` are accepted.

    Returns:
        Whatever ``generate_exp`` returns for the IDs found (per the original
        author's note, an ID string containing ":" or ","), or the string
        "格式不正确" when the extension is not supported.
    """
    # Lower-case the suffix so upper/mixed-case extensions are not rejected.
    suffix = filePath[-4:].lower()
    if suffix in (".txt", ".tex"):
        with open(filePath, "r", encoding="u8") as f:
            data = f.read()
    elif suffix == ".pdf":
        data = parsePDF(filePath)
    else:
        # Callers get this marker string back instead of an exception.
        return "格式不正确"
    ids = re.findall(r"\((\d{6})\)", data)
    return generate_exp(ids)
|
||||
|
||||
|
||||
def spareIDs(filename):
    """Report the ranges of unused problem IDs for the database *filename*.

    The used IDs are the keys of ``load_dict(filename)``. ``generate_exp``
    turns them into a string that is split on ","; the last six characters
    of each piece are read as the end of a used run and the first six
    characters of the next piece as the start of the following run.

    Args:
        filename: Passed unchanged to ``load_dict``.

    Returns:
        A multi-line string. Each line describes one closed interval of free
        IDs in the form "首个空闲id: <first>, 直至: <last>", with both bounds
        zero-padded to six digits. The final interval runs up to 999999 and
        is omitted when 999999 itself is already in use.

    NOTE(review): an empty database is not special-cased; the outcome then
    depends on what ``generate_exp`` returns for an empty list.
    """
    highest_id = 999999
    pro_dict = load_dict(filename)
    used_groups = generate_exp(list(pro_dict.keys())).split(",")

    lines = []
    # Each gap lies between the end of one used run and the start of the next.
    for current, following in zip(used_groups, used_groups[1:]):
        first_free = str(int(current[-6:]) + 1).zfill(6)
        last_free = str(int(following[:6]) - 1).zfill(6)
        lines.append("首个空闲id: %s, 直至: %s" % (first_free, last_free))

    # Only report the trailing interval if something is left after the last
    # used ID; otherwise the line would claim a range starting at 1000000.
    tail_start = int(used_groups[-1][-6:]) + 1
    if tail_start <= highest_id:
        lines.append(
            "首个空闲id: %s, 直至: %s" % (str(tail_start).zfill(6), "999999")
        )
    return "\n".join(lines)
|
||||
|
||||
def parse_usage(datastring):
    """Tokenize a single usage record into date, class name and difficulties.

    The record is split on runs of whitespace and each token is classified:
    a token containing "高" or "班" is the class name; otherwise a token
    containing "." is a difficulty value; any other token is the date.

    Args:
        datastring: One whitespace-separated usage record.

    Returns:
        A dict with three keys: "date" (the last date-like token, "" if
        none), "classname" (the last class-like token, "" if none) and
        "difficulty" (list of all difficulty tokens in order).
    """
    tokens = re.sub(r"\s+", "\t", datastring.strip()).split("\t")
    parsed = {"date": "", "classname": "", "difficulty": []}
    for token in tokens:
        if "高" in token or "班" in token:
            # Class markers win even if the token also contains a dot.
            parsed["classname"] = token
        elif "." in token:
            parsed["difficulty"].append(token)
        else:
            parsed["date"] = token
    return parsed
|
||||
|
||||
|
||||
|
||||
# This module is meant to be imported; running it directly only prints a notice.
if __name__ == "__main__":
    notice = "数据库工具, import用."
    print(notice)
|
||||
Reference in New Issue