# Reconstructed from a one-line unified diff of 工具/database_tools.py
# (index b45c79ca..b7f1ab5f, hunk @@ -101,5 +101,45 @@).
# The hunk opens with the tail of parsePDF(filePath), whose header lies
# outside this view and is therefore not reproduced as code:
#         text += page.get_text() + "\n"
#     return text
# `re`, parsePDF, generate_exp and load_dict are used below but are not
# defined or imported inside this hunk.


def extractIDs(filePath):
    """Extract problem ids from a .txt, .tex or .pdf file.

    Ids are six-digit numbers wrapped in parentheses, e.g. ``(012345)``.

    Args:
        filePath: Path of the file to scan. The extension is matched
            case-insensitively.

    Returns:
        Whatever ``generate_exp`` produces for the list of ids found
        (per the original comment, an id string containing ":" or ","),
        or the literal string "格式不正确" when the extension is not
        supported.
    """
    lowered = filePath.lower()
    if lowered.endswith((".txt", ".tex")):
        with open(filePath, "r", encoding="u8") as f:
            data = f.read()
    elif lowered.endswith(".pdf"):
        data = parsePDF(filePath)
    else:
        # Kept as a returned message (not an exception) so existing callers
        # that display or compare this string keep working.
        return "格式不正确"
    ids = re.findall(r"\((\d{6})\)", data)
    return generate_exp(ids)


def spareIDs(filename):
    """Describe the unused id intervals of a problem database.

    Args:
        filename: Passed straight to ``load_dict``; the keys of the loaded
            dict are treated as the ids in use.

    Returns:
        A multi-line string with one closed interval of free ids per line.
        The last interval runs up to 999999 and is omitted when 999999
        itself is already in use.
    """
    max_id = 999999
    pro_dict = load_dict(filename)
    # Assumes generate_exp yields comma-separated groups, each either a single
    # six-digit id or "start:end" -- so [:6] is a group's first id and [-6:]
    # its last. TODO confirm against generate_exp.
    used_list = generate_exp(list(pro_dict.keys())).split(",")

    lines = []
    for current, following in zip(used_list, used_list[1:]):
        first_free = int(current[-6:]) + 1
        last_free = int(following[:6]) - 1
        lines.append(
            "首个空闲id: %s, 直至: %s"
            % (str(first_free).zfill(6), str(last_free).zfill(6))
        )

    tail_start = int(used_list[-1][-6:]) + 1
    if tail_start <= max_id:
        # Skip the trailing interval when nothing above the highest used id
        # is free; otherwise it would read "1000000 .. 999999".
        lines.append(
            "首个空闲id: %s, 直至: %s" % (str(tail_start).zfill(6), "999999")
        )
    return "\n".join(lines)


def parse_usage(datastring):
    """Tokenise a single entry of a problem's ``usages`` list.

    The entry is split on whitespace and every token is classified:
    a token containing "高" or "班" is the class name, any other token
    containing "." is a difficulty value, and everything else is the date.
    When several tokens qualify as date or class name, the last one wins.

    Args:
        datastring: One whitespace-separated usage record.

    Returns:
        A dict with keys "date" (str), "classname" (str) and "difficulty"
        (list of str, in order of appearance).
    """
    date = ""
    classname = ""
    diff = []
    for item in datastring.split():
        if "高" in item or "班" in item:
            classname = item
        elif "." in item:
            diff.append(item)
        else:
            date = item
    return {"date": date, "classname": classname, "difficulty": diff}


if __name__ == "__main__":
    print("数据库工具, import用.")