import json
import os
import re

# --- File holding the problem-ID list; only its first line is read ---
id_list_file = "文本文件/题号筛选.txt"

# --- Absolute paths of the directories whose .tex files are scanned ---
# Every six-digit ID found in those files counts as "already used".
mainpath = r"C:/Users/Weiye/Documents/wwy sync/23届/"
used_path_list = [
    mainpath + "第一轮复习讲义/",
    mainpath + "上学期测验卷/",
    mainpath + "上学期周末卷/",
    mainpath + "赋能/",
    mainpath + "寒假作业/",
    mainpath + "下学期测验卷/",
    mainpath + "下学期周末卷/",
    mainpath + "第二轮复习讲义/",
    # mainpath + "简单题/",
]
# --- End of path configuration ---


def extract_idlist(string):
    """Return every six-digit ID written as ``(dddddd)`` in *string*.

    Matches are returned in order of appearance, duplicates included.
    """
    return re.findall(r"\(([\d]{6})\)", string)


def generate_number_set(string, dict=None):
    """Expand an ID specification into a list of zero-padded six-digit IDs.

    *string* is a comma-separated list whose items are either a single
    number (``"12"``) or an inclusive range (``"3:7"``). All whitespace,
    including newlines, is stripped before parsing. Empty items (for
    example from a trailing comma or an empty string) are skipped.

    *dict* is unused; it is kept (now optional) only so that existing
    two-argument callers keep working.

    Raises ``ValueError`` if a range item is not of the form ``start:end``
    with integer bounds.
    """
    compact = re.sub(r"\s", "", string)
    numbers_list = []
    for token in compact.split(","):
        if not token:
            # "".zfill(6) would otherwise produce a bogus "000000" entry.
            continue
        if ":" not in token:
            numbers_list.append(token.zfill(6))
            continue
        start, end = token.split(":")
        numbers_list.extend(
            str(ind).zfill(6) for ind in range(int(start), int(end) + 1)
        )
    return numbers_list


def _collect_used_ids(paths):
    """Return the set of IDs found in the .tex files directly under *paths*."""
    used_ids = set()
    for path in paths:
        for name in os.listdir(path):
            # Substring match kept from the original filter, so names such
            # as "x.tex.bak" are scanned as well.
            if ".tex" not in name:
                continue
            with open(os.path.join(path, name), "r", encoding="utf8") as texfile:
                used_ids.update(extract_idlist(texfile.read()))
    return used_ids


def main():
    """Split the IDs on the first line of ``id_list_file`` into unused/used.

    The two resulting lists are appended to ``id_list_file``.
    """
    # Read the problem database JSON file into a dict.
    with open(r"../题库0.3/Problems.json", "r", encoding="utf8") as f:
        pro_dict = json.load(f)

    # Only the first line is parsed: this script appends its report to the
    # same file, and parsing that report on a later run would fail.
    with open(id_list_file, "r", encoding="utf8") as f:
        first_line = f.readline()
    id_list = generate_number_set(first_line, pro_dict)
    print("首行题目数量: ", len(id_list))

    # A set gives O(1) membership tests in the partition below.
    used_ids = _collect_used_ids(used_path_list)

    # Partition while preserving the order of the requested IDs.
    checked_id_list = [pid for pid in id_list if pid not in used_ids]
    ripped_id_list = [pid for pid in id_list if pid in used_ids]
    print("剩余题目数量: ", len(checked_id_list))

    # Append the report to the ID file.
    with open(id_list_file, "a", encoding="utf8") as f:
        f.write("\n\n")
        f.write("未使用题号:\n")
        f.write(",".join(checked_id_list))
        f.write("\n\n已使用题号:\n")
        f.write(",".join(ripped_id_list))


if __name__ == "__main__":
    main()