mathdeptv2/工具v3/database_tools_2.py

import json,re,os,Levenshtein,fitz,time,sys,subprocess
import pandas as pd
import numpy as np
import pyperclip
import mysql.connector


# Display-scheme dictionary. The Chinese keys are runtime option names and
# must stay verbatim; the English glosses are translations only.
BuildFullScheme = {
    "输出路径": "临时文件",  # output path -> "temporary files"
    "教师版": True,  # teacher edition
    "字段显示设置": {  # per-field display settings
        "题后空间": True,  # space after the problem
        "课时目标": True,  # lesson objectives
        "题目标签": True,  # problem tags
        "答案": True,  # answer
        "解答与提示": True,  # solution and hints
        "使用记录": [3, -1],  # usage records, see the explanation entry below
        # Explanation (translated): [a, b] shows the best a and the worst b
        # records; a -2 means "hide"; no -2 but a -1 means "show all".
        "使用记录说明": "[a,b]表示显示最好的a个和最差b个, 有-2表示不显示, 无-2但有-1表示全部显示",
        "来源": True,  # origin
        "备注": True,  # remark
        "届别": [],  # graduating-year filter
    },
}


def GetDate():
    """Return the current local date as a ``yyyymmdd`` string.

    The clock is sampled exactly once so that year, month and day always
    belong to the same instant (three separate samples could disagree when
    the call straddles midnight).
    """
    return time.strftime("%Y%m%d", time.localtime())

def GetTime():
    """Return the current local time as an ``hhmmss`` string."""
    now = time.localtime()
    fields = (now.tm_hour, now.tm_min, now.tm_sec)
    # Each component is left-padded with zeros to two characters.
    return "".join(str(part).zfill(2) for part in fields)

def datestrtotimestamp(date_str):
    """Convert a ``yyyymmdd`` date string to an integer timestamp.

    The string is parsed with ``time.strptime`` and converted with
    ``time.mktime``, so the result is in seconds (not minutes) and refers to
    local midnight of that day.
    """
    return int(time.mktime(time.strptime(date_str, "%Y%m%d")))

def ReadTextFile(filepath):
    """Read a UTF-8 text file and return its whole content as one string."""
    with open(filepath, mode="r", encoding="u8") as source:
        return source.read()

def SaveTextFile(data, filepath):
    """Write ``data`` to ``filepath`` as UTF-8 text, replacing any old content.

    If the parent directory does not exist it is created through ``makedir``
    (a helper defined elsewhere in this file).

    Returns:
        ``filepath`` unchanged.
    """
    pathname = os.path.dirname(filepath)
    # A bare file name has an empty dirname, meaning the current directory:
    # there is nothing to create and "" must not be handed to makedir.
    if pathname and not os.path.exists(pathname):
        makedir(pathname)
    with open(filepath, "w", encoding="u8") as f:
        f.write(data)
    return filepath

def AppendTextFile(string, filepath):
    """Append ``string`` (stripped, followed by a blank line) to a UTF-8 file.

    Returns:
        ``filepath`` unchanged.
    """
    with open(filepath, mode="a", encoding="u8") as sink:
        sink.write("".join((string.strip(), "\n\n")))
    return filepath

def SortDict(adict):
    """Return a new dict holding the items of ``adict`` ordered by key."""
    return {key: adict[key] for key in sorted(adict)}


# Loading and saving JSON databases (not limited to the problem database)

def load_dict(filename):
    """Load the UTF-8 JSON file ``filename`` and return the parsed object."""
    with open(filename, mode="r", encoding="u8") as source:
        return json.load(source)

def save_dict(adict, filepath):
    """Serialise ``adict`` as indented, non-ASCII-escaped JSON into ``filepath``.

    Returns:
        ``filepath`` on success, or the integer ``1`` when serialising or
        writing fails (a message is printed in that case).
    """
    try:
        jsondata = json.dumps(adict, indent=4, ensure_ascii=False)
        SaveTextFile(jsondata, filepath)
        return filepath
    # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
    # and SystemExit are no longer swallowed and reported as a save failure.
    except Exception:
        print("保存 %s 文件失败"%filepath)
        return 1


def pre_treating(string):
    """Strip characters that are irrelevant when comparing two problems.

    The removals are applied in this order: whole ``center`` environments,
    answer-slot macros, whitespace and bracket/backslash/dollar characters,
    a few purely typographic macro names, and finally ASCII punctuation.
    """
    removal_patterns = (
        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
        r"[\s\\\{\}\$\(\)\[\]]",
        r"[\n\t]",
        r"(displaystyle)|(overrightarrow)|(overline)",
        r"[,\.:;?]",
    )
    for pattern in removal_patterns:
        string = re.sub(pattern, "", string)
    return string

def treat_dict(p_dict):
    """Build a comparison-ready copy of the problem dictionary.

    Each entry keeps only two fields: ``content`` passed through
    ``pre_treating`` and ``same`` taken over unchanged.
    """
    return {
        pid: {"content": pre_treating(entry["content"]), "same": entry["same"]}
        for pid, entry in p_dict.items()
    }

def detectmaxsim(currentid, excludelist, adict):
    """Find the problem most similar to ``currentid`` outside ``excludelist``.

    Entries whose content contains ``OBSOLETE`` are ignored. Similarity is
    ``Levenshtein.jaro`` on the ``content`` fields.

    Returns:
        ``(best_similarity, best_id)``; ``(-1, "000000")`` when there is no
        candidate at all.
    """
    best_rate, best_id = -1, "000000"
    for candidate, record in adict.items():
        if candidate in excludelist or "OBSOLETE" in record["content"]:
            continue
        rate = Levenshtein.jaro(record["content"], adict[currentid]["content"])
        if rate > best_rate:
            best_rate, best_id = rate, candidate
    return (best_rate, best_id)

def stringmaxsim(string, adict, listlength):
    """Return the ``listlength`` problems of ``adict`` most similar to ``string``.

    ``string`` is normalised with ``pre_treating`` before comparison; the
    ``content`` fields of ``adict`` are compared as they are. Entries whose
    content contains ``OBSOLETE`` are skipped.

    Returns:
        A list of ``(id, similarity)`` tuples sorted by descending similarity,
        holding at most ``listlength`` items (empty if ``listlength <= 0``).
    """
    target = pre_treating(string)
    scored = [
        (pid, Levenshtein.jaro(record["content"], target))
        for pid, record in adict.items()
        if "OBSOLETE" not in record["content"]
    ]
    # Rank every candidate at once: comparing against the last kept element
    # while the list is still unsorted could drop better matches. The sort is
    # stable, so ties keep dictionary order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:max(listlength, 0)]

def generate_sim_group(startingids, prodict, max_size, threshold):
    """Grow a group of mutually similar problems from ``startingids``.

    Starting from the ids in ``startingids`` (a ":" / "," expression), every
    round adds the non-obsolete problems whose ``Levenshtein.jaro`` similarity
    to some problem of the previous round is at least ``threshold``. The
    search stops when a round adds nothing or once more than ``max_size``
    ids have been collected.

    Returns:
        The collected ids as a ":" / "," expression built by ``generate_exp``.

    Raises:
        KeyError: if a starting id is not a key of ``prodict``.
    """
    treated_dict = treat_dict(prodict)
    # dict.fromkeys drops repeated starting ids while keeping their order.
    frontier = list(dict.fromkeys(generate_number_set(startingids)))
    collected = []
    seen = set()
    while True:
        seen.update(frontier)
        # The candidate pool is the same for every id of this round.
        candidates = [
            pid for pid in prodict
            if pid not in seen and "OBSOLETE" not in treated_dict[pid]["content"]
        ]
        next_frontier = []
        queued = set()
        for oldid in frontier:
            for newid in candidates:
                # An id similar to several frontier ids is queued only once,
                # otherwise it would appear repeatedly in the result.
                if newid in queued:
                    continue
                simrate = Levenshtein.jaro(treated_dict[oldid]["content"], treated_dict[newid]["content"])
                if simrate >= threshold:
                    next_frontier.append(newid)
                    queued.add(newid)
        collected.extend(frontier)
        frontier = next_frontier
        if not frontier or len(collected) > max_size:
            break
    return generate_exp(sorted(collected))


def generate_problem_series(startingid, length, adict):
    """Chain ``length`` problems, each the closest match of the previous one.

    The chain starts at ``startingid``; ids already in the chain are excluded
    from every later ``detectmaxsim`` lookup.

    Returns:
        The chain as a comma-joined string, in visiting order.
    """
    chain = [startingid]
    for _ in range(length):
        _, nearest = detectmaxsim(chain[-1], chain, adict)
        chain.append(nearest)
    return ",".join(chain)


def generate_number_set(string, *thedict):
    """Expand an id expression into a list of six-digit id strings.

    ``string`` may contain "," separated items and "a:b" inclusive ranges,
    e.g. ``"1:3,5"`` gives ``["000001", "000002", "000003", "000005"]``.
    Whitespace and trailing non-digit characters are discarded, and the
    punctuation is normalised by ``RefinePunctuations`` (defined elsewhere).
    Input without any usable digits yields an empty list.

    Args:
        string: the id expression.
        *thedict: optionally one dict; only ids that are keys of it are kept.

    Returns:
        The list of ids, or the string ``"输入参数有误"`` when the optional
        argument is not exactly one dict.
    """
    string = re.sub(r"[\n\s]", "", string).strip()
    # Guarded by ``string`` so empty / digit-free input no longer raises
    # IndexError on ``string[-1]``.
    while string and string[-1] not in "0123456789":
        string = string[:-1]
    numbers_list = []
    if string:
        string = RefinePunctuations(string)
        for s in string.split(","):
            if not s:
                # An empty chunk (e.g. from ",,") is not an id.
                continue
            if ":" not in s:
                numbers_list.append(s.zfill(6))
            else:
                start, end = s.split(":")
                for ind in range(int(start), int(end) + 1):
                    numbers_list.append(str(ind).zfill(6))
    if len(thedict) == 0:
        return numbers_list
    if len(thedict) == 1 and isinstance(thedict[0], dict):
        return [pid for pid in numbers_list if pid in thedict[0]]
    return "输入参数有误"

def generate_classid(string):
    """Expand a class-number expression into a list of class labels.

    ``string`` uses the same "," / "a:b" syntax as the id expressions; each
    number is zero-padded to two digits and suffixed with "班". Whitespace
    and trailing non-digit characters are discarded and the punctuation is
    normalised by ``RefinePunctuations`` (defined elsewhere). Input without
    any usable digits yields an empty list.
    """
    string = re.sub(r"[\n\s]", "", string).strip()
    # Guarded by ``string`` so empty / digit-free input no longer raises
    # IndexError on ``string[-1]``.
    while string and string[-1] not in "0123456789":
        string = string[:-1]
    numbers_list = []
    if not string:
        return numbers_list
    string = RefinePunctuations(string)
    for s in string.split(","):
        if not s:
            # An empty chunk (e.g. from ",,") is not a class number.
            continue
        if ":" not in s:
            numbers_list.append(s.zfill(2) + "班")
        else:
            start, end = s.split(":")
            for ind in range(int(start), int(end) + 1):
                numbers_list.append(str(ind).zfill(2) + "班")
    return numbers_list


def generate_id_set(string, *thedict):
    """Expand an id expression for problems, "B" ids or "K" ids.

    * Without any of the letters B, X, K the call is forwarded to
      ``generate_number_set`` together with the optional dict filter.
    * A string starting with "B" yields ``"B" + last five digits`` per id.
    * A string starting with "K" yields ``"K" + seven-digit number + suffix``
      where the suffix is the last character of the stripped input.
    * Any other string containing B, X or K (e.g. one starting with "X") is
      not handled and yields ``None``.
    """
    if re.findall(r"[BXK]", string) == []:
        # Unpack the optional filter: forwarding the tuple itself made
        # generate_number_set reject it as an invalid argument.
        return generate_number_set(string, *thedict)
    if string[0] == "B":
        string = re.sub("B", "", string)
        return ["B" + i[-5:] for i in generate_number_set(string)]
    if string[0] == "K":
        suffix = string.strip()[-1]
        string = re.sub("[KBX]", "", string)
        return ["K" + i.zfill(7) + suffix for i in generate_number_set(string)]
    return None

def generate_exp(id_list):
    """Compress a list of id strings into a ":" / "," expression.

    Runs of numerically consecutive ids are written as ``first:last``; e.g.
    ``["000001", "000002", "000003", "000005"]`` gives
    ``"000001:000003,000005"``. An empty list gives ``"无有效题号"``.
    """
    if len(id_list) == 0:
        return "无有效题号"
    # Each run is stored as [first_id, last_id].
    runs = [[id_list[0], id_list[0]]]
    for pid in id_list[1:]:
        if int(pid) - 1 == int(runs[-1][1]):
            runs[-1][1] = pid
        else:
            runs.append([pid, pid])
    pieces = [
        first if first == last else first + ":" + last
        for first, last in runs
    ]
    return ",".join(pieces).replace('"', "")

def parsePDF(filePath):
    """Return the text of every page of a PDF, each page followed by a newline."""
    with fitz.open(filePath) as doc:
        return "".join(page.get_text() + "\n" for page in doc.pages())

def extractIDs(filePath):
    """Collect the six-digit ids written as ``(123456)`` in a file.

    ``.txt`` and ``.tex`` files are read as UTF-8 text, ``.pdf`` files go
    through ``parsePDF``.

    Returns:
        The ids as a ":" / "," expression (via ``generate_exp``), or the
        string ``"格式不正确"`` for any other extension.
    """
    extension = filePath[-4:]
    if extension in (".txt", ".tex"):
        with open(filePath, "r", encoding="u8") as source:
            data = source.read()
    elif extension == ".pdf":
        data = parsePDF(filePath)
    else:
        return "格式不正确"
    return generate_exp(re.findall(r"\((\d{6})\)", data))


def spareIDs(host, port, username, password):
    """List the unused problem-id intervals of the ``tikutest`` database.

    The ``ID`` column of table ``problems`` is read and the gaps between the
    used ids are reported.

    Returns:
        A multi-line string, one closed interval of free ids per line; the
        last line always runs up to ``999999``.
    """
    mydb = mysql.connector.connect(host = host, port = port, user=username, password=password, database = "tikutest")
    try:
        mycursor = mydb.cursor()
        mycursor.execute("SELECT ID FROM problems;")
        # The query has no ORDER BY, so sort here; generate_exp only merges
        # ids that are adjacent in the list.
        # NOTE(review): assumes ids are zero-padded six-digit strings, for
        # which lexicographic order equals numeric order - confirm.
        idlist = sorted(ret[0] for ret in mycursor.fetchall())
    finally:
        # Release the connection even when the query fails.
        mydb.close()
    if not idlist:
        # Empty table: every id is free.
        return "首个空闲id: %s, 直至: %s"%("000001","999999")
    used_list = generate_exp(idlist).split(",")
    lines = []
    for group, following in zip(used_list, used_list[1:]):
        # group[-6:] is the last id of a used run, following[:6] the first
        # id of the next used run.
        lines.append("首个空闲id: %s, 直至: %s"%(str(int(group[-6:])+1).zfill(6),str(int(following[:6])-1).zfill(6)))
    lines.append("首个空闲id: %s, 直至: %s"%(str(int(used_list[-1][-6:])+1).zfill(6),"999999"))
    return "\n".join(lines)


def NextSpareID(num, adict):
    """Return the first integer >= ``num`` whose six-digit form is not a key of ``adict``."""
    candidate = int(num)
    while True:
        if str(candidate).zfill(6) not in adict:
            return candidate
        candidate += 1

def NextSpareIDBlock(num, adict):
    """Return the next free id interval at or after ``num`` as ``"start:end"``.

    ``start`` comes from ``NextSpareID``; ``end`` is one less than the
    smallest used id above ``start``, or 999999 if there is none. Neither
    number is zero-padded.
    """
    first_free = NextSpareID(num, adict)
    later_used = [int(key) for key in adict if int(key) > first_free]
    last_free = min(later_used) - 1 if later_used else 999999
    return f"{first_free}:{last_free}"

def GenerateProblemListFromString(data):
    """Split LaTeX source into ``(content, source_suffix)`` tuples.

    Only the ``document`` body is used when that environment is present.
    Every ``\\item`` up to the next ``\\item`` or ``\\end{enumerate}`` is one
    problem. Within a problem, everything from the first line that starts
    with ``%`` is dropped, as is a trailing ``\\\\``. The suffix is the text
    of the last stand-alone ``%`` comment line before the problem (without
    the ``%``), or ``""`` when there is none.
    """
    body = re.search(r"\\begin\{document\}([\s\S]*?)\\end\{document\}", data)
    if body is not None:
        data = body.group(1)
    # Collapse blank lines and close every item with an explicit end marker.
    for pattern, replacement in (
        (r"\n{2,}", "\n"),
        (r"\\item", r"\\enditem\\item"),
        (r"\\end\{enumerate\}", r"\\enditem"),
    ):
        data = re.sub(pattern, replacement, data)
    problem_list = []
    for match in re.finditer(r"\\item([\s\S]*?)\\enditem", data):
        begin, finish = match.span(1)
        content = data[begin:finish].strip()
        content = re.sub(r"\n\%[\s\S]*$", "", content)
        content = re.sub(r"\\\\$", "", content)
        # The source suffix is looked up in everything before this problem.
        markers = re.findall(r"\n(\%\s{0,}[\S]+)\n", data[:begin])
        suffix = markers[-1].replace("%", "").strip() if markers else ""
        problem_list.append((content, suffix))
    return problem_list


def _split_interaction_tag(content_raw):
    """Separate a leading ``[...]`` interaction tag from a problem's text.

    Returns ``(content, meta)``. ``meta`` is ``{}`` when the text carries no
    valid tag, ``{"rep": id}`` for a replacement tag, and otherwise a dict
    with the id lists ``same``, ``related`` and ``unrelated``.
    """
    # Anchored and non-greedy: only a bracket group at the very start counts,
    # so brackets later in the problem text are never mistaken for the tag.
    tag = re.match(r"\[([A-Za-z\d,;\s]*?)\]", content_raw)
    if tag is None:
        return content_raw, {}
    buckets = {"same": [], "related": [], "unrelated": []}
    bucket_of = {"S": "same", "R": "related", "U": "unrelated"}
    rep_id = None
    current = None
    for token in re.split(r"[;,\s]+", tag.group(1)):
        if not token:
            continue
        parsed = re.fullmatch(r"(REP|S|R|U)?(\d+)", token.upper())
        if parsed is None:
            # Not interaction info (e.g. ordinary bracketed text): leave the
            # problem text untouched.
            return content_raw, {}
        prefix, number = parsed.groups()
        if prefix == "REP":
            rep_id = number.zfill(6)
            current = None
        elif prefix is not None:
            current = buckets[bucket_of[prefix]]
            current.append(number.zfill(6))
        elif current is not None:
            # A bare number continues the preceding S/R/U group.
            current.append(number.zfill(6))
        else:
            # A bare number with nothing to attach to, e.g. the interval
            # "[0,1]" opening a problem, is problem text rather than a tag.
            return content_raw, {}
    content = content_raw[tag.end():].strip()
    if rep_id is not None:
        return content, {"rep": rep_id}
    return content, buckets


def GenerateProblemListFromString2024(data):
    """Split LaTeX source into ``(content, source_suffix, meta)`` tuples.

    Only the ``document`` body is used when that environment is present;
    ``\\definecolor`` lines and ``tcolorbox`` environments are removed. Every
    ``\\item`` up to the next ``\\item`` or ``\\end{enumerate}`` is one
    problem. Within a problem, everything from the first line that starts
    with ``%`` is dropped, as is a trailing ``\\\\``. The suffix is the text
    of the last stand-alone ``%`` comment line before the problem, or ``""``.

    A problem may start with a bracketed interaction tag whose items are
    separated by ",", ";" or whitespace (case-insensitive):

    * ``[rep3214]`` gives meta ``{"rep": "003214"}``;
    * ``[s2521;r354;u10021,20024]`` gives meta ``{"same": ["002521"],
      "related": ["000354"], "unrelated": ["010021", "020024"]}``.

    Problems without a valid tag get ``meta == {}``.
    """
    body = re.search(r"\\begin\{document\}([\s\S]*?)\\end\{document\}", data)
    if body is not None:
        data = body.group(1)
    data = re.sub(r"\n{2,}","\n",data)
    data = re.sub(r"\\definecolor[^\n]*\n","\n",data)
    data = re.sub(r"\\begin\{tcolorbox\}[\s\S]*?\\end\{tcolorbox\}","\n",data)
    data = re.sub(r"\\item",r"\\enditem\\item",data)
    data = re.sub(r"\\end\{enumerate\}",r"\\enditem",data)
    problem_list = []
    for item in re.finditer(r"\\item([\s\S]*?)\\enditem",data):
        begin, finish = item.span(1)
        content_raw = data[begin:finish].strip()
        content_raw = re.sub(r"\n\%[\s\S]*$","",content_raw)
        content_raw = re.sub(r"\\\\$","",content_raw)
        content_raw = content_raw.strip()
        content, meta = _split_interaction_tag(content_raw)
        # The source suffix is looked up in everything before this problem.
        suflist = re.findall(r"\n(\%\s{0,}[\S]+)\n",data[:begin])
        suffix = suflist[-1].replace("%","").strip() if suflist else ""
        problem_list.append((content,suffix,meta))
    return problem_list


def CreateEmptyProblem(problem):
    """Return a blank problem with the same fields as ``problem``.

    In a shallow copy of ``problem``, ``str`` values become ``""``, ``list``
    values become ``[]`` and ``int`` / ``float`` values become ``-1``. Values
    of any other exact type (``bool``, ``dict``, ...) are kept as they are.
    """
    blank = problem.copy()
    for field, value in problem.items():
        kind = type(value)
        if kind is str:
            blank[field] = ""
        elif kind is list:
            blank[field] = []
        elif kind is int or kind is float:
            blank[field] = -1
    return blank

def CreateNewProblem(id, content, origin, dict, editor):
    """Build a new problem record shaped like entry ``"000001"`` of ``dict``.

    All fields are blanked through ``CreateEmptyProblem``; then ``id``
    (zero-padded to six digits), ``content``, ``origin`` and ``edit``
    (a one-element list holding ``editor``) are filled in.
    """
    NewProblem = CreateEmptyProblem(dict["000001"])
    NewProblem.update({
        "id": str(id).zfill(6),
        "content": content,
        "origin": origin,
        "edit": [editor],
    })
    return NewProblem


def AddProblemstoDict(startingid, raworigin, problems, editor, indexdescription, thedict):
    """Insert parsed problems into ``thedict`` under consecutive ids.

    Args:
        startingid: id of the first problem; later ones count up from it.
        raworigin: common prefix of the origin text.
        problems: list of ``(content, suffix)`` tuples, as produced by
            ``GenerateProblemListFromString``.
        editor: editor name stored, after today's date, in ``edit``.
        indexdescription: if non-blank it is appended to the origin followed
            by the problem's running number, which restarts at 1 whenever
            the suffix changes.
        thedict: problem dictionary, modified in place.

    The genre is guessed from the content: "blank" gives 填空题, "bracket"
    gives 选择题, anything else 解答题 (the only genre given space "4em").

    Returns:
        ``0`` on success (also for an empty ``problems`` list), or ``1`` as
        soon as a needed id is already used; problems added before that
        point stay in ``thedict``.
    """
    id = int(startingid)
    if len(problems) == 0:
        # Nothing to add; avoids an IndexError on problems[0].
        return 0
    currentsuffix = problems[0][1]
    problemindex = 0
    for p, suffix in problems:
        pid = str(id).zfill(6)
        if pid in thedict:
            print("ID %s 已被使用."%pid)
            return 1
        if suffix == currentsuffix:
            problemindex += 1
        else:
            problemindex = 1
            currentsuffix = suffix
        origin = raworigin + suffix + indexdescription.strip() + ("" if indexdescription.strip() == ""  else str(problemindex))
        newproblem = CreateNewProblem(pid,p.strip(),origin,thedict,GetDate() + "\t" + editor)
        if "blank" in p:
            newproblem["genre"] = "填空题"
            newproblem["space"] = ""
        elif "bracket" in p:
            newproblem["genre"] = "选择题"
            newproblem["space"] = ""
        else:
            newproblem["genre"] = "解答题"
            newproblem["space"] = "4em"
        thedict[pid] = newproblem
        # Report the closest existing problem so duplicates can be spotted.
        maxsim,argmaxsim = detectmaxsim(pid,[pid],thedict)
        print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p))
        id += 1
    return 0


def AddProblemstoDict2024(startingid, raworigin, problems, editor, indexed, thedict):
    """Insert parsed problems (with interaction info) into ``thedict``.

    Args:
        startingid: id of the first new problem; later ones count up from it.
        raworigin: common prefix of the origin text.
        problems: list of ``(content, suffix, meta)`` tuples, as produced by
            ``GenerateProblemListFromString2024``.
        editor: editor name stored, after today's date, in ``edit``.
        indexed: when true the origin dict also gets a ``"题号"`` entry with
            the running number (restarting at 1 whenever the suffix changes).
        thedict: problem dictionary, modified in place.

    A problem whose meta has ``"rep"`` is not added; the referenced id is
    reported instead and no new id is consumed. For other problems the
    ``same`` / ``related`` / ``unrelated`` ids of the meta are linked in both
    directions. The genre is guessed from the content exactly as in
    ``AddProblemstoDict``.

    Returns:
        The list of ids in input order (replacement ids included; empty for
        an empty ``problems`` list), or ``1`` as soon as a needed id is
        already used; problems added before that point stay in ``thedict``.

    Raises:
        KeyError: if a linked id is not present in ``thedict``.
    """
    idlist = []
    id = int(startingid)
    if len(problems) == 0:
        # Nothing to add; avoids an IndexError on problems[0].
        return idlist
    currentsuffix = problems[0][1]
    problemindex = 0
    for p, suffix, meta in problems:
        if suffix == currentsuffix:
            problemindex += 1
        else:
            problemindex = 1
            currentsuffix = suffix
        if "rep" in meta:
            # Replaced problems take no new id, so whether the next id is
            # free is irrelevant for them.
            idlist.append(meta["rep"])
            print(f"该题 {idlist[-1]} {p} 已在题库中, 不必收录.")
            continue
        pid = str(id).zfill(6)
        if pid in thedict:
            print("ID %s 已被使用."%pid)
            return 1
        if indexed:
            origin = {"来源": raworigin + suffix, "题号": problemindex}
        else:
            origin = {"来源": raworigin + suffix}
        newproblem = CreateNewProblem(pid,p.strip(),origin,thedict,GetDate() + "\t" + editor)
        if "blank" in p:
            newproblem["genre"] = "填空题"
            newproblem["space"] = ""
        elif "bracket" in p:
            newproblem["genre"] = "选择题"
            newproblem["space"] = ""
        else:
            newproblem["genre"] = "解答题"
            newproblem["space"] = "4em"
        # Record each declared relation on both problems.
        for relation in ("same", "related", "unrelated"):
            if relation in meta:
                for sid in meta[relation]:
                    thedict[sid][relation].append(pid)
                    newproblem[relation].append(sid)
        thedict[pid] = newproblem
        # Report the closest existing problem so duplicates can be spotted.
        maxsim,argmaxsim = detectmaxsim(pid,[pid],thedict)
        print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p))
        id += 1
        idlist.append(pid)
    return idlist

def CreateIDLinks(old_id_list, new_id_list, *thedict):
    """Pair every old id with a new id, position by position.

    When exactly one dict is passed as extra argument, new ids that are
    already keys of it are discarded first.

    Returns:
        A list of ``(old_id, new_id)`` tuples, or the string
        ``"新ID个数不足."`` when fewer new ids than old ids remain.
    """
    if len(thedict) == 1 and type(thedict[0]) is dict:
        taken = thedict[0]
        new_id_list = [nid for nid in new_id_list if nid not in taken]
    if len(new_id_list) < len(old_id_list):
        return "新ID个数不足."
    # zip stops at the shorter sequence, i.e. after the last old id.
    return list(zip(old_id_list, new_id_list))


def CreateRelatedProblems(links, thedict, filepath, editor):
    """Write drafts of adapted problems to a JSON file for manual editing.

    For every ``(old_id, new_id)`` pair in ``links`` the old problem is
    copied (shallowly) under the key ``old_id``; its ``id`` becomes
    ``new_id + "待替换"``, ``usages`` / ``same`` / ``unrelated`` are emptied,
    an edit record for ``editor`` is appended and ``origin`` points back to
    the old id. The drafts are saved to ``filepath`` through ``save_dict``.

    Returns:
        ``0`` on success, ``1`` when building or saving the drafts fails.
    """
    try:
        new_dict = {}
        for old_id, new_id in links:
            draft = thedict[old_id].copy()
            draft["id"] = new_id + "待替换"
            draft["usages"] = []
            draft["same"] = []
            draft["unrelated"] = []
            draft["edit"] = draft["edit"].copy() + [GetDate()+"\t"+editor]
            draft["origin"] = {"来源": "改编题目", "前序": old_id}
            new_dict[old_id] = draft
        # save_dict reports a failed write by returning 1 instead of raising,
        # so the result has to be checked explicitly.
        if save_dict(new_dict,filepath) == 1:
            return 1
    except Exception:
        return 1
    return 0

def ImportRelatedProblems(new_json, main_json):
    """Merge edited adapted problems from ``new_json`` into ``main_json``.

    Each entry of ``new_json`` is keyed by the id of the problem it was
    adapted from, and its ``id`` field holds the new id followed by the
    marker "待替换". The entry is stored under the new id, the two problems
    are recorded as ``related`` to each other, and genre and space are
    re-derived from the content: "blank" gives 填空题, "bracket" gives
    选择题, anything else 解答题 (the only genre given space "4em"). The main
    database is saved sorted by id.

    Returns:
        The new ids as a ":" / "," expression (via ``generate_exp``), or
        ``1`` if a new id already exists (nothing is saved in that case).

    Raises:
        KeyError: if a source id is missing from the main database.
    """
    pro_dict = load_dict(main_json)
    new_dict = load_dict(new_json)
    edited = []
    for old_id in new_dict:
        new_id = new_dict[old_id]["id"].replace("待替换","")
        if new_id in pro_dict:
            print("题号有重复")
            return 1
        edited.append(new_id)
        pro_dict[new_id] = new_dict[old_id].copy()
        pro_dict[new_id]["id"] = new_id
        pro_dict[old_id]["related"] += [new_id]
        pro_dict[new_id]["related"] += [old_id]
        p = pro_dict[new_id]["content"]
        # The second value of each pair belongs to "space"; writing it to
        # "genre" again would wipe out the genre just set.
        if "blank" in p:
            pro_dict[new_id]["genre"] = "填空题"
            pro_dict[new_id]["space"] = ""
        elif "bracket" in p:
            pro_dict[new_id]["genre"] = "选择题"
            pro_dict[new_id]["space"] = ""
        else:
            pro_dict[new_id]["genre"] = "解答题"
            pro_dict[new_id]["space"] = "4em"
        print("导入关联题目 %s -> %s 信息成功."%(old_id,new_id))
    save_dict(SortDict(pro_dict),main_json)
    return generate_exp(edited)


def strip_suffix(originalString, suf_words_list):
    """Cut ``originalString`` at any of the given suffix words.

    For each word in turn, the word together with the run of non-space
    characters that follows it up to the end of the string is removed. The
    words are used as regular-expression fragments, not escaped.

    Returns:
        The shortened string; the input unchanged when no word matches or
        the list is empty.
    """
    output = originalString
    for sw in suf_words_list:
        # Work on the running result so every word takes effect, not only
        # the last one.
        output = re.sub(sw+r"[\S]*$","",output)
    return output

def get_striped_origin(pro_dict, id, suf_words_list):
    """Return the ``origin`` of problem ``id`` cut at the given suffix words.

    The cutting itself is done by ``strip_suffix``.
    """
    origin = pro_dict[id]["origin"]
    return strip_suffix(origin, suf_words_list)


def SeperateFirstLine(string):
    """Split a multi-line string into ``(first_line, remaining_lines)``.

    Every line is stripped of surrounding whitespace; the remaining lines
    are joined again with newlines.
    """
    head, *tail = string.split("\n")
    return (head.strip(), "\n".join(line.strip() for line in tail))

def ObtainDatatoModify(metadatafilepath, fields_dict):
    """Parse a metadata text file into ``(field, id, content)`` triples.

    The file consists of blocks separated by blank lines. A line that equals
    a key of ``fields_dict`` selects the field for the blocks that follow. A
    block of two or more lines holds a problem id on its first line and the
    new content on the rest. Any other single-line block resets the field to
    the placeholder ``"NotAField"``.

    Returns:
        A list of ``(field, six_digit_id, content)`` tuples in file order.
    """
    raw_lines = ReadTextFile(metadatafilepath).split("\n")
    # Force a blank line after every field-name line so that it always forms
    # a block of its own.
    spaced = "".join(
        row + ("\n\n" if row.strip() in fields_dict else "\n")
        for row in raw_lines
    )
    # Normalise every run of blank / whitespace-only lines to one blank line.
    spaced = re.sub(r"\n[\s\n]*\n", "\n\n", spaced.strip())
    to_modify_list = []
    currentfield = "NotAField"
    for chunk in spaced.split("\n\n"):
        label = chunk.strip()
        if label in fields_dict:
            currentfield = label
            continue
        if "\n" not in chunk:
            currentfield = "NotAField"
            continue
        pid, body = SeperateFirstLine(chunk)
        to_modify_list.append((currentfield, str(pid).zfill(6), body))
    return to_modify_list

def FloatToInt(string):
    """Parse ``string`` as a number, snapping near-integers to ``int``.

    Returns:
        The nearest integer when the value lies within 0.01 of it,
        otherwise the value as a ``float``.
    """
    value = float(string)
    nearest = round(value)
    return nearest if abs(value - nearest) < 0.01 else value

def OverwriteData(prodict, fieldsdict, field_id_and_content):
    """Replace one field of one problem with new content.

    Args:
        prodict: problem dictionary, modified in place.
        fieldsdict: field descriptions; ``fieldsdict[field]["FieldType"]``
            selects the conversion.
        field_id_and_content: ``(field, id, content)`` tuple.

    ``str`` fields receive the content as it is; ``int`` / ``float`` fields
    receive it converted by ``FloatToInt``. Other field types are not
    overwritten and only reported.

    Returns:
        ``(id, field, content)``, where ``content`` is the converted value
        for numeric fields.
    """
    field,id,content = field_id_and_content
    fieldType = fieldsdict[field]["FieldType"]
    if not id in prodict:
        print("题号 %s 并不在数据库中"%id)
    elif fieldType == "str":
        prodict[id][field] = content
        print("已覆盖 %s 的 %s 字段, 内容为 %s"%(id,field,content))
    elif fieldType == "int" or fieldType == "float":
        content = FloatToInt(content)
        prodict[id][field] = content
        print("已覆盖 %s 的 %s 字段, 内容为 %s"%(id,field,content))
    else:
        # Nothing was written, so the success message must not be printed.
        print("字段 %s 的类型 %s 不支持覆盖修改"%(field,fieldType))
    return (id,field,content)

def AppendData(prodict, fieldsdict, field_id_and_content):
    """Add content to one field of one problem without discarding old data.

    For ``str`` fields the stripped content is skipped when it already occurs
    in the field, stored directly when the field is blank, and otherwise
    appended after a LaTeX line break (``\\\\`` plus newline). For ``list``
    fields every stripped line of the content that is not yet present is
    added as a new element. Other field types are left alone.

    Returns:
        ``(id, field, content)`` with the content as passed in.
    """
    field, id, content = field_id_and_content
    fieldType = fieldsdict[field]["FieldType"]
    if id not in prodict:
        print("题号 %s 并不在数据库中"%id)
        return (id, field, content)
    record = prodict[id]
    if fieldType == "str":
        if content.strip() in record[field]:
            print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,content))
        else:
            if record[field].strip() == "":
                record[field] = content.strip()
            else:
                record[field] = (record[field].strip() + "\\\\\n" + content).strip()
            print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,content))
    elif fieldType == "list":
        for entry in (row.strip() for row in content.split("\n")):
            if entry in record[field]:
                print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,entry))
                continue
            # Rebind to a new list instead of mutating the existing one.
            record[field] = record[field] + [entry]
            print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,entry))
    return (id, field, content)

def AppendMutualData(prodict, field_id_and_content):
    """Record a symmetric relation between one problem and several others.

    ``field`` names the relation list; every line of ``content`` is the id
    of a partner problem, zero-padded to six characters. For each partner
    the relation is added in both directions unless already present. Pairs
    in which either id is unknown are reported and skipped.

    Returns:
        ``(id, field, content)`` as passed in.
    """
    field, id, content = field_id_and_content
    for id2 in (str(row).zfill(6) for row in content.split("\n")):
        if id not in prodict:
            print("题号 %s 并不在数据库中"%id)
            continue
        if id2 not in prodict:
            print("题号 %s 并不在数据库中"%id2)
            continue
        # Same treatment for both directions of the relation.
        for owner, partner in ((id, id2), (id2, id)):
            if partner in prodict[owner][field]:
                print("题号 %s 的 %s 字段, 内容 %s 已存在"%(owner,field,partner))
            else:
                prodict[owner][field] = prodict[owner][field] + [partner]
                print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(owner,field,partner))
    return (id, field, content)

def AppendObjData(prodict, objdict, field_id_and_content):
    """Attach objective codes to one problem.

    Every stripped line of ``content`` is an objective code. Codes that are
    not keys of ``objdict`` are reported and skipped, codes already present
    in the field are reported, and the rest are appended to the field.

    Returns:
        ``(id, field, content)`` as passed in.
    """
    field, id, content = field_id_and_content
    if id not in prodict:
        print("题号 %s 并不在数据库中"%id)
        return (id, field, content)
    for objid in (row.strip() for row in content.split("\n")):
        if objid not in objdict:
            print("目标编号 %s 并不在数据库中"%objid)
            continue
        if objid in prodict[id][field]:
            print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,objid))
            continue
        prodict[id][field] = prodict[id][field] + [objid]
        print("已于 %s 的 %s 字段执行添加, 编号为 %s, 内容为 %s"%(id,field,objid,objdict[objid]["content"]))
    return (id, field, content)

def AppendUsageData(prodict, field_id_and_content):
    """Add usage records (one per line of the content) to a problem.

    Whitespace runs in every line are turned into single tabs. Then:

    * a line containing "FORCE" (any case) is upper-cased, has the marker
      removed and is appended unconditionally;
    * a line whose text without a leading date of four or more digits
      already occurs in the stored records is skipped;
    * a line whose class token (the first token containing one of
      班 / 高 / 一 / 二 / 三) already occurs in the stored records is not
      added but put on the pending list;
    * any other line, including one without a class token, is appended.

    Returns:
        ``(field, id, content, pending)``. ``pending`` is text with one block
        per pending record: the id, the new record followed by a tab and
        "FORCE", then the stored records of the same class. It is ``""``
        when nothing is pending or when ``id`` is not in ``prodict``.
    """
    field,id,content = field_id_and_content
    if id not in prodict:
        # Same guard as the other Append* helpers; avoids a KeyError.
        print("题号 %s 并不在数据库中"%id)
        return (field,id,content,"")
    lines = [re.sub(r"\s+",r"\t",line.strip()) for line in content.strip().split("\n")]
    pending_list = []
    for line in lines:
        if "FORCE" in line.upper():
            line = re.sub(r"\s+",r"\t",re.sub(r"FORCE","",line.upper())).strip()
            prodict[id][field].append(line)
            print("已于 %s 的 %s 字段执行强制添加, 内容为 %s"%(id,field,line))
            continue
        stored = "\n".join(prodict[id][field])
        time_stripped = re.sub(r"^\d{4,}\t","",line)
        if time_stripped in stored:
            print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,line))
            continue
        class_tokens = [info for info in line.split("\t") if len(re.findall(r"[班高一二三]",info))>0]
        # Without a class token no clash can be detected; the record is then
        # simply appended instead of failing with an IndexError.
        classid = class_tokens[0] if class_tokens else None
        if classid is not None and classid in stored:
            print("班级 %s 在题号为 %s 的题目处已有使用记录, 在pending list中记录"%(classid,id))
            oldinfo = [usage for usage in prodict[id][field] if classid in usage]
            pending_list.append((id,line,oldinfo))
        else:
            prodict[id][field].append(line)
            print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,line))
    output = ""
    for pid,new,oldlist in pending_list:
        output += pid + "\n"
        output += new+"\tFORCE\n"
        output += "\n".join(oldlist)+"\n\n"
    return (field,id,content,output)

def AppendUsageData2024(prodict, field_id_and_content):
    """Add usage records to a problem, detecting near-duplicate records.

    Whitespace runs in every line of the content are turned into single
    tabs. Then:

    * a line containing "FORCE" (any case) is upper-cased, has the marker
      removed and is appended unconditionally;
    * a line whose text without a leading date of four or more digits
      already occurs in the stored records is skipped;
    * a line is dropped as "very similar" when a record stored under
      ``"usages"`` has the same class, a date less than seven days away and
      a ``usagelistdifference`` of the score lists below 0.05;
    * otherwise, if a stored record has the same class, the line is put on
      the pending list instead of being added;
    * any other line is appended.

    Records are analysed with ``parseUsage``.

    Returns:
        ``(field, id, content, pending)``. ``pending`` is text with one block
        per pending record: the id, the new record followed by a tab and
        "FORCE", then the stored records containing the same class. It is
        ``""`` when nothing is pending or when ``id`` is not in ``prodict``.
    """
    field,id,content = field_id_and_content
    if id not in prodict:
        # Same guard as the other Append* helpers; avoids a KeyError.
        print("题号 %s 并不在数据库中"%id)
        return (field,id,content,"")
    lines = [re.sub(r"\s+",r"\t",line.strip()) for line in content.strip().split("\n")]
    pending_list = []
    for line in lines:
        if "FORCE" in line.upper():
            line = re.sub(r"\s+",r"\t",re.sub(r"FORCE","",line.upper())).strip()
            prodict[id][field].append(line)
            print(f"&&&&&&&&&&&&  已于 {id} 的 {field} 字段执行强制添加, 内容为 {line} &&&&&&")
            continue
        time_stripped = re.sub(r"^\d{4,}\t","",line)
        if time_stripped in "\n".join(prodict[id][field]):
            print(f"******题号 {id} 的 {field} 字段, 内容 {line} 已存在")
            continue
        usage = parseUsage(line)
        oldusages = prodict[id]["usages"]
        importflag = True
        # First pass: an almost identical record of the same class exists.
        for u in oldusages:
            oldusage = parseUsage(u)
            if usage["classid"] == oldusage["classid"] and abs(datestrtotimestamp(usage["date"])-datestrtotimestamp(oldusage["date"])) < 7*86400 and usagelistdifference(usage["difficulties"],oldusage["difficulties"])<0.05:
                print(f"######班级 {usage['classid']} 在题号为 {id} 的题目处已有非常类似的使用记录, 不作记录")
                importflag = False
                break
        # Second pass: the class has a different record; leave the decision
        # to the user through the pending list.
        if importflag:
            for u in oldusages:
                oldusage = parseUsage(u)
                if usage["classid"] == oldusage["classid"]:
                    print(f"!!!!!!班级 {usage['classid']} 在题号为 {id} 的题目处已有使用记录, 在pending list中记录")
                    oldinfo = [v for v in prodict[id][field] if usage['classid'] in v]
                    pending_list.append((id,line,oldinfo))
                    importflag = False
                    break
        if importflag:
            prodict[id][field].append(line)
            print(f"已于 {id} 的 {field} 字段执行添加, 内容为 {line}")
    output = ""
    for pid,new,oldlist in pending_list:
        output += pid + "\n"
        output += new+"\tFORCE\n"
        output += "\n".join(oldlist)+"\n\n"
    return (field,id,content,output)


def ImportMetadata(prodict, objdict, fieldsdict, metadatafilepath, pendingdatafilepath):
    """Apply every change described in a metadata text file to ``prodict``.

    The file is parsed by ``ObtainDatatoModify``; each item is handled
    according to ``fieldsdict[field]["Method"]``: ``overwrite``, ``append``,
    ``mutualappend``, ``objappend``, ``usageappend`` or ``fixed`` (refused
    with a message). Items with any other method are ignored. Pending usage
    records are collected and written to ``pendingdatafilepath``.

    ``prodict`` is only changed in memory; saving it is left to the caller.

    Returns:
        The pending-records text (it starts with ``"usages\\n\\n"``), or
        ``1`` as soon as an item without a valid field is met. Items handled
        before that point remain applied and the pending file is not written.
    """
    outputstring = "usages\n\n"
    for item in ObtainDatatoModify(metadatafilepath, fieldsdict):
        field = item[0]
        if field == "NotAField":
            print("字段名有误")
            return 1
        method = fieldsdict[field]["Method"]
        if method == "usageappend":
            # Only this handler produces text for the pending file.
            outputstring += AppendUsageData2024(prodict, item)[3]
        elif method == "objappend":
            AppendObjData(prodict, objdict, item)
        elif method == "mutualappend":
            AppendMutualData(prodict, item)
        elif method == "append":
            AppendData(prodict, fieldsdict, item)
        elif method == "overwrite":
            OverwriteData(prodict, fieldsdict, item)
        elif method == "fixed":
            print("字段 %s 不可按此方式修改"%field)
    SaveTextFile(outputstring, pendingdatafilepath)
    return outputstring

def parseUsage(usagestring):
    """Split one usage record into its parts.

    The record is a whitespace-separated line: an optional date (first token
    containing ``20`` followed by 2-6 digits and no Chinese character), an
    optional class label (next token containing one of 高 / 一 / 二 / 三 /
    班), then one score rate per sub-problem.

    Returns:
        A dict with ``date`` and ``classid`` (``""`` when absent),
        ``subproblems`` (number of score rates), ``difficulties`` (the rates
        as floats) and ``glossdiff`` (their mean rounded to three decimals,
        or ``0`` when there are no rates).

    Raises:
        ValueError: if a score token is not a number.
    """
    # Empty tokens only arise from a blank record; dropping them lets such a
    # record parse as "no data" instead of failing on float("").
    tokens = [token for token in re.sub(r"\s+",r"\t",usagestring.strip()).split("\t") if token]
    date = ""
    if tokens and re.findall(r"20[\d]{2,6}",tokens[0]) != [] and re.findall(r"[\u4e00-\u9fa5]",tokens[0]) == []:
        date = tokens.pop(0)
    classid = ""
    if tokens and re.findall(r"[高一二三班]",tokens[0]) != []:
        classid = tokens.pop(0)
    difficulties = [float(u) for u in tokens]
    usagedict = {}
    usagedict["date"] = date
    usagedict["classid"] = classid
    usagedict["subproblems"] = len(difficulties)
    usagedict["difficulties"] = difficulties
    # No score rates: report 0 rather than dividing by zero.
    usagedict["glossdiff"] = round(sum(difficulties)/len(difficulties),3) if difficulties else 0
    return usagedict

def StringSubstitute(regex,template,stringtuple):
    """Replace the successive matches of regex in template with the strings of stringtuple.

    The i-th match (left to right) is replaced by stringtuple[i]. The
    substitution is done in one positional pass, so two textually identical
    placeholders each receive their own replacement, and placeholder-like
    text inside an inserted replacement is never substituted again.

    Returns the substituted string, or prints "长度不符." and returns 1 when the
    number of matches differs from len(stringtuple).
    """
    matches = list(re.finditer(regex,template))
    if len(matches) != len(stringtuple):
        print("长度不符.")
        return 1
    pieces = []
    cursor = 0
    for found, replacement in zip(matches,stringtuple):
        pieces.append(template[cursor:found.start()]) # untouched text before this placeholder
        pieces.append(replacement)
        cursor = found.end()
    pieces.append(template[cursor:])
    return "".join(pieces)

def GenerateTexDataforEditing(id_string,prodict,templatefilepath,editor):
    """Build the text of an editable .tex file for the problems named by id_string.

    id_string is expanded with generate_number_set. For every id an \\item is
    written holding the 6-digit id, the content, and the labelled answer,
    solution and remark of that problem. The enumerate block, preceded by an
    "编辑者" line naming editor, replaces the single placeholder of the
    template read from templatefilepath.

    Returns whatever StringSubstitute returns (the .tex text to be saved).
    """
    ids = generate_number_set(id_string,prodict)
    pieces = ["编辑者: " + editor + "\n\n\\begin{enumerate}\n\n"]
    for pid in ids:
        entry = prodict[pid]
        paragraphs = [
            entry["content"],
            "答案: " + entry["ans"],
            "解答与提示: " + entry["solution"],
            "备注: " + entry["remark"],
        ]
        # every paragraph, including the last, is followed by a blank line
        pieces.append("\\item (%s) "%str(pid).zfill(6) + "\n\n".join(paragraphs) + "\n\n")
    pieces.append("\\end{enumerate}")
    template = ReadTextFile(templatefilepath)
    return StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",template,["".join(pieces)])


def GetEditedProblems(string):
    """Cut the enumerate body of an edited .tex text into per-problem tuples.

    The text between \\begin{enumerate} and the last \\end{enumerate} is
    scanned for "(dddddd)" ids; the text following each id up to the next
    \\item is handed to parseProblem.

    Returns a dict mapping the 6-digit id string to the parseProblem result
    (content, answer, solution, remark). Raises IndexError when the text has
    no enumerate environment.
    """
    body = re.findall(r"\\begin\{enumerate\}([\s\S]*)\\end\{enumerate\}",string)[0]
    # A trailing sentinel \item lets the lazy pattern below close the last problem too.
    body += r"\item"
    return {
        pid: parseProblem(text)
        for pid, text in re.findall(r"\((\d{6})\)([\s\S]*?)\\item",body)
    }

def parseProblem(string):
    """Split the text of one edited problem into (content, answer, solution, remark).

    Runs of two or more newlines are collapsed to one blank line. When the
    text then consists of exactly four paragraphs they are taken in order,
    and the labels "答案:", "解答与提示:" and "备注:" are removed from the last
    three. Otherwise (for instance when the content itself spans several
    paragraphs) the three labels are used as anchors to locate the parts.

    Returns a 4-tuple of stripped strings. Raises ValueError when the text is
    neither four paragraphs long nor structured by the three labels.
    """
    data = re.sub(r"\n{2,}","\n\n",string.strip())
    parts = data.split("\n\n")
    if len(parts) == 4:
        content,ans,solution,remark = parts
        ans = re.sub("答案:","",ans)
        solution = re.sub("解答与提示:","",solution)
        remark = re.sub("备注:","",remark)
    else:
        # Paragraph counting failed; fall back to the labels as section anchors.
        anchored = re.fullmatch(r"([\s\S]*?)\n\n答案:([\s\S]*?)\n\n解答与提示:([\s\S]*?)\n\n备注:([\s\S]*)",data)
        if anchored is None:
            raise ValueError("题目文本无法切分为 题目内容/答案/解答与提示/备注 四部分")
        content,ans,solution,remark = anchored.groups()
    return (content.strip(),ans.strip(),solution.strip(),remark.strip())

def ModifyProblembyTeX(id_string,prodict,toeditfilepath,editor):
    """Edit content, answer, solution and remark of problems through a .tex file in VS Code.

    Steps: write the editable .tex file for id_string to toeditfilepath, open
    it with "code -w" (which blocks until the file is closed), read it back,
    and copy every changed problem into prodict. Each changed problem also
    gets "<date>\\t<editor>" appended to its "edit" list. prodict is then
    saved to "../题库0.3/Problems.json".

    Returns generate_exp() of the list of ids that were actually changed.
    """
    # Write the file to edit and let the user work on it.
    SaveTextFile(GenerateTexDataforEditing(id_string,prodict,"模板文件/题目编辑.txt",editor),toeditfilepath)
    print("编辑完成后保存文件, 关闭文件继续...")
    os.system("code -w -g "+toeditfilepath)

    # Read the result back as {id: (content, ans, solution, remark)}.
    stamp = GetDate() + "\t" + editor
    revised = GetEditedProblems(ReadTextFile(toeditfilepath))

    # Copy differing problems into the database dictionary.
    fieldnames = ("content","ans","solution","remark")
    changed_ids = []
    for pid, values in revised.items():
        content, ans, solution, remark = values
        pairs = list(zip(fieldnames,(content,ans,solution,remark)))
        entry = prodict[pid]
        if any(entry[name] != value for name, value in pairs):
            for name, value in pairs:
                entry[name] = value
            entry["edit"] = entry["edit"] + [stamp]
            changed_ids.append(pid)

    save_dict(prodict,"../题库0.3/Problems.json")
    return generate_exp(changed_ids)


def RemoveMutualLink(metadata,prodict):
    """Remove the two-way links (same / related / unrelated) between pairs of problems.

    metadata holds one pair of ids per line, separated by spaces or tabs;
    ids are zero-padded to 6 digits. Blank lines are skipped. For each pair
    each id is removed from the other problem's "same", "related" and
    "unrelated" lists where present. prodict is modified in place.

    Returns 0. Raises ValueError when a non-blank line does not hold exactly
    two ids, and KeyError when an id is not in prodict.
    """
    for rawline in metadata.split("\n"):
        line = re.sub(r"[\s]+",r"\t",rawline).strip()
        if not line:
            continue # blank or whitespace-only line
        id1,id2 = (token.zfill(6) for token in line.split("\t"))
        for field in ("same","related","unrelated"):
            if id1 in prodict[id2][field]:
                prodict[id2][field].remove(id1)
            if id2 in prodict[id1][field]:
                prodict[id1][field].remove(id2)
    return 0

def jsonEditProblemMetadata(id_string,prodict,editor):
    """Edit the full records of the problems named by id_string as JSON in VS Code.

    Shallow copies of the selected records are saved to
    "临时文件/problem_edit.json", the file is opened with "code -w" (blocking
    until it is closed) and then loaded again. Every record that differs from
    prodict replaces the entry in prodict and gets "<date>\\t<editor>"
    appended to its "edit" list.

    Returns generate_exp() of the list of changed ids.
    """
    jsontoeditpath = "临时文件/problem_edit.json"
    snapshot = {pid: prodict[pid].copy() for pid in generate_number_set(id_string,prodict)}
    save_dict(snapshot,jsontoeditpath)
    # Opens the JSON file; execution resumes once the editor window is closed.
    os.system("code -w -g "+jsontoeditpath)
    changed = []
    for pid, revised in load_dict(jsontoeditpath).items():
        if prodict[pid] == revised:
            continue
        prodict[pid] = revised.copy()
        prodict[pid]["edit"] = prodict[pid]["edit"] + [GetDate() + "\t" + editor]
        changed.append(pid)
    return generate_exp(changed)

def GetSamePairs(prodict):
    """Collect the groups of problems already marked as identical.

    A group is produced for every problem whose "same" list is non-empty and
    whose "usages" list is non-empty; the group is the problem's own id
    followed by the ids in its "same" list.

    Returns the list of groups in prodict iteration order.
    """
    return [
        [pid] + entry["same"]
        for pid, entry in prodict.items()
        if len(entry["same"]) > 0 and len(entry["usages"]) != 0
    ]

def ShareSameUsages(prodict,same_group):
    """Give every problem of same_group the union of the group's usage records.

    The records are merged in order of first appearance without duplicates,
    and each problem receives its own copy of the merged list.

    Returns the tuple (same_group, merged usage list).
    """
    merged = []
    for record in (usage for pid in same_group for usage in prodict[pid]["usages"]):
        if record not in merged:
            merged.append(record)
    for pid in same_group:
        prodict[pid]["usages"] = merged.copy()
    return (same_group,merged)

def SortUsages(prodict):
    """Sort the "usages" list of every problem as plain strings (ascending).

    Each problem gets a new sorted list. Returns 0.
    """
    for entry in prodict.values():
        entry["usages"] = sorted(entry["usages"])
    return 0


def SortUsagesbyAverage(theusages):
    """Order usage records by their mean score ("glossdiff"), highest first.

    Records whose mean score is above 0.999 (everything answered correctly)
    are dropped. Records with equal means keep their original relative order.

    Returns the new list of usage strings.
    """
    scored = [(record, parseUsage(record)["glossdiff"]) for record in theusages]
    kept = [pair for pair in scored if pair[1] <= 0.999]
    kept.sort(key = lambda pair: pair[1], reverse = True)
    return [record for record, _ in kept]


def StripSuffix(string, suf_words):
    """Strip surrounding whitespace, then cut off trailing suffix words.

    For every regex in suf_words (applied in order), a match that is followed
    only by non-whitespace characters up to the end of the string is removed
    together with that tail.

    Returns the processed string.
    """
    result = string.strip()
    for word in suf_words:
        result = re.compile(word+r"[\S]*$").sub("",result)
    return result


def generate_origin(origin_dict):
    """Render an origin record as one display string.

    Starts from origin_dict["来源"]; appends "试题<题号>" when the key "题号" is
    present and then "-改编自<前序>" when the key "前序" is present.

    Returns the value of "来源" unchanged when neither optional key exists.
    """
    text = origin_dict["来源"]
    for key, pattern in (("题号","{}试题{}"),("前序","{}-改编自{}")):
        if key in origin_dict:
            text = pattern.format(text,origin_dict[key])
    return text

def MatchCondition2024(problem,condition_list):
    """Tell whether the problem record satisfies every filter of condition_list.

    Each filter is a triple (field, flag_not, matchexp); matchexp is a
    comma-separated list of regexes. The field value is turned into text
    (lists are joined with newlines, the non-list "origin" field goes through
    generate_origin, anything else through str()).
      * flag_not == False: the filter fails when none of the regexes is found.
      * flag_not == True: the filter fails when all of the regexes are found.

    Returns False at the first failing filter, True otherwise.
    """
    for field, flag_not, matchexp in condition_list:
        patterns = [piece.strip() for piece in matchexp.split(",")]
        value = problem[field]
        if type(value) == list:
            text = "\n".join(value)
        elif field == "origin":
            text = generate_origin(value)
        else:
            text = str(value)
        if flag_not == False: # positive filter
            found = [len(re.findall(pattern,text)) > 0 for pattern in patterns]
            if not any(found):
                return False
        if flag_not == True: # negative filter
            found = [len(re.findall(pattern,text)) > 0 for pattern in patterns]
            if all(found):
                return False
    return True

def MatchCondition(problem,condition_dict):
    """Tell whether the problem record satisfies every filter of condition_dict.

    Keys of condition_dict are field names, optionally with digits (to allow
    several filters on one field) and optionally containing "_not" for a
    negative filter; digits and "_not" are removed to obtain the field name.
    Values are a regex or a list of regexes. The values [""] and "" switch a
    filter off. The field value is turned into text (lists joined with
    newlines, anything else through str()).
      * positive filter: satisfied when at least one regex is found.
      * negative filter: satisfied when none of the regexes is found.

    A single string is treated as one regex for positive and negative
    filters alike.

    Returns True when all active filters are satisfied.
    """
    match = True
    for fieldraw, cond_list in condition_dict.items():
        if cond_list == [""] or cond_list == "":
            continue # inactive filter
        if isinstance(cond_list,str):
            cond_list = [cond_list]
        negated = "_not" in fieldraw
        field = re.sub(r"\d","",fieldraw.replace("_not",""))
        value = problem[field]
        text = "\n".join(value) if type(value) == list else str(value)
        found = any(re.search(cond,text) is not None for cond in cond_list)
        # positive filters need a hit, negative filters need no hit
        if found == negated:
            match = False
    return match


def get_color(value):
    """Map a score rate to an rgb triple written in braces.

    The value is converted with float(). From 0.5 upwards red is 1 and green
    is 2-2*value; below 0.5 red is 2*value and green is 1. Blue is always 0.

    Returns a string such as "{1.000,0.500,0}".
    """
    ratio = float(value)
    red = 1 if ratio >= 0.5 else 2*ratio
    green = 2-2*ratio if ratio >= 0.5 else 1
    return f"{{{red:.3f},{green:.3f},0}}"

def GenerateValueColorCode(matchobj):
    """Turn a matched score into LaTeX code for a black-framed colour box.

    matchobj is a regex match whose group 1 is the score text; the fill
    colour comes from get_color. The returned code starts with a tab.
    """
    score = matchobj.group(1)
    fill = get_color(score)
    return f"\t\\fcolorbox[rgb]{{0,0,0}}{fill}{{{score}}}"

def StudentsGetAfterContent(id,prodict,answered,spaceflag):
    """Build the text that follows a problem in the student handout.

    With answered set and an "ans" key present, an answer line is emitted:
    red with the answer, or blue "暂无答案" when the answer is empty. With
    spaceflag set and a non-empty "space" value, a \\vspace* of that size is
    emitted after it.

    Returns the combined string (possibly empty).
    """
    entry = prodict[id]
    pieces = []
    if "ans" in entry and answered:
        colored = ("red",entry["ans"]) if entry["ans"] != "" else ("blue","暂无答案")
        pieces.append("答案: \\textcolor{%s}{%s}\n\n"%colored)
    if spaceflag and "space" in entry and entry["space"] != "":
        pieces.append("\\vspace*{%s}\n\n"%entry["space"])
    return "".join(pieces)

def XeLaTeXCompile(filedir,filename):
    """Compile filename with XeLaTeX twice, writing the output into filedir.

    Two passes are run so that references and page numbers can settle. The
    compiler is started with an argument list rather than a shell command
    line, so paths containing spaces or shell metacharacters are passed
    through unchanged.

    Returns True only when both passes exit with status 0; a pass that cannot
    be started at all (e.g. xelatex is not installed) counts as failed.
    """
    command = ["xelatex","-interaction=batchmode","-output-directory=%s"%filedir,filename]
    flagsuc = True
    for i in range(2):
        try:
            status = subprocess.run(command).returncode
        except OSError:
            status = -1 # the compiler could not be launched
        if status == 0:
            print("第%d次编译成功."%(i+1))
        else:
            flagsuc = False
    return flagsuc


def XeLaTeXTest(editedid,adict,objdict,misc,templatepath,outdir,outfile):
    """Compile a test document holding the problems named by editedid.

    The template read from templatepath is adapted (fandol font set on macOS,
    school name instead of the name/number blanks for the teacher version),
    its two placeholders are filled with the title "测试" and an enumerate
    body built with generateLaTeXBodyContent, and the result is saved to
    outdir/outfile and compiled.

    Returns the result of XeLaTeXCompile (True on success, False otherwise).
    """
    source = ReadTextFile(templatepath)
    if sys.platform == "darwin": # macOS: use the bundled default fonts
        font_fixes = (
            (r"fontset[\s]*=[\s]*none","fontset = fandol"),
            (r"\\setCJKmainfont",r"% \\setCJKmainfont"),
        )
        for pattern, replacement in font_fixes:
            source = re.sub(pattern,replacement,source)
    if misc["教师版"] == True:
        source = source.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")
    pieces = ["\\begin{enumerate}\n\n"]
    for pid in generate_number_set(editedid):
        pieces.append(generateLaTeXBodyContent(pid,adict,objdict,misc))
    pieces.append("\n\n\\end{enumerate}\n\n")
    # fills the title placeholder and the body placeholder
    document = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",source,("测试","".join(pieces)))
    SaveTextFile(document,os.path.join(outdir,outfile))
    return XeLaTeXCompile(outdir,outfile)

def GenerateSectionBodyString2024(problems,sectiontitles,pro_dict,obj_dict,bk_dict,misc,consecutivenumbering= True):
    """Build the body of a handout .tex file, one section per entry of problems.

    problems[i] is rendered under the title sectiontitles[i] according to its
    first character:
      * a digit: a problem id expression, expanded with generate_number_set
        and rendered with generateLaTeXBodyContent; numbering continues from
        the previous problem sections when consecutivenumbering is set;
      * "K": a comma-separated list of objective ids looked up in obj_dict;
      * "B": a comma-separated list of basic-knowledge ids looked up in bk_dict.
    Entries starting with anything else are ignored.

    Returns the body string.
    """
    sections = []
    numbered = 0 # problems emitted so far, used for consecutive numbering
    for index, spec in enumerate(problems):
        kind = spec[0]
        if kind in "0123456789":
            ids = generate_number_set(spec,pro_dict)
            start = numbered if consecutivenumbering else 0
            sections.append(f"\\section{{{sectiontitles[index]}}}\n\\begin{{enumerate}}\n\\setcounter{{enumi}}{{{start}}}\n\n")
            for pid in ids:
                numbered += 1
                sections.append(generateLaTeXBodyContent(pid,pro_dict,obj_dict,misc))
            sections.append("\\end{enumerate}\n\n")
        elif kind == "K":
            sections.append(f"\\section{{{sectiontitles[index]}}}\n\\begin{{enumerate}}\n\n")
            for objid in spec.split(","):
                description = obj_dict[objid]['content'] if objid in obj_dict else '目标编号有误'
                sections.append(f"\\item {objid} \\ {description}\n\n")
            sections.append("\\end{enumerate}\n\n")
        elif kind == "B":
            sections.append(f"\\section{{{sectiontitles[index]}}}\n\\begin{{enumerate}}\n\n")
            for bkid in spec.split(","):
                knowledge = bk_dict[bkid]["content"] if bkid in bk_dict else bkid + " \\ 基础知识编号有误"
                sections.append(f"\\item {knowledge}\n\n")
            sections.append("\\end{enumerate}\n\n")
    return "".join(sections)

# if not objlist == []:
#     output += "\\section{课时目标}\n\n"
#     output += "\\begin{enumerate}\n\n"
#     for objid in objlist:
#         output += "\\item %s \\ %s \n\n"%(objid,objdict[objid]["content"])
#     output += "\\end{enumerate}\n\n" #生成学习目标
# if not bnlist == []:
#     output += "\\section{双基梳理}\n\n"
#     basic_body = ""
#     for bnid in bnlist:
#         basic_body += "\\item %s\n\n"%basicknowledgedict[bnid]["content"]
#     output += "\\begin{enumerate}\n\n %s\n\n\\end{enumerate}\n\n"%basic_body #生成基础知识梳理


def GenerateStudentBodyString(problems,sectiontitles,pro_dict,consecutivenumbering= True,answered= True,spaceflag=True):
    """Build the body of the student version of a handout.

    When problems and sectiontitles have the same length, each entry of
    problems becomes a \\section with its own enumerate; numbering continues
    across sections when consecutivenumbering is set. Otherwise all entries
    are joined with commas and rendered as one enumerate without a heading.
    Every item shows the tiny id, the content and the text produced by
    StudentsGetAfterContent (answer and space, depending on answered and
    spaceflag).

    Returns the body string.
    """
    def render(pid):
        tail = StudentsGetAfterContent(pid,pro_dict,answered,spaceflag)
        return "\\item {\\tiny(%s)} %s\n\n%s"%(pid,pro_dict[pid]["content"],tail)

    if len(problems) != len(sectiontitles):
        # no usable titles: a single untitled list
        ids = generate_number_set(",".join(problems),pro_dict)
        chunks = ["\\begin{enumerate}\n\n"]
        chunks.extend(render(pid) for pid in ids)
        chunks.append("\\end{enumerate}\n\n")
        return "".join(chunks)

    chunks = []
    offset = 0
    for spec, title in zip(problems,sectiontitles):
        ids = generate_number_set(spec,pro_dict)
        chunks.append("\\section{%s}\n\\begin{enumerate}\n\\setcounter{enumi}{%d}\n\n"%(title,offset if consecutivenumbering else 0))
        for pid in ids:
            offset += 1
            chunks.append(render(pid))
        chunks.append("\\end{enumerate}\n\n")
    return "".join(chunks)


def TeachersGetAfterContentColored(id,prodict,objdict,topandbottomusagestuple = (3,3), showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True):
    """Build the teacher-version text after a problem, each part in a coloured tcolorbox.

    The parts appear in the order objectives, tags, answer, solution, usage
    records, origin, remark; each is emitted only when its show* flag is set.
    Empty answer, solution and remark are replaced by "暂无…" placeholders.
    topandbottomusagestuple is passed to GenerateUsageTexCode and is also
    printed in the heading of the usage box.

    Returns the combined string.
    """
    parts = []
    if showobjs:
        parts.append(GenerateObjTexCode(id,prodict,objdict))
    if showtags:
        parts.append("\\begin{tcolorbox}[colback = orange!10!white, colframe = orange!10!white, breakable]\n标签: %s\n\n\\end{tcolorbox}\n"%("; ".join(prodict[id]["tags"])))
    if showans:
        answer = prodict[id]["ans"] if prodict[id]["ans"] != "" else "暂无答案"
        parts.append("\\begin{tcolorbox}[colback = red!10!white, colframe = red!10!white, breakable]\n答案: %s\n\n\\end{tcolorbox}\n"%answer)
    if showsolution:
        hint = prodict[id]["solution"] if prodict[id]["solution"] != "" else "暂无解答"
        parts.append("\\begin{tcolorbox}[colback = green!10!white, colframe = green!10!white, breakable]\n解答或提示: %s\n\n\\end{tcolorbox}\n"%hint)
    if showusages:
        records = GenerateUsageTexCode(id,prodict,topandbottomusagestuple)
        parts.append("\\begin{tcolorbox}[colback = lime!10!white, colframe = lime!10!white, breakable]\n使用记录(%s):\n\n%s\n\n\\end{tcolorbox}\n"%(str(topandbottomusagestuple),records))
    if showorigin:
        parts.append("\\begin{tcolorbox}[colback = yellow!30!white, colframe = yellow!30!white, breakable]\n来源: %s\n\n\\end{tcolorbox}\n"%prodict[id]["origin"])
    if showremark:
        note = prodict[id]["remark"] if prodict[id]["remark"] != "" else "暂无备注"
        parts.append("\\begin{tcolorbox}[colback = cyan!30!white, colframe = cyan!30!white, breakable]\n备注: %s\n\n\\end{tcolorbox}\n"%note)
    return "".join(parts)

def TeachersGetAfterContentPlain(id,prodict,objdict,topandbottomusagestuple = (3,3), showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True):
    """Build the teacher-version text after a problem as plain coloured text lines.

    The parts appear in the order objectives, tags, answer, solution, usage
    records, origin, remark; each is emitted only when its show* flag is set.
    An empty answer is shown as blue "暂无答案"; empty solution and remark get
    "暂无…" placeholders; an empty tag list gives a bare "标签: " line.
    topandbottomusagestuple is passed to GenerateUsageTexCode.

    Returns the combined string.
    """
    parts = []
    if showobjs:
        parts.append("目标:\n\n%s\n\n"%GenerateObjTexCode(id,prodict,objdict))
    if showtags:
        if not prodict[id]["tags"] == []:
            parts.append("标签: \\textcolor[rgb]{0.5,0.6,0.8}{%s}\n\n"%("; ".join(prodict[id]["tags"])))
        else:
            parts.append("标签: \n\n")
    if showans:
        colored = ("red",prodict[id]["ans"]) if prodict[id]["ans"] != "" else ("blue","暂无答案")
        parts.append("答案: \\textcolor{%s}{%s}\n\n"%colored)
    if showsolution:
        hint = prodict[id]["solution"] if prodict[id]["solution"] != "" else "暂无解答"
        parts.append("解答或提示: \\textcolor{magenta}{%s}\n\n"%hint)
    if showusages:
        parts.append("使用记录:\n\n%s\n\n"%GenerateUsageTexCode(id,prodict,topandbottomusagestuple))
    if showorigin:
        parts.append("来源: %s\n\n"%prodict[id]["origin"])
    if showremark:
        note = prodict[id]["remark"] if prodict[id]["remark"] != "" else "暂无备注"
        parts.append("备注: \\textcolor[rgb]{0,0.5,0.2}{%s}\n\n"%note)
    return "".join(parts)


def GenerateObjTexCode(id,prodict,objdict):
    """Render the objectives linked to a problem inside a blue tcolorbox.

    Each id in prodict[id]["objs"] is listed with its content from objdict;
    the id "KNONE" (any letter case) is listed with a fixed "no valid
    objective" text instead of a lookup. An empty list gives "暂未关联".

    Returns the LaTeX code of the box.
    """
    linked = prodict[id]["objs"]
    lines = ["\n\\begin{tcolorbox}[colback = blue!10!white, colframe = blue!10!white, breakable]"]
    for objid in linked:
        description = "无当前有效关联目标" if objid.upper() == "KNONE" else objdict[objid]["content"]
        lines.append("\n\n%s\t%s"%(objid,description))
    if linked == []:
        lines.append("暂未关联\n")
    lines.append("\n\n\\end{tcolorbox}\n")
    return "".join(lines)

def ChooseUsage(usages,topandbottomusagestuple):
    """Select the usage records to display.

    topandbottomusagestuple is (top, bottom): the number of records with the
    highest and with the lowest mean score to keep.
      * a negative top or bottom: usages is returned unchanged and unsorted;
      * otherwise the records are ordered with SortUsagesbyAverage (which
        drops records whose mean is above 0.999); when top + bottom covers
        all remaining records they are all returned, else the first top and
        the last bottom of them.

    The selection is based on the length of the sorted list, so the bottom
    records are still found when SortUsagesbyAverage has dropped some.
    """
    top,bottom = topandbottomusagestuple
    if top < 0 or bottom < 0:
        return usages
    ranked = SortUsagesbyAverage(usages)
    if top + bottom >= len(ranked):
        return ranked
    return ranked[:top] + ranked[len(ranked)-bottom:]

def GenerateUsageTexCode(id,prodict,topandbottomusagestuple):
    """Render the (possibly reduced) usage records of a problem as LaTeX code.

    A copy of prodict[id]["usages"] is reduced with ChooseUsage according to
    topandbottomusagestuple (see ChooseUsage for its meaning), the records
    are joined with blank lines, and every tab-separated score of the form
    d.ddd… is replaced by the colour box produced by GenerateValueColorCode.

    Returns the LaTeX string.
    """
    rawusages = prodict[id]["usages"].copy()
    usages = ChooseUsage(rawusages,topandbottomusagestuple)
    # raw string: the pattern is a tab followed by a captured score such as 0.857
    usagecode = re.sub(r"\t([\d]\.[\d]{0,10})",GenerateValueColorCode,"\n\n".join(usages))
    return usagecode

def generateLaTeXBodyContent(id,adict,objdict,misc):
    """Render one problem of adict as a LaTeX \\item according to the settings in misc.

    id is zero-padded to 6 digits. misc may hold:
      "教师版": when missing or equal to False the student layout is used
          (tiny id, content, then optionally answer and space after the
          problem); otherwise the teacher layout is used.
      "字段显示设置": a dict of display switches. A part is shown only when
          its key is present and equal to True: "题后空间" (student layout
          only), "答案", and for the teacher layout also "备注", "课时目标",
          "题目标签", "解答与提示", "来源". "使用记录" must be a list or tuple
          (a, b) not containing -2 for the usage records to be shown; it is
          passed on to GenerateUsageTexCode.
    In the teacher layout "字段显示设置" must be present.

    Returns the LaTeX string for the item.
    """
    id = str(id).zfill(6)
    entry = adict[id]

    def text_or(field,placeholder):
        # field text, or the placeholder when the field is empty
        return entry[field] if len(entry[field]) > 0 else placeholder

    def answer_block():
        shown = text_or("ans","暂无答案")
        colour = "red" if shown != "暂无答案" else "blue"
        return f"\n\n答案: \\textcolor{{{colour}}}{{{shown}}}\n\n"

    student_layout = "教师版" not in misc or misc["教师版"] == False
    if student_layout:
        pieces = [f"\n\\item {{\\tiny ({id})}} {entry['content']}"]
        settings = misc["字段显示设置"] if "字段显示设置" in misc else {}
        if "答案" in settings and settings["答案"] == True:
            pieces.append(answer_block())
        if "题后空间" in settings and settings["题后空间"] == True:
            if len(entry["space"]) > 0:
                pieces.append(f"\n\n\\vspace*{{{entry['space']}}}\n\n")
        return "".join(pieces)

    pieces = [f"\n\\item ({id}) {entry['content']}"]
    settings = misc["字段显示设置"]

    def enabled(name):
        return name in settings and settings[name] == True

    if enabled("备注"):
        remark = text_or("remark","暂无备注")
        pieces.append(f"\n\n备注: \\textcolor[rgb]{{0,0.5,0.2}}{{{remark}}}\n\n")
    if enabled("课时目标"):
        pieces.append(f"\n\n目标:\n\n{GenerateObjTexCode(id,adict,objdict)}\n\n")
    if enabled("题目标签"):
        tags = "暂无标签" if len(entry["tags"]) == 0 else "; ".join(entry["tags"])
        pieces.append(f"\n\n标签: \\textcolor[rgb]{{0.5,0.6,0.8}}{{{tags}}}\n\n")
    if enabled("答案"):
        pieces.append(answer_block())
    if enabled("解答与提示"):
        solution = text_or("solution","暂无解答或提示")
        pieces.append(f"\n\n解答或提示: \\textcolor{{magenta}}{{{solution}}}\n\n")
    if "使用记录" in settings and type(settings["使用记录"]) in (list,tuple) and not -2 in settings["使用记录"]:
        records = GenerateUsageTexCode(id,adict,settings["使用记录"])
        pieces.append(f"\n\n使用记录:\n\n{records}\n\n")
    if enabled("来源"):
        origin = generate_origin(entry["origin"]) if len(entry["origin"]) > 0 else "未记录来源"
        pieces.append(f"\n\n来源: {origin}\n\n")
    return "".join(pieces)


def GenerateTeacherBodyString(problems,sectiontitles,prodict,objdict,consecutivenumbering = True,topandbottomusagestuple = (3,3),sectionname = "section", showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True, colored = False):
    """Build the body of the teacher version of a handout.

    When problems and sectiontitles have the same length, each entry of
    problems becomes a heading of level sectionname with its own enumerate;
    numbering continues across headings when consecutivenumbering is set.
    Otherwise all entries are joined with commas and rendered as one
    enumerate without a heading. Every item shows the id, the content and the
    extra information selected by the show* flags, produced by
    TeachersGetAfterContentColored when colored is set and by
    TeachersGetAfterContentPlain otherwise.

    Returns the body string.
    """
    after_content = TeachersGetAfterContentColored if colored else TeachersGetAfterContentPlain
    display = dict(topandbottomusagestuple = topandbottomusagestuple, showobjs = showobjs, showtags = showtags, showans = showans, showsolution = showsolution, showusages = showusages, showorigin = showorigin, showremark = showremark)

    def render(pid):
        tail = after_content(pid,prodict,objdict,**display)
        return "\\item (%s) %s\n\n%s"%(pid,prodict[pid]["content"],tail)

    if len(problems) != len(sectiontitles):
        # no usable titles: a single untitled list
        ids = generate_number_set(",".join(problems),prodict)
        chunks = ["\\begin{enumerate}\n\n"]
        chunks.extend(render(pid) for pid in ids)
        chunks.append("\\end{enumerate}\n\n")
        return "".join(chunks)

    chunks = []
    offset = 0
    for spec, title in zip(problems,sectiontitles):
        ids = generate_number_set(spec,prodict)
        chunks.append("\\%s{%s}\n\\begin{enumerate}\n\\setcounter{enumi}{%d}\n\n"%(sectionname,title,offset if consecutivenumbering else 0))
        for pid in ids:
            offset += 1
            chunks.append(render(pid))
        chunks.append("\\end{enumerate}\n\n")
    return "".join(chunks)

def GetValidTexFiles(path):
    """Find the .tex files below path that contain at least one "(dddddd)" problem id.

    Returns a tuple (files, folders): the sorted list of the paths of those
    files and the sorted list of the directories holding them, both without
    duplicates.
    """
    texfiles = set()
    folders = set()
    for root, _subdirs, names in os.walk(path):
        for name in names:
            if not name.endswith(".tex"):
                continue
            fullpath = os.path.join(root,name)
            if re.search(r"\((\d{6})\)",ReadTextFile(fullpath)) is not None:
                folders.add(root)
                texfiles.add(fullpath)
    return (sorted(texfiles),sorted(folders))


def generate_lessonid_list(lessonidstr,lessonsdict):
    """List the lesson ids of lessonsdict selected by lessonidstr.

    lessonidstr is a comma-separated list of criteria. A criterion without
    ":" is used as an id prefix as it stands. A criterion with ":" is
    upper-cased, stripped of "K", expanded with generate_number_set, and
    every resulting number contributes the prefix "K" + its last four
    characters.

    Returns the ids of lessonsdict (in its iteration order) that start with
    at least one prefix.
    """
    prefixes = []
    for criterion in lessonidstr.split(","):
        if ":" in criterion:
            prefixes.extend("K"+number[-4:] for number in generate_number_set(criterion.upper().replace("K","")))
        else:
            prefixes.append(criterion)
    return [lid for lid in lessonsdict if any(lid.startswith(prefix) for prefix in prefixes)]


def GenerateLessonPreparationDraft(notetitle, outputdir, adict, prodict, objdict, lessonsdict, topandbottomusagestuple = (3,3), showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True):
    """Generate and compile a lesson-preparation draft, one section per lesson of adict.

    adict maps a lesson id to a problem id expression. For every lesson the
    draft holds a section titled with the id and lessonsdict[id]["name"], the
    objectives of objdict whose id starts with the lesson id, and the
    problems rendered by GenerateTeacherBodyString (topandbottomusagestuple
    and the show* flags are passed through). The body and notetitle fill the
    two placeholders of "模板文件/讲义模板.txt"; the result is saved as
    outputdir/<notetitle>.tex and compiled with XeLaTeXCompile.

    Returns 0 when compilation succeeds, otherwise the generated LaTeX source.
    """
    output = "\\tableofcontents\n\\newpage\n\n"

    outputfilepath = os.path.join(outputdir,notetitle+".tex")

    for lid in adict:
        # "\\ " yields LaTeX's backslash-space between id and name
        output += "\\section{%s \\ %s}\n\n"%(lid,lessonsdict[lid]["name"])
        output += "\\subsection{课时目标}\n\n"
        output += "\\begin{enumerate}\n\n"
        for objid in objdict:
            if objid.startswith(lid):
                output += "\\item %s \\ %s \n\n"%(objid,objdict[objid]["content"])
        output += "\\end{enumerate}\n\n"
        output += GenerateTeacherBodyString([adict[lid]],["参考题目资源"],prodict,objdict,sectionname = "subsection",topandbottomusagestuple= topandbottomusagestuple, showobjs = showobjs, showtags = showtags, showans = showans, showsolution = showsolution, showusages = showusages, showorigin = showorigin, showremark = showremark)
        output += "\\newpage\n\n"

    latex_raw = ReadTextFile("模板文件/讲义模板.txt")
    # replace the name / student-number blanks of the template with the school name
    latex_raw = latex_raw.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")

    if sys.platform == "darwin": # macOS: use the bundled default fonts
        latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
        latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)

    # fills the title placeholder and the body placeholder
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",latex_raw,(notetitle,output))
    SaveTextFile(latex_data,outputfilepath)

    if XeLaTeXCompile(outputdir,notetitle+".tex"):
        print("编译成功")
        return 0
    else:
        print("编译失败")
        return latex_data # the source that failed to compile

def GenerateLessonPreparation(notetitle, outputdir, adict, prodict, objdict, basicknowledgedict, homeworkspaces = False):
    """Generate and compile the lesson plans of all lessons of adict in one document.

    Every value of adict must hold the keys "name", "objects",
    "basicknowledges", "examples_basic", "examples_adv", "work_inclass",
    "homework" and "remarks". A lesson whose name contains "复习" is rendered
    as a section with its homework as review problems only. Any other lesson
    gets objectives, basic knowledge (both only when non-empty), required and
    optional examples, in-class work, remarks (when non-empty) and homework.
    homeworkspaces starts the homework on a new page and controls whether
    space is left after each homework / review problem.

    The body and notetitle fill the two placeholders of
    "模板文件/讲义模板.txt"; the result is saved as outputdir/<notetitle>.tex
    and compiled with XeLaTeXCompile.

    Returns 0 when compilation succeeds, otherwise the generated LaTeX source.
    """
    output = "\\tableofcontents\n\\newpage\n\n"

    outputfilepath = os.path.join(outputdir,notetitle+".tex")

    for lessonid in adict:
        lesson = adict[lessonid]
        lessonname = lesson["name"]
        objlist = lesson["objects"]
        bnlist = lesson["basicknowledges"]
        eblist = lesson["examples_basic"]
        ealist = lesson["examples_adv"]
        worklist = lesson["work_inclass"]
        homeworklist = lesson["homework"]
        remarks = lesson["remarks"]
        # "\\ " yields LaTeX's backslash-space between id and name
        output += "\\section{%s \\ %s}\n\n"%(lessonid,lessonname)
        if not "复习" in lessonname:
            if not objlist == []: # objectives
                output += "\\subsection{课时目标}\n\n"
                output += "\\begin{enumerate}\n\n"
                for objid in objlist:
                    output += "\\item %s \\ %s \n\n"%(objid,objdict[objid]["content"])
                output += "\\end{enumerate}\n\n"
            if not bnlist == []: # basic knowledge
                output += "\\subsection{双基梳理}\n\n"
                basic_body = ""
                for bnid in bnlist:
                    basic_body += "\\item %s\n\n"%basicknowledgedict[bnid]["content"]
                output += "\\begin{enumerate}\n\n %s\n\n\\end{enumerate}\n\n"%basic_body
            output += "\\subsection{知识体验}\n\n"
            output += "\\subsubsection{必讲例题}\n\n"
            output += GenerateStudentBodyString(eblist,[],prodict,consecutivenumbering=False,answered=False)
            output += "\\subsubsection{选讲例题}\n\n"
            output += GenerateStudentBodyString(ealist,[],prodict,consecutivenumbering=False,answered=False)
            output += "\\subsection{巩固新知}\n\n"
            output += GenerateStudentBodyString(worklist,[],prodict,consecutivenumbering=False,answered=False)
            if not remarks == "":
                output += "\\subsection{备注}\n\n"
                output += remarks + "\n\n"
            if homeworkspaces:
                output += "\\newpage\n\n"
            output += "\\subsection{课后作业}\n\n"
            output += GenerateStudentBodyString(homeworklist,[],prodict,consecutivenumbering=False,answered=False,spaceflag=homeworkspaces)
        else:
            # review lesson: only the homework list, shown as review problems
            output += "\\subsection{复习题}\n\n"
            output += GenerateStudentBodyString(homeworklist,[],prodict,consecutivenumbering=False,answered = False,spaceflag=homeworkspaces)
        output += "\\newpage\n\n"

    latex_raw = ReadTextFile("模板文件/讲义模板.txt")
    # replace the name / student-number blanks of the template with the school name
    latex_raw = latex_raw.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")

    if sys.platform == "darwin": # macOS: use the bundled default fonts
        latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
        latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)

    # fills the title placeholder and the body placeholder
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",latex_raw,(notetitle,output))
    SaveTextFile(latex_data,outputfilepath)

    if XeLaTeXCompile(outputdir,notetitle+".tex"):
        print("编译成功")
        return 0
    else:
        print("编译失败")
        return latex_data # the source that failed to compile


def GenerateSingleLessonPreparation(lessonid, outputdir, adict, prodict, objdict, basicknowledgedict, homeworkspaces = False, filename = "default"):
    """Generate and compile the lesson plan of the single lesson lessonid.

    adict[lessonid] must hold the keys "name", "objects", "basicknowledges",
    "examples_basic", "examples_adv", "work_inclass", "homework" and
    "remarks". A lesson whose name contains "复习" is rendered as review
    problems (its homework list) only; any other lesson gets objectives,
    basic knowledge (both only when non-empty), required examples, in-class
    work, remarks (when non-empty) and homework. homeworkspaces starts the
    homework on a new page and controls the space left after homework /
    review problems.

    The document title is "<lessonid> \\ <name>". The file is written to
    outputdir as "<lessonid><name>.tex", or "<filename>.tex" when filename is
    not "default", and compiled with XeLaTeXCompile.

    Returns 0 when compilation succeeds, otherwise the generated LaTeX source.
    """
    lesson = adict[lessonid]
    notetitle = lessonid  + r" \ " + lesson["name"]
    filename = (lessonid + lesson["name"] if filename == "default" else filename) + ".tex"
    outputfilepath = os.path.join(outputdir,filename)
    lessonname = lesson["name"]
    objlist = lesson["objects"]
    bnlist = lesson["basicknowledges"]
    eblist = lesson["examples_basic"]
    _ealist = lesson["examples_adv"] # still read (must exist) although optional examples are not rendered
    worklist = lesson["work_inclass"]
    homeworklist = lesson["homework"]
    remarks = lesson["remarks"]

    def problem_list(ids,**options):
        # unnumbered-section student list without answers
        return GenerateStudentBodyString(ids,[],prodict,consecutivenumbering=False,answered=False,**options)

    pieces = []
    if "复习" in lessonname:
        pieces.append("\\section{复习题}\n\n")
        pieces.append(problem_list(homeworklist,spaceflag=homeworkspaces))
    else:
        if objlist != []: # objectives
            pieces.append("\\section{课时目标}\n\n")
            pieces.append("\\begin{enumerate}\n\n")
            for objid in objlist:
                pieces.append("\\item %s \\ %s \n\n"%(objid,objdict[objid]["content"]))
            pieces.append("\\end{enumerate}\n\n")
        if bnlist != []: # basic knowledge
            pieces.append("\\section{双基梳理}\n\n")
            basic_body = ""
            for bnid in bnlist:
                basic_body += "\\item %s\n\n"%basicknowledgedict[bnid]["content"]
            pieces.append("\\begin{enumerate}\n\n %s\n\n\\end{enumerate}\n\n"%basic_body)
        pieces.append("\\section{知识体验}\n\n")
        pieces.append(problem_list(eblist))
        pieces.append("\\section{巩固新知}\n\n")
        pieces.append(problem_list(worklist))
        if remarks != "":
            pieces.append("\\section{备注}\n\n")
            pieces.append(remarks + "\n\n")
        if homeworkspaces:
            pieces.append("\\newpage\n\n")
        pieces.append("\\section{课后作业}\n\n")
        pieces.append(problem_list(homeworklist,spaceflag=homeworkspaces))
    output = "".join(pieces)

    # replace the name / student-number blanks of the template with the school name
    latex_raw = ReadTextFile("模板文件/讲义模板.txt").replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")

    if sys.platform == "darwin": # macOS: use the bundled default fonts
        font_fixes = (
            (r"fontset[\s]*=[\s]*none","fontset = fandol"),
            (r"\\setCJKmainfont",r"% \\setCJKmainfont"),
        )
        for pattern, replacement in font_fixes:
            latex_raw = re.sub(pattern,replacement,latex_raw)

    # fills the title placeholder and the body placeholder
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",latex_raw,(notetitle,output))
    SaveTextFile(latex_data,outputfilepath)

    if not XeLaTeXCompile(outputdir,filename):
        print("编译失败")
        return latex_data # the source that failed to compile
    print("编译成功")
    return 0


def getlineindex(string,filepath):
    """Return the 1-based number of the first line of a UTF-8 text file that contains `string`.

    If no line contains `string`, 1 is returned (indistinguishable from a hit
    on the first line).
    """
    with open(filepath,"r",encoding = "u8") as f:
        for lineno, text in enumerate(f, start=1):
            if string in text:
                return lineno
    return 1

def getCopy():
    """Return the current clipboard text with every carriage return removed."""
    return pyperclip.paste().replace("\r","")


def setCopy(string):
    """Write `string` to the system clipboard through pyperclip."""
    pyperclip.copy(string)

def itemizeProblems(string):
    r"""Replace leading problem numbers by ``\item`` markers, line by line.

    A problem number is either digits at the very start of a line (optionally
    preceded by "例" and followed by ".", "、" or whitespace) or a bracketed
    "[例 n]" label anywhere in the line. Each one becomes a newline followed
    by ``\item ``. Lines that contain "&" and none of "tabular", "array",
    "cases" are left untouched.
    """
    number_pattern = r"(?:(?:^|\n)+[例]*[\s]*[0-9]+[\s]*[\.、\s]+|\[[\s]*例[\s]*[0-9]*[\s]*\][\s]*)"
    item_repl = "\\n\\\\item "

    def convert(line):
        # NOTE(review): this reproduces the precedence of the original test
        # `not "&" in line or "tabular" in line or ...`; confirm that lines
        # mentioning tabular/array/cases are really meant to be itemized.
        mentions_env = "tabular" in line or "array" in line or "cases" in line
        if "&" in line and not mentions_env:
            return line
        return re.sub(number_pattern,item_repl,line)

    return "\n".join(convert(line) for line in string.split("\n"))

def RefinePunctuations(raw_string):
    """Strip `raw_string` and convert a fixed set of full-width punctuation marks to half-width.

    Most replacements carry a trailing space (e.g. "，" becomes ", "), so the
    result may end with a space even though the input was stripped first.
    """
    puctuationsfulltosemi = {"　": " ","。": ". ","．": ". ","，": ", ","：": ": ","；": "; ","（": "(","）": ")","？": "? ","“": "``","”": "''", "【": "[", "】": "]", "！": "!"}
    string = raw_string.strip()
    # None of the keys or replacements contains a regex metacharacter, so a
    # plain literal replacement gives the same result as a regex substitution.
    for fullwidth, halfwidth in puctuationsfulltosemi.items():
        string = string.replace(fullwidth,halfwidth)
    return string

def RefineMathpix(raw_string): # Post-process a string obtained from Mathpix
    r"""Clean up LaTeX text recognised by Mathpix and return the refined string.

    The function is a fixed pipeline of substitutions; the order of the steps
    matters because later patterns rely on the output of earlier ones. In
    order it: drops \left/\right markers, converts full-width punctuation,
    normalises some LaTeX commands, fixes common misrecognised characters,
    removes exam-paper boilerplate (RipTestPaperDesc), turns problem numbers
    into \item (itemizeProblems), converts choices A-D into \fourch{...},
    inserts \blank{50} / \bracket{20} placeholders, and finally runs
    SplitMathComma, MergeMathComma, RefineCasesEnv, RefineChineseComma and
    RefineInterval.
    """
    # Each key below is used as a regex pattern and each value as its replacement.
    puctuationsfulltosemi = {"　": " ","。": ". ","．": ". ","，": ", ","：": ": ","；": "; ","（": "(","）": ")","？": "? ","“": "``","”": "''", "【": "[", "】": "]"}
    replacestrings = {r"\\overparen": r"\\overset\\frown", "eqslant": "eq", r"\\vec": r"\\overrightarrow ", r"\\bar": r"\\overline", r"\\lim": r"\\displaystyle\\lim", r"\\sum":r"\\displaystyle\\sum", r"\\prod":r"\\displaystyle\\prod", r"\\mid":"|", r"\^\{\\prime\}":"'",r"e\^":r"\\mathrm{e}^",r"/\s*/":r"\\parallel "}
    wrongrecog = {"雉":"锥","[粗秿]圆":"椭圆","投郑":"投掷","抛郑":"抛掷","范目":"范围","揷":"插","末见":"未见","末成":"未成","针角":"钝角","幕函数":"幂函数","末知":"未知","阀值":"阈值","祖[桓晅]":"祖暅","图象":"图像","末使用":"未使用","末来":"未来","竟赛":"竞赛"}
    string = raw_string
    string = re.sub(r"\\left([\.\(\[|])",lambda matchobj: "" if matchobj.group(1) == "." else matchobj.group(1),string) # drop \left before a delimiter (and the delimiter too when it is ".")
    string = re.sub(r"\\right([\.\)\]|])",lambda matchobj: "" if matchobj.group(1) == "." else matchobj.group(1),string) # drop \right before a delimiter (and the delimiter too when it is ".")
    string = re.sub(r"\\left\\\{","\\{",string) # drop \left before an escaped opening brace
    string = re.sub(r"\\right\\\}","\\}",string) # drop \right before an escaped closing brace
    string = string.replace("\\left\\langle","\\langle").replace("\\right\\rangle","\\rangle")
    for s in puctuationsfulltosemi:
        string = re.sub(s,puctuationsfulltosemi[s],string) # convert some full-width punctuation to half-width
    for s in replacestrings:
        string = re.sub(s,replacestrings[s],string) # rewrite some LaTeX commands into the preferred form
    for s in wrongrecog:
        string = re.sub(s,wrongrecog[s],string) # fix characters that Mathpix commonly misrecognises
    string = RipTestPaperDesc(string)
    string = re.sub(r"[\s]*(``|''|\}|\{) *",lambda matchobj: matchobj.group(1),string) # remove whitespace before and spaces after quotes and braces
    string = itemizeProblems(string) # replace problem numbers by \item
    string = re.sub(r"\$\$","$",string) # turn display math delimiters into inline ones
    string = re.sub(r"\$\s+\$"," ",string) # remove superfluous $ pairs separated only by whitespace
    string = re.sub(r"([,.:;?!])\$",lambda x:x.group(1)+" $",string) # separate punctuation from a following $
    string = re.sub(r"\\left\\lvert\\,","|",string) # replace the wrong code for |
    string = re.sub(r"\\frac",r"\\dfrac",string) # replace \frac by \dfrac
    string = re.sub(r"\n(?:A\.|\(A\))([\s\S]*?)(?:B\.|\(B\))([\s\S]*?)(?:C\.|\(C\))([\s\S]*?)(?:D\.|\(D\))([\s\S]*?)\n",lambda matchobj: "\n\\fourch{%s}{%s}{%s}{%s}\n"%(matchobj.group(1).strip(),matchobj.group(2).strip(),matchobj.group(3).strip(),matchobj.group(4).strip()),string+"\n\n") # turn the four choices A-D into \fourch{..}{..}{..}{..}; two newlines are appended to the input first
    string = re.sub(r"[\.;](\}\{|\}\n)",lambda matchobj: matchobj.group(1),string) # remove a period or semicolon at the very end of a choice
    string = re.sub(r"\n\s+","\n",string) # collapse a newline plus following whitespace into one newline
    string = re.sub(r"\\q+uad","",string) # delete \quad, \qquad, ...
    string = re.sub(r"\$\\q+uad\$","",string)# delete \quad, \qquad, ... wrapped in $; NOTE(review): the previous line already removed every \quad, so this presumably never matches
    string = re.sub(r"~","",string) # delete ~
    string = re.sub(r"\s*\([\s]{,10}\)",r"\\bracket{20}",string) # an empty pair of parentheses (at most 10 whitespace characters inside) becomes \bracket{20}
    string = re.sub(r"\s*\\bracket\{20\}\s*\n",r"\\bracket{20}.\n",string)# a \bracket{20} at the end of a line gets a trailing period
    for i in range(2):
        string = re.sub(r"(\^|_)\{([0-9a-zA-Z])\}",lambda matchobj: matchobj.group(1)+matchobj.group(2),string) # remove braces around a single-character super/subscript
    for i in range(2):
        string = re.sub(r"([0-9A-Z])\s+([0-9A-Z])",lambda matchobj: matchobj.group(1)+matchobj.group(2),string) # merge whitespace between digits/capital letters; run twice because matches cannot overlap
    for i in range(2):
        string = re.sub(r"([\u4e00-\u9fa5])\s+([\u4e00-\u9fa5])",lambda matchobj: matchobj.group(1)+matchobj.group(2),string) # merge whitespace between Chinese characters; run twice because matches cannot overlap
    string = re.sub(r"([CP])(_[^_\^]{,5}\^)",lambda x:r"\mathrm{"+x.group(1)+"}"+x.group(2),string) # set the C/P of combination and permutation numbers upright
    string = re.sub(r"([\^_])\{([-]{0,1})\\dfrac",lambda matchobj: matchobj.group(1) + "{" + matchobj.group(2) + "\\frac",string) # a fraction opening a super/subscript goes back from \dfrac to \frac
    string = re.sub(r"ldots",r"cdots",string) # replace ldots by cdots
    string = re.sub(r"\\text\s*\{,\s*当\s*\}",", ",string) # replace \text{, 当} by a plain ", "
    string = re.sub(r"[\\{]*\\(begin|end)\{array\}(?:\{[rcl]*\}){0,1}",lambda matchobj: "\\" + matchobj.group(1) + "{cases}",string) # turn the array environment (used for piecewise functions) into cases
    string = re.sub(r"([\u4e00-\u9fa5\$])[\s]*\n\\item",lambda matchobj: matchobj.group(1)+"\\blank{50}.\n\\item",string) # append a fill-in blank to a problem whose last line ends in Chinese or a formula
    string = re.sub(r"(是|为|(?:=\$))\s*([,.;\n])",lambda matchobj: matchobj.group(1) + "\\blank{50}" + ("." if matchobj.group(2) == "\n" else "") + matchobj.group(2),string) # insert a blank where the text needs one inside a line
    string = re.sub(r"(有|为|(?:确定))([种条个])",lambda matchobj: matchobj.group(1) + "\\blank{50}" + matchobj.group(2),string) # insert a blank where the text needs one inside a line
    if not "\\bracket" in string:
        # NOTE(review): inside this branch the string contains no \bracket, so
        # the optional (?:\\bracket\{20\})* part of the pattern cannot match.
        string = re.sub(r"([\u4e00-\u9fa5\$])(?:\\bracket\{20\})*[\.]*[\s]*\n\\fourch",lambda matchobj: matchobj.group(1)+"\\bracket{20}.\n\\fourch",string) # append the answer bracket to a multiple-choice stem ending in Chinese or a formula
    string = re.sub(r"\\bracket\{(\d*)\}\$",lambda matchobj: "$"+"\\bracket{"+matchobj.group(1)+"}",string)# swap a bracket that appears right before $ with that $
    string = re.sub(r"(%[^\n]*)\\blank\{50\}\.",lambda matchobj:matchobj.group(1),string) # comment lines get no \blank{50}
    string = re.sub(r"\\blank\{50\}\.\s*\n\\fourch",r"\\bracket{20}."+"\n"+r"\\fourch",string) # a blank right before the choices becomes a bracket
    string = re.sub(r"[\\\\]*\n(\(\d{1,2}\))(?:(?!\n)\s)*",lambda matchobj: "\\\\\n"+matchobj.group(1)+" ",string) # put a LaTeX line break before a sub-question number that starts a line
    string = re.sub(r"\(([^\(\)]*(?:\\in|=|\\ge|\\le|\\ne|>|<|\\parallel|\\perp)[^\(\)]*)\)\$",lambda matchobj: "$($" + matchobj.group(1) + "$)",string) # move the parentheses of a trailing condition outside the math environment
    string = re.sub(r"\$\$\(","(",string) # remove the doubled $ produced by the previous step
    string = re.sub(r"\(\s\$","($",string) # remove the whitespace between "(" and a following $
    string = re.sub(r"\(\\begin\{cases\}",r"\\begin{pmatrix}",string) # "(\begin{cases}" is rewritten to \begin{pmatrix}
    string = re.sub(r"\\end\{cases\}\)",r"\\end{pmatrix}",string) # "\end{cases})" is rewritten to \end{pmatrix}
    string = SplitMathComma(string) # decide whether a "," inside math must be separated by $ $, and separate it if so
    string = MergeMathComma(string) # decide whether a "," outside math should be merged into one math environment, and merge it if so
    string = RefineCasesEnv(string) # tidy up cases environments
    string = RefineChineseComma(string) # handle the Chinese enumeration comma
    string = RefineInterval(string) # fix the order of dollar signs and interval brackets
    string = string.replace("$$"," ")
    string = re.sub(r"\s+\\blank\{50\}",r"\\blank{50}",string)
    string = re.sub(r"\s+\\bracket\{20\}",r"\\bracket{20}",string)


    return string

def RefineInterval(string):
    r"""Move interval brackets that were left outside ``$...$`` into the math environment.

    Two shapes are repaired, each scanned from the end of the string backwards:

    * ``[$a, +\infty$)``  becomes  ``$[a, +\infty)$``
    * ``($-\infty, b$]``  becomes  ``$(-\infty, b]$``

    Finally ``($-\infty$, $+\infty$)`` is rewritten to ``$(-\infty, +\infty)$``.
    Every swap is length-preserving, so positions collected before the loop
    stay valid. Each repaired interval is printed, as before.

    Occurrences without a partner ``$`` (or with the partner at the very edge
    of the string) are now left untouched; previously they could corrupt the
    text through wrapped negative indices or raise IndexError.
    """
    newstring = string

    # Right end first: "+\infty$)" whose opening "$" is preceded by "[" or "(".
    right_ends = [(m.start(),m.end()) for m in re.finditer(r"\+\\infty\$\)",newstring)]
    for s,e in reversed(right_ends):
        leftdollar = newstring.rfind("$",0,s)
        if leftdollar < 1: # no opening "$", or nothing in front of it
            continue
        leftbracket = leftdollar-1
        lb = newstring[leftbracket]
        if lb in ("[","("):
            fixed = "$"+lb+newstring[leftbracket+2:e-2] + ")$"
            print(fixed)
            newstring = newstring[:leftbracket] + fixed + newstring[e:]

    # Left end: "($-\infty" whose closing "$" is followed by "]" or ")".
    left_ends = [(m.start(),m.end()) for m in re.finditer(r"\(\$-\\infty",newstring)]
    for s,e in reversed(left_ends):
        rightdollar = newstring.find("$",e)
        if rightdollar == -1: # no closing "$" after the match
            continue
        rightbracket = rightdollar+1
        if rightbracket >= len(newstring): # the closing "$" is the last character
            continue
        rb = newstring[rightbracket]
        if rb in ("]",")"):
            fixed = "$("+newstring[s+2:rightbracket-1]+rb+"$"
            print(fixed)
            newstring = newstring[:s]+fixed+newstring[rightbracket+1:]

    newstring = newstring.replace("($-\\infty$, $+\\infty$)","$(-\\infty, +\\infty)$")
    return newstring

def RefineChineseComma(string):
    """Take every "、" that sits inside a math environment out of it.

    A "、" preceded by an odd number of "$" is treated as being inside math;
    it is replaced by "$、$" and the whitespace directly around it is removed.
    """
    positions = [m.start() for m in re.finditer("、",string)]
    # Work from right to left so that edits never shift a position still to be visited.
    for pos in reversed(positions):
        before = string[:pos]
        if before.count("$") % 2 == 1:
            string = before.rstrip() + "$、$" + string[(pos+1):].lstrip()
    return string

def SplitMathComma(string): # Decide whether a "," inside a math environment must be separated by $ $, and separate it if so
    """Split math environments at commas that separate two independent relations.

    The string is consumed from right to left: `lmatter` is the part still to
    be examined and `rmatter` the part already processed. For the last comma
    in `lmatter` that lies inside math (odd number of "$" before it), the
    enclosing "$...$" is cut into the text left and right of the comma. The
    comma is replaced by "$, $" unless one of the following holds:

    * the comma sits inside an unclosed bracket/brace/cases pair, or
    * either side contains no relation symbol (=, >, <, \\ge, \\le, \\ne,
      \\in, \\parallel, \\perp).

    Returns the processed string.
    """
    # (opening regex, closing regex) pairs inside which a comma must not be split
    UnsplittedPairs = [(r"[\(\[]",r"[\)\]]"),(r"\{",r"\}"),(r"\\begin\{cases\}",r"\\end\{cases\}")]
    lmatter,rmatter = (string,"")
    while "," in lmatter:
        pos = lmatter.rfind(",")
        if lmatter[:pos].count("$") % 2 == 1: # odd number of $ before the comma: it is inside math
            stringtemp = lmatter+rmatter
            start = stringtemp[:pos].rfind("$")
            end = stringtemp[pos:].find("$")+pos # locate the math environment around the comma: stringtemp[start:end+1]; NOTE(review): if no "$" follows, find() gives -1 and end becomes pos-1
            locallmatter = stringtemp[start+1:pos]
            localrmatter = stringtemp[pos+1:end]
            tosplit = True
            for p1,p2 in UnsplittedPairs:
                # positions of openers (p1) and closers (p2) on each side of the comma
                p1l = SubstringOccurence(p1,locallmatter)
                p1r = SubstringOccurence(p1,localrmatter)
                p2l = SubstringOccurence(p2,locallmatter)
                p2r = SubstringOccurence(p2,localrmatter)
                if len(p1l)>0 and len(p2l) == 0: # left side has an opener but no closer
                    tosplit = False
                if len(p2r)>0 and len(p1r) == 0: # right side has a closer but no opener
                    tosplit = False
                if len(p1l)*len(p2l)>0:
                    if p1l[-1]>p2l[-1]: # the last opener on the left comes after the last closer
                        tosplit = False
                if len(p1r)*len(p2r)>0:
                    if p1r[0]>p2r[0]: # the first closer on the right comes before the first opener
                        tosplit = False
                # Both sides must contain a relation symbol; this test does not depend on p1/p2
                # but is evaluated on every pass of the loop.
                if len(SubstringOccurence(r"(?:=|\\ge|\\le|\\ne|\\in|>|<|\\parallel|\\perp)",locallmatter))*len(SubstringOccurence(r"(?:=|\\ge|\\le|\\ne|\\in|>|<|\\parallel|\\perp)",localrmatter)) == 0:
                    tosplit = False
            if tosplit:
                # close the math before the comma and reopen it after the comma
                rmatter = ", $"+lmatter[pos+1:].lstrip()+rmatter.lstrip()
                lmatter = lmatter[:pos]+"$"
            else:
                # keep the comma: move it, together with the character before it, to the processed part
                rmatter = lmatter[pos-1:]+rmatter
                lmatter = lmatter[:pos-1]
        else:
            # comma outside math: move it, together with the character before it, to the processed part
            # NOTE(review): for pos == 0 the slices become [-1:] and [:-1], which move
            # the last character of lmatter instead; the loop then revisits the comma.
            rmatter = lmatter[pos-1:]+rmatter
            lmatter = lmatter[:pos-1]
    return lmatter+rmatter

def MergeMathComma(string): # Decide whether a "," outside math should be merged into a single math environment, and merge it if so
    """Merge two math environments separated by "$, $" when a bracket spans both.

    Occurrences of "$" + "," + optional whitespace + "$" are examined from
    right to left; `lmatter` is the part still to be examined, `rmatter` the
    part already processed. An occurrence is replaced by ", " (joining the two
    math environments) when, for some pair type in `UnsplittedPairs`, the math
    text on its left has exactly one more opener than closers and the math
    text on its right has exactly one more closer than openers.

    Returns the processed string.
    """
    # (opening regex, closing regex) pairs that may span the comma
    UnsplittedPairs = [(r"[\(\[]",r"[\)\]]"),(r"\{",r"\}"),(r"\\begin\{cases\}",r"\\end\{cases\}")]
    lmatter,rmatter = (string,"")
    while not [item for item in re.finditer(r"\$,\s*\$",lmatter)] == []:
        CommaPos = [(item.start(),item.end()) for item in re.finditer(r"\$,\s*\$",lmatter)]
        ThePos = CommaPos[-1] # the rightmost "$, $" still unprocessed
        tempstring = lmatter+rmatter
        lpos = tempstring[:ThePos[0]].rfind("$") # opening $ of the math on the left
        rpos = tempstring[ThePos[1]+1:].find("$") + ThePos[1] + 1 # closing $ of the math on the right, searched from one character past the match
        locallmatter = tempstring[lpos+1:ThePos[0]]
        localrmatter = tempstring[ThePos[1]:rpos]
        tomerge = False
        for p1,p2 in UnsplittedPairs:
            # left side: one unmatched opener; right side: one unmatched closer
            if len(SubstringOccurence(p1,locallmatter))-len(SubstringOccurence(p2,locallmatter)) == 1 and len(SubstringOccurence(p2,localrmatter))-len(SubstringOccurence(p1,localrmatter)) == 1:
                tomerge = True
        if tomerge:
            # drop both $ signs and keep a plain ", " between the two halves
            rmatter = ", "+lmatter[ThePos[1]:]+rmatter
            lmatter = lmatter[:ThePos[0]]
        else:
            # leave the occurrence unchanged and move it to the processed part
            rmatter = lmatter[ThePos[0]:]+rmatter
            lmatter = lmatter[:ThePos[0]]
    return lmatter+rmatter

def ReverseMatchBrackets(string):
    """Find the "(" matching the last ")" of `string`.

    Returns -1 when `string` contains no ")". Otherwise returns the tuple
    (startpos, endpos) where endpos is the index of the last ")" and startpos
    the index of its matching "(", or -1 if no matching "(" exists.
    """
    if ")" not in string:
        return -1
    endpos = string.rfind(")")
    startpos = -1
    # Scan leftwards keeping "(" minus ")" of the text seen so far; the first
    # position where the balance reaches 1 is the matching opener.
    balance = 0
    for i in range(endpos-1,-1,-1):
        ch = string[i]
        if ch == "(":
            balance += 1
        elif ch == ")":
            balance -= 1
        if balance == 1:
            startpos = i
            break
    return (startpos,endpos)

def RefineCasesEnv(string):
    r"""Tidy up every ``cases`` environment in `string` and return the result.

    For each row (rows are separated by ``\\``):

    * if the row ends with a parenthesised condition containing a relation
      symbol (=, <, >, \ge, \le, \in), the parentheses are replaced by
      commas: ``f(x) (x>0)`` becomes ``f(x) , x>0,``;
    * every "," gets an alignment tab ``&`` after it;
    * an ``&`` directly before a row break or at the end of the body is removed.

    Empty rows (e.g. produced by a trailing ``\\`` before ``\end{cases}``) are
    passed through unchanged; previously they raised IndexError.
    """
    CasesPos = [(item.start(1),item.end(1)) for item in re.finditer(r"\\begin\{cases\}(.*?)\\end\{cases\}",string, re.DOTALL)] # body span of every cases environment
    # Process from the last environment backwards so earlier spans are not shifted by edits.
    for start,end in reversed(CasesPos):
        lmatter = string[:start]
        rmatter = string[end:]
        content = string[start:end]
        newlist = []
        for line in (item.strip() for item in content.split(r"\\")):
            newline = line
            if line.endswith(")"): # endswith is safe on an empty row, unlike line[-1]
                lbracket,rbracket = ReverseMatchBrackets(line)
                # When no matching "(" exists lbracket is -1 and the slice below is empty, so nothing changes.
                if re.findall(r"(?:=|<|>|\\ge|\\le|\\in)",line[lbracket:rbracket]) != []:
                    newline = line[:lbracket] + ", " + line[lbracket+1:rbracket] + "," # commas replace the parentheses around the condition
            newline = re.sub(r",",",&",newline)
            newlist.append(newline)
        newcontent = r"\\".join(newlist)
        newcontent = re.sub(r",[\s]*&[\s]*\\",r",\\",newcontent)
        newcontent = re.sub(r",[\s]*&[\s]*$",",",newcontent) # drop an & that is not followed by any cell content
        string = lmatter + newcontent + rmatter
        # NOTE(review): this substitution runs on the whole string inside the loop;
        # if it shortens text before an environment not yet processed, the spans
        # collected above would be off - confirm whether it belongs after the loop.
        string = re.sub(r"&[,\s]&","& ",string)
    return string

def RipTestPaperDesc(string):
    """Remove exam-paper boilerplate (section headings, score notes, instructions) from `string`.

    The patterns are applied in the order listed; each match is deleted.
    """
    boilerplate_patterns = (
        r"[一二三四五六][、\.\s]+(?:(?:填空)|(?:选择)|(?:解答))题", # section heading without a score
        r"\(本[大]*题满分[^\)]*\d+\s*分\)", # parenthesised full-score note
        r"本[大]*题[^\n]*?步骤\.{0,1}", # "write down the steps" instruction
        r"\({0,1}本[大]*题共有[^\n]*\d+\s*分\.{0,1}\){0,1}", # "this part has ... points" note
        r"每题有[^\n]*涂黑\.{0,1}", # multiple-choice instruction
        r"考生[^\n]*结果\.*", # answering instruction
    )
    for pattern in boilerplate_patterns:
        string = re.sub(pattern,"",string)
    return string

def SubstringOccurence(regex,string):
    """Return the start index of every non-overlapping match of `regex` in `string`, in order."""
    poslist = []
    for found in re.finditer(regex,string):
        poslist.append(found.start())
    return poslist

def select_grade_from_pro_dict(prodict,grades):
    """Return a problem dictionary whose usage records are restricted to the given grades.

    Args:
        prodict: mapping from problem ID to a dict that has a "usages" list of strings.
        grades: grade names; "届" is appended to a name that does not contain it.
            With an empty `grades`, `prodict` itself is returned unchanged.

    Returns:
        A new dict. Each entry is a shallow copy of the original entry whose
        "usages" keeps, in order, the records containing at least one grade name.

    The input is no longer modified: previously only the outer dict was copied,
    so the filtered lists were written back into the caller's entries. A record
    matching several grade names is now kept once instead of once per match.
    """
    if len(grades) == 0:
        return prodict
    gradelist = [g if "届" in g else g+"届" for g in grades]
    adict = {}
    for pid in prodict:
        entry = prodict[pid].copy() # copy the entry so the caller's data stays intact
        entry["usages"] = [u for u in prodict[pid]["usages"] if any(g in u for g in gradelist)]
        adict[pid] = entry
    return adict

def GenerateSingleLessonNote2024(id,notesdict,pro_dict,obj_dict,bk_dict,templatepath,outputfilepath,misc,consecutivenumbering = False):
    r"""Build the .tex file of one lesson note and optionally compile it.

    The note `id` is looked up in notesdict["notes"]; its section layout comes
    from notesdict["structures"], keyed by the upper-cased first character of
    `id`. Non-empty sections are rendered by GenerateSectionBodyString2024,
    the section keys in the result are replaced by the display names from the
    structure (section headings are omitted when there is only one section),
    and title and body are substituted into the template at `templatepath`.
    The result is saved to `outputfilepath`; it is compiled with XeLaTeX only
    when misc["编译单个文件"] equals True.

    Returns the generated LaTeX source in every case.
    """
    note_contents = notesdict["notes"][id]
    notetitle = id + r" \ " + note_contents["name"]
    structure = notesdict["structures"][id[0].upper()]["structure"]

    # Collect the non-empty sections and their comma-joined problem IDs.
    sections_list = []
    problems_list = []
    for key in structure:
        if len(note_contents[key]) == 0:
            continue
        sections_list.append(key)
        problems_list.append(",".join(note_contents[key]))

    rawoutput = GenerateSectionBodyString2024(problems=problems_list,sectiontitles=sections_list,pro_dict=pro_dict,obj_dict=obj_dict,bk_dict=bk_dict,misc=misc,consecutivenumbering= consecutivenumbering)
    paragraphs = [p for p in rawoutput.split("\\section") if p.strip() != ""]
    single_section = len(paragraphs) == 1
    body_parts = []
    for item in paragraphs:
        sectionkey, content = re.findall(r"\{([\S]*)\}\n([\S\s]*)$",item)[0]
        if not single_section:
            body_parts.append("\\section{" + structure[sectionkey]["name"] + "}\n\n")
        body_parts.append(content + "\n\n")
    output = "".join(body_parts)

    latex_raw = ReadTextFile(templatepath)
    latex_raw = latex_raw.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学") # replace the student-number/name line of the template

    if sys.platform == "darwin": # on macOS fall back to the fandol font set
        latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
        latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)

    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",latex_raw,(notetitle,output)) # fill in title and body
    SaveTextFile(latex_data,outputfilepath) # save the .tex file

    if misc["编译单个文件"] == True:
        outputdir,filename = os.path.split(outputfilepath)
        print(f"{filename}编译中...")
        compiled = XeLaTeXCompile(outputdir,filename)
        print("编译成功" if compiled else "编译失败")
    return latex_data

def GenerateSingleLessonNote(id,notesdict,metadict,templatepath,outputfilepath,consecutivenumbering = False, answered = False):
    r"""Build the .tex file of one lesson note and compile it with XeLaTeX.

    The note `id` is looked up in notesdict["notes"]; its section layout comes
    from notesdict["structures"], keyed by the upper-cased first character of
    `id`. Non-empty sections are rendered by GenerateStudentBodyString. For a
    section whose "spaceflag" is false, or whenever `answered` is true, every
    \vspace command in the section is replaced by a newline. Title and body
    are substituted into the template at `templatepath` and saved to
    `outputfilepath`.

    Returns the generated LaTeX source whether or not compilation succeeds.
    """
    note_contents = notesdict["notes"][id]
    notetitle = id + r" \ " + note_contents["name"]
    structure = notesdict["structures"][id[0].upper()]["structure"]

    # Collect the non-empty sections and their comma-joined problem IDs.
    sections_list = []
    problems_list = []
    for key in structure:
        if len(note_contents[key]) == 0:
            continue
        sections_list.append(key)
        problems_list.append(",".join(note_contents[key]))

    rawoutput = GenerateStudentBodyString(problems=problems_list,sectiontitles=sections_list,pro_dict=metadict,consecutivenumbering= consecutivenumbering, answered= answered, spaceflag = True)
    paragraphs = [p for p in rawoutput.split("\\section") if p.strip() != ""]
    single_section = len(paragraphs) == 1
    body_parts = []
    for item in paragraphs:
        sectionkey, content = re.findall(r"\{([\S]*)\}\n([\S\s]*)$",item)[0]
        if not single_section:
            body_parts.append("\\section{" + structure[sectionkey]["name"] + "}\n\n")
        if not structure[sectionkey]["spaceflag"] or answered:
            content = re.sub(r"\\vspace[\*]?\{[\S]*\}","\n",content) # strip the answer space
        body_parts.append(content + "\n\n")
    output = "".join(body_parts)

    latex_raw = ReadTextFile(templatepath)
    latex_raw = latex_raw.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学") # replace the student-number/name line of the template

    if sys.platform == "darwin": # on macOS fall back to the fandol font set
        latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
        latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)

    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",latex_raw,(notetitle,output)) # fill in title and body
    SaveTextFile(latex_data,outputfilepath) # save the .tex file

    outputdir,filename = os.path.split(outputfilepath)
    print(f"{filename}编译中...")
    compiled = XeLaTeXCompile(outputdir,filename)
    print("编译成功" if compiled else "编译失败")
    return latex_data


def getUnit(n):
    """Return the unit name for a number from 0 to 9 (0 maps to "暂无对应"); None for anything outside that range."""
    if not 0<=n<=9:
        return None
    unitlist = ["暂无对应","第一单元","第二单元","第三单元","第四单元","第五单元","第六单元","第七单元","第八单元","第九单元"]
    return unitlist[n]

def getUnitNumber(string):
    """Return the number 1-9 of a unit name such as "第三单元"; None for any other value (including "暂无对应")."""
    unitlist = ["暂无对应","第一单元","第二单元","第三单元","第四单元","第五单元","第六单元","第七单元","第八单元","第九单元"]
    if string not in unitlist[1:]:
        return None
    return unitlist.index(string)

def ExtractProblemIDs(paperdict,pro_dict):
    """Collect problem IDs from the dict of one handout.

    Every value of `paperdict` that is a list whose elements are all keys of
    `pro_dict` is taken to be a list of problem IDs; the IDs of all such lists
    are returned in one flat list, in dict order. Lists containing any
    element that is not a key of `pro_dict` are skipped entirely.
    """
    output = []
    for value in paperdict.values():
        if type(value) is not list:
            continue
        if all(candidate in pro_dict for candidate in value):
            output.extend(value)
    return output


def ParseZipname(zipfilename):
    """Return the paper number embedded in the file name of a zip from the Xiaoxian platform.

    The number is the run of digits before the first "_" that follows an
    optional prefix made of the characters "可", "选" and "_". Raises
    IndexError when the file name does not have that shape.
    """
    basename = os.path.basename(zipfilename)
    candidates = re.findall(r"^[可选_]*(\d*?)_",basename)
    return candidates[0]

def FindFile(dir,filename):
    """Return the directories under `dir` (itself included, searched recursively) that directly contain a file named `filename`."""
    return [path for path,_,filenames in os.walk(dir) if filename in filenames]

def FindPaper(xiaoxianpid, answersheetpath):
    """Look up a Xiaoxian paper number in the "答题纸对应.json" files below `answersheetpath`.

    The first file that has `xiaoxianpid` as a key is used. Returns the tuple
    (pid, grade, idlist, marks, excludejson):

    * pid: the entry's "id";
    * grade: "20" plus the first two-digit "NN届" found in the directory path;
    * idlist: the entry's "idlist" if present, otherwise the concatenation of
      the lists notes[pid][part] from "校本材料.json" in the same directory,
      for every part in the entry's "parts";
    * marks: the entry's "marks", or [] when absent;
    * excludejson: the entry's "exclude", or {} when absent.

    Returns False when no file contains the paper number.
    """
    for folder in FindFile(answersheetpath,"答题纸对应.json"):
        anssheetjson = load_dict(os.path.join(folder,"答题纸对应.json"))
        if xiaoxianpid not in anssheetjson:
            continue
        entry = anssheetjson[xiaoxianpid]
        grade = "20"+re.findall(r"\d{2}届",folder)[0]
        pid = entry["id"]
        # The notes file is loaded even when the entry carries its own idlist.
        notesjson = load_dict(os.path.join(folder,"校本材料.json"))
        if "idlist" in entry:
            idlist = entry["idlist"]
        else:
            idlist = []
            for part in entry["parts"]:
                idlist += notesjson["notes"][pid][part].copy()
        marks = entry["marks"] if "marks" in entry else []
        excludejson = entry["exclude"] if "exclude" in entry else {}
        return(pid,grade,idlist,marks,excludejson)
    return False

def CheckPaperType(filepath,filename):
    """Classify a paper from its statistics workbook.

    `filename` is searched for under `filepath` with FindFile. Returns False
    when it is not found. Otherwise the workbook in the first hit is read and
    "考试卷" is returned if the text of the row labelled 1 contains "第<n>步",
    else "日常卷".
    """
    hits = FindFile(filepath,filename)
    if hits == []:
        return False
    dfcurrent = pd.read_excel(os.path.join(hits[0],filename))
    step_labels = re.findall(r"第\d*步",str(dfcurrent.loc[1,:]))
    return "日常卷" if step_labels == [] else "考试卷"

def generateColIndexandMarks(filepath,statsfilename,paperinfo):
    """Find the usable score columns of a statistics workbook and their full marks.

    Args:
        filepath: list of directories; the workbook is read from the first one.
        statsfilename: file name of the workbook.
        paperinfo: result of FindPaper; element 3 is the list of full marks
            (an empty list means one point per column).

    Returns:
        (validcols, marks), or False when the number of marks differs from the
        number of usable columns.
    """
    dfcurrent = pd.read_excel(os.path.join(filepath[0],statsfilename))

    # A column is a candidate when the header in row 1 names a question kind and has no A-D option letter.
    question_kinds = ("单选","填空","主观","步")
    validcols = []
    for i in range(len(dfcurrent.columns)):
        colname = str(dfcurrent.iloc[1,i])
        if any(kind in colname for kind in question_kinds) and not re.findall("[ABCD]",colname):
            validcols.append(i)

    # Drop a "主观" column when its leading number also occurs in the header text of a
    # later kept column (presumably because per-step columns already cover it - confirm).
    # Walking backwards keeps the indices still to be visited stable while popping.
    for col in reversed(range(len(validcols))):
        colname = str(dfcurrent.iloc[1,validcols[col]])
        if "主观" not in colname:
            continue
        colname_main = re.findall(r"^([\d\.]*)[\($]",colname[2:])[0]
        later_headers = str([dfcurrent.iloc[1,c] for c in validcols[col+1:]])
        if colname_main in later_headers:
            validcols.pop(col)

    marks = [1] * len(validcols) if paperinfo[3] == [] else paperinfo[3]
    if len(marks) != len(validcols):
        return False
    return (validcols,marks)


def CheckValidity(classpath,gradename,threshold):
    """Check whether enough students of a class submitted the paper.

    The class name is built from `gradename` plus the "高X" level and the
    zero-padded class number found in `classpath`. The counts are read from
    "学科总体分析.xlsx" in the parent directory of `classpath` (row labelled 2,
    second and third column). The class is valid when
    threshold * total < submitted. A summary line is printed.

    Returns (classname, is_valid).
    """
    level, number = re.findall(r"(高[一二三])(\d*?)班",classpath)[0]
    classname = gradename + level + number.zfill(2) + "班"
    summary = pd.read_excel(os.path.join(os.path.dirname(classpath),"学科总体分析.xlsx"))
    totalstudents = summary.loc[2,summary.columns[1]]
    validstudents = summary.loc[2,summary.columns[2]]
    if threshold * totalstudents < validstudents:
        print(f"{classname} 有效, 共 {totalstudents} 人, 提交 {validstudents} 人")
        return (classname,True)
    print(f"!!! {classname} 无效, 共 {totalstudents} 人, 提交 {validstudents} 人")
    return (classname,False)

def generateIDtoUsageCorrespondence(idlist,validcols,names):
    """Map every problem ID to the positions (within `validcols`) of its score columns.

    The i-th ID (counting from 1) receives every position j for which getindex
    of the question name of names[j] equals i. The question name is the part
    of names[j] before "第" when the name contains "步", otherwise the part
    before the first "(".
    """
    def question_name(label):
        cut = r"^([^第]*)" if "步" in label else r"^([^\(]*)"
        return re.findall(cut,label)[0]

    corr_dict = {}
    for ind, pid in enumerate(idlist, start=1):
        corr_dict[pid] = [j for j in range(len(validcols)) if ind == getindex(question_name(names[j]))]
    return corr_dict

def CalculateUsages(statsfilepathlist,statsfilename,gradename,threshold,marks,correspondence_dict,validcols,date,exclude = {}):
    """Build the "usages" text for metadata.txt from per-class statistics workbooks.

    For every directory in `statsfilepathlist` that CheckValidity accepts, the
    column means of rows 2 to -3 of the `validcols` columns are divided by the
    full marks. For each problem ID in `correspondence_dict` with at least one
    column, a record "<date>\\t<class>\\t<ratios>" is emitted, unless the ID is
    in `exclude` and the last three characters of the class name are among
    generate_classid(exclude[id]).

    Returns the text, or False when any accepted class has a ratio above 1
    (a message is printed for each such class).
    """
    full_marks = [int(mark) for mark in marks]
    chunks = ["usages\n\n\n"]
    all_ok = True
    for folder in statsfilepathlist:
        classname, valid = CheckValidity(folder,gradename,threshold)
        if not valid:
            continue
        dfcurrent = pd.read_excel(os.path.join(folder,statsfilename))
        means = dfcurrent.iloc[2:-2,validcols].mean()/full_marks
        if max(means)>1:
            print("满分数据有误!!!")
            all_ok = False
            continue
        means_out = [f"{t:.3f}" for t in means]
        for pid in correspondence_dict:
            cols = correspondence_dict[pid]
            if len(cols) == 0:
                continue
            if pid in exclude and classname[-3:] in generate_classid(exclude[pid]):
                continue
            diffs = "\t".join([means_out[u] for u in cols])
            usages = f"{date}\t{classname}\t{diffs}"
            chunks.append(f"{pid}\n{usages}\n\n\n")
    if not all_ok:
        return False
    return "".join(chunks)


def getindex(string,pos = 2):
    """Return, as an int, the `pos`-th (1-based) dot-separated component of `string`."""
    return int(string.split(".")[pos-1])

def generateListOfIDandContent(string):
    r"""Split LaTeX handout source into (id, text) tuples.

    Each tuple holds a six-digit ID written as "(dddddd)" and the text that
    follows it up to the next \item or \end{enumerate}.
    """
    id_and_body = re.compile(r"\((\d{6})\)([\s\S]*?)(?:(?:\\item)|(?:\\end\{enumerate\}))")
    return id_and_body.findall(string)

def CountEffectiveBraces(string):
    r"""Count the braces of `string` that are not escaped as \{ or \}.

    Returns (number of "{", number of "}").
    """
    # Blank out the escaped braces first; the order of the two replacements does not matter.
    unescaped = string.replace("\\{","  ").replace("\\}","  ")
    opening = unescaped.count("{")
    closing = unescaped.count("}")
    return (opening,closing)

def generateAnswerTex(content,anspreamble = "答案: \\textcolor{red}{"):
    r"""Extract the answer that follows `anspreamble` in `content`.

    The last character of `anspreamble` is expected to be the "{" that opens
    the answer group. The answer is the text up to the matching "}", with
    surrounding whitespace stripped. A brace directly preceded by a backslash
    (\{ or \}) does not count towards the matching.

    Raises:
        ValueError: if `anspreamble` does not occur in `content`, or if the
            braces never balance. The unbalanced case used to loop forever.
    """
    startpos = content.index(anspreamble) + len(anspreamble) - 1
    opened = closed = 0
    # Single left-to-right scan instead of re-counting a growing slice at every step.
    for i in range(startpos,len(content)):
        ch = content[i]
        escaped = i > startpos and content[i-1] == "\\"
        if not escaped:
            if ch == "{":
                opened += 1
            elif ch == "}":
                closed += 1
        if opened == closed:
            return content[startpos+1:i].strip()
    raise ValueError("unbalanced braces after the answer preamble")

def generateAnswerList(string,anspreamble = "答案: \\textcolor{red}{"):
    """Return (id, answer) tuples for every problem in the LaTeX source `string` whose text contains `anspreamble`.

    Problems are located with generateListOfIDandContent; the answer text is
    extracted with generateAnswerTex.
    """
    return [
        (pid,generateAnswerTex(content,anspreamble))
        for pid,content in generateListOfIDandContent(string)
        if anspreamble in content
    ]


def unUnitted(adict):
    """Return the IDs in `adict` whose concatenated tags contain neither "单元" nor "暂无"."""
    pending = []
    for pid in adict:
        joined_tags = "".join(adict[pid]["tags"])
        if "单元" in joined_tags or "暂无" in joined_tags:
            continue
        pending.append(pid)
    return pending


def AutoAssignTagNotoLaTeX(newlist,adict):
    r"""Guess unit tags for the IDs in `newlist` and render them for manual review.

    For each ID the unit tags (tags containing "单元") are taken from, in order
    of preference: its first "same" problem, its first "related" problem, or
    the most frequent unit tag among the up to 10 most similar problems
    (stringmaxsim) whose similarity exceeds 0.75. The guess may be wrong and
    needs human checking.

    Returns (latex_body, tagrecord): the body runs from \begin{enumerate} to
    \end{enumerate} with one "\item (id)[unit numbers] content" per problem;
    tagrecord has one "id<TAB>unit numbers" line per problem.
    """
    treateddict = treat_dict(adict)
    latex_parts = ["\\begin{enumerate}\n\n"]
    record_parts = []
    for pid in newlist:
        samelist = adict[pid]["same"]
        relatedlist = adict[pid]["related"]
        if samelist != []:
            unittags = [t for t in adict[samelist[0]]["tags"] if "单元" in t]
        elif relatedlist != []:
            unittags = [t for t in adict[relatedlist[0]]["tags"] if "单元" in t]
        else:
            # Vote among sufficiently similar problems; ties go to the tag seen first.
            votes = {}
            for rid,sim in stringmaxsim(treateddict[pid]["content"],treateddict,10):
                if rid != pid and sim > 0.75:
                    for t in [i for i in adict[rid]["tags"] if "单元" in i]:
                        votes[t] = votes.get(t,0) + 1
            unittags = [max(votes, key = lambda x: votes[x])] if len(votes) != 0 else []
        tagnumbers = "".join(str(getUnitNumber(u)) for u in unittags)
        latex_parts.append(f"\\item ({pid})[{tagnumbers}] {adict[pid]['content']}\n\n")
        record_parts.append(f"{pid}\t{tagnumbers}\n")
    latex_parts.append("\\end{enumerate}\n\n")
    return ("".join(latex_parts),"".join(record_parts))


def UnitRectoMetadataText(data):
    """Convert "id<TAB>unit digits" lines into the "tags" section text of metadata.txt.

    Blank lines are ignored. Every remaining line must consist of exactly two
    tab-separated fields (otherwise ValueError is raised by the unpacking).
    A line whose unit field is empty produces no output; otherwise each digit
    of the field is mapped to its unit name with getUnit.

    Both fields are stripped before use, so trailing whitespace or a carriage
    return after the digits no longer makes int() fail.
    """
    output = "tags\n\n"
    for line in data.split("\n"):
        if line.strip() == "":
            continue
        pid,unitno = line.split("\t")
        digits = unitno.strip()
        if not digits:
            continue
        units = "".join(f"{getUnit(int(n))}\n" for n in digits)
        output += f"{pid.strip().zfill(6)}\n"
        output += f"{units}\n\n"
    return output

def gitdiff(commitid1,commitid2,fileinrepo="题库0.3/Problems.json",tempfilepath = "临时文件/temp.json"):
    """Compare one JSON file of the repository between two git commits.

    Each version is written to `tempfilepath` with "git show" and loaded with
    load_dict; the temporary file is removed afterwards, also on failure.

    Returns:
        (new_or_deleted_list, diff_list). The first lists the keys present in
        only one version (those only in the first commit come first). The
        second holds (key, changed_fields) tuples for keys present in both,
        where changed_fields are the fields of the first version whose value
        differs in, or is missing from, the second version. Fields that exist
        only in the second version are not reported (unchanged behaviour).

    Changes from the previous implementation: `tempfilepath` is now honoured
    for reading and cleanup (the default path used to be hard-coded there),
    git is invoked without a shell so paths and revisions need no quoting,
    and a field missing from the second version no longer raises KeyError.
    """
    def load_at(commitid):
        # git's exit status is not checked (as before); a failed "git show"
        # leaves an empty file and load_dict then fails on it.
        with open(tempfilepath,"wb") as f:
            subprocess.run(["git","show",f"{commitid}:{fileinrepo}"],stdout=f)
        return load_dict(tempfilepath)

    try:
        json1 = load_at(commitid1)
        json2 = load_at(commitid2)
    finally:
        if os.path.exists(tempfilepath):
            os.remove(tempfilepath)

    new_or_deleted_list = [pid for pid in json1 if pid not in json2] + [pid for pid in json2 if pid not in json1]
    diff_list = []
    for pid in json1:
        if pid not in json2:
            continue
        current_diff = [
            field for field in json1[pid]
            if field not in json2[pid] or json1[pid][field] != json2[pid][field]
        ]
        if current_diff:
            diff_list.append((pid,current_diff))
    return (new_or_deleted_list,diff_list)


def CheckUsagesValidity(data):
    """Check that no usage ratio in `data` exceeds 1.

    A ratio is a number with exactly three decimals that is preceded by
    whitespace and followed by whitespace or the end of the text. Returns 1
    (after printing the offending value) as soon as one ratio is greater than
    1, otherwise 0.

    Lookarounds are used so that the separating whitespace is not consumed.
    The previous pattern swallowed the separator, which made it skip every
    second value of a tab-separated row, and it treated "$" inside a character
    class as a literal, so a value at the very end of the text was never
    checked.
    """
    for d in re.findall(r"(?<=\s)(\d+\.\d{3})(?=\s|$)",data):
        if float(d)>1:
            print(f"有{d}, 数据有问题")
            return 1
    return 0

def pairingbraces(string_raw,leftbracepos):
    r"""Return the index of the bracket that closes the one at `leftbracepos`.

    Works for "(", "[" and "{". Brackets escaped with a backslash (such as \{)
    are ignored. Returns -1 when the character at `leftbracepos` is not an
    opening bracket (a message is printed) or when no closing partner exists.
    """
    closers = {"(": ")", "[": "]", "{": "}"}
    # Blank out escaped brackets while keeping every index unchanged.
    string = re.sub(r"\\[()\[\]{}]","  ",string_raw)
    lbrace = string[leftbracepos]
    if lbrace not in closers:
        print("位置上的字符不是左括号")
        return -1
    rbrace = closers[lbrace]
    depth = 1
    for currentpos in range(leftbracepos + 1,len(string)):
        ch = string[currentpos]
        if ch == lbrace:
            depth += 1
        elif ch == rbrace:
            depth -= 1
        if depth == 0:
            return currentpos
    return -1

def InMathEnv(string,pos):
    r"""Return True if the character at index `pos` of `string` lies inside a ``$...$`` math environment.

    Escaped dollars (\$) are not delimiters. A "$" delimiter itself is never
    inside. The character is inside when an odd number of "$" precedes it.

    Fixes the misspelt method name `repalce`, which made every call raise
    AttributeError.
    """
    string = string.replace(r"\$","  ") # blank out escaped dollars, keeping indices unchanged
    return string[pos] != "$" and string[:pos].count("$") % 2 == 1


def MaskingMathEnv(string_raw):
    r"""Return a copy of `string_raw` in which every character of a math environment is replaced by "$".

    Escaped dollars (\$) are first replaced by two spaces. The delimiters of
    each ``$...$`` stay "$", so the result has the same length as the input.
    """
    unescaped = string_raw.replace(r"\$","  ")
    inside = False
    masked = []
    for ch in unescaped:
        if ch == "$":
            inside = not inside
        # The closing "$" is emitted as itself right after the toggle, which is "$" anyway.
        masked.append("$" if inside else ch)
    return "".join(masked)

def MultiplechoicetoBlankFilling(string_raw):
    """Rewrite a choice question (stem + options) as a fill-in-the-blank item.

    The text up to and including the first ``fourch``/``twoch``/``onech``
    token is the stem: every ``\\bracket{<digits>}`` in it becomes
    ``\\blank{50}`` and the ``\\...ch`` command (with its preceding newline)
    becomes a LaTeX line break. Each following ``{...}`` group is one option;
    the options are appended as ``\\textcircled{n} option`` separated by
    ``"; "`` and terminated with ``"."``.

    Raises:
        AttributeError: if ``string_raw`` contains none of the ``...ch``
            tokens (the regex search returns ``None``).
    """
    firstchoicepos = re.search(r"(?:(?:four)|(?:two)|(?:one))ch",string_raw).span()[1]
    headstring = string_raw[:firstchoicepos]
    headstring = re.sub(r"\\bracket\{[\d]*\}",r"\\blank{50}",headstring)
    headstring = re.sub(r"\n\\(?:(?:four)|(?:two)|(?:one))ch",r"\\\\\n",headstring)
    remaining = string_raw[firstchoicepos:]
    choices = []
    while True:
        # Pair from the next "{" rather than blindly from index 0: the
        # original looped forever when whitespace or other text preceded
        # a group.
        startpos = remaining.find("{")
        if startpos == -1:
            break
        endpos = pairingbraces(remaining,startpos)
        if endpos == -1:
            # Escaped or unmatched brace: stop instead of spinning.
            break
        choices.append(remaining[startpos+1:endpos])
        remaining = remaining[endpos+1:]
    numbered = "".join(f"\\textcircled{{{n}}} {choice}; " for n,choice in enumerate(choices,1))
    # Drop the trailing "; " and close the sentence with a period.
    return headstring + numbered[:-2] + "."

def ExtractIDList(filepath):
    """Collect (problem number, six-digit ID) pairs from a file.

    For a path ending in ``.pdf`` the text returned by ``parsePDF`` is
    scanned for ``<number>. (<6 digits>)``. Otherwise the file is read as
    text and every ``enumerate`` environment is scanned: each
    ``\\setcounter{enumi}{N}`` restarts the numbering at N (a block that does
    not start with ``\\setcounter`` is treated as starting from 0), and every
    ``(<6 digits>)`` that follows gets the next number.

    Returns:
        A list of ``(int number, str id)`` tuples in order of appearance.
    """
    if filepath[-4:] == ".pdf":
        found = re.findall(r"(\d+)\.[\s\n]*\((\d{6})\)",parsePDF(filepath))
        return [(int(number),pid) for number,pid in found]
    text = ReadTextFile(filepath)
    result = []
    for raw_block in re.findall(r"\\begin\{enumerate\}([\s\S]*?)\\end\{enumerate\}",text):
        block = raw_block.strip()
        if not block.startswith("\\setcounter"):
            block = "\\setcounter{enumi}{0}\n\n" + block
        markers = list(re.finditer(r"\\setcounter\{enumi\}\{(\d+)\}",block))
        for k,marker in enumerate(markers):
            # A segment runs from this counter reset to the next one
            # (or to the end of the block for the last reset).
            if k + 1 < len(markers):
                segment_end = markers[k+1].start()
            else:
                segment_end = len(block)
            number = int(marker.group(1))
            for pid in re.findall(r"\((\d{6})\)",block[marker.end():segment_end]):
                number += 1
                result.append((number,pid))
    return result

def ExportIDList(filepath):
    """Render the IDs of a .tex/.pdf file as comma-separated lines.

    Within a line the k-th entry is the ID of problem k; missing problem
    numbers are filled with ``999999``. A new line starts whenever the
    problem number does not increase relative to the previous entry. Every
    line (including the last, possibly empty, one) is followed by a blank
    line.

    Returns:
        The formatted ID string.
    """
    finished_rows = []
    row = []
    previous = None
    for number,pid in ExtractIDList(filepath):
        if previous is not None and number > previous:
            # Same line: pad the gap between consecutive numbers.
            row.extend(["999999"]*(number-previous-1))
        else:
            if previous is not None:
                # Numbering restarted: close the current line.
                finished_rows.append(",".join(row))
                row = []
            row.extend(["999999"]*(number-1))
        row.append(pid)
        previous = number
    finished_rows.append(",".join(row))
    return "".join(line+"\n\n" for line in finished_rows)

def makedir(dirpath):
    """Create ``dirpath`` and all of its missing parent directories.

    Backslashes are normalised to ``/``. When the first component contains a
    ``:`` (e.g. a Windows drive such as ``C:``) it is merged with the second
    component so the bare drive is never passed to ``os.mkdir``.

    Creation is best-effort: directories that already exist, or that cannot
    be created, are silently skipped.

    Args:
        dirpath: the directory path to create.

    Returns:
        The list of cumulative paths that were attempted, shortest first.
    """
    parts = dirpath.strip().replace("\\","/").split("/")
    # Guard on the length so a bare "C:" no longer raises IndexError.
    if ":" in parts[0] and len(parts) > 1:
        parts = [parts[0] + "/" + parts[1]] + parts[2:]
    dirlist = []
    for part in parts:
        dirlist.append(dirlist[-1] + "/" + part if dirlist else part)
    for dpath in dirlist:
        try:
            os.mkdir(dpath)
        except OSError:
            # Already exists / not creatable: keep going. Only OS-level
            # errors are ignored, so KeyboardInterrupt still propagates.
            pass
    return dirlist


def TitleIDStringtoTupleList(raw_string):
    """Turn JSON-like ``"title": "ids"`` lines into ``(title, ids)`` tuples.

    The text is first passed through ``RefinePunctuations`` and stripped of
    curly braces. Each non-blank line then loses all whitespace, a trailing
    comma, every double quote and every semicolon, and is split at its first
    colon.

    Returns:
        A list of ``(title, id_string)`` tuples, one per non-blank line.
    """
    cleaned = RefinePunctuations(raw_string).replace("{","").replace("}","")
    pairs = []
    for line in cleaned.split("\n"):
        if line.strip() == "":
            continue
        entry = re.sub(r"(?:(?:\s)|(?:,\s*$))","",line)
        entry = entry.replace('"','').replace(";","")
        colon = entry.find(":")
        # ``find`` (not ``partition``) on purpose: with no colon it yields
        # -1, and the slices below then behave exactly as they always have.
        pairs.append((entry[:colon],entry[colon+1:]))
    return pairs

def TitleIDTupleListtoString(correspoinding_list):
    """Format ``(title, id_string)`` tuples as a numbered, readable listing.

    Each tuple becomes one line ``"<n>. <title>: <id_string>"`` with ``n``
    counting from 1; every line ends with a newline.
    """
    return "".join(
        f"{number}. {pair[0]}: {pair[1]}\n"
        for number,pair in enumerate(correspoinding_list,1)
    )

def startfile(filepath):
    """Open a file or folder with the platform's default handler.

    * Windows: folders are opened with ``explorer``; files with the shell's
      ``start`` command.
    * macOS: ``open``.
    * Anything else: ``xdg-open``.

    Args:
        filepath: path of the file or folder to open.

    Returns:
        The ``subprocess.Popen`` object of the launched command.
    """
    osname = sys.platform
    if osname == "win32":
        if os.path.isfile(filepath):
            # ``start`` treats a first quoted argument as the window title,
            # so a path containing spaces (which gets quoted) would not be
            # opened. The explicit empty title avoids that.
            return subprocess.Popen(["start","",filepath], shell=True)
        return subprocess.Popen(["explorer",filepath])
    if osname == "darwin":
        return subprocess.Popen(["open",filepath])
    # Linux and any other platform: previously an unrecognised platform left
    # the result unbound and raised UnboundLocalError.
    return subprocess.Popen(["xdg-open",filepath])


def PaintRedAnswers(string,prodict):
    """Recolour blue answers that already match the stored ones to red.

    ``string`` is split into (id, content) pairs by
    ``generateListOfIDandContent``. For every pair whose text contains
    ``textcolor{blue}``, the answer extracted by ``generateAnswerTex`` is
    compared with ``prodict[id]["ans"]``:

    * equal: ``textcolor{blue}`` becomes ``textcolor{red}`` for that problem;
    * stored answer empty and a new answer present: a hint is printed;
    * both present but different: a warning is printed.

    Returns:
        The text with the matching answers recoloured.
    """
    painted = string
    for pid,body in generateListOfIDandContent(painted):
        fragment = pid+")"+body
        if "textcolor{blue}" not in fragment:
            continue
        typed_ans = generateAnswerTex(body,anspreamble="\\textcolor{blue}{")
        stored_ans = prodict[pid]["ans"]
        if typed_ans == stored_ans:
            recoloured = fragment.replace("textcolor{blue}","textcolor{red}")
            painted = painted.replace(fragment,recoloured)
        elif typed_ans == "暂无答案":
            # No answer was typed in: nothing to report.
            continue
        elif stored_ans == "":
            print(f"{pid} 已有答案, 请运行提取答案控件以获取新录入的答案")
        else:
            print(f"{pid} 新答案 {typed_ans} 和原答案 {stored_ans} 有所不同, 请仔细检查")
    return painted


def usagelistdifference(lista,listb):
    """Return the largest absolute element-wise difference of two usage lists.

    Elements are converted with ``float`` before being compared. Lists of
    different lengths yield 1; two empty lists yield 0.
    """
    if len(lista) != len(listb):
        return 1
    largest = 0
    for left,right in zip(lista,listb):
        gap = abs(float(left)-float(right))
        largest = max(gap,largest)
    return largest


def RefineExclude(excludejson):
    """Return a copy of ``excludejson`` whose keys are zero-padded to 6 chars.

    Values are carried over unchanged; when two keys pad to the same string
    the later one wins.
    """
    return {key.zfill(6): value for key,value in excludejson.items()}


def ChooseIDsByUsageInterval(startdate,enddate,interval,classregex,prodict):
    """Select problems whose mean difficulty over a period lies in ``interval``.

    For every problem in ``prodict`` the usage records (parsed with
    ``parseUsage``) are kept when their ``"date"`` lies in
    ``[startdate, enddate]`` and their ``"classid"`` matches ``classregex``.
    Problems with at least one such record count as "used".

    For each used problem whose records all report the same number of
    sub-problems, the mean of ``"difficulties"[i]`` across records is
    computed for each sub-problem ``i``; the sub-problem is chosen when the
    mean lies in ``[interval[0], interval[1]]``.

    Returns:
        A tuple ``(chosenproblems, cautionids, used_problems)``:

        * ``chosenproblems``: dict mapping problem id to the list of chosen
          1-based sub-problem numbers (an empty list when the problem has a
          single part);
        * ``cautionids``: ids whose records disagree on the number of
          sub-problems (these are reported and not evaluated);
        * ``used_problems``: list of ``(id, parsed_usages)`` for every used
          problem.
    """
    used_problems = []
    for pid in prodict:
        # Parse each record once instead of up to three times per record.
        # NOTE(review): assumes parseUsage is deterministic and free of
        # side effects - confirm against its definition.
        parsed = [parseUsage(u) for u in prodict[pid]["usages"]]
        currentusages = [
            p for p in parsed
            if startdate <= p["date"] <= enddate and re.search(classregex,p["classid"])
        ]
        if currentusages:
            used_problems.append((pid,currentusages))
    cautionids = []  # ids whose usage records differ in number of sub-problems
    chosenproblems = {}  # id -> chosen sub-problem numbers ([] means no sub-problems)
    for pid,usages in used_problems:
        subproblems = [u["subproblems"] for u in usages]
        if max(subproblems) != min(subproblems):
            cautionids.append(pid)
            print(f"!!! 题号 {pid} 的使用记录中小题数目不全相同, 请检查")
            continue
        for i in range(subproblems[0]):
            diffmean = np.mean([u["difficulties"][i] for u in usages])
            if not interval[0] <= diffmean <= interval[1]:
                continue
            if subproblems[0] == 1:
                chosenproblems[pid] = []
            elif pid not in chosenproblems:
                chosenproblems[pid] = [i+1]
            else:
                chosenproblems[pid].append(i+1)
    return (chosenproblems,cautionids,used_problems)


# This module is a library of database helper functions meant to be imported;
# running it directly only prints a short notice.
if __name__ == "__main__":
    print("数据库工具, import用.")