This repository has been archived on 2024-06-23. You can view files and clone it, but cannot push or open issues or pull requests.
mathdeptv2/工具v3/database_tools_2.py

2802 lines
137 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json,re,os,Levenshtein,fitz,time,sys,subprocess
import pandas as pd
import numpy as np
import pyperclip
import mysql.connector
import tqdm
# Default scheme describing a "full" build: where output goes, whether the
# teacher edition is produced, and which per-problem fields are displayed.
# NOTE(review): the code that consumes this dict is not visible in this part
# of the file; the key meanings below are translations of the key names only.
BuildFullScheme = {
# "输出路径" = output path; the default value means "temporary files".
"输出路径": "临时文件",
# "教师版" = teacher edition.
"教师版": True,
# "字段显示设置" = field display settings (one switch per field).
"字段显示设置": {
"题后空间": True,
"课时目标": True,
"题目标签": True,
"答案": True,
"解答与提示": True,
# Usage records: per the explanatory string on the next key, [a,b] shows the
# best a and the worst b records; a -2 hides them; -1 (without -2) shows all.
"使用记录": [3,-1],
"使用记录说明": "[a,b]表示显示最好的a个和最差b个, 有-2表示不显示, 无-2但有-1表示全部显示",
"来源": True,
"备注": True,
"届别": []
}
}
# Connection settings passed to connect() by the database helpers in this file.
# NOTE(review): the account name and password are hard-coded in source control;
# consider loading them from the environment or an untracked config file.
db_user = "tikuuser"
db_pwd = "Kjmathds_2024"
# The port is kept as a string here and handed to the connector unchanged.
db_port = "13306"
db_host = "wwylss.synology.me"
db_database = "tiku"
def get_git_username():
    """Return the output of ``git config --global user.name``.

    The command is run through ``os.popen``; its standard output is read and
    stripped of surrounding whitespace. Whatever the command prints (possibly
    an empty string) is returned unchanged.
    """
    command = "git config --global user.name"
    # The context manager closes the pipe; previously it was left open.
    with os.popen(command) as stream:
        return stream.read().strip()
def connect(hostname, port, username, pwd, db):
    """Open a connection through ``mysql.connector`` and return it.

    Args:
        hostname: server host name.
        port: server port.
        username: account name.
        pwd: account password.
        db: name of the database to select.
    """
    settings = {
        "host": hostname,
        "port": port,
        "user": username,
        "password": pwd,
        "database": db,
    }
    return mysql.connector.connect(**settings)
def GetDate():
    """Return today's local date as an eight-character ``yyyymmdd`` string.

    The clock is sampled exactly once so that year, month and day always come
    from the same instant (three separate ``localtime()`` calls could straddle
    midnight and yield an inconsistent date).
    """
    return time.strftime("%Y%m%d", time.localtime())
def GetTime():
    """Return the current local time as a six-character ``hhmmss`` string."""
    now = time.localtime()
    return time.strftime("%H%M%S", now)
def datestrtotimestamp(date_str):
    """Convert a ``yyyymmdd`` string to an integer timestamp.

    The date is parsed with ``time.strptime`` and converted with
    ``time.mktime`` (local time), so the result is in whole seconds.
    """
    parsed = time.strptime(date_str, "%Y%m%d")
    seconds = time.mktime(parsed)
    return int(seconds)
def ReadTextFile(filepath):
    """Read a UTF-8 text file and return its whole content as one string."""
    with open(filepath, "r", encoding="u8") as source:
        return source.read()
def SaveTextFile(data, filepath):
    """Write ``data`` to ``filepath`` as UTF-8 text, replacing any old content.

    Missing parent directories are created first. A path with no directory
    component (a bare file name) is written to the current directory without
    attempting to create an empty-named directory.

    Returns:
        ``filepath``, unchanged.
    """
    pathname = os.path.dirname(filepath)
    # dirname() is "" for a bare file name; there is nothing to create then.
    if pathname:
        os.makedirs(pathname, exist_ok=True)
    with open(filepath, "w", encoding="u8") as f:
        f.write(data)
    return filepath
def AppendTextFile(string, filepath):
    """Append ``string`` (stripped, followed by a blank line) to a text file.

    The parent directory is created when it does not exist, and the file is
    created when missing (append mode does that by itself). Errors other than
    a missing directory are no longer swallowed by a bare ``except``.

    Returns:
        ``filepath``, unchanged.
    """
    folder = os.path.dirname(filepath)
    if folder:
        os.makedirs(folder, exist_ok=True)
    with open(filepath, "a", encoding="u8") as f:
        f.write(string.strip() + "\n\n")
    return filepath
def SortDict(adict):
    """Return a new dict holding the items of ``adict`` ordered by key."""
    return {key: adict[key] for key in sorted(adict)}
#读取存储json数据库相关(不限于题号数据库)
def load_dict(filename):
    """Parse the UTF-8 JSON file ``filename`` and return the resulting object."""
    with open(filename, "r", encoding="u8") as source:
        return json.load(source)
def save_dict(adict, filepath):
    """Serialise ``adict`` as indented, non-ASCII-escaped JSON into ``filepath``.

    Returns:
        ``filepath`` on success, or the integer ``1`` when serialising or
        writing fails (a message is printed in that case).
    """
    try:
        jsondata = json.dumps(adict, indent=4, ensure_ascii=False)
        SaveTextFile(jsondata, filepath)
        return filepath
    except Exception:
        # Best-effort save: report and signal failure with 1. Unlike a bare
        # ``except`` this lets KeyboardInterrupt / SystemExit propagate.
        print("保存 %s 文件失败"%filepath)
        return 1
def pre_treating(string):
    """Strip characters and markup that are irrelevant when comparing problems.

    The removal passes run in this fixed order: ``center`` environments,
    ``bracket{n}``/``blank{n}``/choice macro names, whitespace and
    ``\\ { } $ ( ) [ ]``, remaining newlines/tabs, a few formatting command
    names, and finally ``, . : ; ?``.
    """
    removal_passes = (
        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
        r"[\s\\\{\}\$\(\)\[\]]",
        r"[\n\t]",
        r"(displaystyle)|(overrightarrow)|(overline)",
        r"[,\.:;?]",
    )
    for pattern in removal_passes:
        string = re.sub(pattern, "", string)
    return string
def findsru(id, alist):
    """Return the sorted ids paired with ``id`` in ``alist``.

    ``alist`` holds pairs; a pair counts whichever side ``id`` appears on,
    and the other side of the pair is collected.
    """
    partners = set()
    for pair in alist:
        if pair[0] == id:
            partners.add(pair[1])
        if pair[1] == id:
            partners.add(pair[0])
    return sorted(partners)
def findsameinDB(id):
    """Return the sorted ids recorded in table ``same`` as identical to ``id``.

    The pair may be stored in either column order, so both ``ID`` and
    ``SAME_ID`` are queried and the results merged. The connection is closed
    when the function finishes (it was previously left open).
    """
    mydb = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    try:
        mycursor = mydb.cursor()
        val = (id,)
        mycursor.execute("SELECT SAME_ID FROM same WHERE ID = (%s);", val)
        found = {ret[0] for ret in mycursor.fetchall()}
        mycursor.execute("SELECT ID FROM same WHERE SAME_ID = (%s);", val)
        found |= {ret[0] for ret in mycursor.fetchall()}
    finally:
        mydb.close()
    return sorted(found)
def findrelatedinDB(id):
    """Return the sorted ids recorded in table ``related`` as related to ``id``.

    The pair may be stored in either column order, so both ``ID`` and
    ``RELATED_ID`` are queried and the results merged. The connection is
    closed when the function finishes (it was previously left open).
    """
    mydb = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    try:
        mycursor = mydb.cursor()
        val = (id,)
        mycursor.execute("SELECT RELATED_ID FROM related WHERE ID = (%s);", val)
        found = {ret[0] for ret in mycursor.fetchall()}
        mycursor.execute("SELECT ID FROM related WHERE RELATED_ID = (%s);", val)
        found |= {ret[0] for ret in mycursor.fetchall()}
    finally:
        mydb.close()
    return sorted(found)
def get_unit_tags(id):
    """Return the tag names of problem ``id`` that contain the text "单元".

    Tag names are read from table ``tagcorresp``.
    """
    conn = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    cursor = conn.cursor()
    cursor.execute("SELECT tagname FROM tagcorresp WHERE ID = (%s);", (id,))
    unittags = []
    for row in cursor.fetchall():
        tagname = row[0]
        if "单元" in tagname:
            unittags.append(tagname)
    conn.close()
    return unittags
def get_problem_content(id):
    """Return the ``content`` column of problem ``id`` from table ``problems``.

    Raises:
        IndexError: when the query returns no row.
    """
    conn = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    cursor = conn.cursor()
    cursor.execute("SELECT content FROM problems WHERE ID = (%s);", (id,))
    rows = cursor.fetchall()
    first_row = rows[0]
    content = first_row[0]
    conn.close()
    return content
def generate_same_list():
    """Return every row of table ``same`` as a list of ``(ID, SAME_ID)`` tuples."""
    conn = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    cursor = conn.cursor()
    cursor.execute("SELECT ID,SAME_ID FROM same;")
    pairs = []
    for row in cursor.fetchall():
        pairs.append((row[0], row[1]))
    conn.close()
    return pairs
def generate_related_list():
    """Return every row of table ``related`` as a list of ``(ID, RELATED_ID)`` tuples."""
    conn = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    cursor = conn.cursor()
    cursor.execute("SELECT ID,RELATED_ID FROM related;")
    pairs = []
    for row in cursor.fetchall():
        pairs.append((row[0], row[1]))
    conn.close()
    return pairs
def treat_dict():
    """Load every problem from the database and pre-treat its content.

    Returns:
        A dict mapping each problem ID to the result of ``pre_treating`` applied
        to its ``content`` column.
    """
    conn = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    cursor = conn.cursor()
    cursor.execute("SELECT ID,content FROM problems;")
    raw_contents = {id: content for id, content in cursor.fetchall()}
    treated_dict = {id: pre_treating(text) for id, text in raw_contents.items()}
    conn.close()
    return treated_dict
def detectmaxsim(currentid, excludelist, adict):
    """Find the problem in ``adict`` most similar to ``currentid``.

    Ids listed in ``excludelist`` and entries whose ``"content"`` contains
    "OBSOLETE" are skipped. Similarity is ``Levenshtein.jaro`` on the
    ``"content"`` values; on ties the first id encountered wins.

    Returns:
        ``(best_similarity, best_id)``; ``(-1, "000000")`` when there is no
        candidate.
    """
    best = (-1, "000000")
    for id in adict:
        if id in excludelist or "OBSOLETE" in adict[id]["content"]:
            continue
        score = Levenshtein.jaro(adict[id]["content"], adict[currentid]["content"])
        if score > best[0]:
            best = (score, id)
    return best
def stringmaxsim(string, adict, listlength):
    """Return the ``listlength`` entries of ``adict`` most similar to ``string``.

    ``string`` is normalised with ``pre_treating`` and compared, using
    ``Levenshtein.jaro``, against every value of ``adict`` that does not
    contain "OBS" (the values are used directly as strings).

    Returns:
        A list of ``(id, similarity)`` tuples, best first. Ties keep the
        iteration order of ``adict``. A non-positive ``listlength`` yields an
        empty list (it used to raise IndexError).
    """
    string = pre_treating(string)
    scored = [
        (id, Levenshtein.jaro(adict[id], string))
        for id in adict
        if "OBS" not in adict[id]
    ]
    # One stable sort replaces re-sorting the running top list on every step;
    # the selected entries and their order are the same.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:max(listlength, 0)]
def generate_sim_group(startingids, prodict, max_size, threshold):
    """Grow a group of problems similar to the starting block.

    Starting from the ids in ``startingids`` (a string using ":" and ","),
    repeatedly add every id of ``prodict`` whose pre-treated content has a
    ``Levenshtein.jaro`` similarity of at least ``threshold`` with a problem
    added in the previous round. Expansion stops when a round adds nothing or
    once more than ``max_size`` ids have been collected.

    Fixes relative to the previous version: ``treat_dict`` takes no argument
    and returns ``{id: treated_content}`` (plain strings), so it is called and
    indexed accordingly; an id similar to several problems is added only once.

    Returns:
        The collected ids, sorted and compressed by ``generate_exp``.
    """
    treated_dict = treat_dict()
    # Passing the dict restricts the starting ids to those that exist in it.
    frontier = generate_number_set(startingids, treated_dict)
    output_list = []
    while frontier:
        seen = set(output_list) | set(frontier)
        additions = []
        for oldid in frontier:
            for newid in prodict:
                if newid in seen or newid not in treated_dict:
                    continue
                if "OBSOLETE" in treated_dict[newid]:
                    continue
                simrate = Levenshtein.jaro(treated_dict[oldid], treated_dict[newid])
                if simrate >= threshold:
                    additions.append(newid)
                    seen.add(newid)
        output_list = output_list + frontier
        frontier = additions
        if len(output_list) > max_size:
            break
    return generate_exp(sorted(output_list))
def generate_problem_series(startingid, length, adict):
    """Build a chain of ``length`` further ids starting at ``startingid``.

    Each next id is the one ``detectmaxsim`` reports as most similar to the
    previous id, excluding every id already in the chain.

    Returns:
        The chain (including ``startingid``) joined with commas.
    """
    chain = [startingid]
    for _ in range(length):
        _, following = detectmaxsim(chain[-1], chain, adict)
        chain.append(following)
    return ",".join(chain)
def generate_number_set(string, *thedict):
    """Expand an id expression into a list of six-digit id strings.

    ``string`` may use "," to separate items and ":" for inclusive ranges,
    e.g. ``"1:3,5"`` gives ``["000001","000002","000003","000005"]``.
    Whitespace is removed, trailing non-digit characters are dropped, and the
    text is passed through ``RefinePunctuations`` before splitting.

    Args:
        string: the id expression.
        *thedict: optionally one dict; only ids that are keys of it are kept.

    Returns:
        The list of ids (empty when ``string`` holds no usable text, which
        used to raise IndexError), or the string "输入参数有误" when the
        optional arguments are not exactly one dict.
    """
    string = re.sub(r"[\n\s]", "", string).strip()
    # Drop everything after the last ASCII digit; safe on empty input.
    string = re.sub(r"[^0-9]+$", "", string)
    numbers_list = []
    if string:
        string = RefinePunctuations(string)
        for s in string.split(","):
            if ":" not in s:
                numbers_list.append(s.zfill(6))
            else:
                start, end = s.split(":")
                for ind in range(int(start), int(end) + 1):
                    numbers_list.append(str(ind).zfill(6))
    if len(thedict) == 0:
        return numbers_list
    if len(thedict) == 1 and isinstance(thedict[0], dict):
        return [id for id in numbers_list if id in thedict[0]]
    return "输入参数有误"
def generate_classid(string):
    """Expand a class expression into a list of two-digit class numbers.

    ``string`` may use "," to separate items and ":" for inclusive ranges,
    e.g. ``"1:3,12"`` gives ``["01","02","03","12"]``. Whitespace is removed,
    trailing non-digit characters are dropped, and the text is passed through
    ``RefinePunctuations`` before splitting.

    Returns:
        The list of class numbers; empty when ``string`` holds no usable text
        (that case used to raise IndexError).
    """
    string = re.sub(r"[\n\s]", "", string).strip()
    # Drop everything after the last ASCII digit; safe on empty input.
    string = re.sub(r"[^0-9]+$", "", string)
    numbers_list = []
    if not string:
        return numbers_list
    string = RefinePunctuations(string)
    for s in string.split(","):
        if ":" not in s:
            numbers_list.append(s.zfill(2))
        else:
            start, end = s.split(":")
            for ind in range(int(start), int(end) + 1):
                numbers_list.append(str(ind).zfill(2))
    return numbers_list
def generate_id_set(string, *thedict):
    """Expand an id expression that may denote problems, "B" ids or "K" ids.

    * No "B", "X" or "K" in the text: behaves like ``generate_number_set``
      and forwards the optional dict filter. The filter is now unpacked
      correctly; it used to be passed as a tuple, which made
      ``generate_number_set`` return its error string.
    * Text starting with "B": returns "B" followed by the last five digits of
      each expanded number.
    * Text starting with "K": returns "K" + the number padded to seven digits
      + the last character of the text as a suffix.
    * Any other text containing those letters: returns ``None`` (unchanged
      behaviour).
    """
    string = RefinePunctuations(string)
    if not re.findall(r"[BXK]", string):
        return generate_number_set(string, *thedict)
    if string[0] == "B":
        string = re.sub("B", "", string)
        return ["B" + i[-5:] for i in generate_number_set(string)]
    if string[0] == "K":
        suffix = string.strip()[-1]
        string = re.sub("[KBX]", "", string)
        return ["K" + i.zfill(7) + suffix for i in generate_number_set(string)]
    return None
def generate_exp(id_list):
    """Compress a list of id strings into a ":" / "," expression.

    Runs of numerically consecutive ids become ``first:last``; isolated ids
    stay as they are, e.g. ``["000001","000002","000003","000005"]`` gives
    ``"000001:000003,000005"``. An empty list gives "无有效题号".
    """
    if len(id_list) == 0:
        return "无有效题号"
    runs = [[id_list[0], id_list[0]]]
    for item in id_list[1:]:
        if int(item) - 1 == int(runs[-1][1]):
            runs[-1][1] = item
        else:
            runs.append([item, item])
    pieces = [first if first == last else first + ":" + last for first, last in runs]
    # Double quotes never survive in the result (same as before).
    return ",".join(pieces).replace('"', "")
def parsePDF(filePath):
    """Return the text of every page of a PDF, each page followed by a newline."""
    with fitz.open(filePath) as doc:
        return "".join(page.get_text() + "\n" for page in doc.pages())
def extractIDs(filePath):
    """Collect the six-digit ids written as ``(dddddd)`` in a file.

    ``.txt`` and ``.tex`` files are read as UTF-8 text, ``.pdf`` files through
    ``parsePDF``; any other extension returns "格式不正确".

    Returns:
        The ids in order of appearance, compressed by ``generate_exp``.
    """
    extension = filePath[-4:]
    if extension in (".txt", ".tex"):
        with open(filePath, "r", encoding = "u8") as source:
            data = source.read()
    elif extension == ".pdf":
        data = parsePDF(filePath)
    else:
        return "格式不正确"
    return generate_exp(re.findall(r"\((\d{6})\)", data))
def spareIDs():
    """Describe the unused id ranges of table ``problems``.

    Returns:
        A multi-line string; each line names the first and last id of one
        free closed interval, the final one running up to 999999.

    Changes: ids are sorted numerically before being compressed (the query
    has no ORDER BY, and ``generate_exp`` needs ordered input); an empty table
    no longer raises; the connection is closed even when a query fails.
    """
    mydb = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    try:
        mycursor = mydb.cursor()
        mycursor.execute("SELECT ID FROM problems;")
        idlist = sorted((ret[0] for ret in mycursor.fetchall()), key=int)
    finally:
        mydb.close()
    if not idlist:
        # NOTE(review): with no problems stored, the whole range is reported
        # as free starting at 000001 - confirm the intended first id.
        return "首个空闲id: %s, 直至: %s"%("000001","999999")
    used_list = generate_exp(idlist).split(",")
    lines = []
    for current, following in zip(used_list, used_list[1:]):
        # The last six characters end the current used run; the first six
        # characters start the next one.
        first_free = str(int(current[-6:]) + 1).zfill(6)
        last_free = str(int(following[:6]) - 1).zfill(6)
        lines.append("首个空闲id: %s, 直至: %s"%(first_free,last_free))
    lines.append("首个空闲id: %s, 直至: %s"%(str(int(used_list[-1][-6:])+1).zfill(6),"999999"))
    return "\n".join(lines)
def NextSpareID(num):
    """Return the smallest integer >= ``num`` that is not a used problem id.

    Ids are read from table ``problems`` and compared in their six-digit
    zero-padded form. Used ids are held in a set so each probe is O(1), and
    the connection is closed even when the query fails.
    """
    mydb = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    try:
        mycursor = mydb.cursor()
        mycursor.execute("SELECT ID FROM problems;")
        used = {ret[0] for ret in mycursor.fetchall()}
    finally:
        mydb.close()
    num = int(num)
    while str(num).zfill(6) in used:
        num += 1
    return num
def NextSpareIDBlock(num):
    """Return the next free id block at or after ``num`` as ``"start:end"``.

    ``start`` comes from ``NextSpareID``; ``end`` is one less than the
    smallest used id above ``start``, or 999999 when there is none. Neither
    number is zero-padded.
    """
    conn = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    cursor = conn.cursor()
    cursor.execute("SELECT ID FROM problems;")
    known_ids = [row[0] for row in cursor.fetchall()]
    first_free = NextSpareID(num)
    above = [int(known) for known in known_ids if int(known) > first_free]
    last_free = min(above) - 1 if above else 999999
    conn.close()
    return f"{first_free}:{last_free}"
def GenerateProblemListFromString(data):
    """Split the text of a .tex file into problems.

    Only the ``document`` environment is considered when present. Every
    ``\\item`` up to the next ``\\item`` or ``\\end{enumerate}`` is one problem;
    anything from a line starting with ``%`` onwards and a trailing ``\\\\`` are
    cut from it. The most recent ``%word`` line before the item supplies the
    origin suffix.

    Returns:
        A list of ``(content, suffix)`` tuples.
    """
    document = re.search(r"\\begin\{document\}([\s\S]*?)\\end\{document\}", data)
    if document is not None:
        data = document.group(1)
    data = re.sub(r"\n{2,}", "\n", data)
    # Mark the end of every item so each one can be matched on its own.
    data = re.sub(r"\\item", r"\\enditem\\item", data)
    data = re.sub(r"\\end\{enumerate\}", r"\\enditem", data)
    problem_list = []
    for found in re.finditer(r"\\item([\s\S]*?)\\enditem", data):
        begin, finish = found.span(1)
        content = data[begin:finish].strip()
        content = re.sub(r"\n\%[\s\S]*$", "", content)
        content = re.sub(r"\\\\$", "", content)
        markers = re.findall(r"\n(\%\s{0,}[\S]+)\n", data[:begin])
        suffix = markers[-1].replace("%", "").strip() if markers else ""
        problem_list.append((content, suffix))
    return problem_list
def _split_leading_meta(content_raw):
    """Separate a leading ``[...]`` interaction tag from a problem's text.

    Returns ``(content, meta)``. Without a leading tag made only of letters,
    digits, commas and whitespace, ``meta`` is ``{}`` and the text is returned
    unchanged.
    """
    tag = re.match(r"\[([A-Za-z\d,\s]*?)\]", content_raw)
    if tag is None:
        return content_raw, {}
    # NOTE: a problem whose own text begins with such a bracket (e.g. "[0,1]")
    # cannot be told apart from a tag and loses that bracket.
    content = content_raw[tag.end():].strip()
    same_id_list = []
    related_id_list = []
    unrelated_id_list = []
    replacement = None
    for metaitem in tag.group(1).split(","):
        metaitem = metaitem.strip().upper()
        # "REP" must be tested before "R" because both start with R.
        if metaitem.startswith("REP"):
            replacement = metaitem[3:].strip().zfill(6)
        elif metaitem.startswith("S"):
            same_id_list.append(metaitem[1:].strip().zfill(6))
        elif metaitem.startswith("R"):
            related_id_list.append(metaitem[1:].strip().zfill(6))
        elif metaitem.startswith("U"):
            unrelated_id_list.append(metaitem[1:].strip().zfill(6))
    if replacement is not None:
        return content, {"rep": replacement}
    return content, {"same": same_id_list, "related": related_id_list, "unrelated": unrelated_id_list}


def GenerateProblemListFromString2024(data):
    """Split the text of a .tex file into problems with interaction info.

    Works like ``GenerateProblemListFromString`` (after also dropping
    ``\\definecolor`` lines and ``tcolorbox`` environments), and additionally
    reads an optional comma-separated tag right after ``\\item``:
    ``[rep3214]`` means "do not add, use 003214 instead"; ``[s2521,r354,u10021]``
    lists ids that are the same as / related to / unrelated to this problem.

    Fixes: the tag is taken only from the leading bracket (a later ``]`` in the
    problem text used to be swallowed into the tag), whitespace around tag
    items is ignored, and an empty ``\\item`` no longer raises IndexError.

    Returns:
        A list of ``(content, suffix, meta)`` tuples. ``meta`` is ``{}`` when
        there is no tag, ``{"rep": id}`` for a replacement, otherwise a dict
        with the keys "same", "related" and "unrelated".
    """
    document = re.search(r"\\begin\{document\}([\s\S]*?)\\end\{document\}", data)
    if document is not None:
        data = document.group(1)
    data = re.sub(r"\n{2,}", "\n", data)
    data = re.sub(r"\\definecolor[^\n]*\n", "\n", data)
    data = re.sub(r"\\begin\{tcolorbox\}[\s\S]*?\\end\{tcolorbox\}", "\n", data)
    # Mark the end of every item so each one can be matched on its own.
    data = re.sub(r"\\item", r"\\enditem\\item", data)
    data = re.sub(r"\\end\{enumerate\}", r"\\enditem", data)
    problem_list = []
    for found in re.finditer(r"\\item([\s\S]*?)\\enditem", data):
        begin, finish = found.span(1)
        content_raw = data[begin:finish].strip()
        content_raw = re.sub(r"\n\%[\s\S]*$", "", content_raw)
        content_raw = re.sub(r"\\\\$", "", content_raw)
        content_raw = content_raw.strip()
        content, meta = _split_leading_meta(content_raw)
        # The closest preceding "%word" line names the origin suffix.
        markers = re.findall(r"\n(\%\s{0,}[\S]+)\n", data[:begin])
        suffix = markers[-1].replace("%", "").strip() if markers else ""
        problem_list.append((content, suffix, meta))
    return problem_list
def CreateEmptyProblem(problem):
    """Return a blanked shallow copy of ``problem``.

    Values whose exact type is ``str`` become "", ``list`` becomes ``[]``,
    ``int`` or ``float`` becomes -1; every other value is carried over as is.
    The input dict is not modified.
    """
    blank = problem.copy()
    for field, value in list(blank.items()):
        kind = type(value)
        if kind == str:
            blank[field] = ""
        elif kind == list:
            blank[field] = []
        elif kind in (int, float):
            blank[field] = -1
    return blank
def CreateNewProblem(id, content, origin, dict, editor):
    """Build the dict of a new problem.

    The entry ``dict["000001"]`` serves as the template that
    ``CreateEmptyProblem`` blanks; id (zero-padded to six digits), content,
    origin and a one-element edit list are then filled in.
    """
    template = dict["000001"]
    fresh = CreateEmptyProblem(template)
    fresh.update({
        "id": str(id).zfill(6),
        "content": content,
        "origin": origin,
        "edit": [editor],
    })
    return fresh
def AddRelatedProblemToDB(id, content, oid, editor):
    """Insert an adapted problem and link it to the problem it derives from.

    Writes one row each to ``problems``, ``edit_history``, ``related`` and
    ``logs`` and commits. The genre is chosen from the content: "blank" gives
    填空题, otherwise "bracket" gives 选择题, otherwise 解答题 with a 4em space.

    Args:
        id: id of the new problem (padded to six digits).
        content: problem text (stripped before storing).
        oid: id of the original problem (padded to six digits).
        editor: editor name stored in the edit history.

    Returns:
        The six-digit id of the new problem.

    Fixes: ``oid`` is now padded before the pair is ordered and stored (the
    raw value used to be compared with the padded ``id`` and inserted as is),
    and the connection is closed even when a statement fails.
    """
    id = str(id).zfill(6)
    oid = str(oid).zfill(6)
    content = content.strip()
    if "blank" in content:
        genre, space = "填空题", ""
    elif "bracket" in content:
        genre, space = "选择题", ""
    else:
        genre, space = "解答题", "4em"
    origin = json.dumps({"来源": "改编题目", "前序": oid}, ensure_ascii=False)
    # The smaller id goes into the ID column of the pair.
    pair = (oid, id) if id > oid else (id, oid)
    mydb = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    try:
        mycursor = mydb.cursor()
        sql = "INSERT INTO problems (ID,content,genre,origin,space) VALUE (%s,%s,%s,%s,%s);"
        mycursor.execute(sql, (id, content, genre, origin, space))
        sql = "INSERT INTO edit_history (ID,date,editor) VALUE (%s,%s,%s);"
        mycursor.execute(sql, (id, GetDate(), editor.strip()))
        sql = "INSERT INTO related (ID,RELATED_ID) VALUE (%s,%s);"
        mycursor.execute(sql, pair)
        sql = "INSERT INTO logs (DATE,TIME,username,action,id,db_content) VALUE (%s,%s,%s,%s,%s,%s);"
        mycursor.execute(sql, (GetDate(), GetTime(), get_git_username(), "加关联题", id, content))
        mydb.commit()
    finally:
        mydb.close()
    return id
def AddProblemstoDict2024(startingid, raworigin, problems, editor, indexed):
    """Insert parsed problems into the database, numbering from ``startingid``.

    Args:
        startingid: id given to the first inserted problem; later ones count up.
        raworigin: origin text; each problem's suffix is appended to it.
        problems: ``(content, suffix, meta)`` tuples as produced by
            ``GenerateProblemListFromString2024``.
        editor: editor name stored in the edit history.
        indexed: when true, the running number of the problem within its
            suffix group is stored in the origin as "题号".

    A problem whose ``meta`` has a "rep" key is not inserted; the replacement
    id is reported instead and no new id is consumed. For inserted problems
    the "same" / "related" / "unrelated" ids of ``meta`` are written as pairs
    with the smaller id first.

    Returns:
        The list of ids, in input order (new ids and replacement ids).

    Fixes: the progress message printed an empty source for non-indexed
    imports because the conditional expression covered the whole
    concatenation; an empty ``problems`` list now returns ``[]`` instead of
    raising; the connection is closed even when a statement fails.
    """
    idlist = []
    if not problems:
        return idlist
    relation_tables = (
        ("same", "same", "SAME_ID"),
        ("related", "related", "RELATED_ID"),
        ("unrelated", "unrelated", "UNRELATED_ID"),
    )
    mydb = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    try:
        mycursor = mydb.cursor()
        id = int(startingid)
        currentsuffix = problems[0][1]
        problemindex = 0
        for p, suffix, meta in problems:
            pid = str(id).zfill(6)
            # The running number restarts whenever the suffix changes.
            if suffix == currentsuffix:
                problemindex += 1
            else:
                problemindex = 1
                currentsuffix = suffix
            if indexed:
                origin = {"来源": raworigin + suffix, "题号": problemindex}
            else:
                origin = {"来源": raworigin + suffix}
            if "rep" in meta:
                idlist.append(meta["rep"])
                print(f"该题 {idlist[-1]} {p} 已在题库中, 不必收录.")
                continue
            if "blank" in p:
                genre, space = "填空题", ""
            elif "bracket" in p:
                genre, space = "选择题", ""
            else:
                genre, space = "解答题", "4em"
            sql = "INSERT INTO problems (ID,content,genre,origin,space) VALUE (%s,%s,%s,%s,%s);"
            mycursor.execute(sql, (pid, p.strip(), genre, json.dumps(origin, ensure_ascii=False), space))
            sql = "INSERT INTO edit_history (ID,date,editor) VALUE (%s,%s,%s);"
            mycursor.execute(sql, (pid, GetDate(), editor.strip()))
            sql = "INSERT INTO logs (DATE,TIME,username,action,id,db_content) VALUE (%s,%s,%s,%s,%s,%s);"
            mycursor.execute(sql, (GetDate(), GetTime(), get_git_username(), "新增题目", pid, p.strip()))
            for key, table, column in relation_tables:
                if key not in meta:
                    continue
                sql = f"INSERT INTO {table} (ID,{column}) VALUE (%s,%s);"
                for other in meta[key]:
                    # The smaller id goes into the ID column of the pair.
                    val = (other, pid) if pid > other else (pid, other)
                    mycursor.execute(sql, val)
            source_label = origin["来源"] + (("试题" + str(origin["题号"])) if "题号" in origin else "")
            print(f"已收录题号: {pid}, 题目类型: {genre}, 题目来源: {source_label}, 题目内容: {p.strip()}")
            id += 1
            idlist.append(pid)
        mydb.commit()
    finally:
        mydb.close()
    return idlist
def CreateIDLinks(old_id_list, new_id_list):
    """Pair each old id with the new id at the same position.

    Returns:
        A list of ``(old_id, new_id)`` tuples, or the string "新ID个数不足."
        when there are fewer new ids than old ids. Surplus new ids are ignored.
    """
    if len(old_id_list) > len(new_id_list):
        return "新ID个数不足."
    return list(zip(old_id_list, new_id_list))
def CreateRelatedProblems(links):
    """Build an ``enumerate`` block listing the source problems of ``links``.

    For each ``(id, rid)`` pair the content of problem ``id`` is fetched from
    table ``problems`` and emitted as ``\\item (id->rid) content``.

    Returns:
        The LaTeX text of the whole ``enumerate`` environment.
    """
    pieces = ["\\begin{enumerate}\n"]
    conn = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    cursor = conn.cursor()
    query = "SELECT content FROM problems WHERE ID = %s;"
    for id, rid in links:
        cursor.execute(query, (id,))
        row = cursor.fetchone()
        pieces.append(f"\\item ({id}->{rid}) {row[0]}\n\n")
    pieces.append("\n\\end{enumerate}\n")
    conn.close()
    return "".join(pieces)
def ImportRelatedProblems(new_json, main_json):
    """Merge edited related problems from ``new_json`` into ``main_json``.

    Each entry of the new file is keyed by the id of the problem it derives
    from; its own "id" field holds the new id with the marker "待替换", which
    is removed. The new entry is copied into the main dict, both problems are
    added to each other's "related" list, and genre/space are derived from
    the content ("blank" gives 填空题, else "bracket" gives 选择题, else 解答题
    with a 4em space). The main dict is saved sorted by key.

    Returns:
        The new ids compressed by ``generate_exp``, or ``1`` when a new id
        already exists in the main dict (nothing is saved in that case).

    Fix: the space value used to be written to "genre" a second time, which
    overwrote the genre and never set "space".
    """
    pro_dict = load_dict(main_json)
    new_dict = load_dict(new_json)
    edited = []
    for id in new_dict:
        new_id = new_dict[id]["id"].replace("待替换", "")
        if new_id in pro_dict:
            print("题号有重复")
            return 1
        edited.append(new_id)
        pro_dict[new_id] = new_dict[id].copy()
        pro_dict[new_id]["id"] = new_id
        pro_dict[id]["related"] += [new_id]
        pro_dict[new_id]["related"] += [id]
        p = pro_dict[new_id]["content"]
        if "blank" in p:
            genre, space = "填空题", ""
        elif "bracket" in p:
            genre, space = "选择题", ""
        else:
            genre, space = "解答题", "4em"
        pro_dict[new_id]["genre"] = genre
        pro_dict[new_id]["space"] = space
        print("导入关联题目 %s -> %s 信息成功."%(id,new_id))
    save_dict(SortDict(pro_dict), main_json)
    return generate_exp(edited)
def strip_suffix(originalString, suf_words_list):
    """Cut ``originalString`` at each suffix word.

    For every word in ``suf_words_list`` (in order) the word and the
    non-whitespace characters that follow it up to the end of the string are
    removed. Words are used as regular-expression fragments, not escaped.

    Fix: the cuts now accumulate. Previously each iteration restarted from the
    original string, so only the last word had any effect, and an empty list
    raised UnboundLocalError; an empty list now returns the string unchanged.
    """
    output = originalString
    for sw in suf_words_list:
        output = re.sub(sw + r"[\S]*$", "", output)
    return output
def get_striped_origin(pro_dict, id, suf_words_list):
    """Return the "origin" of problem ``id`` after ``strip_suffix`` is applied."""
    origin = pro_dict[id]["origin"]
    return strip_suffix(origin, suf_words_list)
def SeperateFirstLine(string):
    """Split a multi-line string into its first line and the rest.

    Returns:
        ``(first_line, rest)``; every line is stripped of surrounding
        whitespace and the remaining lines are re-joined with newlines.
    """
    head, *tail = string.split("\n")
    rest = "\n".join(row.strip() for row in tail)
    return (head.strip(), rest)
def ObtainDatatoModify(metadatafilepath, fields_dict, idlist):
    """Parse a metadata text file into a list of pending modifications.

    The file consists of items separated by blank lines. A one-line item that
    is a key of ``fields_dict`` selects the field for the items that follow;
    any other one-line item resets the field to "NotAField". An item of two
    or more lines holds a problem id on its first line and the new content on
    the remaining lines.

    Args:
        metadatafilepath: path of the metadata text file.
        fields_dict: container of valid field names.
        idlist: container of valid six-digit problem ids.

    Returns:
        A list of ``(field, id, content)`` tuples; items whose id is not in
        ``idlist`` are reported and skipped.
    """
    source_lines = ReadTextFile(metadatafilepath).split("\n")
    # Force a blank line after every field-name line so it becomes its own item.
    rebuilt = "".join(
        row + ("\n\n" if row.strip() in fields_dict else "\n") for row in source_lines
    )
    # Collapse whitespace-only gaps into exactly one blank line.
    rebuilt = re.sub(r"\n[\s\n]*\n", "\n\n", rebuilt.strip())
    to_modify_list = []
    currentfield = "NotAField"
    for chunk in rebuilt.split("\n\n"):
        label = chunk.strip()
        if label in fields_dict:
            currentfield = label
            continue
        if "\n" not in chunk:
            currentfield = "NotAField"
            continue
        id, content = SeperateFirstLine(chunk)
        padded = str(id).zfill(6)
        if padded in idlist:
            to_modify_list.append((currentfield, padded, content))
        else:
            print(f"其中 ID {id} 有误, 已忽略!!")
    return to_modify_list
def FloatToInt(string):
    """Parse ``string`` as a number, snapping to an int when very close to one.

    Returns:
        The rounded ``int`` when the value lies within 0.01 of it, otherwise
        the ``float`` itself.
    """
    value = float(string)
    nearest = round(value)
    return nearest if abs(value - nearest) < 0.01 else value
def OverwriteData(prodict, fieldsdict, field_id_and_content):
    """Overwrite one field of one problem in ``prodict``.

    Args:
        prodict: problem dict keyed by id.
        fieldsdict: field descriptions; ``fieldsdict[field]["FieldType"]``
            selects the handling ("str" stores the text, "int"/"float" stores
            the value converted by ``FloatToInt``).
        field_id_and_content: ``(field, id, content)``.

    Returns:
        ``(id, field, content)`` with the content as finally stored.
    """
    field, id, content = field_id_and_content
    kind = fieldsdict[field]["FieldType"]
    if id not in prodict:
        print("题号 %s 并不在数据库中"%id)
        return (id, field, content)
    if kind == "str":
        prodict[id][field] = content
    elif kind in ("int", "float"):
        content = FloatToInt(content)
        prodict[id][field] = content
    print("已覆盖 %s 的 %s 字段, 内容为 %s"%(id,field,content))
    return (id, field, content)
def AppendData(prodict, fieldsdict, field_id_and_content):
    """Append content to one field of one problem in ``prodict``.

    For a "str" field, content already contained in the field is skipped, an
    empty field is replaced by the stripped content, and otherwise the content
    is added after a LaTeX line break. For a "list" field, each line of the
    content that is not yet present is appended.

    Args:
        prodict: problem dict keyed by id.
        fieldsdict: field descriptions providing ``["FieldType"]`` per field.
        field_id_and_content: ``(field, id, content)``.

    Returns:
        ``(id, field, content)``.
    """
    field, id, content = field_id_and_content
    kind = fieldsdict[field]["FieldType"]
    if id not in prodict:
        print("题号 %s 并不在数据库中"%id)
        return (id, field, content)
    if kind == "str":
        existing = prodict[id][field]
        if content.strip() in existing:
            print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,content))
        else:
            if existing.strip() == "":
                prodict[id][field] = content.strip()
            else:
                prodict[id][field] = (existing.strip() + "\\\\\n" + content).strip()
            print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,content))
    elif kind == "list":
        for entry in (row.strip() for row in content.split("\n")):
            if entry in prodict[id][field]:
                print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,entry))
                continue
            # A new list is bound each time; the previous list object is untouched.
            prodict[id][field] = [*prodict[id][field], entry]
            print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,entry))
    return (id, field, content)
def AppendMutualData(prodict, field_id_and_content):
    """Record a symmetric relation between one problem and several others.

    Each line of the content is an id (zero-padded to six digits). When both
    ids exist in ``prodict``, each is added to the other's ``field`` list
    unless it is already there.

    Args:
        prodict: problem dict keyed by id.
        field_id_and_content: ``(field, id, content)``.

    Returns:
        ``(id, field, content)``.
    """
    field, id, content = field_id_and_content
    for raw in content.split("\n"):
        id2 = str(raw).zfill(6)
        if id not in prodict:
            print("题号 %s 并不在数据库中"%id)
            continue
        if id2 not in prodict:
            print("题号 %s 并不在数据库中"%id2)
            continue
        # Register the link in both directions.
        for owner, partner in ((id, id2), (id2, id)):
            if partner in prodict[owner][field]:
                print("题号 %s 的 %s 字段, 内容 %s 已存在"%(owner,field,partner))
            else:
                prodict[owner][field] = prodict[owner][field] + [partner]
                print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(owner,field,partner))
    return (id, field, content)
def AppendObjData(prodict, objdict, field_id_and_content):
    """Append objective ids to one field of one problem in ``prodict``.

    Each line of the content is an objective id; it is appended when it is a
    key of ``objdict`` and not yet present in the field.

    Args:
        prodict: problem dict keyed by id.
        objdict: objective dict keyed by objective id (``["content"]`` is
            used in the progress message).
        field_id_and_content: ``(field, id, content)``.

    Returns:
        ``(id, field, content)``.
    """
    field, id, content = field_id_and_content
    if id not in prodict:
        print("题号 %s 并不在数据库中"%id)
        return (id, field, content)
    for objid in (row.strip() for row in content.split("\n")):
        if objid not in objdict:
            print("目标编号 %s 并不在数据库中"%objid)
            continue
        if objid in prodict[id][field]:
            print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,objid))
            continue
        prodict[id][field] = prodict[id][field] + [objid]
        print("已于 %s 的 %s 字段执行添加, 编号为 %s, 内容为 %s"%(id,field,objid,objdict[objid]["content"]))
    return (id, field, content)
def AppendUsageData(prodict, field_id_and_content):
    """Append usage records to one field of one problem in ``prodict``.

    Each content line is normalised to tab-separated form. A line containing
    "FORCE" (any case) is upper-cased, stripped of that marker and appended
    unconditionally. Otherwise the line is skipped when it (minus a leading
    date of four or more digits) already occurs in the stored records, queued
    as pending when its class token already occurs in them, and appended in
    all other cases. The class token is the first tab-separated item that
    contains one of the characters 班高一二三.

    Args:
        prodict: problem dict keyed by id.
        field_id_and_content: ``(field, id, content)``.

    Returns:
        ``(field, id, content, output)`` where ``output`` lists, for every
        pending record, the id, the new line marked with FORCE and the stored
        lines of the same class.
    """
    field, id, content = field_id_and_content
    pending_list = []
    for raw in content.strip().split("\n"):
        line = re.sub(r"\s+", r"\t", raw.strip())
        records = prodict[id][field]
        if "FORCE" in line.upper():
            line = re.sub(r"\s+", r"\t", re.sub(r"FORCE", "", line.upper())).strip()
            records.append(line)
            print("已于 %s 的 %s 字段执行强制添加, 内容为 %s"%(id,field,line))
            continue
        time_stripped = re.sub(r"^\d{4,}\t", "", line)
        if time_stripped in "\n".join(records):
            print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,line))
            continue
        class_tokens = [info for info in line.split("\t") if len(re.findall(r"[班高一二三]", info)) > 0]
        classid = class_tokens[0]
        if classid in "\n".join(records):
            print("班级 %s 在题号为 %s 的题目处已有使用记录, 在pending list中记录"%(classid,id))
            oldinfo = [usage for usage in records if classid in usage]
            pending_list.append((id, line, oldinfo))
        else:
            records.append(line)
            print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,line))
    output = "".join(
        pid + "\n" + new + "\tFORCE\n" + "\n".join(oldlist) + "\n\n"
        for pid, new, oldlist in pending_list
    )
    return (field, id, content, output)
def AppendUsageData2024(prodict, field_id_and_content):
    """Append usage records, detecting near-duplicates per class.

    Each content line is normalised to tab-separated form. A line containing
    "FORCE" (any case) is upper-cased, stripped of that marker and appended
    unconditionally. Otherwise, in this order:

    1. the line is skipped when it (minus a leading date of four or more
       digits) already occurs in the stored records;
    2. it is skipped when a stored usage (read from the "usages" key) of the
       same class lies within 7 days and ``usagelistdifference`` of the
       difficulties is below 0.05;
    3. it is queued as pending when any stored usage has the same class;
    4. it is appended.

    Args:
        prodict: problem dict keyed by id.
        field_id_and_content: ``(field, id, content)``.

    Returns:
        ``(field, id, content, output)`` where ``output`` lists, for every
        pending record, the id, the new line marked with FORCE and the stored
        lines mentioning the same class.
    """
    field, id, content = field_id_and_content
    pending_list = []
    week = 7*86400
    for raw in content.strip().split("\n"):
        line = re.sub(r"\s+", r"\t", raw.strip())
        if "FORCE" in line.upper():
            line = re.sub(r"\s+", r"\t", re.sub(r"FORCE", "", line.upper())).strip()
            prodict[id][field].append(line)
            print(f"&&&&&&&&&&&& 已于 {id} 的 {field} 字段执行强制添加, 内容为 {line} &&&&&&")
            continue
        time_stripped = re.sub(r"^\d{4,}\t", "", line)
        if time_stripped in "\n".join(prodict[id][field]):
            print(f"******题号 {id} 的 {field} 字段, 内容 {line} 已存在")
            continue
        usage = parseUsage(line)
        oldusages = prodict[id]["usages"]
        near_duplicate = False
        for stored in oldusages:
            oldusage = parseUsage(stored)
            if (
                usage["classid"] == oldusage["classid"]
                and abs(datestrtotimestamp(usage["date"])-datestrtotimestamp(oldusage["date"])) < week
                and usagelistdifference(usage["difficulties"],oldusage["difficulties"])<0.05
            ):
                near_duplicate = True
                break
        if near_duplicate:
            print(f"######班级 {usage['classid']} 在题号为 {id} 的题目处已有非常类似的使用记录, 不作记录")
            continue
        same_class_seen = False
        for stored in oldusages:
            if usage["classid"] == parseUsage(stored)["classid"]:
                same_class_seen = True
                break
        if same_class_seen:
            print(f"!!!!!!班级 {usage['classid']} 在题号为 {id} 的题目处已有使用记录, 在pending list中记录")
            oldinfo = [v for v in prodict[id][field] if usage['classid'] in v]
            pending_list.append((id, line, oldinfo))
            continue
        prodict[id][field].append(line)
        print(f"已于 {id} 的 {field} 字段执行添加, 内容为 {line}")
    output = "".join(
        pid + "\n" + new + "\tFORCE\n" + "\n".join(oldlist) + "\n\n"
        for pid, new, oldlist in pending_list
    )
    return (field, id, content, output)
def generateUsageList(db, id):
    """Return the ``(date, classname, diff)`` rows of table ``usages`` for ``id``.

    ``db`` is an open connection; it is neither committed nor closed here.
    """
    cursor = db.cursor()
    query = "SELECT date,classname,diff FROM usages WHERE ID = %s;"
    cursor.execute(query, (id,))
    return cursor.fetchall()
def ImportMetadata(db, metadatafilepath):
    """Apply the modifications described in a metadata text file.

    The file is parsed by ``ObtainDatatoModify``; each item is dispatched by
    field name to the matching ``...inDB`` helper. Usage records that the
    helper reports as pending are shown afterwards, and each is imported only
    when the user answers "Y".

    Args:
        db: open database connection. It is rolled back and closed when an
            item carries an invalid field name; otherwise it is left open and
            this function does not commit.
        metadatafilepath: path of the metadata text file.

    Returns:
        ``1`` when an item has an invalid field name; otherwise the list of
        ids found in table ``logs`` from the moment this call started.
    """
    fields_by_method = {
        "objappend": ("objs", "obj"),
        "tagappend": ("tags", "tag"),
        "overwrite": ("ans", "solution", "space"),
        "usageappend": ("usages", "usage"),
        "mutualappend": ("same", "related", "unrelated"),
        "remarkappend": ("remark", "remarks"),
    }
    fieldsdict = {
        name: method
        for method, names in fields_by_method.items()
        for name in names
    }
    mycursor = db.cursor()
    # Remember when the import started so the log entries written below can be
    # selected afterwards.
    startdate = GetDate()
    starttime = GetTime()
    pendingusagelist = []
    mycursor.execute("SELECT ID FROM problems;")
    id_list_in_DB = [row[0] for row in mycursor.fetchall()]
    mycursor.execute("SELECT objid FROM lessonobj;")
    obj_list_in_DB = [row[0] for row in mycursor.fetchall()]
    data_to_modify = ObtainDatatoModify(metadatafilepath, fieldsdict, id_list_in_DB)
    print(data_to_modify)
    for item in tqdm.tqdm(data_to_modify):
        field = item[0]
        if field == "NotAField":
            print(f"项目 {item[1]} {item[2]} 字段名有误, 未对数据库作更改, 请检查!!!")
            db.rollback()
            db.close()
            return 1
        method = fieldsdict[field]
        if method == "overwrite":
            overwriteinDB(db, field, item[1], item[2])
        elif method == "mutualappend":
            mutualappendinDB(db, field, item[1], item[2], id_list_in_DB)
        elif method == "objappend":
            objappendinDB(db, item[1], item[2], obj_list_in_DB+["KNONE"])
        elif method == "tagappend":
            tagappendinDB(db, item[1], item[2])
        elif method == "remarkappend":
            remarkappendinDB(db, item[1], item[2])
        elif method == "usageappend":
            pendingusagelist.extend(usageappendinDB(db, item[1], item[2]))
    for new_record, old_records in ((entry[0], entry[1]) for entry in pendingusagelist):
        print(f"新增记录: {new_record}")
        print("原有记录:")
        for old in old_records:
            print(old)
        if input("是否导入该数据?(Y/[N])").upper() == "Y":
            forceusageappendinDB(db, new_record)
    sql = "SELECT id FROM logs WHERE DATE > %s OR DATE = %s AND TIME >= %s;"
    mycursor.execute(sql, (startdate, startdate, starttime))
    return [row[0] for row in mycursor.fetchall()]
def overwriteinDB(mydb, field, id, content_string):
    """Overwrite one column of one row of table ``problems`` and log it.

    The previous value is read first so the log entry can record
    ``old -> new``. ``field`` is interpolated into the SQL text as a column
    name; the caller commits.

    Raises:
        IndexError: when no row with this id exists.
    """
    cursor = mydb.cursor()
    cursor.execute(f"SELECT {field} FROM problems WHERE ID = %s;", (id,))
    previous = cursor.fetchall()[0][0]
    cursor.execute(f"UPDATE problems SET {field} = %s WHERE ID = %s;", (content_string, id))
    log_row = (
        GetDate(),
        GetTime(),
        get_git_username(),
        id,
        f"更改 {field} 数据",
        f"{previous} -> {content_string}",
    )
    cursor.execute(
        "INSERT INTO logs (DATE,TIME,username,ID,action,db_content) VALUE (%s,%s,%s,%s,%s,%s);",
        log_row,
    )
def mutualappendinDB(mydb, field, id, content_string, idlist):
    """Add same / related / unrelated pairs for problem ``id`` and log them.

    Each non-empty line of ``content_string`` is another id (padded to six
    digits). A pair is stored with the smaller id in column ``ID`` and is
    inserted only when both ids are in ``idlist`` and the pair is not yet in
    the table named ``field``. The caller commits.
    """
    cursor = mydb.cursor()
    column = f"{field.upper()}_ID"
    partner_ids = [
        entry.strip().zfill(6)
        for entry in content_string.split("\n")
        if not entry.strip() == ""
    ]
    for id2 in partner_ids:
        smallid = min(id, id2)
        bigid = max(id, id2)
        if not (smallid in idlist and bigid in idlist):
            print(f"{smallid} 或 {bigid} 中有错误, 请检查")
            continue
        pair = (smallid, bigid)
        cursor.execute(f"SELECT ID, {column} from {field} WHERE ID = %s AND {column} = %s;", pair)
        if len(cursor.fetchall()) != 0:
            # The pair is already recorded.
            continue
        cursor.execute(f"INSERT INTO {field} SET ID = %s, {column} = %s;", pair)
        log_row = (GetDate(), GetTime(), get_git_username(), id, f"新增 {field} 配对", f"{smallid} <-> {bigid}")
        cursor.execute(
            "INSERT INTO logs (DATE,TIME,username,ID,action,db_content) VALUE (%s,%s,%s,%s,%s,%s);",
            log_row,
        )
def objappendinDB(mydb,id,content_string,objlist): # add problem -> objective links
    """Link problem ``id`` to every objective id listed in ``content_string``.

    One objective id per line; ids are upper-cased before use.  Ids not
    found in ``objlist`` are reported on stdout and skipped.  A link is
    inserted into ``objcorresp`` (and logged) only if it does not exist yet.
    """
    cur = mydb.cursor()
    select_sql = "SELECT ID, obj_ID FROM objcorresp WHERE ID = %s AND obj_ID = %s;"
    insert_sql = "INSERT INTO objcorresp SET ID = %s, obj_ID = %s;"
    log_sql = "INSERT INTO logs (DATE,TIME,username,ID,action,db_content) VALUE (%s,%s,%s,%s,%s,%s);"
    for raw in (line.strip() for line in content_string.split("\n")):
        if raw == "":
            continue
        objid = raw.upper()
        if objid not in objlist:
            print(f"{objid} 有误, 请检查!!!")
            continue
        key = (id, objid)
        cur.execute(select_sql, key)
        if len(cur.fetchall()) != 0:
            continue  # link already recorded
        cur.execute(insert_sql, key)
        cur.execute(log_sql, (GetDate(), GetTime(), get_git_username(), id, "新增目标对应", f"{id}: {objid}"))
def tagappendinDB(mydb,id,content_string): # add problem -> tag links
    """Attach every tag listed in ``content_string`` (one per line) to problem ``id``.

    Blank lines are ignored.  A tag already attached to the problem is left
    alone; every new attachment is inserted into ``tagcorresp`` and logged.
    """
    cur = mydb.cursor()
    select_sql = "SELECT ID, tagname FROM tagcorresp WHERE ID = %s AND tagname = %s;"
    insert_sql = "INSERT INTO tagcorresp SET ID = %s, tagname = %s;"
    log_sql = "INSERT INTO logs (DATE,TIME,username,ID,action,db_content) VALUE (%s,%s,%s,%s,%s,%s);"
    tags = [line.strip() for line in content_string.split("\n") if line.strip() != ""]
    for tag in tags:
        key = (id, tag)
        cur.execute(select_sql, key)
        if len(cur.fetchall()) > 0:
            continue  # tag already attached
        cur.execute(insert_sql, key)
        cur.execute(log_sql, (GetDate(), GetTime(), get_git_username(), id, "新增标签", f"{id}: {tag}"))
def remarkappendinDB(mydb,id,content_string): # add remarks
    """Store every remark line in ``content_string`` for problem ``id``.

    Each non-blank line is split into ``(date, remark)`` by ``parseRemark``.
    A remark is inserted (and logged) when no existing row for the same id
    and date contains the remark text, or when the remark text is empty.
    """
    cur = mydb.cursor()
    select_sql = "SELECT ID,date,remark_content FROM remarks WHERE ID = %s AND date = %s AND remark_content LIKE %s;"
    insert_sql = "INSERT INTO remarks SET ID = %s, date = %s, remark_content = %s;"
    log_sql = "INSERT INTO logs (DATE,TIME,username,ID,action,db_content) VALUE (%s,%s,%s,%s,%s,%s);"
    lines = [entry.strip() for entry in content_string.split("\n") if entry.strip() != ""]
    for line in lines:
        date, remark = parseRemark(line)
        cur.execute(select_sql, (id, date, "%" + remark + "%"))
        already_there = len(cur.fetchall()) != 0
        # an empty remark is always written, even if a matching row exists
        if already_there and len(remark) != 0:
            continue
        cur.execute(insert_sql, (id, date, remark))
        cur.execute(log_sql, (GetDate(), GetTime(), get_git_username(), id, "新增备注", f"{id}: {remark}"))
def detectsimUsage(tuple1,tuple2): # are two usage records near-duplicates?
    """Return True when two ``(id, date, classname, diff)`` records look like the same usage.

    The records must share id and class name, be dated less than seven days
    apart, and have a ``usagelistdifference`` below 0.05.
    """
    id_a, date_a, class_a, diff_a = tuple1
    id_b, date_b, class_b, diff_b = tuple2
    if not (id_a == id_b and class_a == class_b):
        return False
    one_week = 7*86400  # seconds
    if not (abs(datestrtotimestamp(date_a)-datestrtotimestamp(date_b)) < one_week):
        return False
    if usagelistdifference(diff_a,diff_b)<0.05:
        return True
    return False
def tostrlist(difflist):
    """Render score rates as a JSON-style list of quoted three-decimal strings."""
    quoted = []
    for data in difflist:
        quoted.append('"' + f"{float(data):.3f}" + '"')
    return "[" + ", ".join(quoted) + "]"
def forceusageappendinDB(mydb,rec): # insert a usage record unconditionally
    """Insert the usage record ``rec`` = (id, date, classname, diff) without duplicate checks.

    The record is echoed to stdout, written to ``usages`` and logged.
    """
    cur = mydb.cursor()
    id, date, classname, diff = rec[0], rec[1], rec[2], rec[3]
    print(id,date,classname,diff)
    cur.execute(
        "INSERT INTO usages (ID,date,classname,diff) VALUE (%s,%s,%s,%s);",
        (id, date, classname, tostrlist(diff)),
    )
    cur.execute(
        "INSERT INTO logs (DATE,TIME,username,ID,action,db_content) VALUE (%s,%s,%s,%s,%s,%s);",
        (GetDate(), GetTime(), get_git_username(), id, "强制新增使用记录", f"{id}\t{date}\t{classname}\t{tostrlist(diff)}"),
    )
def usageappendinDB(mydb,id,content_string): # add usage records
    """Add the usage lines in ``content_string`` to problem ``id``.

    Every non-blank line is parsed with ``parseUsage``.  If the problem has
    no usage yet for that class, the record is inserted and logged straight
    away.  Otherwise the record is compared with the existing ones: a
    near-duplicate (see ``detectsimUsage``) is dropped, anything else is
    returned for manual confirmation.

    Returns a list of ``((id, date, classname, diff), existing_rows)`` pairs
    that still need a decision.
    """
    cur = mydb.cursor()
    pending = []
    select_sql = "SELECT ID,date,classname,diff FROM usages WHERE ID = %s and classname = %s;"
    insert_sql = "INSERT INTO usages (ID,date,classname,diff) VALUE (%s,%s,%s,%s);"
    log_sql = "INSERT INTO logs (DATE,TIME,username,ID,action,db_content) VALUE (%s,%s,%s,%s,%s,%s);"
    records = [parseUsage(line) for line in content_string.split("\n") if line.strip() != ""]
    for rec in records:
        date, classname, diff = rec["date"], rec["classid"], rec["difficulties"]
        cur.execute(select_sql, (id, classname))
        existing = cur.fetchall()
        if len(existing) == 0:
            cur.execute(insert_sql, (id, date, classname, tostrlist(diff)))
            cur.execute(log_sql, (GetDate(), GetTime(), get_git_username(), id, "新增使用记录", f"{id}\t{date}\t{classname}\t{tostrlist(diff)}"))
            continue
        candidate = (id, date, classname, diff)
        has_twin = False
        for row in existing:
            fields = list(row)
            # the stored diff column is a JSON list; decode it before comparing
            if detectsimUsage(candidate, fields[:3] + [json.loads(fields[3])]):
                has_twin = True
                break
        if not has_twin:
            pending.append((candidate, existing.copy()))
    return pending
def parseUsage(usagestring): # tokenize one usage line
    """Split a single whitespace-separated usage line into its parts.

    The optional first token is taken as the date when it contains ``20``
    followed by 2-6 digits and no CJK character; the next token is taken as
    the class id when it contains one of the characters 高一二三班.  All
    remaining tokens are score rates.

    Returns a dict with keys ``date``, ``classid``, ``subproblems`` (number
    of score rates), ``difficulties`` (list of floats) and ``glossdiff``
    (their mean, rounded to three decimals).
    """
    tokens = re.split(r"\s+", usagestring.strip())
    first = tokens[0]
    is_date = re.search(r"20[\d]{2,6}", first) is not None and re.search(r"[\u4e00-\u9fa5]", first) is None
    date = tokens.pop(0) if is_date else ""
    classid = tokens.pop(0) if re.search(r"[高一二三班]", tokens[0]) is not None else ""
    count = len(tokens)
    rates = [float(token) for token in tokens]
    return {
        "date": date,
        "classid": classid,
        "subproblems": count,
        "difficulties": rates,
        "glossdiff": round(sum(rates)/count,3),
    }
def StringSubstitute(regex,template,stringtuple): # fill the regex-matched placeholders of template, in order
    """Replace the i-th match of ``regex`` in ``template`` with ``stringtuple[i]``.

    Each match is replaced exactly once and by position, so templates whose
    placeholders have identical text are filled correctly and text that was
    just inserted is never rescanned (the previous ``str.replace`` based
    version rewrote every occurrence of the matched text at once).
    The whole match is replaced, even if ``regex`` contains groups.

    Returns the substituted string, or prints "长度不符." and returns 1 when
    the number of matches differs from ``len(stringtuple)``.
    """
    matches = list(re.finditer(regex, template))
    if len(matches) != len(stringtuple):
        print("长度不符.")
        return 1
    pieces = []
    position = 0
    for found, replacement in zip(matches, stringtuple):
        pieces.append(template[position:found.start()])
        pieces.append(replacement)
        position = found.end()
    pieces.append(template[position:])
    return "".join(pieces)
def GenerateTexDataforEditing(id_string,prodict,templatefilepath,editor): # build an editable .tex document for the given ids
    """Return the text of a .tex file listing the problems selected by ``id_string``.

    For every id the enumerate item carries the six-digit id, the content,
    the answer, the solution/hint and the remark taken from ``prodict``.
    The body, prefixed with the editor's name, is substituted into the
    single placeholder of the template read from ``templatefilepath``.
    """
    pieces = ["编辑者: " + editor + "\n\n\\begin{enumerate}\n\n"]
    for pid in generate_number_set(id_string,prodict):
        entry = prodict[pid]
        content, answer, solution, remark = entry["content"], entry["ans"], entry["solution"], entry["remark"]
        pieces.append("".join([
            "\\item (%s) "%str(pid).zfill(6), content, "\n\n",
            "答案: ", answer, "\n\n",
            "解答与提示: ", solution, "\n\n",
            "备注: ", remark, "\n\n",
        ]))
    pieces.append("\\end{enumerate}")
    template = ReadTextFile(templatefilepath)
    return StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",template,["".join(pieces)])
def GetEditedProblems(string): # split the edited .tex text into per-problem tuples
    """Extract the enumerate body of ``string`` and split it by ``(dddddd)`` id markers.

    Returns a dict mapping each six-digit id to the 4-tuple produced by
    ``parseProblem`` (content, answer, solution/hint, remark).
    """
    # a trailing \item sentinel lets the lazy pattern below capture the last problem too
    body = re.findall(r"\\begin\{enumerate\}([\s\S]*)\\end\{enumerate\}",string)[0]+r"\item"
    return {
        pid: parseProblem(text)
        for pid, text in re.findall(r"\((\d{6})\)([\s\S]*?)\\item",body)
    }
def parseProblem(string): # split a four-paragraph problem text
    """Split ``string`` into (content, answer, solution/hint, remark).

    Paragraphs are separated by two or more newlines; the text must contain
    exactly four of them.  The labels "答案:", "解答与提示:" and "备注:"
    are removed from the second, third and fourth paragraph respectively.
    """
    normalized = re.sub(r"\n{2,}","\n\n",string.strip())
    content, ans, solution, remark = normalized.split("\n\n")
    return (
        content.strip(),
        ans.replace("答案:","").strip(),
        solution.replace("解答与提示:","").strip(),
        remark.replace("备注:","").strip(),
    )
def ModifyProblembyTeX(id_string,prodict,toeditfilepath,editor): # edit content / answer / solution / remark in VS Code
    """Round-trip the selected problems through an editable .tex file.

    The problems are written to ``toeditfilepath``, the file is opened with
    ``code -w`` (which blocks until the editor tab is closed), and the
    edited text is parsed back.  Every problem whose content, answer,
    solution or remark changed is updated in ``prodict`` and gets a
    "date<TAB>editor" entry appended to its ``edit`` list.  The dictionary
    is then saved to ../题库0.3/Problems.json.

    Returns ``generate_exp`` applied to the list of edited ids.
    """
    # write the file to be edited
    SaveTextFile(GenerateTexDataforEditing(id_string,prodict,"模板文件/题目编辑.txt",editor),toeditfilepath)
    print("编辑完成后保存文件, 关闭文件继续...")
    os.system("code -w -g "+toeditfilepath)
    # read the edited file back
    stamp = GetDate() + "\t" + editor
    revised = GetEditedProblems(ReadTextFile(toeditfilepath))
    changed_ids = []
    for pid, fields in revised.items():
        content, ans, solution, remark = fields
        unchanged = (
            content == prodict[pid]["content"]
            and ans == prodict[pid]["ans"]
            and solution == prodict[pid]["solution"]
            and remark == prodict[pid]["remark"]
        )
        if unchanged:
            continue
        prodict[pid]["content"] = content
        prodict[pid]["ans"] = ans
        prodict[pid]["solution"] = solution
        prodict[pid]["remark"] = remark
        prodict[pid]["edit"] = prodict[pid]["edit"] + [stamp]
        changed_ids.append(pid)
    save_dict(prodict,"../题库0.3/Problems.json")
    return generate_exp(changed_ids)
def RemoveMutualLink(metadata,prodict): # remove two-way links (same, related, unrelated)
    """Remove every mutual link between the id pairs listed in ``metadata``.

    ``metadata`` holds one pair per line, the two ids separated by spaces or
    tabs; ids are left-padded to six digits.  Blank lines are skipped (the
    previous emptiness test compared a string with ``[]``, was never true,
    and made blank lines crash on unpacking).  For each pair both ids are
    removed from each other's ``same``, ``related`` and ``unrelated`` lists
    in ``prodict``.

    Returns 0.  Raises ValueError for a non-blank line that does not contain
    exactly two ids, and KeyError for an id missing from ``prodict``.
    """
    for raw_line in metadata.split("\n"):
        line = re.sub(r"[\s]+",r"\t",raw_line).strip()
        if not line:
            continue
        id1, id2 = (part.zfill(6) for part in line.split("\t"))
        for field in ("same","related","unrelated"):
            if id1 in prodict[id2][field]:
                prodict[id2][field].remove(id1)
            if id2 in prodict[id1][field]:
                prodict[id1][field].remove(id2)
    return 0
def jsonEditProblemMetadata(id_string,prodict,editor): # edit full problem metadata as JSON in VS Code
    """Let the user edit the selected problems as JSON and merge the changes back.

    Shallow copies of the selected entries are saved to
    临时文件/problem_edit.json, the file is opened with ``code -w`` (blocks
    until the tab is closed) and then reloaded.  Every entry that differs
    from ``prodict`` replaces the original and gets a "date<TAB>editor"
    item appended to its ``edit`` list.

    Returns ``generate_exp`` applied to the list of edited ids.
    """
    jsontoeditpath = "临时文件/problem_edit.json"
    snapshot = {pid: prodict[pid].copy() for pid in generate_number_set(id_string,prodict)}
    save_dict(snapshot,jsontoeditpath)
    os.system("code -w -g "+jsontoeditpath)
    edited = load_dict(jsontoeditpath)
    changed_ids = []
    for pid, new_entry in edited.items():
        if prodict[pid] == new_entry:
            continue
        prodict[pid] = new_entry.copy()
        prodict[pid]["edit"] = prodict[pid]["edit"] + [GetDate() + "\t" + editor]
        changed_ids.append(pid)
    return(generate_exp(changed_ids))
def GetSamePairs(prodict): # collect groups of problems marked as identical
    """Return one ``[id, *same_ids]`` group per problem that has both
    a non-empty ``same`` list and at least one usage record."""
    return [
        [pid] + entry["same"]
        for pid, entry in prodict.items()
        if len(entry["same"]) > 0 and len(entry["usages"]) != 0
    ]
def ShareSameUsagesinDB(id1,id2,db): # flagged by the author as problematic / to be revised
    """Copy usage rows between ``id1`` and ``id2`` so both end up with the union.

    Rows present for one id but not for the other are printed, inserted for
    the other id and logged.  Always returns 0.
    """
    cur = db.cursor()
    fetch_sql = "SELECT date,classname,diff FROM usages WHERE ID = (%s);"
    usage_sql = "INSERT INTO usages (ID,date,classname,diff) VALUE (%s,%s,%s,%s);"
    log_sql = "INSERT INTO logs (DATE,TIME,username,action,db_content) VALUE (%s,%s,%s,%s,%s);"

    cur.execute(fetch_sql, (id1,))
    rows_of_first = cur.fetchall()
    cur.execute(fetch_sql, (id2,))
    rows_of_second = cur.fetchall()

    def _copy_missing(src_id, dst_id, src_rows, dst_rows):
        # give dst_id every row that only src_id has
        for item in set(src_rows)-set(dst_rows):
            print(f"{src_id} -> {dst_id}: {item}")
            cur.execute(usage_sql, (dst_id,item[0],item[1],item[2]))
            cur.execute(log_sql, (GetDate(),GetTime(),get_git_username(),"分享使用数据",f"{src_id} -> {dst_id}: {item}"))

    _copy_missing(id1, id2, rows_of_first, rows_of_second)
    _copy_missing(id2, id1, rows_of_second, rows_of_first)
    return 0
def ShareSameUsages(prodict,same_group): # share usage records inside one group
    """Give every id in ``same_group`` the order-preserving union of the group's usages.

    Returns ``(same_group, merged_usages)``; each problem receives its own
    copy of the merged list.
    """
    merged = []
    for pid in same_group:
        for usage in prodict[pid]["usages"]:
            if usage not in merged:
                merged.append(usage)
    for pid in same_group:
        prodict[pid]["usages"] = list(merged)
    return((same_group,merged))
def SortUsages(prodict): # sort each problem's usage records
    """Replace every problem's ``usages`` list with a sorted copy.  Returns 0."""
    for entry in prodict.values():
        entry["usages"] = sorted(entry["usages"])
    return 0
def SortUsagesbyAverage(theusages): # sort usage records by mean score rate, best first
    """Return the usage records ordered by descending mean of their score rates.

    Each record's third element is the list of score rates.  Records whose
    mean is above 0.999 (everything answered correctly) are left out.
    """
    scored = []
    for index, usage in enumerate(theusages):
        mean_rate = np.mean([float(d) for d in usage[2]])
        if mean_rate <= 0.999:
            scored.append((index, mean_rate))
    scored = sorted(scored, key = lambda pair: pair[1], reverse = True)
    return([theusages[index] for index, _ in scored])
def StripSuffix(string, suf_words): # trim whitespace and cut off suffix words
    """Strip ``string`` and, for each regex in ``suf_words``, delete the match
    together with the run of non-whitespace characters that ends the string."""
    result = string.strip()
    for suffix in suf_words:
        result = re.sub(suffix+r"[\S]*$","",result)
    return(result)
def generate_origin(origin_dict):
    """Format an origin record as text.

    Starts from the "来源" value, appends "试题<题号>" when a "题号" key is
    present and "-改编自<前序>" when a "前序" key is present.
    """
    text = origin_dict["来源"]
    for key, template in (("题号", "{}试题{}"), ("前序", "{}-改编自{}")):
        if key in origin_dict:
            text = template.format(text, origin_dict[key])
    return text
def MatchConditioninMariaDB(condition_list):
    """Return the set of problem ids that satisfy every condition, evaluated in the database.

    ``condition_list`` is an iterable of ``(field, reverse, regexp)``.  The
    starting set is every problem whose content does not match 'OBS'.  For
    each condition the ids whose ``field`` matches ``regexp`` are computed;
    they are intersected with the running set, or subtracted from it when
    ``reverse`` is true.  For "same"/"related" an id matches when a linked
    id matches the pattern; for "usages" a match in any of date, classname
    or diff counts.

    The pattern is bound as a query parameter rather than pasted into the
    SQL text, so quotes and backslashes in it reach REGEXP verbatim and
    cannot alter the statement.  Table and column names come only from the
    fixed mapping below.  The connection is closed even if a query fails.

    Raises KeyError for a field that is not in the mapping.
    """
    corr_dict = {
        "content": ('problems','content'),
        "objs": ('objcorresp','obj_ID'),
        "tags": ('tagcorresp','tagname'),
        "origin": ('problems','origin'),
        "genre": ('problems','genre'),
        "ans": ('problems','ans'),
        "solution": ('problems','solution'),
        "remark": ('remarks','remark_content'),
        "same": ('same','ID','SAME_ID'),
        "related": ('related','ID','RELATED_ID'),
        "usages": ('usages','date','classname','diff')
    }
    mydb = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    try:
        mycursor = mydb.cursor()
        mycursor.execute("SELECT ID FROM problems WHERE NOT content REGEXP 'OBS';")
        match = {row[0] for row in mycursor.fetchall()}
        for field, reverse, regexp in condition_list:
            table = corr_dict[field][0]
            if field in ("same","related"):
                col0, col1 = corr_dict[field][1:]
                # links are stored once per pair, so look in both directions
                queries = [
                    f"SELECT {col1} FROM {table} WHERE {col0} REGEXP %s;",
                    f"SELECT {col0} FROM {table} WHERE {col1} REGEXP %s;",
                ]
            else:
                queries = [f"SELECT ID FROM {table} WHERE {column} REGEXP %s;" for column in corr_dict[field][1:]]
            newmatch = set()
            for sql in queries:
                mycursor.execute(sql, (regexp,))
                newmatch.update(row[0] for row in mycursor.fetchall())
            if reverse:
                match = match - newmatch
            else:
                match = match & newmatch
    finally:
        mydb.close()
    return match
def MatchCondition2024(problem,condition_list): # does the problem dict satisfy every filter?
    """Check ``problem`` against a list of ``(field, flag_not, matchexp)`` filters.

    ``matchexp`` is a comma-separated list of regexes.  List fields are
    joined with newlines, the "origin" field is rendered with
    ``generate_origin`` and anything else is converted with ``str``.
    A positive filter (``flag_not == False``) fails when none of the regexes
    is found; a negative filter (``flag_not == True``) fails when all of
    them are found.  Returns False on the first failing filter, else True.
    """
    for field, flag_not, matchexp in condition_list:
        keywords = [part.strip() for part in matchexp.split(",")]
        value = problem[field]
        if type(value) == list:
            data = "\n".join(value)
        elif field == "origin":
            data = generate_origin(value)
        else:
            data = str(value)
        if flag_not == False:
            found = [re.findall(keyword,data) != [] for keyword in keywords]
            if not any(found):
                return False
        if flag_not == True:
            found = [re.findall(keyword,data) != [] for keyword in keywords]
            if all(found):
                return False
    return True
def MatchCondition(problem,condition_dict): # does the problem dict satisfy every filter in condition_dict?
    """Check ``problem`` against the filters in ``condition_dict``.

    Keys are field names, optionally followed by digits (several filters on
    one field) and optionally containing ``_not`` (negative filter).  Values
    are a regex or a list of regexes; the value ``[""]`` disables a filter.
    List fields are joined with newlines, other fields go through ``str``.

    A positive filter holds when at least one regex is found; a negative
    filter holds when none is found.  A plain string is treated as a single
    regex for both kinds (previously a string given to a ``_not`` filter was
    iterated character by character).

    Returns True when every active filter holds.  Every active filter is
    evaluated, so a key naming an unknown field raises KeyError.
    """
    match = True
    for key, conditions in condition_dict.items():
        if conditions == [""]:
            continue  # filter switched off
        if isinstance(conditions, str):
            conditions = [conditions]
        negative = "_not" in key
        field = re.sub(r"\d","",key.replace("_not",""))
        value = problem[field]
        text = "\n".join(value) if isinstance(value, list) else str(value)
        hit = any(re.search(cond,text) is not None for cond in conditions)
        # positive filters need a hit, negative filters must not have one
        if hit == negative:
            match = False
    return match
def get_color(value): # map a score rate to an rgb triple
    """Return "{r,g,0}" (three decimals each) for a score rate.

    From 0.5 upwards red is 1 and green is 2-2*value; below 0.5 red is
    2*value and green is 1.
    """
    rate = float(value)
    upper_half = rate >= 0.5
    red = 1 if upper_half else 2*rate
    green = 2-2*rate if upper_half else 1
    return "{%.3f,%.3f,0}"%(red,green)
def GenerateValueColorCode(matchobj): # LaTeX colour box for one matched score rate
    """Regex-substitution callback: turn group 1 of ``matchobj`` into a tab
    followed by an \\fcolorbox with a black frame and the background colour
    given by ``get_color``."""
    rate = matchobj.group(1)
    background = get_color(rate)
    return "\t\\fcolorbox[rgb]{{0,0,0}}{}{{{}}}".format(background, rate)
def StudentsGetAfterContent(id,prodict,answered,spaceflag): # text that follows a problem in the student handout
    """Return the answer line and/or vertical space that follow problem ``id``.

    With ``answered`` set and an "ans" field present, the answer is printed
    in red, or "暂无答案" in blue when it is empty.  With ``spaceflag`` set
    and a non-empty "space" field, a \\vspace* of that size is appended.
    """
    entry = prodict[id]
    pieces = []
    if "ans" in entry and answered:
        colour_and_text = ("red",entry["ans"]) if entry["ans"] != "" else ("blue","暂无答案")
        pieces.append("答案: \\textcolor{%s}{%s}\n\n"%colour_and_text)
    if spaceflag and "space" in entry and entry["space"] != "":
        pieces.append("\\vspace*{%s}\n\n"%entry["space"])
    return "".join(pieces)
def XeLaTeXCompile(filedir,filename,times = 2): # run XeLaTeX on filename, writing output into filedir
    """Compile ``filename`` with XeLaTeX ``times`` times (two passes resolve references and page numbers).

    The command is passed as an argument list, so directory and file names
    containing spaces or shell metacharacters are handled correctly.
    A success message is printed after every pass that exits with status 0.

    Returns True only if every pass succeeded; a failing pass, or xelatex
    not being startable at all, yields False.
    """
    command = ["xelatex", "-interaction=batchmode", f"-output-directory={filedir}", filename]
    flagsuc = True
    for i in range(times):
        try:
            returncode = subprocess.run(command).returncode
        except OSError:
            # xelatex missing or not executable: treat like a failed pass
            returncode = None
        if returncode == 0:
            print(f"{i+1} 次编译 {filename} 成功.")
        else:
            flagsuc = False
    return flagsuc
def XeLaTeXTest(editedid,adict,objdict,misc,templatepath,outdir,outfile): # compile-test the problems named by editedid
    """Typeset the problems selected by ``editedid`` and report whether XeLaTeX succeeds.

    The template is read from ``templatepath``; on macOS the fandol font set
    is selected and \\setCJKmainfont is commented out.  For the teacher
    version the student-id/name line is replaced by the school name.  The
    title placeholder is filled with "测试" and the body placeholder with an
    enumerate built by ``generateLaTeXBodyContent``.  The result is saved
    as ``outdir/outfile`` and compiled.

    Returns the boolean result of ``XeLaTeXCompile``.
    """
    template = ReadTextFile(templatepath)
    if sys.platform == "darwin":
        template = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",template)
        template = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",template)
    if misc["教师版"] == True:
        template = template.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")
    items = [generateLaTeXBodyContent(pid,adict,objdict,misc) for pid in generate_number_set(editedid)]
    body = "\\begin{enumerate}\n\n" + "".join(items) + "\n\n\\end{enumerate}\n\n"
    document = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",template,("测试",body))
    SaveTextFile(document,os.path.join(outdir,outfile))
    return XeLaTeXCompile(outdir,outfile)
def GenerateSectionBodyStringfromMariaDB(cursor,problems,sectiontitles,misc,consecutivenumbering= True): # build the .tex body, one section per entry
    """Return the LaTeX body made of one \\section per entry of ``problems``.

    The first character of an entry decides its kind:
    a digit  - a problem id expression, expanded with ``generate_number_set``
               and rendered by ``generateLaTeXBodyContentfromMariaDB``;
    "K"      - a comma-separated list of lesson objective ids;
    "B"      - a comma-separated list of basic-knowledge ids.
    Entries of any other kind produce no output.  With
    ``consecutivenumbering`` the problem numbering continues across problem
    sections, otherwise each section restarts at 1.
    """
    chunks = []
    count = 0
    for index, entry in enumerate(problems):
        kind = entry[0]
        if kind in "0123456789":
            idlist = generate_number_set(entry)
            start = count if consecutivenumbering else 0
            chunks.append(f"\\section{{{sectiontitles[index]}}}\n\\begin{{enumerate}}\n\\setcounter{{enumi}}{{{start}}}\n\n")
            for pid in idlist:
                count += 1
                chunks.append(generateLaTeXBodyContentfromMariaDB(cursor,pid,misc))
            chunks.append("\\end{enumerate}\n\n")
        elif kind == "K" or kind == "B":
            render = generateLaTeXobjsfromMariaDB if kind == "K" else generateLaTeXbnsfromMariaDB
            idlist = entry.split(",")
            chunks.append(f"\\section{{{sectiontitles[index]}}}\n\\begin{{enumerate}}\n\n")
            for item_id in idlist:
                chunks.append(render(cursor,item_id))
            chunks.append("\\end{enumerate}\n\n")
    return "".join(chunks)
# if not objlist == []:
# output += "\\section{课时目标}\n\n"
# output += "\\begin{enumerate}\n\n"
# for objid in objlist:
# output += "\\item %s \\ %s \n\n"%(objid,objdict[objid]["content"])
# output += "\\end{enumerate}\n\n" #生成学习目标
# if not bnlist == []:
# output += "\\section{双基梳理}\n\n"
# basic_body = ""
# for bnid in bnlist:
# basic_body += "\\item %s\n\n"%basicknowledgedict[bnid]["content"]
# output += "\\begin{enumerate}\n\n %s\n\n\\end{enumerate}\n\n"%basic_body #生成基础知识梳理
def GenerateStudentBodyString(problems,sectiontitles,pro_dict,consecutivenumbering= True,answered= True,spaceflag=True): # build the student .tex body
    """Return the LaTeX body of the student handout.

    When ``problems`` and ``sectiontitles`` have the same length, every
    entry becomes its own \\section; numbering continues across sections if
    ``consecutivenumbering`` is set.  Otherwise all entries are joined with
    commas and typeset as one untitled enumerate.  ``answered`` and
    ``spaceflag`` are forwarded to ``StudentsGetAfterContent``.
    """
    def _item(pid):
        after = StudentsGetAfterContent(pid,pro_dict,answered,spaceflag)
        return "\\item {\\tiny(%s)} %s\n\n%s"%(pid,pro_dict[pid]["content"],after)

    if len(problems) != len(sectiontitles):
        idlist = generate_number_set(",".join(problems),pro_dict)
        return "\\begin{enumerate}\n\n" + "".join(_item(pid) for pid in idlist) + "\\end{enumerate}\n\n"

    chunks = []
    count = 0
    for expression, title in zip(problems, sectiontitles):
        idlist = generate_number_set(expression,pro_dict)
        chunks.append("\\section{%s}\n\\begin{enumerate}\n\\setcounter{enumi}{%d}\n\n"%(title,count if consecutivenumbering else 0))
        for pid in idlist:
            count += 1
            chunks.append(_item(pid))
        chunks.append("\\end{enumerate}\n\n")
    return "".join(chunks)
def TeachersGetAfterContentColored(id,prodict,objdict,topandbottomusagestuple = (3,3), showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True): # teacher-version extras in coloured boxes
    """Return the teacher-version material that follows problem ``id``, each part in a tcolorbox.

    The parts appear in the order objectives, tags, answer, solution/hint,
    usage records, origin, remark; each ``show*`` flag switches one part off.
    Empty answer / solution / remark are shown as 暂无答案 / 暂无解答 /
    暂无备注.  ``topandbottomusagestuple`` is printed in the usage box
    header and passed on to ``GenerateUsageTexCode``.

    NOTE(review): ``GenerateUsageTexCode`` as defined in this file takes
    ``(usage_list_raw, misc)``, while this function calls it with three
    arguments - confirm whether this legacy path is still used.
    """
    def _boxed(colour, inner):
        # tcolorbox with identical background and frame colour
        return "\\begin{tcolorbox}[colback = " + colour + ", colframe = " + colour + ", breakable]\n" + inner + "\n\n\\end{tcolorbox}\n"

    objs = ""
    if showobjs:
        objs = GenerateObjTexCode(id,prodict,objdict)
    tags = ""
    if showtags:
        tags = _boxed("orange!10!white", "标签: %s")%("; ".join(prodict[id]["tags"]))
    ans = ""
    if showans:
        ans = _boxed("red!10!white", "答案: %s")%(prodict[id]["ans"] if prodict[id]["ans"] != "" else "暂无答案")
    solution = ""
    if showsolution:
        solution = _boxed("green!10!white", "解答或提示: %s")%(prodict[id]["solution"] if prodict[id]["solution"] != "" else "暂无解答")
    origin = ""
    if showorigin:
        origin = _boxed("yellow!30!white", "来源: %s")%prodict[id]["origin"]
    remark = ""
    if showremark:
        remark = _boxed("cyan!30!white", "备注: %s")%(prodict[id]["remark"] if prodict[id]["remark"] != "" else "暂无备注")
    usages = ""
    if showusages:
        usages = _boxed("lime!10!white", "使用记录(%s):\n\n%s")%(str(topandbottomusagestuple),GenerateUsageTexCode(id,prodict,topandbottomusagestuple))
    return "".join([objs, tags, ans, solution, usages, origin, remark])
def TeachersGetAfterContentPlain(id,prodict,objdict,topandbottomusagestuple = (3,3), showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True): # teacher-version extras as plain coloured text
    """Return the teacher-version material that follows problem ``id`` as plain paragraphs.

    The parts appear in the order objectives, tags, answer, solution/hint,
    usage records, origin, remark; each ``show*`` flag switches one part off.
    Empty answer / solution / remark are shown as 暂无答案 / 暂无解答 /
    暂无备注, and an empty tag list as a bare "标签: " line.

    NOTE(review): ``GenerateUsageTexCode`` as defined in this file takes
    ``(usage_list_raw, misc)``, while this function calls it with three
    arguments - confirm whether this legacy path is still used.
    """
    objs = ""
    if showobjs:
        objs = "目标:\n\n%s\n\n"%GenerateObjTexCode(id,prodict,objdict)
    tags = ""
    if showtags:
        if prodict[id]["tags"] != []:
            tags = "标签: \\textcolor[rgb]{0.5,0.6,0.8}{%s}\n\n"%("; ".join(prodict[id]["tags"]))
        else:
            tags = "标签: \n\n"
    ans = ""
    if showans:
        colour_and_text = ("red",prodict[id]["ans"]) if prodict[id]["ans"] != "" else ("blue","暂无答案")
        ans = "答案: \\textcolor{%s}{%s}\n\n"%colour_and_text
    solution = ""
    if showsolution:
        solution = "解答或提示: \\textcolor{magenta}{%s}\n\n"%(prodict[id]["solution"] if prodict[id]["solution"] != "" else "暂无解答")
    origin = ""
    if showorigin:
        origin = "来源: %s\n\n"%prodict[id]["origin"]
    remark = ""
    if showremark:
        remark = "备注: \\textcolor[rgb]{0,0.5,0.2}{%s}\n\n"%(prodict[id]["remark"] if prodict[id]["remark"] != "" else "暂无备注")
    usages = ""
    if showusages:
        usages = "使用记录:\n\n%s\n\n"%GenerateUsageTexCode(id,prodict,topandbottomusagestuple)
    return "".join([objs, tags, ans, solution, usages, origin, remark])
def GenerateObjTexCode(id,prodict,objdict): # boxed list of the problem's learning objectives
    """Return a blue tcolorbox listing the objectives linked to problem ``id``.

    Each objective is printed as "<id><TAB><content>" with the content taken
    from ``objdict``; the pseudo objective KNONE (any case) is printed as
    "无当前有效关联目标".  A problem without objectives yields "暂未关联".
    """
    linked = prodict[id]["objs"]
    pieces = ["\n\\begin{tcolorbox}[colback = blue!10!white, colframe = blue!10!white, breakable]"]
    for objid in linked:
        description = "无当前有效关联目标" if objid.upper() == "KNONE" else objdict[objid]["content"]
        pieces.append("\n\n%s\t%s"%(objid,description))
    if linked == []:
        pieces.append("暂未关联\n")
    pieces.append("\n\n\\end{tcolorbox}\n")
    return "".join(pieces)
def ChooseUsage(usages,topandbottomusagestuple): # pick the best and worst usage records
    """Select which usage records to display.

    ``topandbottomusagestuple`` is ``(top, bottom)``.  If either number is
    negative the records are returned unchanged.  Otherwise the records are
    ranked by ``SortUsagesbyAverage`` (best first, full-score records
    removed) and the ``top`` best plus the ``bottom`` worst are returned;
    when that would cover the whole ranking, the whole ranking is returned.

    The bottom slice is taken relative to the ranked list.  The earlier code
    indexed it with the length of the unfiltered input, which lost the worst
    records whenever full-score records had been filtered out.
    """
    top,bottom = topandbottomusagestuple
    if top < 0 or bottom < 0:
        return usages
    ranked = SortUsagesbyAverage(usages)
    if top + bottom >= len(ranked):
        return ranked
    return ranked[:top] + ranked[len(ranked)-bottom:]
def GenerateUsageTexCode(usage_list_raw,misc): # LaTeX code for the (reduced) usage record list
    """Return LaTeX code for the usage records selected by the display settings.

    ``usage_list_raw`` holds ``(date, classname, rates)`` records where
    ``rates`` is a list of strings.  ``misc["字段显示设置"]["届别"]`` is a
    list of regexes: when non-empty, only records whose class name matches
    one of them are kept.  ``misc["字段显示设置"]["使用记录"]`` is the
    (top, bottom) pair handed to ``ChooseUsage``.  Every record becomes one
    tab-separated paragraph, and each rate preceded by a tab is replaced by
    a colour box via ``GenerateValueColorCode``.
    """
    settings = misc["字段显示设置"]
    grade = settings["届别"]
    topandbottomusagestuple = settings["使用记录"]
    if len(grade) == 0:
        usage_list = usage_list_raw.copy()
    else:
        usage_list = [u for u in usage_list_raw if any(re.search(g,u[1]) is not None for g in grade)]
    usages = ChooseUsage(usage_list,topandbottomusagestuple)
    usages_str = ["\t".join((u[0],u[1],"\t".join(u[2]))) for u in usages]
    # raw string: same pattern as before, without the invalid "\d" / "\." escapes
    return re.sub(r"\t([\d]\.[\d]{0,10})",GenerateValueColorCode,"\n\n".join(usages_str))
def generateLaTeXBodyContentfromMariaDB(cursor,id,misc): # one \item for problem id, read from the database
    """Return the LaTeX ``\\item`` for problem ``id`` according to the display settings in ``misc``.

    Example ``misc``::

        {
            "教师版": True,
            "字段显示设置": {
                "题后空间": True, "课时目标": True, "题目标签": True,
                "答案": True, "解答与提示": True,
                "使用记录": (3,-1),   # (best a, worst b); -2 hides the part
                "来源": True, "备注": True, "届别": []
            }
        }

    Student version ("教师版" missing or False): the item shows the id in
    tiny type and the content, optionally followed by the answer ("答案")
    and the vertical space ("题后空间").  Only the ``problems`` table is
    queried in this mode.

    Teacher version: "字段显示设置" is required; the item is followed, in
    this order and each only if enabled, by remarks, objectives, tags,
    answer, solution/hint, usage records and origin.

    NULL ``ans``, ``solution`` and ``space`` columns are treated as empty
    strings and a NULL/empty ``origin`` as "no origin recorded".
    An id that is not in the database yields an item saying "题号有误!!!".
    """
    id = str(id).zfill(6)
    val = (id,)
    cursor.execute("SELECT content,ans,solution,origin,space FROM problems WHERE ID = %s;",val)
    ret_list = cursor.fetchall()
    if len(ret_list) == 0:
        return f"\\item ({id}) 题号有误!!!"
    content,ans,solution,raw_origin,space = ret_list[0]
    ans = "" if ans is None else ans
    solution = "" if solution is None else solution
    space = "" if space is None else space
    origin = json.loads(raw_origin) if raw_origin else {}

    if not "教师版" in misc or misc["教师版"] == False:
        shown = misc["字段显示设置"] if "字段显示设置" in misc else {}
        output = f"\n\\item {{\\tiny ({id})}} {content}"
        if "答案" in shown and shown["答案"] == True:
            ans = ans if len(ans) > 0 else "暂无答案"
            output += f"\n\n答案: \\textcolor{{{('red' if ans != '暂无答案' else 'blue')}}}{{{ans}}}\n\n"
        if "题后空间" in shown and shown["题后空间"] == True:
            output += f"\n\n\\vspace*{{{space}}}\n\n" if len(space) > 0 else ""
        return output

    # --- teacher version: collect objectives, tags, remarks and usages ---
    cursor.execute("SELECT obj_ID FROM objcorresp WHERE ID = %s;",val)
    obj_list = []
    for obj_row in cursor.fetchall():
        obj_id = obj_row[0]
        if not obj_id.upper() == "KNONE":
            cursor.execute("SELECT obj_content FROM lessonobj WHERE objid = %s;",(obj_id,))
            obj_list.append(f"{obj_id}\t{cursor.fetchall()[0][0]}")
        else:
            obj_list.append(f"{obj_id}\t暂无目标对应")
    cursor.execute("SELECT tagname FROM tagcorresp WHERE ID = %s;",val)
    tag_list = [t[0] for t in cursor.fetchall()]
    cursor.execute("SELECT date,remark_content FROM remarks WHERE ID = %s;",val)
    remark_list = sorted([f"{t[0]}\t{t[1]}" for t in cursor.fetchall()])
    cursor.execute("SELECT date,classname,diff FROM usages WHERE ID = %s;",val)
    # a NULL date sorts as the empty string; diff is stored as a JSON list
    usages_raw = sorted([(u[0] if not u[0] is None else "",u[1],json.loads(u[2])) for u in cursor.fetchall()])

    shown = misc["字段显示设置"]
    output = f"\n\\item ({id}) {content}"
    if "备注" in shown and shown["备注"] == True:
        remark = r"\\".join(remark_list) if len(remark_list) > 0 else "暂无备注"
        output += f"\n\n备注: \\textcolor[rgb]{{0,0.5,0.2}}{{{remark}}}\n\n"
    if "课时目标" in shown and shown["课时目标"] == True:
        obj_string = "暂无目标" if len(obj_list) == 0 else '\n\n'.join(obj_list)
        output += f"\n\n目标:\n\n{obj_string}\n\n"
    if "题目标签" in shown and shown["题目标签"] == True:
        tags = "暂无标签" if len(tag_list) == 0 else '; '.join(tag_list)
        output += f"\n\n标签: \\textcolor[rgb]{{0.5,0.6,0.8}}{{{tags}}}\n\n"
    if "答案" in shown and shown["答案"] == True:
        ans = ans if len(ans) > 0 else "暂无答案"
        output += f"\n\n答案: \\textcolor{{{('red' if ans != '暂无答案' else 'blue')}}}{{{ans}}}\n\n"
    if "解答与提示" in shown and shown["解答与提示"] == True:
        solution = solution if len(solution) > 0 else "暂无解答或提示"
        output += f"\n\n解答或提示: \\textcolor{{magenta}}{{{solution}}}\n\n"
    if "使用记录" in shown and type(shown["使用记录"]) in (list,tuple) and not -2 in shown["使用记录"]:
        output += f"\n\n使用记录:\n\n{GenerateUsageTexCode(usages_raw,misc)}\n\n"
    if "来源" in shown and shown["来源"] == True:
        origin = generate_origin(origin) if len(origin) > 0 else "未记录来源"
        output += f"\n\n来源: {origin}\n\n"
    return output
def generateLaTeXobjsfromMariaDB(cursor,objid):
    """Return the LaTeX ``\\item`` for lesson objective ``objid``.

    The id is stripped and upper-cased.  The pseudo objective KNONE yields
    an empty string.  An id that is not in ``lessonobj`` yields an item
    carrying the message "课时目标编号有误" instead of failing with an
    IndexError, mirroring how unknown basic-knowledge ids are reported.
    """
    objid = str(objid).upper().strip()
    if objid == "KNONE":
        return ""
    cursor.execute("SELECT obj_content FROM lessonobj WHERE objid = %s;",(objid,))
    rows = cursor.fetchall()
    obj_content = rows[0][0] if rows else "课时目标编号有误"
    return f"\\item {objid} {obj_content}\n\n"
def generateLaTeXbnsfromMariaDB(cursor,bnid):
    """Return the LaTeX ``\\item`` for basic-knowledge entry ``bnid``.

    The id is stripped and upper-cased.  An id that is not in
    ``basic_knowledges`` yields an item with the message
    "基础知识目标编号有误".  Only the "no such row" case is turned into that
    message; database errors are no longer swallowed by a bare ``except``.
    """
    bnid = str(bnid).upper().strip()
    cursor.execute("SELECT bn_content FROM basic_knowledges WHERE bn_id = %s;",(bnid,))
    rows = cursor.fetchall()
    bn_content = rows[0][0] if rows else "基础知识目标编号有误"
    return f"\\item {bn_content}\n\n"
def GenerateTeacherBodyString(problems,sectiontitles,prodict,objdict,consecutivenumbering = True,topandbottomusagestuple = (3,3),sectionname = "section", showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True, colored = False): # build the teacher .tex body, every part optional
    """Return the LaTeX body of the teacher handout.

    When ``problems`` and ``sectiontitles`` have the same length, every
    entry becomes its own ``\\<sectionname>``; numbering continues across
    sections if ``consecutivenumbering`` is set.  Otherwise all entries are
    joined with commas and typeset as one untitled enumerate.  The material
    after each problem is produced by ``TeachersGetAfterContentColored``
    when ``colored`` is true, else by ``TeachersGetAfterContentPlain``; the
    ``show*`` flags and ``topandbottomusagestuple`` are forwarded to it.
    """
    render_after = TeachersGetAfterContentColored if colored else TeachersGetAfterContentPlain
    display = dict(
        topandbottomusagestuple = topandbottomusagestuple,
        showobjs = showobjs, showtags = showtags, showans = showans,
        showsolution = showsolution, showusages = showusages,
        showorigin = showorigin, showremark = showremark,
    )

    def _item(pid):
        after = render_after(pid,prodict,objdict,**display)
        return "\\item (%s) %s\n\n%s"%(pid,prodict[pid]["content"],after)

    if len(problems) != len(sectiontitles):
        idlist = generate_number_set(",".join(problems),prodict)
        return "\\begin{enumerate}\n\n" + "".join(_item(pid) for pid in idlist) + "\\end{enumerate}\n\n"

    chunks = []
    count = 0
    for expression, title in zip(problems, sectiontitles):
        idlist = generate_number_set(expression,prodict)
        chunks.append("\\%s{%s}\n\\begin{enumerate}\n\\setcounter{enumi}{%d}\n\n"%(sectionname,title,count if consecutivenumbering else 0))
        for pid in idlist:
            count += 1
            chunks.append(_item(pid))
        chunks.append("\\end{enumerate}\n\n")
    return "".join(chunks)
def GetValidTexFiles(path): # find .tex files that contain problem ids
    """Walk ``path`` and collect the .tex files containing at least one "(dddddd)" id.

    Returns ``(files, directories)``: the sorted, de-duplicated list of such
    files and the sorted, de-duplicated list of directories holding them.
    """
    tex_files = set()
    tex_dirs = set()
    for root, _folders, files in os.walk(path):
        for name in files:
            if not name.endswith(".tex"):
                continue
            full_path = os.path.join(root,name)
            if len(re.findall(r"\((\d{6})\)",ReadTextFile(full_path))) > 0:
                tex_dirs.add(root)
                tex_files.add(full_path)
    return (sorted(tex_files),sorted(tex_dirs))
def generate_lessonid_list(lessonidstr,lessonsdict): # lesson ids selected by prefix criteria
    """Return the keys of ``lessonsdict`` that start with one of the given prefixes.

    ``lessonidstr`` is a comma-separated list.  An item without ":" is used
    as a prefix as is.  An item with ":" is a range: it is upper-cased, its
    "K" letters are removed, it is expanded with ``generate_number_set``
    and each result contributes the prefix "K" + its last four characters.
    The result keeps the iteration order of ``lessonsdict``.
    """
    prefixes = []
    for criterion in lessonidstr.split(","):
        if ":" in criterion:
            expanded = generate_number_set(criterion.upper().replace("K",""))
            prefixes.extend(["K"+number[-4:] for number in expanded])
        else:
            prefixes.append(criterion)
    return [lid for lid in lessonsdict if any(lid.startswith(prefix) for prefix in prefixes)]
def GenerateLessonPreparationDraft(notetitle, outputdir, adict, prodict, objdict, lessonsdict, topandbottomusagestuple = (3,3), showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True):
    """Build and compile a lesson-preparation draft, one section per lesson in adict.

    For every lesson id in adict the draft gets: a section heading (the id and
    lessonsdict[lid]["name"]), the objectives from objdict whose id starts
    with the lesson id, and a teacher-version problem list rendered by
    GenerateTeacherBodyString from adict[lid]. The show* flags and
    topandbottomusagestuple are passed through to GenerateTeacherBodyString.

    The body is substituted into the template "模板文件/讲义模板.txt", saved as
    <outputdir>/<notetitle>.tex and compiled with XeLaTeXCompile.

    Returns 0 when compilation succeeds, otherwise the generated LaTeX source.
    """
    output = "\\tableofcontents\n\\newpage\n\n"
    outputfilepath = os.path.join(outputdir,notetitle+".tex")
    for lid in adict:
        # "\\ " spells the LaTeX control space explicitly; the former "\ " was an
        # invalid Python escape sequence that produced the same characters.
        output += "\\section{%s \\ %s}\n\n"%(lid,lessonsdict[lid]["name"])
        output += "\\subsection{课时目标}\n\n"
        output += "\\begin{enumerate}\n\n"
        for objid in objdict:
            if objid.startswith(lid):
                output += "\\item %s \\ %s \n\n"%(objid,objdict[objid]["content"])
        output += "\\end{enumerate}\n\n"
        output += GenerateTeacherBodyString([adict[lid]],["参考题目资源"],prodict,objdict,sectionname = "subsection",topandbottomusagestuple= topandbottomusagestuple, showobjs = showobjs, showtags = showtags, showans = showans, showsolution = showsolution, showusages = showusages, showorigin = showorigin, showremark = showremark)
        output += "\\newpage\n\n"
    latex_raw = ReadTextFile("模板文件/讲义模板.txt")
    latex_raw = latex_raw.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学") # replace the student-number/name line of the template
    if sys.platform == "darwin": # on macOS switch to the fandol font set and comment out \setCJKmainfont
        latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
        latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",latex_raw,(notetitle,output)) # fill in the title and the body
    SaveTextFile(latex_data,outputfilepath) # save the .tex file
    if XeLaTeXCompile(outputdir,notetitle+".tex"):
        print("编译成功")
        return 0
    else:
        print("编译失败")
        return latex_data # hand back the source that failed to compile
def GenerateLessonPreparation(notetitle, outputdir, adict, prodict, objdict, basicknowledgedict, homeworkspaces = False):
    """Build and compile a teaching plan with one section per lesson in adict.

    Each adict[key] must provide "name", "objects", "basicknowledges",
    "examples_basic", "examples_adv", "work_inclass", "homework" and
    "remarks". Lessons whose name contains "复习" only get a review-problem
    list built from "homework"; all other lessons get objectives, basic
    knowledge, required and optional examples, in-class work, remarks and
    homework. homeworkspaces is forwarded as spaceflag for the homework list
    and also puts the homework on a new page.

    The body is substituted into the template "模板文件/讲义模板.txt", saved as
    <outputdir>/<notetitle>.tex and compiled with XeLaTeXCompile.

    Returns 0 when compilation succeeds, otherwise the generated LaTeX source.
    """
    output = "\\tableofcontents\n\\newpage\n\n"
    outputfilepath = os.path.join(outputdir,notetitle+".tex")
    for key in adict:
        lessonid = key
        lessonname = adict[key]["name"]
        objlist = adict[key]["objects"]
        bnlist = adict[key]["basicknowledges"]
        eblist = adict[key]["examples_basic"]
        ealist = adict[key]["examples_adv"]
        worklist = adict[key]["work_inclass"]
        homeworklist = adict[key]["homework"]
        remarks = adict[key]["remarks"]
        if not "复习" in lessonname:
            # "\\ " spells the LaTeX control space explicitly; the former "\ " was
            # an invalid Python escape sequence that produced the same characters.
            output += "\\section{%s \\ %s}\n\n"%(lessonid,lessonname)
            if not objlist == []:
                output += "\\subsection{课时目标}\n\n"
                output += "\\begin{enumerate}\n\n"
                for objid in objlist:
                    output += "\\item %s \\ %s \n\n"%(objid,objdict[objid]["content"])
                output += "\\end{enumerate}\n\n" # learning objectives
            if not bnlist == []:
                output += "\\subsection{双基梳理}\n\n"
                basic_body = ""
                for bnid in bnlist:
                    basic_body += "\\item %s\n\n"%basicknowledgedict[bnid]["content"]
                output += "\\begin{enumerate}\n\n %s\n\n\\end{enumerate}\n\n"%basic_body # basic knowledge review
            output += "\\subsection{知识体验}\n\n"
            output += "\\subsubsection{必讲例题}\n\n"
            output += GenerateStudentBodyString(eblist,[],prodict,consecutivenumbering=False,answered=False)
            output += "\\subsubsection{选讲例题}\n\n"
            output += GenerateStudentBodyString(ealist,[],prodict,consecutivenumbering=False,answered=False)
            output += "\\subsection{巩固新知}\n\n"
            output += GenerateStudentBodyString(worklist,[],prodict,consecutivenumbering=False,answered=False)
            if not remarks == "":
                output += "\\subsection{备注}\n\n"
                output += remarks + "\n\n"
            if homeworkspaces:
                output += "\\newpage\n\n"
            output += "\\subsection{课后作业}\n\n"
            output += GenerateStudentBodyString(homeworklist,[],prodict,consecutivenumbering=False,answered=False,spaceflag=homeworkspaces)
            output += "\\newpage\n\n"
        else:
            output += "\\section{%s \\ %s}\n\n"%(lessonid,lessonname)
            output += "\\subsection{复习题}\n\n"
            output += GenerateStudentBodyString(homeworklist,[],prodict,consecutivenumbering=False,answered = False,spaceflag=homeworkspaces)
            output += "\\newpage\n\n"
    latex_raw = ReadTextFile("模板文件/讲义模板.txt")
    latex_raw = latex_raw.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学") # replace the student-number/name line of the template
    if sys.platform == "darwin": # on macOS switch to the fandol font set and comment out \setCJKmainfont
        latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
        latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",latex_raw,(notetitle,output)) # fill in the title and the body
    SaveTextFile(latex_data,outputfilepath) # save the .tex file
    if XeLaTeXCompile(outputdir,notetitle+".tex"):
        print("编译成功")
        return 0
    else:
        print("编译失败")
        return latex_data # hand back the source that failed to compile
def GenerateSingleLessonPreparation(lessonid, outputdir, adict, prodict, objdict, basicknowledgedict, homeworkspaces = False, filename = "default"):
    """Build and compile the teaching plan of the single lesson adict[lessonid].

    The document title is the lesson id followed by the lesson name. With
    filename == "default" the file is named <lessonid><name>.tex, otherwise
    <filename>.tex; it is written into outputdir.

    A lesson whose name contains "复习" only gets a review-problem list built
    from its "homework"; any other lesson gets objectives, basic knowledge,
    the basic examples, in-class work, remarks and homework. homeworkspaces
    is forwarded as spaceflag and also starts the homework on a new page.

    Returns 0 when XeLaTeXCompile succeeds, otherwise the generated LaTeX source.
    """
    lesson = adict[lessonid]
    notetitle = lessonid + r" \ " + lesson["name"]
    if filename == "default":
        filename = lessonid + lesson["name"]+".tex"
    else:
        filename = filename+".tex"
    outputfilepath = os.path.join(outputdir,filename)
    lessonname = lesson["name"]
    objlist = lesson["objects"]
    bnlist = lesson["basicknowledges"]
    eblist = lesson["examples_basic"]
    ealist = lesson["examples_adv"]  # read, but not rendered in this single-lesson layout
    worklist = lesson["work_inclass"]
    homeworklist = lesson["homework"]
    remarks = lesson["remarks"]

    pieces = []
    if "复习" in lessonname:
        pieces.append("\\section{复习题}\n\n")
        pieces.append(GenerateStudentBodyString(homeworklist,[],prodict,consecutivenumbering=False,answered = False,spaceflag=homeworkspaces))
    else:
        if objlist != []:
            # learning objectives
            pieces.append("\\section{课时目标}\n\n")
            pieces.append("\\begin{enumerate}\n\n")
            for objid in objlist:
                pieces.append("\\item %s \\ %s \n\n"%(objid,objdict[objid]["content"]))
            pieces.append("\\end{enumerate}\n\n")
        if bnlist != []:
            # basic knowledge review
            pieces.append("\\section{双基梳理}\n\n")
            basic_body = "".join("\\item %s\n\n"%basicknowledgedict[bnid]["content"] for bnid in bnlist)
            pieces.append("\\begin{enumerate}\n\n %s\n\n\\end{enumerate}\n\n"%basic_body)
        pieces.append("\\section{知识体验}\n\n")
        pieces.append(GenerateStudentBodyString(eblist,[],prodict,consecutivenumbering=False,answered=False))
        pieces.append("\\section{巩固新知}\n\n")
        pieces.append(GenerateStudentBodyString(worklist,[],prodict,consecutivenumbering=False,answered=False))
        if remarks != "":
            pieces.append("\\section{备注}\n\n")
            pieces.append(remarks + "\n\n")
        if homeworkspaces:
            pieces.append("\\newpage\n\n")
        pieces.append("\\section{课后作业}\n\n")
        pieces.append(GenerateStudentBodyString(homeworklist,[],prodict,consecutivenumbering=False,answered=False,spaceflag=homeworkspaces))
    output = "".join(pieces)

    template = ReadTextFile("模板文件/讲义模板.txt")
    # replace the student-number/name line of the template
    template = template.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")
    if sys.platform == "darwin":
        # on macOS switch to the fandol font set and comment out \setCJKmainfont
        template = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",template)
        template = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",template)
    # fill in the title and the body, then save the .tex file
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",template,(notetitle,output))
    SaveTextFile(latex_data,outputfilepath)
    if not XeLaTeXCompile(outputdir,filename):
        print("编译失败")
        return latex_data  # hand back the source that failed to compile
    print("编译成功")
    return 0
def getlineindex(string,filepath):
    """Return the 1-based number of the first line of filepath that contains string.

    The file is read as UTF-8. When no line contains string, 1 is returned.
    """
    with open(filepath,"r",encoding = "u8") as handle:
        all_lines = handle.readlines()
    for lineno, text in enumerate(all_lines, start=1):
        if string in text:
            return lineno
    return 1
def getCopy():
    """Return the clipboard text with every carriage return ("\\r") removed."""
    return pyperclip.paste().replace("\r","")
def setCopy(string): # write string to the clipboard
    """Put string on the system clipboard via pyperclip."""
    pyperclip.copy(string)
def itemizeProblems(string):
    """Replace leading problem numbers by a LaTeX \\item, line by line.

    Two number formats are recognised: a number at the start of a line
    (optionally prefixed by "例") followed by ".", "、" or whitespace, and a
    bracketed "[例 n]" label anywhere in the line. Each is replaced by a
    newline followed by "\\item ".

    Lines that contain "&" are left untouched unless they also contain
    "tabular", "array" or "cases".
    """
    number_pattern = re.compile(r"(?:(?:^|\n)+[例]*[\s]*[0-9]+[\s]*[\.、\s]+|\[[\s]*例[\s]*[0-9]*[\s]*\][\s]*)")

    def convert(line):
        has_env_keyword = "tabular" in line or "array" in line or "cases" in line
        if "&" in line and not has_env_keyword:
            return line
        return number_pattern.sub("\\n\\\\item ", line)

    return "\n".join(convert(line) for line in string.split("\n"))
def RefinePunctuations(raw_string):
    """Strip raw_string and convert full-width punctuation to ASCII equivalents.

    Returns the stripped string in which each full-width mark listed below is
    replaced by its ASCII form (some followed by a space).

    NOTE(review): in the reviewed copy of this file every key of the mapping
    was an empty string (the full-width characters had been lost), which made
    re.sub("", ...) insert text between all characters. The keys below were
    reconstructed from their ASCII targets and are written as \\u escapes so
    they cannot be lost again; please confirm them against the author's
    original, in particular the space and the two bracket characters.
    """
    fullwidth_to_ascii = {
        "\u3000": " ",   # ideographic space
        "\uff0e": ". ",  # fullwidth full stop
        "\u3002": ". ",  # ideographic full stop
        "\uff0c": ", ",  # fullwidth comma
        "\uff1a": ": ",  # fullwidth colon
        "\uff1b": "; ",  # fullwidth semicolon
        "\uff08": "(",   # fullwidth left parenthesis
        "\uff09": ")",   # fullwidth right parenthesis
        "\uff1f": "? ",  # fullwidth question mark
        "\u201c": "``",  # left double quotation mark
        "\u201d": "''",  # right double quotation mark
        "\u3010": "[",   # left black lenticular bracket
        "\u3011": "]",   # right black lenticular bracket
        "\uff01": "!",   # fullwidth exclamation mark
    }
    # One pass over the text; every key is a single character, so a translation
    # table is equivalent to the former chain of substitutions.
    return raw_string.strip().translate(str.maketrans(fullwidth_to_ascii))
def RefineMathpix(raw_string): # post-process a string produced by Mathpix
    """Normalise Mathpix output into the LaTeX conventions used by this file.

    The steps run in a fixed order and later patterns rely on the output of
    earlier ones: remove \\left/\\right, convert full-width punctuation,
    rewrite some LaTeX commands, fix common OCR misreadings, strip test-paper
    boilerplate (RipTestPaperDesc), turn problem numbers into \\item
    (itemizeProblems), format multiple-choice options as \\fourch, insert
    \\blank / \\bracket placeholders, and finally tidy math environments
    (SplitMathComma, MergeMathComma, RefineCasesEnv, RefineChineseComma,
    RefineInterval).

    NOTE(review): in the reviewed copy of this file all keys of the
    punctuation table were empty strings (the full-width characters had been
    lost), which made re.sub("", ...) insert text between all characters. The
    keys below were reconstructed from their ASCII targets and written as
    \\u escapes; please confirm them against the author's original.
    """
    puctuationsfulltosemi = {
        "\u3000": " ",   # ideographic space
        "\uff0e": ". ",  # fullwidth full stop
        "\u3002": ". ",  # ideographic full stop
        "\uff0c": ", ",  # fullwidth comma
        "\uff1a": ": ",  # fullwidth colon
        "\uff1b": "; ",  # fullwidth semicolon
        "\uff08": "(",   # fullwidth left parenthesis
        "\uff09": ")",   # fullwidth right parenthesis
        "\uff1f": "? ",  # fullwidth question mark
        "\u201c": "``",  # left double quotation mark
        "\u201d": "''",  # right double quotation mark
        "\u3010": "[",   # left black lenticular bracket
        "\u3011": "]",   # right black lenticular bracket
    }
    replacestrings = {r"\\overparen": r"\\overset\\frown", "eqslant": "eq", r"\\vec": r"\\overrightarrow ", r"\\bar": r"\\overline", r"\\lim": r"\\displaystyle\\lim", r"\\sum":r"\\displaystyle\\sum", r"\\prod":r"\\displaystyle\\prod", r"\\mid":"|", r"\^\{\\prime\}":"'",r"e\^":r"\\mathrm{e}^",r"/\s*/":r"\\parallel "}
    # NOTE(review): two entries below have an empty key and an empty value in
    # the reviewed copy (characters lost); as written they are no-ops. Restore
    # the intended misrecognition pairs if they are known.
    wrongrecog = {"":"","[粗秿]圆":"椭圆","投郑":"投掷","抛郑":"抛掷","范目":"范围","":"","末见":"未见","末成":"未成","针角":"钝角","幕函数":"幂函数","末知":"未知","阀值":"阈值","祖[桓晅]":"祖暅","图象":"图像","末使用":"未使用","末来":"未来","竟赛":"竞赛"}
    string = raw_string
    string = re.sub(r"\\left([\.\(\[|])",lambda matchobj: "" if matchobj.group(1) == "." else matchobj.group(1),string) # drop \left before a bracket
    string = re.sub(r"\\right([\.\)\]|])",lambda matchobj: "" if matchobj.group(1) == "." else matchobj.group(1),string) # drop \right before a bracket
    string = re.sub(r"\\left\\\{","\\{",string) # drop \left before a brace
    string = re.sub(r"\\right\\\}","\\}",string) # drop \right before a brace
    string = string.replace("\\left\\langle","\\langle").replace("\\right\\rangle","\\rangle")
    # full-width punctuation -> ASCII; every key is one character, so a single
    # translate pass replaces the former per-key re.sub loop
    string = string.translate(str.maketrans(puctuationsfulltosemi))
    for s in replacestrings:
        string = re.sub(s,replacestrings[s],string) # switch some LaTeX commands to the preferred ones
    for s in wrongrecog:
        string = re.sub(s,wrongrecog[s],string) # fix common Mathpix misrecognitions
    string = RipTestPaperDesc(string)
    string = re.sub(r"[\s]*(``|''|\}|\{) *",lambda matchobj: matchobj.group(1),string) # remove spaces around quotes and braces
    string = itemizeProblems(string) # problem numbers -> \item
    string = re.sub(r"\$\$","$",string) # display math -> inline math
    string = re.sub(r"\$\s+\$"," ",string) # remove superfluous $ pairs
    string = re.sub(r"([,.:;?!])\$",lambda x:x.group(1)+" $",string) # separate punctuation from a following $
    string = re.sub(r"\\left\\lvert\\,","|",string) # fix wrong code for |
    string = re.sub(r"\\frac",r"\\dfrac",string) # frac -> dfrac
    string = re.sub(r"\n(?:A\.|\(A\))([\s\S]*?)(?:B\.|\(B\))([\s\S]*?)(?:C\.|\(C\))([\s\S]*?)(?:D\.|\(D\))([\s\S]*?)\n",lambda matchobj: "\n\\fourch{%s}{%s}{%s}{%s}\n"%(matchobj.group(1).strip(),matchobj.group(2).strip(),matchobj.group(3).strip(),matchobj.group(4).strip()),string+"\n\n") # multiple-choice options -> \fourch
    string = re.sub(r"[\.;](\}\{|\}\n)",lambda matchobj: matchobj.group(1),string) # drop a trailing period/semicolon inside an option
    string = re.sub(r"\n\s+","\n",string) # collapse extra line breaks
    string = re.sub(r"\\q+uad","",string) # remove \quad, \qquad, ...
    string = re.sub(r"\$\\q+uad\$","",string) # remove \quad, \qquad, ... wrapped in $
    string = re.sub(r"~","",string) # remove ~
    string = re.sub(r"\s*\([\s]{,10}\)",r"\\bracket{20}",string)
    string = re.sub(r"\s*\\bracket\{20\}\s*\n",r"\\bracket{20}.\n",string) # empty parentheses at the end of a line
    for i in range(2):
        string = re.sub(r"(\^|_)\{([0-9a-zA-Z])\}",lambda matchobj: matchobj.group(1)+matchobj.group(2),string) # drop needless braces
    for i in range(2):
        string = re.sub(r"([0-9A-Z])\s+([0-9A-Z])",lambda matchobj: matchobj.group(1)+matchobj.group(2),string) # merge stray spaces inside formulas
    for i in range(2):
        string = re.sub(r"([\u4e00-\u9fa5])\s+([\u4e00-\u9fa5])",lambda matchobj: matchobj.group(1)+matchobj.group(2),string) # merge stray spaces inside Chinese text
    string = re.sub(r"([CP])(_[^_\^]{,5}\^)",lambda x:r"\mathrm{"+x.group(1)+"}"+x.group(2),string) # upright C/P for combinations and permutations
    string = re.sub(r"([\^_])\{([-]{0,1})\\dfrac",lambda matchobj: matchobj.group(1) + "{" + matchobj.group(2) + "\\frac",string)
    string = re.sub(r"ldots",r"cdots",string) # ldots -> cdots
    string = re.sub(r"\\text\s*\{,\s*当\s*\}",", ",string)
    string = re.sub(r"[\\{]*\\(begin|end)\{array\}(?:\{[rcl]*\}){0,1}",lambda matchobj: "\\" + matchobj.group(1) + "{cases}",string) # array environment -> cases environment
    string = re.sub(r"([\u4e00-\u9fa5\$])[\s]*\n\\item",lambda matchobj: matchobj.group(1)+"\\blank{50}.\n\\item",string) # add a fill-in blank after a line ending in Chinese or a formula
    string = re.sub(r"(是|为|(?:=\$))\s*([,.;\n])",lambda matchobj: matchobj.group(1) + "\\blank{50}" + ("." if matchobj.group(2) == "\n" else "") + matchobj.group(2),string) # add blanks where the sentence asks for a value
    string = re.sub(r"(有|为|(?:确定))([种条个])",lambda matchobj: matchobj.group(1) + "\\blank{50}" + matchobj.group(2),string) # add blanks where the sentence asks for a count
    if not "\\bracket" in string:
        string = re.sub(r"([\u4e00-\u9fa5\$])(?:\\bracket\{20\})*[\.]*[\s]*\n\\fourch",lambda matchobj: matchobj.group(1)+"\\bracket{20}.\n\\fourch",string) # add the answer bracket before the options
    string = re.sub(r"\\bracket\{(\d*)\}\$",lambda matchobj: "$"+"\\bracket{"+matchobj.group(1)+"}",string) # swap a bracket that sits just before a $
    string = re.sub(r"(%[^\n]*)\\blank\{50\}\.",lambda matchobj:matchobj.group(1),string) # no \blank{50} on comment lines
    string = re.sub(r"\\blank\{50\}\.\s*\n\\fourch",r"\\bracket{20}."+"\n"+r"\\fourch",string) # blank before options -> bracket
    string = re.sub(r"[\\\\]*\n(\(\d{1,2}\))(?:(?!\n)\s)*",lambda matchobj: "\\\\\n"+matchobj.group(1)+" ",string) # LaTeX line break before a sub-question on a new line
    string = re.sub(r"\(([^\(\)]*(?:\\in|=|\\ge|\\le|\\ne|>|<|\\parallel|\\perp)[^\(\)]*)\)\$",lambda matchobj: "$($" + matchobj.group(1) + "$)",string) # move the parentheses of a trailing range statement out of math mode
    string = re.sub(r"\$\$\(","(",string) # remove the double $ created by the previous step
    string = re.sub(r"\(\s\$","($",string) # remove the space before a formula that opens a parenthesis
    string = re.sub(r"\(\\begin\{cases\}",r"\\begin{pmatrix}",string) # fix a wrong (\begin{cases}
    string = re.sub(r"\\end\{cases\}\)",r"\\end{pmatrix}",string) # fix a wrong \end{cases})
    string = SplitMathComma(string) # split math at "," where appropriate
    string = MergeMathComma(string) # merge math around "," where appropriate
    string = RefineCasesEnv(string) # tidy cases environments
    string = RefineChineseComma(string) # handle the ideographic comma
    string = RefineInterval(string) # fix the order of $ and interval brackets
    string = string.replace("$$"," ")
    string = re.sub(r"\s+\\blank\{50\}",r"\\blank{50}",string)
    string = re.sub(r"\s+\\bracket\{20\}",r"\\bracket{20}",string)
    return string
def RefineInterval(string):
    """Move interval brackets that ended up outside math mode back inside it.

    Handles two shapes: an opening "[" or "(" directly before the "$" that
    starts a formula ending in "+\\infty$)", and a closing "]" or ")" directly
    after the "$" that ends a formula starting with "($-\\infty". Finally
    "($-\\infty$, $+\\infty$)" is merged into one formula. Every rewritten
    formula is printed.

    Occurrences whose partner "$" or bracket would lie outside the string are
    left unchanged (previously they raised IndexError or read an unrelated
    character via a negative index).
    """
    newstring = string
    # Closing side first. Each rewrite keeps the string length, so the match
    # positions collected up front stay valid; they are still handled from the
    # last to the first.
    poslist = [(match.start(),match.end()) for match in re.finditer(r"\+\\infty\$\)",newstring)]
    for s,e in reversed(poslist):
        leftdollar = newstring.rfind("$",0,s)
        leftbracket = leftdollar-1
        if leftbracket < 0: # no "$" before the match, or nothing in front of that "$"
            continue
        lb = newstring[leftbracket]
        if lb in ["[","("]: # swap the bracket with the "$" on both ends
            print("$"+lb+newstring[leftbracket+2:e-2] + ")$")
            newstring = newstring[:leftbracket] + "$"+lb+newstring[leftbracket+2:e-2] + ")$" + newstring[e:]
    poslist = [(match.start(),match.end()) for match in re.finditer(r"\(\$-\\infty",newstring)]
    for s,e in reversed(poslist):
        rightdollar = newstring.find("$",e)
        rightbracket = rightdollar+1
        if rightdollar == -1 or rightbracket >= len(newstring): # no closing "$", or nothing after it
            continue
        rb = newstring[rightbracket]
        if rb in ["]",")"]:
            print("$("+newstring[s+2:rightbracket-1]+rb+"$")
            newstring = newstring[:s]+"$("+newstring[s+2:rightbracket-1]+rb+"$"+newstring[rightbracket+1:]
    newstring = newstring.replace("($-\\infty$, $+\\infty$)","$(-\\infty, +\\infty)$")
    return newstring
def RefineChineseComma(string): # wrap an ideographic comma that sits inside math mode in "$"
    """Take every ideographic comma that lies inside a math environment out of it.

    A comma is considered inside math when an odd number of "$" precedes it;
    it is then replaced by "$、$" and the whitespace around it is removed.
    Commas outside math are left alone.

    NOTE(review): the search pattern was an empty string in the reviewed copy
    (character lost), which matched at every position and corrupted formulas.
    It is restored here as U+3001, the character the replacement inserts;
    please confirm against the author's original.
    """
    comma = "\u3001" # ideographic comma
    CommaPositions = [match.start() for match in re.finditer(comma,string)]
    # Work from the end so earlier positions are not shifted by the edits.
    for pos in reversed(CommaPositions):
        if string[:pos].count("$") % 2 == 1:
            string = string[:pos].rstrip() + "$" + comma + "$" + string[(pos+1):].lstrip()
    return string
def SplitMathComma(string): # decide for each "," inside math whether to split the formula there, and split if so
    """Split inline formulas at commas that separate independent statements.

    The string is scanned from the last comma to the first. lmatter holds the
    part still to be scanned, rmatter the part already processed. A comma
    preceded by an odd number of "$" is treated as lying inside a formula;
    that formula is cut into "...$, $..." unless one of the checks below
    vetoes the split. The function returns lmatter+rmatter.
    """
    # (opening, closing) regex pairs; a comma enclosed by such a pair must not be split
    UnsplittedPairs = [(r"[\(\[]",r"[\)\]]"),(r"\{",r"\}"),(r"\\begin\{cases\}",r"\\end\{cases\}")]
    lmatter,rmatter = (string,"")
    while "," in lmatter:
        pos = lmatter.rfind(",")
        if lmatter[:pos].count("$") % 2 == 1:
            stringtemp = lmatter+rmatter
            start = stringtemp[:pos].rfind("$")
            end = stringtemp[pos:].find("$")+pos # the formula around the comma is stringtemp[start:end+1]
            locallmatter = stringtemp[start+1:pos] # formula text left of the comma
            localrmatter = stringtemp[pos+1:end] # formula text right of the comma
            tosplit = True
            for p1,p2 in UnsplittedPairs:
                p1l = SubstringOccurence(p1,locallmatter)
                p1r = SubstringOccurence(p1,localrmatter)
                p2l = SubstringOccurence(p2,locallmatter)
                p2r = SubstringOccurence(p2,localrmatter)
                if len(p1l)>0 and len(p2l) == 0: # an opener on the left without any closer on the left
                    tosplit = False
                if len(p2r)>0 and len(p1r) == 0: # a closer on the right without any opener on the right
                    tosplit = False
                if len(p1l)*len(p2l)>0:
                    if p1l[-1]>p2l[-1]: # the last opener on the left comes after the last closer
                        tosplit = False
                if len(p1r)*len(p2r)>0:
                    if p1r[0]>p2r[0]: # the first closer on the right comes before the first opener
                        tosplit = False
            # only split when both sides contain a relation symbol
            if len(SubstringOccurence(r"(?:=|\\ge|\\le|\\ne|\\in|>|<|\\parallel|\\perp)",locallmatter))*len(SubstringOccurence(r"(?:=|\\ge|\\le|\\ne|\\in|>|<|\\parallel|\\perp)",localrmatter)) == 0:
                tosplit = False
            if tosplit:
                rmatter = ", $"+lmatter[pos+1:].lstrip()+rmatter.lstrip()
                lmatter = lmatter[:pos]+"$"
            else:
                # not split: move the comma and the character before it to the processed part
                rmatter = lmatter[pos-1:]+rmatter
                lmatter = lmatter[:pos-1]
        else:
            # comma outside math: move it and the character before it to the processed part
            rmatter = lmatter[pos-1:]+rmatter
            lmatter = lmatter[:pos-1]
    return lmatter+rmatter
def MergeMathComma(string):
    """Merge two inline formulas separated by "$, $" when a bracket spans both.

    Occurrences of "$", optional whitespace, "$" around a comma are examined
    from the last to the first. If, for one of the bracket pairs below, the
    formula on the left has exactly one more opener than closers and the
    formula on the right has exactly one more closer than openers, the
    separator is replaced by ", " so both halves form one formula. Otherwise
    the separator is kept.
    """
    bracket_pairs = [(r"[\(\[]",r"[\)\]]"),(r"\{",r"\}"),(r"\\begin\{cases\}",r"\\end\{cases\}")]
    separator = re.compile(r"\$,\s*\$")
    head, tail = string, ""  # head: still to scan, tail: already processed
    while True:
        hits = list(separator.finditer(head))
        if hits == []:
            break
        sep_start, sep_end = hits[-1].start(), hits[-1].end()
        whole = head + tail
        left_dollar = whole[:sep_start].rfind("$")
        right_dollar = whole[sep_end+1:].find("$") + sep_end + 1
        left_math = whole[left_dollar+1:sep_start]
        right_math = whole[sep_end:right_dollar]
        merge = False
        for opener, closer in bracket_pairs:
            unclosed_left = len(SubstringOccurence(opener,left_math))-len(SubstringOccurence(closer,left_math))
            if unclosed_left != 1:
                continue
            unopened_right = len(SubstringOccurence(closer,right_math))-len(SubstringOccurence(opener,right_math))
            if unopened_right == 1:
                merge = True
        if merge:
            tail = ", "+head[sep_end:]+tail
        else:
            tail = head[sep_start:]+tail
        head = head[:sep_start]
    return head+tail
def ReverseMatchBrackets(string): # find the bracket pair closed by the last ")" of string
    """Locate the "(" matching the last ")" in string.

    Returns -1 when string contains no ")". Otherwise returns the tuple
    (startpos, endpos) where endpos is the index of the last ")" and startpos
    the index of its matching "(", or -1 if there is none.

    The search walks leftwards once while keeping a running balance of "("
    minus ")" instead of re-counting a growing slice at every step.
    """
    if not ")" in string:
        return -1
    endpos = string.rfind(")")
    startpos = -1
    balance = 0 # "(" count minus ")" count in string[i:endpos]
    for i in range(endpos-1,-1,-1):
        char = string[i]
        if char == "(":
            balance += 1
        elif char == ")":
            balance -= 1
        if balance == 1:
            startpos = i
            break
    return (startpos,endpos)
def RefineCasesEnv(string): # tidy cases environments
    """Align the conditions inside every cases environment of string.

    Each environment body is split into rows at "\\\\". For a row ending in
    ")" whose last bracket group contains a relation (=, <, >, \\ge, \\le,
    \\in), the brackets are replaced by commas. Every comma then gets an "&"
    column separator, and separators that would end a row are removed again.

    Empty rows (for example after a trailing "\\\\") are passed through; they
    used to raise IndexError.

    Returns the processed string.
    """
    CasesPos = [(item.start(1),item.end(1)) for item in re.finditer(r"\\begin\{cases\}(.*?)\\end\{cases\}",string, re.DOTALL)] # all cases bodies
    # Work from the last environment to the first so earlier positions stay valid.
    for start,end in reversed(CasesPos):
        lmatter = string[:start]
        rmatter = string[end:]
        content = string[start:end]
        lines = [item.strip() for item in content.split(r"\\")]
        newlist = []
        for line in lines:
            newline = line
            if line.endswith(")"): # endswith is safe on an empty row, line[-1] is not
                lbracket,rbracket = ReverseMatchBrackets(line)
                if re.findall(r"(?:=|<|>|\\ge|\\le|\\in)",line[lbracket:rbracket]) != []:
                    newline = line[:lbracket] + ", " + line[lbracket+1:rbracket] + "," # commas instead of the range brackets
            newline = re.sub(r",",",&",newline)
            newlist.append(newline)
        newcontent = r"\\".join(newlist)
        newcontent = re.sub(r",[\s]*&[\s]*\\",r",\\",newcontent)
        newcontent = re.sub(r",[\s]*&[\s]*$",",",newcontent) # drop "&" separators that end a row
        string = lmatter + newcontent + rmatter
    string = re.sub(r"&[,\s]&","& ",string)
    return string
def RipTestPaperDesc(string):
    """Remove test-paper boilerplate (section titles, score notes, instructions).

    The patterns are applied in the listed order; each match is deleted.
    """
    boilerplate_patterns = (
        r"[一二三四五六][、\.\s]+(?:(?:填空)|(?:选择)|(?:解答))题",  # section titles without scores
        r"\(本[大]*题满分[^\)]*\d+\s*分\)",  # parenthesised score notes
        r"本[大]*题[^\n]*?步骤\.{0,1}",  # "write out the steps" hints
        r"\({0,1}本[大]*题共有[^\n]*\d+\s*分\.{0,1}\){0,1}",  # per-question score hints
        r"每题有[^\n]*涂黑\.{0,1}",  # multiple-choice instructions
        r"考生[^\n]*结果\.*",  # answering instructions
    )
    for pattern in boilerplate_patterns:
        string = re.sub(pattern,"",string)
    return string
def SubstringOccurence(regex,string):
    """Return the start index of every non-overlapping match of regex in string."""
    return [found.start() for found in re.finditer(regex,string)]
def select_grade_from_pro_dict(prodict,grades):
    """Return a problem dict whose usage records are limited to the given grades.

    With an empty grades sequence prodict itself is returned. Otherwise a new
    dict is returned in which every entry's "usages" list keeps only the
    records containing at least one grade label. A label that does not
    already contain the suffix character gets it appended.

    prodict and its entries are not modified: each returned entry is a new
    dict. (The former shallow copy overwrote the "usages" of the caller's
    entries, and a record matching several grades was listed repeatedly.)

    NOTE(review): the suffix literal was an empty string in the reviewed copy
    of this file (character lost). It is restored as U+5C4A on the assumption
    that usage records carry labels of the form "<year>届..."; confirm.
    """
    if len(grades) == 0:
        return prodict
    suffix = "\u5c4a"
    gradelist = [g if suffix in g else g + suffix for g in grades]
    adict = prodict.copy()
    for pid in prodict:
        kept = [u for u in prodict[pid]["usages"] if any(g in u for g in gradelist)]
        # rebuild the entry instead of assigning into it, so prodict[pid] stays intact
        adict[pid] = {**prodict[pid], "usages": kept}
    return adict
def GenerateSingleLessonNotefromMariaDB(cursor,id,notesdict,templatepath,outputfilepath,misc,consecutivenumbering = False): # handout generation, 20240415 version
    """Generate the handout notesdict["notes"][id] with problems read through cursor.

    The section layout comes from notesdict["structures"][id[0].upper()]
    ["structure"]; only sections with at least one problem are rendered, by
    GenerateSectionBodyStringfromMariaDB. Section headings are emitted unless
    the handout has exactly one section. The result is substituted into the
    template at templatepath and saved to outputfilepath; it is compiled with
    XeLaTeXCompile only when misc["编译单个文件"] == True.

    Returns the generated LaTeX source.
    """
    notetitle = id + r" \ " + notesdict["notes"][id]["name"]
    structure = notesdict["structures"][id[0].upper()]["structure"]
    note_contents = notesdict["notes"][id]

    used_sections = [key for key in structure if len(note_contents[key]) != 0]
    problem_strings = [",".join(note_contents[key]) for key in used_sections]
    rawoutput = GenerateSectionBodyStringfromMariaDB(cursor = cursor, problems=problem_strings,sectiontitles=used_sections,misc=misc,consecutivenumbering= consecutivenumbering)

    paragraphs = [chunk for chunk in rawoutput.split("\\section") if chunk.strip() != ""]
    with_headings = len(paragraphs) != 1
    pieces = []
    for chunk in paragraphs:
        sectionkey, content = re.findall(r"\{([\S]*)\}\n([\S\s]*)$",chunk)[0]
        if with_headings:
            pieces.append("\\section{" + structure[sectionkey]["name"] + "}\n\n")
        pieces.append(content + "\n\n")
    output = "".join(pieces)

    template = ReadTextFile(templatepath)
    # replace the student-number/name line of the template
    template = template.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")
    if sys.platform == "darwin":
        # on macOS switch to the fandol font set and comment out \setCJKmainfont
        template = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",template)
        template = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",template)
    # fill in the title and the body, then save the .tex file
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",template,(notetitle,output))
    SaveTextFile(latex_data,outputfilepath)
    if misc["编译单个文件"] == True:
        outputdir,filename = os.path.split(outputfilepath)
        print(f"{filename}编译中...")
        compiled = XeLaTeXCompile(outputdir,filename)
        print("编译成功" if compiled else "编译失败")
    return latex_data
def GenerateSingleLessonNote(id,notesdict,metadict,templatepath,outputfilepath,consecutivenumbering = False, answered = False): # handout generation, 20231215 version
    """Generate and compile the handout notesdict["notes"][id] from metadict.

    The section layout comes from notesdict["structures"][id[0].upper()]
    ["structure"]; only sections with at least one problem are rendered, by
    GenerateStudentBodyString with spaceflag=True. Section headings are
    emitted unless the handout has exactly one section. \\vspace commands are
    replaced by a line break in sections whose "spaceflag" is false, and in
    all sections when answered is true.

    The result is substituted into the template at templatepath, saved to
    outputfilepath and compiled with XeLaTeXCompile.

    Returns the generated LaTeX source whether or not compilation succeeds.
    """
    notetitle = id + r" \ " + notesdict["notes"][id]["name"]
    structure = notesdict["structures"][id[0].upper()]["structure"]
    note_contents = notesdict["notes"][id]

    used_sections = [key for key in structure if len(note_contents[key]) != 0]
    problem_strings = [",".join(note_contents[key]) for key in used_sections]
    rawoutput = GenerateStudentBodyString(problems=problem_strings,sectiontitles=used_sections,pro_dict=metadict,consecutivenumbering= consecutivenumbering, answered= answered, spaceflag = True)

    paragraphs = [chunk for chunk in rawoutput.split("\\section") if chunk.strip() != ""]
    with_headings = len(paragraphs) != 1
    pieces = []
    for chunk in paragraphs:
        sectionkey, content = re.findall(r"\{([\S]*)\}\n([\S\s]*)$",chunk)[0]
        if with_headings:
            pieces.append("\\section{" + structure[sectionkey]["name"] + "}\n\n")
        if not structure[sectionkey]["spaceflag"] or answered:
            content = re.sub(r"\\vspace[\*]?\{[\S]*\}","\n",content)
        pieces.append(content + "\n\n")
    output = "".join(pieces)

    template = ReadTextFile(templatepath)
    # replace the student-number/name line of the template
    template = template.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")
    if sys.platform == "darwin":
        # on macOS switch to the fandol font set and comment out \setCJKmainfont
        template = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",template)
        template = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",template)
    # fill in the title and the body, then save the .tex file
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",template,(notetitle,output))
    SaveTextFile(latex_data,outputfilepath)
    outputdir,filename = os.path.split(outputfilepath)
    print(f"{filename}编译中...")
    compiled = XeLaTeXCompile(outputdir,filename)
    print("编译成功" if compiled else "编译失败")
    return latex_data
def getUnit(n):
    """Return the unit name for a number from 0 to 9 (0 means "暂无对应").

    Returns None for any number outside that range.
    """
    unit_names = ("暂无对应","第一单元","第二单元","第三单元","第四单元","第五单元","第六单元","第七单元","第八单元","第九单元")
    if not 0<=n<=9:
        return None
    return unit_names[n]
def getUnitNumber(string):
    """Return the number 1-9 of a unit name such as "第三单元".

    Returns None for any other value, including "暂无对应".
    """
    unit_names = ["暂无对应","第一单元","第二单元","第三单元","第四单元","第五单元","第六单元","第七单元","第八单元","第九单元"]
    for number in range(1, len(unit_names)):
        if unit_names[number] == string:
            return number
    return None
def ExtractProblemIDs(paperdict,pro_dict):
    """Collect the problem ids stored in the list-valued fields of paperdict.

    A field contributes its items only if it is a list and every item is a
    key of pro_dict; other fields are ignored. The ids are returned in field
    order, duplicates included.
    """
    collected = []
    for value in paperdict.values():
        if type(value) != list:
            continue
        if all(candidate in pro_dict for candidate in value):
            collected.extend(value)
    return collected
def ParseZipname(zipfilename):
    """Extract the paper number from the name of a zip file.

    The number is the run of digits before the first "_" of the base name,
    after an optional prefix made of the characters "可", "选" and "_".
    Raises IndexError when the base name does not have that shape.
    """
    basename = os.path.basename(zipfilename)
    candidates = re.findall(r"^[可选_]*(\d*?)_",basename)
    return candidates[0]
def FindFile(dir,filename):
    """Return every directory at or below dir that directly contains filename.

    The directories are listed in os.walk order.
    """
    return [folder for folder, _subfolders, names in os.walk(dir) if filename in names]
def FindPaper(xiaoxianpid, answersheetpath):
    """Look up a paper number in the "答题纸对应.json" files below answersheetpath.

    The first directory whose "答题纸对应.json" has the key xiaoxianpid wins.
    Returns the tuple (pid, grade, idlist, marks, excludejson):
      pid         - the entry's "id";
      grade       - "20" followed by the first two consecutive digits in the
                    directory path;
      idlist      - the entry's "idlist" if present, otherwise the problems of
                    the entry's "parts" concatenated from "校本材料.json";
      marks       - the entry's "marks", or [] if absent;
      excludejson - the entry's "exclude", or {} if absent.
    Returns False when no file contains xiaoxianpid.
    """
    for folder in FindFile(answersheetpath,"答题纸对应.json"):
        anssheetjson = load_dict(os.path.join(folder,"答题纸对应.json"))
        if not xiaoxianpid in anssheetjson:
            continue
        entry = anssheetjson[xiaoxianpid]
        grade = "20"+re.findall(r"\d{2}",folder)[0]
        pid = entry["id"]
        notesjson = load_dict(os.path.join(folder,"校本材料.json"))
        if "idlist" in entry:
            idlist = entry["idlist"]
        else:
            idlist = []
            for part in entry["parts"]:
                idlist += notesjson["notes"][pid][part].copy()
        marks = entry["marks"] if "marks" in entry else []
        excludejson = entry["exclude"] if "exclude" in entry else {}
        return(pid,grade,idlist,marks,excludejson)
    return False
def CheckPaperType(filepath,filename):
    """Classify the paper whose statistics file filename lies below filepath.

    Returns False if the file is not found. Otherwise the first hit is read
    with pandas; the paper is "考试卷" when the text form of the row labelled 1
    contains "步", and "日常卷" when it does not.
    """
    statsfilepathlist = FindFile(filepath,filename)
    if statsfilepathlist == []:
        return False
    dfcurrent = pd.read_excel(os.path.join(statsfilepathlist[0],filename))
    row_text = str(dfcurrent.loc[1,:])
    has_steps = re.findall(r"\d*步",row_text) != []
    return "考试卷" if has_steps else "日常卷"
def generateColIndexandMarks(filepath,statsfilename,paperinfo): # find the usable columns of the statistics sheet and their full marks
    """Determine which columns of the statistics file count, and their full marks.

    filepath is a list of directories; only the first one is used to read
    statsfilename. paperinfo is the tuple returned by FindPaper; its item 3
    holds the marks (an empty list means 1 mark per column).

    Returns (validcols, marks), or False when the number of marks differs
    from the number of usable columns.
    """
    dir = filepath[0]
    dfcurrent = pd.read_excel(os.path.join(dir,statsfilename))
    validcols = []
    for i in range(len(dfcurrent.columns)):
        colname = str(dfcurrent.iloc[1,i]) # the column titles are taken from the row at position 1
        # NOTE(review): the last literal of the "or" chain is an empty string in
        # the reviewed copy, which makes the parenthesised test always true; a
        # character (presumably "步") seems to have been lost - confirm.
        if ("单选" in colname or "填空" in colname or "主观" in colname or "" in colname) and re.findall("[ABCD]",colname) == []:
            validcols.append(i)
    # Walk backwards so pop() does not disturb the indices still to be visited.
    for col in range(len(validcols)-1,-1,-1):
        colname = str(dfcurrent.iloc[1,validcols[col]])
        if "主观" in colname:
            colname_main = re.findall(r"^([\d\.]*)[\($]",colname[2:])[0] # the number after the two-character prefix
            t = [dfcurrent.iloc[1,c] for c in validcols[col+1:]]
            t = str(t)
            if colname_main in t: # the number reappears in a later column title: drop this column
                validcols.pop(col)
    if paperinfo[3] == []:
        marks = [1] * len(validcols)
    else:
        marks = paperinfo[3]
    if len(marks) == len(validcols):
        return (validcols,marks)
    else:
        return False
def CheckValidity(classpath,gradename,threshold): # check whether enough students of a class submitted
    """Check the submission ratio of the class whose folder is classpath.

    The class name is built from gradename plus the "高一/高二/高三" and class
    number found in classpath (number zero-padded to two digits). The counts
    are read from "学科总体分析.xlsx" in the parent directory of classpath: the
    row labelled 2, second column (total) and third column (submitted).

    Returns (classname, classvalidflag); the flag is True only when
    threshold * total < submitted. The outcome is also printed.
    """
    classname_raw = re.findall(r"(高[一二三])(\d*?)班",classpath)[0]
    # NOTE(review): the trailing literal is an empty string in the reviewed copy;
    # a suffix character (presumably "班") seems to have been lost - confirm.
    classname = gradename + classname_raw[0] + classname_raw[1].zfill(2) + ""
    df = pd.read_excel(os.path.join(os.path.split(classpath)[0],"学科总体分析.xlsx"))
    totalstudents = df.loc[2,df.columns[1]]
    validstudents = df.loc[2,df.columns[2]]
    classvalidflag = False
    if threshold * totalstudents < validstudents:
        print(f"{classname} 有效, 共 {totalstudents} 人, 提交 {validstudents}")
        classvalidflag = True
    else:
        print(f"!!! {classname} 无效, 共 {totalstudents} 人, 提交 {validstudents}")
    return (classname,classvalidflag)
def generateIDtoUsageCorrespondence(idlist,validcols,names): # map each problem id to the positions of its columns
    """Build a dict from problem id to the column positions that belong to it.

    The i-th id of idlist (counting from 1) receives every position j in
    range(len(validcols)) for which getindex applied to the leading part of
    names[j] equals i. Ids without any matching column map to an empty list.
    """
    corr_dict = {}
    for i in range(len(idlist)):
        ind = i+1
        collist = []
        for j in range(len(validcols)):
            n = names[j]
            # NOTE(review): the literal tested here is an empty string in the
            # reviewed copy, so the first branch can never run; a character
            # (presumably "步") seems to have been lost - confirm.
            if not "" in n:
                name = re.findall(r"^([^\(]*)",n)[0] # text before the first "("
            else:
                name = re.findall(r"^([^第]*)",n)[0] # text before the first "第"
            if ind == getindex(name):
                collist.append(j)
        corr_dict[idlist[i]] = collist
    return corr_dict
def CalculateUsages(statsfilepathlist,statsfilename,gradename,threshold,marks,correspondence_dict,validcols,date,exclude = {}): # build the "usages" metadata text from the class statistics
    """Turn per-class score sheets into the text of a "usages" metadata file.

    For every directory in statsfilepathlist that CheckValidity accepts,
    statsfilename is read and the column means of validcols (rows 2 to -2)
    are divided by marks. For each id in correspondence_dict with at least
    one column, one record "date<TAB>classname<TAB>ratios" is written, unless
    exclude lists the id and the last three characters of the class name are
    in generate_classid(exclude[id]).

    Returns the text, or False if any ratio of any accepted class exceeds 1.
    exclude is only read, never modified.
    """
    output = "usages\n\n\n"
    validflag = True
    marks = [int(mark) for mark in marks]
    for dir in statsfilepathlist:
        classname, valid = CheckValidity(dir,gradename,threshold)
        if valid:
            dfcurrent = pd.read_excel(os.path.join(dir,statsfilename))
            means = dfcurrent.iloc[2:-2,validcols].mean()/marks # mean score divided by full mark, per column
            if max(means)>1:
                # a ratio above 1 means the full marks are wrong; the whole result is discarded
                print("满分数据有误!!!")
                validflag = False
            else:
                means_out = [f"{t:.3f}" for t in means]
                for id in correspondence_dict:
                    cols = correspondence_dict[id]
                    if not len(cols) == 0 and not (id in exclude and classname[-3:] in generate_classid(exclude[id])):
                        diffs = "\t".join([means_out[u] for u in cols])
                        usages = f"{date}\t{classname}\t{diffs}"
                        output += f"{id}\n{usages}\n\n\n"
    if validflag:
        return output
    else:
        return False
def getindex(string,pos = 2):
    """Return the pos-th (1-based) dot-separated field of string as an int."""
    return int(string.split(".")[pos-1])
def generateListOfIDandContent(string):
    """Split handout LaTeX source into (id, content) tuples.

    Each tuple holds a six-digit id found in parentheses and the text that
    follows it up to the next "\\item" or "\\end{enumerate}".
    """
    item_pattern = re.compile(r"\((\d{6})\)([\s\S]*?)(?:(?:\\item)|(?:\\end\{enumerate\}))")
    return item_pattern.findall(string)
def CountEffectiveBraces(string):
    """Count the braces in string, ignoring the LaTeX escapes \\{ and \\}.

    Returns the tuple (number of "{", number of "}").
    """
    unescaped = re.sub(r"\\[\{\}]"," ",string)
    opening = unescaped.count("{")
    closing = unescaped.count("}")
    return (opening,closing)
def generateAnswerTex(content,anspreamble = "答案: \\textcolor{red}{"): # extract the answer that follows anspreamble
    """Return the text inside the brace group opened by the end of anspreamble.

    The group starts at the last character of anspreamble (normally "{") and
    ends where the braces balance again; the escapes \\{ and \\} do not count
    (the same rule CountEffectiveBraces applies). The enclosed text is
    returned stripped. If the last character of anspreamble is not a brace,
    "" is returned.

    Raises ValueError if anspreamble is not in content, or if the braces
    never balance (this case used to loop forever).
    """
    startpos = content.index(anspreamble) + len(anspreamble) - 1
    # Mask escaped braces with two spaces so that offsets stay aligned with content.
    masked = re.sub(r"\\[\{\}]","  ",content[startpos:])
    balance = 0
    for offset,char in enumerate(masked):
        if char == "{":
            balance += 1
        elif char == "}":
            balance -= 1
        if balance == 0:
            endpos = startpos + offset + 1
            return content[startpos+1:endpos-1].strip()
    raise ValueError("unbalanced braces after the answer preamble")
def generateAnswerList(string,anspreamble = "答案: \\textcolor{red}{"):
    """Return (id, answer) tuples for the problems in LaTeX source string.

    Problems are split with generateListOfIDandContent; only those whose
    content contains anspreamble are listed, with the answer extracted by
    generateAnswerTex.
    """
    return [
        (problem_id, generateAnswerTex(body,anspreamble))
        for problem_id, body in generateListOfIDandContent(string)
        if anspreamble in body
    ]
def unUnitted(idexp): # ids that have no unit tag yet
    """Return the sorted ids from idexp that have no unit tag in the database.

    idexp is expanded with generate_number_set. An id counts as tagged when
    table tagcorresp holds a row for it whose tagname matches '\\S*单元' or
    equals '暂无对应'.

    The cursor and the connection are closed before returning, also when the
    query fails (they used to be left open).
    """
    idlist = generate_number_set(idexp)
    mydb = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    try:
        mycursor = mydb.cursor()
        try:
            mycursor.execute("SELECT ID FROM tagcorresp WHERE tagname REGEXP '\\\\S*单元' or tagname = '暂无对应';")
            unittedids = set([ret[0] for ret in mycursor.fetchall()])
        finally:
            mycursor.close()
    finally:
        mydb.close()
    ununittedids = sorted(list(set(idlist)-unittedids))
    return ununittedids
def AutoAssignTagNotoLaTeX(newlist,allsamelist,allrelatedlist):
    """Propose unit tags for the problems in newlist and render them as LaTeX.

    For each id the unit tags are taken from, in this order: the first item
    findsru returns for allsamelist, the first item it returns for
    allrelatedlist, or otherwise the single most frequent unit tag among the
    other problems that stringmaxsim reports with a similarity above 0.75
    (no tag if there is none).

    Returns (output, tagrecord): an enumerate environment with one
    "\\item (id)[numbers] content" per problem, and one "id<TAB>numbers" line
    per problem.
    """
    print("正在生成简化题目的字典...")
    treateddict = treat_dict()
    print("简化题目字典生成完毕.")
    latex_items = []
    record_lines = []
    for problem_id in tqdm.tqdm(newlist):
        samelist = findsru(problem_id,allsamelist)
        relatedlist = findsru(problem_id,allrelatedlist)
        if samelist != []:
            unittags = get_unit_tags(samelist[0])
        elif relatedlist != []:
            unittags = get_unit_tags(relatedlist[0])
        else:
            # vote among sufficiently similar problems
            votes = {}
            for rid, sim in stringmaxsim(treateddict[problem_id],treateddict,10):
                if rid != problem_id and sim > 0.75:
                    for tag in get_unit_tags(rid):
                        votes[tag] = votes.get(tag, 0) + 1
            unittags = [max(votes, key = lambda x: votes[x])] if len(votes) != 0 else []
        tagnumbers = "".join(str(getUnitNumber(u)) for u in unittags)
        latex_items.append(f"\\item ({problem_id})[{tagnumbers}] {get_problem_content(problem_id)}\n\n")
        record_lines.append(f"{problem_id}\t{tagnumbers}\n")
    output = "\\begin{enumerate}\n\n" + "".join(latex_items) + "\\end{enumerate}\n\n"
    tagrecord = "".join(record_lines)
    return (output,tagrecord)
def UnitRectoMetadataText(data):
    """Convert "id<TAB>unit digits" lines into the text of a "tags" metadata file.

    Blank lines are skipped, and so are lines whose digit field is empty.
    For every other line the id is zero-padded to six characters and each
    character of the digit field is turned into a unit name with getUnit.
    """
    chunks = ["tags\n\n"]
    for row in data.split("\n"):
        if row.strip() == "":
            continue
        id_field, unitno = row.split("\t")
        if len(unitno.strip()) == 0:
            continue
        units = "".join(f"{getUnit(int(digit))}\n" for digit in unitno)
        chunks.append(f"{id_field.zfill(6)}\n")
        chunks.append(f"{units}\n\n")
    return "".join(chunks)
def gitdiff(commitid1,commitid2,fileinrepo="题库0.3/Problems.json",tempfilepath = "临时文件/temp.json"):
    """Compare one JSON file between two git commits.

    Each revision is dumped with ``git show <commit>:<file>`` into
    *tempfilepath*, loaded with ``load_dict`` and the temporary file is
    removed afterwards.

    Args:
        commitid1: First commit id.
        commitid2: Second commit id.
        fileinrepo: Path of the JSON file inside the repository.
        tempfilepath: Scratch file used to hold each revision.

    Returns:
        ``(new_or_deleted_list, diff_list)`` where the first item lists the
        keys present in only one revision (those only in the first revision
        first) and the second item is a list of ``(key, [changed fields])``
        for keys present in both. Only the fields of the first revision are
        inspected; a field missing from the second revision counts as
        changed.
    """
    def _load_revision(commitid):
        # An argument list (no shell) keeps unusual characters in the commit
        # id or file path from being interpreted by a shell.
        with open(tempfilepath, "wb") as tmp:
            subprocess.run(["git", "show", f"{commitid}:{fileinrepo}"], stdout=tmp)
        return load_dict(tempfilepath)

    try:
        json1 = _load_revision(commitid1)
        json2 = _load_revision(commitid2)
    finally:
        # Honour the caller-supplied path and clean up even on failure.
        if os.path.exists(tempfilepath):
            os.remove(tempfilepath)

    new_or_deleted_list = [key for key in json1 if key not in json2] + [key for key in json2 if key not in json1]
    missing = object()
    diff_list = []
    for key, fields1 in json1.items():
        if key not in json2:
            continue
        fields2 = json2[key]
        changed = [field for field in fields1 if fields2.get(field, missing) != fields1[field]]
        if changed:
            diff_list.append((key, changed))
    return (new_or_deleted_list,diff_list)
def CheckUsagesValidity(data):
    """Check that no three-decimal value in usage data exceeds 1.

    A value is a number of the form ``digits.ddd`` that is preceded by
    whitespace and followed by whitespace or the end of the text.

    Args:
        data: Usage-record text.

    Returns:
        1 if some value is greater than 1 (the first offender is printed),
        otherwise 0.
    """
    # Lookarounds leave the separators unconsumed, so values separated by a
    # single whitespace character are all inspected, and a value at the very
    # end of the text is inspected too.
    for value in re.findall(r"(?<=\s)(\d+\.\d{3})(?=\s|$)", data):
        if float(value) > 1:
            print(f"{value}, 数据有问题")
            return 1
    return 0
def pairingbraces(string_raw,leftbracepos):
    """Find the bracket that closes the opening bracket at *leftbracepos*.

    LaTeX-escaped brackets (a backslash followed by a bracket) are ignored.
    Only brackets of the same kind as the opening one are counted.

    Args:
        string_raw: Text to search.
        leftbracepos: Index in *string_raw* of ``(``, ``[`` or ``{``.

    Returns:
        Index in *string_raw* of the matching closing bracket, or -1 if the
        character at *leftbracepos* is not an opening bracket (a message is
        printed) or no matching bracket exists.
    """
    pairs = {"(": ")", "[": "]", "{": "}"}
    string = string_raw
    for brace in "()[]{}":
        # Mask the two-character escape with two spaces so every index in
        # the masked text still refers to the same place in string_raw.
        string = string.replace("\\" + brace, "  ")
    opener = string[leftbracepos]
    if opener not in pairs:
        print("位置上的字符不是左括号")
        return -1
    closer = pairs[opener]
    depth = 1
    for pos in range(leftbracepos + 1, len(string)):
        char = string[pos]
        if char == opener:
            depth += 1
        elif char == closer:
            depth -= 1
            if depth == 0:
                return pos
    return -1
def InMathEnv(string,pos):
    """Tell whether the character at *pos* lies inside a ``$...$`` span.

    Escaped dollar signs (backslash + ``$``) are not treated as delimiters,
    and a delimiter itself is never reported as being inside.

    Args:
        string: Text to inspect.
        pos: Index of the character of interest.

    Returns:
        True if an odd number of unescaped ``$`` precede *pos* and the
        character at *pos* is not itself an unescaped ``$``; otherwise False.
    """
    # Two spaces replace the two-character escape so *pos* keeps pointing at
    # the same character after masking.
    masked = string.replace("\\$", "  ")
    return masked[pos] != "$" and masked[:pos].count("$") % 2 == 1
def MaskingMathEnv(string_raw):
    """Return a copy of the text with every math-mode character set to ``$``.

    Everything from an opening ``$`` through its closing ``$`` is replaced
    by ``$`` characters; text outside math mode is kept. Escaped dollar
    signs (backslash + ``$``) are blanked out and are not delimiters.

    Args:
        string_raw: Text to mask.

    Returns:
        The masked text, the same length as *string_raw*.
    """
    # Two spaces for the two-character escape keep the result aligned with
    # the input, index for index.
    text = string_raw.replace("\\$", "  ")
    in_math = False
    masked = []
    for char in text:
        if char == "$":
            in_math = not in_math
            masked.append("$")
        else:
            masked.append("$" if in_math else char)
    return "".join(masked)
def MultiplechoicetoBlankFilling(string_raw):
    r"""Turn a multiple-choice problem into a fill-in-the-blank problem.

    The text up to the first ``fourch``/``twoch``/``onech`` macro is the
    stem: every ``\bracket{n}`` in it becomes ``\blank{50}`` and the choice
    macro, when it follows a newline, becomes a LaTeX line break. The brace
    groups that follow are the choices; they are appended to the stem as
    ``\textcircled{1} ...; \textcircled{2} ...`` and closed with a full stop.

    Args:
        string_raw: LaTeX source of the multiple-choice problem.

    Returns:
        The rewritten LaTeX source.

    Raises:
        AttributeError: If *string_raw* contains no choice macro.
    """
    firstchoicepos = re.search(r"(?:(?:four)|(?:two)|(?:one))ch",string_raw).span()[1]
    headstring = string_raw[:firstchoicepos]
    headstring = re.sub(r"\\bracket\{[\d]*\}",r"\\blank{50}",headstring)
    headstring = re.sub(r"\n\\(?:(?:four)|(?:two)|(?:one))ch",r"\\\\\n",headstring)
    remaining = string_raw[firstchoicepos:]
    choices = []
    while True:
        # Choice groups may be separated by whitespace or line breaks.
        remaining = remaining.lstrip()
        if not remaining.startswith("{"):
            break
        endpos = pairingbraces(remaining,0)
        if endpos == -1:
            # Unbalanced group: stop instead of looping forever on the same text.
            break
        choices.append(remaining[1:endpos])
        remaining = remaining[endpos+1:]
    listed = "".join(f"\\textcircled{{{number}}} {choice}; " for number, choice in enumerate(choices, start=1))
    return headstring + listed[:-2] + "."
def ExtractIDList(filepath):
    """Read the (problem number, 6-digit ID) pairs from a .pdf or text file.

    For a path ending in ``.pdf`` the text from ``parsePDF`` is scanned for
    ``<number>. (<6-digit id>)``. Otherwise the file is read with
    ``ReadTextFile`` and every ``enumerate`` environment is scanned: numbering
    restarts from each ``\\setcounter{enumi}{n}`` (an implicit counter of 0 is
    prepended when the environment does not start with ``\\setcounter``) and
    every ``(<6-digit id>)`` advances the number by one.

    Args:
        filepath: Path of the file to scan.

    Returns:
        A list of ``(number, id)`` tuples with *number* an int and *id* a
        6-character string.
    """
    if filepath[-4:] == ".pdf":
        pairs = re.findall(r"(\d+)\.[\s\n]*\((\d{6})\)",parsePDF(filepath))
    else:
        text = ReadTextFile(filepath)
        pairs = []
        for raw_block in re.findall(r"\\begin\{enumerate\}([\s\S]*?)\\end\{enumerate\}",text):
            block = raw_block.strip()
            if not block.startswith("\\setcounter"):
                block = "\\setcounter{enumi}{0}\n\n" + block
            counters = list(re.finditer(r"\\setcounter\{enumi\}\{(\d+)\}",block))
            # Each counter governs the text up to the next counter (or the end).
            segment_ends = [match.start() for match in counters[1:]] + [len(block)]
            for counter, segment_end in zip(counters, segment_ends):
                number = int(counter.group(1))
                for problem_id in re.findall(r"\((\d{6})\)",block[counter.end():segment_end]):
                    number += 1
                    pairs.append((str(number),problem_id))
    return [(int(number),problem_id) for number, problem_id in pairs]
def ExportIDList(filepath):
    """Render the problem IDs of a .tex or .pdf file as comma-separated rows.

    The pairs come from ``ExtractIDList``. Within a row the k-th entry is the
    ID of problem k; missing problem numbers are filled with ``999999``. A
    new row starts whenever the problem number does not increase.

    Args:
        filepath: Path of the file to scan.

    Returns:
        The rows, each followed by a blank line.
    """
    rows = []
    row = []
    previous = None
    for number, problem_id in ExtractIDList(filepath):
        if previous is None or number <= previous:
            if previous is not None:
                # Numbering restarted: close the current row.
                rows.append(row)
                row = []
            padding = number - 1
        else:
            padding = number - previous - 1
        row.extend(["999999"] * padding)
        row.append(problem_id)
        previous = number
    rows.append(row)
    return "".join(",".join(entries) + "\n\n" for entries in rows)
def makedir(dirpath):
    """Create a directory and all of its missing parents, best effort.

    Backslashes are treated as path separators. When the first component
    contains a colon (a drive such as ``C:``) it is merged with the next
    component so the drive itself is never created.

    Args:
        dirpath: Directory path to create.

    Returns:
        The list of cumulative paths that were attempted, shortest first.
        Directories that already exist or cannot be created are skipped
        silently.
    """
    parts = dirpath.strip().replace("\\","/").split("/")
    if ":" in parts[0] and len(parts) > 1:
        parts = [parts[0] + "/" + parts[1]] + parts[2:]
    dirlist = []
    for part in parts:
        dirlist.append(dirlist[-1] + "/" + part if dirlist else part)
    for dpath in dirlist:
        try:
            os.mkdir(dpath)
        except OSError:
            # Already exists, empty root component, or not creatable: keep
            # going, but do not swallow KeyboardInterrupt or similar.
            pass
    return dirlist
def TitleIDStringtoTupleList(raw_string):
    """Parse JSON-like ``"title": "ids"`` lines into (title, ids) tuples.

    The text is first passed through ``RefinePunctuations`` and stripped of
    braces. On every non-blank line all whitespace, a trailing comma, double
    quotes and semicolons are removed, and the line is split at its first
    colon.

    Args:
        raw_string: JSON-like text, one ``title: ids`` pair per line.

    Returns:
        A list of ``(title, ids)`` string tuples in line order.
    """
    cleaned = RefinePunctuations(raw_string).replace("{","").replace("}","")
    pairs = []
    for line in cleaned.split("\n"):
        if line.strip() == "":
            continue
        item = re.sub(r"(?:(?:\s)|(?:,\s*$))","",line).replace('"','').replace(";","")
        sep = item.find(":")
        pairs.append((item[:sep],item[sep+1:]))
    return pairs
def TitleIDTupleListtoString(correspoinding_list):
    """Format (title, ids) tuples as numbered, human-readable lines.

    Args:
        correspoinding_list: Sequence of ``(title, ids)`` pairs.

    Returns:
        One ``"<n>. <title>: <ids>"`` line per pair, numbered from 1, each
        terminated by a newline.
    """
    return "".join(
        f"{number}. {corresp[0]}: {corresp[1]}\n"
        for number, corresp in enumerate(correspoinding_list, start=1)
    )
def startfile(filepath):
    """Open a file or folder with the platform's default application.

    Windows uses ``explorer`` for folders and the shell ``start`` command
    for files, macOS uses ``open`` and every other platform uses
    ``xdg-open``.

    Args:
        filepath: Path of the file or folder to open.

    Returns:
        The ``subprocess.Popen`` object of the launched command.
    """
    use_shell = False
    if sys.platform == "win32":
        if os.path.isfile(filepath):
            # "start" treats its first quoted argument as the window title,
            # so an explicit empty title is passed before the path.
            command = ["start", "", filepath]
            use_shell = True
        else:
            command = ["explorer",filepath]
    elif sys.platform == "darwin":
        command = ["open",filepath]
    else:
        # Linux and other Unix-like platforms; previously anything but
        # "linux" fell through and crashed on an unbound variable.
        command = ["xdg-open",filepath]
    return subprocess.Popen(command, shell=use_shell)
def PaintRedAnswers(string,prodict):
    """Recolour blue answers that match the stored answers to red.

    For every problem found by ``generateListOfIDandContent`` whose text
    contains a blue ``textcolor``, the blue answer is extracted with
    ``generateAnswerTex`` and compared with ``prodict[id]["ans"]``. Matching
    problems have ``textcolor{blue}`` replaced by ``textcolor{red}``; for
    non-matching ones a notice is printed unless the extracted answer is the
    "no answer yet" placeholder.

    Args:
        string: LaTeX source to process.
        prodict: Problem dictionary keyed by ID, each entry holding ``"ans"``.

    Returns:
        The LaTeX source with the confirmed answers coloured red.
    """
    output = string
    for pid, content in generateListOfIDandContent(output):
        raw_string = pid+")"+content
        if "textcolor{blue}" not in raw_string:
            continue
        new_ans = generateAnswerTex(content,anspreamble="\\textcolor{blue}{")
        raw_ans = prodict[pid]["ans"]
        if new_ans == raw_ans:
            output = output.replace(raw_string,raw_string.replace("textcolor{blue}","textcolor{red}"))
        elif new_ans != "暂无答案":
            if raw_ans == "":
                print(f"{pid} 已有答案, 请运行提取答案控件以获取新录入的答案")
            else:
                print(f"{pid} 新答案 {new_ans} 和原答案 {raw_ans} 有所不同, 请仔细检查")
    return output
def usagelistdifference(lista,listb):
    """Return the largest absolute element-wise difference of two usage lists.

    Elements are converted with ``float`` before comparison.

    Args:
        lista: First sequence of numbers or numeric strings.
        listb: Second sequence of numbers or numeric strings.

    Returns:
        1 when the lengths differ; otherwise the maximum absolute
        difference (0 for two empty sequences).
    """
    if len(lista) != len(listb):
        return 1
    maxdiff = 0
    for left, right in zip(lista, listb):
        maxdiff = max(abs(float(left)-float(right)),maxdiff)
    return maxdiff
def RefineExclude(excludejson):
    """Return a copy of *excludejson* whose keys are zero-padded to 6 characters.

    Args:
        excludejson: Mapping keyed by problem-number strings.

    Returns:
        A new dict with every key passed through ``str.zfill(6)`` and the
        values unchanged.
    """
    return {key.zfill(6): value for key, value in excludejson.items()}
def ChooseIDsByUsageInterval(startdate,enddate,interval,classregex):
    """Select problems whose mean usage value falls inside *interval*.

    All rows of the ``usages`` table are fetched; a row is kept when its
    date is not NULL and lies in ``[startdate, enddate]`` and *classregex*
    matches its class name. The ``diff`` column is JSON-decoded into one
    value per sub-problem, and the values are averaged per sub-problem over
    the kept rows of each problem.

    Args:
        startdate: Lower date bound (inclusive), comparable with the stored dates.
        enddate: Upper date bound (inclusive).
        interval: ``(low, high)`` bounds (inclusive) for the mean.
        classregex: Regular expression searched in the class name.

    Returns:
        ``(chosenproblems, cautionids, usedproblems)``:
        *chosenproblems* maps an ID to the 1-based sub-problem numbers whose
        mean lies in the interval (an empty list for a problem without
        sub-problems); *cautionids* lists IDs whose kept rows disagree on the
        number of sub-problems; *usedproblems* lists every ID with a kept row,
        in first-seen order.
    """
    mydb = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    try:
        mycursor = mydb.cursor()
        mycursor.execute("SELECT ID,date,classname,diff FROM usages;")
        usage_list = mycursor.fetchall()
    finally:
        # The rows are in memory now; release the connection even on error.
        mydb.close()

    # Group the decoded records per problem in one pass (insertion-ordered)
    # instead of rescanning the whole usage list for every ID.
    diffs_by_id = {}
    for pid, date, classname, diff_raw in usage_list:
        if date is not None and startdate <= date <= enddate and re.findall(classregex,classname) != []:
            diffs_by_id.setdefault(pid, []).append(json.loads(diff_raw))

    cautionids = []      # IDs whose records differ in sub-problem count
    chosenproblems = {}  # ID -> chosen sub-problem numbers ([] = no sub-problems)
    for pid, diffs in diffs_by_id.items():
        sizes = [len(diff) for diff in diffs]
        if max(sizes) != min(sizes):
            cautionids.append(pid)
            print(f"!!! 题号 {pid} 的使用记录中小题数目不全相同, 请检查")
            continue
        subcount = sizes[0]
        for i in range(subcount):
            diffmean = np.mean([float(diff[i]) for diff in diffs])
            if interval[0] <= diffmean <= interval[1]:
                if subcount == 1:
                    chosenproblems[pid] = []
                else:
                    chosenproblems.setdefault(pid, []).append(i+1)
    return (chosenproblems,cautionids,list(diffs_by_id))
def getAllIDsExp(obsincluded = True):
    """Return the IDs of all problems in the database as an ID expression.

    Args:
        obsincluded: When False, problems whose content matches ``OBS`` are
            left out.

    Returns:
        The result of ``generate_exp`` applied to the fetched ID list
        (presumably a compact string using ``:`` and ``,`` -- confirm
        against ``generate_exp``).
    """
    mydb = connect(hostname = db_host, port = db_port, username=db_user, pwd=db_pwd, db = db_database)
    try:
        mycursor = mydb.cursor()
        if obsincluded:
            mycursor.execute("SELECT ID FROM problems;")
        else:
            mycursor.execute("SELECT ID FROM problems WHERE NOT content REGEXP 'OBS';")
        idlist = [ret[0] for ret in mycursor.fetchall()]
    finally:
        # Release the connection even if the query fails.
        mydb.close()
    return generate_exp(idlist)
def parseRemark(string):
    """Split one remark line into its date and its text.

    Double backslashes (LaTeX line breaks) are removed first. A leading
    8-digit date, optionally wrapped in parentheses, is taken as the date;
    only that leading prefix is removed from the remark, so the same digits
    or a ``()`` appearing later in the text are preserved.

    Args:
        string: One remark line.

    Returns:
        ``(date, remark)``; *date* is ``"00000000"`` when the line does not
        start with a date.
    """
    remark_raw = string.replace(r"\\","").strip()
    match = re.match(r"\({0,1}(\d{8})\){0,1}",remark_raw)
    if match is None:
        return "00000000",remark_raw
    return match.group(1),remark_raw[match.end():].strip()
# This module is a library of database helpers meant to be imported;
# running it directly only prints a notice.
if __name__ == "__main__":
    print("数据库工具, import用.")