120 lines
7.2 KiB
Python
120 lines
7.2 KiB
Python
import os,re,json
|
|
|
|
"""---明确数据文件位置---"""
|
|
# Input file with the metadata to merge. Format: the first line is the field
# name; records are separated by a blank line (two newlines); inside a record,
# list items are separated by single newlines.
datafile = "文本文件/metadata.txt"

# Output file for usage entries that need manual review before being merged.
pending_filename = "临时文件/pendingmetadata.txt"
|
|
"""---文件位置结束---"""
|
|
|
|
def trim(string):
    """Return *string* without leading and trailing spaces, tabs and newlines.

    Only the three characters space, tab and newline are removed; other
    whitespace (for example a carriage return) is left in place.
    """
    return string.strip(" \t\n")
|
|
def FloatToInt(string):
    """Parse *string* as a number, snapping near-integers to ``int``.

    The text is converted with ``float``. When the value lies within 0.01 of
    its rounded value, that rounded ``int`` is returned; otherwise the
    ``float`` itself is returned.
    """
    value = float(string)
    nearest = round(value)
    return nearest if abs(value - nearest) < 0.01 else value
|
|
|
|
# Read the metadata file. After stripping, its first line is the name of the
# field to update and everything after that line is the data to merge.
with open(datafile,"r",encoding="utf8") as f:
    data = f.read().strip()

# A file that holds only the header line (or nothing at all) contains no
# newline. Treat that as "no data" instead of letting str.index raise
# ValueError, so the normal field-name check further down still runs.
pos = data.find("\n")
if pos == -1:
    pos = len(data)

field = data[:pos].strip()  # first non-empty line of the file: the field name
appending_data = data[pos:]  # the rest of the file: the records to merge

# Collapse every run of blank or whitespace-only lines into exactly one blank
# line, so records can later be split on "\n\n".
appending_data = re.sub(r"\n[\s]*\n","\n\n",appending_data)
|
|
|
|
# Load the problem database (problem id -> record of fields).
with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as problems_file:
    pro_dict = json.load(problems_file)

# Load the lesson-objective table; its keys are used to validate "objs" entries.
with open(r"../题库0.3/LessonObj.json","r",encoding = "utf8") as objectives_file:
    obj_dict = json.load(objectives_file)
|
|
|
|
# Fields currently present in the problem database.
# This list may need to be updated when the database gains or loses a field.
fields = [
    "content",
    "objs",
    "tags",
    "genre",
    "ans",
    "solution",
    "duration",
    "usages",
    "origin",
    "edit",
    "same",
    "related",
    "remark",
    "space",
    "unrelated",
]
|
|
|
|
# Merge every record of the metadata file into the field named by its header.
#
# Update rules, chosen by the type the field has in problem "000001":
#   * str   - "ans" and "space" are overwritten; any other string field gets
#             the new text appended on a new line.
#   * int / float - replaced when the new value differs by 0.01 or more.
#   * list  - new items are appended; "objs" items are validated against
#             obj_dict, "usages" items go through the class/duplicate checks.
if field in fields:
    field_type = type(pro_dict["000001"][field])

    # Records are separated by a blank line; drop whitespace-only records.
    datalist = [record.strip() for record in appending_data.split("\n\n")]
    datalist = [record for record in datalist if record]

    # problem id -> usage lines whose class already has a different usage
    # entry for that problem (possibly a second use); these need manual review.
    pending_dict = {}

    for record in datalist:
        # The leading run of digits is the problem id (zero-padded to six
        # digits); the rest of the record is the payload.
        id_match = re.match(r"\d+", record)
        if id_match is None:
            # Same policy as an unknown id below: report it and stop,
            # instead of failing with a bare IndexError.
            print("记录缺少题号:", record.split("\n")[0])
            break
        raw_id = id_match.group(0)
        payload = record[len(raw_id):].strip()
        problem_id = raw_id.zfill(6)

        if problem_id not in pro_dict:
            print("题号:",problem_id,"不在数据库中.")
            break

        if field_type is str:
            current = pro_dict[problem_id][field]
            # NOTE(review): this is a substring test, so a new value that is
            # contained in the stored text counts as already present, also
            # for the overwrite fields "ans" and "space".
            if payload in current:
                print("题号:",problem_id,", 字段:",field,"中已有该数据:",payload)
            elif field in ("ans", "space"):
                # These two fields hold a single value: overwrite it.
                # (The original condition lacked parentheses around the
                # "ans or space" test, so "space" matched this branch even
                # when the field was not a string.)
                pro_dict[problem_id][field] = payload
                print("题号:",problem_id,", 字段:",field,"中已修改数据:",payload)
            else:
                # Every other string field accumulates text line by line.
                pro_dict[problem_id][field] = trim(current + "\n" + payload)
                print("题号:",problem_id,", 字段:",field,"中已添加数据:",payload)

        elif field_type in (int, float):
            difference = abs(float(payload) - pro_dict[problem_id][field])
            if difference < 0.01:
                print("题号:",problem_id,", 字段:",field,"中已有该数据:",FloatToInt(payload))
            elif difference >= 0.01:
                pro_dict[problem_id][field] = FloatToInt(payload)
                print("题号:",problem_id,", 字段:",field,"中已修改数据:",FloatToInt(payload))

        elif field_type is list:
            existing = pro_dict[problem_id][field]
            # One list item per line of the payload.
            for cell_data in (line.strip() for line in payload.split("\n")):
                if not cell_data:
                    # A record with an id but no content (or a blank line in
                    # it) must not add an empty item to the list.
                    continue

                if cell_data in existing:
                    print("题号:",problem_id,", 字段:",field,"中已有该数据:",cell_data)

                elif field == "objs":
                    # Objective ids are stored upper-cased; "KNONE" is always
                    # accepted, anything else must be a key of obj_dict.
                    normalized = cell_data.upper()
                    if cell_data not in obj_dict and normalized != "KNONE":
                        print("题号:",problem_id,", 字段:",field,"目标编号有误:",cell_data)
                    elif normalized in existing:
                        # Compare the value that would be stored, so e.g.
                        # "knone" is not appended when "KNONE" already exists.
                        print("题号:",problem_id,", 字段:",field,"中已有该数据:",normalized)
                    else:
                        existing.append(normalized)
                        print("题号:",problem_id,", 字段:",field,"中已添加数据:",normalized)

                elif field == "usages":
                    if cell_data.startswith("p"):
                        # A leading "p" forces the entry in (the "p" itself
                        # is dropped); the pending file below is written in
                        # this form.
                        forced = cell_data.upper()[1:]
                        print("题号:",problem_id,", 字段:",field,"中已强制添加数据:",forced)
                        existing.append(forced)
                        continue

                    # Normalize every whitespace run to a single tab.
                    cell_data = re.sub(r"\s+",r"\t",cell_data)
                    # The class is the first tab-separated token containing
                    # one of the characters 班 高 一 二 三.
                    class_tokens = [
                        info for info in cell_data.split("\t")
                        if re.search(r"[班高一二三]", info)
                    ]
                    if not class_tokens:
                        # Report a malformed usage line and skip it instead
                        # of failing with a bare IndexError.
                        print("题号:",problem_id,", 字段:",field,"中缺少班级信息:",cell_data)
                        continue
                    classid = class_tokens[0]
                    # The entry without its leading timestamp (4+ digits and a tab).
                    time_striped = re.sub(r"^\d{4,}\t","",cell_data)

                    recorded = "\n".join(pro_dict[problem_id]["usages"])
                    if time_striped in recorded:
                        # Same data already stored, ignoring the timestamp.
                        print("题号",problem_id,classid,"已记录")
                    elif classid in recorded:
                        # The class has an entry with different data: leave
                        # the decision to a human via the pending file.
                        print("题号",problem_id,classid,"已使用. 在pending list中已记录.")
                        pending_dict.setdefault(problem_id, []).append(cell_data)
                    else:
                        print("题号:",problem_id,", 字段:",field,"中已添加数据:",cell_data.upper())
                        existing.append(cell_data.upper())

                else:
                    # Plain list field: append the new item as given.
                    existing.append(cell_data)
                    print("题号:",problem_id,", 字段:",field,"中已添加数据:",cell_data)

    if pending_dict:
        print("部分等待处理的数据已输出至",pending_filename)
        # Written in the metadata-file format (header line, then one record
        # per problem), every line prefixed with "p" so that feeding the file
        # back in forces the entries to be added.
        chunks = ["usages\n"]
        for problem_id, pending_lines in pending_dict.items():
            chunks.append(problem_id + "\n")
            chunks.extend("p" + line + "\n" for line in pending_lines)
            chunks.append("\n\n")
        output = "".join(chunks)
        with open(pending_filename,"w",encoding="u8") as f:
            f.write(output)

else:
    print("字段名有误")
|
|
|
|
# Persist the problem database.
# Serialize before opening the file: opening with "w" truncates it at once,
# so a serialization error raised inside the `with` block would leave an
# empty Problems.json behind.
serialized = json.dumps(pro_dict,indent=4,ensure_ascii=False)
with open(r"../题库0.3/Problems.json","w",encoding = "utf8") as f:
    f.write(serialized)