database_tools中添加对单条usage进行分词的功能

This commit is contained in:
wangweiye7840 2023-06-27 10:56:16 +08:00
parent ea9cb3f51b
commit 1c3bea6639
1 changed files with 18 additions and 0 deletions

View File

@ -473,5 +473,23 @@ def ModifyMetadata(prodict,objdict,fieldsdict,metadatafilepath,pendingdatafilepa
print("字段 %s 不可按此方式修改"%field)
return feedback # 已在数据库中修改, 之后需要将数据库写入一次, 返回1表示字段名有误, 返回其他表示成功进行了修改
def parseUsage(usagestring):
    """Tokenize a single line of usage information.

    The line is split on whitespace. The first token is taken as the date
    when it starts with ``20`` followed by 2-6 digits and contains no CJK
    character; the next token is taken as the class identifier when it
    contains any of the characters 高/一/二/三/班. Every remaining token is
    parsed as the score rate of one sub-problem.

    Args:
        usagestring: One usage record, fields separated by whitespace.

    Returns:
        A dict with the keys ``"date"`` (str, ``""`` when absent),
        ``"classid"`` (str, ``""`` when absent), ``"subproblems"`` (number
        of score-rate tokens), ``"difficulties"`` (list of float score
        rates) and ``"glossdiff"`` (mean score rate rounded to 3 decimals;
        ``0.0`` when the record carries no score rates).

    Raises:
        ValueError: If a score-rate token cannot be converted to ``float``.
    """
    # str.split() with no argument trims the ends and collapses whitespace
    # runs; unlike splitting on "\t", it yields [] for a blank record.
    tokens = usagestring.split()
    usagedict = {}

    # The date pattern is anchored at the token start so that a score rate
    # such as "0.2034" is not mistaken for a date. Tokens containing CJK
    # characters (e.g. "2023届高三1班") are class identifiers, not dates.
    if (
        tokens
        and re.match(r"20\d{2,6}", tokens[0])
        and not re.search(r"[\u4e00-\u9fa5]", tokens[0])
    ):
        date = tokens.pop(0)
    else:
        date = ""
    usagedict["date"] = date

    # Guard against a record that ended right after the date.
    if tokens and re.search(r"[高一二三班]", tokens[0]):
        classid = tokens.pop(0)
    else:
        classid = ""
    usagedict["classid"] = classid

    difficulties = [float(token) for token in tokens]
    usagedict["subproblems"] = len(difficulties)
    usagedict["difficulties"] = difficulties
    # Avoid dividing by zero when no score rates are present.
    if difficulties:
        usagedict["glossdiff"] = round(sum(difficulties) / len(difficulties), 3)
    else:
        usagedict["glossdiff"] = 0.0
    return usagedict
# This module is meant to be imported; running it directly only prints a notice.
if __name__ == "__main__":
    print("数据库工具, import用.")