def generate_diff_dict(pro_dict):
    """Build a per-problem difficulty summary from the usage records in pro_dict.

    Args:
        pro_dict: mapping of problem id -> record; each record must have a
            "usages" entry holding raw usage records accepted by parseUsage.

    Returns:
        dict mapping each problem id to
        {"min": ..., "average": ..., "median": ..., "max": ...}.
        Each usage is first reduced to the mean of its parsed
        "difficulties"; the four statistics are then taken over those
        per-usage means. A problem without any usage gets -1 for all four
        fields, which acts as a "no data" sentinel.
    """
    diff_dict = {}
    for pid, record in pro_dict.items():
        usages = record["usages"]
        # Any empty/missing usage container (not only a literal []) means
        # "no data"; otherwise min()/max() below would fail on an empty list.
        if not usages:
            diff_dict[pid] = {"min": -1, "average": -1, "median": -1, "max": -1}
            continue
        per_usage_means = [
            np.mean(parseUsage(usage_raw)["difficulties"]) for usage_raw in usages
        ]
        diff_dict[pid] = {
            "min": min(per_usage_means),
            "average": np.mean(per_usage_means),
            "median": np.median(per_usage_means),
            "max": max(per_usage_means),
        }
    return diff_dict


def _pick_dissimilar(candidates, quota, already_chosen, treated, sim_threshold):
    """Randomly pick up to `quota` ids from `candidates`, avoiding near-duplicates.

    Args:
        candidates: ids that are eligible to be picked (not modified).
        quota: number of ids wanted.
        already_chosen: ids selected earlier that new picks are also
            compared against.
        treated: callable mapping an id to its preprocessed content string.
        sim_threshold: a drawn id is rejected when its Jaro similarity to any
            previously chosen id is strictly greater than this value.

    Returns:
        list of picked ids. While there are spare candidates, ids are drawn
        at random and similar ones are discarded. Once the pool holds only
        just enough (or too few) ids to reach the quota, everything left is
        taken without a similarity check.
    """
    pool = list(candidates)
    chosen = []
    while len(chosen) < quota and len(pool) + len(chosen) > quota:
        new_id = pool.pop(np.random.randint(len(pool)))
        is_distinct = all(
            Levenshtein.jaro(treated(pid), treated(new_id)) <= sim_threshold
            for pid in already_chosen + chosen
        )
        if is_distinct:
            chosen.append(new_id)
    if len(chosen) < quota:
        # No slack left: take the remainder so that a short supply still
        # yields the problems that do exist instead of an empty list.
        chosen += pool
    return chosen


def generate_adaptive_problemIDs(pro_dict, units_list, genre_tuple, diffinterval, method="min", excludeids=None, multianswers=False, sim_threshold=0.8):
    """Select a random set of problem ids matching unit, difficulty and genre constraints.

    Args:
        pro_dict: mapping of problem id -> record with the keys "tags",
            "usages", "same", "genre" and "content".
        units_list: unit tags to draw from; a problem qualifies when it
            carries at least one of them. An empty value selects the nine
            default units ("第一单元" .. "第九单元").
        genre_tuple: pair (number of fill-in problems, number of
            multiple-choice problems) wanted.
        diffinterval: pair (lowest difficulty, highest difficulty), both
            inclusive.
        method: which statistic from generate_diff_dict is compared with
            diffinterval ("min", "average", "median" or "max").
        excludeids: ids that must not be selected.
        multianswers: when false, only problems whose usages all report
            exactly one subproblem are kept. Problems without usage records
            are then dropped as well, because their subproblem count is
            unknown.
        sim_threshold: Jaro similarity above which a randomly drawn problem
            is considered a near-duplicate of one already selected.

    Returns:
        tuple (fill-in ids, multiple-choice ids). A list may be shorter than
        requested when not enough problems satisfy the constraints.
    """
    diff_dict = generate_diff_dict(pro_dict)
    if excludeids is None:
        excludeids = ()
    if not units_list:
        units_list = ["第一单元","第二单元","第三单元","第四单元","第五单元","第六单元","第七单元","第八单元","第九单元"]
    wanted_units = set(units_list)
    diff_low, diff_high = diffinterval[0], diffinterval[1]

    # Preprocessed content is only needed for ids that actually take part in
    # a similarity comparison, so compute it on demand and memoise it.
    treated_cache = {}

    def treated(pid):
        if pid not in treated_cache:
            treated_cache[pid] = pre_treating(pro_dict[pid]["content"])
        return treated_cache[pid]

    # Every problem matching unit / difficulty / exclusion rules, regardless
    # of genre. Once a problem is accepted, the problems recorded as
    # identical to it are blocked from being accepted later.
    candidates = []
    blocked_as_same = set()
    for pid in pro_dict:
        if (
            not wanted_units.isdisjoint(pro_dict[pid]["tags"])
            and diff_low <= diff_dict[pid][method] <= diff_high
            and pid not in excludeids
            and pid not in blocked_as_same
        ):
            candidates.append(pid)
            blocked_as_same.update(pro_dict[pid]["same"])

    if not multianswers:
        # Drop problems with several answer slots. default=0 keeps
        # usage-less problems from crashing max() and excludes them.
        candidates = [
            pid
            for pid in candidates
            if max(
                (parseUsage(usage_raw)["subproblems"] for usage_raw in pro_dict[pid]["usages"]),
                default=0,
            ) == 1
        ]

    fill_candidates = [pid for pid in candidates if pro_dict[pid]["genre"] == "填空题"]
    choice_candidates = [pid for pid in candidates if pro_dict[pid]["genre"] == "选择题"]
    print(f"满足条件的填空有 {len(fill_candidates)} 题, 选择有 {len(choice_candidates)} 题.")

    tiankong_list = _pick_dissimilar(fill_candidates, genre_tuple[0], [], treated, sim_threshold)
    # Multiple-choice picks must also differ from the chosen fill-in problems.
    xuanze_list = _pick_dissimilar(choice_candidates, genre_tuple[1], tiankong_list, treated, sim_threshold)

    return tiankong_list, xuanze_list


if __name__ == "__main__":
    print("数据库工具, import用.")