mathdeptv2/工具/关键字筛选题号.py

import os,re,json

"""---设置关键字, 同一field下不同选项为or关系, 同一字典中不同字段间为and关系, 不同字典间为or关系, _not表示列表中的关键字都不含, 同一字典中的数字用来供应同一字段不同的条件之间的and---"""
keywords_dict_table = [
    {
        "id":[""], #题号
        "content":[""], #题面内容
        "objs":[""], #目标代码
        "tags":[""], #标签, 如["第二单元"]等
        "genre":[""], #题目类型, 填空题, 选择题, 解答题
        "ans":[""], #答案
        "solution":[""], #解答与提示
        "duration":[""], #解题时间(目前未设置)
        "usages":[""], #使用记录, 数据库中格式为 <日期>\t<届别><班别>\t正确率[\t正确率]... 例如"20230301\t2023届01班\t0.985\t0.211
        "origin":["高二下学期期末"], #题目来源
        "edit":[""], #导入者及编辑者
        "same":[""], #相同题目题号
        "related":[""], #关联题目题号
        "remark":[""], #备注, 注记
        "space":[""], #解答题下的空间(em)表示一个m的宽度
        "unrelated":[""], #无关题目题号
        # "content2":["双曲线"], #在字段名中加入数字表示这个字段的另一个必要条件
        # "content_not":["焦"], #加_not表示不能出现该样式的词
        "content_not":["OBSOLETE"], #这一项不要更改, 是排除错构题目的标志

    },
]
"""---关键字设置完毕---"""
# Example 1 (use "\\alpha" to match a literal LaTeX \alpha; a bare "\a" in a regex is the bell character):
# keywords_dict_table = [
#     {"tags": ["第三单元"], "content1": [r"[\d]\\alpha","2x"], "content2": ["sin"], "content3": ["cos"],"content4": ["cot"], "content5": ["tan"]},
# ]
# Example 2:
# keywords_dict_table = [
#     {"tags":["第三单元"],"content":[r"f\(","y=","函数"],"usages":[r"0\.9",r"0\.8[3-9]"],"usages_not":[r"0\.[0-7]",r"0\.8[0-2]"],"usages1":["2023届"]},
#     {"tags":["第五单元"],"usages":[r"0\.9"],"usages_not":[r"0\.[0-7]",r"0\.8[0-2]"],"usages1":["2023届"]}
# ]
# Example 3:
# keywords_dict_table = [
#     {"usages":[r"2023届高三02班"],"usages2":[r"202209",r"20221[012]"],"usages3":[r"0\.[678][\d]{2}"],"usages_not":[r"2023届高三02班[^\n]*0\.[0-59][\d]{2}"]}
# ]


"""---设置输出文件名---"""
filename = "文本文件/题号筛选.txt"
"""---文件名设置完毕---"""


def _field_text(problem, field_key):
    """Return the searchable text of the problem field addressed by *field_key*.

    The real field name is *field_key* with any "_not" marker and all digits
    removed (so "content2" and "content_not" both address "content").
    A list value is joined with newlines; any other value is converted
    with ``str``.

    Raises:
        KeyError: if the problem has no such field.
    """
    field = re.sub(r"\d", "", field_key.replace("_not", ""))
    value = problem[field]
    if isinstance(value, list):
        return "\n".join(value)
    return str(value)


def match_condition(problem, condition_dict):
    """Tell whether *problem* satisfies every condition in *condition_dict*.

    Args:
        problem: mapping from field name to a string or a list of strings.
        condition_dict: mapping from a condition key to a list of regular
            expressions. A key is a field name, optionally with digits
            (extra conditions on the same field) and/or a "_not" marker.
            Entries whose value is exactly ``[""]`` are ignored.

    Returns:
        True when, for every non-ignored key, at least one pattern matches
        the field text (plain keys) or no pattern matches it ("_not" keys);
        False otherwise.
    """
    for key, patterns in condition_dict.items():
        if patterns == [""]:
            # Placeholder value: this field carries no condition.
            continue
        text = _field_text(problem, key)
        hit = any(re.search(pattern, text) for pattern in patterns)
        if "_not" in key:
            if hit:
                # A forbidden pattern is present.
                return False
        elif not hit:
            # None of the alternatives for this required condition matched.
            return False
    return True

def _format_run(start, end):
    """Format one run of IDs as '"start:end"', or '"start"' for a single ID."""
    if start == end:
        return '"' + start + '"'
    return '"' + start + ":" + end + '"'


def generate_exp(id_list):
    """Compress a list of ID strings into range expressions.

    Neighbouring entries whose integer values increase by exactly one are
    merged into a single '"first:last"' item; an isolated ID becomes
    '"id"'. The input order is kept as given (the list is not sorted).

    Args:
        id_list: sequence of ID strings that ``int()`` can parse.

    Returns:
        A list of quoted expressions; an empty list for empty input
        (the original raised UnboundLocalError in that case).
    """
    exp_list = []
    if not id_list:
        return exp_list

    start = end = id_list[0]
    for pid in id_list[1:]:
        if int(pid) - 1 == int(end):
            # Still consecutive: extend the current run.
            end = pid
            continue
        # Gap found: close the current run and start a new one.
        exp_list.append(_format_run(start, end))
        start = end = pid
    exp_list.append(_format_run(start, end))
    return exp_list

# Read the problem-bank JSON file and convert it into a dictionary.
with open(r"../题库0.3/Problems.json", "r", encoding="utf8") as f:
    database = f.read()
pro_dict = json.loads(database)

# Collect the IDs of all problems that satisfy at least one condition
# dictionary, in first-match order and without duplicates.
match_list = []
for condition in keywords_dict_table:
    for pid, problem in pro_dict.items():
        if not match_condition(problem, condition):
            continue
        if pid in match_list:
            continue
        match_list.append(pid)

match_list_exp = generate_exp(match_list)

# Write the expressions as one comma-separated line, with every double
# quote character removed.
joined_exp = ",".join(match_list_exp)
with open(filename, "w", encoding="utf8") as f:
    f.write(joined_exp.replace('"', ''))

# Hand the result file to the `code` command (presumably the VS Code
# launcher -- confirm it is on PATH).
os.system("code " + filename)