# mathdeptv2/工具/关键字筛选题号.py

import os,re,json

"""---设置关键字, 同一field下不同选项为or关系, 同一字典中不同字段间为and关系, 不同字典间为or关系, _not表示列表中的关键字都不含, 同一字典中的数字用来供应同一字段不同的条件之间的and---"""
# Filter configuration (edit this table to change what is selected).
#   * Every value is a list of regular expressions; within one list the
#     patterns are alternatives (OR).
#   * All keys of one dict must be satisfied together (AND).
#   * The dicts in the table are alternatives (OR): a problem is selected
#     when it satisfies at least one dict.
#   * A key containing "_not" requires that none of its patterns match.
#   * Digits in a key are ignored when resolving the problem field, so
#     "origin" and "origin2" both test the "origin" field and are AND-ed.
keywords_dict_table = [
    {
        "origin": [r"2023"],
        "origin2": [r"二模"],
        "tags": [r"第九单元"],
    },
]
"""---关键字设置完毕---"""
# 示例： keywords_dict_table = [
#     {"tags": ["第三单元"], "content1": [r"[\d]\\alpha","2x"], "content2": ["sin"], "content3": ["cos"],"content4": ["cot"], "content5": ["tan"]},
# ]
# 实例2: keywords_dict_table = [
#     {"tags":["第三单元"],"content":[r"f\(","y=","函数"],"usages":[r"0\.9",r"0\.8[3-9]"],"usages_not":[r"0\.[0-7]",r"0\.8[0-2]"],"usages1":["2023届"]},
#     {"tags":["第五单元"],"usages":[r"0\.9"],"usages_not":[r"0\.[0-7]",r"0\.8[0-2]"],"usages1":["2023届"]}
# ]
# 实例3:
# keywords_dict_table = [
#     {"usages":[r"2023届高三02班"],"usages2":[r"202209",r"20221[012]"],"usages3":[r"0\.[678][\d]{2}"],"usages_not":[r"2023届高三02班[^\n]*0\.[0-59][\d]{2}"]}

# ]


"""---设置输出文件名---"""
# Path of the text file that receives the comma-separated list of matching
# problem ids. It is a relative path, so it is resolved against the current
# working directory when the script runs.
filename = "文本文件/题号筛选.txt"
"""---文件名设置完毕---"""


def _field_text(problem, key):
    """Return the searchable text of the problem field named by a condition key.

    The field name is the key with any "_not" marker and all digits removed,
    so "usages", "usages2" and "usages_not" all address ``problem["usages"]``.
    List values are joined with newlines (each element converted with
    ``str``); any other value is converted with ``str``.

    Raises:
        KeyError: if the problem has no such field.
    """
    field = re.sub(r"\d", "", key.replace("_not", ""))
    value = problem[field]
    if isinstance(value, list):
        # str() on each element keeps list fields consistent with scalar
        # fields, which are also stringified before searching.
        return "\n".join(map(str, value))
    return str(value)


def _matches_any(patterns, text):
    """Return True if at least one regex in *patterns* is found in *text*."""
    return any(re.search(pattern, text) is not None for pattern in patterns)


def match_condition(problem, condition_dict):
    """Tell whether one problem satisfies every entry of one condition dict.

    Args:
        problem: mapping from field name to a value; list values are searched
            as newline-joined text, other values as ``str(value)``.
        condition_dict: mapping from condition key to a list of regular
            expressions. A key without "_not" is satisfied when at least one
            pattern is found in the field; a key containing "_not" is
            satisfied when no pattern is found. Digits in a key are ignored
            when resolving the field, which lets several independent
            conditions target the same field.

    Returns:
        True when all entries are satisfied (an empty dict is always
        satisfied), otherwise False.

    Raises:
        KeyError: if a condition refers to a field the problem does not have.
    """
    for key, patterns in condition_dict.items():
        found = _matches_any(patterns, _field_text(problem, key))
        must_be_absent = "_not" in key
        # Positive keys fail when nothing matched; "_not" keys fail when
        # something matched. Stop at the first failing entry.
        if found == must_be_absent:
            return False
    return True

# Read the problem-database JSON file and parse it into a dict; its keys are
# treated as problem ids and its values as the problem records.
with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    database = f.read()
pro_dict = json.loads(database)

# Collect every id that satisfies at least one condition dict. The list keeps
# first-match order (conditions in table order, ids in database order); the
# companion set gives constant-time duplicate checks and lets already
# selected problems skip the regex work for later conditions.
match_list = []
seen_ids = set()
for condition in keywords_dict_table:
    for problem_id, problem in pro_dict.items():
        if problem_id in seen_ids:
            continue
        if match_condition(problem, condition):
            seen_ids.add(problem_id)
            match_list.append(problem_id)

# Write the selected ids as a single comma-separated line.
with open(filename,"w",encoding="utf8") as f:
    f.write(",".join(match_list))

# Open the result file with the `code` command (presumably the VS Code CLI,
# which must be on PATH).
os.system("code "+filename)