This repository has been archived on 2024-06-23. You can view files and clone it, but cannot push or open issues or pull requests.
mathdeptv2/工具/批量添加题库字段数据.ipynb

349 lines
20 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"题号: 000270 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 000271 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 000271 , 字段: objs 中已有该数据: K0714005X\n",
"题号: 000284 , 字段: objs 中已有该数据: K0715003X\n",
"题号: 000285 , 字段: objs 中已添加数据: K0714004X\n",
"题号: 000288 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 000379 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 000485 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 000485 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 000510 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 000560 , 字段: objs 中已添加数据: K0714003X\n",
"题号: 000625 , 字段: objs 中已添加数据: K0714004X\n",
"题号: 000625 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 000705 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 000723 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 000755 , 字段: objs 中已添加数据: K0714002X\n",
"题号: 000801 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 000813 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 000895 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 000908 , 字段: objs 中已添加数据: K0714003X\n",
"题号: 002306 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 002307 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002307 , 字段: objs 中已添加数据: K0714005X\n",
"题号: 002308 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002308 , 字段: objs 中已添加数据: K0714005X\n",
"题号: 002309 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002310 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002311 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 002313 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002314 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002315 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002316 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002317 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 002318 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 002319 , 字段: objs 中已添加数据: KNONE\n",
"题号: 002321 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 002322 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 002323 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002324 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 002325 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 002326 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 002327 , 字段: objs 中已添加数据: K0714003X\n",
"题号: 002327 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002328 , 字段: objs 中已添加数据: KNONE\n",
"题号: 002329 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 002330 , 字段: objs 中已添加数据: K0714003X\n",
"题号: 002330 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 002331 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002332 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 002333 , 字段: objs 中已添加数据: K0714002X\n",
"题号: 002334 , 字段: objs 中已添加数据: K0713001X\n",
"题号: 002335 , 字段: objs 中已添加数据: K0714005X\n",
"题号: 002335 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 002337 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 002338 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 002339 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 002342 , 字段: objs 中已添加数据: K0714005X\n",
"题号: 002342 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 002343 , 字段: objs 中已添加数据: K0714005X\n",
"题号: 002343 , 字段: objs 中已添加数据: K0714002X\n",
"题号: 002344 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 002345 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 002346 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 002347 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 002356 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002363 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002367 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 002380 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002389 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 002428 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002428 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 002443 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 002447 , 字段: objs 中已添加数据: KNONE\n",
"题号: 002469 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 002480 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 002481 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 002482 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 002683 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 002686 , 字段: objs 中已添加数据: KNONE\n",
"题号: 002688 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003399 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 003400 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 003401 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 003401 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 003402 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 003403 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003404 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003406 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 003407 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003408 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003409 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 003409 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003410 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 003411 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 003411 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 003413 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003414 , 字段: objs 中已添加数据: KNONE\n",
"题号: 003415 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 003416 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003428 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 003608 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003619 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003650 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003664 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 003704 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003733 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 003748 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 003826 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 003858 , 字段: objs 中已添加数据: K0714004X\n",
"题号: 003895 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 003909 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 003909 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 003918 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 004078 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 004099 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004120 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004129 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004152 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 004162 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 004162 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004182 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004204 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 004204 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004221 , 字段: objs 中已添加数据: K0713001X\n",
"题号: 004221 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 004242 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 004242 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004288 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 004288 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 004288 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004303 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 004309 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 004309 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004361 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004487 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 004524 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 004529 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 004529 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004561 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 004633 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 004701 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 004717 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004722 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 004722 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 004743 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 004743 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 008873 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 008873 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 008875 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008876 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008877 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008878 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008879 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 008879 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 008880 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 008881 , 字段: objs 中已添加数据: K0714003X\n",
"题号: 008882 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008883 , 字段: objs 中已添加数据: K0714003X\n",
"题号: 008884 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008884 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 008885 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 008886 , 字段: objs 中已添加数据: K0713001X\n",
"题号: 008887 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 008887 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 008889 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 008890 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 008890 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 008891 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 008892 , 字段: objs 中已添加数据: K0714003X\n",
"题号: 008892 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 008893 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 008894 , 字段: objs 中已添加数据: K0713001X\n",
"题号: 008895 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008897 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008897 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 008899 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 008916 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008916 , 字段: objs 中已添加数据: K0714003X\n",
"题号: 008945 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008946 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 008947 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 008948 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 008949 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 008959 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008962 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 008965 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 009080 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 009087 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 009089 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 009105 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 009109 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 009109 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 009819 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 009820 , 字段: objs 中已添加数据: K0714003X\n",
"题号: 009821 , 字段: objs 中已添加数据: K0714005X\n",
"题号: 009822 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 009823 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 009824 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 009825 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 010003 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 010003 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 010667 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 010668 , 字段: objs 中已添加数据: K0714003X\n",
"题号: 010668 , 字段: objs 中已添加数据: K0714005X\n",
"题号: 010669 , 字段: objs 中已添加数据: K0713004X\n",
"题号: 010670 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 010671 , 字段: objs 中已添加数据: K0713001X\n",
"题号: 010672 , 字段: objs 中已添加数据: K0715002X\n",
"题号: 010673 , 字段: objs 中已添加数据: K0713003X\n",
"题号: 010673 , 字段: objs 中已添加数据: K0715001X\n",
"题号: 010690 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 010702 , 字段: objs 中已添加数据: K0715003X\n",
"题号: 010703 , 字段: objs 中已添加数据: K0713002X\n",
"题号: 010704 , 字段: objs 中已添加数据: K0715003X\n"
]
}
],
"source": [
"import os,re,json\n",
"\n",
"# --- Data file location ---\n",
"# Format of the file: records are separated by blank lines; inside a record, list items\n",
"# are separated by single newlines; the first line of the file is the field name.\n",
"datafile = \"文本文件/metadata.txt\"\n",
"# --- End of data file location ---\n",
"\n",
"def trim(string):\n",
"    \"\"\"Return `string` without its leading and trailing spaces, tabs and newlines.\"\"\"\n",
"    # Only these three characters are stripped; other whitespace (e.g. \"\\r\") is kept.\n",
"    return string.strip(\" \\t\\n\")\n",
"def FloatToInt(string):\n",
"    \"\"\"Parse `string` as a number.\n",
"\n",
"    Returns the nearest int when the parsed value lies within 0.01 of it,\n",
"    otherwise returns the parsed float unchanged.\n",
"    \"\"\"\n",
"    value = float(string)\n",
"    nearest = round(value)\n",
"    return nearest if abs(value - nearest) < 0.01 else value\n",
"\n",
"# Read the batch file: its first line names the target field, the rest holds the records.\n",
"with open(datafile, \"r\", encoding=\"utf8\") as f:\n",
"    data = f.read()\n",
"pos = data.index(\"\\n\")\n",
"field = data[:pos].strip()\n",
"appending_data = data[pos:]\n",
"\n",
"with open(r\"../题库0.3/Problems.json\", \"r\", encoding=\"utf8\") as f:\n",
"    pro_dict = json.load(f)\n",
"with open(r\"../题库0.3/LessonObj.json\", \"r\", encoding=\"utf8\") as f:\n",
"    obj_dict = json.load(f)\n",
"\n",
"# This list of field names may need to be updated.\n",
"fields = [\"content\",\"objs\",\"tags\",\"genre\",\"ans\",\"solution\",\"duration\",\"usages\",\"origin\",\"edit\",\"same\",\"related\",\"remark\",\"space\"]\n",
"# String fields whose value is overwritten instead of appended to.\n",
"replace_fields = (\"ans\", \"space\")\n",
"\n",
"if field in fields:\n",
"    # The type stored for problem \"000001\" decides how every record is merged.\n",
"    field_type = type(pro_dict[\"000001\"][field])\n",
"    datalist = [record.strip() for record in appending_data.split(\"\\n\\n\") if len(trim(record)) > 0]\n",
"    for record in datalist:\n",
"        # A record starts with the problem number; everything after it is the payload.\n",
"        raw_id = re.findall(r\"^[\\d]{1,}\", record)[0]\n",
"        record_data = record[len(raw_id):].strip()\n",
"        problem_id = raw_id.zfill(6)\n",
"        if problem_id not in pro_dict:\n",
"            print(\"题号:\", problem_id, \"不在数据库中.\")\n",
"            # Kept from the original: an unknown problem number stops the whole batch.\n",
"            break\n",
"        problem = pro_dict[problem_id]\n",
"\n",
"        # String fields\n",
"        if field_type == str:\n",
"            if field in replace_fields:\n",
"                # Overwrite-type fields are compared as whole values: a substring test would\n",
"                # refuse to replace \"12\" with \"1\". An empty payload still leaves the stored\n",
"                # value untouched, exactly as before.\n",
"                unchanged = record_data == \"\" or record_data == problem[field]\n",
"            else:\n",
"                unchanged = record_data in problem[field]\n",
"            if unchanged:\n",
"                print(\"题号:\", problem_id, \", 字段:\", field, \"中已有该数据:\", record_data)\n",
"            elif field in replace_fields:\n",
"                problem[field] = record_data\n",
"                print(\"题号:\", problem_id, \", 字段:\", field, \"中已修改数据:\", record_data)\n",
"            else:\n",
"                problem[field] = trim(problem[field] + \"\\n\" + record_data)\n",
"                print(\"题号:\", problem_id, \", 字段:\", field, \"中已添加数据:\", record_data)\n",
"\n",
"        # Numeric fields\n",
"        elif field_type == int or field_type == float:\n",
"            difference = abs(float(record_data) - problem[field])\n",
"            if difference < 0.01:\n",
"                print(\"题号:\", problem_id, \", 字段:\", field, \"中已有该数据:\", FloatToInt(record_data))\n",
"            elif difference >= 0.01:\n",
"                problem[field] = FloatToInt(record_data)\n",
"                print(\"题号:\", problem_id, \", 字段:\", field, \"中已修改数据:\", FloatToInt(record_data))\n",
"\n",
"        # List fields\n",
"        elif field_type == list:\n",
"            for raw in (d.strip() for d in record_data.split(\"\\n\")):\n",
"                # objs entries are stored upper-cased, so normalise before the duplicate and\n",
"                # validity checks; otherwise a lower-case \"knone\" is appended on every run.\n",
"                entry = raw.upper() if field == \"objs\" else raw\n",
"                if entry in problem[field]:\n",
"                    print(\"题号:\", problem_id, \", 字段:\", field, \"中已有该数据:\", entry)\n",
"                elif field == \"objs\" and not (raw in obj_dict or entry in obj_dict or entry == \"KNONE\"):\n",
"                    print(\"题号:\", problem_id, \", 字段:\", field, \"目标编号有误:\", raw)\n",
"                elif not entry:\n",
"                    # A record holding only a problem number must not append an empty string.\n",
"                    continue\n",
"                else:\n",
"                    problem[field].append(entry)\n",
"                    print(\"题号:\", problem_id, \", 字段:\", field, \"中已添加数据:\", entry)\n",
"else:\n",
"    # Report an unrecognised header instead of silently doing nothing.\n",
"    print(\"字段:\", field, \"不在字段列表中.\")\n",
"\n",
"# Serialise before opening the file so a failure cannot leave Problems.json truncated.\n",
"serialized = json.dumps(pro_dict, indent=4, ensure_ascii=False)\n",
"with open(r\"../题库0.3/Problems.json\", \"w\", encoding=\"utf8\") as f:\n",
"    f.write(serialized)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.8 ('base')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "d311ffef239beb3b8f3764271728f3972d7b090c974f8e972fcdeedf230299ac"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}