"""Filter the MOOCCubeX ``user-problem.json`` relation file.

The input is read as JSON Lines (one JSON object per line).  Records whose
``score`` is ``null`` are dropped; for the remaining records only the fields
listed in ``KEPT_FIELDS`` are written, one JSON object per line, to the
output file.
"""
import json
from typing import Any, Dict, Optional

# Fields copied from every kept input record into the output record.
KEPT_FIELDS = ('log_id', 'is_correct', 'attempts', 'score')

# Path of the large JSON Lines input file; replace with the actual location.
# NOTE(review): an earlier revision pointed this at a small sample file
# ('pro/data/t.json'), presumably for quick local runs.
file_path = '/Users/yr/code/data-mining/Dataset/MOOCCubeX/relations/user-problem.json'

# Path of the filtered output file.  It is opened in append mode, so records
# from a previous run are kept.
output_file_path = '/Users/yr/code/data-mining/Dataset/MOOCCubeX/pro/data/user-problem-pre.json'


def process_json(line: str) -> Optional[Dict[str, Any]]:
    """Parse one JSON line and project it onto ``KEPT_FIELDS``.

    Args:
        line: One line of the input file, expected to hold a JSON object.

    Returns:
        A dict with the keys in ``KEPT_FIELDS``, or ``None`` when the line is
        blank or the record's ``score`` is ``null``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If the record lacks one of the required fields.
    """
    # A blank line (e.g. a trailing newline at end of file) carries no record;
    # skip it instead of aborting the whole run with a decode error.
    if not line.strip():
        return None

    data = json.loads(line)
    # Identity check: only a JSON ``null`` is dropped, a score of 0 is kept.
    if data['score'] is None:
        return None
    return {key: data[key] for key in KEPT_FIELDS}


def main(input_path: str = file_path, output_path: str = output_file_path) -> int:
    """Stream ``input_path`` through ``process_json`` into ``output_path``.

    Args:
        input_path: JSON Lines file to read.
        output_path: File that kept records are appended to.

    Returns:
        The number of records written.
    """
    written = 0
    # Both files are opened once for the whole run; the input is read as
    # UTF-8 so that it matches the non-ASCII-preserving UTF-8 output.
    with open(input_path, 'r', encoding='utf-8') as source, \
            open(output_path, 'a', encoding='utf-8') as sink:
        for line in source:
            record = process_json(line)
            if record is not None:
                sink.write(json.dumps(record, ensure_ascii=False) + '\n')
                written += 1
    return written


if __name__ == '__main__':
    main()