glm4/finetune_demo/data/text_chart/create.py

29 lines
693 B
Python

import json
import random
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of decoded objects.

    Args:
        file_path: Path of the UTF-8 encoded file to read; each non-blank
            line must hold one JSON document.

    Returns:
        A list with one decoded value per non-blank line, in file order.
        An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the handle directly instead of materialising readlines(),
        # and skip blank / whitespace-only lines (e.g. a trailing empty
        # line), which json.loads would otherwise reject.
        return [json.loads(line) for line in f if line.strip()]
def save_jsonl(data, file_path):
    """Write a mapping to a JSON Lines file, one entry per line.

    Each key/value pair of ``data`` becomes its own single-key JSON object
    on a separate line. Non-ASCII characters are written as-is rather than
    escaped. The target file is created or overwritten.

    Args:
        data: Mapping whose items are serialised, in iteration order.
        file_path: Destination path, written as UTF-8.
    """
    with open(file_path, 'w', encoding='utf-8') as out:
        out.writelines(
            json.dumps({key: value}, ensure_ascii=False) + '\n'
            for key, value in data.items()
        )
# Example usage: regroup the records of ./dev.jsonl by their image.
input_train_path = './dev.jsonl'

# Read every record from the input file.
data = read_jsonl(input_train_path)

# Map each image value to the list of its question/answer pairs, keeping
# the order in which the records appear in the input.
msg = {}
for record in data:
    msg.setdefault(record['image'], []).append(
        {'question': record['question'], 'answer': record['answer']}
    )

# Write one JSON line per image.
save_jsonl(msg, 'mid.jsonl')