29 lines
693 B
Python
29 lines
693 B
Python
import json
|
|
import random
|
|
|
|
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of decoded objects.

    Args:
        file_path: Path of a UTF-8 encoded text file holding one JSON
            document per line.

    Returns:
        A list with one decoded value per non-blank line, in file order.
        An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the handle directly rather than readlines() so the raw
        # lines are not all held in memory alongside the parsed records.
        # Blank / whitespace-only lines (typically a trailing empty line)
        # carry no record and would make json.loads raise, so skip them.
        return [json.loads(line) for line in f if line.strip()]
|
|
|
|
def save_jsonl(data, file_path):
    """Write a mapping to a JSON Lines file, one entry per line.

    Each key/value pair of ``data`` is emitted as its own single-key JSON
    object followed by a newline. Non-ASCII characters are written as-is
    (not escaped), and the file is encoded as UTF-8. An existing file at
    ``file_path`` is overwritten.

    Args:
        data: Mapping whose ``items()`` are serialised in iteration order.
        file_path: Destination path.
    """
    with open(file_path, 'w', encoding='utf-8') as sink:
        sink.writelines(
            json.dumps({key: value}, ensure_ascii=False) + '\n'
            for key, value in data.items()
        )
|
|
|
|
# Example usage
input_train_path = './dev.jsonl'

# Read the data
data = read_jsonl(input_train_path)

# Group the question/answer pairs by the image each record refers to.
msg = {}
for record in data:
    # setdefault creates the per-image list the first time an image is seen.
    bucket = msg.setdefault(record['image'], [])
    bucket.append({'question': record['question'], 'answer': record['answer']})

# One output line per image: {image: [{question, answer}, ...]}
save_jsonl(msg, 'mid.jsonl')
|