36 lines
1.0 KiB
Python
36 lines
1.0 KiB
Python
import json
|
|
import random
|
|
|
|
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of decoded objects.

    Args:
        file_path: Path of the UTF-8 encoded file to read. Each non-blank
            line must contain exactly one JSON document.

    Returns:
        A list holding one decoded value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the handle directly rather than calling readlines(), so the
        # raw text is not kept in memory next to the decoded objects. Blank
        # lines (e.g. an extra trailing newline) are skipped because
        # json.loads rejects empty input.
        return [json.loads(line) for line in f if line.strip()]
|
|
|
|
def save_jsonl(data, file_path):
    """Write question/answer records as chat-style JSON Lines.

    Every key/value pair of every mapping in ``data`` becomes one output
    line of the form ``{"messages": [...]}``. Each item of the value
    contributes a ``user`` message (its ``'question'``) followed by an
    ``assistant`` message (its ``'answer'``). The first user message of a
    pair additionally carries the pair's key under ``'image'``.

    Args:
        data: Iterable of mappings; each value is an iterable of mappings
            that provide ``'question'`` and ``'answer'`` entries.
        file_path: Destination path; the file is overwritten and written as
            UTF-8.

    Raises:
        OSError: If the file cannot be opened for writing.
        KeyError: If an item lacks ``'question'`` or ``'answer'``.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        for entry in data:
            for image, turns in entry.items():
                messages = []
                for index, turn in enumerate(turns):
                    # 'image' is inserted first so that it stays the leading
                    # key of the serialized user message.
                    user_msg = {'image': image} if index == 0 else {}
                    user_msg['role'] = 'user'
                    user_msg['content'] = turn['question']
                    messages.append(user_msg)
                    messages.append(
                        {'role': 'assistant', 'content': turn['answer']}
                    )
                # ensure_ascii=False keeps non-ASCII text readable in the
                # output instead of \uXXXX escapes.
                f.write(
                    json.dumps({'messages': messages}, ensure_ascii=False)
                    + '\n'
                )
|
|
|
|
# Example usage: read './mid.jsonl' and write the converted records to
# 'deving.jsonl'.
input_train_path = './mid.jsonl'

# Load the input records, then save them in the messages format.
data = read_jsonl(file_path=input_train_path)
save_jsonl(data=data, file_path='deving.jsonl')
|