forked from microsoft/LoRA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathformat_converting_dart.py
43 lines (35 loc) · 1.4 KB
/
format_converting_dart.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import sys
import io
import json
def _linearize_triples(tripleset):
    """Join (subject, relation, object) triples into one source string.

    Each triple is rendered as ``subj : rela : obj`` with the relation
    lower-cased; triples are separated by ``' | '``. An empty tripleset
    yields an empty string.

    Raises:
        ValueError: if a triple does not unpack into exactly three items.
    """
    return ' | '.join(
        '{} : {} : {}'.format(subj, rela.lower(), obj)
        for subj, rela, obj in tripleset
    )


def convert_examples(examples):
    """Flatten parsed examples into context/completion records.

    Args:
        examples: iterable of dicts, each with a ``'tripleset'`` list of
            3-item triples and an ``'annotations'`` list of dicts that
            carry a ``'text'`` entry.

    Returns:
        A ``(records, example_count)`` tuple. ``records`` holds one
        ``{'context': ..., 'completion': ...}`` dict per annotation, in
        input order; ``example_count`` is the number of examples seen,
        including examples that have no annotations.
    """
    records = []
    example_count = 0
    for example in examples:
        # The linearized triples are shared by every annotation of the example.
        context = _linearize_triples(example['tripleset'])
        example_count += 1
        for sent in example['annotations']:
            records.append({'context': context, 'completion': sent['text']})
    return records, example_count


def main(argv=None):
    """Convert a JSON file of examples into JSON-lines records.

    Args:
        argv: argument vector shaped like ``sys.argv`` (program name, input
            path, output path). Defaults to ``sys.argv``.

    The input file must contain a JSON array of examples. One JSON object
    per annotation is written to the output file, one per line, and the
    number of examples processed is printed to stdout.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        # Fail with a usage message instead of a bare IndexError traceback.
        sys.exit('usage: {} <input.json> <output.jsonl>'.format(
            argv[0] if argv else 'format_converting_dart.py'))

    # Both files are opened together, so the output file is created (and
    # truncated) before the input is parsed.
    with open(argv[1], 'r', encoding='utf8') as reader, \
            open(argv[2], 'w', encoding='utf8') as writer:
        records, example_count = convert_examples(json.load(reader))
        print('unique source is', example_count)
        for record in records:
            writer.write(json.dumps(record) + '\n')


if __name__ == '__main__':
    main()