53 lines
1.6 KiB
Python
53 lines
1.6 KiB
Python
import os
|
|
import json
|
|
import numpy as np
|
|
|
|
def load_instr_datasets(anno_dir, dataset, splits):
|
|
data = []
|
|
for split in splits:
|
|
filepath = os.path.join(anno_dir, f'{split}.json')
|
|
with open(filepath) as f:
|
|
new_data = json.load(f)
|
|
|
|
data += new_data
|
|
|
|
return data
|
|
|
|
'''
|
|
def construct_instrs(anno_dir, dataset, splits):
|
|
data = []
|
|
if "instr" in splits[0]:
|
|
return load_instr_datasets(anno_dir, dataset, splits)
|
|
|
|
for i, item in enumerate(load_instr_datasets(anno_dir, dataset, splits)):
|
|
# Split multiple instructions into separate entries
|
|
for j, instr in enumerate(item['instructions']):
|
|
new_item = dict(item)
|
|
new_item['instr_id'] = '%s_%d' % (item['path_id'], j)
|
|
new_item['instruction'] = instr
|
|
del new_item['instructions']
|
|
del new_item['instr_encodings']
|
|
data.append(new_item)
|
|
return data
|
|
'''
|
|
def construct_reverie_instrs(anno_dir, dataset, splits):
|
|
data = []
|
|
if "instr" in splits[0]:
|
|
return load_instr_datasets(anno_dir, dataset, splits)
|
|
|
|
for i, item in enumerate(load_instr_datasets(anno_dir, dataset, splits)):
|
|
# Split multiple instructions into separate entries
|
|
for j, instr in enumerate(item['instructions']):
|
|
new_item = dict(item)
|
|
new_item['instr_id'] = '%s_%d' % (item['path_id'], j)
|
|
new_item['instruction'] = instr
|
|
del new_item['instructions']
|
|
del new_item['instr_encodings']
|
|
data.append(new_item)
|
|
return data
|
|
|
|
def load_json(f):
|
|
with open(f) as fp:
|
|
data = json.load(fp)
|
|
return data
|