adversarial_VLNBERT/adversarial_summary.py

46 lines
1.5 KiB
Python

import json
import os
import re
def remove_non_ascii(text):
    """Return *text* with every non-ASCII character replaced by one space.

    A character counts as non-ASCII when it falls outside the range
    U+0000..U+007F. The replacement is one-for-one, so the length of the
    string is unchanged.
    """
    non_ascii_char = re.compile(r'[^\x00-\x7F]')
    return non_ascii_char.sub(' ', text)
def _group_by_instruction_id(records):
    """Merge flat records that share an ``id`` into one entry per id.

    The first record seen for an id supplies the scalar fields
    (``distance``, ``ix``, ``scan``, ``id``, ``instructions_l``,
    ``path_id``, ``objId``, ``path``, ``heading``). Every record with that
    id, including the first, appends its ``instruction`` (passed through
    ``remove_non_ascii``) and its ``found`` value to the entry's parallel
    ``instructions`` and ``found`` lists.

    Args:
        records: Iterable of dicts carrying the keys listed above plus
            ``instruction`` and ``found``.

    Returns:
        A list of merged entries, ordered by first appearance of their id.

    Raises:
        KeyError: If a record lacks one of the required keys.
        ValueError: If ``distance``, ``ix``, ``path_id`` or ``heading``
            cannot be converted to a number.
    """
    grouped = {}
    for record in records:
        instruction_id = record['id']
        entry = grouped.get(instruction_id)
        if entry is None:
            grouped[instruction_id] = {
                'distance': float(record['distance']),
                'ix': int(record['ix']),
                'scan': record['scan'],
                'id': record['id'],
                'instructions_l': record['instructions_l'],
                'path_id': int(record['path_id']),
                'objId': record['objId'],
                'path': record['path'],
                'heading': float(record['heading']),
                'instructions': [remove_non_ascii(record['instruction'])],
                'found': [record['found']],
            }
        else:
            entry['instructions'].append(
                remove_non_ascii(record['instruction']))
            entry['found'].append(record['found'])
    # Dicts keep insertion order, so this is first-seen order of the ids.
    return list(grouped.values())


# For every split, collapse the per-instruction records of
# reverie_<split>_fnf.json into one entry per id and write the result to
# REVERIE_<split>.json.
for file in ['train', 'val_unseen', 'val_seen', 'test']:
    print(file)
    source_path = 'data/adversarial/reverie_{}_fnf.json'.format(file)
    if os.path.isfile(source_path):
        # JSON text is UTF-8; do not depend on the platform's default
        # encoding, since the instructions may contain non-ASCII characters.
        with open(source_path, encoding='utf-8') as fp:
            data = json.load(fp)
        output = _group_by_instruction_id(data)
    else:
        # A missing input is not an error: the split still gets an (empty)
        # output file.
        output = []
    target_path = 'data/adversarial/REVERIE_{}.json'.format(file)
    with open(target_path, 'w', encoding='utf-8') as fp:
        json.dump(output, fp)