"""Regroup per-instruction REVERIE ``*_fnf.json`` records by ``path_id``.

For every dataset split listed in ``SPLITS`` the script reads
``data/adversarial/reverie_<split>_fnf.json`` (when that file exists), merges
all records that share a ``path_id`` into one record holding parallel
``instructions`` / ``found`` lists, and writes the merged records to
``data/adversarial/R2R_<split>.json``.  A split without an input file still
gets an output file, containing an empty JSON list.
"""
import json
import os
import re

# Splits processed by ``main``, in processing order.
SPLITS = ['train', 'val_unseen', 'val_seen', 'train_seen', 'test', 'val_train_seen']

# Directory holding both the input and the output files.
DATA_DIR = 'data/adversarial'


def remove_non_ascii(text):
    """Return *text* with every non-ASCII character replaced by one space."""
    return re.sub(r'[^\x00-\x7F]', ' ', text)


def group_by_path_id(entries):
    """Merge records that share a ``path_id`` into a single record each.

    Args:
        entries: iterable of dicts, each providing the keys ``path_id``,
            ``distance``, ``scan``, ``path``, ``heading``, ``instruction``,
            ``found``, ``id`` and ``objId``.

    Returns:
        A list with one dict per distinct ``path_id``, in order of first
        appearance.  Scalar fields (``distance``, ``scan``, ``path``,
        ``heading``, ``id``, ``objId``) are taken from the first record seen
        for that ``path_id``; ``instructions`` and ``found`` collect the
        values of every record in input order, with instructions passed
        through ``remove_non_ascii``.

    Raises:
        KeyError: if a record lacks one of the required keys.
        ValueError: if ``distance``/``heading``/``path_id`` of a first
            record cannot be converted to ``float``/``float``/``int``.
    """
    grouped = {}
    for entry in entries:
        # The raw ``path_id`` value is the grouping key; only the stored
        # copy inside the record is coerced to ``int``.
        key = entry['path_id']
        record = grouped.get(key)
        if record is None:
            grouped[key] = {
                'distance': float(entry['distance']),
                'scan': entry['scan'],
                'path_id': int(entry['path_id']),
                'path': entry['path'],
                'heading': float(entry['heading']),
                'instructions': [remove_non_ascii(entry['instruction'])],
                'found': [entry['found']],
                'id': entry['id'],
                'objId': entry['objId'],
            }
        else:
            record['instructions'].append(remove_non_ascii(entry['instruction']))
            record['found'].append(entry['found'])
    return list(grouped.values())


def convert_split(split, data_dir=DATA_DIR):
    """Convert one split and write its ``R2R_<split>.json`` file.

    Args:
        split: split name used to build the input and output file names.
        data_dir: directory containing ``reverie_<split>_fnf.json`` and
            receiving ``R2R_<split>.json``.

    Returns:
        The list of merged records that was written (empty when the input
        file does not exist).
    """
    source_path = '{}/reverie_{}_fnf.json'.format(data_dir, split)
    target_path = '{}/R2R_{}.json'.format(data_dir, split)

    if os.path.isfile(source_path):
        with open(source_path) as fp:
            records = group_by_path_id(json.load(fp))
    else:
        # A missing input is not an error: the split gets an empty output.
        records = []

    with open(target_path, 'w') as fp:
        json.dump(records, fp)
    return records


def main():
    """Convert every split in ``SPLITS``, printing each split name first."""
    for split in SPLITS:
        print(split)
        convert_split(split)


if __name__ == '__main__':
    main()