# adversarial_VLNBERT/r2r_src/eval.py
''' Evaluation of agent trajectories '''
import json
import os
import sys
from collections import defaultdict
import networkx as nx
import numpy as np
import pprint
pp = pprint.PrettyPrinter(indent=4)
from env import R2RBatch
import utils
from utils import load_datasets, load_nav_graphs, ndtw_graphload, DTW
from agent import BaseAgent

class Evaluation(object):
    ''' Results submission format: [{'instr_id': string,
        'trajectory': [(viewpoint_id, heading_rads, elevation_rads), ...]}] '''
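    # Illustrative result entry (ids and values below are made up for
    # illustration only; score() additionally reads item['ref'], the predicted
    # object id, and item['found'], the found/not-found prediction):
    #   {'instr_id': '4170_2', 'ref': '12', 'found': -1,
    #    'trajectory': [('vp_start', 0.0, 0.0), ('vp_next', 1.57, 0.0)]}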
    def __init__(self, splits, scans, tok):
        self.error_margin = 3.0
        self.splits = splits
        self.tok = tok
        self.gt = {}
        self.instr_ids = []
        self.scans = []
        for split in splits:
            for item in load_datasets([split]):
                if scans is not None and item['scan'] not in scans:
                    continue
                self.gt[str(item['id'])] = item
                self.scans.append(item['scan'])
                # One id per instruction, e.g. '<path_id>_0', '<path_id>_1', ...
                self.instr_ids += ['%s_%d' % (item['id'], i) for i in range(len(item['instructions']))]
        self.scans = set(self.scans)
        self.instr_ids = set(self.instr_ids)
        self.graphs = load_nav_graphs(self.scans)
        self.distances = {}
        for scan, G in self.graphs.items():  # compute all shortest paths
            self.distances[scan] = dict(nx.all_pairs_dijkstra_path_length(G))
            # self.distances[scan][u][v] now holds the geodesic distance in
            # meters between viewpoints u and v of that scan
        self.objProposals, self.obj2viewpoint = utils.loadObjProposals()
        # self.ndtw_criterion = {}
        # scan_gts_dir = '/home/yicong/research/selfmonitoring-agent/tasks/Env-back/data/id_paths.json'
        # with open(scan_gts_dir) as f_:
        #     self.scan_gts = json.load(f_)
        # all_scan_ids = []
        # for key in self.scan_gts:
        #     path_scan_id = self.scan_gts[key][0]
        #     if path_scan_id not in all_scan_ids:
        #         all_scan_ids.append(path_scan_id)
        #         ndtw_graph = ndtw_graphload(path_scan_id)
        #         self.ndtw_criterion[path_scan_id] = DTW(ndtw_graph)
    def _get_nearest(self, scan, goal_id, path):
        ''' Return the viewpoint on the path that is closest (by graph
            distance) to the goal viewpoint. '''
        near_id = path[0][0]
        near_d = self.distances[scan][near_id][goal_id]
        for item in path:
            d = self.distances[scan][item[0]][goal_id]
            if d < near_d:
                near_id = item[0]
                near_d = d
        return near_id
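    # e.g. for path [('A', ..), ('B', ..)] and goal 'G' this returns 'A' or
    # 'B', whichever has the smaller distance to 'G' (placeholder names);
    # lookups are O(1) since __init__ precomputes all-pairs Dijkstra distances.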
    def _score_item(self, instr_id, path, ref_objId, predict_found):
        ''' Calculate error based on the final position in the trajectory, and
            also the closest position (oracle stopping rule).
            The path contains [view_id, angle, vofv] '''
        gt = self.gt[instr_id.rsplit('_', 1)[0]]  # strip the '_<instr index>' suffix
        index = int(instr_id.split('_')[-1])
        start = gt['path'][0]
        assert start == path[0][0], 'Result trajectories should include the start position'
        goal = gt['path'][-1]
        final_position = path[-1][0]  # the first of [view_id, angle, vofv]
        nearest_position = self._get_nearest(gt['scan'], goal, path)
        # self.scores['nav_errors'].append(self.distances[gt['scan']][final_position][goal])
        # self.scores['oracle_errors'].append(self.distances[gt['scan']][nearest_position][goal])
        self.scores['trajectory_steps'].append(len(path) - 1)
        distance = 0  # work out the length of the path in meters
        prev = path[0]
        for curr in path[1:]:
            distance += self.distances[gt['scan']][prev[0]][curr[0]]
            prev = curr
        self.scores['trajectory_lengths'].append(distance)
        self.scores['shortest_lengths'].append(
            self.distances[gt['scan']][start][goal]
        )
        # 'found' prediction: count it as correct when -1 coincides with a
        # ground-truth found flag of True, or -2 with a flag of False
        if gt['found'][index]:
            if predict_found == -1:
                self.scores['found_count'] += 1
        elif predict_found == -2:
            self.scores['found_count'] += 1
        # REF success or not: predicted object id matches the ground truth
        self.scores['rgs'].append(1 if ref_objId == str(gt['objId']) else 0)
        # navigation - success or not: the target object has a proposal
        # visible from the final viewpoint
        end_viewpoint_id = gt['scan'] + '_' + final_position
        if end_viewpoint_id in self.objProposals and \
                str(gt['objId']) in self.objProposals[end_viewpoint_id]['objId']:
            self.scores['visible'].append(1)
        else:
            self.scores['visible'].append(0)
        # navigation - oracle success or not: the target object is visible
        # from any viewpoint along the trajectory
        oracle_succ = 0
        for passvp in path:
            oracle_viewpoint_id = gt['scan'] + '_' + passvp[0]
            if oracle_viewpoint_id in self.objProposals and \
                    str(gt['objId']) in self.objProposals[oracle_viewpoint_id]['objId']:
                oracle_succ = 1
                break
        self.scores['oracle_visible'].append(oracle_succ)
    def score(self, output_file):
        ''' Evaluate each agent trajectory based on how close it got to the goal location '''
        self.scores = defaultdict(list)
        self.scores['found_count'] = 0
        instr_ids = set(self.instr_ids)
        if type(output_file) is str:
            with open(output_file) as f:
                results = json.load(f)
        else:
            results = output_file  # already a list of result dicts
        print('result length', len(results))
        path_counter = 0
        for item in results:
            # Check against expected ids
            if item['instr_id'] in instr_ids:
                instr_ids.remove(item['instr_id'])
                self._score_item(item['instr_id'], item['trajectory'], item['ref'], item['found'])
                path_counter += 1
        if 'train' not in self.splits:  # exclude train, because training eval may be partial
            assert len(instr_ids) == 0, 'Missing %d of %d instruction ids from %s - not in %s' \
                % (len(instr_ids), len(self.instr_ids), ",".join(self.splits), output_file)
            assert len(self.scores['visible']) == len(self.instr_ids)
        score_summary = {
            'steps': np.average(self.scores['trajectory_steps']),
            'lengths': np.average(self.scores['trajectory_lengths']),
            'found_score': self.scores['found_count'] / path_counter
        }
        end_successes = sum(self.scores['visible'])
        score_summary['success_rate'] = float(end_successes) / float(len(self.scores['visible']))
        oracle_successes = sum(self.scores['oracle_visible'])
        score_summary['oracle_rate'] = float(oracle_successes) / float(len(self.scores['oracle_visible']))
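        # SPL (success weighted by path length): spl_i = S_i * l_i / max(l_i, p_i),
        # where S_i is binary success, l_i the shortest-path length and p_i the
        # executed path length; the 0.01 floor guards against division by zero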
        spl = [float(visible == 1) * l / max(l, p, 0.01)
               for visible, p, l in
               zip(self.scores['visible'], self.scores['trajectory_lengths'], self.scores['shortest_lengths'])
               ]
        score_summary['spl'] = np.average(spl)
        try:
            assert len(self.scores['rgs']) == len(self.instr_ids)
        except AssertionError:
            print(len(self.scores['rgs']), len(self.instr_ids))
        num_rgs = sum(self.scores['rgs'])
        score_summary['rgs'] = float(num_rgs) / float(len(self.scores['rgs']))
        # RGSPL: remote grounding success weighted by path length, analogous
        # to SPL above (with the same floor against zero-length paths)
        rgspl = [float(rgsi == 1) * l / max(l, p, 0.01)
                 for rgsi, p, l in
                 zip(self.scores['rgs'], self.scores['trajectory_lengths'], self.scores['shortest_lengths'])
                 ]
        score_summary['rgspl'] = np.average(rgspl)
        return score_summary, self.scores
    def bleu_score(self, path2inst):
        from bleu import compute_bleu
        refs = []
        candidates = []
        for path_id, inst in path2inst.items():
            path_id = str(path_id)
            assert path_id in self.gt
            # There are three references
            refs.append([self.tok.split_sentence(sent) for sent in self.gt[path_id]['instructions']])
            candidates.append([self.tok.index_to_word[word_id] for word_id in inst])
        result = compute_bleu(refs, candidates, smooth=False)  # avoid shadowing builtin 'tuple'
        bleu_score, precisions = result[0], result[1]
        return bleu_score, precisions
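

# Usage sketch (hypothetical result path; 'tok' may be None when bleu_score()
# is not needed, and score() also accepts an in-memory list of result dicts
# instead of a file path):
#   ev = Evaluation(['val_unseen'], scans=None, tok=None)
#   summary, _ = ev.score('results/submit_val_unseen.json')
#   pp.pprint(summary)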
RESULT_DIR = 'tasks/R2R/results/'


def eval_simple_agents():
    ''' Run simple baselines on each split. '''
    for split in ['train', 'val_seen', 'val_unseen', 'test']:
        env = R2RBatch(None, batch_size=1, splits=[split])
        ev = Evaluation([split], None, None)  # no scan filter, no tokenizer
        for agent_type in ['Stop', 'Shortest', 'Random']:
            outfile = '%s%s_%s_agent.json' % (RESULT_DIR, split, agent_type.lower())
            agent = BaseAgent.get_agent(agent_type)(env, outfile)
            agent.test()
            agent.write_results()
            score_summary, _ = ev.score(outfile)
            print('\n%s' % agent_type)
            pp.pprint(score_summary)

def eval_seq2seq():
    ''' Eval sequence-to-sequence models on val splits (iteration selected from training error) '''
    outfiles = [
        RESULT_DIR + 'seq2seq_teacher_imagenet_%s_iter_5000.json',
        RESULT_DIR + 'seq2seq_sample_imagenet_%s_iter_20000.json'
    ]
    for outfile in outfiles:
        for split in ['val_seen', 'val_unseen']:
            ev = Evaluation([split], None, None)  # no scan filter, no tokenizer
            score_summary, _ = ev.score(outfile % split)
            print('\n%s' % outfile)
            pp.pprint(score_summary)

if __name__ == '__main__':
    eval_simple_agents()