''' Batched REVERIE navigation environment '''

import json
import os
import numpy as np
import random
import networkx as nx
from collections import defaultdict
from glob import glob

from utils.data import load_nav_graphs
from eval_utils import cal_dtw, cal_cls
from utils.graph_utils import NavGraph

ERROR_MARGIN = 3.0

obj2vps = {}
with open('/data/Matterport3DSimulator-duet/VLN-DUET/datasets/REVERIE/annotations/BBoxes.json') as f:
    bbox_data = json.load(f)
for scanvp, value in bbox_data.items():
    scan, vp = scanvp.split('_')
    # for all visible objects at that viewpoint
    for objid, objinfo in value.items():
        if objinfo['visible_pos']:
            # create the entry the first time this object is seen
            obj2vps.setdefault(scan + '_' + objid, [])
            obj2vps[scan + '_' + objid].append(vp)
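# obj2vps now maps '<scan>_<objid>' -> [viewpoint_id, ...]: every viewpoint from which
# that object is visible according to the REVERIE bounding-box annotations.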


def load_floorplan():
    """Parse the Matterport3D *.house segmentation files into per-scan lookups:
    viewpoint -> region index, region -> room name/floor, region -> object names,
    and viewpoint -> (x, y, z) location (values are kept as strings)."""
    region_label_lookup = load_region_label_lookup()

    house_files = glob('/home/snsd0805/code/research/VLN/base_dir/v1/scans/*/house_segmentations/*.house')

    node_region_lookups = {}
    region_room_lookups = {}
    region_object_lookups = {}
    node_locations_lookups = {}

    for house_file in house_files:
        scan_id = house_file.split("/")[-3]
        regions, floors, node_id_regions, node_id_floors = {}, {}, {}, {}
        room_bboxes = {}
        node_coors = {}
        node_locations = {}
        region_objects = defaultdict(list)
        object_name_lookup = {}
        # print(scan_id, datetime.now())
        # house_lines = []
        for line in open(house_file):
            house_line = line.strip()
            # house_lines.append(line.strip())

            # for house_line in house_lines[1:]:
            house_line_cols = house_line.split()
            house_line_type = house_line_cols[0]
            house_line_cols = house_line_cols[1:]

            # region record
            if house_line_type == 'R':
                region_index, level_index, _, _, label, px, py, pz, xlo, ylo, zlo, xhi, yhi, zhi, height, _, _, _, _ = house_line_cols
                regions[region_index] = region_label_lookup[label]
                floors[region_index] = level_index
                room_bboxes[region_index] = {
                    'name': region_label_lookup[label],
                    'floor': level_index
                }
                # for var_name in ['px', 'py', 'pz', 'xlo', 'ylo', 'zlo', 'xhi', 'yhi', 'zhi', 'height']:
                #     room_bboxes[region_index][var_name] = float(eval(var_name))

            # panorama record
            if house_line_type == 'P':
                node_id, panorama_index, region_index, _, px, py, pz, _, _, _, _, _ = house_line_cols
                node_id_regions[node_id] = region_index  # regions[region_index]
                node_locations[node_id] = (px, py, pz)
                # node_id_floors[node_id] = int(floors[region_index]) + 1
                # node_coors[node_id] = (float(px), float(py), float(pz))
                # raise

            # if house_line_type == 'I':
            #     break

            # category record
            if house_line_type == 'C':
                category_index, category_mapping_index, category_mapping_name, mpcat40_index, mpcat40_name, _, _, _, _, _ = house_line_cols
                object_name_lookup[category_index] = category_mapping_name

            # object record
            if house_line_type == 'O':
                object_index, region_index, category_index, px, py, pz, a0x, a0y, a0z, a1x, a1y, a1z, r0, r1, r2, _, _, _, _, _, _, _, _ = house_line_cols
                if category_index == '-1' or region_index == '-1':
                    # print("error")
                    continue
                region_objects[region_index].append(object_name_lookup[category_index])
        # room_lookups[scan_id] = node_id_regions
        # floor_lookups[scan_id] = node_id_floors
        region_room_lookups[scan_id] = room_bboxes
        node_region_lookups[scan_id] = node_id_regions
        node_locations_lookups[scan_id] = node_locations
        region_object_lookups[scan_id] = {k: sorted(v) for k, v in region_objects.items()}
        # node_coor_lookups[scan_id] = node_coors
    return node_region_lookups, region_room_lookups, region_object_lookups, node_locations_lookups
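
# Shape of the returned lookups (illustrative sketch; the scan/viewpoint ids are placeholders):
#   node_region, region_room, region_obj, node_locs = load_floorplan()
#   region_idx = node_region['<scan_id>']['<viewpoint_id>']   # region index, as a string
#   region_room['<scan_id>'][region_idx]                      # {'name': 'kitchen', 'floor': '0'}
#   region_obj['<scan_id>'][region_idx]                       # sorted list of object category names
#   node_locs['<scan_id>']['<viewpoint_id>']                  # ('px', 'py', 'pz') kept as strings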


def load_region_label_lookup():
    region_label_lookup = {
        'a': 'bathroom',
        'b': 'bedroom',
        'c': 'closet',
        'd': 'dining room',
        'e': 'entryway',      # /foyer/lobby (should be the front door, not any door)
        'f': 'familyroom',    # (should be a room that a family hangs out in, not any area with couches)
        'g': 'garage',
        'h': 'hallway',
        'i': 'library',       # (should be room like a library at a university, not an individual study)
        'j': 'laundryroom',   # /mudroom (place where people do laundry, etc.)
        'k': 'kitchen',
        'l': 'living room',   # (should be the main "showcase" living room in a house, not any area with couches)
        'm': 'meeting room',  # /conferenceroom
        'n': 'lounge',        # (any area where people relax in comfy chairs/couches that is not the family room or living room)
        'o': 'office',        # (usually for an individual, or a small set of people)
        'p': 'porch',         # /terrace/deck/driveway (must be outdoors on ground level)
        'r': 'recreation',    # /game (should have recreational objects, like pool table, etc.)
        's': 'stairs',
        't': 'toilet',        # (should be a small room with ONLY a toilet)
        'u': 'utility room',  # /toolroom
        'v': 'tv',            # (must have theater-style seating)
        'w': 'gym',           # workout/gym/exercise
        'x': 'outdoor',       # areas containing grass, plants, bushes, trees, etc.
        'y': 'balcony',       # (must be outside and must not be on ground floor)
        'z': 'other room',    # (it is clearly a room, but the function is not clear)
        'B': 'bar',
        'C': 'classroom',
        'D': 'dining booth',
        'S': 'spa',           # /sauna
        'Z': 'junk',          # (reflections of mirrors, random points floating in space, etc.)
        '-': 'no label',
    }
    return region_label_lookup


with open('./node_region.json') as fp:
    node_region = json.load(fp)


class Simulator(object):
    ''' A simple simulator in the Matterport3D environment '''

    def __init__(self, navigable_dir: str):
        self.heading = 0
        self.elevation = 0
        self.scan_ID = ''
        self.viewpoint_ID = ''
        self.navigable_dir = navigable_dir
        self.navigable_dict = {}
        self.candidate = {}
        self.gmap = NavGraph()

        self.node_region, self.region_room, self.region_obj, self.node_locations = load_floorplan()

    def newEpisode(
        self,
        scan_ID: str,
        viewpoint_ID: str,
        heading: float,
        elevation: float,
        start: str,
        target: str,
        clip_target: str,
    ):
        """Start a new episode and load the pre-computed navigable dict for this scan."""
        self.heading = heading
        self.elevation = elevation
        self.scan_ID = scan_ID
        self.viewpoint_ID = viewpoint_ID
        self.start = start
        self.target = target
        self.clip_target = clip_target
        # Load navigable dict
        navigable_path = os.path.join(self.navigable_dir, self.scan_ID + '_navigable.json')
        with open(navigable_path, 'r') as f:
            self.navigable_dict = json.load(f)

        '''
        self.navigable_dict = {}
        for start, v in navigable_dict.items():
            self.navigable_dict[start] = {}
            # print("BEFORE: ", len(navigable_dict[start]))
            for to, _v in navigable_dict[start].items():
                start_region = self.node_region[scan_ID][start]
                to_region = self.node_region[scan_ID][to]
                if start_region == to_region:
                    self.navigable_dict[start][to] = _v
            # print(start_region, to_region)
            # print("AFTER: ", len(self.navigable_dict[start]))
        '''

        # Get candidate
        self.getCandidate()

    def updateGraph(self):
        # build graph
        for candidate in self.candidate.keys():
            self.gmap.update_connection(self.viewpoint_ID, candidate)

    def getState(self) -> dict:
        self.state = {
            'scanID': self.scan_ID,
            'viewpointID': self.viewpoint_ID,
            'heading': self.heading,
            'elevation': self.elevation,
            'candidate': self.candidate,
            'start': self.start,
            'target': self.target,
            'clip_target': self.clip_target,
        }
        return self.state

    def getCandidate(self):
        """
        Get the agent's candidate list from the pre-stored navigable dict.
        """
        self.candidate = self.navigable_dict[self.viewpoint_ID]
        self.updateGraph()

    def makeAction(self, next_viewpoint_ID):
        """
        Move to next_viewpoint_ID and update the agent's state. Staying at the current
        viewpoint is a no-op; viewpoints that are not candidates are ignored.
        """
        if next_viewpoint_ID == self.viewpoint_ID:
            return
        elif next_viewpoint_ID in self.candidate.keys():
            self.heading = self.candidate[next_viewpoint_ID]['heading']
            self.elevation = self.candidate[next_viewpoint_ID]['elevation']
            self.viewpoint_ID = next_viewpoint_ID
            self.getCandidate()
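
# Minimal usage sketch for Simulator (the ids/paths below are placeholders, not real data):
#   sim = Simulator('<navigable_dir>')
#   sim.newEpisode('<scan>', '<start_vp>', 0.0, 0.0, '<start_vp>', '<target>', '<clip_target>')
#   state = sim.getState()                           # dict with heading, candidate viewpoints, ...
#   sim.makeAction(next(iter(state['candidate'])))   # hop to an arbitrary neighbouring viewpoint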


class EnvBatch(object):
    ''' A simple wrapper for a batch of simplified Matterport3D environments,
        using discretized viewpoints and pretrained features '''

    def __init__(self, navigable_dir, feat_db=None, batch_size=100):
        """
        1. Keep a handle to the pre-computed image feature database.
        2. Init one Simulator per batch element.
        :param feat_db: image feature database, queried per (scan, viewpoint).
        :param batch_size: number of simulators to create.
        """
        self.feat_db = feat_db

        self.sims = []
        for i in range(batch_size):
            sim = Simulator(navigable_dir)
            self.sims.append(sim)

    def _make_id(self, scanId, viewpointId):
        return scanId + '_' + viewpointId

    def newEpisodes(self, scanIds, viewpointIds, headings, starts, targets, clip_targets):
        for i, (scanId, viewpointId, heading, start, target, clip_target) in enumerate(zip(scanIds, viewpointIds, headings, starts, targets, clip_targets)):
            self.sims[i].newEpisode(scanId, viewpointId, heading, 0, start, target, clip_target)

    def getStates(self):
        """
        Get the list of states augmented with pre-computed image observations
        (no RGB rendering is done here).
        :return: [(feature, sim_state)] * batch_size, where feature is whatever
                 feat_db.get_image_observation returns for the current scan/viewpoint.
        """
        feature_states = []
        for i, sim in enumerate(self.sims):
            state = sim.getState()

            feature = self.feat_db.get_image_observation(state["scanID"], state["viewpointID"])
            feature_states.append((feature, state))
        return feature_states

    def makeActions(self, next_viewpoint_IDs):
        ''' Take an action using the full state dependent action interface (with batched input) '''
        for i, next_viewpoint_ID in enumerate(next_viewpoint_IDs):
            self.sims[i].makeAction(next_viewpoint_ID)
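
# Note: feat_db is assumed to expose get_image_observation(scan, viewpoint) returning a dict
# with 'detail', 'summary' and 'objects' entries; see REVERIENavBatch._get_obs below.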


class REVERIENavBatch(object):
    ''' Implements the REVERIE navigation task, using discretized viewpoints and pretrained features '''

    def __init__(
        self, view_db, instr_data, connectivity_dir, navigable_dir,
        batch_size=1, seed=0, name=None
    ):
        self.env = EnvBatch(navigable_dir, feat_db=view_db, batch_size=batch_size)
        self.data = instr_data
        self.scans = set([x['scan'] for x in self.data])
        self.connectivity_dir = connectivity_dir
        self.batch_size = batch_size
        self.name = name

        self.gt_trajs = self._get_gt_trajs(self.data)  # for evaluation

        # use different seeds in different processes to shuffle data
        '''
        self.seed = seed
        random.seed(self.seed)
        random.shuffle(self.data)
        '''

        self.ix = 0
        self._load_nav_graphs()

        self.buffered_state_dict = {}
        print('%s loaded with %d instructions, using splits: %s' % (
            self.__class__.__name__, len(self.data), self.name))

    def _get_gt_trajs(self, data):
        gt_trajs = {
            x['new_reverie_id']: (x['scan'], x['path'])
            for x in data if len(x['path']) > 1
        }
        return gt_trajs

    def size(self):
        return len(self.data)

    def _load_nav_graphs(self):
        """
        Load the connectivity graph for each scan in self.scans; useful for reasoning
        about shortest paths.
        Stores the graphs {scan_id: graph} in self.graphs,
        the shortest paths {scan_id: {view_id_x: {view_id_y: [path]}}} in self.shortest_paths,
        and the corresponding path lengths in self.shortest_distances (same structure).
        :return: None
        """
        print('Loading navigation graphs for %d scans' % len(self.scans))
        self.graphs = load_nav_graphs(self.connectivity_dir, self.scans)
        self.shortest_paths = {}
        for scan, G in self.graphs.items():  # compute all shortest paths
            self.shortest_paths[scan] = dict(nx.all_pairs_dijkstra_path(G))
        self.shortest_distances = {}
        for scan, G in self.graphs.items():  # compute all shortest path lengths
            self.shortest_distances[scan] = dict(nx.all_pairs_dijkstra_path_length(G))

    def _next_minibatch(self, batch_size=None, **kwargs):
        """
        Store the next minibatch in 'self.batch'.
        """
        if batch_size is None:
            batch_size = self.batch_size

        batch = self.data[self.ix: self.ix + batch_size]
        if len(batch) < batch_size:
            random.shuffle(self.data)
            self.ix = batch_size - len(batch)
            batch += self.data[:self.ix]
        else:
            self.ix += batch_size
        self.batch = batch
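        # Note: when fewer than batch_size items remain, the data list is reshuffled and the
        # minibatch wraps around to the start of the (new) epoch.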

    def reset_epoch(self, shuffle=False):
        ''' Reset the data index to the beginning of the epoch. Primarily for testing.
        You must still call reset() for a new episode. '''
        if shuffle:
            random.shuffle(self.data)
        self.ix = 0

    def _get_obs(self):
        obs = []
        for i, (feature, state) in enumerate(self.env.getStates()):
            item = self.batch[i]

            ob = {
                'obs': feature["detail"],
                'obs_summary': feature["summary"],
                'objects': feature["objects"],
                # 'instr_id': item['instr_id'],
                # 'action_plan': item['action_plan'],
                'scan': state['scanID'],
                'viewpoint': state['viewpointID'],
                'heading': state['heading'],
                'elevation': state['elevation'],
                'candidate': state['candidate'],
                'instruction': item['instruction'],
                'gt_path': item['path'],
                'path_id': item['path_id'],
                'start': item['start'],
                'new_reverie_id': item['new_reverie_id'],
                'target': item['target'],
                'clip_target': item['clip_target']
            }
            # RL reward. The negative distance between the state and the final state
            # There are multiple gt end viewpoints on REVERIE.

            '''
            if ob['instr_id'] in self.gt_trajs:
                ob['distance'] = self.shortest_distances[ob['scan']][ob['viewpoint']][item['path'][-1]]
            else:
                ob['distance'] = 0
            '''

            obs.append(ob)
        return obs

    def reset(self, **kwargs):
        ''' Load a new minibatch / episodes. '''
        self._next_minibatch(**kwargs)

        scanIds = [item['scan'] for item in self.batch]
        viewpointIds = [item['path'][0] for item in self.batch]  # first node of the gt path (not used below)
        headings = [item['heading'] for item in self.batch]
        starts = [item['start'] for item in self.batch]
        targets = [item['target'] for item in self.batch]
        clip_targets = [item['clip_target'] for item in self.batch]
        # NOTE: episodes are initialised from item['start'] rather than from path[0].
        self.env.newEpisodes(scanIds, starts, headings, starts, targets, clip_targets)
        return self._get_obs()

    def step(self, next_viewpoint_IDs):
        ''' Take action (same interface as makeActions) '''
        self.env.makeActions(next_viewpoint_IDs)
        return self._get_obs()

    ############### Nav Evaluation ###############
    def _get_nearest(self, shortest_distances, goal_id, path):
        near_id = path[0]
        near_d = shortest_distances[near_id][goal_id]
        for item in path:
            d = shortest_distances[item][goal_id]
            if d < near_d:
                near_id = item
                near_d = d
        return near_id

    def _eval_item(self, scan, pred_path, gt_path, gt_found, found, gt_objid):
        scores = {}

        shortest_distances = self.shortest_distances[scan]

        path = sum(pred_path, [])
        # assert gt_path[0] == path[0], 'Result trajectories should include the start position'

        nearest_position = self._get_nearest(shortest_distances, gt_path[-1], path)

        scores['nav_error'] = shortest_distances[path[-1]][gt_path[-1]]
        scores['oracle_error'] = shortest_distances[nearest_position][gt_path[-1]]

        scores['action_steps'] = len(pred_path) - 1
        scores['trajectory_steps'] = len(path) - 1
        scores['trajectory_lengths'] = np.sum([shortest_distances[a][b] for a, b in zip(path[:-1], path[1:])])

        gt_lengths = np.sum([shortest_distances[a][b] for a, b in zip(gt_path[:-1], gt_path[1:])])

        scores['found_success'] = float(gt_found == found)

        goal_viewpoints = set(obj2vps['%s_%s' % (scan, str(gt_objid))])

        pred_stop_region = node_region[scan][path[-1]]
        gt_stop_region = node_region[scan][gt_path[-1]]

        # scores['success'] = float(scores['nav_error'] < ERROR_MARGIN)
        scores['success'] = float(path[-1] in goal_viewpoints)
        scores['room_success'] = float(gt_stop_region == pred_stop_region)
        # scores['oracle_success'] = float(scores['oracle_error'] < ERROR_MARGIN)
        scores['oracle_success'] = float(any(x in goal_viewpoints for x in path))
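
        # SPL = success * gt_length / max(pred_length, gt_length); the sspl_* variants below
        # additionally multiply by found_success, and swap in room_success / oracle_success
        # as the success term (sspl_2 / sspl_3).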
        scores['spl'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
        scores['sspl_1'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01) * scores['found_success']
        scores['sspl_2'] = scores['room_success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01) * scores['found_success']
        scores['sspl_3'] = scores['oracle_success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01) * scores['found_success']

        scores['ss_1'] = scores['success'] * scores['found_success']
        scores['ss_2'] = scores['room_success'] * scores['found_success']
        scores['ss_3'] = scores['oracle_success'] * scores['found_success']

        scores.update(
            cal_dtw(shortest_distances, path, gt_path, scores['success'], ERROR_MARGIN)
        )
        scores['CLS'] = cal_cls(shortest_distances, path, gt_path, ERROR_MARGIN)

        return scores

    def eval_metrics(self, preds):
        ''' Evaluate each agent trajectory based on how close it got to the goal location;
        each predicted trajectory is a list of viewpoint-id sub-paths that is flattened
        in _eval_item. '''
        print('eval %d predictions' % (len(preds)))
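
        # Expected prediction format (sketch, inferred from the loop below; the exact instr_id
        # layout is an assumption, but its second '_'-separated token must be the object id):
        #   preds = [{'instr_id': '<id>_<objid>_<k>', 'trajectory': [[vp, ...], ...],
        #             'found': 0 or 1, 'gt_found': 0 or 1}, ...]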

        metrics = defaultdict(list)
        for item in preds:
            instr_id = item['instr_id']
            traj = item['trajectory']
            obj_id = instr_id.split('_')[1]
            scan, gt_traj = self.gt_trajs[instr_id]
            traj_scores = self._eval_item(scan, traj, gt_traj, item['gt_found'], item['found'], obj_id)
            for k, v in traj_scores.items():
                metrics[k].append(v)
            metrics['instr_id'].append(instr_id)

        avg_metrics = {
            'action_steps': np.mean(metrics['action_steps']),
            'steps': np.mean(metrics['trajectory_steps']),
            'lengths': np.mean(metrics['trajectory_lengths']),
            'nav_error': np.mean(metrics['nav_error']),
            'oracle_error': np.mean(metrics['oracle_error']),
            'sr': np.mean(metrics['success']) * 100,
            'room_success': np.mean(metrics['room_success']) * 100,
            'found_success': np.mean(metrics['found_success']) * 100,
            'oracle_sr': np.mean(metrics['oracle_success']) * 100,
            'spl': np.mean(metrics['spl']) * 100,
            'sspl_1': np.mean(metrics['sspl_1']) * 100,
            'sspl_2': np.mean(metrics['sspl_2']) * 100,
            'sspl_3': np.mean(metrics['sspl_3']) * 100,
            'ss_1': np.mean(metrics['ss_1']) * 100,
            'ss_2': np.mean(metrics['ss_2']) * 100,
            'ss_3': np.mean(metrics['ss_3']) * 100,
            'nDTW': np.mean(metrics['nDTW']) * 100,
            'SDTW': np.mean(metrics['SDTW']) * 100,
            'CLS': np.mean(metrics['CLS']) * 100,
        }
        return avg_metrics, metrics
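

# End-to-end usage sketch (constructor arguments are placeholders for the real feature db,
# instruction data and directory paths; not part of this module):
#   env = REVERIENavBatch(view_db, instr_data, connectivity_dir, navigable_dir,
#                         batch_size=8, name='val_unseen')
#   obs = env.reset()
#   next_vps = [next(iter(ob['candidate']), ob['viewpoint']) for ob in obs]  # e.g. pick any neighbour
#   obs = env.step(next_vps)
#   avg_metrics, metrics = env.eval_metrics(preds)   # preds as described in eval_metrics above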