Compare commits

..

3 Commits

13 changed files with 50 additions and 246 deletions

File diff suppressed because one or more lines are too long

View File

@ -37,7 +37,7 @@ def construct_instrs(anno_dir, dataset, splits, tokenizer, max_instr_len=512, is
# Split multiple instructions into separate entries # Split multiple instructions into separate entries
for j, instr in enumerate(item['instructions']): for j, instr in enumerate(item['instructions']):
new_item = dict(item) new_item = dict(item)
new_item['instr_id'] = '%s_%d' % (item['path_id'], j) new_item['instr_id'] = '%s_%d' % (item['id'], j)
new_item['instruction'] = instr new_item['instruction'] = instr
new_item['instr_encoding'] = item['instr_encodings'][j][:max_instr_len] new_item['instr_encoding'] = item['instr_encodings'][j][:max_instr_len]
del new_item['instructions'] del new_item['instructions']

View File

@ -58,12 +58,12 @@ def build_dataset(args, rank=0, is_test=False):
) )
# val_env_names = ['val_train_seen'] # val_env_names = ['val_train_seen']
val_env_names = ['val_train_seen', 'val_seen', 'val_unseen'] val_env_names = ['val_seen', 'val_unseen']
if args.dataset == 'r4r' and (not args.test): if args.dataset == 'r4r' and (not args.test):
val_env_names[-1] == 'val_unseen_sampled' val_env_names[-1] == 'val_unseen_sampled'
if args.submit and args.dataset != 'r4r': # if args.submit and args.dataset != 'r4r':
val_env_names.append('test') # val_env_names.append('test')
val_envs = {} val_envs = {}
for split in val_env_names: for split in val_env_names:

File diff suppressed because one or more lines are too long

View File

@ -27,13 +27,7 @@ class BaseAgent(object):
def get_results(self, detailed_output=False): def get_results(self, detailed_output=False):
output = [] output = []
for k, v in self.results.items(): for k, v in self.results.items():
output.append({ output.append({'instr_id': k, 'trajectory': v['path'], 'pred_objid': v['pred_objid']})
'instr_id': k,
'trajectory': v['path'],
'pred_objid': v['pred_objid'],
'found': v['found'],
'gt_found': v['gt_found']
})
if detailed_output: if detailed_output:
output[-1]['details'] = v['details'] output[-1]['details'] = v['details']
return output return output

View File

@ -174,14 +174,11 @@ class GMapObjectNavAgent(Seq2SeqAgent):
def _nav_vp_variable(self, obs, gmaps, pano_embeds, cand_vpids, view_lens, obj_lens, nav_types): def _nav_vp_variable(self, obs, gmaps, pano_embeds, cand_vpids, view_lens, obj_lens, nav_types):
batch_size = len(obs) batch_size = len(obs)
# print("PANO shape", pano_embeds.shape)
# add [stop] token & [NOT FOUND] token # add [stop] token
# [STOP] 在最前面, [NOT FOUND] 在最後面
vp_img_embeds = torch.cat( vp_img_embeds = torch.cat(
[torch.zeros_like(pano_embeds[:, :1]), pano_embeds, torch.ones_like(pano_embeds[:, :1])], 1 [torch.zeros_like(pano_embeds[:, :1]), pano_embeds], 1
) )
# print("SHAPE:", vp_img_embeds.shape)
batch_vp_pos_fts = [] batch_vp_pos_fts = []
for i, gmap in enumerate(gmaps): for i, gmap in enumerate(gmaps):
@ -195,33 +192,19 @@ class GMapObjectNavAgent(Seq2SeqAgent):
) )
# add [stop] token at beginning # add [stop] token at beginning
vp_pos_fts = np.zeros((vp_img_embeds.size(1), 14), dtype=np.float32) vp_pos_fts = np.zeros((vp_img_embeds.size(1), 14), dtype=np.float32)
# print("vp_pos_fts:", vp_pos_fts.shape)
vp_pos_fts[:, :7] = cur_start_pos_fts vp_pos_fts[:, :7] = cur_start_pos_fts
# print("vp_pos_fts[:, :7]:", vp_pos_fts[:, :7].shape)
# print("cur_start_pos_fts:", cur_start_pos_fts.shape)
vp_pos_fts[1:len(cur_cand_pos_fts)+1, 7:] = cur_cand_pos_fts vp_pos_fts[1:len(cur_cand_pos_fts)+1, 7:] = cur_cand_pos_fts
# print("vp_pos_fts[1:len(), 7:]:", vp_pos_fts[1:len(cur_cand_pos_fts)+1, 7:].shape)
# print("cur_cand_pos_fts:", cur_cand_pos_fts.shape)
batch_vp_pos_fts.append(torch.from_numpy(vp_pos_fts)) batch_vp_pos_fts.append(torch.from_numpy(vp_pos_fts))
batch_vp_pos_fts = pad_tensors(batch_vp_pos_fts).cuda() batch_vp_pos_fts = pad_tensors(batch_vp_pos_fts).cuda()
# 要把 stop 和 not found 的 mask 補上去 vp_nav_masks = torch.cat([torch.ones(batch_size, 1).bool().cuda(), nav_types == 1], 1)
# 這邊把 stop 跟 candidate 放一起、把 not found 跟 object 放一起 vp_obj_masks = torch.cat([torch.zeros(batch_size, 1).bool().cuda(), nav_types == 2], 1)
vp_nav_masks = torch.cat([torch.ones(batch_size, 1).bool().cuda(), nav_types == 1, torch.zeros(batch_size, 1).bool().cuda()], 1)
vp_obj_masks = torch.cat([torch.zeros(batch_size, 1).bool().cuda(), nav_types == 2, torch.ones(batch_size, 1).bool().cuda()], 1)
# print('vp_nav_masks:', vp_nav_masks.shape)
# print('vp_obj_masks:', vp_obj_masks.shape)
vp_masks = gen_seq_masks(view_lens+obj_lens+2)
# print()
return { return {
'vp_img_embeds': vp_img_embeds, 'vp_img_embeds': vp_img_embeds,
'vp_pos_fts': batch_vp_pos_fts, 'vp_pos_fts': batch_vp_pos_fts,
'vp_masks': vp_masks, 'vp_masks': gen_seq_masks(view_lens+obj_lens+1),
'vp_nav_masks': vp_nav_masks, 'vp_nav_masks': vp_nav_masks,
'vp_obj_masks': vp_obj_masks, 'vp_obj_masks': vp_obj_masks,
'vp_cand_vpids': [[None]+x for x in cand_vpids], 'vp_cand_vpids': [[None]+x for x in cand_vpids],
@ -259,7 +242,7 @@ class GMapObjectNavAgent(Seq2SeqAgent):
return torch.from_numpy(a).cuda() return torch.from_numpy(a).cuda()
def _teacher_object(self, obs, ended, view_lens, obj_logits): def _teacher_object(self, obs, ended, view_lens):
targets = np.zeros(len(obs), dtype=np.int64) targets = np.zeros(len(obs), dtype=np.int64)
for i, ob in enumerate(obs): for i, ob in enumerate(obs):
if ended[i]: if ended[i]:
@ -269,18 +252,12 @@ class GMapObjectNavAgent(Seq2SeqAgent):
if i_vp not in ob['gt_end_vps']: if i_vp not in ob['gt_end_vps']:
targets[i] = self.args.ignoreid targets[i] = self.args.ignoreid
else: else:
i_objids = ob['obj_ids'] i_objids = ob['obj_ids']
targets[i] = self.args.ignoreid targets[i] = self.args.ignoreid
for j, obj_id in enumerate(i_objids): for j, obj_id in enumerate(i_objids):
if str(obj_id) == str(ob['gt_obj_id']): if str(obj_id) == str(ob['gt_obj_id']):
if ob['gt_found'] == True: # 可以找得到
targets[i] = j + view_lens[i] + 1 targets[i] = j + view_lens[i] + 1
else:
targets[i] = len(obj_logits[i])-1 # 不能找到,
break break
return torch.from_numpy(targets).cuda() return torch.from_numpy(targets).cuda()
def make_equiv_action(self, a_t, gmaps, obs, traj=None): def make_equiv_action(self, a_t, gmaps, obs, traj=None):
@ -321,8 +298,7 @@ class GMapObjectNavAgent(Seq2SeqAgent):
batch_size = len(obs) batch_size = len(obs)
# build graph: keep the start viewpoint # build graph: keep the start viewpoint
gmaps = [GraphMap(ob['viewpoint']) for ob in obs]
gmaps = [GraphMap(ob['viewpoint']) for ob in obs] # input the start point
for i, ob in enumerate(obs): for i, ob in enumerate(obs):
gmaps[i].update_graph(ob) gmaps[i].update_graph(ob)
@ -331,9 +307,6 @@ class GMapObjectNavAgent(Seq2SeqAgent):
'instr_id': ob['instr_id'], 'instr_id': ob['instr_id'],
'path': [[ob['viewpoint']]], 'path': [[ob['viewpoint']]],
'pred_objid': None, 'pred_objid': None,
'gt_objid': None,
'found': None,
'gt_found': None,
'details': {}, 'details': {},
} for ob in obs] } for ob in obs]
@ -351,7 +324,8 @@ class GMapObjectNavAgent(Seq2SeqAgent):
ml_loss = 0. ml_loss = 0.
og_loss = 0. og_loss = 0.
for t in range(self.args.max_action_len): # for t in range(self.args.max_action_len):
for t in range(1):
for i, gmap in enumerate(gmaps): for i, gmap in enumerate(gmaps):
if not ended[i]: if not ended[i]:
gmap.node_step_ids[obs[i]['viewpoint']] = t + 1 gmap.node_step_ids[obs[i]['viewpoint']] = t + 1
@ -406,24 +380,11 @@ class GMapObjectNavAgent(Seq2SeqAgent):
i_vp = obs[i]['viewpoint'] i_vp = obs[i]['viewpoint']
# update i_vp: stop and object grounding scores # update i_vp: stop and object grounding scores
i_objids = obs[i]['obj_ids'] i_objids = obs[i]['obj_ids']
i_obj_logits = obj_logits[i, pano_inputs['view_lens'][i]+1:] # 最後一個是 not found i_obj_logits = obj_logits[i, pano_inputs['view_lens'][i]+1:]
if len(i_objids) > 0:
if torch.argmax(i_obj_logits) >= len(i_objids): # not found 那格 logit 最大(會在最後一格)
og = -1
else:
og = i_objids[torch.argmax(i_obj_logits)]
else:
og = None
# 如果有找到og 會是 object id
# 如果是 not foundog 會是 -1
# 如果這個 viewpoint 看不到物件og 會是 None
gmap.node_stop_scores[i_vp] = { gmap.node_stop_scores[i_vp] = {
'stop': nav_probs[i, 0].data.item(), 'stop': nav_probs[i, 0].data.item(),
'og': og, 'og': i_objids[torch.argmax(i_obj_logits)] if len(i_objids) > 0 else None,
'og_details': {'objids': i_objids, 'logits': torch.cat([i_obj_logits[:len(i_objids)], i_obj_logits[[-1]] ], dim=0)}, 'og_details': {'objids': i_objids, 'logits': i_obj_logits[:len(i_objids)]},
} }
if train_ml is not None: if train_ml is not None:
@ -443,9 +404,9 @@ class GMapObjectNavAgent(Seq2SeqAgent):
) )
ml_loss += self.criterion(nav_outs['local_logits'], local_nav_targets) # local ml_loss += self.criterion(nav_outs['local_logits'], local_nav_targets) # local
# objec grounding # objec grounding
# obj_targets = self._teacher_object(obs, ended, pano_inputs['view_lens'], obj_logits) obj_targets = self._teacher_object(obs, ended, pano_inputs['view_lens'])
# print(t, obj_targets[6], obj_logits[6], obs[6]['obj_ids'], pano_inputs['view_lens'][i], obs[6]['gt_obj_id']) # print(t, obj_targets[6], obj_logits[6], obs[6]['obj_ids'], pano_inputs['view_lens'][i], obs[6]['gt_obj_id'])
# og_loss += self.criterion(obj_logits, obj_targets) og_loss += self.criterion(obj_logits, obj_targets)
# print(F.cross_entropy(obj_logits, obj_targets, reduction='none')) # print(F.cross_entropy(obj_logits, obj_targets, reduction='none'))
# print(t, 'og_loss', og_loss.item(), self.criterion(obj_logits, obj_targets).item()) # print(t, 'og_loss', og_loss.item(), self.criterion(obj_logits, obj_targets).item())
@ -491,11 +452,9 @@ class GMapObjectNavAgent(Seq2SeqAgent):
else: else:
cpu_a_t.append(nav_vpids[i][a_t[i]]) cpu_a_t.append(nav_vpids[i][a_t[i]])
original_gt_founds = [ ob['gt_found'] for ob in obs ]
# Make action and get the new state # Make action and get the new state
self.make_equiv_action(cpu_a_t, gmaps, obs, traj) self.make_equiv_action(cpu_a_t, gmaps, obs, traj)
for i in range(batch_size): for i in range(batch_size):
traj[i]['gt_found'] = original_gt_founds[i]
if (not ended[i]) and just_ended[i]: if (not ended[i]) and just_ended[i]:
stop_node, stop_score = None, {'stop': -float('inf'), 'og': None} stop_node, stop_score = None, {'stop': -float('inf'), 'og': None}
for k, v in gmaps[i].node_stop_scores.items(): for k, v in gmaps[i].node_stop_scores.items():
@ -505,10 +464,6 @@ class GMapObjectNavAgent(Seq2SeqAgent):
if stop_node is not None and obs[i]['viewpoint'] != stop_node: if stop_node is not None and obs[i]['viewpoint'] != stop_node:
traj[i]['path'].append(gmaps[i].graph.path(obs[i]['viewpoint'], stop_node)) traj[i]['path'].append(gmaps[i].graph.path(obs[i]['viewpoint'], stop_node))
traj[i]['pred_objid'] = stop_score['og'] traj[i]['pred_objid'] = stop_score['og']
if stop_score['og'] == -1 or stop_score['og'] == None:
traj[i]['found'] = False
else:
traj[i]['found'] = True
if self.args.detailed_output: if self.args.detailed_output:
for k, v in gmaps[i].node_stop_scores.items(): for k, v in gmaps[i].node_stop_scores.items():
traj[i]['details'][k] = { traj[i]['details'][k] = {
@ -532,16 +487,10 @@ class GMapObjectNavAgent(Seq2SeqAgent):
if train_ml is not None: if train_ml is not None:
ml_loss = ml_loss * train_ml / batch_size ml_loss = ml_loss * train_ml / batch_size
# og_loss = og_loss * train_ml / batch_size og_loss = og_loss * train_ml / batch_size
self.loss += ml_loss self.loss += ml_loss
# self.loss += og_loss self.loss += og_loss
self.logs['IL_loss'].append(ml_loss.item()) self.logs['IL_loss'].append(ml_loss.item())
# self.logs['OG_loss'].append(og_loss.item()) self.logs['OG_loss'].append(og_loss.item())
'''
print("TRAJ:")
for i in traj:
print(" GT: {}, PREDICT: {}, SCORE: {}".format(i['gt_found'], i['found'], 1 if i['gt_found']==i['found'] else 0))
'''
return traj return traj

View File

@ -87,8 +87,6 @@ def construct_instrs(anno_dir, dataset, splits, tokenizer, max_instr_len=512):
new_item['objId'] = None new_item['objId'] = None
new_item['instruction'] = instr new_item['instruction'] = instr
new_item['instr_encoding'] = item['instr_encodings'][j][:max_instr_len] new_item['instr_encoding'] = item['instr_encodings'][j][:max_instr_len]
new_item['path'] = item['path'][j]
new_item['found'] = item['found'][j]
del new_item['instructions'] del new_item['instructions']
del new_item['instr_encodings'] del new_item['instr_encodings']
data.append(new_item) data.append(new_item)

View File

@ -8,22 +8,12 @@ import random
import networkx as nx import networkx as nx
from collections import defaultdict from collections import defaultdict
import copy import copy
from glob import glob
import MatterSim import MatterSim
from utils.data import load_nav_graphs, new_simulator from utils.data import load_nav_graphs, new_simulator
from utils.data import angle_feature, get_all_point_angle_feature from utils.data import angle_feature, get_all_point_angle_feature
with open('./node_region.json') as fp:
node_region = json.load(fp)
with open('region2objs.json') as fp:
region2objs = json.load(fp)
with open('vp2objs.json') as fp:
vp2objs = json.load(fp)
class EnvBatch(object): class EnvBatch(object):
''' A simple wrapper for a batch of MatterSim environments, ''' A simple wrapper for a batch of MatterSim environments,
@ -321,7 +311,6 @@ class ReverieObjectNavBatch(object):
'navigableLocations' : state.navigableLocations, 'navigableLocations' : state.navigableLocations,
'instruction' : item['instruction'], 'instruction' : item['instruction'],
'instr_encoding': item['instr_encoding'], 'instr_encoding': item['instr_encoding'],
'gt_found' : item['found'],
'gt_path' : item['path'], 'gt_path' : item['path'],
'gt_end_vps': item.get('end_vps', []), 'gt_end_vps': item.get('end_vps', []),
'gt_obj_id': item['objId'], 'gt_obj_id': item['objId'],
@ -350,7 +339,7 @@ class ReverieObjectNavBatch(object):
self._next_minibatch(**kwargs) self._next_minibatch(**kwargs)
scanIds = [item['scan'] for item in self.batch] scanIds = [item['scan'] for item in self.batch]
viewpointIds = [item['path'][0] for item in self.batch] viewpointIds = [item['path'][-1] for item in self.batch]
headings = [item['heading'] for item in self.batch] headings = [item['heading'] for item in self.batch]
self.env.newEpisodes(scanIds, viewpointIds, headings) self.env.newEpisodes(scanIds, viewpointIds, headings)
return self._get_obs() return self._get_obs()
@ -362,17 +351,13 @@ class ReverieObjectNavBatch(object):
############### Nav Evaluation ############### ############### Nav Evaluation ###############
def _eval_item(self, scan, pred_path, pred_objid, gt_path, gt_objid, pred_found, gt_found): def _eval_item(self, scan, pred_path, pred_objid, gt_path, gt_objid):
scores = {} scores = {}
shortest_distances = self.shortest_distances[scan] shortest_distances = self.shortest_distances[scan]
path = sum(pred_path, []) path = sum(pred_path, [])
assert gt_path[0] == path[0], 'Result trajectories should include the start position' # assert gt_path[0] == path[0], 'Result trajectories should include the start position'
pred_stop_region = node_region[scan][path[-1]]
gt_stop_region = node_region[scan][gt_path[-1]]
scores['action_steps'] = len(pred_path) - 1 scores['action_steps'] = len(pred_path) - 1
scores['trajectory_steps'] = len(path) - 1 scores['trajectory_steps'] = len(path) - 1
@ -383,99 +368,9 @@ class ReverieObjectNavBatch(object):
goal_viewpoints = set(self.obj2vps['%s_%s'%(scan, str(gt_objid))]) goal_viewpoints = set(self.obj2vps['%s_%s'%(scan, str(gt_objid))])
assert len(goal_viewpoints) > 0, '%s_%s'%(scan, str(gt_objid)) assert len(goal_viewpoints) > 0, '%s_%s'%(scan, str(gt_objid))
scores['found_success'] = float(pred_found == gt_found)
scores['success'] = float(path[-1] in goal_viewpoints) scores['success'] = float(path[-1] in goal_viewpoints)
scores['room_success'] = float(pred_stop_region == gt_stop_region)
scores['oracle_success'] = float(any(x in goal_viewpoints for x in path)) scores['oracle_success'] = float(any(x in goal_viewpoints for x in path))
gt_room_start_vp = None
gt_back_path = []
gt_front_path = []
for vp in gt_path[::-1]:
if node_region[scan][vp] == gt_stop_region and gt_front_path == []:
gt_back_path.append(vp)
gt_room_start_vp = vp
else:
gt_front_path.append(vp)
gt_front_path = gt_front_path[::-1]
gt_back_path = gt_back_path[::-1]
assert (gt_front_path + gt_back_path) == gt_path, "Front path & Back path error"
gt_front_path += [gt_room_start_vp]
'''
if scores['success'] == 1.0:
scores['found_success'] = float(pred_found == gt_found)
else:
scores['found_success'] = 0.0
'''
gt_reach_length = np.sum([shortest_distances[a][b] for a, b in zip(gt_front_path[:-1], gt_front_path[1:])])
gt_explore_length = np.sum([shortest_distances[a][b] for a, b in zip(gt_back_path[:-1], gt_back_path[1:])])
if scores['room_success'] != 0.0:
# corse-grained
# get the reach_path & explore_path
room_start_vp = None
back_path = []
front_path = []
for vp in path[::-1]:
if node_region[scan][vp] == gt_stop_region and front_path == []:
back_path.append(vp)
room_start_vp = vp
else:
front_path.append(vp)
front_path = front_path[::-1]
back_path = back_path[::-1]
assert (front_path + back_path) == path, "Front path & Back path error"
# front_path = ... room_start_vp
# back_path = room_start_vp ...
front_path += [room_start_vp]
reach_length = np.sum([shortest_distances[a][b] for a, b in zip(front_path[:-1], front_path[1:])]) if len(front_path) != 1 else 0.01
explore_length = np.sum([shortest_distances[a][b] for a, b in zip(back_path[:-1], back_path[1:])]) if len(back_path) != 1 else 0.01
scores['room_spl'] = scores['room_success'] * gt_reach_length / max(reach_length, gt_reach_length, 0.01)
if scores['found_success'] != 0.0:
# fine-grained score
# p is converage rate
if gt_found:
p = 1.0
else:
explore_objs = set()
for vp in back_path:
explore_objs.update(vp2objs[vp])
p = len(explore_objs) / len(region2objs[scan][gt_stop_region])
scores['coverage_rate'] = p
scores['explore_spl'] = scores['room_success'] * scores['found_success'] * gt_explore_length / max(gt_explore_length, explore_length, 0.01) * p
else:
scores['coverage_rate'] = 0
scores['explore_spl'] = 0
else:
scores['room_spl'] = 0.0
scores['coverage_rate'] = 0
scores['explore_spl'] = 0
scores['spl'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01) scores['spl'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
'''
scores['sspl_1'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01) * scores['found_success']
scores['sspl_2'] = scores['room_success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01) * scores['found_success']
scores['sspl_3'] = scores['oracle_success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01) * scores['found_success']
scores['ss_1'] = scores['success'] * scores['found_success']
scores['ss_2'] = scores['room_success'] * scores['found_success']
scores['ss_3'] = scores['oracle_success'] * scores['found_success']
'''
scores['sspl'] = scores['spl'] * scores['found_success']
scores['rgs'] = str(pred_objid) == str(gt_objid) scores['rgs'] = str(pred_objid) == str(gt_objid)
scores['rgspl'] = scores['rgs'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01) scores['rgspl'] = scores['rgs'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
@ -485,8 +380,6 @@ class ReverieObjectNavBatch(object):
''' Evaluate each agent trajectory based on how close it got to the goal location ''' Evaluate each agent trajectory based on how close it got to the goal location
the path contains [view_id, angle, vofv]''' the path contains [view_id, angle, vofv]'''
print('eval %d predictions' % (len(preds))) print('eval %d predictions' % (len(preds)))
print(preds[0])
metrics = defaultdict(list) metrics = defaultdict(list)
for item in preds: for item in preds:
@ -494,16 +387,7 @@ class ReverieObjectNavBatch(object):
traj = item['trajectory'] traj = item['trajectory']
pred_objid = item.get('pred_objid', None) pred_objid = item.get('pred_objid', None)
scan, gt_traj, gt_objid = self.gt_trajs[instr_id] scan, gt_traj, gt_objid = self.gt_trajs[instr_id]
pred_found = item['found'] traj_scores = self._eval_item(scan, traj, pred_objid, gt_traj, gt_objid)
gt_found = item['gt_found']
traj_scores = self._eval_item(scan, traj, pred_objid, gt_traj, gt_objid, pred_found, gt_found)
# record "success" in the result file
# let the visualization tool can get the success status
item['success'] = traj_scores['success']
for k, v in traj_scores.items(): for k, v in traj_scores.items():
metrics[k].append(v) metrics[k].append(v)
metrics['instr_id'].append(instr_id) metrics['instr_id'].append(instr_id)
@ -515,15 +399,8 @@ class ReverieObjectNavBatch(object):
'sr': np.mean(metrics['success']) * 100, 'sr': np.mean(metrics['success']) * 100,
'oracle_sr': np.mean(metrics['oracle_success']) * 100, 'oracle_sr': np.mean(metrics['oracle_success']) * 100,
'spl': np.mean(metrics['spl']) * 100, 'spl': np.mean(metrics['spl']) * 100,
'sspl': np.mean(metrics['sspl']) * 100,
'rgs': np.mean(metrics['rgs']) * 100, 'rgs': np.mean(metrics['rgs']) * 100,
'rgspl': np.mean(metrics['rgspl']) * 100, 'rgspl': np.mean(metrics['rgspl']) * 100,
'found_sr': np.mean(metrics['found_success']) * 100,
'room_sr': np.mean(metrics['room_success']) * 100,
'room_spl': np.mean(metrics['room_spl']) * 100,
'coverage_rate': np.mean(metrics['coverage_rate']) * 100,
'explore_spl': np.mean(metrics['explore_spl']) * 100,
} }
return avg_metrics, metrics return avg_metrics, metrics

View File

@ -65,13 +65,11 @@ def build_dataset(args, rank=0):
multi_endpoints=args.multi_endpoints, multi_startpoints=args.multi_startpoints, multi_endpoints=args.multi_endpoints, multi_startpoints=args.multi_startpoints,
) )
# val_env_names = ['val_train_seen', 'val_seen', 'val_unseen'] # val_env_names = ['val_train_seen']
val_env_names = [ 'val_seen', 'val_unseen'] val_env_names = ['val_seen', 'val_unseen']
if args.submit: # if args.submit:
include_test = input('Include test dataset? (y/n)') # val_env_names.append('test')
if include_test == 'y' or include_test == 'Y':
val_env_names.append('test')
val_envs = {} val_envs = {}
for split in val_env_names: for split in val_env_names:
@ -138,7 +136,7 @@ def train(args, train_env, val_envs, aug_env=None, rank=-1):
'\nListener training starts, start iteration: %s' % str(start_iter), record_file '\nListener training starts, start iteration: %s' % str(start_iter), record_file
) )
best_val = {'val_unseen': {"spl": 0., "sr": 0., "room_sr": 0., "state":"", "sspl": 0., 'found_sr': 0., 'explore_spl': 0.}} best_val = {'val_unseen': {"spl": 0., "sr": 0., "state":""}}
for idx in range(start_iter, start_iter+args.iters, args.log_every): for idx in range(start_iter, start_iter+args.iters, args.log_every):
listner.logs = defaultdict(list) listner.logs = defaultdict(list)
@ -203,15 +201,9 @@ def train(args, train_env, val_envs, aug_env=None, rank=-1):
# select model by spl # select model by spl
if env_name in best_val: if env_name in best_val:
if score_summary['explore_spl'] >= best_val[env_name]['explore_spl']: if score_summary['spl'] >= best_val[env_name]['spl']:
best_val[env_name]['spl'] = score_summary['spl'] best_val[env_name]['spl'] = score_summary['spl']
best_val[env_name]['sspl'] = score_summary['sspl']
best_val[env_name]['explore_spl'] = score_summary['explore_spl']
best_val[env_name]['coverage_rate'] = score_summary['coverage_rate']
best_val[env_name]['room_spl'] = score_summary['room_spl']
best_val[env_name]['sr'] = score_summary['sr'] best_val[env_name]['sr'] = score_summary['sr']
best_val[env_name]['found_sr'] = score_summary['found_sr']
best_val[env_name]['room_sr'] = score_summary['room_sr']
best_val[env_name]['state'] = 'Iter %d %s' % (iter, loss_str) best_val[env_name]['state'] = 'Iter %d %s' % (iter, loss_str)
listner.save(idx, os.path.join(args.ckpt_dir, "best_%s" % (env_name))) listner.save(idx, os.path.join(args.ckpt_dir, "best_%s" % (env_name)))
@ -245,13 +237,10 @@ def valid(args, train_env, val_envs, rank=-1):
write_to_record_file(str(args) + '\n\n', record_file) write_to_record_file(str(args) + '\n\n', record_file)
for env_name, env in val_envs.items(): for env_name, env in val_envs.items():
print(env_name)
prefix = 'submit' if args.detailed_output is False else 'detail' prefix = 'submit' if args.detailed_output is False else 'detail'
output_file = os.path.join(args.pred_dir, "%s_%s_%s.json" % ( output_file = os.path.join(args.pred_dir, "%s_%s_%s.json" % (
prefix, env_name, args.fusion)) prefix, env_name, args.fusion))
if os.path.exists(output_file): if os.path.exists(output_file):
replace = input(f"{output_file} exists. Replace? (y/n): ")
if replace != 'y' and replace != 'Y':
continue continue
agent.logs = defaultdict(list) agent.logs = defaultdict(list)
agent.env = env agent.env = env

View File

@ -71,7 +71,7 @@ def parse_args():
parser.add_argument('--test', action='store_true', default=False) parser.add_argument('--test', action='store_true', default=False)
parser.add_argument("--submit", action='store_true', default=False) parser.add_argument("--submit", action='store_true', default=False)
parser.add_argument('--no_backtrack', action='store_true', default=False) parser.add_argument('--no_backtrack', action='store_true', default=False)
parser.add_argument('--detailed_output', action='store_true', default=True) parser.add_argument('--detailed_output', action='store_true', default=False)
# Training Configurations # Training Configurations
parser.add_argument( parser.add_argument(

View File

@ -10,7 +10,7 @@ obj_ft_dim=768
ngpus=1 ngpus=1
seed=0 seed=0
name=${train_alg}-${features} name=${train_alg}-${features}-reverie-glip-adversarial
name=${name}-seed.${seed} name=${name}-seed.${seed}
name=${name}-init.aug.45k name=${name}-init.aug.45k
@ -57,11 +57,11 @@ flag="--root_dir ${DATA_ROOT}
# train # train
CUDA_VISIBLE_DEVICES='0' python r2r/main_nav.py $flag \ CUDA_VISIBLE_DEVICES='0' python r2r/main_nav.py $flag \
--tokenizer bert \ --tokenizer bert \
--bert_ckpt_file 'put the pretrained model (see pretrain_src) here' \ --bert_ckpt_file '../datasets/REVERIE/exprs_map/pretrain/cmt-vitbase-mlm.mrc.sap.og-init.lxmert-aug.speaker/ckpts/model_step_100000.pt' \
--eval_first --eval_first
# test # test
CUDA_VISIBLE_DEVICES='0' python r2r/main_nav.py $flag \ # CUDA_VISIBLE_DEVICES='0' python r2r/main_nav.py $flag \
--tokenizer bert \ # --tokenizer bert \
--resume_file ../datasets/R2R/trained_models/best_val_unseen \ # --resume_file ../datasets/R2R/trained_models/best_val_unseen \
--test --submit # --test --submit

View File

@ -10,7 +10,7 @@ obj_ft_dim=768
ngpus=1 ngpus=1
seed=0 seed=0
name=${train_alg}-${features}-new-reverie-all name=${train_alg}-${features}-adversarial-but-original-model-with-glip-filter
name=${name}-seed.${seed} #-${ngpus}gpus name=${name}-seed.${seed} #-${ngpus}gpus
outdir=${DATA_ROOT}/REVERIE/exprs_map/finetune/${name} outdir=${DATA_ROOT}/REVERIE/exprs_map/finetune/${name}
@ -59,14 +59,14 @@ flag="--root_dir ${DATA_ROOT}
--gamma 0." --gamma 0."
# train # train
CUDA_VISIBLE_DEVICES='0' python3 reverie/main_nav_obj.py $flag \ # CUDA_VISIBLE_DEVICES='0' python reverie/main_nav_obj.py $flag \
--tokenizer bert \ # --tokenizer bert \
--bert_ckpt_file '../datasets/REVERIE/exprs_map/pretrain/cmt-vitbase-mlm.mrc.sap.og-init.lxmert-aug.speaker/ckpts/model_step_100000.pt' \ # --resume_file ../datasets/REVERIE/exprs_map/finetune/dagger-vitbase-adversarial-but-original-model-with-glip-filter-seed.0/ckpts/best_val_unseen \
--eval_first # --bert_ckpt_file '../datasets/REVERIE/exprs_map/pretrain/cmt-vitbase-mlm.mrc.sap.og-init.lxmert-aug.speaker/ckpts/model_step_100000.pt' \
# --eval_first
# test # test
echo /root/mount/Matterport3DSimulator/VLN-DUET/datasets/REVERIE/exprs_map/finetune/${name}/ckpts/best_val_unseen CUDA_VISIBLE_DEVICES='0' python reverie/main_nav_obj.py $flag \
CUDA_VISIBLE_DEVICES='0' python3 reverie/main_nav_obj.py $flag \
--tokenizer bert \ --tokenizer bert \
--resume_file /root/mount/Matterport3DSimulator/VLN-DUET/datasets/REVERIE/exprs_map/finetune/${name}/ckpts/best_val_unseen \ --resume_file ../datasets/REVERIE/exprs_map/finetune/dagger-vitbase-adversarial-but-original-model-with-glip-filter-seed.0/ckpts/best_val_unseen \
--test --submit --test --submit

File diff suppressed because one or more lines are too long