Compare commits
17 Commits
origin-ver
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 641b7169b3 | |||
| 36dbfed7e1 | |||
| bfcb1f49ea | |||
| 8b5a7438a0 | |||
| 59dfbf9c30 | |||
| 287a35965e | |||
| 2a561bcf01 | |||
| 0135ab3ac8 | |||
| 924cfe9b43 | |||
| de3326ae85 | |||
| b2dce6111e | |||
| fb82daf16a | |||
| b96106fa69 | |||
| f6c4a4f87e | |||
| 5e424ede40 | |||
| 1b731a14a3 | |||
| bc8bc1b9d4 |
1
map_nav_src/node_region.json
Normal file
1
map_nav_src/node_region.json
Normal file
File diff suppressed because one or more lines are too long
@ -37,10 +37,10 @@ def construct_instrs(anno_dir, dataset, splits, tokenizer, max_instr_len=512, is
|
|||||||
# Split multiple instructions into separate entries
|
# Split multiple instructions into separate entries
|
||||||
for j, instr in enumerate(item['instructions']):
|
for j, instr in enumerate(item['instructions']):
|
||||||
new_item = dict(item)
|
new_item = dict(item)
|
||||||
new_item['instr_id'] = '%s_%d' % (item['id'], j)
|
new_item['instr_id'] = '%s_%d' % (item['path_id'], j)
|
||||||
new_item['instruction'] = instr
|
new_item['instruction'] = instr
|
||||||
new_item['instr_encoding'] = item['instr_encodings'][j][:max_instr_len]
|
new_item['instr_encoding'] = item['instr_encodings'][j][:max_instr_len]
|
||||||
del new_item['instructions']
|
del new_item['instructions']
|
||||||
del new_item['instr_encodings']
|
del new_item['instr_encodings']
|
||||||
data.append(new_item)
|
data.append(new_item)
|
||||||
return data
|
return data
|
||||||
@ -58,12 +58,12 @@ def build_dataset(args, rank=0, is_test=False):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# val_env_names = ['val_train_seen']
|
# val_env_names = ['val_train_seen']
|
||||||
val_env_names = ['val_seen', 'val_unseen']
|
val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']
|
||||||
if args.dataset == 'r4r' and (not args.test):
|
if args.dataset == 'r4r' and (not args.test):
|
||||||
val_env_names[-1] == 'val_unseen_sampled'
|
val_env_names[-1] == 'val_unseen_sampled'
|
||||||
|
|
||||||
# if args.submit and args.dataset != 'r4r':
|
if args.submit and args.dataset != 'r4r':
|
||||||
# val_env_names.append('test')
|
val_env_names.append('test')
|
||||||
|
|
||||||
val_envs = {}
|
val_envs = {}
|
||||||
for split in val_env_names:
|
for split in val_env_names:
|
||||||
|
|||||||
1
map_nav_src/region2objs.json
Normal file
1
map_nav_src/region2objs.json
Normal file
File diff suppressed because one or more lines are too long
@ -27,7 +27,13 @@ class BaseAgent(object):
|
|||||||
def get_results(self, detailed_output=False):
|
def get_results(self, detailed_output=False):
|
||||||
output = []
|
output = []
|
||||||
for k, v in self.results.items():
|
for k, v in self.results.items():
|
||||||
output.append({'instr_id': k, 'trajectory': v['path'], 'pred_objid': v['pred_objid']})
|
output.append({
|
||||||
|
'instr_id': k,
|
||||||
|
'trajectory': v['path'],
|
||||||
|
'pred_objid': v['pred_objid'],
|
||||||
|
'found': v['found'],
|
||||||
|
'gt_found': v['gt_found']
|
||||||
|
})
|
||||||
if detailed_output:
|
if detailed_output:
|
||||||
output[-1]['details'] = v['details']
|
output[-1]['details'] = v['details']
|
||||||
return output
|
return output
|
||||||
|
|||||||
@ -174,11 +174,14 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
|
|
||||||
def _nav_vp_variable(self, obs, gmaps, pano_embeds, cand_vpids, view_lens, obj_lens, nav_types):
|
def _nav_vp_variable(self, obs, gmaps, pano_embeds, cand_vpids, view_lens, obj_lens, nav_types):
|
||||||
batch_size = len(obs)
|
batch_size = len(obs)
|
||||||
|
# print("PANO shape", pano_embeds.shape)
|
||||||
|
|
||||||
# add [stop] token
|
# add [stop] token & [NOT FOUND] token
|
||||||
|
# [STOP] is placed at the very front, [NOT FOUND] at the very end
|
||||||
vp_img_embeds = torch.cat(
|
vp_img_embeds = torch.cat(
|
||||||
[torch.zeros_like(pano_embeds[:, :1]), pano_embeds], 1
|
[torch.zeros_like(pano_embeds[:, :1]), pano_embeds, torch.ones_like(pano_embeds[:, :1])], 1
|
||||||
)
|
)
|
||||||
|
# print("SHAPE:", vp_img_embeds.shape)
|
||||||
|
|
||||||
batch_vp_pos_fts = []
|
batch_vp_pos_fts = []
|
||||||
for i, gmap in enumerate(gmaps):
|
for i, gmap in enumerate(gmaps):
|
||||||
@ -192,19 +195,33 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
)
|
)
|
||||||
# add [stop] token at beginning
|
# add [stop] token at beginning
|
||||||
vp_pos_fts = np.zeros((vp_img_embeds.size(1), 14), dtype=np.float32)
|
vp_pos_fts = np.zeros((vp_img_embeds.size(1), 14), dtype=np.float32)
|
||||||
|
# print("vp_pos_fts:", vp_pos_fts.shape)
|
||||||
|
|
||||||
vp_pos_fts[:, :7] = cur_start_pos_fts
|
vp_pos_fts[:, :7] = cur_start_pos_fts
|
||||||
|
# print("vp_pos_fts[:, :7]:", vp_pos_fts[:, :7].shape)
|
||||||
|
# print("cur_start_pos_fts:", cur_start_pos_fts.shape)
|
||||||
|
|
||||||
vp_pos_fts[1:len(cur_cand_pos_fts)+1, 7:] = cur_cand_pos_fts
|
vp_pos_fts[1:len(cur_cand_pos_fts)+1, 7:] = cur_cand_pos_fts
|
||||||
|
# print("vp_pos_fts[1:len(), 7:]:", vp_pos_fts[1:len(cur_cand_pos_fts)+1, 7:].shape)
|
||||||
|
# print("cur_cand_pos_fts:", cur_cand_pos_fts.shape)
|
||||||
|
|
||||||
batch_vp_pos_fts.append(torch.from_numpy(vp_pos_fts))
|
batch_vp_pos_fts.append(torch.from_numpy(vp_pos_fts))
|
||||||
|
|
||||||
batch_vp_pos_fts = pad_tensors(batch_vp_pos_fts).cuda()
|
batch_vp_pos_fts = pad_tensors(batch_vp_pos_fts).cuda()
|
||||||
|
|
||||||
vp_nav_masks = torch.cat([torch.ones(batch_size, 1).bool().cuda(), nav_types == 1], 1)
|
# the mask entries for stop and not found have to be added as well
|
||||||
vp_obj_masks = torch.cat([torch.zeros(batch_size, 1).bool().cuda(), nav_types == 2], 1)
|
# here stop is grouped with the candidates, and not found is grouped with the objects
|
||||||
|
vp_nav_masks = torch.cat([torch.ones(batch_size, 1).bool().cuda(), nav_types == 1, torch.zeros(batch_size, 1).bool().cuda()], 1)
|
||||||
|
vp_obj_masks = torch.cat([torch.zeros(batch_size, 1).bool().cuda(), nav_types == 2, torch.ones(batch_size, 1).bool().cuda()], 1)
|
||||||
|
# print('vp_nav_masks:', vp_nav_masks.shape)
|
||||||
|
# print('vp_obj_masks:', vp_obj_masks.shape)
|
||||||
|
vp_masks = gen_seq_masks(view_lens+obj_lens+2)
|
||||||
|
# print()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'vp_img_embeds': vp_img_embeds,
|
'vp_img_embeds': vp_img_embeds,
|
||||||
'vp_pos_fts': batch_vp_pos_fts,
|
'vp_pos_fts': batch_vp_pos_fts,
|
||||||
'vp_masks': gen_seq_masks(view_lens+obj_lens+1),
|
'vp_masks': vp_masks,
|
||||||
'vp_nav_masks': vp_nav_masks,
|
'vp_nav_masks': vp_nav_masks,
|
||||||
'vp_obj_masks': vp_obj_masks,
|
'vp_obj_masks': vp_obj_masks,
|
||||||
'vp_cand_vpids': [[None]+x for x in cand_vpids],
|
'vp_cand_vpids': [[None]+x for x in cand_vpids],
|
||||||
@ -242,7 +259,7 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
|
|
||||||
return torch.from_numpy(a).cuda()
|
return torch.from_numpy(a).cuda()
|
||||||
|
|
||||||
def _teacher_object(self, obs, ended, view_lens):
|
def _teacher_object(self, obs, ended, view_lens, obj_logits):
|
||||||
targets = np.zeros(len(obs), dtype=np.int64)
|
targets = np.zeros(len(obs), dtype=np.int64)
|
||||||
for i, ob in enumerate(obs):
|
for i, ob in enumerate(obs):
|
||||||
if ended[i]:
|
if ended[i]:
|
||||||
@ -252,12 +269,18 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
if i_vp not in ob['gt_end_vps']:
|
if i_vp not in ob['gt_end_vps']:
|
||||||
targets[i] = self.args.ignoreid
|
targets[i] = self.args.ignoreid
|
||||||
else:
|
else:
|
||||||
|
|
||||||
i_objids = ob['obj_ids']
|
i_objids = ob['obj_ids']
|
||||||
targets[i] = self.args.ignoreid
|
targets[i] = self.args.ignoreid
|
||||||
for j, obj_id in enumerate(i_objids):
|
for j, obj_id in enumerate(i_objids):
|
||||||
if str(obj_id) == str(ob['gt_obj_id']):
|
if str(obj_id) == str(ob['gt_obj_id']):
|
||||||
targets[i] = j + view_lens[i] + 1
|
|
||||||
|
if ob['gt_found'] == True: # 可以找得到
|
||||||
|
targets[i] = j + view_lens[i] + 1
|
||||||
|
else:
|
||||||
|
targets[i] = len(obj_logits[i])-1 # 不能找到,
|
||||||
break
|
break
|
||||||
|
|
||||||
return torch.from_numpy(targets).cuda()
|
return torch.from_numpy(targets).cuda()
|
||||||
|
|
||||||
def make_equiv_action(self, a_t, gmaps, obs, traj=None):
|
def make_equiv_action(self, a_t, gmaps, obs, traj=None):
|
||||||
@ -298,7 +321,8 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
|
|
||||||
batch_size = len(obs)
|
batch_size = len(obs)
|
||||||
# build graph: keep the start viewpoint
|
# build graph: keep the start viewpoint
|
||||||
gmaps = [GraphMap(ob['viewpoint']) for ob in obs]
|
|
||||||
|
gmaps = [GraphMap(ob['viewpoint']) for ob in obs] # input the start point
|
||||||
for i, ob in enumerate(obs):
|
for i, ob in enumerate(obs):
|
||||||
gmaps[i].update_graph(ob)
|
gmaps[i].update_graph(ob)
|
||||||
|
|
||||||
@ -307,6 +331,9 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
'instr_id': ob['instr_id'],
|
'instr_id': ob['instr_id'],
|
||||||
'path': [[ob['viewpoint']]],
|
'path': [[ob['viewpoint']]],
|
||||||
'pred_objid': None,
|
'pred_objid': None,
|
||||||
|
'gt_objid': None,
|
||||||
|
'found': None,
|
||||||
|
'gt_found': None,
|
||||||
'details': {},
|
'details': {},
|
||||||
} for ob in obs]
|
} for ob in obs]
|
||||||
|
|
||||||
@ -324,8 +351,7 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
ml_loss = 0.
|
ml_loss = 0.
|
||||||
og_loss = 0.
|
og_loss = 0.
|
||||||
|
|
||||||
# for t in range(self.args.max_action_len):
|
for t in range(self.args.max_action_len):
|
||||||
for t in range(1):
|
|
||||||
for i, gmap in enumerate(gmaps):
|
for i, gmap in enumerate(gmaps):
|
||||||
if not ended[i]:
|
if not ended[i]:
|
||||||
gmap.node_step_ids[obs[i]['viewpoint']] = t + 1
|
gmap.node_step_ids[obs[i]['viewpoint']] = t + 1
|
||||||
@ -380,11 +406,24 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
i_vp = obs[i]['viewpoint']
|
i_vp = obs[i]['viewpoint']
|
||||||
# update i_vp: stop and object grounding scores
|
# update i_vp: stop and object grounding scores
|
||||||
i_objids = obs[i]['obj_ids']
|
i_objids = obs[i]['obj_ids']
|
||||||
i_obj_logits = obj_logits[i, pano_inputs['view_lens'][i]+1:]
|
i_obj_logits = obj_logits[i, pano_inputs['view_lens'][i]+1:] # 最後一個是 not found
|
||||||
|
|
||||||
|
if len(i_objids) > 0:
|
||||||
|
if torch.argmax(i_obj_logits) >= len(i_objids): # not found 那格 logit 最大(會在最後一格)
|
||||||
|
og = -1
|
||||||
|
else:
|
||||||
|
og = i_objids[torch.argmax(i_obj_logits)]
|
||||||
|
else:
|
||||||
|
og = None
|
||||||
|
|
||||||
|
|
||||||
|
# if an object is found, og is the object id
|
||||||
|
# if the prediction is not found, og is -1
|
||||||
|
# if no object is visible from this viewpoint, og is None
|
||||||
gmap.node_stop_scores[i_vp] = {
|
gmap.node_stop_scores[i_vp] = {
|
||||||
'stop': nav_probs[i, 0].data.item(),
|
'stop': nav_probs[i, 0].data.item(),
|
||||||
'og': i_objids[torch.argmax(i_obj_logits)] if len(i_objids) > 0 else None,
|
'og': og,
|
||||||
'og_details': {'objids': i_objids, 'logits': i_obj_logits[:len(i_objids)]},
|
'og_details': {'objids': i_objids, 'logits': torch.cat([i_obj_logits[:len(i_objids)], i_obj_logits[[-1]] ], dim=0)},
|
||||||
}
|
}
|
||||||
|
|
||||||
if train_ml is not None:
|
if train_ml is not None:
|
||||||
@ -404,9 +443,9 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
)
|
)
|
||||||
ml_loss += self.criterion(nav_outs['local_logits'], local_nav_targets) # local
|
ml_loss += self.criterion(nav_outs['local_logits'], local_nav_targets) # local
|
||||||
# object grounding
|
# object grounding
|
||||||
obj_targets = self._teacher_object(obs, ended, pano_inputs['view_lens'])
|
# obj_targets = self._teacher_object(obs, ended, pano_inputs['view_lens'], obj_logits)
|
||||||
# print(t, obj_targets[6], obj_logits[6], obs[6]['obj_ids'], pano_inputs['view_lens'][i], obs[6]['gt_obj_id'])
|
# print(t, obj_targets[6], obj_logits[6], obs[6]['obj_ids'], pano_inputs['view_lens'][i], obs[6]['gt_obj_id'])
|
||||||
og_loss += self.criterion(obj_logits, obj_targets)
|
# og_loss += self.criterion(obj_logits, obj_targets)
|
||||||
# print(F.cross_entropy(obj_logits, obj_targets, reduction='none'))
|
# print(F.cross_entropy(obj_logits, obj_targets, reduction='none'))
|
||||||
# print(t, 'og_loss', og_loss.item(), self.criterion(obj_logits, obj_targets).item())
|
# print(t, 'og_loss', og_loss.item(), self.criterion(obj_logits, obj_targets).item())
|
||||||
|
|
||||||
@ -452,9 +491,11 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
else:
|
else:
|
||||||
cpu_a_t.append(nav_vpids[i][a_t[i]])
|
cpu_a_t.append(nav_vpids[i][a_t[i]])
|
||||||
|
|
||||||
|
original_gt_founds = [ ob['gt_found'] for ob in obs ]
|
||||||
# Make action and get the new state
|
# Make action and get the new state
|
||||||
self.make_equiv_action(cpu_a_t, gmaps, obs, traj)
|
self.make_equiv_action(cpu_a_t, gmaps, obs, traj)
|
||||||
for i in range(batch_size):
|
for i in range(batch_size):
|
||||||
|
traj[i]['gt_found'] = original_gt_founds[i]
|
||||||
if (not ended[i]) and just_ended[i]:
|
if (not ended[i]) and just_ended[i]:
|
||||||
stop_node, stop_score = None, {'stop': -float('inf'), 'og': None}
|
stop_node, stop_score = None, {'stop': -float('inf'), 'og': None}
|
||||||
for k, v in gmaps[i].node_stop_scores.items():
|
for k, v in gmaps[i].node_stop_scores.items():
|
||||||
@ -464,6 +505,10 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
if stop_node is not None and obs[i]['viewpoint'] != stop_node:
|
if stop_node is not None and obs[i]['viewpoint'] != stop_node:
|
||||||
traj[i]['path'].append(gmaps[i].graph.path(obs[i]['viewpoint'], stop_node))
|
traj[i]['path'].append(gmaps[i].graph.path(obs[i]['viewpoint'], stop_node))
|
||||||
traj[i]['pred_objid'] = stop_score['og']
|
traj[i]['pred_objid'] = stop_score['og']
|
||||||
|
if stop_score['og'] == -1 or stop_score['og'] == None:
|
||||||
|
traj[i]['found'] = False
|
||||||
|
else:
|
||||||
|
traj[i]['found'] = True
|
||||||
if self.args.detailed_output:
|
if self.args.detailed_output:
|
||||||
for k, v in gmaps[i].node_stop_scores.items():
|
for k, v in gmaps[i].node_stop_scores.items():
|
||||||
traj[i]['details'][k] = {
|
traj[i]['details'][k] = {
|
||||||
@ -487,10 +532,16 @@ class GMapObjectNavAgent(Seq2SeqAgent):
|
|||||||
|
|
||||||
if train_ml is not None:
|
if train_ml is not None:
|
||||||
ml_loss = ml_loss * train_ml / batch_size
|
ml_loss = ml_loss * train_ml / batch_size
|
||||||
og_loss = og_loss * train_ml / batch_size
|
# og_loss = og_loss * train_ml / batch_size
|
||||||
self.loss += ml_loss
|
self.loss += ml_loss
|
||||||
self.loss += og_loss
|
# self.loss += og_loss
|
||||||
self.logs['IL_loss'].append(ml_loss.item())
|
self.logs['IL_loss'].append(ml_loss.item())
|
||||||
self.logs['OG_loss'].append(og_loss.item())
|
# self.logs['OG_loss'].append(og_loss.item())
|
||||||
|
|
||||||
|
'''
|
||||||
|
print("TRAJ:")
|
||||||
|
for i in traj:
|
||||||
|
print(" GT: {}, PREDICT: {}, SCORE: {}".format(i['gt_found'], i['found'], 1 if i['gt_found']==i['found'] else 0))
|
||||||
|
|
||||||
|
'''
|
||||||
return traj
|
return traj
|
||||||
|
|||||||
@ -87,6 +87,8 @@ def construct_instrs(anno_dir, dataset, splits, tokenizer, max_instr_len=512):
|
|||||||
new_item['objId'] = None
|
new_item['objId'] = None
|
||||||
new_item['instruction'] = instr
|
new_item['instruction'] = instr
|
||||||
new_item['instr_encoding'] = item['instr_encodings'][j][:max_instr_len]
|
new_item['instr_encoding'] = item['instr_encodings'][j][:max_instr_len]
|
||||||
|
new_item['path'] = item['path'][j]
|
||||||
|
new_item['found'] = item['found'][j]
|
||||||
del new_item['instructions']
|
del new_item['instructions']
|
||||||
del new_item['instr_encodings']
|
del new_item['instr_encodings']
|
||||||
data.append(new_item)
|
data.append(new_item)
|
||||||
|
|||||||
@ -8,12 +8,22 @@ import random
|
|||||||
import networkx as nx
|
import networkx as nx
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
import copy
|
import copy
|
||||||
|
from glob import glob
|
||||||
|
|
||||||
|
|
||||||
import MatterSim
|
import MatterSim
|
||||||
|
|
||||||
from utils.data import load_nav_graphs, new_simulator
|
from utils.data import load_nav_graphs, new_simulator
|
||||||
from utils.data import angle_feature, get_all_point_angle_feature
|
from utils.data import angle_feature, get_all_point_angle_feature
|
||||||
|
|
||||||
|
with open('./node_region.json') as fp:
|
||||||
|
node_region = json.load(fp)
|
||||||
|
|
||||||
|
with open('region2objs.json') as fp:
|
||||||
|
region2objs = json.load(fp)
|
||||||
|
|
||||||
|
with open('vp2objs.json') as fp:
|
||||||
|
vp2objs = json.load(fp)
|
||||||
|
|
||||||
class EnvBatch(object):
|
class EnvBatch(object):
|
||||||
''' A simple wrapper for a batch of MatterSim environments,
|
''' A simple wrapper for a batch of MatterSim environments,
|
||||||
@ -311,6 +321,7 @@ class ReverieObjectNavBatch(object):
|
|||||||
'navigableLocations' : state.navigableLocations,
|
'navigableLocations' : state.navigableLocations,
|
||||||
'instruction' : item['instruction'],
|
'instruction' : item['instruction'],
|
||||||
'instr_encoding': item['instr_encoding'],
|
'instr_encoding': item['instr_encoding'],
|
||||||
|
'gt_found' : item['found'],
|
||||||
'gt_path' : item['path'],
|
'gt_path' : item['path'],
|
||||||
'gt_end_vps': item.get('end_vps', []),
|
'gt_end_vps': item.get('end_vps', []),
|
||||||
'gt_obj_id': item['objId'],
|
'gt_obj_id': item['objId'],
|
||||||
@ -339,7 +350,7 @@ class ReverieObjectNavBatch(object):
|
|||||||
self._next_minibatch(**kwargs)
|
self._next_minibatch(**kwargs)
|
||||||
|
|
||||||
scanIds = [item['scan'] for item in self.batch]
|
scanIds = [item['scan'] for item in self.batch]
|
||||||
viewpointIds = [item['path'][-1] for item in self.batch]
|
viewpointIds = [item['path'][0] for item in self.batch]
|
||||||
headings = [item['heading'] for item in self.batch]
|
headings = [item['heading'] for item in self.batch]
|
||||||
self.env.newEpisodes(scanIds, viewpointIds, headings)
|
self.env.newEpisodes(scanIds, viewpointIds, headings)
|
||||||
return self._get_obs()
|
return self._get_obs()
|
||||||
@ -351,13 +362,17 @@ class ReverieObjectNavBatch(object):
|
|||||||
|
|
||||||
|
|
||||||
############### Nav Evaluation ###############
|
############### Nav Evaluation ###############
|
||||||
def _eval_item(self, scan, pred_path, pred_objid, gt_path, gt_objid):
|
def _eval_item(self, scan, pred_path, pred_objid, gt_path, gt_objid, pred_found, gt_found):
|
||||||
scores = {}
|
scores = {}
|
||||||
|
|
||||||
shortest_distances = self.shortest_distances[scan]
|
shortest_distances = self.shortest_distances[scan]
|
||||||
|
|
||||||
path = sum(pred_path, [])
|
path = sum(pred_path, [])
|
||||||
# assert gt_path[0] == path[0], 'Result trajectories should include the start position'
|
assert gt_path[0] == path[0], 'Result trajectories should include the start position'
|
||||||
|
|
||||||
|
pred_stop_region = node_region[scan][path[-1]]
|
||||||
|
gt_stop_region = node_region[scan][gt_path[-1]]
|
||||||
|
|
||||||
|
|
||||||
scores['action_steps'] = len(pred_path) - 1
|
scores['action_steps'] = len(pred_path) - 1
|
||||||
scores['trajectory_steps'] = len(path) - 1
|
scores['trajectory_steps'] = len(path) - 1
|
||||||
@ -368,9 +383,99 @@ class ReverieObjectNavBatch(object):
|
|||||||
goal_viewpoints = set(self.obj2vps['%s_%s'%(scan, str(gt_objid))])
|
goal_viewpoints = set(self.obj2vps['%s_%s'%(scan, str(gt_objid))])
|
||||||
assert len(goal_viewpoints) > 0, '%s_%s'%(scan, str(gt_objid))
|
assert len(goal_viewpoints) > 0, '%s_%s'%(scan, str(gt_objid))
|
||||||
|
|
||||||
|
scores['found_success'] = float(pred_found == gt_found)
|
||||||
|
|
||||||
scores['success'] = float(path[-1] in goal_viewpoints)
|
scores['success'] = float(path[-1] in goal_viewpoints)
|
||||||
|
scores['room_success'] = float(pred_stop_region == gt_stop_region)
|
||||||
scores['oracle_success'] = float(any(x in goal_viewpoints for x in path))
|
scores['oracle_success'] = float(any(x in goal_viewpoints for x in path))
|
||||||
|
|
||||||
|
gt_room_start_vp = None
|
||||||
|
gt_back_path = []
|
||||||
|
gt_front_path = []
|
||||||
|
for vp in gt_path[::-1]:
|
||||||
|
if node_region[scan][vp] == gt_stop_region and gt_front_path == []:
|
||||||
|
gt_back_path.append(vp)
|
||||||
|
gt_room_start_vp = vp
|
||||||
|
else:
|
||||||
|
gt_front_path.append(vp)
|
||||||
|
|
||||||
|
gt_front_path = gt_front_path[::-1]
|
||||||
|
gt_back_path = gt_back_path[::-1]
|
||||||
|
|
||||||
|
assert (gt_front_path + gt_back_path) == gt_path, "Front path & Back path error"
|
||||||
|
|
||||||
|
gt_front_path += [gt_room_start_vp]
|
||||||
|
|
||||||
|
'''
|
||||||
|
if scores['success'] == 1.0:
|
||||||
|
scores['found_success'] = float(pred_found == gt_found)
|
||||||
|
else:
|
||||||
|
scores['found_success'] = 0.0
|
||||||
|
'''
|
||||||
|
gt_reach_length = np.sum([shortest_distances[a][b] for a, b in zip(gt_front_path[:-1], gt_front_path[1:])])
|
||||||
|
gt_explore_length = np.sum([shortest_distances[a][b] for a, b in zip(gt_back_path[:-1], gt_back_path[1:])])
|
||||||
|
|
||||||
|
if scores['room_success'] != 0.0:
|
||||||
|
# coarse-grained
|
||||||
|
|
||||||
|
# get the reach_path & explore_path
|
||||||
|
room_start_vp = None
|
||||||
|
back_path = []
|
||||||
|
front_path = []
|
||||||
|
for vp in path[::-1]:
|
||||||
|
if node_region[scan][vp] == gt_stop_region and front_path == []:
|
||||||
|
back_path.append(vp)
|
||||||
|
room_start_vp = vp
|
||||||
|
else:
|
||||||
|
front_path.append(vp)
|
||||||
|
|
||||||
|
front_path = front_path[::-1]
|
||||||
|
back_path = back_path[::-1]
|
||||||
|
assert (front_path + back_path) == path, "Front path & Back path error"
|
||||||
|
|
||||||
|
# front_path = ... room_start_vp
|
||||||
|
# back_path = room_start_vp ...
|
||||||
|
front_path += [room_start_vp]
|
||||||
|
|
||||||
|
reach_length = np.sum([shortest_distances[a][b] for a, b in zip(front_path[:-1], front_path[1:])]) if len(front_path) != 1 else 0.01
|
||||||
|
explore_length = np.sum([shortest_distances[a][b] for a, b in zip(back_path[:-1], back_path[1:])]) if len(back_path) != 1 else 0.01
|
||||||
|
|
||||||
|
|
||||||
|
scores['room_spl'] = scores['room_success'] * gt_reach_length / max(reach_length, gt_reach_length, 0.01)
|
||||||
|
if scores['found_success'] != 0.0:
|
||||||
|
# fine-grained score
|
||||||
|
# p is the coverage rate
|
||||||
|
if gt_found:
|
||||||
|
p = 1.0
|
||||||
|
else:
|
||||||
|
explore_objs = set()
|
||||||
|
for vp in back_path:
|
||||||
|
explore_objs.update(vp2objs[vp])
|
||||||
|
p = len(explore_objs) / len(region2objs[scan][gt_stop_region])
|
||||||
|
scores['coverage_rate'] = p
|
||||||
|
scores['explore_spl'] = scores['room_success'] * scores['found_success'] * gt_explore_length / max(gt_explore_length, explore_length, 0.01) * p
|
||||||
|
else:
|
||||||
|
scores['coverage_rate'] = 0
|
||||||
|
scores['explore_spl'] = 0
|
||||||
|
else:
|
||||||
|
scores['room_spl'] = 0.0
|
||||||
|
scores['coverage_rate'] = 0
|
||||||
|
scores['explore_spl'] = 0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
scores['spl'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
|
scores['spl'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
|
||||||
|
'''
|
||||||
|
scores['sspl_1'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01) * scores['found_success']
|
||||||
|
scores['sspl_2'] = scores['room_success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01) * scores['found_success']
|
||||||
|
scores['sspl_3'] = scores['oracle_success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01) * scores['found_success']
|
||||||
|
|
||||||
|
scores['ss_1'] = scores['success'] * scores['found_success']
|
||||||
|
scores['ss_2'] = scores['room_success'] * scores['found_success']
|
||||||
|
scores['ss_3'] = scores['oracle_success'] * scores['found_success']
|
||||||
|
'''
|
||||||
|
scores['sspl'] = scores['spl'] * scores['found_success']
|
||||||
|
|
||||||
scores['rgs'] = str(pred_objid) == str(gt_objid)
|
scores['rgs'] = str(pred_objid) == str(gt_objid)
|
||||||
scores['rgspl'] = scores['rgs'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
|
scores['rgspl'] = scores['rgs'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
|
||||||
@ -380,6 +485,8 @@ class ReverieObjectNavBatch(object):
|
|||||||
''' Evaluate each agent trajectory based on how close it got to the goal location
|
''' Evaluate each agent trajectory based on how close it got to the goal location
|
||||||
the path contains [view_id, angle, vofv]'''
|
the path contains [view_id, angle, vofv]'''
|
||||||
print('eval %d predictions' % (len(preds)))
|
print('eval %d predictions' % (len(preds)))
|
||||||
|
print(preds[0])
|
||||||
|
|
||||||
|
|
||||||
metrics = defaultdict(list)
|
metrics = defaultdict(list)
|
||||||
for item in preds:
|
for item in preds:
|
||||||
@ -387,7 +494,16 @@ class ReverieObjectNavBatch(object):
|
|||||||
traj = item['trajectory']
|
traj = item['trajectory']
|
||||||
pred_objid = item.get('pred_objid', None)
|
pred_objid = item.get('pred_objid', None)
|
||||||
scan, gt_traj, gt_objid = self.gt_trajs[instr_id]
|
scan, gt_traj, gt_objid = self.gt_trajs[instr_id]
|
||||||
traj_scores = self._eval_item(scan, traj, pred_objid, gt_traj, gt_objid)
|
pred_found = item['found']
|
||||||
|
gt_found = item['gt_found']
|
||||||
|
|
||||||
|
|
||||||
|
traj_scores = self._eval_item(scan, traj, pred_objid, gt_traj, gt_objid, pred_found, gt_found)
|
||||||
|
|
||||||
|
# record "success" in the result file
|
||||||
|
# so that the visualization tool can read the success status
|
||||||
|
item['success'] = traj_scores['success']
|
||||||
|
|
||||||
for k, v in traj_scores.items():
|
for k, v in traj_scores.items():
|
||||||
metrics[k].append(v)
|
metrics[k].append(v)
|
||||||
metrics['instr_id'].append(instr_id)
|
metrics['instr_id'].append(instr_id)
|
||||||
@ -399,8 +515,15 @@ class ReverieObjectNavBatch(object):
|
|||||||
'sr': np.mean(metrics['success']) * 100,
|
'sr': np.mean(metrics['success']) * 100,
|
||||||
'oracle_sr': np.mean(metrics['oracle_success']) * 100,
|
'oracle_sr': np.mean(metrics['oracle_success']) * 100,
|
||||||
'spl': np.mean(metrics['spl']) * 100,
|
'spl': np.mean(metrics['spl']) * 100,
|
||||||
|
'sspl': np.mean(metrics['sspl']) * 100,
|
||||||
'rgs': np.mean(metrics['rgs']) * 100,
|
'rgs': np.mean(metrics['rgs']) * 100,
|
||||||
'rgspl': np.mean(metrics['rgspl']) * 100,
|
'rgspl': np.mean(metrics['rgspl']) * 100,
|
||||||
|
'found_sr': np.mean(metrics['found_success']) * 100,
|
||||||
|
'room_sr': np.mean(metrics['room_success']) * 100,
|
||||||
|
'room_spl': np.mean(metrics['room_spl']) * 100,
|
||||||
|
'coverage_rate': np.mean(metrics['coverage_rate']) * 100,
|
||||||
|
'explore_spl': np.mean(metrics['explore_spl']) * 100,
|
||||||
}
|
}
|
||||||
return avg_metrics, metrics
|
return avg_metrics, metrics
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -65,11 +65,13 @@ def build_dataset(args, rank=0):
|
|||||||
multi_endpoints=args.multi_endpoints, multi_startpoints=args.multi_startpoints,
|
multi_endpoints=args.multi_endpoints, multi_startpoints=args.multi_startpoints,
|
||||||
)
|
)
|
||||||
|
|
||||||
# val_env_names = ['val_train_seen']
|
# val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']
|
||||||
val_env_names = ['val_seen', 'val_unseen']
|
val_env_names = [ 'val_seen', 'val_unseen']
|
||||||
|
|
||||||
# if args.submit:
|
if args.submit:
|
||||||
# val_env_names.append('test')
|
include_test = input('Include test dataset? (y/n)')
|
||||||
|
if include_test == 'y' or include_test == 'Y':
|
||||||
|
val_env_names.append('test')
|
||||||
|
|
||||||
val_envs = {}
|
val_envs = {}
|
||||||
for split in val_env_names:
|
for split in val_env_names:
|
||||||
@ -136,7 +138,7 @@ def train(args, train_env, val_envs, aug_env=None, rank=-1):
|
|||||||
'\nListener training starts, start iteration: %s' % str(start_iter), record_file
|
'\nListener training starts, start iteration: %s' % str(start_iter), record_file
|
||||||
)
|
)
|
||||||
|
|
||||||
best_val = {'val_unseen': {"spl": 0., "sr": 0., "state":""}}
|
best_val = {'val_unseen': {"spl": 0., "sr": 0., "room_sr": 0., "state":"", "sspl": 0., 'found_sr': 0., 'explore_spl': 0.}}
|
||||||
|
|
||||||
for idx in range(start_iter, start_iter+args.iters, args.log_every):
|
for idx in range(start_iter, start_iter+args.iters, args.log_every):
|
||||||
listner.logs = defaultdict(list)
|
listner.logs = defaultdict(list)
|
||||||
@ -201,9 +203,15 @@ def train(args, train_env, val_envs, aug_env=None, rank=-1):
|
|||||||
|
|
||||||
# select model by spl
|
# select model by spl
|
||||||
if env_name in best_val:
|
if env_name in best_val:
|
||||||
if score_summary['spl'] >= best_val[env_name]['spl']:
|
if score_summary['explore_spl'] >= best_val[env_name]['explore_spl']:
|
||||||
best_val[env_name]['spl'] = score_summary['spl']
|
best_val[env_name]['spl'] = score_summary['spl']
|
||||||
|
best_val[env_name]['sspl'] = score_summary['sspl']
|
||||||
|
best_val[env_name]['explore_spl'] = score_summary['explore_spl']
|
||||||
|
best_val[env_name]['coverage_rate'] = score_summary['coverage_rate']
|
||||||
|
best_val[env_name]['room_spl'] = score_summary['room_spl']
|
||||||
best_val[env_name]['sr'] = score_summary['sr']
|
best_val[env_name]['sr'] = score_summary['sr']
|
||||||
|
best_val[env_name]['found_sr'] = score_summary['found_sr']
|
||||||
|
best_val[env_name]['room_sr'] = score_summary['room_sr']
|
||||||
best_val[env_name]['state'] = 'Iter %d %s' % (iter, loss_str)
|
best_val[env_name]['state'] = 'Iter %d %s' % (iter, loss_str)
|
||||||
listner.save(idx, os.path.join(args.ckpt_dir, "best_%s" % (env_name)))
|
listner.save(idx, os.path.join(args.ckpt_dir, "best_%s" % (env_name)))
|
||||||
|
|
||||||
@ -237,11 +245,14 @@ def valid(args, train_env, val_envs, rank=-1):
|
|||||||
write_to_record_file(str(args) + '\n\n', record_file)
|
write_to_record_file(str(args) + '\n\n', record_file)
|
||||||
|
|
||||||
for env_name, env in val_envs.items():
|
for env_name, env in val_envs.items():
|
||||||
|
print(env_name)
|
||||||
prefix = 'submit' if args.detailed_output is False else 'detail'
|
prefix = 'submit' if args.detailed_output is False else 'detail'
|
||||||
output_file = os.path.join(args.pred_dir, "%s_%s_%s.json" % (
|
output_file = os.path.join(args.pred_dir, "%s_%s_%s.json" % (
|
||||||
prefix, env_name, args.fusion))
|
prefix, env_name, args.fusion))
|
||||||
if os.path.exists(output_file):
|
if os.path.exists(output_file):
|
||||||
continue
|
replace = input(f"{output_file} exists. Replace? (y/n): ")
|
||||||
|
if replace != 'y' and replace != 'Y':
|
||||||
|
continue
|
||||||
agent.logs = defaultdict(list)
|
agent.logs = defaultdict(list)
|
||||||
agent.env = env
|
agent.env = env
|
||||||
|
|
||||||
|
|||||||
@ -71,7 +71,7 @@ def parse_args():
|
|||||||
parser.add_argument('--test', action='store_true', default=False)
|
parser.add_argument('--test', action='store_true', default=False)
|
||||||
parser.add_argument("--submit", action='store_true', default=False)
|
parser.add_argument("--submit", action='store_true', default=False)
|
||||||
parser.add_argument('--no_backtrack', action='store_true', default=False)
|
parser.add_argument('--no_backtrack', action='store_true', default=False)
|
||||||
parser.add_argument('--detailed_output', action='store_true', default=False)
|
parser.add_argument('--detailed_output', action='store_true', default=True)
|
||||||
|
|
||||||
# Training Configurations
|
# Training Configurations
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|||||||
@ -10,7 +10,7 @@ obj_ft_dim=768
|
|||||||
ngpus=1
|
ngpus=1
|
||||||
seed=0
|
seed=0
|
||||||
|
|
||||||
name=${train_alg}-${features}-reverie-glip-adversarial
|
name=${train_alg}-${features}
|
||||||
name=${name}-seed.${seed}
|
name=${name}-seed.${seed}
|
||||||
name=${name}-init.aug.45k
|
name=${name}-init.aug.45k
|
||||||
|
|
||||||
@ -57,11 +57,11 @@ flag="--root_dir ${DATA_ROOT}
|
|||||||
# train
|
# train
|
||||||
CUDA_VISIBLE_DEVICES='0' python r2r/main_nav.py $flag \
|
CUDA_VISIBLE_DEVICES='0' python r2r/main_nav.py $flag \
|
||||||
--tokenizer bert \
|
--tokenizer bert \
|
||||||
--bert_ckpt_file '../datasets/REVERIE/exprs_map/pretrain/cmt-vitbase-mlm.mrc.sap.og-init.lxmert-aug.speaker/ckpts/model_step_100000.pt' \
|
--bert_ckpt_file 'put the pretrained model (see pretrain_src) here' \
|
||||||
--eval_first
|
--eval_first
|
||||||
|
|
||||||
# test
|
# test
|
||||||
# CUDA_VISIBLE_DEVICES='0' python r2r/main_nav.py $flag \
|
CUDA_VISIBLE_DEVICES='0' python r2r/main_nav.py $flag \
|
||||||
# --tokenizer bert \
|
--tokenizer bert \
|
||||||
# --resume_file ../datasets/R2R/trained_models/best_val_unseen \
|
--resume_file ../datasets/R2R/trained_models/best_val_unseen \
|
||||||
# --test --submit
|
--test --submit
|
||||||
@ -10,7 +10,7 @@ obj_ft_dim=768
|
|||||||
ngpus=1
|
ngpus=1
|
||||||
seed=0
|
seed=0
|
||||||
|
|
||||||
name=${train_alg}-${features}-adversarial-but-original-model-with-glip-filter
|
name=${train_alg}-${features}-new-reverie-all
|
||||||
name=${name}-seed.${seed} #-${ngpus}gpus
|
name=${name}-seed.${seed} #-${ngpus}gpus
|
||||||
|
|
||||||
outdir=${DATA_ROOT}/REVERIE/exprs_map/finetune/${name}
|
outdir=${DATA_ROOT}/REVERIE/exprs_map/finetune/${name}
|
||||||
@ -59,14 +59,14 @@ flag="--root_dir ${DATA_ROOT}
|
|||||||
--gamma 0."
|
--gamma 0."
|
||||||
|
|
||||||
# train
|
# train
|
||||||
# CUDA_VISIBLE_DEVICES='0' python reverie/main_nav_obj.py $flag \
|
CUDA_VISIBLE_DEVICES='0' python3 reverie/main_nav_obj.py $flag \
|
||||||
# --tokenizer bert \
|
--tokenizer bert \
|
||||||
# --resume_file ../datasets/REVERIE/exprs_map/finetune/dagger-vitbase-adversarial-but-original-model-with-glip-filter-seed.0/ckpts/best_val_unseen \
|
--bert_ckpt_file '../datasets/REVERIE/exprs_map/pretrain/cmt-vitbase-mlm.mrc.sap.og-init.lxmert-aug.speaker/ckpts/model_step_100000.pt' \
|
||||||
# --bert_ckpt_file '../datasets/REVERIE/exprs_map/pretrain/cmt-vitbase-mlm.mrc.sap.og-init.lxmert-aug.speaker/ckpts/model_step_100000.pt' \
|
--eval_first
|
||||||
# --eval_first
|
|
||||||
|
|
||||||
# test
|
# test
|
||||||
CUDA_VISIBLE_DEVICES='0' python reverie/main_nav_obj.py $flag \
|
echo /root/mount/Matterport3DSimulator/VLN-DUET/datasets/REVERIE/exprs_map/finetune/${name}/ckpts/best_val_unseen
|
||||||
|
CUDA_VISIBLE_DEVICES='0' python3 reverie/main_nav_obj.py $flag \
|
||||||
--tokenizer bert \
|
--tokenizer bert \
|
||||||
--resume_file ../datasets/REVERIE/exprs_map/finetune/dagger-vitbase-adversarial-but-original-model-with-glip-filter-seed.0/ckpts/best_val_unseen \
|
--resume_file /root/mount/Matterport3DSimulator/VLN-DUET/datasets/REVERIE/exprs_map/finetune/${name}/ckpts/best_val_unseen \
|
||||||
--test --submit
|
--test --submit
|
||||||
|
|||||||
1
map_nav_src/vp2objs.json
Normal file
1
map_nav_src/vp2objs.json
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user