diff --git a/reverie_src/agent.py b/reverie_src/agent.py
index 63cece0..f7f807d 100644
--- a/reverie_src/agent.py
+++ b/reverie_src/agent.py
@@ -36,7 +36,7 @@ class BaseAgent(object):
             json.dump(output, f)
 
     def get_results(self):
-        output = [{'instr_id': k, 'trajectory': v, 'predObjId': r} for k, (v,r) in self.results.items()]
+        output = [{'instr_id': k, 'trajectory': v, 'predObjId': r, 'found': found} for k, (v,r, found) in self.results.items()]
         return output
 
     def rollout(self, **args):
@@ -57,17 +57,19 @@ class BaseAgent(object):
         if iters is not None:
             # For each time, it will run the first 'iters' iterations. (It was shuffled before)
             for i in range(iters):
-                for traj in self.rollout(**kwargs):
+                trajs, found = self.rollout(**kwargs)
+                for index, traj in enumerate(trajs):
                     self.loss = 0
-                    self.results[traj['instr_id']] = (traj['path'], traj['predObjId'])
+                    self.results[traj['instr_id']] = (traj['path'], traj['predObjId'], found[index])
         else:   # Do a full round
             while True:
-                for traj in self.rollout(**kwargs):
+                trajs, found = self.rollout(**kwargs)
+                for index, traj in enumerate(trajs):
                     if traj['instr_id'] in self.results:
                         looped = True
                     else:
                         self.loss = 0
-                        self.results[traj['instr_id']] = (traj['path'], traj['predObjId'])
+                        self.results[traj['instr_id']] = (traj['path'], traj['predObjId'], found[index])
                 if looped:
                     break
@@ -169,8 +171,14 @@ class Seq2SeqAgent(BaseAgent):
         for i, ob in enumerate(obs):
             for j, cc in enumerate(ob['candidate']):
                 candidate_feat[i, j, :] = cc['feature']
+        result = torch.from_numpy(candidate_feat)
+        '''
+        for i, ob in enumerate(obs):
+            result[i, len(ob['candidate']), :] = torch.ones((self.feature_size + args.angle_feat_size), dtype=torch.float32)
+        '''
+        result = result.cuda()
 
-        return torch.from_numpy(candidate_feat).cuda(), candidate_leng
+        return result, candidate_leng
 
     def _object_variable(self, obs):
         cand_obj_leng = [len(ob['candidate_obj'][2]) + 1 for ob in obs]  # +1 is for no REF
@@ -202,7 +210,7 @@ class Seq2SeqAgent(BaseAgent):
 
         return input_a_t, f_t, candidate_feat, candidate_leng, obj_feat, obj_pos, obj_leng
 
-    def _teacher_action(self, obs, ended, cand_size):
+    def _teacher_action(self, obs, ended, cand_size, candidate_leng):
         """
         Extract teacher actions into variable.
         :param obs: The observation.
@@ -221,6 +229,12 @@ class Seq2SeqAgent(BaseAgent):
                 else:   # Stop here
                     assert ob['teacher'] == ob['viewpoint']     # The teacher action should be "STAY HERE"
                     a[i] = cand_size - 1
+                    '''
+                    if ob['found']:
+                        a[i] = cand_size - 1
+                    else:
+                        a[i] = candidate_leng[i] - 1
+                    '''
         return torch.from_numpy(a).cuda()
 
     def _teacher_REF(self, obs, just_ended):
@@ -232,8 +246,12 @@ class Seq2SeqAgent(BaseAgent):
                 candidate_objs = ob['candidate_obj'][2]
                 for k, kid in enumerate(candidate_objs):
                     if kid == ob['objId']:
-                        a[i] = k
-                        break
+                        if ob['found']:
+                            a[i] = k
+                            break
+                        else:
+                            a[i] = len(candidate_objs)
+                            break
             else:
                 a[i] = args.ignoreid
         return torch.from_numpy(a).cuda()
@@ -256,7 +274,7 @@ class Seq2SeqAgent(BaseAgent):
         for i, idx in enumerate(perm_idx):
             action = a_t[i]
-            if action != -1:            # -1 is the <stop> action
+            if action != -1 and action != -2:            # -1 is the <stop> action
                 select_candidate = perm_obs[i]['candidate'][action]
                 src_point = perm_obs[i]['viewIndex']
                 trg_point = select_candidate['pointId']
@@ -296,6 +314,7 @@ class Seq2SeqAgent(BaseAgent):
         else:
             obs = np.array(self.env._get_obs())
+        batch_size = len(obs)
 
         # Reorder the language input for the encoder (do not ruin the original code)
@@ -334,6 +353,7 @@ class Seq2SeqAgent(BaseAgent):
         # Initialization the tracking state
         ended = np.array([False] * batch_size)  # Indices match permuation of the model, not env
         just_ended = np.array([False] * batch_size)
+        found = np.array([None] * batch_size)
 
         # Init the logs
         rewards = []
@@ -398,7 +418,7 @@ class Seq2SeqAgent(BaseAgent):
 
             if train_ml is not None:
                 # Supervised training
-                target = self._teacher_action(perm_obs, ended, candidate_mask.size(1))
+                target = self._teacher_action(perm_obs, ended, candidate_mask.size(1), candidate_leng)
                 ml_loss += self.criterion(logit, target)
 
             # Determine next model inputs
@@ -424,12 +444,15 @@ class Seq2SeqAgent(BaseAgent):
             # NOTE: Env action is in the perm_obs space
             cpu_a_t = a_t.cpu().numpy()
             for i, next_id in enumerate(cpu_a_t):
-                if ((next_id == visual_temp_mask.size(1)) or (t == self.episode_len-1)) and (not ended[i]):    # just stopped and forced stopped
+                if ((next_id == visual_temp_mask.size(1)) or (next_id == (candidate_leng[i]-1)) or (t == self.episode_len-1)) \
+                        and (not ended[i]):    # just stopped and forced stopped
                     just_ended[i] = True
                     if self.feedback == 'argmax':
                         _, ref_t = logit_REF[i].max(0)
                         if ref_t != obj_leng[i]-1:  # decide not to do REF
                             traj[i]['predObjId'] = perm_obs[i]['candidate_obj'][2][ref_t]
+                        else:
+                            traj[i]['ref'] = 'NOT_FOUND'
 
                     if args.submit:
                         if obj_leng[i] == 1:
@@ -443,8 +466,18 @@ class Seq2SeqAgent(BaseAgent):
                 else:
                     just_ended[i] = False
 
-                if (next_id == visual_temp_mask.size(1)) or (next_id == args.ignoreid) or (ended[i]):    # The last action is <end>
-                    cpu_a_t[i] = -1             # Change the <end> and ignore action to -1
+
+                # found[i] = -1: agent predicts the target is found; -2: predicts it is not found
+                if (next_id == args.ignoreid) or (ended[i]):
+                    cpu_a_t[i] = found[i]
+                elif (next_id == visual_temp_mask.size(1)):
+                    cpu_a_t[i] = -1
+                    found[i] = -1
+                    if self.feedback == 'argmax':
+                        _, ref_t = logit_REF[i].max(0)
+                        if ref_t == obj_leng[i]-1:
+                            found[i] = -2
+                        else:
+                            found[i] = -1
 
             ''' Supervised training for REF '''
             if train_ml is not None:
@@ -600,7 +633,7 @@ class Seq2SeqAgent(BaseAgent):
 
         # import pdb; pdb.set_trace()
 
-        return traj
+        return traj, found
 
     def test(self, use_dropout=False, feedback='argmax', allow_cheat=False, iters=None):
         ''' Evaluate once on each instruction in the current environment '''
diff --git a/reverie_src/env.py b/reverie_src/env.py
index 4bc25a4..aeb576d 100644
--- a/reverie_src/env.py
+++ b/reverie_src/env.py
@@ -127,6 +127,7 @@ class R2RBatch():
                 new_item = dict(item)
                 new_item['instr_id'] = '%s_%d' % (item['id'], j)
                 new_item['instructions'] = instr
+                new_item['found'] = item['found'][j]
 
                 ''' BERT tokenizer '''
                 instr_tokens = tokenizer.tokenize(instr)
@@ -339,7 +340,8 @@ class R2RBatch():
                 'gt_path' : item['path'],
                 'path_id' : item['id'],
                 'objId': str(item['objId']) if 'objId' in item else str(None),  # target objId
-                'candidate_obj': (obj_local_pos[:args.maxObject], obj_features[:args.maxObject], candidate_objId[:args.maxObject])
+                'candidate_obj': (obj_local_pos[:args.maxObject], obj_features[:args.maxObject], candidate_objId[:args.maxObject]),
+                'found': item['found']
             })
             if 'instr_encoding' in item:
                 obs[-1]['instr_encoding'] = item['instr_encoding']
diff --git a/reverie_src/eval.py b/reverie_src/eval.py
index 0aa9624..07dd72c 100644
--- a/reverie_src/eval.py
+++ b/reverie_src/eval.py
@@ -50,11 +50,12 @@ class Evaluation(object):
                 near_d = d
         return near_id
 
-    def _score_item(self, instr_id, path, ref_objId):
+    def _score_item(self, instr_id, path, ref_objId, predict_found):
         ''' Calculate error based on the final position in trajectory, and also
             the closest position (oracle stopping rule).
             The path contains [view_id, angle, vofv] '''
         gt = self.gt[instr_id[:-2]]  # pathId_objId
+        index = int(instr_id.split('_')[-1])
         start = gt['path'][0]
         assert start == path[0][0], 'Result trajectories should include the start position'
         goal = gt['path'][-1]
@@ -74,6 +75,19 @@ class Evaluation(object):
             self.distances[gt['scan']][start][goal]
         )
 
+        if gt['found'][index] == True:
+            if predict_found == -1:
+                self.scores['found_count'] += 1
+                self.scores['foundable'].append(1)
+            else:
+                self.scores['foundable'].append(0)
+        else:
+            if predict_found == -2:
+                self.scores['found_count'] += 1
+                self.scores['foundable'].append(1)
+            else:
+                self.scores['foundable'].append(0)
+
         # REF success or not
         if (ref_objId == str(gt.get('objId', 0))) or (ref_objId == gt.get('objId', 0)):
             self.scores['rgs'].append(1)
@@ -104,6 +118,8 @@ class Evaluation(object):
     def score(self, output_file):
         ''' Evaluate each agent trajectory based on how close it got to the goal location '''
         self.scores = defaultdict(list)
+        self.scores['found_count'] = 0
+        self.scores['foundable'] = []
         instr_ids = set(self.instr_ids)
         if type(output_file) is str:
             with open(output_file) as f:
@@ -112,11 +128,13 @@ class Evaluation(object):
             results = output_file
         print('result length', len(results))
+        path_counter = 0
         for item in results:
             # Check against expected ids
             if item['instr_id'] in instr_ids:
                 instr_ids.remove(item['instr_id'])
-                self._score_item(item['instr_id'], item['trajectory'], item['predObjId'])
+                self._score_item(item['instr_id'], item['trajectory'], item['predObjId'], item['found'])
+                path_counter += 1
 
         if 'train' not in self.splits:  # Exclude the training from this. (Because training eval may be partial)
             assert len(instr_ids) == 0, 'Missing %d of %d instruction ids from %s - not in %s'\
@@ -125,7 +143,8 @@
 
         score_summary = {
             'steps': np.average(self.scores['trajectory_steps']),
-            'lengths': np.average(self.scores['trajectory_lengths'])
+            'lengths': np.average(self.scores['trajectory_lengths']),
+            'found_score': self.scores['found_count'] / path_counter
         }
         end_successes = sum(self.scores['visible'])
         score_summary['success_rate'] = float(end_successes) / float(len(self.scores['visible']))
@@ -137,8 +156,18 @@
                 zip(self.scores['visible'], self.scores['trajectory_lengths'], self.scores['shortest_lengths'])
             ]
         score_summary['spl'] = np.average(spl)
+        # sspl
+        sspl = [float(foundable == 1) * float(visible == 1) * l / max(l, p, 0.01)
+                for foundable, visible, p, l in
+                zip(self.scores['foundable'], self.scores['visible'], self.scores['trajectory_lengths'], self.scores['shortest_lengths'])
+            ]
+        score_summary['sspl'] = np.average(sspl)
 
         assert len(self.scores['rgs']) == len(self.instr_ids)
+        try:
+            assert len(self.scores['rgs']) == len(self.instr_ids)
+        except:
+            print(len(self.scores['rgs']), len(self.instr_ids))
 
         num_rgs = sum(self.scores['rgs'])
         score_summary['rgs'] = float(num_rgs) / float(len(self.scores['rgs']))
diff --git a/reverie_src/train.py b/reverie_src/train.py
index 92254ce..4e64bf9 100644
--- a/reverie_src/train.py
+++ b/reverie_src/train.py
@@ -110,14 +110,14 @@ def train(train_env, tok, n_iters, log_every=2000, val_envs={}, aug_env=None):
             score_summary, _ = evaluator.score(result)
             loss_str += ", %s " % env_name
             for metric, val in score_summary.items():
-                if metric in ['spl']:
-                    writer.add_scalar("spl/%s" % env_name, val, idx)
+                if metric in ['sspl']:
+                    writer.add_scalar("sspl/%s" % env_name, val, idx)
                     if env_name in best_val:
-                        if val > best_val[env_name]['spl']:
-                            best_val[env_name]['spl'] = val
+                        if val > best_val[env_name]['sspl']:
+                            best_val[env_name]['sspl'] = val
                             best_val[env_name]['update'] = True
-                        elif (val == best_val[env_name]['spl']) and (score_summary['success_rate'] > best_val[env_name]['sr']):
-                            best_val[env_name]['spl'] = val
+                        elif (val == best_val[env_name]['sspl']) and (score_summary['success_rate'] > best_val[env_name]['sr']):
+                            best_val[env_name]['sspl'] = val
                             best_val[env_name]['update'] = True
                 loss_str += ', %s: %.4f' % (metric, val)
@@ -236,6 +236,7 @@ def train_val(test_only=False):
 
     if args.train == 'listener':
         train(train_env, tok, args.iters, log_every=args.log_every, val_envs=val_envs)
+        # train(train_env, tok, args.iters, log_every=100, val_envs=val_envs)
     elif args.train == 'validlistener':
         valid(train_env, tok, val_envs=val_envs)
     else:
diff --git a/scripts/train_reverie_agent.sh b/scripts/train_reverie_agent.sh
index 47bf98e..524f2f8 100644
--- a/scripts/train_reverie_agent.sh
+++ b/scripts/train_reverie_agent.sh
@@ -1,7 +1,7 @@
 export AIRBERT_ROOT=$(pwd)
 export PYTHONPATH=${PYTHONPATH}:${AIRBERT_ROOT}/build
 
-name=REVERIE-RC-VLN-BERT-original/train-init.airbert
+name=REVERIE-RC-VLN-BERT-original/train-init.airbert-ver2
 
 flag="--vlnbert vilbert
 
@@ -13,7 +13,7 @@ flag="--vlnbert vilbert
       --features places365
       --maxAction 15
       --maxInput 50
-      --batchSize 4
+      --batchSize 8
       --feedback sample
       --lr 1e-5
      --iters 200000