Compare commits

..

6 Commits

Author SHA1 Message Date
4073c52bb8
fix: original json contains unicode characters 2023-11-08 23:17:13 +08:00
595866c2f4
feat: complete NOT_FOUND but always 50:50
- Notice: comment out RL
- Notice: always 50:50; there seem to be some bugs
2023-11-07 01:21:15 +08:00
03a3e5b489
feat: add NOT_FOUND action in rollout 2023-11-06 18:31:14 +08:00
4936098b5e
fix: change 'swap' to 'found' 2023-11-06 18:30:31 +08:00
a5db597de5
feat: REVERIE to R2R format 2023-11-06 15:52:54 +08:00
ab5010d32d
feat: vlnbert which can run with adversarial json 2023-11-06 15:51:50 +08:00
6 changed files with 144 additions and 119 deletions

42
adversarial_summary.py Normal file
View File

@ -0,0 +1,42 @@
import json
import os
import re


def remove_non_ascii(text):
    return re.sub(r'[^\x00-\x7F]', ' ', text)


for file in ['train', 'val_unseen', 'val_seen', 'train_seen', 'test', 'val_train_seen']:
    print(file)
    if os.path.isfile('data/adversarial/reverie_{}_fnf.json'.format(file)):
        with open('data/adversarial/reverie_{}_fnf.json'.format(file)) as fp:
            data = json.load(fp)
        result = {}
        for i in data:
            instruction_id = i['path_id']
            if instruction_id not in result:
                result[instruction_id] = {
                    'distance': float(i['distance']),
                    'scan': i['scan'],
                    'path_id': int(i['path_id']),
                    'path': i['path'],
                    'heading': float(i['heading']),
                    'instructions': [remove_non_ascii(i['instruction'])],
                    'found': [i['found']],
                    'id': i['id'],
                    'objId': i['objId']
                }
            else:
                result[instruction_id]['instructions'].append(remove_non_ascii(i['instruction']))
                result[instruction_id]['found'].append(i['found'])
        output = []
        for k, item in result.items():
            output.append(item)
    else:
        output = []
    with open('data/adversarial/R2R_{}.json'.format(file), 'w') as fp:
        json.dump(output, fp)
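For orientation, a minimal sketch of the record shape that the grouping loop above writes to data/adversarial/R2R_{split}.json; every value below is illustrative and not taken from the REVERIE data.

# Illustrative only: one grouped record per path_id, with the per-instruction
# 'found' flags collected into a list parallel to 'instructions'.
example_record = {
    'distance': 9.27,
    'scan': 'SCAN_ID_PLACEHOLDER',
    'path_id': 4332,
    'path': ['VIEWPOINT_A', 'VIEWPOINT_B'],
    'heading': 4.04,
    'instructions': ['Go to the bathroom and bring back the towel.'],
    'found': [True],
    'id': '4332_0',
    'objId': '12'
}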

View File

@ -1,21 +0,0 @@
import json
import sys
import random

with open(sys.argv[1]) as fp:
    data = json.load(fp)

for _, d in enumerate(data):
    swaps = []
    for index, ins in enumerate(d['instructions']):
        p = random.random()
        if p > 0.5:
            swaps.append(True)
            d['instructions'][index] += 'This is swap.'
        else:
            swaps.append(False)
    d['swap'] = swaps

print(data)
with open(sys.argv[1], 'w') as fp:
    json.dump(data, fp)
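The removed script above appended a marker string to roughly half of the instructions and recorded a per-instruction 'swap' flag; the rest of this diff renames that flag to 'found'. A minimal sketch, assuming the same 50/50 random labelling, of producing the renamed flag (the real reverie_{split}_fnf.json files are generated elsewhere and are not part of this diff):

import json
import random
import sys

# Hypothetical stand-in for the removed script: attach a boolean 'found' flag
# per instruction (50/50 at random) instead of the old 'swap' flag.
with open(sys.argv[1]) as fp:
    data = json.load(fp)

for d in data:
    d['found'] = [random.random() > 0.5 for _ in d['instructions']]

with open(sys.argv[1], 'w') as fp:
    json.dump(data, fp)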

View File

@ -61,16 +61,14 @@ class BaseAgent(object):
if iters is not None:
# For each time, it will run the first 'iters' iterations. (It was shuffled before)
for i in range(iters):
trajs, found = self.rollout(**kwargs)
print(found)
for index, traj in enumerate(trajs):
traj, found = self.rollout(**kwargs)
for index, traj in enumerate(traj):
self.loss = 0
self.results[traj['instr_id']] = (traj['path'], found[index])
else: # Do a full round
while True:
trajs, found = self.rollout(**kwargs)
print("FOUND: ", found)
for index, traj in enumerate(trajs):
traj, found = self.rollout(**kwargs)
for index, traj in enumerate(traj):
if traj['instr_id'] in self.results:
looped = True
else:
@ -159,9 +157,8 @@ class Seq2SeqAgent(BaseAgent):
for i, ob in enumerate(obs):
for j, cc in enumerate(ob['candidate']):
candidate_feat[i, j, :] = cc['feature']
# append the NOT_FOUND token
candidate_feat[i, len(ob['candidate'])+1, :] = np.ones((self.feature_size + args.angle_feat_size))
candidate_feat[i, len(ob['candidate']), :] = np.zeros(self.feature_size+args.angle_feat_size, dtype=np.float32) # <STOP>
candidate_feat[i, len(ob['candidate'])+1, :] = np.ones(self.feature_size+args.angle_feat_size, dtype=np.float32) # <NOT_FOUND>
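# Slot layout after this change: indices 0..len(candidate)-1 hold the navigable
# candidate views, index len(candidate) holds the all-zeros <STOP> token, and
# index len(candidate)+1 holds the all-ones <NOT_FOUND> token.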
return torch.from_numpy(candidate_feat).cuda(), candidate_leng
@ -193,11 +190,10 @@ class Seq2SeqAgent(BaseAgent):
break
else: # Stop here
assert ob['teacher'] == ob['viewpoint'] # The teacher action should be "STAY HERE"
if ob['swap']: # the instruction was swapped, so the target is NOT_FOUND
a[i] = len(ob['candidate'])-1
else: # STOP
a[i] = len(ob['candidate'])-2
print(" ", a)
if ob['found']:
a[i] = len(ob['candidate'])
else:
a[i] = len(ob['candidate'])+1
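# i.e. the teacher target is the <STOP> slot (len(ob['candidate'])) when the
# object is actually present and the <NOT_FOUND> slot (len(ob['candidate'])+1)
# when it is not.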
return torch.from_numpy(a).cuda()
def make_equiv_action(self, a_t, perm_obs, perm_idx=None, traj=None, found=None):
@ -216,7 +212,6 @@ class Seq2SeqAgent(BaseAgent):
for i, idx in enumerate(perm_idx):
action = a_t[i]
# print('action: ', action)
if action != -1 and action != -2: # -1 is the <stop> action
select_candidate = perm_obs[i]['candidate'][action]
src_point = perm_obs[i]['viewIndex']
@ -240,18 +235,11 @@ class Seq2SeqAgent(BaseAgent):
# print("action: {} view_index: {}".format(action, state.viewIndex))
if traj is not None:
traj[i]['path'].append((state.location.viewpointId, state.heading, state.elevation))
else:
found[i] = action
elif action == -1 or action == -2:
if found is not None:
found[i] = action
'''
elif action == -1:
print('<STOP>')
elif action == -2:
print('<NOT_FOUND>')
'''
def rollout(self, train_ml=None, train_rl=True, reset=True):
"""
:param train_ml: The weight to train with maximum likelihood
@ -260,7 +248,6 @@ class Seq2SeqAgent(BaseAgent):
:return:
"""
print("ROLLOUT!!!")
if self.feedback == 'teacher' or self.feedback == 'argmax':
train_rl = False
@ -270,15 +257,13 @@ class Seq2SeqAgent(BaseAgent):
obs = np.array(self.env.reset())
else:
obs = np.array(self.env._get_obs())
batch_size = len(obs)
# Language input
sentence, language_attention_mask, token_type_ids, \
seq_lengths, perm_idx = self._sort_batch(obs)
perm_obs = obs[perm_idx]
''' Language BERT '''
language_inputs = {'mode': 'language',
@ -296,9 +281,8 @@ class Seq2SeqAgent(BaseAgent):
'instr_id': ob['instr_id'],
'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])],
} for ob in perm_obs]
found = [None for _ in range(len(perm_obs))]
found = [ None for _ in range(len(perm_obs)) ]
# Init the reward shaping
last_dist = np.zeros(batch_size, np.float32)
@ -322,6 +306,15 @@ class Seq2SeqAgent(BaseAgent):
for t in range(self.episode_len):
input_a_t, candidate_feat, candidate_leng = self.get_input_feat(perm_obs)
'''
# show feature
for index, feat in enumerate(candidate_feat):
for ff in feat:
print(ff)
print(candidate_leng[index])
print()
'''
# the first [CLS] token, initialized by the language BERT, serves
@ -348,37 +341,30 @@ class Seq2SeqAgent(BaseAgent):
# Mask outputs where agent can't move forward
# Here the logit is [b, max_candidate]
# (8, max(candidate))
candidate_mask = utils.length2mask(candidate_leng)
logit.masked_fill_(candidate_mask, -float('inf'))
# Supervised training
target = self._teacher_action(perm_obs, ended)
for i, d in enumerate(target):
# print(perm_obs[i]['swap'], perm_obs[i]['instructions'])
# print(d)
_, at_t = logit.max(1)
'''
if at_t[i].item() == candidate_leng[i]-1:
print("-2")
elif at_t[i].item() == candidate_leng[i]-2:
print("-1")
else:
print(at_t[i].item())
print()
'''
ml_loss += self.criterion(logit, target)
a_predict = None
'''
for index, mask in enumerate(candidate_mask):
print(mask)
print(candidate_leng[index])
print(logit[index])
print(target[index])
print("\n\n")
'''
# Determine next model inputs
if self.feedback == 'teacher':
a_t = target # teacher forcing
_, a_predict = logit.max(1)
a_predict = a_predict.detach()
elif self.feedback == 'argmax':
_, a_t = logit.max(1) # student forcing - argmax
a_t = a_t.detach()
a_predict = a_t.detach()
log_probs = F.log_softmax(logit, 1) # Calculate the log_prob here
policy_log_probs.append(log_probs.gather(1, a_t.unsqueeze(1))) # Gather the log_prob for each batch
elif self.feedback == 'sample':
@ -386,42 +372,39 @@ class Seq2SeqAgent(BaseAgent):
c = torch.distributions.Categorical(probs)
self.logs['entropy'].append(c.entropy().sum().item()) # For log
entropys.append(c.entropy()) # For optimization
new_c = c.sample()
a_t = new_c.detach()
a_predict = new_c.detach()
a_t = c.sample().detach()
policy_log_probs.append(c.log_prob(a_t))
else:
# print(self.feedback)
print(self.feedback)
sys.exit('Invalid feedback option')
# Prepare environment action
# NOTE: Env action is in the perm_obs space
cpu_a_t = a_t.cpu().numpy()
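# Mapping from model output indices to environment actions used below:
# candidate_leng[i]-2 is the <STOP> slot (mapped to -1), candidate_leng[i]-1 is
# the <NOT_FOUND> slot (mapped to -2), and episodes that have already ended
# reuse the terminal action stored in found[i].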
for i, next_id in enumerate(cpu_a_t):
if next_id == args.ignoreid or ended[i]:
if found[i] == True:
cpu_a_t[i] = -1 # Change the <end> and ignore action to -1
else:
cpu_a_t[i] = -2
if next_id == (args.ignoreid) or ended[i]:
cpu_a_t[i] = found[i]
elif next_id == (candidate_leng[i]-2):
cpu_a_t[i] = -1 # Change the <end> and ignore action to -1
cpu_a_t[i] = -1
elif next_id == (candidate_leng[i]-1):
cpu_a_t[i] = -2
cpu_a_predict = a_predict.cpu().numpy()
for i, next_id in enumerate(cpu_a_predict):
if next_id == (candidate_leng[i]-2):
cpu_a_predict[i] = -1 # Change the <end> and ignore action to -1
elif next_id == (candidate_leng[i]-1):
cpu_a_predict[i] = -2
# Make action and get the new state
print(cpu_a_t)
self.make_equiv_action(cpu_a_t, perm_obs, perm_idx, traj, found=found)
self.make_equiv_action(cpu_a_t, perm_obs, perm_idx, traj, found)
'''
print(self.feedback, end=' ')
print(cpu_a_t, end=' ')
for i in perm_obs:
print(i['found'], end=' ')
print(found)
print()
'''
obs = np.array(self.env._get_obs())
perm_obs = obs[perm_idx] # Perm the obs for the result
'''
if train_rl:
# Calculate the mask and reward
dist = np.zeros(batch_size, np.float32)
@ -442,22 +425,22 @@ class Seq2SeqAgent(BaseAgent):
if action_idx == -1: # If the action now is end
if dist[i] < 3.0: # Correct
reward[i] = 2.0 + ndtw_score[i] * 2.0
if ob['swap']:
reward[i] -= 2
else:
if ob['found']:
reward[i] += 1
else: # Incorrect
reward[i] = -2.0
elif action_idx == -2: # the NOT_FOUND reward is set here
if dist[i] < 3.0:
reward[i] = 2.0 + ndtw_score[i] * 2.0
if ob['swap']:
reward[i] += 3 # detected a wrong instruction, add an extra point
else:
reward[i] -= 2
else: # Incorrect
reward[i] = -2.0
reward[i] += 1 # distance > 3, the object really was not found, so the penalty goes from -2 to -1
elif action_idx == -2:
if dist[i] < 3.0:
reward[i] = 2.0 + ndtw_score[i] * 2.0
if ob['found']:
reward[i] -= 2
else:
reward[i] += 1
else: # Incorrect
reward[i] = -2.0
else: # The action is not end
# Path fidelity rewards (distance & nDTW)
reward[i] = - (dist[i] - last_dist[i])
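# Reward shaping under the 'found' flag: a correct <STOP> (action -1, dist < 3)
# earns 2 + 2*nDTW, plus 1 if ob['found'] is True; a correct <NOT_FOUND>
# (action -2) earns 2 + 2*nDTW, minus 2 if ob['found'] is True and plus 1
# otherwise; an incorrect final action is penalised with -2.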
@ -475,6 +458,7 @@ class Seq2SeqAgent(BaseAgent):
masks.append(mask)
last_dist[:] = dist
last_ndtw[:] = ndtw_score
'''
# Update the finished actions
# -1 means ended or ignored (already ended)
@ -485,8 +469,7 @@ class Seq2SeqAgent(BaseAgent):
if ended.all():
break
# print()
'''
if train_rl:
# Last action in A2C
input_a_t, candidate_feat, candidate_leng = self.get_input_feat(perm_obs)
@ -497,7 +480,6 @@ class Seq2SeqAgent(BaseAgent):
visual_attention_mask = torch.cat((language_attention_mask, visual_temp_mask), dim=-1)
self.vln_bert.vln_bert.config.directions = max(candidate_leng)
''' Visual BERT '''
visual_inputs = {'mode': 'visual',
'sentence': language_features,
'attention_mask': visual_attention_mask,
@ -548,6 +530,7 @@ class Seq2SeqAgent(BaseAgent):
self.loss += rl_loss
self.logs['RL_loss'].append(rl_loss.item())
'''
if train_ml is not None:
self.loss += ml_loss * train_ml / batch_size
@ -557,9 +540,8 @@ class Seq2SeqAgent(BaseAgent):
self.losses.append(0.)
else:
self.losses.append(self.loss.item() / self.episode_len) # This argument is useless.
print("\n\n")
return traj, found
return traj, found
def test(self, use_dropout=False, feedback='argmax', allow_cheat=False, iters=None):
''' Evaluate once on each instruction in the current environment '''

View File

@ -1,8 +1,6 @@
''' Batched Room-to-Room navigation environment '''
import sys
from networkx.algorithms import swap
sys.path.append('buildpy36')
sys.path.append('Matterport_Simulator/build/')
import MatterSim
@ -16,7 +14,6 @@ import os
import random
import networkx as nx
from param import args
import time
from utils import load_datasets, load_nav_graphs, pad_instr_tokens
from IPython import embed
@ -130,7 +127,7 @@ class R2RBatch():
new_item = dict(item)
new_item['instr_id'] = '%s_%d' % (item['path_id'], j)
new_item['instructions'] = instr
new_item['swap'] = item['swap'][j]
new_item['found'] = item['found'][j]
''' BERT tokenizer '''
instr_tokens = tokenizer.tokenize(instr)
@ -140,12 +137,10 @@ class R2RBatch():
if new_item['instr_encoding'] is not None: # Filter the wrong data
self.data.append(new_item)
scans.append(item['scan'])
except:
continue
print("split {} has {} datas in the file.".format(split, max_len))
if name is None:
self.name = splits[0] if len(splits) > 0 else "FAKE"
else:
@ -334,6 +329,7 @@ class R2RBatch():
# [visual_feature, angle_feature] for views
feature = np.concatenate((feature, self.angle_feature[base_view_id]), -1)
obs.append({
'instr_id' : item['instr_id'],
'scan' : state.scanId,
@ -348,7 +344,7 @@ class R2RBatch():
'teacher' : self._shortest_path_action(state, item['path'][-1]),
'gt_path' : item['path'],
'path_id' : item['path_id'],
'swap': item['swap']
'found': item['found']
})
if 'instr_encoding' in item:
obs[-1]['instr_encoding'] = item['instr_encoding']

View File

@ -55,11 +55,16 @@ class Evaluation(object):
near_d = d
return near_id
def _score_item(self, instr_id, path):
def _score_item(self, instr_id, path, predict_found):
''' Calculate error based on the final position in trajectory, and also
the closest position (oracle stopping rule).
The path contains [view_id, angle, vofv] '''
gt = self.gt[instr_id.split('_')[-2]]
index = int(instr_id.split('_')[-1])
gt_instruction = gt['instructions'][index]
gt_found = gt['found'][index]
start = gt['path'][0]
assert start == path[0][0], 'Result trajectories should include the start position'
goal = gt['path'][-1]
@ -68,6 +73,19 @@ class Evaluation(object):
self.scores['nav_errors'].append(self.distances[gt['scan']][final_position][goal])
self.scores['oracle_errors'].append(self.distances[gt['scan']][nearest_position][goal])
self.scores['trajectory_steps'].append(len(path)-1)
# <STOP> <NOT_FOUND> score
score = 0
if gt_found == True:
if predict_found == -1:
score = 1
else:
if predict_found == -2:
score = 1
self.scores['found_count'] += score
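# found_count counts trajectories whose terminal action matches the ground
# truth: -1 (<STOP>) when the object should be found and -2 (<NOT_FOUND>)
# when it should not.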
distance = 0 # length of the path in meters
prev = path[0]
for curr in path[1:]:
@ -81,6 +99,7 @@ class Evaluation(object):
def score(self, output_file):
''' Evaluate each agent trajectory based on how close it got to the goal location '''
self.scores = defaultdict(list)
self.scores['found_count'] = 0
instr_ids = set(self.instr_ids)
if type(output_file) is str:
with open(output_file) as f:
@ -90,12 +109,14 @@ class Evaluation(object):
# print('result length', len(results))
# print("RESULT:", results)
path_counter = 0
for item in results:
# Check against expected ids
if item['instr_id'] in instr_ids:
# print("{} exist".format(item['instr_id']))
instr_ids.remove(item['instr_id'])
self._score_item(item['instr_id'], item['trajectory'])
self._score_item(item['instr_id'], item['trajectory'], item['found'])
path_counter += 1
else:
print("{} not exist".format(item['instr_id']))
print(item)
@ -108,7 +129,8 @@ class Evaluation(object):
'nav_error': np.average(self.scores['nav_errors']),
'oracle_error': np.average(self.scores['oracle_errors']),
'steps': np.average(self.scores['trajectory_steps']),
'lengths': np.average(self.scores['trajectory_lengths'])
'lengths': np.average(self.scores['trajectory_lengths']),
'found_score': self.scores['found_count'] / path_counter
}
num_successes = len([i for i in self.scores['nav_errors'] if i < self.error_margin])
score_summary['success_rate'] = float(num_successes)/float(len(self.scores['nav_errors']))

View File

@ -105,6 +105,9 @@ def train(train_env, tok, n_iters, log_every=2000, val_envs={}, aug_env=None):
# Run validation
loss_str = "iter {}".format(iter)
save_results = []
for env_name, (env, evaluator) in val_envs.items():
listner.env = env
@ -112,6 +115,8 @@ def train(train_env, tok, n_iters, log_every=2000, val_envs={}, aug_env=None):
listner.test(use_dropout=False, feedback='argmax', iters=None)
result = listner.get_results()
score_summary, _ = evaluator.score(result)
print(score_summary)
loss_str += ", %s " % env_name
for metric, val in score_summary.items():
if metric in ['spl']:
@ -195,12 +200,11 @@ def train_val(test_only=False):
if test_only:
featurized_scans = None
val_env_names = ['val_train_seen']
val_env_names = ['val_unseen']
else:
featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
# val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']
# val_env_names = ['val_train_seen']
val_env_names = ['val_unseen']
val_env_names = ['train','val_unseen']
train_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['train'], tokenizer=tok)
from collections import OrderedDict