Compare commits
3 Commits: adversaria...main

| SHA1 |
|---|
| d2f18c1c61 |
| 857c7e8e10 |
| 7329f7fa0a |
data/adversarial.py (new file, 21 lines)
@@ -0,0 +1,21 @@
+import json
+import sys
+import random
+
+with open(sys.argv[1]) as fp:
+    data = json.load(fp)
+
+for _, d in enumerate(data):
+    swaps = []
+    for index, ins in enumerate(d['instructions']):
+        p = random.random()
+        if p > 0.5:
+            swaps.append(True)
+            d['instructions'][index] += 'This is swap.'
+        else:
+            swaps.append(False)
+    d['swap'] = swaps
+print(data)
+
+with open(sys.argv[1], 'w') as fp:
+    json.dump(data, fp)
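Taken together, the new script walks an R2R-style annotation file, appends a marker string to roughly half of the instructions at random, records which ones were touched in a parallel boolean `swap` list, and overwrites the input file in place. A minimal round-trip sketch, assuming a hypothetical sample record that only needs the `instructions` key:

```python
import json, os, subprocess, tempfile

# Hypothetical record; only 'instructions' is required by data/adversarial.py.
sample = [{"path_id": 1, "instructions": ["walk to the kitchen", "turn left"]}]

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as fp:
    json.dump(sample, fp)

subprocess.run(["python", "data/adversarial.py", fp.name], check=True)

with open(fp.name) as out:
    # Each record now carries a parallel boolean 'swap' list; swapped
    # instructions end with the literal marker 'This is swap.'.
    print(json.load(out))
os.remove(fp.name)
```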
r2r_src/agent.py (117 changes)
@@ -35,12 +35,12 @@ class BaseAgent(object):
         self.losses = []  # For learning agents

     def write_results(self):
-        output = [{'instr_id': k, 'trajectory': v} for k, v in self.results.items()]
+        output = [{'instr_id': k, 'trajectory': v[0], 'found': v[1]} for k, v in self.results.items()]
         with open(self.results_path, 'w') as f:
             json.dump(output, f)

     def get_results(self):
-        output = [{'instr_id': k, 'trajectory': v} for k, v in self.results.items()]
+        output = [{'instr_id': k, 'trajectory': v[0], 'found': v[1]} for k, v in self.results.items()]
         return output

     def rollout(self, **args):
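With this change each value in `self.results` is a `(path, found)` tuple rather than a bare path, so the serialized output gains a per-instruction `found` field. A sketch of one dumped entry under that reading (ids and coordinates are illustrative):

```python
# One element of the list written by write_results():
entry = {
    "instr_id": "1021_0",               # hypothetical "<path_id>_<instruction index>" id
    "trajectory": [("vp_a", 0.0, 0.0),  # (viewpointId, heading, elevation) per step
                   ("vp_b", 1.57, 0.0)],
    "found": -1,                        # sentinel recorded at stop time: -1 <STOP>, -2 <NOT_FOUND>
}
print(entry)
```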
@@ -61,17 +61,21 @@ class BaseAgent(object):
         if iters is not None:
             # For each time, it will run the first 'iters' iterations. (It was shuffled before)
             for i in range(iters):
-                for traj in self.rollout(**kwargs):
+                trajs, found = self.rollout(**kwargs)
+                print(found)
+                for index, traj in enumerate(trajs):
                     self.loss = 0
-                    self.results[traj['instr_id']] = traj['path']
+                    self.results[traj['instr_id']] = (traj['path'], found[index])
         else:   # Do a full round
             while True:
-                for traj in self.rollout(**kwargs):
+                trajs, found = self.rollout(**kwargs)
+                print("FOUND: ", found)
+                for index, traj in enumerate(trajs):
                     if traj['instr_id'] in self.results:
                         looped = True
                     else:
                         self.loss = 0
-                        self.results[traj['instr_id']] = traj['path']
+                        self.results[traj['instr_id']] = (traj['path'], found[index])
                 if looped:
                     break
@@ -147,7 +151,7 @@ class Seq2SeqAgent(BaseAgent):
         return Variable(torch.from_numpy(features), requires_grad=False).cuda()

     def _candidate_variable(self, obs):
-        candidate_leng = [len(ob['candidate']) + 1 for ob in obs]  # +1 is for the end
+        candidate_leng = [len(ob['candidate']) + 2 for ob in obs]  # +1 is for the end
         candidate_feat = np.zeros((len(obs), max(candidate_leng), self.feature_size + args.angle_feat_size), dtype=np.float32)

         # Note: The candidate_feat at len(ob['candidate']) is the feature for the END
@@ -156,6 +160,9 @@ class Seq2SeqAgent(BaseAgent):
             for j, cc in enumerate(ob['candidate']):
                 candidate_feat[i, j, :] = cc['feature']

+            # append the NOT_FOUND token
+            candidate_feat[i, len(ob['candidate'])+1, :] = np.ones((self.feature_size + args.angle_feat_size))
+
         return torch.from_numpy(candidate_feat).cuda(), candidate_leng

     def get_input_feat(self, obs):
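The padded candidate tensor now reserves two virtual slots per observation: the zero-vector END/STOP feature at index `len(ob['candidate'])`, as the note above says, and an all-ones NOT_FOUND feature at index `len(ob['candidate'])+1`. A minimal sketch of that layout with a hypothetical feature size:

```python
import numpy as np

feature_size = 4   # hypothetical; the model uses feature_size + args.angle_feat_size
n_candidates = 3   # len(ob['candidate']) for one observation

leng = n_candidates + 2                            # +1 END slot, +1 NOT_FOUND slot
feat = np.zeros((leng, feature_size), np.float32)
feat[:n_candidates] = 0.5                          # stand-in for real candidate features
feat[n_candidates + 1] = 1.0                       # all-ones NOT_FOUND token; END row stays zero

print(feat)
```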
@@ -186,10 +193,14 @@ class Seq2SeqAgent(BaseAgent):
                     break
             else:   # Stop here
                 assert ob['teacher'] == ob['viewpoint']  # The teacher action should be "STAY HERE"
-                a[i] = len(ob['candidate'])
+                if ob['swap']:  # the instruction was swapped, so the target is NOT_FOUND
+                    a[i] = len(ob['candidate'])-1
+                else:  # STOP
+                    a[i] = len(ob['candidate'])-2
+        print(" ", a)
         return torch.from_numpy(a).cuda()

-    def make_equiv_action(self, a_t, perm_obs, perm_idx=None, traj=None):
+    def make_equiv_action(self, a_t, perm_obs, perm_idx=None, traj=None, found=None):
         """
         Interface between Panoramic view and Egocentric view
         It will convert the panoramic view action a_t to equivalent egocentric view actions for the simulator
@@ -205,7 +216,8 @@ class Seq2SeqAgent(BaseAgent):

         for i, idx in enumerate(perm_idx):
             action = a_t[i]
-            if action != -1:  # -1 is the <stop> action
+            # print('action: ', action)
+            if action != -1 and action != -2:  # -1 is the <stop> action
                 select_candidate = perm_obs[i]['candidate'][action]
                 src_point = perm_obs[i]['viewIndex']
                 trg_point = select_candidate['pointId']
@@ -228,6 +240,17 @@ class Seq2SeqAgent(BaseAgent):
                 # print("action: {} view_index: {}".format(action, state.viewIndex))
                 if traj is not None:
                     traj[i]['path'].append((state.location.viewpointId, state.heading, state.elevation))
+            else:
+                found[i] = action
+
+
+
+            '''
+            elif action == -1:
+                print('<STOP>')
+            elif action == -2:
+                print('<NOT_FOUND>')
+            '''

     def rollout(self, train_ml=None, train_rl=True, reset=True):
         """
@@ -237,6 +260,7 @@ class Seq2SeqAgent(BaseAgent):

         :return:
         """
+        print("ROLLOUT!!!")
         if self.feedback == 'teacher' or self.feedback == 'argmax':
             train_rl = False
@@ -252,7 +276,9 @@ class Seq2SeqAgent(BaseAgent):
         # Language input
         sentence, language_attention_mask, token_type_ids, \
             seq_lengths, perm_idx = self._sort_batch(obs)
+
         perm_obs = obs[perm_idx]
+

         ''' Language BERT '''
         language_inputs = {'mode': 'language',
@@ -271,6 +297,9 @@ class Seq2SeqAgent(BaseAgent):
             'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])],
         } for ob in perm_obs]

+        found = [None for _ in range(len(perm_obs))]
+
+
         # Init the reward shaping
         last_dist = np.zeros(batch_size, np.float32)
         last_ndtw = np.zeros(batch_size, np.float32)
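`found` acts as one slot per episode in the batch: it starts out as `None`s, `make_equiv_action` writes the stopping sentinel into it when an episode ends, and `rollout` returns it next to the trajectories. A toy version of that lifecycle (simplified; not the repo's actual control flow):

```python
batch_size = 3
found = [None for _ in range(batch_size)]  # as initialized in rollout

# While stepping, an episode that stops records which sentinel it used.
for i, action in enumerate([-1, 5, -2]):   # 5 = still picking real candidates
    if action in (-1, -2):                 # -1 <STOP>, -2 <NOT_FOUND>
        found[i] = action

print(found)                               # [-1, None, -2]
```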
@@ -294,9 +323,6 @@ class Seq2SeqAgent(BaseAgent):

             input_a_t, candidate_feat, candidate_leng = self.get_input_feat(perm_obs)

-            print("input_a_t: ", input_a_t.shape)
-            print("candidate_feat: ", candidate_feat.shape)
-            print("candidate_leng: ", candidate_leng)

             # the first [CLS] token, initialized by the language BERT, serves
             # as the agent's state passing through time steps
@@ -322,19 +348,37 @@ class Seq2SeqAgent(BaseAgent):

             # Mask outputs where agent can't move forward
             # Here the logit is [b, max_candidate]
+            # (8, max(candidate))
             candidate_mask = utils.length2mask(candidate_leng)
             logit.masked_fill_(candidate_mask, -float('inf'))

             # Supervised training
             target = self._teacher_action(perm_obs, ended)
+            for i, d in enumerate(target):
+                # print(perm_obs[i]['swap'], perm_obs[i]['instructions'])
+                # print(d)
+                _, at_t = logit.max(1)
+                '''
+                if at_t[i].item() == candidate_leng[i]-1:
+                    print("-2")
+                elif at_t[i].item() == candidate_leng[i]-2:
+                    print("-1")
+                else:
+                    print(at_t[i].item())
+                print()
+                '''
             ml_loss += self.criterion(logit, target)

+            a_predict = None
             # Determine next model inputs
             if self.feedback == 'teacher':
                 a_t = target  # teacher forcing
+                _, a_predict = logit.max(1)
+                a_predict = a_predict.detach()
             elif self.feedback == 'argmax':
                 _, a_t = logit.max(1)  # student forcing - argmax
                 a_t = a_t.detach()
+                a_predict = a_t.detach()
                 log_probs = F.log_softmax(logit, 1)  # Calculate the log_prob here
                 policy_log_probs.append(log_probs.gather(1, a_t.unsqueeze(1)))  # Gather the log_prob for each batch
             elif self.feedback == 'sample':
@@ -342,20 +386,39 @@ class Seq2SeqAgent(BaseAgent):
                 c = torch.distributions.Categorical(probs)
                 self.logs['entropy'].append(c.entropy().sum().item())  # For log
                 entropys.append(c.entropy())  # For optimization
-                a_t = c.sample().detach()
+                new_c = c.sample()
+                a_t = new_c.detach()
+                a_predict = new_c.detach()
                 policy_log_probs.append(c.log_prob(a_t))
             else:
-                print(self.feedback)
+                # print(self.feedback)
                 sys.exit('Invalid feedback option')

             # Prepare environment action
             # NOTE: Env action is in the perm_obs space
             cpu_a_t = a_t.cpu().numpy()
             for i, next_id in enumerate(cpu_a_t):
-                if next_id == (candidate_leng[i]-1) or next_id == args.ignoreid or ended[i]:  # The last action is <end>
-                    cpu_a_t[i] = -1  # Change the <end> and ignore action to -1
+                if next_id == args.ignoreid or ended[i]:
+                    if found[i] == True:
+                        cpu_a_t[i] = -1  # Change the <end> and ignore action to -1
+                    else:
+                        cpu_a_t[i] = -2
+                elif next_id == (candidate_leng[i]-2):
+                    cpu_a_t[i] = -1  # Change the <end> and ignore action to -1
+                elif next_id == (candidate_leng[i]-1):
+                    cpu_a_t[i] = -2
+
+
+            cpu_a_predict = a_predict.cpu().numpy()
+            for i, next_id in enumerate(cpu_a_predict):
+                if next_id == (candidate_leng[i]-2):
+                    cpu_a_predict[i] = -1  # Change the <end> and ignore action to -1
+                elif next_id == (candidate_leng[i]-1):
+                    cpu_a_predict[i] = -2

             # Make action and get the new state
-            self.make_equiv_action(cpu_a_t, perm_obs, perm_idx, traj)
+            print(cpu_a_t)
+            self.make_equiv_action(cpu_a_t, perm_obs, perm_idx, traj, found=found)
             obs = np.array(self.env._get_obs())
             perm_obs = obs[perm_idx]  # Perm the obs for the resu
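After a candidate index is sampled or given by the teacher, it is remapped into the two sentinels the rest of the loop understands: the second-to-last padded slot becomes -1 (<STOP>) and the last slot becomes -2 (<NOT_FOUND>). A standalone sketch of that convention (the function name is illustrative, not from the repo):

```python
import numpy as np

def remap_actions(a_t, candidate_leng):
    """Second-to-last slot -> -1 (<STOP>), last slot -> -2 (<NOT_FOUND>);
    real candidate indices pass through unchanged."""
    out = a_t.copy()
    for i, next_id in enumerate(a_t):
        if next_id == candidate_leng[i] - 2:
            out[i] = -1
        elif next_id == candidate_leng[i] - 1:
            out[i] = -2
    return out

# Three real candidates per row, so candidate_leng = 3 + 2 = 5:
print(remap_actions(np.array([0, 3, 4]), [5, 5, 5]))  # -> [ 0 -1 -2]
```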
@@ -379,8 +442,22 @@ class Seq2SeqAgent(BaseAgent):
                 if action_idx == -1:  # If the action now is end
                     if dist[i] < 3.0:  # Correct
                         reward[i] = 2.0 + ndtw_score[i] * 2.0
+                        if ob['swap']:
+                            reward[i] -= 2
+                        else:
+                            reward[i] += 1
                     else:  # Incorrect
                         reward[i] = -2.0
+                elif action_idx == -2:  # the NOT_FOUND reward is set here
+                    if dist[i] < 3.0:
+                        reward[i] = 2.0 + ndtw_score[i] * 2.0
+                        if ob['swap']:
+                            reward[i] += 3  # detected the corrupted instruction, add a bonus point
+                        else:
+                            reward[i] -= 2
+                    else:  # Incorrect
+                        reward[i] = -2.0
+                        reward[i] += 1  # distance > 3, the agent genuinely found nothing, so soften the penalty from -2 to -1
                 else:  # The action is not end
                     # Path fidelity rewards (distance & nDTW)
                     reward[i] = - (dist[i] - last_dist[i])
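The terminal reward now couples the chosen sentinel with the episode's `swap` flag: reporting <STOP> near the goal is rewarded only if the instruction was genuine, while reporting <NOT_FOUND> near the goal earns a bonus exactly when the instruction was corrupted. A compact sketch of just that terminal case under the diff's constants:

```python
def stop_reward(action_idx, dist, ndtw_score, swapped):
    """Terminal reward as set in the diff: action_idx -1 = <STOP>, -2 = <NOT_FOUND>."""
    if dist < 3.0:                             # stopped near the goal
        r = 2.0 + ndtw_score * 2.0
        if action_idx == -1:                   # claimed the target was found
            r += -2 if swapped else 1          # punished when the instruction was corrupted
        else:                                  # claimed NOT_FOUND
            r += 3 if swapped else -2          # bonus for detecting the corruption
        return r
    # stopped far from the goal
    return -2.0 if action_idx == -1 else -1.0  # NOT_FOUND far away is penalized less

# A swapped instruction: the agent stops near the goal and reports <NOT_FOUND>.
print(stop_reward(-2, 1.5, 0.8, True))         # 2.0 + 1.6 + 3 = 6.6
```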
@@ -402,12 +479,13 @@ class Seq2SeqAgent(BaseAgent):
             # Update the finished actions
             # -1 means ended or ignored (already ended)
             ended[:] = np.logical_or(ended, (cpu_a_t == -1))
+            ended[:] = np.logical_or(ended, (cpu_a_t == -2))

         # Early exit if all ended
         if ended.all():
             break

-        print()
+        # print()

         if train_rl:
             # Last action in A2C
@@ -479,8 +557,9 @@ class Seq2SeqAgent(BaseAgent):
             self.losses.append(0.)
         else:
             self.losses.append(self.loss.item() / self.episode_len)  # This argument is useless.
+        print("\n\n")

-        return traj
+        return traj, found

     def test(self, use_dropout=False, feedback='argmax', allow_cheat=False, iters=None):
         ''' Evaluate once on each instruction in the current environment '''
r2r_src/env.py
@@ -1,6 +1,8 @@
 ''' Batched Room-to-Room navigation environment '''

 import sys
+
+from networkx.algorithms import swap
 sys.path.append('buildpy36')
 sys.path.append('Matterport_Simulator/build/')
 import MatterSim
@@ -14,6 +16,7 @@ import os
 import random
 import networkx as nx
 from param import args
 import time

 from utils import load_datasets, load_nav_graphs, pad_instr_tokens
+from IPython import embed
@@ -127,6 +130,7 @@ class R2RBatch():
                 new_item = dict(item)
                 new_item['instr_id'] = '%s_%d' % (item['path_id'], j)
                 new_item['instructions'] = instr
+                new_item['swap'] = item['swap'][j]

                 ''' BERT tokenizer '''
                 instr_tokens = tokenizer.tokenize(instr)
@@ -136,10 +140,12 @@ class R2RBatch():
                 if new_item['instr_encoding'] is not None:  # Filter the wrong data
                     self.data.append(new_item)
                     scans.append(item['scan'])

+            except:
+                continue
         print("split {} has {} datas in the file.".format(split, max_len))


         if name is None:
             self.name = splits[0] if len(splits) > 0 else "FAKE"
         else:
@@ -341,7 +347,8 @@ class R2RBatch():
                 'instructions' : item['instructions'],
                 'teacher' : self._shortest_path_action(state, item['path'][-1]),
                 'gt_path' : item['path'],
-                'path_id' : item['path_id']
+                'path_id' : item['path_id'],
+                'swap': item['swap']
             })
             if 'instr_encoding' in item:
                 obs[-1]['instr_encoding'] = item['instr_encoding']
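End to end, the `swap` flag flows from the JSON written by `data/adversarial.py` into each `R2RBatch` item and then into every observation, where `_teacher_action` and the reward shaping consult it. A minimal sketch of that propagation (field names from the diff, values illustrative):

```python
# Record as written by data/adversarial.py:
item = {"path_id": 1021, "scan": "abc",
        "instructions": ["go leftThis is swap.", "go right"],
        "swap": [True, False]}

# Dataset expansion: one entry per instruction, with its own flag.
j = 0
new_item = dict(item)
new_item["instr_id"] = "%s_%d" % (item["path_id"], j)
new_item["instructions"] = item["instructions"][j]
new_item["swap"] = item["swap"][j]

# Observation dict: the flag rides along for the teacher and the reward.
ob = {"instructions": new_item["instructions"], "swap": new_item["swap"]}
print(ob)  # {'instructions': 'go leftThis is swap.', 'swap': True}
```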
r2r_src/train.py
@@ -199,7 +199,8 @@ def train_val(test_only=False):
     else:
         featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
     # val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']
-    val_env_names = ['val_train_seen']
+    # val_env_names = ['val_train_seen']
+    val_env_names = ['val_unseen']

     train_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['train'], tokenizer=tok)
+    from collections import OrderedDict