Compare commits

...

3 Commits

4 changed files with 129 additions and 21 deletions

data/adversarial.py (new file)

@@ -0,0 +1,21 @@
import json
import sys
import random

with open(sys.argv[1]) as fp:
    data = json.load(fp)

for _, d in enumerate(data):
    swaps = []
    for index, ins in enumerate(d['instructions']):
        p = random.random()
        if p > 0.5:
            swaps.append(True)
            d['instructions'][index] += 'This is swap.'
        else:
            swaps.append(False)
    d['swap'] = swaps

print(data)
with open(sys.argv[1], 'w') as fp:
    json.dump(data, fp)
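The script above expects an R2R-style annotation file passed as its first argument: a JSON list of path items, each carrying an `instructions` list. It marks roughly half of the instructions as swapped, appends a marker sentence to those, and writes a per-item boolean `swap` list back into the same file. A minimal, self-contained sketch of that behaviour on made-up data (the field values are illustrative, not taken from the dataset):

    import json
    import random

    # Hypothetical miniature R2R-style item with two instructions.
    data = [{
        "path_id": 1,
        "scan": "example_scan",
        "instructions": ["Walk to the kitchen.", "Stop by the sofa."],
    }]

    # Same marking logic as data/adversarial.py above.
    for d in data:
        swaps = []
        for index, _ in enumerate(d["instructions"]):
            if random.random() > 0.5:
                swaps.append(True)
                d["instructions"][index] += "This is swap."
            else:
                swaps.append(False)
        d["swap"] = swaps

    print(json.dumps(data, indent=2))  # each item now carries a boolean 'swap' list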

View File

@@ -35,12 +35,12 @@ class BaseAgent(object):
        self.losses = [] # For learning agents

    def write_results(self):
-        output = [{'instr_id':k, 'trajectory': v} for k,v in self.results.items()]
+        output = [{'instr_id':k, 'trajectory': v[0], 'found': v[1]} for k,v in self.results.items()]
        with open(self.results_path, 'w') as f:
            json.dump(output, f)

    def get_results(self):
-        output = [{'instr_id': k, 'trajectory': v} for k, v in self.results.items()]
+        output = [{'instr_id': k, 'trajectory': v[0], 'found': v[1]} for k, v in self.results.items()]
        return output

    def rollout(self, **args):
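With this change each results entry stores the trajectory together with the terminal signal returned by `rollout`. A rough, illustrative sketch of one written record; the interpretation of `found` follows my reading of `make_equiv_action` below (-1 when the agent stopped, -2 when it predicted NOT_FOUND, possibly None if the episode hit the step limit):

    # Illustrative only: shape of one entry in the results JSON after this change.
    example_entry = {
        "instr_id": "1021_0",                       # path_id + instruction index (made-up value)
        "trajectory": [["viewpoint_a", 0.0, 0.0]],  # (viewpointId, heading, elevation) steps
        "found": -1,                                # -1 = stopped, -2 = predicted NOT_FOUND (assumption)
    }
    print(example_entry)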
@@ -61,17 +61,21 @@ class BaseAgent(object):
        if iters is not None:
            # For each time, it will run the first 'iters' iterations. (It was shuffled before)
            for i in range(iters):
-                for traj in self.rollout(**kwargs):
+                trajs, found = self.rollout(**kwargs)
+                print(found)
+                for index, traj in enumerate(trajs):
                    self.loss = 0
-                    self.results[traj['instr_id']] = traj['path']
+                    self.results[traj['instr_id']] = (traj['path'], found[index])
        else:   # Do a full round
            while True:
-                for traj in self.rollout(**kwargs):
+                trajs, found = self.rollout(**kwargs)
+                print("FOUND: ", found)
+                for index, traj in enumerate(trajs):
                    if traj['instr_id'] in self.results:
                        looped = True
                    else:
                        self.loss = 0
-                        self.results[traj['instr_id']] = traj['path']
+                        self.results[traj['instr_id']] = (traj['path'], found[index])
                if looped:
                    break
@@ -147,7 +151,7 @@ class Seq2SeqAgent(BaseAgent):
        return Variable(torch.from_numpy(features), requires_grad=False).cuda()

    def _candidate_variable(self, obs):
-        candidate_leng = [len(ob['candidate']) + 1 for ob in obs]  # +1 is for the end
+        candidate_leng = [len(ob['candidate']) + 2 for ob in obs]  # +2: one slot for END, one for NOT_FOUND
        candidate_feat = np.zeros((len(obs), max(candidate_leng), self.feature_size + args.angle_feat_size), dtype=np.float32)
        # Note: The candidate_feat at len(ob['candidate']) is the feature for the END
@@ -156,6 +160,9 @@ class Seq2SeqAgent(BaseAgent):
            for j, cc in enumerate(ob['candidate']):
                candidate_feat[i, j, :] = cc['feature']
+            # fill in the NOT_FOUND token feature
+            candidate_feat[i, len(ob['candidate'])+1, :] = np.ones((self.feature_size + args.angle_feat_size))
        return torch.from_numpy(candidate_feat).cuda(), candidate_leng

    def get_input_feat(self, obs):
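For readability: the candidate tensor now reserves two trailing slots per observation, index `len(candidate)` for END (left all-zero, as before) and index `len(candidate)+1` for NOT_FOUND, which is filled with an all-ones vector. A standalone sketch of that layout with made-up sizes:

    import numpy as np

    feature_size, angle_feat_size = 4, 2       # illustrative sizes, not the real ones
    num_candidates = 3                         # pretend this observation has 3 navigable candidates

    candidate_leng = num_candidates + 2        # +2: one slot for END, one for NOT_FOUND
    candidate_feat = np.zeros((candidate_leng, feature_size + angle_feat_size), dtype=np.float32)

    # Rows 0..num_candidates-1 would hold the real candidate features; END stays
    # all-zero at row num_candidates; the NOT_FOUND slot gets an all-ones feature.
    candidate_feat[num_candidates + 1, :] = np.ones(feature_size + angle_feat_size)

    print(candidate_feat)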
@@ -186,10 +193,14 @@ class Seq2SeqAgent(BaseAgent):
                        break
                else:   # Stop here
                    assert ob['teacher'] == ob['viewpoint']   # The teacher action should be "STAY HERE"
-                    a[i] = len(ob['candidate'])
+                    if ob['swap']:  # the instruction was swapped, so the teacher action is NOT_FOUND
+                        a[i] = len(ob['candidate'])-1
+                    else:   # STOP
+                        a[i] = len(ob['candidate'])-2
+        print(" ", a)
        return torch.from_numpy(a).cuda()

-    def make_equiv_action(self, a_t, perm_obs, perm_idx=None, traj=None):
+    def make_equiv_action(self, a_t, perm_obs, perm_idx=None, traj=None, found=None):
        """
        Interface between Panoramic view and Egocentric view
        It will convert the action panoramic view action a_t to equivalent egocentric view actions for the simulator
@@ -205,7 +216,8 @@ class Seq2SeqAgent(BaseAgent):
        for i, idx in enumerate(perm_idx):
            action = a_t[i]
-            if action != -1:            # -1 is the <stop> action
+            # print('action: ', action)
+            if action != -1 and action != -2:   # -1 is <stop>, -2 is <not_found>
                select_candidate = perm_obs[i]['candidate'][action]
                src_point = perm_obs[i]['viewIndex']
                trg_point = select_candidate['pointId']
@@ -228,6 +240,17 @@ class Seq2SeqAgent(BaseAgent):
                # print("action: {} view_index: {}".format(action, state.viewIndex))
                if traj is not None:
                    traj[i]['path'].append((state.location.viewpointId, state.heading, state.elevation))
+            else:
+                found[i] = action
+            '''
+            elif action == -1:
+                print('<STOP>')
+            elif action == -2:
+                print('<NOT_FOUND>')
+            '''

    def rollout(self, train_ml=None, train_rl=True, reset=True):
        """
@@ -237,6 +260,7 @@ class Seq2SeqAgent(BaseAgent):
        :return:
        """
+        print("ROLLOUT!!!")
        if self.feedback == 'teacher' or self.feedback == 'argmax':
            train_rl = False
@@ -252,7 +276,9 @@ class Seq2SeqAgent(BaseAgent):
        # Language input
        sentence, language_attention_mask, token_type_ids, \
            seq_lengths, perm_idx = self._sort_batch(obs)
        perm_obs = obs[perm_idx]

        ''' Language BERT '''
        language_inputs = {'mode': 'language',
@@ -271,6 +297,9 @@ class Seq2SeqAgent(BaseAgent):
            'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])],
        } for ob in perm_obs]
+        found = [None for _ in range(len(perm_obs))]

        # Init the reward shaping
        last_dist = np.zeros(batch_size, np.float32)
        last_ndtw = np.zeros(batch_size, np.float32)
@@ -294,9 +323,6 @@ class Seq2SeqAgent(BaseAgent):
            input_a_t, candidate_feat, candidate_leng = self.get_input_feat(perm_obs)
-            print("input_a_t: ", input_a_t.shape)
-            print("candidate_feat: ", candidate_feat.shape)
-            print("candidate_leng: ", candidate_leng)

            # the first [CLS] token, initialized by the language BERT, serves
            # as the agent's state passing through time steps
@@ -322,19 +348,37 @@ class Seq2SeqAgent(BaseAgent):
            # Mask outputs where agent can't move forward
            # Here the logit is [b, max_candidate]
+            # (8, max(candidate))
            candidate_mask = utils.length2mask(candidate_leng)
            logit.masked_fill_(candidate_mask, -float('inf'))

            # Supervised training
            target = self._teacher_action(perm_obs, ended)
+            for i, d in enumerate(target):
+                # print(perm_obs[i]['swap'], perm_obs[i]['instructions'])
+                # print(d)
+                _, at_t = logit.max(1)
+                '''
+                if at_t[i].item() == candidate_leng[i]-1:
+                    print("-2")
+                elif at_t[i].item() == candidate_leng[i]-2:
+                    print("-1")
+                else:
+                    print(at_t[i].item())
+                print()
+                '''
            ml_loss += self.criterion(logit, target)

+            a_predict = None
            # Determine next model inputs
            if self.feedback == 'teacher':
                a_t = target                # teacher forcing
+                _, a_predict = logit.max(1)
+                a_predict = a_predict.detach()
            elif self.feedback == 'argmax':
                _, a_t = logit.max(1)       # student forcing - argmax
                a_t = a_t.detach()
+                a_predict = a_t.detach()
                log_probs = F.log_softmax(logit, 1)                              # Calculate the log_prob here
                policy_log_probs.append(log_probs.gather(1, a_t.unsqueeze(1)))   # Gather the log_prob for each batch
            elif self.feedback == 'sample':
@@ -342,20 +386,39 @@ class Seq2SeqAgent(BaseAgent):
                c = torch.distributions.Categorical(probs)
                self.logs['entropy'].append(c.entropy().sum().item())      # For log
                entropys.append(c.entropy())                               # For optimization
-                a_t = c.sample().detach()
+                new_c = c.sample()
+                a_t = new_c.detach()
+                a_predict = new_c.detach()
                policy_log_probs.append(c.log_prob(a_t))
            else:
-                print(self.feedback)
+                # print(self.feedback)
                sys.exit('Invalid feedback option')

            # Prepare environment action
            # NOTE: Env action is in the perm_obs space
            cpu_a_t = a_t.cpu().numpy()
            for i, next_id in enumerate(cpu_a_t):
-                if next_id == (candidate_leng[i]-1) or next_id == args.ignoreid or ended[i]:    # The last action is <end>
+                if next_id == args.ignoreid or ended[i]:
+                    if found[i] == True:
+                        cpu_a_t[i] = -1         # Change the <end> and ignore action to -1
+                    else:
+                        cpu_a_t[i] = -2
+                elif next_id == (candidate_leng[i]-2):
                    cpu_a_t[i] = -1             # Change the <end> and ignore action to -1
+                elif next_id == (candidate_leng[i]-1):
+                    cpu_a_t[i] = -2
+
+            cpu_a_predict = a_predict.cpu().numpy()
+            for i, next_id in enumerate(cpu_a_predict):
+                if next_id == (candidate_leng[i]-2):
+                    cpu_a_predict[i] = -1       # Change the <end> and ignore action to -1
+                elif next_id == (candidate_leng[i]-1):
+                    cpu_a_predict[i] = -2

            # Make action and get the new state
-            self.make_equiv_action(cpu_a_t, perm_obs, perm_idx, traj)
+            print(cpu_a_t)
+            self.make_equiv_action(cpu_a_t, perm_obs, perm_idx, traj, found=found)
            obs = np.array(self.env._get_obs())
            perm_obs = obs[perm_idx]            # Perm the obs for the resu
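To summarise the mapping introduced in this hunk: with the extra slot, logit index `candidate_leng-2` now stands for STOP and `candidate_leng-1` for NOT_FOUND, and both are folded into the sentinels -1 and -2 before `make_equiv_action` is called. A small standalone sketch of that conversion, with made-up lengths:

    import numpy as np

    candidate_leng = [5, 4, 6]        # illustrative per-sample lengths (already including the +2 slots)
    cpu_a_t = np.array([3, 0, 5])     # raw argmax indices from the logits

    for i, next_id in enumerate(cpu_a_t):
        if next_id == candidate_leng[i] - 2:
            cpu_a_t[i] = -1           # STOP sentinel
        elif next_id == candidate_leng[i] - 1:
            cpu_a_t[i] = -2           # NOT_FOUND sentinel

    print(cpu_a_t)                    # -> [-1  0 -2]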
@@ -379,8 +442,22 @@ class Seq2SeqAgent(BaseAgent):
                        if action_idx == -1:                            # If the action now is end
                            if dist[i] < 3.0:                           # Correct
                                reward[i] = 2.0 + ndtw_score[i] * 2.0
+                                if ob['swap']:
+                                    reward[i] -= 2
+                                else:
+                                    reward[i] += 1
                            else:                                       # Incorrect
                                reward[i] = -2.0
+                        elif action_idx == -2:                          # the NOT_FOUND reward is set here
+                            if dist[i] < 3.0:
+                                reward[i] = 2.0 + ndtw_score[i] * 2.0
+                                if ob['swap']:
+                                    reward[i] += 3                      # detected the wrong instruction, add an extra reward
+                                else:
+                                    reward[i] -= 2
+                            else:                                       # Incorrect
+                                reward[i] = -2.0
+                                reward[i] += 1                          # distance > 3, the target really was not found, soften the penalty from -2 to -1
                        else:                                           # The action is not end
                            # Path fidelity rewards (distance & nDTW)
                            reward[i] = - (dist[i] - last_dist[i])
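The terminal reward now distinguishes STOP (-1) from NOT_FOUND (-2) and whether the instruction was swapped. A standalone sketch that mirrors the branches above, evaluated with an illustrative distance and nDTW score:

    def terminal_reward(action_idx, dist, ndtw, swap):
        # Mirrors the terminal-reward branches in the diff above (illustrative only).
        if action_idx == -1:                  # STOP
            if dist < 3.0:
                r = 2.0 + ndtw * 2.0
                r += -2 if swap else 1        # stopping on a swapped instruction is penalized
            else:
                r = -2.0
        else:                                 # -2, NOT_FOUND
            if dist < 3.0:
                r = 2.0 + ndtw * 2.0
                r += 3 if swap else -2        # NOT_FOUND pays off only when the instruction was swapped
            else:
                r = -2.0 + 1                  # far from the goal: milder penalty than a wrong STOP
        return r

    for action in (-1, -2):
        for swap in (False, True):
            print(action, swap, terminal_reward(action, dist=2.0, ndtw=0.8, swap=swap))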
@@ -402,12 +479,13 @@ class Seq2SeqAgent(BaseAgent):
            # Update the finished actions
            # -1 means ended or ignored (already ended)
            ended[:] = np.logical_or(ended, (cpu_a_t == -1))
+            ended[:] = np.logical_or(ended, (cpu_a_t == -2))

            # Early exit if all ended
            if ended.all():
                break
-        print()
+        # print()

        if train_rl:
            # Last action in A2C
@@ -479,8 +557,9 @@ class Seq2SeqAgent(BaseAgent):
            self.losses.append(0.)
        else:
            self.losses.append(self.loss.item() / self.episode_len)    # This argument is useless.

-        return traj
+        print("\n\n")
+        return traj, found

    def test(self, use_dropout=False, feedback='argmax', allow_cheat=False, iters=None):
        ''' Evaluate once on each instruction in the current environment '''

View File

@@ -1,6 +1,8 @@
''' Batched Room-to-Room navigation environment '''
import sys
+from networkx.algorithms import swap
sys.path.append('buildpy36')
sys.path.append('Matterport_Simulator/build/')
import MatterSim
@@ -14,6 +16,7 @@ import os
import random
import networkx as nx
from param import args
+import time
from utils import load_datasets, load_nav_graphs, pad_instr_tokens
from IPython import embed
@@ -127,6 +130,7 @@ class R2RBatch():
                        new_item = dict(item)
                        new_item['instr_id'] = '%s_%d' % (item['path_id'], j)
                        new_item['instructions'] = instr
+                        new_item['swap'] = item['swap'][j]

                        ''' BERT tokenizer '''
                        instr_tokens = tokenizer.tokenize(instr)
@@ -136,10 +140,12 @@ class R2RBatch():
                        if new_item['instr_encoding'] is not None:  # Filter the wrong data
                            self.data.append(new_item)
                            scans.append(item['scan'])
                    except:
                        continue
        print("split {} has {} datas in the file.".format(split, max_len))

        if name is None:
            self.name = splits[0] if len(splits) > 0 else "FAKE"
        else:
@@ -341,7 +347,8 @@ class R2RBatch():
                'instructions' : item['instructions'],
                'teacher' : self._shortest_path_action(state, item['path'][-1]),
                'gt_path' : item['path'],
-                'path_id' : item['path_id']
+                'path_id' : item['path_id'],
+                'swap': item['swap']
            })
            if 'instr_encoding' in item:
                obs[-1]['instr_encoding'] = item['instr_encoding']

View File

@@ -199,7 +199,8 @@ def train_val(test_only=False):
    else:
        featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])

    # val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']
-    val_env_names = ['val_train_seen']
+    # val_env_names = ['val_train_seen']
+    val_env_names = ['val_unseen']
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['train'], tokenizer=tok)
    from collections import OrderedDict