Compare commits

..

6 Commits

Author SHA1 Message Date
4073c52bb8
fix: original json contains unicode characters 2023-11-08 23:17:13 +08:00
595866c2f4
feat: complete NOT_FOUND but always 50:50
- Notice: comment out RL
- Notice: always 50:50; there seem to be some bugs
2023-11-07 01:21:15 +08:00
03a3e5b489
feat: add NOT_FOUND action in rollout 2023-11-06 18:31:14 +08:00
4936098b5e
fix: change 'swap' to 'found' 2023-11-06 18:30:31 +08:00
a5db597de5
feat: REVERIE to R2R format 2023-11-06 15:52:54 +08:00
ab5010d32d
feat: vlnbert which can run with adversarial json 2023-11-06 15:51:50 +08:00
6 changed files with 144 additions and 119 deletions

42
adversarial_summary.py Normal file
View File

@ -0,0 +1,42 @@
import json
import os
import re


def remove_non_ascii(text):
    return re.sub(r'[^\x00-\x7F]', ' ', text)


for file in ['train', 'val_unseen', 'val_seen', 'train_seen', 'test', 'val_train_seen']:
    print(file)
    if os.path.isfile('data/adversarial/reverie_{}_fnf.json'.format(file)):
        with open('data/adversarial/reverie_{}_fnf.json'.format(file)) as fp:
            data = json.load(fp)
        result = {}
        for i in data:
            instruction_id = i['path_id']
            if instruction_id not in result:
                result[instruction_id] = {
                    'distance': float(i['distance']),
                    'scan': i['scan'],
                    'path_id': int(i['path_id']),
                    'path': i['path'],
                    'heading': float(i['heading']),
                    'instructions': [remove_non_ascii(i['instruction'])],
                    'found': [i['found']],
                    'id': i['id'],
                    'objId': i['objId']
                }
            else:
                result[instruction_id]['instructions'].append(remove_non_ascii(i['instruction']))
                result[instruction_id]['found'].append(i['found'])
        output = []
        for k, item in result.items():
            output.append(item)
    else:
        output = []
    with open('data/adversarial/R2R_{}.json'.format(file), 'w') as fp:
        json.dump(output, fp)
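For orientation, a minimal sketch of the record shape that the grouping loop above writes to data/adversarial/R2R_{split}.json; every value below is illustrative and not taken from the REVERIE data.

# Illustrative only: one grouped record per path_id, with the per-instruction
# 'found' flags collected into a list parallel to 'instructions'.
example_record = {
    'distance': 9.27,
    'scan': 'SCAN_ID_PLACEHOLDER',
    'path_id': 4332,
    'path': ['VIEWPOINT_A', 'VIEWPOINT_B'],
    'heading': 4.04,
    'instructions': ['Go to the bathroom and bring back the towel.'],
    'found': [True],
    'id': '4332_0',
    'objId': '12'
}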

View File

@ -1,21 +0,0 @@
import json
import sys
import random

with open(sys.argv[1]) as fp:
    data = json.load(fp)

for _, d in enumerate(data):
    swaps = []
    for index, ins in enumerate(d['instructions']):
        p = random.random()
        if p > 0.5:
            swaps.append(True)
            d['instructions'][index] += 'This is swap.'
        else:
            swaps.append(False)
    d['swap'] = swaps

print(data)
with open(sys.argv[1], 'w') as fp:
    json.dump(data, fp)
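The removed script above appended a marker string to roughly half of the instructions and recorded a per-instruction 'swap' flag; the rest of this diff renames that flag to 'found'. A minimal sketch, assuming the same 50/50 random labelling, of producing the renamed flag (the real reverie_{split}_fnf.json files are generated elsewhere and are not part of this diff):

import json
import random
import sys

# Hypothetical stand-in for the removed script: attach a boolean 'found' flag
# per instruction (50/50 at random) instead of the old 'swap' flag.
with open(sys.argv[1]) as fp:
    data = json.load(fp)

for d in data:
    d['found'] = [random.random() > 0.5 for _ in d['instructions']]

with open(sys.argv[1], 'w') as fp:
    json.dump(data, fp)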

View File

@ -61,16 +61,14 @@ class BaseAgent(object):
if iters is not None:
# For each time, it will run the first 'iters' iterations. (It was shuffled before)
for i in range(iters):
trajs, found = self.rollout(**kwargs)
print(found)
for index, traj in enumerate(trajs):
traj, found = self.rollout(**kwargs)
for index, traj in enumerate(traj):
self.loss = 0
self.results[traj['instr_id']] = (traj['path'], found[index])
else: # Do a full round
while True:
trajs, found = self.rollout(**kwargs)
print("FOUND: ", found)
for index, traj in enumerate(trajs):
traj, found = self.rollout(**kwargs)
for index, traj in enumerate(traj):
if traj['instr_id'] in self.results:
looped = True
else:
@ -159,9 +157,8 @@ class Seq2SeqAgent(BaseAgent):
for i, ob in enumerate(obs):
for j, cc in enumerate(ob['candidate']):
candidate_feat[i, j, :] = cc['feature']
# append the NOT_FOUND token
candidate_feat[i, len(ob['candidate'])+1, :] = np.ones((self.feature_size + args.angle_feat_size))
candidate_feat[i, len(ob['candidate']), :] = np.zeros(self.feature_size+args.angle_feat_size, dtype=np.float32) # <STOP>
candidate_feat[i, len(ob['candidate'])+1, :] = np.ones(self.feature_size+args.angle_feat_size, dtype=np.float32) # <NOT_FOUND>
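# Slot layout after this change: indices 0..len(candidate)-1 hold the navigable
# candidate views, index len(candidate) holds the all-zeros <STOP> token, and
# index len(candidate)+1 holds the all-ones <NOT_FOUND> token.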
return torch.from_numpy(candidate_feat).cuda(), candidate_leng
@ -193,11 +190,10 @@ class Seq2SeqAgent(BaseAgent):
break
else: # Stop here
assert ob['teacher'] == ob['viewpoint'] # The teacher action should be "STAY HERE"
if ob['swap']: # the instruction was swapped, so the target is NOT_FOUND
a[i] = len(ob['candidate'])-1
else: # STOP
a[i] = len(ob['candidate'])-2
print(" ", a)
if ob['found']:
a[i] = len(ob['candidate'])
else:
a[i] = len(ob['candidate'])+1
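# i.e. the teacher target is the <STOP> slot (len(ob['candidate'])) when the
# object is actually present and the <NOT_FOUND> slot (len(ob['candidate'])+1)
# when it is not.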
return torch.from_numpy(a).cuda()
def make_equiv_action(self, a_t, perm_obs, perm_idx=None, traj=None, found=None):
@ -216,7 +212,6 @@ class Seq2SeqAgent(BaseAgent):
for i, idx in enumerate(perm_idx):
action = a_t[i]
# print('action: ', action)
if action != -1 and action != -2: # -1 is the <stop> action
select_candidate = perm_obs[i]['candidate'][action]
src_point = perm_obs[i]['viewIndex']
@ -240,18 +235,11 @@ class Seq2SeqAgent(BaseAgent):
# print("action: {} view_index: {}".format(action, state.viewIndex))
if traj is not None:
traj[i]['path'].append((state.location.viewpointId, state.heading, state.elevation))
else:
found[i] = action
elif action == -1 or action == -2:
if found is not None:
found[i] = action
'''
elif action == -1:
print('<STOP>')
elif action == -2:
print('<NOT_FOUND>')
'''
def rollout(self, train_ml=None, train_rl=True, reset=True):
"""
:param train_ml: The weight to train with maximum likelihood
@ -260,7 +248,6 @@ class Seq2SeqAgent(BaseAgent):
:return:
"""
print("ROLLOUT!!!")
if self.feedback == 'teacher' or self.feedback == 'argmax':
train_rl = False
@ -270,15 +257,13 @@ class Seq2SeqAgent(BaseAgent):
obs = np.array(self.env.reset())
else:
obs = np.array(self.env._get_obs())
batch_size = len(obs)
# Language input
sentence, language_attention_mask, token_type_ids, \
seq_lengths, perm_idx = self._sort_batch(obs)
perm_obs = obs[perm_idx]
''' Language BERT '''
language_inputs = {'mode': 'language',
@ -296,9 +281,8 @@ class Seq2SeqAgent(BaseAgent):
'instr_id': ob['instr_id'],
'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])],
} for ob in perm_obs]
found = [None for _ in range(len(perm_obs))]
found = [ None for _ in range(len(perm_obs)) ]
# Init the reward shaping
last_dist = np.zeros(batch_size, np.float32)
@ -322,6 +306,15 @@ class Seq2SeqAgent(BaseAgent):
for t in range(self.episode_len):
input_a_t, candidate_feat, candidate_leng = self.get_input_feat(perm_obs)
'''
# show feature
for index, feat in enumerate(candidate_feat):
for ff in feat:
print(ff)
print(candidate_leng[index])
print()
'''
# the first [CLS] token, initialized by the language BERT, serves
@ -348,37 +341,30 @@ class Seq2SeqAgent(BaseAgent):
# Mask outputs where agent can't move forward
# Here the logit is [b, max_candidate]
# (8, max(candidate))
candidate_mask = utils.length2mask(candidate_leng)
logit.masked_fill_(candidate_mask, -float('inf'))
# Supervised training
target = self._teacher_action(perm_obs, ended)
for i, d in enumerate(target):
# print(perm_obs[i]['swap'], perm_obs[i]['instructions'])
# print(d)
_, at_t = logit.max(1)
'''
if at_t[i].item() == candidate_leng[i]-1:
print("-2")
elif at_t[i].item() == candidate_leng[i]-2:
print("-1")
else:
print(at_t[i].item())
print()
'''
ml_loss += self.criterion(logit, target)
a_predict = None
'''
for index, mask in enumerate(candidate_mask):
print(mask)
print(candidate_leng[index])
print(logit[index])
print(target[index])
print("\n\n")
'''
# Determine next model inputs
if self.feedback == 'teacher':
a_t = target # teacher forcing
_, a_predict = logit.max(1)
a_predict = a_predict.detach()
elif self.feedback == 'argmax':
_, a_t = logit.max(1) # student forcing - argmax
a_t = a_t.detach()
a_predict = a_t.detach()
log_probs = F.log_softmax(logit, 1) # Calculate the log_prob here
policy_log_probs.append(log_probs.gather(1, a_t.unsqueeze(1))) # Gather the log_prob for each batch
elif self.feedback == 'sample':
@ -386,42 +372,39 @@ class Seq2SeqAgent(BaseAgent):
c = torch.distributions.Categorical(probs)
self.logs['entropy'].append(c.entropy().sum().item()) # For log
entropys.append(c.entropy()) # For optimization
new_c = c.sample()
a_t = new_c.detach()
a_predict = new_c.detach()
a_t = c.sample().detach()
policy_log_probs.append(c.log_prob(a_t))
else:
# print(self.feedback)
print(self.feedback)
sys.exit('Invalid feedback option')
# Prepare environment action
# NOTE: Env action is in the perm_obs space
cpu_a_t = a_t.cpu().numpy()
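# Mapping from model output indices to environment actions used below:
# candidate_leng[i]-2 is the <STOP> slot (mapped to -1), candidate_leng[i]-1 is
# the <NOT_FOUND> slot (mapped to -2), and episodes that have already ended
# reuse the terminal action stored in found[i].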
for i, next_id in enumerate(cpu_a_t):
if next_id == args.ignoreid or ended[i]:
if found[i] == True:
cpu_a_t[i] = -1 # Change the <end> and ignore action to -1
else:
cpu_a_t[i] = -2
if next_id == (args.ignoreid) or ended[i]:
cpu_a_t[i] = found[i]
elif next_id == (candidate_leng[i]-2):
cpu_a_t[i] = -1 # Change the <end> and ignore action to -1
cpu_a_t[i] = -1
elif next_id == (candidate_leng[i]-1):
cpu_a_t[i] = -2
cpu_a_predict = a_predict.cpu().numpy()
for i, next_id in enumerate(cpu_a_predict):
if next_id == (candidate_leng[i]-2):
cpu_a_predict[i] = -1 # Change the <end> and ignore action to -1
elif next_id == (candidate_leng[i]-1):
cpu_a_predict[i] = -2
# Make action and get the new state
print(cpu_a_t)
self.make_equiv_action(cpu_a_t, perm_obs, perm_idx, traj, found=found)
self.make_equiv_action(cpu_a_t, perm_obs, perm_idx, traj, found)
'''
print(self.feedback, end=' ')
print(cpu_a_t, end=' ')
for i in perm_obs:
print(i['found'], end=' ')
print(found)
print()
'''
obs = np.array(self.env._get_obs())
perm_obs = obs[perm_idx] # Perm the obs for the result
'''
if train_rl:
# Calculate the mask and reward
dist = np.zeros(batch_size, np.float32)
@ -442,22 +425,22 @@ class Seq2SeqAgent(BaseAgent):
if action_idx == -1: # If the action now is end
if dist[i] < 3.0: # Correct
reward[i] = 2.0 + ndtw_score[i] * 2.0
if ob['swap']:
reward[i] -= 2
else:
if ob['found']:
reward[i] += 1
else: # Incorrect
reward[i] = -2.0
elif action_idx == -2: # the NOT_FOUND reward is set here
if dist[i] < 3.0:
reward[i] = 2.0 + ndtw_score[i] * 2.0
if ob['swap']:
reward[i] += 3 # detected a wrong instruction, add an extra point
else:
reward[i] -= 2
else: # Incorrect
reward[i] = -2.0
reward[i] += 1 # distance > 3, the object really was not found, so the penalty goes from -2 to -1
elif action_idx == -2:
if dist[i] < 3.0:
reward[i] = 2.0 + ndtw_score[i] * 2.0
if ob['found']:
reward[i] -= 2
else:
reward[i] += 1
else: # Incorrect
reward[i] = -2.0
else: # The action is not end
# Path fidelity rewards (distance & nDTW)
reward[i] = - (dist[i] - last_dist[i])
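# Reward shaping under the 'found' flag: a correct <STOP> (action -1, dist < 3)
# earns 2 + 2*nDTW, plus 1 if ob['found'] is True; a correct <NOT_FOUND>
# (action -2) earns 2 + 2*nDTW, minus 2 if ob['found'] is True and plus 1
# otherwise; an incorrect final action is penalised with -2.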
@ -475,6 +458,7 @@ class Seq2SeqAgent(BaseAgent):
masks.append(mask)
last_dist[:] = dist
last_ndtw[:] = ndtw_score
'''
# Update the finished actions
# -1 means ended or ignored (already ended)
@ -485,8 +469,7 @@ class Seq2SeqAgent(BaseAgent):
if ended.all():
break
# print()
'''
if train_rl:
# Last action in A2C
input_a_t, candidate_feat, candidate_leng = self.get_input_feat(perm_obs)
@ -497,7 +480,6 @@ class Seq2SeqAgent(BaseAgent):
visual_attention_mask = torch.cat((language_attention_mask, visual_temp_mask), dim=-1)
self.vln_bert.vln_bert.config.directions = max(candidate_leng)
''' Visual BERT '''
visual_inputs = {'mode': 'visual',
'sentence': language_features,
'attention_mask': visual_attention_mask,
@ -548,6 +530,7 @@ class Seq2SeqAgent(BaseAgent):
self.loss += rl_loss
self.logs['RL_loss'].append(rl_loss.item())
'''
if train_ml is not None:
self.loss += ml_loss * train_ml / batch_size
@ -557,9 +540,8 @@ class Seq2SeqAgent(BaseAgent):
self.losses.append(0.)
else:
self.losses.append(self.loss.item() / self.episode_len) # This argument is useless.
print("\n\n")
return traj, found
return traj, found
def test(self, use_dropout=False, feedback='argmax', allow_cheat=False, iters=None):
''' Evaluate once on each instruction in the current environment '''

View File

@ -1,8 +1,6 @@
''' Batched Room-to-Room navigation environment '''
import sys
from networkx.algorithms import swap
sys.path.append('buildpy36')
sys.path.append('Matterport_Simulator/build/')
import MatterSim
@ -16,7 +14,6 @@ import os
import random
import networkx as nx
from param import args
import time
from utils import load_datasets, load_nav_graphs, pad_instr_tokens
from IPython import embed
@ -130,7 +127,7 @@ class R2RBatch():
new_item = dict(item)
new_item['instr_id'] = '%s_%d' % (item['path_id'], j)
new_item['instructions'] = instr
new_item['swap'] = item['swap'][j]
new_item['found'] = item['found'][j]
''' BERT tokenizer '''
instr_tokens = tokenizer.tokenize(instr)
@ -140,12 +137,10 @@ class R2RBatch():
if new_item['instr_encoding'] is not None: # Filter the wrong data
self.data.append(new_item)
scans.append(item['scan'])
except:
continue
print("split {} has {} datas in the file.".format(split, max_len))
if name is None:
self.name = splits[0] if len(splits) > 0 else "FAKE"
else:
@ -334,6 +329,7 @@ class R2RBatch():
# [visual_feature, angle_feature] for views
feature = np.concatenate((feature, self.angle_feature[base_view_id]), -1)
obs.append({
'instr_id' : item['instr_id'],
'scan' : state.scanId,
@ -348,7 +344,7 @@ class R2RBatch():
'teacher' : self._shortest_path_action(state, item['path'][-1]),
'gt_path' : item['path'],
'path_id' : item['path_id'],
'swap': item['swap']
'found': item['found']
})
if 'instr_encoding' in item:
obs[-1]['instr_encoding'] = item['instr_encoding']

View File

@ -55,11 +55,16 @@ class Evaluation(object):
near_d = d
return near_id
def _score_item(self, instr_id, path):
def _score_item(self, instr_id, path, predict_found):
''' Calculate error based on the final position in trajectory, and also
the closest position (oracle stopping rule).
The path contains [view_id, angle, vofv] '''
gt = self.gt[instr_id.split('_')[-2]]
index = int(instr_id.split('_')[-1])
gt_instruction = gt['instructions'][index]
gt_found = gt['found'][index]
start = gt['path'][0]
assert start == path[0][0], 'Result trajectories should include the start position'
goal = gt['path'][-1]
@ -68,6 +73,19 @@ class Evaluation(object):
self.scores['nav_errors'].append(self.distances[gt['scan']][final_position][goal])
self.scores['oracle_errors'].append(self.distances[gt['scan']][nearest_position][goal])
self.scores['trajectory_steps'].append(len(path)-1)
# <STOP> <NOT_FOUND> score
score = 0
if gt_found == True:
if predict_found == -1:
score = 1
else:
if predict_found == -2:
score = 1
self.scores['found_count'] += score
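# found_count counts trajectories whose terminal action matches the ground
# truth: -1 (<STOP>) when the object should be found and -2 (<NOT_FOUND>)
# when it should not.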
distance = 0 # length of the path in meters
prev = path[0]
for curr in path[1:]:
@ -81,6 +99,7 @@ class Evaluation(object):
def score(self, output_file):
''' Evaluate each agent trajectory based on how close it got to the goal location '''
self.scores = defaultdict(list)
self.scores['found_count'] = 0
instr_ids = set(self.instr_ids)
if type(output_file) is str:
with open(output_file) as f:
@ -90,12 +109,14 @@ class Evaluation(object):
# print('result length', len(results))
# print("RESULT:", results)
path_counter = 0
for item in results:
# Check against expected ids
if item['instr_id'] in instr_ids:
# print("{} exist".format(item['instr_id']))
instr_ids.remove(item['instr_id'])
self._score_item(item['instr_id'], item['trajectory'])
self._score_item(item['instr_id'], item['trajectory'], item['found'])
path_counter += 1
else:
print("{} not exist".format(item['instr_id']))
print(item)
@ -108,7 +129,8 @@ class Evaluation(object):
'nav_error': np.average(self.scores['nav_errors']),
'oracle_error': np.average(self.scores['oracle_errors']),
'steps': np.average(self.scores['trajectory_steps']),
'lengths': np.average(self.scores['trajectory_lengths'])
'lengths': np.average(self.scores['trajectory_lengths']),
'found_score': self.scores['found_count'] / path_counter
}
num_successes = len([i for i in self.scores['nav_errors'] if i < self.error_margin])
score_summary['success_rate'] = float(num_successes)/float(len(self.scores['nav_errors']))

View File

@ -105,6 +105,9 @@ def train(train_env, tok, n_iters, log_every=2000, val_envs={}, aug_env=None):
# Run validation
loss_str = "iter {}".format(iter)
save_results = []
for env_name, (env, evaluator) in val_envs.items():
listner.env = env
@ -112,6 +115,8 @@ def train(train_env, tok, n_iters, log_every=2000, val_envs={}, aug_env=None):
listner.test(use_dropout=False, feedback='argmax', iters=None)
result = listner.get_results()
score_summary, _ = evaluator.score(result)
print(score_summary)
loss_str += ", %s " % env_name
for metric, val in score_summary.items():
if metric in ['spl']:
@ -195,12 +200,11 @@ def train_val(test_only=False):
if test_only:
featurized_scans = None
val_env_names = ['val_train_seen']
val_env_names = ['val_unseen']
else:
featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
# val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']
# val_env_names = ['val_train_seen']
val_env_names = ['val_unseen']
val_env_names = ['train','val_unseen']
train_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['train'], tokenizer=tok)
from collections import OrderedDict