Compare commits

...

5 Commits

SHA1        Message                              Date
cd2e0a30e4  fix: EMNLP's prompts                 2024-06-16 23:43:47 +08:00
a85950f06f  update                               2024-06-10 18:52:39 +08:00
5cbd75711e  feat: evaluation in result           2024-05-06 16:42:40 +08:00
64fbce018a  feat: steps counter in llm-success   2024-05-06 16:41:07 +08:00
68330c5163  feat: evaluate llm success           2024-05-05 23:02:06 +08:00
7 changed files with 100 additions and 53 deletions

View File

@@ -11,7 +11,8 @@ def dump_json(data, filename):
json.dump(data, fp)
for f in os.listdir():
if 'json' in f:
if 'navgpt' in f:
print(f)
data = load_json(f)
new_data = []
@@ -19,7 +20,8 @@ for f in os.listdir():
for index, instr in enumerate(i['instructions']):
new_i = i.copy()
new_i['instruction'] = instr
new_i['instr_id'] = f'{new_i["id"]}_{index}'
# new_i['instr_id'] = f'{new_i["id"]}_{index}'
new_i['new_reverie_id'] = f'{new_i["new_reverie_id"]}_{index}'
del new_i['instructions']
new_data.append(new_i)
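
Note: the loop above fans each multi-instruction REVERIE record out into one record per instruction, suffixing the index onto new_reverie_id. A minimal self-contained sketch of that transformation (field names follow the diff; the sample record and its values are hypothetical):

item = {
    'new_reverie_id': '4170',  # hypothetical id
    'instructions': ['Go to the kitchen.', 'Walk to the fridge.'],
}

expanded = []
for index, instr in enumerate(item['instructions']):
    new_i = item.copy()
    new_i['instruction'] = instr
    new_i['new_reverie_id'] = f"{new_i['new_reverie_id']}_{index}"
    del new_i['instructions']
    expanded.append(new_i)

# expanded[1]['new_reverie_id'] == '4170_1'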

View File

@@ -10,7 +10,7 @@ from parser import parse_args
from env import REVERIENavBatch
from agent import NavGPTAgent
def build_dataset(args, data_limit=100):
def build_dataset(args):
feat_db = ImageObservationsDB(args.obs_dir, args.obs_summary_dir, args.obj_dir)
print(feat_db)
@@ -26,7 +26,7 @@ def build_dataset(args, data_limit=100):
)
val_env = dataset_class(
feat_db, val_instr_data, args.connectivity_dir, args.navigable_dir,
batch_size=args.batch_size, seed=args.seed, name=split, data_limit=data_limit
batch_size=args.batch_size, seed=args.seed, name=split
) # evaluation using all objects
val_envs[split] = val_env
@@ -96,7 +96,7 @@ def valid_from_file(args, val_envs):
def main():
args = parse_args()
val_envs = build_dataset(args, data_limit=100)
val_envs = build_dataset(args)
if args.valid_file is not None:
valid_from_file(args, val_envs)
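
With data_limit gone, build_dataset now evaluates every instruction in each split. If a capped debug run is still wanted, the cap can be applied to the instruction data before the envs are built; a sketch under that assumption (limit_instr_data is a hypothetical helper, not part of this diff):

def limit_instr_data(val_instr_data, data_limit=None):
    # None keeps the full split, matching the new default behaviour;
    # an int restores the old truncated evaluation set.
    return val_instr_data if data_limit is None else val_instr_data[:data_limit]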

View File

@@ -46,9 +46,11 @@ EXCEPTION_TOOL_NAME = "_Exception"
MAX_SCRATCHPAD_LENGTH = 7000
FINAL_STOP_POINT = ""
FINAL_STATE = ""
SUCCESS = 0
TEMP_STEPS_COUNTER = 0
STEPS_COUNTER = 0
NOW_LOCATION = None
MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE = (
"Invalid Format: Missing 'Action:' after 'Thought:"
@@ -71,16 +73,18 @@ class NavGPTOutputParser(AgentOutputParser):
global STEPS_COUNTER
global TEMP_STEPS_COUNTER
global SUCCESS
# includes_answer = FINAL_ANSWER_ACTION in text
global NOW_LOCATION
global FINAL_STATE
includes_answer = FINAL_ANSWER_ACTION in text
regex = (
r"Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*\"?([a-fA-F0-9]{32})\"?"
)
action_match = re.search(regex, text, re.DOTALL)
if action_match:
# if includes_answer:
# raise OutputParserException(
# f"{FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE}: {text}"
# )
if includes_answer:
raise OutputParserException(
f"{FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE}: {text}"
)
action = action_match.group(1).strip()
action_input = action_match.group(2)
tool_input = action_input.strip(" ")
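
The regex above only fires when Action Input is a 32-character hex viewpoint ID; a quick self-contained check of what it accepts (the tool name and viewpoint ID are invented for illustration):

import re

regex = (
    r"Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*\"?([a-fA-F0-9]{32})\"?"
)
text = (
    "Thought: the bedroom is ahead.\n"
    "Action: action_maker\n"
    'Action Input: "0ad0253c4d9d4a0e91a447fd59e4bdcb"\n'
)
m = re.search(regex, text, re.DOTALL)
# m.group(1) == 'action_maker'; m.group(2) is the 32-hex viewpoint ID
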
@@ -92,13 +96,17 @@ class NavGPTOutputParser(AgentOutputParser):
print("ACTION: ", action_input)
print(f"MY FINAL_STOP_POINT = {FINAL_STOP_POINT}")
TEMP_STEPS_COUNTER += 1
# TEMP_STEPS_COUNTER += 1
print(f"TEMP_STEPS_COUNT = {TEMP_STEPS_COUNTER}")
print(f"STEPS_COUNT = {STEPS_COUNTER}")
print(f"SUCCESS = {SUCCESS}")
NOW_LOCATION = tool_input
TEMP_STEPS_COUNTER += 1
print(f"NOW_LOCATION = {NOW_LOCATION}")
'''
if FINAL_STOP_POINT in text:
STEPS_COUNTER += TEMP_STEPS_COUNTER
SUCCESS += 1
@@ -110,14 +118,34 @@ class NavGPTOutputParser(AgentOutputParser):
return AgentFinish(
{"output": action_input}, text
)
'''
return AgentAction(action, tool_input, text)
'''
elif includes_answer:
is_STOP = 'Finished' in text
print("FINAL: ", is_STOP)
if is_STOP:
FINAL_STATE = 'stop'
else:
FINAL_STATE = 'not found'
if NOW_LOCATION == FINAL_STOP_POINT:
STEPS_COUNTER += TEMP_STEPS_COUNTER
TEMP_STEPS_COUNTER = 0
SUCCESS += 1
print(f"SUCCESS = {SUCCESS}")
else:
print("NOT SUCCESS")
print(f"{NOW_LOCATION}_{type(NOW_LOCATION)}")
print(f"{FINAL_STOP_POINT}_{type(FINAL_STOP_POINT)}")
print(f"SUCCESS = {SUCCESS}")
print(f"STEPS_COUNTER = {STEPS_COUNTER}")
return AgentFinish(
{"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
)
'''
if not re.search(r"Action\s*\d*\s*:[\s]*(.*?)", text, re.DOTALL):
raise OutputParserException(
@@ -364,7 +392,7 @@ class NavGPTAgent(BaseAgent):
"""Initialize the trajectory with the given observation."""
# Record the navigation path
self.traj = [{
'instr_id': ob['instr_id'],
'instr_id': ob['new_reverie_id'],
'path': [[ob['start']]],
'details': [],
} for ob in obs]
@@ -604,7 +632,7 @@ class NavGPTAgent(BaseAgent):
tools = [
self.action_maker,
self.back_tracer
self.back_tracer,
]
if self.config.use_tool_chain:
@@ -682,7 +710,10 @@ class NavGPTAgent(BaseAgent):
new_obs = self.env.step(actions)[0]
new_heading = np.rad2deg(new_obs['heading'])
# Record the trajectory
self.traj[0]['path'].append(self.env.env.sims[0].gmap.bfs_shortest_path(cur_obs['viewpoint'], actions[0])[1:])
try:
self.traj[0]['path'].append(self.env.env.sims[0].gmap.bfs_shortest_path(cur_obs['viewpoint'], actions[0])[1:])
except Exception:
pass  # no shortest path between the two viewpoints; skip recording this hop
# Calculate the turned angle
turned_angle = new_heading - cur_heading
# Generate action description
@@ -699,8 +730,12 @@ class NavGPTAgent(BaseAgent):
global FINAL_STOP_POINT
global TEMP_STEPS_COUNTER
global STEPS_COUNTER
global FINAL_STATE
global NOW_LOCATION
FINAL_STOP_POINT = obs[0]['stop']
FINAL_STOP_POINT = obs[0]['gt_path'][-1]
FINAL_STATE = ""
if TEMP_STEPS_COUNTER != 0:
TEMP_STEPS_COUNTER = 0
@@ -713,7 +748,6 @@ class NavGPTAgent(BaseAgent):
print(obs[0]['obs'])
print(obs[0]['obs_summary'])
print(obs[0]['objects'])
print(obs[0]['instr_id'])
print(obs[0]['scan'])
print(obs[0]['viewpoint'])
print(obs[0]['heading'])
@@ -722,9 +756,10 @@ class NavGPTAgent(BaseAgent):
print(obs[0]['instruction'])
print(obs[0]['gt_path'])
print(obs[0]['path_id'])
print(obs[0]['stop'])
print(obs[0]['start'])
print(obs[0]['target'])
print(obs[0]['new_reverie_id'])
NOW_LOCATION = obs[0]['start']
print("==")
@@ -804,6 +839,11 @@ class NavGPTAgent(BaseAgent):
}
output = self.agent_executor(input)
if 'stop' in FINAL_STATE:
self.traj[i]['final_state'] = 'stop'
else:
self.traj[i]['final_state'] = 'not found'
self.traj[i]['llm_output'] = output['output']
self.traj[i]['action_plan'] = output['action_plan']
# extract agent's thought from llm output
@@ -816,4 +856,3 @@ class NavGPTAgent(BaseAgent):
self.traj[i]['llm_observation'].append(observation)
return self.traj
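
The parser changes above thread the success metric through module-level globals (SUCCESS, STEPS_COUNTER, TEMP_STEPS_COUNTER, NOW_LOCATION, FINAL_STOP_POINT). A self-contained restatement of that bookkeeping with the globals folded into one object (a sketch of the logic as it reads in this diff, not code from the repository):

class SuccessTracker:
    def __init__(self):
        self.success = 0             # episodes that ended at the goal viewpoint
        self.steps_counter = 0       # steps summed over successful episodes
        self.temp_steps_counter = 0  # steps taken in the episode in progress
        self.now_location = None
        self.final_stop_point = None

    def new_episode(self, start, gt_path):
        # Episode setup as in rollout(): the goal is the last ground-truth node.
        self.final_stop_point = gt_path[-1]
        self.now_location = start
        self.temp_steps_counter = 0

    def record_action(self, viewpoint_id):
        # One parsed action_maker call counts as one step.
        self.now_location = viewpoint_id
        self.temp_steps_counter += 1

    def record_final_answer(self, text):
        # 'Finished' marks a claimed stop; anything else means 'not found'.
        # Only episodes that finish at the goal contribute their step count.
        final_state = 'stop' if 'Finished' in text else 'not found'
        if self.now_location == self.final_stop_point:
            self.success += 1
            self.steps_counter += self.temp_steps_counter
            self.temp_steps_counter = 0
        return final_state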

View File

@@ -18,6 +18,7 @@ class BaseAgent(object):
output[-1]['llm_output'] = v['llm_output']
output[-1]['llm_thought'] = v['llm_thought']
output[-1]['llm_observation'] = v['llm_observation']
output[-1]['final_state'] = v['final_state']
return output
def rollout(self, **args):
@@ -50,6 +51,8 @@ class BaseAgent(object):
else: # Do a full round
while True:
for traj in self.rollout(**kwargs):
print(f"ID: {traj['instr_id']}")
print(self.results.keys())
if traj['instr_id'] in self.results:
looped = True
else:
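
For context, the two debug prints above sit in the standard full-round pattern: rollouts repeat until an already-seen instr_id comes back, which signals one complete pass over the (now unshuffled) data. A condensed sketch of that loop (assuming an agent exposing the rollout interface shown here):

def run_full_round(agent):
    # Keep rolling out until an instr_id repeats: the wrap-around is the
    # 'looped' signal that every trajectory has been collected once.
    results = {}
    looped = False
    while not looped:
        for traj in agent.rollout():
            if traj['instr_id'] in results:
                looped = True
            else:
                results[traj['instr_id']] = traj
    return results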

View File

@@ -143,7 +143,6 @@ class Simulator(object):
viewpoint_ID: str,
heading: int,
elevation: int,
stop: str,
start: str,
target: str
):
@@ -151,7 +150,6 @@
self.elevation = elevation
self.scan_ID = scan_ID
self.viewpoint_ID = viewpoint_ID
self.stop = stop
self.start = start
self.target = target
# Load navigable dict
@@ -162,14 +160,14 @@
self.navigable_dict = {}
for start, v in navigable_dict.items():
self.navigable_dict[start] = {}
print("BEFORE: ", len(navigable_dict[start]))
# print("BEFORE: ", len(navigable_dict[start]))
for to, _v in navigable_dict[start].items():
start_region = self.node_region[scan_ID][start]
to_region = self.node_region[scan_ID][to]
if start_region == to_region:
self.navigable_dict[start][to] = _v
print(start_region, to_region)
print("AFTER: ", len(self.navigable_dict[start]))
# print(start_region, to_region)
# print("AFTER: ", len(self.navigable_dict[start]))
# Get candidate
self.getCandidate()
@@ -186,7 +184,6 @@
'heading': self.heading,
'elevation': self.elevation,
'candidate': self.candidate,
'stop': self.stop,
'start': self.start,
'target': self.target
}
@@ -233,9 +230,9 @@ class EnvBatch(object):
def _make_id(self, scanId, viewpointId):
return scanId + '_' + viewpointId
def newEpisodes(self, scanIds, viewpointIds, headings, stops, starts, targets):
for i, (scanId, viewpointId, heading, stop, start, target) in enumerate(zip(scanIds, viewpointIds, headings, stops, starts, targets)):
self.sims[i].newEpisode(scanId, viewpointId, heading, 0, stop, start, target)
def newEpisodes(self, scanIds, viewpointIds, headings, starts, targets):
for i, (scanId, viewpointId, heading, start, target) in enumerate(zip(scanIds, viewpointIds, headings, starts, targets)):
self.sims[i].newEpisode(scanId, viewpointId, heading, 0, start, target)
def getStates(self):
"""
@@ -263,7 +260,7 @@ class REVERIENavBatch(object):
def __init__(
self, view_db, instr_data, connectivity_dir, navigable_dir,
batch_size=1, seed=0, name=None, data_limit=100
batch_size=1, seed=0, name=None
):
self.env = EnvBatch(navigable_dir, feat_db=view_db, batch_size=batch_size)
self.data = instr_data
@@ -272,14 +269,15 @@
self.batch_size = batch_size
self.name = name
#self.gt_trajs = self._get_gt_trajs(self.data) # for evaluation
self.gt_trajs = self._get_gt_trajs(self.data) # for evaluation
# use different seeds in different processes to shuffle data
'''
self.seed = seed
random.seed(self.seed)
random.shuffle(self.data)
'''
self.data = self.data[:data_limit]
self.ix = 0
self._load_nav_graphs()
@@ -288,14 +286,12 @@
print('%s loaded with %d instructions, using splits: %s' % (
self.__class__.__name__, len(self.data), self.name))
'''
def _get_gt_trajs(self, data):
gt_trajs = {
x['instr_id']: (x['scan'], x['path']) \
x['new_reverie_id']: (x['scan'], x['path']) \
for x in data if len(x['path']) > 1
}
return gt_trajs
'''
def size(self):
return len(self.data)
@@ -350,7 +346,7 @@
'obs' : feature["detail"],
'obs_summary' : feature["summary"],
'objects' : feature["objects"],
'instr_id' : item['instr_id'],
# 'instr_id' : item['instr_id'],
# 'action_plan' : item['action_plan'],
'scan' : state['scanID'],
'viewpoint' : state['viewpointID'],
@@ -360,8 +356,8 @@
'instruction' : item['instruction'],
'gt_path' : item['path'],
'path_id' : item['path_id'],
'stop': item['stop'],
'start': item['start'],
'new_reverie_id': item['new_reverie_id'],
'target': item['target']
}
# RL reward. The negative distance between the state and the final state
@@ -384,10 +380,9 @@
scanIds = [item['scan'] for item in self.batch]
viewpointIds = [item['path'][0] for item in self.batch]
headings = [item['heading'] for item in self.batch]
stops = [item['stop'] for item in self.batch]
starts = [item['start'] for item in self.batch]
targets = [item['target'] for item in self.batch]
self.env.newEpisodes(scanIds, starts, headings, stops, starts, targets)
self.env.newEpisodes(scanIds, starts, headings, starts, targets)
return self._get_obs()
def step(self, next_viewpoint_IDs):
@@ -412,7 +407,7 @@
shortest_distances = self.shortest_distances[scan]
path = sum(pred_path, [])
assert gt_path[0] == path[0], 'Result trajectories should include the start position'
# assert gt_path[0] == path[0], 'Result trajectories should include the start position'
nearest_position = self._get_nearest(shortest_distances, gt_path[-1], path)
@@ -426,7 +421,7 @@
gt_lengths = np.sum([shortest_distances[a][b] for a, b in zip(gt_path[:-1], gt_path[1:])])
scores['success'] = float(scores['nav_error'] < ERROR_MARGIN)
scores['spl'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
# scores['spl'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
scores['oracle_success'] = float(scores['oracle_error'] < ERROR_MARGIN)
scores.update(
@@ -459,7 +454,7 @@
'oracle_error': np.mean(metrics['oracle_error']),
'sr': np.mean(metrics['success']) * 100,
'oracle_sr': np.mean(metrics['oracle_success']) * 100,
'spl': np.mean(metrics['spl']) * 100,
# 'spl': np.mean(metrics['spl']) * 100,
'nDTW': np.mean(metrics['nDTW']) * 100,
'SDTW': np.mean(metrics['SDTW']) * 100,
'CLS': np.mean(metrics['CLS']) * 100,
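
On the scoring side, SPL is commented out while success remains defined by nav_error against ERROR_MARGIN. For reference, the usual VLN definitions of the two error terms used above (a sketch; shortest_distances is the precomputed all-pairs graph distance and ERROR_MARGIN is the standard 3-meter threshold):

ERROR_MARGIN = 3.0  # meters

def nav_scores(shortest_distances, gt_path, path):
    goal = gt_path[-1]
    nav_error = shortest_distances[path[-1]][goal]                 # distance at the final stop
    oracle_error = min(shortest_distances[v][goal] for v in path)  # best distance along the way
    return {
        'nav_error': nav_error,
        'oracle_error': oracle_error,
        'success': float(nav_error < ERROR_MARGIN),
        'oracle_success': float(oracle_error < ERROR_MARGIN),
    }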

View File

@@ -7,8 +7,8 @@ def parse_args():
# datasets
parser.add_argument('--root_dir', type=str, default='../datasets')
parser.add_argument('--dataset', type=str, default='r2r', choices=['r2r', 'r4r'])
parser.add_argument('--output_dir', type=str, default='../datasets/R2R/exprs/gpt-3.5-turbo', help='experiment id')
parser.add_argument('--dataset', type=str, default='reverie', choices=['r2r', 'r4r', 'reverie'])
parser.add_argument('--output_dir', type=str, default='../datasets/REVERIE/exprs/gpt-3.5-turbo', help='experiment id')
# parser.add_argument('--output_dir', type=str, default='../datasets/R2R/exprs/LlaMA-2-13b-test', help='experiment id')
parser.add_argument('--seed', type=int, default=0)
@@ -21,7 +21,7 @@ def parse_args():
parser.add_argument('--max_iterations', type=int, default=25)
# General config
parser.add_argument('--iters', type=int, default=10, help='number of iterations to run')
parser.add_argument('--iters', type=int, default=None, help='number of iterations to run')
# parser.add_argument('--iters', type=int, default=None, help='number of iterations to run')
parser.add_argument('--max_scratchpad_length', type=int, default=1000, help='max number of steps in an episode')
parser.add_argument('--test', action='store_true', default=False)

View File

@@ -244,15 +244,16 @@ Instruction: {action_plan}
Initial Observation: {init_observation}
Thought: I should start navigation according to the instruction, {agent_scratchpad}"""
VLN_GPT35_PROMPT = """As an intelligent embodied agent, you will navigate an indoor environment to reach a target viewpoint based on a given instruction, performing the Vision and Language Navigation (VLN) task. You'll move among static positions within a pre-defined graph, aiming for minimal steps.
VLN_GPT35_PROMPT = """As an intelligent embodied agent, you will navigate an indoor environment to reach a target viewpoint based on a given instruction, performing the Vision and Language Navigation (VLN) task. The instruction may be either feasible or infeasible (i.e., the specified object might not be found in the environment). You will move among static positions within a pre-defined graph, aiming for the nearest position to the object if the object is present.
You will receive a trajectory instruction at the start and will have access to step history (your Thought, Action, Action Input and Observation after the Begin! sign) and current viewpoint observation (including scene descriptions, objects, and navigable directions/distances within 3 meters) during navigation. Orientations range from -180 to 180 degrees, with 0 being forward, right 90 rightward, right/left 180 backward, and left 90 leftward.
Explore the environment and don't stay at the original point. Keep Walking! Reach within 3 meters of the instructed destination, and if it's visible but no objects are detected, move closer.
If you find the object but I haven't said you can stop, you cannot say you have finished the task! Keep exploring the nearby area.
continue by considering your location and the next viewpoint based on the instruction, using the action_maker tool.
Explore the environment while avoiding revisiting viewpoints by comparing current and previously visited IDs. Get as close to the instructed destination as possible. The task will fail if you do not come within 3 meters of the instructed destination, even if it is observable. Therefore, if the destination is visible but you do not see the object within 3 meters, move closer.
At each step, determine if you've reached the destination (if the object is more than three meters away from you, you are not considered to have reached the destination).
If yes, stop and output 'Final Answer: Finished!'.
If not, continue by considering your location and the next viewpoint based on the instruction, using the action_maker tool.
And if you have explored all rooms and think this object doesn't exist in the environment, stop and output 'Final Answer: Not found!'.
If you find that another room seems to be the closest match to the instruction but no viewpoint can access that room, please output "Final Answer: Not found!"
Show your reasoning in the Thought section.
Follow the given format and use provided tools.
@@ -266,12 +267,19 @@ Instruction: the instruction describing the whole trajectory
Initial Observation: the initial observation of the environment
Thought: you should always think about what to do next and why
Action: the action to take, must be one of the tools [{tool_names}]
Action Input: "Viewpoint ID" but do not stay in the original viewpoint
Action Input: "Viewpoint ID"
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I have reached the destination, I can stop.
Final Answer: Finished!
or
Thought: I cannot find the object in this room, I should stop.
Final Answer: Not found!
----
Begin!
Instruction: {action_plan}
Initial Observation: {init_observation}
Thought: I should start navigation according to the instruction, {agent_scratchpad}"""
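
The revised prompt allows two terminal outputs, 'Final Answer: Finished!' and 'Final Answer: Not found!', and the parser keys on the literal 'Finished' to set FINAL_STATE. A minimal sketch of that mapping (same literals as the prompt; the function name is hypothetical):

def classify_final_answer(text):
    # 'Finished' anywhere in the final answer means the agent claims it
    # reached the target; any other final answer is treated as 'not found'.
    return 'stop' if 'Finished' in text else 'not found'

assert classify_final_answer('Final Answer: Finished!') == 'stop'
assert classify_final_answer('Final Answer: Not found!') == 'not found'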