Compare commits
2 Commits
master
...
random-suc
| Author | SHA1 | Date | |
|---|---|---|---|
| 82e2c7e053 | |||
| 68330c5163 |
151
nav_src/agent.py
151
nav_src/agent.py
@ -1,4 +1,5 @@
|
|||||||
"""Agent that interacts with Matterport3D simulator via a hierarchical planning approach."""
|
"""Agent that interacts with Matterport3D simulator via a hierarchical planning approach."""
|
||||||
|
import random
|
||||||
import json
|
import json
|
||||||
import yaml
|
import yaml
|
||||||
import re
|
import re
|
||||||
@ -49,6 +50,7 @@ FINAL_STOP_POINT = ""
|
|||||||
SUCCESS = 0
|
SUCCESS = 0
|
||||||
TEMP_STEPS_COUNTER = 0
|
TEMP_STEPS_COUNTER = 0
|
||||||
STEPS_COUNTER = 0
|
STEPS_COUNTER = 0
|
||||||
|
NOW_LOCATION = None
|
||||||
|
|
||||||
MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE = (
|
MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE = (
|
||||||
"Invalid Format: Missing 'Action:' after 'Thought:"
|
"Invalid Format: Missing 'Action:' after 'Thought:"
|
||||||
@ -71,16 +73,17 @@ class NavGPTOutputParser(AgentOutputParser):
|
|||||||
global STEPS_COUNTER
|
global STEPS_COUNTER
|
||||||
global TEMP_STEPS_COUNTER
|
global TEMP_STEPS_COUNTER
|
||||||
global SUCCESS
|
global SUCCESS
|
||||||
# includes_answer = FINAL_ANSWER_ACTION in text
|
global NOW_LOCATION
|
||||||
|
includes_answer = FINAL_ANSWER_ACTION in text
|
||||||
regex = (
|
regex = (
|
||||||
r"Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*\"?([a-fA-F0-9]{32})\"?"
|
r"Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*\"?([a-fA-F0-9]{32})\"?"
|
||||||
)
|
)
|
||||||
action_match = re.search(regex, text, re.DOTALL)
|
action_match = re.search(regex, text, re.DOTALL)
|
||||||
if action_match:
|
if action_match:
|
||||||
# if includes_answer:
|
if includes_answer:
|
||||||
# raise OutputParserException(
|
raise OutputParserException(
|
||||||
# f"{FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE}: {text}"
|
f"{FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE}: {text}"
|
||||||
# )
|
)
|
||||||
action = action_match.group(1).strip()
|
action = action_match.group(1).strip()
|
||||||
action_input = action_match.group(2)
|
action_input = action_match.group(2)
|
||||||
tool_input = action_input.strip(" ")
|
tool_input = action_input.strip(" ")
|
||||||
@ -92,13 +95,16 @@ class NavGPTOutputParser(AgentOutputParser):
|
|||||||
print("ACTION: ", action_input)
|
print("ACTION: ", action_input)
|
||||||
print(f"MY FINAL_STOP_POINT = {FINAL_STOP_POINT}")
|
print(f"MY FINAL_STOP_POINT = {FINAL_STOP_POINT}")
|
||||||
|
|
||||||
TEMP_STEPS_COUNTER += 1
|
# TEMP_STEPS_COUNTER += 1
|
||||||
print(f"TEMP_STEPS_COUNT = {TEMP_STEPS_COUNTER}")
|
print(f"TEMP_STEPS_COUNT = {TEMP_STEPS_COUNTER}")
|
||||||
print(f"STEPS_COUNT = {STEPS_COUNTER}")
|
print(f"STEPS_COUNT = {STEPS_COUNTER}")
|
||||||
print(f"SUCCESS = {SUCCESS}")
|
print(f"SUCCESS = {SUCCESS}")
|
||||||
|
|
||||||
|
NOW_LOCATION = tool_input
|
||||||
|
print(f"NOW_LOCATION = {NOW_LOCATION}")
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
if FINAL_STOP_POINT in text:
|
if FINAL_STOP_POINT in text:
|
||||||
STEPS_COUNTER += TEMP_STEPS_COUNTER
|
STEPS_COUNTER += TEMP_STEPS_COUNTER
|
||||||
SUCCESS += 1
|
SUCCESS += 1
|
||||||
@ -110,14 +116,22 @@ class NavGPTOutputParser(AgentOutputParser):
|
|||||||
return AgentFinish(
|
return AgentFinish(
|
||||||
{"output": action_input}, text
|
{"output": action_input}, text
|
||||||
)
|
)
|
||||||
|
'''
|
||||||
|
|
||||||
return AgentAction(action, tool_input, text)
|
return AgentAction(action, tool_input, text)
|
||||||
'''
|
|
||||||
elif includes_answer:
|
elif includes_answer:
|
||||||
|
if NOW_LOCATION == FINAL_STOP_POINT:
|
||||||
|
SUCCESS += 1
|
||||||
|
print(f"SUCCESS = {SUCCESS}")
|
||||||
|
else:
|
||||||
|
|
||||||
|
print("NOT SUCCESS")
|
||||||
|
print(f"{NOW_LOCATION}_{type(NOW_LOCATION)}")
|
||||||
|
print(f"{FINAL_STOP_POINT}_{type(FINAL_STOP_POINT)}")
|
||||||
|
print(f"SUCCESS = {SUCCESS}")
|
||||||
return AgentFinish(
|
return AgentFinish(
|
||||||
{"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
|
{"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
|
||||||
)
|
)
|
||||||
'''
|
|
||||||
|
|
||||||
if not re.search(r"Action\s*\d*\s*:[\s]*(.*?)", text, re.DOTALL):
|
if not re.search(r"Action\s*\d*\s*:[\s]*(.*?)", text, re.DOTALL):
|
||||||
raise OutputParserException(
|
raise OutputParserException(
|
||||||
@ -698,122 +712,29 @@ class NavGPTAgent(BaseAgent):
|
|||||||
obs = self.env._get_obs()
|
obs = self.env._get_obs()
|
||||||
|
|
||||||
global FINAL_STOP_POINT
|
global FINAL_STOP_POINT
|
||||||
global TEMP_STEPS_COUNTER
|
global SUCCESS
|
||||||
|
|
||||||
FINAL_STOP_POINT = obs[0]['stop']
|
FINAL_STOP_POINT = obs[0]['stop']
|
||||||
|
|
||||||
if TEMP_STEPS_COUNTER != 0:
|
|
||||||
TEMP_STEPS_COUNTER = 0
|
|
||||||
|
|
||||||
print(f"HAVE SET FINAL_STOP_POINT = {FINAL_STOP_POINT}")
|
|
||||||
|
|
||||||
print(len(obs))
|
|
||||||
|
|
||||||
print(obs[0].keys())
|
|
||||||
print(obs[0]['obs'])
|
|
||||||
print(obs[0]['obs_summary'])
|
|
||||||
print(obs[0]['objects'])
|
|
||||||
print(obs[0]['instr_id'])
|
|
||||||
print(obs[0]['scan'])
|
|
||||||
print(obs[0]['viewpoint'])
|
|
||||||
print(obs[0]['heading'])
|
|
||||||
print(obs[0]['elevation'])
|
|
||||||
print(obs[0]['candidate'])
|
|
||||||
print(obs[0]['instruction'])
|
|
||||||
print(obs[0]['gt_path'])
|
|
||||||
print(obs[0]['path_id'])
|
|
||||||
print(obs[0]['stop'])
|
|
||||||
print(obs[0]['start'])
|
|
||||||
print(obs[0]['target'])
|
|
||||||
|
|
||||||
|
|
||||||
print("==")
|
|
||||||
|
|
||||||
# Initialize the trajectory
|
# Initialize the trajectory
|
||||||
self.init_trajecotry(obs)
|
self.init_trajecotry(obs)
|
||||||
|
|
||||||
# Load the instruction
|
print(obs[0].keys())
|
||||||
# instructions = [ob['instruction'] for ob in obs]
|
print(obs[0]['start'])
|
||||||
targets = [ob['target'] for ob in obs]
|
print(obs[0]['stop'])
|
||||||
|
print(obs[0]['target'])
|
||||||
|
candidates = self.env.env.sims[0].getNodesInTheRoom()
|
||||||
|
candidates.remove(obs[0]['start'])
|
||||||
|
|
||||||
|
next_point = random.choice(candidates)
|
||||||
|
print(next_point)
|
||||||
|
|
||||||
|
|
||||||
print(self.config.load_instruction)
|
if next_point == FINAL_STOP_POINT:
|
||||||
print(self.config.load_action_plan)
|
print(" SUCCESS")
|
||||||
|
SUCCESS += 1
|
||||||
|
|
||||||
if self.config.load_instruction:
|
print(f"SUCCESS={SUCCESS}")
|
||||||
# action_plans = instructions
|
|
||||||
action_plans = targets
|
|
||||||
elif self.config.load_action_plan:
|
|
||||||
action_plans = [ob['action_plan'] for ob in obs]
|
|
||||||
else:
|
|
||||||
action_plans = []
|
|
||||||
for instruction in instructions:
|
|
||||||
action_plan = self.plan_chain.run(instruction = instruction)
|
|
||||||
action_plans.append(action_plan)
|
|
||||||
print(action_plans)
|
|
||||||
|
|
||||||
for i, init_ob in enumerate(obs):
|
|
||||||
|
|
||||||
# for our work
|
|
||||||
# cur_action_plan is "target object with its location"
|
|
||||||
self.cur_action_plan = action_plans[i]
|
|
||||||
|
|
||||||
print("use_tool_chain:", self.config.use_tool_chain)
|
|
||||||
|
|
||||||
# Take the first action
|
|
||||||
if self.config.use_tool_chain: # we will not HERE
|
|
||||||
first_obs = self.action_maker('')
|
|
||||||
input = {
|
|
||||||
'action_plan': self.cur_action_plan,
|
|
||||||
'init_observation': init_ob['obs_summary'],
|
|
||||||
'observation': first_obs,
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
# Get current feature
|
|
||||||
|
|
||||||
# we are HERE
|
|
||||||
feature = init_ob['obs']
|
|
||||||
navigable = init_ob['candidate']
|
|
||||||
objects = init_ob['objects']
|
|
||||||
heading = np.rad2deg(init_ob['heading'])
|
|
||||||
elevation = np.rad2deg(init_ob['elevation'])
|
|
||||||
orientation = f'\nheading: {heading:.2f}, elevation: {elevation:.2f}'
|
|
||||||
|
|
||||||
print("use_relative_angle:", self.config.use_relative_angle)
|
|
||||||
print("use_relative_angle:", self.config.use_navigable)
|
|
||||||
if self.config.use_relative_angle: # True
|
|
||||||
feature = self.modify_heading_angles(heading, feature, navigable, objects)
|
|
||||||
if self.config.use_navigable: # False
|
|
||||||
navigable = self.get_navigable_str(heading, elevation, navigable)
|
|
||||||
|
|
||||||
if self.config.use_relative_angle:
|
|
||||||
if self.config.use_navigable:
|
|
||||||
init_observation = f"\n\tCurrent Viewpoint:\n{feature}\n\tNavigable Viewpoints:\n{navigable}"
|
|
||||||
else:
|
|
||||||
init_observation = f"\n\tCurrent Viewpoint:\n{feature}"
|
|
||||||
else:
|
|
||||||
if self.config.use_navigable:
|
|
||||||
init_observation = f"\n\tCurrent Orientation:\n{orientation}\n\tCurrent Viewpoint:\n{feature}\n\tNavigable Viewpoints:\n{navigable}"
|
|
||||||
else:
|
|
||||||
init_observation = f"\n\tCurrent Orientation:\n{orientation}\n\tCurrent Viewpoint:\n{feature}"
|
|
||||||
|
|
||||||
|
|
||||||
input = {
|
|
||||||
'action_plan': self.cur_action_plan, # here will be "object & its location" in our work
|
|
||||||
'init_observation': init_observation, # 8 direction's observation caption & navigable point & objects
|
|
||||||
}
|
|
||||||
output = self.agent_executor(input)
|
|
||||||
|
|
||||||
self.traj[i]['llm_output'] = output['output']
|
|
||||||
self.traj[i]['action_plan'] = output['action_plan']
|
|
||||||
# extract agent's thought from llm output
|
|
||||||
intermediate_steps = output['intermediate_steps']
|
|
||||||
self.traj[i]['llm_thought'] = []
|
|
||||||
self.traj[i]['llm_observation'] = []
|
|
||||||
for action, observation in intermediate_steps:
|
|
||||||
thought = action.log
|
|
||||||
self.traj[i]['llm_thought'].append(thought)
|
|
||||||
self.traj[i]['llm_observation'].append(observation)
|
|
||||||
|
|
||||||
return self.traj
|
return self.traj
|
||||||
|
|
||||||
|
|||||||
@ -38,15 +38,18 @@ class BaseAgent(object):
|
|||||||
if iters is not None:
|
if iters is not None:
|
||||||
# For each time, it will run the first 'iters' iterations. (It was shuffled before)
|
# For each time, it will run the first 'iters' iterations. (It was shuffled before)
|
||||||
for i in range(iters):
|
for i in range(iters):
|
||||||
|
print(i)
|
||||||
for traj in self.rollout(**kwargs):
|
for traj in self.rollout(**kwargs):
|
||||||
self.loss = 0
|
self.loss = 0
|
||||||
self.results[traj['instr_id']] = traj
|
self.results[traj['instr_id']] = traj
|
||||||
|
'''
|
||||||
preds_detail = self.get_results(detailed_output=True)
|
preds_detail = self.get_results(detailed_output=True)
|
||||||
json.dump(
|
json.dump(
|
||||||
preds_detail,
|
preds_detail,
|
||||||
open(os.path.join(self.config.log_dir, 'runtime.json'), 'w'),
|
open(os.path.join(self.config.log_dir, 'runtime.json'), 'w'),
|
||||||
sort_keys=True, indent=4, separators=(',', ': ')
|
sort_keys=True, indent=4, separators=(',', ': ')
|
||||||
)
|
)
|
||||||
|
'''
|
||||||
else: # Do a full round
|
else: # Do a full round
|
||||||
while True:
|
while True:
|
||||||
for traj in self.rollout(**kwargs):
|
for traj in self.rollout(**kwargs):
|
||||||
|
|||||||
@ -162,17 +162,26 @@ class Simulator(object):
|
|||||||
self.navigable_dict = {}
|
self.navigable_dict = {}
|
||||||
for start, v in navigable_dict.items():
|
for start, v in navigable_dict.items():
|
||||||
self.navigable_dict[start] = {}
|
self.navigable_dict[start] = {}
|
||||||
print("BEFORE: ", len(navigable_dict[start]))
|
# print("BEFORE: ", len(navigable_dict[start]))
|
||||||
for to, _v in navigable_dict[start].items():
|
for to, _v in navigable_dict[start].items():
|
||||||
start_region = self.node_region[scan_ID][start]
|
start_region = self.node_region[scan_ID][start]
|
||||||
to_region = self.node_region[scan_ID][to]
|
to_region = self.node_region[scan_ID][to]
|
||||||
if start_region == to_region:
|
if start_region == to_region:
|
||||||
self.navigable_dict[start][to] = _v
|
self.navigable_dict[start][to] = _v
|
||||||
print(start_region, to_region)
|
# print(start_region, to_region)
|
||||||
print("AFTER: ", len(self.navigable_dict[start]))
|
# print("AFTER: ", len(self.navigable_dict[start]))
|
||||||
|
|
||||||
# Get candidate
|
# Get candidate
|
||||||
self.getCandidate()
|
self.getCandidate()
|
||||||
|
|
||||||
|
def getNodesInTheRoom(self):
|
||||||
|
ans = []
|
||||||
|
start_region = self.node_region[self.scan_ID][self.viewpoint_ID]
|
||||||
|
for node, region_id in self.node_region[self.scan_ID].items():
|
||||||
|
if region_id == start_region:
|
||||||
|
ans.append(node)
|
||||||
|
return ans
|
||||||
|
|
||||||
|
|
||||||
def updateGraph(self):
|
def updateGraph(self):
|
||||||
# build graph
|
# build graph
|
||||||
|
|||||||
@ -248,11 +248,11 @@ VLN_GPT35_PROMPT = """As an intelligent embodied agent, you will navigate an ind
|
|||||||
|
|
||||||
You will receive a trajectory instruction at the start and will have access to step history (your Thought, Action, Action Input and Obeservation after the Begin! sign) and current viewpoint observation (including scene descriptions, objects, and navigable directions/distances within 3 meters) during navigation. Orientations range from -180 to 180 degrees, with 0 being forward, right 90 rightward, right/left 180 backward, and left 90 leftward.
|
You will receive a trajectory instruction at the start and will have access to step history (your Thought, Action, Action Input and Obeservation after the Begin! sign) and current viewpoint observation (including scene descriptions, objects, and navigable directions/distances within 3 meters) during navigation. Orientations range from -180 to 180 degrees, with 0 being forward, right 90 rightward, right/left 180 backward, and left 90 leftward.
|
||||||
|
|
||||||
Explore the environment and don't stay at the original point. Keep Walking! Reach within 3 meters of the instructed destination, and if it's visible but no objects are detected, move closer.
|
Explore the environment while avoiding revisiting viewpoints by comparing current and previously visited IDs. Reach within 3 meters of the instructed destination, and if it's visible but no objects are detected, move closer.
|
||||||
|
|
||||||
If you find the object but I haven't said you can stop. You cannot say you have finished the task! Keep exploring the nearby area.
|
At each step, determine if you've reached the destination.
|
||||||
|
If yes, stop and output 'Final Answer: Finished!'.
|
||||||
continue by considering your location and the next viewpoint based on the instruction, using the action_maker tool.
|
If not, continue by considering your location and the next viewpoint based on the instruction, using the action_maker tool.
|
||||||
Show your reasoning in the Thought section.
|
Show your reasoning in the Thought section.
|
||||||
|
|
||||||
Follow the given format and use provided tools.
|
Follow the given format and use provided tools.
|
||||||
@ -266,8 +266,11 @@ Instruction: the instruction describing the whole trajectory
|
|||||||
Initial Observation: the initial observation of the environment
|
Initial Observation: the initial observation of the environment
|
||||||
Thought: you should always think about what to do next and why
|
Thought: you should always think about what to do next and why
|
||||||
Action: the action to take, must be one of the tools [{tool_names}]
|
Action: the action to take, must be one of the tools [{tool_names}]
|
||||||
Action Input: "Viewpoint ID" but do not stay in the original viewpoint
|
Action Input: "Viewpoint ID"
|
||||||
Observation: the result of the action
|
Observation: the result of the action
|
||||||
|
... (this Thought/Action/Action Input/Observation can repeat N times)
|
||||||
|
Thought: I have reached the destination, I can stop.
|
||||||
|
Final Answer: Finished!
|
||||||
----
|
----
|
||||||
|
|
||||||
Begin!
|
Begin!
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user