diff --git a/nav_src/agent.py b/nav_src/agent.py
index a04868a..f24375d 100644
--- a/nav_src/agent.py
+++ b/nav_src/agent.py
@@ -402,6 +402,7 @@ class NavGPTAgent(BaseAgent):
         rel_viewpoint_heading = normalize_angle(rel_viewpoint_heading)
         rel_viewpoint_heading = angle_to_left_right(rel_viewpoint_heading)
         vp_description = rel_viewpoint_heading + f', {viewpoint_data["distance"]:.2f}m'
+        vp_description = vp_description + f', {viewpoint_data["wall_distance"]:.2f}m to the wall'
         # rel_range_idx = (vp_range_idx - range_idx) % 8
         candidate_range.setdefault(vp_range_idx, {}).update({viewpoint_id: vp_description})
@@ -491,6 +492,8 @@ class NavGPTAgent(BaseAgent):
         # Get current observation
         cur_obs = self.env._get_obs()[0]
+        print(cur_obs)
+
         # Get current feature
         feature = cur_obs['obs']
         heading = np.rad2deg(cur_obs['heading'])
@@ -498,9 +501,14 @@ class NavGPTAgent(BaseAgent):
         objects = cur_obs['objects']
         orientation = f'\nheading: {heading:.2f}, elevation: {elevation:.2f}'
         navigable = cur_obs['candidate']
-        if self.config.use_relative_angle:
+
+        for vp, data in navigable.items():
+            data['wall_distance'] = distances[cur_obs['scan']][cur_obs['viewpoint']][vp]
+            print(data['wall_distance'])
+
+        if self.config.use_relative_angle:  # True
             feature = self.modify_heading_angles(heading, feature, navigable, objects)
-        if self.config.use_navigable:
+        if self.config.use_navigable:  # False
             navigable = self.get_navigable_str(heading, elevation, navigable)
 
         if self.config.use_tool_chain:
@@ -537,6 +545,11 @@ class NavGPTAgent(BaseAgent):
             new_objects = new_obs['objects']
             new_heading = np.rad2deg(new_obs['heading'])
             new_elevation = np.rad2deg(new_obs['elevation'])
+
+            for vp, data in new_navigable.items():
+                data['wall_distance'] = distances[new_obs['scan']][new_obs['viewpoint']][vp]
+                print(data['wall_distance'])
+
             if self.config.use_relative_angle:
                 new_feature = self.modify_heading_angles(new_heading, new_feature, new_navigable, new_objects)
             new_orientation = f'\nheading: {new_heading:.2f}, elevation: {new_elevation:.2f}'
@@ -619,6 +632,14 @@ class NavGPTAgent(BaseAgent):
         heading = np.rad2deg(cur_obs['heading'])
         elevation = np.rad2deg(cur_obs['elevation'])
         orientation = f'\nheading: {heading:.2f}, elevation: {elevation:.2f}'
+
+        for vp, data in navigable.items():
+            data['wall_distance'] = distances[cur_obs['scan']][cur_obs['viewpoint']][vp]
+            print(data['wall_distance'])
+
         if self.config.use_relative_angle:
             feature = self.modify_heading_angles(heading, feature, navigable, objects)
         if self.config.use_navigable:
@@ -652,6 +673,12 @@ class NavGPTAgent(BaseAgent):
             new_heading = np.rad2deg(new_obs['heading'])
             new_elevation = np.rad2deg(new_obs['elevation'])
             new_orientation = f'\nheading: {new_heading:.2f}, elevation: {new_elevation:.2f}'
+
+            for vp, data in new_navigable.items():
+                data['wall_distance'] = distances[new_obs['scan']][new_obs['viewpoint']][vp]
+                print(data['wall_distance'])
+
             if self.config.use_relative_angle:
                 new_feature = self.modify_heading_angles(new_heading, new_feature, new_navigable, new_objects)
             if self.config.use_navigable:
@@ -882,11 +909,16 @@ class NavGPTAgent(BaseAgent):
         # we are HERE
         feature = init_ob['obs']
         navigable = init_ob['candidate']
+        # distances =
         objects = init_ob['objects']
         heading = np.rad2deg(init_ob['heading'])
         elevation = np.rad2deg(init_ob['elevation'])
         orientation = f'\nheading: {heading:.2f}, elevation: {elevation:.2f}'
+        for vp, data in navigable.items():
+            data['wall_distance'] = distances[init_ob['scan']][init_ob['viewpoint']][vp]
+            print(data['wall_distance'])
+
+        print("use_relative_angle:", self.config.use_relative_angle)
+        print("use_navigable:", self.config.use_navigable)
         if self.config.use_relative_angle:  # True
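Note: the same three-line wall-distance annotation loop is now duplicated at five call sites in agent.py. A minimal consolidation sketch, assuming `distances` is the same `scan -> viewpoint -> viewpoint` lookup indexed in the hunks above; the helper name `annotate_wall_distances` is hypothetical and not part of the original code:

```python
def annotate_wall_distances(obs, distances, verbose=False):
    """Attach a 'wall_distance' entry to every navigable candidate in an
    observation, using the distances[scan][viewpoint][vp] lookup from the
    hunks above. Hypothetical helper, not part of the original patch."""
    navigable = obs['candidate']
    for vp, data in navigable.items():
        data['wall_distance'] = distances[obs['scan']][obs['viewpoint']][vp]
        if verbose:
            print(vp, data['wall_distance'])  # replaces the ad-hoc debug prints
    return navigable
```

Each duplicated loop could then collapse to a single call, e.g. `navigable = annotate_wall_distances(cur_obs, distances)`.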
print("use_relative_angle:", self.config.use_relative_angle) print("use_relative_angle:", self.config.use_navigable) if self.config.use_relative_angle: # True diff --git a/nav_src/prompt/planner_prompt.py b/nav_src/prompt/planner_prompt.py index dd09f4f..a423395 100644 --- a/nav_src/prompt/planner_prompt.py +++ b/nav_src/prompt/planner_prompt.py @@ -244,7 +244,7 @@ Instruction: {action_plan} Initial Observation: {init_observation} Thought: I should start navigation according to the instruction, {agent_scratchpad}""" -VLN_GPT35_PROMPT = """As an intelligent embodied agent, you will navigate in an indoor environment to reach a target viewpoint based on a given instruction, performing the Vision and Language Navigation (VLN) task. +VLN_GPT35_PROMPT = """As an intelligent embodied agent, you will navigate in an indoor environment to reach a target viewpoint to find the object based on a given instruction, performing the Vision and Language Navigation (VLN) task. The instruction will let you find all the target objects in a room. You should have a good stratedy to check all the object in the shortest path in the room. @@ -254,9 +254,14 @@ You will move among static positions within a pre-defined graph, aiming for the You will receive a trajectory instruction at the start and will have access to step history (your Thought, Action, Action Input and Obeservation after the Begin! sign) and current viewpoint observation (including scene descriptions, objects, and navigable directions/distances within 3 meters) during navigation. Orientations range from -180 to 180 degrees, with 0 being forward, right 90 rightward, right/left 180 backward, and left 90 leftward. +And we will calculate how many meters extend in the direction of each viewpoint before hitting a wall. We hope this distance information can help you understand the spatial layout of the room. Please plan an effective exploration strategy based on this distance information. + +For example, if I have 2 viewpoints to choose (A: 1m, B: 5m) but I cannot find the target object so I better choose viewpoint B because I may have more exploration space to find the target. + + Explore the environment while avoiding revisiting viewpoints by comparing current and previously visited IDs and the most important thing is that you should not leave the room so you better not move closed to the door. -Notice: You should have a good strategy to check whether the target object exists in this room. +Notice: You should have a good strategy to check whether the target object exists in this room, and stop when you exploring all viewpoint in this room. If you think you are moving in circles, please stop and think whether any other objects may be hiden. If no, please output 'Final Answer: Not found'. @@ -269,13 +274,13 @@ Follow the given format and use provided tools. Do not fabricate nonexistent viewpoint IDs. ---- -Starting below, you should follow this format: +Starting below, you should follow this format, do not use other format: Instruction: the instruction describing the whole trajectory Initial Observation: the initial observation of the environment Thought: you should always think about what to do next and why Action: the action to take, must be one of the tools [{tool_names}] -Action Input: "Viewpoint ID" +Action Input: "Viewpoint ID", you should not choose object name or others, please only output "Viewpoint ID" Observation: the result of the action ... 
 (this Thought/Action/Action Input/Observation can repeat N times)
 Thought: I found my target object, but I should check whether any other objects may be hidden.
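The exploration heuristic the prompt describes (prefer the candidate with more open space when the target has not yet been found) can be sketched in a few lines. This is an illustration only; the function name is hypothetical, and the candidate structure assumes the `wall_distance` field added in agent.py above:

```python
def pick_most_open_viewpoint(candidates):
    """Return the candidate viewpoint ID with the largest 'wall_distance',
    mirroring the prompt's example (A: 1m vs. B: 5m -> choose B).
    `candidates` maps viewpoint IDs to dicts carrying 'wall_distance'."""
    return max(candidates, key=lambda vp: candidates[vp]['wall_distance'])

# Example: two candidates, target not yet found
candidates = {'A': {'wall_distance': 1.0}, 'B': {'wall_distance': 5.0}}
assert pick_most_open_viewpoint(candidates) == 'B'
```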