feat: add 'distance module' (wall distance to candidate viewpoints)

This commit is contained in:
Ting-Jun Wang 2025-01-12 14:31:42 +08:00
parent bc6cb9a9f8
commit d00b84432e
Signed by: snsd0805
GPG Key ID: 48D331A3D6160354
2 changed files with 43 additions and 6 deletions
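The hunks below read from a precomputed lookup distances[scan][current_viewpoint][candidate_viewpoint] giving how many meters extend toward a candidate before hitting a wall; its construction is not part of this diff. A minimal sketch of the assumed shape, with a hypothetical loader and file name:

# Sketch only: the 'distance module' is assumed to precompute, per scan, the free
# distance (in meters) from each viewpoint toward each candidate viewpoint before
# hitting a wall, and to expose it as a nested dict:
#   distances[scan_id][current_viewpoint_id][candidate_viewpoint_id] -> float meters
import json

def load_wall_distances(path='wall_distances.json'):  # path and file name are assumptions
    with open(path) as f:
        return json.load(f)

# distances = load_wall_distances()
# distances['scan_0001']['vp_a']['vp_b']  # e.g. 4.80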

View File

@@ -402,6 +402,7 @@ class NavGPTAgent(BaseAgent):
rel_viewpoint_heading = normalize_angle(rel_viewpoint_heading)
rel_viewpoint_heading = angle_to_left_right(rel_viewpoint_heading)
vp_description = rel_viewpoint_heading + f', {viewpoint_data["distance"]:.2f}m'
vp_description = vp_description + f', {viewpoint_data["wall_distance"]:.2f}m to the wall'
# rel_range_idx = (vp_range_idx - range_idx) % 8
candidate_range.setdefault(vp_range_idx, {}).update({viewpoint_id: vp_description})
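With made-up values, the candidate description built by this hunk would read like the following sketch (heading text and numbers are illustrative only):

# Illustration with made-up values of the description string built above.
viewpoint_data = {'distance': 2.15, 'wall_distance': 4.80}  # hypothetical values
rel_viewpoint_heading = 'left 30 degrees'                   # hypothetical heading text
vp_description = rel_viewpoint_heading + f', {viewpoint_data["distance"]:.2f}m'
vp_description = vp_description + f', {viewpoint_data["wall_distance"]:.2f}m to the wall'
print(vp_description)  # left 30 degrees, 2.15m, 4.80m to the wall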
@@ -491,6 +492,8 @@ class NavGPTAgent(BaseAgent):
# Get current observation
cur_obs = self.env._get_obs()[0]
print(cur_obs)
# Get current feature
feature = cur_obs['obs']
heading = np.rad2deg(cur_obs['heading'])
@@ -498,9 +501,14 @@ class NavGPTAgent(BaseAgent):
objects = cur_obs['objects']
orientation = f'\nheading: {heading:.2f}, elevation: {elevation:.2f}'
navigable = cur_obs['candidate']
for vp, data in navigable.items():
    data['wall_distance'] = distances[cur_obs['scan']][cur_obs['viewpoint']][vp]
    print(data['wall_distance'])
if self.config.use_relative_angle: # True
    feature = self.modify_heading_angles(heading, feature, navigable, objects)
if self.config.use_navigable: # False
    navigable = self.get_navigable_str(heading, elevation, navigable)
if self.config.use_tool_chain:
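The same three-line annotation loop recurs in each hunk below; as a sketch (assuming the nested distances dict described above), it could be factored into a helper:

# Sketch of a possible helper (not in this commit) for the repeated annotation:
# copy the precomputed wall distance for each candidate into its dict entry.
def annotate_wall_distances(navigable, distances, scan, viewpoint):
    for vp, data in navigable.items():
        data['wall_distance'] = distances[scan][viewpoint][vp]
    return navigable

# e.g. annotate_wall_distances(cur_obs['candidate'], distances,
#                              cur_obs['scan'], cur_obs['viewpoint'])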
@@ -537,6 +545,11 @@ class NavGPTAgent(BaseAgent):
new_objects = new_obs['objects']
new_heading = np.rad2deg(new_obs['heading'])
new_elevation = np.rad2deg(new_obs['elevation'])
for vp, data in new_navigable.items():
    data['wall_distance'] = distances[new_obs['scan']][new_obs['viewpoint']][vp]
    print(data['wall_distance'])
if self.config.use_relative_angle:
    new_feature = self.modify_heading_angles(new_heading, new_feature, new_navigable, new_objects)
new_orientation = f'\nheading: {new_heading:.2f}, elevation: {new_elevation:.2f}'
@@ -619,6 +632,14 @@ class NavGPTAgent(BaseAgent):
heading = np.rad2deg(cur_obs['heading'])
elevation = np.rad2deg(cur_obs['elevation'])
orientation = f'\nheading: {heading:.2f}, elevation: {elevation:.2f}'
for vp, data in navigable.items():
    data['wall_distance'] = distances[cur_obs['scan']][cur_obs['viewpoint']][vp]
    print(data['wall_distance'])
if self.config.use_relative_angle:
    feature = self.modify_heading_angles(heading, feature, navigable, objects)
if self.config.use_navigable:
@@ -652,6 +673,12 @@ class NavGPTAgent(BaseAgent):
new_heading = np.rad2deg(new_obs['heading'])
new_elevation = np.rad2deg(new_obs['elevation'])
new_orientation = f'\nheading: {new_heading:.2f}, elevation: {new_elevation:.2f}'
for vp, data in new_navigable.items():
    data['wall_distance'] = distances[new_obs['scan']][new_obs['viewpoint']][vp]
    print(data['wall_distance'])
if self.config.use_relative_angle:
    new_feature = self.modify_heading_angles(new_heading, new_feature, new_navigable, new_objects)
if self.config.use_navigable:
@@ -882,11 +909,16 @@ class NavGPTAgent(BaseAgent):
# we are HERE
feature = init_ob['obs']
navigable = init_ob['candidate']
# distances =
objects = init_ob['objects']
heading = np.rad2deg(init_ob['heading'])
elevation = np.rad2deg(init_ob['elevation'])
orientation = f'\nheading: {heading:.2f}, elevation: {elevation:.2f}'
for vp, data in navigable.items():
    data['wall_distance'] = distances[init_ob['scan']][init_ob['viewpoint']][vp]
    print(data['wall_distance'])
print("use_relative_angle:", self.config.use_relative_angle) print("use_relative_angle:", self.config.use_relative_angle)
print("use_relative_angle:", self.config.use_navigable) print("use_relative_angle:", self.config.use_navigable)
if self.config.use_relative_angle: # True

View File

@@ -244,7 +244,7 @@ Instruction: {action_plan}
Initial Observation: {init_observation}
Thought: I should start navigation according to the instruction, {agent_scratchpad}"""
VLN_GPT35_PROMPT = """As an intelligent embodied agent, you will navigate in an indoor environment to reach a target viewpoint to find the object based on a given instruction, performing the Vision and Language Navigation (VLN) task.
The instruction will let you find all the target objects in a room. You should have a good strategy to check all the objects along the shortest path in the room.
@@ -254,9 +254,14 @@ You will move among static positions within a pre-defined graph, aiming for the
You will receive a trajectory instruction at the start and will have access to step history (your Thought, Action, Action Input and Observation after the Begin! sign) and current viewpoint observation (including scene descriptions, objects, and navigable directions/distances within 3 meters) during navigation. Orientations range from -180 to 180 degrees, with 0 being forward, right 90 rightward, right/left 180 backward, and left 90 leftward.
We also calculate how many meters extend in the direction of each viewpoint before hitting a wall. We hope this distance information helps you understand the spatial layout of the room; please plan an effective exploration strategy based on it.
For example, if there are two viewpoints to choose from (A: 1m, B: 5m) and you have not found the target object yet, viewpoint B is the better choice because it offers more exploration space in which to find the target.
Explore the environment while avoiding revisiting viewpoints by comparing current and previously visited IDs. Most importantly, you must not leave the room, so do not move close to the door.
Notice: You should have a good strategy to check whether the target object exists in this room, and stop once you have explored all viewpoints in this room.
If you think you are moving in circles, please stop and consider whether any other objects may be hidden. If not, please output 'Final Answer: Not found'.
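The exploration hint above amounts to preferring, among unvisited candidates, the one with the largest wall distance when the target has not been found; a minimal sketch with made-up candidates:

# Hypothetical illustration of the heuristic stated in the prompt: among unvisited
# candidates, prefer the one with the largest wall distance (more room to explore).
candidates = {'vp_a': {'wall_distance': 1.0}, 'vp_b': {'wall_distance': 5.0}}  # made-up values
visited = set()
unvisited = {vp: d for vp, d in candidates.items() if vp not in visited}
best = max(unvisited, key=lambda vp: unvisited[vp]['wall_distance'])
print(best)  # vp_b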
@@ -269,13 +274,13 @@ Follow the given format and use provided tools.
Do not fabricate nonexistent viewpoint IDs.
----
Starting below, you should follow this format; do not use any other format:
Instruction: the instruction describing the whole trajectory
Initial Observation: the initial observation of the environment
Thought: you should always think about what to do next and why
Action: the action to take, must be one of the tools [{tool_names}]
Action Input: "Viewpoint ID", you should not choose an object name or anything else; please output only the "Viewpoint ID"
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I found my target object, but I should check whether any other objects may be hidden.