feat: add 'distance module' (wall distance to candidate viewpoints)

This commit is contained in:
Ting-Jun Wang 2025-01-12 14:31:42 +08:00
parent bc6cb9a9f8
commit d00b84432e
Signed by: snsd0805
GPG Key ID: 48D331A3D6160354
2 changed files with 43 additions and 6 deletions
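The hunks below read from a precomputed lookup distances[scan][current_viewpoint][candidate_viewpoint] giving how many meters extend toward a candidate before hitting a wall; its construction is not part of this diff. A minimal sketch of the assumed shape, with a hypothetical loader and file name:

# Sketch only: the 'distance module' is assumed to precompute, per scan, the free
# distance (in meters) from each viewpoint toward each candidate viewpoint before
# hitting a wall, and to expose it as a nested dict:
#   distances[scan_id][current_viewpoint_id][candidate_viewpoint_id] -> float meters
import json

def load_wall_distances(path='wall_distances.json'):  # path and file name are assumptions
    with open(path) as f:
        return json.load(f)

# distances = load_wall_distances()
# distances['scan_0001']['vp_a']['vp_b']  # e.g. 4.80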

View File

@@ -402,6 +402,7 @@ class NavGPTAgent(BaseAgent):
rel_viewpoint_heading = normalize_angle(rel_viewpoint_heading)
rel_viewpoint_heading = angle_to_left_right(rel_viewpoint_heading)
vp_description = rel_viewpoint_heading + f', {viewpoint_data["distance"]:.2f}m'
vp_description = vp_description + f', {viewpoint_data["wall_distance"]:.2f}m to the wall'
# rel_range_idx = (vp_range_idx - range_idx) % 8
candidate_range.setdefault(vp_range_idx, {}).update({viewpoint_id: vp_description})
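With made-up values, the candidate description built by this hunk would read like the following sketch (heading text and numbers are illustrative only):

# Illustration with made-up values of the description string built above.
viewpoint_data = {'distance': 2.15, 'wall_distance': 4.80}  # hypothetical values
rel_viewpoint_heading = 'left 30 degrees'                   # hypothetical heading text
vp_description = rel_viewpoint_heading + f', {viewpoint_data["distance"]:.2f}m'
vp_description = vp_description + f', {viewpoint_data["wall_distance"]:.2f}m to the wall'
print(vp_description)  # left 30 degrees, 2.15m, 4.80m to the wall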
@@ -491,6 +492,8 @@ class NavGPTAgent(BaseAgent):
# Get current observation
cur_obs = self.env._get_obs()[0]
print(cur_obs)
# Get current feature
feature = cur_obs['obs']
heading = np.rad2deg(cur_obs['heading'])
@@ -498,9 +501,14 @@ class NavGPTAgent(BaseAgent):
objects = cur_obs['objects']
orientation = f'\nheading: {heading:.2f}, elevation: {elevation:.2f}'
navigable = cur_obs['candidate']
for vp, data in navigable.items():
    data['wall_distance'] = distances[cur_obs['scan']][cur_obs['viewpoint']][vp]
    print(data['wall_distance'])
if self.config.use_relative_angle: # True
    feature = self.modify_heading_angles(heading, feature, navigable, objects)
if self.config.use_navigable: # False
    navigable = self.get_navigable_str(heading, elevation, navigable)
if self.config.use_tool_chain:
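The same three-line annotation loop recurs in each hunk below; as a sketch (assuming the nested distances dict described above), it could be factored into a helper:

# Sketch of a possible helper (not in this commit) for the repeated annotation:
# copy the precomputed wall distance for each candidate into its dict entry.
def annotate_wall_distances(navigable, distances, scan, viewpoint):
    for vp, data in navigable.items():
        data['wall_distance'] = distances[scan][viewpoint][vp]
    return navigable

# e.g. annotate_wall_distances(cur_obs['candidate'], distances,
#                              cur_obs['scan'], cur_obs['viewpoint'])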
@@ -537,6 +545,11 @@ class NavGPTAgent(BaseAgent):
new_objects = new_obs['objects']
new_heading = np.rad2deg(new_obs['heading'])
new_elevation = np.rad2deg(new_obs['elevation'])
for vp, data in new_navigable.items():
    data['wall_distance'] = distances[new_obs['scan']][new_obs['viewpoint']][vp]
    print(data['wall_distance'])
if self.config.use_relative_angle:
    new_feature = self.modify_heading_angles(new_heading, new_feature, new_navigable, new_objects)
new_orientation = f'\nheading: {new_heading:.2f}, elevation: {new_elevation:.2f}'
@@ -619,6 +632,14 @@ class NavGPTAgent(BaseAgent):
heading = np.rad2deg(cur_obs['heading'])
elevation = np.rad2deg(cur_obs['elevation'])
orientation = f'\nheading: {heading:.2f}, elevation: {elevation:.2f}'
for vp, data in navigable.items():
    data['wall_distance'] = distances[cur_obs['scan']][cur_obs['viewpoint']][vp]
    print(data['wall_distance'])
if self.config.use_relative_angle:
    feature = self.modify_heading_angles(heading, feature, navigable, objects)
if self.config.use_navigable:
@@ -652,6 +673,12 @@ class NavGPTAgent(BaseAgent):
new_heading = np.rad2deg(new_obs['heading'])
new_elevation = np.rad2deg(new_obs['elevation'])
new_orientation = f'\nheading: {new_heading:.2f}, elevation: {new_elevation:.2f}'
for vp, data in new_navigable.items():
    data['wall_distance'] = distances[new_obs['scan']][new_obs['viewpoint']][vp]
    print(data['wall_distance'])
if self.config.use_relative_angle:
    new_feature = self.modify_heading_angles(new_heading, new_feature, new_navigable, new_objects)
if self.config.use_navigable:
@@ -882,11 +909,16 @@ class NavGPTAgent(BaseAgent):
# we are HERE
feature = init_ob['obs']
navigable = init_ob['candidate']
# distances =
objects = init_ob['objects']
heading = np.rad2deg(init_ob['heading'])
elevation = np.rad2deg(init_ob['elevation'])
orientation = f'\nheading: {heading:.2f}, elevation: {elevation:.2f}'
for vp, data in navigable.items():
    data['wall_distance'] = distances[init_ob['scan']][init_ob['viewpoint']][vp]
    print(data['wall_distance'])
print("use_relative_angle:", self.config.use_relative_angle) print("use_relative_angle:", self.config.use_relative_angle)
print("use_relative_angle:", self.config.use_navigable) print("use_relative_angle:", self.config.use_navigable)
if self.config.use_relative_angle: # True

View File

@@ -244,7 +244,7 @@ Instruction: {action_plan}
Initial Observation: {init_observation}
Thought: I should start navigation according to the instruction, {agent_scratchpad}"""
VLN_GPT35_PROMPT = """As an intelligent embodied agent, you will navigate in an indoor environment to reach a target viewpoint to find the object based on a given instruction, performing the Vision and Language Navigation (VLN) task.
The instruction will let you find all the target objects in a room. You should have a good strategy to check all the objects along the shortest path in the room.
@@ -254,9 +254,14 @@ You will move among static positions within a pre-defined graph, aiming for the
You will receive a trajectory instruction at the start and will have access to step history (your Thought, Action, Action Input and Observation after the Begin! sign) and current viewpoint observation (including scene descriptions, objects, and navigable directions/distances within 3 meters) during navigation. Orientations range from -180 to 180 degrees, with 0 being forward, right 90 rightward, right/left 180 backward, and left 90 leftward.
We also calculate how many meters extend in the direction of each viewpoint before hitting a wall. We hope this distance information helps you understand the spatial layout of the room; please plan an effective exploration strategy based on it.
For example, if there are two viewpoints to choose from (A: 1m, B: 5m) and you have not found the target object yet, viewpoint B is the better choice because it offers more exploration space in which to find the target.
Explore the environment while avoiding revisiting viewpoints by comparing current and previously visited IDs. Most importantly, you must not leave the room, so do not move close to the door.
Notice: You should have a good strategy to check whether the target object exists in this room, and stop once you have explored all viewpoints in this room.
If you think you are moving in circles, please stop and consider whether any other objects may be hidden. If not, please output 'Final Answer: Not found'.
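The exploration hint above amounts to preferring, among unvisited candidates, the one with the largest wall distance when the target has not been found; a minimal sketch with made-up candidates:

# Hypothetical illustration of the heuristic stated in the prompt: among unvisited
# candidates, prefer the one with the largest wall distance (more room to explore).
candidates = {'vp_a': {'wall_distance': 1.0}, 'vp_b': {'wall_distance': 5.0}}  # made-up values
visited = set()
unvisited = {vp: d for vp, d in candidates.items() if vp not in visited}
best = max(unvisited, key=lambda vp: unvisited[vp]['wall_distance'])
print(best)  # vp_b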
@@ -269,13 +274,13 @@ Follow the given format and use provided tools.
Do not fabricate nonexistent viewpoint IDs.
----
Starting below, you should follow this format; do not use any other format:
Instruction: the instruction describing the whole trajectory
Initial Observation: the initial observation of the environment
Thought: you should always think about what to do next and why
Action: the action to take, must be one of the tools [{tool_names}]
Action Input: "Viewpoint ID", you should not choose an object name or anything else; please output only the "Viewpoint ID"
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I found my target object, but I should check whether any other objects may be hidden.