Compare commits

...

5 Commits

SHA1        Message                              Date
cd2e0a30e4  fix: EMNLP's prompts                 2024-06-16 23:43:47 +08:00
a85950f06f  update                               2024-06-10 18:52:39 +08:00
5cbd75711e  feat: evaluation in result           2024-05-06 16:42:40 +08:00
64fbce018a  feat: steps counter in llm-success   2024-05-06 16:41:07 +08:00
68330c5163  feat: evaluate llm success           2024-05-05 23:02:06 +08:00
7 changed files with 100 additions and 53 deletions

View File

@@ -11,7 +11,8 @@ def dump_json(data, filename):
json.dump(data, fp)
for f in os.listdir():
if 'json' in f:
if 'navgpt' in f:
print(f)
data = load_json(f)
new_data = []
@@ -19,7 +20,8 @@ for f in os.listdir():
for index, instr in enumerate(i['instructions']):
new_i = i.copy()
new_i['instruction'] = instr
new_i['instr_id'] = f'{new_i["id"]}_{index}'
# new_i['instr_id'] = f'{new_i["id"]}_{index}'
new_i['new_reverie_id'] = f'{new_i["new_reverie_id"]}_{index}'
del new_i['instructions']
new_data.append(new_i)
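
Note: the loop above fans each multi-instruction REVERIE record out into one record per instruction, suffixing the index onto new_reverie_id. A minimal self-contained sketch of that transformation (field names follow the diff; the sample record and its values are hypothetical):

item = {
    'new_reverie_id': '4170',  # hypothetical id
    'instructions': ['Go to the kitchen.', 'Walk to the fridge.'],
}

expanded = []
for index, instr in enumerate(item['instructions']):
    new_i = item.copy()
    new_i['instruction'] = instr
    new_i['new_reverie_id'] = f"{new_i['new_reverie_id']}_{index}"
    del new_i['instructions']
    expanded.append(new_i)

# expanded[1]['new_reverie_id'] == '4170_1'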

View File

@@ -10,7 +10,7 @@ from parser import parse_args
from env import REVERIENavBatch
from agent import NavGPTAgent
def build_dataset(args, data_limit=100):
def build_dataset(args):
feat_db = ImageObservationsDB(args.obs_dir, args.obs_summary_dir, args.obj_dir)
print(feat_db)
@@ -26,7 +26,7 @@ def build_dataset(args, data_limit=100):
)
val_env = dataset_class(
feat_db, val_instr_data, args.connectivity_dir, args.navigable_dir,
batch_size=args.batch_size, seed=args.seed, name=split, data_limit=data_limit
batch_size=args.batch_size, seed=args.seed, name=split
) # evaluation using all objects
val_envs[split] = val_env
@@ -96,7 +96,7 @@ def valid_from_file(args, val_envs):
def main():
args = parse_args()
val_envs = build_dataset(args, data_limit=100)
val_envs = build_dataset(args)
if args.valid_file is not None:
valid_from_file(args, val_envs)
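
With data_limit gone, build_dataset now evaluates every instruction in each split. If a capped debug run is still wanted, the cap can be applied to the instruction data before the envs are built; a sketch under that assumption (limit_instr_data is a hypothetical helper, not part of this diff):

def limit_instr_data(val_instr_data, data_limit=None):
    # None keeps the full split, matching the new default behaviour;
    # an int restores the old truncated evaluation set.
    return val_instr_data if data_limit is None else val_instr_data[:data_limit]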

View File

@@ -46,9 +46,11 @@ EXCEPTION_TOOL_NAME = "_Exception"
MAX_SCRATCHPAD_LENGTH = 7000
FINAL_STOP_POINT = ""
FINAL_STATE = ""
SUCCESS = 0
TEMP_STEPS_COUNTER = 0
STEPS_COUNTER = 0
NOW_LOCATION = None
MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE = (
"Invalid Format: Missing 'Action:' after 'Thought:"
@@ -71,16 +73,18 @@ class NavGPTOutputParser(AgentOutputParser):
global STEPS_COUNTER
global TEMP_STEPS_COUNTER
global SUCCESS
# includes_answer = FINAL_ANSWER_ACTION in text
global NOW_LOCATION
global FINAL_STATE
includes_answer = FINAL_ANSWER_ACTION in text
regex = (
r"Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*\"?([a-fA-F0-9]{32})\"?"
)
action_match = re.search(regex, text, re.DOTALL)
if action_match:
# if includes_answer:
# raise OutputParserException(
# f"{FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE}: {text}"
# )
if includes_answer:
raise OutputParserException(
f"{FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE}: {text}"
)
action = action_match.group(1).strip()
action_input = action_match.group(2)
tool_input = action_input.strip(" ")
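
The regex above only fires when Action Input is a 32-character hex viewpoint ID; a quick self-contained check of what it accepts (the tool name and viewpoint ID are invented for illustration):

import re

regex = (
    r"Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*\"?([a-fA-F0-9]{32})\"?"
)
text = (
    "Thought: the bedroom is ahead.\n"
    "Action: action_maker\n"
    'Action Input: "0ad0253c4d9d4a0e91a447fd59e4bdcb"\n'
)
m = re.search(regex, text, re.DOTALL)
# m.group(1) == 'action_maker'; m.group(2) is the 32-hex viewpoint ID
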
@@ -92,13 +96,17 @@ class NavGPTOutputParser(AgentOutputParser):
print("ACTION: ", action_input)
print(f"MY FINAL_STOP_POINT = {FINAL_STOP_POINT}")
TEMP_STEPS_COUNTER += 1
# TEMP_STEPS_COUNTER += 1
print(f"TEMP_STEPS_COUNT = {TEMP_STEPS_COUNTER}")
print(f"STEPS_COUNT = {STEPS_COUNTER}")
print(f"SUCCESS = {SUCCESS}")
NOW_LOCATION = tool_input
TEMP_STEPS_COUNTER += 1
print(f"NOW_LOCATION = {NOW_LOCATION}")
'''
if FINAL_STOP_POINT in text:
STEPS_COUNTER += TEMP_STEPS_COUNTER
SUCCESS += 1
@@ -110,14 +118,34 @@ class NavGPTOutputParser(AgentOutputParser):
return AgentFinish(
{"output": action_input}, text
)
'''
return AgentAction(action, tool_input, text)
'''
elif includes_answer:
is_STOP = 'Finished' in text
print("FINAL: ", is_STOP)
if is_STOP:
FINAL_STATE = 'stop'
else:
FINAL_STATE = 'not found'
if NOW_LOCATION == FINAL_STOP_POINT:
STEPS_COUNTER += TEMP_STEPS_COUNTER
TEMP_STEPS_COUNTER = 0
SUCCESS += 1
print(f"SUCCESS = {SUCCESS}")
else:
print("NOT SUCCESS")
print(f"{NOW_LOCATION}_{type(NOW_LOCATION)}")
print(f"{FINAL_STOP_POINT}_{type(FINAL_STOP_POINT)}")
print(f"SUCCESS = {SUCCESS}")
print(f"STEPS_COUNTER = {STEPS_COUNTER}")
return AgentFinish(
{"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
)
'''
if not re.search(r"Action\s*\d*\s*:[\s]*(.*?)", text, re.DOTALL):
raise OutputParserException(
@@ -364,7 +392,7 @@ class NavGPTAgent(BaseAgent):
"""Initialize the trajectory with the given observation."""
# Record the navigation path
self.traj = [{
'instr_id': ob['instr_id'],
'instr_id': ob['new_reverie_id'],
'path': [[ob['start']]],
'details': [],
} for ob in obs]
@@ -604,7 +632,7 @@ class NavGPTAgent(BaseAgent):
tools = [
self.action_maker,
self.back_tracer
self.back_tracer,
]
if self.config.use_tool_chain:
@@ -682,7 +710,10 @@ class NavGPTAgent(BaseAgent):
new_obs = self.env.step(actions)[0]
new_heading = np.rad2deg(new_obs['heading'])
# Record the trajectory
self.traj[0]['path'].append(self.env.env.sims[0].gmap.bfs_shortest_path(cur_obs['viewpoint'], actions[0])[1:])
try:
self.traj[0]['path'].append(self.env.env.sims[0].gmap.bfs_shortest_path(cur_obs['viewpoint'], actions[0])[1:])
except Exception:
pass  # no shortest path between the two viewpoints; skip recording this hop
# Calculate the turned angle
turned_angle = new_heading - cur_heading
# Generate action description
@@ -699,8 +730,12 @@ class NavGPTAgent(BaseAgent):
global FINAL_STOP_POINT
global TEMP_STEPS_COUNTER
global STEPS_COUNTER
global FINAL_STATE
global NOW_LOCATION
FINAL_STOP_POINT = obs[0]['stop']
FINAL_STOP_POINT = obs[0]['gt_path'][-1]
FINAL_STATE = ""
if TEMP_STEPS_COUNTER != 0:
TEMP_STEPS_COUNTER = 0
@@ -713,7 +748,6 @@ class NavGPTAgent(BaseAgent):
print(obs[0]['obs'])
print(obs[0]['obs_summary'])
print(obs[0]['objects'])
print(obs[0]['instr_id'])
print(obs[0]['scan'])
print(obs[0]['viewpoint'])
print(obs[0]['heading'])
@@ -722,9 +756,10 @@ class NavGPTAgent(BaseAgent):
print(obs[0]['instruction'])
print(obs[0]['gt_path'])
print(obs[0]['path_id'])
print(obs[0]['stop'])
print(obs[0]['start'])
print(obs[0]['target'])
print(obs[0]['new_reverie_id'])
NOW_LOCATION = obs[0]['start']
print("==")
@@ -804,6 +839,11 @@ class NavGPTAgent(BaseAgent):
}
output = self.agent_executor(input)
if 'stop' in FINAL_STATE:
self.traj[i]['final_state'] = 'stop'
else:
self.traj[i]['final_state'] = 'not found'
self.traj[i]['llm_output'] = output['output']
self.traj[i]['action_plan'] = output['action_plan']
# extract agent's thought from llm output
@@ -816,4 +856,3 @@ class NavGPTAgent(BaseAgent):
self.traj[i]['llm_observation'].append(observation)
return self.traj
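
The parser changes above thread the success metric through module-level globals (SUCCESS, STEPS_COUNTER, TEMP_STEPS_COUNTER, NOW_LOCATION, FINAL_STOP_POINT). A self-contained restatement of that bookkeeping with the globals folded into one object (a sketch of the logic as it reads in this diff, not code from the repository):

class SuccessTracker:
    def __init__(self):
        self.success = 0             # episodes that ended at the goal viewpoint
        self.steps_counter = 0       # steps summed over successful episodes
        self.temp_steps_counter = 0  # steps taken in the episode in progress
        self.now_location = None
        self.final_stop_point = None

    def new_episode(self, start, gt_path):
        # Episode setup as in rollout(): the goal is the last ground-truth node.
        self.final_stop_point = gt_path[-1]
        self.now_location = start
        self.temp_steps_counter = 0

    def record_action(self, viewpoint_id):
        # One parsed action_maker call counts as one step.
        self.now_location = viewpoint_id
        self.temp_steps_counter += 1

    def record_final_answer(self, text):
        # 'Finished' marks a claimed stop; anything else means 'not found'.
        # Only episodes that finish at the goal contribute their step count.
        final_state = 'stop' if 'Finished' in text else 'not found'
        if self.now_location == self.final_stop_point:
            self.success += 1
            self.steps_counter += self.temp_steps_counter
            self.temp_steps_counter = 0
        return final_state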

View File

@@ -18,6 +18,7 @@ class BaseAgent(object):
output[-1]['llm_output'] = v['llm_output']
output[-1]['llm_thought'] = v['llm_thought']
output[-1]['llm_observation'] = v['llm_observation']
output[-1]['final_state'] = v['final_state']
return output
def rollout(self, **args):
@@ -50,6 +51,8 @@ class BaseAgent(object):
else: # Do a full round
while True:
for traj in self.rollout(**kwargs):
print(f"ID: {traj['instr_id']}")
print(self.results.keys())
if traj['instr_id'] in self.results:
looped = True
else:
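
For context, the two debug prints above sit in the standard full-round pattern: rollouts repeat until an already-seen instr_id comes back, which signals one complete pass over the (now unshuffled) data. A condensed sketch of that loop (assuming an agent exposing the rollout interface shown here):

def run_full_round(agent):
    # Keep rolling out until an instr_id repeats: the wrap-around is the
    # 'looped' signal that every trajectory has been collected once.
    results = {}
    looped = False
    while not looped:
        for traj in agent.rollout():
            if traj['instr_id'] in results:
                looped = True
            else:
                results[traj['instr_id']] = traj
    return results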

View File

@@ -143,7 +143,6 @@ class Simulator(object):
viewpoint_ID: str,
heading: int,
elevation: int,
stop: str,
start: str,
target: str
):
@@ -151,7 +150,6 @@
self.elevation = elevation
self.scan_ID = scan_ID
self.viewpoint_ID = viewpoint_ID
self.stop = stop
self.start = start
self.target = target
# Load navigable dict
@@ -162,14 +160,14 @@
self.navigable_dict = {}
for start, v in navigable_dict.items():
self.navigable_dict[start] = {}
print("BEFORE: ", len(navigable_dict[start]))
# print("BEFORE: ", len(navigable_dict[start]))
for to, _v in navigable_dict[start].items():
start_region = self.node_region[scan_ID][start]
to_region = self.node_region[scan_ID][to]
if start_region == to_region:
self.navigable_dict[start][to] = _v
print(start_region, to_region)
print("AFTER: ", len(self.navigable_dict[start]))
# print(start_region, to_region)
# print("AFTER: ", len(self.navigable_dict[start]))
# Get candidate
self.getCandidate()
@@ -186,7 +184,6 @@
'heading': self.heading,
'elevation': self.elevation,
'candidate': self.candidate,
'stop': self.stop,
'start': self.start,
'target': self.target
}
@@ -233,9 +230,9 @@ class EnvBatch(object):
def _make_id(self, scanId, viewpointId):
return scanId + '_' + viewpointId
def newEpisodes(self, scanIds, viewpointIds, headings, stops, starts, targets):
for i, (scanId, viewpointId, heading, stop, start, target) in enumerate(zip(scanIds, viewpointIds, headings, stops, starts, targets)):
self.sims[i].newEpisode(scanId, viewpointId, heading, 0, stop, start, target)
def newEpisodes(self, scanIds, viewpointIds, headings, starts, targets):
for i, (scanId, viewpointId, heading, start, target) in enumerate(zip(scanIds, viewpointIds, headings, starts, targets)):
self.sims[i].newEpisode(scanId, viewpointId, heading, 0, start, target)
def getStates(self):
"""
@@ -263,7 +260,7 @@ class REVERIENavBatch(object):
def __init__(
self, view_db, instr_data, connectivity_dir, navigable_dir,
batch_size=1, seed=0, name=None, data_limit=100
batch_size=1, seed=0, name=None
):
self.env = EnvBatch(navigable_dir, feat_db=view_db, batch_size=batch_size)
self.data = instr_data
@@ -272,14 +269,15 @@
self.batch_size = batch_size
self.name = name
#self.gt_trajs = self._get_gt_trajs(self.data) # for evaluation
self.gt_trajs = self._get_gt_trajs(self.data) # for evaluation
# use different seeds in different processes to shuffle data
'''
self.seed = seed
random.seed(self.seed)
random.shuffle(self.data)
'''
self.data = self.data[:data_limit]
self.ix = 0
self._load_nav_graphs()
@@ -288,14 +286,12 @@
print('%s loaded with %d instructions, using splits: %s' % (
self.__class__.__name__, len(self.data), self.name))
'''
def _get_gt_trajs(self, data):
gt_trajs = {
x['instr_id']: (x['scan'], x['path']) \
x['new_reverie_id']: (x['scan'], x['path']) \
for x in data if len(x['path']) > 1
}
return gt_trajs
'''
def size(self):
return len(self.data)
@@ -350,7 +346,7 @@
'obs' : feature["detail"],
'obs_summary' : feature["summary"],
'objects' : feature["objects"],
'instr_id' : item['instr_id'],
# 'instr_id' : item['instr_id'],
# 'action_plan' : item['action_plan'],
'scan' : state['scanID'],
'viewpoint' : state['viewpointID'],
@@ -360,8 +356,8 @@
'instruction' : item['instruction'],
'gt_path' : item['path'],
'path_id' : item['path_id'],
'stop': item['stop'],
'start': item['start'],
'new_reverie_id': item['new_reverie_id'],
'target': item['target']
}
# RL reward. The negative distance between the state and the final state
@@ -384,10 +380,9 @@
scanIds = [item['scan'] for item in self.batch]
viewpointIds = [item['path'][0] for item in self.batch]
headings = [item['heading'] for item in self.batch]
stops = [item['stop'] for item in self.batch]
starts = [item['start'] for item in self.batch]
targets = [item['target'] for item in self.batch]
self.env.newEpisodes(scanIds, starts, headings, stops, starts, targets)
self.env.newEpisodes(scanIds, starts, headings, starts, targets)
return self._get_obs()
def step(self, next_viewpoint_IDs):
@@ -412,7 +407,7 @@
shortest_distances = self.shortest_distances[scan]
path = sum(pred_path, [])
assert gt_path[0] == path[0], 'Result trajectories should include the start position'
# assert gt_path[0] == path[0], 'Result trajectories should include the start position'
nearest_position = self._get_nearest(shortest_distances, gt_path[-1], path)
@@ -426,7 +421,7 @@
gt_lengths = np.sum([shortest_distances[a][b] for a, b in zip(gt_path[:-1], gt_path[1:])])
scores['success'] = float(scores['nav_error'] < ERROR_MARGIN)
scores['spl'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
# scores['spl'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
scores['oracle_success'] = float(scores['oracle_error'] < ERROR_MARGIN)
scores.update(
@@ -459,7 +454,7 @@
'oracle_error': np.mean(metrics['oracle_error']),
'sr': np.mean(metrics['success']) * 100,
'oracle_sr': np.mean(metrics['oracle_success']) * 100,
'spl': np.mean(metrics['spl']) * 100,
# 'spl': np.mean(metrics['spl']) * 100,
'nDTW': np.mean(metrics['nDTW']) * 100,
'SDTW': np.mean(metrics['SDTW']) * 100,
'CLS': np.mean(metrics['CLS']) * 100,
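
On the scoring side, SPL is commented out while success remains defined by nav_error against ERROR_MARGIN. For reference, the usual VLN definitions of the two error terms used above (a sketch; shortest_distances is the precomputed all-pairs graph distance and ERROR_MARGIN is the standard 3-meter threshold):

ERROR_MARGIN = 3.0  # meters

def nav_scores(shortest_distances, gt_path, path):
    goal = gt_path[-1]
    nav_error = shortest_distances[path[-1]][goal]                 # distance at the final stop
    oracle_error = min(shortest_distances[v][goal] for v in path)  # best distance along the way
    return {
        'nav_error': nav_error,
        'oracle_error': oracle_error,
        'success': float(nav_error < ERROR_MARGIN),
        'oracle_success': float(oracle_error < ERROR_MARGIN),
    }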

View File

@@ -7,8 +7,8 @@ def parse_args():
# datasets
parser.add_argument('--root_dir', type=str, default='../datasets')
parser.add_argument('--dataset', type=str, default='r2r', choices=['r2r', 'r4r'])
parser.add_argument('--output_dir', type=str, default='../datasets/R2R/exprs/gpt-3.5-turbo', help='experiment id')
parser.add_argument('--dataset', type=str, default='reverie', choices=['r2r', 'r4r', 'reverie'])
parser.add_argument('--output_dir', type=str, default='../datasets/REVERIE/exprs/gpt-3.5-turbo', help='experiment id')
# parser.add_argument('--output_dir', type=str, default='../datasets/R2R/exprs/LlaMA-2-13b-test', help='experiment id')
parser.add_argument('--seed', type=int, default=0)
@@ -21,7 +21,7 @@ def parse_args():
parser.add_argument('--max_iterations', type=int, default=25)
# General config
parser.add_argument('--iters', type=int, default=10, help='number of iterations to run')
parser.add_argument('--iters', type=int, default=None, help='number of iterations to run')
# parser.add_argument('--iters', type=int, default=None, help='number of iterations to run')
parser.add_argument('--max_scratchpad_length', type=int, default=1000, help='max number of steps in an episode')
parser.add_argument('--test', action='store_true', default=False)

View File

@@ -244,15 +244,16 @@ Instruction: {action_plan}
Initial Observation: {init_observation}
Thought: I should start navigation according to the instruction, {agent_scratchpad}"""
VLN_GPT35_PROMPT = """As an intelligent embodied agent, you will navigate an indoor environment to reach a target viewpoint based on a given instruction, performing the Vision and Language Navigation (VLN) task. You'll move among static positions within a pre-defined graph, aiming for minimal steps.
VLN_GPT35_PROMPT = """As an intelligent embodied agent, you will navigate an indoor environment to reach a target viewpoint based on a given instruction, performing the Vision and Language Navigation (VLN) task. The instruction may be either feasible or infeasible (i.e., the specified object might not be found in the environment). You will move among static positions within a pre-defined graph, aiming for the nearest position to the object if the object is present.
You will receive a trajectory instruction at the start and will have access to step history (your Thought, Action, Action Input and Observation after the Begin! sign) and current viewpoint observation (including scene descriptions, objects, and navigable directions/distances within 3 meters) during navigation. Orientations range from -180 to 180 degrees, with 0 being forward, right 90 rightward, right/left 180 backward, and left 90 leftward.
Explore the environment and don't stay at the original point. Keep Walking! Reach within 3 meters of the instructed destination, and if it's visible but no objects are detected, move closer.
If you find the object but I haven't said you can stop, you cannot say you have finished the task! Keep exploring the nearby area.
continue by considering your location and the next viewpoint based on the instruction, using the action_maker tool.
Explore the environment while avoiding revisiting viewpoints by comparing current and previously visited IDs. Get as close to the instructed destination as possible. The task will fail if you do not come within 3 meters of the instructed destination, even if it is observable. Therefore, if the destination is visible but you do not see the object within 3 meters, move closer.
At each step, determine if you've reached the destination (if the object is more than three meters away from you, you are not considered to have reached the destination).
If yes, stop and output 'Final Answer: Finished!'.
If not, continue by considering your location and the next viewpoint based on the instruction, using the action_maker tool.
And if you have explored all rooms and think this object doesn't exist in the environment, stop and output 'Final Answer: Not found!'.
If you find that another room seems to be the closest match to the instruction but no viewpoint can access that room, please output "Final Answer: Not found!"
Show your reasoning in the Thought section.
Follow the given format and use provided tools.
@@ -266,12 +267,19 @@ Instruction: the instruction describing the whole trajectory
Initial Observation: the initial observation of the environment
Thought: you should always think about what to do next and why
Action: the action to take, must be one of the tools [{tool_names}]
Action Input: "Viewpoint ID" but do not stay in the original viewpoint
Action Input: "Viewpoint ID"
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I have reached the destination, I can stop.
Final Answer: Finished!
or
Thought: I cannot find the object in this room, I should stop.
Final Answer: Not found!
----
Begin!
Instruction: {action_plan}
Initial Observation: {init_observation}
Thought: I should start navigation according to the instruction, {agent_scratchpad}"""
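
The revised prompt allows two terminal outputs, 'Final Answer: Finished!' and 'Final Answer: Not found!', and the parser keys on the literal 'Finished' to set FINAL_STATE. A minimal sketch of that mapping (same literals as the prompt; the function name is hypothetical):

def classify_final_answer(text):
    # 'Finished' anywhere in the final answer means the agent claims it
    # reached the target; any other final answer is treated as 'not found'.
    return 'stop' if 'Finished' in text else 'not found'

assert classify_final_answer('Final Answer: Finished!') == 'stop'
assert classify_final_answer('Final Answer: Not found!') == 'not found'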