update

feat: evaluation in result
feat: steps counter in llm-success
2024-06-10 18:52:39 +08:00 · 2024-05-06 16:42:40 +08:00 · 2024-05-06 16:41:07 +08:00
7 changed files with 65 additions and 34 deletions
--- a/datasets/REVERIE/annotations/to_instr_type.py
+++ b/datasets/REVERIE/annotations/to_instr_type.py
@ -11,7 +11,8 @@ def dump_json(data, filename):
        json.dump(data, fp)

 for f in os.listdir():
-    if 'json' in f:
+    if 'navgpt' in f:
+        print(f)
        data = load_json(f)
        
        new_data = []
@ -19,7 +20,8 @@ for f in os.listdir():
            for index, instr in enumerate(i['instructions']):
                new_i = i.copy()
                new_i['instruction'] = instr
-                new_i['instr_id'] = f'{new_i["id"]}_{index}'
+                # new_i['instr_id'] = f'{new_i["id"]}_{index}'
+                new_i['new_reverie_id'] = f'{new_i["new_reverie_id"]}_{index}'
                del new_i['instructions']

                new_data.append(new_i)
--- a/nav_src/NavGPT.py
+++ b/nav_src/NavGPT.py
@ -10,7 +10,7 @@ from parser import parse_args
 from env import REVERIENavBatch
 from agent import NavGPTAgent

-def build_dataset(args, data_limit=100):
+def build_dataset(args):

    feat_db = ImageObservationsDB(args.obs_dir, args.obs_summary_dir, args.obj_dir)
    print(feat_db)
@ -26,7 +26,7 @@ def build_dataset(args, data_limit=100):
        )
        val_env = dataset_class(
            feat_db, val_instr_data, args.connectivity_dir, args.navigable_dir,
-            batch_size=args.batch_size, seed=args.seed, name=split, data_limit=data_limit
+            batch_size=args.batch_size, seed=args.seed, name=split
        )   # evaluation using all objects
        val_envs[split] = val_env

@ -96,7 +96,7 @@ def valid_from_file(args, val_envs):
 def main():
    args = parse_args()

-    val_envs = build_dataset(args, data_limit=100)
+    val_envs = build_dataset(args)

    if args.valid_file is not None:
        valid_from_file(args, val_envs)
--- a/nav_src/agent.py
+++ b/nav_src/agent.py
@ -46,6 +46,7 @@ EXCEPTION_TOOL_NAME = "_Exception"
 MAX_SCRATCHPAD_LENGTH = 7000

 FINAL_STOP_POINT = ""
+FINAL_STATE = ""
 SUCCESS = 0
 TEMP_STEPS_COUNTER = 0
 STEPS_COUNTER = 0
@ -73,6 +74,7 @@ class NavGPTOutputParser(AgentOutputParser):
        global TEMP_STEPS_COUNTER
        global SUCCESS
        global NOW_LOCATION
+        global FINAL_STATE
        includes_answer = FINAL_ANSWER_ACTION in text
        regex = (
            r"Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*\"?([a-fA-F0-9]{32})\"?"
@ -100,6 +102,7 @@ class NavGPTOutputParser(AgentOutputParser):
            print(f"SUCCESS = {SUCCESS}")

            NOW_LOCATION = tool_input
+            TEMP_STEPS_COUNTER += 1
            print(f"NOW_LOCATION = {NOW_LOCATION}")


@ -119,7 +122,18 @@ class NavGPTOutputParser(AgentOutputParser):

            return AgentAction(action, tool_input, text)
        elif includes_answer:
+            is_STOP = 'Finished' in text
+            print("FINAL: ", is_STOP)
+
+            if is_STOP:
+                FINAL_STATE = 'stop'
+            else:
+                FINAL_STATE = 'not found'
+
+
            if NOW_LOCATION == FINAL_STOP_POINT:
+                STEPS_COUNTER += TEMP_STEPS_COUNTER
+                TEMP_STEPS_COUNTER = 0
                SUCCESS += 1
                print(f"SUCCESS = {SUCCESS}")
            else:
@ -128,6 +142,7 @@ class NavGPTOutputParser(AgentOutputParser):
                print(f"{NOW_LOCATION}_{type(NOW_LOCATION)}")
                print(f"{FINAL_STOP_POINT}_{type(FINAL_STOP_POINT)}")
                print(f"SUCCESS = {SUCCESS}")
+                print(f"STEPS_COUNTER  = {STEPS_COUNTER}")
            return AgentFinish(
                {"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
            )
@ -377,7 +392,7 @@ class NavGPTAgent(BaseAgent):
        """Initialize the trajectory with the given observation."""
        # Record the navigation path
        self.traj = [{
-            'instr_id': ob['instr_id'],
+            'instr_id': ob['new_reverie_id'],
            'path': [[ob['start']]],
            'details': [],
        } for ob in obs]
@ -617,7 +632,7 @@ class NavGPTAgent(BaseAgent):

        tools = [
            self.action_maker,
-            self.back_tracer
+            self.back_tracer,
        ]

        if self.config.use_tool_chain:
@ -695,7 +710,10 @@ class NavGPTAgent(BaseAgent):
        new_obs = self.env.step(actions)[0]
        new_heading = np.rad2deg(new_obs['heading'])
        # Record the trajectory
-        self.traj[0]['path'].append(self.env.env.sims[0].gmap.bfs_shortest_path(cur_obs['viewpoint'], actions[0])[1:])
+        try:
+            self.traj[0]['path'].append(self.env.env.sims[0].gmap.bfs_shortest_path(cur_obs['viewpoint'], actions[0])[1:])
+        except:
+            None
        # Calculate the turned angle
        turned_angle = new_heading - cur_heading
        # Generate action description
@ -712,9 +730,12 @@ class NavGPTAgent(BaseAgent):

        global FINAL_STOP_POINT
        global TEMP_STEPS_COUNTER
+        global STEPS_COUNTER
+        global FINAL_STATE
        global NOW_LOCATION

-        FINAL_STOP_POINT = obs[0]['stop']
+        FINAL_STOP_POINT = obs[0]['gt_path'][-1]
+        FINAL_STATE = ""

        if TEMP_STEPS_COUNTER != 0:
            TEMP_STEPS_COUNTER = 0
@ -727,7 +748,6 @@ class NavGPTAgent(BaseAgent):
        print(obs[0]['obs'])
        print(obs[0]['obs_summary'])
        print(obs[0]['objects'])
-        print(obs[0]['instr_id'])
        print(obs[0]['scan'])
        print(obs[0]['viewpoint'])
        print(obs[0]['heading'])
@ -736,9 +756,9 @@ class NavGPTAgent(BaseAgent):
        print(obs[0]['instruction'])
        print(obs[0]['gt_path'])
        print(obs[0]['path_id'])
-        print(obs[0]['stop'])
        print(obs[0]['start'])
        print(obs[0]['target'])
+        print(obs[0]['new_reverie_id'])
        NOW_LOCATION = obs[0]['start']


@ -819,6 +839,11 @@ class NavGPTAgent(BaseAgent):
                }
            output = self.agent_executor(input)

+            if 'stop' in FINAL_STATE:
+                self.traj[i]['final_state'] = 'stop'
+            else:
+                self.traj[i]['final_state'] = 'not found'
+
            self.traj[i]['llm_output'] = output['output']
            self.traj[i]['action_plan'] = output['action_plan']
            # extract agent's thought from llm output
--- a/nav_src/agent_base.py
+++ b/nav_src/agent_base.py
@ -18,6 +18,7 @@ class BaseAgent(object):
                output[-1]['llm_output'] = v['llm_output']
                output[-1]['llm_thought'] = v['llm_thought']
                output[-1]['llm_observation'] = v['llm_observation']
+                output[-1]['final_state'] = v['final_state']
        return output

    def rollout(self, **args):
@ -50,6 +51,8 @@ class BaseAgent(object):
        else:   # Do a full round
            while True:
                for traj in self.rollout(**kwargs):
+                    print(f"ID: {traj['instr_id']}")
+                    print(self.results.keys())
                    if traj['instr_id'] in self.results:
                        looped = True
                    else:
--- a/nav_src/env.py
+++ b/nav_src/env.py
@ -143,7 +143,6 @@ class Simulator(object):
            viewpoint_ID: str,
            heading: int, 
            elevation: int,
-            stop: str,
            start: str,
            target: str
        ):
@ -151,7 +150,6 @@ class Simulator(object):
        self.elevation = elevation
        self.scan_ID = scan_ID
        self.viewpoint_ID = viewpoint_ID
-        self.stop = stop
        self.start = start
        self.target = target
        # Load navigable dict
@ -186,7 +184,6 @@ class Simulator(object):
            'heading': self.heading,
            'elevation': self.elevation,
            'candidate': self.candidate,
-            'stop': self.stop,
            'start': self.start,
            'target': self.target
        }
@ -233,9 +230,9 @@ class EnvBatch(object):
    def _make_id(self, scanId, viewpointId):
        return scanId + '_' + viewpointId

-    def newEpisodes(self, scanIds, viewpointIds, headings, stops, starts, targets):
-        for i, (scanId, viewpointId, heading, stop, start, target) in enumerate(zip(scanIds, viewpointIds, headings, stops, starts, targets)):
-            self.sims[i].newEpisode(scanId, viewpointId, heading, 0, stop, start, target)
+    def newEpisodes(self, scanIds, viewpointIds, headings, starts, targets):
+        for i, (scanId, viewpointId, heading, start, target) in enumerate(zip(scanIds, viewpointIds, headings, starts, targets)):
+            self.sims[i].newEpisode(scanId, viewpointId, heading, 0, start, target)

    def getStates(self):
        """
@ -263,7 +260,7 @@ class REVERIENavBatch(object):

    def __init__(
        self, view_db, instr_data, connectivity_dir, navigable_dir,
-        batch_size=1, seed=0, name=None, data_limit=100
+        batch_size=1, seed=0, name=None
    ):
        self.env = EnvBatch(navigable_dir, feat_db=view_db, batch_size=batch_size)
        self.data = instr_data
@ -272,14 +269,15 @@ class REVERIENavBatch(object):
        self.batch_size = batch_size
        self.name = name

-        #self.gt_trajs = self._get_gt_trajs(self.data) # for evaluation
+        self.gt_trajs = self._get_gt_trajs(self.data) # for evaluation

        # use different seeds in different processes to shuffle data
+        '''
        self.seed = seed
        random.seed(self.seed)
        random.shuffle(self.data)
+        '''

-        self.data = self.data[:data_limit]

        self.ix = 0
        self._load_nav_graphs()
@ -288,14 +286,12 @@ class REVERIENavBatch(object):
        print('%s loaded with %d instructions, using splits: %s' % (
            self.__class__.__name__, len(self.data), self.name))

-    '''
    def _get_gt_trajs(self, data):
        gt_trajs = {
-            x['instr_id']: (x['scan'], x['path']) \
+            x['new_reverie_id']: (x['scan'], x['path']) \
                for x in data if len(x['path']) > 1
        }
        return gt_trajs
-    '''

    def size(self):
        return len(self.data)
@ -350,7 +346,7 @@ class REVERIENavBatch(object):
                'obs' : feature["detail"],
                'obs_summary' : feature["summary"],
                'objects' : feature["objects"],
-                'instr_id' : item['instr_id'],
+                # 'instr_id' : item['instr_id'],
                # 'action_plan' : item['action_plan'],
                'scan' : state['scanID'],
                'viewpoint' : state['viewpointID'],
@ -360,8 +356,8 @@ class REVERIENavBatch(object):
                'instruction' : item['instruction'],
                'gt_path' : item['path'],
                'path_id' : item['path_id'],
-                'stop': item['stop'],
                'start': item['start'],
+                'new_reverie_id': item['new_reverie_id'],
                'target': item['target']
            }
            # RL reward. The negative distance between the state and the final state
@ -384,10 +380,9 @@ class REVERIENavBatch(object):
        scanIds = [item['scan'] for item in self.batch]
        viewpointIds = [item['path'][0] for item in self.batch]
        headings = [item['heading'] for item in self.batch]
-        stops = [item['stop'] for item in self.batch]
        starts = [item['start'] for item in self.batch]
        targets = [item['target'] for item in self.batch]
-        self.env.newEpisodes(scanIds, starts, headings, stops, starts, targets)
+        self.env.newEpisodes(scanIds, starts, headings, starts, targets)
        return self._get_obs()

    def step(self, next_viewpoint_IDs):
@ -412,7 +407,7 @@ class REVERIENavBatch(object):
        shortest_distances = self.shortest_distances[scan]

        path = sum(pred_path, [])
-        assert gt_path[0] == path[0], 'Result trajectories should include the start position'
+        # assert gt_path[0] == path[0], 'Result trajectories should include the start position'

        nearest_position = self._get_nearest(shortest_distances, gt_path[-1], path)

@ -426,7 +421,7 @@ class REVERIENavBatch(object):
        gt_lengths = np.sum([shortest_distances[a][b] for a, b in zip(gt_path[:-1], gt_path[1:])])
        
        scores['success'] = float(scores['nav_error'] < ERROR_MARGIN)
-        scores['spl'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
+        # scores['spl'] = scores['success'] * gt_lengths / max(scores['trajectory_lengths'], gt_lengths, 0.01)
        scores['oracle_success'] = float(scores['oracle_error'] < ERROR_MARGIN)

        scores.update(
@ -459,7 +454,7 @@ class REVERIENavBatch(object):
            'oracle_error': np.mean(metrics['oracle_error']),
            'sr': np.mean(metrics['success']) * 100,
            'oracle_sr': np.mean(metrics['oracle_success']) * 100,
-            'spl': np.mean(metrics['spl']) * 100,
+            # 'spl': np.mean(metrics['spl']) * 100,
            'nDTW': np.mean(metrics['nDTW']) * 100,
            'SDTW': np.mean(metrics['SDTW']) * 100,
            'CLS': np.mean(metrics['CLS']) * 100,
--- a/nav_src/parser.py
+++ b/nav_src/parser.py
@ -7,8 +7,8 @@ def parse_args():

    # datasets
    parser.add_argument('--root_dir', type=str, default='../datasets')
-    parser.add_argument('--dataset', type=str, default='r2r', choices=['r2r', 'r4r'])
-    parser.add_argument('--output_dir', type=str, default='../datasets/R2R/exprs/gpt-3.5-turbo', help='experiment id')
+    parser.add_argument('--dataset', type=str, default='reverie', choices=['r2r', 'r4r', 'reverie'])
+    parser.add_argument('--output_dir', type=str, default='../datasets/REVERIE/exprs/gpt-3.5-turbo', help='experiment id')
    # parser.add_argument('--output_dir', type=str, default='../datasets/R2R/exprs/LlaMA-2-13b-test', help='experiment id')
    parser.add_argument('--seed', type=int, default=0)

@ -21,7 +21,7 @@ def parse_args():
    parser.add_argument('--max_iterations', type=int, default=25)

    # General config
-    parser.add_argument('--iters', type=int, default=10, help='number of iterations to run')
+    parser.add_argument('--iters', type=int, default=None, help='number of iterations to run')
    # parser.add_argument('--iters', type=int, default=None, help='number of iterations to run')
    parser.add_argument('--max_scratchpad_length', type=int, default=1000, help='max number of steps in an episode')
    parser.add_argument('--test', action='store_true', default=False)
--- a/nav_src/prompt/planner_prompt.py
+++ b/nav_src/prompt/planner_prompt.py
@ -250,9 +250,10 @@ You will receive a trajectory instruction at the start and will have access to s

 Explore the environment while avoiding revisiting viewpoints by comparing current and previously visited IDs. Reach within 3 meters of the instructed destination, and if it's visible but no objects are detected, move closer.

-At each step, determine if you've reached the destination.
+At each step, determine if you've reached the destination(If the object is more than three meters away from you, you are not considered to have reached the destination).
 If yes, stop and output 'Final Answer: Finished!'.
 If not, continue by considering your location and the next viewpoint based on the instruction, using the action_maker tool.
+And if you explored all room, you think this object doesn't exist in this room. stop and output 'Final Answer: Not found!'.
 Show your reasoning in the Thought section.

 Follow the given format and use provided tools.
@ -271,6 +272,11 @@ Observation: the result of the action
 ... (this Thought/Action/Action Input/Observation can repeat N times)
 Thought: I have reached the destination, I can stop.
 Final Answer: Finished!
+
+or
+
+Thought: I cannot find the object in this room, I should stop.
+Final Answer: Not found!
 ----

 Begin!
Author	SHA1	Message	Date
Ting-Jun Wang	a85950f06f	update	2024-06-10 18:52:39 +08:00
Ting-Jun Wang	5cbd75711e	feat: evaluation in result	2024-05-06 16:42:40 +08:00
Ting-Jun Wang	64fbce018a	feat: steps counter in llm-success	2024-05-06 16:41:07 +08:00