In [5]:
import json
import os
from datetime import datetime
from typing import List, Optional, Tuple

import matplotlib.pyplot as plt
import openai
import spacy
from openai import OpenAI
from PIL import Image


# ==============================================
# OpenAI API key
# The key is read from the OPENAI_API_KEY environment variable so the secret
# never has to be pasted into (or committed with) this notebook. It falls
# back to an empty string when the variable is not set.
# ==============================================
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')

# Name of the REVERIE split processed by the cells below
# ('train', 'val_unseen' or 'val_seen').
DATASET = 'val_seen'
# NOTE: this version processes the adversarial instruction file generated by H.T., not the original REVERIE data.
REVERIE_TRAIN_JSON_FILE = '/data/Matterport3DSimulator-duet/VLN-DUET/datasets/REVERIE/annotations/REVERIE_train.json'
REVERIE_VAL_UNSEEN_JSON_FILE = '/data/Matterport3DSimulator-duet/VLN-DUET/datasets/REVERIE/annotations/REVERIE_val_unseen.json'
REVERIE_VAL_SEEN_JSON_FILE = '/data/Matterport3DSimulator-duet/VLN-DUET/datasets/REVERIE/annotations/REVERIE_val_seen.json'
BBOXES_JSON_FILE = '/data/Matterport3DSimulator-duet/VLN-DUET/datasets/REVERIE/annotations/BBoxes.json'
NAVIGABLE_PATH = '/data/NavGPT_data/navigable'
SKYBOX_PATH = '/home/snsd0805/code/research/VLN/base_dir/v1/scans'

In [2]:
# Echo the configured training-annotation path as this cell's output.
REVERIE_TRAIN_JSON_FILE

'/data/Matterport3DSimulator-duet/VLN-DUET/datasets/REVERIE/annotations/REVERIE_train.json'

In [3]:
# Download the small English spaCy model that later cells load with spacy.load("en_core_web_sm").
! python3 -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
 Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[K |████████████████████████████████| 12.8 MB 1.6 MB/s eta 0:00:01
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [6]:
def load_json(fn):
    """Read the JSON document stored at path `fn` and return the decoded object.

    The file is opened explicitly as UTF-8 so that decoding does not depend
    on the platform's default text encoding.
    """
    with open(fn, encoding='utf-8') as f:
        return json.load(f)

def dump_json(data, fn, force=False):
    """Serialize `data` as JSON into the file at path `fn`.

    Args:
        data: JSON-serializable object to write.
        fn: destination path.
        force: when False (the default) an existing file is never overwritten.

    Raises:
        AssertionError: if `fn` already exists and `force` is False.
    """
    # Explicit check instead of an `assert` statement: assert statements are
    # removed when Python runs with -O, which would silently disable the
    # overwrite protection. The exception type is kept for compatibility.
    if not force and os.path.exists(fn):
        raise AssertionError(f'{fn} already exists; pass force=True to overwrite it')
    with open(fn, 'w', encoding='utf-8') as f:
        json.dump(data, f)

In [None]:
# ============================================================
# Use ChatGPT to extract the room and the target object
# ============================================================

In [7]:
# Few-shot prompt skeleton for the extraction query. The two `{}` slots are
# filled in by get_template() with one worked example; `___inputs___` stays
# in the text as a literal placeholder that the caller substitutes later.
TEMPLATE = '''
Please extract the target room, the goal object, and the relations between the goal object and other reference objects.
Example:
inputs:
{}
outputs:
{}
Now it is your turn:
inputs: 
___inputs___
outputs:
'''


def get_template() -> str:
    """Return TEMPLATE with its example input/output slots filled in.

    The example output is rendered as 4-space-indented JSON. The returned
    string still contains the `___inputs___` placeholder.
    """
    example_instruction = 'In the kitchen above the brown shelves and above the basket on the top shelf there is a ceiling beam Please firm this beam'
    example_answer = {
        'room': 'kitchen',
        'goal': 'ceiling beam',
        'goal_relations': [
            {'relation': relation, 'reference': reference}
            for relation, reference in (
                ('above', 'brown shelves'),
                ('above', 'basket'),
                ('on the top', 'shelf'),
            )
        ],
    }
    return TEMPLATE.format(example_instruction, json.dumps(example_answer, indent=4))

In [8]:
def query(openai: 'OpenAI', prompt: str, model: str = "gpt-3.5-turbo-1106") -> Tuple[dict, int]:
    """Send `prompt` as a JSON-mode chat completion and return the parsed reply.

    Args:
        openai: client object used to issue the request. The parameter name is
            kept for backward compatibility; note that it shadows the `openai`
            module inside this function.
        prompt: text sent as the user message.
        model: name of the chat model to query.

    Returns:
        A tuple `(reply, total_tokens)`: the reply content decoded with
        `json.loads`, and the `usage.total_tokens` value of the response.

    Raises:
        json.JSONDecodeError: if the reply content is not valid JSON.
    """
    # Use the client that was passed in. The previous body read a global named
    # `client`, so the argument was ignored and the function failed whenever
    # that global was not defined.
    response = openai.chat.completions.create(
        model=model,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": "Please output JSON."},
            {"role": "user", "content": prompt},
        ],
    )

    return (
        json.loads(response.choices[0].message.content),
        response.usage.total_tokens,
    )

In [9]:
# Load the three REVERIE annotation files, then print the number of entries
# in each one (train, val_unseen, val_seen), one count per line.
reverie_train, reverie_val_unseen, reverie_val_seen = map(
    load_json,
    (REVERIE_TRAIN_JSON_FILE, REVERIE_VAL_UNSEEN_JSON_FILE, REVERIE_VAL_SEEN_JSON_FILE),
)
print(len(reverie_train), len(reverie_val_unseen), len(reverie_val_seen), sep='\n')

2996
1096
750


In [10]:
# OpenAI GPT to extract the relationship between objects
client = OpenAI(api_key=OPENAI_API_KEY)
template = get_template()

logs = {}
tokens = 0

# Resolve the split explicitly. An unrecognised DATASET used to fall through to
# val_seen while the output file and the log keys were still labelled with the
# unrecognised name, silently producing mislabelled data.
if DATASET == 'train':
    dataset = reverie_train
elif DATASET == 'val_unseen':
    dataset = reverie_val_unseen
elif DATASET == 'val_seen':
    dataset = reverie_val_seen
else:
    raise ValueError(
        f"Unknown DATASET {DATASET!r}; expected 'train', 'val_unseen' or 'val_seen'"
    )
print(DATASET)

try:
    for idx, r in enumerate(dataset):
        # Checkpoint and report progress every 10 entries.
        if idx % 10 == 0:
            dump_json(logs, f'gpt_outputs_{DATASET}.json', force=True)
            print('\n', end='')
            print(datetime.now(), idx, '/', len(dataset))
            print(f" use {tokens} tokens")

        # Only the first instruction of each entry is sent to the model.
        instruction = r['instructions'][0]
        prompt = template.replace("___inputs___", instruction)
        response, total_tokens = query(client, prompt)
        tokens += total_tokens

        query_name = f'reverie__{DATASET}__{idx}__0'
        logs[query_name] = response
        print('.', end='')
finally:
    # Always persist what has been collected so far, so a failed request does
    # not discard the answers gathered since the last checkpoint.
    dump_json(logs, f'gpt_outputs_{DATASET}.json', force=True)


val_seen

2024-01-28 16:45:49.797527 0 / 750
 use 0 tokens
..........
2024-01-28 16:46:04.932742 10 / 750
 use 2480 tokens
..........
2024-01-28 16:46:23.465098 20 / 750
 use 5206 tokens
..........
2024-01-28 16:46:38.780172 30 / 750
 use 7705 tokens
..........
2024-01-28 16:46:52.773380 40 / 750
 use 10161 tokens
..........
2024-01-28 16:47:05.971142 50 / 750
 use 12556 tokens
..........
2024-01-28 16:47:19.135185 60 / 750
 use 14983 tokens
..........
2024-01-28 16:47:31.982835 70 / 750
 use 17418 tokens
..........
2024-01-28 16:47:44.867015 80 / 750
 use 19848 tokens
..........
2024-01-28 16:47:58.097011 90 / 750
 use 22322 tokens
..........
2024-01-28 16:48:09.810830 100 / 750
 use 24670 tokens
..........
2024-01-28 16:48:25.051226 110 / 750
 use 27178 tokens
..........
2024-01-28 16:48:39.153634 120 / 750
 use 29675 tokens
..........
2024-01-28 16:48:52.691056 130 / 750
 use 32131 tokens
..........
2024-01-28 16:49:08.061178 140 / 750
 use 34721 tokens
..........
2024-01-28 16:49:2

In [11]:
# Report the running token total held in `tokens`.
print(f'Used {tokens} tokens')

Used 185368 tokens


In [None]:
# ============================================================
# Method #1: use the ChatGPT API to replace the target object
# ============================================================

In [17]:
def show_skybox(scan: str, viewpoint: str) -> None:
    """Display the small skybox image of `viewpoint` in `scan`, then print its path.

    The image is read from
    `{SKYBOX_PATH}/{scan}/matterport_skybox_images/{viewpoint}_skybox_small.jpg`.
    """
    img_path = f'{SKYBOX_PATH}/{scan}/matterport_skybox_images/{viewpoint}_skybox_small.jpg'
    # Open the image in a context manager so its file handle is released once
    # the figure has been drawn.
    with Image.open(img_path) as im:
        plt.imshow(im)
        plt.show()
    print(img_path)


def get_navigable_viewpoints(scan: str, viewpoint: str) -> list:
    '''
    List the viewpoints reachable from `viewpoint`.

    Reads `{NAVIGABLE_PATH}/{scan}_navigable.json` and returns the keys stored
    under the entry for `viewpoint`, in file order.
    '''
    navigable = load_json(f'{NAVIGABLE_PATH}/{scan}_navigable.json')
    return [neighbour for neighbour, _ in navigable[viewpoint].items()]

def get_objects_in_the_rooms(bboxes: dict, scan: str, viewpoint: str) -> list:
    '''
    Return the distinct object names recorded for one viewpoint.

    Looks up the `{scan}_{viewpoint}` entry of `bboxes`, takes the 'name' of
    every object in it with '#' replaced by a space, and drops duplicates.
    The order of the returned list is unspecified.

    According to the original author these are the "touchable" objects defined
    by the REVERIE dataset, i.e. objects close to the viewpoint (maybe 1m).
    '''
    entries = bboxes[f'{scan}_{viewpoint}']
    return list({entry['name'].replace('#', ' ') for entry in entries.values()})

def get_avoid_objs(bboxes: dict, scan: str, viewpoint: str) -> list:
    '''
    Collect the names of the objects around a viewpoint and around its neighbours.

    The neighbouring viewpoints come from get_navigable_viewpoints(); the object
    names of `viewpoint` itself and of every neighbour are gathered with
    get_objects_in_the_rooms() and de-duplicated. The original author treats
    the result as all the bounding boxes visible in the room, i.e. the objects
    that a generated replacement target must not coincide with.

    The order of the returned list is unspecified.
    '''
    neighbours = get_navigable_viewpoints(scan, viewpoint)
    names = [
        name
        for vp in (viewpoint, *neighbours)
        for name in get_objects_in_the_rooms(bboxes, scan, vp)
    ]
    return list(set(names))


In [13]:
# Few-shot prompt skeleton for the target-replacement query. The two `{}`
# slots receive a worked example from get_replace_object_template();
# `___inputs___` stays in the text as a literal placeholder for the caller.
REPLACE_OBJECT_TEMPLATE = '''
You should replace the target object and return me a new instruction.
Notice: the new target object must be suitable for this room (room name), and it must doesn't look like any objects(different type) in avoid_objects list.
Some times, you can change the verb which suitable for the new target objects.

Example:
inputs:
{}
outputs:
{}
Now it is your turn:
inputs: 
___inputs___
outputs:
'''


def get_replace_object_template() -> str:
    """Return REPLACE_OBJECT_TEMPLATE with its example slots filled in.

    Both the example input and the example output are rendered as
    4-space-indented JSON. The returned string still contains the
    `___inputs___` placeholder.
    """
    example_request = {
        'instruction': 'Go to bedroom at the back left side of the house and turn on the lamp nearest the bedroom door',
        'room_name': 'bedroom',
        'avoid_objects': ['window', 'lamp', 'picture', 'bed'],
        'target_object': 'lamp',
    }
    example_reply = {
        'new_instruction': 'Go to bedroom at the back left side of the house and take the mirror nearest the bedroom door',
    }
    rendered = [json.dumps(example, indent=4) for example in (example_request, example_reply)]
    return REPLACE_OBJECT_TEMPLATE.format(*rendered)


print(get_replace_object_template())


You should replace the target object and return me a new instruction.
Notice: the new target object must be suitable for this room (room name), and it must doesn't look like any objects(different type) in avoid_objects list.
Some times, you can change the verb which suitable for the new target objects.

Example:
inputs:
{
 "instruction": "Go to bedroom at the back left side of the house and turn on the lamp nearest the bedroom door",
 "room_name": "bedroom",
 "avoid_objects": [
 "window",
 "lamp",
 "picture",
 "bed"
 ],
 "target_object": "lamp"
}
outputs:
{
 "new_instruction": "Go to bedroom at the back left side of the house and take the mirror nearest the bedroom door"
}
Now it is your turn:
inputs: 
___inputs___
outputs:



In [14]:
# Ask the model for an instruction with a replaced target object for every
# entry of `dataset`, and store it next to the earlier extraction result.
# NOTE: `dataset` is not defined in this cell; it must already have been
# selected by the extraction cell for the same DATASET.
logs = load_json(f'gpt_outputs_{DATASET}.json')
nlp = spacy.load("en_core_web_sm")
bboxes = load_json(BBOXES_JSON_FILE)

client = OpenAI(api_key=OPENAI_API_KEY)
template = get_replace_object_template()

tokens = 0
for idx, r in enumerate(dataset):
    # Checkpoint and report progress every 10 entries.
    if idx % 10 == 0:
        dump_json(logs, f'gpt_outputs_{DATASET}.json', force=True)
        print('\n', end='')
        print(datetime.now(), idx, '/', len(dataset))
        print(f" use {tokens} tokens")

    log = logs[f'reverie__{DATASET}__{idx}__0']
    scan = r['scan']
    # The last viewpoint of the path is used as the target location.
    target_vp = r['path'][-1]
    avoid_objs = get_avoid_objs(bboxes, scan, target_vp)
    try:
        inputs = {
            'instruction': r['instructions'][0],
            'room_name': log['room'],
            'avoid_objects': avoid_objs,
            'target_object': log['goal']
        }
        prompt = template.replace('___inputs___', json.dumps(inputs, indent=4))
        response, total_tokens = query(client, prompt)
        tokens += total_tokens

        log['instruction'] = r['instructions'][0]
        log['new_instruction'] = response['new_instruction']
        print('.', end='')
    except Exception as exc:
        # Best effort: skip entries that cannot be processed (e.g. the earlier
        # extraction has no 'room'/'goal', or the reply lacks
        # 'new_instruction'), but report why. Catching Exception rather than
        # using a bare `except:` keeps Ctrl-C (KeyboardInterrupt) able to stop
        # the loop.
        print(f'FAILED ({type(exc).__name__}: {exc})', log)

dump_json(logs, f'gpt_outputs_{DATASET}.json', force=True)



2024-01-28 17:02:12.982758 0 / 750
 use 0 tokens
..........
2024-01-28 17:02:21.516631 10 / 750
 use 3207 tokens
..........
2024-01-28 17:02:30.687271 20 / 750
 use 6593 tokens
..........
2024-01-28 17:02:38.764408 30 / 750
 use 9909 tokens
..........
2024-01-28 17:02:46.047457 40 / 750
 use 13275 tokens
..........
2024-01-28 17:02:53.616187 50 / 750
 use 16683 tokens
..........
2024-01-28 17:03:00.754883 60 / 750
 use 19998 tokens
..........
2024-01-28 17:03:10.039162 70 / 750
 use 23367 tokens
..........
2024-01-28 17:03:18.122595 80 / 750
 use 26633 tokens
..........
2024-01-28 17:03:26.190372 90 / 750
 use 29905 tokens
..........
2024-01-28 17:03:33.605275 100 / 750
 use 33258 tokens
..........
2024-01-28 17:03:41.508207 110 / 750
 use 36639 tokens
..........
2024-01-28 17:03:49.187187 120 / 750
 use 39933 tokens
..........
2024-01-28 17:03:57.650307 130 / 750
 use 43265 tokens
..........
2024-01-28 17:04:05.802962 140 / 750
 use 46670 tokens
..........
2024-01-28 17:04:14.862087 

In [None]:
# ============================================================
# Transform the log into REVERIE data 
# ============================================================

In [None]:
# Copy each generated instruction into the second instruction slot of the
# corresponding dataset entry, printing the details for manual inspection.
# NOTE: this mutates the entries of `dataset` in place.
for index, original_data in enumerate(dataset):
    log = logs[f'reverie__{DATASET}__{index}__0']
    print(original_data['scan'])
    print(original_data['path'][-1])
    print(original_data['instructions'][0])
    print(original_data['instructions'][1])
    print(log)
    if 'new_instruction' in log:
        original_data['instructions'][1] = log['new_instruction']
        print(original_data['instructions'][1])
        print(get_avoid_objs(bboxes, original_data['scan'], original_data['path'][-1]))
    else:
        # The previous `del log` only unbound the loop-local name; it removed
        # nothing from `logs` or `dataset`. State explicitly that the entry is
        # kept unchanged.
        # NOTE(review): if such entries should be dropped from the exported
        # dataset instead, that filtering still has to be added.
        print('(no new_instruction for this entry; instructions[1] left unchanged)')
    print()


In [None]:
# Write `dataset` to REVERIE_{DATASET}.json.adversarial; force=True allows overwriting an existing file.
dump_json(dataset, f'REVERIE_{DATASET}.json.adversarial', force=True)

In [None]:
# ============================================================
# Method #2: NLP tool to build the swap pool
# (Not implemented yet)
# ============================================================

In [None]:
def get_subject(nlp, text) -> Optional[str]:
    """Extract a normalised head-noun phrase from `text` via dependency labels.

    Every token whose dependency label contains "ROOT" or "obj" is a
    candidate, and the last candidate in the document wins. A candidate is
    prefixed with the token just before it when that token is a 'compound'
    modifier, or the adjective 'living', unless the modifier is 'level' or
    'floor'.

    Args:
        nlp: callable returning an indexable sequence of tokens that expose
            `.text` and `.dep_` (e.g. a loaded spaCy pipeline).
        text: the phrase to analyse.

    Returns:
        The subject with every occurrence of 'area', newlines and spaces
        removed (so "laundry room" becomes "laundryroom"), or None when no
        candidate token is found.
    """
    doc = nlp(text)

    subject = None
    for index, token in enumerate(doc):
        if "ROOT" not in token.dep_ and 'obj' not in token.dep_:
            continue

        subject = token.text
        # Only look back when there is a previous token. At index 0,
        # doc[index - 1] would wrap around to the LAST token and could attach
        # an unrelated modifier.
        if index == 0:
            continue

        previous = doc[index - 1]
        is_modifier = previous.dep_ == 'compound' or (
            previous.dep_ == 'amod' and previous.text == 'living'
        )
        if is_modifier and previous.text not in ('level', 'floor'):
            subject = previous.text + " " + token.text

    if subject:
        subject = subject.replace('area', '').replace('\n', '').replace(' ', '')
    return subject

In [7]:
# Count how often each normalised room name occurs in the extraction results
# for the training split, then list the rooms seen at least 5 times.
logs = load_json('gpt_outputs.json')
nlp = spacy.load("en_core_web_sm")
bboxes = load_json(BBOXES_JSON_FILE)

rooms = {}

for idx, r in enumerate(reverie_train):
    try:
        room_descr = logs[f'reverie__train__{idx}__0']['room']
        room = get_subject(nlp, room_descr)
        rooms[room] = rooms.get(room, 0) + 1
    except Exception:
        # Best effort: entries without a usable 'room' value are reported and
        # skipped. Catching Exception rather than using a bare `except:` keeps
        # Ctrl-C working, and `.get` keeps the report itself from raising when
        # the log entry is missing altogether.
        print("NO ROOM:", logs.get(f'reverie__train__{idx}__0'))
        print(" ", r['instructions'][0])

# Keep only the rooms that occur at least 5 times.
selected_rooms = {name for name, count in rooms.items() if count >= 5}

for i in selected_rooms:
    print(i, rooms[i])

NO ROOM: {'room': None, 'goal': 'open double doors', 'goal_relations': [{'relation': 'to the left', 'reference': None}]}
 Go through the open double doors to the left
NO ROOM: {'room': None, 'goal': 'cabinet handle', 'goal_relations': [{'relation': 'bottom left', 'reference': 'washer and dryer'}, {'relation': 'beside', 'reference': 'washer and dryer'}, {'relation': 'on the second level', 'reference': None}]}
 tighten the screws in the cabinet handle that is the bottom left beside the washer and dryer on the second level
NO ROOM: {'error': 'Unable to extract the goal object and its relations. Please provide a valid input.'}
 Move to the closet and take the dress of the rack
NO ROOM: {'room': ['kitchen', 'family room'], 'goal': 'table', 'goal_relations': [{'relation': 'between', 'reference': 'white chairs'}]}
 Walk through the kitchen into the family room and touch the table between the white chairs
stairs 22
foyer 10
laundryroom 182
lounge 126
floor 22
porch 22
masterbathroom 10
dining 

In [None]:
# ============================================================
# DEBUG 
# ============================================================

In [None]:
# Reload the training annotations from REVERIE_TRAIN_JSON_FILE.
reverie_train = load_json(REVERIE_TRAIN_JSON_FILE)
