feat: prepare data

This commit is contained in:
Ting-Jun Wang 2024-04-28 20:44:10 +08:00
parent b3fc8a21b8
commit 7454bc15af
Signed by: snsd0805
GPG Key ID: 48D331A3D6160354
3 changed files with 276 additions and 0 deletions

View File

@ -0,0 +1,27 @@
import json
import os
def load_json(filename):
    """Read *filename* and return the decoded JSON value.

    The file is opened as UTF-8 explicitly so that non-ASCII text is decoded
    the same way on every platform instead of depending on the locale's
    default encoding.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.
    """
    with open(filename, encoding='utf-8') as fp:
        return json.load(fp)
def dump_json(data, filename):
    """Serialise *data* as JSON into *filename*, replacing any existing file.

    Args:
        data: JSON-serialisable value to write.
        filename: Destination path; it is opened in write mode.
    """
    with open(filename, mode='w') as out_file:
        json.dump(data, out_file)
# Expand every annotation file in the current directory into one record per
# instruction and write the result next to the input as ``<name>_instr.json``.
for f in os.listdir():
    # Select on the extension rather than on the substring 'json', and skip
    # files this script wrote earlier: their records no longer carry an
    # 'instructions' list, so re-processing them would raise KeyError.
    if not f.endswith('.json') or f.endswith('_instr.json'):
        continue
    data = load_json(f)
    new_data = []
    for i in data:
        for index, instr in enumerate(i['instructions']):
            # Shallow copy: each output record shares the item's other fields
            # and swaps the instruction list for a single instruction.
            new_i = i.copy()
            new_i['instruction'] = instr
            new_i['instr_id'] = f'{new_i["id"]}_{index}'
            del new_i['instructions']
            new_data.append(new_i)
    # Only the trailing extension is rewritten, e.g. 'val.json' -> 'val_instr.json'.
    dump_json(new_data, f[:-len('.json')] + '_instr.json')

76
nav_src/extract_obj.py Normal file
View File

@ -0,0 +1,76 @@
import openai
from openai import OpenAI
import os
import json
# API key for the OpenAI client, read from the environment at import time.
# A missing OPENAI_API_KEY variable raises KeyError here.
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
# One-shot prompt asking for the target object of a housework instruction.
# The literal `___input___` is a placeholder that the script replaces with the
# instruction text before sending the prompt.
PROMPT_TEMPLATE = """
You are a good housework assistant, please help me to find the target object in a housework instruction.
You will receive a housework instruction, and you need to return the target object and its location.
For example:
Input:
{
"instruction": "Enter the kitchen and pick up the cup on the table"
}
Output:
{
"target": "the cup on the table"
}
Now, it's your turn:
Input:
{
"instruction": ___input___
}
Output:
"""
def query(openai: "OpenAI", prompt: str):
    """Send *prompt* to the chat-completions endpoint and parse the JSON reply.

    Args:
        openai: Client object used to issue the request. The parameter name
            shadows the ``openai`` module inside this function; it is kept
            unchanged so existing callers are unaffected.
        prompt: Text sent as the user message.

    Returns:
        A ``(payload, total_tokens)`` tuple: the reply content decoded with
        ``json.loads`` and the token count reported in ``response.usage``.

    Raises:
        json.JSONDecodeError: If the reply content is not valid JSON.
    """
    # Issue the request through the client that was passed in, not through a
    # module-level `client`, so the function also works when it is imported.
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": "Please output JSON."},
            {"role": "user", "content": prompt},
        ],
    )
    return (
        json.loads(response.choices[0].message.content),
        response.usage.total_tokens,
    )
def load_json(filename):
    """Read *filename* and return the decoded JSON value.

    The file is opened as UTF-8 explicitly so that non-ASCII text is decoded
    the same way on every platform instead of depending on the locale's
    default encoding.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.
    """
    with open(filename, encoding='utf-8') as fp:
        return json.load(fp)
def dump_json(data, filename):
    """Serialise *data* as JSON into *filename*, replacing any existing file.

    Args:
        data: JSON-serialisable value to write.
        filename: Destination path; it is opened in write mode.
    """
    with open(filename, mode='w') as out_file:
        json.dump(data, out_file)
if __name__ == '__main__':
    # Annotate every instruction record with the target object extracted by
    # the model, then write the annotated list to the working directory.

    # Upper bound on API calls per instruction. Without it, a reply that never
    # contains 'target' would make the script loop (and bill) forever.
    MAX_ATTEMPTS = 5

    client = OpenAI(api_key=OPENAI_API_KEY)
    data = load_json('../datasets/REVERIE/annotations/REVERIE_val_unseen_instr.json')
    for i in data:
        instr = i['instruction']
        # Insert the instruction as a JSON string literal (quoted and escaped)
        # so the filled-in input has the same shape as the quoted example in
        # the template, even when the instruction itself contains quotes.
        prompt = PROMPT_TEMPLATE.replace(
            '___input___', json.dumps(instr, ensure_ascii=False)
        )
        for _ in range(MAX_ATTEMPTS):
            response, _tokens = query(client, prompt)
            if 'target' in response:
                target = response['target']
                break
        else:
            # Every attempt came back without a 'target' key.
            raise RuntimeError(
                f'no target returned after {MAX_ATTEMPTS} attempts for: {instr!r}'
            )
        i['target'] = target
        print(instr)
        print(target)
        print()
    dump_json(data, 'new_REVERIE_val_unseen_instr.json')

173
nav_src/filter_distance.py Normal file
View File

@ -0,0 +1,173 @@
from glob import glob
import math
from collections import defaultdict
import json
def load_json(filename):
    """Read *filename* and return the decoded JSON value.

    The file is opened as UTF-8 explicitly so that non-ASCII text is decoded
    the same way on every platform instead of depending on the locale's
    default encoding.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.
    """
    with open(filename, encoding='utf-8') as fp:
        return json.load(fp)
def load_floorplan(house_glob='/home/snsd0805/code/research/VLN/base_dir/v1/scans/*/house_segmentations/*.house'):
    """Parse ``.house`` segmentation files into per-scan lookup tables.

    Each matched file is read line by line; the first whitespace-separated
    token selects the record type and only ``R`` (region), ``P`` (panorama
    node), ``C`` (category) and ``O`` (object) records are used.
    NOTE(review): the layout looks like the Matterport3D house-segmentation
    format -- confirm against the dataset documentation.

    Args:
        house_glob: Glob pattern selecting the ``.house`` files. The scan id
            is taken from the third-from-last ``/``-separated path component,
            so the pattern should keep the
            ``<scan>/house_segmentations/<file>.house`` layout.

    Returns:
        A 4-tuple of dicts, each keyed by scan id:

        * node id -> region index,
        * region index -> ``{'name': <room name>, 'floor': <level index>}``,
        * region index -> sorted list of object category names,
        * node id -> ``(px, py, pz)`` position, kept as the strings read
          from the file.

    Raises:
        ValueError: If a used record does not have the expected column count.
        KeyError: If a region label or an object's category index is unknown.
    """
    region_label_lookup = load_region_label_lookup()

    node_region_lookups = {}
    region_room_lookups = {}
    region_object_lookups = {}
    node_locations_lookups = {}

    for house_file in glob(house_glob):
        scan_id = house_file.split("/")[-3]

        room_bboxes = {}
        node_id_regions = {}
        node_locations = {}
        region_objects = defaultdict(list)
        object_name_lookup = {}

        # The context manager closes the handle even if a record fails to parse.
        with open(house_file) as house_fp:
            for line in house_fp:
                house_line_cols = line.split()
                if not house_line_cols:
                    # A blank line has no record type to dispatch on.
                    continue
                house_line_type = house_line_cols[0]
                house_line_cols = house_line_cols[1:]

                # The full-width unpacking below doubles as a column-count
                # check, so every field is named even when it is not used.
                if house_line_type == 'R':
                    region_index, level_index, _, _, label, px, py, pz, xlo, ylo, zlo, xhi, yhi, zhi, height,_,_,_,_ = house_line_cols
                    room_bboxes[region_index] = {
                        'name': region_label_lookup[label],
                        'floor': level_index
                    }
                elif house_line_type == 'P':
                    node_id, panorama_index, region_index, _, px, py, pz, _,_,_,_,_ = house_line_cols
                    node_id_regions[node_id] = region_index
                    node_locations[node_id] = (px, py, pz)
                elif house_line_type == 'C':
                    category_index, category_mapping_index, category_mapping_name, mpcat40_index, mpcat40_name, _,_,_,_,_ = house_line_cols
                    object_name_lookup[category_index] = category_mapping_name
                elif house_line_type == 'O':
                    object_index, region_index, category_index, px, py, pz, a0x, a0y, a0z, a1x, a1y, a1z, r0, r1, r2, _, _, _, _, _, _, _, _ = house_line_cols
                    # '-1' marks an object without a category or a region.
                    if category_index == '-1' or region_index == '-1':
                        continue
                    region_objects[region_index].append(object_name_lookup[category_index])

        region_room_lookups[scan_id] = room_bboxes
        node_region_lookups[scan_id] = node_id_regions
        node_locations_lookups[scan_id] = node_locations
        region_object_lookups[scan_id] = {k: sorted(v) for k, v in region_objects.items()}

    return node_region_lookups, region_room_lookups, region_object_lookups, node_locations_lookups
def load_region_label_lookup():
    """Return a new dict mapping one-character region codes to room names.

    A fresh dict is built on every call, so callers may modify the result.
    """
    code_name_pairs = (
        ('a', 'bathroom'),
        ('b', 'bedroom'),
        ('c', 'closet'),
        ('d', 'dining room'),
        ('e', 'entryway'),      # /foyer/lobby (should be the front door, not any door)
        ('f', 'familyroom'),    # a room that a family hangs out in, not any area with couches
        ('g', 'garage'),
        ('h', 'hallway'),
        ('i', 'library'),       # like a library at a university, not an individual study
        ('j', 'laundryroom'),   # /mudroom (place where people do laundry, etc.)
        ('k', 'kitchen'),
        ('l', 'living room'),   # the main "showcase" living room, not any area with couches
        ('m', 'meeting room'),  # /conferenceroom
        ('n', 'lounge'),        # relaxing area with comfy chairs/couches that is not the family or living room
        ('o', 'office'),        # usually for an individual, or a small set of people
        ('p', 'porch'),         # /terrace/deck/driveway (must be outdoors on ground level)
        ('r', 'recreation'),    # /game (should have recreational objects, like pool table, etc.)
        ('s', 'stairs'),
        ('t', 'toilet'),        # a small room with ONLY a toilet
        ('u', 'utility room'),  # /toolroom
        ('v', 'tv'),            # must have theater-style seating
        ('w', 'gym'),           # workout/gym/exercise
        ('x', 'outdoor'),       # areas containing grass, plants, bushes, trees, etc.
        ('y', 'balcony'),       # must be outside and must not be on ground floor
        ('z', 'other room'),    # clearly a room, but the function is not clear
        ('B', 'bar'),
        ('C', 'classroom'),
        ('D', 'dining booth'),
        ('S', 'spa'),           # /sauna
        ('Z', 'junk'),          # reflections of mirrors, random points floating in space, etc.
        ('-', 'no label'),
    )
    return dict(code_name_pairs)
def get_distance(nodeA, nodeB):
    """Return the Euclidean distance between two positions in the x-y plane.

    Only the first two components of each argument are used; any further
    component is ignored. Components may be numbers or numeric strings, since
    each one is passed through ``float``.
    """
    ax, ay = float(nodeA[0]), float(nodeA[1])
    bx, by = float(nodeB[0]), float(nodeB[1])
    delta_x = ax - bx
    delta_y = ay - by
    return math.sqrt(delta_x ** 2 + delta_y ** 2)
# For every annotation, find the node in the same region as the path's final
# node that lies farthest from it (at least 3.0 away in the x-y plane) and
# keep the annotation with that node as 'start' and the final node as 'stop'.
node_region, region_room, region_obj, node_locations = load_floorplan()
data = load_json('../datasets/REVERIE/annotations/REVERIE_val_unseen_instr.json')

counter = 0
new_data = []
for item in data:
    scan = item['scan']
    stop = item['path'][-1]

    scan_regions = node_region[scan]
    room_id = scan_regions[stop]
    room_name = region_room[scan][room_id]
    scan_locations = node_locations[scan]
    stop_location = scan_locations[stop]

    # Scan the nodes of the stop node's region for the farthest candidate.
    max_distance, max_node = 0.0, None
    for node_id, region_id in scan_regions.items():
        if region_id != room_id:
            continue
        distance = get_distance(stop_location, scan_locations[node_id])
        if distance > max_distance and distance >= 3.0:
            max_distance, max_node = distance, node_id

    # Annotations without a far-enough node in the region are dropped.
    if not max_node:
        continue

    print(scan, stop, room_name, room_id)
    counter += 1
    print(max_node, max_distance)
    print()

    new_item = item.copy()
    new_item['start'] = max_node
    new_item['stop'] = stop
    new_data.append(new_item)

print(counter)
with open('../datasets/REVERIE/annotations/new_REVERIE_val_unseen_instr.json', 'w') as out_file:
    json.dump(new_data, out_file)