From 25885375050f5192115aabf140d8087150057a19 Mon Sep 17 00:00:00 2001
From: Yicong Hong
Date: Thu, 14 Jan 2021 16:15:01 +1100
Subject: [PATCH] update readme

---
 README.md                            | 37 +++++++++++-----------------
 r2r_src/vlnbert/vlnbert_OSCAR.py     |  2 --
 r2r_src/vlnbert/vlnbert_PREVALENT.py |  1 -
 r2r_src/vlnbert/vlnbert_init.py      |  3 ---
 4 files changed, 15 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index 66d93ee..20c8dbe 100644
--- a/README.md
+++ b/README.md
@@ -1,45 +1,38 @@
-# Entity-Graph-VLN
+# Recurrent-VLN-BERT
 
-Code of the NeurIPS 2020 paper:
-**Language and Visual Entity Relationship Graph for Agent Navigation**<br>
-[**Yicong Hong**](http://www.yiconghong.me/), [Cristian Rodriguez-Opazo](https://crodriguezo.github.io/), [Yuankai Qi](https://sites.google.com/site/yuankiqi/home), [Qi Wu](http://www.qi-wu.me/), [Stephen Gould](http://users.cecs.anu.edu.au/~sgould/)
+Code of the Recurrent-VLN-BERT paper:
+**A Recurrent Vision-and-Language BERT for Navigation**<br>
+[**Yicong Hong**](http://www.yiconghong.me/), [Qi Wu](http://www.qi-wu.me/), [Yuankai Qi](https://sites.google.com/site/yuankiqi/home), [Cristian Rodriguez-Opazo](https://crodriguezo.github.io/), [Stephen Gould](http://users.cecs.anu.edu.au/~sgould/)
 
-[[Paper](https://papers.nips.cc/paper/2020/hash/56dc0997d871e9177069bb472574eb29-Abstract.html)] [[Supplemental](https://papers.nips.cc/paper/2020/file/56dc0997d871e9177069bb472574eb29-Supplemental.pdf)] [[GitHub](https://github.com/YicongHong/Entity-Graph-VLN)]
-
-
-
+[[Paper & Appendices](https://arxiv.org/abs/2011.13922) | [GitHub](https://github.com/YicongHong/Entity-Graph-VLN)] ## Prerequisites ### Installation Install the [Matterport3D Simulator](https://github.com/peteanderson80/Matterport3DSimulator). +Please find the versions of packages in our environment [here](https://github.com/YicongHong/Recurrent-VLN-BERT/blob/main/recurrent-vln-bert.yml). -Please find the versions of packages in our environment [here](https://github.com/YicongHong/Entity-Graph-VLN/blob/master/entity_graph_vln.yml). In particular, we use: -- Python 3.6.9 -- NumPy 1.18.1 -- OpenCV 3.4.2 -- PyTorch 1.3.0 -- Torchvision 0.4.1 +Install the [Pytorch-Transformers](https://github.com/huggingface/transformers). +In particular, we use [this version](https://github.com/huggingface/transformers/tree/067923d3267325f525f4e46f357360c191ba562e) (same as [OSCAR](https://github.com/microsoft/Oscar)) in our experiments. ### Data Preparation Please follow the instructions below to prepare the data in directories: -- `connectivity` +- MP3D navigability graphs: `connectivity` - Download the [connectivity maps [23.8MB]](https://github.com/peteanderson80/Matterport3DSimulator/tree/master/connectivity). -- `data` +- R2R data: `data` - Download the [R2R data [5.8MB]](https://github.com/peteanderson80/Matterport3DSimulator/tree/master/tasks/R2R/data). - - Download the vocabulary and the [augmented data from EnvDrop [79.5MB]](https://github.com/airsplay/R2R-EnvDrop/tree/master/tasks/R2R/data). -- `img_features` +- Augmented data: `data/prevalent` + - Download the [collected triplets in PREVALENT [1.5GB]](https://zenodo.org/record/4437864/files/prevalent_aug.json?download=1) (pre-processed for easy use). +- MP3D image features: `img_features` - Download the [Scene features [4.2GB]](https://www.dropbox.com/s/85tpa6tc3enl5ud/ResNet-152-places365.zip?dl=1) (ResNet-152-Places365). - - Download the pre-processed [Object features and vocabulary [1.3GB]](https://zenodo.org/record/4310441/files/objects.zip?download=1) ([Caffe Faster-RCNN](https://github.com/peteanderson80/bottom-up-attention)). ### Trained Network Weights -- `snap` - - Download the trained [network weights [146.0MB]](https://zenodo.org/record/4310441/files/snap.zip?download=1) +- Recurrent-VLN-BERT: `snap` + - Download the [trained network weights [2.5GB]](https://zenodo.org/record/4437864/files/snap.zip?download=1) for our OSCAR-based and PREVALENT-based models. 
 
 ## R2R Navigation
diff --git a/r2r_src/vlnbert/vlnbert_OSCAR.py b/r2r_src/vlnbert/vlnbert_OSCAR.py
index 77a263e..c6bd2c1 100644
--- a/r2r_src/vlnbert/vlnbert_OSCAR.py
+++ b/r2r_src/vlnbert/vlnbert_OSCAR.py
@@ -9,8 +9,6 @@ from torch import nn
 import torch.nn.functional as F
 from torch.nn import CrossEntropyLoss, MSELoss
 
-import sys
-sys.path.append('Oscar/Oscar')
 from transformers.pytorch_transformers.modeling_bert import (BertEmbeddings,
     BertSelfAttention, BertAttention, BertEncoder, BertLayer,
     BertSelfOutput, BertIntermediate, BertOutput,
diff --git a/r2r_src/vlnbert/vlnbert_PREVALENT.py b/r2r_src/vlnbert/vlnbert_PREVALENT.py
index f5daa3e..84ee44f 100644
--- a/r2r_src/vlnbert/vlnbert_PREVALENT.py
+++ b/r2r_src/vlnbert/vlnbert_PREVALENT.py
@@ -14,7 +14,6 @@ import torch
 from torch import nn
 from torch.nn import CrossEntropyLoss, MSELoss
 
-sys.path.append('Oscar/Oscar')
 from transformers.pytorch_transformers.modeling_bert import BertPreTrainedModel, BertConfig
 import pdb
 
diff --git a/r2r_src/vlnbert/vlnbert_init.py b/r2r_src/vlnbert/vlnbert_init.py
index 80a7181..3d423a8 100644
--- a/r2r_src/vlnbert/vlnbert_init.py
+++ b/r2r_src/vlnbert/vlnbert_init.py
@@ -1,8 +1,5 @@
 # Recurrent VLN-BERT, 2020, by Yicong.Hong@anu.edu.au
 
-import sys
-sys.path.append('Oscar/Oscar')
-
 from transformers.pytorch_transformers import (BertConfig, BertTokenizer)
 
 def get_tokenizer(args):
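
For reference, the `sys.path.append('Oscar/Oscar')` lines removed above existed only to make an Oscar checkout's bundled copy of pytorch-transformers importable as `transformers.pytorch_transformers`; once the pinned pytorch-transformers version from the README is in place, those imports resolve on their own. Below is a minimal sketch of the resulting import pattern, assuming the pinned copy is importable as `transformers.pytorch_transformers` exactly as the patched files expect; the `bert-base-uncased` checkpoint name and the sample instruction are illustrative, not specified by this patch:

```python
# Minimal sketch: the same imports the patched files use, now without any
# sys.path manipulation. Assumes the pinned pytorch-transformers copy
# (commit 067923d, same as OSCAR) is on the Python path and importable as
# `transformers.pytorch_transformers`.
from transformers.pytorch_transformers import BertConfig, BertTokenizer

# Illustrative usage only; 'bert-base-uncased' is a standard pretrained
# name, not a checkpoint mandated by this patch.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
config = BertConfig.from_pretrained('bert-base-uncased')

# Tokenize a sample R2R-style instruction to confirm the install works.
tokens = tokenizer.tokenize('walk past the sofa and stop at the stairs')
print(tokens[:5], config.hidden_size)
```

If this import fails, the pinned package is not on the Python path, which is exactly the situation the old `sys.path.append('Oscar/Oscar')` hack papered over.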