Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Add VQA V2.0 and Visual Dialog V0.9. #54

Merged
merged 9 commits into from
May 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion parlai/core/dialog_teacher.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def get(self, episode_idx, entry_idx=0):
table['reward'] = entry[2]
if len(entry) > 3:
table['label_candidates'] = entry[3]
if len(entry) > 4 and not opt.get('no_images', False):
if len(entry) > 4 and not self.opt.get('no_images', False):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah good catch thank you

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:)

table['image'] = load_image(self.opt, entry[4])


Expand Down
5 changes: 5 additions & 0 deletions parlai/tasks/visdial/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
86 changes: 86 additions & 0 deletions parlai/tasks/visdial/agents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

from parlai.core.dialog_teacher import DialogTeacher
from .build import build, buildImage

from PIL import Image
import json
import random
import os

def _path(opt):
    """
    Build the VisDial data and the COCO images if needed, then locate them.

    Returns a `(data_path, image_path)` pair: the dialog json file of the
    requested split, and the path prefix to which an image file name is
    appended. Raises RuntimeError when the datatype is neither train nor
    valid.
    """
    build(opt)
    buildImage(opt)

    # Only the part of the datatype before the first ':' selects the split.
    split = opt['datatype'].split(':')[0]

    split_names = {
        'train': ('train', os.path.join('train2014', 'COCO_train2014_')),
        'valid': ('val', os.path.join('val2014', 'COCO_val2014_')),
    }
    if split not in split_names:
        raise RuntimeError('Not valid datatype.')
    suffix, img_suffix = split_names[split]

    root = opt['datapath']
    data_file = 'visdial_0.9_' + suffix + '.json'
    data_path = os.path.join(root, 'VisDial-v0.9', data_file)
    image_path = os.path.join(root, 'COCO-IMG', img_suffix)

    return data_path, image_path


def _image_loader(path):
    """
    Opens the image file at `path` and returns it converted to RGB, in PIL
    Image format.
    """
    image = Image.open(path)
    return image.convert('RGB')


class DefaultTeacher(DialogTeacher):
    """
    This version of VisDial inherits from the core Dialog Teacher, which just
    requires it to define an iterator over its data `setup_data` in order to
    inherit basic metrics, a `act` function, and enables
    Hogwild training with shared memory with no extra work.
    """
    def __init__(self, opt, shared=None):
        """
        Resolve the data and image locations for `opt['datatype']`, store the
        data file in `opt['datafile']`, and defer to the parent constructor.
        """
        self.datatype = opt['datatype']
        data_path, self.image_path = _path(opt)
        # presumably DialogTeacher reads the file to load from
        # opt['datafile'] -- confirm against the parent class
        opt['datafile'] = data_path
        self.id = 'visdial'

        super().__init__(opt, shared)

    def setup_data(self, path):
        """
        Load the VisDial json file at `path` and iterate over its turns.

        Yields, for every question/answer pair of every dialog,
        `((question, [answer], 'None', answer_options, img_path), True)`,
        where the question, answer and answer options are looked up by index
        in the file's question and answer tables.
        """
        print('loading: ' + path)
        with open(path) as data_file:
            self.visdial = json.load(data_file)

        self.questions = self.visdial['data']['questions']
        self.answers = self.visdial['data']['answers']

        for dialog in self.visdial['data']['dialogs']:
            # image_path is a file name prefix; the image id is zero-padded
            # to 12 digits to form the file name.
            img_path = self.image_path + '%012d.jpg' % (dialog['image_id'])

            for qa in dialog['dialog']:
                question = self.questions[qa['question']]
                answer = [self.answers[qa['answer']]]
                answer_options = [
                    self.answers[ans_id] for ans_id in qa['answer_options']
                ]
                # NOTE(review): every turn is yielded with the flag True, so
                # the turns of one dialog are not tied together here; confirm
                # against DialogTeacher whether a per-dialog flag is wanted.
                yield (question, answer, 'None', answer_options, img_path), True
60 changes: 60 additions & 0 deletions parlai/tasks/visdial/build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os


def buildImage(opt):
    """
    Download and unpack the COCO 2014 image archives into
    `<datapath>/COCO-IMG`, unless `build_data.built` already reports that
    directory as built.
    """
    dpath = os.path.join(opt['datapath'], 'COCO-IMG')
    if build_data.built(dpath):
        return

    print('[building image data: ' + dpath + ']')
    build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    url = 'http://msvocds.blob.core.windows.net/coco2014/'
    archives = ('train2014.zip', 'val2014.zip', 'test2014.zip')

    # Fetch every archive first, then unpack them in the same order.
    for fname in archives:
        build_data.download(dpath, url + fname)
    for fname in archives:
        build_data.untar(dpath, fname, False)

    # Mark the data as built.
    build_data.mark_done(dpath)


def build(opt):
    """
    Download and unpack the VisDial v0.9 train and val archives into
    `<datapath>/VisDial-v0.9`, unless `build_data.built` already reports that
    directory as built.
    """
    dpath = os.path.join(opt['datapath'], 'VisDial-v0.9')
    if build_data.built(dpath):
        return

    print('[building data: ' + dpath + ']')
    build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    url = 'https://computing.ece.vt.edu/~abhshkdz/data/visdial/'
    archives = ('visdial_0.9_train.zip', 'visdial_0.9_val.zip')

    # Fetch both archives first, then unpack them in the same order.
    for fname in archives:
        build_data.download(dpath, url + fname)
    for fname in archives:
        build_data.untar(dpath, fname)

    # Mark the data as built.
    build_data.mark_done(dpath)
6 changes: 3 additions & 3 deletions parlai/tasks/vqa_coco2014/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
import random
import os


def _path(opt):
build(opt)
buildImage(opt)
dt = opt['datatype'].split(':')[0]

if dt == 'train':
Expand All @@ -36,7 +36,7 @@ def _path(opt):
annotation_path = os.path.join(opt['datapath'], 'VQA-COCO2014',
annotation_suffix + '_annotations.json')

image_path = os.path.join(opt['datapath'], 'VQA-COCO2014', img_suffix)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually, we'd like to keep the images in datapath

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Then I think we might not want the images to stay in the 'VQA-COCO2014' folder, since other tasks such as Visual Dialog may also use the COCO images. How about putting the images under COCO-IMG? Then multiple tasks can share the image data.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds great! Yeah giving it the most general name for the data file is perfect, and then multiple tasks can depend on it (and it won't rebuild it if it's already there).

image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)

return data_path, annotation_path, image_path

Expand Down Expand Up @@ -100,7 +100,7 @@ def act(self):
self.episode_idx = (self.episode_idx + self.step_size) % len(self)
if self.episode_idx == len(self) - self.step_size:
self.epochDone = True
# always showing the same index now.

qa = self.ques['questions'][self.episode_idx]
question = qa['question']
image_id = qa['image_id']
Expand Down
36 changes: 21 additions & 15 deletions parlai/tasks/vqa_coco2014/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,30 @@
import os


def buildImage(dpath):
print('[building image data: ' + dpath + ']')
# download the image data.
fname1 = 'train2014.zip'
fname2 = 'val2014.zip'
fname3 = 'test2014.zip'
def buildImage(opt):
dpath = os.path.join(opt['datapath'], 'COCO-IMG')

url = 'http://msvocds.blob.core.windows.net/coco2014/'
if not build_data.built(dpath):
print('[building image data: ' + dpath + ']')
build_data.remove_dir(dpath)
build_data.make_dir(dpath)
# download the image data.
fname1 = 'train2014.zip'
fname2 = 'val2014.zip'
fname3 = 'test2014.zip'

build_data.download(os.path.join(dpath, fname1), url + fname1, False)
build_data.download(os.path.join(dpath, fname2), url + fname2, False)
build_data.download(os.path.join(dpath, fname3), url + fname3, False)
url = 'http://msvocds.blob.core.windows.net/coco2014/'

build_data.untar(dpath, fname1)
build_data.untar(dpath, fname2)
build_data.untar(dpath, fname3)
build_data.download(dpath, url + fname1)
build_data.download(dpath, url + fname2)
build_data.download(dpath, url + fname3)

build_data.untar(dpath, fname1, False)
build_data.untar(dpath, fname2, False)
build_data.untar(dpath, fname3, False)

# Mark the data as built.
build_data.mark_done(dpath)



Expand Down Expand Up @@ -61,7 +69,5 @@ def build(opt):
build_data.untar(dpath, fname4)
build_data.untar(dpath, fname5)

buildImage(dpath)

# Mark the data as built.
build_data.mark_done(dpath)
5 changes: 5 additions & 0 deletions parlai/tasks/vqa_coco2014_v2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
146 changes: 146 additions & 0 deletions parlai/tasks/vqa_coco2014_v2/agents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

from parlai.core.agents import Teacher
from .build import build, buildImage

from PIL import Image
import json
import random
import os
import pdb

def _path(opt):
    """
    Build the VQA v2 data and the COCO images if needed, then locate them.

    Returns `(data_path, annotation_path, image_path)` for the split named
    by `opt['datatype']`. Any split other than train or valid falls back to
    the test file names.
    """
    build(opt)
    buildImage(opt)

    # Only the part of the datatype before the first ':' selects the split.
    split = opt['datatype'].split(':')[0]

    known_splits = {
        'train': (
            'v2_OpenEnded_mscoco_train2014',
            'v2_mscoco_train2014',
            os.path.join('train2014', 'COCO_train2014_'),
        ),
        'valid': (
            'v2_OpenEnded_mscoco_val2014',
            'v2_mscoco_val2014',
            os.path.join('val2014', 'COCO_val2014_'),
        ),
    }
    # NOTE(review): the fallback pairs test2015 questions with a test2014
    # image prefix, and its annotation suffix is the string 'None' --
    # confirm both are intended.
    fallback = (
        'v2_OpenEnded_mscoco_test2015',
        'None',
        os.path.join('test2014', 'COCO_test2014_'),
    )
    ques_suffix, annotation_suffix, img_suffix = known_splits.get(
        split, fallback)

    root = opt['datapath']
    task_dir = 'VQA-COCO2014-v2'
    data_path = os.path.join(root, task_dir, ques_suffix + '_questions.json')
    annotation_path = os.path.join(
        root, task_dir, annotation_suffix + '_annotations.json')
    image_path = os.path.join(root, 'COCO-IMG', img_suffix)

    return data_path, annotation_path, image_path


def _image_loader(opt, path):
    """
    Opens the image file at `path` and returns it converted to RGB, in PIL
    Image format. Returns None without touching the file when the
    'no_images' option is set.
    """
    if opt.get('no_images', False):
        return None
    return Image.open(path).convert('RGB')


class OeTeacher(Teacher):
    """
    VQA v2.0 Open-Ended teacher, which loads the json vqa data and implements
    its own `act` method for interacting with student agent.
    """
    def __init__(self, opt, shared=None):
        """
        Locate the data for `opt['datatype']` and either reuse the questions
        and annotations found in `shared` or load them from disk.
        """
        super().__init__(opt)
        self.datatype = opt['datatype']
        data_path, annotation_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
            if 'annotation' in shared:
                self.annotation = shared['annotation']
            # _setup_data is skipped on this path, so the length it would
            # have computed has to be set here as well.
            self.len = len(self.ques['questions'])
        else:
            self._setup_data(data_path, annotation_path)

        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)

        self.reset()

    def __len__(self):
        """Return the number of questions."""
        return self.len

    def reset(self):
        """
        Reset the dialog so that it is at the start of the epoch,
        and all metrics are reset.
        """
        super().reset()
        self.lastY = None
        # One step before the offset, so that the first ordered `act`
        # lands exactly on `data_offset`.
        self.episode_idx = self.data_offset - self.step_size

    def observe(self, observation):
        """Process observation for metrics."""
        if self.lastY is not None:
            self.metrics.update(observation, self.lastY)
            self.lastY = None
        return observation

    def act(self):
        """
        Return the next question as an action dict with 'image', 'text' and
        'episode_done' keys, plus 'labels' for training datatypes.
        """
        if self.datatype == 'train':
            self.episode_idx = random.randrange(self.len)
        else:
            # Advance by the step size so that teachers with different
            # offsets walk disjoint indices.
            self.episode_idx = (
                self.episode_idx + self.step_size) % self.len

        qa = self.ques['questions'][self.episode_idx]
        question = qa['question']
        image_id = qa['image_id']

        # image_path is a file name prefix; the image id is zero-padded to
        # 12 digits to form the file name.
        img_path = self.image_path + '%012d.jpg' % (image_id)

        action = {
            'image': _image_loader(self.opt, img_path),
            'text': question,
            'episode_done': True
        }

        if not self.datatype.startswith('test'):
            # The annotation is looked up at the same index as the question.
            anno = self.annotation['annotations'][self.episode_idx]
            self.lastY = [ans['answer'] for ans in anno['answers']]

        if self.datatype.startswith('train'):
            action['labels'] = self.lastY

        return action

    def share(self):
        """Add the loaded questions (and annotations, if any) to the shared dict."""
        shared = super().share()
        shared['ques'] = self.ques
        if hasattr(self, 'annotation'):
            shared['annotation'] = self.annotation
        return shared

    def _setup_data(self, data_path, annotation_path):
        """
        Load the questions json, and the annotations json unless the datatype
        is a test one, then record the number of questions.
        """
        print('loading: ' + data_path)
        with open(data_path) as data_file:
            self.ques = json.load(data_file)

        # Same test as in `act`, so that suffixed test datatypes skip the
        # annotations too.
        if not self.datatype.startswith('test'):
            print('loading: ' + annotation_path)
            with open(annotation_path) as data_file:
                self.annotation = json.load(data_file)

        self.len = len(self.ques['questions'])

class DefaultTeacher(OeTeacher):
    """Default teacher for this task; adds nothing to OeTeacher."""
    pass
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does v2 have a multiple-choice version?

Copy link
Contributor Author

@jiasenlu jiasenlu May 11, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, VQA v2.0 doesn't have the multiple-choice now.

Loading