-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Add VQA V2.0 and Visual Dialog V0.9. #54
Changes from all commits
75ca32d
7511160
a134a3d
911740c
a5dd2d3
75d7145
63e7ae7
6255107
7abd701
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Copyright (c) 2017-present, Facebook, Inc. | ||
# All rights reserved. | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. An additional grant | ||
# of patent rights can be found in the PATENTS file in the same directory. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
# Copyright (c) 2017-present, Facebook, Inc. | ||
# All rights reserved. | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. An additional grant | ||
# of patent rights can be found in the PATENTS file in the same directory. | ||
|
||
from parlai.core.dialog_teacher import DialogTeacher | ||
from .build import build, buildImage | ||
|
||
from PIL import Image | ||
import json | ||
import random | ||
import os | ||
|
||
def _path(opt):
    """Return (data_path, image_path) for the requested VisDial datatype.

    Triggers the dataset and COCO-image downloads first if they are not
    already built under opt['datapath'].
    """
    build(opt)
    buildImage(opt)
    dt = opt['datatype'].split(':')[0]

    # datatype -> (json filename suffix, image folder/file prefix)
    suffixes = {
        'train': ('train', os.path.join('train2014', 'COCO_train2014_')),
        'valid': ('val', os.path.join('val2014', 'COCO_val2014_')),
    }
    if dt not in suffixes:
        raise RuntimeError('Not valid datatype.')
    suffix, img_suffix = suffixes[dt]

    data_path = os.path.join(opt['datapath'], 'VisDial-v0.9',
                             'visdial_0.9_' + suffix + '.json')
    image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)

    return data_path, image_path
|
||
|
||
def _image_loader(path):
    """Load the image file at ``path`` and return it as an RGB PIL Image."""
    img = Image.open(path)
    return img.convert('RGB')
|
||
|
||
class DefaultTeacher(DialogTeacher):
    """
    This version of VisDial inherits from the core Dialog Teacher, which just
    requires it to define an iterator over its data `setup_data` in order to
    inherit basic metrics, a `act` function, and enables
    Hogwild training with shared memory with no extra work.
    """

    def __init__(self, opt, shared=None):
        self.datatype = opt['datatype']
        data_path, self.image_path = _path(opt)
        opt['datafile'] = data_path
        self.id = 'visdial'
        super().__init__(opt, shared)

    def setup_data(self, path):
        """Yield one ((text, labels, reward, candidates, image), new_episode)
        tuple per question/answer round in the VisDial json file.

        Each dialog (one image, ten QA rounds) forms a single episode: the
        first round of a dialog is flagged as the start of a new episode.
        """
        print('loading: ' + path)
        with open(path) as data_file:
            self.visdial = json.load(data_file)

        # questions/answers are stored once and referenced by index
        self.questions = self.visdial['data']['questions']
        self.answers = self.visdial['data']['answers']

        for dialog in self.visdial['data']['dialogs']:
            # for each dialog
            image_id = dialog['image_id']
            img_path = self.image_path + '%012d.jpg' % (image_id)

            for i, qa in enumerate(dialog['dialog']):
                # for each question answer pair.
                question = self.questions[qa['question']]
                answer = [self.answers[qa['answer']]]
                answer_options = [self.answers[ans_id]
                                  for ans_id in qa['answer_options']]
                # BUG FIX: the original compared `i == len(dialog['dialog'])`,
                # which enumerate never reaches, so its episode-boundary flag
                # was dead code and every round was yielded with new_episode
                # always True. DialogTeacher's second yield element marks the
                # START of a new episode, so flag only the first round.
                yield (question, answer, 'None', answer_options, img_path), \
                    i == 0
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Copyright (c) 2017-present, Facebook, Inc. | ||
# All rights reserved. | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. An additional grant | ||
# of patent rights can be found in the PATENTS file in the same directory. | ||
# Download and build the data if it does not exist. | ||
|
||
import parlai.core.build_data as build_data | ||
import os | ||
|
||
|
||
def buildImage(opt):
    """Download and unpack the COCO-2014 image sets into datapath/COCO-IMG.

    Kept under a task-agnostic folder name so multiple tasks (VQA, VisDial)
    can share the same images without re-downloading.
    """
    dpath = os.path.join(opt['datapath'], 'COCO-IMG')

    if build_data.built(dpath):
        # already downloaded and unpacked on a previous run
        return

    print('[building image data: ' + dpath + ']')
    build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # download the image data.
    url = 'http://msvocds.blob.core.windows.net/coco2014/'
    archives = ['train2014.zip', 'val2014.zip', 'test2014.zip']
    for archive in archives:
        build_data.download(dpath, url + archive)
    for archive in archives:
        build_data.untar(dpath, archive, False)

    # Mark the data as built.
    build_data.mark_done(dpath)
|
||
|
||
def build(opt):
    """Download and unpack the VisDial v0.9 dialog json files."""
    dpath = os.path.join(opt['datapath'], 'VisDial-v0.9')

    if build_data.built(dpath):
        # nothing to do; a previous run finished successfully
        return

    print('[building data: ' + dpath + ']')
    build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Download the data.
    url = 'https://computing.ece.vt.edu/~abhshkdz/data/visdial/'
    archives = ['visdial_0.9_train.zip', 'visdial_0.9_val.zip']
    for archive in archives:
        build_data.download(dpath, url + archive)
    for archive in archives:
        build_data.untar(dpath, archive)

    # Mark the data as built.
    build_data.mark_done(dpath)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,9 +12,9 @@ | |
import random | ||
import os | ||
|
||
|
||
def _path(opt): | ||
build(opt) | ||
buildImage(opt) | ||
dt = opt['datatype'].split(':')[0] | ||
|
||
if dt == 'train': | ||
|
@@ -36,7 +36,7 @@ def _path(opt): | |
annotation_path = os.path.join(opt['datapath'], 'VQA-COCO2014', | ||
annotation_suffix + '_annotations.json') | ||
|
||
image_path = os.path.join(opt['datapath'], 'VQA-COCO2014', img_suffix) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually, we'd like to keep the images in datapath There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see. Then I think we might not want the image stay in 'VQA-COCO2014' folder, other task such as Visual Dialog may also use the COCO image. How about put the image under There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds great! Yeah giving it the most general name for the data file is perfect, and then multiple tasks can depend on it (and it won't rebuild it if it's already there). |
||
image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix) | ||
|
||
return data_path, annotation_path, image_path | ||
|
||
|
@@ -100,7 +100,7 @@ def act(self): | |
self.episode_idx = (self.episode_idx + self.step_size) % len(self) | ||
if self.episode_idx == len(self) - self.step_size: | ||
self.epochDone = True | ||
# always showing the same index now. | ||
|
||
qa = self.ques['questions'][self.episode_idx] | ||
question = qa['question'] | ||
image_id = qa['image_id'] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Copyright (c) 2017-present, Facebook, Inc. | ||
# All rights reserved. | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. An additional grant | ||
# of patent rights can be found in the PATENTS file in the same directory. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
# Copyright (c) 2017-present, Facebook, Inc. | ||
# All rights reserved. | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. An additional grant | ||
# of patent rights can be found in the PATENTS file in the same directory. | ||
|
||
from parlai.core.agents import Teacher | ||
from .build import build, buildImage | ||
|
||
from PIL import Image | ||
import json | ||
import random | ||
import os | ||
import pdb | ||
|
||
def _path(opt):
    """Return (data_path, annotation_path, image_path) for VQA v2.

    Builds the dataset and the shared COCO images first if needed. Any
    datatype other than train/valid falls back to the test2015 questions,
    which ship without annotations.
    """
    build(opt)
    buildImage(opt)
    dt = opt['datatype'].split(':')[0]

    # datatype -> (question suffix, annotation suffix, image prefix)
    suffixes = {
        'train': ('v2_OpenEnded_mscoco_train2014',
                  'v2_mscoco_train2014',
                  os.path.join('train2014', 'COCO_train2014_')),
        'valid': ('v2_OpenEnded_mscoco_val2014',
                  'v2_mscoco_val2014',
                  os.path.join('val2014', 'COCO_val2014_')),
    }
    test_suffixes = ('v2_OpenEnded_mscoco_test2015',
                     'None',
                     os.path.join('test2014', 'COCO_test2014_'))
    ques_suffix, annotation_suffix, img_suffix = suffixes.get(dt,
                                                              test_suffixes)

    data_path = os.path.join(opt['datapath'], 'VQA-COCO2014-v2',
                             ques_suffix + '_questions.json')
    annotation_path = os.path.join(opt['datapath'], 'VQA-COCO2014-v2',
                                   annotation_suffix + '_annotations.json')
    image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)

    return data_path, annotation_path, image_path
|
||
|
||
def _image_loader(opt, path): | ||
""" | ||
Loads the appropriate image from the image_id and returns PIL Image format. | ||
""" | ||
if not opt.get('no_images', False): | ||
return Image.open(path).convert('RGB') | ||
else: | ||
return None | ||
|
||
|
||
class OeTeacher(Teacher):
    """
    VQA v2.0 Open-Ended teacher, which loads the json vqa data and implements
    its own `act` method for interacting with the student agent.
    """

    def __init__(self, opt, shared=None):
        super().__init__(opt)
        self.datatype = opt['datatype']
        data_path, annotation_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            # reuse the json already parsed by another teacher instance
            self.ques = shared['ques']
            if 'annotation' in shared:
                self.annotation = shared['annotation']
        else:
            self._setup_data(data_path, annotation_path)

        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)

        self.reset()

    def __len__(self):
        # number of questions in the loaded split
        return self.len

    def reset(self):
        # Reset the dialog so that it is at the start of the epoch,
        # and all metrics are reset.
        super().reset()
        self.lastY = None
        # start one step behind our offset so the first act() lands on it
        self.episode_idx = self.data_offset - self.step_size

    def observe(self, observation):
        """Process observation for metrics."""
        if self.lastY is not None:
            # score the student's reply against the gold answers
            self.metrics.update(observation, self.lastY)
            self.lastY = None
        return observation

    def act(self):
        """Return the next question action (image, text, episode_done, and
        labels when training)."""
        if self.datatype == 'train':
            # plain 'train' samples randomly; 'train:ordered' (and valid/
            # test) iterates sequentially below
            self.episode_idx = random.randrange(self.len)
        else:
            # BUG FIX: was `+ 1`, which ignored the batch striding set up in
            # __init__/reset and made batched teachers serve overlapping
            # examples; advance by step_size so each teacher in a batch
            # covers a disjoint subset (matches the v1 teacher).
            self.episode_idx = (self.episode_idx + self.step_size) % self.len

        qa = self.ques['questions'][self.episode_idx]
        question = qa['question']
        image_id = qa['image_id']

        # COCO image filenames are the zero-padded 12-digit image id
        img_path = self.image_path + '%012d.jpg' % (image_id)

        action = {
            'image': _image_loader(self.opt, img_path),
            'text': question,
            'episode_done': True
        }

        if not self.datatype.startswith('test'):
            anno = self.annotation['annotations'][self.episode_idx]
            self.lastY = [ans['answer'] for ans in anno['answers']]

        if self.datatype.startswith('train'):
            action['labels'] = self.lastY

        return action

    def share(self):
        """Share the parsed json between instances (batch/Hogwild mode)."""
        shared = super().share()
        shared['ques'] = self.ques
        if hasattr(self, 'annotation'):
            shared['annotation'] = self.annotation
        return shared

    def _setup_data(self, data_path, annotation_path):
        print('loading: ' + data_path)
        with open(data_path) as data_file:
            self.ques = json.load(data_file)

        # BUG FIX: was `self.datatype != 'test'`, which disagreed with the
        # `startswith('test')` check in act() — a datatype like 'test:...'
        # would try to load annotations that do not exist for test splits.
        if not self.datatype.startswith('test'):
            print('loading: ' + annotation_path)
            with open(annotation_path) as data_file:
                self.annotation = json.load(data_file)

        self.len = len(self.ques['questions'])
|
||
class DefaultTeacher(OeTeacher):
    # The open-ended teacher is the default for VQA v2.0; per the review
    # discussion above, v2.0 has no multiple-choice variant.
    pass
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does v2 have a multiple-choice version? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no, VQA v2.0 doesn't have the multiple-choice now. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah good catch thank you
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
:)