diff --git a/parlai/core/dialog_teacher.py b/parlai/core/dialog_teacher.py
index 658f2048785..2f75d2d7fcf 100644
--- a/parlai/core/dialog_teacher.py
+++ b/parlai/core/dialog_teacher.py
@@ -265,7 +265,7 @@ def get(self, episode_idx, entry_idx=0):
             table['reward'] = entry[2]
         if len(entry) > 3:
             table['label_candidates'] = entry[3]
-        if len(entry) > 4 and not opt.get('no_images', False):
+        if len(entry) > 4 and not self.opt.get('no_images', False):
             table['image'] = load_image(self.opt, entry[4])
 
 
diff --git a/parlai/tasks/visdial/__init__.py b/parlai/tasks/visdial/__init__.py
new file mode 100644
index 00000000000..de7579ee4a2
--- /dev/null
+++ b/parlai/tasks/visdial/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) 2017-present, Facebook, Inc.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
diff --git a/parlai/tasks/visdial/agents.py b/parlai/tasks/visdial/agents.py
new file mode 100644
index 00000000000..6ce211a8f46
--- /dev/null
+++ b/parlai/tasks/visdial/agents.py
@@ -0,0 +1,96 @@
+# Copyright (c) 2017-present, Facebook, Inc.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+
+from parlai.core.dialog_teacher import DialogTeacher
+from .build import build, buildImage
+
+from PIL import Image
+import json
+import random
+import os
+
+
+def _path(opt):
+    """Build the data if needed and return ``(data_path, image_path)``.
+
+    ``image_path`` is a filename prefix: callers append the zero-padded
+    image id and ``.jpg`` to it.
+
+    Raises RuntimeError for any datatype other than train or valid.
+    """
+    build(opt)
+    buildImage(opt)
+    dt = opt['datatype'].split(':')[0]
+
+    if dt == 'train':
+        suffix = 'train'
+        img_suffix = os.path.join('train2014', 'COCO_train2014_')
+    elif dt == 'valid':
+        suffix = 'val'
+        img_suffix = os.path.join('val2014', 'COCO_val2014_')
+    else:
+        raise RuntimeError('Not valid datatype.')
+
+    data_path = os.path.join(opt['datapath'], 'VisDial-v0.9',
+                             'visdial_0.9_' + suffix + '.json')
+
+    image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)
+
+    return data_path, image_path
+
+
+def _image_loader(path):
+    """
+    Loads the image stored at `path` and returns it as an RGB PIL Image.
+    """
+    return Image.open(path).convert('RGB')
+
+
+class DefaultTeacher(DialogTeacher):
+    """
+    This version of VisDial inherits from the core Dialog Teacher, which just
+    requires it to define an iterator over its data `setup_data` in order to
+    inherit basic metrics, a `act` function, and enables
+    Hogwild training with shared memory with no extra work.
+    """
+    def __init__(self, opt, shared=None):
+
+        self.datatype = opt['datatype']
+        data_path, self.image_path = _path(opt)
+        opt['datafile'] = data_path
+        self.id = 'visdial'
+
+        super().__init__(opt, shared)
+
+    def setup_data(self, path):
+        """Yield ``(entry, new_episode)`` pairs for every dialog turn.
+
+        Each dialog is one episode, so only its first question/answer pair
+        is flagged as the start of a new episode.
+        """
+        print('loading: ' + path)
+        with open(path) as data_file:
+            self.visdial = json.load(data_file)
+
+        self.questions = self.visdial['data']['questions']
+        self.answers = self.visdial['data']['answers']
+
+        for dialog in self.visdial['data']['dialogs']:
+            # All turns of a dialog share one image.
+            image_id = dialog['image_id']
+            img_path = self.image_path + '%012d.jpg' % (image_id)
+
+            for i, qa in enumerate(dialog['dialog']):
+                # Questions, answers and candidates are stored as indices
+                # into the shared question/answer tables.
+                question = self.questions[qa['question']]
+                answer = [self.answers[qa['answer']]]
+                answer_options = [self.answers[ans_id]
+                                  for ans_id in qa['answer_options']]
+                # The flag marks the first turn of an episode, so the whole
+                # dialog is kept together as one multi-turn episode.
+                yield (question, answer, 'None', answer_options,
+                       img_path), i == 0
diff --git a/parlai/tasks/visdial/build.py b/parlai/tasks/visdial/build.py
new file mode 100644
index 00000000000..c369a418e2c
--- /dev/null
+++ b/parlai/tasks/visdial/build.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2017-present, Facebook, Inc.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+# Download and build the data if it does not exist.
+
+import parlai.core.build_data as build_data
+import os
+
+
+def buildImage(opt):
+    """Download and unpack the COCO 2014 images into <datapath>/COCO-IMG.
+
+    Skipped when ``build_data.built`` reports the directory as built.
+    """
+    dpath = os.path.join(opt['datapath'], 'COCO-IMG')
+
+    if not build_data.built(dpath):
+        print('[building image data: ' + dpath + ']')
+        build_data.remove_dir(dpath)
+        build_data.make_dir(dpath)
+
+        # download the image data.
+        fnames = ['train2014.zip', 'val2014.zip', 'test2014.zip']
+        url = 'http://msvocds.blob.core.windows.net/coco2014/'
+
+        for fname in fnames:
+            build_data.download(dpath, url + fname)
+
+        for fname in fnames:
+            build_data.untar(dpath, fname, False)
+
+        # Mark the data as built.
+        build_data.mark_done(dpath)
+
+
+def build(opt):
+    """Download and unpack VisDial v0.9 into <datapath>/VisDial-v0.9.
+
+    Skipped when ``build_data.built`` reports the directory as built.
+    """
+    dpath = os.path.join(opt['datapath'], 'VisDial-v0.9')
+
+    if not build_data.built(dpath):
+        print('[building data: ' + dpath + ']')
+        build_data.remove_dir(dpath)
+        build_data.make_dir(dpath)
+
+        # Download the data.
+        fnames = ['visdial_0.9_train.zip', 'visdial_0.9_val.zip']
+        url = 'https://computing.ece.vt.edu/~abhshkdz/data/visdial/'
+
+        for fname in fnames:
+            build_data.download(dpath, url + fname)
+
+        for fname in fnames:
+            build_data.untar(dpath, fname)
+
+        # Mark the data as built.
+        build_data.mark_done(dpath)
diff --git a/parlai/tasks/vqa_coco2014/agents.py b/parlai/tasks/vqa_coco2014/agents.py
index f73c914b678..2ba29ca5514 100644
--- a/parlai/tasks/vqa_coco2014/agents.py
+++ b/parlai/tasks/vqa_coco2014/agents.py
@@ -12,9 +12,9 @@
 import random
 import os
 
-
 def _path(opt):
     build(opt)
+    buildImage(opt)
     dt = opt['datatype'].split(':')[0]
 
     if dt == 'train':
@@ -36,7 +36,7 @@ def _path(opt):
     annotation_path = os.path.join(opt['datapath'], 'VQA-COCO2014',
                                    annotation_suffix + '_annotations.json')
 
-    image_path = os.path.join(opt['datapath'], 'VQA-COCO2014', img_suffix)
+    image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)
 
     return data_path, annotation_path, image_path
 
@@ -100,7 +100,7 @@ def act(self):
         self.episode_idx = (self.episode_idx + self.step_size) % len(self)
         if self.episode_idx == len(self) - self.step_size:
             self.epochDone = True
-        # always showing the same index now.
+
         qa = self.ques['questions'][self.episode_idx]
         question = qa['question']
         image_id = qa['image_id']
diff --git a/parlai/tasks/vqa_coco2014/build.py b/parlai/tasks/vqa_coco2014/build.py
index 8666689a0d8..8fe6d9dc7f4 100644
--- a/parlai/tasks/vqa_coco2014/build.py
+++ b/parlai/tasks/vqa_coco2014/build.py
@@ -9,22 +9,30 @@
 import os
 
 
-def buildImage(dpath):
-    print('[building image data: ' + dpath + ']')
-    # download the image data.
-    fname1 = 'train2014.zip'
-    fname2 = 'val2014.zip'
-    fname3 = 'test2014.zip'
+def buildImage(opt):
+    dpath = os.path.join(opt['datapath'], 'COCO-IMG')
 
-    url = 'http://msvocds.blob.core.windows.net/coco2014/'
+    if not build_data.built(dpath):
+        print('[building image data: ' + dpath + ']')
+        build_data.remove_dir(dpath)
+        build_data.make_dir(dpath)
+        # download the image data.
+        fname1 = 'train2014.zip'
+        fname2 = 'val2014.zip'
+        fname3 = 'test2014.zip'
 
-    build_data.download(os.path.join(dpath, fname1), url + fname1, False)
-    build_data.download(os.path.join(dpath, fname2), url + fname2, False)
-    build_data.download(os.path.join(dpath, fname3), url + fname3, False)
+        url = 'http://msvocds.blob.core.windows.net/coco2014/'
 
-    build_data.untar(dpath, fname1)
-    build_data.untar(dpath, fname2)
-    build_data.untar(dpath, fname3)
+        build_data.download(dpath, url + fname1)
+        build_data.download(dpath, url + fname2)
+        build_data.download(dpath, url + fname3)
+
+        build_data.untar(dpath, fname1, False)
+        build_data.untar(dpath, fname2, False)
+        build_data.untar(dpath, fname3, False)
+
+        # Mark the data as built.
+        build_data.mark_done(dpath)
 
 
 
@@ -61,7 +69,5 @@ def build(opt):
         build_data.untar(dpath, fname4)
         build_data.untar(dpath, fname5)
 
-        buildImage(dpath)
-
         # Mark the data as built.
         build_data.mark_done(dpath)
diff --git a/parlai/tasks/vqa_coco2014_v2/__init__.py b/parlai/tasks/vqa_coco2014_v2/__init__.py
new file mode 100644
index 00000000000..8eff276d72d
--- /dev/null
+++ b/parlai/tasks/vqa_coco2014_v2/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) 2017-present, Facebook, Inc.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
\ No newline at end of file
diff --git a/parlai/tasks/vqa_coco2014_v2/agents.py b/parlai/tasks/vqa_coco2014_v2/agents.py
new file mode 100644
index 00000000000..7ee0624dd4d
--- /dev/null
+++ b/parlai/tasks/vqa_coco2014_v2/agents.py
@@ -0,0 +1,166 @@
+# Copyright (c) 2017-present, Facebook, Inc.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+
+from parlai.core.agents import Teacher
+from .build import build, buildImage
+
+from PIL import Image
+import json
+import random
+import os
+import pdb
+
+
+def _path(opt):
+    """Build the data if needed and return the paths used by the teacher.
+
+    Returns ``(data_path, annotation_path, image_path)``. ``image_path`` is
+    a filename prefix: callers append the zero-padded image id and ``.jpg``.
+    """
+    build(opt)
+    buildImage(opt)
+    dt = opt['datatype'].split(':')[0]
+
+    if dt == 'train':
+        ques_suffix = 'v2_OpenEnded_mscoco_train2014'
+        annotation_suffix = 'v2_mscoco_train2014'
+        img_suffix = os.path.join('train2014', 'COCO_train2014_')
+    elif dt == 'valid':
+        ques_suffix = 'v2_OpenEnded_mscoco_val2014'
+        annotation_suffix = 'v2_mscoco_val2014'
+        img_suffix = os.path.join('val2014', 'COCO_val2014_')
+    else:
+        # Annotations are not loaded for test data, so the annotation path
+        # built from this placeholder is never opened.
+        # NOTE(review): the questions are the test2015 split while the image
+        # prefix is test2014 - confirm that this pairing is intended.
+        ques_suffix = 'v2_OpenEnded_mscoco_test2015'
+        annotation_suffix = 'None'
+        img_suffix = os.path.join('test2014', 'COCO_test2014_')
+
+    data_path = os.path.join(opt['datapath'], 'VQA-COCO2014-v2',
+                             ques_suffix + '_questions.json')
+
+    annotation_path = os.path.join(opt['datapath'], 'VQA-COCO2014-v2',
+                                   annotation_suffix + '_annotations.json')
+
+    image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)
+
+    return data_path, annotation_path, image_path
+
+
+def _image_loader(opt, path):
+    """
+    Loads the image stored at `path` and returns it as an RGB PIL Image, or
+    returns None when the `no_images` option is set.
+    """
+    if not opt.get('no_images', False):
+        return Image.open(path).convert('RGB')
+    else:
+        return None
+
+
+class OeTeacher(Teacher):
+    """
+    VQA v2.0 Open-Ended teacher, which loads the json vqa data and implements its
+    own `act` method for interacting with student agent.
+    """
+    def __init__(self, opt, shared=None):
+        super().__init__(opt)
+        self.datatype = opt['datatype']
+        data_path, annotation_path, self.image_path = _path(opt)
+
+        if shared and 'ques' in shared:
+            self.ques = shared['ques']
+            if 'annotation' in shared:
+                self.annotation = shared['annotation']
+            # _setup_data is skipped for shared copies, so the length has
+            # to be derived here as well.
+            self.len = len(self.ques['questions'])
+        else:
+            self._setup_data(data_path, annotation_path)
+
+        # for ordered data in batch mode (especially, for validation and
+        # testing), each teacher in the batch gets a start index and a step
+        # size so they all process disparate sets of the data
+        self.step_size = opt.get('batchsize', 1)
+        self.data_offset = opt.get('batchindex', 0)
+
+        self.reset()
+
+    def __len__(self):
+        return self.len
+
+    def reset(self):
+        # Reset the dialog so that it is at the start of the epoch,
+        # and all metrics are reset.
+        super().reset()
+        self.lastY = None
+        # Start one step before the offset so that the first ordered `act`
+        # lands on `data_offset`.
+        self.episode_idx = self.data_offset - self.step_size
+
+    def observe(self, observation):
+        """Process observation for metrics."""
+        if self.lastY is not None:
+            self.metrics.update(observation, self.lastY)
+            self.lastY = None
+        return observation
+
+    def act(self):
+        """Return the next question (and image) as a one-turn episode."""
+        if self.datatype == 'train':
+            self.episode_idx = random.randrange(self.len)
+        else:
+            # Advance by the batch size so that the teachers of a batch
+            # walk over disjoint examples.
+            self.episode_idx = (self.episode_idx + self.step_size) % self.len
+
+        qa = self.ques['questions'][self.episode_idx]
+        question = qa['question']
+        image_id = qa['image_id']
+
+        img_path = self.image_path + '%012d.jpg' % (image_id)
+
+        action = {
+            'image': _image_loader(self.opt, img_path),
+            'text': question,
+            'episode_done': True
+        }
+
+        if not self.datatype.startswith('test'):
+            anno = self.annotation['annotations'][self.episode_idx]
+            self.lastY = [ans['answer'] for ans in anno['answers']]
+
+        if self.datatype.startswith('train'):
+            action['labels'] = self.lastY
+
+        return action
+
+    def share(self):
+        shared = super().share()
+        shared['ques'] = self.ques
+        if hasattr(self, 'annotation'):
+            shared['annotation'] = self.annotation
+        return shared
+
+    def _setup_data(self, data_path, annotation_path):
+        print('loading: ' + data_path)
+        with open(data_path) as data_file:
+            self.ques = json.load(data_file)
+
+        # Same test check as in `act`, so that datatypes such as
+        # 'test:...' do not try to open a missing annotation file.
+        if not self.datatype.startswith('test'):
+            print('loading: ' + annotation_path)
+            with open(annotation_path) as data_file:
+                self.annotation = json.load(data_file)
+
+        self.len = len(self.ques['questions'])
+
+
+class DefaultTeacher(OeTeacher):
+    pass
diff --git a/parlai/tasks/vqa_coco2014_v2/build.py b/parlai/tasks/vqa_coco2014_v2/build.py
new file mode 100644
index 00000000000..39898c76bcf
--- /dev/null
+++ b/parlai/tasks/vqa_coco2014_v2/build.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2017-present, Facebook, Inc.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+# Download and build the data if it does not exist.
+
+import parlai.core.build_data as build_data
+import os
+
+
+def buildImage(opt):
+    """Download and unpack the COCO 2014 images into <datapath>/COCO-IMG.
+
+    Skipped when ``build_data.built`` reports the directory as built.
+    """
+    dpath = os.path.join(opt['datapath'], 'COCO-IMG')
+
+    if not build_data.built(dpath):
+        print('[building image data: ' + dpath + ']')
+        build_data.remove_dir(dpath)
+        build_data.make_dir(dpath)
+
+        # download the image data.
+        fnames = ['train2014.zip', 'val2014.zip', 'test2014.zip']
+        url = 'http://msvocds.blob.core.windows.net/coco2014/'
+
+        for fname in fnames:
+            build_data.download(dpath, url + fname)
+
+        for fname in fnames:
+            build_data.untar(dpath, fname, False)
+
+        # Mark the data as built.
+        build_data.mark_done(dpath)
+
+
+def build(opt):
+    """Download and unpack the VQA v2 data into <datapath>/VQA-COCO2014-v2.
+
+    Skipped when ``build_data.built`` reports the directory as built.
+    """
+    dpath = os.path.join(opt['datapath'], 'VQA-COCO2014-v2')
+
+    if not build_data.built(dpath):
+        print('[building data: ' + dpath + ']')
+        build_data.remove_dir(dpath)
+        build_data.make_dir(dpath)
+
+        # Download the data.
+        fnames = [
+            'v2_Questions_Train_mscoco.zip',
+            'v2_Questions_Val_mscoco.zip',
+            'v2_Questions_Test_mscoco.zip',
+            'v2_Annotations_Val_mscoco.zip',
+            'v2_Annotations_Train_mscoco.zip',
+        ]
+        url = 'http://visualqa.org/data/mscoco/vqa/'
+
+        for fname in fnames:
+            build_data.download(dpath, url + fname)
+
+        for fname in fnames:
+            build_data.untar(dpath, fname)
+
+        # Mark the data as built.
+        build_data.mark_done(dpath)