-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Add VQA V2.0 and Visual Dialog V0.9. #54
Changes from all commits
75ca32d
7511160
a134a3d
911740c
a5dd2d3
75d7145
63e7ae7
6255107
7abd701
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Copyright (c) 2017-present, Facebook, Inc. | ||
# All rights reserved. | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. An additional grant | ||
# of patent rights can be found in the PATENTS file in the same directory. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
# Copyright (c) 2017-present, Facebook, Inc. | ||
# All rights reserved. | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. An additional grant | ||
# of patent rights can be found in the PATENTS file in the same directory. | ||
|
||
from parlai.core.dialog_teacher import DialogTeacher | ||
from .build import build, buildImage | ||
|
||
from PIL import Image | ||
import json | ||
import random | ||
import os | ||
|
||
def _path(opt):
    """Return (data_path, image_path) for the requested VisDial datatype.

    Triggers the dataset and COCO-image downloads first if they are not
    already built under opt['datapath'].
    """
    build(opt)
    buildImage(opt)
    dt = opt['datatype'].split(':')[0]

    # datatype -> (json filename suffix, image folder/file prefix)
    suffixes = {
        'train': ('train', os.path.join('train2014', 'COCO_train2014_')),
        'valid': ('val', os.path.join('val2014', 'COCO_val2014_')),
    }
    if dt not in suffixes:
        raise RuntimeError('Not valid datatype.')
    suffix, img_suffix = suffixes[dt]

    data_path = os.path.join(opt['datapath'], 'VisDial-v0.9',
                             'visdial_0.9_' + suffix + '.json')
    image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)

    return data_path, image_path
|
||
|
||
def _image_loader(path):
    """Load the image file at ``path`` and return it as an RGB PIL Image."""
    img = Image.open(path)
    return img.convert('RGB')
|
||
|
||
class DefaultTeacher(DialogTeacher):
    """
    This version of VisDial inherits from the core Dialog Teacher, which just
    requires it to define an iterator over its data `setup_data` in order to
    inherit basic metrics, a `act` function, and enables
    Hogwild training with shared memory with no extra work.
    """

    def __init__(self, opt, shared=None):
        self.datatype = opt['datatype']
        data_path, self.image_path = _path(opt)
        opt['datafile'] = data_path
        self.id = 'visdial'
        super().__init__(opt, shared)

    def setup_data(self, path):
        """Yield one ((text, labels, reward, candidates, image), new_episode)
        tuple per question/answer round in the VisDial json file.

        Each dialog (one image, ten QA rounds) forms a single episode: the
        first round of a dialog is flagged as the start of a new episode.
        """
        print('loading: ' + path)
        with open(path) as data_file:
            self.visdial = json.load(data_file)

        # questions/answers are stored once and referenced by index
        self.questions = self.visdial['data']['questions']
        self.answers = self.visdial['data']['answers']

        for dialog in self.visdial['data']['dialogs']:
            # for each dialog
            image_id = dialog['image_id']
            img_path = self.image_path + '%012d.jpg' % (image_id)

            for i, qa in enumerate(dialog['dialog']):
                # for each question answer pair.
                question = self.questions[qa['question']]
                answer = [self.answers[qa['answer']]]
                answer_options = [self.answers[ans_id]
                                  for ans_id in qa['answer_options']]
                # BUG FIX: the original compared `i == len(dialog['dialog'])`,
                # which enumerate never reaches, so its episode-boundary flag
                # was dead code and every round was yielded with new_episode
                # always True. DialogTeacher's second yield element marks the
                # START of a new episode, so flag only the first round.
                yield (question, answer, 'None', answer_options, img_path), \
                    i == 0
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Copyright (c) 2017-present, Facebook, Inc. | ||
# All rights reserved. | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. An additional grant | ||
# of patent rights can be found in the PATENTS file in the same directory. | ||
# Download and build the data if it does not exist. | ||
|
||
import parlai.core.build_data as build_data | ||
import os | ||
|
||
|
||
def buildImage(opt):
    """Download and unpack the COCO-2014 image sets into datapath/COCO-IMG.

    Kept under a task-agnostic folder name so multiple tasks (VQA, VisDial)
    can share the same images without re-downloading.
    """
    dpath = os.path.join(opt['datapath'], 'COCO-IMG')

    if build_data.built(dpath):
        # already downloaded and unpacked on a previous run
        return

    print('[building image data: ' + dpath + ']')
    build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # download the image data.
    url = 'http://msvocds.blob.core.windows.net/coco2014/'
    archives = ['train2014.zip', 'val2014.zip', 'test2014.zip']
    for archive in archives:
        build_data.download(dpath, url + archive)
    for archive in archives:
        build_data.untar(dpath, archive, False)

    # Mark the data as built.
    build_data.mark_done(dpath)
|
||
|
||
def build(opt):
    """Download and unpack the VisDial v0.9 dialog json files."""
    dpath = os.path.join(opt['datapath'], 'VisDial-v0.9')

    if build_data.built(dpath):
        # nothing to do; a previous run finished successfully
        return

    print('[building data: ' + dpath + ']')
    build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Download the data.
    url = 'https://computing.ece.vt.edu/~abhshkdz/data/visdial/'
    archives = ['visdial_0.9_train.zip', 'visdial_0.9_val.zip']
    for archive in archives:
        build_data.download(dpath, url + archive)
    for archive in archives:
        build_data.untar(dpath, archive)

    # Mark the data as built.
    build_data.mark_done(dpath)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,9 +12,9 @@ | |
import random | ||
import os | ||
|
||
|
||
def _path(opt): | ||
build(opt) | ||
buildImage(opt) | ||
dt = opt['datatype'].split(':')[0] | ||
|
||
if dt == 'train': | ||
|
@@ -36,7 +36,7 @@ def _path(opt): | |
annotation_path = os.path.join(opt['datapath'], 'VQA-COCO2014', | ||
annotation_suffix + '_annotations.json') | ||
|
||
image_path = os.path.join(opt['datapath'], 'VQA-COCO2014', img_suffix) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually, we'd like to keep the images in datapath There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see. Then I think we might not want the image stay in 'VQA-COCO2014' folder, other task such as Visual Dialog may also use the COCO image. How about put the image under There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds great! Yeah giving it the most general name for the data file is perfect, and then multiple tasks can depend on it (and it won't rebuild it if it's already there). |
||
image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix) | ||
|
||
return data_path, annotation_path, image_path | ||
|
||
|
@@ -100,7 +100,7 @@ def act(self): | |
self.episode_idx = (self.episode_idx + self.step_size) % len(self) | ||
if self.episode_idx == len(self) - self.step_size: | ||
self.epochDone = True | ||
# always showing the same index now. | ||
|
||
qa = self.ques['questions'][self.episode_idx] | ||
question = qa['question'] | ||
image_id = qa['image_id'] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Copyright (c) 2017-present, Facebook, Inc. | ||
# All rights reserved. | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. An additional grant | ||
# of patent rights can be found in the PATENTS file in the same directory. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
# Copyright (c) 2017-present, Facebook, Inc. | ||
# All rights reserved. | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. An additional grant | ||
# of patent rights can be found in the PATENTS file in the same directory. | ||
|
||
from parlai.core.agents import Teacher | ||
from .build import build, buildImage | ||
|
||
from PIL import Image | ||
import json | ||
import random | ||
import os | ||
import pdb | ||
|
||
def _path(opt):
    """Return (data_path, annotation_path, image_path) for VQA v2.

    Builds the dataset and the shared COCO images first if needed. Any
    datatype other than train/valid falls back to the test2015 questions,
    which ship without annotations.
    """
    build(opt)
    buildImage(opt)
    dt = opt['datatype'].split(':')[0]

    # datatype -> (question suffix, annotation suffix, image prefix)
    suffixes = {
        'train': ('v2_OpenEnded_mscoco_train2014',
                  'v2_mscoco_train2014',
                  os.path.join('train2014', 'COCO_train2014_')),
        'valid': ('v2_OpenEnded_mscoco_val2014',
                  'v2_mscoco_val2014',
                  os.path.join('val2014', 'COCO_val2014_')),
    }
    test_suffixes = ('v2_OpenEnded_mscoco_test2015',
                     'None',
                     os.path.join('test2014', 'COCO_test2014_'))
    ques_suffix, annotation_suffix, img_suffix = suffixes.get(dt,
                                                              test_suffixes)

    data_path = os.path.join(opt['datapath'], 'VQA-COCO2014-v2',
                             ques_suffix + '_questions.json')
    annotation_path = os.path.join(opt['datapath'], 'VQA-COCO2014-v2',
                                   annotation_suffix + '_annotations.json')
    image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)

    return data_path, annotation_path, image_path
|
||
|
||
def _image_loader(opt, path): | ||
""" | ||
Loads the appropriate image from the image_id and returns PIL Image format. | ||
""" | ||
if not opt.get('no_images', False): | ||
return Image.open(path).convert('RGB') | ||
else: | ||
return None | ||
|
||
|
||
class OeTeacher(Teacher):
    """
    VQA v2.0 Open-Ended teacher, which loads the json vqa data and implements
    its own `act` method for interacting with the student agent.
    """

    def __init__(self, opt, shared=None):
        super().__init__(opt)
        self.datatype = opt['datatype']
        data_path, annotation_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            # reuse the json already parsed by another teacher instance
            self.ques = shared['ques']
            if 'annotation' in shared:
                self.annotation = shared['annotation']
        else:
            self._setup_data(data_path, annotation_path)

        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)

        self.reset()

    def __len__(self):
        # number of questions in the loaded split
        return self.len

    def reset(self):
        # Reset the dialog so that it is at the start of the epoch,
        # and all metrics are reset.
        super().reset()
        self.lastY = None
        # start one step behind our offset so the first act() lands on it
        self.episode_idx = self.data_offset - self.step_size

    def observe(self, observation):
        """Process observation for metrics."""
        if self.lastY is not None:
            # score the student's reply against the gold answers
            self.metrics.update(observation, self.lastY)
            self.lastY = None
        return observation

    def act(self):
        """Return the next question action (image, text, episode_done, and
        labels when training)."""
        if self.datatype == 'train':
            # plain 'train' samples randomly; 'train:ordered' (and valid/
            # test) iterates sequentially below
            self.episode_idx = random.randrange(self.len)
        else:
            # BUG FIX: was `+ 1`, which ignored the batch striding set up in
            # __init__/reset and made batched teachers serve overlapping
            # examples; advance by step_size so each teacher in a batch
            # covers a disjoint subset (matches the v1 teacher).
            self.episode_idx = (self.episode_idx + self.step_size) % self.len

        qa = self.ques['questions'][self.episode_idx]
        question = qa['question']
        image_id = qa['image_id']

        # COCO image filenames are the zero-padded 12-digit image id
        img_path = self.image_path + '%012d.jpg' % (image_id)

        action = {
            'image': _image_loader(self.opt, img_path),
            'text': question,
            'episode_done': True
        }

        if not self.datatype.startswith('test'):
            anno = self.annotation['annotations'][self.episode_idx]
            self.lastY = [ans['answer'] for ans in anno['answers']]

        if self.datatype.startswith('train'):
            action['labels'] = self.lastY

        return action

    def share(self):
        """Share the parsed json between instances (batch/Hogwild mode)."""
        shared = super().share()
        shared['ques'] = self.ques
        if hasattr(self, 'annotation'):
            shared['annotation'] = self.annotation
        return shared

    def _setup_data(self, data_path, annotation_path):
        print('loading: ' + data_path)
        with open(data_path) as data_file:
            self.ques = json.load(data_file)

        # BUG FIX: was `self.datatype != 'test'`, which disagreed with the
        # `startswith('test')` check in act() — a datatype like 'test:...'
        # would try to load annotations that do not exist for test splits.
        if not self.datatype.startswith('test'):
            print('loading: ' + annotation_path)
            with open(annotation_path) as data_file:
                self.annotation = json.load(data_file)

        self.len = len(self.ques['questions'])
|
||
class DefaultTeacher(OeTeacher):
    # The open-ended teacher is the default for VQA v2.0; per the review
    # discussion above, v2.0 has no multiple-choice variant.
    pass
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does v2 have a multiple-choice version? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no, VQA v2.0 doesn't have the multiple-choice now. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah good catch thank you
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
:)