This repository has been archived by the owner on Nov 18, 2023. It is now read-only.

Continuous attribute embedding #102

Merged: 25 commits, Oct 7, 2019

Commits
6e15638
Introduce a base class for all Attribute embedding models
Sep 19, 2019
63764ef
A basic implementation of embedding for continuous attributes
Sep 20, 2019
c4be1a8
Update pipeline
Sep 23, 2019
12981f4
Merge branch 'master' into continuous-attribute-embedding
Sep 26, 2019
f25c659
Change the control flow of graph transformation in the pipeline and b…
Sep 26, 2019
ee30fb3
Add synthetic generation of continuous double attributes
Sep 27, 2019
21a7410
Alter flow of embedder construction to accommodate continuous attribu…
Sep 27, 2019
315588e
Add some basic histograms for the embedding of attributes
Sep 27, 2019
b368d59
Results improve when dropout is introduced for continuous attributes,…
Sep 27, 2019
174fb57
Use a loss function that ignores preexisting graph elements, and adds…
Sep 30, 2019
a03a7c8
Alter loss function to only penalise for incorrect nodes, not edges
Sep 30, 2019
982b72f
Merge branch 'learning-stability' into continuous-attribute-embedding
Sep 30, 2019
90e838b
Merge branch 'master' into continuous-attribute-embedding
Oct 3, 2019
2f833a4
Shows that continuous attribute embedding was not to blame for reduce…
Oct 3, 2019
9e2109c
Using non-zero continuous values which carry some meaning (but add n…
Oct 3, 2019
523add9
Empirically, 3 layers and subsequent dropout of 0.5 are the…
Oct 3, 2019
3c8ffd7
Create histograms of the gradients of the variables during training
Oct 4, 2019
9addf8f
Normalise type embeddings in order to increase their magnitudes in li…
Oct 4, 2019
231bcb1
Implement gradient clipping, which seems to greatly improve the stabi…
Oct 4, 2019
41b6e80
Change the implementation of dropout for continuous attributes
Oct 4, 2019
b44babd
Reduce number of training iterations
Oct 4, 2019
7be777a
Change control flow of learning for the case where a log directory is…
Oct 4, 2019
ced063d
Fix embedding tests
Oct 7, 2019
44a920a
Reduce to 1000 training iterations default
Oct 7, 2019
8daccec
Correct test name
Oct 7, 2019
18 changes: 11 additions & 7 deletions kglib/kgcn/examples/diagnosis/diagnosis.py
@@ -19,14 +19,15 @@

import copy
import inspect
import time

from grakn.client import GraknClient

from kglib.kgcn.pipeline.pipeline import pipeline
from kglib.utils.grakn.synthetic.examples.diagnosis.generate import generate_example_graphs
from kglib.utils.graph.thing.queries_to_graph import build_graph_from_queries
from kglib.utils.graph.query.query_graph import QueryGraph
from kglib.utils.graph.iterate import multidigraph_data_iterator
from kglib.utils.graph.query.query_graph import QueryGraph
from kglib.utils.graph.thing.queries_to_graph import build_graph_from_queries


def diagnosis_example(num_graphs=200,
@@ -58,8 +59,9 @@ def diagnosis_example(num_graphs=200,
num_processing_steps_tr=num_processing_steps_tr,
num_processing_steps_ge=num_processing_steps_ge,
num_training_iterations=num_training_iterations,
continuous_attributes=CONTINUOUS_ATTRIBUTES,
categorical_attributes=CATEGORICAL_ATTRIBUTES,
)
output_dir=f"./events/{time.time()}/")

with session.transaction().write() as tx:
write_predictions_to_grakn(ge_graphs, tx)
@@ -71,6 +73,7 @@


CATEGORICAL_ATTRIBUTES = {'name': ['meningitis', 'flu', 'fever', 'light-sensitivity']}
CONTINUOUS_ATTRIBUTES = {'severity': (0, 1)}


def create_concept_graphs(example_indices, grakn_session):
@@ -119,18 +122,19 @@ def diagnosis_query(self, example_id):
$p isa person, has example-id {example_id};
$s isa symptom, has name $sn;
$d isa disease, has name $dn;
$sp(presented-symptom: $s, symptomatic-patient: $p) isa symptom-presentation;
$sp(presented-symptom: $s, symptomatic-patient: $p) isa symptom-presentation, has severity $sev;
$c(cause: $d, effect: $s) isa causality;
$diag(patient: $p, diagnosed-disease: $d) isa diagnosis;
get;''')

def base_query_graph(self):
p, s, sn, d, dn, sp, c = 'p', 's', 'sn', 'd', 'dn', 'sp', 'c'
vars = p, s, sn, d, dn, sp, sev, c = 'p', 's', 'sn', 'd', 'dn', 'sp', 'sev', 'c'
g = QueryGraph()
g.add_vars(p, s, sn, d, dn, sp, c, **PREEXISTS)
g.add_vars(*vars, **PREEXISTS)
g.add_has_edge(s, sn, **PREEXISTS)
g.add_has_edge(d, dn, **PREEXISTS)
g.add_role_edge(sp, s, 'presented-symptom', **PREEXISTS)
g.add_has_edge(sp, sev, **PREEXISTS)
g.add_role_edge(sp, p, 'symptomatic-patient', **PREEXISTS)
g.add_role_edge(c, s, 'effect', **PREEXISTS)
g.add_role_edge(c, d, 'cause', **PREEXISTS)
@@ -153,7 +157,7 @@ def candidate_diagnosis_query(self, example_id):
$p isa person, has example-id {example_id};
$s isa symptom, has name $sn;
$d isa disease, has name $dn;
$sp(presented-symptom: $s, symptomatic-patient: $p) isa symptom-presentation;
$sp(presented-symptom: $s, symptomatic-patient: $p) isa symptom-presentation, has severity $sev;
$c(cause: $d, effect: $s) isa causality;
$diag(candidate-patient: $p, candidate-diagnosed-disease: $d) isa candidate-diagnosis;
get;''')
51 changes: 41 additions & 10 deletions kglib/kgcn/learn/learn.py
@@ -70,20 +70,36 @@ def __call__(self,
loss_ops_tr = loss_ops_preexisting_no_penalty(target_ph, output_ops_tr)
# Loss across processing steps.
loss_op_tr = sum(loss_ops_tr) / self._num_processing_steps_tr

tf.summary.scalar('loss_op_tr', loss_op_tr)
# Test/generalization loss.
loss_ops_ge = loss_ops_preexisting_no_penalty(target_ph, output_ops_ge)
loss_op_ge = loss_ops_ge[-1] # Loss from final processing step.
tf.summary.scalar('loss_op_ge', loss_op_ge)

# Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
step_op = optimizer.minimize(loss_op_tr)
gradients, variables = zip(*optimizer.compute_gradients(loss_op_tr))

for grad, var in zip(gradients, variables):
Member: this for debugging?

Contributor Author: Yes, and it's also functional: optimizer.minimize(loss) computes the gradients and applies them in one step. To intercept the gradients and do anything with them, you have to first compute them and then apply them manually.

Here we do this to:

  1. Visualise the gradients in TensorBoard
  2. Apply gradient clipping, as mentioned in the description (a standalone sketch of this pattern follows)
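
For reference, a minimal, self-contained sketch of this compute/inspect/clip/apply pattern in TF 1.x style. The variable and loss below are illustrative stand-ins rather than this PR's model; clip_norm=5.0 simply mirrors the value used in the diff.

import tensorflow as tf

# Stand-in variable and loss, just to make the sketch runnable.
x = tf.Variable([1.0, 2.0], name='x')
loss = tf.reduce_sum(tf.square(x))

optimizer = tf.train.AdamOptimizer(learning_rate=0.01)

# 1. Compute the gradients explicitly rather than calling
#    optimizer.minimize(loss), which computes and applies them in one op.
gradients, variables = zip(*optimizer.compute_gradients(loss))

# 2. Intercept them, e.g. to record TensorBoard histograms.
for grad, var in zip(gradients, variables):
    if grad is not None:  # grad is None for variables the loss does not reach
        tf.summary.histogram('gradients/' + var.name, grad)

# 3. Clip by global norm, then apply the clipped gradients manually.
clipped, _ = tf.clip_by_global_norm(gradients, clip_norm=5.0)
step_op = optimizer.apply_gradients(zip(clipped, variables))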

try:
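# grad can be None for variables not connected to the loss; any summary that fails is skipped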
print(var.name)
tf.summary.histogram('gradients/' + var.name, grad)
except:
pass

gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
step_op = optimizer.apply_gradients(zip(gradients, variables))

input_ph, target_ph = make_all_runnable_in_session(input_ph, target_ph)

sess = tf.Session()
merged_summaries = tf.summary.merge_all()

train_writer = None

if log_dir is not None:
tf.summary.FileWriter(log_dir, sess.graph)
train_writer = tf.summary.FileWriter(log_dir, sess.graph)

sess.run(tf.global_variables_initializer())

@@ -105,16 +121,22 @@ def __call__(self,
start_time = time.time()
for iteration in range(num_training_iterations):
feed_dict = create_feed_dict(input_ph, target_ph, tr_input_graphs, tr_target_graphs)
train_values = sess.run(
{
"step": step_op,
"target": target_ph,
"loss": loss_op_tr,
"outputs": output_ops_tr
},
feed_dict=feed_dict)

if iteration % log_every_epochs == 0:

train_values = sess.run(
{
"step": step_op,
"target": target_ph,
"loss": loss_op_tr,
"outputs": output_ops_tr,
"summary": merged_summaries
},
feed_dict=feed_dict)

if train_writer is not None:
train_writer.add_summary(train_values["summary"], iteration)

feed_dict = create_feed_dict(input_ph, target_ph, ge_input_graphs, ge_target_graphs)
test_values = sess.run(
{
@@ -140,6 +162,15 @@ def __call__(self,
" {:.4f}, Cge {:.4f}, Sge {:.4f}".format(
iteration, elapsed, train_values["loss"], test_values["loss"],
correct_tr, solved_tr, correct_ge, solved_ge))
else:
train_values = sess.run(
{
"step": step_op,
"target": target_ph,
"loss": loss_op_tr,
"outputs": output_ops_tr
},
feed_dict=feed_dict)

training_info = logged_iterations, losses_tr, losses_ge, corrects_tr, corrects_ge, solveds_tr, solveds_ge
return train_values, test_values, training_info
48 changes: 37 additions & 11 deletions kglib/kgcn/models/attribute.py
@@ -17,31 +17,57 @@
# under the License.
#

import abc
from functools import partial

import sonnet as snt
import tensorflow as tf


class CategoricalAttribute(snt.AbstractModule):
class Attribute(snt.AbstractModule, abc.ABC):
"""
Abstract base class for Attribute value embedding models
"""
def __init__(self, attr_embedding_dim, name='AttributeEmbedder'):
super(Attribute, self).__init__(name=name)
self._attr_embedding_dim = attr_embedding_dim


class ContinuousAttribute(Attribute):
def __init__(self, attr_embedding_dim, name='ContinuousAttributeEmbedder'):
super(ContinuousAttribute, self).__init__(attr_embedding_dim, name=name)

def _build(self, attribute_value):
tf.summary.histogram('cont_attribute_value_histogram', attribute_value)
embedding = snt.Sequential([
snt.nets.MLP([self._attr_embedding_dim] * 3, activate_final=True, use_dropout=True),
snt.LayerNorm(),
])(tf.cast(attribute_value, dtype=tf.float32))
tf.summary.histogram('cont_embedding_histogram', embedding)
return embedding


class CategoricalAttribute(Attribute):
def __init__(self, num_categories, attr_embedding_dim, name='CategoricalAttributeEmbedder'):
super(CategoricalAttribute, self).__init__(name=name)
super(CategoricalAttribute, self).__init__(attr_embedding_dim, name=name)

self._attr_embedding_dim = attr_embedding_dim
self._num_categories = num_categories

def _build(self, inputs):
int_inputs = tf.cast(inputs, dtype=tf.int32)
embedding = snt.Embed(self._num_categories, self._attr_embedding_dim)(int_inputs)
def _build(self, attribute_value):
int_attribute_value = tf.cast(attribute_value, dtype=tf.int32)
tf.summary.histogram('cat_attribute_value_histogram', int_attribute_value)
embedding = snt.Embed(self._num_categories, self._attr_embedding_dim)(int_attribute_value)
tf.summary.histogram('cat_embedding_histogram', embedding)
return tf.squeeze(embedding, axis=1)


class BlankAttribute(snt.AbstractModule):
class BlankAttribute(Attribute):

def __init__(self, attr_embedding_dim, name='BlankAttributeEmbedder'):
super(BlankAttribute, self).__init__(name=name)
self._attr_embedding_dim = attr_embedding_dim
super(BlankAttribute, self).__init__(attr_embedding_dim, name=name)

def _build(self, features):
shape = tf.stack([tf.shape(features)[0], self._attr_embedding_dim])
def _build(self, attribute_value):
shape = tf.stack([tf.shape(attribute_value)[0], self._attr_embedding_dim])

encoded_features = tf.zeros(shape, dtype=tf.float32)
return encoded_features
8 changes: 6 additions & 2 deletions kglib/kgcn/models/embedding.py
@@ -25,13 +25,17 @@
def common_embedding(features, num_types, type_embedding_dim):
preexistance_feat = tf.expand_dims(tf.cast(features[:, 0], dtype=tf.float32), axis=1)
type_embedder = snt.Embed(num_types, type_embedding_dim)
type_embedding = type_embedder(tf.cast(features[:, 1], tf.int32))
norm = snt.LayerNorm()
type_embedding = norm(type_embedder(tf.cast(features[:, 1], tf.int32)))
tf.summary.histogram('type_embedding_histogram', type_embedding)
return tf.concat([preexistance_feat, type_embedding], axis=1)


def attribute_embedding(features, attr_encoders, attr_embedding_dim):
typewise_attribute_encoder = TypewiseEncoder(attr_encoders, attr_embedding_dim)
return typewise_attribute_encoder(features[:, 1:])
attr_embedding = typewise_attribute_encoder(features[:, 1:])
tf.summary.histogram('attribute_embedding_histogram', attr_embedding)
return attr_embedding


def node_embedding(features, num_types, type_embedding_dim, attr_encoders, attr_embedding_dim):
11 changes: 8 additions & 3 deletions kglib/kgcn/models/embedding_test.py
@@ -28,8 +28,10 @@


class TestCommonEmbedding(unittest.TestCase):
def test_embedding_output_shape_as_expected(self):
def setUp(self):
tf.enable_eager_execution()

def test_embedding_output_shape_as_expected(self):
features = np.array([[1, 0, 0.7], [1, 2, 0.7], [0, 1, 0.5]], dtype=np.float32)
type_embedding_dim = 5
output = common_embedding(features, 3, type_embedding_dim)
@@ -38,11 +40,13 @@ def test_embedding_output_shape_as_expected(self):


class TestAttributeEmbedding(unittest.TestCase):
def setUp(self):
tf.enable_eager_execution()

def test_embedding_is_typewise(self):
features = np.array([[1, 0, 0.7], [1, 2, 0.7], [0, 1, 0.5]])

mock_instance = Mock()
mock_instance = Mock(return_value=tf.convert_to_tensor(np.array([[1, 0.7], [1, 0.7], [0, 0.5]])))
mock = Mock(return_value=mock_instance)
patcher = patch('kglib.kgcn.models.embedding.TypewiseEncoder', spec=True, new=mock)
mock_class = patcher.start()
@@ -62,9 +66,10 @@ def test_embedding_is_typewise(self):

class TestNodeEmbedding(unittest.TestCase):

def test_embedding_is_typewise(self):
def setUp(self):
tf.enable_eager_execution()

def test_embedding_is_typewise(self):
features = Mock()
num_types = Mock()
type_embedding_dim = Mock()
5 changes: 5 additions & 0 deletions kglib/kgcn/models/typewise.py
@@ -52,6 +52,8 @@ def __init__(self, encoders_for_types, feature_length, name="typewise_encoder"):

def _build(self, features):

tf.summary.histogram('typewise_encoder_features_histogram', features)

shape = tf.stack([tf.shape(features)[0], self._feature_length])

encoded_features = tf.zeros(shape, dtype=tf.float32)
@@ -69,9 +71,12 @@ def _build(self, features):
# Use this encoder when the feat_type matches any of the types
applicable_types_mask = tf.reduce_any(elementwise_equality, axis=1)
indices_to_encode = tf.where(applicable_types_mask)

feats_to_encode = tf.squeeze(tf.gather(features[:, 1:], indices_to_encode), axis=1)
encoded_feats = encoder()(feats_to_encode)

encoded_features += tf.scatter_nd(tf.cast(indices_to_encode, dtype=tf.int32), encoded_feats, shape)

tf.summary.histogram('typewise_encoder_encoded_features_histogram', encoded_features)

return encoded_features