Merge pull request #340 from dice-group/tdl
tDL, Verbalization, and CV
Demirrr authored Jan 18, 2024
2 parents ac2d69d + 857d8df commit 7a70145
Showing 4 changed files with 588 additions and 210 deletions.
134 changes: 60 additions & 74 deletions examples/concept_learning_cv_evaluation.py
@@ -1,24 +1,33 @@
"""
StratifiedKFold Cross-Validation of DL Concept Learning Algorithms
Usage
python examples/concept_learning_cv_evaluation.py
--lps LPs/Family/lps.json
--kb KGs/Family/family.owl
--max_runtime 30
--report family.csv
"""
import json
import os
import time
import pandas as pd
from ontolearn.knowledge_base import KnowledgeBase
from ontolearn.concept_learner import CELOE, OCEL, EvoLearner
from ontolearn.learners import Drill, TDL
from ontolearn.learning_problem import PosNegLPStandard
from ontolearn.metrics import Accuracy, F1
from owlapy.model import OWLClass, OWLNamedIndividual, IRI
from ontolearn.metrics import F1
from owlapy.model import OWLNamedIndividual, IRI
import argparse
from rdflib import Graph
from sklearn.model_selection import StratifiedKFold
import numpy as np

pd.set_option("display.precision", 5)


# @TODO This should be a standalone function that can be imported from ontolearn/static_funcs.py
def compute_f1_score(individuals, pos, neg):
tp = len(pos.intersection(individuals))
tn = len(neg.difference(individuals))
# tn = len(neg.difference(individuals))

fp = len(neg.intersection(individuals))
fn = len(pos.difference(individuals))
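
# For illustration, the standalone helper asked for in the TODO above could look
# like the following sketch (assuming the standard precision/recall-based F1;
# names are illustrative, not the exact ontolearn implementation):
def compute_f1_score_standalone(individuals: set, pos: set, neg: set) -> float:
    """F1 score of a retrieved individual set against positive/negative examples."""
    tp = len(pos.intersection(individuals))  # positives correctly retrieved
    fp = len(neg.intersection(individuals))  # negatives wrongly retrieved
    fn = len(pos.difference(individuals))    # positives missed
    if tp == 0:
        return 0.0
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * precision * recall / (precision + recall)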
@@ -45,18 +54,27 @@ def dl_concept_learning(args):
settings = json.load(json_file)

kb = KnowledgeBase(path=args.kb)
ocel = OCEL(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(),
max_runtime=args.max_runtime)
celoe = CELOE(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(),
max_runtime=args.max_runtime)
drill = Drill(knowledge_base=KnowledgeBase(path=args.kb), path_pretrained_kge=args.path_pretrained_kge,
quality_func=F1(), max_runtime=args.max_runtime)
tdl = TDL(knowledge_base=KnowledgeBase(path=args.kb),
dataframe_triples=pd.DataFrame(
data=sorted([(str(s), str(p), str(o)) for s, p, o in Graph().parse(args.kb)], key=lambda x: len(x)),
columns=['subject', 'relation', 'object'], dtype=str),
kwargs_classifier={"random_state": 0},
max_runtime=args.max_runtime)

# dictionary to store the data
data = dict()
for str_target_concept, examples in settings['problems'].items():
print('Target concept: ', str_target_concept)
p = examples['positive_examples']
n = examples['negative_examples']
print('\n\n')

print('Target concept: ', str_target_concept)

# Take p and n, generate Kfolds
kf = StratifiedKFold(n_splits=10, shuffle=False)
kf = StratifiedKFold(n_splits=args.folds, shuffle=True, random_state=args.random_seed)
X = np.array(p + n)
y = np.array([1.0 for _ in p] + [0.0 for _ in n])
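
# Why StratifiedKFold rather than plain KFold: each test fold keeps roughly the
# same positive/negative ratio as the full example set. A minimal standalone
# sketch (toy data, illustrative only):
#
#     toy_X = np.array([f"ind_{i}" for i in range(10)])
#     toy_y = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
#     for _, test_idx in StratifiedKFold(n_splits=5, shuffle=True, random_state=1).split(toy_X, toy_y):
#         print(toy_y[test_idx].mean())  # 0.5 in every fold: one positive, one negative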

@@ -67,6 +85,7 @@ def dl_concept_learning(args):
# () Extract positive and negative examples from train fold
train_pos = {pos_individual for pos_individual in X[train_index][y[train_index] == 1]}
train_neg = {neg_individual for neg_individual in X[train_index][y[train_index] == 0]}

# Sanity checking for individuals used for training.
assert train_pos.issubset(examples['positive_examples'])
assert train_neg.issubset(examples['negative_examples'])
@@ -82,23 +101,20 @@ def dl_concept_learning(args):
neg=set(map(OWLNamedIndividual, map(IRI.create, train_neg))))

test_lp = PosNegLPStandard(pos=set(map(OWLNamedIndividual, map(IRI.create, test_pos))),
neg=set(map(OWLNamedIndividual, map(IRI.create, test_neg))))

neg=set(map(OWLNamedIndividual, map(IRI.create, test_neg))))
print("OCEL starts..", end="\t")
start_time = time.time()
model = OCEL(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(), max_runtime=args.max_runtime)
pred_ocel = model.fit(train_lp).best_hypotheses(n=1)
pred_ocel = ocel.fit(train_lp).best_hypotheses(n=1)
rt_ocel = time.time() - start_time
print("OCEL ends..", end="\t")

# () Quality on the training data
train_f1_ocel = compute_f1_score(individuals={i for i in kb.individuals(pred_ocel.concept)},
pos=train_lp.pos,
neg=train_lp.neg)
pos=train_lp.pos,
neg=train_lp.neg)
# () Quality on test data
test_f1_ocel = compute_f1_score(individuals={i for i in kb.individuals(pred_ocel.concept)},
pos=test_lp.pos,
neg=test_lp.neg)
pos=test_lp.pos,
neg=test_lp.neg)
# Reporting
data.setdefault("Train-F1-OCEL", []).append(train_f1_ocel)
data.setdefault("Test-F1-OCEL", []).append(test_f1_ocel)
@@ -107,23 +123,19 @@ def dl_concept_learning(args):
print(f"OCEL Test Quality: {test_f1_ocel:.3f}", end="\t")
print(f"OCEL Runtime: {rt_ocel:.3f}")



print("CELOE starts..", end="\t")
start_time = time.time()
model = CELOE(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(), max_runtime=args.max_runtime)
pred_celoe = model.fit(train_lp).best_hypotheses(n=1)
pred_celoe = celoe.fit(train_lp).best_hypotheses(n=1)
rt_celoe = time.time() - start_time
print("CELOE ends..", end="\t")

# () Quality on the training data
train_f1_celoe = compute_f1_score(individuals={i for i in kb.individuals(pred_celoe.concept)},
pos=train_lp.pos,
neg=train_lp.neg)
pos=train_lp.pos,
neg=train_lp.neg)
# () Quality on test data
test_f1_celoe = compute_f1_score(individuals={i for i in kb.individuals(pred_celoe.concept)},
pos=test_lp.pos,
neg=test_lp.neg)
pos=test_lp.pos,
neg=test_lp.neg)
# Reporting
data.setdefault("Train-F1-CELOE", []).append(train_f1_celoe)
data.setdefault("Test-F1-CELOE", []).append(test_f1_celoe)
@@ -132,12 +144,14 @@ def dl_concept_learning(args):
print(f"CELOE Test Quality: {test_f1_celoe:.3f}", end="\t")
print(f"CELOE Runtime: {rt_celoe:.3f}")



print("Evo starts..", end="\t")
start_time = time.time()
model = EvoLearner(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(), max_runtime=args.max_runtime)
pred_evo = model.fit(train_lp).best_hypotheses(n=1)
# BUG: EvoLearner needs to be initialized for each learning problem
evolearner = EvoLearner(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(),
max_runtime=args.max_runtime,
use_data_properties=False,
use_inverse=False, use_card_restrictions=False)
pred_evo = evolearner.fit(train_lp).best_hypotheses(n=1)
rt_evo = time.time() - start_time
print("Evo ends..", end="\t")

@@ -157,41 +171,34 @@ def dl_concept_learning(args):
print(f"Evo Test Quality: {test_f1_evo:.3f}", end="\t")
print(f"Evo Runtime: {rt_evo:.3f}")


print("DRILL starts..", end="\t")
start_time = time.time()
model = Drill(knowledge_base=KnowledgeBase(path=args.kb), path_pretrained_kge=args.path_pretrained_kge,quality_func=F1(), max_runtime=args.max_runtime)
pred_drill = model.fit(train_lp).best_hypotheses(n=1)
pred_drill = drill.fit(train_lp).best_hypotheses(n=1)
rt_drill = time.time() - start_time
print("DRILL ends..", end="\t")

# () Quality on the training data
train_f1_drill = compute_f1_score(individuals={i for i in kb.individuals(pred_drill.concept)},
pos=train_lp.pos,
neg=train_lp.neg)
pos=train_lp.pos,
neg=train_lp.neg)
# () Quality on test data
test_f1_drill = compute_f1_score(individuals={i for i in kb.individuals(pred_drill.concept)},
pos=test_lp.pos,
neg=test_lp.neg)
pos=test_lp.pos,
neg=test_lp.neg)
# Reporting
data.setdefault("Train-F1-DRILL", []).append(train_f1_drill)
data.setdefault("Test-F1-DRILL", []).append(test_f1_drill)
data.setdefault("RT-DRILL", []).append(rt_drill)
print(f"DRILL Train Quality: {train_f1_drill:.3f}", end="\t")
print(f"DRILL Test Quality: {test_f1_drill:.3f}", end="\t")
print(f"DRILL Runtime: {rt_drill:.3f}")

print("TDL starts..", end="\t")
start_time = time.time()
model = TDL(knowledge_base=KnowledgeBase(path=args.kb), dataframe_triples=pd.DataFrame(
data=[(str(s), str(p), str(o)) for s, p, o in Graph().parse(args.kb)],
columns=['subject', 'relation', 'object'], dtype=str).sort_values('subject'),
kwargs_classifier={"criterion": "gini", "random_state": 0},
max_runtime=args.max_runtime)
# () Fit the model on the training dataset
pred_tdl = model.fit(train_lp).best_hypotheses(n=1)
pred_tdl = tdl.fit(train_lp).best_hypotheses(n=1)
print("TDL ends..", end="\t")
rt_tdl = time.time() - start_time

# () Quality on the training data
train_f1_tdl = compute_f1_score(individuals={i for i in kb.individuals(pred_tdl)},
pos=train_lp.pos,
@@ -216,33 +223,12 @@ def dl_concept_learning(args):

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Description Logic Concept Learning')

parser.add_argument("--max_runtime", type=int, default=1)
parser.add_argument("--lps", type=str, required=True)
parser.add_argument("--kb", type=str, required=True)
parser.add_argument("--max_runtime", type=int, default=10, help="Max runtime")
parser.add_argument("--lps", type=str, required=True, help="Path fto the learning problems")
parser.add_argument("--folds", type=int, default=10, help="Number of folds of cross validation.")
parser.add_argument("--kb", type=str, required=True,
help="Knowledge base")
parser.add_argument("--path_pretrained_kge", type=str, default=None)
parser.add_argument("--report", type=str, default="report.csv")
dl_concept_learning(parser.parse_args())


"""
# Benchmarking: Run a bash script tdl_stratified_kfold_cv_experiments.sh with the following commands
mkdir CVFamilyBenchmarkResults
python examples/concept_learning_cv_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family.owl --max_runtime 60 --report cv_family_results.csv && mv cv_family_results.csv CVFamilyBenchmarkResults
mkdir CVMutagenesisBenchmarkResults
python examples/concept_learning_cv_evaluation.py --lps LPs/Mutagenesis/lps.json --kb KGs/Mutagenesis/mutagenesis.owl --max_runtime 60 --report cv_mutagenesis_results.csv && mv cv_mutagenesis_results.csv CVMutagenesisBenchmarkResults
mkdir CVCarcinogenesisBenchmarkResults
python examples/concept_learning_cv_evaluation.py --lps LPs/Carcinogenesis/lps.json --kb KGs/Carcinogenesis/carcinogenesis.owl --max_runtime 60 --report cv_carcinogenesis_results.csv && mv cv_carcinogenesis_results.csv CVCarcinogenesisBenchmarkResults
# Analysing results
import pandas as pd
pd.set_option("display.precision", 3)
pd.set_option('display.max_columns', None)
path="CVCarcinogenesisBenchmarkResults/cv_carcinogenesis_results.csv"
df = pd.read_csv(path, index_col=0)
df_mean_by_lp = df.groupby(by=df.index).mean()
filter_col = [col for col in df if col.startswith('Test-F1') or col.startswith('RT')]
print(df_mean_by_lp[filter_col])
print(df_mean_by_lp[filter_col].to_latex(index=True, formatters={"name": str.upper}, float_format="{:.1f}".format))
"""
parser.add_argument("--random_seed", type=int, default=1)
dl_concept_learning(parser.parse_args())
56 changes: 35 additions & 21 deletions examples/concept_learning_evaluation.py
@@ -1,3 +1,12 @@
"""
Fitting DL Concept Learning Algorithms:
Given E^+ and E^-, a learner finds a concept H, and the F1 score is computed w.r.t. E^+, E^-, and the retrieval R(H) of H.
python examples/concept_learning_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family.owl --max_runtime 30 --report family.csv
"""

import json
import os
import time
@@ -44,6 +53,18 @@ def dl_concept_learning(args):

kb = KnowledgeBase(path=args.kb)

ocel = OCEL(knowledge_base=kb, quality_func=F1(), max_runtime=args.max_runtime)
celoe = CELOE(knowledge_base=kb, quality_func=F1(), max_runtime=args.max_runtime)
drill = Drill(knowledge_base=KnowledgeBase(path=args.kb),
path_pretrained_kge=args.path_pretrained_kge,
quality_func=F1(),
max_runtime=args.max_runtime)
tdl = TDL(knowledge_base=KnowledgeBase(path=args.kb),
dataframe_triples=pd.DataFrame(
data=sorted([(str(s), str(p), str(o)) for s, p, o in Graph().parse(args.kb)], key=lambda x: len(x)),
columns=['subject', 'relation', 'object'], dtype=str),
kwargs_classifier={"random_state": 0},
max_runtime=args.max_runtime)
# dictionary to store the data
data = dict()
for str_target_concept, examples in settings['problems'].items():
@@ -59,9 +80,8 @@ def dl_concept_learning(args):
lp = PosNegLPStandard(pos=typed_pos, neg=typed_neg)

print("OCEL starts..", end="\t")
model = OCEL(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(), max_runtime=args.max_runtime)
start_time = time.time()
pred_ocel = model.fit(lp).best_hypotheses(n=1)
pred_ocel = ocel.fit(lp).best_hypotheses(n=1)
print("OCEL ends..", end="\t")
rt_ocel = time.time() - start_time
f1_ocel = compute_f1_score(individuals={i for i in kb.individuals(pred_ocel.concept)}, pos=lp.pos, neg=lp.neg)
@@ -71,9 +91,8 @@ def dl_concept_learning(args):
print(f"OCEL Runtime: {rt_ocel:.3f}")

print("CELOE starts..", end="\t")
model = CELOE(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(), max_runtime=args.max_runtime)
start_time = time.time()
pred_celoe = model.fit(lp).best_hypotheses(n=1)
pred_celoe = celoe.fit(lp).best_hypotheses(n=1)
print("CELOE Ends..", end="\t")
rt_celoe = time.time() - start_time
f1_celoe = compute_f1_score(individuals={i for i in kb.individuals(pred_celoe.concept)}, pos=lp.pos, neg=lp.neg)
@@ -83,9 +102,10 @@ def dl_concept_learning(args):
print(f"CELOE Runtime: {rt_celoe:.3f}")

print("Evo starts..", end="\t")
model = EvoLearner(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(), max_runtime=args.max_runtime)
start_time = time.time()
pred_evo = model.fit(lp).best_hypotheses(n=1)
# EvoLearner has a bug and the KB needs to be reloaded for each learning problem
evo = EvoLearner(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(), max_runtime=args.max_runtime)
pred_evo = evo.fit(lp).best_hypotheses(n=1)
print("Evo ends..", end="\t")
rt_evo = time.time() - start_time
f1_evo = compute_f1_score(individuals={i for i in kb.individuals(pred_evo.concept)}, pos=lp.pos, neg=lp.neg)
@@ -96,11 +116,7 @@ def dl_concept_learning(args):

print("DRILL starts..", end="\t")
start_time = time.time()
model = Drill(knowledge_base=KnowledgeBase(path=args.kb),
path_pretrained_kge=args.path_pretrained_kge,
quality_func=F1(),
max_runtime=args.max_runtime)
pred_drill = model.fit(lp).best_hypotheses(n=1)
pred_drill = drill.fit(lp).best_hypotheses(n=1)
print("DRILL ends..", end="\t")
rt_drill = time.time() - start_time
f1_drill = compute_f1_score(individuals=set(kb.individuals(pred_drill.concept)), pos=lp.pos, neg=lp.neg)
@@ -111,21 +127,20 @@ def dl_concept_learning(args):

print("TDL starts..", end="\t")
start_time = time.time()
model = TDL(knowledge_base=KnowledgeBase(path=args.kb), dataframe_triples=pd.DataFrame(
data=[(str(s), str(p), str(o)) for s, p, o in Graph().parse(args.kb)],
columns=['subject', 'relation', 'object'], dtype=str).sort_values('subject'),
kwargs_classifier={"criterion": "gini", "random_state": 0},
max_runtime=args.max_runtime)
pred_tdl = model.fit(lp).best_hypotheses(n=1)
# () Fit the model on the training dataset
pred_tdl = tdl.fit(lp).best_hypotheses(n=1)
print("TDL ends..", end="\t")
rt_tdl = time.time() - start_time
# Compute quality of best prediction
f1_tdl = compute_f1_score(individuals={i for i in kb.individuals(pred_tdl)}, pos=lp.pos, neg=lp.neg)

# () Quality on the training data
f1_tdl = compute_f1_score(individuals={i for i in kb.individuals(pred_tdl)},
pos=lp.pos,
neg=lp.neg)

data.setdefault("F1-TDL", []).append(f1_tdl)
data.setdefault("RT-TDL", []).append(rt_tdl)
print(f"TDL Quality: {f1_tdl:.3f}", end="\t")
print(f"TDL Runtime: {rt_tdl:.3f}")

df = pd.DataFrame.from_dict(data)
df.to_csv(args.report, index=False)
print(df)
@@ -134,7 +149,6 @@ def dl_concept_learning(args):

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Description Logic Concept Learning')

parser.add_argument("--max_runtime", type=int, default=1)
parser.add_argument("--lps", type=str, required=True)
parser.add_argument("--kb", type=str, required=True)
3 changes: 0 additions & 3 deletions ontolearn/knowledge_base.py
@@ -482,9 +482,6 @@ def encode_learning_problem(self, lp: PosNegLPStandard):
Return:
EncodedPosNegLPStandard: The encoded learning problem.
"""

assert len(self.class_hierarchy) > 0

if lp.all is None:
kb_all = self.all_individuals_set()
else: