Skip to content

Commit

Permalink
WIP: Verbalization with LLM and LD2NL
Browse files Browse the repository at this point in the history
  • Loading branch information
Demirrr committed Jan 15, 2024
1 parent 7ea52b1 commit 37b0566
Showing 1 changed file with 160 additions and 24 deletions.
184 changes: 160 additions & 24 deletions ontolearn/learners/tree_learner.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
import numpy as np
import owlapy.model
import pandas as pd

import requests
import json
from ontolearn.knowledge_base import KnowledgeBase
from ontolearn.base import OWLOntologyManager_Owlready2
from owlapy.model import OWLEquivalentClassesAxiom, OWLOntologyManager, OWLOntology, AddImport, OWLImportsDeclaration, \
IRI

# mv best_pred.owl
# (base) demir@demir:~/Desktop/Softwares/Ontolearn/LD2NL/owl2nl$ ./owl2nl.sh -a ./src/test/resources/best_pred.owl -u false -o ./src/test/resources/family.owl -t json -s test_out.json -m rule
# ./owl2nl.sh -a ./home/demir/Desktop/Softwares/Ontolearn/examples/best_pred.owl -u false -o ./home/demir/Desktop/Softwares/Ontolearn/KGs/Family/family.owl -t json -s test_out.json -m rule

from typing import Dict, Set, Tuple, List, Union, TypeVar, Callable
from ontolearn.learning_problem import PosNegLPStandard
import collections
Expand All @@ -15,11 +24,13 @@
OWLObjectIntersectionOf, OWLClassExpression, OWLNothing, OWLThing, OWLNaryBooleanClassExpression, \
OWLObjectUnionOf, OWLClass, OWLObjectComplementOf, OWLObjectMaxCardinality, OWLObjectMinCardinality, \
OWLDataSomeValuesFrom, OWLDatatypeRestriction, OWLLiteral, OWLDataHasValue, OWLObjectHasValue, OWLNamedIndividual
from owlapy.render import DLSyntaxObjectRenderer
from owlapy.render import DLSyntaxObjectRenderer, ManchesterOWLSyntaxOWLObjectRenderer
from sklearn.model_selection import GridSearchCV

import time

from sklearn.tree import export_text


def is_float(value):
try:
Expand Down Expand Up @@ -168,7 +179,9 @@ def __init__(self, knowledge_base, dataframe_triples: pd.DataFrame, kwargs_class
# Mappings from string of IRI to individuals.
self.owl_individuals = {i.get_iri().as_str(): i for i in self.knowledge_base.individuals()}

self.render = DLSyntaxObjectRenderer()
self.dl_render = DLSyntaxObjectRenderer()
self.manchester_render = ManchesterOWLSyntaxOWLObjectRenderer()

# Keyword arguments for sklearn Decision tree.
# Initialize classifier
self.clf = None
Expand All @@ -177,7 +190,9 @@ def __init__(self, knowledge_base, dataframe_triples: pd.DataFrame, kwargs_class

self.max_runtime = max_runtime
self.on_fly_tabular = on_fly_tabular
self.best_pred = None
# best pred
self.disjunction_of_conjunctive_concepts = None
self.conjunctive_concepts = None
# Remove uninformative triples if exists.
# print("Removing uninformative triples...")
self.dataframe_triples = self.dataframe_triples[
Expand Down Expand Up @@ -206,7 +221,8 @@ def __init__(self, knowledge_base, dataframe_triples: pd.DataFrame, kwargs_class
else:
raise NotImplementedError()

def built_sparse_training_data(self, entity_infos: Dict[str, Dict], individuals: List[str],
@staticmethod
def built_sparse_training_data(entity_infos: Dict[str, Dict], individuals: List[str],
feature_names: List[Tuple[str, Union[str, None]]]):
""" Construct a tabular representations from fixed features """
assert entity_infos is not None, "No entity_infos"
Expand Down Expand Up @@ -265,7 +281,8 @@ def construct_hop(self, individuals: List[str]) -> Dict[str, Dict]:
hop[s] = temp
return hop, features

def labeling(self, Xraw, pos, neg, apply_dummy=False):
@staticmethod
def labeling(Xraw, pos, neg, apply_dummy=False):
""" Labelling """
# (5) Labeling: Label each row/node
# Drop "label" if exists
Expand Down Expand Up @@ -340,6 +357,17 @@ def decision_to_owl_class_exp(self, reasoning_step: dict):

return owl_class

def feature_pretify(self):
    """Strip well-known IRI prefixes from feature names for display.

    Each entry of ``self.feature_names`` is a tuple of IRI strings whose
    second component may be ``None`` (cf. the
    ``Tuple[str, Union[str, None]]`` feature type used elsewhere in this
    class). The family-benchmark and rdf-syntax namespaces are removed and
    the remaining parts are concatenated, each followed by a single space
    (the trailing space is kept for backward compatibility).

    Returns:
        A list with one pretified string per feature.
    """
    prefixes = ("http://www.benchmark.org/family#",
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    pretified_feature_names = []
    for feature_tuple in self.feature_names:
        feature = ""
        for part in feature_tuple:
            if part is None:
                # Previously this crashed with AttributeError on None parts.
                continue
            for prefix in prefixes:
                part = part.replace(prefix, "")
            feature += part + " "
        pretified_feature_names.append(feature)
    return pretified_feature_names

def plot(self):
pretified_feature_names = []
for i in self.feature_names:
Expand Down Expand Up @@ -436,32 +464,26 @@ def fit(self, lp: PosNegLPStandard, max_runtime=None):
# Remove paths from the root to leafs if overallping
prediction_per_example = {p for p, indv in prediction_per_example}
# If debugging needed.
"""
set_str_pos = set(str_pos_examples)
set_str_neg = set(str_neg_examples)
for i in prediction_per_example:
quality = compute_f1_score(individuals={_.get_iri().as_str() for _ in self.knowledge_base.individuals(i)},
pos=set_str_pos, neg=set_str_neg)
print(self.render.render(i), quality)
"""
predictions = [pred for pred in prediction_per_example]
"""
print(len(predictions))
for i in predictions:
print(self.render.render(i))
"""
# set_str_pos = set(str_pos_examples)
# set_str_neg = set(str_neg_examples)
# for i in prediction_per_example:
# quality = compute_f1_score(individuals={_.get_iri().as_str() for _ in self.knowledge_base.individuals(i)},
# pos=set_str_pos, neg=set_str_neg)
# print(self.render.render(i), quality)

self.conjunctive_concepts = [pred for pred in prediction_per_example]

self.best_pred = concepts_reducer(concepts=predictions,
reduced_cls=OWLObjectUnionOf)
self.disjunction_of_conjunctive_concepts = concepts_reducer(concepts=self.conjunctive_concepts,
reduced_cls=OWLObjectUnionOf)

# print(self.render.render(self.best_pred), compute_f1_score(individuals={_.get_iri().as_str() for _ in self.knowledge_base.individuals(self.best_pred)},
# pos=set(str_pos_examples), neg=set(str_neg_examples)))
# pos=set(str_pos_examples), neg=set(str_neg_examples)))

return self

def best_hypotheses(self, n=1):
    """Return the best hypothesis found by ``fit``.

    The learner produces exactly one hypothesis — the disjunction of the
    conjunctive concepts extracted from the decision tree — so only
    ``n == 1`` is supported.

    Args:
        n: Number of hypotheses to return; must be 1.

    Returns:
        The disjunction of conjunctive concepts computed during ``fit``.
    """
    assert n == 1
    return self.disjunction_of_conjunctive_concepts

def predict(self, X: List[OWLNamedIndividual]) -> np.ndarray:
#
Expand All @@ -470,3 +492,117 @@ def predict(self, X: List[OWLNamedIndividual]) -> np.ndarray:
individuals=owl_individuals,
feature_names=self.features)
return self.clf.predict(X_vector.values)

@staticmethod
def __post_request_llm(prompt, llm_name: str):
    """Send *prompt* to a locally running Ollama server and stream the reply.

    The generated chunks are echoed to stdout as they arrive and
    concatenated into the returned text.

    Args:
        prompt: The prompt forwarded to the model.
        llm_name: Name of the served model; must be "mistral" or "llama2".

    Returns:
        The full generated text (concatenation of the streamed chunks).

    Raises:
        AssertionError: If ``llm_name`` is not supported.
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the server reports an error inside the stream.
    """
    print("###########")
    print(f"{llm_name} STARTS")
    print("###########")
    assert llm_name in ["mistral", "llama2"]
    data = {"model": llm_name,
            "prompt": prompt,
            "content": "You are an expert. Be concise in your answers",
            "options": {
                # Fixed seed + zero temperature => deterministic generations.
                "seed": 1,
                "temperature": 0.0,
                "repeat_penalty": 1.2,
                # Full list of tunable options:
                # https://github.com/jmorganca/ollama/blob/main/docs/api.md
            }}

    text = ""
    # Use the response as a context manager so the underlying connection is
    # released even when an exception interrupts the stream.
    with requests.post("http://localhost:11434/api/generate", json=data, stream=True) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            if not line:
                # iter_lines() may yield empty keep-alive lines;
                # json.loads(b"") would raise, so skip them.
                continue
            body = json.loads(line)
            # Fail before printing/accumulating a chunk from an error payload.
            if 'error' in body:
                raise RuntimeError(body['error'])
            response_part = body.get('response', '')
            print(response_part, end='', flush=True)
            text += response_part
            if body.get('done', False):
                break
    print("\n###########")
    print(f"{llm_name} END")
    print("###########\n")
    return text

def verbalize(self):
    """Map the learned DL concept into natural-language sentences.

    Serialises the current best hypothesis to ``best_pred.owl`` and asks
    two models served by Ollama ("mistral" and "llama2") to translate the
    DL concept into English.

    Ensure that Ollama is running at http://localhost:11434/.

    Returns:
        A tuple ``(mistral_text, llama2_text)`` with both verbalizations.
        (Previously the texts were discarded; returning them is backward
        compatible — callers ignoring the return value are unaffected.)
    """
    # API docs:
    # https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion

    # Save the best prediction so it can also be fed to external tools
    # (e.g. LD2NL's owl2nl.sh, see the notes at the top of the file).
    self.save_best_hypothesis(concepts=[self.disjunction_of_conjunctive_concepts],
                              path="best_pred")

    prompt = f"Translate this description logic concept into english sentences. Provide no explanations: {self.dl_render.render(self.disjunction_of_conjunctive_concepts)}"
    print(prompt)
    full_text_mistral = self.__post_request_llm(prompt, llm_name="mistral")
    full_text_llama2 = self.__post_request_llm(prompt, llm_name="llama2")
    return full_text_mistral, full_text_llama2

def save_best_hypothesis(self, concepts: List[OWLClassExpression],
                         path: str = 'Predictions',
                         rdf_format: str = 'rdfxml') -> None:
    """Serialise the best hypotheses to a file.

    Each concept is stored as an EquivalentClasses axiom between a fresh
    named class (named after the concept's Manchester rendering) and the
    concept itself, inside a new timestamp-namespaced ontology.

    Args:
        concepts: Class expressions to serialise.
        path: Filename base (extension will be added automatically).
        rdf_format: Serialisation format. currently supported: "rdfxml".
    """
    # NOTE(review): the active NS below lacks the '/' before the timestamp
    # that the commented-out variant has, yielding 'predictions1705...#' —
    # confirm which form is intended.
    #NS: Final = 'https://dice-research.org/predictions/' + str(time.time()) + '#'
    NS: Final = 'https://dice-research.org/predictions' + str(time.time()) + '#'
    # NOTE(review): 'Final' is not among the typing imports visible in this
    # diff — verify it is imported elsewhere, otherwise this raises NameError.
    if rdf_format != 'rdfxml':
        raise NotImplementedError(f'Format {rdf_format} not implemented.')
    # Fresh ontology whose IRI is the timestamped namespace.
    manager: OWLOntologyManager = OWLOntologyManager_Owlready2()
    ontology: OWLOntology = manager.create_ontology(IRI.create(NS))
    # One EquivalentClasses axiom per concept.
    for i in concepts:
        cls_a: OWLClass = OWLClass(IRI.create(NS, self.manchester_render.render(i)))
        equivalent_classes_axiom = OWLEquivalentClassesAxiom([cls_a, i])
        manager.add_axiom(ontology, equivalent_classes_axiom)

    # NOTE(review): 'file:/' (single slash) file IRI — confirm owlready2
    # accepts this form; 'file://' is the more common scheme prefix.
    manager.save_ontology(ontology, IRI.create('file:/' + path + '.owl'))

0 comments on commit 37b0566

Please sign in to comment.