Skip to content

Commit

Permalink
validation
Browse files Browse the repository at this point in the history
  • Loading branch information
pnrobinson committed Nov 13, 2023
1 parent 150d27f commit e8068b7
Show file tree
Hide file tree
Showing 10 changed files with 695 additions and 108 deletions.
601 changes: 547 additions & 54 deletions notebooks/HPOA_exporter.ipynb

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions src/pyphetools/creation/disease.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,12 @@ def id(self):
@property
def label(self):
    """Read-only access to the label stored in ``self._label``."""
    stored_label = self._label
    return stored_label

def __hash__(self):
    """Hash the (id, label) pair, i.e. the same two fields that equality compares."""
    identity = (self._id, self._label)
    return hash(identity)

def __eq__(self, other):
    """Two objects are equal when both their id and their label are equal.

    :param other: object to compare with
    :returns: True/False for objects of the same class, ``NotImplemented`` otherwise
    """
    # Comparing with an unrelated object (None, a str, ...) must not raise
    # AttributeError; returning NotImplemented lets Python fall back to its
    # default handling, so ``obj == "x"`` is simply False.
    if not isinstance(other, type(self)):
        return NotImplemented
    return (self._id, self._label) == (other._id, other._label)

def __repr__(self):
    """Return the text ``<label> (<id>)``."""
    label, identifier = self._label, self._id
    return f'{label} ({identifier})'
3 changes: 2 additions & 1 deletion src/pyphetools/validation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# to do
from .cohort_validator import CohortValidator
from .content_validator import ContentValidator
from .ontology_qc import OntologyQC
from .phenopacket_validator import PhenopacketValidator
from .validation_result import ValidationResult

__all__ = [
"CohortValidator",
"ContentValidator",
"OntologyQC",
"PhenopacketValidator",
Expand Down
17 changes: 17 additions & 0 deletions src/pyphetools/validation/cohort_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from typing import List
from ..creation.individual import Individual
from .validated_individual import ValidatedIndividual
import hpotk

class CohortValidator:
    """
    Validate every Individual of a cohort.

    Each individual is wrapped in a ValidatedIndividual and validated at
    construction time; the wrapped objects are then available from
    get_validated_individual_list().
    """

    def __init__(self, cohort:List[Individual], ontology:hpotk.MinimalOntology, min_var:int, min_hpo:int, min_allele:int=None) -> None:
        """
        :param cohort: the individuals to validate
        :param ontology: passed to ValidatedIndividual.validate for each individual
        :param min_var: passed to ValidatedIndividual.validate as min_var
        :param min_hpo: passed to ValidatedIndividual.validate as min_hpo
        :param min_allele: passed to ValidatedIndividual.validate as min_allele (default None)
        """
        self._cohort = cohort
        self._validated_individual_list = []
        for individual in cohort:
            validated = ValidatedIndividual(individual=individual)
            validated.validate(
                ontology=ontology,
                min_hpo=min_hpo,
                min_allele=min_allele,
                min_var=min_var,
            )
            self._validated_individual_list.append(validated)

    def get_validated_individual_list(self):
        """
        :returns: one ValidatedIndividual per cohort member, in cohort order
        """
        return self._validated_individual_list
28 changes: 12 additions & 16 deletions src/pyphetools/validation/content_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,22 +51,18 @@ def validate_individual(self, individual:Individual) -> List[ValidationResult]:
n_var = 0
n_alleles = 0
pp_id = individual.get_phenopacket_id()
for interpretation in individual.interpretation_list:
if interpretation.diagnosis is not None:
dx = interpretation.diagnosis
for genomic_interpretation in dx.genomic_interpretations:
n_var += 1
vint = genomic_interpretation.variant_interpretation
if vint.variation_descriptor is not None:
vdesc = vint.variation_descriptor
if vdesc.allelic_state is not None:
gtype = vdesc.allelic_state
if gtype.label == "heterozygous": # "GENO:0000135"
n_alleles += 1
elif gtype.label == "homozygous": # "GENO:0000136"
n_alleles += 2
elif gtype.label == "hemizygous": # "GENO:0000134"
n_alleles += 1
for variant_interpretation in individual.interpretation_list:
n_var += 1
if variant_interpretation.variation_descriptor is not None:
vdesc = variant_interpretation.variation_descriptor
if vdesc.allelic_state is not None:
gtype = vdesc.allelic_state
if gtype.label == "heterozygous": # "GENO:0000135"
n_alleles += 1
elif gtype.label == "homozygous": # "GENO:0000136"
n_alleles += 2
elif gtype.label == "hemizygous": # "GENO:0000134"
n_alleles += 1
if n_pf < self._min_hpo:
msg = f"Minimum HPO terms required {self._min_hpo} but only {n_pf} found"
validation_results.append(ValidationResult.error(phenopacket_id=pp_id, message=msg))
Expand Down
19 changes: 19 additions & 0 deletions src/pyphetools/validation/ontology_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,21 @@ def _fix_redundancies(self, hpo_terms:List[HpTerm]) -> List[HpTerm]:
return non_redundant_terms


def _check_terms(self, hpo_terms:List[HpTerm]) -> None:
    """
    Append an error to self._errors for every term whose id is not contained in
    the ontology, or whose label differs from the name of the ontology term
    with that id.

    NOTE(review): _clean_terms calls this method and then clears self._errors,
    which discards whatever is recorded here -- confirm the intended order.

    :param hpo_terms: terms to check
    :type hpo_terms: List[HpTerm]
    """
    for candidate in hpo_terms:
        term_id = candidate.id
        if term_id not in self._ontology:
            # unknown id: there is no ontology term to compare the label against
            unknown_id = ValidationResultBuilder(self._phenopacket_id).error().malformed_hpo_id(term_id).build()
            self._errors.append(unknown_id)
            continue
        ontology_term = self._ontology.get_term(term_id=term_id)
        if ontology_term.name == candidate.label:
            continue
        wrong_label = ValidationResultBuilder(self._phenopacket_id).error().malformed_hpo_label(candidate.label).build()
        self._errors.append(wrong_label)





def _clean_terms(self) -> List[HpTerm]:
"""
Expand All @@ -99,6 +114,7 @@ def _clean_terms(self) -> List[HpTerm]:
by_age_dictionary = defaultdict(list)
for term in self._individual.hpo_terms:
by_age_dictionary[term.onset].append(term)
self._check_terms(self._individual.hpo_terms)
clean_terms = []
self._errors.clear() # reset
for onset, term_list in by_age_dictionary.items():
Expand Down Expand Up @@ -128,6 +144,9 @@ def get_error_list(self) -> List[ValidationResult]:
"""
return self._errors

def get_clean_terms(self):
    """
    :returns: the object stored in self._clean_hpo_terms
    """
    cleaned = self._clean_hpo_terms
    return cleaned


def get_error_string(self) -> str:
"""
Expand Down
57 changes: 34 additions & 23 deletions src/pyphetools/validation/validated_individual.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,43 @@
from .content_validator import ContentValidator
from typing import List
from .validation_result import ValidationResult

from .ontology_qc import OntologyQC
import hpotk

class ValidatedIndividual:

def __init__(self, individual:Individual) -> None:
self._individual = individual








def validate_phenopacket_list(self, individual_list,min_var:int, min_hpo:int, min_allele:int=None) -> List[ValidatedIndividual]:
"""individual_list can be a list of individuals
:param phenopacket_list: list of GA4GH phenopackets to be validated
:type phenopacket_list: Union[List[phenopackets.Phenopackets], List[str]]
:returns: potentially empty list of warnings and errors
:rtype: List[ValidationResult]
self._clean_terms = []
self._validation_errors = []

def validate(self, ontology:hpotk.MinimalOntology, min_var:int, min_hpo:int, min_allele:int=None) -> None:
"""validate an Individual object for errors in the Ontology or the minimum number of HPO terms/alleles/variants
:param ontology: HPO object
:type ontology: hpotk.MinimalOntology
:param min_var: minimum number of variants for this phenopacket to be considered valid
:type min_var: int
:param min_hpo: minimum number of phenotypic features (HP terms) for this phenopacket to be considered valid
:type min_hpo: int
:param min_allele: minimum number of alleles for this phenopacket to be considered valid
:type min_allele: int
"""
validated_individual_list = []
for individual in individual_list:
cvalidator = ContentValidator(min_hpo=min_hpo, min_allele=min_allele, min_var=min_var)
validation_results = cvalidator.validate_individual(individual=individual)

validation_results.extend(self.validate_phenopacket(pp))
return validation_results
cvalidator = ContentValidator(min_hpo=min_hpo, min_allele=min_allele, min_var=min_var)
validation_results = cvalidator.validate_individual(individual=self._individual)
self._validation_errors.extend(validation_results)
qc = OntologyQC(individual=self._individual, ontology=ontology)
qc_validation_results = qc.get_error_list()
self._validation_errors.extend(qc_validation_results)
self._clean_terms = qc.get_clean_terms()

def get_individual_with_clean_terms(self) -> Individual:
    """
    Replace the HPO terms of the wrapped individual with the cleaned terms and
    return that same individual (it is modified in place, not copied).

    :returns: the wrapped individual after set_hpo_terms(self._clean_terms)
    :rtype: Individual
    """
    self._individual.set_hpo_terms(self._clean_terms)
    return self._individual

def get_validation_errors(self) -> List[ValidationResult]:
    """
    :returns: the list held in self._validation_errors (the list itself, not a copy)
    :rtype: List[ValidationResult]
    """
    collected = self._validation_errors
    return collected

def has_error(self) -> bool:
    """
    :returns: True if at least one entry is present in self._validation_errors
    :rtype: bool
    """
    # an empty list is falsy, so this equals len(...) > 0
    return bool(self._validation_errors)
24 changes: 14 additions & 10 deletions src/pyphetools/validation/validation_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ class Category(Enum):
INSUFFICIENT_HPOS = 3
INSUFFICIENT_ALLELES = 4
INSUFFICIENT_VARIANTS = 5
UNKNOWN = 6
MALFORMED_ID = 6
MALFORMED_LABEL = 7
UNKNOWN = 8


class ValidationResult:
Expand Down Expand Up @@ -72,22 +74,16 @@ def term(self):
def category(self):
return self._category


@staticmethod
def error(phenopacket_id:str, message:str, category:Category=None):
    """
    Create a ValidationResult with error level ERROR and no term.

    :param phenopacket_id: identifier of the phenopacket the result refers to
    :type phenopacket_id: str
    :param message: description of the problem
    :type message: str
    :param category: category of the problem; Category.UNKNOWN is used when omitted
    :type category: Category
    :returns: the new ValidationResult
    """
    # Some callers pass only phenopacket_id and message; defaulting the
    # category keeps them working instead of raising TypeError.
    if category is None:
        category = Category.UNKNOWN
    return ValidationResult(phenopacket_id=phenopacket_id, message=message, category=category, term=None, errorlevel=ErrorLevel.ERROR)

@staticmethod
def warning(phenopacket_id:str, message:str, category:Category=None):
    """
    Create a ValidationResult with error level WARNING and no term.

    :param phenopacket_id: identifier of the phenopacket the result refers to
    :type phenopacket_id: str
    :param message: description of the problem
    :type message: str
    :param category: category of the problem; Category.UNKNOWN is used when omitted
    :type category: Category
    :returns: the new ValidationResult
    """
    # Optional category so that a call with only id and message does not
    # raise TypeError.
    if category is None:
        category = Category.UNKNOWN
    return ValidationResult(phenopacket_id=phenopacket_id, message=message, category=category, term=None, errorlevel=ErrorLevel.WARNING)

def __repr__(self):
    """Return ``<error level>: <message>``."""
    level, text = self._error_level, self._message
    return f"{level}: {text}"




class ValidationResultBuilder:
"""
This class is intended for internal use only, and makes constructing ValidationResult objects a little easier.
"""

def __init__(self, ppkt_id:str):
self._phenopacket_id = ppkt_id
Expand Down Expand Up @@ -128,6 +124,14 @@ def set_message(self, msg):
self._message = msg
return self

def malformed_hpo_id(self, hpo_id):
    """
    Set category MALFORMED_ID and a message naming the offending HPO id.

    :param hpo_id: the HPO id that was found to be invalid
    :returns: this builder, so that calls can be chained (e.g. ``...malformed_hpo_id(x).build()``)
    """
    self._category = Category.MALFORMED_ID
    self._message = f"Invalid HPO id {hpo_id}"
    # Without this return the method yields None and a chained .build() fails
    # with AttributeError; the other setters of this builder return self too.
    return self

def malformed_hpo_label(self, hpo_label):
    """
    Set category MALFORMED_LABEL and a message naming the offending HPO label.

    :param hpo_label: the HPO label that was found to be invalid
    :returns: this builder, so that calls can be chained (e.g. ``...malformed_hpo_label(x).build()``)
    """
    self._category = Category.MALFORMED_LABEL
    # The value reported here is a label, so the message must not call it an id.
    self._message = f"Invalid HPO label {hpo_label}"
    # Return the builder; otherwise a chained .build() is called on None.
    return self

def set_term(self, term:HpTerm):
    """
    Store the term on the builder.

    :param term: value assigned to self._term
    :type term: HpTerm
    :returns: this builder, to allow chaining
    """
    builder = self
    builder._term = term
    return builder
Expand Down
30 changes: 29 additions & 1 deletion src/pyphetools/visualization/hpoa_table_creator.py
Original file line number Diff line number Diff line change
@@ -1 +1,29 @@
import os
import os



EMPTY_CELL = ""

class HpoaTableCreator:
    """
    Create an HPO "small file", a table with these columns (in this order):

    1. #diseaseID
    2. diseaseName
    3. phenotypeID
    4. phenotypeName
    5. onsetID
    6. onsetName
    7. frequency
    8. sex
    9. negation
    10. modifier
    11. description
    12. publication
    13. evidence
    14. biocuration

    The fields should be tab-separated.
    """

    def __init__(self, phenopacket_dir) -> None:
        """
        Placeholder constructor: the argument is accepted but not used yet.

        :param phenopacket_dir: currently ignored
        """
        return None
15 changes: 12 additions & 3 deletions src/pyphetools/visualization/qc_visualizer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@

from typing import List
import hpotk
from ..validation.validation_result import ValidationResult
from ..validation.validated_individual import ValidatedIndividual

class QcVisualizer:
    """Render the outcome of a cohort validation as an HTML fragment."""

    def __init__(self, ontology:hpotk.MinimalOntology) -> None:
        """
        :param ontology: HPO object; stored in self._ontology (not consulted by to_html)
        :type ontology: hpotk.MinimalOntology
        """
        self._ontology = ontology

    def to_html(self, validated_individual_list:List[ValidatedIndividual]) -> str:
        """
        Summarize the validation of a cohort.

        When no individual has an error, the result is the heading plus one
        paragraph stating so. Otherwise the heading is followed by a count of
        the affected individuals and a list with every validation error.

        :param validated_individual_list: individuals that have already been validated
        :type validated_individual_list: List[ValidatedIndividual]
        :returns: HTML fragment, lines joined by newlines
        :rtype: str
        """
        # local import: keeps this block independent of the file's import section
        from html import escape

        n_individuals = len(validated_individual_list)
        with_errors = [vi for vi in validated_individual_list if vi.has_error()]
        html_lines = ["<h2>Cohort validation</h2>"]
        if not with_errors:
            html_lines.append(f"<p>No errors found for the cohort with {n_individuals} individuals</p>")
            return "\n".join(html_lines)
        # Previously this branch returned the placeholder "HELP" and dropped the
        # report; emit the actual findings instead.
        html_lines.append(f"<p>Errors found in {len(with_errors)} of {n_individuals} individuals</p>")
        html_lines.append("<ul>")
        for validated in with_errors:
            for error in validated.get_validation_errors():
                # error text may contain characters that are special in HTML
                html_lines.append(f"<li>{escape(str(error))}</li>")
        html_lines.append("</ul>")
        return "\n".join(html_lines)

0 comments on commit e8068b7

Please sign in to comment.