From ed39e52fdc07e2707dc0b6339407efb9d09a7e4a Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 8 Aug 2024 13:18:33 +0200 Subject: [PATCH 1/7] fixig vital status extraction --- src/pyphetools/creation/individual.py | 4 +- .../visualization/kaplan_meier_visualizer.py | 21 ++++++++++ .../visualization/simple_patient.py | 18 +++++++++ test/test_individual.py | 39 ++++++++++++++++++- 4 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 src/pyphetools/visualization/kaplan_meier_visualizer.py diff --git a/src/pyphetools/creation/individual.py b/src/pyphetools/creation/individual.py index fba9e7fe..8343ef4e 100644 --- a/src/pyphetools/creation/individual.py +++ b/src/pyphetools/creation/individual.py @@ -69,7 +69,6 @@ def __init__(self, self._interpretation_list = interpretation_list self._disease = disease self._citation = citation - self._vital_status = None @property def id(self): @@ -205,6 +204,9 @@ def set_vital_status(self, vstatus:VitalStatus): raise ValueError(f"vstatus argument must be pyphetools.pp.v202.VitalStatus but was{type(vstatus)}") self._vital_status = vstatus + def get_vital_status(self) -> VitalStatus: + return self._vital_status + def get_phenopacket_id(self, phenopacket_id=None) -> str: """ :returns: the Phenopacket identifier for this individual diff --git a/src/pyphetools/visualization/kaplan_meier_visualizer.py b/src/pyphetools/visualization/kaplan_meier_visualizer.py new file mode 100644 index 00000000..ef63f553 --- /dev/null +++ b/src/pyphetools/visualization/kaplan_meier_visualizer.py @@ -0,0 +1,21 @@ +import typing +import hpotk +from pyphetools.visualization.simple_patient import SimplePatient + + + +class KaplanMeierVisualizer: + + def __init__(self, + simple_patient_list: typing.List[SimplePatient], + target_term: typing.Union[str, hpotk.TermId]=None) -> None: + """ + The goal of this class is to provide a visualization as a KaplanMeier survival curve with respect to + the age of onset of a specific HPO term (feature 
of the disease). + TODO - also add support for survival curves with GA4GH Phenopacket VitalStatus element. + This will be performed if the target term is None + """ + if target_term is None: + raise ValueError("VitalStatus based KM curve not implemented yet") + + pass \ No newline at end of file diff --git a/src/pyphetools/visualization/simple_patient.py b/src/pyphetools/visualization/simple_patient.py index 561c4ba1..5cd9afd3 100644 --- a/src/pyphetools/visualization/simple_patient.py +++ b/src/pyphetools/visualization/simple_patient.py @@ -6,6 +6,7 @@ from ..creation.constants import Constants from ..creation.hp_term import HpTerm from .simple_variant import SimpleVariant +from ..pp.v202 import VitalStatus class SimplePatient: """ @@ -53,6 +54,23 @@ def __init__(self, ga4gh_phenopacket) -> None: self._sex = "OTHER" else: self._sex = "UNKNOWN" + ## get vital status if possible + self._vstat = None + self._age_last_encounter = None + self._survival_time_in_days = None + self._cause_of_death = None + if ppack.HasField("vital_status"): + vstat = ppack.vital_status + if vstat.status == VitalStatus.Status.DECEASED: + self._vstat = "DECEASED" + elif vstat.status == VitalStatus.Status.ALIVE: + self._vstat = "ALIVE" + else: + pass # keep self._vstat as None + if vstat.survival_time_in_days is not None: + self._survival_time_in_days = vstat.survival_time_in_days + self._cause_of_death = vstat.cause_of_death + for pf in ppack.phenotypic_features: hpterm = HpTerm(hpo_id=pf.type.id, label=pf.type.label, observed=not pf.excluded) if pf.excluded: diff --git a/test/test_individual.py b/test/test_individual.py index 3bbbc9b7..9544638b 100644 --- a/test/test_individual.py +++ b/test/test_individual.py @@ -2,6 +2,7 @@ import pytest from pyphetools.creation import Citation, Disease,Individual, HpTerm +from pyphetools.pp.v202 import VitalStatus, TimeElement, Age, OntologyClass @@ -34,6 +35,25 @@ def ind_c(self) -> Individual: i.add_hpo_term(HpTerm(hpo_id="HP:0000490", label="Deeply 
set eye")) i.set_disease(disease=Disease(disease_id="OMIM:123456", disease_label="label")) return i + + @pytest.fixture + def ind_d(self) -> Individual: + cite = Citation(pmid="PMID:36446582", title="some title") + time_at_last_encounter=TimeElement( + element=Age( + iso8601duration='P6M', + ) + ) + vital_status=VitalStatus( + status=VitalStatus.Status.DECEASED, + time_of_death=TimeElement(element=Age(iso8601duration='P1Y')), + cause_of_death=OntologyClass(id='NCIT:C7541', label='Retinoblastoma'), + survival_time_in_days=180, + ) + i = Individual(individual_id="Low, 2016_P17 (10)", age_at_last_encounter=time_at_last_encounter, vital_status=vital_status, citation=cite) + i.add_hpo_term(HpTerm(hpo_id="HP:0000490", label="Deeply set eye")) + i.set_disease(disease=Disease(disease_id="OMIM:123456", disease_label="label")) + return i @@ -55,4 +75,21 @@ def test_phenopacket_id_B(self, ind_b: Individual): def test_phenopacket_id_C(self, ind_c: Individual): phenopacket_id = ind_c.get_phenopacket_id() expected = "PMID_36446582_Low_2016_P17_10" - assert expected == phenopacket_id \ No newline at end of file + assert expected == phenopacket_id + + def test_phenopacket_vital_status(self, ind_d: Individual): + vstat = ind_d.get_vital_status() + assert vstat is not None + assert vstat.status == VitalStatus.Status.DECEASED + assert 180 == vstat.survival_time_in_days + # cause_of_death=OntologyClass(id='NCIT:C7541', label='Retinoblastoma'), + assert "NCIT:C7541" == vstat.cause_of_death.id + assert "Retinoblastoma" == vstat.cause_of_death.label + + def test_phenopacket_last_encounter(self, ind_d: Individual): + last_encounter = ind_d.age_at_last_encounter + assert last_encounter is not None + assert last_encounter.age is not None + assert last_encounter.age_range is None + age = last_encounter.age + assert age.iso8601duration == "P6M" From 2b624f0a2bfd27be1aa87a0d4e3a4627ec90d93a Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 9 Aug 2024 12:57:58 +0200 Subject: [PATCH 2/7] 
refactoring (1) --- .../creation/case_template_encoder.py | 61 +++++++----- src/pyphetools/creation/hp_term.py | 99 ++++++++++--------- src/pyphetools/creation/import_template.py | 2 + src/pyphetools/creation/individual.py | 43 ++++---- src/pyphetools/creation/pyphetools_age.py | 63 ++++++++++++ src/pyphetools/validation/ontology_qc.py | 23 +++-- src/pyphetools/visualization/__init__.py | 1 + .../visualization/kaplan_meier_visualizer.py | 51 +++++++++- .../visualization/simple_patient.py | 79 ++++++++++----- test/test_hp_term.py | 25 +++-- test/test_pyphetools_age.py | 21 ++++ 11 files changed, 331 insertions(+), 137 deletions(-) diff --git a/src/pyphetools/creation/case_template_encoder.py b/src/pyphetools/creation/case_template_encoder.py index 49200fc4..e430e2ab 100644 --- a/src/pyphetools/creation/case_template_encoder.py +++ b/src/pyphetools/creation/case_template_encoder.py @@ -1,5 +1,5 @@ import abc -from typing import Dict, List +import typing from pyphetools.creation.citation import Citation from pyphetools.creation.constants import Constants from pyphetools.creation.disease import Disease @@ -8,6 +8,7 @@ from pyphetools.creation.hp_term import HpTerm from pyphetools.creation.individual import Individual from pyphetools.creation.pyphetools_age import NoneAge, PyPheToolsAge +from ..pp.v202 import TimeElement as TimeElement202 import os import re import pandas as pd @@ -54,10 +55,10 @@ class DataEncoder(CellEncoder): def __init__(self, h1:str, h2:str): super().__init__(name=h1) - def encode(self, cell_contents): + def encode(self, cell_contents) -> str: return str(cell_contents) - def columntype(self): + def columntype(self) -> "CellType": return CellType.DATA class HpoEncoder(CellEncoder): @@ -112,7 +113,7 @@ def needs_attention(self) -> bool: def get_error(self) -> str: return self._error - def encode(self, cell_contents) -> None: + def encode(self, cell_contents) -> typing.Optional[HpTerm]: """ Parses one cell from the template. 
Valid entries are observed, excluded, na, and ISO8601 age strings. Any other entry will lead to raising an Exception, probably the user entered something erroneous. @@ -126,9 +127,11 @@ def encode(self, cell_contents) -> None: return None elif len(cell_contents) > 0: try: - onset = PyPheToolsAge.get_age(cell_contents) - if onset.is_valid(): # valid OSO8601 age of onset + onset = PyPheToolsAge.get_age_pp201(cell_contents) + if onset is not None: # valid age of onset return HpTerm(hpo_id=self._hpo_id, label=self._hpo_label, onset=onset) + else: + raise ValueError(f"Could not code age of onset {cell_contents}") except Exception as parse_error: raise ValueError(f"Could not parse HPO column cell_contents: \”{str(parse_error)}\"") # if we cannot parse successfully, there is probably an error in the format. Drop down to end of function to warn user @@ -142,7 +145,7 @@ def __init__(self, hpo_cr:HpoConceptRecognizer, h1=None, h2=None, ): super().__init__(name="Miscellaneous HPO column") self._hpo_cr = hpo_cr - def encode(self, cell_contents): + def encode(self, cell_contents) -> typing.List[HpTerm]: if isinstance(cell_contents, float): return [] cell_contents = str(cell_contents) @@ -170,10 +173,10 @@ class NullEncoder(CellEncoder): def __init__(self, h1=None, h2=None): super().__init__(name="Begin of HPO column") - def encode(self, cell_contents): + def encode(self, cell_contents) -> None: return None - def columntype(self): + def columntype(self) -> "CellType": return CellType.NULL EXPECTED_HEADERS = {"PMID", "title", "individual_id", "comment", "disease_id", "disease_label", @@ -263,7 +266,10 @@ def __init__(self, df:pd.DataFrame, hpo_cr:HpoConceptRecognizer, created_by:str, metadata.default_versions_with_hpo(CaseTemplateEncoder.HPO_VERSION) self._metadata_d[i.id] = metadata - def _process_header(self, header_1:List, header_2:List, hpo_cr:HpoConceptRecognizer) -> Dict[int, CellEncoder]: + def _process_header(self, + header_1:typing.List[str], + header_2:typing.List[str], 
+ hpo_cr:HpoConceptRecognizer) -> typing.Dict[int, CellEncoder]: index_to_decoder_d = {} in_hpo_range = False for i in range(self._n_columns): @@ -307,7 +313,8 @@ def _process_header(self, header_1:List, header_2:List, hpo_cr:HpoConceptRecogn print(f"ERROR: {e}") return index_to_decoder_d - def _check_for_duplicate_individual_ids(self, df:pd.DataFrame) -> None: + def _check_for_duplicate_individual_ids(self, + df:pd.DataFrame) -> None: """Check that no two individuals in the dataframe have the same identifier Duplicate identifiers can lead to other errors in the code An identifier is made from the combination of PMID and individual_id and must be unique @@ -332,9 +339,13 @@ def _check_for_duplicate_individual_ids(self, df:pd.DataFrame) -> None: raise ValueError(err_str) # else, all is OK, no duplicate ids - def _parse_individual(self, row:pd.Series): + def _parse_individual(self, + row:pd.Series) -> Individual: + """ + Parse one row of the Data ingest (Excel) template, corresponding to one individual + """ if not isinstance(row, pd.Series): - raise ValueError(f"argument df must be pandas DSeriestaFrame but was {type(row)}") + raise ValueError(f"argument df must be pandas Series but was {type(row)}") data = row.values.tolist() if len(data) != self._n_columns: # Should never happen @@ -345,9 +356,9 @@ def _parse_individual(self, row:pd.Series): encoder = self._index_to_decoder.get(i) cell_contents = data[i] if encoder is None: - print(f"Encoder {i} was None for data \"{cell_contents}\"") + print(f"Encoder for column {i} was None for data \"{cell_contents}\"") self._debug_row(i, row) - raise ValueError(f"Encoder {i} was None for data \"{cell_contents}\"") + raise ValueError(f"Encoder for column {i} was None for data \"{cell_contents}\"") elif encoder.columntype == CellType.NTR: continue ## cannot be use yet because new term request. 
encoder_type = encoder.columntype() @@ -438,22 +449,22 @@ def _debug_row(self, target_idx:int, row:pd.Series): else: print(f"[{j}] {hdr}={row_items[j]}") - def get_individuals(self) -> List[Individual]: + def get_individuals(self) -> typing.List[Individual]: return self._individuals - def get_allele1_d(self)-> Dict[str,str]: + def get_allele1_d(self)-> typing.Dict[str,str]: return self._allele1_d - def get_allele2_d(self)-> Dict[str,str]: + def get_allele2_d(self)-> typing.Dict[str,str]: return self._allele2_d def _is_biallelic(self) -> bool: return self._is_biallelic - def get_metadata_d(self) -> Dict[str,MetaData]: + def get_metadata_d(self) -> typing.Dict[str,MetaData]: return self._metadata_d - def get_phenopackets(self) -> List[PPKt.Phenopacket]: + def get_phenopackets(self) -> typing.List[PPKt.Phenopacket]: ppack_list = [] for individual in self._individuals: cite = individual._citation @@ -465,7 +476,8 @@ def get_phenopackets(self) -> List[PPKt.Phenopacket]: - def _transform_individuals_to_phenopackets(self, individual_list:List[Individual]): + def _transform_individuals_to_phenopackets(self, + individual_list:typing.List[Individual]): """Create one phenopacket for each of the individuals :param individual_list: List of individual objects @@ -486,7 +498,9 @@ def _transform_individuals_to_phenopackets(self, individual_list:List[Individual ppkt_list.append(phenopckt) return ppkt_list - def output_individuals_as_phenopackets(self, individual_list:List[Individual], outdir="phenopackets") -> None: + def output_individuals_as_phenopackets(self, + individual_list:typing.List[Individual], + outdir:str="phenopackets") -> None: """write a list of Individual objects to file in GA4GH Phenopacket format Note that the individual_list needs to be passed to this object, because we expect that the QC code will have been used to cleanse the data of redundancies etc before output. 
@@ -526,7 +540,8 @@ def output_individuals_as_phenopackets(self, individual_list:List[Individual], o print(f"We output {written} GA4GH phenopackets to the directory {outdir}") - def print_individuals_as_phenopackets(self, individual_list:List[Individual]) -> None: + def print_individuals_as_phenopackets(self, + individual_list:typing.List[Individual]) -> None: """Function designed to show all phenopackets in a notebook for Q/C :param individual_list: List of individual objects :type individual_list:List[Individual] diff --git a/src/pyphetools/creation/hp_term.py b/src/pyphetools/creation/hp_term.py index 64b08f72..1a634704 100644 --- a/src/pyphetools/creation/hp_term.py +++ b/src/pyphetools/creation/hp_term.py @@ -1,9 +1,11 @@ +import typing import pandas as pd import phenopackets as PPKt -from .constants import Constants -from .pyphetools_age import HpoAge, IsoAge, NoneAge, PyPheToolsAge +from .pyphetools_age import PyPheToolsAge +from ..pp.v202 import TimeElement as TimeElement202 import hpotk + class HpTerm: """ Class to represent a phenotypic observation as an HPO term with optional modifiers @@ -21,7 +23,14 @@ class HpTerm: :param resolution: an ISO8601 string representing the age of resolution, optional :type resolution: str """ - def __init__(self, hpo_id:str, label:str, observed:bool=True, measured:bool=True, onset=NoneAge("na"), resolution=NoneAge("na")): + + def __init__(self, + hpo_id: str, + label: str, + observed: bool = True, + measured: bool = True, + onset: TimeElement202 = None, + resolution: TimeElement202 = None): if hpo_id is None or len(hpo_id) == 0 or not hpo_id.startswith("HP"): raise ValueError(f"invalid id argument: '{hpo_id}'") if label is None or len(label) == 0: @@ -30,8 +39,8 @@ def __init__(self, hpo_id:str, label:str, observed:bool=True, measured:bool=True self._label = label self._observed = observed self._measured = measured - if not isinstance(onset, PyPheToolsAge): - raise ValueError(f"onset argument must be PyPheToolsAge or 
subclass but was {type(onset)}") + #if not onset is None or str(type(onset)) != "": + # raise ValueError(f"onset argument must be TimeElement202 or None but was {type(onset)}") self._onset = onset self._resolution = resolution @@ -77,24 +86,23 @@ def measured(self) -> bool: return self._measured @property - def onset(self) -> PyPheToolsAge: + def onset(self) -> typing.Optional[TimeElement202]: """ :returns: A PyPheToolsAge object representing the age this abnormality first was observed - :rtype: PyPheToolsAge + :rtype: typing.Optional[TimeElement202] """ return self._onset - def set_onset(self, onset:PyPheToolsAge) -> None: - if not isinstance(onset, PyPheToolsAge): - raise ValueError(f"argument of set_onset but be PyPheToolsAge but was {type(onset)}") + def set_onset(self, onset: TimeElement202) -> None: + if not isinstance(onset, TimeElement202): + raise ValueError(f"argument of set_onset but be TimeElement202 but was {type(onset)}") self._onset = onset - @property - def resolution(self) -> PyPheToolsAge: + def resolution(self) -> typing.Optional[TimeElement202]: """ :returns: A PyPheToolsAge object representing the age this abnormality resolved - :rtype: PyPheToolsAge + :rtype: typing.Optional[TimeElement202] """ return self._resolution @@ -121,7 +129,7 @@ def hpo_term_and_id(self) -> str: def _term_and_id_with_onset(self) -> str: if self._onset is not None and self._onset.is_valid(): - return f"{self.hpo_term_and_id}: onset {self._onset.age_string}" + return f"{self.hpo_term_and_id}: onset {self._onset}" else: return self.hpo_term_and_id @@ -152,13 +160,12 @@ def to_ga4gh_phenotypic_feature(self) -> PPKt.PhenotypicFeature: pf.type.label = self._label if not self._observed: pf.excluded = True - if self._onset.is_valid(): - pf.onset.CopyFrom(self._onset.to_ga4gh_time_element()) - if self._resolution.is_valid(): - pf.resolution.CopyFrom(self._resolution.to_ga4gh_time_element()) + if self._onset is not None: + pf.onset.CopyFrom(self._onset.to_message()) + if 
self._resolution is not None: + pf.resolution.CopyFrom(self._resolution.to_message()) return pf - @staticmethod def term_list_to_dataframe(hpo_list) -> pd.DataFrame: if not isinstance(hpo_list, list): @@ -171,12 +178,12 @@ def term_list_to_dataframe(hpo_list) -> pd.DataFrame: return pd.DataFrame(columns=['Col1', 'Col2', 'Col3']) items = [] for hp in hpo_list: - d = { "id": hp.id, "label": hp.label, "observed":hp.observed, "measured": hp.measured } + d = {"id": hp.id, "label": hp.label, "observed": hp.observed, "measured": hp.measured} items.append(d) return pd.DataFrame(items) @staticmethod - def from_hpo_tk_term(hpotk_term:hpotk.Term) -> "HpTerm": + def from_hpo_tk_term(hpotk_term: hpotk.Term) -> "HpTerm": """Create a pyphetools HpTerm object from an hpo-toolkit Term object :param hpotk_term: A term from the HPO toolkit @@ -191,10 +198,12 @@ def from_hpo_tk_term(hpotk_term:hpotk.Term) -> "HpTerm": class HpTermBuilder: - def __init__(self, hpo_id:str, hpo_label:str): + def __init__(self, + hpo_id: str, + hpo_label: str): if not hpo_id.startswith("HP:"): raise ValueError(f"Malformed HPO id {hpo_id}") - if not len(hpo_id) == 10: + if len(hpo_id) != 10: raise ValueError(f"Malformed HPO id with length {len(hpo_id)}: {hpo_id}") self._hpo_id = hpo_id if hpo_label is None or len(hpo_label) < 3: @@ -202,8 +211,8 @@ def __init__(self, hpo_id:str, hpo_label:str): self._hpo_label = hpo_label self._observed = True self._measured = True - self._onset = NoneAge("na") - self._resolution = NoneAge("na") + self._onset = None + self._resolution = None def excluded(self): self._observed = False @@ -218,98 +227,96 @@ def iso8601_onset(self, onset): """ if not isinstance(onset, str): raise ValueError(f"onset argument must be iso8601 string but was {type(onset)}") - self._onset = IsoAge.from_iso8601(onset) + self._onset = PyPheToolsAge.get_age_pp201(onset) return self def embryonal_onset(self): """Onset of disease at up to 8 weeks following fertilization (corresponding to 10 weeks of 
gestation). """ - self._onsetTerm = HpoAge("Embryonal onset") # HP:0011460 + self._onsetTerm = PyPheToolsAge.get_age_pp201("Embryonal onset") # HP:0011460 return self def fetal_onset(self): """Onset prior to birth but after 8 weeks of embryonic development (corresponding to a gestational age of 10 weeks). """ - self._onset = HpoAge("Fetal onset") # HP:0011461 + self._onset = PyPheToolsAge.get_age_pp201("Fetal onset") # HP:0011461 return self def second_trimester_onset(self): """second trimester, which comprises the range of gestational ages from 14 0/7 weeks to 27 6/7 (inclusive) """ - self._onset = HpoAge("Second trimester onset") # HP:0034198 + self._onset = PyPheToolsAge.get_age_pp201("Second trimester onset") # HP:0034198 return self def late_first_trimester_onset(self): """late first trimester during the early fetal period, which is defined as 11 0/7 to 13 6/7 weeks of gestation (inclusive). """ - self._onset = HpoAge("Late first trimester onset") # HP:0034199 + self._onset = PyPheToolsAge.get_age_pp201("Late first trimester onset") # HP:0034199 return self def third_trimester_onset(self): """third trimester, which is defined as 28 weeks and zero days (28+0) of gestation and beyond. """ - self._onset = HpoAge("Third trimester onset") # HP:0034197 + self._onset = PyPheToolsAge.get_age_pp201("Third trimester onset") # HP:0034197 return self def antenatal_onset(self): """onset prior to birth """ - self._onset = HpoAge("Antenatal onset") # HP:0030674 + self._onset = PyPheToolsAge.get_age_pp201("Antenatal onset") # HP:0030674 return self def congenital_onset(self): """A phenotypic abnormality that is present at birth. """ - self._onset = HpoAge("Congenital onset") # HP:0003577 + self._onset = PyPheToolsAge.get_age_pp201("Congenital onset") # HP:0003577 return self def neonatal_onset(self): """Onset of signs or symptoms of disease within the first 28 days of life. 
""" - self._onset = HpoAge("Neonatal onset") # HP:0003623) + self._onset = PyPheToolsAge.get_age_pp201("Neonatal onset") # HP:0003623) return self - def infantile_onset(self): """Onset of signs or symptoms of disease between 28 days to one year of life. """ - self._onset = HpoAge("Infantile onset") # HP:0003593 + self._onset = PyPheToolsAge.get_age_pp201("Infantile onset") # HP:0003593 return self def childhood_onset(self): """Onset of disease at the age of between 1 and 5 years. """ - self._onset = HpoAge("Childhood onset") # HP:0011463 + self._onset = PyPheToolsAge.get_age_pp201("Childhood onset") # HP:0011463 return self def juvenile_onset(self): """Onset of signs or symptoms of disease between the age of 5 and 15 years. """ - self._onset = HpoAge("Juvenile onset") # HP:0003621 + self._onset = PyPheToolsAge.get_age_pp201("Juvenile onset") # HP:0003621 return self def young_adult_onset(self): """Onset of disease at the age of between 16 and 40 years. """ - self._onset = HpoAge("Young adult onset") # HP:0011462 + self._onset = PyPheToolsAge.get_age_pp201("Young adult onset") # HP:0011462 return self def middle_age_onset(self): """onset of symptoms at the age of 40 to 60 years. """ - self._onset = HpoAge("Middle age onset") # HP:0003596 + self._onset = PyPheToolsAge.get_age_pp201("Middle age onset") # HP:0003596 return self def late_onset(self): """Onset of symptoms after the age of 60 years. 
""" - self._onset = HpoAge("Late onset") # HP:0003584 + self._onset = PyPheToolsAge.get_age_pp201("Late onset") # HP:0003584 return self def build(self) -> HpTerm: return HpTerm(hpo_id=self._hpo_id, - label=self._hpo_label, - observed=self._observed, - measured=self._measured, - onset=self._onset) - + label=self._hpo_label, + observed=self._observed, + measured=self._measured, + onset=self._onset) diff --git a/src/pyphetools/creation/import_template.py b/src/pyphetools/creation/import_template.py index 77caade8..ba2c2214 100644 --- a/src/pyphetools/creation/import_template.py +++ b/src/pyphetools/creation/import_template.py @@ -126,6 +126,8 @@ def import_phenopackets_from_template(self, hemizygous:bool=False, leniant_MOI:bool=False): """Import the data from an Excel template and create a collection of Phenopackets + This method writes the individuals as Phenopackets to file and also returns Individuals and the CValidator. + ToDo -- refactor to avoid side effects. Note that things will be completely automatic if the template just has HGNC encoding variants If there are structural variants, we need to encode them by hand by passing them as diff --git a/src/pyphetools/creation/individual.py b/src/pyphetools/creation/individual.py index 8343ef4e..b7644569 100644 --- a/src/pyphetools/creation/individual.py +++ b/src/pyphetools/creation/individual.py @@ -10,7 +10,7 @@ from .hgvs_variant import Variant from .metadata import MetaData, Resource from .pyphetools_age import PyPheToolsAge, NoneAge, IsoAge -from ..pp.v202 import VitalStatus +from ..pp.v202 import OntologyClass, TimeElement, VitalStatus class Individual: """ @@ -35,8 +35,8 @@ def __init__(self, hpo_terms:List[HpTerm]=None, citation:Citation=None, sex:str=Constants.NOT_PROVIDED, - age_of_onset:str=NoneAge("na"), - age_at_last_encounter:str=NoneAge("na"), + age_of_onset:PyPheToolsAge=NoneAge("na"), + age_at_last_encounter:PyPheToolsAge=NoneAge("na"), vital_status:VitalStatus=None, 
interpretation_list:List[PPKt.VariantInterpretation]=None, disease:Disease=None): @@ -90,36 +90,21 @@ def set_sex(self, sex): self._sex = sex @property - def age_of_onset(self): + def age_of_onset(self) -> PyPheToolsAge: """ - :returns: an iso8601 representation of age or HPO term label - :rtype: str + :returns: a representation of age when the disease first manifested + :rtype: PyPheToolsAge """ return self._age_of_onset @property - def age_at_last_encounter(self): + def age_at_last_encounter(self) -> PyPheToolsAge: """ - :returns: an iso8601 representation of age or HPO term label - :rtype: str + :returns: a representation of age when the individual was last seen in a medical context + :rtype: PyPheToolsAge """ return self._age_at_last_encounter - def set_age_of_onset(self, age): - """ - :param age: iso8601 string or HPO onset label - :type age: str - """ - if not isinstance(age, str): - raise ValueError(f"age argument must be a string but was {type(age)}") - self._age_of_onset = PyPheToolsAge.get_age(age) - - def set_age_at_last_encounter(self, age): - """ - :param age: iso8601 string or HPO onset label - :type age: str - """ - self._age_at_last_encounter = age @property def hpo_terms(self): @@ -303,7 +288,10 @@ def to_ga4gh_phenopacket(self, metadata, phenopacket_id=None) -> PPKt.Phenopacke elif self._sex == Constants.UNKNOWN_SEX_SYMBOL: php.subject.sex = PPKt.Sex.UNKNOWN_SEX if self._vital_status is not None: + print("DATA TYPE OF SELV VS", type(self._vital_status)) + print("DATA TYPE OF php.subject.vital_status", type(php.subject.vital_status)) vs = self._vital_status.to_message() + print("DATA TYPE OF vs", type(vs), " for ", php.id) php.subject.vital_status.CopyFrom(vs) disease_object = self._get_disease_object() php.diseases.append(disease_object) @@ -354,6 +342,13 @@ def to_ga4gh_phenopacket(self, metadata, phenopacket_id=None) -> PPKt.Phenopacke metadata.external_references.append(extref) php.meta_data.CopyFrom(metadata) return php + + + + + + + def 
__str__(self): hpo_list = [t.to_string() for t in self._hpo_terms] diff --git a/src/pyphetools/creation/pyphetools_age.py b/src/pyphetools/creation/pyphetools_age.py index 7beb739e..4c4b83d2 100644 --- a/src/pyphetools/creation/pyphetools_age.py +++ b/src/pyphetools/creation/pyphetools_age.py @@ -1,11 +1,18 @@ import math import abc import re +import typing + DAYS_IN_WEEK = 7 AVERAGE_DAYS_IN_MONTH = 30.437 AVERAGE_DAYS_IN_YEAR = 365.25 import phenopackets as PPKt +from ..pp.v202 import GestationalAge as GestationalAge202 +from ..pp.v202 import OntologyClass as OntologyClass202 +from ..pp.v202 import TimeElement as TimeElement202 +from ..pp.v202 import Age as Age202 + from .constants import Constants @@ -93,6 +100,35 @@ def age_string(self): else: return Constants.NOT_PROVIDED + + + @staticmethod + def get_age_pp201(age_string:str) -> typing.Optional[TimeElement202]: + """ + Encode the age string as a TimeElement if possible + """ + if age_string is None or len(age_string) == 0: + return None + if isinstance(age_string, float) and math.isnan(age_string): + return None # sometimes pandas returns an empty cell as a float NaN + if age_string.startswith("P"): + return TimeElement202(Age202(age_string)) + elif age_string in HPO_ONSET_TERMS: + hpo_id = HPO_ONSET_TERMS.get(age_string) + onsetClz = OntologyClass202(id=hpo_id, label=age_string) + return TimeElement202(onsetClz) + elif GestationalAge.is_gestational_age(age_string): + ga = GestationalAge(age_string) + ga202 = GestationalAge202(weeks=ga.weeks, days=ga.days) + return TimeElement202(ga202) + else: + # only warn if the user did not enter na=not available + if age_string != 'na': + raise ValueError(f"Could not parse \"{age_string}\" as age.") + return NoneAge + + + @staticmethod def get_age(age_string) -> "PyPheToolsAge": """Return an appropriate subclass of PyPheToolsAge or None @@ -120,6 +156,20 @@ def get_age(age_string) -> "PyPheToolsAge": if age_string != 'na': raise ValueError(f"Could not parse 
\"{age_string}\" as age.") return NoneAge(age_string=age_string) + + @staticmethod + def age_key_to_ga4gh(age_string) : + """ + Transform an age key such as either an iso8601 string (e.g. P41Y) or an HPO Onset label (e.g., Congenital onset) into a TimeElement + The age keys are used in the Excel template files. Currently, only iso8601 and HPO Onset are supported. + """ + if not isinstance(age_string, str): + raise ValueError(f"age_string argument {age_string} must be a string but was {type(age_string)}") + + age_obj = PyPheToolsAge.get_age(age_string=age_string) + if not age_obj.is_valid(): + raise ValueError(f"Could not parse age key \"{age_string}\"") + return age_obj.to_ga4gh_time_element() @@ -323,6 +373,15 @@ def __init__(self, age_string) -> None: else: raise ValueError(f"Could not extract gestation age from \"{age_string}\".") + + @property + def weeks(self): + return self._weeks + + @property + def days(self): + return self._days + def is_valid(self): return True @@ -365,3 +424,7 @@ def is_gestational_age(age_string): return True else: return False + + + + diff --git a/src/pyphetools/validation/ontology_qc.py b/src/pyphetools/validation/ontology_qc.py index 745ce23b..9fa9bcb1 100644 --- a/src/pyphetools/validation/ontology_qc.py +++ b/src/pyphetools/validation/ontology_qc.py @@ -1,11 +1,10 @@ import hpotk from collections import Counter -from typing import List, Optional, Set +from typing import List, Optional from ..creation.hp_term import HpTerm from .validation_result import ValidationResult, ValidationResultBuilder from collections import defaultdict from ..creation.individual import Individual -from ..creation.constants import Constants @@ -18,7 +17,11 @@ class OntologyQC: """ - def __init__(self, ontology:hpotk.MinimalOntology, individual:Individual, fix_conflicts=True, fix_redundancies=True): + def __init__(self, + ontology:hpotk.MinimalOntology, + individual:Individual, + fix_conflicts=True, + fix_redundancies=True): self._ontology = ontology 
self._individual = individual self._phenopacket_id = individual.get_phenopacket_id() @@ -28,7 +31,9 @@ def __init__(self, ontology:hpotk.MinimalOntology, individual:Individual, fix_co self._clean_hpo_terms = self._clean_terms() - def _fix_conflicts(self, observed_hpo_terms:List[HpTerm], excluded_hpo_terms) -> List[HpTerm]: + def _fix_conflicts(self, + observed_hpo_terms:List[HpTerm], + excluded_hpo_terms) -> List[HpTerm]: """ This class detects excluded superclasses that have observed subclasses -- a conflict. @@ -72,7 +77,8 @@ def _fix_conflicts(self, observed_hpo_terms:List[HpTerm], excluded_hpo_terms) -> - def _fix_redundancies(self, hpo_terms:List[HpTerm]) -> List[HpTerm]: + def _fix_redundancies(self, + hpo_terms:List[HpTerm]) -> List[HpTerm]: """ Remove redundant terms from a list of HPO terms. @@ -108,7 +114,8 @@ def _fix_redundancies(self, hpo_terms:List[HpTerm]) -> List[HpTerm]: return non_redundant_terms - def _check_term_ids_and_labels(self, hpo_terms:List[HpTerm]) -> None: + def _check_term_ids_and_labels(self, + hpo_terms:List[HpTerm]) -> None: """ Check whether the term identifiers (e.g., HP:0001234) are present in the ontology as primary ids and whether the label matches the current priumary label; if not, flag the errors in self._errors @@ -140,8 +147,8 @@ def _clean_terms(self) -> List[HpTerm]: if not term.measured: self._errors.append(ValidationResultBuilder(self._phenopacket_id).not_measured(term=term).build()) else: - if term.onset.is_valid(): - by_age_dictionary[term.onset.age_string].append(term) + if term.onset is not None: + by_age_dictionary[term.onset].append(term) else: if term.observed: observed_terms_without_onset.append(term) diff --git a/src/pyphetools/visualization/__init__.py b/src/pyphetools/visualization/__init__.py index 38077517..62f6c7e9 100644 --- a/src/pyphetools/visualization/__init__.py +++ b/src/pyphetools/visualization/__init__.py @@ -2,6 +2,7 @@ from .focus_count_table import FocusCountTable from .hpoa_table_creator 
import HpoaTableCreator, HpoaTableBuilder from .individual_table import IndividualTable +from .kaplan_meier_visualizer import KaplanMeierVisualizer from .phenopacket_charts import PhenopacketCharts from .phenopacket_ingestor import PhenopacketIngestor from .phenopacket_table import PhenopacketTable diff --git a/src/pyphetools/visualization/kaplan_meier_visualizer.py b/src/pyphetools/visualization/kaplan_meier_visualizer.py index ef63f553..5b503b37 100644 --- a/src/pyphetools/visualization/kaplan_meier_visualizer.py +++ b/src/pyphetools/visualization/kaplan_meier_visualizer.py @@ -8,14 +8,57 @@ class KaplanMeierVisualizer: def __init__(self, simple_patient_list: typing.List[SimplePatient], - target_term: typing.Union[str, hpotk.TermId]=None) -> None: + target_tid: typing.Union[str, hpotk.TermId]=None) -> None: """ - The goal of this class is to provide a visualization as a KaplanMeier survival curve with respect to + The goal of this class is to provide a data for a visualization as a KaplanMeier survival curve with respect to the age of onset of a specific HPO term (feature of the disease). TODO - also add support for survival curves with GA4GH Phenopacket VitalStatus element. This will be performed if the target term is None """ - if target_term is None: + if target_tid is None: raise ValueError("VitalStatus based KM curve not implemented yet") + time_in_years = list() + event = list() + n_observed = 0 + n_excluded = 0 + n_invalid = 0 + for spat in simple_patient_list: + years_at_last_exam = spat.get_age_in_years() ## float or None + ## check for target HPO + if spat.contains_observed_term_id(target_tid): + observed_term = spat.get_observed_term_by_id(target_tid) + print("found " + target_tid) + else: + observed_term = None + if spat.contains_excluded_term_id(target_tid): + excluded_term = spat.get_excluded_term_by_id(target_tid) + else: + excluded_term = None + ## If we do not have a last date, we cannot include this in the KM analysis. 
+ ## We can tak the last date from the last_observed or from the data of the feature if it was present + if years_at_last_exam is None and observed_term is None: + print(f"[WARN] skipping {spat.get_phenopacket_id()} because we could not find last age/event age") + if observed_term is not None: + event_age = observed_term.onset + event_years = SimplePatient.age_in_years(iso_age=event_age) + print("event years", event_years) + else: + event_years = None + if event_years is not None: + time_in_years.append(event_years) + event.append(1) + n_observed += 1 + elif excluded_term is not None and years_at_last_exam is not None: + time_in_years.append(years_at_last_exam) + event.append(0) + n_excluded += 1 + else: + print(f"[WARN] skipping {spat.get_phenopacket_id()} because we could not find last age/event age (2)") + n_invalid += 1 + print(f"observed events {n_observed}, right-censored cases {n_excluded}, invalid {n_invalid}") + self._T = time_in_years + self._E = event + - pass \ No newline at end of file + def get_time_and_event(self): + return self._T, self._E \ No newline at end of file diff --git a/src/pyphetools/visualization/simple_patient.py b/src/pyphetools/visualization/simple_patient.py index 5cd9afd3..b4eeb2f8 100644 --- a/src/pyphetools/visualization/simple_patient.py +++ b/src/pyphetools/visualization/simple_patient.py @@ -1,10 +1,14 @@ import os import phenopackets from google.protobuf.json_format import Parse +import typing import json +import typing from collections import defaultdict from ..creation.constants import Constants from ..creation.hp_term import HpTerm +from ..creation.individual import Individual +from ..creation.pyphetools_age import IsoAge, NoneAge from .simple_variant import SimpleVariant from ..pp.v202 import VitalStatus @@ -25,8 +29,8 @@ def __init__(self, ga4gh_phenopacket) -> None: raise ValueError(f"phenopacket argument must be GA4GH Phenopacket Schema Phenopacket but was {type(ga4gh_phenopacket)}") else: ppack = ga4gh_phenopacket - 
observed_hpo_terms = defaultdict(HpTerm) - excluded_hpo_terms = defaultdict(HpTerm) + self._observed_hpo_terms = defaultdict(HpTerm) + self._excluded_hpo_terms = defaultdict(HpTerm) self._by_age_dictionary = defaultdict(list) self._phenopacket_id = ppack.id if not ppack.HasField("subject"): @@ -56,11 +60,10 @@ def __init__(self, ga4gh_phenopacket) -> None: self._sex = "UNKNOWN" ## get vital status if possible self._vstat = None - self._age_last_encounter = None self._survival_time_in_days = None self._cause_of_death = None - if ppack.HasField("vital_status"): - vstat = ppack.vital_status + if ppack.subject.HasField("vital_status"): + vstat = ppack.subject.vital_status if vstat.status == VitalStatus.Status.DECEASED: self._vstat = "DECEASED" elif vstat.status == VitalStatus.Status.ALIVE: @@ -74,19 +77,17 @@ def __init__(self, ga4gh_phenopacket) -> None: for pf in ppack.phenotypic_features: hpterm = HpTerm(hpo_id=pf.type.id, label=pf.type.label, observed=not pf.excluded) if pf.excluded: - excluded_hpo_terms[pf.type.id] = hpterm + self._excluded_hpo_terms[pf.type.id] = hpterm else: - observed_hpo_terms[pf.type.id] = hpterm + self._observed_hpo_terms[pf.type.id] = hpterm if pf.onset is not None and pf.onset.age is not None and pf.onset.age.iso8601duration: term_onset = pf.onset.age.iso8601duration else: term_onset = Constants.NOT_PROVIDED self._by_age_dictionary[term_onset].append(hpterm) - for k, v in observed_hpo_terms.items(): - if k in excluded_hpo_terms: - excluded_hpo_terms.pop(k) # remove observed terms that may have been excluded at other occasion - self._observed = observed_hpo_terms - self._excluded = excluded_hpo_terms + for k, v in self._observed_hpo_terms.items(): + if k in self._excluded_hpo_terms: + self._excluded_hpo_terms.pop(k) # remove observed terms that may have been excluded at other occasion # Add information about variants self._variant_list = [] self._disease = None @@ -113,7 +114,7 @@ def __init__(self, ga4gh_phenopacket) -> None: self._pmid = 
eref.id @staticmethod - def from_file(phenopacket_file): + def from_file(phenopacket_file: str) -> "SimplePatient": """ Return a SimplePatient object that corresponds to a phenopacket (JSON) file :param phenopacket_file: A phenopacket file (JSON format) @@ -129,7 +130,7 @@ def from_file(phenopacket_file): @staticmethod - def from_individual(individual, metadata): + def from_individual(individual: Individual, metadata): """ Return a SimplePatient object that corresponds to a pyphetools Individual object :param individual: Am Individual object @@ -150,8 +151,30 @@ def get_subject_id(self) -> str: def get_sex(self): return self._sex - def get_age(self)-> str: - return self._time_at_last_encounter or "n/a" + def get_age(self)-> typing.Optional[str]: + return self._time_at_last_encounter + + @staticmethod + def age_in_years(iso_age:str) -> typing.Optional[float]: + if iso_age is None: + return None + if isinstance(iso_age, NoneAge): + return None + if len(iso_age) == 0: + return None + if not iso_age.startswith("P"): + print(f"ERROR-isoage malformed: {iso_age}") + return None + ppt_age = IsoAge.from_iso8601(iso_age=iso_age) + years = ppt_age.years + (1/12) * ppt_age.months + (1/365) * ppt_age.days + return years + + + def get_age_in_years(self) -> typing.Optional[float]: + if self._time_at_last_encounter is None or not self._time_at_last_encounter: + return None + return SimplePatient.age_in_years(iso_age=self._time_at_last_encounter) + def get_disease(self) -> str: return self._disease or "n/a" @@ -160,20 +183,20 @@ def get_observed_hpo_d(self): """ returns map of observed phenotypic features with key (string) HP id, value, HpTerm from creation submodule """ - return self._observed + return self._observed_hpo_terms def get_excluded_hpo_d(self): """ :return: map of excluded phenotypic features with key (string) HP id, value, HpTerm from creation submodule """ - return self._excluded + return self._excluded_hpo_terms def get_total_hpo_count(self): """ :return: total 
count of HPO terms (observed and excluded) :rtype: int """ - return len(self._observed) + len(self._excluded) + return len(self._observed_hpo_terms) + len(self._excluded_hpo_terms) def get_variant_list(self): return self._variant_list @@ -184,9 +207,21 @@ def has_pmid(self): def get_pmid(self): return self._pmid - def contains_observed_term_id(self, hpo_term_id): - return hpo_term_id in self._observed - + def contains_observed_term_id(self, hpo_term_id) -> bool: + return hpo_term_id in self._observed_hpo_terms + + def contains_excluded_term_id(self, hpo_term_id) -> bool: + return hpo_term_id in self._excluded_hpo_terms + + def get_observed_term_by_id(self, hpo_term_id) -> typing.Optional[HpTerm]: + return self._observed_hpo_terms.get(hpo_term_id) + + def get_excluded_term_by_id(self, hpo_term_id)-> typing.Optional[HpTerm]: + return self._excluded_hpo_terms.get(hpo_term_id) + + def get_term_by_age_dict(self): return self._by_age_dictionary + + diff --git a/test/test_hp_term.py b/test/test_hp_term.py index 841fed36..ee65fba1 100644 --- a/test/test_hp_term.py +++ b/test/test_hp_term.py @@ -1,6 +1,7 @@ import unittest -from pyphetools.creation import HpTerm, HpTermBuilder, IsoAge +from pyphetools.creation import HpTerm, HpTermBuilder, PyPheToolsAge +from pyphetools.pp.v202 import TimeElement as TimeElement202 class TestHpTerm(unittest.TestCase): @@ -18,11 +19,13 @@ def test_excluded(self): def test_create_phenotypic_feature(self): """test creation of GA4GH PhenotypicFeature object with onset and resolution """ - hpo_term = HpTerm(hpo_id="HP:0001250", label="test label", onset=IsoAge.from_iso8601("P2M"), resolution=IsoAge.from_iso8601("P6M")) + onset202 = PyPheToolsAge.get_age_pp201("P2M") + resolution202 = PyPheToolsAge.get_age_pp201("P6M") + hpo_term = HpTerm(hpo_id="HP:0001250", label="test label", onset=onset202, resolution=resolution202) self.assertEqual("HP:0001250", hpo_term.id) self.assertEqual("test label", hpo_term.label) - self.assertEqual("P2M", 
hpo_term.onset.age_string) - self.assertEqual("P6M", hpo_term.resolution.age_string) + self.assertEqual("P2M", hpo_term.onset.age.iso8601duration) + self.assertEqual("P6M", hpo_term.resolution.age.iso8601duration) self.assertTrue(hpo_term.observed) self.assertTrue(hpo_term.measured) pfeat = hpo_term.to_ga4gh_phenotypic_feature() @@ -40,12 +43,13 @@ def test_fetal_onset(self): self.assertEqual("HP:0010942", hpterm.id) self.assertEqual("Echogenic intracardiac focus", hpterm.label) self.assertIsNotNone(hpterm.onset) - self.assertTrue(hpterm.onset.is_valid()) onset_term = hpterm.onset print(type(onset_term)) # Fetal onset HP:0011461 - self.assertEqual("Fetal onset", onset_term.age_string) - #self.assertEqual("HP:0011461", onset_term[1]) + self.assertIsNotNone(onset_term.ontology_class) + oclazz = onset_term.ontology_class + self.assertEqual("Fetal onset", oclazz.label) + self.assertEqual("HP:0011461", oclazz.id) phenotypic_feature = hpterm.to_ga4gh_phenotypic_feature() self.assertIsNotNone(phenotypic_feature.onset) self.assertIsNotNone(phenotypic_feature.onset.ontology_class) @@ -57,11 +61,12 @@ def test_late_onset(self): hpterm = HpTermBuilder(hpo_id="HP:0000726", hpo_label="Dementia").late_onset().build() self.assertEqual("HP:0000726", hpterm.id) self.assertEqual("Dementia", hpterm.label) - self.assertTrue(hpterm.onset.is_valid()) + self.assertIsNotNone(hpterm.onset.ontology_class) self.assertIsNotNone(hpterm.onset) - onset_term = hpterm.onset + onset_term = hpterm.onset.ontology_class # Late onset HP:0003584 - self.assertEqual("Late onset", onset_term.age_string) + self.assertEqual("Late onset", onset_term.label) + self.assertEqual("HP:0003584", onset_term.id) phenotypic_feature = hpterm.to_ga4gh_phenotypic_feature() self.assertIsNotNone(phenotypic_feature.onset) self.assertIsNotNone(phenotypic_feature.onset.ontology_class) diff --git a/test/test_pyphetools_age.py b/test/test_pyphetools_age.py index ae829200..b754ecc2 100644 --- a/test/test_pyphetools_age.py +++ 
b/test/test_pyphetools_age.py @@ -1,4 +1,5 @@ import unittest +import pytest from pyphetools.creation import PyPheToolsAge, HpoAge, IsoAge @@ -21,3 +22,23 @@ def test_2m(self): self.assertIsNotNone(onset_age) self.assertEqual('P2M', onset_age.age_string) self.assertTrue(isinstance(onset_age, IsoAge)) + + + def test_age_key_converter_iso(self): + time_elem202 = PyPheToolsAge.get_age_pp201("P41Y") + assert time_elem202 is not None + assert time_elem202.age + assert time_elem202.age.iso8601duration == "P41Y" + + def test_age_key_converter_hpterm(self): + time_elem202 = PyPheToolsAge.get_age_pp201("Congenital onset") + assert time_elem202 is not None + assert not time_elem202.age + assert time_elem202.ontology_class + oterm = time_elem202.ontology_class + assert oterm.id == "HP:0003577" + assert oterm.label == "Congenital onset" + + def test_malformed_key_converter(self): + with pytest.raises(ValueError) as verror: + time_elem = PyPheToolsAge.get_age_pp201("MALFORMED LABEL") From 5db6ceb4c8ed1ede7671c671860a87a945a98a26 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sat, 10 Aug 2024 06:58:54 +0200 Subject: [PATCH 3/7] refactoring (2) --- src/pyphetools/creation/age_column_mapper.py | 70 ++++---- .../creation/age_of_death_mapper.py | 14 +- .../creation/case_template_encoder.py | 15 +- src/pyphetools/creation/cohort_encoder.py | 75 ++++----- src/pyphetools/creation/individual.py | 154 ++++++++---------- src/pyphetools/creation/pyphetools_age.py | 18 -- src/pyphetools/pp/v202/_base.py | 15 ++ src/pyphetools/pp/v202/_individual.py | 2 + src/pyphetools/validation/ontology_qc.py | 2 +- .../visualization/simple_patient.py | 4 +- test/test_age_column_mapper.py | 48 +++--- 11 files changed, 198 insertions(+), 219 deletions(-) diff --git a/src/pyphetools/creation/age_column_mapper.py b/src/pyphetools/creation/age_column_mapper.py index dad32986..acd1db56 100644 --- a/src/pyphetools/creation/age_column_mapper.py +++ b/src/pyphetools/creation/age_column_mapper.py @@ -1,13 
+1,13 @@ import re from collections import defaultdict -from enum import Enum import abc import math import pandas as pd - +import typing from .age_isoformater import AgeIsoFormater -from .pyphetools_age import HPO_ONSET_TERMS, PyPheToolsAge, IsoAge, NoneAge, GestationalAge, HpoAge +from .pyphetools_age import HPO_ONSET_TERMS, PyPheToolsAge from .constants import Constants +from ..pp.v202 import TimeElement as TimeElement202 ISO8601_REGEX = r"^P(\d+Y)?(\d+M)?(\d+D)?" # e.g., 14 y 8 m or 8 y @@ -32,8 +32,6 @@ class AgeColumnMapper(metaclass=abc.ABCMeta): def __init__(self, column_name:str, string_to_iso_d=None) -> None: """ - :param ageEncodingType: Formatting convention used to represent the age - :type ageEncodingType: one of Year (e.g. 42), ISO 8601 (e.g. P42Y2M), year/month (e.g. 42y2m) :param column_name: Name of the Age column in the original table :type column_name: str :param string_to_iso_d: dictionary from free text (input table) to ISO8601 strings @@ -140,14 +138,14 @@ class Iso8601AgeColumnMapper(AgeColumnMapper): def __init__(self, column_name) -> None: super().__init__(column_name=column_name) - def map_cell(self, cell_contents) -> PyPheToolsAge: + def map_cell(self, cell_contents) -> typing.Optional[TimeElement202]: contents = self._clean_contents(cell_contents=cell_contents) match = re.search(ISO8601_REGEX, contents) if match: - return IsoAge.from_iso8601(contents) + return PyPheToolsAge.get_age_pp201(age_string=contents) else: self._erroneous_input_counter[contents] += 1 - return NoneAge(contents) + return None class YearMonthAgeColumnMapper(AgeColumnMapper): @@ -157,7 +155,7 @@ class YearMonthAgeColumnMapper(AgeColumnMapper): def __init__(self, column_name) -> None: super().__init__(column_name=column_name) - def map_cell(self, cell_contents) -> PyPheToolsAge: + def map_cell(self, cell_contents) -> typing.Optional[TimeElement202]: contents = self._clean_contents(cell_contents=cell_contents) try: match = re.search(YEAR_AND_MONTH_REGEX, contents) @@ 
-165,20 +163,20 @@ def map_cell(self, cell_contents) -> PyPheToolsAge: years = int(match.group(1)) months = int(match.group(2)) age_string = f"P{years}Y{months}M" - return IsoAge(y=years, m=months, age_string=age_string) + return PyPheToolsAge.get_age_pp201(age_string=age_string) match = re.search(YEAR_REGEX, contents) if match: years = int(match.group(1)) age_string = f"P{years}Y" - return IsoAge(y=years, age_string=age_string) + return PyPheToolsAge.get_age_pp201(age_string=age_string) match = re.search(MONTH_REGEX, contents) if match: months = int(match.group(1)) age_string = f"P{months}M" - return IsoAge(m=months, age_string=age_string) + return PyPheToolsAge.get_age_pp201(age_string=age_string) except ValueError as verr: print(f"Could not parse {cell_contents} as year/month: {verr}") - return NoneAge(contents) + return None class MonthAgeColumnMapper(AgeColumnMapper): """Mapper for entries such as P1Y2M (ISO 8601 period to represent age) @@ -187,7 +185,7 @@ class MonthAgeColumnMapper(AgeColumnMapper): def __init__(self, column_name) -> None: super().__init__(column_name=column_name) - def map_cell(self, cell_contents) -> PyPheToolsAge: + def map_cell(self, cell_contents) -> typing.Optional[TimeElement202]: # assume month encoded by integer or float. 
contents = self._clean_contents(cell_contents=cell_contents) month = str(contents) @@ -195,7 +193,7 @@ def map_cell(self, cell_contents) -> PyPheToolsAge: full_months = int(month) days = 0 age_string = AgeIsoFormater.from_numerical_month(full_months) - return IsoAge(m=full_months, age_string=age_string) + return PyPheToolsAge.get_age_pp201(age_string=age_string) elif month.replace('.', '', 1).isdigit() and month.count('.') < 2: # a float such as 0.9 (months) months = float(month) @@ -205,15 +203,15 @@ def map_cell(self, cell_contents) -> PyPheToolsAge: days = int(months * avg_num_days_in_month) full_months = 0 age_string = f"P{days}D" - return IsoAge(d=days, age_string=age_string) + return PyPheToolsAge.get_age_pp201(age_string=age_string) else: remainder = months - floor_months full_months = int(months - remainder) days = int(remainder * avg_num_days_in_month) age_string = f"P{full_months}M{days}D" - return IsoAge(m=full_months, d=days, age_string=age_string) + return PyPheToolsAge.get_age_pp201(age_string=age_string) else: - return NoneAge("na") + return None @@ -222,33 +220,34 @@ class YearAgeColumnMapper(AgeColumnMapper): def __init__(self, column_name) -> None: super().__init__(column_name=column_name) - def map_cell(self, cell_contents) -> PyPheToolsAge: + def map_cell(self, cell_contents) -> typing.Optional[TimeElement202]: """ Extract an iso8601 string for age recorded as a year (either an int such as 4 or a float such as 4.25 for P4Y3M) :param age: an int representing years or a float such as 2.5 for two and a half years :return: an ISO 8601 string such as P2Y6M """ if isinstance(cell_contents, int): - return IsoAge(y=cell_contents, age_string=contents) + age_str = f"P{cell_contents}Y" + return PyPheToolsAge.get_age_pp201(age_string=age_str) elif isinstance(cell_contents, float): - age = str(age) + age = str(cell_contents) elif not isinstance(cell_contents, str): - raise ValueError(f"Malformed agestring {age}, type={type(age)}") + raise 
ValueError(f"Malformed agestring {cell_contents}, type={type(cell_contents)}") contents = self._clean_contents(cell_contents=cell_contents) int_or_float = r"(\d+)(\.\d+)?" p = re.compile(int_or_float) results = p.search(contents).groups() if len(results) != 2: - return NoneAge(contents) + return None if results[0] is None: - return NoneAge(contents) + return None y = int(results[0]) if results[1] is None: - return IsoAge(y=y, age_string=f"P{y}Y") + return PyPheToolsAge.get_age_pp201(age_string=f"P{y}Y") else: m = float(results[1]) # something like .25 months = round(12 * m) - return IsoAge(y=y, m=months, age_string=f"P{y}Y{months}M") + return PyPheToolsAge.get_age_pp201(age_string=f"P{y}Y{months}M") class CustomAgeColumnMapper(AgeColumnMapper): @@ -260,12 +259,12 @@ def __init__(self, column_name:str, string_to_iso_d) -> None: super().__init__(column_name=column_name) self._string_to_iso_d = string_to_iso_d - def map_cell(self, cell_contents) -> PyPheToolsAge: + def map_cell(self, cell_contents) -> typing.Optional[TimeElement202]: if cell_contents not in self._string_to_iso_d: print(f"[WARNING] Could not find \"{cell_contents}\" in custom dictionary") - return NoneAge(cell_contents) + return None iso8601 = self._string_to_iso_d.get(cell_contents, Constants.NOT_PROVIDED) - return IsoAge.from_iso8601(iso8601) + return PyPheToolsAge.get_age_pp201(age_string=iso8601) class NotProvidedAgeColumnMapper(AgeColumnMapper): """Mapper if there is no information @@ -274,11 +273,8 @@ class NotProvidedAgeColumnMapper(AgeColumnMapper): def __init__(self, column_name:str) -> None: super().__init__(column_name=column_name) - def map_cell(self, cell_contents) -> str: - if cell_contents is None or math.isnan(cell_contents): - cell_contents = "na" - contents = self._clean_contents(cell_contents=cell_contents) - return NoneAge(age_string=contents) + def map_cell(self, cell_contents) -> typing.Optional[TimeElement202]: + return None class HpoAgeColumnMapper(AgeColumnMapper): @@ -290,10 
+286,10 @@ class HpoAgeColumnMapper(AgeColumnMapper): def __init__(self, column_name:str) -> None: super().__init__(column_name=column_name) - def map_cell(self, cell_contents) -> PyPheToolsAge: + def map_cell(self, cell_contents) -> typing.Optional[TimeElement202]: contents = self._clean_contents(cell_contents=cell_contents) if contents in HPO_ONSET_TERMS: - return HpoAge(hpo_onset_label=contents) + return PyPheToolsAge.get_age_pp201(age_string=contents) else: self._erroneous_input_counter[contents] += 1 - return NoneAge(cell_contents) \ No newline at end of file + return None diff --git a/src/pyphetools/creation/age_of_death_mapper.py b/src/pyphetools/creation/age_of_death_mapper.py index 70d63984..a75950a0 100644 --- a/src/pyphetools/creation/age_of_death_mapper.py +++ b/src/pyphetools/creation/age_of_death_mapper.py @@ -4,9 +4,9 @@ from .age_isoformater import AgeIsoFormater from .constants import Constants -from pyphetools.pp.v202 import VitalStatus as pptVitalStatus -from pyphetools.pp.v202 import TimeElement as pptTimeElement -from pyphetools.pp.v202 import Age as pptAge +from pyphetools.pp.v202 import VitalStatus as VitalStatus202 +from pyphetools.pp.v202 import TimeElement as TimeElement202 +from pyphetools.pp.v202 import Age as Age202 class AgeOfDeathColumnMapper: @@ -26,7 +26,7 @@ def __init__(self, column_name, string_to_iso_d=None) -> None: self._column_name = column_name self._string_to_iso_d = string_to_iso_d - def map_cell_to_vital_status(self, cell_contents) -> Optional[pptVitalStatus]: + def map_cell_to_vital_status(self, cell_contents) -> Optional[VitalStatus202]: """ Map a single cell of the table @@ -39,9 +39,9 @@ def map_cell_to_vital_status(self, cell_contents) -> Optional[pptVitalStatus]: if contents not in self._string_to_iso_d: return None # Wrap the Age (iso8601) in a TimeElement. 
- iso_age = pptAge(self._string_to_iso_d.get(contents)) - telem = pptTimeElement(iso_age) - vstatus = pptVitalStatus(status=pptVitalStatus.Status.DECEASED, time_of_death=telem) + iso_age = Age202(self._string_to_iso_d.get(contents)) + telem = TimeElement202(iso_age) + vstatus = VitalStatus202(status=VitalStatus202.Status.DECEASED, time_of_death=telem) return vstatus @property diff --git a/src/pyphetools/creation/case_template_encoder.py b/src/pyphetools/creation/case_template_encoder.py index e430e2ab..2a3c0be8 100644 --- a/src/pyphetools/creation/case_template_encoder.py +++ b/src/pyphetools/creation/case_template_encoder.py @@ -7,7 +7,7 @@ from pyphetools.creation.metadata import MetaData from pyphetools.creation.hp_term import HpTerm from pyphetools.creation.individual import Individual -from pyphetools.creation.pyphetools_age import NoneAge, PyPheToolsAge +from pyphetools.creation.pyphetools_age import PyPheToolsAge from ..pp.v202 import TimeElement as TimeElement202 import os import re @@ -408,20 +408,19 @@ def _parse_individual(self, raise ValueError(f"Unrecognized sex symbol: {sex} for individual \"{individual_id}\"") onset_age = data_items.get(AGE_OF_ONSET_FIELDNAME) if onset_age is not None and isinstance(onset_age, str): - onset_age = PyPheToolsAge.get_age(onset_age) + onset_age = PyPheToolsAge.get_age_pp201(onset_age) else: - onset_age = NoneAge("na") + onset_age = None encounter_age = data_items.get(AGE_AT_LAST_ENCOUNTER_FIELDNAME) if encounter_age is not None and isinstance(encounter_age, str): - encounter_age = PyPheToolsAge.get_age(encounter_age) + encounter_age = PyPheToolsAge.get_age_pp201(encounter_age) else: - encounter_age = NoneAge("na") + encounter_age = None vitStat = None if "deceased" in data_items: decsd = data_items.get("deceased") - if decsd == "yes" and encounter_age.is_valid(): - timeelem = encounter_age.to_ga4gh_time_element() - vitStat = VitalStatus(status=VitalStatus.Status.DECEASED, time_of_death=timeelem) + if decsd == "yes" and 
encounter_age is not None: + vitStat = VitalStatus(status=VitalStatus.Status.DECEASED, time_of_death=encounter_age) else: vitStat = VitalStatus(status=VitalStatus.Status.DECEASED) disease_id = data_items.get("disease_id") diff --git a/src/pyphetools/creation/cohort_encoder.py b/src/pyphetools/creation/cohort_encoder.py index 9af98b0d..710ad561 100644 --- a/src/pyphetools/creation/cohort_encoder.py +++ b/src/pyphetools/creation/cohort_encoder.py @@ -1,16 +1,14 @@ import pandas as pd from math import isnan -from typing import Dict, List +import typing from .abstract_encoder import AbstractEncoder from .age_column_mapper import AgeColumnMapper -from .citation import Citation from .column_mapper import ColumnMapper from .constants import Constants from .disease import Disease from .hpo_cr import HpoConceptRecognizer from .individual import Individual -from .pyphetools_age import NoneAge from .sex_column_mapper import SexColumnMapper from .variant_column_mapper import VariantColumnMapper @@ -50,16 +48,16 @@ class CohortEncoder(AbstractEncoder): """ def __init__(self, - df:pd.DataFrame, - hpo_cr: HpoConceptRecognizer, - column_mapper_list:List[ColumnMapper], - individual_column_name:str, - metadata, - age_of_onset_mapper:AgeColumnMapper=AgeColumnMapper.not_provided(), - age_at_last_encounter_mapper:AgeColumnMapper=AgeColumnMapper.not_provided(), - sexmapper:SexColumnMapper=SexColumnMapper.not_provided(), - variant_mapper:VariantColumnMapper=None, - delimiter:str=None): + df: pd.DataFrame, + hpo_cr: HpoConceptRecognizer, + column_mapper_list: typing.List[ColumnMapper], + individual_column_name: str, + metadata, + age_of_onset_mapper: AgeColumnMapper = AgeColumnMapper.not_provided(), + age_at_last_encounter_mapper: AgeColumnMapper = AgeColumnMapper.not_provided(), + sexmapper: SexColumnMapper = SexColumnMapper.not_provided(), + variant_mapper: VariantColumnMapper = None, + delimiter: str = None): """Constructor """ super().__init__(metadata=metadata) @@ -117,14 +115,14 @@ 
def preview_dataframe(self): hpo_terms.extend(terms) hpo_string = "\n".join([h.to_string() for h in hpo_terms]) d = {'id': individual_id, - 'sex': sex, - 'age': age, - 'phenotypic features': hpo_string} + 'sex': sex, + 'age': age, + 'phenotypic features': hpo_string} individuals.append(d) df = pd.DataFrame(individuals) return df.set_index('id') - def set_disease(self, disease:Disease): + def set_disease(self, disease: Disease): """Set the disease diagnosis for all patients in the cohort If all patients in the cohort have the same disease we can set it with this method @@ -134,7 +132,8 @@ def set_disease(self, disease:Disease): self._disease = disease self._disease_dictionary = None - def set_disease_dictionary(self, disease_d:Dict[str, Disease]): + def set_disease_dictionary(self, + disease_d: typing.Dict[str, Disease]): """Set the dictionary of disease ontology terms For tables with multiple different diseases, we provide a dictionary that has as key @@ -143,24 +142,22 @@ def set_disease_dictionary(self, disease_d:Dict[str, Disease]): self._disease_dictionary = disease_d self._disease = None - - def _get_age(row:pd.Series, mapper:AgeColumnMapper): + def _get_age(row: pd.Series, mapper: AgeColumnMapper): import math column_name = mapper.get_column_name() if column_name == Constants.NOT_PROVIDED: - return NoneAge("na") + return None age_cell_contents = row[column_name] if isinstance(age_cell_contents, float) and math.isnan(age_cell_contents): - return NoneAge("na") + return None try: age = mapper.map_cell(age_cell_contents) except Exception as ee: print(f"Warning: Could not parse age {ee}. 
Setting age to \"not provided\"") - age = NoneAge("na") + age = None return age - - def get_individuals(self) -> List[Individual]: + def get_individuals(self) -> typing.List[Individual]: """Get a list of all Individual objects in the cohort :returns: a list of all Individual objects in the cohort @@ -214,22 +211,22 @@ def get_individuals(self) -> List[Individual]: raise ValueError(f"Could not find disease link for {individual_id}") disease = self._disease_dictionary.get(individual_id) indi = Individual(individual_id=individual_id, - sex=sex, - age_of_onset=age_of_onset, - age_at_last_encounter=age_last_encounter, - hpo_terms=hpo_terms, - citation=self._metadata.get_citation(), - interpretation_list=interpretation_list, - disease=disease) + sex=sex, + age_of_onset=age_of_onset, + age_at_last_encounter=age_last_encounter, + hpo_terms=hpo_terms, + citation=self._metadata.get_citation(), + interpretation_list=interpretation_list, + disease=disease) elif self._disease_dictionary is None and self._disease is not None: indi = Individual(individual_id=individual_id, - sex=sex, - age_of_onset=age_of_onset, - age_at_last_encounter=age_last_encounter, - hpo_terms=hpo_terms, - citation=self._metadata.get_citation(), - interpretation_list=interpretation_list, - disease=self._disease) + sex=sex, + age_of_onset=age_of_onset, + age_at_last_encounter=age_last_encounter, + hpo_terms=hpo_terms, + citation=self._metadata.get_citation(), + interpretation_list=interpretation_list, + disease=self._disease) else: raise ValueError(f"Could not find disease data for '{individual_id}'") individuals.append(indi) diff --git a/src/pyphetools/creation/individual.py b/src/pyphetools/creation/individual.py index b7644569..1de7f5be 100644 --- a/src/pyphetools/creation/individual.py +++ b/src/pyphetools/creation/individual.py @@ -1,3 +1,5 @@ +import typing + import phenopackets as PPKt import re import os @@ -9,8 +11,11 @@ from .hp_term import HpTerm from .hgvs_variant import Variant from .metadata 
import MetaData, Resource -from .pyphetools_age import PyPheToolsAge, NoneAge, IsoAge -from ..pp.v202 import OntologyClass, TimeElement, VitalStatus +from .pyphetools_age import PyPheToolsAge +from ..pp.v202 import TimeElement as TimeElement202 +from ..pp.v202 import VitalStatus as VitalStatus202 +from ..pp.v202 import OntologyClass as OntologyClass202 + class Individual: """ @@ -31,15 +36,15 @@ class Individual: """ def __init__(self, - individual_id:str, - hpo_terms:List[HpTerm]=None, - citation:Citation=None, - sex:str=Constants.NOT_PROVIDED, - age_of_onset:PyPheToolsAge=NoneAge("na"), - age_at_last_encounter:PyPheToolsAge=NoneAge("na"), - vital_status:VitalStatus=None, - interpretation_list:List[PPKt.VariantInterpretation]=None, - disease:Disease=None): + individual_id: str, + hpo_terms: List[HpTerm] = None, + citation: Citation = None, + sex: str = Constants.NOT_PROVIDED, + age_of_onset: TimeElement202 = None, + age_at_last_encounter: TimeElement202 = None, + vital_status: VitalStatus202 = None, + interpretation_list: List[PPKt.VariantInterpretation] = None, + disease: Disease = None): """Constructor """ if isinstance(individual_id, int): @@ -90,7 +95,7 @@ def set_sex(self, sex): self._sex = sex @property - def age_of_onset(self) -> PyPheToolsAge: + def age_of_onset(self) -> typing.Optional[TimeElement202]: """ :returns: a representation of age when the disease first manifested :rtype: PyPheToolsAge @@ -98,14 +103,13 @@ def age_of_onset(self) -> PyPheToolsAge: return self._age_of_onset @property - def age_at_last_encounter(self) -> PyPheToolsAge: + def age_at_last_encounter(self) -> typing.Optional[TimeElement202]: """ :returns: a representation of age when the individual was last seen in a medical context :rtype: PyPheToolsAge """ return self._age_at_last_encounter - @property def hpo_terms(self): """ @@ -122,7 +126,7 @@ def interpretation_list(self) -> List[PPKt.VariantInterpretation]: """ return self._interpretation_list - def add_variant(self, 
v:Union[Variant, PPKt.VariantInterpretation], acmg:str=None): + def add_variant(self, v: Union[Variant, PPKt.VariantInterpretation], acmg: str = None): """ :param v: A Variant obeserved in this individual :type v: Union[Variant, PPKt.VariantInterpretation] @@ -136,11 +140,10 @@ def add_variant(self, v:Union[Variant, PPKt.VariantInterpretation], acmg:str=Non if isinstance(variant, PPKt.VariantInterpretation): self._interpretation_list.append(variant) else: - raise ValueError(f"variant argument must be pyphetools Variant or GA4GH VariantInterpretation but was {type(variant)}") - - + raise ValueError( + f"variant argument must be pyphetools Variant or GA4GH VariantInterpretation but was {type(variant)}") - def add_hpo_term(self, term:HpTerm) -> None: + def add_hpo_term(self, term: HpTerm) -> None: """ Adds one HPO term to the current individual. :param term: An HPO term (observed or excluded, potentially with Age of observation @@ -150,7 +153,7 @@ def add_hpo_term(self, term:HpTerm) -> None: raise ValueError(f"\"term\" argument must be HpTerm but was {type(term)}") self._hpo_terms.append(term) - def set_disease(self, disease:Disease) -> None: + def set_disease(self, disease: Disease) -> None: """ This method is typically useful for a cohort with multiple diagnoses; otherwise, the disease can be set by the CohortEncoder @@ -166,7 +169,7 @@ def disease_count(self): else: return 1 - def set_hpo_terms(self, cleansed_hpo_terms:List[HpTerm]): + def set_hpo_terms(self, cleansed_hpo_terms: List[HpTerm]): """ :param cleansed_hpo_terms: a list of HpTerm objects that has been cleansed by OntologyQC :type cleansed_hpo_terms: List[pyphetools.creation.HpTerm] @@ -177,19 +180,19 @@ def set_hpo_terms(self, cleansed_hpo_terms:List[HpTerm]): def pmid(self): return self._citation.pmid - def set_citation(self, citation:Citation): + def set_citation(self, citation: Citation): """ :param citation: Object with the title and PubMed identifier for the publication in which this individual was 
described (e.g. PMID:321..) :type citation: Citation """ self._citation = citation - def set_vital_status(self, vstatus:VitalStatus): - if not isinstance(vstatus, VitalStatus): + def set_vital_status(self, vstatus: VitalStatus202): + if not isinstance(vstatus, VitalStatus202): raise ValueError(f"vstatus argument must be pyphetools.pp.v202.VitalStatus but was{type(vstatus)}") self._vital_status = vstatus - def get_vital_status(self) -> VitalStatus: + def get_vital_status(self) -> VitalStatus202: return self._vital_status def get_phenopacket_id(self, phenopacket_id=None) -> str: @@ -207,7 +210,7 @@ def get_phenopacket_id(self, phenopacket_id=None) -> str: else: ppkt_id = phenopacket_id # strip non alphanumeric characters - ppkt_id = ''.join(e if e.isalnum() else "_" for e in ppkt_id) + ppkt_id = ''.join(e if e.isalnum() else "_" for e in ppkt_id) ppkt_id = ppkt_id.replace("__", "_") if ppkt_id.endswith("_"): ppkt_id = ppkt_id[:-1] @@ -220,8 +223,7 @@ def get_citation(self) -> Citation: """ return self._citation - - def _get_onset(self) -> Union[PyPheToolsAge,str]: + def _get_onset(self) -> typing.Optional[TimeElement202]: """The assumption of this method is that if we have a valid age of onset field, use this. 
Otherwise, try to find an age of onset from the phenotypic features and take the youngest age """ @@ -233,15 +235,16 @@ def _get_onset(self) -> Union[PyPheToolsAge,str]: if not hp.measured: continue pf = hp.to_ga4gh_phenotypic_feature() - if pf.onset.age.iso8601duration is None and self._age_of_onset != Constants.NOT_PROVIDED: - phenotypic_feature_onsets.add(pf.onset.age.iso8601duration) + if pf.onset.age.iso8601duration is None: + phenotypic_feature_onsets.add(pf.onset) if len(phenotypic_feature_onsets) == 0: - return Constants.NOT_PROVIDED - age_format_list = list() - for o in phenotypic_feature_onsets: - age_format_list.append(IsoAge.from_iso8601(o)) - sorted_age = sorted(age_format_list, lambda x: x.total_days) - youngest_age = sorted_age[0] + return None + age_format_list = list(phenotypic_feature_onsets) + #for o in phenotypic_feature_onsets: + # age_format_list.append(IsoAge.from_iso8601(o)) + #sorted_age = sorted(age_format_list, lambda x: x.total_days) + # TODO SORT!!!!!!!!!!!!!!!!!!!!!!! 
+ youngest_age = age_format_list[0] return youngest_age def _get_disease_object(self): @@ -256,14 +259,12 @@ def _get_disease_object(self): print("[WARNING] could not find disease information") disease_object = PPKt.Disease() disease_object.term.CopyFrom(disease_term) - if self.age_of_onset is not None and self.age_of_onset.is_valid(): - disease_object.onset.CopyFrom(self.age_of_onset.to_ga4gh_time_element()) - if iso_age is not None and iso_age.is_valid(): - disease_object.onset.CopyFrom(iso_age.to_ga4gh_time_element()) + if self.age_of_onset is not None: + disease_object.onset.CopyFrom(self.age_of_onset.to_message()) + if iso_age is not None: + disease_object.onset.CopyFrom(iso_age.to_message()) return disease_object - - def to_ga4gh_phenopacket(self, metadata, phenopacket_id=None) -> PPKt.Phenopacket: """ Transform the data into GA4GH Phenopacket format @@ -273,12 +274,13 @@ def to_ga4gh_phenopacket(self, metadata, phenopacket_id=None) -> PPKt.Phenopacke if isinstance(metadata, MetaData): metadata = metadata.to_ga4gh() if not isinstance(metadata, PPKt.MetaData): - raise ValueError(f"metadata argument must be pyphetools.MetaData or GA4GH MetaData but was {type(metadata)}") + raise ValueError( + f"metadata argument must be pyphetools.MetaData or GA4GH MetaData but was {type(metadata)}") php = PPKt.Phenopacket() php.id = self.get_phenopacket_id(phenopacket_id=phenopacket_id) php.subject.id = self._individual_id - if self._age_at_last_encounter is not None and self._age_at_last_encounter.is_valid(): - php.subject.time_at_last_encounter.CopyFrom(self._age_at_last_encounter.to_ga4gh_time_element()) + if self._age_at_last_encounter is not None: + php.subject.time_at_last_encounter.CopyFrom(self._age_at_last_encounter.to_message()) if self._sex == Constants.MALE_SYMBOL: php.subject.sex = PPKt.Sex.MALE elif self._sex == Constants.FEMALE_SYMBOL: @@ -288,10 +290,7 @@ def to_ga4gh_phenopacket(self, metadata, phenopacket_id=None) -> PPKt.Phenopacke elif self._sex == 
Constants.UNKNOWN_SEX_SYMBOL: php.subject.sex = PPKt.Sex.UNKNOWN_SEX if self._vital_status is not None: - print("DATA TYPE OF SELV VS", type(self._vital_status)) - print("DATA TYPE OF php.subject.vital_status", type(php.subject.vital_status)) vs = self._vital_status.to_message() - print("DATA TYPE OF vs", type(vs), " for ", php.id) php.subject.vital_status.CopyFrom(vs) disease_object = self._get_disease_object() php.diseases.append(disease_object) @@ -342,13 +341,6 @@ def to_ga4gh_phenopacket(self, metadata, phenopacket_id=None) -> PPKt.Phenopacke metadata.external_references.append(extref) php.meta_data.CopyFrom(metadata) return php - - - - - - - def __str__(self): hpo_list = [t.to_string() for t in self._hpo_terms] @@ -356,7 +348,7 @@ def __str__(self): return f"{self._individual_id}: {self._age_of_onset}, {self._sex}: {self._disease} {hpo_str}" @staticmethod - def output_individuals_as_phenopackets(individual_list, metadata:MetaData, outdir="phenopackets"): + def output_individuals_as_phenopackets(individual_list, metadata: MetaData, outdir="phenopackets"): """write a list of Individual objects to file in GA4GH Phenopacket format This methods depends on the MetaData object having a PMID and will fail otherwise @@ -374,7 +366,8 @@ def output_individuals_as_phenopackets(individual_list, metadata:MetaData, outdi os.makedirs(outdir) written = 0 if not isinstance(metadata, MetaData): - raise ValueError(f"metadata argument must be pyphetools MetaData object (not GA4GH metadata message), but was {type(metadata)}") + raise ValueError( + f"metadata argument must be pyphetools MetaData object (not GA4GH metadata message), but was {type(metadata)}") pmid = metadata.get_pmid() for individual in individual_list: phenopckt = individual.to_ga4gh_phenopacket(metadata=metadata) @@ -392,13 +385,11 @@ def output_individuals_as_phenopackets(individual_list, metadata:MetaData, outdi written += 1 print(f"We output {written} GA4GH phenopackets to the directory {outdir}") - - 
@staticmethod - def from_ga4gh_metadata(mdata:PPKt.MetaData) -> MetaData: + def from_ga4gh_metadata(mdata: PPKt.MetaData) -> MetaData: created_by = mdata.created_by created_time = str(mdata.created) - if len (mdata.external_references) > 1: + if len(mdata.external_references) > 1: raise ValueError("multiple external references not supported") elif len(mdata.external_references) == 0: id = None @@ -411,13 +402,14 @@ def from_ga4gh_metadata(mdata:PPKt.MetaData) -> MetaData: description = eref.description resource_list = [] for resource in mdata.resources: - resource_id=resource.id + resource_id = resource.id name = resource.name namespace_prefix = resource.namespace_prefix iri_prefix = resource.iri_prefix url = resource.url version = resource.version - r = Resource(resource_id=resource_id,name=name, namespace_prefix=namespace_prefix, iriprefix=iri_prefix, url=url, version=version) + r = Resource(resource_id=resource_id, name=name, namespace_prefix=namespace_prefix, iriprefix=iri_prefix, + url=url, version=version) resource_list.append(r) cite = Citation(pmid=id, title=description) metadata = MetaData(created_by=created_by, citation=cite) @@ -426,7 +418,7 @@ def from_ga4gh_metadata(mdata:PPKt.MetaData) -> MetaData: return metadata @staticmethod - def get_variants_and_disease(ppkt:PPKt.Phenopacket): + def get_variants_and_disease(ppkt: PPKt.Phenopacket): """extract the pyphetools Disease object and the VariantInterpretation objects that can be used to construct an Individual :param ppkt: a GA4GH phenopacket @@ -438,7 +430,8 @@ def get_variants_and_disease(ppkt:PPKt.Phenopacket): print(f"No interpretation found for {ppkt.id}") return None, [] if len(ppkt.interpretations) > 1: - raise ValueError(f"pyphetools dpoes not currently support multiple Interpretation messages in one phenopacket but we found {len(ppkt.interpretations)}") + raise ValueError( + f"pyphetools dpoes not currently support multiple Interpretation messages in one phenopacket but we found 
{len(ppkt.interpretations)}") interpretation = ppkt.interpretations[0] if interpretation.HasField("diagnosis") and interpretation.diagnosis.HasField("disease"): d = interpretation.diagnosis.disease @@ -447,17 +440,16 @@ def get_variants_and_disease(ppkt:PPKt.Phenopacket): disease = None if len(interpretation.diagnosis.genomic_interpretations) == 0: return disease, [] - else : + else: variant_list = [] for gen_interpretation in interpretation.diagnosis.genomic_interpretations: variant_list.append(gen_interpretation.variant_interpretation) return disease, variant_list - @staticmethod - def from_ga4gh_phenopacket(ppkt:PPKt.Phenopacket): + def from_ga4gh_phenopacket(ppkt: PPKt.Phenopacket): """ - Transform a GA4GH Phenopacket into an Individual obect -- useful for testing + Transform a GA4GH Phenopacket into an Individual object -- useful for testing :returns: an individual object corresponding to the GA4GH Phenopacket :rtype: Individual """ @@ -465,33 +457,31 @@ def from_ga4gh_phenopacket(ppkt:PPKt.Phenopacket): raise ValueError(f"argument must be a GA4GH Phenopacket Message but was {type(ppkt)}") #metadata = ppkt.meta_data #pypt_metadata = Individual.from_ga4gh_metadata(mdata=metadata) - subject_id = ppkt.subject.id + subject_id = ppkt.subject.id sex = ppkt.subject.sex age_at_last_encounter = ppkt.subject.time_at_last_encounter.age.iso8601duration - age_of_onset = NoneAge("na") + age_of_onset = None if len(ppkt.diseases) > 0: d = ppkt.diseases[0] if d.HasField("onset") and d.onset.HasField("age"): - age_of_onset = d.onset.age + age_of_onset = TimeElement202.from_message(d.onset.age) hpo_terms = [] for pf in ppkt.phenotypic_features: hpo_id = pf.type.id hpo_label = pf.type.label observed = not pf.excluded - onset_age = NoneAge("na") + onset_age = None if pf.HasField("onset"): - onset = pf.onset - if onset.HasField("age") and onset.age.HasField("iso8601duration"): - onset_age = pf.onset.age.iso8601duration + onset_age = TimeElement202.from_message(pf.onset) 
hpo_terms.append(HpTerm(hpo_id=hpo_id, label=hpo_label, observed=observed, onset=onset_age)) disease, var_list = Individual.get_variants_and_disease(ppkt) indi = Individual(individual_id=subject_id, - hpo_terms=hpo_terms, - citation=None, - sex=sex, - age_of_onset=age_of_onset, - age_at_last_encounter=age_at_last_encounter, - interpretation_list=var_list) + hpo_terms=hpo_terms, + citation=None, + sex=sex, + age_of_onset=age_of_onset, + age_at_last_encounter=age_at_last_encounter, + interpretation_list=var_list) if disease is not None: indi.set_disease(disease=disease) return indi diff --git a/src/pyphetools/creation/pyphetools_age.py b/src/pyphetools/creation/pyphetools_age.py index 4c4b83d2..37ae3fff 100644 --- a/src/pyphetools/creation/pyphetools_age.py +++ b/src/pyphetools/creation/pyphetools_age.py @@ -173,24 +173,6 @@ def age_key_to_ga4gh(age_string) : -class NoneAge(PyPheToolsAge): - """class to be used if no age information was available - """ - def __init__(self, age_string:str): - super().__init__(age_string) - - def to_ga4gh_time_element(self): - return None - - def is_valid(self): - return False - - def to_hpo_age(self): - """There is no information about age, so return the NoneAge object to denote this. 
- Client code should always check the is_valid function - """ - return self - class IsoAge(PyPheToolsAge): """Class to record and sort ages formated according to iso8601 diff --git a/src/pyphetools/pp/v202/_base.py b/src/pyphetools/pp/v202/_base.py index 87295f37..1474d977 100644 --- a/src/pyphetools/pp/v202/_base.py +++ b/src/pyphetools/pp/v202/_base.py @@ -343,6 +343,9 @@ def __eq__(self, other): and self._weeks == other._weeks \ and self._days == other._days + def __hash__(self): + return hash((self._weeks, self._days)) + def __repr__(self): return f'GestationalAge(weeks={self._weeks}, days={self._days})' @@ -400,6 +403,9 @@ def __eq__(self, other): return isinstance(other, Age) \ and self._iso8601duration == other._iso8601duration + def __hash__(self): + return hash((self._iso8601duration,)) + def __repr__(self): return f'Age(iso8601duration={self._iso8601duration})' @@ -473,6 +479,9 @@ def __eq__(self, other): and self._start == other._start \ and self._end == other._end + def __hash__(self): + return hash((self._start, self._end)) + def __repr__(self): return f'AgeRange(start={self._start}, end={self._end})' @@ -546,6 +555,9 @@ def __eq__(self, other): and self._start == other._start \ and self._end == other._end + def __hash__(self): + return hash((self._start, self._end)) + def __repr__(self): return f'TimeInterval(start={self._start}, end={self._end})' @@ -679,6 +691,9 @@ def from_message(cls, msg: Message): def __eq__(self, other): return isinstance(other, TimeElement) and self._element == other._element + def __hash__(self): + return hash((self._element,)) + def __repr__(self): return f'TimeElement(element={self._element})' diff --git a/src/pyphetools/pp/v202/_individual.py b/src/pyphetools/pp/v202/_individual.py index 3804e9f9..5db5eebf 100644 --- a/src/pyphetools/pp/v202/_individual.py +++ b/src/pyphetools/pp/v202/_individual.py @@ -82,6 +82,8 @@ def __init__( survival_time_in_days: typing.Optional[int] = None, ): self._status = status + if 
time_of_death is not None: + assert isinstance(time_of_death, TimeElement) self._time_of_death = time_of_death self._cause_of_death = cause_of_death self._survival_time_in_days = survival_time_in_days diff --git a/src/pyphetools/validation/ontology_qc.py b/src/pyphetools/validation/ontology_qc.py index 9fa9bcb1..930766d6 100644 --- a/src/pyphetools/validation/ontology_qc.py +++ b/src/pyphetools/validation/ontology_qc.py @@ -138,7 +138,7 @@ def _clean_terms(self) -> List[HpTerm]: :returns: list of HPO terms without redundancies/conflicts :rtype hpo_terms: List[HpTerm] """ - by_age_dictionary = defaultdict(list) + by_age_dictionary = defaultdict(list) # collect all terms without a defined age of onset # We will assume these terms exist at all specific ages of onset, thus we need this to calculate redundancy observed_terms_without_onset = list() diff --git a/src/pyphetools/visualization/simple_patient.py b/src/pyphetools/visualization/simple_patient.py index b4eeb2f8..92db2ea2 100644 --- a/src/pyphetools/visualization/simple_patient.py +++ b/src/pyphetools/visualization/simple_patient.py @@ -8,7 +8,7 @@ from ..creation.constants import Constants from ..creation.hp_term import HpTerm from ..creation.individual import Individual -from ..creation.pyphetools_age import IsoAge, NoneAge +from ..creation.pyphetools_age import IsoAge from .simple_variant import SimpleVariant from ..pp.v202 import VitalStatus @@ -158,8 +158,6 @@ def get_age(self)-> typing.Optional[str]: def age_in_years(iso_age:str) -> typing.Optional[float]: if iso_age is None: return None - if isinstance(iso_age, NoneAge): - return None if len(iso_age) == 0: return None if not iso_age.startswith("P"): diff --git a/test/test_age_column_mapper.py b/test/test_age_column_mapper.py index 5ee354e1..20bda796 100644 --- a/test/test_age_column_mapper.py +++ b/test/test_age_column_mapper.py @@ -31,46 +31,46 @@ class TestOptionMapper(unittest.TestCase): def test_year(self): ageMapper = 
AgeColumnMapper.by_year(column_name=TEST_COLUMN) p3 = ageMapper.map_cell("3") - self.assertEqual("P3Y", p3.age_string) + self.assertEqual("P3Y", p3.age.iso8601duration) p42 = ageMapper.map_cell("42") - self.assertEqual("P42Y", p42.age_string) + self.assertEqual("P42Y", p42.age.iso8601duration) def test_iso8601(self): ageMapper = AgeColumnMapper.iso8601(column_name=TEST_COLUMN) p3y = ageMapper.map_cell("P3Y") - self.assertEqual("P3Y", p3y.age_string) + self.assertEqual("P3Y", p3y.age.iso8601duration) p3m25d = ageMapper.map_cell("P3Y2M5D") - self.assertEqual("P3Y2M5D", p3m25d.age_string) + self.assertEqual("P3Y2M5D", p3m25d.age.iso8601duration) def test_year_month_both_1(self): ageMapper = AgeColumnMapper.by_year_and_month(column_name=TEST_COLUMN) age_string = "14 y 8 m" age_iso = ageMapper.map_cell(age_string) - self.assertEqual("P14Y8M", age_iso.age_string) + self.assertEqual("P14Y8M", age_iso.age.iso8601duration) def test_year_month_both_2(self): ageMapper = AgeColumnMapper.by_year_and_month(column_name=TEST_COLUMN) age_string = "7 y 6 m" age_iso = ageMapper.map_cell(age_string) - self.assertEqual("P7Y6M", age_iso.age_string) + self.assertEqual("P7Y6M", age_iso.age.iso8601duration) def test_year_month_both_3(self): ageMapper = AgeColumnMapper.by_year_and_month(column_name=TEST_COLUMN) age_string = "7y6m" age_iso = ageMapper.map_cell(age_string) - self.assertEqual("P7Y6M", age_iso.age_string) + self.assertEqual("P7Y6M", age_iso.age.iso8601duration) def test_year_month_just_year_1(self): ageMapper = AgeColumnMapper.by_year_and_month(column_name=TEST_COLUMN) age_string = "7 y" age_iso = ageMapper.map_cell(age_string) - self.assertEqual("P7Y", age_iso.age_string) + self.assertEqual("P7Y", age_iso.age.iso8601duration) def test_year_month_just_month_1(self): ageMapper = AgeColumnMapper.by_year_and_month(column_name=TEST_COLUMN) age_string = "2 m" age_iso = ageMapper.map_cell(age_string) - self.assertEqual("P2M", age_iso.age_string) + self.assertEqual("P2M", 
age_iso.age.iso8601duration) def test_int_or_float_regex(self): int_or_float = r"(\d+)(\.\d+)?" @@ -90,53 +90,53 @@ def test_fractional_year_strings(self): ageMapper = AgeColumnMapper.by_year(column_name=TEST_COLUMN) age_string = "2" age_iso = ageMapper.map_cell(age_string) - self.assertEqual("P2Y", age_iso.age_string) + self.assertEqual("P2Y", age_iso.age.iso8601duration) age_string = "4.75" age_iso = ageMapper.map_cell(age_string) - self.assertEqual("P4Y9M", age_iso.age_string) + self.assertEqual("P4Y9M", age_iso.age.iso8601duration) age_string = "5.9" age_iso = ageMapper.map_cell(age_string) - self.assertEqual("P5Y11M", age_iso.age_string) + self.assertEqual("P5Y11M", age_iso.age.iso8601duration) age_string = "6.25" age_iso = ageMapper.map_cell(age_string) - self.assertEqual("P6Y3M", age_iso.age_string) + self.assertEqual("P6Y3M", age_iso.age.iso8601duration) age_string = "8.1" age_iso = ageMapper.map_cell(age_string) - self.assertEqual("P8Y1M", age_iso.age_string) + self.assertEqual("P8Y1M", age_iso.age.iso8601duration) def test_custom_dictionary(self): ageMapper = AgeColumnMapper.custom_dictionary(column_name=TEST_COLUMN, string_to_iso_d=string_to_iso_dict) age_iso = ageMapper.map_cell("1.5") - self.assertEqual("P1Y6M", age_iso.age_string) + self.assertEqual("P1Y6M", age_iso.age.iso8601duration) age_iso = ageMapper.map_cell("3.5") - self.assertEqual("P3Y6M", age_iso.age_string) + self.assertEqual("P3Y6M", age_iso.age.iso8601duration) age_iso = ageMapper.map_cell("birth") - self.assertEqual("P1D", age_iso.age_string) - age_iso = ageMapper.map_cell("NOT THERE") - self.assertFalse(age_iso.is_valid()) + self.assertEqual("P1D", age_iso.age.iso8601duration) + age_iso = ageMapper.map_cell("NOT THERE") ## Malformed + self.assertIsNone(age_iso) def test_by_month(self): ageMapper = AgeColumnMapper.by_month(column_name=TEST_COLUMN) age_iso = ageMapper.map_cell(5) - self.assertEqual("P5M", age_iso.age_string) + self.assertEqual("P5M", age_iso.age.iso8601duration) def 
test_by_month2(self): ageMapper = AgeColumnMapper.by_month(column_name=TEST_COLUMN) age_iso = ageMapper.map_cell(0.5) - self.assertEqual("P15D", age_iso.age_string) + self.assertEqual("P15D", age_iso.age.iso8601duration) def test_by_month(self): ageMapper = AgeColumnMapper.by_month(column_name=TEST_COLUMN) age_iso = ageMapper.map_cell("5") - self.assertEqual("P5M", age_iso.age_string) + self.assertEqual("P5M", age_iso.age.iso8601duration) age_iso = ageMapper.map_cell(0.8) - self.assertEqual("P24D", age_iso.age_string) + self.assertEqual("P24D", age_iso.age.iso8601duration) def test_by_month_one_year(self): ageMapper = AgeColumnMapper.by_month(column_name=TEST_COLUMN) age_iso = ageMapper.map_cell(12) - self.assertEqual("P1Y", age_iso.age_string) + self.assertEqual("P1Y", age_iso.age.iso8601duration) From 83f74b0963328af29a28d26ec1aa79ec356356db Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sat, 10 Aug 2024 07:55:18 +0200 Subject: [PATCH 4/7] refactoring (3) --- src/pyphetools/creation/__init__.py | 2 +- src/pyphetools/creation/individual.py | 9 +- src/pyphetools/creation/pyphetools_age.py | 159 ++++++++++++++++------ test/test_ontology_qc.py | 4 +- test/test_pyphetools_age.py | 21 +-- 5 files changed, 136 insertions(+), 59 deletions(-) diff --git a/src/pyphetools/creation/__init__.py b/src/pyphetools/creation/__init__.py index 615bca6b..1ca74a3d 100644 --- a/src/pyphetools/creation/__init__.py +++ b/src/pyphetools/creation/__init__.py @@ -23,7 +23,7 @@ from .metadata import MetaData from .mode_of_inheritance import Moi from .option_column_mapper import OptionColumnMapper -from .pyphetools_age import PyPheToolsAge, IsoAge, HpoAge, GestationalAge, HPO_ONSET_TERMS +from .pyphetools_age import PyPheToolsAge, AgeSorter, HPO_ONSET_TERMS from .sex_column_mapper import SexColumnMapper from .simple_column_mapper import SimpleColumnMapper from .scm_generator import SimpleColumnMapperGenerator diff --git a/src/pyphetools/creation/individual.py 
b/src/pyphetools/creation/individual.py index 1de7f5be..251cca43 100644 --- a/src/pyphetools/creation/individual.py +++ b/src/pyphetools/creation/individual.py @@ -11,7 +11,7 @@ from .hp_term import HpTerm from .hgvs_variant import Variant from .metadata import MetaData, Resource -from .pyphetools_age import PyPheToolsAge +from .pyphetools_age import PyPheToolsAge, AgeSorter from ..pp.v202 import TimeElement as TimeElement202 from ..pp.v202 import VitalStatus as VitalStatus202 from ..pp.v202 import OntologyClass as OntologyClass202 @@ -240,11 +240,8 @@ def _get_onset(self) -> typing.Optional[TimeElement202]: if len(phenotypic_feature_onsets) == 0: return None age_format_list = list(phenotypic_feature_onsets) - #for o in phenotypic_feature_onsets: - # age_format_list.append(IsoAge.from_iso8601(o)) - #sorted_age = sorted(age_format_list, lambda x: x.total_days) - # TODO SORT!!!!!!!!!!!!!!!!!!!!!!! - youngest_age = age_format_list[0] + sorted_age_list = AgeSorter.sort_by_age(age_format_list) + youngest_age = sorted_age_list[0] return youngest_age def _get_disease_object(self): diff --git a/src/pyphetools/creation/pyphetools_age.py b/src/pyphetools/creation/pyphetools_age.py index 37ae3fff..c16cffb8 100644 --- a/src/pyphetools/creation/pyphetools_age.py +++ b/src/pyphetools/creation/pyphetools_age.py @@ -2,20 +2,23 @@ import abc import re import typing +import numpy as np DAYS_IN_WEEK = 7 AVERAGE_DAYS_IN_MONTH = 30.437 AVERAGE_DAYS_IN_YEAR = 365.25 import phenopackets as PPKt -from ..pp.v202 import GestationalAge as GestationalAge202 from ..pp.v202 import OntologyClass as OntologyClass202 from ..pp.v202 import TimeElement as TimeElement202 from ..pp.v202 import Age as Age202 +from ..pp.v202 import GestationalAge as GestationalAge202 +from ..pp.v202 import AgeRange as AgeRange202 +from ..pp.v202 import Timestamp as Timestamp202 +from ..pp.v202 import TimeInterval as TimeInterval202 from .constants import Constants - # The following terms are to simplify making HpoAge 
objects HPO_ONSET_TERMS = { # Onset of symptoms after the age of 60 years. @@ -23,7 +26,7 @@ # Onset of symptoms after the age of 40 years. "Middle age onset": "HP:0003596", # Onset of symptoms after the age of 16 years. - "Young adult onset":"HP:0011462", + "Young adult onset": "HP:0011462", # Onset of disease at an age of greater than or equal to 25 to under 40 years. "Late young adult onset": "HP:0025710", # Onset of disease at an age of greater than or equal to 19 to under 25 years. @@ -31,15 +34,15 @@ # Onset of disease at an age of greater than or equal to 16 to under 19 years. "Early young adult onset": "HP:0025708", # Onset of disease after 16 years . - "Adult onset": "HP:0003581", + "Adult onset": "HP:0003581", #Onset of signs or symptoms of disease between the age of 5 and 15 years. "Juvenile onset": "HP:0003621", #Onset of disease at the age of between 1 and 5 years. - "Childhood onset":"HP:0011463", + "Childhood onset": "HP:0011463", # Onset of signs or symptoms of disease between 28 days to one year of life. "Infantile onset": "HP:0003593", # Onset of signs or symptoms of disease within the first 28 days of life. - "Neonatal onset":"HP:0003623", + "Neonatal onset": "HP:0003623", # A phenotypic abnormality that is present at birth. "Congenital onset": "HP:0003577", # onset prior to birth @@ -49,14 +52,97 @@ # Onset prior to birth but after 8 weeks of embryonic development (corresponding to a gestational age of 10 weeks). "Fetal onset": "HP:0011461", #late first trimester during the early fetal period, which is defined as 11 0/7 to 13 6/7 weeks of gestation (inclusive). - "Late first trimester onset":"HP:0034199", + "Late first trimester onset": "HP:0034199", # second trimester, which comprises the range of gestational ages from 14 0/7 weeks to 27 6/7 (inclusive) - "Second trimester onset":"HP:0034198", + "Second trimester onset": "HP:0034198", #third trimester, which is defined as 28 weeks and zero days (28+0) of gestation and beyond. 
- "Third trimester onset": "HP:0034197", + "Third trimester onset": "HP:0034197", } +class AgeSorter: + MOST_NEGATIVE_INT32 = np.iinfo(np.int32).min + + ISO8601_REGEX = r"^P(\d+Y)?(\d+M)?(\d+D)?" + + HPO_AGE_TO_DAYS = { + "Antenatal onset": -1, + "Embryonal onset": -7 * 40, + "Fetal onset": -7 * 29, + "Late first trimester onset": -7 * 29, + "Second trimester onset": -7 * 26, + "Third trimester onset": -7 * 22, + "Congenital onset": 0, + "Neonatal onset": 1, + "Pediatrial onset": 29, + "Infantile onset": 29, + "Childhood onset": 365.25, + "Juvenile onset": 5 * 365.25, + "Adult onset": 16 * 365.25, + "Young adult onset": 16 * 365.25, + "Early young adult onset": 16 * 365.25, + "Intermediate young adult onset": 19 * 365.25, + "Late young adult onset": 25 * 365.25, + "Middle age onset": 40 * 365.25, + "Late onset": 60 * 365.25, + } + + def __init__(self, + element: typing.Union[ + GestationalAge202, Age202, AgeRange202, OntologyClass202, Timestamp202, TimeInterval202] + ): + self._element = element + if isinstance(element, GestationalAge202): + days = 7 * element.weeks + element.days + self._num_days = -1 * days + elif isinstance(element, Age202): + age_str = element.iso8601duration + match = re.search(AgeSorter.ISO8601_REGEX, age_str) + if match: + years = int(match.group(1)) + months = int(match.group(2)) + days = int(match.group(3)) + self._num_days = years * 365.25 + months * 30.436875 + days + else: + self._num_days = 0 + elif isinstance(element, AgeRange202): + age_str = element.start.iso8601duration + match = re.search(AgeSorter.ISO8601_REGEX, age_str) + if match: + years = int(match.group(1)) + months = int(match.group(2)) + days = int(match.group(3)) + self._num_days = years * 365.25 + months * 30.436875 + days + else: + self._num_days = 0 + elif isinstance(element, OntologyClass202): + if element.label not in AgeSorter.HPO_AGE_TO_DAYS: + raise ValueError(f"Could not find HPO class for {element.label}") + self._num_days = 
AgeSorter.HPO_AGE_TO_DAYS[element.label] + elif isinstance(element, Timestamp202): + self._num_days = AgeSorter.MOST_NEGATIVE_INT32 + elif isinstance(element, TimeInterval202): + self._num_days = AgeSorter.MOST_NEGATIVE_INT32 + else: + raise ValueError(f"Unknown element type: {type(element)}") + + @property + def element(self) -> typing.Union[ + GestationalAge202, Age202, AgeRange202, OntologyClass202, Timestamp202, TimeInterval202]: + return self._element + + @property + def num_days(self) -> int: + return self._num_days + + @staticmethod + def sort_by_age(onset_list: typing.List[TimeElement202]) -> typing.List[TimeElement202]: + agesorter_list = [AgeSorter(x) for x in onset_list] + sorted_list = sorted(agesorter_list, key=lambda x: x.num_days) + sorted_time_elements = [x.element for x in sorted_list] + return sorted_time_elements + + class PyPheToolsAge(metaclass=abc.ABCMeta): """Class for managing the various ways we have of representing Age as either an ISI 8601 string, a gestational age, or an HPO onset term. 
@@ -100,17 +186,15 @@ def age_string(self): else: return Constants.NOT_PROVIDED - - @staticmethod - def get_age_pp201(age_string:str) -> typing.Optional[TimeElement202]: + def get_age_pp201(age_string: str) -> typing.Optional[TimeElement202]: """ Encode the age string as a TimeElement if possible """ if age_string is None or len(age_string) == 0: return None if isinstance(age_string, float) and math.isnan(age_string): - return None # sometimes pandas returns an empty cell as a float NaN + return None # sometimes pandas returns an empty cell as a float NaN if age_string.startswith("P"): return TimeElement202(Age202(age_string)) elif age_string in HPO_ONSET_TERMS: @@ -125,20 +209,19 @@ def get_age_pp201(age_string:str) -> typing.Optional[TimeElement202]: # only warn if the user did not enter na=not available if age_string != 'na': raise ValueError(f"Could not parse \"{age_string}\" as age.") - return NoneAge - - + return None + """ @staticmethod def get_age(age_string) -> "PyPheToolsAge": - """Return an appropriate subclass of PyPheToolsAge or None + Return an appropriate subclass of PyPheToolsAge or None - if starts with P interpret as an ISO 8601 age string - if starts with HP interpret as an HPO Onset term - if is a string such as 34+2 interpret as a gestation age - If we cannot parse, return a NoneAge obejct, a signal that no age is available :returns:PyPheToolsAge object (one of the subclasses) :rtype: PyPheToolsAge - """ + if age_string is None: return NoneAge("na") if isinstance(age_string, float) and math.isnan(age_string): @@ -156,29 +239,28 @@ def get_age(age_string) -> "PyPheToolsAge": if age_string != 'na': raise ValueError(f"Could not parse \"{age_string}\" as age.") return NoneAge(age_string=age_string) - + """ + @staticmethod - def age_key_to_ga4gh(age_string) : + def age_key_to_ga4gh(age_string): """ Transform an age key such as either an iso8601 string (e.g. 
P41Y) or an HPO Onset label (e.g., Congenital onset) into a TimeElement The age keys are used in the Excel template files. Currently, only iso8601 and HPO Onset are supported. """ if not isinstance(age_string, str): raise ValueError(f"age_string argument {age_string} must be a string but was {type(age_string)}") - + age_obj = PyPheToolsAge.get_age(age_string=age_string) if not age_obj.is_valid(): raise ValueError(f"Could not parse age key \"{age_string}\"") return age_obj.to_ga4gh_time_element() - - class IsoAge(PyPheToolsAge): """Class to record and sort ages formated according to iso8601 """ - def __init__(self, age_string:str, y=None, m=None, w=None, d=None): + def __init__(self, age_string: str, y=None, m=None, w=None, d=None): super().__init__(age_string) total_days = 0 if y is None: @@ -204,7 +286,8 @@ def __init__(self, age_string:str, y=None, m=None, w=None, d=None): if days > AVERAGE_DAYS_IN_MONTH: extra_months = math.floor(days / AVERAGE_DAYS_IN_MONTH) months = months + int(extra_months) - days = days % int(AVERAGE_DAYS_IN_MONTH) # modulo arithmetic, get remaining days after months are subtracted + days = days % int( + AVERAGE_DAYS_IN_MONTH) # modulo arithmetic, get remaining days after months are subtracted if months > 11: extra_years = months // 12 months = months % 12 @@ -242,7 +325,7 @@ def to_iso8601(self): if self._days > 0: components.append(f"{self._days}D") if len(components) == 1: - return "P0D" # newborn + return "P0D" # newborn else: return "".join(components) @@ -268,7 +351,6 @@ def to_hpo_age(self): else: raise ValueError(f"[ERROR] Could not calculate HpoAge for {self.age_string}") - def to_ga4gh_time_element(self) -> PPKt.TimeElement: """ :returns: a representation of Age formated as one of the options of GA4GH TimeElement @@ -278,11 +360,11 @@ def to_ga4gh_time_element(self) -> PPKt.TimeElement: iso8601_age = self.to_iso8601() if iso8601_age is None: raise ValueError(f"iso8601 was None") - time_elem.age.iso8601duration = iso8601_age + 
time_elem.age.iso8601duration = iso8601_age return time_elem @staticmethod - def from_iso8601(iso_age:str): + def from_iso8601(iso_age: str): """ :returns: IsoAge object representing the years, months, and days of the Age :rtype: IsoAge @@ -290,23 +372,23 @@ def from_iso8601(iso_age:str): original_age_string = iso_age if not iso_age.startswith("P"): raise ValueError(f"Malformed isoage string {iso_age}") - iso_age = iso_age[1:] # remove P + iso_age = iso_age[1:] # remove P y_idx = iso_age.find("Y") if y_idx > 0: y = int(iso_age[:y_idx]) - iso_age = iso_age[(1+y_idx):] + iso_age = iso_age[(1 + y_idx):] else: y = 0 m_idx = iso_age.find("M") if m_idx > 0: m = int(iso_age[:m_idx]) - iso_age = iso_age[(1+m_idx):] + iso_age = iso_age[(1 + m_idx):] else: m = 0 w_idx = iso_age.find("W") if w_idx > 0: w = int(iso_age[:w_idx]) - iso_age = iso_age[(1+w_idx):] + iso_age = iso_age[(1 + w_idx):] else: w = 0 d_idx = iso_age.find("D") @@ -316,6 +398,7 @@ def from_iso8601(iso_age:str): d = 0 return IsoAge(y=y, m=m, w=w, d=d, age_string=original_age_string) + class HpoAge(PyPheToolsAge): def __init__(self, hpo_onset_label) -> None: super().__init__(age_string=hpo_onset_label) @@ -344,6 +427,7 @@ def to_hpo_age(self): def is_valid(self): return True + class GestationalAge(PyPheToolsAge): def __init__(self, age_string) -> None: @@ -355,7 +439,6 @@ def __init__(self, age_string) -> None: else: raise ValueError(f"Could not extract gestation age from \"{age_string}\".") - @property def weeks(self): return self._weeks @@ -384,10 +467,10 @@ def to_hpo_age(self): """ if self._weeks >= 28: # prior to birth during the third trimester, which is defined as 28 weeks and zero days (28+0) of gestation and beyond. - return HpoAge("Third trimester onset") # HP:0034197 + return HpoAge("Third trimester onset") # HP:0034197 elif self._weeks >= 14: # prior to birth during the second trimester, which comprises the range of gestational ages from 14 0/7 weeks to 27 6/7 (inclusive). 
- return HpoAge("Second trimester onset") # HP:0034198 + return HpoAge("Second trimester onset") # HP:0034198 elif self._weeks >= 11: # 11 0/7 to 13 6/7 weeks of gestation (inclusive). return HpoAge("Late first trimester onset") # HP:0034199 @@ -406,7 +489,3 @@ def is_gestational_age(age_string): return True else: return False - - - - diff --git a/test/test_ontology_qc.py b/test/test_ontology_qc.py index 0c433b6e..15dfdb13 100644 --- a/test/test_ontology_qc.py +++ b/test/test_ontology_qc.py @@ -1,7 +1,7 @@ import hpotk import pytest -from pyphetools.creation import HpTerm, Individual, IsoAge +from pyphetools.creation import HpTerm, Individual, PyPheToolsAge from pyphetools.validation import OntologyQC @@ -122,7 +122,7 @@ def test_redundancy_with_and_without_onset(self, hpo: hpotk.Ontology): Myoclonic seizure HP:0032794 ("grandchild" of Seizure) Seizure HP:0001250 """ - onset = IsoAge.from_iso8601("P1Y") + onset = PyPheToolsAge.get_age_pp201("P1Y") myoclonic_seiz = HpTerm(hpo_id="HP:0032794", label="Myoclonic seizure", observed=True, onset=onset) seiz = HpTerm(hpo_id="HP:0001250", label="Seizure", observed=True) hpo_terms = [myoclonic_seiz, seiz] diff --git a/test/test_pyphetools_age.py b/test/test_pyphetools_age.py index b754ecc2..09248805 100644 --- a/test/test_pyphetools_age.py +++ b/test/test_pyphetools_age.py @@ -1,27 +1,28 @@ import unittest import pytest -from pyphetools.creation import PyPheToolsAge, HpoAge, IsoAge +from pyphetools.creation import PyPheToolsAge +from pyphetools.pp.v202 import TimeElement as TimeElement202 class TestSimpleAge(unittest.TestCase): #'age_of_onset': 'Infantile onset', 'age_at_last_encounter': 'P3Y9M', 'sex': 'M'} def test_infantile_onset(self): - onset_age = PyPheToolsAge.get_age('Infantile onset') + onset_age = PyPheToolsAge.get_age_pp201('Infantile onset') self.assertIsNotNone(onset_age) - self.assertEqual('Infantile onset', onset_age.age_string) - self.assertTrue(isinstance(onset_age, HpoAge)) + self.assertEqual('Infantile 
onset', onset_age.ontology_class.label) + self.assertTrue(isinstance(onset_age, TimeElement202)) def test_3m9m(self): - onset_age = PyPheToolsAge.get_age('P3Y9M') + onset_age = PyPheToolsAge.get_age_pp201('P3Y9M') self.assertIsNotNone(onset_age) - self.assertEqual('P3Y9M', onset_age.age_string) - self.assertTrue(isinstance(onset_age, IsoAge)) + self.assertEqual('P3Y9M', onset_age.age.iso8601duration) + self.assertTrue(isinstance(onset_age, TimeElement202)) def test_2m(self): - onset_age = PyPheToolsAge.get_age('P2M') + onset_age = PyPheToolsAge.get_age_pp201('P2M') self.assertIsNotNone(onset_age) - self.assertEqual('P2M', onset_age.age_string) - self.assertTrue(isinstance(onset_age, IsoAge)) + self.assertEqual('P2M', onset_age.age.iso8601duration) + self.assertTrue(isinstance(onset_age, TimeElement202)) def test_age_key_converter_iso(self): From 43edc65319b49f3ad57c684d723408880c72718d Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sat, 10 Aug 2024 15:04:27 +0200 Subject: [PATCH 5/7] preparing for Kaplan Meier Visualization --- src/pyphetools/__init__.py | 2 +- src/pyphetools/creation/__init__.py | 2 +- src/pyphetools/creation/hp_term.py | 2 +- src/pyphetools/creation/pyphetools_age.py | 188 ++++++++---------- src/pyphetools/pp/v202/_base.py | 3 + .../visualization/individual_table.py | 3 +- .../visualization/kaplan_meier_visualizer.py | 42 ++-- .../visualization/simple_patient.py | 53 ++--- 8 files changed, 131 insertions(+), 164 deletions(-) diff --git a/src/pyphetools/__init__.py b/src/pyphetools/__init__.py index 75c230a4..98630198 100644 --- a/src/pyphetools/__init__.py +++ b/src/pyphetools/__init__.py @@ -5,7 +5,7 @@ from . 
import validation -__version__ = "0.9.97" +__version__ = "0.9.98" __all__ = [ diff --git a/src/pyphetools/creation/__init__.py b/src/pyphetools/creation/__init__.py index 1ca74a3d..ce353d9d 100644 --- a/src/pyphetools/creation/__init__.py +++ b/src/pyphetools/creation/__init__.py @@ -62,7 +62,7 @@ "Individual", "MetaData", "OptionColumnMapper", - "PyPheToolsAge", "IsoAge", "HpoAge", "GestationalAge", "HPO_ONSET_TERMS", + "PyPheToolsAge", "AgeSorter", "HPO_ONSET_TERMS", "SexColumnMapper", "SimpleColumnMapper", "SimpleColumnMapperGenerator", diff --git a/src/pyphetools/creation/hp_term.py b/src/pyphetools/creation/hp_term.py index 1a634704..2b863819 100644 --- a/src/pyphetools/creation/hp_term.py +++ b/src/pyphetools/creation/hp_term.py @@ -128,7 +128,7 @@ def hpo_term_and_id(self) -> str: return f"{self._label} ({self._id})" def _term_and_id_with_onset(self) -> str: - if self._onset is not None and self._onset.is_valid(): + if self._onset is not None: return f"{self.hpo_term_and_id}: onset {self._onset}" else: return self.hpo_term_and_id diff --git a/src/pyphetools/creation/pyphetools_age.py b/src/pyphetools/creation/pyphetools_age.py index c16cffb8..7ac059a8 100644 --- a/src/pyphetools/creation/pyphetools_age.py +++ b/src/pyphetools/creation/pyphetools_age.py @@ -7,7 +7,7 @@ DAYS_IN_WEEK = 7 AVERAGE_DAYS_IN_MONTH = 30.437 AVERAGE_DAYS_IN_YEAR = 365.25 -import phenopackets as PPKt + from ..pp.v202 import OntologyClass as OntologyClass202 from ..pp.v202 import TimeElement as TimeElement202 @@ -87,44 +87,106 @@ class AgeSorter: "Late onset": 60 * 365.25, } + HPO_AGE_TO_YEARS = { + "Antenatal onset": 0, + "Embryonal onset": 0, + "Fetal onset": 0, + "Late first trimester onset": 0, + "Second trimester onset": 0, + "Third trimester onset": 0, + "Congenital onset": 0, + "Neonatal onset": 0, + "Pediatrial onset": 0, + "Infantile onset": 0, + "Childhood onset": 1, + "Juvenile onset": 5, + "Adult onset": 16, + "Young adult onset": 16, + "Early young adult onset": 16 , + 
"Intermediate young adult onset": 19, + "Late young adult onset": 25 , + "Middle age onset": 40 , + "Late onset": 60, + } + def __init__(self, - element: typing.Union[ + time_element: typing.Union[ GestationalAge202, Age202, AgeRange202, OntologyClass202, Timestamp202, TimeInterval202] ): - self._element = element + if not isinstance(time_element, TimeElement202): + time_element = TimeElement202.from_message(time_element) + element = time_element.element if isinstance(element, GestationalAge202): days = 7 * element.weeks + element.days self._num_days = -1 * days + self._num_years = 0 elif isinstance(element, Age202): age_str = element.iso8601duration match = re.search(AgeSorter.ISO8601_REGEX, age_str) if match: - years = int(match.group(1)) - months = int(match.group(2)) - days = int(match.group(3)) - self._num_days = years * 365.25 + months * 30.436875 + days + years = match.group(1) + months = match.group(2) + days = match.group(3) + self._num_days = AgeSorter.get_days_from_match(years, months, days) + self._num_years = AgeSorter.get_years_from_match(years, months, days) else: self._num_days = 0 + self._num_years = 0 elif isinstance(element, AgeRange202): age_str = element.start.iso8601duration match = re.search(AgeSorter.ISO8601_REGEX, age_str) if match: - years = int(match.group(1)) - months = int(match.group(2)) - days = int(match.group(3)) - self._num_days = years * 365.25 + months * 30.436875 + days + years = match.group(1) + months = match.group(2) + days = match.group(3) + self._num_days = AgeSorter.get_days_from_match(years, months, days) + self._num_years = AgeSorter.get_years_from_match(years, months, days) else: self._num_days = 0 + self._num_years = 0 elif isinstance(element, OntologyClass202): if element.label not in AgeSorter.HPO_AGE_TO_DAYS: raise ValueError(f"Could not find HPO class for {element.label}") - self._num_days = AgeSorter.HPO_AGE_TO_DAYS[element.label] + self._num_days = AgeSorter.HPO_AGE_TO_DAYS.get(element.label) + self._num_years = 
AgeSorter.HPO_AGE_TO_YEARS.get(element.label) elif isinstance(element, Timestamp202): self._num_days = AgeSorter.MOST_NEGATIVE_INT32 + self._num_years = AgeSorter.MOST_NEGATIVE_INT32 elif isinstance(element, TimeInterval202): self._num_days = AgeSorter.MOST_NEGATIVE_INT32 + self._num_years = AgeSorter.MOST_NEGATIVE_INT32 else: - raise ValueError(f"Unknown element type: {type(element)}") + print(f"[WARN] Unknown element type: {type(element)}") + self._num_days = None + self._num_years = None + + @staticmethod + def get_days_from_match(years: typing.Optional[str], + months: typing.Optional[str], + days: typing.Optional[str]) -> int: + total_days = 0 + if years is not None: + # years with be something like 42Y + total_days += 365.25 * int(years[:-1]) + if months is not None: + total_days += int(months[:-1]) * 30.436875 + if days is not None: + total_days += int(days[:-1]) + return int(total_days) + + @staticmethod + def get_years_from_match(years: typing.Optional[str], + months: typing.Optional[str], + days: typing.Optional[str]) -> float: + total_years = 0 + if years is not None: + # years with be something like 42Y + total_years += float(years[:-1]) + if months is not None: + total_years += float(months[:-1])/12 + if days is not None: + total_years += float(days[:-1])/365.25 + return total_years @property def element(self) -> typing.Union[ @@ -134,6 +196,15 @@ def element(self) -> typing.Union[ @property def num_days(self) -> int: return self._num_days + + @property + def num_years(self) -> float: + return self._num_years + + @staticmethod + def convert_to_years(time_elem:TimeElement202) -> float: + age_sorter = AgeSorter(time_element=time_elem) + return age_sorter.num_years @staticmethod def sort_by_age(onset_list: typing.List[TimeElement202]) -> typing.List[TimeElement202]: @@ -151,13 +222,6 @@ class PyPheToolsAge(metaclass=abc.ABCMeta): def __init__(self, age_string) -> None: self._age_string = age_string - @abc.abstractmethod - def to_ga4gh_time_element(self) -> 
PPKt.TimeElement: - """ - :returns: a representation of Age formated as one of the options of GA4GH TimeElement - :rtype: PPKt.TimeElement - """ - pass @abc.abstractmethod def is_valid(self) -> bool: @@ -179,12 +243,7 @@ def to_hpo_age(self) -> "HpoAge": """ pass - @property - def age_string(self): - if self.is_valid(): - return self._age_string - else: - return Constants.NOT_PROVIDED + @staticmethod def get_age_pp201(age_string: str) -> typing.Optional[TimeElement202]: @@ -211,49 +270,6 @@ def get_age_pp201(age_string: str) -> typing.Optional[TimeElement202]: raise ValueError(f"Could not parse \"{age_string}\" as age.") return None - """ - @staticmethod - def get_age(age_string) -> "PyPheToolsAge": - Return an appropriate subclass of PyPheToolsAge or None - - if starts with P interpret as an ISO 8601 age string - - if starts with HP interpret as an HPO Onset term - - if is a string such as 34+2 interpret as a gestation age - - If we cannot parse, return a NoneAge obejct, a signal that no age is available - :returns:PyPheToolsAge object (one of the subclasses) - :rtype: PyPheToolsAge - - if age_string is None: - return NoneAge("na") - if isinstance(age_string, float) and math.isnan(age_string): - return NoneAge("na") # sometimes pandas returns an empty cell as a float NaN - if len(age_string) == 0: - return NoneAge("na") - elif age_string.startswith("P"): - return IsoAge.from_iso8601(age_string) - elif age_string in HPO_ONSET_TERMS: - return HpoAge(hpo_onset_label=age_string) - elif GestationalAge.is_gestational_age(age_string): - return GestationalAge(age_string) - else: - # only warn if the user did not enter na=not available - if age_string != 'na': - raise ValueError(f"Could not parse \"{age_string}\" as age.") - return NoneAge(age_string=age_string) - """ - - @staticmethod - def age_key_to_ga4gh(age_string): - """ - Transform an age key such as either an iso8601 string (e.g. 
P41Y) or an HPO Onset label (e.g., Congenital onset) into a TimeElement - The age keys are used in the Excel template files. Currently, only iso8601 and HPO Onset are supported. - """ - if not isinstance(age_string, str): - raise ValueError(f"age_string argument {age_string} must be a string but was {type(age_string)}") - - age_obj = PyPheToolsAge.get_age(age_string=age_string) - if not age_obj.is_valid(): - raise ValueError(f"Could not parse age key \"{age_string}\"") - return age_obj.to_ga4gh_time_element() class IsoAge(PyPheToolsAge): @@ -351,17 +367,6 @@ def to_hpo_age(self): else: raise ValueError(f"[ERROR] Could not calculate HpoAge for {self.age_string}") - def to_ga4gh_time_element(self) -> PPKt.TimeElement: - """ - :returns: a representation of Age formated as one of the options of GA4GH TimeElement - :rtype: PPKt.TimeElement - """ - time_elem = PPKt.TimeElement() - iso8601_age = self.to_iso8601() - if iso8601_age is None: - raise ValueError(f"iso8601 was None") - time_elem.age.iso8601duration = iso8601_age - return time_elem @staticmethod def from_iso8601(iso_age: str): @@ -407,18 +412,6 @@ def __init__(self, hpo_onset_label) -> None: self._onset_label = hpo_onset_label self._onset_id = HPO_ONSET_TERMS.get(hpo_onset_label) - def to_ga4gh_time_element(self) -> PPKt.TimeElement: - """ - :returns: a representation of Age formated as an OntologyClass (HPO Onset term) - :rtype: PPKt.TimeElement - """ - time_elem = PPKt.TimeElement() - clz = PPKt.OntologyClass() - clz.id = self._onset_id - clz.label = self._onset_label - time_elem.ontology_class.CopyFrom(clz) - return time_elem - def to_hpo_age(self): """Return self, this is already an HpoAge object """ @@ -450,17 +443,6 @@ def days(self): def is_valid(self): return True - def to_ga4gh_time_element(self) -> PPKt.TimeElement: - """ - :returns: a representation of Age formated as an OntologyClass (HPO Onset term) - :rtype: PPKt.TimeElement - """ - time_elem = PPKt.TimeElement() - gest_age = PPKt.GestationalAge() 
- gest_age.weeks = self._weeks - gest_age.days = self._days - time_elem.age.gestational_age.CopyFrom(gest_age) - return time_elem def to_hpo_age(self): """Return self, this is already an HpoAge object diff --git a/src/pyphetools/pp/v202/_base.py b/src/pyphetools/pp/v202/_base.py index 1474d977..20dcc40b 100644 --- a/src/pyphetools/pp/v202/_base.py +++ b/src/pyphetools/pp/v202/_base.py @@ -91,6 +91,9 @@ def __eq__(self, other): and self._id == other._id \ and self._label == other._label + def __hash__(self): + return hash((self._id, self._label,)) + def __repr__(self): return f'OntologyClass(id={self._id}, label={self._label})' diff --git a/src/pyphetools/visualization/individual_table.py b/src/pyphetools/visualization/individual_table.py index 209aae3d..7e49dbfc 100644 --- a/src/pyphetools/visualization/individual_table.py +++ b/src/pyphetools/visualization/individual_table.py @@ -126,7 +126,8 @@ def _simple_patient_to_table_row(self, spat:SimplePatient) -> List[str]: """ row_items = [] # Patient information - pat_info = spat.get_subject_id() + " (" + spat.get_sex() + "; " + spat.get_age() + ")" + age_string = spat.get_age() or "age: n/a" + pat_info = spat.get_subject_id() + " (" + spat.get_sex() + "; " + age_string + ")" row_items.append( pat_info) row_items.append( spat.get_disease()) # Variant information diff --git a/src/pyphetools/visualization/kaplan_meier_visualizer.py b/src/pyphetools/visualization/kaplan_meier_visualizer.py index 5b503b37..7d13a9d4 100644 --- a/src/pyphetools/visualization/kaplan_meier_visualizer.py +++ b/src/pyphetools/visualization/kaplan_meier_visualizer.py @@ -1,5 +1,6 @@ import typing import hpotk +import numpy as np from pyphetools.visualization.simple_patient import SimplePatient @@ -24,37 +25,30 @@ def __init__(self, n_invalid = 0 for spat in simple_patient_list: years_at_last_exam = spat.get_age_in_years() ## float or None - ## check for target HPO + if spat.contains_observed_term_id(target_tid) and 
spat.contains_excluded_term_id(target_tid): + raise ValueError(f"{spat.pat_id} listed as both observed/excluded for {target_tid}") if spat.contains_observed_term_id(target_tid): observed_term = spat.get_observed_term_by_id(target_tid) - print("found " + target_tid) - else: - observed_term = None - if spat.contains_excluded_term_id(target_tid): - excluded_term = spat.get_excluded_term_by_id(target_tid) - else: - excluded_term = None - ## If we do not have a last date, we cannot include this in the KM analysis. - ## We can tak the last date from the last_observed or from the data of the feature if it was present - if years_at_last_exam is None and observed_term is None: - print(f"[WARN] skipping {spat.get_phenopacket_id()} because we could not find last age/event age") - if observed_term is not None: event_age = observed_term.onset - event_years = SimplePatient.age_in_years(iso_age=event_age) - print("event years", event_years) - else: - event_years = None - if event_years is not None: - time_in_years.append(event_years) + event_years = SimplePatient.age_in_years(time_elem=event_age) + if event_years is None or np.isnan(event_years): + print(f"[WARN] could not find age at event for {spat.get_phenopacket_id()} (Omitting)") + continue + time_in_years.append(years_at_last_exam) event.append(1) n_observed += 1 - elif excluded_term is not None and years_at_last_exam is not None: - time_in_years.append(years_at_last_exam) - event.append(0) - n_excluded += 1 + elif spat.contains_excluded_term_id(target_tid): + if years_at_last_exam is None: + print(f"[WARN] {target_tid} is excluded in {spat.get_phenopacket_id()} but we did not find a last exam age") + continue + else: + time_in_years.append(years_at_last_exam) + event.append(0) + n_excluded += 1 else: - print(f"[WARN] skipping {spat.get_phenopacket_id()} because we could not find last age/event age (2)") + print(f"[WARN] skipping {spat.get_phenopacket_id()} because target term {target_tid} was neither observed nor 
excluded") n_invalid += 1 + continue print(f"observed events {n_observed}, right-censored cases {n_excluded}, invalid {n_invalid}") self._T = time_in_years self._E = event diff --git a/src/pyphetools/visualization/simple_patient.py b/src/pyphetools/visualization/simple_patient.py index 92db2ea2..1e692dfd 100644 --- a/src/pyphetools/visualization/simple_patient.py +++ b/src/pyphetools/visualization/simple_patient.py @@ -8,9 +8,10 @@ from ..creation.constants import Constants from ..creation.hp_term import HpTerm from ..creation.individual import Individual -from ..creation.pyphetools_age import IsoAge +from ..creation.pyphetools_age import AgeSorter from .simple_variant import SimpleVariant -from ..pp.v202 import VitalStatus +from ..pp.v202 import TimeElement as TimeElement202 +from ..pp.v202 import VitalStatus as VitalStatus202 class SimplePatient: """ @@ -43,13 +44,7 @@ def __init__(self, ga4gh_phenopacket) -> None: self._subject_id = subj.id self._time_at_last_encounter = None if subj.HasField("time_at_last_encounter"): - time_at_last_encounter = phenopackets.TimeElement() - time_at_last_encounter.CopyFrom(subj.time_at_last_encounter) - if time_at_last_encounter.HasField("age"): - self._time_at_last_encounter = time_at_last_encounter.age.iso8601duration - elif time_at_last_encounter.HasField("ontology_class"): - clz = time_at_last_encounter.ontology_class - self._time_at_last_encounter = f"{clz.label} ({clz.id})" + self._time_at_last_encounter = TimeElement202.from_message(subj.time_at_last_encounter) if ppack.subject.sex == phenopackets.MALE: self._sex = "MALE" elif ppack.subject.sex == phenopackets.FEMALE: @@ -59,32 +54,31 @@ def __init__(self, ga4gh_phenopacket) -> None: else: self._sex = "UNKNOWN" ## get vital status if possible - self._vstat = None self._survival_time_in_days = None self._cause_of_death = None if ppack.subject.HasField("vital_status"): - vstat = ppack.subject.vital_status - if vstat.status == VitalStatus.Status.DECEASED: + vstat = 
VitalStatus202.from_message(ppack.subject.vital_status) + if vstat.status == VitalStatus202.Status.DECEASED: self._vstat = "DECEASED" - elif vstat.status == VitalStatus.Status.ALIVE: + elif vstat.status == VitalStatus202.Status.ALIVE: self._vstat = "ALIVE" else: - pass # keep self._vstat as None + self._vstat = None if vstat.survival_time_in_days is not None: self._survival_time_in_days = vstat.survival_time_in_days self._cause_of_death = vstat.cause_of_death for pf in ppack.phenotypic_features: - hpterm = HpTerm(hpo_id=pf.type.id, label=pf.type.label, observed=not pf.excluded) + hpterm = HpTerm(hpo_id=pf.type.id, label=pf.type.label, onset=pf.onset, observed=not pf.excluded) if pf.excluded: self._excluded_hpo_terms[pf.type.id] = hpterm else: self._observed_hpo_terms[pf.type.id] = hpterm - if pf.onset is not None and pf.onset.age is not None and pf.onset.age.iso8601duration: - term_onset = pf.onset.age.iso8601duration + if pf.onset is not None: + telem = TimeElement202.from_message(pf.onset) + self._by_age_dictionary[telem].append(hpterm) else: - term_onset = Constants.NOT_PROVIDED - self._by_age_dictionary[term_onset].append(hpterm) + self._by_age_dictionary[Constants.NOT_PROVIDED].append(hpterm) for k, v in self._observed_hpo_terms.items(): if k in self._excluded_hpo_terms: self._excluded_hpo_terms.pop(k) # remove observed terms that may have been excluded at other occasion @@ -148,30 +142,23 @@ def get_phenopacket_id(self) -> str: def get_subject_id(self) -> str: return self._subject_id - def get_sex(self): + def get_sex(self) -> str: return self._sex - def get_age(self)-> typing.Optional[str]: + def get_age(self)-> typing.Optional[TimeElement202]: return self._time_at_last_encounter @staticmethod - def age_in_years(iso_age:str) -> typing.Optional[float]: - if iso_age is None: + def age_in_years(time_elem:TimeElement202) -> typing.Optional[float]: + if time_elem is None: return None - if len(iso_age) == 0: - return None - if not iso_age.startswith("P"): - 
print(f"ERROR-isoage malformed: {iso_age}") - return None - ppt_age = IsoAge.from_iso8601(iso_age=iso_age) - years = ppt_age.years + (1/12) * ppt_age.months + (1/365) * ppt_age.days - return years + return AgeSorter.convert_to_years(time_elem) def get_age_in_years(self) -> typing.Optional[float]: - if self._time_at_last_encounter is None or not self._time_at_last_encounter: + if self._time_at_last_encounter is None: return None - return SimplePatient.age_in_years(iso_age=self._time_at_last_encounter) + return SimplePatient.age_in_years(time_elem=self._time_at_last_encounter) def get_disease(self) -> str: From 0c0f242ec9a6facf8ac450b8fb8d06748df9b4f2 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sat, 10 Aug 2024 16:11:58 +0200 Subject: [PATCH 6/7] fixing vital status parse --- .../creation/case_template_encoder.py | 4 +- .../visualization/kaplan_meier_visualizer.py | 65 +++++++++++++++++-- .../visualization/simple_patient.py | 17 ++++- 3 files changed, 77 insertions(+), 9 deletions(-) diff --git a/src/pyphetools/creation/case_template_encoder.py b/src/pyphetools/creation/case_template_encoder.py index 2a3c0be8..7694454f 100644 --- a/src/pyphetools/creation/case_template_encoder.py +++ b/src/pyphetools/creation/case_template_encoder.py @@ -421,8 +421,10 @@ def _parse_individual(self, decsd = data_items.get("deceased") if decsd == "yes" and encounter_age is not None: vitStat = VitalStatus(status=VitalStatus.Status.DECEASED, time_of_death=encounter_age) + elif decsd == "no": + vitStat = VitalStatus(status=VitalStatus.Status.ALIVE) else: - vitStat = VitalStatus(status=VitalStatus.Status.DECEASED) + vitStat = VitalStatus(status=VitalStatus.Status.UNKNOWN_STATUS) disease_id = data_items.get("disease_id") disease_label = data_items.get("disease_label") # common error -- e.g. 
PMID: 3000312 or OMIM: 600123 (whitespace after colon) diff --git a/src/pyphetools/visualization/kaplan_meier_visualizer.py b/src/pyphetools/visualization/kaplan_meier_visualizer.py index 7d13a9d4..5633f7f6 100644 --- a/src/pyphetools/visualization/kaplan_meier_visualizer.py +++ b/src/pyphetools/visualization/kaplan_meier_visualizer.py @@ -6,10 +6,28 @@ class KaplanMeierVisualizer: + """ + Display a Kaplan Meier (survival) curve for a cohort with respect to the age of onset of a specific feature. + For instance + from pyphetools.visualization import KaplanMeierVisualizer, PhenopacketIngestor, SimplePatient + from lifelines import KaplanMeierFitter + umod_dir = "../phenopackets/" # directory containing phenopackets to plot + ingestor = PhenopacketIngestor(indir=umod_dir) + ppkt_list = ingestor.get_phenopacket_list() + simple_pt_list = [SimplePatient(ppkt) for ppkt in ppkt_list] + hpo_id = "HP:0003774" # TermId of HPO term for the KM plot + kmv = KaplanMeierVisualizer(simple_patient_list=simple_pt_list, target_tid=stage5crd) + T, E = kmv.get_time_and_event() + # plot Kaplan Meier curve + kmf = KaplanMeierFitter() + kmf.fit(T, E, label="Age at stage 5 kidney disease") + ax = kmf.plot_survival_function() + ax.set_xlabel("Years"); + """ def __init__(self, simple_patient_list: typing.List[SimplePatient], - target_tid: typing.Union[str, hpotk.TermId]=None) -> None: + target_tid: typing.Union[str, hpotk.TermId] = None) -> None: """ The goal of this class is to provide a data for a visualization as a KaplanMeier survival curve with respect to the age of onset of a specific HPO term (feature of the disease). 
@@ -17,7 +35,17 @@ def __init__(self, This will be performed if the target term is None """ if target_tid is None: - raise ValueError("VitalStatus based KM curve not implemented yet") + self._T, self._E = self._get_time_and_event_for_vital_status(simple_patient_list=simple_patient_list) + else: + self._T, self._E = self._get_time_and_event_for_hpo_term(target_tid=target_tid, simple_patient_list=simple_patient_list) + + + + + + def _get_time_and_event_for_hpo_term(self, + target_tid:str, + simple_patient_list: typing.List[SimplePatient]) -> typing.Tuple[typing.List[int], typing.List[int]]: time_in_years = list() event = list() n_observed = 0 @@ -50,9 +78,36 @@ def __init__(self, n_invalid += 1 continue print(f"observed events {n_observed}, right-censored cases {n_excluded}, invalid {n_invalid}") - self._T = time_in_years - self._E = event + return time_in_years, event + + + def _get_time_and_event_for_vital_status(self, + simple_patient_list: typing.List[SimplePatient]) -> typing.Tuple[typing.List[int], typing.List[int]]: + time_in_years = list() + event = list() + n_observed = 0 + n_excluded = 0 + n_invalid = 0 + for spat in simple_patient_list: + years_at_last_exam = spat.get_age_in_years() ## float or None + if years_at_last_exam is None: + print(f"[WARN] skipping individual, age at last encounter not available") + continue + if spat.is_deceased(): + time_in_years.append(years_at_last_exam) + event.append(1) + n_observed += 1 + else: + time_in_years.append(years_at_last_exam) + event.append(0) + n_excluded += 1 + return time_in_years, event + + - def get_time_and_event(self): + def get_time_and_event(self) -> typing.Tuple[typing.List[int], typing.List[int]]: + """ + Return lists of times and event status suitable for plotting a Kaplan Meier curve + """ return self._T, self._E \ No newline at end of file diff --git a/src/pyphetools/visualization/simple_patient.py b/src/pyphetools/visualization/simple_patient.py index 1e692dfd..855a1966 100644 --- 
a/src/pyphetools/visualization/simple_patient.py +++ b/src/pyphetools/visualization/simple_patient.py @@ -13,6 +13,7 @@ from ..pp.v202 import TimeElement as TimeElement202 from ..pp.v202 import VitalStatus as VitalStatus202 + class SimplePatient: """ This class flattens all observed terms into a set and also recorded variants, sex, identifier, and age @@ -25,6 +26,10 @@ class SimplePatient: :type ga4gh_phenopacket: phenopackets.schema.v2.phenopackets_pb2.Phenopacket """ + CONST_DECEASED = "deceased" + CONST_ALIVE = "alive" + CONST_UNKNOWN_VITAL_STATUS = "unknown vital status" + def __init__(self, ga4gh_phenopacket) -> None: if str(type(ga4gh_phenopacket)) != "": raise ValueError(f"phenopacket argument must be GA4GH Phenopacket Schema Phenopacket but was {type(ga4gh_phenopacket)}") @@ -56,14 +61,15 @@ def __init__(self, ga4gh_phenopacket) -> None: ## get vital status if possible self._survival_time_in_days = None self._cause_of_death = None + self._vstat = None if ppack.subject.HasField("vital_status"): vstat = VitalStatus202.from_message(ppack.subject.vital_status) if vstat.status == VitalStatus202.Status.DECEASED: - self._vstat = "DECEASED" + self._vstat = SimplePatient.CONST_DECEASED elif vstat.status == VitalStatus202.Status.ALIVE: - self._vstat = "ALIVE" + self._vstat = SimplePatient.CONST_ALIVE else: - self._vstat = None + self._vstat = SimplePatient.CONST_UNKNOWN_VITAL_STATUS if vstat.survival_time_in_days is not None: self._survival_time_in_days = vstat.survival_time_in_days self._cause_of_death = vstat.cause_of_death @@ -208,5 +214,10 @@ def get_excluded_term_by_id(self, hpo_term_id)-> typing.Optional[HpTerm]: def get_term_by_age_dict(self): return self._by_age_dictionary + def is_deceased(self) -> bool: + return self._vstat == SimplePatient.CONST_DECEASED + + def is_alive(self) -> bool: + return self._vstat == SimplePatient.CONST_ALIVE From 2de632b42a41b22d93ab21184da60dfd5ab2a975 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sat, 10 Aug 2024 
17:16:44 +0200 Subject: [PATCH 7/7] documentation --- docs/api/creation/iso_age.md | 3 - docs/img/kmf_esrd.png | Bin 0 -> 18408 bytes docs/img/kmf_plot_vstatus.png | Bin 0 -> 12593 bytes docs/visualization/index.md | 3 + docs/visualization/kaplan_meier_visualizer.md | 72 ++++++++++++++++++ mkdocs.yml | 4 +- 6 files changed, 78 insertions(+), 4 deletions(-) delete mode 100644 docs/api/creation/iso_age.md create mode 100644 docs/img/kmf_esrd.png create mode 100644 docs/img/kmf_plot_vstatus.png create mode 100644 docs/visualization/index.md create mode 100644 docs/visualization/kaplan_meier_visualizer.md diff --git a/docs/api/creation/iso_age.md b/docs/api/creation/iso_age.md deleted file mode 100644 index 925b330c..00000000 --- a/docs/api/creation/iso_age.md +++ /dev/null @@ -1,3 +0,0 @@ -# IsoAge - -::: pyphetools.creation.IsoAge \ No newline at end of file diff --git a/docs/img/kmf_esrd.png b/docs/img/kmf_esrd.png new file mode 100644 index 0000000000000000000000000000000000000000..6428a235613088ce9dfc31d96bb2f51b11b51e78 GIT binary patch literal 18408 zcmeIac{r8r+cta|G8B?ZW-1z#DYL>78B*psbCDr431MkKGFOy{%(DuasfbGEF=Ldl z#4^vrcbx9u?|q-=d%ov;zi->#Ki_S;?=9B4*0s*-Jdbfd_T3{5H6_};EPF8wqg7E> z(84fMJcg0{q@sXVMB4^_!#|SlmkivsovhtGEnKazs}}Cgx18K>*;;blwsLi|b#fHu zm*5vU%Vp#4?(8NdAmH#nF5q`^y(z%SbKC=NLhY<<=!RkYEztiYnX(zS7^bAFq9CVp zCw_X+>(0}z!JWC5D<@vib5Y;pCcmHbPT8w;E7@Ejr{3EfqCI9L%60EG>BmDn+yO7X&u`o}Q#?_2^81bM?^}K*J-*WF z*3z}Tvm@~fTO5yjrk2ptrdx!qz&{xMHF6yM^W-Q-ieb-bNa!)_!wo7tymUeyBgZg7 zW>PK;!+)j6NHFY5AV!5@=I8#;NB%$WSUn?=r051mgwpxje>M8wJ6OFn7?oy&eJ3yI z$?@G-J}n@iu9qV@{)q~2(fjs#(EP%}FKHSKYo+Ngaq7`jVn0l@sM|v#NaN7|{+au% zY|{7d-z6fzI>}SAvoEi$tyR}FGzDW!f$LVo-@ji$hs78AqQvQi+XyQhW!--|%4LjH z`Lbz&K7XvLW6Aj;c@p~Tr%#`L_v2Rj&@WF3I8~K~Nq4*P=7j9Lys#AwtWHHyF;L2b zkXlq!bZ0-$(E}I>_u+_!l)P&tygWRYyu7?FtEygS!~_o%^qk4)$l~-{mAh8rsJk*V zc(wm;*=cmg&y()oe$Abdl{I5#W}f4q!%Nt7Wk1u>)?OO)9R51#&MPe~JtTiwLctPK zdE`Gw_NShRPoK*mLw2Te5ZhTQ@XvblMgzXA6764%WHkC{-Nu`raWBmbUW3(lnQhM> 
z31gEyXH-iQ(lgbyUB0@)Z~rXr;u1Ig^~a2S?ZrK&IhP(Y+BSrARcUa%} zb-z>1;EG%6LwO}O7ll&!%S^Vmw%?kX!d3+7c~peSn6jgz84e#hv^pr}`dj(g)2HNj z!)r%0bakH%AEw=y8XyR7G?ki-%gzT>nmXyo9LLvZ=j0UM`f<+0cd1KiJl>6_B&~9t zkV6=%s%Y=%@K~SBm~!&pxRzp7UToh(c&e?YnWLBYG%>MYD!)Pv_LOAUvoVHGidebu z^vRPHF^9g1x`XPSu*&I*ouUr44$ooVjLD9S4eM{AcdASWy{GaB?P-ty-maLRpLZYh z=n-9G+1VK05pf)l@R%7e1k)i5*VJs!Mfk5)%~bnti1=;ZDRvw*N|5qshb@a$RL!Fc zk{2IvzAxoDliQtl&A#`Ig;Ak(lf`!3ev0aW0psCkFJ5@{j!`G`x{6qQBG=W^yBz(>Z{4=oY1ou5sFo<1q7>2nfchY5xbI-O2aAMr5^S01 zjW6`Rux)-T;>M)gGZjP~1B1-l(|uWB0fhdN0=-;=1D#AfwqxI(u?Sf{cvo21RqgAO z;4@R!_s;4u(GR>UOFJW5J45ZVx_VEW5@+7%sO9jM?AGEF{+dl!cCX)2-@bqEEbv=8 zb~f+yaLw?nA0=vKovoXrH#xksUD91(MqC+!ozw+yj^Q`S4r3K3BtPbF&(usQ z@>=+FV&{9J@3NuCM62A^`cg-(Vc|tP4nJbmirt^@FTheu-0PWdGRgdDR~6`5p6*|Q z!z4UBV%mB<;zIIZg{QdpfJ4b(`tar-@xD@*lxCTYTVb5CrasGmqud2oHwpSR?H>Zj zmcE`$w1LCU&dh9e@>^h4X}@&DguA4F`Us0BaVT%BF?MRDW~XYT6dciGrffW0JyC|2 zkFP7QWGI`}vGn!Vmh)EM4oR}nZB3c&w2vqH=Yy#zIrJ4#+l97&%rm-l>C*b|C+bDs zOLm5NdI|p9Yx8$5M5j`6+`%DBe&sVG`Rv)V$sacpb$jwnd|PTJk{=iOtTd&o<1(skTm?EVWD+aX6Ba_H(gy_or~rkI@C9iBU)k8b_|TF<_R~l1)r6fE+1l5Mu*z@tI>AiLpe-}V0C?Q|a1nAMBjPt;{5OGHdfP4iDC-qji#8>{Yj zDjlK7GcMComsv}lDWBr6oin{V_gQLvBIQ|ZYzCN{U41As@0l|lssdGqW`(<73SAG} z-d?Ghssg)&yVG(O+IE~kM`2Xrn4orHPCXBE*K?uko2E{4 zqo1YbzZ_qJ-OM(t@dJ<5hX^WaSx04fmXY0ev8}ox-=sossynY^X0XDlq@PD+W!S&Q z)NAZ;)$FHz<@4XpkcG3>Utz`EV9PZ%M?9}xy7cgzuy9wl?wQ*0-s#)J{&(7Lyft_l zWxa1wAY6IgyNack=UQUS&Y|w9yVnel7xW|-J#8g3PH0FO)ZHk)U3ex}=)v))*kuES zQiyBF@v9(dM{~byN`HI3!LBX&QguyT2~nE$H#Q7^1nF znV;W{JPX{;9^#E_nQ>o@e~lA`purIn_ozQDi3QigB^6T?!`k7qd7nOA{BbM5KelYp z5mJ?jH@N2_1er#tbVi(4mq&d!rc1zW7dGMz4GjrD3Nwh~EwVetjwRQmy%%SF9D-_J zL)@UeOO%(F-`U>k%F;fW;8M?w0)4pjaxcE+<$2G1J3G5Yh!Lv~lLD*P31@!X5~Jm~ z(Cc}vz_jXVLc-w0TQUlo*{vO@SoEx|)1A_13tMD5Du~rnFCETz#_(P1Oi|iz;_~^h z)Lh-cC0Jb!lo{UO8}5^7u@sw>tNLcXX(VJ6t6TTYS7%2^WFK>zcn*4$Ly}+WzUGvb zl!UupA37Iy{<)ANgxkWKE!|3-zNz27T^{~!2L7_XG?f9?t5dVRRM1s;Qyn?&_$#0M z<_il~hNtxP^~Kj^AUqRGLPLMP5X0y=Wd@vx_qg+niV1Ia$OOqAe@@Qum(@8_)0-t` 
zn{`=zA78J8F~?QEbe(9C=`z|T4yBcF2N5@&g3?3Don9B#Erv<*1~=t3BEn;g>c#JJJMT= z9a0P5g_ja{HkBG<1tPxm5={Ko##kg>Q)2|o@=J!iRUz)hftBP}E%znW8^C>1B4mvA z9pTq8HGKs>4aYOSkxxm>l8Q>&F!7;QsJ-$~PU1avs&$`0WfCM{4?^cxXR{2=r1}w_ zt5`1h0Id4gh8Os5-y4hj;S!@%C>TN&)ebIPkdf$T6fNR4e?xk)RlXd8(-Kr@rcmOA zLpCW3-r1j?24(QOeytnoSE(iIdL8tZzT4)!3-PrGvsjq1>+-66*Kf&*b!g=hp8mM} zitD}ep1qGp_$>}{9Z!K&5LIT(kDeSomQ#Jo&_r9xQiT7e{F9UFdM&S{S+{DJenrSG zZ+JoFzdqk6;4$FRuxD$VShG{jU$v-(QXJIxGK23OUJhSxZ*Tv7o72zC5zZ4;THvx* zU!1#C<-N39)cG~>yivvbImQ&|{=(SL z>s@aM?}BOsY8gda*ftgsn@A`#VPStt1n8S$dutBM&q7?8fF@{V%eY40X|(PUiVH<< z<7zcyp^k&)wrlfaaWcN;+v`0w1XPI~RKIXwg;DV92k4^IEI1|O%KYbAnd{bfSocf2 z_1UReB-S?L-fM-lJDhgqjAoQikQ0fvD6y?`VW$c>F}prhFx9~6-)R+V(ruIO^!>Sz zQISp53H4XGIr-(67ydMLgfa`e|FDX+g$}Ly^2qv*+Rg9JUz=@?9znZ`s#{d)oB6G! z>E#)rH1}n83r-;)HCj+Jg3XnW#az2vsJ>Kwdpg@63ZnICC;u}QgZXsqQX<|78LzxA z!@#Fer=IiZ;7I}) zV9Kr^{O#3r`|~}j%Wlc{49oROr2Z zj~ZGZ_*K4If>c&zWo6sn$x5$bqslvO9Nana^>j%4&0K9-g($NH|f@(?5~DGqwuNxoDh}%yd!5B6QV|AZ?&A zEMII_y)H9z!|E+te3f1EvlouqE%EQ?wyAh?dNZ=gD08Crd#0rubn^0j_OwdR;1xI{ z@9;OPsa#7tH`}AMh=_HyS-*jT5qyV}UEJ$1S0B3w_ZeBi56gzW971F!rOECwQ zEoQOK^sB6pNbJ`)7g|J}f8qw+JFW&cmIS$0491c1AN6y| z`=#`KrhNIET=zZPH)sZpCd{a9N(^6C`L|8Z+cXRr6^j{^->`mTOG(Gp3hjRvwDPqf zJvZFO6Fluc-rLg$&2(iDgD^l7gn_ccKg|g_jr+aYzbJP>J^>b9z*NC6@-lqPc9M_axnM+e+Wu@X;I#B&W zuIK2=TeH>+9A3WofjgFB`fAj`qJaB)2RvWIY_2 zYQWd`wnUHT8t<0lO}*9&VopQ3vO8-neT6q8%H1b-jS23iR~dBJ1C`ydsZV2K(m&p( zoISuZsxtgG=adGao$=#y#p4V}>EzF;A8qiVF6FT;H6D!Txp6$enm&O)V)%Pf z?U-Idf67S1Z7M>=<>en23;9UM1nU``JD!s$JYmcKbA%0+# zIxHse{+8Q0f}w$#LHhtvO7m^S9zk(GN|7)u82^b{@H>~-@JLJ~86PcW<3vkg*|6rZ zY+0?G_wU7i2UcLwN)FQ>M=Nm+^l|JY^qNsr!oedUJv29xcz!D%jwOFL%sk-NqA)D* zJUTl0@84U~Lsi$JpGHL;h+Rk}sS5&J#K9r&Hni%0|Nd1Rm;c2@%9VU#2;F0x<~MvR zYhbN&oLYtfU$#s6C*EV!m(G&jaCFS)Uc1PJEkGPu-r~nyFAAV0v-;iUk<`py;%mDk z9Qmk|VjP!Hp;I18K^ZuW8mSv?`RROO>62#*8XflGi5547=P*G8_Sfj*f0K_MI%`ewr zS=MXg^z-!w>HR@YrkPt7rHMi>_Nrk?X~!^9ii@W$v&{G%XL1cr9cy#%vw1HVFa4mY z?afc_wtcD?*N-=pf;4}R;uja*dS#u~`{5uP^xQ!AfcS+F{>b#~;M#=4G%+1xZQ)hL 
zVaxRG!7RA5b1UM)cw8?j-;svw5Q*OOj-Y;dCN-yz=FAC3(YaxnBwSGkumC}3c;D4f z!T`N{=Ic-}3uZ)0%0*8SVBLH#@0b`v7|-u?>wWj7Ds~V3OBQL(- z6Jdx5xsKt5NV)Jyd(K~@!jx|T&MnmA%qY~Y)JB5;0T08NY2p9Tfc^ZD)3mYou6rC} z$9^WoEQnKJW**MqPIgKOMC8WJ)-z3ypRFHbIKK4oD2DGfr=Yiyi<7$DoOv@T>P{1* z*vhH$z>^Q0rKqq65g1jGAZ@LBW=443`hzI>hgu2@6A$RsC`s_M=o|T;YAYqTDG6$Y z#^BFey>EXlKju!41=EmnMUr1kpjE9q#*mU`o-+FHPmmm>E;42-uo|gN;AfcVj11f$ zmKlk?J17YS%s!qz;vhfzJhlDsEXW3PPsExj=-E0Mi9 zXiHncu8K|t#AN+orNTa@91uYRBSA7MoYt>zKXDANEnx#WNV&{i`Ld!GN1W}bF!CU{ zmGL=|=s%tG7!@rEJ+3z_RsKL2&y5s?76+r-%D4 zdiI#D0TYDLF5!&4Z@kN*-j4sos@gz_e}Cn3Y8{34H-dHl6uLEIWrjy4 z%+V=d*HK`|g!ifavtquPV}gnpIlXq2fwzMJ(=kmpQtUMsMrz{{8goyckB$TrR;{eP zK=J|SYoW}w^chaL_BsVU?xLWS=}arZJmpj5{g!=s666>T?sD@gyTDBy_A48V7!@b1 z*LaVVY54VD7?(GUa7M!orLkZ>*p$?dWgjtmHuTehzhxh3TPR~Bjx*uQxOG{d2XXot zm{i=>_4Y0gq-9NqCRKE8b!}lh2e30c7`i~4i^@=T31=?Fls)v+SBryeds+UpE(8p( zWq%?XZCp*G>lCF6#3DnZttp&;ytCrvJ8#RcdRF`U~2SI0MPYab{%{bxge`ciJTP+RNK z>s&Toqpl4zccZ`<- z{Oox+D{l3o?f&cTGR`4g$?QJ%y6@83iOVzn3dnetqi;8si?AcS@ zpR1R%ckf;uEiKE{_$O*oDrsqH)mEcUHCuXT^m1DP&S8HdM5fHPBzzw#%<40upR)4-&-?4pD-zc&h(*h_lYaorjsT|I`u9g6443#5oIg<; zmAR%B2&Pt64mi~lVHg+l*|H6l+tB=*DsgzF!k2kKrB^?7925NMyhi!vP|oK!=D##v zFH+F_ZcT3sN*1NT-!Pw}Fe27jK&^Qi>R+hCbgiweD}%_mpOJYu=53{G71PBIPm8L> z&c8cH$4VMz>eQ~ysT%#Na$c`Vym7toMCpO^K zt}5zR+k$^dtuBr96Ww!es9L2wxsc4GVb7FO_#o_!F%6d{8lUDW>{Abjc}Qka#)M0T zRD_EpP2jnex_#F|je#UG7<`@np2jkVYcv&J;SxsPIx3P&6LT+zjsjy-fL|?pa~Z9A z9iRzfY04KZkz&HW2V+!8p9QmNVl2(rX)r#hWpIx#UgF!$7$4l}q>rY)8nhm}YmTMM zVdUmgib|glEF<~Qmns0e5-`|~UHRx(n0-^NAHNc-`&T%gnH+zkj)n`*b-di!NwH#) z@9zZhH$KyF;TZ)q?oXJ<$+Pm&a+aT^!M^UUR@jc7{j%FcEgx;r^_q^IUa6O&-o!oF zN7xVLy^k2PwQ?D|BusdJXia+hK;3wt!9%%pj#TgnOt%%}{#AQ3D+e{^fvdJ@-p1(d zAvOmyq*Mh3&1jP2?*N$rcW{E1ii;Umn?&6CgZjg) zZzL`FG%SJ@T&VE9=1C>8+0z~%@j=a3q2Hs1@oZDdq~aA zCTnsC|55Cbm=QnSV)g%g$I)#fFwTgPjyrzL5(_?ou0H(rr&ZBeHY%mBB+4j3QE7=p zgp%P6?7R=cpdX8-6UJkaTrA6!sp4FC8?^TLJ%s(rPB^UXB1Vpv5dPOMaQ=95^G?n; z&C?^j5|}0&C!W0StqYd?2xuA9!H_Hz4g!IL;ZQ?8NigI3Ka38z`IUwdJ7a?=%VP3) 
z7c%?};B}a@h~->jzWvgUJzudg^B;@fH3y99gGjr!w#aBHUD)1J%jXB-5PoF-JjTGr zzmsHJVhHqwW7{$q{a!c*%e84E%4hef`=xgh$6rZ0h-SRme-x6}+isE%i-g#~ zsR0h%^Mt!SP!775Uo0ps?$0H>=;)rU23Ro6dJhR#_j3|*b8kh}+L5#Su0iTJ{rA_A zMK*r*lpk=S=)f~4(?V9SQ7p(q1OaGAZku`6Gfk_7+SNtHWzry;1NG#IHim#vMHN zgH5UriUstDae#5;JgIsC;zl!k;>?~bPEJ6PV|e=RA3w|N~(i;ZSx zQMB#8t8y3v>V$>Ibf50z@6f_Wb_kzdm^q!Im)PjvjQ>kKmAohB|~Pj1lv5 zq~H3Tc5*#`PYvteis6y@30y)yao*dodk_69nAe%Yuoc2|5l{?($8cp~yrt($)NurI zhqIp-^RDce>;yd9-UDDQfy&u?XlM?@HMJia$h zd2&J}n(?eb5}K007zhE{DGdUql?E`|Y(psB`9kT)upnCUJp~Q(y;E2n054ueJCSz8sI}M0ggn zu=OdPhi2K>Xpq}|5`}A8un|g+=BjB>uh%&jKO>B7Vz|V+tC+ZWCh5IyF#n zT)uCEeLaA6w6}}gnKjQdt?Ki9`LC>ech$LeJpS(d*LBExzvnh98>|7%BXnjx0fd_- z@p?%=&>AK^NpQ)R3k|yfE+f9{f4)Q($^B27E^;2ZxCLNn2b%W4?9&MFkLv87s{Cbj z1xz{!z$(LC;l7GJO@*CC{)hz0ILEXnzl6d8RQ<_$KZcuvrSY)H&LYf*$*xZ1KdVqUl?kGj#JJp&%T)CFpY`cU|~i zSZCsX8tfx9Rai2YwCE)$5P$YKd)0L0Me{v#6)&VNY-9cyzn>;a68SJsn}i*o)=hFU zL29{YmU4hTGRY&>;uGHT!ny-JXE*`iG~n=pzuQTkk#e zmQsegmL)@V7vgDw$vK2#krxzKp!vf1j8A3UPyLY#=*jFE3766VR0kZ+z?4lm72?S1}Kn`Cn8PY;2~zR;yeTL=sJcR-xl2AJaC;rPXorfykju(NF~+ zCN5^^8@Z@q2p4=HrCX3;+5&$AavH|8x2PdNvBWuSvKm8TUIm@UTuSmw#4I3TFeNmGA#=LNvpX|!N3oyKj zLXFi0j4PXyaLOwH;QgSwa!h5|Tr@>AzLVv%dRswM9Fkno+t0zf< znXECA4|Q}UXW48a_b`ybBV5VPvKmoNkrat!f_37bCrgUZ`@_cIF7^^{Rqk)K4jcT- zZGHm1f>XT273H}9Ae?yJ5RS_TJy#veE4pK%z896{6+fdlum6J=&|ifQC|_L4xZf#d z9Jbs86&=_u6%*;ywc4jpXjax#OJCu>IYt`xuqprtSOwSua{}El=0y7TNKJdD)8*3^ zSREs*FIq=J*!}G-S~9NXXaaZSe{FO3ELEnd#(smYeQ~0#jhzbu38``%Rtf|*2+p?6 z@sdV0epP6+3e-9+m>oIJd%=XKgI0VXNn1^lNAdU`3=1y!&$%uO7&k_5%mdDyJ6CjP zZWQ0*QZb0ACXN-;V(h?)5Mtwl@7d(Ogt<#40A&VcGon~dS2m1Eb{TdgY@(*O6O@2)9cu->sbR5HDONs@$jeZ%|y69qwQW`y_#kBZV?n?)hp|>1xECGJm zren+3uWdbM{tm10&2DEHYjcWbeEhj$39umgC z?kjOhJaOfD(!+g6vY|%Z{oeog5uv6p}_yZ?RLAm;sQ&@pz< zF=BRtB*_ZAj%j)b50M3K_(0&a^?#GxW)&ldFyoNf*>rR0ee=N{5RV*RH?j3xLiV4O4MXPD7XRmeT4k_+aa05Ut zpKi{blO6sKa|+12%)}AwgN5#SWAE{ty~S@cvUQDaTzt_pviL~R1=fFN6+gI>&o00}_0uw-5qX|k2`TQBAMbFT(z zVB5$QgYE5{%q2ER(XmjVVB^w8U^W+@AUZWGja)y;-W0lB3Po1>GtTN@h-6{oKz 
zhLRUEB!r_ujWxl?KHCY!8``U|Fa|0^n-Tzi_CeuaNmI($uQC0f>fh)F6^+g=hD8od z2X^q^-R$8je?7|)#!~PmnMa71Ch8O&NgTT33AG3-VG(J*fElovQa zAlusJS!77CXvMAS4>$t68+;Z*7Dgg^i}v`X_tK$l)gSH?S<9*dXw|7k-04vU5~PQW za%gQ?@aptF8l|Lz7bV!qAtGWlOvQmwM%`IQ6?Z{YXL{H|^Ha>$UqY=n-{Lmf*NgVObSL8fv;zJ|iB^5ozL!%lx{4$%kyQbj zWqWrssN()S5aqoea37sl5;Sg1VMBOYRt2SONfBr#(4mp0Y~$Mqv`l#vZ5X%dT0hB!e*s5` zX&z{L6%ciSom}uZ*Oq`W`&WFQh&{qbC+m(onpd7TEwj)O128T za>PnUq}?|bwZKg)$48g|YeP+P><~nBwduZMZ8S}2vZ*Ja2qvUm<0$jh~is4i}Z9Hbro9V9R7Z2+&kVZvDpTL@5imM~qz=^AI zsWQInHh{-|`|_oA^z&m{&Yi!7ErdAx{5I;7@kypt-jl$Kj{K)P`R9K6X6ys?uLu<| zo7bT@Zo%{4<-;YG;^JZ;J#`#VVmpv$0rqDAP>!DM>FP$u6izM%eXTpxg!zN>pzq3{QMr4Tso$Cv z#CyuS#x5>z-@SWxSKrUij|OrbM$fEr06XtKkpyHdT^*g(K~qi5*8>9sClY;jwpONq z!V9FgDny7AS?dL=t1WS;s@i^IdDf$*i9p^1*n3L#ur7 zq*`a>QF#&f2@Qm*Lr=B~gHJ}Q@@oS7P}kV_tV%hM=|p^%Z}AzvQ-CJ23pkbBD!^Sp zBm2>1Nem0PR4yrEt_Knc;9%q+`5B z%k(CcL*>BnyDsUD;i%fRv-0V!B!a;>NNJmB1{zCb7h!pK*%>HD^hwdm%(F zxR$acmI8`&itkfH70Fi=cx~DU`{eD|`Mb)t_Q~1B^TSMt{4goI9Agx)zYNdDBsG)_ zPjX&y(A(G88+;zTEa{Rk#FmQ!bT2lq*lI*fz39+$+q=OgM$X|aGy57ExiE=*vZM4a z0HR<;cbaImZ@<)qSx$DYrCDps>73q^L5*&`E4i|qtF0KpxlEFii8=t*A;<@OLVz^y z8Et!%UXW{H=oU8eb}YR@TZigfb34X;KJ(@>PZi;5p3^|1h1D6Irfp`!(_xyud+E9)8X8r49D#+V2n_dUpK+EOM zvzbm1Ca^LQ4!Gbhplm@Er71sQ9uZ8&K;jF(rvGv-D;{237^4~Gj$FU~i!@927@Z9@ z0ASd2MTv(;1H!}Xo8hyS=(Fhu94)9C>W>$#q2b*oS-_#TW2Nib#HX}!mA zQqOK>Emla^I(7hLO;37&5#5&hRFk!?sa~L&56dpL+E@@q+sX!c5My|%d7U87YCzU> z>98QU-M{5vJmwhY?(As5q^1jCBsLeC? 
zj-r!plVY#S6urXTYFdw4SOA5K)F`Q~b}UZW2wdmaAtC#3j`^+m3aMhUC6gdZ0bB}- z(q#L4INxM}C49q3D z!)TW+_7;f?cDmgUFhIseeA(1s?f)U0Xi1Gm(z;!F@<^SaPOho<*n{?W3hhE@ek_V98tq-GYz2?Wm%W z1K>{uTiZXkIZL8#Y{|d-Hn)-Roi>BL>7{kXXam6=d;D23(8Gm|L zD7m}-DBK1qnWTG7lBR_+w}!E$8oGsLosF3}m*U2r(=>9ayO$j$Wn)0SKBgR^=HcQd zNf-cf%)EDP&htPRmSb4tp#;LHb3$KXN?&?Ly$_Sh$*k~uKC2@^UB&kL5Vm(EJ0*@L zPa=TIIo_3{FMw)y|DA24x0aVe754%<^zSLdfmdnvgkDMRg47oz#{ch7=k6>K2=iH` zr3Q$gjCgrK%d!V)2cf6Hj15XnOm^Q2ca-_1oN@_c%6LcJ^HZH6(oZxk=1zPaU5Bjg z4um_x>mZJ3Zm$a*W8>pfK>6x|(I>04-{0Q1sOgw0sF|slK2o##5W!=725-1BS8}tn zwe<92mS+ZYK(qJdE^N$p4YYl=PZS_kzUx268hTGMMIvSw2kM;M9Vv1ASmWK^R*Po- z?~qb$$mq$*$7=c&!`ZU*O}%*H z68dmk5GJ#>vp+u?fh3411}a0a0DAz116!D7?v@V!;;{vV012mn%Iyl17cXA;9l@ZO zc>=v|w2qEWDv&kpzC<2nk#f&OB<;93maB0hD$(v=d15nx`OeGFAH$7MNvWkyZBZa? zCb%_AqLM*o?e{^1`p#`KVHlH0_zC(6E1ovEITcfO~WZ#;Qs@f4_&mLk8U zqcJj?exnJb?AD|@QpF*CgE#myJ~dXv7yA_;a}mReE^KZkaQlG!V)Pe~2mFt?39kOX zmeIaQmE6nELD;XEm7N{3pbU@uhOjdvwqRjH{z33>e~EfD+QacExUUAVIMqRHFvp>2 z?9AxQ1PpMP(Z2=;9)KbWi^c!!cL7}WaykA}%spcE*v|?(!_{h|@R(&2uPRgYc_gtk zYARz5XZ|2)kf+D)(W$BF8v(Yh)t=d+J; zx;}s%z(Ljo>_Hcb9W=HEqh6E#13D@+G61Zj0K_hcB?*8a2wF=J%BK757f`TDj(0vAZt4$9!S6P zB6+{;rUrao%fuuBX(PHiJ6Ab`frWwimfjHOmVlIs)SQUEIVxdy7jEMX-z8%H0jud> z%V%hakloHiT!^f!Ebm`o;)^ixx0@L#LyT3T5LBH79Cv<_K*+&y`YuL+7$`65xDo+& z#)YkHZ2&la-ivKYORe(zCjtL&2O&ZV=&JPh%d935h2U6%k5F3ye|_k*cKWK{?TW#n z%E}8)E-o%?N)H2}pO}93+ZUb$)D=A&pqlm4*4C~y502ciWSbC zRi(3m!8=3{-p(Nv)Jnzly?y+%;heHVQ%mzEFiplz(El?zLK0TTLZdtoF=zm(jZG;q z5aR0@6-1BbM%0d7+0Bgi5+RM>q!3g7g+VSpdGzyCUm=9<)rNrp$jovxF?ak2Ie22{k*9%T485< zYmNi+)BCUVkOo^Fn=ilSkaSpT?e~+e0y9~ab|}jc4HIaDH4T4$|F(e{nF~`|8J|hH z&Qwo94{$&SCSgEA5MB+8jMMZ5dX2?zgGrJokMj1s#B3I)j+Ex;nwoHF-gyaED z2EfG2pl8qm85@9wGnET!X!;esns3Aew7FzB(22(a)vd5*@J6QIYTt_dnr&|o;9Q0o zBM7nSYGIWSGRr}-%dG;kgI!hKo_&lqpa^i`V5eIG>p=p+u4o?B&Jrgxgvr5}NZZtu zV_0`z9fLFJWb5nuActnrJ+8Mq{t{P9`ScpK5r9~lz&tTOT#9gNcpAV%=srOFnz+f) z;(5de&I!Dvgo2g@Bw8mxr3F^3zW>539^ga(Jy-J?lrz*4Iy2NcYx)uQ7}^+>dEb+p 
zrlB>mAOVKCV>+S`XG3WaCungmT|F@$fIbzlW?B-#fYkrzG~&X>3|5R57i;Ech*T@( z9#eljcVN`_EG~|)G6*3B2=MVl{`zK#Uo^hpnn(mggk~865DyuzqM)clS5Dr3hNUPm$&l4kZ4L{_79Jz?iNA58D9u z@T%FIS0z5vH=S$(Cg@CG@tGlc;bL&mzL(FQjhPf$hj~IHSPfLw?v>|2F7l~xpB(iC zhD#;lxqCuZ92%(&YH;by^t3trrl-;KnKDd_$BTd58bY$j3aA_x+y;BK!4TM+!HiFLqJaY+7HQ` z2>PK39~y|~a@Z9d3LPb&su}{DB(}bmuR^2i%AgzG(KH1RQrlOa!wr23%Kor8RotGg z1nKtBLW}Hh1!Un>S{4~V@rQLSy@1VL2_@J<-}G%Kkjqp;lq((H;)rPQY2+)p15~R@ z*eV}Tpxy!D-l&Mt0+{Xm;QlN@kk|Cjx=ZY0N zLIN+uwg0bb^7U60u49+On1n9EI37{So26Hb9e%v~?70I89h8wpFxN-`#yqtm4ZQ-2vu^OQED$6i#Y^ZyA}O{C{iw1XWC%Tww-(=Q;iy_2eY_4|vq<#c zx#m>8mQEBx;?rlaS?}JRTLSS{n?0aGyG#J2ZU&vf8tG4ww80?`=`jpsACP<> zeZ5JB|6@tbgwvXWke**o;6;B-0jX4OCiFP?GKW!x9z}emIk^Wl@{wlQ0%Bg|4Y0F z+=^9`m*P&#d|iy7909!*Qe*+#avX%PO3~aI9g{2Nupi4CYoJ)`0fAu-^}!Pp;14|j z04@H!zrX(HK~kIzN+wuc`+pFLRcqYIL>q!s!boxQ?R186G{8Hoi&Nv{{qY1OV?(;qp}PxLQ9%qkD1XI5j10yu@}R9;4`QVs&=+Fpq)V0+g0Q5 z4}W9hDp=_|qdfQz)zk0?K~T!OnkZue0Hm2l0i<|QP^#}9iDDR{Ml1rmLt*V832q`2 n&HM$K5pw#!>u<|^-y!Qse&~LcBUTIo0j6?EO(FlH`Mv)Ou9y+sJCYnQqiM7}%+LrbPmHrO-Qc20{Gql1Rf3^%VZ__hS|W-Z^m{ zA%u@TMh?MHlBwhqDxu=|J8QyS0b$-NWPNzLa2X1og|CU`BKA z=thMyNn{UKd$-sLts0yn*d+3S&)iZ_IKFWTK}Jt72s=poZm%8PICb|bvX2;6x<#&S z_&xaPr=Q-5lOl)(X;@QAadC0ok|PzRzf_%2v@g|9KcOopdisTNjp^3_dsx1Qlz%w9 z-&sT%MP$GIU^nc2C?ggfoLSKUhF zL2~l*ao@-4OYVOR*UGJRUov`jn(s!mkylo!{Yai}k(uSn=GT7~r_X&jY`5OWc_G8D7Yu>(1QNFb75fK&D_ce@) zexZYd{A*U;EBR-w`+_4<9jXlWE3?fU2ioBps-8M+=kKL)H;SaVjMd#|rKpn+A+3vu zIH{?veS{mEm#1~@+O?b6Z(iIfk9^liS8T zbAb1wKxelwVfpcrNZSxTY~UeED*vWX#_l8uZ^Dg@#Ubp%KpED1V{vYvEFT^W->Ypr z>t4H)$Ywd$ljXHNn;~G{#xUeHpZlfUF~!IiuUU=n(;xFX=I?kfg^{jPNZv13{HH}# z)~@{n>)RpqEF7=8%&J-?1w2Hs!EL;-PJ0_8xU^^^y7GfoYdpJBTq-q}*39Uy53_S} zPV810*J9w_W&GajHWFKtFXp;4!uIz@4kW%Y6A;BT!0vM|3+<|4JVMP(#jSnfRToWF zD@Mssmq*^IQCL{L_kEmCl^}hhal@g}bI}H@Eu#2?UrN@vnWGJyg7&v@wWSjpNKII) z*$ck8CG#v2YJ`N;$Pvot_wY;RQ#_Op$LfmhHQVNa$(=SK2ExR{fOeGM;=>+Z9hglX zlocTgZ};%7kw3&pfWGnmv?=cGV%`@+U;e=qnN(h@d3+m8*zQoT&kGV!LcmDYkXAGG 
z&d)3c`?e+{`~C}?F*AJIkEzi+{J8Z8vxzGv-rFoA1@DA{UY;E_tb?-s7g2YKkhp`z zBi?t6iDg{)n_#2N^1&9Od;=jQ!Je;Fa%?*<;1SPqM(17nPY`3nLxRpUM9nuBg8C2&c4PAzU^|GoPUO8lf zUQh4Wfp6*$%VO=1{&ro1J6{>o2>CE(v+N%{pzwZBS9kZ6)7yx|#N1r{vLvUW z^mKNK&C!Q+m+tr*@I*f*XU=n-?+b9y1NizL7=sv8dzBnJc1%Mepk4g24%$|`kiF-6Raq{qJ z-Me>hGzw5Fy&Xn><|HdC(%$ewMKX(dZ$^%e-{LvE&u{Yn1UyTNofMu~{_fpZb8~Zq zWdQn*9PYu4l~Pg?#3UqKd{U3T@p7uSxA&I3yrpA&QJbg&v>r5?*3wzY!Z7dx4vq^y2Ey9=vKQE4R7Z&BLzB^J9IM^9@wjGtj%U~gq-FOJV#SvZ*(Ddw~<;vGCIbHbj=FDj4$ z;BcZT4EO5#6MnyaA5Be7&a-E)&-djXuezLCSa@Xy@5`Z_&{5^Ny4FZ*d|p!0NZZ)A z#K>#*9Q+qBdm51*fc9{5Iz;VZnXZzgA3!q6Na$?3S*<~5=p|*M-AjTkg*gu7ow}al zOy}6yv-9#oaYTeO4$AynJ>RDDH%q%ZKIf>CAMB;wcxw0I_C15O(+f?fO`}`QtXn!+ zyOwVxhkC}l^xr8PPqg)}2_6}tbRnidC_?x&Y+Lqo&EoNM>wLrQ074h%5z=*Tvp zC}v>~z4F;i{My5fqRQ>es#b$2IS)s;&|j;d-)4I#H>KFF>1tz>ObfNMm$^>$4^SOe z?k321f*e9#MnVrtP$d&dVKlfWm+}7G9oTUH4-vxOg9*&^_EQ?}E+}>^lv`R_n&tYp zeo@D9nZWeJ2tpDTc(kq;tBr*nC@U+QotfDIgeNjOIwLMFjt1v96{*edzGy&AO>H^d zmZ(>16RVYPlt23K5o8!lG4J)-^f4bf@1Mxxik;kixEo#orMd`|k+lkTs)qiDB@f@(| z6xS#niN7n_d?}U>6JH5T^89*QQ$xd|HBJES+G`!x$rGL8$H(u%&L+0Aw6tVN0P_0M zR;J-}-Q?tb5eBj!Bqtw8&aGRwo(>GUkNC!2y8AP`O43DnIzgM5x%qkgyzznF#;|w( zxAhkTsfRWq}6;KoEPyO>&djePNCqk8dg@>!B3x_ zlQ(G!?L`s%UIU4#xw& zgB%_{rKYQU+U33daFu6azfGKweG&>diLQORbYgBfOqk6v2MO3c{?7|^Gb)CImH@mX zVq!8YDnxmX5cv~8WtTXbmy9QN20s(2#>#TU`7|59Q3_vs$1N4b6^B(Cnes?cX{u7H zuqn`AZYz~AZd_U-@gzM1E29?%h5A3L#$*D#VH}DRw(C#4ACa{WOui`V=V&QJnFA}R zMv^yKTrKqy*D@TJ-%9lMQZp`IW+jXEhc^h!BMhf2Dj$2Fkp9UW{p8X3*7VnJu9xo) z+~T0~747?+`q@IRTrTM554v!#sDF~xLa(tfmA%$jFrFKj78?%%bgzF+kj4Zat2?I< zbzVZkkSAK9zrX)Wh3h;gu6iBc&HS&%+ant|uigD`CN#H>#k0np;~*FBT0fz^kUN zt}b@{jZOcRy_M)%jc6XjFV>2%t;kp|3=$OweEFic5~RP!kyTWC2dQ~EKn#l&I33jZzZFAZJ`mL)eXte?pX~GKOJz< z%c7t_+&@{e4@>m{VBbHZ&tQ`9dSqgQ78s~d1}RJx8KY?G*6K7CG~Vgcr}fJ03`d`_ zfKV(Pc&OsBI9#nCFJ#~6w}8(L?TU%dWu70ia-dVX&i|w^p zD2?a@<%~9wX$I&&f->Cznvdi(%8yuZVYTzM0E7YiQ8EsEjgyyGrP6qV#yuNF3X zv@zzb*ZQ}tgWdK66p`1LM)FfrQ#;7Xe>5K||KqegJrSJh5y{EC^1!S+_3f+Erdpz{ z7YE;R{Y$uRgLzpjD;j9X6ZomPJCk 
zo68ffFy8Fc)X9i}lYdimV-0B0o&CL{>lzoV_QH3GOedUuQctNP=<)pyFx9L#Z{9T8 z{!RV8{PWk!AnzL*&hzo91CM!Nim5e**XEBOfuwh)$e|q$`0xYMWXyJ!7k(}+CMJfa z%uzJ)*3FwF6<|)e2tomeYXID(DMZf!?d@qx6ce!h!rzO-9T}z;2U0{$Lqpt`ENB}? z#w?i=%V$2QnE{zi*`&}@Cl52xbGDG1@x~-V`kW#7NGAJ+Ja!RdSlgbj+9 zQ0a`<&|P=@Pi5s)IdrvU$gT|hW{_Pdqbf9QDfZo2*OChT8>Y$63#%%OGR({>F(4RflIT4hs|}`g>bR#B#4)>cC)UeWCmF88hl?RF4m8qIiY|+UIy^jk>d^kUJzqIu9?DBFQ zkPX+Lkdc0oT7?`{F~pe_{9C0Y-Yo2I5|-`vg`uRwlA z(E!;S^&*QX7sNOX4KhHmIayd(1}7lDDK{B5vmMmU^)N5KXTmVXlyO~bc&7WZ{RI|Ci=r~--!frj$mCj!wp>keUZdq#SCxDnmsQNJyG_WeBCoEvp zjY`YTzQ8Er#5R~R$`1w^=ecvTKNK5+bbKYLb6Zo|#eeHpcXV`Ykl}y`jE;^DTE$s; zppe=z9E8Zr1xo*ghUwMAuDX<-#L{1?TjviDu#kRf*_`yx? zCLJ;#kDqvNEkuC)5{THC8>}xR2-_k4H^J=wp$}y;3ZZGiNDeQKfDH(EPBI`46?Ws$ z8D*yy?y?VNMQF~!v1bEB79@PdRFpoZ!=zZVe8_V;5w#}=YvEV?c7x^2RmKRydYT}R z1~rF3|A=CCJ?`{_@wnE^m7i%pQe$3a+)~&xI5;Igfe8I zBuBT1y}$;{6EtkO5ps%B=PTaJwTaO^)4V+isCC8ncgIU?2TJSOg>1jDfhd+b*lGvP z7|}i3(;Uf#uOlL#fzo^qc}A;QH$-r3@+dtnxLeJk2><3*ndCLc>ASn!gx;jYapFd6EMJD^>M+c7#n@wd z7ckL?*S&u|Oltvop%?r#iT5I!&og+d2K}t!TVYGvOgl07$06iDlf9Zyk|d3-m7EOQ z`S{Q?_^HU=bzh+MSb&3_jiqR|>Yc?(J-}5#m`M3-$R~^1C9!Dol#R^`XX7@k(14(< z18~a)XXt)-9Y6Yzd~T(MmwYe`ENcyo2oUB!GdcI==sxgM21Cx?Z%1D#QB<>CeD%{t z8$B~~KA41_)x$6MjNFG@VM-qldrXiHInwI-Tsd{}!x6ey_Eey26k)g-OL*nJE?XI_=eV_4y{B#HuRyWhUEQoNuNxzuh!h5J%oYrN1r z0KU43$prbfR^-PQ99&?98)LT8j6B8;qgzLG=UY*`hac3B*`YfmKn3|KPSPuD8+oSD z(bJm(d5flH$R{E`aT@@5*mq+{S`iwSN@F9%?88L%z$oyK%`cb7mSZ97t9 z2ywb9gc1S{qtfWOmd6`WmktJM7#R_f&{qZG5mM@Ex254+FcwfhptG|xd>=#O@IK`9 zTvyuDyz2EY-CuZ-;(-53+VhXTJl12=7mK+;GWll@vf>5nY(o&$mrD2iZT!5PjSB_3E1;_u+YSgq)W0Hz zo7yChp4`qG8A%V!Y$y1=XbqV0c~~-ed3kh#u3WhSPs9t++M`?Q>Mx-E@!JoJ?@e+@ zZ4GY{A!oXPZRQzO>j8VcaN&ZQtZV=p##L2`gN80H$3PtYLmKDgC^BOE%p~mR`_T5z9R!XE*mhmj~_oe2{V9>Ak#B4G|Y{Nh)}mPFB?Gj zv^+{R+!hD3z+qG;z5~G>gqnmK5=>zrF7ym=V-DvzIXSFtm?-(|Aw+t*9-Q?p25I2B#Y{h}f|x*0(mTRF&iE<;W)g5HMIYN^ z?BjPY2orukA`Au1mbg*`dR@8+CAW}&JoXML7*#qWQ5{zvhWxgaB1W#3LX?8y7l*`VsIDo#14p*bt_-V^EV;0K3c}LF 
zaVd6(98s!J&FOc`bK;`iZ?51!jR<}EWCVl!%gG+G^BLV)U_W(lFW!B0Ux!r4?>`U`Ls1^J-ml5BMEI1JYTfY}@2#uEx`zG_iL!Cjy zV*XM5<`8C93}izNY$u%Y{tB(_=Yp%?%vX#Db3qi%tYZ#A-u49ZK_==wFIBCE*6eRv z8$#Q@n?MJ>sT)Ln#%jBil!ZuB_QdAE$bLGQWL%=6dVc$>$v;wmIGQvE9R5yX6SJRz ziUd9a49#NG78=*tE?LOl(3J2Lhfupom_#}}jFyVjUNKsSO-$a3XfvYWHDb_tV$vI= zGjkifc2k%^35SvCRo!aujZ!cZ>V>KIb(NH!o%`qovJR}S9B@{3p!f7y=U3C{m_wo% z#fF6x%fN(gVWu;sFATGG7ESj1z?-lEtE{{5VyLE^^Tv?7J|uc$oPLWNI{*zQ=Yu$s zNG^&wO$JW6h~^$_h8>VJ25uezB$}WndakzQRW6For^nkBUVLiO+wVYJKxAoSqv7fK z*2&qKs0A1sylQG`(Y*ioy&b}SHhI_u(QFWVVzaV1XFikC@!tg5H}yH-Snzyj3lSiYsGsNv=&*u>SqQ$9)v zP=h*%k!-%-e?9W0%CiWLhFy?nz_zhNz%k1FcM&F+qWV}e9IJyCs*oZV)(6qtIeEBm zFeHV*+&U^C2Z0~&#&p?WwQ|uuQ))Zl*42(~Oj=`~SLiB152~0>@yoc+<(Lkb$g8e(6TepCZ(l^mk<-Wv}~fcPFm_*+_Fau`70b>n{Q-dNByL2!%i6&A+N^ zov#}^pt7?Q5)v*c6GLIc!I8cFjanQ4MsL1x?aO3yGqVe#qM|dO7a)6q9EPEiVyC|y zlMW*M=0KS}`5@3Da3+Kf@5h?-I)*SwY^R24Y_D~zxPrB(5DvT~16th62{3-qf(z9S z6yWFoU`O6Y{G^_xc7zwkH2Y}B@H|PWRqutCSrZDB!@!{<7ufd%E(2LsLpcp3qd{0;E)cftZS%M0!V`ZP`1 zXZtH$v0!bQO6-2G?}MFVR=vB@q6H^>N}Q&e>qOyEGSM%pUVcyC_k`sU^ehj4ceVY<-r&-PK0rhAlv^ctMz`hOH*={Xpf`-^maQH4n9N}~U98~~ISb4jj zuU#C72Al8JRP^1L6No4=A%%3qn|>Rwu>e|hbtN_iogYlW4D}|6I2U^?-@`*fqr_YU z1VCxy2(l1_9HRIIn8@1X3zfMxv85h3plYfcWmmZ%Xz18TO?Ua;BNR6po0`yA2Y7dO zZZ4@!rQMLIA{PUI-~Qf?fZI16 zAnHnn6)v41PBS3vc|0*oROdck=}#b2>^Sj0D=$wOgkvI;*9;4)XhosI_&rzdqvk6t z6mH0UBo_`b*$$SghOceU=cDInVKGo`j&?sdbEyK`KYGXx2WDPRzOhtVMus)C8T1)o zA<`9NU%!8UgIlLS8XW^ivd`;Bj}95Sbjse{S4aHsz+t>VEigq^+r&@_-Z!6Li$c+i z-@%4o2^^-a6GdekOjWV>hFzj_n+V533LN(^Pjrov+@3j0$|%ZD;g247`4r1XG@t0V zzf}R+9+K^o;nT#3d9Xqq!m0-`5uW#8g_lm;qX1<96EkUc-c$-8s%Q0G^7+M1h>RrrCoF$3mtE_%2HvUA_fu_p@$hX_VbfOpGk z1Ueg-3!6N89u@4p64-_>2jGr0dvFQ0{mB3kXTm07q(#? zVUE4f6LQn5Sx}90oheU}U`K(?F9-Rc#?`A=vFj4neHYPC3+;H&e}*wTwxmqr<%!9Z zNP;9Gg} +![Validation results](../img/kmf_esrd.png){ width="1000" } +
Kaplan Meier Survival Plot of a cohort of individuals with pathogenic variants in the UMOD gene with respect to age of onset of stage 5 kidney failure. +
+ +``` + +It is also possible to plot a curve for survival, which makes use of the VitalStatus message of the phenopackets. The code is exactly the same +as the above, except that we do not pass the target_tid argument. +```python +# same as above +kmv = KaplanMeierVisualizer(simple_patient_list=simple_pt_list) +# same as above except that we change the title of the plot +kmf.fit(T, E, label="Survival") +``` + +![Kaplan-Meier Plot]() + +
+![Kaplan-Meier survival plot based on vital status](../img/kmf_plot_vstatus.png){ width="1000" } +
Kaplan Meier Survival Plot of a cohort of individuals with pathogenic variants in the UMOD gene. +
+
\ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index eb6b1f46..e4249826 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -52,6 +52,9 @@ nav: - Cohort encoder: 'tabular/cohort_encoder.md' - Validation: 'tabular/validation.md' - Visualization: 'tabular/visualization.md' + - Visualization: + - Overview: 'visualization/index.md' + - Kaplan Meier: 'visualization/kaplan_meier_visualizer.md' - Developers: - For developers: 'developers/developers.md' - Installation: 'developers/installation.md' @@ -75,7 +78,6 @@ nav: - HpoParser: "api/creation/hpo_parser.md" - HpTerm: "api/creation/hp_term.md" - Individual: "api/creation/individual.md" - - IsoAge: "api/creation/iso_age.md" - MetaData: "api/creation/metadata.md" - OptionColumnMapper: "api/creation/option_column_mapper.md" - SexColumnMapper: "api/creation/sex_column_mapper.md"