Skip to content

Commit

Permalink
age output
Browse files Browse the repository at this point in the history
  • Loading branch information
pnrobinson committed Mar 1, 2024
1 parent 7c77d08 commit abb6da1
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 14 deletions.
2 changes: 1 addition & 1 deletion src/pyphetools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from . import visualization
from . import validation

__version__ = "0.9.50"
__version__ = "0.9.51"

__all__ = [
"creation",
Expand Down
61 changes: 61 additions & 0 deletions src/pyphetools/creation/pyphetools_age.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,18 @@ def is_valid(self) -> bool:
"""
pass

@abc.abstractmethod
def to_hpo_age(self) -> "HpoAge":
    """Convert this PyPheToolsAge object to an HpoAge object.

    An object that already is an HpoAge returns itself. Other classes choose
    the HpoAge that is most appropriate for their ISO8601 or gestational age.
    The conversion exists because HPOA output needs an HPO term to denote the
    age of onset.

    :returns: HpoAge object representing the age
    :rtype: HpoAge
    """

@property
def age_string(self):
if self.is_valid():
Expand Down Expand Up @@ -115,6 +127,12 @@ def to_ga4gh_time_element(self):
def is_valid(self):
return False

def to_hpo_age(self):
    """Return this object unchanged, since no age information is available.

    The NoneAge object itself is handed back to signal the missing age, so
    client code should always check is_valid() before using the result.

    :returns: this same object
    """
    return self


class IsoAge(PyPheToolsAge):
"""Class to record and sort ages formated according to iso8601
Expand Down Expand Up @@ -188,6 +206,29 @@ def to_iso8601(self):
else:
return "".join(components)

def to_hpo_age(self):
    """Map this age onto the HpoAge onset category it falls into.

    The years component is checked first, from the oldest band down. Ages
    below one year are classified by the months component and then by the
    days component.

    :returns: HpoAge object for the matching onset category
    :raises ValueError: if none of the categories applies
    """
    # Lower bound in years for each band, ordered oldest first so that the
    # first match wins.
    year_bands = (
        (60, "Late onset"),
        (40, "Middle age onset"),
        (16, "Young adult onset"),
        (5, "Juvenile onset"),
        (1, "Childhood onset"),
    )
    for minimum_years, onset_label in year_bands:
        if self._years >= minimum_years:
            return HpoAge(onset_label)
    # Less than one year old: decide on months, then days.
    if self._months >= 1:
        return HpoAge("Infantile onset")
    if self._days >= 1:
        return HpoAge("Neonatal onset")
    if self._days == 0:
        return HpoAge("Congenital onset")
    raise ValueError(f"[ERROR] Could not calculate HpoAge for {self.age_string}")


def to_ga4gh_time_element(self) -> PPKt.TimeElement:
"""
:returns: a representation of Age formatted as one of the options of GA4GH TimeElement
Expand Down Expand Up @@ -255,6 +296,11 @@ def to_ga4gh_time_element(self) -> PPKt.TimeElement:
time_elem.ontology_class.CopyFrom(clz)
return time_elem

def to_hpo_age(self):
    """Return this object itself; it is already an HpoAge, so no conversion is needed.

    :returns: this same object
    """
    return self

def is_valid(self):
return True

Expand Down Expand Up @@ -284,6 +330,21 @@ def to_ga4gh_time_element(self) -> PPKt.TimeElement:
time_elem.age.gestational_age.CopyFrom(gest_age)
return time_elem

def to_hpo_age(self):
    """Convert this gestational age to the HpoAge for the matching prenatal onset period.

    Only the weeks component is used to choose the period.

    :returns: HpoAge object for the matching prenatal onset category
    """
    # Lower bound in weeks for each period, ordered latest first so that the
    # first match wins.
    prenatal_bands = (
        # third trimester: 28 weeks and zero days (28+0) of gestation and beyond
        (28, "Third trimester onset"),  # HP:0034197
        # second trimester: 14 0/7 weeks to 27 6/7 weeks (inclusive)
        (14, "Second trimester onset"),  # HP:0034198
        # late first trimester: 11 0/7 to 13 6/7 weeks of gestation (inclusive)
        (11, "Late first trimester onset"),  # HP:0034199
    )
    for minimum_weeks, onset_label in prenatal_bands:
        if self._weeks >= minimum_weeks:
            return HpoAge(onset_label)
    # anything earlier than 11 weeks
    return HpoAge("Embryonal onset")

@staticmethod
def is_gestational_age(age_string):
"""Gestational age should be formated as W+D, e.g. 33+2
Expand Down
88 changes: 75 additions & 13 deletions src/pyphetools/visualization/onset_calculator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from collections import defaultdict
from typing import Dict, List, Optional, Union
from ..creation.constants import Constants
from ..creation.pyphetools_age import PyPheToolsAge
from typing import Dict, List
import re
from ..creation.pyphetools_age import HPO_ONSET_TERMS
from ..creation.hp_term import HpTerm




class OnsetCalculator:
"""
This class calculates the age of onset annotations for the HPOA file
Expand All @@ -27,20 +29,80 @@ def __init__(self, phenopacket_list):
print("Warning: Identified multiple disease element")
disease = ppack.diseases[0]
if disease.HasField("onset"):
# onset is a GA4GH TimeElement
# In pyphetools, it can be an OntologyClass, an Age, or a GestationalAge
onset = disease.onset
if onset.HasField("ontology_class"):
onset_term = onset.ontology_class
hpo_onset_term = HpTerm(hpo_id=onset_term.id, label=onset_term.label)
self._pmid_to_onsetlist_d[pmid].append(hpo_onset_term)
elif onset.HasField("age"):
isoage = onset.age.iso8601duration
hpo_onset_term = PyPheToolsAge.onset_to_hpo_term(onset_string=isoage)
if hpo_onset_term is not None and hpo_onset_term != Constants.NOT_PROVIDED:
onset_term = onset.ontology_class
hpo_onset_term = HpTerm(hpo_id=onset_term.id, label=onset_term.label)
self._pmid_to_onsetlist_d[pmid].append(hpo_onset_term)
elif onset.HasField("age"):
hpo_onset_term = self._get_hpo_onset_term_from_iso8601(onset.age.iso8601duration)
self._pmid_to_onsetlist_d[pmid].append(hpo_onset_term)
elif onset.HasField("gestational_age"):
hpo_onset_term = self._get_hpo_onset_term_from_gestational_age(onset.age.iso8601duration)
self._pmid_to_onsetlist_d[pmid].append(hpo_onset_term)
else:
print("[ERROR] Could not parse disease onset")
print(disease.onset)
raise ValueError(f"Could not parse disease onset {disease}")
raise ValueError(f"onset was present but could not be decoded: {onset}")


def _get_hpo_onset_term_from_iso8601(self, isostring):
    """Translate an ISO 8601 duration string into the HPO onset term for that age.

    Only the years, months and days components are read, and each of them is
    optional. A missing component counts as zero.

    :param isostring: ISO 8601 duration, e.g. "P3Y2M" or "P12D"
    :returns: HpTerm for the matching onset category
    :raises ValueError: if the string cannot be parsed as a duration, or if
        the chosen label is not present in HPO_ONSET_TERMS
    """
    # Years, months and days are all optional; weeks are not expected at this
    # point in pyphetools.
    ISO8601_REGEX = r"^P(\d+Y)?(\d+M)?(\d+D)?"
    match = re.search(ISO8601_REGEX, isostring)
    if match is None:
        # Fail loudly rather than fall through and hand None back to the
        # caller, which would put None into the list of onset terms.
        raise ValueError(f'[ERROR] Could not parse iso8601 "{isostring}"')
    # Each captured group looks like "12Y"; drop the unit letter. A group
    # that did not take part in the match counts as zero.
    y, m, d = (int(part[:-1]) if part else 0 for part in match.groups())
    if y >= 60:
        label = "Late onset"
    elif y >= 40:
        label = "Middle age onset"
    elif y >= 16:
        label = "Young adult onset"
    elif y >= 5:
        label = "Juvenile onset"
    elif y >= 1:
        label = "Childhood onset"
    elif m >= 1:
        label = "Infantile onset"
    elif d >= 1:
        label = "Neonatal onset"
    else:
        # \d+ cannot produce a negative number, so the only case left is
        # zero years, zero months and zero days.
        label = "Congenital onset"
    if label not in HPO_ONSET_TERMS:
        # should never happen ...
        raise ValueError(f"Could not identify onset label {label}")
    hpo_id = HPO_ONSET_TERMS.get(label)
    return HpTerm(hpo_id=hpo_id, label=label)

def _get_hpo_onset_term_from_gestational_age(self, gestational_age):
    """Translate a gestational age into the HPO term for the prenatal onset period.

    Only the ``weeks`` attribute of ``gestational_age`` is read; days do not
    affect which onset term is chosen.

    :param gestational_age: object exposing a ``weeks`` attribute
    :returns: HpTerm for the matching prenatal onset category
    :raises ValueError: if the chosen label is not present in HPO_ONSET_TERMS
    """
    weeks = gestational_age.weeks
    # Lower bound in weeks for each period, ordered latest first so that the
    # first match wins.
    prenatal_bands = (
        # third trimester: 28 weeks and zero days (28+0) of gestation and beyond
        (28, "Third trimester onset"),  # HP:0034197
        # second trimester: 14 0/7 weeks to 27 6/7 weeks (inclusive)
        (14, "Second trimester onset"),  # HP:0034198
        # late first trimester: 11 0/7 to 13 6/7 weeks of gestation (inclusive)
        (11, "Late first trimester onset"),  # HP:0034199
    )
    label = "Embryonal onset"  # used when no band below matches
    for minimum_weeks, band_label in prenatal_bands:
        if weeks >= minimum_weeks:
            label = band_label
            break
    if label in HPO_ONSET_TERMS:
        return HpTerm(hpo_id=HPO_ONSET_TERMS.get(label), label=label)
    # should never happen ...
    raise ValueError(f"Could not identify onset label {label}")


def get_pmid_to_onset_d(self)-> Dict[str, List[HpTerm]]:
    """Return the onset terms collected so far, grouped by PMID.

    The internal dictionary itself is returned, not a copy.

    :returns: dictionary with a PMID as key and the list of HPO onset terms recorded for it as value
    """
    pmid_to_onsets = self._pmid_to_onsetlist_d
    return pmid_to_onsets
Expand Down

0 comments on commit abb6da1

Please sign in to comment.