From 8231038642017665d0d38d4da6e98074e0c9b8a7 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 25 Mar 2024 13:33:07 -0400 Subject: [PATCH 01/13] Start working on `vrs` members. --- src/pyphetools/pp/v202/__init__.py | 9 + src/pyphetools/pp/v202/_vrs.py | 475 +++++++++++++++++++++++++++++ 2 files changed, 484 insertions(+) create mode 100644 src/pyphetools/pp/v202/_vrs.py diff --git a/src/pyphetools/pp/v202/__init__.py b/src/pyphetools/pp/v202/__init__.py index 4c00565b..d81a1412 100644 --- a/src/pyphetools/pp/v202/__init__.py +++ b/src/pyphetools/pp/v202/__init__.py @@ -18,6 +18,10 @@ from ._disease import Disease from ._meta_data import MetaData, Resource, Update from ._phenopackets import Phenopacket +from ._vrs import Gene, Text, Number, IndefiniteRange, DefiniteRange, SimpleInterval, SequenceInterval +from ._vrs import SequenceLocation, SequenceState, LiteralSequenceExpression, DerivedSequenceExpression +from ._vrs import RepeatedSequenceExpression, CytobandInterval, ChromosomeLocation, Allele, Haplotype, CopyNumber +from ._vrs import VariationSet, Variation from ._vrsatile import Expression, Extension, VcfRecord, MoleculeContext, VariationDescriptor __all__ = [ @@ -32,4 +36,9 @@ 'MetaData', 'Resource', 'Update', 'OntologyClass', 'ExternalReference', 'Evidence', 'Procedure', 'GestationalAge', 'Age', 'AgeRange', 'TimeInterval', 'TimeElement', 'Timestamp', 'File', + # and the VRS members + 'Gene', 'Text', 'Number', 'IndefiniteRange', 'DefiniteRange', 'SimpleInterval', 'SequenceInterval', + 'SequenceLocation', 'SequenceState', 'LiteralSequenceExpression', 'DerivedSequenceExpression', + 'RepeatedSequenceExpression', 'CytobandInterval', 'ChromosomeLocation', 'Allele', 'Haplotype', 'CopyNumber', + 'VariationSet', ] diff --git a/src/pyphetools/pp/v202/_vrs.py b/src/pyphetools/pp/v202/_vrs.py new file mode 100644 index 00000000..8540378b --- /dev/null +++ b/src/pyphetools/pp/v202/_vrs.py @@ -0,0 +1,475 @@ +import typing + +import phenopackets as pp202 +from 
google.protobuf.message import Message + +from .._api import MessageMixin +from ..parse import extract_message_scalar, extract_message_sequence, extract_pb_message_scalar, extract_pb_message_seq + + +class Gene(MessageMixin): + + def __init__( + self, + gene_id: str, + ): + self._gene_id = gene_id + + @property + def gene_id(self) -> str: + return self._gene_id + + @gene_id.setter + def gene_id(self, value: str): + self._gene_id = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'gene_id', + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'gene_id', + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return Gene( + gene_id=values['gene_id'], + ) + else: + cls._complain_about_missing_field(values) + + def to_message(self) -> Message: + return pp202.Gene(gene_id=self._gene_id) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.Gene + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return Gene( + gene_id=msg.gene_id, + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, Gene) and self._gene_id == other._gene_id + + def __repr__(self): + return f'Gene(gene_id={self._gene_id})' + + +class Text(MessageMixin): + + def __init__( + self, + definition: str, + ): + self._definition = definition + + @property + def definition(self) -> str: + return self._definition + + @definition.setter + def definition(self, value: str): + self._definition = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'definition', + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'definition', + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return Text( + definition=values['definition'], 
+ ) + else: + cls._complain_about_missing_field(values) + + def to_message(self) -> Message: + return pp202.Text(definition=self._definition) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.Text + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return Text( + definition=msg.definition, + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, Text) and self._definition == other._definition + + def __repr__(self): + return f'Text(definition={self._definition})' + + +class Number(MessageMixin): + + def __init__( + self, + value: int, + ): + self._value = value + + @property + def value(self) -> int: + return self._value + + @value.setter + def value(self, value: int): + self._value = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'value', + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'value', + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return Number( + value=values['value'], + ) + else: + cls._complain_about_missing_field(values) + + def to_message(self) -> Message: + return pp202.Number(value=self._value) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.Number + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return Number( + value=msg.value, + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, Number) and self._value == other._value + + def __repr__(self): + return f'Number(value={self._value})' + + +class IndefiniteRange: + + def __init__( + self, + value: int, + comparator: str, + ): + self._value = value + self._comparator = comparator + + +class DefiniteRange: + + def __init__( + self, + min: int, + max: int, + ): + self._min = min 
+ self._max = max + + +class SimpleInterval: + + def __init__( + self, + start: int, + end: int, + ): + self._start = start + self._end = end + + +class SequenceInterval: + + def __init__( + self, + start: typing.Union[Number, IndefiniteRange, DefiniteRange], + end: typing.Union[Number, IndefiniteRange, DefiniteRange], + ): + self._start = start + self._end = end + + +class SequenceLocation: + + def __init__( + self, + _id: str, + sequence_id: str, + interval: typing.Union[SequenceInterval, SimpleInterval], + ): + self._id = _id + self._sequence_id = sequence_id + self._interval = interval + + +class SequenceState(MessageMixin): + + def __init__( + self, + sequence: str, + ): + self._sequence = sequence + + @property + def sequence(self) -> str: + return self._sequence + + @sequence.setter + def sequence(self, value: str): + self._sequence = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'sequence', + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'sequence', + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return SequenceState( + sequence=values['sequence'], + ) + else: + cls._complain_about_missing_field(values) + + def to_message(self) -> Message: + return pp202.SequenceState(sequence=self._sequence) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.SequenceState + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return SequenceState( + sequence=msg.sequence, + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, SequenceState) and self._sequence == other._sequence + + def __repr__(self): + return f'SequenceState(sequence={self._sequence})' + + +class LiteralSequenceExpression(MessageMixin): + + def __init__( + self, + sequence: str, + ): + self._sequence = sequence + + @property + 
def sequence(self) -> str: + return self._sequence + + @sequence.setter + def sequence(self, value: str): + self._sequence = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'sequence', + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'sequence', + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return LiteralSequenceExpression( + sequence=values['sequence'], + ) + else: + cls._complain_about_missing_field(values) + + def to_message(self) -> Message: + return pp202.LiteralSequenceExpression(sequence=self._sequence) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.LiteralSequenceExpression + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return LiteralSequenceExpression( + sequence=msg.sequence, + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, LiteralSequenceExpression) and self._sequence == other._sequence + + def __repr__(self): + return f'LiteralSequenceExpression(sequence={self._sequence})' + + +class DerivedSequenceExpression: + + def __init__( + self, + location: SequenceLocation, + reverse_complement: bool, + ): + self._location = location + self._reverse_complement = reverse_complement + + +class RepeatedSequenceExpression: + + def __init__( + self, + seq_expr: typing.Union[LiteralSequenceExpression, DerivedSequenceExpression], + count: typing.Union[Number, IndefiniteRange, DefiniteRange], + ): + self._seq_expr = seq_expr + self._count = count, + + +class CytobandInterval: + + def __init__( + self, + start: str, + end: str, + ): + self._start = start + self._end = end + + +class ChromosomeLocation: + + def __init__( + self, + _id: str, + species_id: str, + chr: str, + interval: CytobandInterval, + ): + self._id = _id + self._species_id = species_id + self._chr = chr + 
self._interval = interval + + +class Allele: + + def __init__( + self, + _id: str, + location: typing.Union[str, ChromosomeLocation, SequenceLocation], + state: typing.Union[ + SequenceState, LiteralSequenceExpression, + DerivedSequenceExpression, RepeatedSequenceExpression, + ], + ): + self._id = _id + self._location = location + self._state = state + + +class Haplotype: + class Member: + def __init__( + self, + value: typing.Union[Allele, str], + ): + self._value = value + + def __init__( + self, + _id: str, + members: typing.Iterable[Member], + ): + self._id = _id + self._members = list(members) + + +class CopyNumber: + + def __init__( + self, + _id: str, + subject: typing.Union[ + Allele, Haplotype, Gene, + LiteralSequenceExpression, DerivedSequenceExpression, RepeatedSequenceExpression, + str, + ], + copies: typing.Union[Number, IndefiniteRange, DefiniteRange], + ): + self._id = _id + self._subject = subject + self._copies = copies + + +class VariationSet: + class Member: + """ + + **IMPORTANT**: `value` can also be an instance of :class:`VariationSet`! + """ + + def __init__( + self, + value: typing.Union[str, Allele, Haplotype, CopyNumber, Text], + ): + self._value = value + if isinstance(value, VariationSet): + pass + + def __init__( + self, + _id: str, + members: typing.Iterable[Member], + ): + self._id = _id + self._members = list(members) + + +class Variation: + + def __init__( + self, + variation: typing.Union[Allele, Haplotype, CopyNumber, Text, VariationSet], + ): + self._variation = variation From 32485a53021a1d1a56eab91ba1d86a3fed4cfaeb Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 27 Mar 2024 09:50:18 -0400 Subject: [PATCH 02/13] Implement `IndefiniteRange`, `DefiniteRange`, `SimpleInterval`, and `CytobandInterval`. 
--- src/pyphetools/pp/v202/_vrs.py | 251 ++++++++++++++++++++++++++++++++- 1 file changed, 244 insertions(+), 7 deletions(-) diff --git a/src/pyphetools/pp/v202/_vrs.py b/src/pyphetools/pp/v202/_vrs.py index 8540378b..cf8ec18d 100644 --- a/src/pyphetools/pp/v202/_vrs.py +++ b/src/pyphetools/pp/v202/_vrs.py @@ -175,7 +175,7 @@ def __repr__(self): return f'Number(value={self._value})' -class IndefiniteRange: +class IndefiniteRange(MessageMixin): def __init__( self, @@ -185,8 +185,70 @@ def __init__( self._value = value self._comparator = comparator + @property + def value(self) -> int: + return self._value + + @value.setter + def value(self, value: int): + self._value = value + + @property + def comparator(self) -> str: + return self._comparator + + @comparator.setter + def comparator(self, value: str): + self._comparator = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'value', 'comparator' + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'value', 'comparator' + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return IndefiniteRange( + value=values['value'], + comparator=values['comparator'], + ) + else: + cls._complain_about_missing_field(values) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.IndefiniteRange + + def to_message(self) -> Message: + return pp202.IndefiniteRange( + value=self._value, + comparator=self._comparator, + ) -class DefiniteRange: + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return IndefiniteRange( + value=msg.value, + comparator=msg.comparator, + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, IndefiniteRange) \ + and self._value == other._value \ + and self._comparator == other._comparator + + def __repr__(self): + return 
f'IndefiniteRange(value={self._value}, comparator={self._comparator})' + + +class DefiniteRange(MessageMixin): def __init__( self, @@ -196,8 +258,67 @@ def __init__( self._min = min self._max = max + @property + def min(self) -> int: + return self._min + + @min.setter + def min(self, value: int): + self._min = value + + @property + def max(self) -> int: + return self._max + + @max.setter + def max(self, value: int): + self._max = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'min', 'max' + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'min', 'max' + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return DefiniteRange( + min=values['min'], + max=values['max'], + ) + else: + cls._complain_about_missing_field(values) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.DefiniteRange + + def to_message(self) -> Message: + return pp202.DefiniteRange(min=self._min, max=self._max) + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return DefiniteRange( + min=msg.min, + max=msg.max, + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, DefiniteRange) \ + and self._min == other._min \ + and self._max == other._max + + def __repr__(self): + return f'DefiniteRange(min={self._min}, max={self._max})' + -class SimpleInterval: +class SimpleInterval(MessageMixin): def __init__( self, @@ -207,6 +328,65 @@ def __init__( self._start = start self._end = end + @property + def start(self) -> int: + return self._start + + @start.setter + def start(self, value: int): + self._start = value + + @property + def end(self) -> int: + return self._end + + @end.setter + def end(self, value: int): + self._end = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'start', 'end' + + @classmethod 
+ def required_fields(cls) -> typing.Sequence[str]: + return 'start', 'end' + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return SimpleInterval( + start=values['start'], + end=values['end'], + ) + else: + cls._complain_about_missing_field(values) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.SimpleInterval + + def to_message(self) -> Message: + return pp202.SimpleInterval(start=self._start, end=self._end) + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return SimpleInterval( + start=msg.start, + end=msg.end, + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, SimpleInterval) \ + and self._start == other._start \ + and self._end == other._end + + def __repr__(self): + return f'SimpleInterval(start={self._start}, end={self._end})' + class SequenceInterval: @@ -286,7 +466,7 @@ def __eq__(self, other): def __repr__(self): return f'SequenceState(sequence={self._sequence})' - + class LiteralSequenceExpression(MessageMixin): @@ -366,7 +546,7 @@ def __init__( self._count = count, -class CytobandInterval: +class CytobandInterval(MessageMixin): def __init__( self, @@ -376,17 +556,74 @@ def __init__( self._start = start self._end = end + @property + def start(self) -> str: + return self._start + + @start.setter + def start(self, value: str): + self._start = value + + @property + def end(self) -> str: + return self._end + + @end.setter + def end(self, value: str): + self._end = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'start', 'end' + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'start', 'end' + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return CytobandInterval( + start=values['start'], + 
end=values['end'], + ) + else: + cls._complain_about_missing_field(values) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.CytobandInterval + + def to_message(self) -> Message: + return pp202.CytobandInterval(start=self._start, end=self._end) + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return CytobandInterval( + start=msg.start, + end=msg.end, + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, CytobandInterval) \ + and self._start == other._start \ + and self._end == other._end + + def __repr__(self): + return f'CytobandInterval(start={self._start}, end={self._end})' + class ChromosomeLocation: def __init__( self, - _id: str, species_id: str, chr: str, interval: CytobandInterval, ): - self._id = _id self._species_id = species_id self._chr = chr self._interval = interval From d7a8e31344d3f9c12f8f56f3af992d261f905dc3 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 27 Mar 2024 10:59:03 -0400 Subject: [PATCH 03/13] Implement `DerivedSequenceExpression`, `RepeatedSequenceExpression` and `ChromosomeLocation`. 
--- src/pyphetools/pp/v202/_vrs.py | 287 +++++++++++++++++++++++++++++++-- 1 file changed, 275 insertions(+), 12 deletions(-) diff --git a/src/pyphetools/pp/v202/_vrs.py b/src/pyphetools/pp/v202/_vrs.py index cf8ec18d..51985753 100644 --- a/src/pyphetools/pp/v202/_vrs.py +++ b/src/pyphetools/pp/v202/_vrs.py @@ -5,6 +5,7 @@ from .._api import MessageMixin from ..parse import extract_message_scalar, extract_message_sequence, extract_pb_message_scalar, extract_pb_message_seq +from ..parse import extract_oneof_scalar, extract_pb_oneof_scalar class Gene(MessageMixin): @@ -400,14 +401,13 @@ def __init__( class SequenceLocation: + # TODO: def __init__( self, - _id: str, sequence_id: str, interval: typing.Union[SequenceInterval, SimpleInterval], ): - self._id = _id self._sequence_id = sequence_id self._interval = interval @@ -524,7 +524,7 @@ def __repr__(self): return f'LiteralSequenceExpression(sequence={self._sequence})' -class DerivedSequenceExpression: +class DerivedSequenceExpression(MessageMixin): def __init__( self, @@ -534,8 +534,77 @@ def __init__( self._location = location self._reverse_complement = reverse_complement + @property + def location(self) -> SequenceLocation: + return self._location + + @location.setter + def location(self, value: SequenceLocation): + self._location = value + + @property + def reverse_complement(self) -> bool: + return self._reverse_complement + + @reverse_complement.setter + def reverse_complement(self, value: bool): + self._reverse_complement = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'location', 'reverse_complement' + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'location', 'reverse_complement' + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return DerivedSequenceExpression( + location=extract_message_scalar('location', SequenceLocation, values), + 
reverse_complement=values['reverse_complement'], + ) + else: + cls._complain_about_missing_field(values) + + def to_message(self) -> Message: + return pp202.DerivedSequenceExpression( + location=self._location.to_message(), + reverse_complement=self._reverse_complement, + ) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.DerivedSequenceExpression + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return DerivedSequenceExpression( + location=extract_pb_message_scalar('location', SequenceLocation, msg), + reverse_complement=msg.reverse_complement, + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, DerivedSequenceExpression) \ + and self._location == other._location \ + and self._reverse_complement == other._reverse_complement + + def __repr__(self): + return f'DerivedSequenceExpression(location={self._location}, reverse_complement={self._reverse_complement})' -class RepeatedSequenceExpression: + +class RepeatedSequenceExpression(MessageMixin): + _ONEOF_SEQ_EXPRESSION = { + 'literal_sequence_expression': LiteralSequenceExpression, + 'derived_sequence_expression': DerivedSequenceExpression, + } + _ONEOF_COUNT = { + 'number': Number, 'indefinite_range': IndefiniteRange, 'definite_range': DefiniteRange, + } def __init__( self, @@ -543,7 +612,126 @@ def __init__( count: typing.Union[Number, IndefiniteRange, DefiniteRange], ): self._seq_expr = seq_expr - self._count = count, + self._count = count + + @property + def seq_expr(self) -> typing.Union[LiteralSequenceExpression, DerivedSequenceExpression]: + return self._seq_expr + + @property + def literal_sequence_expression(self) -> typing.Optional[LiteralSequenceExpression]: + return self._seq_expr if isinstance(self._seq_expr, LiteralSequenceExpression) else None + + @literal_sequence_expression.setter + def literal_sequence_expression(self, value: LiteralSequenceExpression): + 
self._seq_expr = value + + @property + def derived_sequence_expression(self) -> typing.Optional[DerivedSequenceExpression]: + return self._seq_expr if isinstance(self._seq_expr, DerivedSequenceExpression) else None + + @derived_sequence_expression.setter + def derived_sequence_expression(self, value: DerivedSequenceExpression): + self._seq_expr = value + + @property + def count(self) -> typing.Union[Number, IndefiniteRange, DefiniteRange]: + return self._count + + @property + def number(self) -> typing.Optional[Number]: + return self._count if isinstance(self._count, Number) else None + + @number.setter + def number(self, value: Number): + self._count = value + + @property + def indefinite_range(self) -> typing.Optional[IndefiniteRange]: + return self._count if isinstance(self._count, IndefiniteRange) else None + + @indefinite_range.setter + def indefinite_range(self, value: IndefiniteRange): + self._count = value + + @property + def definite_range(self): + return self._count if isinstance(self._count, DefiniteRange) else None + + @definite_range.setter + def definite_range(self, value: DefiniteRange): + self._count = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return ( + 'literal_sequence_expression', 'derived_sequence_expression', + 'number', 'indefinite_range', 'definite_range', + ) + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + raise NotImplementedError('Should not be called!') + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if any(f in values for f in cls._ONEOF_SEQ_EXPRESSION) \ + and any(f in values for f in cls._ONEOF_COUNT): + return RepeatedSequenceExpression( + seq_expr=extract_oneof_scalar(cls._ONEOF_SEQ_EXPRESSION, values), + count=extract_oneof_scalar(cls._ONEOF_COUNT, values), + ) + else: + raise ValueError( + 'Missing one of required fields: ' + '`literal_sequence_expression|derived_sequence_expression` or ' + f'`number|indefinite_range|definite_range`: {values}' + 
) + + def to_message(self) -> Message: + e = pp202.RepeatedSequenceExpression() + + # `seq_expr` + if isinstance(self._seq_expr, LiteralSequenceExpression): + e.literal_sequence_expression.CopyFrom(self._seq_expr.to_message()) + elif isinstance(self._seq_expr, DerivedSequenceExpression): + e.derived_sequence_expression.CopyFrom(self._seq_expr.to_message()) + else: + raise ValueError('Bug') + + # `count` + if isinstance(self._count, Number): + e.number.CopyFrom(self._count.to_message()) + elif isinstance(self._count, IndefiniteRange): + e.indefinite_range.CopyFrom(self._count.to_message()) + elif isinstance(self._count, DefiniteRange): + e.definite_range.CopyFrom(self._count.to_message()) + else: + raise ValueError('Bug') + + return e + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.RepeatedSequenceExpression + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return RepeatedSequenceExpression( + seq_expr=extract_pb_oneof_scalar('seq_expr', cls._ONEOF_SEQ_EXPRESSION, msg), + count=extract_pb_oneof_scalar('count', cls._ONEOF_COUNT, msg), + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, RepeatedSequenceExpression) \ + and self._seq_expr == other._seq_expr \ + and self._count == other._count + + def __repr__(self): + return f'RepeatedSequenceExpression(seq_expr={self._seq_expr}, count={self._count})' class CytobandInterval(MessageMixin): @@ -616,7 +804,7 @@ def __repr__(self): return f'CytobandInterval(start={self._start}, end={self._end})' -class ChromosomeLocation: +class ChromosomeLocation(MessageMixin): def __init__( self, @@ -628,8 +816,83 @@ def __init__( self._chr = chr self._interval = interval + @property + def species_id(self) -> str: + return self._species_id + + @species_id.setter + def species_id(self, value: str): + self._species_id = value + + @property + def chr(self) -> str: + return self._chr + + 
@chr.setter + def chr(self, value: str): + self._chr = value + + @property + def interval(self) -> CytobandInterval: + return self._interval + + @interval.setter + def interval(self, value: CytobandInterval): + self._interval = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'species_id', 'chr', 'interval' + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'species_id', 'chr', 'interval' + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return ChromosomeLocation( + species_id=values['species_id'], + chr=values['chr'], + interval=extract_message_scalar('interval', CytobandInterval, values), + ) + else: + cls._complain_about_missing_field(values) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.ChromosomeLocation + + def to_message(self) -> Message: + return pp202.ChromosomeLocation( + species_id=self._species_id, + chr=self._chr, + interval=self._interval.to_message(), + ) + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return ChromosomeLocation( + species_id=msg.species_id, + chr=msg.chr, + interval=extract_pb_message_scalar('interval', CytobandInterval, msg), + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, ChromosomeLocation) \ + and self._species_id == other._species_id \ + and self._chr == other._chr \ + and self._interval == other._interval + + def __repr__(self): + return f'ChromosomeLocation(species_id={self._species_id}, chr={self._chr}, interval={self._interval})' + class Allele: + # TODO: def __init__( self, @@ -646,6 +909,8 @@ def __init__( class Haplotype: + # TODO: + class Member: def __init__( self, @@ -655,18 +920,16 @@ def __init__( def __init__( self, - _id: str, members: typing.Iterable[Member], ): - self._id = _id self._members = list(members) class CopyNumber: 
+ # TODO: def __init__( self, - _id: str, subject: typing.Union[ Allele, Haplotype, Gene, LiteralSequenceExpression, DerivedSequenceExpression, RepeatedSequenceExpression, @@ -674,12 +937,13 @@ def __init__( ], copies: typing.Union[Number, IndefiniteRange, DefiniteRange], ): - self._id = _id self._subject = subject self._copies = copies class VariationSet: + # TODO: + class Member: """ @@ -696,14 +960,13 @@ def __init__( def __init__( self, - _id: str, members: typing.Iterable[Member], ): - self._id = _id self._members = list(members) class Variation: + # TODO: def __init__( self, From e488aa4e0bde8f9db709620dfe6257a9c3b48c4a Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 27 Mar 2024 11:10:29 -0400 Subject: [PATCH 04/13] Implement `SequenceLocation`. --- src/pyphetools/pp/v202/_vrs.py | 77 ++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/src/pyphetools/pp/v202/_vrs.py b/src/pyphetools/pp/v202/_vrs.py index 51985753..7c1c6f3e 100644 --- a/src/pyphetools/pp/v202/_vrs.py +++ b/src/pyphetools/pp/v202/_vrs.py @@ -403,6 +403,9 @@ def __init__( class SequenceLocation: # TODO: +class SequenceLocation(MessageMixin): + _ONEOF_INTERVAL_VALUE = {'sequence_interval': SequenceInterval, 'simple_interval': SimpleInterval} + def __init__( self, sequence_id: str, @@ -411,6 +414,80 @@ def __init__( self._sequence_id = sequence_id self._interval = interval + @property + def sequence_id(self) -> str: + return self._sequence_id + + @sequence_id.setter + def sequence_id(self, value: str): + self._sequence_id = value + + @property + def interval(self) -> typing.Union[SequenceInterval, SimpleInterval]: + return self._interval + + @property + def sequence_interval(self): + return self._interval if isinstance(self._interval, SequenceInterval) else None + + @sequence_interval.setter + def sequence_interval(self, value: SequenceInterval): + self._interval = value + + @property + def simple_interval(self): + return self._interval if 
isinstance(self._interval, SimpleInterval) else None + + @simple_interval.setter + def simple_interval(self, value: SimpleInterval): + self._interval = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'sequence_id', 'sequence_interval', 'simple_interval' + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + raise NotImplementedError('Should not be called!') + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if 'sequence_id' in values and any(f in values for f in cls._ONEOF_INTERVAL_VALUE): + return SequenceLocation( + sequence_id=values['sequence_id'], + interval=extract_oneof_scalar(cls._ONEOF_INTERVAL_VALUE, values), + ) + else: + cls._complain_about_missing_field(values) + + def to_message(self) -> Message: + return pp202.SequenceLocation( + sequence_id=self._sequence_id, + interval=self._interval.to_message(), + ) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.SequenceLocation + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return SequenceLocation( + sequence_id=msg.sequence_id, + interval=extract_pb_oneof_scalar('interval', cls._ONEOF_INTERVAL_VALUE, msg), + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, SequenceLocation) \ + and self._sequence_id == other._sequence_id \ + and self._interval == other._interval + + def __repr__(self): + return f'SequenceLocation(sequence_id={self._sequence_id}, interval={self._interval})' + class SequenceState(MessageMixin): From dd0fc35c0cc06999bd220f803b4cc281cb77fb96 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 28 Mar 2024 09:15:41 -0400 Subject: [PATCH 05/13] Implement `Allele`. 
--- src/pyphetools/pp/v202/_vrs.py | 160 ++++++++++++++++++++++++++++++++- 1 file changed, 156 insertions(+), 4 deletions(-) diff --git a/src/pyphetools/pp/v202/_vrs.py b/src/pyphetools/pp/v202/_vrs.py index 7c1c6f3e..36504ada 100644 --- a/src/pyphetools/pp/v202/_vrs.py +++ b/src/pyphetools/pp/v202/_vrs.py @@ -968,22 +968,174 @@ def __repr__(self): return f'ChromosomeLocation(species_id={self._species_id}, chr={self._chr}, interval={self._interval})' -class Allele: - # TODO: +class Allele(MessageMixin): + _ONEOF_LOCATION = { + 'curie': str, + 'chromosome_location': ChromosomeLocation, + 'sequence_location': SequenceLocation, + } + _ONEOF_STATE = { + 'sequence_state': SequenceState, + 'literal_sequence_expression': LiteralSequenceExpression, + 'derived_sequence_expression': DerivedSequenceExpression, + 'repeated_sequence_expression': RepeatedSequenceExpression, + } def __init__( self, - _id: str, location: typing.Union[str, ChromosomeLocation, SequenceLocation], state: typing.Union[ SequenceState, LiteralSequenceExpression, DerivedSequenceExpression, RepeatedSequenceExpression, ], ): - self._id = _id self._location = location self._state = state + @property + def location(self) -> typing.Union[str, ChromosomeLocation, SequenceLocation]: + return self._location + + @property + def curie(self) -> typing.Optional[str]: + return self._location if isinstance(self._location, str) else None + + @curie.setter + def curie(self, value: str): + self._location = value + + @property + def chromosome_location(self) -> typing.Optional[ChromosomeLocation]: + return self._location if isinstance(self._location, ChromosomeLocation) else None + + @chromosome_location.setter + def chromosome_location(self, value: ChromosomeLocation): + self._location = value + + @property + def sequence_location(self) -> typing.Optional[SequenceLocation]: + return self._location if isinstance(self._location, SequenceLocation) else None + + @sequence_location.setter + def sequence_location(self, value: 
SequenceLocation): + self._location = value + + @property + def state(self) -> typing.Union[ + SequenceState, LiteralSequenceExpression, + DerivedSequenceExpression, RepeatedSequenceExpression, + ]: + return self._state + + @property + def sequence_state(self) -> typing.Optional[SequenceState]: + return self._state if isinstance(self._state, SequenceState) else None + + @sequence_state.setter + def sequence_state(self, value: SequenceState): + self._state = value + + @property + def literal_sequence_expression(self) -> typing.Optional[LiteralSequenceExpression]: + return self._state if isinstance(self._state, LiteralSequenceExpression) else None + + @literal_sequence_expression.setter + def literal_sequence_expression(self, value: LiteralSequenceExpression): + self._state = value + + @property + def derived_sequence_expression(self) -> typing.Optional[DerivedSequenceExpression]: + return self._state if isinstance(self._state, DerivedSequenceExpression) else None + + @derived_sequence_expression.setter + def derived_sequence_expression(self, value: DerivedSequenceExpression): + self._state = value + + @property + def repeated_sequence_expression(self) -> typing.Optional[RepeatedSequenceExpression]: + return self._state if isinstance(self._state, RepeatedSequenceExpression) else None + + @repeated_sequence_expression.setter + def repeated_sequence_expression(self, value: RepeatedSequenceExpression): + self._state = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return ( + 'curie', 'chromosome_location', 'sequence_location', + 'sequence_state', 'literal_sequence_expression', + 'derived_sequence_expression', 'repeated_sequence_expression', + ) + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + raise NotImplementedError('Should not be called!') + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if any(f in values for f in cls._ONEOF_LOCATION) and any(f in values for f in cls._ONEOF_STATE): + # We 
must extract the location in a special way because `not isinstance(curie, Deserializable)`. + if 'curie' in values: + location = values['curie'] + else: + location = extract_oneof_scalar(cls._ONEOF_LOCATION, values) + + return Allele( + location=location, + state=extract_oneof_scalar(cls._ONEOF_STATE, values), + ) + else: + raise ValueError( + 'Missing one of required fields: ' + '`curie|chromosome_location|,sequence_location` ' + '`sequence_state|literal_sequence_expression|derived_sequence_expression|repeated_sequence_expression`' + f' {values}') + + def to_message(self) -> Message: + a = pp202.Allele( + state=self._state.to_message(), + ) + + if isinstance(self._location, str): + a.curie = self._location + elif isinstance(self._location, ChromosomeLocation): + a.chromosome_location.CopyFrom(self._location.to_message()) + elif isinstance(self._location, SequenceLocation): + a.sequence_location.CopyFrom(self._location.to_message()) + else: + raise ValueError('Bug') + + return a + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.Allele + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + location = msg.WhichOneof('location') + if location == 'curie': + loc = msg.curie + else: + loc = extract_pb_oneof_scalar('curie', cls._ONEOF_LOCATION, msg) + return Allele( + location=loc, + state=extract_pb_oneof_scalar('state', cls._ONEOF_STATE, msg), + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, Allele) \ + and self._location == other._location \ + and self._state == other._state + + def __repr__(self): + return f'Allele(' \ + f'location={self._location}, ' \ + f'state={self._state})' + class Haplotype: # TODO: From 9e7b658d8bf0fe3bfe6d3b479baf0434cb148b4b Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 28 Mar 2024 09:33:53 -0400 Subject: [PATCH 06/13] Implement `Haplotype` and `VariationSet`. 
--- src/pyphetools/pp/v202/_vrs.py | 257 +++++++++++++++++++++++++++++++-- 1 file changed, 248 insertions(+), 9 deletions(-) diff --git a/src/pyphetools/pp/v202/_vrs.py b/src/pyphetools/pp/v202/_vrs.py index 36504ada..ebb7a08d 100644 --- a/src/pyphetools/pp/v202/_vrs.py +++ b/src/pyphetools/pp/v202/_vrs.py @@ -389,7 +389,11 @@ def __repr__(self): return f'SimpleInterval(start={self._start}, end={self._end})' -class SequenceInterval: +class SequenceInterval(MessageMixin): + # TODO: this is a hard nut to crack + _ONEOF_START_END_VALUES = { + 'number': Number, 'indefinite_range': IndefiniteRange, 'definite_range': DefiniteRange, + } def __init__( self, @@ -399,9 +403,75 @@ def __init__( self._start = start self._end = end + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'start', 'end' + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + raise NotImplementedError('Should not be called!') + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if any(field in values for field in cls._ONEOF_START_END_VALUES): + # TODO: this is a hard nut to crack + return SequenceInterval( + # This is a degenerate case, because `start` and `end` oneof fields consist of the same values. + # Protobuf solves this by appending the type to field name. + # For instance, it stores `start` in one of `startNumber`, `startIndefiniteRange`, `startDefiniteRange` + # depending on the type, and `end` as `endNumber`, `endIndefiniteRange`, `endDefiniteRange` + # for the other field. + start=extract_oneof_scalar(cls._ONEOF_START_END_VALUES, values), + end=extract_oneof_scalar(cls._ONEOF_START_END_VALUES, values), + ) + else: + raise ValueError(f'Missing one of required fields: `assay, value|complex_value` {values}') + + def to_message(self) -> Message: + si = pp202.SequenceInterval() + + # TODO: + # I am not sure about the attribute where we should be setting this. 
+ # Both for `start` and `end` + if isinstance(self._start, Number): + si.start.CopyFrom(self._start.to_message()) + elif isinstance(self._start, IndefiniteRange): + si.start.CopyFrom(self._start.to_message()) + elif isinstance(self._start, DefiniteRange): + si.start.CopyFrom(self._start.to_message()) + else: + raise ValueError('Bug') + + if isinstance(self._end, Number): + si.end.CopyFrom(self._end.to_message()) + elif isinstance(self._end, IndefiniteRange): + si.end.CopyFrom(self._end.to_message()) + elif isinstance(self._end, DefiniteRange): + si.end.CopyFrom(self._end.to_message()) + else: + raise ValueError('Bug') + + return si + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.SequenceInterval + + @classmethod + def from_message(cls, msg: Message): + # TODO: + pass + + def __eq__(self, other): + return isinstance(other, SequenceInterval) \ + and self._start == other._start \ + and self._end == other._end + + def __repr__(self): + return f'SequenceInterval(' \ + f'start={self._start}, ' \ + f'end={self._end})' -class SequenceLocation: - # TODO: class SequenceLocation(MessageMixin): _ONEOF_INTERVAL_VALUE = {'sequence_interval': SequenceInterval, 'simple_interval': SimpleInterval} @@ -1137,22 +1207,146 @@ def __repr__(self): f'state={self._state})' -class Haplotype: - # TODO: +class Haplotype(MessageMixin): + + class Member(MessageMixin): - class Member: def __init__( self, value: typing.Union[Allele, str], ): self._value = value + @property + def value(self) -> typing.Union[Allele, str]: + return self._value + + @property + def allele(self) -> typing.Optional[Allele]: + return self._value if isinstance(self._value, Allele) else None + + @allele.setter + def allele(self, value: Allele): + self._value = value + + @property + def curie(self) -> typing.Optional[str]: + return self._value if isinstance(self._value, str) else None + + @curie.setter + def curie(self, value: str): + self._value = value + + @staticmethod + def field_names() 
-> typing.Iterable[str]: + return 'allele', 'curie', + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + raise NotImplementedError('Should not be called!') + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if any(f in values for f in ('allele', 'curie')): + # We must extract the value in a special way because `not isinstance(curie, Deserializable)`. + if 'curie' in values: + value = values['curie'] + else: + value = extract_message_scalar('allele', Allele, values) + + return Haplotype.Member( + value=value, + ) + else: + raise ValueError(f'Missing one of required fields: `curie|allele` {values}') + + def to_message(self) -> Message: + hm = pp202.Haplotype.Member() + + if isinstance(self._value, str): + hm.curie = self._value + elif isinstance(self._value, Allele): + hm.allele.CopyFrom(self._value.to_message()) + else: + raise ValueError('Bug') + + return hm + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.Haplotype.Member + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + which = msg.WhichOneof('value') + if which == 'curie': + value = msg.curie + else: + value = extract_pb_message_scalar('allele', Allele, msg) + return pp202.Haplotype.Member( + value=value, + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, Haplotype.Member) \ + and self._value == other._value + + def __repr__(self): + return f'Haplotype.Member(value={self._value})' + def __init__( self, members: typing.Iterable[Member], ): self._members = list(members) + @property + def members(self) -> typing.MutableSequence[Member]: + return self._members + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'members', + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'members', + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if 
cls._all_required_fields_are_present(values): + return Haplotype( + members=extract_message_sequence('members', Haplotype.Member, values) + ) + else: + cls._complain_about_missing_field(values) + + def to_message(self) -> Message: + return pp202.Haplotype(members=(m.to_message() for m in self._members)) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.Haplotype + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return Haplotype( + members=extract_pb_message_seq('members', Haplotype.Member, msg), + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, Haplotype) \ + and self._members == other._members + + def __repr__(self): + return f'Haplotype(members={self._members})' + class CopyNumber: # TODO: @@ -1170,10 +1364,9 @@ def __init__( self._copies = copies -class VariationSet: - # TODO: +class VariationSet(MessageMixin): - class Member: + class Member(MessageMixin): """ **IMPORTANT**: `value` can also be an instance of :class:`VariationSet`! 
@@ -1187,12 +1380,58 @@ def __init__( if isinstance(value, VariationSet): pass + # TODO: implement + def __init__( self, members: typing.Iterable[Member], ): self._members = list(members) + @property + def members(self) -> typing.MutableSequence[Member]: + return self._members + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'members', + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + return 'members', + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if cls._all_required_fields_are_present(values): + return VariationSet( + members=extract_message_sequence('members', VariationSet.Member, values) + ) + else: + cls._complain_about_missing_field(values) + + def to_message(self) -> Message: + return pp202.VariationSet(members=(m.to_message() for m in self._members)) + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.VariationSet + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return VariationSet( + members=extract_pb_message_seq('members', VariationSet.Member, msg), + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, VariationSet) \ + and self._members == other._members + + def __repr__(self): + return f'VariationSet(members={self._members})' + class Variation: # TODO: From e70849ca4e35e815807bbc538ffb218d33132da1 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Sat, 30 Mar 2024 22:38:49 -0400 Subject: [PATCH 07/13] Fix oneof `call` field in `GenomicInterpretation`. 
--- src/pyphetools/pp/v202/_interpretation.py | 129 +++++++--------------- test/pp/conftest.py | 4 +- 2 files changed, 43 insertions(+), 90 deletions(-) diff --git a/src/pyphetools/pp/v202/_interpretation.py b/src/pyphetools/pp/v202/_interpretation.py index aca746c3..f4162c33 100644 --- a/src/pyphetools/pp/v202/_interpretation.py +++ b/src/pyphetools/pp/v202/_interpretation.py @@ -10,6 +10,7 @@ from ._gene_descriptor import GeneDescriptor from .._api import MessageMixin from ..parse import extract_message_scalar, extract_message_sequence, extract_pb_message_scalar, extract_pb_message_seq +from ..parse import extract_oneof_scalar, extract_pb_oneof_scalar class AcmgPathogenicityClassification(enum.Enum): @@ -124,6 +125,11 @@ def __repr__(self): class GenomicInterpretation(MessageMixin): + _ONEOF_CALL = { + 'gene_descriptor': GeneDescriptor, + 'variant_interpretation': VariantInterpretation, + } + class InterpretationStatus(enum.Enum): UNKNOWN_STATUS = 0 REJECTED = 1 @@ -135,25 +141,11 @@ def __init__( self, subject_or_biosample_id: str, interpretation_status: InterpretationStatus, - gene_descriptor: typing.Optional[GeneDescriptor] = None, - variant_interpretation: typing.Optional[VariantInterpretation] = None, + call: typing.Union[GeneDescriptor, VariantInterpretation], ): self._subject_or_biosample_id = subject_or_biosample_id self._interpretation_status = interpretation_status - one_ofs = (gene_descriptor, variant_interpretation) - if sum(1 for arg in one_ofs if arg is not None) != 1: - cnt = sum(1 for arg in one_ofs if arg is not None) - raise ValueError( - f'GenomicInterpretation must be provided with exactly 1 argument but {cnt} arguments were provided!') - - if gene_descriptor is not None: - self._discriminant = 0 - self._call = gene_descriptor - elif variant_interpretation is not None: - self._discriminant = 1 - self._call = variant_interpretation - else: - raise ValueError('Bug') # TODO: wording + self._call = call @property def subject_or_biosample_id(self) 
-> str: @@ -171,22 +163,24 @@ def interpretation_status(self) -> InterpretationStatus: def interpretation_status(self, value: InterpretationStatus): self._interpretation_status = value + @property + def call(self) -> typing.Union[GeneDescriptor, VariantInterpretation]: + return self._call + @property def gene_descriptor(self) -> typing.Optional[GeneDescriptor]: - return self._call if self._discriminant == 0 else None + return self._call if isinstance(self._call, GeneDescriptor) else None @gene_descriptor.setter def gene_descriptor(self, value: GeneDescriptor): - self._discriminant = 0 self._call = value @property def variant_interpretation(self) -> typing.Optional[VariantInterpretation]: - return self._call if self._discriminant == 1 else None + return self._call if isinstance(self._call, VariantInterpretation) else None @variant_interpretation.setter def variant_interpretation(self, value: VariantInterpretation): - self._discriminant = 1 self._call = value @staticmethod @@ -195,39 +189,25 @@ def field_names() -> typing.Iterable[str]: @classmethod def required_fields(cls) -> typing.Sequence[str]: - return 'subject_or_biosample_id', 'interpretation_status', + raise NotImplementedError('Should not be called!') @classmethod def from_dict(cls, values: typing.Mapping[str, typing.Any]): - if cls._all_required_fields_are_present(values): - if 'gene_descriptor' in values: - assert 'variant_interpretation' not in values, \ - 'Variant interpretation must be unset when Gene descriptor is set!' - return GenomicInterpretation( - subject_or_biosample_id=values['subject_or_biosample_id'], - interpretation_status=MessageMixin._extract_enum_field( - 'interpretation_status', GenomicInterpretation.InterpretationStatus, values - ), - gene_descriptor=extract_message_scalar('gene_descriptor', GeneDescriptor, values), - ) - - elif 'variant_interpretation' in values: - assert 'gene_descriptor' not in values, \ - 'Gene descriptor must be unset when Variant interpretation is set!' 
- return GenomicInterpretation( - subject_or_biosample_id=values['subject_or_biosample_id'], - interpretation_status=MessageMixin._extract_enum_field( - 'interpretation_status', GenomicInterpretation.InterpretationStatus, values - ), - variant_interpretation=extract_message_scalar( - 'variant_interpretation', VariantInterpretation, values - ), - ) - - else: - raise ValueError('Either `gene_descriptor` or `variant_interpretation` must be set!') + if 'subject_or_biosample_id' in values \ + and 'interpretation_status' in values \ + and any(field in values for field in cls._ONEOF_CALL): + return GenomicInterpretation( + subject_or_biosample_id=values['subject_or_biosample_id'], + interpretation_status=MessageMixin._extract_enum_field( + 'interpretation_status', GenomicInterpretation.InterpretationStatus, values, + ), + call=extract_oneof_scalar(cls._ONEOF_CALL, values), + ) else: - cls._complain_about_missing_field(values) + raise ValueError( + 'Missing one of required fields: ' + f'`subject_or_biosample_id, interpretation_status, gene_descriptor|variant_interpretation` in {values}' + ) def to_message(self) -> Message: msg = pp202.GenomicInterpretation( @@ -237,13 +217,12 @@ def to_message(self) -> Message: ), ) - val = self._call.to_message() - if self._discriminant == 0: - msg.gene_descriptor.CopyFrom(val) - elif self._discriminant == 1: - msg.variant_interpretation.CopyFrom(val) + if isinstance(self._call, GeneDescriptor): + msg.gene_descriptor.CopyFrom(self._call.to_message()) + elif isinstance(self._call, VariantInterpretation): + msg.variant_interpretation.CopyFrom(self._call.to_message()) else: - raise ValueError(f'Invalid discriminant {self._discriminant}') + raise ValueError('Bug') return msg @@ -253,30 +232,12 @@ def message_type(cls) -> typing.Type[Message]: @classmethod def from_message(cls, msg: Message): - if isinstance(msg, pp202.GenomicInterpretation): - subject_or_biosample_id = msg.subject_or_biosample_id - interpretation_status = 
GenomicInterpretation.InterpretationStatus(msg.interpretation_status) - - case = msg.WhichOneof('call') - if case == 'gene_descriptor': - return GenomicInterpretation( - subject_or_biosample_id=subject_or_biosample_id, - interpretation_status=interpretation_status, - gene_descriptor=extract_pb_message_scalar( - 'gene_descriptor', GeneDescriptor, msg - ), - ) - elif case == 'variant_interpretation': - return GenomicInterpretation( - subject_or_biosample_id=subject_or_biosample_id, - interpretation_status=interpretation_status, - variant_interpretation=extract_pb_message_scalar( - 'variant_interpretation', VariantInterpretation, msg - ), - ) - else: - raise ValueError(f'Unknown one of field set {case}') - + if isinstance(msg, cls.message_type()): + return GenomicInterpretation( + subject_or_biosample_id=msg.subject_or_biosample_id, + interpretation_status=GenomicInterpretation.InterpretationStatus(msg.interpretation_status), + call=extract_pb_oneof_scalar('call', cls._ONEOF_CALL, msg), + ) else: cls.complain_about_incompatible_msg_type(msg) @@ -284,21 +245,13 @@ def __eq__(self, other): return isinstance(other, GenomicInterpretation) \ and self._subject_or_biosample_id == other._subject_or_biosample_id \ and self._interpretation_status == other._interpretation_status \ - and self._discriminant == other._discriminant \ and self._call == other._call def __repr__(self): - if self._discriminant == 0: - val = f'gene_descriptor={self._call}' - elif self._discriminant == 1: - val = f'variant_interpretation={self._call}' - else: - raise ValueError(f'Invalid discriminant {self._discriminant}') - return f'GenomicInterpretation(' \ f'subject_or_biosample_id={self._subject_or_biosample_id}, ' \ f'interpretation_status={self._interpretation_status}, ' \ - f'{val})' + f'call={self._call})' class Diagnosis(MessageMixin): diff --git a/test/pp/conftest.py b/test/pp/conftest.py index 8eb94244..8197b24e 100644 --- a/test/pp/conftest.py +++ b/test/pp/conftest.py @@ -150,7 +150,7 @@ def 
interpretations() -> typing.Sequence[Interpretation]: GenomicInterpretation( subject_or_biosample_id='proband A', interpretation_status=GenomicInterpretation.InterpretationStatus.CAUSATIVE, - variant_interpretation=VariantInterpretation( + call=VariantInterpretation( acmg_pathogenicity_classification=AcmgPathogenicityClassification.PATHOGENIC, therapeutic_actionability=TherapeuticActionability.ACTIONABLE, variation_descriptor=VariationDescriptor( @@ -166,7 +166,7 @@ def interpretations() -> typing.Sequence[Interpretation]: GenomicInterpretation( subject_or_biosample_id='biosample.1', interpretation_status=GenomicInterpretation.InterpretationStatus.CAUSATIVE, - variant_interpretation=VariantInterpretation( + call=VariantInterpretation( acmg_pathogenicity_classification=AcmgPathogenicityClassification.PATHOGENIC, therapeutic_actionability=TherapeuticActionability.ACTIONABLE, variation_descriptor=VariationDescriptor( From c7e7339c2bc037dcfcee2a2a0a26991477880e21 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Sat, 30 Mar 2024 23:03:28 -0400 Subject: [PATCH 08/13] Implement `CopyNumber`. 
--- src/pyphetools/pp/v202/_vrs.py | 217 +++++++++++++++++++++++++++++++-- 1 file changed, 207 insertions(+), 10 deletions(-) diff --git a/src/pyphetools/pp/v202/_vrs.py b/src/pyphetools/pp/v202/_vrs.py index ebb7a08d..e2352092 100644 --- a/src/pyphetools/pp/v202/_vrs.py +++ b/src/pyphetools/pp/v202/_vrs.py @@ -830,10 +830,10 @@ def from_dict(cls, values: typing.Mapping[str, typing.Any]): ) else: raise ValueError( - 'Missing one of required fields: ' + 'Missing one of required fields: ' '`literal_sequence_expression|derived_sequence_expression` or ' f'`number|indefinite_range|definite_range`: {values}' - ) + ) def to_message(self) -> Message: e = pp202.RepeatedSequenceExpression() @@ -1092,9 +1092,9 @@ def sequence_location(self, value: SequenceLocation): @property def state(self) -> typing.Union[ - SequenceState, LiteralSequenceExpression, - DerivedSequenceExpression, RepeatedSequenceExpression, - ]: + SequenceState, LiteralSequenceExpression, + DerivedSequenceExpression, RepeatedSequenceExpression, + ]: return self._state @property @@ -1188,7 +1188,7 @@ def from_message(cls, msg: Message): if location == 'curie': loc = msg.curie else: - loc = extract_pb_oneof_scalar('curie', cls._ONEOF_LOCATION, msg) + loc = extract_pb_oneof_scalar('location', cls._ONEOF_LOCATION, msg) return Allele( location=loc, state=extract_pb_oneof_scalar('state', cls._ONEOF_STATE, msg), @@ -1208,7 +1208,6 @@ def __repr__(self): class Haplotype(MessageMixin): - class Member(MessageMixin): def __init__( @@ -1348,8 +1347,21 @@ def __repr__(self): return f'Haplotype(members={self._members})' -class CopyNumber: - # TODO: +class CopyNumber(MessageMixin): + _ONEOF_SUBJECT = { + 'allele': Allele, + 'haplotype': Haplotype, + 'gene': Gene, + 'literal_sequence_expression': LiteralSequenceExpression, + 'derived_sequence_expression': DerivedSequenceExpression, + 'repeated_sequence_expression': RepeatedSequenceExpression, + 'curie': str, + } + _ONEOF_COPIES = { + 'number': Number, + 'indefinite_range': 
DefiniteRange, + 'definite_range': DefiniteRange, + } def __init__( self, @@ -1363,9 +1375,194 @@ def __init__( self._subject = subject self._copies = copies + @property + def subject(self) -> typing.Union[ + Allele, Haplotype, Gene, + LiteralSequenceExpression, DerivedSequenceExpression, RepeatedSequenceExpression, + str, + ]: + return self._subject -class VariationSet(MessageMixin): + @property + def allele(self) -> typing.Optional[Allele]: + return self._subject if isinstance(self._subject, Allele) else None + + @allele.setter + def allele(self, value: Allele): + self._subject = value + + @property + def haplotype(self) -> typing.Optional[Haplotype]: + return self._subject if isinstance(self._subject, Haplotype) else None + + @haplotype.setter + def haplotype(self, value: Haplotype): + self._subject = value + + @property + def gene(self) -> typing.Optional[Gene]: + return self._subject if isinstance(self._subject, Gene) else None + + @gene.setter + def gene(self, value: Gene): + self._subject = value + + @property + def literal_sequence_expression(self) -> typing.Optional[LiteralSequenceExpression]: + return self._subject if isinstance(self._subject, LiteralSequenceExpression) else None + + @literal_sequence_expression.setter + def literal_sequence_expression(self, value: LiteralSequenceExpression): + self._subject = value + + @property + def derived_sequence_expression(self) -> typing.Optional[DerivedSequenceExpression]: + return self._subject if isinstance(self._subject, DerivedSequenceExpression) else None + + @derived_sequence_expression.setter + def derived_sequence_expression(self, value: DerivedSequenceExpression): + self._subject = value + + @property + def repeated_sequence_expression(self) -> typing.Optional[RepeatedSequenceExpression]: + return self._subject if isinstance(self._subject, RepeatedSequenceExpression) else None + + @repeated_sequence_expression.setter + def repeated_sequence_expression(self, value: RepeatedSequenceExpression): + 
self._subject = value + + @property + def curie(self) -> typing.Optional[str]: + return self._subject if isinstance(self._subject, str) else None + + @curie.setter + def curie(self, value: str): + self._subject = value + + @property + def copies(self) -> typing.Union[Number, IndefiniteRange, DefiniteRange]: + return self._copies + + @property + def number(self) -> typing.Optional[Number]: + return self._copies if isinstance(self._copies, Number) else None + + @number.setter + def number(self, value: Number): + self._copies = value + + @property + def indefinite_range(self) -> typing.Optional[IndefiniteRange]: + return self._copies if isinstance(self._copies, IndefiniteRange) else None + + @indefinite_range.setter + def indefinite_range(self, value: IndefiniteRange): + self._copies = value + + @property + def definite_range(self) -> typing.Optional[DefiniteRange]: + return self._copies if isinstance(self._copies, DefiniteRange) else None + + @definite_range.setter + def definite_range(self, value: DefiniteRange): + self._copies = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return ( + 'allele', 'haplotype', 'gene', + 'literal_sequence_expression', 'derived_sequence_expression', 'repeated_sequence_expression', 'curie', + 'number', 'indefinite_range', 'definite_range', + ) + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + raise NotImplementedError('Should not be called!') + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if any(f in values for f in cls._ONEOF_SUBJECT) and any(f in values for f in cls._ONEOF_COPIES): + # We must extract the subject in a special way because `not isinstance(curie, Deserializable)`. 
+ if 'curie' in values: + subject = values['curie'] + else: + subject = extract_oneof_scalar(cls._ONEOF_SUBJECT, values) + + return CopyNumber( + subject=subject, + copies=extract_oneof_scalar(cls._ONEOF_COPIES, values), + ) + else: + raise ValueError( + 'Missing one of required fields: ' + f'`{"|".join(cls._ONEOF_SUBJECT)}` ', + f'`{"|".join(cls._ONEOF_COPIES)}` in ', + f'{values}') + + def to_message(self) -> Message: + cn = pp202.CopyNumber() + + # subject + if isinstance(self._subject, Allele): + cn.allele = self._subject.to_message() + elif isinstance(self._subject, Haplotype): + cn.haplotype.CopyFrom(self._subject.to_message()) + elif isinstance(self._subject, Gene): + cn.gene.CopyFrom(self._subject.to_message()) + elif isinstance(self._subject, LiteralSequenceExpression): + cn.literal_sequence_expression.CopyFrom(self._subject.to_message()) + elif isinstance(self._subject, DerivedSequenceExpression): + cn.derived_sequence_expression.CopyFrom(self._subject.to_message()) + elif isinstance(self._subject, RepeatedSequenceExpression): + cn.repeated_sequence_expression.CopyFrom(self._subject.to_message()) + elif isinstance(self._subject, str): + cn.curie = self._subject + else: + raise ValueError('Bug') + + # copies + if isinstance(self._copies, Number): + cn.number = self._copies.to_message() + elif isinstance(self._copies, IndefiniteRange): + cn.indefinite_range.CopyFrom(self._copies.to_message()) + elif isinstance(self._copies, DefiniteRange): + cn.definite_range.CopyFrom(self._copies.to_message()) + else: + raise ValueError('Bug') + + return cn + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.CopyNumber + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + subject = msg.WhichOneof('subject') + if subject == 'curie': + sub = msg.curie + else: + sub = extract_pb_oneof_scalar('subject', cls._ONEOF_SUBJECT, msg) + return CopyNumber( + subject=sub, + copies=extract_pb_oneof_scalar('copies', 
cls._ONEOF_COPIES, msg), + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, CopyNumber) \ + and self._subject == other._subject \ + and self._copies == other._copies + + def __repr__(self): + return f'CopyNumber(' \ + f'subject={self._subject}, ' \ + f'copies={self._copies})' + + +class VariationSet(MessageMixin): class Member(MessageMixin): """ From e7f8d866c77d85bee989d65ff068098b586f0e13 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Sat, 30 Mar 2024 23:25:43 -0400 Subject: [PATCH 09/13] Implement `Variation`. --- src/pyphetools/pp/v202/_vrs.py | 117 +++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 6 deletions(-) diff --git a/src/pyphetools/pp/v202/_vrs.py b/src/pyphetools/pp/v202/_vrs.py index e2352092..a9803f5e 100644 --- a/src/pyphetools/pp/v202/_vrs.py +++ b/src/pyphetools/pp/v202/_vrs.py @@ -1377,10 +1377,10 @@ def __init__( @property def subject(self) -> typing.Union[ - Allele, Haplotype, Gene, - LiteralSequenceExpression, DerivedSequenceExpression, RepeatedSequenceExpression, - str, - ]: + Allele, Haplotype, Gene, + LiteralSequenceExpression, DerivedSequenceExpression, RepeatedSequenceExpression, + str, + ]: return self._subject @property @@ -1630,11 +1630,116 @@ def __repr__(self): return f'VariationSet(members={self._members})' -class Variation: - # TODO: +class Variation(MessageMixin): + _ONEOF_VARIATION = { + 'allele': Allele, 'haplotype': Haplotype, 'copy_number': CopyNumber, + 'text': Text, 'variation_set': VariationSet, + } def __init__( self, variation: typing.Union[Allele, Haplotype, CopyNumber, Text, VariationSet], ): self._variation = variation + + @property + def variation(self) -> typing.Union[Allele, Haplotype, CopyNumber, Text, VariationSet]: + return self._variation + + @property + def allele(self) -> typing.Optional[Allele]: + return self._variation if isinstance(self._variation, Allele) else None + + @allele.setter + def allele(self, value: Allele): + 
self._variation = value + + @property + def haplotype(self) -> typing.Optional[Haplotype]: + return self._variation if isinstance(self._variation, Haplotype) else None + + @haplotype.setter + def haplotype(self, value: Haplotype): + self._variation = value + + @property + def copy_number(self) -> typing.Optional[CopyNumber]: + return self._variation if isinstance(self._variation, CopyNumber) else None + + @copy_number.setter + def copy_number(self, value: CopyNumber): + self._variation = value + + @property + def text(self) -> typing.Optional[Text]: + return self._variation if isinstance(self._variation, Text) else None + + @text.setter + def text(self, value: Text): + self._variation = value + + @property + def variation_set(self) -> typing.Optional[VariationSet]: + return self._variation if isinstance(self._variation, VariationSet) else None + + @variation_set.setter + def variation_set(self, value: VariationSet): + self._variation = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'allele', 'haplotype', 'copy_number', 'text', 'variation_set', + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + raise NotImplementedError('Should not be called!') + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if any(f in values for f in cls._ONEOF_VARIATION): + return Variation( + variation=extract_oneof_scalar(cls._ONEOF_VARIATION, values), + ) + else: + raise ValueError( + 'Missing one of required fields: ' + f'`{"|".join(cls._ONEOF_VARIATION)}` in ', + f'{values}') + + def to_message(self) -> Message: + v = pp202.Variation() + + if isinstance(self._variation, Allele): + v.allele = self._variation.to_message() + elif isinstance(self._variation, Haplotype): + v.haplotype.CopyFrom(self._variation.to_message()) + elif isinstance(self._variation, CopyNumber): + v.copy_number.CopyFrom(self._variation.to_message()) + elif isinstance(self._variation, Text): + v.text.CopyFrom(self._variation.to_message()) 
+ elif isinstance(self._variation, VariationSet): + v.variation_set.CopyFrom(self._variation.to_message()) + else: + raise ValueError('Bug') + + return v + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.Variation + + @classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + return Variation( + variation=extract_pb_oneof_scalar('variation', cls._ONEOF_VARIATION, msg), + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, Variation) \ + and self._variation == other._variation + + def __repr__(self): + return f'Variation(variation={self._variation})' From 6bdf7e5b4581f699801aa4117580b91ef2aa2f24 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Sun, 31 Mar 2024 00:25:07 -0400 Subject: [PATCH 10/13] Implement `VariationSet.Member`. --- src/pyphetools/pp/v202/_vrs.py | 135 ++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 2 deletions(-) diff --git a/src/pyphetools/pp/v202/_vrs.py b/src/pyphetools/pp/v202/_vrs.py index a9803f5e..93c34b72 100644 --- a/src/pyphetools/pp/v202/_vrs.py +++ b/src/pyphetools/pp/v202/_vrs.py @@ -1564,6 +1564,12 @@ def __repr__(self): class VariationSet(MessageMixin): class Member(MessageMixin): + _ONEOF_VALUE = { + # 'curie': str, + 'allele': Allele, 'haplotype': Haplotype, + 'copy_number': CopyNumber, 'text': Text, + # 'variation_set': VariationSet, + } """ **IMPORTANT**: `value` can also be an instance of :class:`VariationSet`! 
@@ -1574,10 +1580,135 @@ def __init__( value: typing.Union[str, Allele, Haplotype, CopyNumber, Text], ): self._value = value + + @property + def value(self) -> typing.Union[str, Allele, Haplotype, CopyNumber, Text]: + return self._value + + @property + def curie(self) -> typing.Optional[str]: + return self._value if isinstance(self._value, str) else None + + @curie.setter + def curie(self, value: str): + self._value = value + + @property + def allele(self) -> typing.Optional[Allele]: + return self._value if isinstance(self._value, Allele) else None + + @allele.setter + def allele(self, value: Allele): + self._value = value + + @property + def haplotype(self) -> typing.Optional[Haplotype]: + return self._value if isinstance(self._value, Haplotype) else None + + @haplotype.setter + def haplotype(self, value: Haplotype): + self._value = value + + @property + def copy_number(self) -> typing.Optional[CopyNumber]: + return self._value if isinstance(self._value, CopyNumber) else None + + @copy_number.setter + def copy_number(self, value: CopyNumber): + self._value = value + + @property + def text(self) -> typing.Optional[Text]: + return self._value if isinstance(self._value, Text) else None + + @text.setter + def text(self, value: Text): + self._value = value + + @property + def variation_set(self): + """ + Get :class:`VariationSet` if present or `None` if `value` contains a different type. 
+ """ + return self._value if isinstance(self._value, VariationSet) else None + + @variation_set.setter + def variation_set(self, value): if isinstance(value, VariationSet): - pass + self._value = value + + @staticmethod + def field_names() -> typing.Iterable[str]: + return 'curie', 'allele', 'haplotype', 'copy_number', 'text', 'variation_set', + + @classmethod + def required_fields(cls) -> typing.Sequence[str]: + raise NotImplementedError('Should not be called!') + + @classmethod + def from_dict(cls, values: typing.Mapping[str, typing.Any]): + if 'curie' in values or any(f in values for f in cls._ONEOF_VALUE) or 'variation_set' in values: + if 'curie' in values: + return VariationSet.Member(value=values['curie']) + elif 'variation_set' in values: + # Disabling the false positive warning below - we cannot add VariationSet into the __init__ + # but it is an acceptable value. + # noinspection PyTypeChecker + return VariationSet.Member(value=extract_message_scalar('variation_set', VariationSet, values)) + else: + return VariationSet.Member(value=extract_oneof_scalar(cls._ONEOF_VALUE, values)) + else: + raise ValueError( + 'Missing one of required fields: `curie|allele|haplotype|copy_number|text|variation_set` in ', + f'{values}') + + def to_message(self) -> Message: + m = pp202.VariationSet.Member() + + if isinstance(self._value, str): + m.curie = self._value + elif isinstance(self._value, Allele): + m.allele = self._value.to_message() + elif isinstance(self._value, Haplotype): + m.haplotype.CopyFrom(self._value.to_message()) + elif isinstance(self._value, CopyNumber): + m.copy_number.CopyFrom(self._value.to_message()) + elif isinstance(self._value, Text): + m.text.CopyFrom(self._value.to_message()) + elif isinstance(self._value, VariationSet): + m.variation_set.CopyFrom(self._value.to_message()) + else: + raise ValueError('Bug') - # TODO: implement + return m + + @classmethod + def message_type(cls) -> typing.Type[Message]: + return pp202.VariationSet.Member + + 
@classmethod + def from_message(cls, msg: Message): + if isinstance(msg, cls.message_type()): + which = msg.WhichOneof('value') + if which == 'curie': + return VariationSet.Member(value=msg.curie) + elif which == 'variation_set': + # Same as in `from_dict`, the warning is false positive. + # noinspection PyTypeChecker + return VariationSet.Member(value=extract_pb_message_scalar('variation_set', VariationSet, msg)) + else: + return VariationSet.Member( + value=extract_pb_oneof_scalar('value', cls._ONEOF_VALUE, msg), + ) + else: + cls.complain_about_incompatible_msg_type(msg) + + def __eq__(self, other): + return isinstance(other, VariationSet.Member) \ + and self._value == other._value + + def __repr__(self): + return f'VariationSet.Member(value={self._value})' def __init__( self, From 980a0333336637b8869a88a722f505e0348a5d4a Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Sun, 31 Mar 2024 00:31:36 -0400 Subject: [PATCH 11/13] Enable `Variation` in `VariationDescriptor`. --- src/pyphetools/pp/v202/_vrsatile.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/pyphetools/pp/v202/_vrsatile.py b/src/pyphetools/pp/v202/_vrsatile.py index 5798c8b4..1a514b76 100644 --- a/src/pyphetools/pp/v202/_vrsatile.py +++ b/src/pyphetools/pp/v202/_vrsatile.py @@ -6,6 +6,7 @@ from ._gene_descriptor import GeneDescriptor from ._base import OntologyClass +from ._vrs import Variation from .._api import MessageMixin from ..parse import extract_message_scalar, extract_message_sequence, extract_pb_message_scalar, extract_pb_message_seq @@ -389,7 +390,7 @@ def __init__( self, id: str, molecule_context: MoleculeContext, - # variation: Variation # TODO: implement + variation: typing.Optional[Variation] = None, label: typing.Optional[str] = None, description: typing.Optional[str] = None, gene_context: typing.Optional[GeneDescriptor] = None, @@ -404,6 +405,7 @@ def __init__( ): self._id = id self._molecule_context = molecule_context + 
self._variation = variation self._label = label self._description = description self._gene_context = gene_context @@ -432,6 +434,18 @@ def molecule_context(self) -> MoleculeContext: def molecule_context(self, value: MoleculeContext): self._molecule_context = value + @property + def variation(self) -> typing.Optional[Variation]: + return self._variation + + @variation.setter + def variation(self, value: Variation): + self._variation = value + + @variation.deleter + def variation(self): + self._variation = None + @property def label(self) -> typing.Optional[str]: return self._label @@ -535,8 +549,8 @@ def allelic_state(self): @staticmethod def field_names() -> typing.Iterable[str]: return ( - 'id', 'molecule_context', 'label', 'description', 'gene_context', 'expressions', 'vcf_record', 'xrefs', - 'alternate_labels', 'extensions', 'structural_type', 'vrs_ref_allele_seq', 'allelic_state', + 'id', 'molecule_context', 'variation', 'label', 'description', 'gene_context', 'expressions', 'vcf_record', + 'xrefs', 'alternate_labels', 'extensions', 'structural_type', 'vrs_ref_allele_seq', 'allelic_state', ) @classmethod @@ -549,6 +563,7 @@ def from_dict(cls, values: typing.Mapping[str, typing.Any]): return VariationDescriptor( id=values['id'], molecule_context=MessageMixin._extract_enum_field('molecule_context', MoleculeContext, values), + variation=extract_message_scalar('variation', Variation, values), label=values.get('label', None), description=values.get('description', None), gene_context=extract_message_scalar('gene_context', GeneDescriptor, values), @@ -570,6 +585,9 @@ def to_message(self) -> Message: molecule_context=pp202.MoleculeContext.Value(self._molecule_context.name), ) + if self._variation is not None: + vd.variation.CopyFrom(self._variation.to_message()) + if self._label is not None: vd.label = self._label @@ -609,6 +627,7 @@ def from_message(cls, msg: Message): return VariationDescriptor( id=msg.id, molecule_context=MoleculeContext(msg.molecule_context), + 
variation=extract_pb_message_scalar('variation', Variation, msg), label=None if msg.label == '' else msg.label, description=None if msg.description == '' else msg.description, gene_context=extract_pb_message_scalar('gene_context', GeneDescriptor, msg), @@ -628,6 +647,7 @@ def __eq__(self, other): return isinstance(other, VariationDescriptor) \ and self._id == other._id \ and self._molecule_context == other._molecule_context \ + and self._variation == other._variation \ and self._label == other._label \ and self._description == other._description \ and self._gene_context == other._gene_context \ @@ -644,6 +664,7 @@ def __repr__(self): return 'VariationDescriptor(' \ f'id={self._id}, ' \ f'molecule_context={self._molecule_context}, ' \ + f'variation={self._variation}, ' \ f'label={self._label}, ' \ f'description={self._description}, ' \ f'gene_context={self._gene_context}, ' \ From cd3f4d0223d839a66df907992610f7a9c4087afa Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Apr 2024 16:10:56 -0400 Subject: [PATCH 12/13] Finalize VRS. --- src/pyphetools/pp/parse/_io.py | 2 +- src/pyphetools/pp/v202/__init__.py | 2 +- src/pyphetools/pp/v202/_vrs.py | 158 ++++++++++++++++++++++++---- src/pyphetools/pp/v202/_vrsatile.py | 41 ++++---- test/data/pp/retinoblastoma.pb | Bin 3468 -> 3468 bytes test/pp/conftest.py | 31 +++++- 6 files changed, 189 insertions(+), 45 deletions(-) diff --git a/src/pyphetools/pp/parse/_io.py b/src/pyphetools/pp/parse/_io.py index ef9545b7..c7904e56 100644 --- a/src/pyphetools/pp/parse/_io.py +++ b/src/pyphetools/pp/parse/_io.py @@ -64,7 +64,7 @@ def _put_field_to_mapping( elif isinstance(field, enum.Enum): out[name] = field.name elif hasattr(field, 'seconds') and hasattr(field, 'nanos') and hasattr(field, 'as_str') and callable(field.as_str): - # This quack *exactly* as a Timestamp! + # This quacks *exactly* as a Timestamp! 
out[name] = field.as_str() else: raise ValueError(f'Unexpected field {field}') diff --git a/src/pyphetools/pp/v202/__init__.py b/src/pyphetools/pp/v202/__init__.py index d81a1412..317e7333 100644 --- a/src/pyphetools/pp/v202/__init__.py +++ b/src/pyphetools/pp/v202/__init__.py @@ -40,5 +40,5 @@ 'Gene', 'Text', 'Number', 'IndefiniteRange', 'DefiniteRange', 'SimpleInterval', 'SequenceInterval', 'SequenceLocation', 'SequenceState', 'LiteralSequenceExpression', 'DerivedSequenceExpression', 'RepeatedSequenceExpression', 'CytobandInterval', 'ChromosomeLocation', 'Allele', 'Haplotype', 'CopyNumber', - 'VariationSet', + 'VariationSet', 'Variation', ] diff --git a/src/pyphetools/pp/v202/_vrs.py b/src/pyphetools/pp/v202/_vrs.py index 93c34b72..dff9e55b 100644 --- a/src/pyphetools/pp/v202/_vrs.py +++ b/src/pyphetools/pp/v202/_vrs.py @@ -124,9 +124,12 @@ class Number(MessageMixin): def __init__( self, - value: int, + value: typing.Union[int, str], ): - self._value = value + if isinstance(value, str): + self._value = int(value) + else: + self._value = value @property def value(self) -> int: @@ -390,10 +393,19 @@ def __repr__(self): class SequenceInterval(MessageMixin): - # TODO: this is a hard nut to crack - _ONEOF_START_END_VALUES = { - 'number': Number, 'indefinite_range': IndefiniteRange, 'definite_range': DefiniteRange, - } + """ + `SequenceInterval` is a complicated case which is + """ + _ONE_OF_START_FIELDS = ('start_number', 'start_indefinite_range', 'start_definite_range') + _ONE_OF_END_FIELDS = ('end_number', 'end_indefinite_range', 'end_definite_range') + + # `SequenceInterval` is a degenerate case, because `start` and `end` oneof fields consist of the same value types. + # Protobuf solves this by appending the type to field name. 
+ # For instance, it stores `start` in one of `startNumber`, `startIndefiniteRange`, `startDefiniteRange` + # depending on the type, and `end` as `endNumber`, `endIndefiniteRange`, `endDefiniteRange` + # for the other field., + # + # We will do the same for the purpose of (de)serialization. def __init__( self, @@ -403,26 +415,124 @@ def __init__( self._start = start self._end = end + @property + def start(self) -> typing.Union[Number, IndefiniteRange, DefiniteRange]: + return self._start + + @property + def start_number(self) -> typing.Optional[Number]: + return self._start if isinstance(self._start, Number) else None + + @start_number.setter + def start_number(self, value: Number): + self._start = value + + @property + def start_indefinite_range(self) -> typing.Optional[IndefiniteRange]: + return self._start if isinstance(self._start, IndefiniteRange) else None + + @start_indefinite_range.setter + def start_indefinite_range(self, value: IndefiniteRange): + self._start = value + + @property + def start_definite_range(self) -> typing.Optional[DefiniteRange]: + return self._start if isinstance(self._start, DefiniteRange) else None + + @start_definite_range.setter + def start_definite_range(self, value: DefiniteRange): + self._start = value + + @property + def end(self) -> typing.Union[Number, IndefiniteRange, DefiniteRange]: + return self._end + + @property + def end_number(self) -> typing.Optional[Number]: + return self._end if isinstance(self._end, Number) else None + + @end_number.setter + def end_number(self, value: Number): + self._end = value + + @property + def end_indefinite_range(self) -> typing.Optional[IndefiniteRange]: + return self._end if isinstance(self._end, IndefiniteRange) else None + + @end_indefinite_range.setter + def end_indefinite_range(self, value: IndefiniteRange): + self._end = value + + @property + def end_definite_range(self) -> typing.Optional[DefiniteRange]: + return self._end if isinstance(self._end, DefiniteRange) else None + + 
@end_definite_range.setter + def end_definite_range(self, value: DefiniteRange): + self._end = value + @staticmethod def field_names() -> typing.Iterable[str]: - return 'start', 'end' + raise NotImplementedError('Should not be called!') @classmethod def required_fields(cls) -> typing.Sequence[str]: raise NotImplementedError('Should not be called!') + def to_dict(self, out: typing.MutableMapping[str, typing.Any]): + # A rather verbose implementation. + + # start + start = {} + self._start.to_dict(start) + if isinstance(self._start, Number): + name = 'start_number' + elif isinstance(self._start, IndefiniteRange): + name = 'start_indefinite_range' + elif isinstance(self._start, DefiniteRange): + name = 'start_definite_range' + else: + raise ValueError('Bug') + out[name] = start + + # end + end = {} + self._end.to_dict(end) + if isinstance(self._end, Number): + name = 'end_number' + elif isinstance(self._end, IndefiniteRange): + name = 'end_indefinite_range' + elif isinstance(self._end, DefiniteRange): + name = 'end_definite_range' + else: + raise ValueError('Bug') + out[name] = end + @classmethod def from_dict(cls, values: typing.Mapping[str, typing.Any]): - if any(field in values for field in cls._ONEOF_START_END_VALUES): - # TODO: this is a hard nut to crack + if any(f in values for f in cls._ONE_OF_START_FIELDS) \ + and any(f in values for f in cls._ONE_OF_END_FIELDS): + if 'start_number' in values: + start = extract_message_scalar('start_number', Number, values) + elif 'start_indefinite_range' in values: + start = extract_message_scalar('start_indefinite_range', IndefiniteRange, values) + elif 'start_definite_range' in values: + start = extract_message_scalar('start_definite_range', DefiniteRange, values) + else: + raise ValueError('Bug') + + if 'end_number' in values: + end = extract_message_scalar('end_number', Number, values) + elif 'end_indefinite_range' in values: + end = extract_message_scalar('end_indefinite_range', IndefiniteRange, values) + elif 
'end_definite_range' in values: + end = extract_message_scalar('end_definite_range', DefiniteRange, values) + else: + raise ValueError('Bug') + return SequenceInterval( - # This is a degenerate case, because `start` and `end` oneof fields consist of the same values. - # Protobuf solves this by appending the type to field name. - # For instance, it stores `start` in one of `startNumber`, `startIndefiniteRange`, `startDefiniteRange` - # depending on the type, and `end` as `endNumber`, `endIndefiniteRange`, `endDefiniteRange` - # for the other field. - start=extract_oneof_scalar(cls._ONEOF_START_END_VALUES, values), - end=extract_oneof_scalar(cls._ONEOF_START_END_VALUES, values), + start=start, + end=end, ) else: raise ValueError(f'Missing one of required fields: `assay, value|complex_value` {values}') @@ -459,8 +569,8 @@ def message_type(cls) -> typing.Type[Message]: @classmethod def from_message(cls, msg: Message): - # TODO: - pass + print(msg) + raise NotImplementedError() # TODO: implement def __eq__(self, other): return isinstance(other, SequenceInterval) \ @@ -676,7 +786,7 @@ class DerivedSequenceExpression(MessageMixin): def __init__( self, location: SequenceLocation, - reverse_complement: bool, + reverse_complement: bool = False, ): self._location = location self._reverse_complement = reverse_complement @@ -703,14 +813,14 @@ def field_names() -> typing.Iterable[str]: @classmethod def required_fields(cls) -> typing.Sequence[str]: - return 'location', 'reverse_complement' + return 'location', @classmethod def from_dict(cls, values: typing.Mapping[str, typing.Any]): if cls._all_required_fields_are_present(values): return DerivedSequenceExpression( location=extract_message_scalar('location', SequenceLocation, values), - reverse_complement=values['reverse_complement'], + reverse_complement=values['reverse_complement'] if 'reverse_complement' in values else False, ) else: cls._complain_about_missing_field(values) @@ -1634,6 +1744,12 @@ def variation_set(self): 
@variation_set.setter def variation_set(self, value): + """ + Set the :class:`VariationSet` as the value. Setting value is a no-op if `value` + is not an instance of :class:`VariationSet`. + + Note, there is no type annotation on this method, but `value` should be a :class:`VariationSet`. + """ if isinstance(value, VariationSet): self._value = value diff --git a/src/pyphetools/pp/v202/_vrsatile.py b/src/pyphetools/pp/v202/_vrsatile.py index 1a514b76..d8ab91ea 100644 --- a/src/pyphetools/pp/v202/_vrsatile.py +++ b/src/pyphetools/pp/v202/_vrsatile.py @@ -390,7 +390,7 @@ def __init__( self, id: str, molecule_context: MoleculeContext, - variation: typing.Optional[Variation] = None, + # variation: typing.Optional[Variation] = None, label: typing.Optional[str] = None, description: typing.Optional[str] = None, gene_context: typing.Optional[GeneDescriptor] = None, @@ -405,7 +405,7 @@ def __init__( ): self._id = id self._molecule_context = molecule_context - self._variation = variation + # self._variation = variation self._label = label self._description = description self._gene_context = gene_context @@ -434,17 +434,17 @@ def molecule_context(self) -> MoleculeContext: def molecule_context(self, value: MoleculeContext): self._molecule_context = value - @property - def variation(self) -> typing.Optional[Variation]: - return self._variation - - @variation.setter - def variation(self, value: Variation): - self._variation = value - - @variation.deleter - def variation(self): - self._variation = None + # @property + # def variation(self) -> typing.Optional[Variation]: + # return self._variation + # + # @variation.setter + # def variation(self, value: Variation): + # self._variation = value + # + # @variation.deleter + # def variation(self): + # self._variation = None @property def label(self) -> typing.Optional[str]: @@ -549,7 +549,8 @@ def allelic_state(self): @staticmethod def field_names() -> typing.Iterable[str]: return ( - 'id', 'molecule_context', 'variation', 'label', 'description', 'gene_context',
'expressions', 'vcf_record', + 'id', 'molecule_context', # 'variation', + 'label', 'description', 'gene_context', 'expressions', 'vcf_record', 'xrefs', 'alternate_labels', 'extensions', 'structural_type', 'vrs_ref_allele_seq', 'allelic_state', ) @@ -563,7 +564,7 @@ def from_dict(cls, values: typing.Mapping[str, typing.Any]): return VariationDescriptor( id=values['id'], molecule_context=MessageMixin._extract_enum_field('molecule_context', MoleculeContext, values), - variation=extract_message_scalar('variation', Variation, values), + # variation=extract_message_scalar('variation', Variation, values), label=values.get('label', None), description=values.get('description', None), gene_context=extract_message_scalar('gene_context', GeneDescriptor, values), @@ -585,8 +586,8 @@ def to_message(self) -> Message: molecule_context=pp202.MoleculeContext.Value(self._molecule_context.name), ) - if self._variation is not None: - vd.variation.CopyFrom(self._variation.to_message()) + # if self._variation is not None: + # vd.variation.CopyFrom(self._variation.to_message()) if self._label is not None: vd.label = self._label @@ -627,7 +628,7 @@ def from_message(cls, msg: Message): return VariationDescriptor( id=msg.id, molecule_context=MoleculeContext(msg.molecule_context), - variation=extract_pb_message_scalar('variation', Variation, msg), + # variation=extract_pb_message_scalar('variation', Variation, msg), label=None if msg.label == '' else msg.label, description=None if msg.description == '' else msg.description, gene_context=extract_pb_message_scalar('gene_context', GeneDescriptor, msg), @@ -644,10 +645,10 @@ def from_message(cls, msg: Message): cls.complain_about_incompatible_msg_type(msg) def __eq__(self, other): + # and self._variation == other._variation \ return isinstance(other, VariationDescriptor) \ and self._id == other._id \ and self._molecule_context == other._molecule_context \ - and self._variation == other._variation \ and self._label == other._label \ and 
self._description == other._description \ and self._gene_context == other._gene_context \ @@ -661,10 +662,10 @@ def __eq__(self, other): and self._allelic_state == other._allelic_state def __repr__(self): + # f'variation={self._variation}, ' \ return 'VariationDescriptor(' \ f'id={self._id}, ' \ f'molecule_context={self._molecule_context}, ' \ - f'variation={self._variation}, ' \ f'label={self._label}, ' \ f'description={self._description}, ' \ f'gene_context={self._gene_context}, ' \ diff --git a/test/data/pp/retinoblastoma.pb b/test/data/pp/retinoblastoma.pb index ad894fc5786c4f638f5e3d8bac44dc3d86dbcd24..c86d3745a4a9aa7073989d121d4f1a7112b03f32 100644 GIT binary patch delta 25 hcmeB??vdW`mu2!KPMyt~teGs6|FY<8-p6^4832F{39|qI delta 25 hcmeB??vdW`mu2!KPMyt~teGs6dAM{o@8dkj3;=kq2*>~c diff --git a/test/pp/conftest.py b/test/pp/conftest.py index 8197b24e..538b6502 100644 --- a/test/pp/conftest.py +++ b/test/pp/conftest.py @@ -156,7 +156,21 @@ def interpretations() -> typing.Sequence[Interpretation]: variation_descriptor=VariationDescriptor( id='example-cnv', molecule_context=MoleculeContext.genomic, - # TODO: variation + # variation=Variation( + # variation=CopyNumber( + # subject=DerivedSequenceExpression( + # location=SequenceLocation( + # sequence_id='refseq:NC_000013.14', + # interval=SequenceInterval( + # start=Number(value=25_981_249), + # end=Number(value=61_706_822), + # ) + # ), + # reverse_complement=False, + # ), + # copies=Number(value=1), + # ) + # ), extensions=( Extension(name='mosaicism', value='40.0%'), ), @@ -172,7 +186,20 @@ def interpretations() -> typing.Sequence[Interpretation]: variation_descriptor=VariationDescriptor( id='rs121913300', molecule_context=MoleculeContext.genomic, - # TODO: variation + # variation=Variation( + # variation=Allele( + # location=SequenceLocation( + # sequence_id='refseq:NC_000013.11', + # interval=SequenceInterval( + # start=Number(value=48_367_511), + # end=Number(value=48_367_512), + # ) + # ), + # 
state=LiteralSequenceExpression( + # sequence='T', + # ) + # ) + # ), label='RB1 c.958C>T (p.Arg320Ter)', gene_context=GeneDescriptor(value_id='HGNC:9884', symbol='RB1'), expressions=( From dcd87d8b2fd50abaa22ac8cfa664b28c9bb8021b Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Apr 2024 17:17:20 -0400 Subject: [PATCH 13/13] Add documentation. --- src/pyphetools/pp/__init__.py | 117 ++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/src/pyphetools/pp/__init__.py b/src/pyphetools/pp/__init__.py index 108561af..8b8a8f33 100644 --- a/src/pyphetools/pp/__init__.py +++ b/src/pyphetools/pp/__init__.py @@ -1,5 +1,122 @@ """ A package with strongly typed Phenopacket Schema types and the code for I/O and validation. + +Examples +^^^^^^^^ + +Create phenopacket programmatically +################################### + +We recommend bringing the classes into scope all at once using the import star: + +>>> from pyphetools.pp.v202 import * + +Then, we can build a phenopacket from the individual building blocks. + +Let's start with the subject: + +>>> subject = Individual( +... id='proband A', +... time_at_last_encounter=TimeElement( +... element=Age(iso8601duration='P6M'), +... ), +... sex=Sex.FEMALE, +... ) +>>> subject.id +'proband A' +>>> subject.sex.name +'FEMALE' + +The created subject represents a female proband who was 6 months old at the time of the last encounter. + +We can update the fields using a simple assignment: + +>>> subject.karyotypic_sex = KaryotypicSex.XX +>>> subject.karyotypic_sex.name +'XX' + +We assigned an enum constant `KaryotypicSex.XX` to the previously unset `karyotypic_sex` attribute. + + +The same can be done with object attributes: + +>>> subject.vital_status = VitalStatus( +... status=VitalStatus.Status.DECEASED, +... time_of_death=TimeElement( +... element=Age(iso8601duration='P1Y') +... ), +... cause_of_death=OntologyClass( +... id='NCIT:C7541', label='Retinoblastoma', +... ), +...
) + +We set the vital status to indicate that the proband died at 1 year of age due to *Retinoblastoma*. + +Now we can create a phenopacket. The phenopacket requires an identifier and `MetaData`; the subject is optional. + +>>> pp = Phenopacket( +... id='example.retinoblastoma.phenopacket.id', +... meta_data=MetaData( +... created=Timestamp.from_str('2021-05-14T10:35:00Z'), +... created_by='anonymous biocurator', +... ), +... ) + +To create a phenopacket, we must provide the `id` and `meta_data` fields +since they are required by the Phenopacket Schema. +The same applies to the `created` and `created_by` fields of `MetaData`. + +`MetaData` contextualizes the used ontology classes, such as `NCIT:C7541` *Retinoblastoma*, +to a particular ontology, such as NCI Thesaurus. We can store the ontology resource in the `MetaData.resources` +field: + +>>> pp.meta_data.resources.append( +... Resource( +... id='ncit', name='NCI Thesaurus', url='http://purl.obolibrary.org/obo/ncit.owl', +... version='23.09d', namespace_prefix='NCIT', iri_prefix='http://purl.obolibrary.org/obo/NCIT_', +... ), +... ) + +All repeated elements, such as `MetaData.resources`, can be accessed via a `list`.
+ +Read/write JSON and Protobuf +############################ + +We can read and write phenopackets in JSON format using the `JsonDeserializer` and `JsonSerializer` classes: + +>>> from pyphetools.pp.parse.json import JsonSerializer, JsonDeserializer +>>> serializer = JsonSerializer() + +The serializer can write a Phenopacket Schema building block, such as `OntologyClass` or `Phenopacket` into +a file handle: + +>>> from io import StringIO +>>> buf = StringIO() +>>> serializer.serialize(subject.vital_status, buf) +>>> buf.getvalue() +'{"status": "DECEASED", "timeOfDeath": {"age": {"iso8601duration": "P1Y"}}, "causeOfDeath": {"id": "NCIT:C7541", "label": "Retinoblastoma"}}' + +and the JSON can be read back from a file handle: + +>>> _ = buf.seek(0) # Rewind and ignore the result +>>> deserializer = JsonDeserializer() +>>> decoded = deserializer.deserialize(buf, VitalStatus) +>>> decoded == subject.vital_status +True + +The building block can also be written into Protobuf wire format. +We can do a similar round-trip as above, but we will need a byte IO handle: + +>>> from io import BytesIO +>>> byte_buf = BytesIO() + +We can write the subject into the buffer and get the same data back: + +>>> subject.dump_pb(byte_buf) +>>> _ = byte_buf.seek(0) # Rewind to start +>>> other = Individual.from_pb(byte_buf) +>>> subject == other +True """ from . import parse