From 11118f6f9df2f8d87d9ec5243a57e94b934ed2c7 Mon Sep 17 00:00:00 2001
From: Julia Dark
Date: Fri, 13 Sep 2024 15:04:16 -0400
Subject: [PATCH 1/3] Add spatial presence matrices and use in ExperimentAxisQuery

This adds dataframes for creating a join table between the scenes and
observations/variables. New methods are added to the `ExperimentAxisQuery`
class for getting an Arrow array of scene names that relate to the obs or var
in the query.
---
Review note: var_scene_ids selects its joinids with _Axis.VAR (it previously
used _Axis.OBS, copied from obs_scene_ids). The blob hashes on the query.py
index line predate that correction.

 python-spec/src/somacore/experiment.py  | 11 ++++++
 python-spec/src/somacore/measurement.py | 11 ++++++
 python-spec/src/somacore/query/query.py | 46 +++++++++++++++++++++++++
 3 files changed, 68 insertions(+)

diff --git a/python-spec/src/somacore/experiment.py b/python-spec/src/somacore/experiment.py
index bb0d7c65..a0732064 100644
--- a/python-spec/src/somacore/experiment.py
+++ b/python-spec/src/somacore/experiment.py
@@ -66,6 +66,17 @@ class Experiment(
     spatial = _mixin.item[_SceneColl]()  # TODO: Discuss the name of this element.
     """A collection of named spatial scenes."""
 
+    obs_spatial_presence = _mixin.item[_DF]()
+    """A dataframe that stores the presence of obs in the spatial scenes.
+
+    This provides a join table for the obs ``soma_joinid`` and the scene names used in
+    the ``spatial`` collection. This dataframe must contain dimensions ``soma_joinid``
+    and ``scene_id``. The ``scene_id`` dimension must be a ``string`` dimension. The
+    dataframe must contain a ``boolean`` column ``data``. The values of ``data`` are
+    ``True`` if the obs with obsid ``soma_joinid`` is contained in the scene
+    ``scene_id`` and ``False`` otherwise.
+    """
+
     def axis_query(
         self,
         measurement_name: str,
diff --git a/python-spec/src/somacore/measurement.py b/python-spec/src/somacore/measurement.py
index e7d0c9f4..5bb692f3 100644
--- a/python-spec/src/somacore/measurement.py
+++ b/python-spec/src/somacore/measurement.py
@@ -99,3 +99,14 @@ class Measurement(
 
     This is indexed by ``[varid_1, varid_2]``.
     """
+
+    var_spatial_presence = _mixin.item[_DF]()
+    """A dataframe that stores the presence of var in the spatial scenes.
+
+    This provides a join tabel for the var ``soma_joinid`` and the scene names used in
+    the ``spatial`` collection. This dataframe must contain dimensions ``soma_joinid``
+    and ``scene_id``. The ``scene_id`` dimension must be a ``string`` dimension. The
+    dataframe must contain a ``boolean`` column ``data``. The values of ``data`` are
+    ``True`` if the var with varid ``soma_joinid`` is contained in scene with name
+    ``scene_id`` and ``False`` otherwise.
+    """
diff --git a/python-spec/src/somacore/query/query.py b/python-spec/src/somacore/query/query.py
index ac22792b..947d8b0a 100644
--- a/python-spec/src/somacore/query/query.py
+++ b/python-spec/src/somacore/query/query.py
@@ -21,6 +21,7 @@
 import numpy.typing as npt
 import pandas as pd
 import pyarrow as pa
+import pyarrow.compute as pacomp
 from scipy import sparse
 from typing_extensions import Literal, Protocol, Self, TypedDict
 
@@ -267,6 +268,48 @@ def varm(self, layer: str) -> data.SparseRead:
         """
         return self._axism_inner(_Axis.VAR, layer)
 
+    def obs_scene_ids(self) -> pa.Array:
+        """Returns a pyarrow array with scene ids that contain obs from this
+        query.
+
+        Lifecycle: experimental
+        """
+        try:
+            obs_scene = self.experiment.obs_spatial_presence
+        except KeyError as ke:
+            raise KeyError("Missing obs_scene") from ke
+        if not isinstance(obs_scene, data.DataFrame):
+            raise TypeError("obs_scene must be a dataframe.")
+
+        full_table = obs_scene.read(
+            coords=((_Axis.OBS.getattr_from(self._joinids), slice(None))),
+            result_order=options.ResultOrder.COLUMN_MAJOR,
+            value_filter="data != 0",
+        ).concat()
+
+        return pacomp.unique(full_table["scene_id"])
+
+    def var_scene_ids(self) -> pa.Array:
+        """Return a pyarrow array with scene ids that contain var from this
+        query.
+
+        Lifecycle: experimental
+        """
+        try:
+            var_scene = self._ms.var_spatial_presence
+        except KeyError as ke:
+            raise KeyError("Missing var_scene") from ke
+        if not isinstance(var_scene, data.DataFrame):
+            raise TypeError("var_scene must be a dataframe.")
+
+        full_table = var_scene.read(
+            coords=((_Axis.VAR.getattr_from(self._joinids), slice(None))),
+            result_order=options.ResultOrder.COLUMN_MAJOR,
+            value_filter="data != 0",
+        ).concat()
+
+        return pacomp.unique(full_table["scene_id"])
+
     def to_anndata(
         self,
         X_name: str,
@@ -826,6 +869,9 @@ def obs(self) -> data.DataFrame: ...
     @property
     def context(self) -> Optional[base_types.ContextBase]: ...
 
+    @property
+    def obs_spatial_presence(self) -> data.DataFrame: ...
+
 
 class _HasObsVar(Protocol[_T_co]):
     """Something which has an ``obs`` and ``var`` field.

From f2a025e3be6ebeec043da861ceb4bd5fd9c2314a Mon Sep 17 00:00:00 2001
From: Julia Dark
Date: Thu, 19 Sep 2024 15:45:16 -0400
Subject: [PATCH 2/3] Replace "dimension" with "index column"

---
 python-spec/src/somacore/experiment.py  | 4 ++--
 python-spec/src/somacore/measurement.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python-spec/src/somacore/experiment.py b/python-spec/src/somacore/experiment.py
index a0732064..13b540b4 100644
--- a/python-spec/src/somacore/experiment.py
+++ b/python-spec/src/somacore/experiment.py
@@ -70,8 +70,8 @@ class Experiment(
     """A dataframe that stores the presence of obs in the spatial scenes.
 
     This provides a join table for the obs ``soma_joinid`` and the scene names used in
-    the ``spatial`` collection. This dataframe must contain dimensions ``soma_joinid``
-    and ``scene_id``. The ``scene_id`` dimension must be a ``string`` dimension. The
+    the ``spatial`` collection. This dataframe must contain index columns ``soma_joinid``
+    and ``scene_id``. The ``scene_id`` column must have type ``string``. The
     dataframe must contain a ``boolean`` column ``data``. The values of ``data`` are
     ``True`` if the obs with obsid ``soma_joinid`` is contained in the scene
     ``scene_id`` and ``False`` otherwise.
diff --git a/python-spec/src/somacore/measurement.py b/python-spec/src/somacore/measurement.py
index 5bb692f3..4d9c944a 100644
--- a/python-spec/src/somacore/measurement.py
+++ b/python-spec/src/somacore/measurement.py
@@ -104,8 +104,8 @@ class Measurement(
     """A dataframe that stores the presence of var in the spatial scenes.
 
     This provides a join tabel for the var ``soma_joinid`` and the scene names used in
-    the ``spatial`` collection. This dataframe must contain dimensions ``soma_joinid``
-    and ``scene_id``. The ``scene_id`` dimension must be a ``string`` dimension. The
+    the ``spatial`` collection. This dataframe must contain index columns ``soma_joinid``
+    and ``scene_id``. The ``scene_id`` column must have type ``string``. The
     dataframe must contain a ``boolean`` column ``data``. The values of ``data`` are
     ``True`` if the var with varid ``soma_joinid`` is contained in scene with name
     ``scene_id`` and ``False`` otherwise.

From 0f16387ca2fbeae2725ca77821dbd83a49af5ee1 Mon Sep 17 00:00:00 2001
From: Julia Dark <24235303+jp-dark@users.noreply.github.com>
Date: Thu, 26 Sep 2024 14:00:07 -0400
Subject: [PATCH 3/3] Fix typos and improve clarity in docstrings

Co-authored-by: Aaron Wolen
---
 python-spec/src/somacore/experiment.py  | 4 ++--
 python-spec/src/somacore/measurement.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python-spec/src/somacore/experiment.py b/python-spec/src/somacore/experiment.py
index 13b540b4..b570aa2a 100644
--- a/python-spec/src/somacore/experiment.py
+++ b/python-spec/src/somacore/experiment.py
@@ -72,8 +72,8 @@ class Experiment(
     This provides a join table for the obs ``soma_joinid`` and the scene names used in
     the ``spatial`` collection. This dataframe must contain index columns ``soma_joinid``
     and ``scene_id``. The ``scene_id`` column must have type ``string``. The
-    dataframe must contain a ``boolean`` column ``data``. The values of ``data`` are
-    ``True`` if the obs with obsid ``soma_joinid`` is contained in the scene
+    dataframe must contain a ``boolean`` column ``soma_data``. The values of ``soma_data`` are
+    ``True`` if the obs ``soma_joinid`` is contained in the scene
     ``scene_id`` and ``False`` otherwise.
     """
 
diff --git a/python-spec/src/somacore/measurement.py b/python-spec/src/somacore/measurement.py
index 4d9c944a..273b58f2 100644
--- a/python-spec/src/somacore/measurement.py
+++ b/python-spec/src/somacore/measurement.py
@@ -103,7 +103,7 @@ class Measurement(
     var_spatial_presence = _mixin.item[_DF]()
     """A dataframe that stores the presence of var in the spatial scenes.
 
-    This provides a join tabel for the var ``soma_joinid`` and the scene names used in
+    This provides a join table for the var ``soma_joinid`` and the scene names used in
     the ``spatial`` collection. This dataframe must contain index columns ``soma_joinid``
     and ``scene_id``. The ``scene_id`` column must have type ``string``. The
     dataframe must contain a ``boolean`` column ``data``. The values of ``data`` are