Fix up typing inconsistencies in mouse_id and ophys_session_id.

isort + black.
AllenInstitute · Jan 17, 2023 · 7d3889d · 7d3889d
1 parent a18646a
commit 7d3889d
Show file tree

Hide file tree

Showing 4 changed files with 469 additions and 343 deletions.
diff --git a/allensdk/brain_observatory/behavior/behavior_project_cache/tables/sessions_table.py b/allensdk/brain_observatory/behavior/behavior_project_cache/tables/sessions_table.py
@@ -1,53 +1,55 @@
 import re
-from typing import Optional, List, Dict
+from typing import Dict, List, Optional
 
 import pandas as pd
-
-from allensdk.brain_observatory.behavior.behavior_project_cache.tables \
-    .ophys_sessions_table import \
-    BehaviorOphysSessionsTable
-from allensdk.brain_observatory.behavior.behavior_project_cache.tables \
-    .util.prior_exposure_processing import \
-    get_prior_exposures_to_session_type, get_prior_exposures_to_image_set, \
-    get_prior_exposures_to_omissions
-from allensdk.brain_observatory.behavior.behavior_project_cache.tables \
-    .project_table import \
-    ProjectTable
-from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io import BehaviorProjectLimsApi  # noqa: E501
-
+from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io import (  # noqa: E501
+    BehaviorProjectLimsApi,
+)
+from allensdk.brain_observatory.behavior.behavior_project_cache.tables.ophys_mixin import (  # noqa: E501
+    OphysMixin,
+)
+from allensdk.brain_observatory.behavior.behavior_project_cache.tables.ophys_sessions_table import (  # noqa: E501
+    BehaviorOphysSessionsTable,
+)
+from allensdk.brain_observatory.behavior.behavior_project_cache.tables.project_table import (  # noqa: E501
+    ProjectTable,
+)
+from allensdk.brain_observatory.behavior.behavior_project_cache.tables.util.prior_exposure_processing import (  # noqa: E501
+    get_prior_exposures_to_image_set,
+    get_prior_exposures_to_omissions,
+    get_prior_exposures_to_session_type,
+)
 from allensdk.brain_observatory.behavior.data_files import BehaviorStimulusFile
 from allensdk.brain_observatory.behavior.data_objects import StimulusTimestamps
 from allensdk.brain_observatory.behavior.data_objects.licks import Licks
-
-from allensdk.brain_observatory.behavior.data_objects.metadata\
-    .subject_metadata.full_genotype import \
-    FullGenotype
-
-from allensdk.brain_observatory.behavior.data_objects.metadata\
-    .subject_metadata.reporter_line import \
-    ReporterLine
+from allensdk.brain_observatory.behavior.data_objects.metadata.subject_metadata.full_genotype import (  # noqa: E501
+    FullGenotype,
+)
+from allensdk.brain_observatory.behavior.data_objects.metadata.subject_metadata.reporter_line import (  # noqa: E501
+    ReporterLine,
+)
 from allensdk.brain_observatory.behavior.data_objects.rewards import Rewards
-from allensdk.brain_observatory.behavior.data_objects.trials.trials import \
-    Trials
+from allensdk.brain_observatory.behavior.data_objects.trials.trials import (
+    Trials,
+)
 from allensdk.core.auth_config import LIMS_DB_CREDENTIAL_MAP
 from allensdk.internal.api import db_connection_creator
-from allensdk.internal.brain_observatory.util.multi_session_utils import \
-    multiprocessing_helper
-from allensdk.brain_observatory.behavior.behavior_project_cache.tables\
-    .ophys_mixin import \
-    OphysMixin
+from allensdk.internal.brain_observatory.util.multi_session_utils import (
+    multiprocessing_helper,
+)
 
 
 class SessionsTable(ProjectTable, OphysMixin):
     """Class for storing and manipulating project-level data
     at the session level"""
 
     def __init__(
-            self, df: pd.DataFrame,
-            fetch_api: BehaviorProjectLimsApi,
-            suppress: Optional[List[str]] = None,
-            ophys_session_table: Optional[BehaviorOphysSessionsTable] = None,
-            include_trial_metrics: bool = False
+        self,
+        df: pd.DataFrame,
+        fetch_api: BehaviorProjectLimsApi,
+        suppress: Optional[List[str]] = None,
+        ophys_session_table: Optional[BehaviorOphysSessionsTable] = None,
+        include_trial_metrics: bool = False,
     ):
         """
         Parameters
@@ -73,73 +75,91 @@ def __init__(
 
     def postprocess_additional(self):
         # Add subject metadata
-        self._df['reporter_line'] = self._df['reporter_line'].apply(
-            ReporterLine.parse)
-        self._df['cre_line'] = self._df['full_genotype'].apply(
-            lambda x: FullGenotype(x).parse_cre_line())
-        self._df['indicator'] = self._df['reporter_line'].apply(
-            lambda x: ReporterLine(x).parse_indicator())
+        self._df["reporter_line"] = self._df["reporter_line"].apply(
+            ReporterLine.parse
+        )
+        self._df["cre_line"] = self._df["full_genotype"].apply(
+            lambda x: FullGenotype(x).parse_cre_line()
+        )
+        self._df["indicator"] = self._df["reporter_line"].apply(
+            lambda x: ReporterLine(x).parse_indicator()
+        )
 
         # add session number
         self.__add_session_number()
 
         # add prior exposure
-        self._df['prior_exposures_to_session_type'] = \
-            get_prior_exposures_to_session_type(df=self._df)
-        self._df['prior_exposures_to_image_set'] = \
-            get_prior_exposures_to_image_set(df=self._df)
-        self._df['prior_exposures_to_omissions'] = \
-            get_prior_exposures_to_omissions(df=self._df,
-                                             fetch_api=self._fetch_api)
+        self._df[
+            "prior_exposures_to_session_type"
+        ] = get_prior_exposures_to_session_type(df=self._df)
+        self._df[
+            "prior_exposures_to_image_set"
+        ] = get_prior_exposures_to_image_set(df=self._df)
+        self._df[
+            "prior_exposures_to_omissions"
+        ] = get_prior_exposures_to_omissions(
+            df=self._df, fetch_api=self._fetch_api
+        )
 
         if self._include_trial_metrics:
             # add trial metrics
             trial_metrics = multiprocessing_helper(
                 target=self._get_trial_metrics_helper,
                 behavior_session_ids=self._df.index.tolist(),
                 lims_engine=db_connection_creator(
-                    fallback_credentials=LIMS_DB_CREDENTIAL_MAP),
-                progress_bar_title='Getting trial metrics for each session'
+                    fallback_credentials=LIMS_DB_CREDENTIAL_MAP
+                ),
+                progress_bar_title="Getting trial metrics for each session",
             )
             trial_metrics = pd.DataFrame(trial_metrics).set_index(
-                'behavior_session_id')
+                "behavior_session_id"
+            )
             self._df = self._df.merge(
-                trial_metrics,
-                left_index=True,
-                right_index=True)
+                trial_metrics, left_index=True, right_index=True
+            )
 
         # Add data from ophys session
         if self._ophys_session_table is not None:
             # Merge in ophys data
-            self._df = self._df.reset_index() \
-                .merge(self._ophys_session_table.table.reset_index(),
-                       on='behavior_session_id',
-                       how='left',
-                       suffixes=('_behavior', '_ophys'))
-            self._df = self._df.set_index('behavior_session_id')
+            self._df = self._df.reset_index().merge(
+                self._ophys_session_table.table.reset_index(),
+                on="behavior_session_id",
+                how="left",
+                suffixes=("_behavior", "_ophys"),
+            )
+            self._df = self._df.set_index("behavior_session_id")
 
             # Prioritize behavior date_of_acquisition
-            self._df['date_of_acquisition'] = \
-                self._df['date_of_acquisition_behavior']
-            self._df = self._df.drop(['date_of_acquisition_behavior',
-                                      'date_of_acquisition_ophys'], axis=1)
+            self._df["date_of_acquisition"] = self._df[
+                "date_of_acquisition_behavior"
+            ]
+            self._df = self._df.drop(
+                ["date_of_acquisition_behavior", "date_of_acquisition_ophys"],
+                axis=1,
+            )
+            # Enforce a nullable integer type because not every
+            # behavior_session has an ophys_session_id. Pandas fills the gaps
+            # with NaN, which would otherwise change the column to float.
+            self._df["ophys_session_id"] = self._df["ophys_session_id"].astype(
+                "Int64"
+            )
 
     def __add_session_number(self):
         """Parses session number from session type and and adds to dataframe"""
 
         def parse_session_number(session_type: str):
             """Parse the session number from session type"""
-            match = re.match(r'OPHYS_(?P<session_number>\d+)',
-                             session_type)
+            match = re.match(r"OPHYS_(?P<session_number>\d+)", session_type)
             if match is None:
                 return None
-            return int(match.group('session_number'))
+            return int(match.group("session_number"))
 
-        session_type = self._df['session_type']
+        session_type = self._df["session_type"]
         session_type = session_type[session_type.notnull()]
 
-        self._df.loc[session_type.index, 'session_number'] = \
-            session_type.apply(parse_session_number)
+        self._df.loc[
+            session_type.index, "session_number"
+        ] = session_type.apply(parse_session_number)
 
     @staticmethod
     def _get_trial_metrics_helper(*args) -> Dict:
@@ -148,35 +168,33 @@ def _get_trial_metrics_helper(*args) -> Dict:
         behavior_session_id, db_conn = args[0]
 
         stimulus_file = BehaviorStimulusFile.from_lims(
-            behavior_session_id=behavior_session_id,
-            db=db_conn
+            behavior_session_id=behavior_session_id, db=db_conn
         )
         stimulus_timestamps = StimulusTimestamps.from_stimulus_file(
-            stimulus_file=stimulus_file,
-            monitor_delay=0.0
+            stimulus_file=stimulus_file, monitor_delay=0.0
         )
 
         trials = Trials.from_stimulus_file(
             stimulus_file=stimulus_file,
             stimulus_timestamps=stimulus_timestamps,
             licks=Licks.from_stimulus_file(
                 stimulus_file=stimulus_file,
-                stimulus_timestamps=stimulus_timestamps
+                stimulus_timestamps=stimulus_timestamps,
             ),
             rewards=Rewards.from_stimulus_file(
                 stimulus_file=stimulus_file,
-                stimulus_timestamps=stimulus_timestamps
-            )
+                stimulus_timestamps=stimulus_timestamps,
+            ),
         )
 
         return {
-            'behavior_session_id': behavior_session_id,
-            'trial_count': trials.trial_count,
-            'go_trial_count': trials.go_trial_count,
-            'catch_trial_count': trials.catch_trial_count,
-            'hit_trial_count': trials.hit_trial_count,
-            'miss_trial_count': trials.miss_trial_count,
-            'false_alarm_trial_count': trials.false_alarm_trial_count,
-            'correct_reject_trial_count': trials.correct_reject_trial_count,
-            'engaged_trial_count': trials.get_engaged_trial_count()
+            "behavior_session_id": behavior_session_id,
+            "trial_count": trials.trial_count,
+            "go_trial_count": trials.go_trial_count,
+            "catch_trial_count": trials.catch_trial_count,
+            "hit_trial_count": trials.hit_trial_count,
+            "miss_trial_count": trials.miss_trial_count,
+            "false_alarm_trial_count": trials.false_alarm_trial_count,
+            "correct_reject_trial_count": trials.correct_reject_trial_count,
+            "engaged_trial_count": trials.get_engaged_trial_count(),
         }
diff --git a/allensdk/brain_observatory/behavior/data_objects/metadata/subject_metadata/mouse_id.py b/allensdk/brain_observatory/behavior/data_objects/metadata/subject_metadata/mouse_id.py
@@ -1,26 +1,37 @@
-from pynwb import NWBFile
-
-from allensdk.core import DataObject
-from allensdk.core import \
-    JsonReadableInterface, LimsReadableInterface, NwbReadableInterface
+from allensdk.core import (
+    DataObject,
+    JsonReadableInterface,
+    LimsReadableInterface,
+    NwbReadableInterface,
+)
 from allensdk.internal.api import PostgresQueryMixin
+from pynwb import NWBFile
 
 
-class MouseId(DataObject, LimsReadableInterface, JsonReadableInterface,
-              NwbReadableInterface):
+class MouseId(
+    DataObject,
+    LimsReadableInterface,
+    JsonReadableInterface,
+    NwbReadableInterface,
+):
     """the LabTracks ID"""
-    def __init__(self, mouse_id: int):
+
+    def __init__(self, mouse_id: str):
         super().__init__(name="mouse_id", value=mouse_id)
 
     @classmethod
     def from_json(cls, dict_repr: dict) -> "MouseId":
-        mouse_id = dict_repr['external_specimen_name']
-        mouse_id = int(mouse_id)
+        mouse_id = dict_repr["external_specimen_name"]
+        # Check to make sure the dictionary value is string type and if not
+        # make it so.
+        if not isinstance(mouse_id, str):
+            mouse_id = str(mouse_id)
         return cls(mouse_id=mouse_id)
 
     @classmethod
-    def from_lims(cls, behavior_session_id: int,
-                  lims_db: PostgresQueryMixin) -> "MouseId":
+    def from_lims(
+        cls, behavior_session_id: int, lims_db: PostgresQueryMixin
+    ) -> "MouseId":
         # TODO: Should this even be included?
         # Found sometimes there were entries with NONE which is
         # why they are filtered out; also many entries in the table
@@ -33,9 +44,9 @@ def from_lims(cls, behavior_session_id: int,
             WHERE bs.id={behavior_session_id}
             AND sp.external_specimen_name IS NOT NULL;
             """
-        mouse_id = int(lims_db.fetchone(query, strict=True))
+        mouse_id = lims_db.fetchone(query, strict=True)
         return cls(mouse_id=mouse_id)
 
     @classmethod
     def from_nwb(cls, nwbfile: NWBFile) -> "MouseId":
-        return cls(mouse_id=int(nwbfile.subject.subject_id))
+        return cls(mouse_id=nwbfile.subject.subject_id)