Skip to content

Commit

Permalink
Fixup typing inconsitencies in mouse_id and ophys_session_id.
Browse files Browse the repository at this point in the history
isort + black.
  • Loading branch information
morriscb committed Jan 17, 2023
1 parent a18646a commit 7d3889d
Show file tree
Hide file tree
Showing 4 changed files with 469 additions and 343 deletions.
Original file line number Diff line number Diff line change
@@ -1,53 +1,55 @@
import re
from typing import Optional, List, Dict
from typing import Dict, List, Optional

import pandas as pd

from allensdk.brain_observatory.behavior.behavior_project_cache.tables \
.ophys_sessions_table import \
BehaviorOphysSessionsTable
from allensdk.brain_observatory.behavior.behavior_project_cache.tables \
.util.prior_exposure_processing import \
get_prior_exposures_to_session_type, get_prior_exposures_to_image_set, \
get_prior_exposures_to_omissions
from allensdk.brain_observatory.behavior.behavior_project_cache.tables \
.project_table import \
ProjectTable
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io import BehaviorProjectLimsApi # noqa: E501

from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io import ( # noqa: E501
BehaviorProjectLimsApi,
)
from allensdk.brain_observatory.behavior.behavior_project_cache.tables.ophys_mixin import ( # noqa: E501
OphysMixin,
)
from allensdk.brain_observatory.behavior.behavior_project_cache.tables.ophys_sessions_table import ( # noqa: E501
BehaviorOphysSessionsTable,
)
from allensdk.brain_observatory.behavior.behavior_project_cache.tables.project_table import ( # noqa: E501
ProjectTable,
)
from allensdk.brain_observatory.behavior.behavior_project_cache.tables.util.prior_exposure_processing import ( # noqa: E501
get_prior_exposures_to_image_set,
get_prior_exposures_to_omissions,
get_prior_exposures_to_session_type,
)
from allensdk.brain_observatory.behavior.data_files import BehaviorStimulusFile
from allensdk.brain_observatory.behavior.data_objects import StimulusTimestamps
from allensdk.brain_observatory.behavior.data_objects.licks import Licks

from allensdk.brain_observatory.behavior.data_objects.metadata\
.subject_metadata.full_genotype import \
FullGenotype

from allensdk.brain_observatory.behavior.data_objects.metadata\
.subject_metadata.reporter_line import \
ReporterLine
from allensdk.brain_observatory.behavior.data_objects.metadata.subject_metadata.full_genotype import ( # noqa: E501
FullGenotype,
)
from allensdk.brain_observatory.behavior.data_objects.metadata.subject_metadata.reporter_line import ( # noqa: E501
ReporterLine,
)
from allensdk.brain_observatory.behavior.data_objects.rewards import Rewards
from allensdk.brain_observatory.behavior.data_objects.trials.trials import \
Trials
from allensdk.brain_observatory.behavior.data_objects.trials.trials import (
Trials,
)
from allensdk.core.auth_config import LIMS_DB_CREDENTIAL_MAP
from allensdk.internal.api import db_connection_creator
from allensdk.internal.brain_observatory.util.multi_session_utils import \
multiprocessing_helper
from allensdk.brain_observatory.behavior.behavior_project_cache.tables\
.ophys_mixin import \
OphysMixin
from allensdk.internal.brain_observatory.util.multi_session_utils import (
multiprocessing_helper,
)


class SessionsTable(ProjectTable, OphysMixin):
"""Class for storing and manipulating project-level data
at the session level"""

def __init__(
self, df: pd.DataFrame,
fetch_api: BehaviorProjectLimsApi,
suppress: Optional[List[str]] = None,
ophys_session_table: Optional[BehaviorOphysSessionsTable] = None,
include_trial_metrics: bool = False
self,
df: pd.DataFrame,
fetch_api: BehaviorProjectLimsApi,
suppress: Optional[List[str]] = None,
ophys_session_table: Optional[BehaviorOphysSessionsTable] = None,
include_trial_metrics: bool = False,
):
"""
Parameters
Expand All @@ -73,73 +75,91 @@ def __init__(

def postprocess_additional(self):
# Add subject metadata
self._df['reporter_line'] = self._df['reporter_line'].apply(
ReporterLine.parse)
self._df['cre_line'] = self._df['full_genotype'].apply(
lambda x: FullGenotype(x).parse_cre_line())
self._df['indicator'] = self._df['reporter_line'].apply(
lambda x: ReporterLine(x).parse_indicator())
self._df["reporter_line"] = self._df["reporter_line"].apply(
ReporterLine.parse
)
self._df["cre_line"] = self._df["full_genotype"].apply(
lambda x: FullGenotype(x).parse_cre_line()
)
self._df["indicator"] = self._df["reporter_line"].apply(
lambda x: ReporterLine(x).parse_indicator()
)

# add session number
self.__add_session_number()

# add prior exposure
self._df['prior_exposures_to_session_type'] = \
get_prior_exposures_to_session_type(df=self._df)
self._df['prior_exposures_to_image_set'] = \
get_prior_exposures_to_image_set(df=self._df)
self._df['prior_exposures_to_omissions'] = \
get_prior_exposures_to_omissions(df=self._df,
fetch_api=self._fetch_api)
self._df[
"prior_exposures_to_session_type"
] = get_prior_exposures_to_session_type(df=self._df)
self._df[
"prior_exposures_to_image_set"
] = get_prior_exposures_to_image_set(df=self._df)
self._df[
"prior_exposures_to_omissions"
] = get_prior_exposures_to_omissions(
df=self._df, fetch_api=self._fetch_api
)

if self._include_trial_metrics:
# add trial metrics
trial_metrics = multiprocessing_helper(
target=self._get_trial_metrics_helper,
behavior_session_ids=self._df.index.tolist(),
lims_engine=db_connection_creator(
fallback_credentials=LIMS_DB_CREDENTIAL_MAP),
progress_bar_title='Getting trial metrics for each session'
fallback_credentials=LIMS_DB_CREDENTIAL_MAP
),
progress_bar_title="Getting trial metrics for each session",
)
trial_metrics = pd.DataFrame(trial_metrics).set_index(
'behavior_session_id')
"behavior_session_id"
)
self._df = self._df.merge(
trial_metrics,
left_index=True,
right_index=True)
trial_metrics, left_index=True, right_index=True
)

# Add data from ophys session
if self._ophys_session_table is not None:
# Merge in ophys data
self._df = self._df.reset_index() \
.merge(self._ophys_session_table.table.reset_index(),
on='behavior_session_id',
how='left',
suffixes=('_behavior', '_ophys'))
self._df = self._df.set_index('behavior_session_id')
self._df = self._df.reset_index().merge(
self._ophys_session_table.table.reset_index(),
on="behavior_session_id",
how="left",
suffixes=("_behavior", "_ophys"),
)
self._df = self._df.set_index("behavior_session_id")

# Prioritize behavior date_of_acquisition
self._df['date_of_acquisition'] = \
self._df['date_of_acquisition_behavior']
self._df = self._df.drop(['date_of_acquisition_behavior',
'date_of_acquisition_ophys'], axis=1)
self._df["date_of_acquisition"] = self._df[
"date_of_acquisition_behavior"
]
self._df = self._df.drop(
["date_of_acquisition_behavior", "date_of_acquisition_ophys"],
axis=1,
)
# Enforce an integer type on due to there not being a value for
# ophys_session_id for every behavior_session. Pandas defaults to
# NaN here, changing the type to float unless otherwise fixed.
self._df["ophys_session_id"] = self._df["ophys_session_id"].astype(
"Int64"
)

def __add_session_number(self):
"""Parses session number from session type and and adds to dataframe"""

def parse_session_number(session_type: str):
"""Parse the session number from session type"""
match = re.match(r'OPHYS_(?P<session_number>\d+)',
session_type)
match = re.match(r"OPHYS_(?P<session_number>\d+)", session_type)
if match is None:
return None
return int(match.group('session_number'))
return int(match.group("session_number"))

session_type = self._df['session_type']
session_type = self._df["session_type"]
session_type = session_type[session_type.notnull()]

self._df.loc[session_type.index, 'session_number'] = \
session_type.apply(parse_session_number)
self._df.loc[
session_type.index, "session_number"
] = session_type.apply(parse_session_number)

@staticmethod
def _get_trial_metrics_helper(*args) -> Dict:
Expand All @@ -148,35 +168,33 @@ def _get_trial_metrics_helper(*args) -> Dict:
behavior_session_id, db_conn = args[0]

stimulus_file = BehaviorStimulusFile.from_lims(
behavior_session_id=behavior_session_id,
db=db_conn
behavior_session_id=behavior_session_id, db=db_conn
)
stimulus_timestamps = StimulusTimestamps.from_stimulus_file(
stimulus_file=stimulus_file,
monitor_delay=0.0
stimulus_file=stimulus_file, monitor_delay=0.0
)

trials = Trials.from_stimulus_file(
stimulus_file=stimulus_file,
stimulus_timestamps=stimulus_timestamps,
licks=Licks.from_stimulus_file(
stimulus_file=stimulus_file,
stimulus_timestamps=stimulus_timestamps
stimulus_timestamps=stimulus_timestamps,
),
rewards=Rewards.from_stimulus_file(
stimulus_file=stimulus_file,
stimulus_timestamps=stimulus_timestamps
)
stimulus_timestamps=stimulus_timestamps,
),
)

return {
'behavior_session_id': behavior_session_id,
'trial_count': trials.trial_count,
'go_trial_count': trials.go_trial_count,
'catch_trial_count': trials.catch_trial_count,
'hit_trial_count': trials.hit_trial_count,
'miss_trial_count': trials.miss_trial_count,
'false_alarm_trial_count': trials.false_alarm_trial_count,
'correct_reject_trial_count': trials.correct_reject_trial_count,
'engaged_trial_count': trials.get_engaged_trial_count()
"behavior_session_id": behavior_session_id,
"trial_count": trials.trial_count,
"go_trial_count": trials.go_trial_count,
"catch_trial_count": trials.catch_trial_count,
"hit_trial_count": trials.hit_trial_count,
"miss_trial_count": trials.miss_trial_count,
"false_alarm_trial_count": trials.false_alarm_trial_count,
"correct_reject_trial_count": trials.correct_reject_trial_count,
"engaged_trial_count": trials.get_engaged_trial_count(),
}
Original file line number Diff line number Diff line change
@@ -1,26 +1,37 @@
from pynwb import NWBFile

from allensdk.core import DataObject
from allensdk.core import \
JsonReadableInterface, LimsReadableInterface, NwbReadableInterface
from allensdk.core import (
DataObject,
JsonReadableInterface,
LimsReadableInterface,
NwbReadableInterface,
)
from allensdk.internal.api import PostgresQueryMixin
from pynwb import NWBFile


class MouseId(DataObject, LimsReadableInterface, JsonReadableInterface,
NwbReadableInterface):
class MouseId(
DataObject,
LimsReadableInterface,
JsonReadableInterface,
NwbReadableInterface,
):
"""the LabTracks ID"""
def __init__(self, mouse_id: int):

def __init__(self, mouse_id: str):
super().__init__(name="mouse_id", value=mouse_id)

@classmethod
def from_json(cls, dict_repr: dict) -> "MouseId":
mouse_id = dict_repr['external_specimen_name']
mouse_id = int(mouse_id)
mouse_id = dict_repr["external_specimen_name"]
# Check to make sure the dictionary value is string type and if not
# make it so.
if not isinstance(mouse_id, str):
mouse_id = str(mouse_id)
return cls(mouse_id=mouse_id)

@classmethod
def from_lims(cls, behavior_session_id: int,
lims_db: PostgresQueryMixin) -> "MouseId":
def from_lims(
cls, behavior_session_id: int, lims_db: PostgresQueryMixin
) -> "MouseId":
# TODO: Should this even be included?
# Found sometimes there were entries with NONE which is
# why they are filtered out; also many entries in the table
Expand All @@ -33,9 +44,9 @@ def from_lims(cls, behavior_session_id: int,
WHERE bs.id={behavior_session_id}
AND sp.external_specimen_name IS NOT NULL;
"""
mouse_id = int(lims_db.fetchone(query, strict=True))
mouse_id = lims_db.fetchone(query, strict=True)
return cls(mouse_id=mouse_id)

@classmethod
def from_nwb(cls, nwbfile: NWBFile) -> "MouseId":
return cls(mouse_id=int(nwbfile.subject.subject_id))
return cls(mouse_id=nwbfile.subject.subject_id)
Loading

0 comments on commit 7d3889d

Please sign in to comment.