Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ability to save multi-animal pose tracks to single-animal files #83

Merged
merged 11 commits into from
Nov 16, 2023
Prev Previous commit
Next Next commit
added changes for pull request 83
  • Loading branch information
DhruvSkyy committed Nov 14, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit e164d4f8b251681f70467f4763d8d707f402a6b5
70 changes: 32 additions & 38 deletions movement/io/save_poses.py
Original file line number Diff line number Diff line change
@@ -7,11 +7,12 @@
import xarray as xr

from movement.io.validators import ValidFile
from movement.logging import log_error

logger = logging.getLogger(__name__)


def _xarraytodf(ds: xr.Dataset, columns: pd.MultiIndex) -> pd.DataFrame:
def _xarry_to_dlc_df(ds: xr.Dataset, columns: pd.MultiIndex) -> pd.DataFrame:
"""Takes an xarray dataset and DLC-style multi-index columns and outputs
a pandas dataframe.

@@ -45,7 +46,7 @@ def _xarraytodf(ds: xr.Dataset, columns: pd.MultiIndex) -> pd.DataFrame:

def _auto_split_individuals(ds: xr.Dataset):
"""Returns True if there is only one individual in the dataset,
else returns False.
else returns False.

Parameters
----------
@@ -57,9 +58,8 @@ def _auto_split_individuals(ds: xr.Dataset):
return True if len(individuals) == 1 else False


def _savedataframe(suffix: str, filepath: Path, dataframe: pd.DataFrame):
"""Given the suffix and filepath, will save the dataframe
as either a .h5 or .csv.
def _save_dlc_df(filepath: Path, dataframe: pd.DataFrame):
"""Given a filepath, will save the dataframe as either a .h5 or .csv.

Parameters
----------
@@ -71,15 +71,13 @@ def _savedataframe(suffix: str, filepath: Path, dataframe: pd.DataFrame):
Pandas Dataframe to save to .csv or .h5.
"""

if suffix == ".csv":
if filepath.suffix == ".csv":
dataframe.to_csv(filepath, sep=",")
elif suffix == ".h5":
elif filepath.suffix == ".h5":
dataframe.to_hdf(filepath, key="df_with_missing")
# for invalid suffix
else:
error_msg = "Expected filepath to end in .csv or .h5."
logger.error(error_msg)
raise ValueError(error_msg)
log_error(ValueError, "Expected filepath to end in .csv or .h5.")


def to_dlc_df(
@@ -121,18 +119,18 @@ def to_dlc_df(
to a DeepLabCut-style ".h5" or ".csv" file.
"""
if not isinstance(ds, xr.Dataset):
error_msg = f"Expected an xarray Dataset, but got {type(ds)}. "
logger.error(error_msg)
raise ValueError(error_msg)
log_error(
ValueError, f"Expected an xarray Dataset, but got {type(ds)}."
)

ds.poses.validate() # validate the dataset

scorer = ["movement"]
bodyparts = ds.coords["keypoints"].data.tolist()
coords = ds.coords["space"].data.tolist() + ["likelihood"]
individuals = ds.coords["individuals"].data.tolist()

if split_individuals:
individuals = ds.coords["individuals"].data.tolist()
result = {}

for individual in individuals:
@@ -146,7 +144,7 @@ def to_dlc_df(
)

# Uses the columns and data to make a df
df = _xarraytodf(individual_data, columns)
df = _xarry_to_dlc_df(individual_data, columns)

""" Add the DataFrame to the result
dictionary with individual's name as key """
@@ -160,12 +158,11 @@ def to_dlc_df(
else:
# Create the DLC-style multi-index columns
index_levels = ["scorer", "individuals", "bodyparts", "coords"]
individuals = ds.coords["individuals"].data.tolist()
columns = pd.MultiIndex.from_product(
[scorer, individuals, bodyparts, coords], names=index_levels
)

df = _xarraytodf(ds, columns)
df = _xarry_to_dlc_df(ds, columns)

logger.info("Converted PoseTracks dataset to DLC-style DataFrame.")
return df
@@ -187,25 +184,24 @@ def to_dlc_file(
Path to the file to save the DLC poses to. The file extension
must be either ".h5" (recommended) or ".csv".
split_individuals : bool, optional
Format of the DeepLabcut output file.
- If True, the file will be formatted as in a single-animal
If True, the file will be formatted as in a single-animal
DeepLabCut project: no "individuals" level, and each individual will be
saved in a separate file. The individual's name will be appended to the
file path, just before the file extension, i.e.
"/path/to/filename_individual1.h5".
- If False, the file will be formatted as in a multi-animal
If False, the file will be formatted as in a multi-animal
DeepLabCut project: the columns will include the
"individuals" level and all individuals will be saved to the same file.
- If "auto" the format will be determined based on the number of
If "auto" the format will be determined based on the number of
individuals in the dataset: True if there is only one, and
False if there is more than one. This is the default.

See Also
--------
to_dlc_df : Convert an xarray dataset containing pose tracks into a
DeepLabCut-style pandas DataFrame with multi-index columns
for each individual or a dictionary of DataFrames for each individual
based on the 'split_individuals' argument.
DeepLabCut-style pandas DataFrame with multi-index columns
for each individual or a dictionary of DataFrames for each individual
based on the 'split_individuals' argument.

Examples
--------
@@ -229,33 +225,31 @@ def to_dlc_file(
split_individuals = _auto_split_individuals(ds)

elif not isinstance(split_individuals, bool):
error_msg = (
raise log_error(
ValueError,
f"Expected 'split_individuals' to be a boolean or 'auto', but got "
f"{type(split_individuals)}."
f"{type(split_individuals)}.",
)
logger.error(error_msg)
raise ValueError(error_msg)

"""If split_individuals is True then it will split the file into a
dictionary of pandas dataframes for each individual."""
if split_individuals:
"""If split_individuals is True then it will split the file into a
dictionary of pandas dataframes for each individual."""

df_dict = to_dlc_df(ds, split_individuals=True)

for key, df in df_dict.items():
"""Iterates over dictionary, the key is the name of the
individual and the value is the corresponding df."""
filepath = f"{file.path.with_suffix('')}_{key}{file.path.suffix}"
filepath_posix = Path(filepath)

if isinstance(df, pd.DataFrame):
_savedataframe(file.path.suffix, filepath_posix, df)
_save_dlc_df(Path(filepath), df)

logger.info(f"Saved PoseTracks dataset to {file.path}.")

"""If split_individuals is False then it will save the file as a dataframe
with multi-index columns for each individual."""
if not split_individuals:
logger.info(f"Saved PoseTracks dataset to {file.path}.")
else:
"""If split_individuals is False then it will save the file as
a dataframe with multi-index columns for each individual."""
dataframe = to_dlc_df(ds, split_individuals=False)
if isinstance(dataframe, pd.DataFrame):
_savedataframe(file.path.suffix, file.path, dataframe)
_save_dlc_df(file.path, dataframe)
logger.info(f"Saved PoseTracks dataset to {file.path}.")