updates to chunking, only copy 1st level data
akhanf committed Jan 13, 2025
1 parent dcfb77d commit aa1e285
Showing 2 changed files with 32 additions and 23 deletions.
config/samples.tsv (2 changes: 1 addition & 1 deletion)
@@ -1,2 +1,2 @@
 subject sample acq stain_0 stain_1 stain_2 sample_path
-brown brain imaris Lectin PI Abeta /cifs/trident/projects/Brown/241129_AMR_1wk_App_tau_E3_34-3_M2_1x1_09-57-23/09-57-23_AMR_1wk_App_tau_E3_34-3_M2_1x1_Blaze_C00_xyz-Table Z0000.ome.ims
+brown brain imaris Iba1 GFAP YOPRO /cifs/trident/projects/Brown/241129_AMR_1wk_App_tau_E3_34-3_M2_1x1_09-57-23/09-57-23_AMR_1wk_App_tau_E3_34-3_M2_1x1_Blaze_C00_xyz-Table Z0000.ome.ims
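For orientation, the sample sheet is tab-separated, with one row per sample listing its stains and the path to the source Imaris (.ims) file. A minimal sketch of loading it, assuming pandas; this is illustrative, not the repository's actual parsing code:

import pandas as pd

# Read the tab-separated sample sheet; the header row names the columns.
samples = pd.read_csv("config/samples.tsv", sep="\t")

# Each row maps a subject/sample to its stains and its Imaris (.ims) path.
for _, row in samples.iterrows():
    stains = [row["stain_0"], row["stain_1"], row["stain_2"]]
    print(row["subject"], row["sample"], stains, row["sample_path"])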
workflow/scripts/imaris_to_ome_zarr.py (53 changes: 31 additions & 22 deletions)
@@ -11,43 +11,54 @@
 from lib.cloud_io import get_fsspec, is_remote


-def convert_hdf5_to_zarr(hdf5_path, zarr_path):
+def convert_hdf5_to_zarr(hdf5_path, zarr_path, chunks):
     """
     Convert an HDF5 file to Zarr using h5py and zarr.

     Parameters:
         hdf5_path (str): Path to the input HDF5 (.ims) file.
         zarr_path (str): Path to the output Zarr dataset.
+        chunks (tuple): Chunk shape to use for the copied datasets.
     """

     # Open the HDF5 file and create a Zarr root group
     with h5py.File(hdf5_path, "r") as hdf5_file:
         zarr_store = zarr.open_group(zarr_path, mode="w")

-        def copy_group(hdf5_group, zarr_group):
-            for key, item in hdf5_group.items():
-                if isinstance(item, h5py.Group):  # Recursively copy groups
-                    new_group = zarr_group.create_group(key)
-                    copy_group(item, new_group)
-                elif isinstance(item, h5py.Dataset):  # Copy datasets
-                    zarr_group.create_dataset(
-                        name=key,
-                        data=item[()],
-                        chunks=item.chunks,
-                        dtype=item.dtype,
-                        compression="blosc"  # Optional compression
-                    )
-                    print(f"Copied dataset: {key}")
-
-        # Start copying from the root group
-        copy_group(hdf5_file, zarr_store)
+        # Define the specific path to copy
+        target_path = "DataSet/ResolutionLevel 0/TimePoint 0"
+
+        # Check if the target path exists in HDF5
+        if target_path in hdf5_file:
+            hdf5_group = hdf5_file[target_path]
+
+            def copy_group(hdf5_group, zarr_group):
+                for key, item in hdf5_group.items():
+                    if isinstance(item, h5py.Group) and key.startswith("Channel"):  # Only copy Channel groups
+                        channel_group = item
+                        if "Data" in channel_group:  # Only copy the Data dataset in each Channel
+                            data_item = channel_group["Data"]
+                            zarr_group.create_dataset(
+                                name=key + "/Data",  # Store Data under the Channel group
+                                data=data_item[()],
+                                chunks=chunks,
+                                dtype=data_item.dtype,
+                                compression="blosc"  # Optional compression
+                            )
+                            print(f"Copied Data dataset for {key}")
+                    # No need to copy other groups or datasets; only 'Data' is needed
+
+            # Start copying only the Channel groups
+            copy_group(hdf5_group, zarr_store)

     print(f"Converted HDF5 file to Zarr at: {zarr_path}")

+rechunk_size=snakemake.params.rechunk_size

 # copy imaris (hdf5) to zarr; only the first resolution level and timepoint are copied
 convert_hdf5_to_zarr(
     hdf5_path=snakemake.input.ims,
     zarr_path='copy_hdf5.zarr',
+    chunks=rechunk_size
 )
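For context, the converter above can also be exercised outside Snakemake. A minimal sketch; the input path and chunk shape below are placeholders (Imaris Data datasets are typically 3D z/y/x blocks), not values taken from this pipeline:

# Hypothetical standalone call; path and chunk shape are placeholders.
convert_hdf5_to_zarr(
    hdf5_path="sample.ome.ims",
    zarr_path="copy_hdf5.zarr",
    chunks=(64, 256, 256),  # must match the rank of each channel's Data dataset
)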


@@ -56,7 +67,6 @@
 metadata_json=snakemake.input.metadata_json
 downsampling=snakemake.params.downsampling
 max_layer=snakemake.params.max_downsampling_layers  # number of downsamplings by 2 to include in zarr
-rechunk_size=snakemake.params.rechunk_size
 out_zarr=snakemake.output.zarr
 stains=snakemake.params.stains
 scaling_method=snakemake.params.scaling_method
@@ -105,8 +115,7 @@
 for zarr_i, stain in enumerate(stains):
     # open zarr to get group name
     zi = zarr.open(in_zarr)
-    # darr_list.append(da.from_zarr(in_zarr, component=f'DataSet/ResolutionLevel 0/TimePoint 0/Channel {zarr_i}/Data', chunks=rechunk_size))
-    darr_list.append(da.from_zarr(in_zarr, component=f'DataSet/ResolutionLevel 0/TimePoint 0/Channel {zarr_i}/Data'))
+    darr_list.append(da.from_zarr(in_zarr, component=f'Channel {zarr_i}/Data'))


 # append to omero metadata
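Downstream, each per-channel array read above is stacked into a single multichannel dask array before the OME-Zarr pyramid is written. A rough sketch of that step, with the stain list, store path, and stacking axis assumed rather than taken from the repository:

import dask.array as da

in_zarr = "copy_hdf5.zarr"          # store written by convert_hdf5_to_zarr
stains = ["Iba1", "GFAP", "YOPRO"]  # example stains from the sample sheet

# One lazy dask array per channel, read from the simplified 'Channel N/Data' layout.
darr_list = [
    da.from_zarr(in_zarr, component=f"Channel {i}/Data")
    for i, _stain in enumerate(stains)
]

# Stack along a new leading channel axis (assumed c, z, y, x layout).
darr = da.stack(darr_list, axis=0)
print(darr.shape, darr.chunksize)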
