Skip to content

Commit

Permalink
Imaris import support (#57)
Browse files Browse the repository at this point in the history
* uses zarr copy and new metadata import function to import from imaris prestitched
  • Loading branch information
akhanf authored Feb 3, 2025
1 parent 4562271 commit 1bc5084
Show file tree
Hide file tree
Showing 11 changed files with 2,262 additions and 1,951 deletions.
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ A Snakemake workflow for pre-processing single plane illumination microscopy (SP

Takes TIF images (tiled or prestitched) and outputs a validated BIDS Microscopy dataset, with a multi-channel multi-scale OME-Zarr file for each scan, along with downsampled nifti images (in a derivatives folder).

## Supported inputs:

SPIMprep supports a range of inputs, with the type of acquisition specified by including
the short-hand name (in bold below) as a substring in the acquisition tag.
- **`blaze`**: Raw Ultramicroscope Blaze OME TIFF files, either as 2D or 3D TIFF files
- **`prestitched`**: Prestitched images, as a stack of 2D TIF files (e.g. from LifeCanvas)
- **`imaris`**: Images prestitched into a single Imaris (.ims) file.


## Requirements
Expand All @@ -17,10 +24,6 @@ Takes TIF images (tiled or prestitched) and outputs a validated BIDS Microscopy
- (Note: container will be automatically pulled when you run the workflow)
- Python >= 3.11
- Lightsheet data:
- Raw Ultramicroscope Blaze OME TIFF files (include `blaze` in the acquisition tag)
- can be 2D or 3D TIFF files
- Prestitched TIFF files (include `prestitched` in the acquisition tag)


## Usage

Expand Down
2 changes: 1 addition & 1 deletion config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,5 +162,5 @@ report:


containers:
spimprep: 'docker://khanlab/spimprep-deps:v0.1.0'
spimprep: 'docker://khanlab/spimprep-deps:v0.1.1'

1 change: 1 addition & 0 deletions config/samples.tsv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
subject sample acq stain_0 stain_1 stain_2 sample_path
mouse1 brain blaze Lectin PI Abeta .test/dryrun/data
lifecanvas1 brain prestitched PI Abeta n/a .test/dryrun/data
brown brain imaris1x Iba1 GFAP YOPRO .test/dryrun/data
3,877 changes: 1,973 additions & 1,904 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@ include: "rules/bigstitcher.smk"
include: "rules/ome_zarr.smk"
include: "rules/bids.smk"
include: "rules/qc.smk"
include: "rules/imaris.smk"
13 changes: 7 additions & 6 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@ def sample_is_remote(wildcards):
def get_input_sample(wildcards):
"""returns path to extracted sample or path to provided input folder"""
sample_path = Path(get_sample_path(wildcards))

if is_remote_gcs(sample_path):
return rules.cp_from_gcs.output.ome_dir.format(**wildcards)

Expand All @@ -175,6 +174,8 @@ def get_input_sample(wildcards):
# sample was a tar file, so point to the extracted folder
return rules.extract_sample.output.ome_dir.format(**wildcards)

elif sample_path.suffixes[-1] == ".ims":
return get_sample_path_remote(wildcards)
else:
print(f"unsupported input: {sample_path}")

Expand Down Expand Up @@ -277,12 +278,12 @@ def get_output_ome_zarr(acq_type):
if config["use_zipstore"]:
return {
"zarr": bids(
root=work,
root=root,
subject="{subject}",
datatype="micr",
sample="{sample}",
acq=f"{{acq,[a-zA-Z0-9]*{acq_type}[a-zA-Z0-9]*}}",
suffix="SPIM.ome.zarr",
suffix="SPIM.ome.zarr.zip",
)
}
else:
Expand All @@ -302,12 +303,12 @@ def get_output_ome_zarr(acq_type):
if config["use_zipstore"]:
return {
"zarr": bids(
root=work,
root=root,
subject="{subject}",
datatype="micr",
sample="{sample}",
acq=f"{{acq,[a-zA-Z0-9]*{acq_type}[a-zA-Z0-9]*}}",
suffix="SPIM.ome.zarr",
suffix="SPIM.ome.zarr.zip",
)
}
else:
Expand All @@ -333,7 +334,7 @@ def get_input_ome_zarr_to_nii(wildcards):
datatype="micr",
sample="{sample}",
acq="{acq}",
suffix="SPIM.ome.zarr",
suffix="SPIM.ome.zarr.zip",
).format(**wildcards)
else:
return bids(
Expand Down
126 changes: 126 additions & 0 deletions workflow/rules/imaris.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
rule imaris_to_metadata:
input:
ims=get_input_sample,
output:
metadata_json=bids(
root=root,
subject="{subject}",
datatype="micr",
sample="{sample}",
acq="{acq,[a-zA-Z0-9]*imaris[a-zA-Z0-9]*}",
suffix="SPIM.json",
),
benchmark:
bids(
root="benchmarks",
datatype="imaris_to_metdata",
subject="{subject}",
sample="{sample}",
acq="{acq}",
suffix="benchmark.tsv",
)
log:
bids(
root="logs",
datatype="prestitched_to_metdata",
subject="{subject}",
sample="{sample}",
acq="{acq}",
suffix="log.txt",
),
group:
"preproc"
container:
config["containers"]["spimprep"]
script:
"../scripts/imaris_to_metadata.py"


# Copy one channel out of the Imaris HDF5 file into a temporary zipped
# Zarr store; one job per stain, consumed later by imaris_to_ome_zarr.
rule imaris_channel_to_zarr:
input:
ims=get_input_sample,
params:
# channel index = position of this stain within the sample's stain list
channel=lambda wildcards: get_stains(wildcards).index(wildcards.stain),
output:
# temp(): per-channel zip is an intermediate, deleted once downstream
# rules have consumed it
zarr=temp(
bids(
root=work,
subject="{subject}",
datatype="micr",
sample="{sample}",
acq="{acq}",
stain="{stain}",
suffix="imaris.zarr.zip",
)
),
log:
bids(
root="logs",
subject="{subject}",
datatype="imaris_channel_to_zarr",
sample="{sample}",
acq="{acq}",
stain="{stain}",
suffix="log.txt",
),
container:
config["containers"]["spimprep"]
group:
"preproc"
threads: 1
resources:
# NOTE(review): mem_mb=1000 assumes the zarr copy streams in chunks
# rather than loading the channel into memory — confirm for large .ims
runtime=360,
mem_mb=1000,
shadow:
# run in a shadow dir so partial outputs don't pollute the workdir
"minimal"
script:
"../scripts/imaris_channel_to_zarr.py"


rule imaris_to_ome_zarr:
input:
zarr=lambda wildcards: expand(
bids(
root=work,
subject="{subject}",
datatype="micr",
sample="{sample}",
acq="{acq}",
stain="{stain}",
suffix="imaris.zarr.zip",
),
stain=get_stains(wildcards),
allow_missing=True,
),
metadata_json=rules.prestitched_to_metadata.output.metadata_json,
params:
max_downsampling_layers=config["ome_zarr"]["max_downsampling_layers"],
rechunk_size=config["ome_zarr"]["rechunk_size"],
scaling_method=config["ome_zarr"]["scaling_method"],
downsampling=config["bigstitcher"]["fuse_dataset"]["downsampling"],
stains=get_stains,
uri=get_output_ome_zarr_uri(),
storage_provider_settings=workflow.storage_provider_settings,
output:
**get_output_ome_zarr("imaris"),
log:
bids(
root="logs",
subject="{subject}",
datatype="imaris_to_ome_zarr",
sample="{sample}",
acq="{acq}",
suffix="log.txt",
),
container:
config["containers"]["spimprep"]
group:
"preproc"
threads: config["total_cores"]
resources:
runtime=360,
mem_mb=config["total_mem_mb"],
shadow:
"minimal"
script:
"../scripts/imaris_to_ome_zarr.py"
37 changes: 1 addition & 36 deletions workflow/rules/ome_zarr.smk
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ rule tif_stacks_to_ome_zarr:
bids(
root="logs",
subject="{subject}",
datatype="zarr_to_ome_zarr",
datatype="tif_stacks_to_ome_zarr",
sample="{sample}",
acq="{acq}",
suffix="log.txt",
Expand All @@ -89,41 +89,6 @@ rule tif_stacks_to_ome_zarr:
"../scripts/tif_stacks_to_ome_zarr.py"


rule ome_zarr_to_zipstore:
""" use 7zip to create a zipstore """
input:
zarr=bids(
root=work,
subject="{subject}",
datatype="micr",
sample="{sample}",
acq="{acq}",
suffix="SPIM.ome.zarr",
),
output:
zarr_zip=bids(
root=root,
subject="{subject}",
datatype="micr",
sample="{sample}",
acq="{acq}",
suffix="SPIM.ome.zarr.zip",
),
log:
bids(
root="logs",
subject="{subject}",
datatype="micr",
sample="{sample}",
acq="{acq}",
suffix="log.txt",
),
group:
"preproc"
shell:
# -mx0: store entries uncompressed (zarr chunks are already compressed);
# -tzip: force zip container; "/." zips the directory contents, not the
# directory itself, so the archive is a valid ZipStore root
# NOTE(review): no container: directive here — assumes 7z exists on the
# host/profile running this rule
"7z a -mx0 -tzip {output.zarr_zip} {input.zarr}/. &> {log}"


rule ome_zarr_to_nii:
input:
**get_storage_creds(),
Expand Down
13 changes: 13 additions & 0 deletions workflow/scripts/imaris_channel_to_zarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Copy a single channel from an Imaris (.ims, HDF5) file into a zipped Zarr store.

Reads the full-resolution dataset for the channel index given in
``snakemake.params.channel`` and copies it uncompressed into a new
ZipStore at ``snakemake.output.zarr``.
"""
import h5py
import hdf5plugin  # noqa: F401 -- registers HDF5 filter plugins needed to read .ims data
import zarr
from sys import stdout  # TODO: route the copy log to snakemake.log instead of stdout

# Full-resolution dataset for the requested channel (single timepoint assumed
# at TimePoint 0 -- TODO confirm for multi-timepoint acquisitions).
dataset_path = "DataSet/ResolutionLevel 0/TimePoint 0/Channel {chan}/Data".format(
    chan=snakemake.params.channel
)

with h5py.File(snakemake.input.ims, mode="r") as source:
    # mode='x' refuses to overwrite an existing output zip
    store = zarr.ZipStore(snakemake.output.zarr, dimension_separator="/", mode="x")
    try:
        dest = zarr.group(store)
        # compressor=None: raw copy; rechunking/compression happens downstream
        zarr.copy(source[dataset_path], dest, log=stdout, compressor=None)
    finally:
        # BUGFIX: the store was never closed. ZipStore only writes the zip
        # central directory on close(), so the archive was left truncated.
        store.close()

31 changes: 31 additions & 0 deletions workflow/scripts/imaris_to_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Extract physical voxel spacing from an Imaris (.ims) file's embedded OME-XML
tags and write it to a JSON sidecar (PixelSize in mm, z/y/x order)."""
import json

import h5py
import xmltodict

with h5py.File(snakemake.input.ims, "r") as hdf5_file:
    # OME metadata is stored as an array of bytes, one character per element
    xml_data = hdf5_file["DataSetInfo/OME Image Tags/Image 0"][:]

# Decode the byte array into an XML string
xml_str = bytes(xml_data).decode("utf-8", errors="ignore")

# The stored fragment has no single root element, so wrap it before parsing.
# BUGFIX: the original caught the parse exception, printed it, and carried on,
# which then crashed with a NameError on the undefined xml_dict -- let the
# parse error propagate so the rule fails with the real cause.
xml_dict = xmltodict.parse(f"<root>{xml_str}</root>", namespace_separator=":")

# NOTE(review): spacing is taken from the '@PhysicalUnit' attribute of each
# DataAxis -- confirm this attribute carries the per-voxel spacing value
# (presumably in microns, given the /1000 conversion below) and not a unit name.
custom_attrs = xml_dict["root"]["ca:CustomAttributes"]
metadata = {}
metadata["physical_size_x"] = float(custom_attrs["DataAxis0"]["@PhysicalUnit"])
metadata["physical_size_y"] = float(custom_attrs["DataAxis1"]["@PhysicalUnit"])
# DataAxis3 is the z axis here; its spacing can be negative, hence abs()
metadata["physical_size_z"] = abs(float(custom_attrs["DataAxis3"]["@PhysicalUnit"]))
# z/y/x ordering since OME-Zarr axes are ZYX; convert microns -> mm
metadata["PixelSize"] = [
    metadata["physical_size_z"] / 1000.0,
    metadata["physical_size_y"] / 1000.0,
    metadata["physical_size_x"] / 1000.0,
]
metadata["PixelSizeUnits"] = "mm"

# write metadata to json
with open(snakemake.output.metadata_json, "w") as fp:
    json.dump(metadata, fp, indent=4)




Loading

0 comments on commit 1bc5084

Please sign in to comment.