Skip to content

Commit

Permalink
Merge pull request #128 from gustaveroussy/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
quentinblampey authored Sep 25, 2024
2 parents 92412be + ff9ff66 commit c4ec001
Show file tree
Hide file tree
Showing 19 changed files with 84 additions and 59 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## [1.x.x] - 2024-xx-xx

### Fix
- Support `baysor>=0.7.0` (#125, @lguerard).
- NB: For Snakemake, please remove the `new_component_*` arguments from the Baysor config.

## [1.1.5] - 2024-09-17

### Fix
Expand Down
60 changes: 34 additions & 26 deletions docs/tutorials/api_usage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,10 @@
"outputs": [],
"source": [
"image_key = \"image\"\n",
"points_key = \"transcripts\" # (ignore this for multiplex imaging)\n",
"gene_column = \"genes\" # (optional) column of sdata[points_key] containing the gene names"
"points_key = \"transcripts\" # (ignore this for multiplex imaging)\n",
"gene_column = (\n",
" \"genes\" # (optional) column of sdata[points_key] containing the gene names\n",
")"
]
},
{
Expand Down Expand Up @@ -117,7 +119,9 @@
}
],
"source": [
"patches = sopa.segmentation.Patches2D(sdata, image_key, patch_width=1200, patch_overlap=50)\n",
"patches = sopa.segmentation.Patches2D(\n",
" sdata, image_key, patch_width=1200, patch_overlap=50\n",
")\n",
"patches.write();"
]
},
Expand Down Expand Up @@ -163,8 +167,12 @@
"source": [
"channels = [\"DAPI\"]\n",
"\n",
"method = sopa.segmentation.methods.cellpose_patch(diameter=35, channels=channels, flow_threshold=2, cellprob_threshold=-6)\n",
"segmentation = sopa.segmentation.StainingSegmentation(sdata, method, channels, min_area=2500)\n",
"method = sopa.segmentation.methods.cellpose_patch(\n",
" diameter=35, channels=channels, flow_threshold=2, cellprob_threshold=-6\n",
")\n",
"segmentation = sopa.segmentation.StainingSegmentation(\n",
" sdata, method, channels, min_area=2500\n",
")\n",
"\n",
"# The cellpose boundaries will be temporary saved here. You can choose a different path\n",
"cellpose_temp_dir = \"tuto.zarr/.sopa_cache/cellpose\""
Expand Down Expand Up @@ -231,7 +239,7 @@
],
"source": [
"# parallelize this for loop yourself (or use the Snakemake pipeline)\n",
"for patch_index in range(len(sdata['sopa_patches'])):\n",
"for patch_index in range(len(sdata[\"sopa_patches\"])):\n",
" segmentation.write_patch_cells(cellpose_temp_dir, patch_index)"
]
},
Expand Down Expand Up @@ -269,7 +277,7 @@
"cells = sopa.segmentation.StainingSegmentation.read_patches_cells(cellpose_temp_dir)\n",
"cells = sopa.segmentation.shapes.solve_conflicts(cells)\n",
"\n",
"shapes_key = \"cellpose_boundaries\" # name of the key given to the cells in sdata.shapes\n",
"shapes_key = \"cellpose_boundaries\" # name of the key given to the cells in sdata.shapes\n",
"\n",
"sopa.segmentation.StainingSegmentation.add_shapes(sdata, cells, image_key, shapes_key)"
]
Expand All @@ -287,7 +295,7 @@
"metadata": {},
"outputs": [],
"source": [
"shapes_key = \"baysor_boundaries\" # the name that we will give to the baysor \"shapes\""
"shapes_key = \"baysor_boundaries\" # the name that we will give to the baysor \"shapes\""
]
},
{
Expand Down Expand Up @@ -317,7 +325,7 @@
" \"gene\": \"genes\",\n",
" \"min_molecules_per_gene\": 0,\n",
" \"min_molecules_per_segment\": 3,\n",
" \"confidence_nn_id\": 6\n",
" \"confidence_nn_id\": 6,\n",
" },\n",
" \"segmentation\": {\n",
" \"scale\": 3, # Important parameter: typical cell diameter, in microns (see our configs)\n",
Expand All @@ -329,9 +337,7 @@
" \"n_cells_init\": 0,\n",
" \"nuclei_genes\": \"\",\n",
" \"cyto_genes\": \"\",\n",
" \"new_component_weight\": 0.2,\n",
" \"new_component_fraction\": 0.3\n",
" }\n",
" },\n",
"}"
]
},
Expand Down Expand Up @@ -373,7 +379,9 @@
"# The cellpose boundaries will be temporary saved here. You can choose a different path\n",
"baysor_temp_dir = \"tuto.zarr/.sopa_cache/baysor\"\n",
"\n",
"patches = sopa.segmentation.Patches2D(sdata, points_key, patch_width=3000, patch_overlap=50)\n",
"patches = sopa.segmentation.Patches2D(\n",
" sdata, points_key, patch_width=3000, patch_overlap=50\n",
")\n",
"valid_indices = patches.patchify_transcripts(baysor_temp_dir, config=config)"
]
},
Expand Down Expand Up @@ -409,7 +417,7 @@
"for patch_index in valid_indices:\n",
" command = f\"\"\"\n",
" cd {baysor_temp_dir}/{patch_index}\n",
" {baysor_executable_path} run --save-polygons GeoJSON -c config.toml transcripts.csv\n",
" {baysor_executable_path} run --polygon-format=GeometryCollection -c config.toml transcripts.csv\n",
" \"\"\"\n",
" subprocess.run(command, shell=True)"
]
Expand Down Expand Up @@ -501,7 +509,9 @@
}
],
"source": [
"aggregator = sopa.segmentation.Aggregator(sdata, image_key=image_key, shapes_key=shapes_key)\n",
"aggregator = sopa.segmentation.Aggregator(\n",
" sdata, image_key=image_key, shapes_key=shapes_key\n",
")\n",
"\n",
"aggregator.compute_table(gene_column=gene_column, average_intensities=True)"
]
Expand Down Expand Up @@ -611,11 +621,7 @@
"source": [
"from sopa.annotation import higher_z_score\n",
"\n",
"marker_cell_dict = {\n",
" \"CK\": \"Tumoral cell\",\n",
" \"CD20\": \"B cell\",\n",
" \"CD3\": \"T cell\"\n",
"}\n",
"marker_cell_dict = {\"CK\": \"Tumoral cell\", \"CD20\": \"B cell\", \"CD3\": \"T cell\"}\n",
"\n",
"higher_z_score(sdata.tables[\"table\"], marker_cell_dict)"
]
Expand Down Expand Up @@ -698,7 +704,9 @@
}
],
"source": [
"sopa.io.write(\"tuto.explorer\", sdata, image_key, points_key=points_key, gene_column=gene_column)"
"sopa.io.write(\n",
" \"tuto.explorer\", sdata, image_key, points_key=points_key, gene_column=gene_column\n",
")"
]
},
{
Expand Down Expand Up @@ -767,11 +775,11 @@
}
],
"source": [
"sdata\\\n",
" .pl.render_points(size=0.01, color=\"r\", alpha=0.5)\\\n",
" .pl.render_images()\\\n",
" .pl.render_shapes(shapes_key, outline=True, fill_alpha=0, outline_color=\"w\")\\\n",
" .pl.show(\"global\")"
"sdata.pl.render_points(\n",
" size=0.01, color=\"r\", alpha=0.5\n",
").pl.render_images().pl.render_shapes(\n",
" shapes_key, outline=True, fill_alpha=0, outline_color=\"w\"\n",
").pl.show(\"global\")"
]
},
{
Expand Down
2 changes: 0 additions & 2 deletions docs/tutorials/cli_usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,6 @@ iters = 500
n_cells_init = 0
nuclei_genes = ""
cyto_genes = ""
new_component_weight = 0.2
new_component_fraction = 0.3
```

Then, we generate the bounding boxes of the patches on which Baysor will be run. Here, the patches have a width and height of 1200 microns and an overlap of 50 microns. We advise bigger sizes for real datasets (see our default parameters in one of our [config files](https://github.com/gustaveroussy/sopa/tree/master/workflow/config)). On the toy dataset, this will generate **4** patches.
Expand Down
21 changes: 20 additions & 1 deletion sopa/io/reader/macsima.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from __future__ import annotations

import logging
import re
from pathlib import Path

from spatialdata import SpatialData

from .utils import _general_tif_directory_reader
from .utils import _deduplicate_names, _general_tif_directory_reader

log = logging.getLogger(__name__)

Expand All @@ -23,4 +24,22 @@ def macsima(path: Path, **kwargs: int) -> SpatialData:
Returns:
A `SpatialData` object with a 2D-image of shape `(C, Y, X)`
"""
files = list(Path(path).glob("*.tif"))

if any("A-" in file.name for file in files): # non-ome.tif format
return _general_tif_directory_reader(path, files_to_channels=_get_channel_names_macsima, **kwargs)

return _general_tif_directory_reader(path, **kwargs)


def _parse_name_macsima(file):
match = re.search(r"_A-(.*?)_C-", file.name)
if match:
antibody = match.group(1)
else:
antibody = re.search(r"_A-(.*?)\.tif", file.name).group(1)
return antibody


def _get_channel_names_macsima(files):
    """Return the channel names for MACSima files.

    One antibody name is parsed from each file (in the order given) with
    `_parse_name_macsima`, and the resulting list is passed through
    `_deduplicate_names`.
    """
    antibody_names = list(map(_parse_name_macsima, files))
    return _deduplicate_names(antibody_names)
19 changes: 16 additions & 3 deletions sopa/segmentation/transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def _read_one_segmented_patch(
directory: str, min_area: float = 0, min_vertices: int = 4
) -> tuple[list[Polygon], AnnData]:
directory: Path = Path(directory)
id_as_string, polygon_file = _find_polygon_file(directory)

loom_file = directory / "segmentation_counts.loom"
if loom_file.exists():
Expand All @@ -106,16 +107,19 @@ def _read_one_segmented_patch(

adata.obs.rename(columns={"area": SopaKeys.ORIGINAL_AREA_OBS}, inplace=True)

cells_num = pd.Series(adata.obs["CellID"].astype(int), index=adata.obs_names)
cells_num = pd.Series(adata.obs_names if id_as_string else adata.obs["CellID"].astype(int), index=adata.obs_names)
del adata.obs["CellID"]

with open(directory / "segmentation_polygons.json") as f:
with open(polygon_file) as f:
polygons_dict = json.load(f)
polygons_dict = {c["cell"]: c for c in polygons_dict["geometries"]}

cells_num = cells_num[cells_num.map(lambda num: len(polygons_dict[num]["coordinates"][0]) >= min_vertices)]

gdf = gpd.GeoDataFrame(index=cells_num.index, geometry=[shape(polygons_dict[cell_num]) for cell_num in cells_num])
gdf = gpd.GeoDataFrame(
index=cells_num.index,
geometry=[shape(polygons_dict[cell_num]) for cell_num in cells_num],
)

gdf.geometry = gdf.geometry.map(lambda cell: shapes._ensure_polygon(cell))
gdf = gdf[~gdf.geometry.isna()]
Expand All @@ -129,6 +133,15 @@ def _read_one_segmented_patch(
return gdf.geometry.values, adata[gdf.index].copy()


def _find_polygon_file(directory: Path) -> tuple[bool, Path]:
old_baysor_path = directory / "segmentation_polygons.json"
if old_baysor_path.exists():
return False, old_baysor_path
new_baysor_path = directory / "segmentation_polygons_2d.json"
assert new_baysor_path.exists(), f"Could not find the segmentation polygons file in {directory}"
return True, new_baysor_path


def _read_all_segmented_patches(
temp_dir: str,
min_area: float = 0,
Expand Down
9 changes: 7 additions & 2 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ rule patch_segmentation_baysor:
patches_file = paths.smk_patches_file_baysor,
baysor_patch = paths.smk_baysor_temp_dir / "{index}",
output:
paths.smk_baysor_temp_dir / "{index}" / "segmentation_polygons.json",
paths.smk_baysor_temp_dir / "{index}" / "segmentation_counts.loom",
params:
args_baysor_prior_seg = args.baysor_prior_seg,
Expand All @@ -126,7 +125,13 @@ rule patch_segmentation_baysor:
fi
cd {input.baysor_patch}
{config[executables][baysor]} run --save-polygons GeoJSON -c config.toml transcripts.csv {params.args_baysor_prior_seg}
help_output=$({config[executables][baysor]} run --help 2>&1) # check if the polygon-format option is available
if [[ $help_output == *"polygon-format"* ]]; then
{config[executables][baysor]} run --polygon-format GeometryCollection -c config.toml transcripts.csv {params.args_baysor_prior_seg}
else
{config[executables][baysor]} run --save-polygons GeoJSON -c config.toml transcripts.csv {params.args_baysor_prior_seg}
fi
"""

rule patch_segmentation_comseg:
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/cosmx/baysor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/cosmx/cellpose_baysor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/example_commented.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3


aggregate:
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/merscope/baysor_cellpose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/merscope/baysor_vizgen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/toy/uniform_baysor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/toy/uniform_baysor_overlaps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/toy/uniform_baysor_vizgen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/toy/uniform_cellpose_baysor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/xenium/baysor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/xenium/baysor_multimodal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
Expand Down
2 changes: 0 additions & 2 deletions workflow/config/xenium/cellpose_baysor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ segmentation:
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
Expand Down
Loading

0 comments on commit c4ec001

Please sign in to comment.