diff --git a/docs/clusterplot.ipynb b/docs/clusterplot.ipynb index 5067002..894b59d 100644 --- a/docs/clusterplot.ipynb +++ b/docs/clusterplot.ipynb @@ -1,161 +1,84 @@ { "cells": [ { - "cell_type": "code", - "execution_count": 1, - "id": "35d1c118-c57f-4ba4-9926-de78d5db451c", + "cell_type": "markdown", + "id": "96d5e526-425b-4319-8043-828208956ef9", "metadata": {}, - "outputs": [], "source": [ - "import bia_bob\n", - "import pyclesperanto_prototype as cle\n", - "import stackview" - ] - }, - { - "cell_type": "raw", - "id": "39df4e10-bdf9-4e6f-8289-71554fa11ee8", - "metadata": {}, - "source": [ - "%bob load the cells3d dataset and extract the nuclei channel" + "# Cluster plots\n", + "Using `stackview.clusterplot` we can visualize contents of pandas DataFrames and corresponding segmented objects in an image side-by-side. In such a plot you can select objects and visualize the selection. This might be useful for exploring feature extraction parameter spaces." ] }, { "cell_type": "code", - "execution_count": 2, - "id": "e9bbbd3b-ac3c-4171-a42f-ea07cadc7d63", + "execution_count": 1, + "id": "f4ec4d56-e298-40d8-a5b5-836cbcc2897d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(60, 256, 256)" + "'0.12.0'" ] }, - "execution_count": 2, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "import numpy as np # Already imported modules should not be imported again\n", - "from skimage.data import cells3d\n", - "\n", - "# Extract the nuclei channel (assuming channel 1 is the nuclei channel)\n", - "nuclei_channel = cells3d()[:, 1, :, :]\n", + "import pandas as pd\n", + "import numpy as np\n", + "import stackview\n", + "import pandas as pd\n", + "from skimage.measure import regionprops_table\n", + "from skimage.io import imread\n", + "from skimage.filters import threshold_otsu\n", + "from skimage.measure import label\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.preprocessing import StandardScaler \n", + "from umap 
import UMAP\n", "\n", - "nuclei_channel.shape # to verify the extraction" + "stackview.__version__" ] }, { - "cell_type": "raw", - "id": "09769fd0-4606-437f-9fc5-f7f260f871a5", + "cell_type": "markdown", + "id": "dfddfa23-9290-425e-b8e5-2fd4d586db9a", "metadata": {}, "source": [ - "%bob segment the nuclei_channel image using otsu thresholding and connected component labeling" + "To demonstrate this, we need an image, a segmentation and a table of extracted features." ] }, { "cell_type": "code", - "execution_count": 3, - "id": "72a47ee0-be9b-456c-91d3-f91a58c95dff", + "execution_count": 2, + "id": "e252c694-f7af-4b0d-9bbe-e6f682e8ab5c", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
shape(60, 256, 256)
dtypeuint32
size15.0 MB
min0
max740
\n", - "\n", - "
" - ], - "text/plain": [ - "StackViewNDArray([[[0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " ...,\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0]],\n", - "\n", - " [[0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " ...,\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0]],\n", - "\n", - " [[0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " ...,\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0]],\n", - "\n", - " ...,\n", - "\n", - " [[0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " ...,\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0]],\n", - "\n", - " [[0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " ...,\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0]],\n", - "\n", - " [[0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " ...,\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0]]], dtype=uint32)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# Apply Otsu's threshold to create a binary image\n", - "binary_nuclei = cle.threshold_otsu(nuclei_channel)\n", - "\n", - "# Perform connected component labeling\n", - "labeled_nuclei = cle.connected_components_labeling_box(binary_nuclei)\n", + "image = imread('data/blobs.tif')\n", "\n", - "# Display the segmented label map\n", - "stackview.insight(labeled_nuclei)" + "# segment image\n", + "thresh = threshold_otsu(image)\n", + "binary_image = image > thresh\n", + 
"labeled_image = label(binary_image)" ] }, { "cell_type": "code", "execution_count": 4, - "id": "f94055dd-0aef-4de6-9d18-dc938bb0149e", + "id": "9e84d0b9-3b2f-42a9-b28a-b61f729d2b52", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\rober\\miniforge3\\envs\\bob-env\\Lib\\site-packages\\umap\\umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n", + " warn(\n" + ] + }, { "data": { "text/html": [ @@ -177,53 +100,109 @@ " \n", " \n", " \n", - " label\n", - " area\n", " mean_intensity\n", + " std_intensity\n", + " centroid-0\n", + " centroid-1\n", + " area\n", + " feret_diameter_max\n", + " minor_axis_length\n", + " major_axis_length\n", + " UMAP1\n", + " UMAP2\n", " \n", " \n", " \n", " \n", " 0\n", - " 1\n", - " 27119.0\n", - " 12862.000295\n", + " 190.854503\n", + " 30.269911\n", + " 13.212471\n", + " 19.986143\n", + " 433.0\n", + " 36.055513\n", + " 16.819060\n", + " 34.957399\n", + " 4.446589\n", + " 0.901159\n", " \n", " \n", " 1\n", - " 2\n", - " 5.0\n", - " 11143.800000\n", + " 179.286486\n", + " 21.824090\n", + " 4.270270\n", + " 62.945946\n", + " 185.0\n", + " 21.377558\n", + " 11.803854\n", + " 21.061417\n", + " 2.342915\n", + " -0.930705\n", " \n", " \n", " 2\n", - " 3\n", - " 168.0\n", - " 11268.273810\n", + " 205.617021\n", + " 29.358477\n", + " 12.568389\n", + " 108.329787\n", + " 658.0\n", + " 32.449961\n", + " 28.278264\n", + " 30.212552\n", + " 4.911047\n", + " 0.156550\n", " \n", " \n", " 3\n", - " 4\n", - " 60.0\n", - " 11758.683333\n", + " 217.327189\n", + " 36.019565\n", + " 9.806452\n", + " 154.520737\n", + " 434.0\n", + " 26.925824\n", + " 23.064079\n", + " 24.535398\n", + " 4.941196\n", + " -0.982479\n", " \n", " \n", " 4\n", - " 5\n", - " 1.0\n", - " 10527.000000\n", + " 212.142558\n", + " 29.872907\n", + " 13.545073\n", + " 246.809224\n", + " 477.0\n", + " 31.384710\n", + " 19.833058\n", + " 31.162612\n", + " 5.321925\n", + " 
-1.058476\n", " \n", " \n", "\n", "" ], "text/plain": [ - " label area mean_intensity\n", - "0 1 27119.0 12862.000295\n", - "1 2 5.0 11143.800000\n", - "2 3 168.0 11268.273810\n", - "3 4 60.0 11758.683333\n", - "4 5 1.0 10527.000000" + " mean_intensity std_intensity centroid-0 centroid-1 area \\\n", + "0 190.854503 30.269911 13.212471 19.986143 433.0 \n", + "1 179.286486 21.824090 4.270270 62.945946 185.0 \n", + "2 205.617021 29.358477 12.568389 108.329787 658.0 \n", + "3 217.327189 36.019565 9.806452 154.520737 434.0 \n", + "4 212.142558 29.872907 13.545073 246.809224 477.0 \n", + "\n", + " feret_diameter_max minor_axis_length major_axis_length UMAP1 \\\n", + "0 36.055513 16.819060 34.957399 4.446589 \n", + "1 21.377558 11.803854 21.061417 2.342915 \n", + "2 32.449961 28.278264 30.212552 4.911047 \n", + "3 26.925824 23.064079 24.535398 4.941196 \n", + "4 31.384710 19.833058 31.162612 5.321925 \n", + "\n", + " UMAP2 \n", + "0 0.901159 \n", + "1 -0.930705 \n", + "2 0.156550 \n", + "3 -0.982479 \n", + "4 -1.058476 " ] }, "execution_count": 4, @@ -232,51 +211,130 @@ } ], "source": [ - "import pandas as pd\n", - "from skimage.measure import regionprops_table\n", + "properties = regionprops_table(labeled_image, intensity_image=image, properties=[\n", + " 'mean_intensity', 'std_intensity',\n", + " 'centroid', 'area', 'feret_diameter_max', \n", + " 'minor_axis_length', 'major_axis_length'])\n", + "\n", + "df = pd.DataFrame(properties)\n", + "\n", + "# Select numeric columns\n", + "numeric_cols = df.select_dtypes(include=[np.number]).columns\n", "\n", - "# Define properties to extract\n", - "properties = ['label', 'area', 'mean_intensity']\n", + "# Scale the data\n", + "scaler = StandardScaler()\n", + "scaled_data = scaler.fit_transform(df[numeric_cols])\n", "\n", - "# Extract features\n", - "measurements = regionprops_table(np.asarray(labeled_nuclei), intensity_image=nuclei_channel, properties=properties)\n", + "# Create UMAP embedding\n", + "umap = UMAP(n_components=2, 
random_state=42) \n", + "umap_coords = umap.fit_transform(scaled_data)\n", "\n", - "# Store results in a DataFrame\n", - "df = pd.DataFrame(measurements)\n", - "df.head() # To display the first few rows of the extracted features" + "# Add UMAP coordinates to dataframe \n", + "df['UMAP1'] = umap_coords[:, 0]\n", + "df['UMAP2'] = umap_coords[:, 1]\n", + "\n", + "df.head()" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "1fba06ed-5ae5-4049-a656-d1d4ea18894d", + "execution_count": 5, + "id": "1e8ea1bf-3042-4220-9225-e478623c8261", + "metadata": {}, + "outputs": [], + "source": [ + "num_objects = df.shape[0]\n", + "pre_selection = np.zeros(num_objects)\n", + "pre_selection[:int(num_objects/2)] = 1\n", + "\n", + "df[\"selection\"] = pre_selection" + ] + }, + { + "cell_type": "markdown", + "id": "668151cb-4a89-43a8-bbb0-7fd4fe54414a", + "metadata": {}, + "source": [ + "## Interaction\n", + "Using some more involved code we can also draw the image and the scatter plot side-by-side and make them interact. You can select data points in the plot on the right and the visualization on the left will be updated accordingly." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7b2bbd63-3255-4ada-94a6-b77207c8efaf", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9e50d99d1f4b431787c2b422e15f6b49", + "model_id": "f05e304104fc4597bd76f6b244268060", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(VBox(children=(HBox(children=(VBox(children=(ImageWidget(height=256, width=256),…" + "VBox(children=(HBox(children=(HBox(children=(VBox(children=(VBox(children=(HBox(children=(VBox(children=(Image…" ] }, - "execution_count": 6, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "import stackview\n", - "stackview.clusterplot(df, labeled_nuclei, column_x=\"area\", column_y=\"mean_intensity\", image=nuclei_channel)" + "stackview.clusterplot(image=image,\n", + " labels=labeled_image,\n", + " df=df,\n", + " column_x=\"centroid-0\",\n", + " column_y=\"centroid-1\",\n", + " zoom_factor=1.5,\n", + " markersize=15)" + ] + }, + { + "cell_type": "markdown", + "id": "f9b8afd9-9f0a-4f3e-967f-f7680de602a9", + "metadata": {}, + "source": [ + "Every time the user selects different data points, the selection in our dataframe is update" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0aa32ebb-7539-48e1-a8c4-be0deb255d05", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 True\n", + "2 False\n", + "3 False\n", + "4 False\n", + " ... 
\n", + "59 True\n", + "60 True\n", + "61 True\n", + "62 True\n", + "63 True\n", + "Name: selection, Length: 64, dtype: bool" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"selection\"]" ] }, { "cell_type": "code", "execution_count": null, - "id": "5c07c9fc-873b-490b-a2b3-b280a9557f13", + "id": "e54890a5-e89c-4861-bf5b-8aa1c5f7f697", "metadata": {}, "outputs": [], "source": [] @@ -298,7 +356,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4,