From 15a594de85173a95130bbb555806fff099fafc9b Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Sun, 25 Dec 2022 16:17:30 -0500 Subject: [PATCH 1/4] use image tag kaczmarj/wsinfer --- dockerfiles/tils.dockerfile | 2 +- dockerfiles/tumor-brca.dockerfile | 2 +- dockerfiles/tumor-luad.dockerfile | 2 +- dockerfiles/tumor-paad.dockerfile | 2 +- dockerfiles/tumor-prad.dockerfile | 2 +- scripts/build_docker_images.sh | 24 ++++++++++++------------ 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/dockerfiles/tils.dockerfile b/dockerfiles/tils.dockerfile index ebaffee..89e12db 100644 --- a/dockerfiles/tils.dockerfile +++ b/dockerfiles/tils.dockerfile @@ -2,7 +2,7 @@ # # Note about versioning: We should not use the 'latest' tag because it is a moving # target. We should prefer using a versioned release of the wsinfer pipeline. -FROM kaczmarj/patch-classification-pipeline:v0.2.1 +FROM kaczmarj/wsinfer:v0.2.1 # The CLI will use these env vars for model and weights. ENV WSINFER_MODEL="inceptionv4" diff --git a/dockerfiles/tumor-brca.dockerfile b/dockerfiles/tumor-brca.dockerfile index 8417560..d0d072f 100644 --- a/dockerfiles/tumor-brca.dockerfile +++ b/dockerfiles/tumor-brca.dockerfile @@ -2,7 +2,7 @@ # # Note about versioning: We should not use the 'latest' tag because it is a moving # target. We should prefer using a versioned release of the wsinfer pipeline. -FROM kaczmarj/patch-classification-pipeline:v0.2.1 +FROM kaczmarj/wsinfer:v0.2.1 # The CLI will use these env vars for model and weights. ENV WSINFER_MODEL="resnet34" diff --git a/dockerfiles/tumor-luad.dockerfile b/dockerfiles/tumor-luad.dockerfile index db60c7b..2d10045 100644 --- a/dockerfiles/tumor-luad.dockerfile +++ b/dockerfiles/tumor-luad.dockerfile @@ -2,7 +2,7 @@ # # Note about versioning: We should not use the 'latest' tag because it is a moving # target. We should prefer using a versioned release of the wsinfer pipeline. -FROM kaczmarj/patch-classification-pipeline:v0.2.1 +FROM kaczmarj/wsinfer:v0.2.1 # The CLI will use these env vars for model and weights. ENV WSINFER_MODEL="resnet34" diff --git a/dockerfiles/tumor-paad.dockerfile b/dockerfiles/tumor-paad.dockerfile index 6211dee..1e3e11d 100644 --- a/dockerfiles/tumor-paad.dockerfile +++ b/dockerfiles/tumor-paad.dockerfile @@ -2,7 +2,7 @@ # # Note about versioning: We should not use the 'latest' tag because it is a moving # target. We should prefer using a versioned release of the wsinfer pipeline. -FROM kaczmarj/patch-classification-pipeline:v0.2.1 +FROM kaczmarj/wsinfer:v0.2.1 # The CLI will use these env vars for model and weights. ENV WSINFER_MODEL="resnet34_preact" diff --git a/dockerfiles/tumor-prad.dockerfile b/dockerfiles/tumor-prad.dockerfile index 9c524a5..dab9e2b 100644 --- a/dockerfiles/tumor-prad.dockerfile +++ b/dockerfiles/tumor-prad.dockerfile @@ -2,7 +2,7 @@ # # Note about versioning: We should not use the 'latest' tag because it is a moving # target. We should prefer using a versioned release of the wsinfer pipeline. -FROM kaczmarj/patch-classification-pipeline:v0.2.1 +FROM kaczmarj/wsinfer:v0.2.1 # The CLI will use these env vars for model and weights. ENV WSINFER_MODEL="resnet34" diff --git a/scripts/build_docker_images.sh b/scripts/build_docker_images.sh index a288f9e..ffa699a 100644 --- a/scripts/build_docker_images.sh +++ b/scripts/build_docker_images.sh @@ -24,22 +24,22 @@ fi version=$1 # Main image. -docker build -t kaczmarj/patch-classification-pipeline:$version . +docker build -t kaczmarj/wsinfer:$version . # TILs -docker build -t kaczmarj/patch-classification-pipeline:$version-tils - < dockerfiles/tils.dockerfile +docker build -t kaczmarj/wsinfer:$version-tils - < dockerfiles/tils.dockerfile # Tumor BRCA -docker build -t kaczmarj/patch-classification-pipeline:$version-tumor-brca - < dockerfiles/tumor-brca.dockerfile +docker build -t kaczmarj/wsinfer:$version-tumor-brca - < dockerfiles/tumor-brca.dockerfile # Tumor LUAD -docker build -t kaczmarj/patch-classification-pipeline:$version-tumor-luad - < dockerfiles/tumor-luad.dockerfile +docker build -t kaczmarj/wsinfer:$version-tumor-luad - < dockerfiles/tumor-luad.dockerfile # Tumor PAAD -docker build -t kaczmarj/patch-classification-pipeline:$version-tumor-paad - < dockerfiles/tumor-paad.dockerfile +docker build -t kaczmarj/wsinfer:$version-tumor-paad - < dockerfiles/tumor-paad.dockerfile # Tumor PRAD -docker build -t kaczmarj/patch-classification-pipeline:$version-tumor-prad - < dockerfiles/tumor-prad.dockerfile +docker build -t kaczmarj/wsinfer:$version-tumor-prad - < dockerfiles/tumor-prad.dockerfile # Push images. push_images="${2:-0}" @@ -47,10 +47,10 @@ if [ $push_images -eq 0 ]; then echo "Not pushing images. Pass a 1 to the script to push images." else echo "Pushing images." - docker push kaczmarj/patch-classification-pipeline:$version - docker push kaczmarj/patch-classification-pipeline:$version-tils - docker push kaczmarj/patch-classification-pipeline:$version-tumor-brca - docker push kaczmarj/patch-classification-pipeline:$version-tumor-luad - docker push kaczmarj/patch-classification-pipeline:$version-tumor-paad - docker push kaczmarj/patch-classification-pipeline:$version-tumor-prad + docker push kaczmarj/wsinfer:$version + docker push kaczmarj/wsinfer:$version-tils + docker push kaczmarj/wsinfer:$version-tumor-brca + docker push kaczmarj/wsinfer:$version-tumor-luad + docker push kaczmarj/wsinfer:$version-tumor-paad + docker push kaczmarj/wsinfer:$version-tumor-prad fi From 409f27daff4f700161f6a10ac8a2192a9eabcf5b Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Sun, 25 Dec 2022 16:17:46 -0500 Subject: [PATCH 2/4] use workdir /opt/wsinfer and use default cmd --help --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 70382c0..f26f4ca 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime -WORKDIR /opt/wsi-classification +WORKDIR /opt/wsinfer COPY . . RUN apt-get update \ && apt-get install -y --no-install-recommends gcc git libopenslide0 \ @@ -15,4 +15,5 @@ RUN mkdir -p "$TORCH_HOME" \ && chmod a+s "$TORCH_HOME" WORKDIR /work ENTRYPOINT ["wsinfer"] +CMD ["--help"] LABEL maintainer="Jakub Kaczmarzyk " From 18bc1909c2d4aefc0ff8a4dc65c12d864e7f324f Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Sun, 25 Dec 2022 16:41:42 -0500 Subject: [PATCH 3/4] add shields for python versions and pypi version --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 104145f..25f4cd3 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ Original H&E | Heatmap of Tumor Probability 🔥 🚀 Blazingly fast pipeline to run patch-based classification models on whole slide images. ![Continuous Integration](https://github.com/kaczmarj/patch-classification-pipeline/actions/workflows/ci.yml/badge.svg) +![Supported Python versions](https://img.shields.io/pypi/pyversions/wsinfer) +![Version on PyPI](https://img.shields.io/pypi/v/wsinfer.svg) # Table of contents From dae4efc7cab23614451e0965894312ffc699020d Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Sun, 25 Dec 2022 20:23:23 -0500 Subject: [PATCH 4/4] add version key to yaml configs --- README.md | 2 + tests/test_all.py | 57 +++++++++++++++++++ .../modeldefs/inceptionv4_tcga-brca-v1.yaml | 2 + .../inceptionv4nobn_tcga-tils-v1.yaml | 2 + .../preactresnet34_tcga-paad-v1.yaml | 2 + wsinfer/modeldefs/resnet34_tcga-brca-v1.yaml | 2 + wsinfer/modeldefs/resnet34_tcga-luad-v1.yaml | 2 + wsinfer/modeldefs/resnet34_tcga-prad-v1.yaml | 2 + wsinfer/modeldefs/vgg16mod_tcga-BRCA-v1.yaml | 2 + wsinfer/modellib/models.py | 6 +- 10 files changed, 78 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 25f4cd3..c64f862 100644 --- a/README.md +++ b/README.md @@ -270,6 +270,8 @@ Define a new model with a YAML configuration file. Please see the example below an overview of the specification. ```yaml +# The version of the spec. At this time, only "1.0" is valid. (str) +version: "1.0" # Models are referenced by the pair of (architecture, weights), so this pair must be unique. # The name of the architecture. We use timm to supply hundreds or network architectures, # so the name can be one of those models. If the architecture is not provided in timm, diff --git a/tests/test_all.py b/tests/test_all.py index f8923ed..b32175d 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -62,6 +62,7 @@ def test_cli_list(tmp_path: Path): config_root_single.mkdir() configs = [ dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -73,6 +74,7 @@ def test_cli_list(tmp_path: Path): class_names=["tumor"], ), dict( + version="1.0", name="foo2", architecture="resnet34", url="foo", @@ -412,6 +414,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): dict(name="foo", architecture="resnet34"), # Missing url dict( + version="1.0", name="foo", architecture="resnet34", # url="foo", @@ -424,6 +427,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # missing url_file_name when url is given dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -436,6 +440,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # url and file used together dict( + version="1.0", name="foo", architecture="resnet34", file=__file__, @@ -449,6 +454,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # nonexistent file dict( + version="1.0", name="foo", architecture="resnet34", file="path/to/fake/file", @@ -462,6 +468,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # num_classes missing dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -474,6 +481,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # num classes not equal to len of class names dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -486,6 +494,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform missing dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -498,6 +507,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform.resize_size missing dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -510,6 +520,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform.mean missing dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -522,6 +533,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform.std missing dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -534,6 +546,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform.resize_size non int dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -546,6 +559,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform.resize_size non int dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -560,6 +574,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform.mean not a list of three floats dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -572,6 +587,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform.mean not a list of three floats dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -584,6 +600,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform.mean not a list of three floats dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -596,6 +613,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform.std not a list of three floats dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -608,6 +626,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform.std not a list of three floats dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -620,6 +639,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # transform.std not a list of three floats dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -632,6 +652,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # invalid patch_size_pixels -- list dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -644,6 +665,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # invalid patch_size_pixels -- float dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -656,6 +678,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # invalid patch_size_pixels -- negative dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -668,6 +691,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # invalid spacing_um_px -- zero dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -680,6 +704,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # invalid spacing_um_px -- list dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -692,6 +717,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # invalid class_names -- str dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -704,6 +730,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # invalid class_names -- len not equal to num_classes dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -716,6 +743,7 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): ), # invalid class_names -- not list of str dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -726,6 +754,33 @@ def test_cli_run_from_config(tiff_image: Path, tmp_path: Path): spacing_um_px=0.25, class_names=[1], ), + # unknown key + dict( + fakekey="foobar", + version="1.0", + name="foo", + architecture="resnet34", + url="foo", + url_file_name="foo", + num_classes=1, + transform=dict(resize_size=299, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), + patch_size_pixels=350, + spacing_um_px=0.25, + class_names=["foo"], + ), + # version != '1.0' + dict( + version="2.0", + name="foo", + architecture="resnet34", + url="foo", + url_file_name="foo", + num_classes=1, + transform=dict(resize_size=299, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), + patch_size_pixels=350, + spacing_um_px=0.25, + class_names=["foo"], + ), ], ) def test_invalid_modeldefs(modeldef, tmp_path: Path): @@ -744,6 +799,7 @@ def test_model_registration(tmp_path: Path): # Test that registering duplicate weights will error. d = dict( + version="1.0", name="foo", architecture="resnet34", url="foo", @@ -768,6 +824,7 @@ def test_model_registration(tmp_path: Path): path = tmp_path / "configs" / "foobar.yaml" path.parent.mkdir() d = dict( + version="1.0", name="foo2", architecture="resnet34", url="foo", diff --git a/wsinfer/modeldefs/inceptionv4_tcga-brca-v1.yaml b/wsinfer/modeldefs/inceptionv4_tcga-brca-v1.yaml index ff76566..2aad5f2 100644 --- a/wsinfer/modeldefs/inceptionv4_tcga-brca-v1.yaml +++ b/wsinfer/modeldefs/inceptionv4_tcga-brca-v1.yaml @@ -1,4 +1,6 @@ # Configuration of a breast cancer tumor detection model. +# The specification version. Only 1.0 is supported at this time. +version: "1.0" # The models are referenced by the pair of [architecture, weights], so this pair must # be unique. architecture: inceptionv4 # Must be a string. diff --git a/wsinfer/modeldefs/inceptionv4nobn_tcga-tils-v1.yaml b/wsinfer/modeldefs/inceptionv4nobn_tcga-tils-v1.yaml index 41506d6..2991dfa 100644 --- a/wsinfer/modeldefs/inceptionv4nobn_tcga-tils-v1.yaml +++ b/wsinfer/modeldefs/inceptionv4nobn_tcga-tils-v1.yaml @@ -1,4 +1,6 @@ # Configuration of a tumor infiltrating lymphocyte detection model. +# The specification version. Only 1.0 is supported at this time. +version: "1.0" # The models are referenced by the pair of [architecture, weights], so this pair must # be unique. # Inceptionv4 without batch normalization. diff --git a/wsinfer/modeldefs/preactresnet34_tcga-paad-v1.yaml b/wsinfer/modeldefs/preactresnet34_tcga-paad-v1.yaml index a6c72be..dc2229e 100644 --- a/wsinfer/modeldefs/preactresnet34_tcga-paad-v1.yaml +++ b/wsinfer/modeldefs/preactresnet34_tcga-paad-v1.yaml @@ -1,4 +1,6 @@ # Configuration of a pancreatic adenocarcinoma tumor detection model. +# The specification version. Only 1.0 is supported at this time. +version: "1.0" # The models are referenced by the pair of [architecture, weights], so this pair must # be unique. architecture: preactresnet34 # Must be a string. diff --git a/wsinfer/modeldefs/resnet34_tcga-brca-v1.yaml b/wsinfer/modeldefs/resnet34_tcga-brca-v1.yaml index 9210ce4..096494d 100644 --- a/wsinfer/modeldefs/resnet34_tcga-brca-v1.yaml +++ b/wsinfer/modeldefs/resnet34_tcga-brca-v1.yaml @@ -1,4 +1,6 @@ # Configuration of a breast cancer tumor detection model. +# The specification version. Only 1.0 is supported at this time. +version: "1.0" # The models are referenced by the pair of [architecture, weights], so this pair must # be unique. architecture: resnet34 # Must be a string. diff --git a/wsinfer/modeldefs/resnet34_tcga-luad-v1.yaml b/wsinfer/modeldefs/resnet34_tcga-luad-v1.yaml index 9a32315..bbc5b8d 100644 --- a/wsinfer/modeldefs/resnet34_tcga-luad-v1.yaml +++ b/wsinfer/modeldefs/resnet34_tcga-luad-v1.yaml @@ -1,4 +1,6 @@ # Configuration of a lung adenocarcinoma tumor detection model. +# The specification version. Only 1.0 is supported at this time. +version: "1.0" # The models are referenced by the pair of [architecture, weights], so this pair must # be unique. architecture: resnet34 # Must be a string. diff --git a/wsinfer/modeldefs/resnet34_tcga-prad-v1.yaml b/wsinfer/modeldefs/resnet34_tcga-prad-v1.yaml index eaba521..65ab836 100644 --- a/wsinfer/modeldefs/resnet34_tcga-prad-v1.yaml +++ b/wsinfer/modeldefs/resnet34_tcga-prad-v1.yaml @@ -1,4 +1,6 @@ # Configuration of a prostate adenocarcinoma tumor detection model. +# The specification version. Only 1.0 is supported at this time. +version: "1.0" # The models are referenced by the pair of [architecture, weights], so this pair must # be unique. architecture: resnet34 # Must be a string. diff --git a/wsinfer/modeldefs/vgg16mod_tcga-BRCA-v1.yaml b/wsinfer/modeldefs/vgg16mod_tcga-BRCA-v1.yaml index 824a8d9..43204d6 100644 --- a/wsinfer/modeldefs/vgg16mod_tcga-BRCA-v1.yaml +++ b/wsinfer/modeldefs/vgg16mod_tcga-BRCA-v1.yaml @@ -1,4 +1,6 @@ # Configuration of a tumor infiltrating lymphocyte detection model. +# The specification version. Only 1.0 is supported at this time. +version: "1.0" # The models are referenced by the pair of [architecture, weights], so this pair must # be unique. # Inceptionv4 without batch normalization. diff --git a/wsinfer/modellib/models.py b/wsinfer/modellib/models.py index e96b98a..0931e9f 100644 --- a/wsinfer/modellib/models.py +++ b/wsinfer/modellib/models.py @@ -10,7 +10,6 @@ from typing import Tuple from typing import Union -from PIL import Image import timm import torch from torch.hub import load_state_dict_from_url @@ -91,6 +90,7 @@ def from_yaml(cls, path): # Validate contents. # Validate keys. required_keys = [ + "version", "name", "architecture", "num_classes", @@ -113,6 +113,10 @@ def from_yaml(cls, path): f"required key not found in 'transform' section: '{req_key}'" ) + # We include a 'version' key so we can handle updates if needed in the future. + # At this point, we only support version 1.0. + if d["version"] != "1.0": + raise ValueError("config file must include version: '1.0'.") # Either 'url' or 'file' is required. If 'url' is used, then 'url_file_name' is # required. if "url" not in d.keys() and "file" not in d.keys():