diff --git a/notebooks/resources/Landcover_training_data_density_PhI.png b/notebooks/resources/Landcover_training_data_density_PhI.png new file mode 100755 index 00000000..d6e3fc0a Binary files /dev/null and b/notebooks/resources/Landcover_training_data_density_PhI.png differ diff --git a/notebooks/utils.py b/notebooks/utils.py index c0013084..d72186cc 100644 --- a/notebooks/utils.py +++ b/notebooks/utils.py @@ -118,6 +118,16 @@ def get_processing_period(self): return TemporalContext(start, end) +def get_input(label): + """Prompt until the user enters a non-empty name for `label` that contains no whitespace, then return it.""" + while True: + modelname = input(f"Enter a short name for your {label} (don't use spaces): ") + # Reject empty answers and any whitespace character (tabs included), not only " ". + if modelname and not any(ch.isspace() for ch in modelname): + return modelname + print("Invalid input. Please enter a name without spaces.") + + LANDCOVER_LUT = { 10: "Unspecified cropland", 11: "Temporary crops", diff --git a/notebooks/worldcereal_v1_demo_custom_cropland.ipynb b/notebooks/worldcereal_v1_demo_custom_cropland.ipynb index 6fa4aabf..34353dfb 100644 --- a/notebooks/worldcereal_v1_demo_custom_cropland.ipynb +++ b/notebooks/worldcereal_v1_demo_custom_cropland.ipynb @@ -11,90 +11,61 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Table of contents** \n", - "- [Before you start](#toc1_) \n", - "- [Define a region of interest](#toc2_) \n", - "- [Extract public training data](#toc3_) \n", - "- [Define target classes](#toc4_) \n", - "- [Extract required model inputs](#toc5_) \n", - "- [Train custom classification model](#toc6_) \n", - "- [Deploy custom model](#toc7_) \n", - "- [Generate a map](#toc8_) \n", + "### Introduction\n", "\n", - "\n", - "" + "This notebook guides you through the process of training a custom cropland classification model using publicly available and harmonized in-situ reference data for your area of interest. Afterwards, the model can be applied to your area and season of interest to generate a cropland extent map.\n", + "\n", + "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run. 
On average, one such run consumes 35 credits on the Copernicus Data Space Ecosystem." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# [Before you start](#toc0_)\n", - "\n", - "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo." + "### Content\n", + " \n", + "- [Before you start](#Before-you-start)\n", + "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", + "- [2. Extract public reference data](#2.-Extract-public-reference-data)\n", + "- [3. Create your custom cropland class](#3.-Create-your-custom-cropland-class)\n", + "- [4. Prepare training features](#4.-Prepare-training-features)\n", + "- [5. Train custom classification model](#5.-Train-custom-classification-model)\n", + "- [6. Deploy your custom model](#6.-Deploy-your-custom-model)\n", + "- [7. Generate a map](#7.-Generate-a-map)\n" ] }, { - "cell_type": "code", - "execution_count": 1, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# TEMPORARY CELL\n", + "### Before you start\n", "\n", - "import sys\n", - "sys.path.append('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/notebooks')\n", - "%load_ext autoreload\n", - "%autoreload 2" + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) by registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# [Define a region of interest](#toc0_)\n", + "### 1. 
Define your region of interest\n", "\n", "When running the code snippet below, an interactive map will be visualized.\n", "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "\n", "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", - "The widget will automatically store the coordinates of the last rectangle you drew on the map.\n" + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported to avoid excessive credit usage (roughly 120 credits will be consumed for this size of a processing extent).\n", + "\n", + "The widget will automatically store the coordinates of the last rectangle you drew on the map." ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ca751d434b1940a29d0f50f8eae00b0b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Map(center=[51.1872, 5.1154], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoo…" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-12 16:57:20.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mhandle_draw\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mYour processing extent: (37.554018, -3.071952, 37.680361, -2.973213)\u001b[0m\n", - "\u001b[32m2024-10-12 16:57:20.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mhandle_draw\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mArea of processing extent: 153.72 km²\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "from worldcereal.utils.map import ui_map\n", "\n", @@ -106,44 +77,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# [Extract public training data](#toc0_)\n", + "### 2. Extract public reference data\n", "\n", "Here we query existing reference data that have already been processed by WorldCereal and are ready to use.\n", - "To increase the number of hits, we expand the search area by 250 km in all directions." + "To increase the number of hits, we expand the search area by 250 km in all directions.\n", + "\n", + "We print the number of training samples retrieved per year." 
] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-12 16:57:28.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mget_processing_extent\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYour processing extent: (37.554018, -3.071952, 37.680361, -2.973213)\u001b[0m\n", - "\u001b[32m2024-10-12 16:57:28.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.refdata\u001b[0m:\u001b[36mquery_public_extractions\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mApplying a buffer of 250 km to the selected area ...\u001b[0m\n", - "\u001b[32m2024-10-12 16:57:28.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.refdata\u001b[0m:\u001b[36mquery_public_extractions\u001b[0m:\u001b[36m81\u001b[0m - \u001b[1mQuerying WorldCereal global extractions database (this can take a while) ...\u001b[0m\n", - "\u001b[32m2024-10-12 16:57:43.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.refdata\u001b[0m:\u001b[36mprocess_parquet\u001b[0m:\u001b[36m127\u001b[0m - \u001b[1mProcessing selected samples ...\u001b[0m\n", - "\u001b[32m2024-10-12 16:57:44.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.refdata\u001b[0m:\u001b[36mprocess_parquet\u001b[0m:\u001b[36m130\u001b[0m - \u001b[1mExtracted and processed 10459 samples from global database.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "year\n", - "2019 3318\n", - "2021 3297\n", - "2020 3187\n", - "2018 657\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from worldcereal.utils.refdata import query_public_extractions\n", "\n", @@ -159,31 +105,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# [Define target classes](#toc0_)\n", + "### 3. 
Create your custom cropland class\n", "\n", "Run the next cell and select all land cover classes you would like to include in your \"cropland\" class. All classes that are not selected will be grouped under the \"other\" category. " ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d4e0ade3de024bf5b6733abbbfd8ae9c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Checkbox(value=False, description='No temporary crops (6895 samples)'), Checkbox(value=False, d…" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# from utils import pick_croptypes\n", "from utils import select_landcover\n", @@ -202,23 +133,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "downstream_class\n", - "other 8086\n", - "cropland 2373\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from utils import get_custom_cropland_labels\n", "\n", @@ -230,36 +147,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# [Extract required model inputs](#toc0_)\n", + "### 4. Prepare training features\n", "\n", - "Here we prepare presto inputs features for each sample by using a model pretrained on WorldCereal data. The resulting `encodings` and `targets` will be used for model training." + "Using a deep learning framework (Presto), we derive classification features for each sample. The resulting `encodings` and `targets` will be used for model training." 
] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-12 16:59:55.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mprepare_training_dataframe\u001b[0m:\u001b[36m314\u001b[0m - \u001b[1mPresto URL: https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal/models/PhaseII/presto-ss-wc-ft-ct_cropland_CROPLAND2_30D_random_time-token=none_balance=True_augment=True.pt\u001b[0m\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-12 16:59:55.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.train.data\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m35\u001b[0m - \u001b[1mAugmentation is enabled. The horizontal jittering of the selected window will be performed.\u001b[0m\n", - "\u001b[32m2024-10-12 16:59:55.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.train.data\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mOriginal dataset size: 10459\u001b[0m\n", - "\u001b[32m2024-10-12 16:59:55.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.train.data\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mDataset size after 1 repeats: 10459\u001b[0m\n", - "\u001b[32m2024-10-12 16:59:55.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mprepare_training_dataframe\u001b[0m:\u001b[36m330\u001b[0m - \u001b[1mComputing Presto embeddings ...\u001b[0m\n", - "100%|██████████| 41/41 [00:35<00:00, 1.16it/s]\n", - "\u001b[32m2024-10-12 17:00:30.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mprepare_training_dataframe\u001b[0m:\u001b[36m338\u001b[0m - \u001b[1mDone.\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "from utils import prepare_training_dataframe\n", "\n", @@ -270,72 +167,33 @@ "cell_type": "markdown", "metadata": {}, 
"source": [ - "# [Train custom classification model](#toc0_)\n", - "We train a catboost model for the selected crop types. Class weights are automatically determined to balance the individual classes." + "### 5. Train custom classification model\n", + "We train a catboost model for the selected land cover classes. Class weights are automatically determined to balance the individual classes." ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-12 17:02:06.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mtrain_classifier\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1mSplit train/test ...\u001b[0m\n", - "\u001b[32m2024-10-12 17:02:06.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mtrain_classifier\u001b[0m:\u001b[36m401\u001b[0m - \u001b[1mComputing class weights ...\u001b[0m\n", - "\u001b[32m2024-10-12 17:02:06.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mtrain_classifier\u001b[0m:\u001b[36m414\u001b[0m - \u001b[1mClass weights: {'cropland': 2.204, 'other': 0.647}\u001b[0m\n", - "\u001b[32m2024-10-12 17:02:06.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mtrain_classifier\u001b[0m:\u001b[36m450\u001b[0m - \u001b[1mTraining CatBoost classifier ...\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Learning rate set to 0.021674\n", - "0:\tlearn: 0.8889410\ttest: 0.8789516\tbest: 0.8789516 (0)\ttotal: 84.3ms\tremaining: 11m 14s\n", - "25:\tlearn: 0.9214067\ttest: 0.9120715\tbest: 0.9120715 (25)\ttotal: 458ms\tremaining: 2m 20s\n", - "50:\tlearn: 0.9330262\ttest: 0.9148994\tbest: 0.9151916 (44)\ttotal: 827ms\tremaining: 2m 8s\n", - "75:\tlearn: 0.9415866\ttest: 0.9184952\tbest: 0.9184952 (74)\ttotal: 1.22s\tremaining: 2m 7s\n", - "100:\tlearn: 0.9480570\ttest: 0.9198608\tbest: 0.9211268 
(84)\ttotal: 1.62s\tremaining: 2m 6s\n", - "125:\tlearn: 0.9536091\ttest: 0.9199263\tbest: 0.9211268 (84)\ttotal: 1.99s\tremaining: 2m 4s\n", - "Stopped by overfitting detector (50 iterations wait)\n", - "\n", - "bestTest = 0.9211267736\n", - "bestIteration = 84\n", - "\n", - "Shrink model to first 85 iterations.\n" - ] - } - ], + "outputs": [], "source": [ "from utils import train_cropland_classifier\n", "\n", "custom_model, report, confusion_matrix = train_cropland_classifier(training_dataframe)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before training, the available training data has been automatically split into a calibration and validation part. By executing the next cell, you get an idea of how well the model performs on the independent validation set." + ] + }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " cropland 0.78 0.92 0.84 475\n", - " other 0.97 0.92 0.95 1617\n", - "\n", - " accuracy 0.92 2092\n", - " macro avg 0.88 0.92 0.90 2092\n", - "weighted avg 0.93 0.92 0.92 2092\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# Print the classification report\n", "print(report)" @@ -345,50 +203,31 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# [Deploy custom model](#toc0_)\n", + "### 6. Deploy your custom model\n", "\n", - "Once trained, we have to upload our model to the cloud so it can be used for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n" + "Once trained, we have to upload our model to the cloud so it can be used by OpenEO for inference. 
Note that these models are only kept in cloud storage for a limited amount of time.\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-12 17:02:54.665\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.upload\u001b[0m:\u001b[36mdeploy_model\u001b[0m:\u001b[36m205\u001b[0m - \u001b[1mDeploying model ...\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Authenticated using refresh token.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-12 17:02:56.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.upload\u001b[0m:\u001b[36mdeploy_model\u001b[0m:\u001b[36m211\u001b[0m - \u001b[1mDeployed to: s3://OpenEO-artifacts/fd307620ba8a0a07c44a2dc28541b181d5c03cb4/2024/10/12/demo_cropland_TZA_custommodel.onnx\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "from worldcereal.utils.upload import deploy_model\n", "from openeo_gfmap.backend import cdse_connection\n", + "from utils import get_input\n", "\n", - "model_url = deploy_model(cdse_connection(), custom_model, pattern=\"demo_cropland_TZA\")" + "modelname = get_input(\"model\")\n", + "model_url = deploy_model(cdse_connection(), custom_model, pattern=modelname)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# [Generate a map](#toc0_)\n", + "### 7. Generate a map\n", "\n", "Using our custom model, we generate a map for our region and season of interest.\n", "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." 
@@ -396,27 +235,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-12 17:03:53.786\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mget_processing_extent\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYour processing extent: (37.554018, -3.071952, 37.680361, -2.973213)\u001b[0m\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from utils import retrieve_worldcereal_seasons\n", "\n", @@ -434,24 +255,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "daa6275553764bddba71eb528a3c9c41", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(HTML(value=\"\\n
\\n
\n", + "The next cell will submit a map inference job on CDSE through OpenEO.
\n", + "The first time you run this, you will be asked to authenticate with your CDSE account by clicking the link provided below the cell.
\n", + "Then sit back and wait until your map is ready..." + ] + }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Authenticated using refresh token.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-10-08 12:50:26,563 - openeo_gfmap.utils - INFO - Selected orbit state: DESCENDING. Reason: Orbit has more cumulative intersected area. 15.678082454846425 > 13.936101536993151\n", - "WARNING:PrestoFeatureExtractor:No additional dependencies are defined. If you wish to add dependencies to your feature extractor, override the `dependencies` method in your class.\n", - "WARNING:PrestoFeatureExtractor:No additional dependencies are defined. If you wish to add dependencies to your feature extractor, override the `dependencies` method in your class.\n" - ] - }, - { - "ename": "InvalidProtobuf", - "evalue": "[ONNXRuntimeError] : 7 : INVALID_PROTOBUF : Failed to load model because protobuf parsing failed.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mInvalidProtobuf\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[10], line 17\u001b[0m\n\u001b[1;32m 14\u001b[0m parameters\u001b[38;5;241m.\u001b[39mclassifier_parameters\u001b[38;5;241m.\u001b[39mclassifier_url \u001b[38;5;241m=\u001b[39m model_url\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# Launch the job\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m job_results \u001b[38;5;241m=\u001b[39m \u001b[43mgenerate_map\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43mspatial_extent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[43mtemporal_extent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m 
\u001b[49m\u001b[43moutput_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m./cropmap_newpresto.tif\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43mproduct_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mWorldCerealProduct\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mCROPTYPE\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43mcroptype_parameters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparameters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[43mpostprocess_parameters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mPostprocessParameters\u001b[49m\u001b[43m(\u001b[49m\u001b[43menable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[43mjob_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpython-memory\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m4g\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[43mout_format\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGTiff\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/git/worldcereal/worldcereal-classification/src/worldcereal/job.py:151\u001b[0m, in \u001b[0;36mgenerate_map\u001b[0;34m(spatial_extent, temporal_extent, output_path, product_type, cropland_parameters, croptype_parameters, postprocess_parameters, out_format, backend_context, tile_size, 
job_options)\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[38;5;66;03m# First compute cropland map\u001b[39;00m\n\u001b[1;32m 139\u001b[0m cropland_mask \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 140\u001b[0m _cropland_map(\n\u001b[1;32m 141\u001b[0m inputs,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 148\u001b[0m ) \u001b[38;5;66;03m# Temporary fix to make this work as mask\u001b[39;00m\n\u001b[1;32m 149\u001b[0m )\n\u001b[0;32m--> 151\u001b[0m classes \u001b[38;5;241m=\u001b[39m \u001b[43m_croptype_map\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 152\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 153\u001b[0m \u001b[43m \u001b[49m\u001b[43mcroptype_parameters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcroptype_parameters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 154\u001b[0m \u001b[43m \u001b[49m\u001b[43mcropland_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcropland_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 155\u001b[0m \u001b[43m \u001b[49m\u001b[43mpostprocess_parameters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpostprocess_parameters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;66;03m# Submit the job\u001b[39;00m\n\u001b[1;32m 159\u001b[0m JOB_OPTIONS \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 160\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdriver-memory\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m4g\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 161\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexecutor-memory\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1g\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mudf-dependency-archives\u001b[39m\u001b[38;5;124m\"\u001b[39m: 
[\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mONNX_DEPS_URL\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m#onnx_deps\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 166\u001b[0m }\n", - "File \u001b[0;32m~/git/worldcereal/worldcereal-classification/src/worldcereal/openeo/mapping.py:131\u001b[0m, in \u001b[0;36m_croptype_map\u001b[0;34m(inputs, croptype_parameters, postprocess_parameters, cropland_mask)\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;66;03m# Run model inference on features\u001b[39;00m\n\u001b[1;32m 127\u001b[0m parameters \u001b[38;5;241m=\u001b[39m croptype_parameters\u001b[38;5;241m.\u001b[39mclassifier_parameters\u001b[38;5;241m.\u001b[39mmodel_dump(\n\u001b[1;32m 128\u001b[0m exclude\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclassifier\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 129\u001b[0m )\n\u001b[0;32m--> 131\u001b[0m lookup_table \u001b[38;5;241m=\u001b[39m \u001b[43mload_model_lut\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[43m \u001b[49m\u001b[43mcroptype_parameters\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclassifier_parameters\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclassifier_url\u001b[49m\n\u001b[1;32m 133\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 134\u001b[0m parameters\u001b[38;5;241m.\u001b[39mupdate({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlookup_table\u001b[39m\u001b[38;5;124m\"\u001b[39m: lookup_table})\n\u001b[1;32m 136\u001b[0m classes \u001b[38;5;241m=\u001b[39m apply_model_inference(\n\u001b[1;32m 137\u001b[0m model_inference_class\u001b[38;5;241m=\u001b[39mcroptype_parameters\u001b[38;5;241m.\u001b[39mclassifier,\n\u001b[1;32m 138\u001b[0m cube\u001b[38;5;241m=\u001b[39mfeatures,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 148\u001b[0m ],\n\u001b[1;32m 149\u001b[0m )\n", - "File 
\u001b[0;32m~/git/worldcereal/worldcereal-classification/src/worldcereal/utils/models.py:83\u001b[0m, in \u001b[0;36mload_model_lut\u001b[0;34m(model_url)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_model_lut\u001b[39m(model_url: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mdict\u001b[39m:\n\u001b[1;32m 71\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Load the class names to labels mapping from a CatBoost model.\u001b[39;00m\n\u001b[1;32m 72\u001b[0m \n\u001b[1;32m 73\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[38;5;124;03m Look-up table with class names and labels.\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 83\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mvalidate_cb_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_url\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_url\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 84\u001b[0m metadata \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mget_modelmeta()\u001b[38;5;241m.\u001b[39mcustom_metadata_map\n\u001b[1;32m 85\u001b[0m class_params \u001b[38;5;241m=\u001b[39m json\u001b[38;5;241m.\u001b[39mloads(metadata[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclass_params\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n", - "File \u001b[0;32m~/git/worldcereal/worldcereal-classification/src/worldcereal/utils/models.py:51\u001b[0m, in \u001b[0;36mvalidate_cb_model\u001b[0;34m(model_url)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mvalidate_cb_model\u001b[39m(model_url: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ort\u001b[38;5;241m.\u001b[39mInferenceSession:\n\u001b[1;32m 33\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Validate a catboost model by loading it and checking 
if the required\u001b[39;00m\n\u001b[1;32m 34\u001b[0m \u001b[38;5;124;03m metadata is present. Checks for the `class_names` and `class_to_labels`\u001b[39;00m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;124;03m fields are present in the `class_params` field of the custom metadata of\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;124;03m ONNX model loaded with ONNX runtime.\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 51\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mload_model_onnx\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_url\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_url\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 53\u001b[0m metadata \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mget_modelmeta()\u001b[38;5;241m.\u001b[39mcustom_metadata_map\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclass_params\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m metadata:\n", - "File \u001b[0;32m~/git/worldcereal/worldcereal-classification/src/worldcereal/utils/models.py:29\u001b[0m, in \u001b[0;36mload_model_onnx\u001b[0;34m(model_url)\u001b[0m\n\u001b[1;32m 26\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(model_url, timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m120\u001b[39m)\n\u001b[1;32m 27\u001b[0m model \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mcontent\n\u001b[0;32m---> 29\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mort\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceSession\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/worldcereal/lib/python3.10/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py:419\u001b[0m, in 
\u001b[0;36mInferenceSession.__init__\u001b[0;34m(self, path_or_bytes, sess_options, providers, provider_options, **kwargs)\u001b[0m\n\u001b[1;32m 416\u001b[0m disabled_optimizers \u001b[38;5;241m=\u001b[39m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisabled_optimizers\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisabled_optimizers\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 419\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_create_inference_session\u001b[49m\u001b[43m(\u001b[49m\u001b[43mproviders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprovider_options\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdisabled_optimizers\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mRuntimeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 421\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_enable_fallback:\n", - "File \u001b[0;32m~/miniconda3/envs/worldcereal/lib/python3.10/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py:454\u001b[0m, in \u001b[0;36mInferenceSession._create_inference_session\u001b[0;34m(self, providers, provider_options, disabled_optimizers)\u001b[0m\n\u001b[1;32m 452\u001b[0m sess \u001b[38;5;241m=\u001b[39m C\u001b[38;5;241m.\u001b[39mInferenceSession(session_options, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_model_path, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_read_config_from_model)\n\u001b[1;32m 453\u001b[0m 
\u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 454\u001b[0m sess \u001b[38;5;241m=\u001b[39m \u001b[43mC\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceSession\u001b[49m\u001b[43m(\u001b[49m\u001b[43msession_options\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_model_bytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_config_from_model\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m disabled_optimizers \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 457\u001b[0m disabled_optimizers \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", - "\u001b[0;31mInvalidProtobuf\u001b[0m: [ONNXRuntimeError] : 7 : INVALID_PROTOBUF : Failed to load model because protobuf parsing failed." 
- ] - } - ], + "outputs": [], "source": [ - "from worldcereal.job import generate_map, CropLandParameters\n", + "from worldcereal.job import generate_map, CropLandParameters, PostprocessParameters\n", "\n", "# Initializes default parameters\n", "parameters = CropLandParameters()\n", @@ -565,7 +321,7 @@ " processing_period,\n", " output_dir=output_dir,\n", " cropland_parameters=parameters,\n", - " postprocess_parameters=postprocess_parameters,\n", + " postprocess_parameters=PostprocessParameters(),\n", ")" ] }, @@ -573,14 +329,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The classification results will be automatically downloaded to your output_dir in .tif format.\n", - "By default, OpenEO stores the class labels, confidence score and class probabilities in one file.\n", + "The classification results will be automatically downloaded to your *output_dir* in .tif format.\n", "\n", - "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", - "- \"xxx_classification_start-date_end-date.tif\" --> contains the classification labels. A class look-up table is included in the .tif metadata.\n", - "- \"xxx_confidence_start-date_end-date.tif\" --> contains the probability associated to the prediction [0 - 100]\n", + "The result will be a raster file containing two bands:\n", + "1. The label of the winning class\n", + "2. The probability of the winning class [0 - 100]\n", "\n", + "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", + "- \"cropland_classification_start-date_end-date.tif\" --> contains the classification labels. 
A class look-up table is included in the .tif metadata.\n", + "- \"cropland_confidence_start-date_end-date.tif\" --> contains the probability associated to the prediction [0 - 100]" ] }, { @@ -599,36 +356,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The resulting raster files can be visualized in QGIS, or using the function in the cell below..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import visualize_products\n", - "\n", - "visualize_products(rasters, port=8887)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, use the cell below to visualize the look-up table for the classification product." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import show_color_legend\n", - "\n", - "show_color_legend(rasters, \"cropland\")" + "The resulting raster files can be visualized in QGIS." ] } ], diff --git a/notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb b/notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb new file mode 100644 index 00000000..eaf6490c --- /dev/null +++ b/notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb @@ -0,0 +1,433 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](./resources/System_v1_custom_cropland.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Introduction\n", + "\n", + "This notebook guides you through the process of training a custom cropland classification model using publicly available and harmonized in-situ reference data for your area of interest. Afterwards, the model can be applied to your area and season of interest to generate a cropland extent map.\n", + "\n", + "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run. 
On average, one such run consumes 35 credits on the Copernicus Data Space Ecosystem." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Content\n", + " \n", + "- [Before you start](#Before-you-start)\n", + "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", + "- [2. Extract public reference data](#2.-Extract-public-reference-data)\n", + "- [3. Create your custom cropland class](#3.-Create-your-custom-cropland-class)\n", + "- [4. Prepare training features](#4.-Prepare-training-features)\n", + "- [5. Train custom classification model](#5.-Train-custom-classification-model)\n", + "- [6. Deploy your custom model](#6.-Deploy-your-custom-model)\n", + "- [7. Generate a map](#7.-Generate-a-map)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Before you start\n", + "\n", + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) by registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Define your region of interest\n", + "\n", + "When running the code snippet below, an interactive map will be visualized.\n", + "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "\n", + "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported to avoid excessive credit usage (roughly 120 credits will be consumed for this size of a processing extent).\n", + "\n", + "The widget will automatically store the coordinates of the last rectangle you drew on the map." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.map import ui_map\n", + "\n", + "map = ui_map()\n", + "map.show_map()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Extract public reference data\n", + "\n", + "Here we query existing reference data that have already been processed by WorldCereal and are ready to use.\n", + "To increase the number of hits, we expand the search area by 250 km in all directions.\n", + "\n", + "We print the number of training samples retrieved per year." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.refdata import query_public_extractions\n", + "\n", + "# retrieve the polygon you just drew\n", + "polygon = map.get_polygon_latlon()\n", + "\n", + "# Query our public database of training data\n", + "public_df = query_public_extractions(polygon, filter_cropland=False)\n", + "public_df.year.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Create your custom cropland class\n", + "\n", + "Run the next cell and select all land cover classes you would like to include in your \"cropland\" class. All classes that are not selected will be grouped under the \"other\" category. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from utils import pick_croptypes\n", + "from utils import select_landcover\n", + "from IPython.display import display\n", + "\n", + "checkbox, checkbox_widgets = select_landcover(public_df)\n", + "display(checkbox)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Based on your selection, a custom target label is now generated for each sample. Verify that only land cover classes of your choice are appearing in the `downstream_class`, all others will fall under `other`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import get_custom_cropland_labels\n", + "\n", + "public_df = get_custom_cropland_labels(public_df, checkbox_widgets)\n", + "public_df[\"downstream_class\"].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Prepare training features\n", + "\n", + "Using a deep learning framework (Presto), we derive classification features for each sample. The resulting `encodings` and `targets` will be used for model training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import prepare_training_dataframe\n", + "\n", + "training_dataframe = prepare_training_dataframe(public_df, task_type=\"cropland\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Train custom classification model\n", + "We train a catboost model for the selected land cover classes. Class weights are automatically determined to balance the individual classes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import train_cropland_classifier\n", + "\n", + "custom_model, report, confusion_matrix = train_cropland_classifier(training_dataframe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before training, the available training data has been automatically split into a calibration and validation part. By executing the next cell, you get an idea of how well the model performs on the independent validation set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Print the classification report\n", + "print(report)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Deploy your custom model\n", + "\n", + "Once trained, we have to upload our model to the cloud so it can be used by OpenEO for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.upload import deploy_model\n", + "from openeo_gfmap.backend import cdse_connection\n", + "from utils import get_input\n", + "\n", + "modelname = get_input(\"model\")\n", + "model_url = deploy_model(cdse_connection(), custom_model, pattern=modelname)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7. Generate a map\n", + "\n", + "Using our custom model, we generate a map for our region and season of interest.\n", + "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import retrieve_worldcereal_seasons\n", + "\n", + "spatial_extent = map.get_processing_extent()\n", + "seasons = retrieve_worldcereal_seasons(spatial_extent)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now use the slider to select your processing period. Note that the length of the period is always fixed to a year.\n", + "Just make sure your season of interest is fully captured within the period you select." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import date_slider\n", + "\n", + "slider = date_slider()\n", + "slider.show_slider()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set some other customization options:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.job import PostprocessParameters\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "# Choose whether or not you want to spatially clean the classification results\n", + "postprocess_result = True\n", + "# Choose the postprocessing method you want to use [\"smooth_probabilities\", \"majority_vote\"]\n", + "# (\"smooth_probabilities will do limited spatial cleaning,\n", + "# while \"majority_vote\" will do more aggressive spatial cleaning, depending on the value of kernel_size)\n", + "postprocess_method = \"majority_vote\"\n", + "# Additional parameter for the majority vote method \n", + "# (the higher the value, the more aggressive the spatial cleaning,\n", + "# should be an odd number, not larger than 25, default = 5)\n", + "kernel_size = 5\n", + "# Do you want to save the intermediate results (before applying the postprocessing)\n", + "save_intermediate = True\n", + "# Do you want to save all class probabilities in the final product?\n", + 
"keep_class_probs = True\n", + "\n", + "postprocess_parameters = PostprocessParameters(enable=postprocess_result,\n", + " method=postprocess_method,\n", + " kernel_size=kernel_size,\n", + " save_intermediate=save_intermediate,\n", + " keep_class_probs=keep_class_probs)\n", + "\n", + "# Specify the local directory where the resulting maps should be downloaded to.\n", + "run = get_input(\"model run\")\n", + "output_dir = Path(os.getcwd()) / f'CROPLAND_{modelname}_{run}'\n", + "print(f\"Output directory: {output_dir}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now have all information we need to generate our map!
\n", + "The next cell will submit a map inference job on CDSE through OpenEO.
\n", + "The first time you run this, you will be asked to authenticate with your CDSE account by clicking the link provided below the cell.
\n", + "Then sit back and wait until your map is ready..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.job import generate_map, CropLandParameters\n", + "\n", + "# Initializes default parameters\n", + "parameters = CropLandParameters()\n", + "\n", + "# Change the URL to your custom classification model\n", + "parameters.classifier_parameters.classifier_url = model_url\n", + "\n", + "# Get processing period and area\n", + "processing_period = slider.get_processing_period()\n", + "processing_extent = map.get_processing_extent()\n", + "\n", + "# Launch the job\n", + "job_results = generate_map(\n", + " processing_extent,\n", + " processing_period,\n", + " output_dir=output_dir,\n", + " cropland_parameters=parameters,\n", + " postprocess_parameters=postprocess_parameters,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The classification results will be automatically downloaded to your *output_dir* in .tif format.\n", + "\n", + "For a model with two classes, you get a raster file containing a maximum of four bands:\n", + "1. The label of the winning class\n", + "2. The probability of the winning class [0 - 100]\n", + "3. and beyond (optional, depending on settings): Class probabilities of each class, ordered according to the look-up table. The look-up table for each product can be consulted in the 'results' object as produced by the 'generate_map' function.\n", + "\n", + "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", + "- \"xxx_classification_start-date_end-date.tif\" --> contains the classification labels. 
A class look-up table is included in the .tif metadata.\n", + "- \"xxx_confidence_start-date_end-date.tif\" --> contains the probability associated to the prediction [0 - 100]\n", + "\n", + "In case you chose to store the original per-class probabilities, these are NOT written to a separate file and need to be consulted in the original result downloaded from OpenEO." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import prepare_visualization\n", + "\n", + "rasters = prepare_visualization(job_results)\n", + "print(rasters)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting raster files can be visualized in QGIS.\n", + "\n", + "In case you are running this script on your local environment, you can alternatively use the following cells to visualize the outputs directly in this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import visualize_products\n", + "\n", + "visualize_products(rasters, port=8887)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import show_color_legend\n", + "\n", + "show_color_legend(rasters, \"cropland\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "worldcereal", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/worldcereal_v1_demo_custom_croptype.ipynb b/notebooks/worldcereal_v1_demo_custom_croptype.ipynb index 5aa4730b..411f8f4c 100644 --- a/notebooks/worldcereal_v1_demo_custom_croptype.ipynb +++ 
b/notebooks/worldcereal_v1_demo_custom_croptype.ipynb @@ -11,99 +11,61 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Table of contents** \n", - "- [Before you start](#toc1_) \n", - "- [Define a region of interest](#toc2_) \n", - "- [Extract public training data](#toc3_) \n", - "- [Select desired crops for prediction](#toc4_) \n", - "- [Extract required model inputs](#toc5_) \n", - "- [Train custom classification model](#toc6_) \n", - "- [Deploy custom model](#toc7_) \n", - "- [Generate a map](#toc8_) \n", + "### Introduction\n", "\n", - "\n", - "" + "This notebook guides you through the process of training a custom crop type classification model using publicly available and harmonized in-situ reference data for your area and crop types of interest. Afterwards, the model can be applied to your season of interest to generate a crop type map.\n", + "\n", + "Please note that for the purpose of this demo, the processing area is limited to 250 km² per model run. On average, one such run consumes xxx credits on the Copernicus Data Space Ecosystem." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# [Before you start](#toc0_)\n", - "\n", - "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo." + "### Content\n", + " \n", + "- [Before you start](#Before-you-start)\n", + "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", + "- [2. Extract public reference data](#2.-Extract-public-reference-data)\n", + "- [3. Select your desired crop types](#3.-Select-your-desired-crop-types)\n", + "- [4. Prepare training features](#4.-Prepare-training-features)\n", + "- [5. Train custom classification model](#5.-Train-custom-classification-model)\n", + "- [6. 
Deploy your custom model](#6.-Deploy-your-custom-model)\n", + "- [7. Generate a map](#7.-Generate-a-map)\n" ] }, { - "cell_type": "code", - "execution_count": 5, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], "source": [ - "# TEMPORARY CELL\n", + "### Before you start\n", "\n", - "import sys\n", - "sys.path.append('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/notebooks')\n", - "%load_ext autoreload\n", - "%autoreload 2" + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# [Define a region of interest](#toc0_)\n", + "### 1. Define your region of interest\n", "\n", "When running the code snippet below, an interactive map will be visualized.\n", "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "\n", "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported to avoid excessive credit usage (roughly 120 credits will be consumed for this size of a processing extent).\n", + "\n", "The widget will automatically store the coordinates of the last rectangle you drew on the map." ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ba074684b1b34f3babdc149124fe94a3", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Map(center=[51.1872, 5.1154], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoo…" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 19:08:09.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mhandle_draw\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mYour processing extent: (10.349941, 53.119306, 10.408676, 53.145054)\u001b[0m\n", - "\u001b[32m2024-10-14 19:08:09.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mhandle_draw\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mArea of processing extent: 11.71 km²\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "from worldcereal.utils.map import ui_map\n", "\n", @@ -115,41 +77,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# [Extract public training data](#toc0_)\n", + "### 2. Extract public reference data\n", "\n", "Here we query existing reference data that have already been processed by WorldCereal and are ready to use.\n", - "To increase the number of hits, we expand the search area by 250 km in all directions." + "To increase the number of hits, we expand the search area by 250 km in all directions.\n", + "\n", + "We print the number of training samples retrieved per year." 
] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 19:08:17.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mget_processing_extent\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYour processing extent: (10.349941, 53.119306, 10.408676, 53.145054)\u001b[0m\n", - "\u001b[32m2024-10-14 19:08:17.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.refdata\u001b[0m:\u001b[36mquery_public_extractions\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mApplying a buffer of 250 km to the selected area ...\u001b[0m\n", - "\u001b[32m2024-10-14 19:08:17.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.refdata\u001b[0m:\u001b[36mquery_public_extractions\u001b[0m:\u001b[36m81\u001b[0m - \u001b[1mQuerying WorldCereal global extractions database (this can take a while) ...\u001b[0m\n", - "\u001b[32m2024-10-14 19:08:35.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.refdata\u001b[0m:\u001b[36mprocess_parquet\u001b[0m:\u001b[36m127\u001b[0m - \u001b[1mProcessing selected samples ...\u001b[0m\n", - "\u001b[32m2024-10-14 19:08:35.463\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.refdata\u001b[0m:\u001b[36mprocess_parquet\u001b[0m:\u001b[36m130\u001b[0m - \u001b[1mExtracted and processed 582 samples from global database.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "year\n", - "2018 582\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from worldcereal.utils.refdata import query_public_extractions\n", "\n", @@ -165,31 +105,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# [Select desired crops for prediction](#toc0_)\n", + "### 3. 
Select your desired crop types\n", "\n", - "Run the next cell and select all crop types you wish to include in your model. All the crops that are not selected will be grouped under the \"other_crop\" category. The model will be trained in a multi-class setting." + "Run the next cell and select all crop types you wish to include in your model. All the crops that are not selected will be grouped under the \"other\" category." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b1be511b4fd1451abc657b389f3df5d6", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Checkbox(value=False, description='unspecified_wheat (192 samples)'), Checkbox(value=False, des…" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from utils import pick_croptypes\n", "from IPython.display import display\n", @@ -207,24 +132,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "downstream_class\n", - "other 252\n", - "unspecified_wheat 192\n", - "maize 138\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from utils import get_custom_croptype_labels\n", "\n", @@ -236,36 +146,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# [Extract required model inputs](#toc0_)\n", + "### 4. Prepare training features\n", "\n", - "Here we prepare presto inputs features for each sample by using a model pretrained on WorldCereal data. The resulting `encodings` and `targets` will be used for model training." + "Using a deep learning framework (Presto), we derive classification features for each sample. The resulting `encodings` and `targets` will be used for model training." 
] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 19:09:21.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mprepare_training_dataframe\u001b[0m:\u001b[36m324\u001b[0m - \u001b[1mPresto URL: https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal/models/PhaseII/presto-ss-wc-ft-ct_croptype_CROPTYPE0_30D_random_time-token=month_balance=True_augment=True.pt\u001b[0m\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 19:09:21.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.train.data\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m35\u001b[0m - \u001b[1mAugmentation is enabled. The horizontal jittering of the selected window will be performed.\u001b[0m\n", - "\u001b[32m2024-10-14 19:09:21.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.train.data\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mOriginal dataset size: 582\u001b[0m\n", - "\u001b[32m2024-10-14 19:09:21.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.train.data\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mDataset size after 1 repeats: 582\u001b[0m\n", - "\u001b[32m2024-10-14 19:09:21.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mprepare_training_dataframe\u001b[0m:\u001b[36m340\u001b[0m - \u001b[1mComputing Presto embeddings ...\u001b[0m\n", - "100%|██████████| 3/3 [00:02<00:00, 1.14it/s]\n", - "\u001b[32m2024-10-14 19:09:24.543\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mprepare_training_dataframe\u001b[0m:\u001b[36m348\u001b[0m - \u001b[1mDone.\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "from utils import prepare_training_dataframe\n", "\n", @@ -276,81 +166,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - 
"# [Train custom classification model](#toc0_)\n", + "### 5. Train custom classification model\n", "We train a catboost model for the selected crop types. Class weights are automatically determined to balance the individual classes." ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 19:09:31.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mtrain_classifier\u001b[0m:\u001b[36m395\u001b[0m - \u001b[1mSplit train/test ...\u001b[0m\n", - "\u001b[32m2024-10-14 19:09:31.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mtrain_classifier\u001b[0m:\u001b[36m414\u001b[0m - \u001b[1mComputing class weights ...\u001b[0m\n", - "\u001b[32m2024-10-14 19:09:31.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mtrain_classifier\u001b[0m:\u001b[36m427\u001b[0m - \u001b[1mClass weights: {'maize': 1.409, 'other': 0.771, 'unspecified_wheat': 1.006}\u001b[0m\n", - "\u001b[32m2024-10-14 19:09:31.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mtrain_classifier\u001b[0m:\u001b[36m467\u001b[0m - \u001b[1mTraining CatBoost classifier ...\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Learning rate set to 0.04805\n", - "0:\tlearn: 1.0586911\ttest: 1.0637480\tbest: 1.0637480 (0)\ttotal: 91.7ms\tremaining: 12m 13s\n", - "25:\tlearn: 0.5650713\ttest: 0.6863680\tbest: 0.6863680 (25)\ttotal: 768ms\tremaining: 3m 55s\n", - "50:\tlearn: 0.3771754\ttest: 0.5571138\tbest: 0.5571138 (50)\ttotal: 1.39s\tremaining: 3m 36s\n", - "75:\tlearn: 0.2801689\ttest: 0.4972990\tbest: 0.4972990 (75)\ttotal: 2.04s\tremaining: 3m 32s\n", - "100:\tlearn: 0.2221873\ttest: 0.4668236\tbest: 0.4668236 (100)\ttotal: 2.64s\tremaining: 3m 26s\n", - "125:\tlearn: 0.1820294\ttest: 0.4457419\tbest: 0.4457419 (125)\ttotal: 3.2s\tremaining: 
3m 20s\n", - "150:\tlearn: 0.1503336\ttest: 0.4347274\tbest: 0.4347274 (150)\ttotal: 3.78s\tremaining: 3m 16s\n", - "175:\tlearn: 0.1263781\ttest: 0.4280751\tbest: 0.4280751 (175)\ttotal: 4.35s\tremaining: 3m 13s\n", - "200:\tlearn: 0.1079100\ttest: 0.4259335\tbest: 0.4256174 (197)\ttotal: 4.92s\tremaining: 3m 11s\n", - "225:\tlearn: 0.0934925\ttest: 0.4247271\tbest: 0.4247271 (225)\ttotal: 5.49s\tremaining: 3m 8s\n", - "250:\tlearn: 0.0830328\ttest: 0.4214331\tbest: 0.4214331 (250)\ttotal: 6.07s\tremaining: 3m 7s\n", - "275:\tlearn: 0.0740736\ttest: 0.4203690\tbest: 0.4201436 (274)\ttotal: 6.63s\tremaining: 3m 5s\n", - "300:\tlearn: 0.0669315\ttest: 0.4195972\tbest: 0.4190787 (298)\ttotal: 7.21s\tremaining: 3m 4s\n", - "325:\tlearn: 0.0610666\ttest: 0.4212918\tbest: 0.4190787 (298)\ttotal: 7.8s\tremaining: 3m 3s\n", - "Stopped by overfitting detector (50 iterations wait)\n", - "\n", - "bestTest = 0.419078654\n", - "bestIteration = 298\n", - "\n", - "Shrink model to first 299 iterations.\n" - ] - } - ], + "outputs": [], "source": [ "from utils import train_classifier\n", "\n", "custom_model, report, confusion_matrix = train_classifier(training_dataframe)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before training, the available training data has been automatically split into a calibration and validation part. By executing the next cell, you get an idea of how well the model performs on the independent validation set." 
+ ] + }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " maize 0.84 0.93 0.88 28\n", - " other 0.85 0.80 0.83 51\n", - "unspecified_wheat 0.82 0.82 0.82 38\n", - "\n", - " accuracy 0.84 117\n", - " macro avg 0.84 0.85 0.84 117\n", - " weighted avg 0.84 0.84 0.84 117\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# Print the classification report\n", "print(report)" @@ -360,50 +202,30 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# [Deploy custom model](#toc0_)\n", + "### 6. Deploy your custom model\n", "\n", - "Once trained, we have to upload our model to the cloud so it can be used for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n" + "Once trained, we have to upload our model to the cloud so it can be used by OpenEO for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 19:10:03.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.upload\u001b[0m:\u001b[36mdeploy_model\u001b[0m:\u001b[36m205\u001b[0m - \u001b[1mDeploying model ...\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Authenticated using refresh token.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 19:10:07.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.upload\u001b[0m:\u001b[36mdeploy_model\u001b[0m:\u001b[36m211\u001b[0m - \u001b[1mDeployed to: s3://OpenEO-artifacts/fd307620ba8a0a07c44a2dc28541b181d5c03cb4/2024/10/14/demo_croptype_multiclass_DE_custommodel.onnx\u001b[0m\n" - ] - } - ], + 
"outputs": [], "source": [ "from worldcereal.utils.upload import deploy_model\n", "from openeo_gfmap.backend import cdse_connection\n", + "from utils import get_input\n", "\n", - "model_url = deploy_model(cdse_connection(), custom_model, pattern=\"demo_croptype_multiclass_DE\")" + "modelname = get_input(\"model\")\n", + "model_url = deploy_model(cdse_connection(), custom_model, pattern=modelname)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# [Generate a map](#toc0_)\n", + "### 7. Generate a map\n", "\n", "Using our custom model, we generate a map for our region and season of interest.\n", "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." @@ -411,27 +233,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 19:10:13.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mget_processing_extent\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYour processing extent: (10.349941, 53.119306, 10.408676, 53.145054)\u001b[0m\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from utils import retrieve_worldcereal_seasons\n", "\n", @@ -449,24 +253,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "58fbf1920eb141448a8240dec13e496b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(HTML(value=\"\\n
\\n
\n", + "The next cell will submit a map inference job on CDSE through OpenEO.
\n", + "The first time you run this, you will be asked to authenticate with your CDSE account by clicking the link provided below the cell.
\n", + "Then sit back and wait until your map is ready..." + ] + }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 20:20:20.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mget_processing_period\u001b[0m:\u001b[36m120\u001b[0m - \u001b[1mSelected processing period: 2018-12-01 to 2019-11-30\u001b[0m\n", - "\u001b[32m2024-10-14 20:20:20.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mget_processing_extent\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYour processing extent: (10.349941, 53.119306, 10.408676, 53.145054)\u001b[0m\n", - "INFO:openeo.rest.connection:Found OIDC providers: ['CDSE']\n", - "INFO:openeo.rest.connection:No OIDC provider given, but only one available: 'CDSE'. Using that one.\n", - "INFO:openeo.rest.connection:Using default client_id 'sh-b1c3a958-52d4-40fe-a333-153595d1c71e' from OIDC provider 'CDSE' info.\n", - "INFO:openeo.rest.connection:Found refresh token: trying refresh token based authentication.\n", - "INFO:openeo.rest.auth.oidc:Doing 'refresh_token' token request 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token' with post data fields ['grant_type', 'client_id', 'refresh_token'] (client_id 'sh-b1c3a958-52d4-40fe-a333-153595d1c71e')\n", - "INFO:openeo.rest.connection:Obtained tokens: ['access_token', 'id_token', 'refresh_token']\n", - "INFO:openeo.rest.auth.config:Storing refresh token for issuer 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE' (client 'sh-b1c3a958-52d4-40fe-a333-153595d1c71e')\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Authenticated using refresh token.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-10-14 20:20:35,372 - openeo_gfmap.utils - INFO - Selected orbit state: ASCENDING.
Reason: Orbit has more cumulative intersected area. 0.9639606281260852 > 0.5250584645381887\n", - "INFO:openeo_gfmap.utils:Selected orbit state: ASCENDING. Reason: Orbit has more cumulative intersected area. 0.9639606281260852 > 0.5250584645381887\n", - "WARNING:PrestoFeatureExtractor:No additional dependencies are defined. If you wish to add dependencies to your feature extractor, override the `dependencies` method in your class.\n", - "WARNING:PrestoFeatureExtractor:No additional dependencies are defined. If you wish to add dependencies to your feature extractor, override the `dependencies` method in your class.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0:00:00 Job 'j-2410145b1b3c43b8851ed611261b9ecb': send 'start'\n", - "0:00:20 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:00:25 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:00:32 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:00:40 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:00:50 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:01:03 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:01:19 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:01:39 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:02:03 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:02:33 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:03:11 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:03:58 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:04:57 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:05:58 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:06:58 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:07:58 Job 
'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:08:59 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:09:59 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:10:59 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:12:00 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:13:00 Job 'j-2410145b1b3c43b8851ed611261b9ecb': running (progress N/A)\n", - "0:14:01 Job 'j-2410145b1b3c43b8851ed611261b9ecb': finished (progress 100%)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:openeo.rest.job:Downloading Job result asset 'cropland-raw_20181201_20191130.tif' from https://openeo.creo.vito.be/openeo/jobs/j-2410145b1b3c43b8851ed611261b9ecb/results/assets/ZGNjYWI2ZDktODQ2Yy00OGE5LTlkOTQtNDk3MTQ2Y2IyMjg1/990ac36cfcb6a2797bcf85bfa5b3ed7c/cropland-raw_20181201_20191130.tif?expires=1729535690 to /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/cropland-raw_20181201_20191130.tif\n", - "INFO:openeo.rest.job:Downloading Job result asset 'cropland_20181201_20191130.tif' from https://openeo.creo.vito.be/openeo/jobs/j-2410145b1b3c43b8851ed611261b9ecb/results/assets/ZGNjYWI2ZDktODQ2Yy00OGE5LTlkOTQtNDk3MTQ2Y2IyMjg1/1199815155d5ed90ed8be9d0eede8b30/cropland_20181201_20191130.tif?expires=1729535690 to /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/cropland_20181201_20191130.tif\n", - "INFO:openeo.rest.job:Downloading Job result asset 'croptype-raw_20181201_20191130.tif' from https://openeo.creo.vito.be/openeo/jobs/j-2410145b1b3c43b8851ed611261b9ecb/results/assets/ZGNjYWI2ZDktODQ2Yy00OGE5LTlkOTQtNDk3MTQ2Y2IyMjg1/6b7a8e0051698e4ecafda95322eeda9a/croptype-raw_20181201_20191130.tif?expires=1729535690 to /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/croptype-raw_20181201_20191130.tif\n", - "INFO:openeo.rest.job:Downloading Job result asset 
'croptype_20181201_20191130.tif' from https://openeo.creo.vito.be/openeo/jobs/j-2410145b1b3c43b8851ed611261b9ecb/results/assets/ZGNjYWI2ZDktODQ2Yy00OGE5LTlkOTQtNDk3MTQ2Y2IyMjg1/2662e69753718ad3f2abade4416eb16e/croptype_20181201_20191130.tif?expires=1729535690 to /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/croptype_20181201_20191130.tif\n" - ] - } - ], + "outputs": [], "source": [ - "from worldcereal.job import WorldCerealProductType, generate_map, CropTypeParameters\n", + "from worldcereal.job import PostprocessParameters, WorldCerealProductType, generate_map, CropTypeParameters\n", "\n", "# Initializes default parameters\n", "parameters = CropTypeParameters()\n", "\n", "# Change the URL to your custom classification model\n", "parameters.classifier_parameters.classifier_url = model_url\n", - "parameters.save_mask = save_mask\n", + "parameters.save_mask = True\n", "\n", "# Get processing period and area\n", "processing_period = slider.get_processing_period()\n", @@ -622,7 +321,7 @@ " output_dir=output_dir,\n", " product_type=WorldCerealProductType.CROPTYPE,\n", " croptype_parameters=parameters,\n", - " postprocess_parameters=postprocess_parameters,\n", + " postprocess_parameters=PostprocessParameters,\n", ")" ] }, @@ -630,36 +329,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The classification results will be automatically downloaded to your output_dir in .tif format.\n", - "By default, OpenEO stores the class labels, confidence score and class probabilities in one file.\n", + "The classification results will be automatically downloaded to your *output_dir* in .tif format.
\n", + "You will get two outputs, one containing the cropland mask and one containing the crop type results.
\n", "\n", - "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", - "- \"xxx_classification_start-date_end-date.tif\" --> contains the classification labels. A class look-up table is included in the .tif metadata.\n", - "- \"xxx_confidence_start-date_end-date.tif\" --> contains the probability associated to the prediction [0 - 100]\n", + "The result will be a raster file containing two bands:\n", + "1. The label of the winning class\n", + "2. The probability of the winning class [0 - 100]\n", "\n", - "In case you chose to store the original per-class probabilities, these are NOT written to a separate file and need to be consulted in the original result downloaded from OpenEO." + "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", + "- \"croptype_classification_start-date_end-date.tif\" --> contains the classification labels. A class look-up table is included in the .tif metadata.\n", + "- \"croptype_confidence_start-date_end-date.tif\" --> contains the probability associated to the prediction [0 - 100]" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 20:35:48.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36m_get_colormap\u001b[0m:\u001b[36m540\u001b[0m - \u001b[1mAssigning random color map for product croptype. 
\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'cropland-raw': {'classification': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/cropland-raw_classification_20181201_20191130.tif'), 'confidence': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/cropland-raw_confidence_20181201_20191130.tif')}, 'cropland': {'classification': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/cropland_classification_20181201_20191130.tif'), 'confidence': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/cropland_confidence_20181201_20191130.tif')}, 'croptype-raw': {'classification': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/croptype-raw_classification_20181201_20191130.tif'), 'confidence': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/croptype-raw_confidence_20181201_20191130.tif')}, 'croptype': {'classification': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/croptype_classification_20181201_20191130.tif'), 'confidence': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPTYPE_germany/croptype_confidence_20181201_20191130.tif')}}\n" - ] - } - ], + "outputs": [], "source": [ "from utils import prepare_visualization\n", "\n", @@ -671,63 +357,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The resulting raster files can be visualized in QGIS, or using the function in the cell below..." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1382fa85ebf645e8809a579f3c5a1265", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Map(center=[53.1321915, 10.379311000000001], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_…" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from utils import visualize_products\n", - "\n", - "visualize_products(rasters, port=8887)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, use the cell below to visualize the look-up table for the classification product." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from utils import show_color_legend\n", - "\n", - "show_color_legend(rasters, \"croptype\")" + "The resulting raster files can be visualized in QGIS." ] } ], diff --git a/notebooks/worldcereal_v1_demo_custom_croptype_extended.ipynb b/notebooks/worldcereal_v1_demo_custom_croptype_extended.ipynb new file mode 100644 index 00000000..1fff3dcf --- /dev/null +++ b/notebooks/worldcereal_v1_demo_custom_croptype_extended.ipynb @@ -0,0 +1,440 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](./resources/System_v1_custom_croptype.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Introduction\n", + "\n", + "This notebook guides you through the process of training a custom crop type classification model using publicly available and harmonized in-situ reference data for your area and crop types of interest. Afterwards, the model can be applied to your season of interest to generate a crop type map.\n", + "\n", + "Please note that for the purpose of this demo, the processing area is limited to 250 km² per model run. On average, one such run consumes xxx credits on the Copernicus Data Space Ecosystem." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Content\n", + " \n", + "- [Before you start](###-Before-you-start)\n", + "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", + "- [2. Extract public reference data](#2.-Extract-public-reference-data)\n", + "- [3. Select your desired crop types](#3.-Select-your-desired-crop-types)\n", + "- [4. Prepare training features](#4.-Prepare-training-features)\n", + "- [5. Train custom classification model](#5.-Train-custom-classification-model)\n", + "- [6. Deploy your custom model](#6.-Deploy-your-custom-model)\n", + "- [7. 
Generate a map](#7.-Generate-a-map)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Before you start\n", + "\n", + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Define your region of interest\n", + "\n", + "When running the code snippet below, an interactive map will be visualized.\n", + "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "\n", + "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported to avoid excessive credit usage (roughly 120 credits will be consumed for this size of a processing extent).\n", + "\n", + "The widget will automatically store the coordinates of the last rectangle you drew on the map." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.map import ui_map\n", + "\n", + "map = ui_map()\n", + "map.show_map()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Extract public reference data\n", + "\n", + "Here we query existing reference data that have already been processed by WorldCereal and are ready to use.\n", + "To increase the number of hits, we expand the search area by 250 km in all directions.\n", + "\n", + "We print the number of training samples retrieved per year." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.refdata import query_public_extractions\n", + "\n", + "# retrieve the polygon you just drew\n", + "polygon = map.get_polygon_latlon()\n", + "\n", + "# Query our public database of training data\n", + "public_df = query_public_extractions(polygon)\n", + "public_df.year.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Select your desired crop types\n", + "\n", + "Run the next cell and select all crop types you wish to include in your model. All the crops that are not selected will be grouped under the \"other\" category." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import pick_croptypes\n", + "from IPython.display import display\n", + "\n", + "checkbox, checkbox_widgets = pick_croptypes(public_df, samples_threshold=100)\n", + "display(checkbox)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Based on your selection, a custom target label is now generated for each sample. Verify that only crops of your choice are appearing in the `downstream_class`, all others will fall under `other`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import get_custom_croptype_labels\n", + "\n", + "public_df = get_custom_croptype_labels(public_df, checkbox_widgets)\n", + "public_df[\"downstream_class\"].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Prepare training features\n", + "\n", + "Using a deep learning framework (Presto), we derive classification features for each sample. The resulting `encodings` and `targets` will be used for model training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import prepare_training_dataframe\n", + "\n", + "training_dataframe = prepare_training_dataframe(public_df, task_type=\"croptype\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Train custom classification model\n", + "We train a catboost model for the selected crop types. Class weights are automatically determined to balance the individual classes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import train_classifier\n", + "\n", + "custom_model, report, confusion_matrix = train_classifier(training_dataframe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before training, the available training data has been automatically split into a calibration and validation part. By executing the next cell, you get an idea of how well the model performs on the independent validation set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Print the classification report\n", + "print(report)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Deploy your custom model\n", + "\n", + "Once trained, we have to upload our model to the cloud so it can be used by OpenEO for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.upload import deploy_model\n", + "from openeo_gfmap.backend import cdse_connection\n", + "from utils import get_input\n", + "\n", + "modelname = get_input(\"model\")\n", + "model_url = deploy_model(cdse_connection(), custom_model, pattern=modelname)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7. Generate a map\n", + "\n", + "Using our custom model, we generate a map for our region and season of interest.\n", + "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import retrieve_worldcereal_seasons\n", + "\n", + "spatial_extent = map.get_processing_extent()\n", + "seasons = retrieve_worldcereal_seasons(spatial_extent)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now use the slider to select your processing period. Note that the length of the period is always fixed to a year.\n", + "Just make sure your season of interest is fully captured within the period you select." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import date_slider\n", + "\n", + "slider = date_slider()\n", + "slider.show_slider()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set some other customization options:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.job import PostprocessParameters\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "# Choose whether you want to store the cropland mask as separate output file\n", + "save_mask = True\n", + "\n", + "# Choose whether or not you want to spatially clean the classification results\n", + "postprocess_result = True\n", + "# Choose the postprocessing method you want to use [\"smooth_probabilities\", \"majority_vote\"]\n", + "# (\"smooth_probabilities\" will do limited spatial cleaning,\n", + "# while \"majority_vote\" will do more aggressive spatial cleaning, depending on the value of kernel_size)\n", + "postprocess_method = \"majority_vote\"\n", + "# Additional parameter for the majority vote method \n", + "# (the higher the value, the more aggressive the spatial cleaning,\n", + "# should be an odd number, not larger than 25, default = 5)\n", + "kernel_size = 5\n", + "# Do you want to save the intermediate results (before applying the
postprocessing)\n", + "save_intermediate = True\n", + "# Do you want to save all class probabilities in the final product? (default is False)\n", + "keep_class_probs = True\n", + "\n", + "postprocess_parameters = PostprocessParameters(enable=postprocess_result,\n", + " method=postprocess_method,\n", + " kernel_size=kernel_size,\n", + " save_intermediate=save_intermediate,\n", + " keep_class_probs=keep_class_probs)\n", + "\n", + "# Specify the local directory where the resulting maps should be downloaded to.\n", + "run = get_input(\"model run\")\n", + "output_dir = Path(os.getcwd()) / f'CROPTYPE_{modelname}_{run}'\n", + "print(f\"Output directory: {output_dir}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now have all information we need to generate our map!
\n", + "The next cell will submit a map inference job on CDSE through OpenEO.
\n", + "The first time you run this, you will be asked to authenticate with your CDSE account by clicking the link provided below the cell.
\n", + "Then sit back and wait until your map is ready..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.job import WorldCerealProductType, generate_map, CropTypeParameters\n", + "\n", + "# Initializes default parameters\n", + "parameters = CropTypeParameters()\n", + "\n", + "# Change the URL to your custom classification model\n", + "parameters.classifier_parameters.classifier_url = model_url\n", + "parameters.save_mask = save_mask\n", + "\n", + "# Get processing period and area\n", + "processing_period = slider.get_processing_period()\n", + "processing_extent = map.get_processing_extent()\n", + "\n", + "# Launch the job\n", + "job_results = generate_map(\n", + " processing_extent,\n", + " processing_period,\n", + " output_dir=output_dir,\n", + " product_type=WorldCerealProductType.CROPTYPE,\n", + " croptype_parameters=parameters,\n", + " postprocess_parameters=postprocess_parameters,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The classification results will be automatically downloaded to your *output_dir* in .tif format.\n", + "\n", + "For a model with two classes, you get a raster file containing up to four bands:\n", + "1. The label of the winning class\n", + "2. The probability of the winning class [0 - 100]\n", + "3. and beyond (optional, depending on settings): Class probabilities of each class, ordered according to the look-up table. The look-up table for each product can be consulted in the 'results' object as produced by the 'generate_map' function.\n", + "\n", + "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", + "- \"croptype_classification_start-date_end-date.tif\" --> contains the classification labels.
A class look-up table is included in the .tif metadata.\n", + "- \"croptype_confidence_start-date_end-date.tif\" --> contains the probability associated to the prediction [0 - 100]\n", + "\n", + "In case you chose to store the original per-class probabilities, these are NOT written to a separate file and need to be consulted in the original result downloaded from OpenEO.\n", + "\n", + "Note that in case you chose to apply post-processing AND save intermediate results, you will also get a \"croptype-raw_xxx.tif\" output, which holds the classification labels and probabilities BEFORE post-processing.\n", + "\n", + "Also note that if you chose to save the cropland mask as a separate output, you will also get a cropland (and potentially cropland-raw) product." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import prepare_visualization\n", + "\n", + "rasters = prepare_visualization(job_results)\n", + "print(rasters)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting raster files can be visualized in QGIS.\n", + "\n", + "In case you are running this script on your local environment, you can alternatively use the following cells to visualize the outputs directly in this notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import visualize_products\n", + "\n", + "visualize_products(rasters, port=8887)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import show_color_legend\n", + "\n", + "show_color_legend(rasters, \"croptype\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "worldcereal", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/worldcereal_v1_demo_default_cropland.ipynb b/notebooks/worldcereal_v1_demo_default_cropland.ipynb new file mode 100644 index 00000000..259b9ff0 --- /dev/null +++ b/notebooks/worldcereal_v1_demo_default_cropland.ipynb @@ -0,0 +1,267 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](./resources/System_v1_cropland.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Introduction\n", + "\n", + "This notebook contains a short demo on how to use the WorldCereal system to generate a cropland extent map for your area and season of interest.
\n", + "The map is generated using a default model trained by the WorldCereal consortium to distinguish cropland from all other land use.\n", + "\n", + "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run. On average, one such run consumes 35 credits on the Copernicus Data Space Ecosystem.\n", + "\n", + "\n", + "
\n", + "Cropland definition: \n", + "Cropland is defined here as land used for temporary crops, i.e. crops with a less-than-1-year growing cycle which must be newly sown or planted for further production after the harvest. Sugar cane, asparagus, and cassava are also considered temporary crops, even though they remain in the field for more than 1 year. This cropland definition thus excludes perennial crops as well as (temporary) pastures.\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Content\n", + " \n", + "- [Before you start](#Before-you-start)\n", + "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", + "- [2. Define your year of interest](#2.-Define-your-year-of-interest)\n", + "- [3. Set some other options](#3.-Set-some-other-options)\n", + "- [4. Generate your map](#4.-Generate-your-map)\n", + "- [5. Final notes](#5.-Final-notes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Before you start\n", + "\n", + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem,\n", + "by completing the form [here](https://dataspace.copernicus.eu/).\n", + "This is free of charge and will grant you a number of free openEO processing credits to continue this demo." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Define your region of interest\n", + "\n", + "When running the code snippet below, an interactive map will be visualized.\n", + "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "\n", + "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported to avoid excessive credit usage (roughly 120 credits will be consumed for this size of a processing extent).\n", + "\n", + "The widget will automatically store the coordinates of the last rectangle you drew on the map." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.map import ui_map\n", + "\n", + "map = ui_map()\n", + "map.show_map()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Define your year of interest\n", + "\n", + "The default WorldCereal cropland model always uses a time series of exactly one year as input for the model.\n", + "\n", + "However, instead of just using a calendar year (January 1st - December 31st), we recommend to define the exact start and end date of your time series, or processing period, based on the timing of the local growing seasons.\n", + "\n", + "Take the following example for Western Europe, where we typically have a winter season (s1) and summer season (s2):\n", + "\n", + "

\n", + "\n", + "

\n", + "\n", + "\n", + "The best timing to start and end the time series in this case would be October (green vertical line), as for both seasons this would result in nicely capturing the growing season within the time series. It would not make sense in this case to define the year from June to June (red vertical line), as you will miss the start of the summer season and the end of the winter season in your time series.\n", + "\n", + "So if you would like to map temporary crops in Western Europe for the year 2021, we would recommend to define your processing period as October 1st 2020 - September 30th 2021.\n", + "\n", + "In case you do not know the typical seasonality of crops in your area of interest, you can consult the WorldCereal crop calendars using the function below.\n", + "\n", + "Note that in case your area of interest is located in an extremely heterogeneous part of the world, the WorldCereal seasons cannot be retrieved at the moment. As a fall-back, please consult the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import retrieve_worldcereal_seasons\n", + "\n", + "spatial_extent = map.get_processing_extent()\n", + "seasons = retrieve_worldcereal_seasons(spatial_extent)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Execute the next cell to select your processing period:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import date_slider\n", + "\n", + "slider = date_slider()\n", + "slider.show_slider()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. 
Set some other options" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from utils import get_input\n", + "\n", + "# Specify the local directory where the resulting maps should be downloaded to.\n", + "run = get_input('model run')\n", + "output_dir = Path(os.getcwd()) / f'CROPLAND_default_{run}'\n", + "print(f\"Output directory: {output_dir}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Generate your map\n", + "\n", + "We now have all information we need to generate our map!
\n", + "The next cell will submit a map inference job on CDSE through OpenEO.
\n", + "The first time you run this, you will be asked to authenticate with your CDSE account by clicking the link provided below the cell.
\n", + "Then sit back and wait until your map is ready..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.job import generate_map, PostprocessParameters\n", + "\n", + "processing_period = slider.get_processing_period()\n", + "processing_extent = map.get_processing_extent()\n", + "\n", + "# Launch the job on CDSE through OpenEO\n", + "results = generate_map(\n", + " processing_extent,\n", + " processing_period,\n", + " output_dir=output_dir,\n", + " postprocess_parameters=PostprocessParameters,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The classification results will be automatically downloaded to your *output_dir* in .tif format.
\n", + "\n", + "The result will be a raster file containing two bands:\n", + "1. The label of the winning class\n", + "2. The probability of the winning class [0 - 100]\n", + "\n", + "Using the function below, we split all this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", + "- \"cropland_classification_start-date_end-date.tif\" --> contains the classification labels. The class look-up table is included in the .tif metadata.\n", + "- \"cropland_confidence_start-date_end-date.tif\" --> contains the probability associated to the prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import prepare_visualization\n", + "\n", + "filepaths = prepare_visualization(results)\n", + "filepaths" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting raster files can be visualized in QGIS." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Final notes\n", + "\n", + "Both the quantity and quality of training data are main drivers affecting the quality of the cropland extent map.
\n", + "Using the figure below, you get a relative indication of how much training data was available for training our current default cropland model:\n", + "\n", + "

\n", + "\n", + "

\n", + "\n", + "In case you own good quality reference data on land cover and/or crop types, consider contributing these data to the WorldCereal project through our [Reference Data Module](https://ewoc-rdm-ui.iiasa.ac.at/)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "worldcereal", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/worldcereal_v1_demo_default_cropland_EXTENDED.ipynb b/notebooks/worldcereal_v1_demo_default_cropland_EXTENDED.ipynb deleted file mode 100644 index ca5b377d..00000000 --- a/notebooks/worldcereal_v1_demo_default_cropland_EXTENDED.ipynb +++ /dev/null @@ -1,451 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![](./resources/System_v1_cropland.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook contains a short demo on how to use the WorldCereal system to generate a temporary crop extent map for your area and season of interest.\n", - "The map is generated using a default model trained by the WorldCereal consortium to distinguish temporary crops from all other land use.\n", - "\n", - "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Content\n", - " \n", - "- [Before you start](###-Before-you-start)\n", - "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", - "- [2. Define your year of interest](#2.-Define-your-year-of-interest)\n", - "- [3. Set some other options](#3.-Set-some-other-options)\n", - "- [4. 
Generate your map](#2.-Generate-your-map)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Before you start\n", - "\n", - "In order to run this notebook, you need to create an account on the Copernicus Data Space Ecosystem,\n", - "by completing the form [HERE](https://identity.dataspace.copernicus.eu/auth/realms/CDSE/login-actions/registration?client_id=cdse-public&tab_id=eRKGqDvoYI0)." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# TEMPORARY CELL\n", - "\n", - "import sys\n", - "sys.path.append('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/notebooks')\n", - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Define your region of interest\n", - "\n", - "When running the code snippet below, an interactive map will be visualized. Click the Rectangle button on the left hand side of the map to start drawing your region of interest. When finished, execute the second cell to store the coordinates of your region of interest.\n", - "\n", - "In case your region exceeds the 250 km² limit, you will be asked to draw again." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e27f91e03cf6427cb43d889e53cd466c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Map(center=[51.1872, 5.1154], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoo…" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 19:02:03.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mhandle_draw\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mYour processing extent: (-2.299034, 51.209393, -2.267434, 51.224876)\u001b[0m\n", - "\u001b[32m2024-10-14 19:02:03.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mhandle_draw\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mArea of processing extent: 3.88 km²\u001b[0m\n" - ] - } - ], - "source": [ - "from worldcereal.utils.map import ui_map\n", - "\n", - "map = ui_map()\n", - "map.show_map()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Define your year of interest\n", - "\n", - "The default WorldCereal temporary crop model always uses a time series of exactly one year as input for the model.\n", - "\n", - "However, instead of just using a calendar year (January 1st - December 31st), we recommend to define the exact start and end date of your time series, or processing period, based on the timing of the local growing seasons.\n", - "\n", - "Take the following example for Western Europe, where we typically have a winter season (s1) and summer season (s2):\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "\n", - "The best timing to start and end the time series in this case would be October (green vertical line), as for both seasons this would result in nicely capturing the growing season within the time series. It would not make sense in this case to define the year from June to June (red vertical line), as you will miss the start of the summer season and the end of the winter season in your time series.\n", - "\n", - "So if you would like to map temporary crops in Western Europe for the year 2021, we would recommend to define your processing period as October 1st 2020 - September 30th 2021.\n", - "\n", - "In case you do not know the typical seasonality of crops in your area of interest, you can consult the WorldCereal crop calendars using the function below.\n", - "\n", - "Note that in case your area of interest is located in an extremely heterogeneous part of the world, the WorldCereal seasons cannot be retrieved at the moment. As a fall-back, please consult the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-14 19:02:11.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mget_processing_extent\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYour processing extent: (-2.299034, 51.209393, -2.267434, 51.224876)\u001b[0m\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from utils import retrieve_worldcereal_seasons\n", - "\n", - "spatial_extent = map.get_processing_extent()\n", - "seasons = retrieve_worldcereal_seasons(spatial_extent)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Execute the next cell to select your processing period:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5b646621b65e4f5eb02a40eda8f95e00", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(HTML(value=\"\\n
\\n
0.0631313673349114\n", - "WARNING:PrestoFeatureExtractor:No additional dependencies are defined. If you wish to add dependencies to your feature extractor, override the `dependencies` method in your class.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0:00:00 Job 'j-241014a157564fe2aa11cfc08be67c3a': send 'start'\n", - "0:00:16 Job 'j-241014a157564fe2aa11cfc08be67c3a': created (progress 0%)\n", - "0:00:22 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:00:29 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:00:38 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:00:48 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:01:02 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:01:20 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:01:42 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:02:07 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:02:38 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:03:16 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:04:04 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:05:04 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:06:05 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:07:06 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:08:08 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:09:08 Job 'j-241014a157564fe2aa11cfc08be67c3a': running (progress N/A)\n", - "0:10:09 Job 'j-241014a157564fe2aa11cfc08be67c3a': finished (progress 100%)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:openeo.rest.job:Downloading Job result asset 'cropland-raw_20211201_20221130.tif' from 
https://openeo.creo.vito.be/openeo/jobs/j-241014a157564fe2aa11cfc08be67c3a/results/assets/ZGNjYWI2ZDktODQ2Yy00OGE5LTlkOTQtNDk3MTQ2Y2IyMjg1/069a930f1ee92b4b285afda2f08e88c3/cropland-raw_20211201_20221130.tif?expires=1729530975 to /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_ENGLAND/cropland-raw_20211201_20221130.tif\n", - "INFO:openeo.rest.job:Downloading Job result asset 'cropland_20211201_20221130.tif' from https://openeo.creo.vito.be/openeo/jobs/j-241014a157564fe2aa11cfc08be67c3a/results/assets/ZGNjYWI2ZDktODQ2Yy00OGE5LTlkOTQtNDk3MTQ2Y2IyMjg1/6462b4d77c6a8bac07319fd3f9f6fbd4/cropland_20211201_20221130.tif?expires=1729530975 to /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_ENGLAND/cropland_20211201_20221130.tif\n" - ] - } - ], - "source": [ - "from worldcereal.job import generate_map\n", - "\n", - "processing_period = slider.get_processing_period()\n", - "processing_extent = map.get_processing_extent()\n", - "\n", - "# Launch the job on CDSE through OpenEO\n", - "results = generate_map(\n", - " processing_extent,\n", - " processing_period,\n", - " output_dir=output_dir,\n", - " postprocess_parameters=postprocess_parameters,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The classification results will be automatically downloaded to your output_dir in .tif format.\n", - "By default, OpenEO stores the class labels and confidence score in one file.\n", - "\n", - "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", - "- \"xxx_classification_start-date_end-date.tif\" --> contains the classification labels. 
A class look-up table is included in the .tif metadata.\n", - "- \"xxx_confidence_start-date_end-date.tif\" --> contains the probability associated to the prediction [0 - 100]" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'cropland-raw': {'classification': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_ENGLAND/cropland-raw_classification_20211201_20221130.tif'), 'confidence': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_ENGLAND/cropland-raw_confidence_20211201_20221130.tif')}, 'cropland': {'classification': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_ENGLAND/cropland_classification_20211201_20221130.tif'), 'confidence': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_ENGLAND/cropland_confidence_20211201_20221130.tif')}}\n" - ] - } - ], - "source": [ - "from utils import prepare_visualization\n", - "\n", - "rasters = prepare_visualization(results)\n", - "print(rasters)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The resulting raster files can be visualized in QGIS, or using the function in the cell below..." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4d40017e7b1b41e5af029f9a32a9da0d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Map(center=[51.217128, -2.2832285], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title'…" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from utils import visualize_products\n", - "\n", - "visualize_products(rasters, port=8889)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, use the cell below to visualize the look-up table for the classification product." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOIAAABQCAYAAAAEEqmpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAAsTAAALEwEAmpwYAAAMUklEQVR4nO3db0xT5x4H8G+Z9I90m0yQDMoApR3rghlM+bOxCSrJ3CArksyomRaZQxgqi1IoJiauwJTBlGRTluvAF0yHyswyVJw0o9rMRRG6KKBuhG5TGQPNsolSDH3uCy/ncgYUuBfpqf19XsFznvP0OU2+nHOe9vwQMcYYCCFO5eHsCRBCKIiECAIFkRABoCASIgAUREIEgIJIiABQEAkRAAoiIQIww9HG06dPw2azTddcppxEIkFiYqKzp0HIuBwG0WazQSKRTNdcppwr/xEh7oUuTf8H8fHxyM7OdvY0yCOEguhAY2MjRCIRent7nT0V8oijIArI/fv3nT0F4iRuH0SbzYacnBz4+flBKpUiJiYGZrMZVqsVCQkJAABfX1+IRCJotVpuP7vdjoKCAvj4+GDOnDnYunUr7HY7t31gYAB5eXlQKBTw8vLCwoULcerUKW770Nn2xIkTiIqKglgs5m0n7sXtg6jT6VBTU4PKykq0tLQgPDwcr732Gjw9PVFbWwsAaG1tRVdXF8rLy7n9vvjiC8yYMQPff/89PvnkE+zZswc1NTXc9rS0NJhMJhw8eBCXLl3C2rVrkZycjB9//JH3+nl5eSgsLMSVK1cQHR09PQdNBEfk6HnEuro6l181TUpKGnN7X18fvL29sX//fqxZswYAMDg4CJVKhZUrV2Lp0qVISEhAT08PfHx8uP3i4+Nhs9lw7tw5ri0xMRFBQUHYv38/Ojo6oFQqYbVa8cwzz3B9NBoN/P39sXfvXjQ2NiIhIQFHjx5FamrqQzh64kocfnzxqOvo6MD9+/fx8ssvc22PPfYYYmNj0dbWhqVLl4657/z583m/+/v7448//gAANDc3gzEGtVrN62Oz2bB48WJe24IFC/7fwyCPALcO4tDFgEgkGrFttLbhPD09R/Qfuke02+0QiUS4cOHCiH4ymYz
3u5eX16TnTR49bh3E0NBQiMVimM1mzJ07F8CDS9Nz585h1apVEIvFXNtkREREgDGG33//nVvwIcQRtw6il5cXMjMzkZ+fDx8fH4SEhGD37t3o7u5GVlYWd2Y7fvw4kpOTIZPJIJfLxx1XpVJh9erV0Gq1KCsrQ2RkJG7fvo3GxkbMnTsXy5cvn4ajI67ErYMIALt27QLwYJXzzz//REREBOrr6/H0008DAHbs2IFt27bhnXfewZo1a3DgwIEJjVtVVYWioiLodDpcv34dTz31FKKiougMSUbl1qumhAiF23+OSIgQUBAJEQAKIiECQEEkRAAcrppKJBKXfrjWlReaiHtxuGpKCJkedGlKiABQEAkRAIf3iA0nTqB/2MOurkbq4YGlr7/u7GkQMi6HQey32xG+PnO65jLlLv1rn7OnQMiE0KWpE1itVohEIjQ1NT3010pKSuKV+CDCREEkRAAoiJNAVdbIw+L2QWSMoaysDEqlEhKJBAqFAnq9nrt8PHToEBYvXgyZTIbPPvsMdrsdBoMBgYGBkEgkCA8Px9dff82NN7TfwYMHERcXB6lUirCwMHz77bdjzmFwcBDp6ekICQmBTCaDUqlESUkJryqcVqtFUlISysvLERAQAG9vb6SlpeHu3btcn7t370Kr1UIul8PPzw/FxcUP500jU87tg1hQUACDwQC9Xo/W1lYcOXIEgYGB3Ha9Xo+srCy0tbVBo9GgvLwcH330EXbt2oVLly4hJSUFy5cvh8Vi4Y2r0+mwadMmWCwWJCYm4s0338SNGzdGnYPdbkdAQAAOHz6M9vZ2FBUVobi4GFVVVbx+Z8+exeXLl9HQ0ICamhocO3aMV1lu69atOH36NGpra2E0GtHS0oIzZ85M3ZtFHppxn0d09VVTR88j3rlzBz4+PtizZw82bNjA22a1WhESEoLS0lJs2bKFaw8ICEBGRga2b9/OtcXHx0OhUKC6uprbr7CwENu2bQPwIGhhYWF46623UFhYyPW5cOHCmMWj8vPz0dTUhIaGBgAPzohGoxGdnZ2YMePBYvf69evR2dmJhoYG3LlzB7Nnz0ZlZSVWr17NHZ9CoYBGo5nwA83EOdz6jNjW1gabzYYlS5aM2Wd4UP766y/cvHmTV/UNAOLi4tDW1sZri42N5X728PBAdHT0iD7DVVRUYMGCBfD19YVcLsfu3bvx66+/8vqo1WouhAC/clxHRwcGBgZ4ryuXyxEeHj7maxLhcOsgTuRrtqNVWftfqr45UlNTg5ycHGi1Wpw6dQoWiwVZWVkYGBjg9XNUOY6+Muza3DqIarUaEokERqNxQv2feOIJ+Pv7w2w289rNZvOIGqY//PAD9zNjDOfPn8dzzz036rhmsxnR0dHIzs5GZGQkQkND0dHRMaljCQ0NhaenJ+91+/r6cPny5UmNQ5zDrYtHPf7449i8eTP0ej0kEgleffVV3Lp1CxcvXsSyZctG3Sc3Nxfbt2+HUqnEiy++iOrqapw9exYXL17k9du3bx9UKhXCw8Oxd+9e/PLLL8jMHP1+W6VS4cCBAzh58iRCQ0Px5ZdfwmQywdvbe8LHIpfLkZ6ejry8PPj6+sLf3x8ffPDBpEtBEudw6yACwIcffghvb28YDAZcv34dfn5+XPn90WzatAl///03dDoduru78eyzz6K2thYvvPACr9/OnTvx8ccfo7m5GUFBQTh27BgUCsWoY2ZkZMBisWDVqlVgjCE1NRVbtmxBZWXlpI6ltLQUfX19SElJwcyZM7Fx40b09fVNagziHG69avowTGRFlJB/cut7REKEgoJIiAC4/T3iVAsODqaPEsik0RmREAFweEaUeni49MO1Ug/6O0NcA1VxI0QA6JRBiABQEAkRAIf3iEajEffu3ZuuuUw5mUzm8MkKQoTCYRDv3bvn0v9fsK6uztlTIGRC6NJ0GjU1NUEkEsFqtTp7KkRg3DqIWq0WIpEIhYWFvPbGxkaIRCL09vY6aWb/FR8fj+z
sbGdPgzxkbh1EAJBKpSgpKUFPT4+zp0LcmNsHMSEhAcHBwTAYDA77nTlzBtHR0ZBKpfDz88P7778/4gn6f6qvr0dYWBikUileeeUVXLt2jbf91q1bWLlyJRQKBWQyGZ5//nlewSitVguTyYRPP/0UIpGIu6ydSNU34lrcPogeHh7YuXMnKioqxnwq/saNG1i2bBkiIiLQ0tKCzz//HIcOHYJerx9z3N9++w0ajQaJiYmwWCzYuHEjdDodr09/fz8iIyNRV1eH1tZWbN68GRkZGVzFgPLycsTGxiItLQ1dXV3o6upCYGDghKu+ERfCHPjmm28cbRa88ea/du1a9sYbbzDGGIuPj2crVqxgjDH23XffMQCsp6eHMcZYQUEBmzdvHhscHOT2raqqYmKxmPX19Y06tl6vZ0qlktntdq7NYDAwAKyzs3PMOa1YsYKlp6dzvy9atIi99957jg+UMZaXl8eWLFkybj8iTG5/RhxSUlKCI0eOjPr/KNrb2xEbGwuPYd9djYuLw8DAAH7++edRx2tvb0dMTAyvqNTwCmvAg8LCRUVFmD9/PmbPng25XI6vvvpqRPW20Uyk6htxHRTE/1i4cCFSU1ORl5c3YhtjbMwqbWO1swl8hbe0tBRlZWXIzc2F0WiExWKBRqMZ995zolXfiOug5xGHKS4uhlqtRn19Pa9drVbj8OHDsNvt3FnRbDZDLBZj3rx5o46lVqtRW1vLC/HwCmtDYyQnJ+Ptt98G8CC8165dw6xZs7g+YrF4RAGo4VXfhky26hsRFjojDhMaGop3332XV8YeALKysnDz5k1kZWWhvb0dx48fR35+PrKzszFz5sxRx9qwYQOsVitycnJw9epVHD16FBUVFbw+KpUKRqMRZrMZV65cQXZ2Njo7O3l9goODcf78eVitVvT29sJut0OlUqG5uRknT57ETz/9BIPBAJPJNLVvBplejm4g3WmxZkh3dzeTy+W8xRrGGDOZTCwqKoqJxWI2Z84clpOTw/r7+x2OX1dXx1QqFZNIJOyll15i1dXVvMWa27dvs5SUFCaXy5mvry/Lzc1lmZmZbNGiRdwYV69eZTExMUwmk3H72mw2tm7dOjZr1iz25JNPsnXr1rEdO3awoKCgSb0/RDjGreLm6t81deX5E/dBl6aECAAFkRABoCASIgAUREIEwOHniDKZzKUfrpXJZM6eAiETQlXcCBEAujQlRAAoiIQIAAWREAGgIBIiABREQgSAgkiIAFAQCRGAfwMShaprE0W8AwAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from utils import show_color_legend\n", - "\n", - "show_color_legend(rasters, \"cropland\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Add section where user can check the availability of cropland reference data!" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "worldcereal", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/worldcereal_v1_demo_default_cropland_SHORT.ipynb b/notebooks/worldcereal_v1_demo_default_cropland_SHORT.ipynb deleted file mode 100644 index 3de91d98..00000000 --- a/notebooks/worldcereal_v1_demo_default_cropland_SHORT.ipynb +++ /dev/null @@ -1,424 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![](./resources/System_v1_cropland.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook contains a short demo on how to use the WorldCereal system to generate a temporary crop extent map for your area and season of interest.\n", - "The map is generated using a default model trained by the WorldCereal consortium to distinguish temporary crops from all other land use.\n", - "\n", - "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Content\n", - " \n", - "- [Before you start](###-Before-you-start)\n", - "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", - "- [2. 
Define your year of interest](#2.-Define-your-year-of-interest)\n", - "- [3. Set some other options](#3.-Set-some-other-options)\n", - "- [4. Generate your map](#2.-Generate-your-map)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Before you start\n", - "\n", - "In order to run this notebook, you need to create an account on the Copernicus Data Space Ecosystem,\n", - "by completing the form [HERE](https://identity.dataspace.copernicus.eu/auth/realms/CDSE/login-actions/registration?client_id=cdse-public&tab_id=eRKGqDvoYI0)." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# TEMPORARY CELL\n", - "\n", - "import sys\n", - "sys.path.append('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/notebooks')\n", - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Define your region of interest\n", - "\n", - "When running the code snippet below, an interactive map will be visualized. Click the Rectangle button on the left hand side of the map to start drawing your region of interest. When finished, execute the second cell to store the coordinates of your region of interest.\n", - "\n", - "In case your region exceeds the 250 km² limit, you will be asked to draw again." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "659014fc3caa44a991c98b8ee67056e7", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Map(center=[51.1872, 5.1154], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoo…" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-10 14:26:31.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mhandle_draw\u001b[0m:\u001b[36m141\u001b[0m - \u001b[1mYour processing extent: (4.515724, 50.982641, 4.609108, 51.020018)\u001b[0m\n", - "\u001b[32m2024-10-10 14:26:31.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mhandle_draw\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1mArea of processing extent: 28.52 km²\u001b[0m\n" - ] - } - ], - "source": [ - "from worldcereal.utils.map import ui_map\n", - "\n", - "map = ui_map()\n", - "map.show_map()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Define your year of interest\n", - "\n", - "The default WorldCereal temporary crop model always uses a time series of exactly one year as input for the model.\n", - "\n", - "However, instead of just using a calendar year (January 1st - December 31st), we recommend to define the exact start and end date of your time series, or processing period, based on the timing of the local growing seasons.\n", - "\n", - "Take the following example for Western Europe, where we typically have a winter season (s1) and summer season (s2):\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "\n", - "The best timing to start and end the time series in this case would be October (green vertical line), as for both seasons this would result in nicely capturing the growing season within the time series. It would not make sense in this case to define the year from June to June (red vertical line), as you will miss the start of the summer season and the end of the winter season in your time series.\n", - "\n", - "So if you would like to map temporary crops in Western Europe for the year 2021, we would recommend to define your processing period as October 1st 2020 - September 30th 2021.\n", - "\n", - "In case you do not know the typical seasonality of crops in your area of interest, you can consult the WorldCereal crop calendars using the function below.\n", - "\n", - "Note that in case your area of interest is located in an extremely heterogeneous part of the world, the WorldCereal seasons cannot be retrieved at the moment. As a fall-back, please consult the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-10-10 14:26:45.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mworldcereal.utils.map\u001b[0m:\u001b[36mget_processing_extent\u001b[0m:\u001b[36m236\u001b[0m - \u001b[1mYour processing extent: (4.515724, 50.982641, 4.609108, 51.020018)\u001b[0m\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from utils import retrieve_worldcereal_seasons\n", - "\n", - "spatial_extent = map.get_processing_extent()\n", - "seasons = retrieve_worldcereal_seasons(spatial_extent)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Execute the next cell to select your processing period:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f3ed0a9f8b6e44368f676b4652854537", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(HTML(value=\"\\n
\\n
2.9861386687313125\n", - "INFO:openeo_gfmap.utils:Selected orbit state: ASCENDING. Reason: Orbit has more cumulative intersected area. 3.0794083284461435 > 2.9861386687313125\n", - "WARNING:PrestoFeatureExtractor:No additional dependencies are defined. If you wish to add dependencies to your feature extractor, override the `dependencies` method in your class.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0:00:00 Job 'j-2410101e92934a5da839ca7a10984c2b': send 'start'\n", - "0:00:14 Job 'j-2410101e92934a5da839ca7a10984c2b': created (progress 0%)\n", - "0:00:19 Job 'j-2410101e92934a5da839ca7a10984c2b': created (progress 0%)\n", - "0:00:25 Job 'j-2410101e92934a5da839ca7a10984c2b': created (progress 0%)\n", - "0:00:33 Job 'j-2410101e92934a5da839ca7a10984c2b': created (progress 0%)\n", - "0:00:43 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:00:56 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:01:11 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:01:31 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:01:55 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:02:25 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:03:02 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:03:49 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:04:47 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:05:48 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:06:48 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:07:48 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:08:48 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:09:49 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:10:49 Job 'j-2410101e92934a5da839ca7a10984c2b': 
running (progress N/A)\n", - "0:11:49 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:12:50 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:13:50 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:14:50 Job 'j-2410101e92934a5da839ca7a10984c2b': running (progress N/A)\n", - "0:15:51 Job 'j-2410101e92934a5da839ca7a10984c2b': finished (progress 100%)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:openeo.rest.job:Downloading Job result asset 'cropland_2020-01-01Z.tif' from https://openeo.creo.vito.be/openeo/jobs/j-2410101e92934a5da839ca7a10984c2b/results/assets/ZGNjYWI2ZDktODQ2Yy00OGE5LTlkOTQtNDk3MTQ2Y2IyMjg1/5ef2ad85691d9dd85a2280ff0cbbd4db/cropland_2020-01-01Z.tif?expires=1729176998 to /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_2020-01-01Z.tif\n" - ] - } - ], - "source": [ - "from worldcereal.job import generate_map\n", - "\n", - "processing_period = slider.get_processing_period()\n", - "processing_extent = map.get_processing_extent()\n", - "\n", - "# Launch the job on CDSE through OpenEO\n", - "results = generate_map(\n", - " processing_extent,\n", - " processing_period,\n", - " output_dir=output_dir,\n", - " postprocess_parameters=postprocess_parameters,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The classification results will be automatically downloaded to your output_dir in .tif format.\n", - "By default, OpenEO stores the class labels and confidence score in one file.\n", - "\n", - "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", - "- \"xxx_classification_start-date_end-date.tif\" --> contains the classification labels. 
A class look-up table is included in the .tif metadata.\n", - "- \"xxx_confidence_start-date_end-date.tif\" --> contains the probability associated to the prediction [0 - 100]" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:rasterio._env:CPLE_AppDefined in /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_2020-01-01Z.tif: /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_2020-01-01Z.tif:Nonstandard tile width 100, convert file\n", - "WARNING:rasterio._env:CPLE_AppDefined in /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_2020-01-01Z.tif: /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_2020-01-01Z.tif:Nonstandard tile length 100, convert file\n", - "WARNING:rasterio._env:CPLE_AppDefined in /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_2020-01-01Z.tif: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", - "WARNING:rasterio._env:CPLE_AppDefined in /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_2020-01-01Z.tif:Nonstandard tile width 100, convert file\n", - "WARNING:rasterio._env:CPLE_AppDefined in /home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_2020-01-01Z.tif:Nonstandard tile length 100, convert file\n", - "WARNING:rasterio._env:CPLE_AppDefined in TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. 
Defining non-color channels as ExtraSamples.\n", - "INFO:rasterio._env:GDAL signalled an error: err_no=1, msg='TIFFSetField:/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_classification_20181001_20190930.tif: Cannot modify tag \"PhotometricInterpretation\" while writing'\n", - "INFO:rasterio._env:GDAL signalled an error: err_no=1, msg='TIFFSetField:/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_confidence_20181001_20190930.tif: Cannot modify tag \"PhotometricInterpretation\" while writing'\n" - ] - }, - { - "data": { - "text/plain": [ - "{'cropland': {'classification': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_classification_20181001_20190930.tif'),\n", - " 'confidence': PosixPath('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/CROPLAND_test_nopostprocess/cropland_confidence_20181001_20190930.tif')}}" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from utils import prepare_visualization\n", - "\n", - "filepaths = prepare_visualization(results)\n", - "filepaths" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The resulting raster files can be visualized in QGIS." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Add section where user can check the availability of cropland reference data!" 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "worldcereal", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/worldcereal_v1_demo_default_cropland_extended.ipynb b/notebooks/worldcereal_v1_demo_default_cropland_extended.ipynb new file mode 100644 index 00000000..0e62d718 --- /dev/null +++ b/notebooks/worldcereal_v1_demo_default_cropland_extended.ipynb @@ -0,0 +1,316 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](./resources/System_v1_cropland.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Introduction\n", + "\n", + "This notebook contains a short demo on how to use the WorldCereal system to generate a cropland extent map for your area and season of interest.
\n", + "The map is generated using a default model trained by the WorldCereal consortium to distinguish cropland from all other land use.\n", + "\n", + "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run. On average, one such run consumes 35 credits on the Copernicus Data Space Ecosystem.\n", + "\n", + "
\n", + "Cropland definition: \n", + "Cropland is defined here as land used for temporary crops, i.e. crops with a less-than-1-year growing cycle which must be newly sown or planted for further production after the harvest. Sugar cane, asparagus, and cassava are also considered temporary crops, even though they remain in the field for more than 1 year. This cropland definition thus excludes perennial crops as well as (temporary) pastures.\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Content\n", + " \n", + "- [Before you start](#Before-you-start)\n", + "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", + "- [2. Define your year of interest](#2.-Define-your-year-of-interest)\n", + "- [3. Set some other options](#3.-Set-some-other-options)\n", + "- [4. Generate your map](#4.-Generate-your-map)\n", + "- [5. Final notes](#5.-Final-notes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Before you start\n", + "\n", + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem,\n", + "by completing the form [here](https://dataspace.copernicus.eu/).\n", + "This is free of charge and will grant you a number of free openEO processing credits to continue this demo." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Define your region of interest\n", + "\n", + "When running the code snippet below, an interactive map will be visualized.\n", + "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "\n", + "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported to avoid excessive credit usage (roughly 120 credits will be consumed for this size of a processing extent).\n", + "\n", + "The widget will automatically store the coordinates of the last rectangle you drew on the map." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.map import ui_map\n", + "\n", + "map = ui_map()\n", + "map.show_map()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Define your year of interest\n", + "\n", + "The default WorldCereal cropland model always uses a time series of exactly one year as input for the model.\n", + "\n", + "However, instead of just using a calendar year (January 1st - December 31st), we recommend to define the exact start and end date of your time series, or processing period, based on the timing of the local growing seasons.\n", + "\n", + "Take the following example for Western Europe, where we typically have a winter season (s1) and summer season (s2):\n", + "\n", + "

\n", + "\n", + "

\n", + "\n", + "\n", + "The best timing to start and end the time series in this case would be October (green vertical line), as for both seasons this would result in nicely capturing the growing season within the time series. It would not make sense in this case to define the year from June to June (red vertical line), as you will miss the start of the summer season and the end of the winter season in your time series.\n", + "\n", + "So if you would like to map temporary crops in Western Europe for the year 2021, we would recommend to define your processing period as October 1st 2020 - September 30th 2021.\n", + "\n", + "In case you do not know the typical seasonality of crops in your area of interest, you can consult the WorldCereal crop calendars using the function below.\n", + "\n", + "Note that in case your area of interest is located in an extremely heterogeneous part of the world, the WorldCereal seasons cannot be retrieved at the moment. As a fall-back, please consult the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import retrieve_worldcereal_seasons\n", + "\n", + "spatial_extent = map.get_processing_extent()\n", + "seasons = retrieve_worldcereal_seasons(spatial_extent)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Execute the next cell to select your processing period:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import date_slider\n", + "\n", + "slider = date_slider()\n", + "slider.show_slider()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. 
Set some other options" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.job import PostprocessParameters\n", + "import os\n", + "from pathlib import Path\n", + "from utils import get_input\n", + "\n", + "# Choose whether or not you want to spatially clean the classification results\n", + "postprocess_result = True\n", + "# Choose the postprocessing method you want to use [\"smooth_probabilities\", \"majority_vote\"]\n", + "# (\"smooth_probabilities\" will do limited spatial cleaning,\n", + "# while \"majority_vote\" will do more aggressive spatial cleaning, depending on the value of kernel_size)\n", + "postprocess_method = \"majority_vote\"\n", + "# Additional parameter for the majority vote method \n", + "# (the higher the value, the more aggressive the spatial cleaning,\n", + "# should be an odd number, not larger than 25, default = 5)\n", + "kernel_size = 5\n", + "# Do you want to save the intermediate results (before applying the postprocessing)?\n", + "save_intermediate = True\n", + "# Do you want to save all class probabilities in the final product? \n", + "keep_class_probs = True\n", + "\n", + "postprocess_parameters = PostprocessParameters(enable=postprocess_result,\n", + " method=postprocess_method,\n", + " kernel_size=kernel_size,\n", + " save_intermediate=save_intermediate,\n", + " keep_class_probs=keep_class_probs)\n", + "\n", + "# Specify the local directory where the resulting maps should be downloaded to.\n", + "run = get_input('model run')\n", + "output_dir = Path(os.getcwd()) / f'CROPLAND_default_{run}'\n", + "print(f\"Output directory: {output_dir}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Generate your map\n", + "\n", + "We now have all information we need to generate our map!
\n", + "The next cell will submit a map inference job on CDSE through OpenEO.
\n", + "The first time you run this, you will be asked to authenticate with your CDSE account by clicking the link provided below the cell.
\n", + "Then sit back and wait until your map is ready..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.job import generate_map\n", + "\n", + "processing_period = slider.get_processing_period()\n", + "processing_extent = map.get_processing_extent()\n", + "\n", + "# Launch the job on CDSE through OpenEO\n", + "results = generate_map(\n", + " processing_extent,\n", + " processing_period,\n", + " output_dir=output_dir,\n", + " postprocess_parameters=postprocess_parameters,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The classification results will be automatically downloaded to your *output_dir* in .tif format.
\n", + "For a model with two classes, you get a raster file containing up to four bands:\n", + "1. The label of the winning class\n", + "2. The probability of the winning class [0 - 100]\n", + "3. and beyond (optional, depending on settings): Class probabilities of each class, ordered according to the look-up table. The look-up table for each product can be consulted in the 'results' object as produced by the 'generate_map' function.\n", + "\n", + "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", + "- \"cropland_classification_start-date_end-date.tif\" --> contains the classification labels. A class look-up table is included in the .tif metadata.\n", + "- \"cropland_confidence_start-date_end-date.tif\" --> contains the probability associated to the prediction [0 - 100]\n", + "\n", + "The individual class probabilities are currently NOT saved to a separate file and need to be consulted in the original output from OpenEO.\n", + "\n", + "Note that in case you chose to apply post-processing AND save intermediate results, you will also get a \"cropland-raw_xxx.tif\" output, which holds the classification labels and probabilities BEFORE post-processing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import prepare_visualization\n", + "\n", + "rasters = prepare_visualization(results)\n", + "print(rasters)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting raster files can be visualized in QGIS.\n", + "\n", + "In case you are running this script on your local environment, you can alternatively use the following cells to visualize the outputs directly in this notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import visualize_products\n", + "\n", + "visualize_products(rasters, port=8889)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import show_color_legend\n", + "\n", + "show_color_legend(rasters, \"cropland\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Final notes\n", + "\n", + "Both the quantity and quality of training data are main drivers affecting the quality of the cropland extent map.
\n", + "Using the figure below, you get a relative indication how much training data was available for training our current default cropland model:\n", + "\n", + "

\n", + "\n", + "

\n", + "\n", + "In case you own good quality reference data on land cover and/or crop types, consider contributing these data to the WorldCereal project through our [Reference Data Module](https://ewoc-rdm-ui.iiasa.ac.at/)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "worldcereal", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/worldcereal/parameters.py b/src/worldcereal/parameters.py index 69a1fa44..7b9b0e7e 100644 --- a/src/worldcereal/parameters.py +++ b/src/worldcereal/parameters.py @@ -167,7 +167,7 @@ class PostprocessParameters(BaseModel): method: str (default="smooth_probabilities") The method to use for postprocessing. Must be one of ["smooth_probabilities", "majority_vote"] kernel_size: int (default=5) - Used for majority vote postprocessing. Must be smaller than 25. + Used for majority vote postprocessing. Must be an odd number, larger than 1 and smaller than 25. save_intermediate: bool (default=False) Whether to save intermediate results (before applying the postprocessing). The intermediate results will be saved in the GeoTiff format. 
@@ -206,9 +206,17 @@ def check_parameters(self): ) if self.method == "majority_vote": + if self.kernel_size % 2 == 0: + raise ValueError( + f"Kernel size for majority filtering should be an odd number, got {self.kernel_size}" + ) if self.kernel_size > 25: raise ValueError( - f"Kernel size must be smaller than 25, got {self.kernel_size}" + f"Kernel size for majority filtering should be an odd number smaller than 25, got {self.kernel_size}" + ) + if self.kernel_size < 3: + raise ValueError( + f"Kernel size for majority filtering should be an odd number larger than 1, got {self.kernel_size}" ) return self