From c94a5f1366bb8f8caaeb17070defbd01baad83b9 Mon Sep 17 00:00:00 2001 From: Victoria <112418493+veni-vidi-vici-dormivi@users.noreply.github.com> Date: Thu, 22 Aug 2024 07:06:15 +0200 Subject: [PATCH] add mesmer_m example script (#491) * add mesmer_m example script * changelog * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove unused imports * adjustments * replace script with notebook * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * restructure and add some headings * remove inputs * add some comments * typos * date_range: update freq string (#504) * date_range: update freq string * add freq conditionals * reference: only year as link (#505) * nits * more info on time coordinate * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update comment & clear output --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Mathias Hauser Co-authored-by: Mathias Hauser --- CHANGELOG.rst | 1 + examples/example_mesmer_m.ipynb | 510 ++++++++++++++++++++++++++++++++ 2 files changed, 511 insertions(+) create mode 100644 examples/example_mesmer_m.ipynb diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e657bbe3..3e526533 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -63,6 +63,7 @@ that this led to some numerical changes compared to the MESMER-M publication `#421 `_). - move the harmonic model and power transformer functionalities to the stats module ( `#484 `_). +- add example script for MESMER-M workflow (`#491 `_) Auto-Regression ~~~~~~~~~~~~~~~ diff --git a/examples/example_mesmer_m.ipynb b/examples/example_mesmer_m.ipynb new file mode 100644 index 00000000..e3613e96 --- /dev/null +++ b/examples/example_mesmer_m.ipynb @@ -0,0 +1,510 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example execution of MESMER-M workflow\n", + "Training and emulation of monthly local temperature from yearly local temperature. We use an example data set on a coarse (20° x 20°) grid.\n", + "\n", + "Import libraries and check MESMER version:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import importlib\n", + "\n", + "import xarray as xr\n", + "\n", + "import mesmer\n", + "\n", + "mesmer.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calibrate emulator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "LOCALISATION_RADII = list(range(1250, 6251, 250)) + list(range(6500, 8501, 500))\n", + "THRESHOLD_LAND = 1 / 3\n", + "REF_PERIOD = slice(\"1850\", \"1900\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# define paths of the example data\n", + "\n", + "model = \"IPSL-CM6A-LR\"\n", + "TEST_DATA_PATH = importlib.resources.files(\"mesmer\").parent / \"tests\" / \"test-data\"\n", + "cmip6_data_path = TEST_DATA_PATH / \"calibrate-coarse-grid\" / \"cmip6-ng\"\n", + "\n", + "path_tas_ann = cmip6_data_path / \"tas\" / \"ann\" / \"g025\"\n", + "fN_hist_ann = path_tas_ann / f\"tas_ann_{model}_historical_r1i1p1f1_g025.nc\"\n", + "fN_proj_ann = path_tas_ann / f\"tas_ann_{model}_ssp585_r1i1p1f1_g025.nc\"\n", + "\n", + "path_tas_mon = cmip6_data_path / \"tas\" / \"mon\" / \"g025\"\n", + "fN_hist_mon = path_tas_mon / f\"tas_mon_{model}_historical_r1i1p1f1_g025.nc\"\n", + "fN_proj_mon = path_tas_mon / f\"tas_mon_{model}_ssp585_r1i1p1f1_g025.nc\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load Data for training the emulator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# yearly temperature\n", + "tas_y = xr.open_mfdataset(\n", + " [fN_hist_ann, fN_proj_ann],\n", + " combine=\"by_coords\",\n", + " use_cftime=True,\n", + " combine_attrs=\"override\",\n", + " data_vars=\"minimal\",\n", + " compat=\"override\",\n", + " coords=\"minimal\",\n", + " drop_variables=[\"height\", \"file_qf\"],\n", + ").load()\n", + "\n", + "# monthly temperature\n", + "tas_m = xr.open_mfdataset(\n", + " [fN_hist_mon, fN_proj_mon],\n", + " combine=\"by_coords\",\n", + " use_cftime=True,\n", + " combine_attrs=\"override\",\n", + " data_vars=\"minimal\",\n", + " compat=\"override\",\n", + " coords=\"minimal\",\n", + " drop_variables=[\"height\", \"file_qf\"],\n", + ").load()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Preprocessing\n", + "\n", + "Calculate anomalies w.r.t the reference period" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ref_y = tas_y.sel(time=REF_PERIOD).mean(\"time\", keep_attrs=True)\n", + "ref_m = tas_m.sel(time=REF_PERIOD).mean(\"time\", keep_attrs=True)\n", + "\n", + "tas_y = tas_y - ref_y\n", + "tas_m = tas_m - ref_m" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We only use land grid points and exclude Antarctica. The 3D data with dimensions `('time', 'lat', 'lon')` is stacked to 2D data with dimensions `('time', 'gridcell')`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def mask_and_stack(ds, threshold_land):\n", + " ds = mesmer.mask.mask_ocean_fraction(ds, threshold_land)\n", + " ds = mesmer.mask.mask_antarctica(ds)\n", + " ds = mesmer.grid.stack_lat_lon(ds)\n", + " return ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tas_stacked_y = mask_and_stack(tas_y, threshold_land=THRESHOLD_LAND)\n", + "tas_stacked_m = mask_and_stack(tas_m, threshold_land=THRESHOLD_LAND)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fit the harmonic model\n", + "\n", + "Fit the seasonal cycle with a harmonic model which can vary with local annual mean temperature\n", + "(fourier regression). Removes annual mean and, determines the optimal order and the coefficients\n", + "of the harmonic model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "harmonic_model_fit = mesmer.stats.fit_harmonic_model(\n", + " tas_stacked_y.tas, tas_stacked_m.tas\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train the power transformer\n", + "\n", + "The residuals are not necessarily symmetric - make them more normal using a Yeo-Johnson\n", + "transformation. The parameter $\\lambda$ is modelled with a logistic regression using\n", + "local annual mean temperature as covariate." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "resids_after_hm = tas_stacked_m - harmonic_model_fit.predictions\n", + "\n", + "pt_coefficients = mesmer.stats.fit_yeo_johnson_transform(\n", + " resids_after_hm.tas, tas_stacked_y.tas\n", + ")\n", + "transformed_hm_resids = mesmer.stats.yeo_johnson_transform(\n", + " resids_after_hm.tas, pt_coefficients, tas_stacked_y.tas\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fit cyclo-stationary AR(1) process\n", + "\n", + "The monthly residuals are now assumed to follow a cyclo-stationary AR(1) process, where e.g. the July residuals depend on the ones from June and the ones of June on May's with distinct parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "AR1_fit = mesmer.stats.fit_auto_regression_monthly(\n", + " transformed_hm_resids.transformed, time_dim=\"time\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find localized empirical covariance\n", + "\n", + "Finally, we determine the localized empirical spatial covariance for each month separately:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "geodist = mesmer.geospatial.geodist_exact(tas_stacked_y.lon, tas_stacked_y.lat)\n", + "\n", + "phi_gc_localizer = mesmer.stats.gaspari_cohn_correlation_matrices(\n", + " geodist, localisation_radii=LOCALISATION_RADII\n", + ")\n", + "\n", + "weights = xr.ones_like(AR1_fit.residuals.isel(gridcell=0))\n", + "weights.name = \"weights\"\n", + "\n", + "localized_ecov = mesmer.stats.find_localized_empirical_covariance_monthly(\n", + " AR1_fit.residuals, weights, phi_gc_localizer, \"time\", 30\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Saving" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### time coordinate\n", + "We need to get the original time coordinate to be able to validate our results later on. If it is not needed to align the final emulations with the original data, this can be omitted, the time coordinates are then given by to forcing data set or can later be generated for example with \n", + "\n", + "\n", + "```python\n", + "monthly_time = xr.cftime_range(\"1850-01-01\", \"2100-12-31\", freq=\"MS\", calendar=\"gregorian\")\n", + "monthly_time = xr.DataArray(monthly_time, dims=\"time\", coords={\"time\": monthly_time})\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# extract and save time coordinate\n", + "m_time = tas_stacked_m.time\n", + "\n", + "# TODO\n", + "# save the parameters to a file\n", + "# harmonic_model_fit\n", + "# pt_coefficients\n", + "# AR1_fit\n", + "# localized_ecov\n", + "# m_time" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make emulations\n", + "\n", + "To generate emulations the workflow of the calibration is reversed, using the estimated parameters from above. Here, we use the same local annual mean temperatures to force the emulations, but temperatures from other models, scenarios, ensemble members or emulated annual local temperatures can be used as well." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Re-import necessary libraries\n", + "# import numpy as np\n", + "# import pandas as pd\n", + "# import matplotlib.pyplot as plt\n", + "# import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# parameters\n", + "NR_EMUS = 10\n", + "BUFFER = 20\n", + "# REF_PERIOD = slice(\"1850\", \"1900\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Random number seed\n", + "\n", + "The `seed` determines the initial state for the random number generator. To avoid generating the same noise for different models and scenarios different seeds are required for each individual paring. For reproducibility the seed needs to be the same for any subsequent draw of the same emulator. To avoid human chosen standard seeds (e.g. `0`, `1234`) its recommended to also randomly generate the seeds and save them for later, using\n", + "\n", + "```python\n", + "import secrets\n", + "secrets.randbits(128)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# random but constant\n", + "SEED = 234361146192407661971285321853135632294" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load data needed for emulations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO\n", + "# load the parameters from a file\n", + "# in this example notebook we directly use the calibration from above" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO\n", + "# load yearly temperature\n", + "# in this example we are using the original yearly temperature for demonstration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Preprocessing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# preprocess tas\n", + "# ref = tas_y.sel(time=REF_PERIOD).mean(\"time\", keep_attrs=True)\n", + "# tas_y = tas_y - ref\n", + "# tas_stacked_y = mask_and_stack(tas_y, threshold_land=THRESHOLD_LAND)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get the original grid for transforming back later\n", + "grid_orig = ref_y[[\"lat\", \"lon\"]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate emulations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# generate monthly data with harmonic model\n", + "monthly_harmonic_emu = mesmer.stats.predict_harmonic_model(\n", + " tas_stacked_y.tas, harmonic_model_fit.coeffs, m_time\n", + ")\n", + "\n", + "# generate variability around 0 with AR(1) model\n", + "local_variability_transformed = mesmer.stats.draw_auto_regression_monthly(\n", + " AR1_fit,\n", + " localized_ecov.localized_covariance,\n", + " time=m_time,\n", + " n_realisations=NR_EMUS,\n", + " seed=SEED,\n", + " buffer=BUFFER,\n", + ")\n", + "\n", + "# invert the power transformation\n", + "local_variability_inverted = mesmer.stats.inverse_yeo_johnson_transform(\n", + " local_variability_transformed, pt_coefficients, tas_stacked_y.tas\n", + ")\n", + "\n", + "# add the local variability to the monthly harmonic\n", + "emulations = monthly_harmonic_emu + local_variability_inverted.inverted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# unstack to original grid\n", + "emulations_unstacked = mesmer.grid.unstack_lat_lon_and_align(emulations, grid_orig)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Saving and/or Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO\n", + "# save" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mesmer_m_dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}