Skip to content

Commit

Permalink
outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
VisheshSaluja committed Aug 16, 2024
1 parent ff3465c commit bc0d1cb
Show file tree
Hide file tree
Showing 5 changed files with 294 additions and 20 deletions.
314 changes: 294 additions & 20 deletions book/chapters/ghcnd.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,42 @@
"Accurate Snow Water Equivalent (SWE) predictions rely heavily on reliable input data, including snow depth measurements. This section details the process of retrieving, processing, and refining snow depth data from the Global Historical Climatology Network Daily (GHCNd) dataset. The script discussed here automates the retrieval of snow depth data from ground stations, filters and cleans this data, and prepares it for use in SWE prediction models."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import requests\n",
"import re\n",
"from io import StringIO\n",
"import dask\n",
"import dask.dataframe as dd\n",
"\n",
"all_ghcd_station_file = '../Data/all_ghcn_station_list.csv'\n",
"only_active_ghcd_station_in_west_conus_file = '../Data/active_station_only_list.csv'\n",
"snowdepth_csv_file = '../Data/active_station_only_list.csv_all_vars.csv'\n",
"mask_non_snow_days_ghcd_csv_file = '../Data/active_station_only_list.csv_all_vars_masked_non_snow.csv'\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"\n",
"southwest_lon = -125.0\n",
"southwest_lat = 25.0\n",
"northeast_lon = -100.0\n",
"northeast_lat = 49.0\n",
"\n",
"# the training period spans about four years, from 2018-01-03 through 2021-12-31\n",
"train_start_date = \"2018-01-03\"\n",
"train_end_date = \"2021-12-31\""
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -57,7 +93,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -111,10 +147,189 @@
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"len(parsed_data) = 4537908\n",
"rows[0:20] = [['ACW00011604', '17.1167', '-61.7833', 'TMAX', '1949', '1949'], ['ACW00011604', '17.1167', '-61.7833', 'TMIN', '1949', '1949'], ['ACW00011604', '17.1167', '-61.7833', 'PRCP', '1949', '1949'], ['ACW00011604', '17.1167', '-61.7833', 'SNOW', '1949', '1949'], ['ACW00011604', '17.1167', '-61.7833', 'SNWD', '1949', '1949'], ['ACW00011604', '17.1167', '-61.7833', 'PGTM', '1949', '1949'], ['ACW00011604', '17.1167', '-61.7833', 'WDFG', '1949', '1949'], ['ACW00011604', '17.1167', '-61.7833', 'WSFG', '1949', '1949'], ['ACW00011604', '17.1167', '-61.7833', 'WT03', '1949', '1949'], ['ACW00011604', '17.1167', '-61.7833', 'WT08', '1949', '1949'], ['ACW00011604', '17.1167', '-61.7833', 'WT16', '1949', '1949'], ['ACW00011647', '17.1333', '-61.7833', 'TMAX', '1961', '1961'], ['ACW00011647', '17.1333', '-61.7833', 'TMIN', '1961', '1961'], ['ACW00011647', '17.1333', '-61.7833', 'PRCP', '1957', '1970'], ['ACW00011647', '17.1333', '-61.7833', 'SNOW', '1957', '1970'], ['ACW00011647', '17.1333', '-61.7833', 'SNWD', '1957', '1970'], ['ACW00011647', '17.1333', '-61.7833', 'WT03', '1961', '1961'], ['ACW00011647', '17.1333', '-61.7833', 'WT16', '1961', '1966'], ['AE000041196', '25.3330', '55.5170', 'TMAX', '1944', '2024'], ['AE000041196', '25.3330', '55.5170', 'TMIN', '1944', '2024']]\n",
" Station Latitude Longitude Variable Year_Start Year_End\n",
"0 ACW00011604 17.1167 -61.7833 TMAX 1949 1949\n",
"1 ACW00011604 17.1167 -61.7833 TMIN 1949 1949\n",
"2 ACW00011604 17.1167 -61.7833 PRCP 1949 1949\n",
"3 ACW00011604 17.1167 -61.7833 SNOW 1949 1949\n",
"4 ACW00011604 17.1167 -61.7833 SNWD 1949 1949\n",
"Removed non-active stations: Station Latitude Longitude Variable Year_Start Year_End\n",
"424 AJ000037575 41.5500 46.6670 SNWD 1973 2024\n",
"446 AJ000037675 41.3670 48.5170 SNWD 1973 2024\n",
"454 AJ000037735 40.7167 46.4167 SNWD 1973 2024\n",
"475 AJ000037749 40.6500 47.7500 SNWD 1973 2024\n",
"485 AJ000037756 40.5330 48.9330 SNWD 1973 2024\n",
"saved to ../Data/active_station_only_list.csv\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Station</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" <th>Variable</th>\n",
" <th>Year_Start</th>\n",
" <th>Year_End</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>70926</th>\n",
" <td>CA001011500</td>\n",
" <td>48.9333</td>\n",
" <td>-123.7500</td>\n",
" <td>SNWD</td>\n",
" <td>1991</td>\n",
" <td>2024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71000</th>\n",
" <td>CA001012055</td>\n",
" <td>48.8333</td>\n",
" <td>-124.0500</td>\n",
" <td>SNWD</td>\n",
" <td>1980</td>\n",
" <td>2024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71151</th>\n",
" <td>CA001015105</td>\n",
" <td>48.3667</td>\n",
" <td>-123.5667</td>\n",
" <td>SNWD</td>\n",
" <td>1980</td>\n",
" <td>2024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71190</th>\n",
" <td>CA001015628</td>\n",
" <td>48.8167</td>\n",
" <td>-123.7167</td>\n",
" <td>SNWD</td>\n",
" <td>1981</td>\n",
" <td>2024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71196</th>\n",
" <td>CA001015630</td>\n",
" <td>48.8167</td>\n",
" <td>-123.7167</td>\n",
" <td>SNWD</td>\n",
" <td>2007</td>\n",
" <td>2024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>750909</th>\n",
" <td>USW00094143</td>\n",
" <td>43.5317</td>\n",
" <td>-112.9422</td>\n",
" <td>SNWD</td>\n",
" <td>1954</td>\n",
" <td>2024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>750992</th>\n",
" <td>USW00094178</td>\n",
" <td>42.4786</td>\n",
" <td>-114.4775</td>\n",
" <td>SNWD</td>\n",
" <td>1998</td>\n",
" <td>2024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>751016</th>\n",
" <td>USW00094182</td>\n",
" <td>44.8942</td>\n",
" <td>-116.0997</td>\n",
" <td>SNWD</td>\n",
" <td>1998</td>\n",
" <td>2024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>751035</th>\n",
" <td>USW00094185</td>\n",
" <td>43.5947</td>\n",
" <td>-118.9578</td>\n",
" <td>SNWD</td>\n",
" <td>1973</td>\n",
" <td>2024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>751532</th>\n",
" <td>USW00094290</td>\n",
" <td>47.6872</td>\n",
" <td>-122.2553</td>\n",
" <td>SNWD</td>\n",
" <td>1986</td>\n",
" <td>2024</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4529 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Station Latitude Longitude Variable Year_Start Year_End\n",
"70926 CA001011500 48.9333 -123.7500 SNWD 1991 2024\n",
"71000 CA001012055 48.8333 -124.0500 SNWD 1980 2024\n",
"71151 CA001015105 48.3667 -123.5667 SNWD 1980 2024\n",
"71190 CA001015628 48.8167 -123.7167 SNWD 1981 2024\n",
"71196 CA001015630 48.8167 -123.7167 SNWD 2007 2024\n",
"... ... ... ... ... ... ...\n",
"750909 USW00094143 43.5317 -112.9422 SNWD 1954 2024\n",
"750992 USW00094178 42.4786 -114.4775 SNWD 1998 2024\n",
"751016 USW00094182 44.8942 -116.0997 SNWD 1998 2024\n",
"751035 USW00094185 43.5947 -118.9578 SNWD 1973 2024\n",
"751532 USW00094290 47.6872 -122.2553 SNWD 1986 2024\n",
"\n",
"[4529 rows x 6 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"![](../img/ghcnd/download_convert_read.png)"
"download_convert_and_read()"
]
},
{
Expand Down Expand Up @@ -166,11 +381,13 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"def get_snow_depth_observations_from_ghcn():\n",
" warnings.filterwarnings(\"ignore\")\n",
" \n",
" new_base_df = pd.read_csv(only_active_ghcd_station_in_west_conus_file)\n",
" print(new_base_df.shape)\n",
Expand All @@ -194,7 +411,7 @@
" @dask.delayed\n",
" def process_station(station):\n",
" station_name = station['Station']\n",
" print(f\"retrieving for {station_name}\")\n",
" # print(f\"retrieving for {station_name}\")\n",
" station_lat = station['Latitude']\n",
" station_long = station['Longitude']\n",
" try:\n",
Expand Down Expand Up @@ -232,17 +449,59 @@
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![](../img/ghcnd/get_snow_depth_observations.png)"
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4529, 6)\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
"An error occurred: \"None of [Index(['STATION', 'DATE', 'LATITUDE', 'LONGITUDE', 'SNWD'], dtype='object')] are in the [columns]\"\n",
" STATION DATE LATITUDE LONGITUDE SNWD\n",
"0 CA001011500 2018-01-03 48.9333 -123.75 0.0\n",
"1 CA001011500 2018-01-04 48.9333 -123.75 0.0\n",
"2 CA001011500 2018-01-05 48.9333 -123.75 0.0\n",
"3 CA001011500 2018-01-06 48.9333 -123.75 0.0\n",
"4 CA001011500 2018-01-07 48.9333 -123.75 0.0\n",
"All the data are saved to ../Data/active_station_only_list.csv_all_vars.csv\n"
]
}
],
"source": [
"![](../img/ghcnd/get_snow_2.png)"
"get_snow_depth_observations_from_ghcn()"
]
},
{
Expand Down Expand Up @@ -286,7 +545,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -306,10 +565,25 @@
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" STATION DATE LATITUDE LONGITUDE SNWD swe_value\n",
"0 CA001011500 2018-01-03 48.9333 -123.75 0.0 0\n",
"1 CA001011500 2018-01-04 48.9333 -123.75 0.0 0\n",
"2 CA001011500 2018-01-05 48.9333 -123.75 0.0 0\n",
"3 CA001011500 2018-01-06 48.9333 -123.75 0.0 0\n",
"4 CA001011500 2018-01-07 48.9333 -123.75 0.0 0\n"
]
}
],
"source": [
"![](../img/ghcnd/masked_out_snow_depth.png)"
"mask_out_all_non_zero_snowdepth_days()"
]
},
{
Expand All @@ -323,7 +597,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -337,9 +611,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
Binary file removed book/img/ghcnd/download_convert_read.png
Binary file not shown.
Binary file removed book/img/ghcnd/get_snow_2.png
Binary file not shown.
Binary file removed book/img/ghcnd/get_snow_depth_observations.png
Binary file not shown.
Binary file removed book/img/ghcnd/masked_out_snow_depth.png
Binary file not shown.

0 comments on commit bc0d1cb

Please sign in to comment.