Skip to content

Commit

Permalink
apply suggestions from @dcherian wrt xarray-contrib/flox#363
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas-Moore-Creative committed May 2, 2024
1 parent faff371 commit 118845a
Showing 1 changed file with 192 additions and 38 deletions.
230 changes: 192 additions & 38 deletions notebooks/flox_examples_on_laptop.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"id": "c3ab2c16-4f1f-4c56-ae76-e0eb86b8389f",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -147,14 +147,38 @@
"name": "stderr",
"output_type": "stream",
"text": [
"2024-05-02 12:42:53,981 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 12:42:53,982 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 12:42:53,998 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 12:42:54,000 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 12:48:07,995 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 12:48:08,027 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 12:48:08,030 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 12:48:08,038 - distributed.nanny - WARNING - Restarting worker\n"
"2024-05-02 14:24:15,129 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:24:15,180 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:24:15,193 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:24:15,195 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:31:47,273 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:31:47,298 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:31:47,301 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:31:47,302 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:33:13,733 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:33:13,773 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:33:13,775 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:33:13,783 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:34:38,867 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:34:38,903 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:34:38,905 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:34:38,914 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:37:24,488 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:37:24,505 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:37:24,507 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:37:24,508 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:15,321 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:15,350 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:15,352 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:15,353 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:23,375 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:23,389 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:23,396 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:23,410 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:28,292 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:28,311 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:28,312 - distributed.nanny - WARNING - Restarting worker\n",
"2024-05-02 14:39:28,325 - distributed.nanny - WARNING - Restarting worker\n"
]
}
],
Expand Down Expand Up @@ -188,7 +212,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "a7fba188-b1c5-4cbe-bbf1-73b87ae2d16c",
"metadata": {},
"outputs": [],
Expand All @@ -205,7 +229,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "6eada978-56b4-49c3-b4f1-bc77be62c030",
"metadata": {},
"outputs": [
Expand All @@ -224,7 +248,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"id": "8b2a2bb1-be72-481f-9818-b7032bac0264",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -598,7 +622,7 @@
"dask.array<transpose, shape=(12, 720, 1440), dtype=float64, chunksize=(1, 720, 1440), chunktype=numpy.ndarray>\n",
"Coordinates:\n",
" * month (month) int64 96B 1 2 3 4 5 6 7 8 9 10 11 12\n",
"Dimensions without coordinates: lat, lon</pre><div class='xr-wrap' style='display:none'><div class='xr-header'><div class='xr-obj-type'>xarray.DataArray</div><div class='xr-array-name'>'sst'</div><ul class='xr-dim-list'><li><span class='xr-has-index'>month</span>: 12</li><li><span>lat</span>: 720</li><li><span>lon</span>: 1440</li></ul></div><ul class='xr-sections'><li class='xr-section-item'><div class='xr-array-wrap'><input id='section-38008b29-fc66-4a6d-b2a1-4c3508858a99' class='xr-array-in' type='checkbox' checked><label for='section-38008b29-fc66-4a6d-b2a1-4c3508858a99' title='Show/hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-array-preview xr-preview'><span>dask.array&lt;chunksize=(1, 720, 1440), meta=np.ndarray&gt;</span></div><div class='xr-array-data'><table>\n",
"Dimensions without coordinates: lat, lon</pre><div class='xr-wrap' style='display:none'><div class='xr-header'><div class='xr-obj-type'>xarray.DataArray</div><div class='xr-array-name'>'sst'</div><ul class='xr-dim-list'><li><span class='xr-has-index'>month</span>: 12</li><li><span>lat</span>: 720</li><li><span>lon</span>: 1440</li></ul></div><ul class='xr-sections'><li class='xr-section-item'><div class='xr-array-wrap'><input id='section-c3e8993d-06f3-4a7a-9c4c-189c62c57db2' class='xr-array-in' type='checkbox' checked><label for='section-c3e8993d-06f3-4a7a-9c4c-189c62c57db2' title='Show/hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-array-preview xr-preview'><span>dask.array&lt;chunksize=(1, 720, 1440), meta=np.ndarray&gt;</span></div><div class='xr-array-data'><table>\n",
" <tr>\n",
" <td>\n",
" <table style=\"border-collapse: collapse;\">\n",
Expand Down Expand Up @@ -698,7 +722,7 @@
"</svg>\n",
" </td>\n",
" </tr>\n",
"</table></div></div></li><li class='xr-section-item'><input id='section-d552d7c6-59e3-4276-902d-3c1c6a0958f5' class='xr-section-summary-in' type='checkbox' checked><label for='section-d552d7c6-59e3-4276-902d-3c1c6a0958f5' class='xr-section-summary' >Coordinates: <span>(1)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>month</span></div><div class='xr-var-dims'>(month)</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>1 2 3 4 5 6 7 8 9 10 11 12</div><input id='attrs-d309342d-d92d-40da-8662-4b463ff0da79' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-d309342d-d92d-40da-8662-4b463ff0da79' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-5cf948e1-6415-4e8a-91ea-297aec9edade' class='xr-var-data-in' type='checkbox'><label for='data-5cf948e1-6415-4e8a-91ea-297aec9edade' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-5402a4a8-227e-4136-86f3-0d59b6d3520e' class='xr-section-summary-in' type='checkbox' ><label for='section-5402a4a8-227e-4136-86f3-0d59b6d3520e' class='xr-section-summary' >Indexes: <span>(1)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-index-name'><div>month</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-88a79ebd-ce96-44b0-a55f-09194db30d8f' class='xr-index-data-in' type='checkbox'/><label for='index-88a79ebd-ce96-44b0-a55f-09194db30d8f' title='Show/Hide 
index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=&#x27;int64&#x27;, name=&#x27;month&#x27;))</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-b98d8703-4079-4e5b-aa0d-30fe3acfe43f' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-b98d8703-4079-4e5b-aa0d-30fe3acfe43f' class='xr-section-summary' title='Expand/collapse section'>Attributes: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'></dl></div></li></ul></div></div>"
"</table></div></div></li><li class='xr-section-item'><input id='section-0843423c-1cbe-4b45-be37-d3794f62f57c' class='xr-section-summary-in' type='checkbox' checked><label for='section-0843423c-1cbe-4b45-be37-d3794f62f57c' class='xr-section-summary' >Coordinates: <span>(1)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>month</span></div><div class='xr-var-dims'>(month)</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>1 2 3 4 5 6 7 8 9 10 11 12</div><input id='attrs-0fe1b943-6838-479b-a9cc-2daf16384f0e' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-0fe1b943-6838-479b-a9cc-2daf16384f0e' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-fa17954f-5847-4915-8e20-adaf1d95b3bf' class='xr-var-data-in' type='checkbox'><label for='data-fa17954f-5847-4915-8e20-adaf1d95b3bf' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-8954a078-ed62-43a5-9d35-254adc56798e' class='xr-section-summary-in' type='checkbox' ><label for='section-8954a078-ed62-43a5-9d35-254adc56798e' class='xr-section-summary' >Indexes: <span>(1)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-index-name'><div>month</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-6cf69c5c-7950-49cc-9514-2a568e3130c7' class='xr-index-data-in' type='checkbox'/><label for='index-6cf69c5c-7950-49cc-9514-2a568e3130c7' title='Show/Hide 
index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=&#x27;int64&#x27;, name=&#x27;month&#x27;))</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-4e55471a-52e7-46b9-885e-201b3cc1d7bb' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-4e55471a-52e7-46b9-885e-201b3cc1d7bb' class='xr-section-summary' title='Expand/collapse section'>Attributes: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'></dl></div></li></ul></div></div>"
],
"text/plain": [
"<xarray.DataArray 'sst' (month: 12, lat: 720, lon: 1440)> Size: 100MB\n",
Expand All @@ -708,7 +732,7 @@
"Dimensions without coordinates: lat, lon"
]
},
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -722,12 +746,12 @@
"id": "f2d2ffa0-38f4-4d38-a604-1dda72541384",
"metadata": {},
"source": [
"#### force `map-reduce`"
"#### force `map-reduce` with engine=`flox`"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"id": "ff0b9cfa-63c2-48a0-8a77-7ead02b80843",
"metadata": {},
"outputs": [
Expand All @@ -736,38 +760,78 @@
"output_type": "stream",
"text": [
"Option: use_flox, Value: True\n",
"CPU times: user 7.07 s, sys: 1.44 s, total: 8.51 s\n",
"Wall time: 2min 9s\n"
"CPU times: user 6.52 s, sys: 1.25 s, total: 7.78 s\n",
"Wall time: 1min 29s\n"
]
}
],
"source": [
"%%time\n",
"print_flox_options()\n",
"clim_flox = oisst.groupby('time.month').mean('time',method='map-reduce').compute()"
"clim_flox = oisst.groupby('time.month').mean('time',engine='flox',method='map-reduce').compute()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "d152dd76-ba8b-47e8-8fcb-8e9f1fe5f334",
"metadata": {},
"outputs": [],
"source": [
"clear_and_restart(['clim_flox'],client)"
]
},
{
"cell_type": "markdown",
"id": "c817db68-0a15-4263-b886-97641d60c649",
"metadata": {},
"source": [
"#### force `map-reduce` with engine=`flox` with `skipna = False`"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "6f20dae7-5274-4d41-a6d9-b1d46483c3b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Option: use_flox, Value: True\n",
"CPU times: user 5.26 s, sys: 998 ms, total: 6.26 s\n",
"Wall time: 1min 22s\n"
]
}
],
"source": [
"%%time\n",
"print_flox_options()\n",
"clim_flox = oisst.groupby('time.month').mean('time',engine='flox',method='map-reduce',skipna=False).compute()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "0426ec05-3bc3-4e6c-8970-dcb3ec82ad0a",
"metadata": {},
"outputs": [],
"source": [
"clear_and_restart(['clim_flox'],client)"
]
},
{
"cell_type": "markdown",
"id": "13ce82b2-c31d-4aa1-9fde-73e344da91b1",
"metadata": {},
"source": [
"#### force `cohorts`"
"#### force `cohorts` with engine=`flox`"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "fa7b5d10-85e4-45fb-8eaa-7bc4f17cd382",
"metadata": {},
"outputs": [
Expand All @@ -776,30 +840,78 @@
"output_type": "stream",
"text": [
"Option: use_flox, Value: True\n",
"CPU times: user 5.82 s, sys: 1.16 s, total: 6.98 s\n",
"Wall time: 1min 20s\n"
"CPU times: user 4.59 s, sys: 773 ms, total: 5.36 s\n",
"Wall time: 42.6 s\n"
]
}
],
"source": [
"%%time\n",
"print_flox_options()\n",
"clim_flox = oisst.groupby('time.month').mean('time',method='cohorts').compute()"
"clim_flox = oisst.groupby('time.month').mean('time',engine='flox',method='cohorts').compute()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "b5ea6233-037c-4c76-9bd8-9e0fe9bc9546",
"execution_count": 20,
"id": "04d92604-1a95-4d58-9f94-4e64ea9a653d",
"metadata": {},
"outputs": [],
"source": [
"clear_and_restart(['clim_flox'],client)"
]
},
{
"cell_type": "markdown",
"id": "1484bd95-8fda-4d2a-80f5-9536b2b9d4c6",
"metadata": {},
"source": [
"#### force `cohorts` with engine=`flox` with `skipna = False`"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "75b1be5d-c0f6-4fc4-a6a7-53dab726e315",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Option: use_flox, Value: True\n",
"CPU times: user 4.09 s, sys: 1.45 s, total: 5.54 s\n",
"Wall time: 36.4 s\n"
]
}
],
"source": [
"%%time\n",
"print_flox_options()\n",
"clim_flox = oisst.groupby('time.month').mean('time',engine='flox',method='cohorts',skipna=False).compute()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "b5ea6233-037c-4c76-9bd8-9e0fe9bc9546",
"metadata": {},
"outputs": [],
"source": [
"clear_and_restart(['clim_flox'],client)"
]
},
{
"cell_type": "markdown",
"id": "38cf8d2f-ba0d-4193-be24-37940923b274",
"metadata": {},
"source": [
"#### `use_flox=False`"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "e5c92d52-526d-495d-9a8e-0d6356406e1a",
"metadata": {},
"outputs": [
Expand All @@ -808,8 +920,8 @@
"output_type": "stream",
"text": [
"Option: use_flox, Value: False\n",
"CPU times: user 3.37 s, sys: 1.39 s, total: 4.77 s\n",
"Wall time: 29.5 s\n"
"CPU times: user 3.4 s, sys: 1.22 s, total: 4.62 s\n",
"Wall time: 29.4 s\n"
]
}
],
Expand All @@ -820,15 +932,57 @@
" clim_noflox = oisst.groupby('time.month').mean('time').compute()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "d9bcdfb6-83d8-4216-880f-94d48b146dac",
"metadata": {},
"outputs": [],
"source": [
"clear_and_restart(['clim_flox'],client)"
]
},
{
"cell_type": "markdown",
"id": "5d145dea-9b82-4a9c-88c7-7f6f2388466a",
"metadata": {},
"source": [
"#### `use_flox=False` with `skipna = False`"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "5461e93a-8e9c-4387-9907-b0e713abc84f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Option: use_flox, Value: False\n",
"CPU times: user 2.67 s, sys: 1.38 s, total: 4.05 s\n",
"Wall time: 20.9 s\n"
]
}
],
"source": [
"%%time\n",
"with xr.set_options(use_flox=False):\n",
" print_flox_options()\n",
" clim_noflox = oisst.groupby('time.month').mean('time',skipna=False).compute()"
]
},
{
"cell_type": "markdown",
"id": "3264bc23-b1a4-42f9-a1c5-7fcc242df4d9",
"metadata": {},
"source": [
"### results\n",
"with flox map-reduce = CPU times: user 7.07 s, sys: 1.44 s, total: 8.51 s = Wall time: 2min 9s<br>\n",
"with flox cohorts = CPU times: user 5.82 s, sys: 1.16 s, total: 6.98 s = Wall time: 1min 20s<br>\n",
"without flox = CPU times: user 3.37 s, sys: 1.39 s, total: 4.77 s = Wall time: 29.5 s"
"~~with flox map-reduce = CPU times: user 7.07 s, sys: 1.44 s, total: 8.51 s = Wall time: 2min 9s~~<br>\n",
"~~with flox cohorts = CPU times: user 5.82 s, sys: 1.16 s, total: 6.98 s = Wall time: 1min 20s~~<br>\n",
"~~without flox = CPU times: user 3.37 s, sys: 1.39 s, total: 4.77 s = Wall time: 29.5 s~~\n",
"\n"
]
},
{
Expand Down Expand Up @@ -857,22 +1011,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "19a0cb93-a07b-44c8-bf07-d5f857242e0b",
"execution_count": 17,
"id": "a9864b20-7d5c-48ad-a05d-18ab68f36867",
"metadata": {},
"outputs": [],
"source": [
"clear_and_restart([],client)"
"clear_and_restart(['clim_flox','clim_noflox'],client)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9864b20-7d5c-48ad-a05d-18ab68f36867",
"id": "19a0cb93-a07b-44c8-bf07-d5f857242e0b",
"metadata": {},
"outputs": [],
"source": [
"clear_and_restart(['clim_flox','clim_noflox'],client)"
"clear_and_restart([],client)"
]
},
{
Expand Down

0 comments on commit 118845a

Please sign in to comment.