diff --git a/notebooks/flox_examples_on_laptop.ipynb b/notebooks/flox_examples_on_laptop.ipynb index ee89aa0..701f28c 100644 --- a/notebooks/flox_examples_on_laptop.ipynb +++ b/notebooks/flox_examples_on_laptop.ipynb @@ -48,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "id": "c3ab2c16-4f1f-4c56-ae76-e0eb86b8389f", "metadata": {}, "outputs": [], @@ -147,14 +147,38 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-05-02 12:42:53,981 - distributed.nanny - WARNING - Restarting worker\n", - "2024-05-02 12:42:53,982 - distributed.nanny - WARNING - Restarting worker\n", - "2024-05-02 12:42:53,998 - distributed.nanny - WARNING - Restarting worker\n", - "2024-05-02 12:42:54,000 - distributed.nanny - WARNING - Restarting worker\n", - "2024-05-02 12:48:07,995 - distributed.nanny - WARNING - Restarting worker\n", - "2024-05-02 12:48:08,027 - distributed.nanny - WARNING - Restarting worker\n", - "2024-05-02 12:48:08,030 - distributed.nanny - WARNING - Restarting worker\n", - "2024-05-02 12:48:08,038 - distributed.nanny - WARNING - Restarting worker\n" + "2024-05-02 14:24:15,129 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:24:15,180 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:24:15,193 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:24:15,195 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:31:47,273 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:31:47,298 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:31:47,301 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:31:47,302 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:33:13,733 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:33:13,773 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:33:13,775 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:33:13,783 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:34:38,867 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:34:38,903 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:34:38,905 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:34:38,914 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:37:24,488 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:37:24,505 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:37:24,507 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:37:24,508 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:15,321 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:15,350 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:15,352 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:15,353 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:23,375 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:23,389 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:23,396 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:23,410 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:28,292 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:28,311 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:28,312 - distributed.nanny - WARNING - Restarting worker\n", + "2024-05-02 14:39:28,325 - distributed.nanny - WARNING - Restarting worker\n" ] } ], @@ -188,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "a7fba188-b1c5-4cbe-bbf1-73b87ae2d16c", "metadata": {}, "outputs": [], @@ -205,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "6eada978-56b4-49c3-b4f1-bc77be62c030", "metadata": {}, "outputs": [ @@ -224,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "8b2a2bb1-be72-481f-9818-b7032bac0264", "metadata": {}, "outputs": [ @@ -598,7 +622,7 @@ "dask.array<transpose, shape=(12, 720, 1440), dtype=float64, chunksize=(1, 720, 1440), chunktype=numpy.ndarray>\n", "Coordinates:\n", " * month (month) int64 96B 1 2 3 4 5 6 7 8 9 10 11 12\n", - "Dimensions without coordinates: lat, lon" ], "text/plain": [ " Size: 100MB\n", @@ -708,7 +732,7 @@ "Dimensions without coordinates: lat, lon" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -722,12 +746,12 @@ "id": "f2d2ffa0-38f4-4d38-a604-1dda72541384", "metadata": {}, "source": [ - "#### force `map-reduce`" + "#### force `map-reduce` with engine=`flox`" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "ff0b9cfa-63c2-48a0-8a77-7ead02b80843", "metadata": {}, "outputs": [ @@ -736,20 +760,20 @@ "output_type": "stream", "text": [ "Option: use_flox, Value: True\n", - "CPU times: user 7.07 s, sys: 1.44 s, total: 8.51 s\n", - "Wall time: 2min 9s\n" + "CPU times: user 6.52 s, sys: 1.25 s, total: 7.78 s\n", + "Wall time: 1min 29s\n" ] } ], "source": [ "%%time\n", "print_flox_options()\n", - "clim_flox = oisst.groupby('time.month').mean('time',method='map-reduce').compute()" + "clim_flox = oisst.groupby('time.month').mean('time',engine='flox',method='map-reduce').compute()" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "d152dd76-ba8b-47e8-8fcb-8e9f1fe5f334", "metadata": {}, "outputs": [], @@ -757,17 +781,57 @@ "clear_and_restart(['clim_flox'],client)" ] }, + { + "cell_type": "markdown", + "id": "c817db68-0a15-4263-b886-97641d60c649", + "metadata": {}, + "source": [ + "#### force `map-reduce` with engine=`flox` with `skipna = False`" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6f20dae7-5274-4d41-a6d9-b1d46483c3b7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Option: use_flox, Value: True\n", + "CPU times: user 5.26 s, sys: 998 ms, total: 6.26 s\n", + "Wall time: 1min 22s\n" + ] + } + ], + "source": [ + "%%time\n", + "print_flox_options()\n", + "clim_flox = oisst.groupby('time.month').mean('time',engine='flox',method='map-reduce',skipna=False).compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "0426ec05-3bc3-4e6c-8970-dcb3ec82ad0a", + "metadata": {}, + "outputs": [], + "source": [ + "clear_and_restart(['clim_flox'],client)" + ] + }, { "cell_type": "markdown", "id": "13ce82b2-c31d-4aa1-9fde-73e344da91b1", "metadata": {}, "source": [ - "#### force `cohorts`" + "#### force `cohorts` with engine=`flox`" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "fa7b5d10-85e4-45fb-8eaa-7bc4f17cd382", "metadata": {}, "outputs": [ @@ -776,30 +840,78 @@ "output_type": "stream", "text": [ "Option: use_flox, Value: True\n", - "CPU times: user 5.82 s, sys: 1.16 s, total: 6.98 s\n", - "Wall time: 1min 20s\n" + "CPU times: user 4.59 s, sys: 773 ms, total: 5.36 s\n", + "Wall time: 42.6 s\n" ] } ], "source": [ "%%time\n", "print_flox_options()\n", - "clim_flox = oisst.groupby('time.month').mean('time',method='cohorts').compute()" + "clim_flox = oisst.groupby('time.month').mean('time',engine='flox',method='cohorts').compute()" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "b5ea6233-037c-4c76-9bd8-9e0fe9bc9546", + "execution_count": 20, + "id": "04d92604-1a95-4d58-9f94-4e64ea9a653d", "metadata": {}, "outputs": [], "source": [ "clear_and_restart(['clim_flox'],client)" ] }, + { + "cell_type": "markdown", + "id": "1484bd95-8fda-4d2a-80f5-9536b2b9d4c6", + "metadata": {}, + "source": [ + "#### force `cohorts` with engine=`flox`with `skipna = False`" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "75b1be5d-c0f6-4fc4-a6a7-53dab726e315", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Option: use_flox, Value: True\n", + "CPU times: user 4.09 s, sys: 1.45 s, total: 5.54 s\n", + "Wall time: 36.4 s\n" + ] + } + ], + "source": [ + "%%time\n", + "print_flox_options()\n", + "clim_flox = oisst.groupby('time.month').mean('time',engine='flox',method='cohorts',skipna=False).compute()" + ] + }, { "cell_type": "code", "execution_count": 13, + "id": "b5ea6233-037c-4c76-9bd8-9e0fe9bc9546", + "metadata": {}, + "outputs": [], + "source": [ + "clear_and_restart(['clim_flox'],client)" + ] + }, + { + "cell_type": "markdown", + "id": "38cf8d2f-ba0d-4193-be24-37940923b274", + "metadata": {}, + "source": [ + "#### `use_flox=False`" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "id": "e5c92d52-526d-495d-9a8e-0d6356406e1a", "metadata": {}, "outputs": [ @@ -808,8 +920,8 @@ "output_type": "stream", "text": [ "Option: use_flox, Value: False\n", - "CPU times: user 3.37 s, sys: 1.39 s, total: 4.77 s\n", - "Wall time: 29.5 s\n" + "CPU times: user 3.4 s, sys: 1.22 s, total: 4.62 s\n", + "Wall time: 29.4 s\n" ] } ], @@ -820,15 +932,57 @@ " clim_noflox = oisst.groupby('time.month').mean('time').compute()" ] }, + { + "cell_type": "code", + "execution_count": 19, + "id": "d9bcdfb6-83d8-4216-880f-94d48b146dac", + "metadata": {}, + "outputs": [], + "source": [ + "clear_and_restart(['clim_flox'],client)" + ] + }, + { + "cell_type": "markdown", + "id": "5d145dea-9b82-4a9c-88c7-7f6f2388466a", + "metadata": {}, + "source": [ + "#### `use_flox=False` with `skipna = False`" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "5461e93a-8e9c-4387-9907-b0e713abc84f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Option: use_flox, Value: False\n", + "CPU times: user 2.67 s, sys: 1.38 s, total: 4.05 s\n", + "Wall time: 20.9 s\n" + ] + } + ], + "source": [ + "%%time\n", + "with xr.set_options(use_flox=False):\n", + " print_flox_options()\n", + " clim_noflox = oisst.groupby('time.month').mean('time',skipna=False).compute()" + ] + }, { "cell_type": "markdown", "id": "3264bc23-b1a4-42f9-a1c5-7fcc242df4d9", "metadata": {}, "source": [ "### results\n", - "with flox map-reduce = CPU times: user 7.07 s, sys: 1.44 s, total: 8.51 s = Wall time: 2min 9s
\n", - "with flox cohorts = CPU times: user 5.82 s, sys: 1.16 s, total: 6.98 s = Wall time: 1min 20s
\n", - "without flox = CPU times: user 3.37 s, sys: 1.39 s, total: 4.77 s = Wall time: 29.5 s" + "~~with flox map-reduce = CPU times: user 7.07 s, sys: 1.44 s, total: 8.51 s = Wall time: 2min 9s~~
\n", + "~~with flox cohorts = CPU times: user 5.82 s, sys: 1.16 s, total: 6.98 s = Wall time: 1min 20s~~
\n", + "~~without flox = CPU times: user 3.37 s, sys: 1.39 s, total: 4.77 s = Wall time: 29.5 s~~\n", + "\n" ] }, { @@ -857,22 +1011,22 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "19a0cb93-a07b-44c8-bf07-d5f857242e0b", + "execution_count": 17, + "id": "a9864b20-7d5c-48ad-a05d-18ab68f36867", "metadata": {}, "outputs": [], "source": [ - "clear_and_restart([],client)" + "clear_and_restart(['clim_flox','clim_noflox'],client)" ] }, { "cell_type": "code", "execution_count": null, - "id": "a9864b20-7d5c-48ad-a05d-18ab68f36867", + "id": "19a0cb93-a07b-44c8-bf07-d5f857242e0b", "metadata": {}, "outputs": [], "source": [ - "clear_and_restart(['clim_flox','clim_noflox'],client)" + "clear_and_restart([],client)" ] }, {