diff --git a/examples/04_Drug_Names_ATC.ipynb b/examples/04_Drug_Names_ATC.ipynb index 83e1de8..1c3db21 100644 --- a/examples/04_Drug_Names_ATC.ipynb +++ b/examples/04_Drug_Names_ATC.ipynb @@ -11,6 +11,7 @@ "\n", "- Get German ATC 2023 version from: https://www.wido.de/publikationen-produkte/arzneimittel-klassifikation/ \n", "- Optional: get access to DrugBank (https://go.drugbank.com/releases/latest) for much more aliases (e.g., trade names)\n", + "- `pip install openpyxl`\n", "- Prepare xMEN KB and indices:\n", " - `xmen dict examples/conf/atc.yaml --code examples/dicts/atc2023_de.py`\n", " - `xmen index examples/conf/atc.yaml --all`" @@ -18,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "2cbdaaa9-1b2d-4f6b-a193-554da8226217", "metadata": { "tags": [] @@ -33,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "0756e7cf-9478-4bb2-bdf0-2cc147c44e55", "metadata": { "tags": [] @@ -46,19 +47,65 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "449f6fe0-bcbd-4572-b48a-8f47a98c52f2", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
[03/07/24 10:43:03] INFO     Loading hierarchical faiss index                                sap_bert_linker.py:153\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[03/07/24 10:43:03]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading hierarchical faiss index \u001b]8;id=280662;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/sap_bert_linker.py\u001b\\\u001b[2msap_bert_linker.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=664469;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/sap_bert_linker.py#153\u001b\\\u001b[2m153\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                    INFO     Loading index from                                                 faiss_indexer.py:64\n",
+       "                             /home/Florian.Borchert/.cache/xmen/atc/index/sapbert/embed_faiss_h                    \n",
+       "                             ier.pickle                                                                            \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading index from \u001b]8;id=926064;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/faiss_indexer.py\u001b\\\u001b[2mfaiss_indexer.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=529795;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/faiss_indexer.py#64\u001b\\\u001b[2m64\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m \u001b[35m/home/Florian.Borchert/.cache/xmen/atc/index/sapbert/\u001b[0m\u001b[95membed_faiss_h\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[95mier.pickle\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[03/07/24 10:43:04] INFO     Loaded index of type <class 'faiss.swigfaiss.IndexHNSWFlat'> and   faiss_indexer.py:66\n",
+       "                             size 470941                                                                           \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[03/07/24 10:43:04]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loaded index of type \u001b[1m<\u001b[0m\u001b[1;95mclass\u001b[0m\u001b[39m \u001b[0m\u001b[32m'faiss.swigfaiss.IndexHNSWFlat'\u001b[0m\u001b[1m>\u001b[0m and \u001b]8;id=99502;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/faiss_indexer.py\u001b\\\u001b[2mfaiss_indexer.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=2245;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/faiss_indexer.py#66\u001b\\\u001b[2m66\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m size \u001b[1;36m470941\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "linker = default_ensemble(base_path / 'index')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "89561707-0f5e-480e-b96c-c29229e1fd70", "metadata": { "tags": [] @@ -78,24 +125,102 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "2c09eda3-462c-4f6b-b055-52ff77a6ba65", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cd11b10d37194fa2b3f37d2777bdcb4d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map: 0%| | 0/7 [00:00