diff --git a/examples/04_Drug_Names_ATC.ipynb b/examples/04_Drug_Names_ATC.ipynb index 83e1de8..1c3db21 100644 --- a/examples/04_Drug_Names_ATC.ipynb +++ b/examples/04_Drug_Names_ATC.ipynb @@ -11,6 +11,7 @@ "\n", "- Get German ATC 2023 version from: https://www.wido.de/publikationen-produkte/arzneimittel-klassifikation/ \n", "- Optional: get access to DrugBank (https://go.drugbank.com/releases/latest) for much more aliases (e.g., trade names)\n", + "- `pip install openpyxl`\n", "- Prepare xMEN KB and indices:\n", " - `xmen dict examples/conf/atc.yaml --code examples/dicts/atc2023_de.py`\n", " - `xmen index examples/conf/atc.yaml --all`" @@ -18,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "2cbdaaa9-1b2d-4f6b-a193-554da8226217", "metadata": { "tags": [] @@ -33,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "0756e7cf-9478-4bb2-bdf0-2cc147c44e55", "metadata": { "tags": [] @@ -46,19 +47,65 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "449f6fe0-bcbd-4572-b48a-8f47a98c52f2", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
[03/07/24 10:43:03] INFO Loading hierarchical faiss index sap_bert_linker.py:153\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/07/24 10:43:03]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading hierarchical faiss index \u001b]8;id=280662;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/sap_bert_linker.py\u001b\\\u001b[2msap_bert_linker.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=664469;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/sap_bert_linker.py#153\u001b\\\u001b[2m153\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Loading index from faiss_indexer.py:64\n", + " /home/Florian.Borchert/.cache/xmen/atc/index/sapbert/embed_faiss_h \n", + " ier.pickle \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading index from \u001b]8;id=926064;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/faiss_indexer.py\u001b\\\u001b[2mfaiss_indexer.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=529795;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/faiss_indexer.py#64\u001b\\\u001b[2m64\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m \u001b[35m/home/Florian.Borchert/.cache/xmen/atc/index/sapbert/\u001b[0m\u001b[95membed_faiss_h\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[95mier.pickle\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[03/07/24 10:43:04] INFO Loaded index of type <class 'faiss.swigfaiss.IndexHNSWFlat'> and faiss_indexer.py:66\n", + " size 470941 \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/07/24 10:43:04]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loaded index of type \u001b[1m<\u001b[0m\u001b[1;95mclass\u001b[0m\u001b[39m \u001b[0m\u001b[32m'faiss.swigfaiss.IndexHNSWFlat'\u001b[0m\u001b[1m>\u001b[0m and \u001b]8;id=99502;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/faiss_indexer.py\u001b\\\u001b[2mfaiss_indexer.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=2245;file:///mnt/nfs/home/Florian.Borchert/workspace/xmen/xmen/linkers/faiss_indexer.py#66\u001b\\\u001b[2m66\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m size \u001b[1;36m470941\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "linker = default_ensemble(base_path / 'index')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "89561707-0f5e-480e-b96c-c29229e1fd70", "metadata": { "tags": [] @@ -78,24 +125,102 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "2c09eda3-462c-4f6b-b055-52ff77a6ba65", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cd11b10d37194fa2b3f37d2777bdcb4d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map: 0%| | 0/7 [00:00, ? examples/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "predictions = linker.predict_no_context(drug_mentions)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "8158be67-5c2e-4358-b986-c72b9a74e43c", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input: Ursodeoxycholsäure, 250 mg - Kapsel\n", + "Confidence: 0.7435341477394104\n", + "CUI: A05AA02, Name: Ursodeoxycholsäure\n", + "Definition: None\n", + "TUI(s): \n", + "Aliases (abbreviated, total: 47): \n", + "\t Litursol, Solutrat, Ursochol, Ag-ursodiol, 3alpha,7beta-Dihydroxy-5beta-cholan-24-oic acid, Urso DS, PMS-ursodiol, Ursodeoxycholic acid, (3alpha,5beta,7beta)-3,7-dihydroxycholan-24-oic acid, Urusa\n", + "------\n", + "Input: Propofol 2%, 20 mg/ml 1000 mg/50 ml Injektionslösung\n", + "Confidence: 0.6950462460517883\n", + "CUI: N01AX10, Name: Propofol\n", + "Definition: None\n", + "TUI(s): \n", + "Aliases (abbreviated, total: 45): \n", + "\t Diprivan, Gobbifol, Hypro, Disoprivan, Propofil, Anesthesia S/I-50, Anesthesia S/I-60, Propofol-II Injection, Propoven, Anepol\n", + "------\n", + "Input: Norepinephrin 20 µg/ml\n", + "Confidence: 0.848741888999939\n", + "CUI: C01CA03, Name: Norepinephrin\n", + "Definition: None\n", + "TUI(s): \n", + "Aliases (abbreviated, total: 27): \n", + "\t (R)-norepinephrine, Norepinephrine Bitartrate In 5% Dextrose Injection, (R)-4-(2-amino-1-hydroxyethyl)-1,2-benzenediol, Norepinephrine Bitartrate Injection USP, Levophed(r) Norepinephrine Bitartrate, Norépinéphrine, Norepinephrine, Levophed, (R)-(−)-norepinephrine, L-noradrenaline\n", + "------\n", + "Input: Amphotericin B, 10 mg - Lutschtablette\n", + "Confidence: 0.6953610181808472\n", + "CUI: J02AA01, Name: Amphotericin B\n", + "Definition: None\n", + "TUI(s): \n", + "Aliases (abbreviated, total: 22): \n", + "\t Amphocin, Amphotericinum B, Liposomal amphotericin B, Amphotericin, Fungizone, Amphotec 50 mg, Amphotec 100 mg, Abelect, Amphotericin B, Amphocil\n", + "------\n", + "Input: Fentanyl (50 µg/ml) i.v.\n", + "Confidence: 0.8534790277481079\n", + "CUI: N02AB03, Name: Fentanyl\n", + "Definition: None\n", + "TUI(s): \n", + "Aliases (abbreviated, total: 70): \n", + "\t Instanyl, Duragesic 12, Fentanyl Buccal, Fentanyl Transdermal, Pecfent, N-(1-phenethylpiperidin-4-yl)-N-phenylpropionamide, Fentora, Lazanda, Mylan-fentanyl Matrix Patch, Abstral\n", + "------\n", + "Input: Vollelektrolyt-Lösung\n", + "Confidence: 0.8366699814796448\n", + "CUI: B05XA, Name: Elektrolytlösungen\n", + "Definition: None\n", + "TUI(s): \n", + "Aliases: (total: 0): \n", + "\t \n", + "------\n", + "Input: Sufentanil 5µg/ml 250 µg/50 ml Injektionslösung\n", + "Confidence: 0.7572200894355774\n", + "CUI: N01AH03, Name: Sufentanil\n", + "Definition: None\n", + "TUI(s): \n", + "Aliases (abbreviated, total: 21): \n", + "\t Sufentanyl, N-(4-(Methoxymethyl)-1-(2-(2-thienyl)ethyl)-4-piperidyl)propionanilide, Sufentanilum, Sufentil, Dsuvia, N-(4-(Methoxymethyl)-1-(2-(2-thienyl)ethyl)-4-piperidinyl)-N-phenylpropanamide, Zalviso, Sufentanil Citrate, Sufenta, Chronogesic\n", + "------\n" + ] + } + ], "source": [ "for d, p in zip(drug_mentions, predictions):\n", " print('Input:', d)\n",