diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_anatomy_general_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_anatomy_general_en.md new file mode 100644 index 00000000000000..571b330d1ba383 --- /dev/null +++ b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_anatomy_general_en.md @@ -0,0 +1,148 @@ +--- +layout: model +title: Extract Anatomical Entities from Oncology Texts +author: John Snow Labs +name: ner_oncology_anatomy_general +date: 2022-11-24 +tags: [licensed, clinical, en, oncology, anatomy, ner] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts anatomical entities using an unspecific label. + +## Predicted Entities + +`Anatomical_Site`, `Direction` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_anatomy_general_en_4.2.2_3.0_1669298930681.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology_anatomy_general", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["The patient presented a mass in her left breast, and a possible metastasis in her lungs and in her liver."]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_oncology_anatomy_general", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() 
+ .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("The patient presented a mass in her left breast, and a possible metastasis in her lungs and in her liver.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:--------|:----------------| +| left | Direction | +| breast | Anatomical_Site | +| lungs | Anatomical_Site | +| liver | Anatomical_Site | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology_anatomy_general| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.3 MB| + +## References + +In-house annotated oncology case reports. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 +Anatomical_Site 2946 549 638 3584 0.84 0.82 0.83 + Direction 864 209 120 984 0.81 0.88 0.84 + macro_avg 3810 758 758 4568 0.82 0.85 0.84 + micro_avg 3810 758 758 4568 0.83 0.83 0.83 +``` \ No newline at end of file diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_anatomy_granular_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_anatomy_granular_en.md new file mode 100644 index 00000000000000..54a16844d633be --- /dev/null +++ b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_anatomy_granular_en.md @@ -0,0 +1,155 @@ +--- +layout: model +title: Extract Granular Anatomical Entities from Oncology Texts +author: John Snow Labs +name: ner_oncology_anatomy_granular +date: 2022-11-24 +tags: [licensed, clinical, en, oncology, ner, anatomy] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts mentions of anatomical entities using granular labels. 
+ +## Predicted Entities + +`Direction`, `Site_Lymph_Node`, `Site_Breast`, `Site_Other_Body_Part`, `Site_Bone`, `Site_Liver`, `Site_Lung`, `Site_Brain` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_anatomy_granular_en_4.2.2_3.0_1669299394344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology_anatomy_granular", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["The patient presented a mass in her left breast, and a possible metastasis in her lungs and in her liver."]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_oncology_anatomy_granular", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new 
NerConverter() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("The patient presented a mass in her left breast, and a possible metastasis in her lungs and in her liver.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:--------|:------------| +| left | Direction | +| breast | Site_Breast | +| lungs | Site_Lung | +| liver | Site_Liver | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology_anatomy_granular| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.3 MB| + +## References + +In-house annotated oncology case reports. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + Direction 822 221 162 984 0.79 0.84 0.81 + Site_Lymph_Node 481 38 70 551 0.93 0.87 0.90 + Site_Breast 88 14 59 147 0.86 0.60 0.71 +Site_Other_Body_Part 604 184 897 1501 0.77 0.40 0.53 + Site_Bone 252 74 61 313 0.77 0.81 0.79 + Site_Liver 178 92 56 234 0.66 0.76 0.71 + Site_Lung 398 98 161 559 0.80 0.71 0.75 + Site_Brain 197 44 82 279 0.82 0.71 0.76 + macro_avg 3020 765 1548 4568 0.80 0.71 0.74 + micro_avg 3020 765 1548 4568 0.80 0.66 0.71 +``` \ No newline at end of file diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_biomarker_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_biomarker_en.md new file mode 100644 index 00000000000000..624baf3434747f --- /dev/null +++ b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_biomarker_en.md @@ -0,0 +1,163 @@ +--- +layout: model +title: Extract Biomarkers and their Results +author: John Snow Labs +name: ner_oncology_biomarker +date: 2022-11-24 +tags: [licensed, clinical, en, ner, oncology, biomarker] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts mentions of biomarkers and biomarker results from oncology texts. 
+ +## Predicted Entities + +`Biomarker_Result`, `Biomarker` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_biomarker_en_4.2.2_3.0_1669299787628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology_biomarker", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["The results of immunohistochemical examination showed that she tested negative for CK7, synaptophysin (Syn), chromogranin A (CgA), Muc5AC, human epidermal growth factor receptor-2 (HER2), and Muc6; positive for CK20, Muc1, Muc2, E-cadherin, and p53; the Ki-67 index was about 87%."]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = 
MedicalNerModel.pretrained("ner_oncology_biomarker", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("The results of immunohistochemical examination showed that she tested negative for CK7, synaptophysin (Syn), chromogranin A (CgA), Muc5AC, human epidermal growth factor receptor-2 (HER2), and Muc6; positive for CK20, Muc1, Muc2, E-cadherin, and p53; the Ki-67 index was about 87%.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:-----------------------------------------|:-----------------| +| negative | Biomarker_Result | +| CK7 | Biomarker | +| synaptophysin | Biomarker | +| Syn | Biomarker | +| chromogranin A | Biomarker | +| CgA | Biomarker | +| Muc5AC | Biomarker | +| human epidermal growth factor receptor-2 | Biomarker | +| HER2 | Biomarker | +| Muc6 | Biomarker | +| positive | Biomarker_Result | +| CK20 | Biomarker | +| Muc1 | Biomarker | +| Muc2 | Biomarker | +| E-cadherin | Biomarker | +| p53 | Biomarker | +| Ki-67 index | Biomarker | +| 87% | Biomarker_Result | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology_biomarker| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.3 MB| + +## References + +In-house annotated oncology case reports. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 +Biomarker_Result 1030 148 415 1445 0.87 0.71 0.79 + Biomarker 1685 272 279 1964 0.86 0.86 0.86 + macro_avg 2715 420 694 3409 0.87 0.79 0.82 + micro_avg 2715 420 694 3409 0.87 0.80 0.83 +``` \ No newline at end of file diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_demographics_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_demographics_en.md new file mode 100644 index 00000000000000..9fa66e2ab8cf54 --- /dev/null +++ b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_demographics_en.md @@ -0,0 +1,150 @@ +--- +layout: model +title: Extract Demographic Entities from Oncology Texts +author: John Snow Labs +name: ner_oncology_demographics +date: 2022-11-24 +tags: [licensed, clinical, en, ner, oncology, demographics] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + 
+This model extracts demographic information from oncology texts, including age, gender, race or ethnicity, and smoking status. + +## Predicted Entities + +`Smoking_Status`, `Age`, `Race_Ethnicity`, `Gender` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_demographics_en_4.2.2_3.0_1669300163954.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology_demographics", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["The patient is a 40-year-old man with history of heavy smoking."]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_oncology_demographics", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("sentence", "token", 
"ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("The patient is a 40-year-old man with history of heavy smoking.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:------------|:---------------| +| 40-year-old | Age | +| man | Gender | +| smoking | Smoking_Status | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology_demographics| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.6 MB| + +## References + +In-house annotated oncology case reports. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 +Smoking_Status 60 19 8 68 0.76 0.88 0.82 + Age 934 33 15 949 0.97 0.98 0.97 +Race_Ethnicity 57 5 5 62 0.92 0.92 0.92 + Gender 1248 18 6 1254 0.99 1.00 0.99 + macro_avg 2299 75 34 2333 0.91 0.95 0.93 + micro_avg 2299 75 34 2333 0.97 0.99 0.98 +``` \ No newline at end of file diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_diagnosis_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_diagnosis_en.md new file mode 100644 index 00000000000000..0181422deb71bd --- /dev/null +++ b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_diagnosis_en.md @@ -0,0 +1,163 @@ +--- +layout: model +title: Detect Entities Related to Cancer Diagnosis +author: John Snow Labs +name: ner_oncology_diagnosis +date: 2022-11-24 +tags: [licensed, clinical, en, ner, oncology] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts entities related to cancer diagnosis, such as Metastasis, Histological_Type or Invasion. 
+ +## Predicted Entities + +`Histological_Type`, `Staging`, `Cancer_Score`, `Tumor_Finding`, `Invasion`, `Tumor_Size`, `Adenopathy`, `Performance_Status`, `Pathology_Result`, `Metastasis`, `Cancer_Dx`, `Grade` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_diagnosis_en_4.2.2_3.0_1669300474926.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology_diagnosis", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["""Two years ago, the patient presented with a tumor in her left breast and adenopathies. She was diagnosed with invasive ductal carcinoma. 
+Last week she was also found to have a lung metastasis."""]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_oncology_diagnosis", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("""Two years ago, the patient presented with a tumor in her left breast and adenopathies. She was diagnosed with invasive ductal carcinoma. +Last week she was also found to have a lung metastasis.""").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:-------------|:------------------| +| tumor | Tumor_Finding | +| adenopathies | Adenopathy | +| invasive | Histological_Type | +| ductal | Histological_Type | +| carcinoma | Cancer_Dx | +| metastasis | Metastasis | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology_diagnosis| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.3 MB| + +## References + +In-house annotated oncology case reports. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + Histological_Type 354 63 99 453 0.85 0.78 0.81 + Staging 234 27 24 258 0.90 0.91 0.90 + Cancer_Score 36 15 26 62 0.71 0.58 0.64 + Tumor_Finding 1121 83 136 1257 0.93 0.89 0.91 + Invasion 154 27 27 181 0.85 0.85 0.85 + Tumor_Size 1058 126 71 1129 0.89 0.94 0.91 + Adenopathy 66 10 30 96 0.87 0.69 0.77 +Performance_Status 116 15 19 135 0.89 0.86 0.87 + Pathology_Result 852 686 290 1142 0.55 0.75 0.64 + Metastasis 356 15 14 370 0.96 0.96 0.96 + Cancer_Dx 1302 88 92 1394 0.94 0.93 0.94 + Grade 201 23 35 236 0.90 0.85 0.87 + macro_avg 5850 1178 863 6713 0.85 0.83 0.84 + micro_avg 5850 1178 863 6713 0.85 0.87 0.86 +``` \ No newline at end of file diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_en.md new file mode 100644 index 00000000000000..2577ea98f0f58f --- /dev/null +++ b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_en.md @@ -0,0 +1,219 @@ +--- +layout: model +title: Detect Oncology-Specific Entities +author: John Snow Labs +name: ner_oncology +date: 2022-11-24 +tags: [licensed, clinical, en, oncology, biomarker, treatment] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: true +article_header: + type: cover +use_language_switcher: 
"Python-Scala-Java" +--- + +## Description + +This model extracts more than 40 oncology-related entities, including therapies, tests and staging. + +## Predicted Entities + +`Histological_Type`, `Direction`, `Staging`, `Cancer_Score`, `Imaging_Test`, `Cycle_Number`, `Tumor_Finding`, `Site_Lymph_Node`, `Invasion`, `Response_To_Treatment`, `Smoking_Status`, `Tumor_Size`, `Cycle_Count`, `Adenopathy`, `Age`, `Biomarker_Result`, `Unspecific_Therapy`, `Site_Breast`, `Chemotherapy`, `Targeted_Therapy`, `Radiotherapy`, `Performance_Status`, `Pathology_Test`, `Site_Other_Body_Part`, `Cancer_Surgery`, `Line_Of_Therapy`, `Pathology_Result`, `Hormonal_Therapy`, `Site_Bone`, `Biomarker`, `Immunotherapy`, `Cycle_Day`, `Frequency`, `Route`, `Duration`, `Death_Entity`, `Metastasis`, `Site_Liver`, `Cancer_Dx`, `Grade`, `Date`, `Site_Lung`, `Site_Brain`, `Relative_Date`, `Race_Ethnicity`, `Gender`, `Oncogene`, `Dosage`, `Radiation_Dose` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_en_4.2.2_3.0_1669306355829.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["""The patient had previously undergone a left mastectomy and an axillary lymph node dissection for a left breast cancer twenty years ago. +The tumor was positive for ER and PR. Postoperatively, radiotherapy was administered to the residual breast. +The cancer recurred as a right lung metastasis 13 years later. 
The patient underwent a regimen consisting of adriamycin (60 mg/m2) and cyclophosphamide (600 mg/m2) over six courses, as first line therapy."""]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_oncology", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("""The patient had previously undergone a left mastectomy and an axillary lymph node dissection for a left breast cancer twenty years ago. +The tumor was positive for ER and PR. Postoperatively, radiotherapy was administered to the residual breast. +The cancer recurred as a right lung metastasis 13 years later. The patient underwent a regimen consisting of adriamycin (60 mg/m2) and cyclophosphamide (600 mg/m2) over six courses, as first line therapy.""").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:-------------------------------|:----------------------| +| left | Direction | +| mastectomy | Cancer_Surgery | +| axillary lymph node dissection | Cancer_Surgery | +| left | Direction | +| breast cancer | Cancer_Dx | +| twenty years ago | Relative_Date | +| tumor | Tumor_Finding | +| positive | Biomarker_Result | +| ER | Biomarker | +| PR | Biomarker | +| radiotherapy | Radiotherapy | +| breast | Site_Breast | +| cancer | Cancer_Dx | +| recurred | Response_To_Treatment | +| right | Direction | +| lung | Site_Lung | +| metastasis | Metastasis | +| 13 years later | Relative_Date | +| adriamycin | Chemotherapy | +| 60 mg/m2 | Dosage | +| cyclophosphamide | Chemotherapy | +| 600 mg/m2 | Dosage | +| six courses | Cycle_Count | +| first line | Line_Of_Therapy | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.6 MB| + +## References + +In-house annotated oncology case reports. 
+ +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + Histological_Type 339 75 114 453 0.82 0.75 0.78 + Direction 832 163 152 984 0.84 0.85 0.84 + Staging 229 31 29 258 0.88 0.89 0.88 + Cancer_Score 37 8 25 62 0.82 0.60 0.69 + Imaging_Test 2027 214 177 2204 0.90 0.92 0.91 + Cycle_Number 73 29 24 97 0.72 0.75 0.73 + Tumor_Finding 1114 64 143 1257 0.95 0.89 0.91 + Site_Lymph_Node 491 53 60 551 0.90 0.89 0.90 + Invasion 158 36 23 181 0.81 0.87 0.84 +Response_To_Treatment 431 149 165 596 0.74 0.72 0.73 + Smoking_Status 66 18 2 68 0.79 0.97 0.87 + Tumor_Size 1050 112 79 1129 0.90 0.93 0.92 + Cycle_Count 177 62 53 230 0.74 0.77 0.75 + Adenopathy 67 12 29 96 0.85 0.70 0.77 + Age 930 33 19 949 0.97 0.98 0.97 + Biomarker_Result 1160 169 285 1445 0.87 0.80 0.84 + Unspecific_Therapy 198 86 80 278 0.70 0.71 0.70 + Site_Breast 125 15 22 147 0.89 0.85 0.87 + Chemotherapy 814 55 65 879 0.94 0.93 0.93 + Targeted_Therapy 195 27 33 228 0.88 0.86 0.87 + Radiotherapy 276 29 34 310 0.90 0.89 0.90 + Performance_Status 121 17 14 135 0.88 0.90 0.89 + Pathology_Test 888 296 162 1050 0.75 0.85 0.79 + Site_Other_Body_Part 909 275 592 1501 0.77 0.61 0.68 + Cancer_Surgery 693 119 126 819 0.85 0.85 0.85 + Line_Of_Therapy 101 11 5 106 0.90 0.95 0.93 + Pathology_Result 655 279 487 1142 0.70 0.57 0.63 + Hormonal_Therapy 169 4 16 185 0.98 0.91 0.94 + Site_Bone 264 81 49 313 0.77 0.84 0.80 + Biomarker 1259 238 256 1515 0.84 0.83 0.84 + Immunotherapy 103 47 25 128 0.69 0.80 0.74 + Cycle_Day 200 36 48 248 0.85 0.81 0.83 + Frequency 354 27 73 427 0.93 0.83 0.88 + Route 91 15 22 113 0.86 0.81 0.83 + Duration 625 161 136 761 0.80 0.82 0.81 + Death_Entity 34 2 4 38 0.94 0.89 0.92 + Metastasis 353 18 17 370 0.95 0.95 0.95 + Site_Liver 189 64 45 234 0.75 0.81 0.78 + Cancer_Dx 1301 103 93 1394 0.93 0.93 0.93 + Grade 190 27 46 236 0.88 0.81 0.84 + Date 807 21 24 831 0.97 0.97 0.97 + Site_Lung 469 110 90 559 0.81 0.84 0.82 + Site_Brain 221 64 58 279 0.78 0.79 0.78 + Relative_Date 1211 401 
111 1322 0.75 0.92 0.83 + Race_Ethnicity 57 8 5 62 0.88 0.92 0.90 + Gender 1247 17 7 1254 0.99 0.99 0.99 + Oncogene 345 83 104 449 0.81 0.77 0.79 + Dosage 900 30 160 1060 0.97 0.85 0.90 + Radiation_Dose 108 5 18 126 0.96 0.86 0.90 + macro_avg 24653 3999 4406 29059 0.85 0.84 0.84 + micro_avg 24653 3999 4406 29059 0.86 0.85 0.85 +``` \ No newline at end of file diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_posology_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_posology_en.md new file mode 100644 index 00000000000000..5cf4bb2eb160c1 --- /dev/null +++ b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_posology_en.md @@ -0,0 +1,161 @@ +--- +layout: model +title: Extract Cancer Therapies and Granular Posology Information +author: John Snow Labs +name: ner_oncology_posology +date: 2022-11-24 +tags: [licensed, clinical, en, oncology, ner, treatment, posology] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts cancer therapies (Cancer_Surgery, Radiotherapy and Cancer_Therapy) and posology information at a granular level. + +## Predicted Entities + +`Cycle_Number`, `Cycle_Count`, `Radiotherapy`, `Cancer_Surgery`, `Cycle_Day`, `Frequency`, `Route`, `Cancer_Therapy`, `Duration`, `Dosage`, `Radiation_Dose` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_posology_en_4.2.2_3.0_1669306988706.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology_posology", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["The patient underwent a regimen consisting of adriamycin (60 mg/m2) and cyclophosphamide (600 mg/m2) over six courses. 
She is currently receiving her second cycle of chemotherapy and is in good overall condition."]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_oncology_posology", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("The patient underwent a regimen consisting of adriamycin (60 mg/m2) and cyclophosphamide (600 mg/m2) over six courses. She is currently receiving her second cycle of chemotherapy and is in good overall condition.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:-----------------|:---------------| +| adriamycin | Cancer_Therapy | +| 60 mg/m2 | Dosage | +| cyclophosphamide | Cancer_Therapy | +| 600 mg/m2 | Dosage | +| six courses | Cycle_Count | +| second cycle | Cycle_Number | +| chemotherapy | Cancer_Therapy | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology_posology| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.3 MB| + +## References + +In-house annotated oncology case reports. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + Cycle_Number 52 4 45 97 0.93 0.54 0.68 + Cycle_Count 200 63 30 230 0.76 0.87 0.81 + Radiotherapy 255 16 55 310 0.94 0.82 0.88 +Cancer_Surgery 592 66 227 819 0.90 0.72 0.80 + Cycle_Day 175 22 73 248 0.89 0.71 0.79 + Frequency 337 44 90 427 0.88 0.79 0.83 + Route 53 1 60 113 0.98 0.47 0.63 +Cancer_Therapy 1448 81 250 1698 0.95 0.85 0.90 + Duration 525 154 236 761 0.77 0.69 0.73 + Dosage 858 79 202 1060 0.92 0.81 0.86 +Radiation_Dose 86 4 40 126 0.96 0.68 0.80 + macro_avg 4581 534 1308 5889 0.90 0.72 0.79 + micro_avg 4581 534 1308 5889 0.90 0.78 0.83 +``` \ No newline at end of file diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_response_to_treatment_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_response_to_treatment_en.md new file mode 100644 index 00000000000000..f119de14a2fd47 --- /dev/null +++ b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_response_to_treatment_en.md @@ -0,0 +1,147 @@ +--- +layout: model +title: Extract Mentions of Response to Cancer Treatment +author: John Snow Labs +name: ner_oncology_response_to_treatment +date: 2022-11-24 +tags: [licensed, clinical, en, oncology, ner, treatment] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: 
true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts entities related to the patient's response to the oncology treatment, including clinical response and changes in tumor size. + +## Predicted Entities + +`Response_To_Treatment`, `Size_Trend`, `Line_Of_Therapy` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_response_to_treatment_en_4.2.2_3.0_1669307329775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology_response_to_treatment", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["She completed her first-line therapy, but some months later there was recurrence of the breast cancer. 
"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_oncology_response_to_treatment", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("She completed her first-line therapy, but some months later there was recurrence of the breast cancer. ").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:-----------|:----------------------| +| first-line | Line_Of_Therapy | +| recurrence | Response_To_Treatment | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology_response_to_treatment| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.4 MB| + +## References + +In-house annotated oncology case reports. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 +Response_To_Treatment 326 101 157 483 0.76 0.67 0.72 + Size_Trend 43 28 70 113 0.61 0.38 0.47 + Line_Of_Therapy 99 11 7 106 0.90 0.93 0.92 + macro_avg 468 140 234 702 0.76 0.66 0.70 + micro_avg 468 140 234 702 0.76 0.67 0.71 +``` \ No newline at end of file diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_test_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_test_en.md new file mode 100644 index 00000000000000..65fb87ee0d655c --- /dev/null +++ b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_test_en.md @@ -0,0 +1,152 @@ +--- +layout: model +title: Extract Oncology Tests +author: John Snow Labs +name: ner_oncology_test +date: 2022-11-24 +tags: [licensed, clinical, oncology, en, ner, test] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts mentions of tests from oncology texts, including pathology tests and imaging tests. 
+ +## Predicted Entities + +`Imaging_Test`, `Biomarker_Result`, `Pathology_Test`, `Biomarker`, `Oncogene` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_test_en_4.2.2_3.0_1669307746859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology_test", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["A biopsy was conducted using an ultrasound guided thick-needle. 
His chest computed tomography (CT) scan was negative."]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_oncology_test", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("A biopsy was conducted using an ultrasound guided thick-needle. His chest computed tomography (CT) scan was negative.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:-------------------------------|:---------------| +| biopsy | Pathology_Test | +| ultrasound guided thick-needle | Pathology_Test | +| chest computed tomography | Imaging_Test | +| CT | Imaging_Test | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology_test| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.2 MB| + +## References + +In-house annotated oncology case reports. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + Imaging_Test 2020 229 184 2204 0.90 0.92 0.91 +Biomarker_Result 1177 186 268 1445 0.86 0.81 0.84 + Pathology_Test 888 276 162 1050 0.76 0.85 0.80 + Biomarker 1287 254 228 1515 0.84 0.85 0.84 + Oncogene 365 89 84 449 0.80 0.81 0.81 + macro_avg 5737 1034 926 6663 0.83 0.85 0.84 + micro_avg 5737 1034 926 6663 0.85 0.86 0.85 +``` \ No newline at end of file diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_therapy_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_therapy_en.md new file mode 100644 index 00000000000000..f60dc17183f344 --- /dev/null +++ b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_therapy_en.md @@ -0,0 +1,175 @@ +--- +layout: model +title: Detect Entities Related to Cancer Therapies +author: John Snow Labs +name: ner_oncology_therapy +date: 2022-11-24 +tags: [clinical, en, licensed, oncology, treatment, ner] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts entities related to oncology therapies using granular labels, including mentions of treatments, posology information and line of therapy. 
+ +## Predicted Entities + +`Cycle_Number`, `Response_To_Treatment`, `Cycle_Count`, `Unspecific_Therapy`, `Chemotherapy`, `Targeted_Therapy`, `Radiotherapy`, `Cancer_Surgery`, `Line_Of_Therapy`, `Hormonal_Therapy`, `Immunotherapy`, `Cycle_Day`, `Frequency`, `Route`, `Duration`, `Dosage`, `Radiation_Dose` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_therapy_en_4.2.2_3.0_1669308088671.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology_therapy", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["The patient had previously undergone a left mastectomy and an axillary lymph node dissection for a left breast cancer twenty years ago. +The tumor was positive for ER and PR. Postoperatively, radiotherapy was administered to her breast. +The cancer recurred as a right lung metastasis 13 years later. 
The patient underwent a regimen consisting of adriamycin (60 mg/m2) and cyclophosphamide (600 mg/m2) over six courses, as first line therapy."]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_oncology_therapy", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("The patient had previously undergone a left mastectomy and an axillary lymph node dissection for a left breast cancer twenty years ago. +The tumor was positive for ER and PR. Postoperatively, radiotherapy was administered to her breast. +The cancer recurred as a right lung metastasis 13 years later. The patient underwent a regimen consisting of adriamycin (60 mg/m2) and cyclophosphamide (600 mg/m2) over six courses, as first line therapy.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:-------------------------------|:----------------------| +| mastectomy | Cancer_Surgery | +| axillary lymph node dissection | Cancer_Surgery | +| radiotherapy | Radiotherapy | +| recurred | Response_To_Treatment | +| adriamycin | Chemotherapy | +| 60 mg/m2 | Dosage | +| cyclophosphamide | Chemotherapy | +| 600 mg/m2 | Dosage | +| six courses | Cycle_Count | +| first line | Line_Of_Therapy | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology_therapy| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.4 MB| + +## References + +In-house annotated oncology case reports. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + Cycle_Number 78 41 19 97 0.66 0.80 0.72 +Response_To_Treatment 451 205 145 596 0.69 0.76 0.72 + Cycle_Count 210 75 20 230 0.74 0.91 0.82 + Unspecific_Therapy 189 76 89 278 0.71 0.68 0.70 + Chemotherapy 831 87 48 879 0.91 0.95 0.92 + Targeted_Therapy 194 28 34 228 0.87 0.85 0.86 + Radiotherapy 279 35 31 310 0.89 0.90 0.89 + Cancer_Surgery 720 192 99 819 0.79 0.88 0.83 + Line_Of_Therapy 95 6 11 106 0.94 0.90 0.92 + Hormonal_Therapy 170 6 15 185 0.97 0.92 0.94 + Immunotherapy 96 17 32 128 0.85 0.75 0.80 + Cycle_Day 205 38 43 248 0.84 0.83 0.84 + Frequency 363 33 64 427 0.92 0.85 0.88 + Route 93 6 20 113 0.94 0.82 0.88 + Duration 527 102 234 761 0.84 0.69 0.76 + Dosage 959 63 101 1060 0.94 0.90 0.92 + Radiation_Dose 106 12 20 126 0.90 0.84 0.87 + macro_avg 5566 1022 1025 6591 0.85 0.84 0.84 + micro_avg 5566 1022 1025 6591 0.85 0.84 0.84 +``` \ No newline at end of file diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_tnm_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_tnm_en.md new file mode 100644 index 00000000000000..3c68a8869d1751 --- /dev/null +++ 
b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_tnm_en.md @@ -0,0 +1,156 @@ +--- +layout: model +title: Extract Entities Related to TNM Staging +author: John Snow Labs +name: ner_oncology_tnm +date: 2022-11-24 +tags: [licensed, en, clinical, oncology, ner] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts staging information and mentions related to tumors, lymph nodes and metastases. + +## Predicted Entities + +`Lymph_Node`, `Staging`, `Lymph_Node_Modifier`, `Tumor_Description`, `Tumor`, `Metastasis`, `Cancer_Dx` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_tnm_en_4.2.2_3.0_1669308699155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology_tnm", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["The final diagnosis was metastatic breast carcinoma, and it was classified as T2N1M1 stage IV. 
The histological grade of this 4 cm tumor was grade 2."]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_oncology_tnm", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("The final diagnosis was metastatic breast carcinoma, and it was classified as T2N1M1 stage IV. The histological grade of this 4 cm tumor was grade 2.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:-----------------|:------------------| +| metastatic | Metastasis | +| breast carcinoma | Cancer_Dx | +| T2N1M1 stage IV | Staging | +| 4 cm | Tumor_Description | +| tumor | Tumor | +| grade 2 | Tumor_Description | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology_tnm| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.2 MB| + +## References + +In-house annotated oncology case reports. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + Lymph_Node 570 77 77 647 0.88 0.88 0.88 + Staging 232 22 26 258 0.91 0.90 0.91 +Lymph_Node_Modifier 30 5 5 35 0.86 0.86 0.86 + Tumor_Description 2651 581 490 3141 0.82 0.84 0.83 + Tumor 1116 72 141 1257 0.94 0.89 0.91 + Metastasis 358 15 12 370 0.96 0.97 0.96 + Cancer_Dx 1302 87 92 1394 0.94 0.93 0.94 + macro_avg 6259 859 843 7102 0.90 0.90 0.90 + micro_avg 6259 859 843 7102 0.88 0.88 0.88 +``` \ No newline at end of file diff --git a/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_unspecific_posology_en.md b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_unspecific_posology_en.md new file mode 100644 index 00000000000000..70b089ad9fc003 --- /dev/null +++ b/docs/_posts/Ahmetemintek/2022-11-24-ner_oncology_unspecific_posology_en.md @@ -0,0 +1,152 @@ +--- +layout: model +title: Extract Cancer Therapies and Posology Information +author: John Snow Labs +name: ner_oncology_unspecific_posology +date: 2022-11-24 +tags: [licensed, clinical, oncology, en, ner, treatment, posology] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.2 +spark_version: 3.0 +supported: true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts mentions of treatments and posology information using unspecific labels (low 
granularity). + +## Predicted Entities + +`Posology_Information`, `Cancer_Therapy` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/27.Oncology_Model.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_oncology_unspecific_posology_en_4.2.2_3.0_1669309081671.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"]) \ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_oncology_unspecific_posology", "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter() \ + .setInputCols(["sentence", "token", "ner"]) \ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["The patient underwent a regimen consisting of adriamycin (60 mg/m2) and cyclophosphamide (600 mg/m2) over six courses. 
She is currently receiving her second cycle of chemotherapy and is in good overall condition."]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel().pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_oncology_unspecific_posology", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + + +val pipeline = new Pipeline().setStages(Array(document_assembler, + sentence_detector, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("The patient underwent a regimen consisting of adriamycin (60 mg/m2) and cyclophosphamide (600 mg/m2) over six courses. She is currently receiving her second cycle of chemotherapy and is in good overall condition.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash +| chunk | ner_label | +|:-----------------|:---------------------| +| adriamycin | Cancer_Therapy | +| 60 mg/m2 | Posology_Information | +| cyclophosphamide | Cancer_Therapy | +| 600 mg/m2 | Posology_Information | +| over six courses | Posology_Information | +| second cycle | Posology_Information | +| chemotherapy | Cancer_Therapy | +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oncology_unspecific_posology| +|Compatibility:|Healthcare NLP 4.2.2+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|34.3 MB| + +## References + +In-house annotated oncology case reports. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 +Posology_Information 2663 244 399 3062 0.92 0.87 0.89 + Cancer_Therapy 2580 317 247 2827 0.89 0.91 0.90 + macro_avg 5243 561 646 5889 0.90 0.89 0.90 + micro_avg 5243 561 646 5889 0.90 0.89 0.90 +``` \ No newline at end of file