From 0e44ef8b982c3259f1fcf060eb315b4fd57e1efa Mon Sep 17 00:00:00 2001 From: Cabir C <64752006+Cabir40@users.noreply.github.com> Date: Fri, 10 Feb 2023 19:34:04 +0300 Subject: [PATCH] Models hub internal (#13502) --- ...3-02-09-rxnorm_drug_brandname_mapper_en.md | 148 +++++++++++++ .../2023-01-06-redl_clinical_biobert_en.md | 2 +- ...2023-02-10-ner_sdoh_demographics_wip_en.md | 166 +++++++++++++++ ...10-ner_sdoh_income_social_status_wip_en.md | 157 ++++++++++++++ ...2-10-ner_sdoh_social_environment_wip_en.md | 161 +++++++++++++++ .../2023-01-25-ner_eu_clinical_case_en.md | 180 ++++++++++++++++ .../2023-02-01-ner_eu_clinical_case_es.md | 189 +++++++++++++++++ .../2023-02-01-ner_eu_clinical_case_fr.md | 182 ++++++++++++++++ .../2023-02-02-ner_eu_clinical_case_eu.md | 194 ++++++++++++++++++ ...2023-02-06-ner_eu_clinical_condition_en.md | 153 ++++++++++++++ ...2023-02-06-ner_eu_clinical_condition_es.md | 149 ++++++++++++++ ...2023-02-06-ner_eu_clinical_condition_eu.md | 153 ++++++++++++++ ...2023-02-06-ner_eu_clinical_condition_fr.md | 152 ++++++++++++++ ...2023-02-06-ner_eu_clinical_condition_it.md | 155 ++++++++++++++ 14 files changed, 2140 insertions(+), 1 deletion(-) create mode 100644 docs/_posts/Ahmetemintek/2023-02-09-rxnorm_drug_brandname_mapper_en.md create mode 100644 docs/_posts/Meryem1425/2023-02-10-ner_sdoh_demographics_wip_en.md create mode 100644 docs/_posts/Meryem1425/2023-02-10-ner_sdoh_income_social_status_wip_en.md create mode 100644 docs/_posts/Meryem1425/2023-02-10-ner_sdoh_social_environment_wip_en.md create mode 100644 docs/_posts/gpirge/2023-01-25-ner_eu_clinical_case_en.md create mode 100644 docs/_posts/gpirge/2023-02-01-ner_eu_clinical_case_es.md create mode 100644 docs/_posts/gpirge/2023-02-01-ner_eu_clinical_case_fr.md create mode 100644 docs/_posts/gpirge/2023-02-02-ner_eu_clinical_case_eu.md create mode 100644 docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_en.md create mode 100644 
docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_es.md create mode 100644 docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_eu.md create mode 100644 docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_fr.md create mode 100644 docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_it.md diff --git a/docs/_posts/Ahmetemintek/2023-02-09-rxnorm_drug_brandname_mapper_en.md b/docs/_posts/Ahmetemintek/2023-02-09-rxnorm_drug_brandname_mapper_en.md new file mode 100644 index 00000000000000..d12d1677eb0116 --- /dev/null +++ b/docs/_posts/Ahmetemintek/2023-02-09-rxnorm_drug_brandname_mapper_en.md @@ -0,0 +1,148 @@ +--- +layout: model +title: Mapping RxNorm and RxNorm Extension Codes with Corresponding Drug Brand Names +author: John Snow Labs +name: rxnorm_drug_brandname_mapper +date: 2023-02-09 +tags: [chunk_mapping, rxnorm, drug_brand_name, rxnorm_extension, en, clinical, licensed] +task: Chunk Mapping +language: en +edition: Healthcare NLP 4.3.0 +spark_version: 3.0 +supported: true +annotator: ChunkMapperModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This pretrained model maps RxNorm and RxNorm Extension codes to their corresponding drug brand names. It returns 2 types of brand names for the corresponding RxNorm or RxNorm Extension code. 
+ +## Predicted Entities + +`rxnorm_brandname`, `rxnorm_extension_brandname` + +{:.btn-box} + +[Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/26.Chunk_Mapping.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/rxnorm_drug_brandname_mapper_en_4.3.0_3.0_1675966478332.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/rxnorm_drug_brandname_mapper_en_4.3.0_3.0_1675966478332.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("chunk") + +sbert_embedder = BertSentenceEmbeddings\ + .pretrained("sbiobert_base_cased_mli", "en","clinical/models")\ + .setInputCols(["chunk"])\ + .setOutputCol("sbert_embeddings") + +rxnorm_resolver = SentenceEntityResolverModel\ + .pretrained("sbiobertresolve_rxnorm_augmented", "en", "clinical/models")\ + .setInputCols(["chunk", "sbert_embeddings"])\ + .setOutputCol("rxnorm_code")\ + .setDistanceFunction("EUCLIDEAN") + +resolver2chunk = Resolution2Chunk()\ + .setInputCols(["rxnorm_code"]) \ + .setOutputCol("rxnorm_chunk")\ + +chunkerMapper = ChunkMapperModel.pretrained("rxnorm_drug_brandname_mapper", "en", "clinical/models")\ + .setInputCols(["rxnorm_chunk"])\ + .setOutputCol("mappings")\ + .setRels(["rxnorm_brandname", "rxnorm_extension_brandname"]) + + +pipeline = Pipeline( + stages = [ + documentAssembler, + sbert_embedder, + rxnorm_resolver, + resolver2chunk, + chunkerMapper + ]) + +model = pipeline.fit(spark.createDataFrame([['']]).toDF('text')) + +pipeline = LightPipeline(model) + +result = pipeline.fullAnnotate(['metformin', 'advil']) + +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("chunk") + +val sbert_embedder = BertSentenceEmbeddings + .pretrained("sbiobert_base_cased_mli", "en","clinical/models") + .setInputCols(Array("chunk")) + .setOutputCol("sbert_embeddings") + +val rxnorm_resolver = SentenceEntityResolverModel + .pretrained("sbiobertresolve_rxnorm_augmented", "en", "clinical/models") + .setInputCols(Array("chunk", "sbert_embeddings")) + .setOutputCol("rxnorm_code") + .setDistanceFunction("EUCLIDEAN") + +val resolver2chunk = new Resolution2Chunk() + .setInputCols(Array("rxnorm_code")) + .setOutputCol("rxnorm_chunk") + +val chunkerMapper = ChunkMapperModel.pretrained("rxnorm_drug_brandname_mapper", "en", "clinical/models") + 
.setInputCols(Array("rxnorm_chunk")) + .setOutputCol("mappings") + .setRels(Array("rxnorm_brandname", "rxnorm_extension_brandname")) + + + +val pipeline = new Pipeline().setStages(Array( +documentAssembler, +sbert_embedder, +rxnorm_resolver, +resolver2chunk, +chunkerMapper +)) + +val data = Seq("metformin", "advil").toDS.toDF("text") + +val result= pipeline.fit(data).transform(data) + +``` +
+ +## Results + +```bash ++--------------+-------------+--------------------------------------------------+--------------------------+ +| drug_name|rxnorm_result| mapping_result| relation | ++--------------+-------------+--------------------------------------------------+--------------------------+ +| metformin| 6809|Actoplus Met (metformin):::Avandamet (metformin...| rxnorm_brandname| +| metformin| 6809|A FORMIN (metformin):::ABERIN MAX (metformin)::...|rxnorm_extension_brandname| +| advil| 153010| Advil (Advil)| rxnorm_brandname| +| advil| 153010| NONE|rxnorm_extension_brandname| ++--------------+-------------+--------------------------------------------------+--------------------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rxnorm_drug_brandname_mapper| +|Compatibility:|Healthcare NLP 4.3.0+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[rxnorm_chunk]| +|Output Labels:|[mappings]| +|Language:|en| +|Size:|4.0 MB| \ No newline at end of file diff --git a/docs/_posts/Cabir40/2023-01-06-redl_clinical_biobert_en.md b/docs/_posts/Cabir40/2023-01-06-redl_clinical_biobert_en.md index 2902ec2bac5700..e60a538ef591b5 100644 --- a/docs/_posts/Cabir40/2023-01-06-redl_clinical_biobert_en.md +++ b/docs/_posts/Cabir40/2023-01-06-redl_clinical_biobert_en.md @@ -221,4 +221,4 @@ TrIP 0.517 0.796 0.627 151 TrNAP 0.402 0.672 0.503 112 TrWP 0.257 0.824 0.392 109 Avg. 
0.635 0.803 0.691 - -``` \ No newline at end of file +``` diff --git a/docs/_posts/Meryem1425/2023-02-10-ner_sdoh_demographics_wip_en.md b/docs/_posts/Meryem1425/2023-02-10-ner_sdoh_demographics_wip_en.md new file mode 100644 index 00000000000000..0400a7a9b1d975 --- /dev/null +++ b/docs/_posts/Meryem1425/2023-02-10-ner_sdoh_demographics_wip_en.md @@ -0,0 +1,166 @@ +--- +layout: model +title: Extract Demographic Entities from Social Determinants of Health Texts +author: John Snow Labs +name: ner_sdoh_demographics_wip +date: 2023-02-10 +tags: [licensed, clinical, social_determinants, en, ner, demographics, sdoh, public_health] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.8 +spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts demographic information related to Social Determinants of Health from various kinds of biomedical documents. + +## Predicted Entities + +`Family_Member`, `Age`, `Gender`, `Geographic_Entity`, `Race_Ethnicity`, `Language`, `Spiritual_Beliefs` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_sdoh_demographics_wip_en_4.2.8_3.0_1675998706136.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_sdoh_demographics_wip_en_4.2.8_3.0_1675998706136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} + +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +clinical_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"])\ + .setOutputCol("embeddings") + +ner_model = MedicalNerModel.pretrained("ner_sdoh_demographics_wip", "en", "clinical/models")\ + .setInputCols(["sentence", "token", "embeddings"])\ + .setOutputCol("ner") + +ner_converter = NerConverterInternal()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentence_detector, + tokenizer, + clinical_embeddings, + ner_model, + ner_converter + ]) + +sample_texts = ["SOCIAL HISTORY: He is a former tailor from Korea.", + "He lives alone,single and no children.", + "Pt is a 61 years old married, Caucasian, Catholic woman. 
Pt speaks English reasonably well."] + + +data = spark.createDataFrame(sample_texts, StringType()).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val clinical_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner_model = MedicalNerModel.pretrained("ner_sdoh_demographics_wip", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverterInternal() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array( + document_assembler, + sentence_detector, + tokenizer, + clinical_embeddings, + ner_model, + ner_converter +)) + +val data = Seq("Pt is a 61 years old married, Caucasian, Catholic woman. Pt speaks English reasonably well.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash ++-----------------+-----+---+------------+ +|ner_label |begin|end|chunk | ++-----------------+-----+---+------------+ +|Gender |16 |17 |He | +|Geographic_Entity|43 |47 |Korea | +|Gender |0 |1 |He | +|Family_Member |29 |36 |children | +|Age |8 |19 |61 years old| +|Race_Ethnicity |30 |38 |Caucasian | +|Spiritual_Beliefs|41 |48 |Catholic | +|Gender |50 |54 |woman | +|Language |67 |73 |English | ++-----------------+-----+---+------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_sdoh_demographics_wip| +|Compatibility:|Healthcare NLP 4.2.8+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.4 KB| + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + Age 1346.0 73.0 74.0 1420.0 0.948555 0.947887 0.948221 +Spiritual_Beliefs 100.0 13.0 16.0 116.0 0.884956 0.862069 0.873362 + Family_Member 4468.0 134.0 43.0 4511.0 0.970882 0.990468 0.980577 + Race_Ethnicity 56.0 0.0 13.0 69.0 1.000000 0.811594 0.896000 + Gender 9825.0 67.0 247.0 10072.0 0.993227 0.975477 0.984272 +Geographic_Entity 225.0 9.0 29.0 254.0 0.961538 0.885827 0.922131 + Language 51.0 9.0 5.0 56.0 0.850000 0.910714 0.879310 +``` diff --git a/docs/_posts/Meryem1425/2023-02-10-ner_sdoh_income_social_status_wip_en.md b/docs/_posts/Meryem1425/2023-02-10-ner_sdoh_income_social_status_wip_en.md new file mode 100644 index 00000000000000..86f5568e5f4ee6 --- /dev/null +++ b/docs/_posts/Meryem1425/2023-02-10-ner_sdoh_income_social_status_wip_en.md @@ -0,0 +1,157 @@ +--- +layout: model +title: Extract Income and Social Status Entities from Social Determinants of Health Texts +author: John Snow Labs +name: ner_sdoh_income_social_status_wip +date: 2023-02-10 +tags: [licensed, clinical, social_determinants, en, ner, income, social_status, sdoh, public_health] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.8 
+spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts income and social status information related to Social Determinants of Health from various kinds of biomedical documents. + +## Predicted Entities + +`Education`, `Marital_Status`, `Financial_Status`, `Population_Group`, `Employment` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_sdoh_income_social_status_wip_en_4.2.8_3.0_1675999206708.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_sdoh_income_social_status_wip_en_4.2.8_3.0_1675999206708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +clinical_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"])\ + .setOutputCol("embeddings") + +ner_model = MedicalNerModel.pretrained("ner_sdoh_income_social_status_wip", "en", "clinical/models")\ + .setInputCols(["sentence", "token", "embeddings"])\ + .setOutputCol("ner") + +ner_converter = NerConverterInternal()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentence_detector, + tokenizer, + clinical_embeddings, + ner_model, + ner_converter + ]) + +sample_texts = ["Pt is described as divorced and pleasant when approached but keeps to himself. Pt is working as a plumber, but he gets financial diffuculties. He has a son student at college. 
His family is imigrant for 2 years."] + +data = spark.createDataFrame(sample_texts, StringType()).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val clinical_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner_model = MedicalNerModel.pretrained("ner_sdoh_income_social_status_wip", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverterInternal() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array( + document_assembler, + sentence_detector, + tokenizer, + clinical_embeddings, + ner_model, + ner_converter +)) + +val data = Seq("Pt is described as divorced and pleasant when approached but keeps to himself. Pt is working as a plumber, but he gets financial diffuculties. He has a son student at college. His family is imigrant for 2 years.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash ++-----------+----------------+-----+---+----------------------+ +|sentence_id|ner_label |begin|end|chunk | ++-----------+----------------+-----+---+----------------------+ +|0 |Marital_Status |19 |26 |divorced | +|1 |Employment |98 |104|plumber | +|1 |Financial_Status|119 |140|financial diffuculties| +|2 |Education |156 |162|student | +|2 |Education |167 |173|college | +|3 |Population_Group|190 |197|imigrant | ++-----------+----------------+-----+---+----------------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_sdoh_income_social_status_wip| +|Compatibility:|Healthcare NLP 4.2.8+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|856.8 KB| + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + Education 95.0 20.0 18.0 113.0 0.826087 0.840708 0.833333 +Population_Group 41.0 0.0 5.0 46.0 1.000000 0.891304 0.942529 +Financial_Status 286.0 52.0 82.0 368.0 0.846154 0.777174 0.810198 + Employment 3968.0 142.0 215.0 4183.0 0.965450 0.948601 0.956952 + Marital_Status 167.0 1.0 7.0 174.0 0.994048 0.959770 0.976608 +``` \ No newline at end of file diff --git a/docs/_posts/Meryem1425/2023-02-10-ner_sdoh_social_environment_wip_en.md b/docs/_posts/Meryem1425/2023-02-10-ner_sdoh_social_environment_wip_en.md new file mode 100644 index 00000000000000..046ce924a564b0 --- /dev/null +++ b/docs/_posts/Meryem1425/2023-02-10-ner_sdoh_social_environment_wip_en.md @@ -0,0 +1,161 @@ +--- +layout: model +title: Detect SDOH of Social Environment +author: John Snow Labs +name: ner_sdoh_social_environment_wip +date: 2023-02-10 +tags: [licensed, clinical, social_determinants, en, ner, social, environment, sdoh, public_health] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.8 +spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover 
+use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model extracts social environment terminologies related to Social Determinants of Health from various kinds of documents. + +## Predicted Entities + +`Social_Support`, `Chidhood_Event`, `Social_Exclusion`, `Violence_Abuse_Legal` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_sdoh_social_environment_wip_en_4.2.8_3.0_1675998295035.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_sdoh_social_environment_wip_en_4.2.8_3.0_1675998295035.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} + +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +clinical_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\ + .setInputCols(["sentence", "token"])\ + .setOutputCol("embeddings") + +ner_model = MedicalNerModel.pretrained("ner_sdoh_social_environment_wip", "en", "clinical/models")\ + .setInputCols(["sentence", "token", "embeddings"])\ + .setOutputCol("ner") + +ner_converter = NerConverterInternal()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentence_detector, + tokenizer, + clinical_embeddings, + ner_model, + ner_converter + ]) + +sample_texts = ["He is the primary caregiver.", + "There is some evidence of abuse.", + "She stated that she was in a safe environment in prison, but that her siblings lived in an unsafe neighborhood, she was very afraid for them and witnessed their ostracism by other people.", + "Medical history: Jane was born in a low - income household and experienced significant trauma during her childhood, including physical and emotional abuse."] + +data = spark.createDataFrame(sample_texts, StringType()).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val clinical_embeddings = 
WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + +val ner_model = MedicalNerModel.pretrained("ner_sdoh_social_environment_wip", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverterInternal() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array( + document_assembler, + sentence_detector, + tokenizer, + clinical_embeddings, + ner_model, + ner_converter +)) + +val data = Seq("Medical history: Jane was born in a low - income household and experienced significant trauma during her childhood, including physical and emotional abuse.").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + +``` +
+ +## Results + +```bash ++--------------------+-----+---+---------------------------+ +|ner_label |begin|end|chunk | ++--------------------+-----+---+---------------------------+ +|Social_Support |10 |26 |primary caregiver | +|Violence_Abuse_Legal|26 |30 |abuse | +|Violence_Abuse_Legal|49 |54 |prison | +|Social_Exclusion |161 |169|ostracism | +|Chidhood_Event |87 |113|trauma during her childhood| +|Violence_Abuse_Legal|139 |153|emotional abuse | ++--------------------+-----+---+---------------------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_sdoh_social_environment_wip| +|Compatibility:|Healthcare NLP 4.2.8+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.7 KB| + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + Chidhood_Event 34.0 6.0 5.0 39.0 0.850000 0.871795 0.860759 + Social_Exclusion 45.0 6.0 12.0 57.0 0.882353 0.789474 0.833333 + Social_Support 1139.0 57.0 103.0 1242.0 0.952341 0.917069 0.934372 +Violence_Abuse_Legal 235.0 38.0 44.0 279.0 0.860806 0.842294 0.851449 +``` diff --git a/docs/_posts/gpirge/2023-01-25-ner_eu_clinical_case_en.md b/docs/_posts/gpirge/2023-01-25-ner_eu_clinical_case_en.md new file mode 100644 index 00000000000000..0adc7f6307b107 --- /dev/null +++ b/docs/_posts/gpirge/2023-01-25-ner_eu_clinical_case_en.md @@ -0,0 +1,180 @@ +--- +layout: model +title: Detect Clinical Entities (ner_eu_clinical_case) +author: John Snow Labs +name: ner_eu_clinical_case +date: 2023-01-25 +tags: [clinical, licensed, ner, en] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.7 +spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained named entity recognition (NER) deep learning model for clinical entities. 
The SparkNLP deep learning model (MedicalNerModel) is inspired by a former state of the art model for NER: Chiu & Nichols, Named Entity Recognition with Bidirectional LSTM-CNN. + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. + +## Predicted Entities + +`clinical_event`, `bodypart`, `clinical_condition`, `units_measurements`, `patient`, `date_time` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_case_en_4.2.7_3.2_1674657662344.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_case_en_4.2.7_3.2_1674657662344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical","en","clinical/models")\ + .setInputCols(["sentence","token"])\ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained('ner_eu_clinical_case', "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentenceDetectorDL, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["""A 3-year-old boy with autistic disorder on hospital of pediatric ward A at university hospital. He has no family history of illness or autistic spectrum disorder. The child was diagnosed with a severe communication disorder, with social interaction difficulties and sensory processing delay. Blood work was normal (thyroid-stimulating hormone (TSH), hemoglobin, mean corpuscular volume (MCV), and ferritin). Upper endoscopy also showed a submucosal tumor causing subtotal obstruction of the gastric outlet. Because a gastrointestinal stromal tumor was suspected, distal gastrectomy was performed. 
Histopathological examination revealed spindle cell proliferation in the submucosal layer."""]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documenter = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDetector = SentenceDetectorDLModel.pretrained() + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical","en","clinical/models") + .setInputCols(Array("sentence","token")) + .setOutputCol("embeddings") + +val ner_model = MedicalNerModel.pretrained("ner_eu_clinical_case", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array(documenter, sentenceDetector, tokenizer, word_embeddings, ner_model, ner_converter)) + +val data = Seq(Array("""A 3-year-old boy with autistic disorder on hospital of pediatric ward A at university hospital. He has no family history of illness or autistic spectrum disorder. The child was diagnosed with a severe communication disorder, with social interaction difficulties and sensory processing delay. Blood work was normal (thyroid-stimulating hormone (TSH), hemoglobin, mean corpuscular volume (MCV), and ferritin). Upper endoscopy also showed a submucosal tumor causing subtotal obstruction of the gastric outlet. Because a gastrointestinal stromal tumor was suspected, distal gastrectomy was performed. Histopathological examination revealed spindle cell proliferation in the submucosal layer.""")).toDS().toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash ++------------------------------+------------------+ +|chunk |ner_label | ++------------------------------+------------------+ +|A 3-year-old boy |patient | +|autistic disorder |clinical_condition| +|He |patient | +|illness |clinical_event | +|autistic spectrum disorder |clinical_condition| +|The child |patient | +|diagnosed |clinical_event | +|disorder |clinical_event | +|difficulties |clinical_event | +|Blood |bodypart | +|work |clinical_event | +|normal |units_measurements| +|hormone |clinical_event | +|hemoglobin |clinical_event | +|volume |clinical_event | +|endoscopy |clinical_event | +|showed |clinical_event | +|tumor |clinical_condition| +|causing |clinical_event | +|obstruction |clinical_event | +|the gastric outlet |bodypart | +|gastrointestinal stromal tumor|clinical_condition| +|suspected |clinical_event | +|gastrectomy |clinical_event | +|examination |clinical_event | +|revealed |clinical_event | +|spindle cell proliferation |clinical_condition| +|the submucosal layer |bodypart | ++------------------------------+------------------+ + + +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_eu_clinical_case| +|Compatibility:|Healthcare NLP 4.2.7+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|849.0 KB| + +## References + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. 
+ +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + date_time 54.0 7.0 15.0 69.0 0.8852 0.7826 0.8308 +units_measurements 111.0 48.0 12.0 123.0 0.6981 0.9024 0.7872 +clinical_condition 93.0 47.0 81.0 174.0 0.6643 0.5345 0.5924 + patient 119.0 16.0 5.0 124.0 0.8815 0.9597 0.9189 + clinical_event 331.0 126.0 89.0 420.0 0.7243 0.7881 0.7548 + bodypart 171.0 58.0 84.0 255.0 0.7467 0.6706 0.7066 + macro - - - - - - 0.7651 + micro - - - - - - 0.7454 +``` diff --git a/docs/_posts/gpirge/2023-02-01-ner_eu_clinical_case_es.md b/docs/_posts/gpirge/2023-02-01-ner_eu_clinical_case_es.md new file mode 100644 index 00000000000000..04cc1cbc2853e5 --- /dev/null +++ b/docs/_posts/gpirge/2023-02-01-ner_eu_clinical_case_es.md @@ -0,0 +1,189 @@ +--- +layout: model +title: Detect Clinical Entities (ner_eu_clinical_case - es) +author: John Snow Labs +name: ner_eu_clinical_case +date: 2023-02-01 +tags: [es, clinical, licensed, ner] +task: Named Entity Recognition +language: es +edition: Healthcare NLP 4.2.8 +spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained named entity recognition (NER) deep learning model for extracting clinical entities from Spanish texts. The SparkNLP deep learning model (MedicalNerModel) is inspired by a former state of the art model for NER: Chiu & Nichols, Named Entity Recognition with Bidirectional LSTM-CNN. + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. 
+ +## Predicted Entities + +`clinical_event`, `bodypart`, `clinical_condition`, `units_measurements`, `patient`, `date_time` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_case_es_4.2.8_3.0_1675285093855.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_case_es_4.2.8_3.0_1675285093855.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} + +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","es")\ + .setInputCols(["sentence","token"])\ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained("ner_eu_clinical_case", "es", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverterInternal()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentenceDetectorDL, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["""Un niño de 3 años con trastorno autista en el hospital de la sala pediátrica A del hospital universitario. No tiene antecedentes familiares de enfermedad o trastorno del espectro autista. El niño fue diagnosticado con un trastorno de comunicación severo, con dificultades de interacción social y retraso en el procesamiento sensorial. Los análisis de sangre fueron normales (hormona estimulante de la tiroides (TSH), hemoglobina, volumen corpuscular medio (MCV) y ferritina). La endoscopia alta también mostró un tumor submucoso que causaba una obstrucción subtotal de la salida gástrica. Ante la sospecha de tumor del estroma gastrointestinal, se realizó gastrectomía distal. 
El examen histopatológico reveló proliferación de células fusiformes en la capa submucosa."""]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","es") + .setInputCols(Array("sentence","token")) + .setOutputCol("embeddings") + +val ner = MedicalNerModel.pretrained("ner_eu_clinical_case", "es", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverterInternal() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array( + document_assembler, + sentenceDetectorDL, + tokenizer, + word_embeddings, + ner, + ner_converter)) + +val data = Seq("""Un niño de 3 años con trastorno autista en el hospital de la sala pediátrica A del hospital universitario. No tiene antecedentes familiares de enfermedad o trastorno del espectro autista. El niño fue diagnosticado con un trastorno de comunicación severo, con dificultades de interacción social y retraso en el procesamiento sensorial. Los análisis de sangre fueron normales (hormona estimulante de la tiroides (TSH), hemoglobina, volumen corpuscular medio (MCV) y ferritina). La endoscopia alta también mostró un tumor submucoso que causaba una obstrucción subtotal de la salida gástrica. Ante la sospecha de tumor del estroma gastrointestinal, se realizó gastrectomía distal. El examen histopatológico reveló proliferación de células fusiformes en la capa submucosa.""").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash ++--------------------------------+------------------+ +|chunk |ner_label | ++--------------------------------+------------------+ +|Un niño de 3 años |patient | +|trastorno autista |clinical_event | +|antecedentes |clinical_event | +|enfermedad |clinical_event | +|trastorno del espectro autista |clinical_event | +|El niño |patient | +|diagnosticado |clinical_event | +|trastorno de comunicación severo|clinical_event | +|dificultades |clinical_event | +|retraso |clinical_event | +|análisis |clinical_event | +|sangre |bodypart | +|normales |units_measurements| +|hormona |clinical_event | +|la tiroides |bodypart | +|TSH |clinical_event | +|hemoglobina |clinical_event | +|volumen |clinical_event | +|MCV |clinical_event | +|ferritina |clinical_event | +|endoscopia |clinical_event | +|mostró |clinical_event | +|tumor submucoso |clinical_event | +|obstrucción |clinical_event | +|tumor |clinical_event | +|del estroma gastrointestinal |bodypart | +|gastrectomía |clinical_event | +|examen |clinical_event | +|reveló |clinical_event | +|proliferación |clinical_event | +|células fusiformes |bodypart | +|la capa submucosa |bodypart | ++--------------------------------+------------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_eu_clinical_case| +|Compatibility:|Healthcare NLP 4.2.8+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|895.1 KB| + +## References + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. 
+ +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + date_time 87.0 10.0 17.0 104.0 0.8969 0.8365 0.8657 +units_measurements 37.0 5.0 11.0 48.0 0.8810 0.7708 0.8222 +clinical_condition 50.0 34.0 70.0 120.0 0.5952 0.4167 0.4902 + patient 76.0 8.0 11.0 87.0 0.9048 0.8736 0.8889 + clinical_event 399.0 44.0 79.0 478.0 0.9007 0.8347 0.8664 + bodypart 153.0 56.0 13.0 166.0 0.7321 0.9217 0.8160 + macro - - - - - - 0.7916 + micro - - - - - - 0.8128 +``` diff --git a/docs/_posts/gpirge/2023-02-01-ner_eu_clinical_case_fr.md b/docs/_posts/gpirge/2023-02-01-ner_eu_clinical_case_fr.md new file mode 100644 index 00000000000000..7e21761fdf5af8 --- /dev/null +++ b/docs/_posts/gpirge/2023-02-01-ner_eu_clinical_case_fr.md @@ -0,0 +1,182 @@ +--- +layout: model +title: Detect Clinical Entities (ner_eu_clinical_case - fr) +author: John Snow Labs +name: ner_eu_clinical_case +date: 2023-02-01 +tags: [fr, clinical, licensed, ner] +task: Named Entity Recognition +language: fr +edition: Healthcare NLP 4.2.8 +spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained named entity recognition (NER) deep learning model for extracting clinical entities from French texts. The SparkNLP deep learning model (MedicalNerModel) is inspired by a former state of the art model for NER: Chiu & Nichols, Named Entity Recognition with Bidirectional LSTM-CNN. + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. 
+ +## Predicted Entities + +`clinical_event`, `bodypart`, `clinical_condition`, `units_measurements`, `patient`, `date_time` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_case_fr_4.2.8_3.0_1675293960896.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_case_fr_4.2.8_3.0_1675293960896.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","fr")\ + .setInputCols(["sentence","token"])\ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained('ner_eu_clinical_case', "fr", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverterInternal()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentenceDetectorDL, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["""Un garçon de 3 ans atteint d'un trouble autistique à l'hôpital du service pédiatrique A de l'hôpital universitaire. Il n'a pas d'antécédents familiaux de troubles ou de maladies du spectre autistique. Le garçon a été diagnostiqué avec un trouble de communication sévère, avec des difficultés d'interaction sociale et un traitement sensoriel retardé. Les tests sanguins étaient normaux (thyréostimuline (TSH), hémoglobine, volume globulaire moyen (MCV) et ferritine). L'endoscopie haute a également montré une tumeur sous-muqueuse provoquant une obstruction subtotale de la sortie gastrique. Devant la suspicion d'une tumeur stromale gastro-intestinale, une gastrectomie distale a été réalisée. 
L'examen histopathologique a révélé une prolifération de cellules fusiformes dans la couche sous-muqueuse."""]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documenter = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","fr") + .setInputCols(Array("sentence","token")) + .setOutputCol("embeddings") + +val ner_model = MedicalNerModel.pretrained("ner_eu_clinical_case", "fr", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverterInternal() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array(documenter, sentenceDetector, tokenizer, word_embeddings, ner_model, ner_converter)) + +val data = Seq("""Un garçon de 3 ans atteint d'un trouble autistique à l'hôpital du service pédiatrique A de l'hôpital universitaire. Il n'a pas d'antécédents familiaux de troubles ou de maladies du spectre autistique. Le garçon a été diagnostiqué avec un trouble de communication sévère, avec des difficultés d'interaction sociale et un traitement sensoriel retardé. Les tests sanguins étaient normaux (thyréostimuline (TSH), hémoglobine, volume globulaire moyen (MCV) et ferritine). L'endoscopie haute a également montré une tumeur sous-muqueuse provoquant une obstruction subtotale de la sortie gastrique. Devant la suspicion d'une tumeur stromale gastro-intestinale, une gastrectomie distale a été réalisée.
L'examen histopathologique a révélé une prolifération de cellules fusiformes dans la couche sous-muqueuse.""").toDS().toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash ++-----------------------------------------------------+------------------+ +|chunk |ner_label | ++-----------------------------------------------------+------------------+ +|Un garçon de 3 ans |patient | +|trouble autistique à l'hôpital du service pédiatrique|clinical_condition| +|l'hôpital |clinical_event | +|Il n'a |patient | +|d'antécédents |clinical_event | +|troubles |clinical_condition| +|maladies |clinical_condition| +|du spectre autistique |bodypart | +|Le garçon |patient | +|diagnostiqué |clinical_event | +|trouble |clinical_condition| +|difficultés |clinical_event | +|traitement |clinical_event | +|tests |clinical_event | +|normaux |units_measurements| +|thyréostimuline |clinical_event | +|TSH |clinical_event | +|ferritine |clinical_event | +|L'endoscopie |clinical_event | +|montré |clinical_event | +|tumeur sous-muqueuse |clinical_condition| +|provoquant |clinical_event | +|obstruction |clinical_condition| +|la sortie gastrique |bodypart | +|suspicion |clinical_event | +|tumeur stromale gastro-intestinale |clinical_condition| +|gastrectomie |clinical_event | +|L'examen |clinical_event | +|révélé |clinical_event | +|prolifération |clinical_event | +|cellules fusiformes |bodypart | +|la couche sous-muqueuse |bodypart | ++-----------------------------------------------------+------------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_eu_clinical_case| +|Compatibility:|Healthcare NLP 4.2.8+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|fr| +|Size:|895.0 KB| + +## References + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. 
+ +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + date_time 49.0 14.0 21.0 70.0 0.7778 0.7000 0.7368 +units_measurements 92.0 19.0 6.0 98.0 0.8288 0.9388 0.8804 +clinical_condition 178.0 74.0 73.0 251.0 0.7063 0.7092 0.7078 + patient 114.0 6.0 15.0 129.0 0.9500 0.8837 0.9157 + clinical_event 265.0 81.0 71.0 336.0 0.7659 0.7887 0.7771 + bodypart 243.0 34.0 64.0 307.0 0.8773 0.7915 0.8322 + macro - - - - - - 0.8083 + micro - - - - - - 0.7978 +``` diff --git a/docs/_posts/gpirge/2023-02-02-ner_eu_clinical_case_eu.md b/docs/_posts/gpirge/2023-02-02-ner_eu_clinical_case_eu.md new file mode 100644 index 00000000000000..4c98890d5cecf6 --- /dev/null +++ b/docs/_posts/gpirge/2023-02-02-ner_eu_clinical_case_eu.md @@ -0,0 +1,194 @@ +--- +layout: model +title: Detect Clinical Entities (ner_eu_clinical_case - eu) +author: John Snow Labs +name: ner_eu_clinical_case +date: 2023-02-02 +tags: [eu, clinical, licensed, ner] +task: Named Entity Recognition +language: eu +edition: Healthcare NLP 4.2.8 +spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained named entity recognition (NER) deep learning model for extracting clinical entities from Basque texts. The SparkNLP deep learning model (MedicalNerModel) is inspired by a former state of the art model for NER: Chiu & Nichols, Named Entity Recognition with Bidirectional LSTM-CNN. + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives.
+ +## Predicted Entities + +`clinical_event`, `bodypart`, `clinical_condition`, `units_measurements`, `patient`, `date_time` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_case_eu_4.2.8_3.0_1675359410041.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_case_eu_4.2.8_3.0_1675359410041.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","eu")\ + .setInputCols(["sentence","token"])\ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained('ner_eu_clinical_case', "eu", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverterInternal()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentenceDetectorDL, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["""3 urteko mutiko bat nahasmendu autistarekin unibertsitateko ospitaleko A pediatriako ospitalean. Ez du autismoaren espektroaren nahaste edo gaixotasun familiaren aurrekaririk. Mutilari komunikazio-nahaste larria diagnostikatu zioten, elkarrekintza sozialeko zailtasunak eta prozesamendu sentsorial atzeratua. Odol-analisiak normalak izan ziren (tiroidearen hormona estimulatzailea (TSH), hemoglobina, batez besteko bolumen corpuskularra (MCV) eta ferritina). Goiko endoskopiak mukosaren azpiko tumore bat ere erakutsi zuen, urdail-irteeren guztizko oztopoa eragiten zuena. Estroma gastrointestinalaren tumore baten susmoa ikusita, distaleko gastrektomia egin zen.
Azterketa histopatologikoak agerian utzi zuen mukosaren azpiko zelulen ugaltzea."""]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documenter = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","eu") + .setInputCols(Array("sentence","token")) + .setOutputCol("embeddings") + +val ner_model = MedicalNerModel.pretrained("ner_eu_clinical_case", "eu", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverterInternal() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array(documenter, sentenceDetector, tokenizer, word_embeddings, ner_model, ner_converter)) + +val data = Seq("""3 urteko mutiko bat nahasmendu autistarekin unibertsitateko ospitaleko A pediatriako ospitalean. Ez du autismoaren espektroaren nahaste edo gaixotasun familiaren aurrekaririk. Mutilari komunikazio-nahaste larria diagnostikatu zioten, elkarrekintza sozialeko zailtasunak eta prozesamendu sentsorial atzeratua. Odol-analisiak normalak izan ziren (tiroidearen hormona estimulatzailea (TSH), hemoglobina, batez besteko bolumen corpuskularra (MCV) eta ferritina). Goiko endoskopiak mukosaren azpiko tumore bat ere erakutsi zuen, urdail-irteeren guztizko oztopoa eragiten zuena. Estroma gastrointestinalaren tumore baten susmoa ikusita, distaleko gastrektomia egin zen. Azterketa histopatologikoak agerian utzi zuen mukosaren azpiko zelulen ugaltzea.""").toDS().toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash ++----------------------------+------------------+ +|chunk |ner_label | ++----------------------------+------------------+ +|3 urteko mutiko bat |patient | +|nahasmendu |clinical_event | +|autismoaren espektroaren |clinical_condition| +|nahaste |clinical_event | +|gaixotasun |clinical_event | +|familiaren |patient | +|aurrekaririk |clinical_event | +|Mutilari |patient | +|komunikazio-nahaste |clinical_event | +|diagnostikatu |clinical_event | +|elkarrekintza |clinical_event | +|zailtasunak |clinical_event | +|prozesamendu sentsorial |clinical_event | +|Odol-analisiak |clinical_event | +|normalak |units_measurements| +|tiroidearen |bodypart | +|hormona estimulatzailea |clinical_event | +|TSH |clinical_event | +|hemoglobina |clinical_event | +|bolumen |clinical_event | +|MCV |clinical_event | +|ferritina |clinical_event | +|Goiko |bodypart | +|endoskopiak |clinical_event | +|mukosaren azpiko |bodypart | +|tumore |clinical_event | +|erakutsi |clinical_event | +|oztopoa |clinical_event | +|Estroma gastrointestinalaren|clinical_event | +|tumore |clinical_event | +|ikusita |clinical_event | +|distaleko |bodypart | +|gastrektomia |clinical_event | +|Azterketa |clinical_event | +|agerian |clinical_event | +|utzi |clinical_event | +|mukosaren azpiko zelulen |bodypart | +|ugaltzea |clinical_event | ++----------------------------+------------------+ + + +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_eu_clinical_case| +|Compatibility:|Healthcare NLP 4.2.8+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|eu| +|Size:|896.1 KB| + +## References + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. 
+ +## Sample text from the training dataset + +3 urteko mutiko bat nahasmendu autistarekin unibertsitateko ospitaleko A pediatriako ospitalean. Ez du autismoaren espektroaren nahaste edo gaixotasun familiaren aurrekaririk. Mutilari komunikazio-nahaste larria diagnostikatu zioten, elkarrekintza sozialeko zailtasunak eta prozesamendu sentsorial atzeratua. Odol-analisiak normalak izan ziren (tiroidearen hormona estimulatzailea (TSH), hemoglobina, batez besteko bolumen corpuskularra (MCV) eta ferritina). Goiko endoskopiak mukosaren azpiko tumore bat ere erakutsi zuen, urdail-irteeren guztizko oztopoa eragiten zuena. Estroma gastrointestinalaren tumore baten susmoa ikusita, distaleko gastrektomia egin zen. Azterketa histopatologikoak agerian utzi zuen mukosaren azpiko zelulen ugaltzea. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + date_time 103.0 13.0 26.0 129.0 0.8879 0.7984 0.8408 +units_measurements 257.0 37.0 9.0 266.0 0.8741 0.9662 0.9179 +clinical_condition 20.0 22.0 33.0 53.0 0.4762 0.3774 0.4211 + patient 69.0 3.0 8.0 77.0 0.9583 0.8961 0.9262 + clinical_event 712.0 121.0 95.0 807.0 0.8547 0.8823 0.8683 + bodypart 182.0 33.0 15.0 197.0 0.8465 0.9239 0.8835 + macro - - - - - - 0.8096 + micro - - - - - - 0.8640 +``` \ No newline at end of file diff --git a/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_en.md b/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_en.md new file mode 100644 index 00000000000000..e417d46c363974 --- /dev/null +++ b/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_en.md @@ -0,0 +1,153 @@ +--- +layout: model +title: Detect Clinical Conditions (ner_eu_clinical_condition) +author: John Snow Labs +name: ner_eu_clinical_condition +date: 2023-02-06 +tags: [en, clinical, licensed, ner] +task: Named Entity Recognition +language: en +edition: Healthcare NLP 4.2.8 +spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover +use_language_switcher:
"Python-Scala-Java" +--- + +## Description + +Pretrained named entity recognition (NER) deep learning model for clinical conditions. The SparkNLP deep learning model (MedicalNerModel) is inspired by a former state of the art model for NER: Chiu & Nichols, Named Entity Recognition with Bidirectional LSTM-CNN. + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. + +## Predicted Entities + +`clinical_condition` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_condition_en_4.2.8_3.0_1675718793293.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_condition_en_4.2.8_3.0_1675718793293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical","en","clinical/models")\ + .setInputCols(["sentence","token"])\ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained('ner_eu_clinical_condition', "en", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverterInternal()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentenceDetectorDL, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["""Hyperparathyroidism was considered upon the fourth occasion. The history of weakness and generalized joint pains were present. He also had history of epigastric pain diagnosed informally as gastritis. He had previously had open reduction and internal fixation for the initial two fractures under general anesthesia. 
He sustained mandibular fracture."""]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documenter = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDetector = SentenceDetectorDLModel.pretrained() + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical","en","clinical/models") + .setInputCols(Array("sentence","token")) + .setOutputCol("embeddings") + +val ner_model = MedicalNerModel.pretrained("ner_eu_clinical_condition", "en", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverterInternal() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array(documenter, sentenceDetector, tokenizer, word_embeddings, ner_model, ner_converter)) + +val data = Seq("""Hyperparathyroidism was considered upon the fourth occasion. The history of weakness and generalized joint pains were present. He also had history of epigastric pain diagnosed informally as gastritis. He had previously had open reduction and internal fixation for the initial two fractures under general anesthesia. He sustained mandibular fracture.""").toDS().toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash ++-----------------------+------------------+ +|chunk |ner_label | ++-----------------------+------------------+ +|Hyperparathyroidism |clinical_condition| +|weakness |clinical_condition| +|generalized joint pains|clinical_condition| +|epigastric pain |clinical_condition| +|gastritis |clinical_condition| +|fractures |clinical_condition| +|anesthesia |clinical_condition| +|mandibular fracture |clinical_condition| ++-----------------------+------------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_eu_clinical_condition| +|Compatibility:|Healthcare NLP 4.2.8+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|851.3 KB| + +## References + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. 
+ +## Benchmarking + +```bash + label tp fp fn total precision recall f1 + clinical_condition 230.0 28.0 70.0 300.0 0.8915 0.7667 0.8244 + macro - - - - - - 0.8244 + micro - - - - - - 0.8244 +``` \ No newline at end of file diff --git a/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_es.md b/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_es.md new file mode 100644 index 00000000000000..39305dec73d697 --- /dev/null +++ b/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_es.md @@ -0,0 +1,149 @@ +--- +layout: model +title: Detect Clinical Conditions (ner_eu_clinical_condition - es) +author: John Snow Labs +name: ner_eu_clinical_condition +date: 2023-02-06 +tags: [es, clinical, licensed, ner, clinical_condition] +task: Named Entity Recognition +language: es +edition: Healthcare NLP 4.2.8 +spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained named entity recognition (NER) deep learning model for extracting clinical conditions from Spanish texts. The SparkNLP deep learning model (MedicalNerModel) is inspired by a former state of the art model for NER: Chiu & Nichols, Named Entity Recognition with Bidirectional LSTM-CNN. + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. + +## Predicted Entities + +`clinical_condition` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_condition_es_4.2.8_3.0_1675721390266.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_condition_es_4.2.8_3.0_1675721390266.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","es")\ + .setInputCols(["sentence","token"])\ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained('ner_eu_clinical_condition', "es", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverterInternal()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentenceDetectorDL, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["""La exploración abdominal revela una cicatriz de laparotomía media infraumbilical, la presencia de ruidos disminuidos, y dolor a la palpación de manera difusa sin claros signos de irritación peritoneal. 
No existen hernias inguinales o crurales."""]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documenter = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","es") + .setInputCols(Array("sentence","token")) + .setOutputCol("embeddings") + +val ner_model = MedicalNerModel.pretrained("ner_eu_clinical_condition", "es", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverterInternal() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array(documenter, sentenceDetector, tokenizer, word_embeddings, ner_model, ner_converter)) + +val data = Seq("""La exploración abdominal revela una cicatriz de laparotomía media infraumbilical, la presencia de ruidos disminuidos, y dolor a la palpación de manera difusa sin claros signos de irritación peritoneal. No existen hernias inguinales o crurales.""").toDS().toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash ++--------------------+------------------+ +|chunk |ner_label | ++--------------------+------------------+ +|cicatriz |clinical_condition| +|dolor a la palpación|clinical_condition| +|signos |clinical_condition| +|irritación |clinical_condition| +|hernias inguinales |clinical_condition| +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_eu_clinical_condition| +|Compatibility:|Healthcare NLP 4.2.8+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|898.1 KB| + +## References + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 +clinical_condition 354.0 42.0 84.0 438.0 0.8939 0.8082 0.8489 + macro - - - - - - 0.8489 + micro - - - - - - 0.8489 +``` \ No newline at end of file diff --git a/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_eu.md b/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_eu.md new file mode 100644 index 00000000000000..6f25f06fe7d8d3 --- /dev/null +++ b/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_eu.md @@ -0,0 +1,153 @@ +--- +layout: model +title: Detect Clinical Conditions (ner_eu_clinical_condition - eu) +author: John Snow Labs +name: ner_eu_clinical_condition +date: 2023-02-06 +tags: [eu, clinical, licensed, ner, clinical_condition] +task: Named Entity Recognition +language: eu +edition: Healthcare NLP 4.2.8 +spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained named entity recognition (NER) deep learning model for extracting clinical conditions from Basque texts. 
The SparkNLP deep learning model (MedicalNerModel) is inspired by a former state of the art model for NER: Chiu & Nichols, Named Entity Recognition with Bidirectional LSTM-CNN. + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated +clinical narratives. + +## Predicted Entities + +`clinical_condition` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_condition_eu_4.2.8_3.0_1675723038941.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_condition_eu_4.2.8_3.0_1675723038941.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","eu")\ + .setInputCols(["sentence","token"])\ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained('ner_eu_clinical_condition', "eu", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverterInternal()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentenceDetectorDL, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["""Gertaera honetatik bi hilabetetara, umea Larrialdietako Zerbitzura dator 4 egunetan zehar buruko mina eta bekokiko hantura azaltzeagatik, sukarrik izan gabe. Miaketan, haztapen mingarria duen bekokiko hantura bigunaz gain, ez da beste zeinurik azaltzen. Polakiuria eta tenesmo arina ere izan zuen egun horretan hematuriarekin batera. 
Geroztik sintomarik gabe dago."""]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documenter = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","eu") + .setInputCols(Array("sentence","token")) + .setOutputCol("embeddings") + +val ner_model = MedicalNerModel.pretrained("ner_eu_clinical_condition", "eu", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverterInternal() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array(documenter, sentenceDetector, tokenizer, word_embeddings, ner_model, ner_converter)) + +val data = Seq(Array("""Gertaera honetatik bi hilabetetara, umea Larrialdietako Zerbitzura dator 4 egunetan zehar buruko mina eta bekokiko hantura azaltzeagatik, sukarrik izan gabe. Miaketan, haztapen mingarria duen bekokiko hantura bigunaz gain, ez da beste zeinurik azaltzen. Polakiuria eta tenesmo arina ere izan zuen egun horretan hematuriarekin batera. Geroztik sintomarik gabe dago.""")).toDS().toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash ++----------+------------------+ +|chunk |ner_label | ++----------+------------------+ +|mina |clinical_condition| +|hantura |clinical_condition| +|sukarrik |clinical_condition| +|mingarria |clinical_condition| +|hantura |clinical_condition| +|Polakiuria|clinical_condition| +|sintomarik|clinical_condition| ++----------+------------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_eu_clinical_condition| +|Compatibility:|Healthcare NLP 4.2.8+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|eu| +|Size:|899.6 KB| + +## References + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 +clinical_condition 45.0 4.0 13.0 58.0 0.9184 0.7759 0.8411 + macro - - - - - - 0.8411 + micro - - - - - - 0.8411 +``` \ No newline at end of file diff --git a/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_fr.md b/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_fr.md new file mode 100644 index 00000000000000..0b93dfb9a2968e --- /dev/null +++ b/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_fr.md @@ -0,0 +1,152 @@ +--- +layout: model +title: Detect Clinical Conditions (ner_eu_clinical_condition - fr) +author: John Snow Labs +name: ner_eu_clinical_condition +date: 2023-02-06 +tags: [fr, clinical, licensed, ner, clinical_condition] +task: Named Entity Recognition +language: fr +edition: Healthcare NLP 4.2.8 +spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained named entity recognition (NER) deep learning model for extracting clinical conditions from French texts. 
The SparkNLP deep learning model (MedicalNerModel) is inspired by a former state of the art model for NER: Chiu & Nichols, Named Entity Recognition with Bidirectional LSTM-CNN. + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. + +## Predicted Entities + +`clinical_condition` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_condition_fr_4.2.8_3.0_1675725809666.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_condition_fr_4.2.8_3.0_1675725809666.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","fr")\ + .setInputCols(["sentence","token"])\ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained('ner_eu_clinical_condition', "fr", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverterInternal()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentenceDetectorDL, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["""Il aurait présenté il y’ a environ 30 ans des ulcérations génitales non traitées spontanément guéries. L’interrogatoire retrouvait une toux sèche depuis trois mois, des douleurs rétro-sternales constrictives, une dyspnée stade III de la NYHA et un contexte d’ apyrexie. 
Sur ce tableau s’ est greffé des œdèmes des membres inférieurs puis un tableau d’ anasarque d’ où son hospitalisation en cardiologie pour décompensation cardiaque globale."""]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documenter = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","fr") + .setInputCols(Array("sentence","token")) + .setOutputCol("embeddings") + +val ner_model = MedicalNerModel.pretrained("ner_eu_clinical_condition", "fr", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverterInternal() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array(documenter, sentenceDetector, tokenizer, word_embeddings, ner_model, ner_converter)) + +val data = Seq(Array("""Il aurait présenté il y’ a environ 30 ans des ulcérations génitales non traitées spontanément guéries. L’interrogatoire retrouvait une toux sèche depuis trois mois, des douleurs rétro-sternales constrictives, une dyspnée stade III de la NYHA et un contexte d’ apyrexie. Sur ce tableau s’ est greffé des œdèmes des membres inférieurs puis un tableau d’ anasarque d’ où son hospitalisation en cardiologie pour décompensation cardiaque globale.""")).toDS().toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash ++------------------------+------------------+ +|chunk |ner_label | ++------------------------+------------------+ +|ulcérations |clinical_condition| +|toux sèche |clinical_condition| +|douleurs |clinical_condition| +|dyspnée |clinical_condition| +|apyrexie |clinical_condition| +|anasarque |clinical_condition| +|décompensation cardiaque|clinical_condition| ++------------------------+------------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_eu_clinical_condition| +|Compatibility:|Healthcare NLP 4.2.8+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|fr| +|Size:|899.9 KB| + +## References + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 +clinical_condition 269.0 51.0 52.0 321.0 0.8406 0.8380 0.8393 + macro - - - - - - 0.8393 + micro - - - - - - 0.8393 +``` \ No newline at end of file diff --git a/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_it.md b/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_it.md new file mode 100644 index 00000000000000..109bdbffa230e6 --- /dev/null +++ b/docs/_posts/gpirge/2023-02-06-ner_eu_clinical_condition_it.md @@ -0,0 +1,155 @@ +--- +layout: model +title: Detect Clinical Conditions (ner_eu_clinical_condition - it) +author: John Snow Labs +name: ner_eu_clinical_condition +date: 2023-02-06 +tags: [it, clinical, licensed, ner, clinical_condition] +task: Named Entity Recognition +language: it +edition: Healthcare NLP 4.2.8 +spark_version: 3.0 +supported: true +annotator: MedicalNerModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained named entity recognition (NER) deep learning model 
for extracting clinical conditions from Italian texts. The SparkNLP deep learning model (MedicalNerModel) is inspired by a former state of the art model for NER: Chiu & Nichols, Named Entity Recognition with Bidirectional LSTM-CNN. + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. + +## Predicted Entities + +`clinical_condition` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_condition_it_4.2.8_3.0_1675726754516.zip){:.button.button-orange} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/models/ner_eu_clinical_condition_it_4.2.8_3.0_1675726754516.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +tokenizer = Tokenizer()\ + .setInputCols(["sentence"])\ + .setOutputCol("token") + +word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","it")\ + .setInputCols(["sentence","token"])\ + .setOutputCol("embeddings") + +ner = MedicalNerModel.pretrained('ner_eu_clinical_condition', "it", "clinical/models") \ + .setInputCols(["sentence", "token", "embeddings"]) \ + .setOutputCol("ner") + +ner_converter = NerConverterInternal()\ + .setInputCols(["sentence", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[ + document_assembler, + sentenceDetectorDL, + tokenizer, + word_embeddings, + ner, + ner_converter]) + +data = spark.createDataFrame([["""Donna, 64 anni, ricovero per dolore epigastrico persistente, irradiato a barra e posteriormente, associato a dispesia e anoressia. Poche settimane dopo compaiono, però, iperemia, intenso edema vulvare ed una esione ulcerativa sul lato sinistro della parete rettale che la RM mostra essere una fistola transfinterica. 
Questi trattamenti determinano un miglioramento dell’ infiammazione ed una riduzione dell’ ulcera, ma i condilomi permangono inalterati."""]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documenter = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols("document") + .setOutputCol("sentence") + +val tokenizer = new Tokenizer() + .setInputCols("sentence") + .setOutputCol("token") + +val word_embeddings = WordEmbeddingsModel.pretrained("w2v_cc_300d","it") + .setInputCols(Array("sentence","token")) + .setOutputCol("embeddings") + +val ner_model = MedicalNerModel.pretrained("ner_eu_clinical_condition", "it", "clinical/models") + .setInputCols(Array("sentence", "token", "embeddings")) + .setOutputCol("ner") + +val ner_converter = new NerConverterInternal() + .setInputCols(Array("sentence", "token", "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array(documenter, sentenceDetector, tokenizer, word_embeddings, ner_model, ner_converter)) + +val data = Seq(Array("""Donna, 64 anni, ricovero per dolore epigastrico persistente, irradiato a barra e posteriormente, associato a dispesia e anoressia. Poche settimane dopo compaiono, però, iperemia, intenso edema vulvare ed una esione ulcerativa sul lato sinistro della parete rettale che la RM mostra essere una fistola transfinterica. Questi trattamenti determinano un miglioramento dell’ infiammazione ed una riduzione dell’ ulcera, ma i condilomi permangono inalterati.""")).toDS().toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +## Results + +```bash ++----------------------+------------------+ +|chunk |ner_label | ++----------------------+------------------+ +|dolore epigastrico |clinical_condition| +|anoressia |clinical_condition| +|iperemia |clinical_condition| +|edema |clinical_condition| +|fistola transfinterica|clinical_condition| +|infiammazione |clinical_condition| ++----------------------+------------------+ + + + + +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_eu_clinical_condition| +|Compatibility:|Healthcare NLP 4.2.8+| +|License:|Licensed| +|Edition:|Official| +|Input Labels:|[sentence, token, embeddings]| +|Output Labels:|[ner]| +|Language:|it| +|Size:|903.5 KB| + +## References + +The corpus used for model training is provided by European Clinical Case Corpus (E3C), a project aimed at offering a freely available multilingual corpus of semantically annotated clinical narratives. + +## Benchmarking + +```bash + label tp fp fn total precision recall f1 +clinical_condition 208.0 35.0 46.0 254.0 0.8560 0.8189 0.8370 + macro - - - - - - 0.8370 + micro - - - - - - 0.8370 +``` \ No newline at end of file