Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[textanalytics] adds extractive summarization action #19824

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@

## 5.2.0b1 (Unreleased)

This version of the SDK defaults to the latest supported API version, which currently is `v3.2-preview.1`.

### Features Added
- Added support for Extractive Summarization actions through the `ExtractSummaryAction` type.

### Breaking Changes

### Bugs Fixed
- `RecognizePiiEntitiesAction` option `disable_service_logs` now correctly defaults to `True`.

### Other Changes

Expand Down
9 changes: 5 additions & 4 deletions sdk/textanalytics/azure-ai-textanalytics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ Install the Azure Text Analytics client library for Python with [pip][pip]:
pip install azure-ai-textanalytics --pre
```

> Note: This version of the client library defaults to the v3.1 version of the service
> Note: This version of the client library defaults to the v3.2-preview.1 version of the service

This table shows the relationship between SDK versions and supported API versions of the service

Expand Down Expand Up @@ -377,7 +377,7 @@ The returned response is a heterogeneous list of result and error objects: list[

Please refer to the service documentation for [supported PII entity types][pii_entity_categories].

Note: The Recognize PII Entities service is available only in the v3.1 API version.
Note: The Recognize PII Entities service is available in API version v3.1 and up.

### Extract key phrases

Expand Down Expand Up @@ -493,7 +493,7 @@ for idx, doc in enumerate(docs):
print("------------------------------------------")
```

Note: The Healthcare Entities Analysis service is available only in the v3.1 API version.
Note: The Healthcare Entities Analysis service is available in API version v3.1 and up.

### Multiple Analysis

Expand All @@ -504,6 +504,7 @@ Note: The Healthcare Entities Analysis service is available only in the v3.1 API
- Linked Entity Recognition
- Key Phrase Extraction
- Sentiment Analysis
- Extractive Summarization

```python
from azure.core.credentials import AzureKeyCredential
Expand Down Expand Up @@ -563,7 +564,7 @@ for doc, action_results in zip(documents, document_results):

The returned response is an object encapsulating multiple iterables, each representing results of individual analyses.

Note: Multiple analysis is available only in the v3.1 API version.
Note: Multiple analysis is available in API version v3.1 and up.

## Optional Configuration

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@
EntityCertainty,
EntityAssociation,
HealthcareEntityCategory,
ExtractSummaryAction,
ExtractSummaryResult,
SummarySentence
)

from ._lro import AnalyzeHealthcareEntitiesLROPoller, AnalyzeActionsLROPoller
Expand Down Expand Up @@ -101,6 +104,9 @@
"AnalyzeHealthcareEntitiesLROPoller",
"AnalyzeActionsLROPoller",
"HealthcareEntityCategory",
"ExtractSummaryAction",
"ExtractSummaryResult",
"SummarySentence"
]

__version__ = VERSION
Original file line number Diff line number Diff line change
Expand Up @@ -1168,10 +1168,13 @@ class ExtractiveSummarizationTask(msrest.serialization.Model):
:param parameters:
:type parameters:
~azure.ai.textanalytics.v3_2_preview_1.models.ExtractiveSummarizationTaskParameters
:param task_name:
:type task_name: str
"""

_attribute_map = {
'parameters': {'key': 'parameters', 'type': 'ExtractiveSummarizationTaskParameters'},
'task_name': {'key': 'taskName', 'type': 'str'},
}

def __init__(
Expand All @@ -1180,6 +1183,7 @@ def __init__(
):
super(ExtractiveSummarizationTask, self).__init__(**kwargs)
self.parameters = kwargs.get('parameters', None)
self.task_name = kwargs.get('task_name', None)


class ExtractiveSummarizationTaskParameters(msrest.serialization.Model):
Expand Down Expand Up @@ -1213,7 +1217,7 @@ def __init__(
):
super(ExtractiveSummarizationTaskParameters, self).__init__(**kwargs)
self.model_version = kwargs.get('model_version', "latest")
self.logging_opt_out = kwargs.get('logging_opt_out', True)
self.logging_opt_out = kwargs.get('logging_opt_out', False)
self.string_index_type = kwargs.get('string_index_type', None)
self.sentence_count = kwargs.get('sentence_count', 3)
self.sort_by = kwargs.get('sort_by', "Offset")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1311,20 +1311,25 @@ class ExtractiveSummarizationTask(msrest.serialization.Model):
:param parameters:
:type parameters:
~azure.ai.textanalytics.v3_2_preview_1.models.ExtractiveSummarizationTaskParameters
:param task_name:
:type task_name: str
"""

_attribute_map = {
'parameters': {'key': 'parameters', 'type': 'ExtractiveSummarizationTaskParameters'},
'task_name': {'key': 'taskName', 'type': 'str'},
}

def __init__(
self,
*,
parameters: Optional["ExtractiveSummarizationTaskParameters"] = None,
task_name: Optional[str] = None,
**kwargs
):
super(ExtractiveSummarizationTask, self).__init__(**kwargs)
self.parameters = parameters
self.task_name = task_name


class ExtractiveSummarizationTaskParameters(msrest.serialization.Model):
Expand Down Expand Up @@ -1356,7 +1361,7 @@ def __init__(
self,
*,
model_version: Optional[str] = "latest",
logging_opt_out: Optional[bool] = True,
logging_opt_out: Optional[bool] = False,
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
sentence_count: Optional[int] = 3,
sort_by: Optional[Union[str, "ExtractiveSummarizationTaskParametersSortBy"]] = "Offset",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
)

from ._generated.v3_0 import models as _v3_0_models
from ._generated.v3_2_preview_1 import models as _v3_2_preview_1_models
from ._version import DEFAULT_API_VERSION


Expand Down Expand Up @@ -1737,6 +1738,7 @@ class _AnalyzeActionsType(str, Enum):
"recognize_linked_entities" #: Linked Entities Recognition action.
)
ANALYZE_SENTIMENT = "analyze_sentiment" #: Sentiment Analysis action.
EXTRACT_SUMMARY = "extract_summary"


class RecognizeEntitiesAction(DictMixin):
Expand Down Expand Up @@ -1931,7 +1933,7 @@ def __init__(self, **kwargs):
self.domain_filter = kwargs.get("domain_filter", None)
self.categories_filter = kwargs.get("categories_filter", None)
self.string_index_type = kwargs.get("string_index_type", "UnicodeCodePoint")
self.disable_service_logs = kwargs.get("disable_service_logs", False)
self.disable_service_logs = kwargs.get("disable_service_logs", True)

def __repr__(self, **kwargs):
return (
Expand Down Expand Up @@ -2074,3 +2076,165 @@ def _to_generated(self, api_version):
logging_opt_out=self.disable_service_logs,
)
)


class ExtractSummaryAction(DictMixin):
    """ExtractSummaryAction encapsulates the parameters for starting a long-running Extractive
    Text Summarization operation.

    :keyword str model_version: The model version to use for the analysis.
    :keyword str string_index_type: Specifies the method used to interpret string offsets.
        `UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
        you can also pass in `Utf16CodePoint` or `TextElement_v8`. For additional information
        see https://aka.ms/text-analytics-offsets
    :keyword bool disable_service_logs: If set to true, you opt-out of having your text input
        logged on the service side for troubleshooting. By default, Text Analytics logs your
        input text for 48 hours, solely to allow for troubleshooting issues in providing you with
        the Text Analytics natural language processing functions. Setting this parameter to true,
        disables input logging and may limit our ability to remediate issues that occur. Please see
        Cognitive Services Compliance and Privacy notes at https://aka.ms/cs-compliance for
        additional details, and Microsoft Responsible AI principles at
        https://www.microsoft.com/ai/responsible-ai.
    :keyword int max_sentence_count: Maximum number of sentences to return. Defaults to 3.
    :keyword str order_by: Possible values include: "Offset", "Rank". Default value: "Offset".
    :ivar str model_version: The model version to use for the analysis.
    :ivar str string_index_type: Specifies the method used to interpret string offsets.
        `UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
        you can also pass in `Utf16CodePoint` or `TextElement_v8`. For additional information
        see https://aka.ms/text-analytics-offsets
    :ivar bool disable_service_logs: If set to true, you opt-out of having your text input
        logged on the service side for troubleshooting. By default, Text Analytics logs your
        input text for 48 hours, solely to allow for troubleshooting issues in providing you with
        the Text Analytics natural language processing functions. Setting this parameter to true,
        disables input logging and may limit our ability to remediate issues that occur. Please see
        Cognitive Services Compliance and Privacy notes at https://aka.ms/cs-compliance for
        additional details, and Microsoft Responsible AI principles at
        https://www.microsoft.com/ai/responsible-ai.
    :ivar int max_sentence_count: Maximum number of sentences to return. Defaults to 3.
    :ivar str order_by: Possible values include: "Offset", "Rank". Default value: "Offset".
    """

    def __init__(self, **kwargs):
        self.model_version = kwargs.get("model_version", "latest")
        self.string_index_type = kwargs.get("string_index_type", "UnicodeCodePoint")
        self.disable_service_logs = kwargs.get("disable_service_logs", False)
        self.max_sentence_count = kwargs.get("max_sentence_count", 3)
        self.order_by = kwargs.get("order_by", "Offset")

    def __repr__(self):
        return (
            "ExtractSummaryAction(model_version={}, string_index_type={}, disable_service_logs={}, "
            "max_sentence_count={}, order_by={})".format(
                self.model_version,
                self.string_index_type,
                self.disable_service_logs,
                self.max_sentence_count,
                self.order_by
            )[:1024]
        )

    def _to_generated(self):
        # Translate the public action type into the autorest-generated task model.
        # Note the public->service name mapping: disable_service_logs -> logging_opt_out,
        # max_sentence_count -> sentence_count, order_by -> sort_by.
        return _v3_2_preview_1_models.ExtractiveSummarizationTask(
            parameters=_v3_2_preview_1_models.ExtractiveSummarizationTaskParameters(
                model_version=self.model_version,
                string_index_type=self.string_index_type,
                logging_opt_out=self.disable_service_logs,
                sentence_count=self.max_sentence_count,
                sort_by=self.order_by
            )
        )


class ExtractSummaryResult(DictMixin):
    """ExtractSummaryResult is a result object which contains
    the extractive text summarization from a particular document.

    :ivar str id: Unique, non-empty document identifier.
    :ivar sentences: A ranked list of sentences representing the extracted summary.
    :vartype sentences: list[~azure.ai.textanalytics.SummarySentence]
    :ivar warnings: Warnings encountered while processing document.
    :vartype warnings: list[~azure.ai.textanalytics.TextAnalyticsWarning]
    :ivar statistics: If `show_stats=True` was specified in the request this
        field will contain information about the document payload.
    :vartype statistics: ~azure.ai.textanalytics.TextDocumentStatistics
    :ivar bool is_error: Boolean check for error item when iterating over list of
        results. Always False for an instance of an ExtractSummaryResult.
    """

    def __init__(self, **kwargs):
        # Any attribute not supplied by the caller defaults to None.
        for attr_name in ("id", "sentences", "warnings", "statistics"):
            setattr(self, attr_name, kwargs.get(attr_name, None))
        # Successful result objects (as opposed to DocumentError) always report False here.
        self.is_error = False

    def __repr__(self):
        text = "ExtractSummaryResult(id={}, sentences={}, warnings={}, statistics={}, is_error={})".format(
            self.id,
            repr(self.sentences),
            repr(self.warnings),
            repr(self.statistics),
            self.is_error
        )
        # Cap the repr length, matching the convention used by the other result types.
        return text[:1024]

    @classmethod
    def _from_generated(cls, summary):
        # pylint: disable=protected-access
        # Convert each nested generated model into its public counterpart.
        sentences = [
            SummarySentence._from_generated(sentence)
            for sentence in summary.sentences
        ]
        warnings = [
            TextAnalyticsWarning._from_generated(warning)
            for warning in summary.warnings
        ]
        statistics = TextDocumentStatistics._from_generated(summary.statistics)
        return cls(
            id=summary.id,
            sentences=sentences,
            warnings=warnings,
            statistics=statistics,
        )


class SummarySentence(DictMixin):
    """Represents a single sentence from the extractive text summarization.

    :ivar str text: The extracted sentence text.
    :ivar float rank_score: A float value representing the relevance of the sentence within
        the summary. Higher values indicate higher importance.
    :ivar int offset: The sentence offset from the start of the document.
        The value depends on the value of the `string_index_type` parameter
        set in the original request, which is UnicodeCodePoint by default.
    :ivar int length: The length of the sentence. This value depends on the value of the
        `string_index_type` parameter set in the original request, which is UnicodeCodePoint
        by default.
    """

    def __init__(self, **kwargs):
        self.text = kwargs.get("text", None)
        self.rank_score = kwargs.get("rank_score", None)
        self.offset = kwargs.get("offset", None)
        self.length = kwargs.get("length", None)

    def __repr__(self):
        formatted = "SummarySentence(text={}, rank_score={}, offset={}, length={})".format(
            self.text,
            self.rank_score,
            self.offset,
            self.length,
        )
        # Truncate to the repr-length cap shared by the public model types.
        return formatted[:1024]

    @classmethod
    def _from_generated(cls, sentence):
        # Copy the autorest-generated sentence model onto the public type field by field.
        return cls(
            text=sentence.text,
            rank_score=sentence.rank_score,
            offset=sentence.offset,
            length=sentence.length,
        )
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
RecognizePiiEntitiesAction,
RecognizeLinkedEntitiesAction,
AnalyzeSentimentAction,
ExtractSummaryAction,
_AnalyzeActionsType,
)

Expand Down Expand Up @@ -96,6 +97,8 @@ def _determine_action_type(action):
return _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES
if isinstance(action, AnalyzeSentimentAction):
return _AnalyzeActionsType.ANALYZE_SENTIMENT
if isinstance(action, ExtractSummaryAction):
return _AnalyzeActionsType.EXTRACT_SUMMARY
return _AnalyzeActionsType.EXTRACT_KEY_PHRASES


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
RecognizePiiEntitiesResult,
PiiEntity,
AnalyzeHealthcareEntitiesResult,
ExtractSummaryResult,
_AnalyzeActionsType,
)

Expand Down Expand Up @@ -236,6 +237,15 @@ def healthcare_result(
)


@prepare_result
def summary_result(summary, results, *args, **kwargs):  # pylint: disable=unused-argument
    """Deserialize a generated extractive-summarization document result into the
    public ExtractSummaryResult type. Extra arguments are consumed by @prepare_result."""
    # pylint: disable=protected-access
    return ExtractSummaryResult._from_generated(summary)


def healthcare_extract_page_data(
doc_id_order, obj, response_headers, health_job_state
): # pylint: disable=unused-argument
Expand All @@ -256,6 +266,8 @@ def _get_deserialization_callback_from_task_type(task_type):
return linked_entities_result
if task_type == _AnalyzeActionsType.ANALYZE_SENTIMENT:
return sentiment_result
if task_type == _AnalyzeActionsType.EXTRACT_SUMMARY:
return summary_result
return key_phrases_result


Expand All @@ -268,6 +280,8 @@ def _get_property_name_from_task_type(task_type):
return "entity_linking_tasks"
if task_type == _AnalyzeActionsType.ANALYZE_SENTIMENT:
return "sentiment_analysis_tasks"
if task_type == _AnalyzeActionsType.EXTRACT_SUMMARY:
return "extractive_summarization_tasks"
return "key_phrase_extraction_tasks"


Expand Down
Loading