From 1188c6d3363a388f3eacf35c8a8669084fdefd5b Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Fri, 6 Dec 2024 14:59:50 -0700 Subject: [PATCH] feat(semcov): audio conventions (#1153) --- js/.changeset/mighty-mangos-search.md | 5 ++++ .../src/trace/SemanticConventions.ts | 29 +++++++++++++++++++ .../openinference/semconv/trace/__init__.py | 19 ++++++++++++ spec/semantic_conventions.md | 25 +++++++++------- 4 files changed, 67 insertions(+), 11 deletions(-) create mode 100644 js/.changeset/mighty-mangos-search.md diff --git a/js/.changeset/mighty-mangos-search.md b/js/.changeset/mighty-mangos-search.md new file mode 100644 index 000000000..aaa89d60c --- /dev/null +++ b/js/.changeset/mighty-mangos-search.md @@ -0,0 +1,5 @@ +--- +"@arizeai/openinference-semantic-conventions": minor +--- + +add semantic conventions for audio diff --git a/js/packages/openinference-semantic-conventions/src/trace/SemanticConventions.ts b/js/packages/openinference-semantic-conventions/src/trace/SemanticConventions.ts index cda84a9d1..e25f8db14 100644 --- a/js/packages/openinference-semantic-conventions/src/trace/SemanticConventions.ts +++ b/js/packages/openinference-semantic-conventions/src/trace/SemanticConventions.ts @@ -21,6 +21,7 @@ export const SemanticAttributePrefixes = { openinference: "openinference", message_content: "message_content", image: "image", + audio: "audio", } as const; export const LLMAttributePostfixes = { @@ -114,6 +115,12 @@ export const UserAttributePostfixes = { id: "id", } as const; +export const AudioAttributesPostfixes = { + url: "url", + mime_type: "mime_type", + transcript: "transcript", +} as const; + /** * The input to any span */ @@ -427,6 +434,24 @@ export const PROMPT_TEMPLATE_VERSION = export const TAG_TAGS = `${SemanticAttributePrefixes.tag}.${TagAttributePostfixes.tags}` as const; +/** + * The url of an audio file + */ +export const AUDIO_URL = + `${SemanticAttributePrefixes.audio}.${AudioAttributesPostfixes.url}` as const; + +/** + * The audio mime 
type + */ +export const AUDIO_MIME_TYPE = + `${SemanticAttributePrefixes.audio}.${AudioAttributesPostfixes.mime_type}` as const; + +/** + * The audio transcript as text + */ +export const AUDIO_TRANSCRIPT = + `${SemanticAttributePrefixes.audio}.${AudioAttributesPostfixes.transcript}` as const; + export const SemanticConventions = { IMAGE_URL, INPUT_VALUE, @@ -499,9 +524,13 @@ export enum OpenInferenceSpanKind { EVALUATOR = "EVALUATOR", } +/** + * An enum of common mime types. Not exhaustive. + */ export enum MimeType { TEXT = "text/plain", JSON = "application/json", + AUDIO_WAV = "audio/wav", } export enum LLMSystem { diff --git a/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py b/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py index 1c23ba22c..158a3f906 100644 --- a/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py +++ b/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py @@ -203,6 +203,25 @@ class ImageAttributes: """ +class AudioAttributes: + """ + Attributes for audio + """ + + AUDIO_URL = "audio.url" + """ + The url to an audio file + """ + AUDIO_MIME_TYPE = "audio.mime_type" + """ + The mime type of the audio file + """ + AUDIO_TRANSCRIPT = "audio.transcript" + """ + The transcript of the audio file + """ + + class DocumentAttributes: """ Attributes for a document. diff --git a/spec/semantic_conventions.md b/spec/semantic_conventions.md index e1ada2ab0..89f6a4d9b 100644 --- a/spec/semantic_conventions.md +++ b/spec/semantic_conventions.md @@ -8,7 +8,7 @@ operations used by applications. 
These conventions are used to populate the `att The following attributes are reserved and MUST be supported by all OpenInference Tracing SDKs: | Attribute | Type | Example | Description | -|----------------------------------------|-----------------------------|----------------------------------------------------------------------------|---------------------------------------------------------------------------------------| +| -------------------------------------- | --------------------------- | -------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | | `document.content` | String | `"This is a sample document content."` | The content of a retrieved document | | `document.id` | String/Integer | `"1234"` or `1` | Unique identifier for a document | | `document.metadata` | JSON String | `"{'author': 'John Doe', 'date': '2023-09-09'}"` | Metadata associated with a document | @@ -69,6 +69,9 @@ The following attributes are reserved and MUST be supported by all OpenInference | `tool_call.function.name` | String | `"get_current_weather"` | The name of the function being invoked by a tool call | | `tool_call.id` | string | `"call_62136355"` | The id of the a tool call (useful when there are more than one call at the same time) | | `user.id` | String | `"9328ae73-7141-4f45-a044-8e06192aa465"` | Unique identifier for a user | +| `audio.url` | String | `https://storage.com/buckets/1/file.wav` | The url to an audio file (e.g. cloud storage) | +| `audio.mime_type` | String | `audio/mpeg` | The mime type of the audio file (e.g. `audio/mpeg`, `audio/wav`) | +| `audio.transcript` | String | `"Hello, how are you?"` | The transcript of the audio file (e.g. whisper transcription) | To get a list of objects exported as OpenTelemetry span attributes, flattening of the list is necessary as shown in the examples below. @@ -77,7 +80,7 @@ shown in the examples below. 
used; otherwise, a custom value MAY be used. | Value | Description | -|-------------|-------------| +| ----------- | ----------- | | `anthropic` | Anthropic | | `openai` | OpenAI | | `vertexai` | Vertex AI | @@ -88,7 +91,7 @@ used; otherwise, a custom value MAY be used. used; otherwise, a custom value MAY be used. | Value | Description | -|-------------|-----------------| +| ----------- | --------------- | | `anthropic` | Anthropic | | `openai` | OpenAI | | `cohere` | Cohere | @@ -112,17 +115,17 @@ for i, obj in enumerate(messages): ```javascript const messages = [ - { "message.role": "user", "message.content": "hello" }, - { - "message.role": "assistant", - "message.content": "hi", - }, + { "message.role": "user", "message.content": "hello" }, + { + "message.role": "assistant", + "message.content": "hi", + }, ]; for (const [i, obj] of messages.entries()) { - for (const [key, value] of Object.entries(obj)) { - span.setAttribute(`input.messages.${i}.${key}`, value); - } + for (const [key, value] of Object.entries(obj)) { + span.setAttribute(`input.messages.${i}.${key}`, value); + } } ```