From da4fb555b2cec6716aa5b91657d9e984931dc89f Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 24 Apr 2024 06:23:10 +0000 Subject: [PATCH 01/31] Initial LLM metrics --- docs/gen-ai/llm-metrics.md | 111 +++++++++++++++++++++++++++++++++++++ model/metrics/gen-ai.yaml | 31 +++++++++++ model/registry/gen-ai.yaml | 6 ++ 3 files changed, 148 insertions(+) create mode 100644 docs/gen-ai/llm-metrics.md create mode 100644 model/metrics/gen-ai.yaml diff --git a/docs/gen-ai/llm-metrics.md b/docs/gen-ai/llm-metrics.md new file mode 100644 index 0000000000..ebd80b80a4 --- /dev/null +++ b/docs/gen-ai/llm-metrics.md @@ -0,0 +1,111 @@ + + +# Semantic Conventions for LLM Metrics + +**Status**: [Experimental][DocumentStatus] + +The conventions described in this section are specific to LLM clients. + +**Disclaimer:** These are initial database client metric instruments +and attributes but more may be added in the future. + + + + + +- [LLM Requests](#llm-requests) + - [Metric: `gen_ai.usage.tokens`](#metric-gen_aiusagetokens) + - [Metrics: `gen_ai.request.duration`](#metrics-gen_airequestduration) + + + +## LLM Requests + +The following metric instruments describe LLM request operations. + +### Metric: `gen_ai.usage.tokens` + +This metric is [required][MetricRequired]. + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `gen_ai.usage.tokens` | Histogram | `tokens` | LLM usage attributes | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + + + +| Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | +|---|---|---|---|---|---| +| [`gen_ai.response.model`](../attributes-registry/llm.md) | string | The name of the LLM a response was generated from. 
| `gpt-4-0613` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.usage.token_type`](../attributes-registry/llm.md) | string | The type of token being counted. | `prompt`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`server.address`](../attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`error.type`](../attributes-registry/error.md) | string | Describes a class of error the operation ended with. [2] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | + +**[1]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. + +**[2]:** The `error.type` SHOULD be predictable and SHOULD have low cardinality. +Instrumentations SHOULD document the list of errors they report. + +The cardinality of `error.type` within one instrumentation library SHOULD be low. +Telemetry consumers that aggregate data from multiple instrumentation libraries and applications +should be prepared for `error.type` to have high cardinality at query time when no +additional filters are applied. + +If the operation has completed successfully, instrumentations SHOULD NOT set `error.type`. + +If a specific domain defines its own set of error identifiers (such as HTTP or gRPC status codes), +it's RECOMMENDED to: + +* Use a domain-specific attribute +* Set `error.type` to capture all errors, regardless of whether they are defined within the domain-specific set or not. 
+ +`error.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. + +| Value | Description | Stability | +|---|---|---| +| `_OTHER` | A fallback error value to be used when the instrumentation doesn't define a custom value. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | + + +### Metrics: `gen_ai.request.duration` + +This metric is [required][MetricRequired]. + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `gen_ai.request.duration` | Histogram | `s` | LLM request duration | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + + + +| Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | +|---|---|---|---|---|---| +| [`gen_ai.response.model`](../attributes-registry/llm.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`server.address`](../attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`error.type`](../attributes-registry/error.md) | string | Describes a class of error the operation ended with. 
[2] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | + +**[1]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. + +**[2]:** The `error.type` SHOULD be predictable and SHOULD have low cardinality. +Instrumentations SHOULD document the list of errors they report. + +The cardinality of `error.type` within one instrumentation library SHOULD be low. +Telemetry consumers that aggregate data from multiple instrumentation libraries and applications +should be prepared for `error.type` to have high cardinality at query time when no +additional filters are applied. + +If the operation has completed successfully, instrumentations SHOULD NOT set `error.type`. + +If a specific domain defines its own set of error identifiers (such as HTTP or gRPC status codes), +it's RECOMMENDED to: + +* Use a domain-specific attribute +* Set `error.type` to capture all errors, regardless of whether they are defined within the domain-specific set or not. + +`error.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. + +| Value | Description | Stability | +|---|---|---| +| `_OTHER` | A fallback error value to be used when the instrumentation doesn't define a custom value. 
| ![Stable](https://img.shields.io/badge/-stable-lightgreen) | + diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml new file mode 100644 index 0000000000..a13836d478 --- /dev/null +++ b/model/metrics/gen-ai.yaml @@ -0,0 +1,31 @@ +groups: + - id: metric_attributes.gen_ai + type: attribute_group + brief: 'LLM attributes' + attributes: + - ref: error.type + requirement_level: + conditionally_required: "if the operation ended in an error" + - ref: server.address + requirement_level: required + - ref: gen_ai.response.model + requirement_level: required + - id: metric.gen_ai.usage.tokens + type: metric + metric_name: gen_ai.usage.tokens + brief: 'LLM usage attributes' + instrument: histogram + unit: "tokens" + stability: experimental + extends: metric_attributes.gen_ai + attributes: + - ref: gen_ai.usage.token_type + requirement_level: required + - id: metric.gen_ai.request.duration + type: metric + metric_name: gen_ai.request.duration + brief: 'LLM request duration' + instrument: histogram + unit: "s" + stability: experimental + extends: metric_attributes.gen_ai diff --git a/model/registry/gen-ai.yaml b/model/registry/gen-ai.yaml index ab2ea3700c..2d5938f1e3 100644 --- a/model/registry/gen-ai.yaml +++ b/model/registry/gen-ai.yaml @@ -75,6 +75,12 @@ groups: brief: The number of tokens used in the LLM response (completion). examples: [180] tag: llm-generic-response + - id: usage.token_type + stability: experimental + type: string + brief: The type of token being counted. 
+ examples: ['prompt', 'completion'] + tag: llm-generic-metrics - id: prompt stability: experimental type: string From 11697561b8bbf422c8e26a16c90b6edbe351189b Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 24 Apr 2024 06:27:26 +0000 Subject: [PATCH 02/31] Add link references --- docs/gen-ai/llm-metrics.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/gen-ai/llm-metrics.md b/docs/gen-ai/llm-metrics.md index ebd80b80a4..6da7087b25 100644 --- a/docs/gen-ai/llm-metrics.md +++ b/docs/gen-ai/llm-metrics.md @@ -109,3 +109,6 @@ it's RECOMMENDED to: |---|---|---| | `_OTHER` | A fallback error value to be used when the instrumentation doesn't define a custom value. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | + +[DocumentStatus]: https://github.com/open-telemetry/opentelemetry-specification/tree/v1.22.0/specification/document-status.md +[MetricRequired]: /docs/general/metric-requirement-level.md#required From 383fa1fd59d961120c9e716755c51c6b3a2abbf4 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 24 Apr 2024 06:28:46 +0000 Subject: [PATCH 03/31] Add LLM Metrics to README --- docs/gen-ai/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/gen-ai/README.md b/docs/gen-ai/README.md index 1197a88522..1d24209db4 100644 --- a/docs/gen-ai/README.md +++ b/docs/gen-ai/README.md @@ -21,5 +21,6 @@ This document defines semantic conventions for the following kind of Generative Semantic conventions for LLM operations are defined for the following signals: * [LLM Spans](llm-spans.md): Semantic Conventions for LLM requests - *spans*. +* [LLM Metrics](llm-metrics.md): Semantic Conventions for LLM requests - *metrics*. 
[DocumentStatus]: https://github.com/open-telemetry/opentelemetry-specification/tree/v1.26.0/specification/document-status.md From c4308be86620f688fead5b812346c868aa1d500c Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 24 Apr 2024 06:39:29 +0000 Subject: [PATCH 04/31] Add changelog --- .chloggen/811.yaml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .chloggen/811.yaml diff --git a/.chloggen/811.yaml b/.chloggen/811.yaml new file mode 100644 index 0000000000..7159bf209e --- /dev/null +++ b/.chloggen/811.yaml @@ -0,0 +1,4 @@ +change_type: enhancement +component: gen-ai +note: Adding metrics for GenAI clients. +issues: [811] \ No newline at end of file From 1239fbd7d6f1ac641cea5ff2f1ff7bcb076a6e19 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 24 Apr 2024 06:50:50 +0000 Subject: [PATCH 05/31] Fix yamllint error on chloggen --- .chloggen/811.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.chloggen/811.yaml b/.chloggen/811.yaml index 7159bf209e..dd2304d58b 100644 --- a/.chloggen/811.yaml +++ b/.chloggen/811.yaml @@ -1,4 +1,4 @@ change_type: enhancement component: gen-ai note: Adding metrics for GenAI clients. -issues: [811] \ No newline at end of file +issues: [811] From 9565f2c8c5f5f195920b5f2f8657e5df7703a0b6 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 24 Apr 2024 07:12:34 +0000 Subject: [PATCH 06/31] Update reference to LLM --- docs/gen-ai/llm-metrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/gen-ai/llm-metrics.md b/docs/gen-ai/llm-metrics.md index 6da7087b25..e326e1ab1f 100644 --- a/docs/gen-ai/llm-metrics.md +++ b/docs/gen-ai/llm-metrics.md @@ -8,7 +8,7 @@ linkTitle: LLM metrics The conventions described in this section are specific to LLM clients. -**Disclaimer:** These are initial database client metric instruments +**Disclaimer:** These are initial LLM client metric instruments and attributes but more may be added in the future. 
From b57937401596a52dd903e83f9b4605559fe7c609 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Thu, 25 Apr 2024 01:34:51 +0000 Subject: [PATCH 07/31] Change metric name to match semconv --- docs/gen-ai/llm-metrics.md | 10 +++++----- model/metrics/gen-ai.yaml | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/gen-ai/llm-metrics.md b/docs/gen-ai/llm-metrics.md index e326e1ab1f..43693aacc8 100644 --- a/docs/gen-ai/llm-metrics.md +++ b/docs/gen-ai/llm-metrics.md @@ -16,7 +16,7 @@ and attributes but more may be added in the future. - [LLM Requests](#llm-requests) - - [Metric: `gen_ai.usage.tokens`](#metric-gen_aiusagetokens) + - [Metric: `gen_ai.tokens.usage`](#metric-gen_aitokensusage) - [Metrics: `gen_ai.request.duration`](#metrics-gen_airequestduration) @@ -25,17 +25,17 @@ and attributes but more may be added in the future. The following metric instruments describe LLM request operations. -### Metric: `gen_ai.usage.tokens` +### Metric: `gen_ai.tokens.usage` This metric is [required][MetricRequired]. - + | Name | Instrument Type | Unit (UCUM) | Description | Stability | | -------- | --------------- | ----------- | -------------- | --------- | -| `gen_ai.usage.tokens` | Histogram | `tokens` | LLM usage attributes | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `gen_ai.tokens.usage` | Histogram | `tokens` | LLM usage attributes | ![Experimental](https://img.shields.io/badge/-experimental-blue) | - + | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| | [`gen_ai.response.model`](../attributes-registry/llm.md) | string | The name of the LLM a response was generated from. 
| `gpt-4-0613` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml index a13836d478..b1132631e7 100644 --- a/model/metrics/gen-ai.yaml +++ b/model/metrics/gen-ai.yaml @@ -10,9 +10,9 @@ groups: requirement_level: required - ref: gen_ai.response.model requirement_level: required - - id: metric.gen_ai.usage.tokens + - id: metric.gen_ai.tokens.usage type: metric - metric_name: gen_ai.usage.tokens + metric_name: gen_ai.tokens.usage brief: 'LLM usage attributes' instrument: histogram unit: "tokens" From c9382638912e80d8edd6f8d4de7f678d67052946 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Thu, 25 Apr 2024 01:37:18 +0000 Subject: [PATCH 08/31] Add gen_ai.system --- docs/gen-ai/llm-metrics.md | 14 ++++++++++++++ model/metrics/gen-ai.yaml | 2 ++ 2 files changed, 16 insertions(+) diff --git a/docs/gen-ai/llm-metrics.md b/docs/gen-ai/llm-metrics.md index 43693aacc8..038d787e0b 100644 --- a/docs/gen-ai/llm-metrics.md +++ b/docs/gen-ai/llm-metrics.md @@ -39,6 +39,7 @@ This metric is [required][MetricRequired]. | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| | [`gen_ai.response.model`](../attributes-registry/llm.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.system`](../attributes-registry/llm.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.usage.token_type`](../attributes-registry/llm.md) | string | The type of token being counted. 
| `prompt`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`server.address`](../attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`error.type`](../attributes-registry/error.md) | string | Describes a class of error the operation ended with. [2] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | @@ -61,6 +62,12 @@ it's RECOMMENDED to: * Use a domain-specific attribute * Set `error.type` to capture all errors, regardless of whether they are defined within the domain-specific set or not. +`gen_ai.system` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. + +| Value | Description | Stability | +|---|---|---| +| `openai` | OpenAI | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + `error.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. | Value | Description | Stability | @@ -82,6 +89,7 @@ This metric is [required][MetricRequired]. | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| | [`gen_ai.response.model`](../attributes-registry/llm.md) | string | The name of the LLM a response was generated from. 
| `gpt-4-0613` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.system`](../attributes-registry/llm.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`server.address`](../attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`error.type`](../attributes-registry/error.md) | string | Describes a class of error the operation ended with. [2] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | @@ -103,6 +111,12 @@ it's RECOMMENDED to: * Use a domain-specific attribute * Set `error.type` to capture all errors, regardless of whether they are defined within the domain-specific set or not. +`gen_ai.system` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. + +| Value | Description | Stability | +|---|---|---| +| `openai` | OpenAI | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + `error.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. 
| Value | Description | Stability | diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml index b1132631e7..d4ce3abc2c 100644 --- a/model/metrics/gen-ai.yaml +++ b/model/metrics/gen-ai.yaml @@ -10,6 +10,8 @@ groups: requirement_level: required - ref: gen_ai.response.model requirement_level: required + - ref: gen_ai.system + requirement_level: required - id: metric.gen_ai.tokens.usage type: metric metric_name: gen_ai.tokens.usage From d5b59dcc51d2170fdf9aaa5488afc8117f504cfe Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Thu, 25 Apr 2024 02:59:52 +0000 Subject: [PATCH 09/31] Updates for review comments --- docs/gen-ai/llm-metrics.md | 50 +++++++++++++++++++++++++++++--------- model/metrics/gen-ai.yaml | 18 +++++++++----- model/registry/gen-ai.yaml | 22 ++++++++++++++++- 3 files changed, 71 insertions(+), 19 deletions(-) diff --git a/docs/gen-ai/llm-metrics.md b/docs/gen-ai/llm-metrics.md index 038d787e0b..bf4164ccc7 100644 --- a/docs/gen-ai/llm-metrics.md +++ b/docs/gen-ai/llm-metrics.md @@ -17,7 +17,7 @@ and attributes but more may be added in the future. - [LLM Requests](#llm-requests) - [Metric: `gen_ai.tokens.usage`](#metric-gen_aitokensusage) - - [Metrics: `gen_ai.request.duration`](#metrics-gen_airequestduration) + - [Metric: `gen_ai.operation.duration`](#metric-gen_aioperationduration) @@ -32,21 +32,30 @@ This metric is [required][MetricRequired]. 
| Name | Instrument Type | Unit (UCUM) | Description | Stability | | -------- | --------------- | ----------- | -------------- | --------- | -| `gen_ai.tokens.usage` | Histogram | `tokens` | LLM usage attributes | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `gen_ai.tokens.usage` | Histogram | `{token}` | LLM usage attributes | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| -| [`gen_ai.response.model`](../attributes-registry/llm.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.operation.name`](../attributes-registry/llm.md) | string | The name of the operation being performed. | `generate` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.system`](../attributes-registry/llm.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.usage.token_type`](../attributes-registry/llm.md) | string | The type of token being counted. | `prompt`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`server.address`](../attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`error.type`](../attributes-registry/error.md) | string | Describes a class of error the operation ended with. 
[2] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`server.port`](../attributes-registry/server.md) | int | Server port number. [2] | `80`; `8080`; `443` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`error.type`](../attributes-registry/error.md) | string | Describes a class of error the operation ended with. [3] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`gen_ai.request.model`](../attributes-registry/llm.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.response.model`](../attributes-registry/llm.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | **[1]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. -**[2]:** The `error.type` SHOULD be predictable and SHOULD have low cardinality. +**[2]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. + +**[3]:** The `error.type` SHOULD be predictable, and SHOULD have low cardinality. + +When `error.type` is set to a type (e.g., an exception type), its +canonical class name identifying the type within the artifact SHOULD be used. + Instrumentations SHOULD document the list of errors they report. 
The cardinality of `error.type` within one instrumentation library SHOULD be low. @@ -68,6 +77,14 @@ it's RECOMMENDED to: |---|---|---| | `openai` | OpenAI | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +`gen_ai.usage.token_type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. + +| Value | Description | Stability | +|---|---|---| +| `input` | Input tokens (Embeddings) | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `prompt` | Prompt tokens | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `completion` | Completion tokens | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + `error.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. | Value | Description | Stability | @@ -75,27 +92,36 @@ it's RECOMMENDED to: | `_OTHER` | A fallback error value to be used when the instrumentation doesn't define a custom value. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -### Metrics: `gen_ai.request.duration` +### Metric: `gen_ai.operation.duration` This metric is [required][MetricRequired]. 
- + | Name | Instrument Type | Unit (UCUM) | Description | Stability | | -------- | --------------- | ----------- | -------------- | --------- | -| `gen_ai.request.duration` | Histogram | `s` | LLM request duration | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `gen_ai.operation.duration` | Histogram | `s` | GenAI operation duration | ![Experimental](https://img.shields.io/badge/-experimental-blue) | - + | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| -| [`gen_ai.response.model`](../attributes-registry/llm.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.operation.name`](../attributes-registry/llm.md) | string | The name of the operation being performed. | `generate` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.system`](../attributes-registry/llm.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`server.address`](../attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`error.type`](../attributes-registry/error.md) | string | Describes a class of error the operation ended with. [2] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`server.port`](../attributes-registry/server.md) | int | Server port number. 
[2] | `80`; `8080`; `443` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`error.type`](../attributes-registry/error.md) | string | Describes a class of error the operation ended with. [3] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`gen_ai.request.model`](../attributes-registry/llm.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.response.model`](../attributes-registry/llm.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | **[1]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. -**[2]:** The `error.type` SHOULD be predictable and SHOULD have low cardinality. +**[2]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. + +**[3]:** The `error.type` SHOULD be predictable, and SHOULD have low cardinality. + +When `error.type` is set to a type (e.g., an exception type), its +canonical class name identifying the type within the artifact SHOULD be used. + Instrumentations SHOULD document the list of errors they report. The cardinality of `error.type` within one instrumentation library SHOULD be low. 
diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml index d4ce3abc2c..e7b0c2db7b 100644 --- a/model/metrics/gen-ai.yaml +++ b/model/metrics/gen-ai.yaml @@ -1,32 +1,38 @@ groups: - id: metric_attributes.gen_ai type: attribute_group - brief: 'LLM attributes' + brief: 'This group describes GenAI metrics attributes' attributes: - ref: error.type requirement_level: conditionally_required: "if the operation ended in an error" - ref: server.address requirement_level: required - - ref: gen_ai.response.model + - ref: server.port requirement_level: required + - ref: gen_ai.response.model + requirement_level: recommended + - ref: gen_ai.request.model + requirement_level: recommended - ref: gen_ai.system requirement_level: required + - ref: gen_ai.operation.name + requirement_level: required - id: metric.gen_ai.tokens.usage type: metric metric_name: gen_ai.tokens.usage brief: 'LLM usage attributes' instrument: histogram - unit: "tokens" + unit: "{token}" stability: experimental extends: metric_attributes.gen_ai attributes: - ref: gen_ai.usage.token_type requirement_level: required - - id: metric.gen_ai.request.duration + - id: metric.gen_ai.operation.duration type: metric - metric_name: gen_ai.request.duration - brief: 'LLM request duration' + metric_name: gen_ai.operation.duration + brief: 'GenAI operation duration' instrument: histogram unit: "s" stability: experimental diff --git a/model/registry/gen-ai.yaml b/model/registry/gen-ai.yaml index 2d5938f1e3..e6f40b6e37 100644 --- a/model/registry/gen-ai.yaml +++ b/model/registry/gen-ai.yaml @@ -77,7 +77,21 @@ groups: tag: llm-generic-response - id: usage.token_type stability: experimental - type: string + type: + allow_custom_values: true + members: + - id: input + stability: experimental + value: "input" + brief: 'Input tokens (Embeddings)' + - id: prompt + stability: experimental + value: "prompt" + brief: 'Prompt tokens' + - id: completion + stability: experimental + value: "completion" + brief: 'Completion 
tokens' brief: The type of token being counted. examples: ['prompt', 'completion'] tag: llm-generic-metrics @@ -95,3 +109,9 @@ groups: note: It's RECOMMENDED to format completions as JSON string matching [OpenAI messages format](https://platform.openai.com/docs/guides/text-generation) examples: ["[{'role': 'assistant', 'content': 'The capital of France is Paris.'}]"] tag: llm-generic-events + - id: operation.name + stability: experimental + type: string + brief: The name of the operation being performed. + examples: ['generate'] + tag: llm-generic-metrics From 2b942db8ffa61f72c4605103b507e8c4e9c7299b Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Fri, 26 Apr 2024 04:27:05 +0000 Subject: [PATCH 10/31] Rename/scope LLM to Gen AI metrics --- .../{llm-metrics.md => gen-ai-metrics.md} | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) rename docs/gen-ai/{llm-metrics.md => gen-ai-metrics.md} (94%) diff --git a/docs/gen-ai/llm-metrics.md b/docs/gen-ai/gen-ai-metrics.md similarity index 94% rename from docs/gen-ai/llm-metrics.md rename to docs/gen-ai/gen-ai-metrics.md index bf4164ccc7..94f512f5a8 100644 --- a/docs/gen-ai/llm-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -1,33 +1,37 @@ -# Semantic Conventions for LLM Metrics +# Semantic Conventions for Generative AI Metrics **Status**: [Experimental][DocumentStatus] -The conventions described in this section are specific to LLM clients. +The conventions described in this section are specific to Generative AI +applications. -**Disclaimer:** These are initial LLM client metric instruments +**Disclaimer:** These are initial Generative AI client metric instruments and attributes but more may be added in the future. 
-- [LLM Requests](#llm-requests) +- [Generative AI Operations](#generative-ai-operations) - [Metric: `gen_ai.tokens.usage`](#metric-gen_aitokensusage) - [Metric: `gen_ai.operation.duration`](#metric-gen_aioperationduration) -## LLM Requests +## Generative AI Operations -The following metric instruments describe LLM request operations. +The following metric instruments describe Generative AI operations. An +operation may be a request to an LLM, a function call, or some other +distinct action within a larger Generative AI workflow. ### Metric: `gen_ai.tokens.usage` -This metric is [required][MetricRequired]. +This metric is [required][MetricRequired] when an operation involves the usage +of tokens. | Name | Instrument Type | Unit (UCUM) | Description | Stability | From 53351757aecb3dacc45ae711441b1880488857b9 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Fri, 26 Apr 2024 04:28:39 +0000 Subject: [PATCH 11/31] Remove trailing spaces --- docs/gen-ai/gen-ai-metrics.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index 94f512f5a8..cb94df0e55 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -6,7 +6,7 @@ linkTitle: Generative AI metrics **Status**: [Experimental][DocumentStatus] -The conventions described in this section are specific to Generative AI +The conventions described in this section are specific to Generative AI applications. **Disclaimer:** These are initial Generative AI client metric instruments @@ -30,7 +30,7 @@ distinct action within a larger Generative AI workflow. ### Metric: `gen_ai.tokens.usage` -This metric is [required][MetricRequired] when an operation involves the usage +This metric is [required][MetricRequired] when an operation involves the usage of tokens. 
From 4f415b3cdd37903c9ccfc1a14f485448a662a132 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Tue, 30 Apr 2024 10:03:44 +0900 Subject: [PATCH 12/31] Update operation examples. --- model/registry/gen-ai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/registry/gen-ai.yaml b/model/registry/gen-ai.yaml index e6f40b6e37..a66fb2d80a 100644 --- a/model/registry/gen-ai.yaml +++ b/model/registry/gen-ai.yaml @@ -113,5 +113,5 @@ groups: stability: experimental type: string brief: The name of the operation being performed. - examples: ['generate'] + examples: ['chat', 'completion'] tag: llm-generic-metrics From f3e6586cfc26e934723d141c2e45be9cb451973a Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Tue, 30 Apr 2024 01:13:51 +0000 Subject: [PATCH 13/31] Replace pluralized tokens with token --- docs/gen-ai/gen-ai-metrics.md | 14 +++++++------- model/metrics/gen-ai.yaml | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index cb94df0e55..3338fd47ec 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -17,7 +17,7 @@ and attributes but more may be added in the future. - [Generative AI Operations](#generative-ai-operations) - - [Metric: `gen_ai.tokens.usage`](#metric-gen_aitokensusage) + - [Metric: `gen_ai.token.usage`](#metric-gen_aitokenusage) - [Metric: `gen_ai.operation.duration`](#metric-gen_aioperationduration) @@ -28,21 +28,21 @@ The following metric instruments describe Generative AI operations. An operation may be a request to an LLM, a function call, or some other distinct action within a larger Generative AI workflow. -### Metric: `gen_ai.tokens.usage` +### Metric: `gen_ai.token.usage` This metric is [required][MetricRequired] when an operation involves the usage of tokens. 
- + | Name | Instrument Type | Unit (UCUM) | Description | Stability | | -------- | --------------- | ----------- | -------------- | --------- | -| `gen_ai.tokens.usage` | Histogram | `{token}` | LLM usage attributes | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `gen_ai.token.usage` | Histogram | `{token}` | Measures number of input and output tokens used | ![Experimental](https://img.shields.io/badge/-experimental-blue) | - + | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| -| [`gen_ai.operation.name`](../attributes-registry/llm.md) | string | The name of the operation being performed. | `generate` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.operation.name`](../attributes-registry/llm.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.system`](../attributes-registry/llm.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.usage.token_type`](../attributes-registry/llm.md) | string | The type of token being counted. | `prompt`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`server.address`](../attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | @@ -109,7 +109,7 @@ This metric is [required][MetricRequired]. 
| Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| -| [`gen_ai.operation.name`](../attributes-registry/llm.md) | string | The name of the operation being performed. | `generate` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.operation.name`](../attributes-registry/llm.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.system`](../attributes-registry/llm.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`server.address`](../attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`server.port`](../attributes-registry/server.md) | int | Server port number. 
[2] | `80`; `8080`; `443` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml index e7b0c2db7b..aba571b433 100644 --- a/model/metrics/gen-ai.yaml +++ b/model/metrics/gen-ai.yaml @@ -18,10 +18,10 @@ groups: requirement_level: required - ref: gen_ai.operation.name requirement_level: required - - id: metric.gen_ai.tokens.usage + - id: metric.gen_ai.token.usage type: metric - metric_name: gen_ai.tokens.usage - brief: 'LLM usage attributes' + metric_name: gen_ai.token.usage + brief: 'Measures number of input and output tokens used' instrument: histogram unit: "{token}" stability: experimental From fd01e6538d9cb0518cd5143a9931f2d23dd3e9b2 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Sun, 5 May 2024 01:56:52 +0000 Subject: [PATCH 14/31] Update table of contents --- docs/gen-ai/README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/gen-ai/README.md b/docs/gen-ai/README.md index 1d24209db4..17d5ef099d 100644 --- a/docs/gen-ai/README.md +++ b/docs/gen-ai/README.md @@ -18,9 +18,12 @@ This document defines semantic conventions for the following kind of Generative * LLMs +Semantic conventions for Generative AI operations are defined for the following signals: + +* [Metrics](gen-ai-metrics.md): Semantic Conventions for Generative AI operations - *metrics*. + Semantic conventions for LLM operations are defined for the following signals: * [LLM Spans](llm-spans.md): Semantic Conventions for LLM requests - *spans*. -* [LLM Metrics](llm-metrics.md): Semantic Conventions for LLM requests - *metrics*. 
[DocumentStatus]: https://github.com/open-telemetry/opentelemetry-specification/tree/v1.26.0/specification/document-status.md From b1ccbd6c5aaba0d0687346b4ed4db9f6aa5891a9 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Sun, 5 May 2024 02:09:55 +0000 Subject: [PATCH 15/31] Update token type --- docs/gen-ai/gen-ai-metrics.md | 37 +++++++++++++++++------------------ model/metrics/gen-ai.yaml | 2 +- model/registry/gen-ai.yaml | 16 ++++++--------- 3 files changed, 25 insertions(+), 30 deletions(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index 3338fd47ec..a265d38f28 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -42,14 +42,14 @@ of tokens. | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| -| [`gen_ai.operation.name`](../attributes-registry/llm.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`gen_ai.system`](../attributes-registry/llm.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`gen_ai.usage.token_type`](../attributes-registry/llm.md) | string | The type of token being counted. | `prompt`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`server.address`](../attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`server.port`](../attributes-registry/server.md) | int | Server port number. 
[2] | `80`; `8080`; `443` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`error.type`](../attributes-registry/error.md) | string | Describes a class of error the operation ended with. [3] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`gen_ai.request.model`](../attributes-registry/llm.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`gen_ai.response.model`](../attributes-registry/llm.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.token.type`](/docs/attributes-registry/gen-ai.md) | string | The type of token being counted. | `input`; `output` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. 
[2] | `80`; `8080`; `443` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`error.type`](/docs/attributes-registry/error.md) | string | Describes a class of error the operation ended with. [3] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.response.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | **[1]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. @@ -81,13 +81,12 @@ it's RECOMMENDED to: |---|---|---| | `openai` | OpenAI | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -`gen_ai.usage.token_type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. +`gen_ai.token.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. | Value | Description | Stability | |---|---|---| -| `input` | Input tokens (Embeddings) | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| `prompt` | Prompt tokens | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| `completion` | Completion tokens | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `input` | Input tokens (prompt, input, etc.) 
| ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `output` | Output tokens (completion, response, etc.) | ![Experimental](https://img.shields.io/badge/-experimental-blue) | `error.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. @@ -109,13 +108,13 @@ This metric is [required][MetricRequired]. | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| -| [`gen_ai.operation.name`](../attributes-registry/llm.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`gen_ai.system`](../attributes-registry/llm.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`server.address`](../attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`server.port`](../attributes-registry/server.md) | int | Server port number. [2] | `80`; `8080`; `443` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`error.type`](../attributes-registry/error.md) | string | Describes a class of error the operation ended with. [3] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`gen_ai.request.model`](../attributes-registry/llm.md) | string | The name of the LLM a request is being made to. 
| `gpt-4` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`gen_ai.response.model`](../attributes-registry/llm.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. [2] | `80`; `8080`; `443` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`error.type`](/docs/attributes-registry/error.md) | string | Describes a class of error the operation ended with. [3] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.response.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a response was generated from. 
| `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | **[1]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml index aba571b433..35c0024ebd 100644 --- a/model/metrics/gen-ai.yaml +++ b/model/metrics/gen-ai.yaml @@ -27,7 +27,7 @@ groups: stability: experimental extends: metric_attributes.gen_ai attributes: - - ref: gen_ai.usage.token_type + - ref: gen_ai.token.type requirement_level: required - id: metric.gen_ai.operation.duration type: metric diff --git a/model/registry/gen-ai.yaml b/model/registry/gen-ai.yaml index a66fb2d80a..0f9f3fc505 100644 --- a/model/registry/gen-ai.yaml +++ b/model/registry/gen-ai.yaml @@ -75,25 +75,21 @@ groups: brief: The number of tokens used in the LLM response (completion). examples: [180] tag: llm-generic-response - - id: usage.token_type + - id: token.type stability: experimental type: - allow_custom_values: true + allow_custom_values: false members: - id: input stability: experimental value: "input" - brief: 'Input tokens (Embeddings)' - - id: prompt - stability: experimental - value: "prompt" - brief: 'Prompt tokens' + brief: 'Input tokens (prompt, input, etc.)' - id: completion stability: experimental - value: "completion" - brief: 'Completion tokens' + value: "output" + brief: 'Output tokens (completion, response, etc.)' brief: The type of token being counted. 
- examples: ['prompt', 'completion'] + examples: ['input', 'output'] tag: llm-generic-metrics - id: prompt stability: experimental From de89866dfc18c628e540ad3f218ff253ddd4a2bb Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Tue, 7 May 2024 02:43:43 +0000 Subject: [PATCH 16/31] Update requirement levels --- docs/gen-ai/gen-ai-metrics.md | 14 ++++++-------- model/metrics/gen-ai.yaml | 7 ++++--- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index a265d38f28..d79b6c5cd3 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -43,13 +43,12 @@ of tokens. | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| | [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.token.type`](/docs/attributes-registry/gen-ai.md) | string | The type of token being counted. | `input`; `output` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. 
[1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. [2] | `80`; `8080`; `443` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`error.type`](/docs/attributes-registry/error.md) | string | Describes a class of error the operation ended with. [3] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. [2] | `80`; `8080`; `443` | `Conditionally Required` If `server.address` is set. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`gen_ai.response.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [3] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Recommended` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | **[1]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. @@ -109,12 +108,11 @@ This metric is [required][MetricRequired].
| Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| | [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [1] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. [2] | `80`; `8080`; `443` | `Required` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`error.type`](/docs/attributes-registry/error.md) | string | Describes a class of error the operation ended with. [3] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. 
[2] | `80`; `8080`; `443` | `Conditionally Required` If `server.address` is set. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`gen_ai.response.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [3] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Recommended` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | **[1]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml index 35c0024ebd..6a2afb927b 100644 --- a/model/metrics/gen-ai.yaml +++ b/model/metrics/gen-ai.yaml @@ -7,13 +7,14 @@ groups: requirement_level: conditionally_required: "if the operation ended in an error" - ref: server.address - requirement_level: required + requirement_level: recommended - ref: server.port - requirement_level: required + requirement_level: + conditionally_required: If `server.address` is set.
- ref: gen_ai.response.model requirement_level: recommended - ref: gen_ai.request.model - requirement_level: recommended + requirement_level: required - ref: gen_ai.system requirement_level: required - ref: gen_ai.operation.name From cfd8e861a8f0a74b3c785dbdf77e34699993226b Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Tue, 7 May 2024 02:44:11 +0000 Subject: [PATCH 17/31] Override error.type note --- docs/gen-ai/gen-ai-metrics.md | 50 +++++++++-------------------------- model/metrics/gen-ai.yaml | 6 +++++ 2 files changed, 18 insertions(+), 38 deletions(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index d79b6c5cd3..05b239eeb6 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -46,33 +46,20 @@ of tokens. | [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.token.type`](/docs/attributes-registry/gen-ai.md) | string | The type of token being counted. | `input`; `output` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`error.type`](/docs/attributes-registry/error.md) | string | Describes a class of error the operation ended with. [1] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. [2] | `80`; `8080`; `443` | `Conditionally Required` If `server.address` is set.
| ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`gen_ai.response.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [3] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Recommended` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -**[1]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. +**[1]:** The cardinality of `error.type` SHOULD be low. -**[2]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. - -**[3]:** The `error.type` SHOULD be predictable, and SHOULD have low cardinality. - -When `error.type` is set to a type (e.g., an exception type), its -canonical class name identifying the type within the artifact SHOULD be used. +When working across multiple models, it is RECOMMENDED to use a common set of error types. -Instrumentations SHOULD document the list of errors they report. +Additional details may be captured in domain-specific attributes. -The cardinality of `error.type` within one instrumentation library SHOULD be low. -Telemetry consumers that aggregate data from multiple instrumentation libraries and applications -should be prepared for `error.type` to have high cardinality at query time when no -additional filters are applied. - -If the operation has completed successfully, instrumentations SHOULD NOT set `error.type`. 
- -If a specific domain defines its own set of error identifiers (such as HTTP or gRPC status codes), -it's RECOMMENDED to: +**[2]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. -* Use a domain-specific attribute -* Set `error.type` to capture all errors, regardless of whether they are defined within the domain-specific set or not. +**[3]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. `gen_ai.system` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. @@ -110,33 +97,20 @@ This metric is [required][MetricRequired]. | [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`error.type`](/docs/attributes-registry/error.md) | string | Describes a class of error the operation ended with. [1] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. 
[2] | `80`; `8080`; `443` | `Conditionally Required` If `server.address` is set. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`gen_ai.response.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [3] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Recommended` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -**[1]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. +**[1]:** The cardinality of `error.type` SHOULD be low. -**[2]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. - -**[3]:** The `error.type` SHOULD be predictable, and SHOULD have low cardinality. - -When `error.type` is set to a type (e.g., an exception type), its -canonical class name identifying the type within the artifact SHOULD be used. +When working across multiple models, it is RECOMMENDED to use a common set of error types. -Instrumentations SHOULD document the list of errors they report. +Additional details may be captured in domain-specific attributes. -The cardinality of `error.type` within one instrumentation library SHOULD be low. -Telemetry consumers that aggregate data from multiple instrumentation libraries and applications -should be prepared for `error.type` to have high cardinality at query time when no -additional filters are applied.
- -If the operation has completed successfully, instrumentations SHOULD NOT set `error.type`. - -If a specific domain defines its own set of error identifiers (such as HTTP or gRPC status codes), -it's RECOMMENDED to: +**[2]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. -* Use a domain-specific attribute -* Set `error.type` to capture all errors, regardless of whether they are defined within the domain-specific set or not. +**[3]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. `gen_ai.system` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml index 6a2afb927b..c636160911 100644 --- a/model/metrics/gen-ai.yaml +++ b/model/metrics/gen-ai.yaml @@ -6,6 +6,12 @@ groups: - ref: error.type requirement_level: conditionally_required: "if the operation ended in an error" + note: | + The cardinality of `error.type` SHOULD be low. + + When working across multiple models, it is RECOMMENDED to use a common set of error types. + + Additional details may be captured in domain-specific attributes. 
- ref: server.address requirement_level: recommended - ref: server.port From 9ac4406b7660766c33e007910dbd7337f1e19d3f Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Tue, 7 May 2024 02:45:13 +0000 Subject: [PATCH 18/31] Allow custom values to true --- model/registry/gen-ai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/registry/gen-ai.yaml b/model/registry/gen-ai.yaml index 0f9f3fc505..efdcff1381 100644 --- a/model/registry/gen-ai.yaml +++ b/model/registry/gen-ai.yaml @@ -78,7 +78,7 @@ groups: - id: token.type stability: experimental type: - allow_custom_values: false + allow_custom_values: true members: - id: input stability: experimental From b2828f82da8205fd85174bb38d6c89a30fbc11e7 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Tue, 7 May 2024 03:05:11 +0000 Subject: [PATCH 19/31] Add ExplicitBucketBoundaries --- docs/gen-ai/gen-ai-metrics.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index 05b239eeb6..e6b25be2b4 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -30,8 +30,8 @@ distinct action within a larger Generative AI workflow. ### Metric: `gen_ai.token.usage` -This metric is [required][MetricRequired] when an operation involves the usage -of tokens. + +This metric SHOULD be specified with [ExplicitBucketBoundaries] of [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]. | Name | Instrument Type | Unit (UCUM) | Description | Stability | @@ -85,6 +85,8 @@ Additional details may be captured in domain-specific attributes. This metric is [required][MetricRequired]. +This metric SHOULD be specified with [ExplicitBucketBoundaries] of [ 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12,10.24, 20.48, 40.96, 81.92]. 
+ | Name | Instrument Type | Unit (UCUM) | Description | Stability | | -------- | --------------- | ----------- | -------------- | --------- | @@ -127,3 +129,4 @@ Additional details may be captured in domain-specific attributes. [DocumentStatus]: https://github.com/open-telemetry/opentelemetry-specification/tree/v1.22.0/specification/document-status.md [MetricRequired]: /docs/general/metric-requirement-level.md#required +[ExplicitBucketBoundaries]: https://github.com/open-telemetry/opentelemetry-specification/tree/v1.31.0/specification/metrics/api.md#instrument-advisory-parameters From 979f7323777171a5d9ad27c9cf8cdc068628f0f9 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Tue, 7 May 2024 03:05:28 +0000 Subject: [PATCH 20/31] Make token metric recommended --- docs/gen-ai/gen-ai-metrics.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index e6b25be2b4..b26d21f7fb 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -30,6 +30,12 @@ distinct action within a larger Generative AI workflow. ### Metric: `gen_ai.token.usage` +This metric is [required][MetricRecommended] when an operation involves the usage +of tokens and the count is readily available. + +For example, if GenAI system returns usage information in the streaming response, it SHOULD be used. Or if GenAI system returns each token independently, instrumentation SHOULD count number of output tokens and record the result. + +If instrumentation cannot efficiently obtain number of input and/or output tokens, it MAY allow users to enable offline token counting. Otherwise it MUST NOT report usage metric. This metric SHOULD be specified with [ExplicitBucketBoundaries] of [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]. @@ -129,4 +135,5 @@ Additional details may be captured in domain-specific attributes. 
[DocumentStatus]: https://github.com/open-telemetry/opentelemetry-specification/tree/v1.22.0/specification/document-status.md [MetricRequired]: /docs/general/metric-requirement-level.md#required +[MetricRecommended]: /docs/general/metric-requirement-level.md#recommended [ExplicitBucketBoundaries]: https://github.com/open-telemetry/opentelemetry-specification/tree/v1.31.0/specification/metrics/api.md#instrument-advisory-parameters From 84d78eb4b184e79bfe24920062186e09a79b5fed Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Tue, 7 May 2024 03:07:02 +0000 Subject: [PATCH 21/31] Remove trailing space --- model/metrics/gen-ai.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml index c636160911..c6b992b2e7 100644 --- a/model/metrics/gen-ai.yaml +++ b/model/metrics/gen-ai.yaml @@ -15,8 +15,8 @@ groups: - ref: server.address requirement_level: recommended - ref: server.port - requirement_level: - conditionally_required: If `server.address` is set. + requirement_level: + conditionally_required: If `server.address` is set. - ref: gen_ai.response.model requirement_level: recommended - ref: gen_ai.request.model From 72cc2c91f4a1f386da1df55d5de1a8c2847440b2 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 8 May 2024 08:00:34 +0000 Subject: [PATCH 22/31] Fix recommended label. --- docs/gen-ai/gen-ai-metrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index b26d21f7fb..2271455cd9 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -30,7 +30,7 @@ distinct action within a larger Generative AI workflow. ### Metric: `gen_ai.token.usage` -This metric is [required][MetricRecommended] when an operation involves the usage +This metric is [recommended][MetricRecommended] when an operation involves the usage of tokens and the count is readily available.
For example, if GenAI system returns usage information in the streaming response, it SHOULD be used. Or if GenAI system returns each token independently, instrumentation SHOULD count number of output tokens and record the result. From 04c6fb3c0fab8aad235ed25a86ce5f399f179fd0 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 8 May 2024 08:00:54 +0000 Subject: [PATCH 23/31] Update metrics to be for 'client' --- docs/gen-ai/gen-ai-metrics.md | 26 +++++++++++++------------- model/metrics/gen-ai.yaml | 8 ++++---- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index 2271455cd9..9f8743c0b7 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -6,7 +6,7 @@ linkTitle: Generative AI metrics **Status**: [Experimental][DocumentStatus] -The conventions described in this section are specific to Generative AI +The conventions described in this section are specific to Generative AI client applications. **Disclaimer:** These are initial Generative AI client metric instruments @@ -16,19 +16,19 @@ and attributes but more may be added in the future. -- [Generative AI Operations](#generative-ai-operations) - - [Metric: `gen_ai.token.usage`](#metric-gen_aitokenusage) - - [Metric: `gen_ai.operation.duration`](#metric-gen_aioperationduration) +- [Generative AI Client Metrics](#generative-ai-client-metrics) + - [Metric: `gen_ai.client.token.usage`](#metric-gen_aiclienttokenusage) + - [Metric: `gen_ai.client.operation.duration`](#metric-gen_aiclientoperationduration) -## Generative AI Operations +## Generative AI Client Metrics The following metric instruments describe Generative AI operations. An operation may be a request to an LLM, a function call, or some other distinct action within a larger Generative AI workflow. 
-### Metric: `gen_ai.token.usage` +### Metric: `gen_ai.client.token.usage` This metric is [recommended][MetricRecommended] when an operation involves the usage of tokens and the count is readily available. @@ -39,13 +39,13 @@ If instrumentation cannot efficiently obtain number of input and/or output token This metric SHOULD be specified with [ExplicitBucketBoundaries] of [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]. - + | Name | Instrument Type | Unit (UCUM) | Description | Stability | | -------- | --------------- | ----------- | -------------- | --------- | -| `gen_ai.token.usage` | Histogram | `{token}` | Measures number of input and output tokens used | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `gen_ai.client.token.usage` | Histogram | `{token}` | Measures number of input and output tokens used | ![Experimental](https://img.shields.io/badge/-experimental-blue) | - + | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| | [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | @@ -87,19 +87,19 @@ Additional details may be captured in domain-specific attributes. | `_OTHER` | A fallback error value to be used when the instrumentation doesn't define a custom value. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -### Metric: `gen_ai.operation.duration` +### Metric: `gen_ai.client.operation.duration` This metric is [required][MetricRequired]. This metric SHOULD be specified with [ExplicitBucketBoundaries] of [ 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12,10.24, 20.48, 40.96, 81.92]. 
- + | Name | Instrument Type | Unit (UCUM) | Description | Stability | | -------- | --------------- | ----------- | -------------- | --------- | -| `gen_ai.operation.duration` | Histogram | `s` | GenAI operation duration | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `gen_ai.client.operation.duration` | Histogram | `s` | GenAI operation duration | ![Experimental](https://img.shields.io/badge/-experimental-blue) | - + | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| | [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml index c6b992b2e7..3534da63e6 100644 --- a/model/metrics/gen-ai.yaml +++ b/model/metrics/gen-ai.yaml @@ -25,9 +25,9 @@ groups: requirement_level: required - ref: gen_ai.operation.name requirement_level: required - - id: metric.gen_ai.token.usage + - id: metric.gen_ai.client.token.usage type: metric - metric_name: gen_ai.token.usage + metric_name: gen_ai.client.token.usage brief: 'Measures number of input and output tokens used' instrument: histogram unit: "{token}" @@ -36,9 +36,9 @@ groups: attributes: - ref: gen_ai.token.type requirement_level: required - - id: metric.gen_ai.operation.duration + - id: metric.gen_ai.client.operation.duration type: metric - metric_name: gen_ai.operation.duration + metric_name: gen_ai.client.operation.duration brief: 'GenAI operation duration' instrument: histogram unit: "s" From 7db4c527ad9692a76f89afabaea06df600cd25ca Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 8 May 2024 08:02:47 +0000 Subject: [PATCH 24/31] Update title --- docs/gen-ai/gen-ai-metrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index 9f8743c0b7..c18d7341d0 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -2,7 +2,7 @@ linkTitle: Generative AI metrics ---> -# Semantic Conventions for Generative AI Metrics +# Semantic Conventions for Generative AI Client Metrics **Status**: [Experimental][DocumentStatus] From d9ab4d8c2b401a85278fa8e2db858c949eab75a3 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 8 May 2024 08:08:33 +0000 Subject: [PATCH 25/31] Update registry table --- docs/attributes-registry/gen-ai.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/attributes-registry/gen-ai.md b/docs/attributes-registry/gen-ai.md index a58d9c3989..0bde9ff5fd 100644 --- a/docs/attributes-registry/gen-ai.md +++ b/docs/attributes-registry/gen-ai.md @@ -13,6 +13,7 @@ This document defines the attributes used to describe telemetry in the context o | Attribute | Type | Description | Examples | Stability | | -------------------------------- | -------- | ------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------- | ---------------------------------------------------------------- | | `gen_ai.completion` | string | The full response received from the LLM. [1] | `[{'role': 'assistant', 'content': 'The capital of France is Paris.'}]` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `gen_ai.operation.name` | string | The name of the operation being performed. | `chat`; `completion` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | `gen_ai.prompt` | string | The full prompt sent to an LLM. [2] | `[{'role': 'user', 'content': 'What is the capital of France?'}]` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | `gen_ai.request.max_tokens` | int | The maximum number of tokens the LLM generates for a request. 
| `100` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | `gen_ai.request.model` | string | The name of the LLM a request is being made to. | `gpt-4` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | @@ -22,6 +23,7 @@ This document defines the attributes used to describe telemetry in the context o | `gen_ai.response.id` | string | The unique identifier for the completion. | `chatcmpl-123` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | `gen_ai.response.model` | string | The name of the LLM a response was generated from. | `gpt-4-0613` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | `gen_ai.system` | string | The Generative AI product as identified by the client instrumentation. [3] | `openai` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `gen_ai.token.type` | string | The type of token being counted. | `input`; `output` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | `gen_ai.usage.completion_tokens` | int | The number of tokens used in the LLM response (completion). | `180` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | `gen_ai.usage.prompt_tokens` | int | The number of tokens used in the LLM prompt. | `100` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | @@ -36,3 +38,10 @@ This document defines the attributes used to describe telemetry in the context o | Value | Description | Stability | | -------- | ----------- | ---------------------------------------------------------------- | | `openai` | OpenAI | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + +`gen_ai.token.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. 
+ +| Value | Description | Stability | +| -------- | ------------------------------------------ | ---------------------------------------------------------------- | +| `input` | Input tokens (prompt, input, etc.) | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| `output` | Output tokens (completion, response, etc.) | ![Experimental](https://img.shields.io/badge/-experimental-blue) | From 58b10b309f148480e586d53eb687687fe5204ec4 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 15 May 2024 08:31:17 +0000 Subject: [PATCH 26/31] Move error.type from common to duration metric. --- docs/gen-ai/gen-ai-metrics.md | 21 ++++----------------- model/metrics/gen-ai.yaml | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index c18d7341d0..19a1f122a9 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -52,20 +52,13 @@ This metric SHOULD be specified with [ExplicitBucketBoundaries] of [1, 4, 16, 64 | [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.token.type`](/docs/attributes-registry/gen-ai.md) | string | The type of token being counted. | `input`; `output` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`error.type`](/docs/attributes-registry/error.md) | string | Describes a class of error the operation ended with. 
[1] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. [2] | `80`; `8080`; `443` | `Conditionally Required` If `server.address` is set. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. [1] | `80`; `8080`; `443` | `Conditionally Required` If `server.address` is set. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`gen_ai.response.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [3] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Recommended` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [2] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Recommended` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -**[1]:** The cardinality of `error.type` SHOULD be low. +**[1]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. -When working across multiple models, it is RECOMMENDED to use a common set of error types. - -Additional details may be captured in domain-specific attributes.
- -**[2]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. - -**[3]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. +**[2]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. `gen_ai.system` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. @@ -79,12 +72,6 @@ Additional details may be captured in domain-specific attributes. |---|---|---| | `input` | Input tokens (prompt, input, etc.) | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | `output` | Output tokens (completion, response, etc.) | ![Experimental](https://img.shields.io/badge/-experimental-blue) | - -`error.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. - -| Value | Description | Stability | -|---|---|---| -| `_OTHER` | A fallback error value to be used when the instrumentation doesn't define a custom value. 
| ![Stable](https://img.shields.io/badge/-stable-lightgreen) | ### Metric: `gen_ai.client.operation.duration` diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml index 3534da63e6..2af68d3d6a 100644 --- a/model/metrics/gen-ai.yaml +++ b/model/metrics/gen-ai.yaml @@ -3,15 +3,6 @@ groups: type: attribute_group brief: 'This group describes GenAI metrics attributes' attributes: - - ref: error.type - requirement_level: - conditionally_required: "if the operation ended in an error" - note: | - The cardinality of `error.type` SHOULD be low. - - When working across multiple models, it is RECOMMENDED to use a common set of error types. - - Additional details may be captured in domain-specific attributes. - ref: server.address requirement_level: recommended - ref: server.port @@ -44,3 +35,13 @@ groups: unit: "s" stability: experimental extends: metric_attributes.gen_ai + attributes: + - ref: error.type + requirement_level: + conditionally_required: "if the operation ended in an error" + note: | + The cardinality of `error.type` SHOULD be low. + + When working across multiple models, it is RECOMMENDED to use a common set of error types. + + Additional details may be captured in domain-specific attributes. From b361e4a3953d7f133bf3b4c7c21373edfd1d02d9 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 15 May 2024 08:40:04 +0000 Subject: [PATCH 27/31] Add clarifation on used vs billed tokens. --- docs/gen-ai/gen-ai-metrics.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index 19a1f122a9..8c104e0b74 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -37,6 +37,8 @@ For example, if GenAI system returns usage information in the streaming response If instrumentation cannot efficiently obtain number of input and/or output tokens, it MAY allow users to enable offline token counting. Otherwise it MUST NOT report usage metric. 
+When systems report both used tokens and billable tokens, instrumentation MUST report billable tokens. + This metric SHOULD be specified with [ExplicitBucketBoundaries] of [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]. From aa02859ca7832cff1c0a111523afe1800e1c6b76 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Wed, 22 May 2024 08:30:25 +0000 Subject: [PATCH 28/31] Regenerate tables --- docs/gen-ai/gen-ai-metrics.md | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index 8c104e0b74..06b2ef3fc0 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -52,15 +52,17 @@ This metric SHOULD be specified with [ExplicitBucketBoundaries] of [1, 4, 16, 64 |---|---|---|---|---|---| | [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The Generative AI product as identified by the client instrumentation. [1] | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.token.type`](/docs/attributes-registry/gen-ai.md) | string | The type of token being counted. 
| `input`; `output` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. [1] | `80`; `8080`; `443` | `Conditionally Required` If `server.address` is set. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. [2] | `80`; `8080`; `443` | `Conditionally Required` If `server.address` is set. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`gen_ai.response.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [2] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Recommended` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [3] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Recommended` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -**[1]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. +**[1]:** The actual GenAI product may differ from the one identified by the client. For example, when using OpenAI client libraries to communicate with Mistral, the `gen_ai.system` is set to `openai` based on the instrumentation's best knowledge.
-**[2]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. +**[2]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. + +**[3]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. `gen_ai.system` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. @@ -93,21 +95,23 @@ This metric SHOULD be specified with [ExplicitBucketBoundaries] of [ 0.01, 0.02, |---|---|---|---|---|---| | [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a request is being made to. | `gpt-4` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM foundation model vendor. | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`error.type`](/docs/attributes-registry/error.md) | string | Describes a class of error the operation ended with. [1] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | -| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. 
[2] | `80`; `8080`; `443` | `Conditionally Required` If `server.address` is set. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The Generative AI product as identified by the client instrumentation. [1] | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | +| [`error.type`](/docs/attributes-registry/error.md) | string | Describes a class of error the operation ended with. [2] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`server.port`](/docs/attributes-registry/server.md) | int | Server port number. [3] | `80`; `8080`; `443` | `Conditionally Required` If `server.address` is set. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | | [`gen_ai.response.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the LLM a response was generated from. | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | -| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [3] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Recommended` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | +| [`server.address`](/docs/attributes-registry/server.md) | string | Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name. [4] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Recommended` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | + +**[1]:** The actual GenAI product may differ from the one identified by the client.
For example, when using OpenAI client libraries to communicate with Mistral, the `gen_ai.system` is set to `openai` based on the instrumentation's best knowledge. -**[1]:** The cardinality of `error.type` SHOULD be low. +**[2]:** The cardinality of `error.type` SHOULD be low. When working across multiple models, it is RECOMMENDED to use a common set of error types. Additional details may be captured in domain-specific attributes. -**[2]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. +**[3]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. -**[3]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. +**[4]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. `gen_ai.system` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. 
From b35181147bfdbaf89779be84b26758a830c99b5b Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Thu, 23 May 2024 01:22:19 +0000 Subject: [PATCH 29/31] Regenerate tables --- docs/gen-ai/gen-ai-metrics.md | 60 ++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index 06b2ef3fc0..e6f1e826ba 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -42,12 +42,29 @@ When systems report both used tokens and billable tokens, instrumentation MUST r This metric SHOULD be specified with [ExplicitBucketBoundaries] of [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]. + + + + + + | Name | Instrument Type | Unit (UCUM) | Description | Stability | | -------- | --------------- | ----------- | -------------- | --------- | | `gen_ai.client.token.usage` | Histogram | `{token}` | Measures number of input and output tokens used | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + + + + + + + + + + + | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| | [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | @@ -64,18 +81,27 @@ This metric SHOULD be specified with [ExplicitBucketBoundaries] of [1, 4, 16, 64 **[3]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. + + `gen_ai.system` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. 
| Value | Description | Stability | |---|---|---| | `openai` | OpenAI | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + `gen_ai.token.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. | Value | Description | Stability | |---|---|---| | `input` | Input tokens (prompt, input, etc.) | ![Experimental](https://img.shields.io/badge/-experimental-blue) | | `output` | Output tokens (completion, response, etc.) | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + + + + + + ### Metric: `gen_ai.client.operation.duration` @@ -85,12 +111,29 @@ This metric is [required][MetricRequired]. This metric SHOULD be specified with [ExplicitBucketBoundaries] of [ 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12,10.24, 20.48, 40.96, 81.92]. + + + + + + | Name | Instrument Type | Unit (UCUM) | Description | Stability | | -------- | --------------- | ----------- | -------------- | --------- | | `gen_ai.client.operation.duration` | Histogram | `s` | GenAI operation duration | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + + + + + + + + + + + | Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | |---|---|---|---|---|---| | [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. | `chat`; `completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) | @@ -113,17 +156,26 @@ Additional details may be captured in domain-specific attributes. **[4]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available. -`gen_ai.system` has the following list of well-known values. 
If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. -| Value | Description | Stability | -|---|---|---| -| `openai` | OpenAI | ![Experimental](https://img.shields.io/badge/-experimental-blue) | `error.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. | Value | Description | Stability | |---|---|---| | `_OTHER` | A fallback error value to be used when the instrumentation doesn't define a custom value. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) | + + +`gen_ai.system` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. + +| Value | Description | Stability | +|---|---|---| +| `openai` | OpenAI | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + + + + + + [DocumentStatus]: https://github.com/open-telemetry/opentelemetry-specification/tree/v1.22.0/specification/document-status.md From 2cd0b9032589ed169fa9ea0ff26ebb6356564a99 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Sat, 25 May 2024 00:06:33 +0000 Subject: [PATCH 30/31] Remove unnecessary elements --- model/registry/gen-ai.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/model/registry/gen-ai.yaml b/model/registry/gen-ai.yaml index efdcff1381..47ec27bf60 100644 --- a/model/registry/gen-ai.yaml +++ b/model/registry/gen-ai.yaml @@ -78,7 +78,6 @@ groups: - id: token.type stability: experimental type: - allow_custom_values: true members: - id: input stability: experimental @@ -90,7 +89,6 @@ groups: brief: 'Output tokens (completion, response, etc.)' brief: The type of token being counted. examples: ['input', 'output'] - tag: llm-generic-metrics - id: prompt stability: experimental type: string @@ -110,4 +108,3 @@ groups: type: string brief: The name of the operation being performed. 
examples: ['chat', 'completion'] - tag: llm-generic-metrics From fa99804988cc5106c637d78244de66074c82ae45 Mon Sep 17 00:00:00 2001 From: Drew Robbins Date: Sat, 25 May 2024 00:09:15 +0000 Subject: [PATCH 31/31] Update description for error.type --- docs/gen-ai/gen-ai-metrics.md | 8 +++----- model/metrics/gen-ai.yaml | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/docs/gen-ai/gen-ai-metrics.md b/docs/gen-ai/gen-ai-metrics.md index e6f1e826ba..e68f0bf183 100644 --- a/docs/gen-ai/gen-ai-metrics.md +++ b/docs/gen-ai/gen-ai-metrics.md @@ -146,11 +146,9 @@ This metric SHOULD be specified with [ExplicitBucketBoundaries] of [ 0.01, 0.02, **[1]:** The actual GenAI product may differ from the one identified by the client. For example, when using OpenAI client libraries to communicate with Mistral, the `gen_ai.system` is set to `openai` based on the instrumentation's best knowledge. -**[2]:** The cardinality of `error.type` SHOULD be low. - -When working across multiple models, it is RECOMMENDED to use a common set of error types. - -Additional details may be captured in domain-specific attributes. +**[2]:** The `error.type` SHOULD match the error code returned by the Generative AI provider or the client library, +the canonical name of the exception that occurred, or another low-cardinality error identifier. +Instrumentations SHOULD document the list of errors they report. **[3]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available. diff --git a/model/metrics/gen-ai.yaml b/model/metrics/gen-ai.yaml index 2af68d3d6a..8398e8f0c6 100644 --- a/model/metrics/gen-ai.yaml +++ b/model/metrics/gen-ai.yaml @@ -40,8 +40,6 @@ groups: requirement_level: conditionally_required: "if the operation ended in an error" note: | - The cardinality of `error.type` SHOULD be low.
- - When working across multiple models, it is RECOMMENDED to use a common set of error types. - - Additional details may be captured in domain-specific attributes. + The `error.type` SHOULD match the error code returned by the Generative AI provider or the client library, + the canonical name of the exception that occurred, or another low-cardinality error identifier. + Instrumentations SHOULD document the list of errors they report.