feat(semcov): llm system and llm provider conventions (#1071)

Arize-ai · Oct 18, 2024 · d200d85 · d200d85
1 parent 67da961
commit d200d85
Show file tree

Hide file tree

Showing 5 changed files with 100 additions and 12 deletions.
diff --git a/cspell.json b/cspell.json
@@ -10,9 +10,11 @@
         "dspy",
         "instrumentator",
         "Instrumentor",
+        "instrumentors",
         "langchain",
         "litellm",
         "llms",
+        "mistralai",
         "nextjs",
         "openinference",
         "otel",
@@ -21,9 +23,8 @@
         "Streamlit",
         "uninstrument",
         "unsuppress",
+        "vertexai",
         "weaviate"
     ],
-    "flagWords": [
-        "hte"
-    ]
+    "flagWords": ["hte"]
 }
diff --git a/js/.changeset/wild-maps-notice.md b/js/.changeset/wild-maps-notice.md
@@ -0,0 +1,5 @@
+---
+"@arizeai/openinference-semantic-conventions": minor
+---
+
+Add semantic conventions for llm.system and llm.provider
diff --git a/js/packages/openinference-semantic-conventions/src/trace/SemanticConventions.ts b/js/packages/openinference-semantic-conventions/src/trace/SemanticConventions.ts
@@ -159,6 +159,17 @@ export const LLM_OUTPUT_MESSAGES =
 export const LLM_MODEL_NAME =
   `${SemanticAttributePrefixes.llm}.${LLMAttributePostfixes.model_name}` as const;
 
+/**
+ * The hosting provider of the LLM inference, e.g. a cloud provider such as `azure`
+ */
+export const LLM_PROVIDER =
+  `${SemanticAttributePrefixes.llm}.provider` as const;
+
+/**
+ * The AI product as identified by the client or server
+ */
+export const LLM_SYSTEM = `${SemanticAttributePrefixes.llm}.system` as const;
+
 /** Token count for the completion by the llm */
 export const LLM_TOKEN_COUNT_COMPLETION =
   `${SemanticAttributePrefixes.llm}.${LLMAttributePostfixes.token_count}.completion` as const;
@@ -471,3 +482,23 @@ export enum MimeType {
   TEXT = "text/plain",
   JSON = "application/json",
 }
+
+export enum LLMSystem {
+  OPENAI = "openai",
+  ANTHROPIC = "anthropic",
+  MISTRALAI = "mistralai",
+  COHERE = "cohere",
+  VERTEXAI = "vertexai",
+}
+
+export enum LLMProvider {
+  OPENAI = "openai",
+  ANTHROPIC = "anthropic",
+  MISTRALAI = "mistralai",
+  COHERE = "cohere",
+  VERTEXAI = "vertexai",
+  // Cloud Providers of LLM systems
+  GCP = "gcp",
+  AWS = "aws",
+  AZURE = "azure",
+}
diff --git a/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py b/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py
@@ -45,6 +45,14 @@ class SpanAttributes:
     """
     The name of the model being used.
     """
+    LLM_PROVIDER = "llm.provider"
+    """
+    The provider of the model, such as OpenAI, Azure, Google, etc.
+    """
+    LLM_SYSTEM = "llm.system"
+    """
+    The AI product as identified by the client or server
+    """
     LLM_PROMPTS = "llm.prompts"
     """
     Prompts provided to a completions API.
@@ -301,3 +309,21 @@ class OpenInferenceSpanKindValues(Enum):
 class OpenInferenceMimeTypeValues(Enum):
     TEXT = "text/plain"
     JSON = "application/json"
+
+
+class OpenInferenceLLMSystemValues(Enum):
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    COHERE = "cohere"
+    MISTRALAI = "mistralai"
+    VERTEXAI = "vertexai"
+
+
+class OpenInferenceLLMProviderValues(Enum):
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    COHERE = "cohere"
+    MISTRALAI = "mistralai"
+    GOOGLE = "google"
+    AZURE = "azure"
+    AWS = "aws"
diff --git a/spec/semantic_conventions.md b/spec/semantic_conventions.md
@@ -8,7 +8,7 @@ operations used by applications. These conventions are used to populate the `att
 The following attributes are reserved and MUST be supported by all OpenInference Tracing SDKs:
 
 | Attribute                              | Type                        | Example                                                                    | Description                                                                   |
-|----------------------------------------| --------------------------- |----------------------------------------------------------------------------| ----------------------------------------------------------------------------- |
+| -------------------------------------- | --------------------------- | -------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
 | `document.content`                     | String                      | `"This is a sample document content."`                                     | The content of a retrieved document                                           |
 | `document.id`                          | String/Integer              | `"1234"` or `1`                                                            | Unique identifier for a document                                              |
 | `document.metadata`                    | JSON String                 | `"{'author': 'John Doe', 'date': '2023-09-09'}"`                           | Metadata associated with a document                                           |
@@ -27,6 +27,8 @@ The following attributes are reserved and MUST be supported by all OpenInference
 | `llm.function_call`                    | JSON String                 | `"{function_name: 'add', args: [1, 2]}"`                                   | Object recording details of a function call in models or APIs                 |
 | `llm.input_messages`                   | List of objects<sup>†</sup> | `[{"message.role": "user", "message.content": "hello"}]`                   | List of messages sent to the LLM in a chat API request                        |
 | `llm.invocation_parameters`            | JSON string                 | `"{model_name: 'gpt-3', temperature: 0.7}"`                                | Parameters used during the invocation of an LLM or API                        |
+| `llm.provider`                         | String                      | `openai`, `azure`                                                          | The hosting provider of the LLM, e.g. `azure`                                 |
+| `llm.system`                           | String                      | `anthropic`, `openai`                                                      | The AI product as identified by the client or server instrumentation.         |
 | `llm.model_name`                       | String                      | `"gpt-3.5-turbo"`                                                          | The name of the language model being utilized                                 |
 | `llm.output_messages`                  | List of objects<sup>†</sup> | `[{"message.role": "user", "message.content": "hello"}]`                   | List of messages received from the LLM in a chat API request                  |
 | `llm.prompt_template.template`         | String                      | `"Weather forecast for {city} on {date}"`                                  | Template used to generate prompts as Python f-strings                         |
@@ -68,6 +70,29 @@ The following attributes are reserved and MUST be supported by all OpenInference
 <sup>†</sup> To get a list of objects exported as OpenTelemetry span attributes, flattening of the list is necessary as
 shown in the examples below.
 
+`llm.system` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used.
+
+| Value       | Description |
+| ----------- | ----------- |
+| `anthropic` | Anthropic   |
+| `openai`    | OpenAI      |
+| `vertexai`  | Vertex AI   |
+| `cohere`    | Cohere      |
+| `mistralai` | Mistral AI  |
+
+`llm.provider` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used.
+
+| Value       | Description     |
+| ----------- | --------------- |
+| `anthropic` | Anthropic       |
+| `openai`    | OpenAI          |
+| `vertexai`  | Vertex AI       |
+| `cohere`    | Cohere          |
+| `mistralai` | Mistral AI      |
+| `azure`     | Azure           |
+| `google`    | Google (Vertex) |
+| `aws`       | AWS Bedrock     |
+
 #### Python
 
 ```python
@@ -83,17 +108,17 @@ for i, obj in enumerate(messages):
 
 ```javascript
 const messages = [
-  { "message.role": "user", "message.content": "hello" },
-  {
-    "message.role": "assistant",
-    "message.content": "hi",
-  },
+    { "message.role": "user", "message.content": "hello" },
+    {
+        "message.role": "assistant",
+        "message.content": "hi",
+    },
 ];
 
 for (const [i, obj] of messages.entries()) {
-  for (const [key, value] of Object.entries(obj)) {
-    span.setAttribute(`input.messages.${i}.${key}`, value);
-  }
+    for (const [key, value] of Object.entries(obj)) {
+        span.setAttribute(`input.messages.${i}.${key}`, value);
+    }
 }
 ```