diff --git a/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml b/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml
index 14501977d..99abf5dd5 100644
--- a/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml
+++ b/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml
@@ -31,6 +31,7 @@ dependencies = [
   "openinference-instrumentation>=0.1.17",
   "openinference-semantic-conventions>=0.1.9",
   "wrapt",
+  "setuptools",
 ]
 
 [project.optional-dependencies]
@@ -39,6 +40,7 @@ test = [
   "opentelemetry-sdk",
   "opentelemetry-instrumentation-httpx",
   "tenacity",
+  "tokenizers==0.20.3; python_version == '3.8'"
 ]
 
 [project.urls]
diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py
index f15af0bcf..5e5782d81 100644
--- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py
+++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py
@@ -1,7 +1,18 @@
 import json
 from enum import Enum
 from functools import wraps
-from typing import Any, Callable, Collection, Dict, Iterable, Iterator, Mapping, Tuple, TypeVar
+from typing import (
+    Any,
+    Callable,
+    Collection,
+    Dict,
+    Iterable,
+    Iterator,
+    Mapping,
+    Tuple,
+    TypeVar,
+    Union,
+)
 
 from openai.types.image import Image
 from opentelemetry import context as context_api
@@ -15,6 +26,7 @@
     Choices,
     EmbeddingResponse,
     ImageResponse,
+    Message,
     ModelResponse,
 )
 from openinference.instrumentation import (
@@ -48,7 +60,7 @@ def is_iterable_of(lst: Iterable[object], tp: T) -> bool:
 
 
 def _get_attributes_from_message_param(
-    message: Mapping[str, Any],
+    message: Union[Mapping[str, Any], Message],
 ) -> Iterator[Tuple[str, AttributeValue]]:
     if not hasattr(message, "get"):
         return
@@ -153,10 +165,18 @@ def _instrument_func_type_image_generation(span: trace_api.Span, kwargs: Dict[st
 
 def _finalize_span(span: trace_api.Span, result: Any) -> None:
     if isinstance(result, ModelResponse):
-        if (choices := result.choices) and len(choices) > 0:
-            choice = choices[0]
-            if isinstance(choice, Choices) and (output := choice.message.content):
+        for idx, choice in enumerate(result.choices):
+            if not isinstance(choice, Choices):
+                continue
+
+            if idx == 0 and choice.message and (output := choice.message.content):
                 _set_span_attribute(span, SpanAttributes.OUTPUT_VALUE, output)
+
+            for key, value in _get_attributes_from_message_param(choice.message):
+                _set_span_attribute(
+                    span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value
+                )
+
     elif isinstance(result, EmbeddingResponse):
         if result_data := result.data:
             first_embedding = result_data[0]
diff --git a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py
index 2240eb027..6321a8d03 100644
--- a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py
+++ b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py
@@ -5,7 +5,7 @@
 import litellm
 import pytest
 from litellm import OpenAIChatCompletion  # type: ignore[attr-defined]
-from litellm.types.utils import EmbeddingResponse, ImageResponse, Usage
+from litellm.types.utils import EmbeddingResponse, ImageObject, ImageResponse, Usage
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import SimpleSpanProcessor
@@ -47,6 +47,7 @@ def test_oitracer(
 
 
 @pytest.mark.parametrize("use_context_attributes", [False, True])
+@pytest.mark.parametrize("n", [1, 5])
 def test_completion(
     tracer_provider: TracerProvider,
     in_memory_span_exporter: InMemorySpanExporter,
@@ -58,11 +59,13 @@
     prompt_template: str,
     prompt_template_version: str,
     prompt_template_variables: Dict[str, Any],
+    n: int,
 ) -> None:
     in_memory_span_exporter.clear()
     LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)
 
     input_messages = [{"content": "What's the capital of China?", "role": "user"}]
+    response = None
     if use_context_attributes:
         with using_attributes(
@@ -73,15 +76,17 @@
             prompt_template_version=prompt_template_version,
             prompt_template_variables=prompt_template_variables,
         ):
-            litellm.completion(
+            response = litellm.completion(
                 model="gpt-3.5-turbo",
                 messages=input_messages,
+                n=n,
                 mock_response="Beijing",
             )
     else:
-        litellm.completion(
+        response = litellm.completion(
             model="gpt-3.5-turbo",
             messages=input_messages,
+            n=n,
             mock_response="Beijing",
         )
 
@@ -94,6 +99,9 @@
     assert attributes.get(SpanAttributes.INPUT_VALUE) == json.dumps(input_messages)
     assert attributes.get(SpanAttributes.OUTPUT_VALUE) == "Beijing"
 
+    for i, choice in enumerate(response["choices"]):
+        _check_llm_message(SpanAttributes.LLM_OUTPUT_MESSAGES, i, attributes, choice.message)
+
     assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_PROMPT) == 10
     assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_COMPLETION) == 20
     assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_TOTAL) == 30
@@ -540,7 +548,7 @@ def test_image_generation_url(
 
     mock_response_image_gen = ImageResponse(
         created=1722359754,
-        data=[{"b64_json": None, "revised_prompt": None, "url": "https://dummy-url"}],
+        data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")],  # type: ignore
     )
 
     with patch.object(
@@ -610,7 +618,7 @@
 
     mock_response_image_gen = ImageResponse(
         created=1722359754,
-        data=[{"b64_json": "dummy_b64_json", "revised_prompt": None, "url": None}],
+        data=[ImageObject(b64_json="dummy_b64_json", revised_prompt=None, url=None)],  # type: ignore
     )
 
     with patch.object(
@@ -680,7 +688,7 @@ async def test_aimage_generation(
 
     mock_response_image_gen = ImageResponse(
         created=1722359754,
-        data=[{"b64_json": None, "revised_prompt": None, "url": "https://dummy-url"}],
+        data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")],  # type: ignore
     )
     with patch.object(
         OpenAIChatCompletion, "aimage_generation", return_value=mock_response_image_gen
diff --git a/python/tox.ini b/python/tox.ini
index 387e96b00..61002a953 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -78,7 +78,7 @@ commands_pre =
     groq: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-groq[test]
     groq-latest: uv pip install -U groq 'httpx<0.28'
     litellm: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-litellm[test]
-    litellm-latest: uv pip install -U litellm 'httpx<0.28'
+    litellm-latest: uv pip install -U --only-binary=tokenizers litellm 'httpx<0.28' 'tokenizers<=0.20.3'
 ;    instructor: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-instructor[test]
 ;    instructor-latest: uv pip install -U instructor
     anthropic: uv pip uninstall -r test-requirements.txt
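
For context, here is a minimal sketch (not part of the patch) of what the reworked _finalize_span records once a completion returns several choices. It mirrors the test fixtures above; the flattened keys in the comments assume the usual OpenInference attribute strings ("llm.output_messages", "message.role", "message.content"), and the values shown are illustrative rather than captured output.

    import litellm
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

    from openinference.instrumentation.litellm import LiteLLMInstrumentor

    # Export spans to memory so the recorded attributes can be inspected directly.
    exporter = InMemorySpanExporter()
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))
    LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)

    # mock_response avoids a real API call; n=2 requests two choices.
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"content": "What's the capital of China?", "role": "user"}],
        n=2,
        mock_response="Beijing",
    )

    span = exporter.get_finished_spans()[0]
    # Every choice now lands under its own index, e.g.:
    #   llm.output_messages.0.message.role    -> "assistant"
    #   llm.output_messages.0.message.content -> "Beijing"
    #   llm.output_messages.1.message.role    -> "assistant"
    #   llm.output_messages.1.message.content -> "Beijing"
    print({k: v for k, v in span.attributes.items() if "llm.output_messages" in k})

Note that output.value still carries only the first choice's content, matching the idx == 0 guard in the loop.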
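
A second sketch shows why the widened Union[Mapping[str, Any], Message] signature needs no change in the function body: litellm's Message passes the hasattr(message, "get") guard because it supports mapping-style access, so dict message params and response Message objects share one extraction path. _get_attributes_from_message_param is a private helper, imported here purely for illustration.

    from litellm.types.utils import Message

    from openinference.instrumentation.litellm import _get_attributes_from_message_param

    # Both calls yield ("message.role", ...) and ("message.content", ...) pairs;
    # callers prepend the llm.output_messages.{idx}. prefix as in the diff above.
    print(dict(_get_attributes_from_message_param({"role": "user", "content": "hi"})))
    print(dict(_get_attributes_from_message_param(Message(role="assistant", content="Beijing"))))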