diff --git a/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml b/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml
index 14501977d..99abf5dd5 100644
--- a/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml
+++ b/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml
@@ -31,6 +31,7 @@ dependencies = [
   "openinference-instrumentation>=0.1.17",
   "openinference-semantic-conventions>=0.1.9",
   "wrapt",
+  "setuptools",
 ]
 
 [project.optional-dependencies]
@@ -39,6 +40,7 @@ test = [
   "opentelemetry-sdk",
   "opentelemetry-instrumentation-httpx",
   "tenacity",
+  "tokenizers==0.20.3; python_version == '3.8'"
 ]
 
 [project.urls]
diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py
index f15af0bcf..5e5782d81 100644
--- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py
+++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py
@@ -1,7 +1,18 @@
 import json
 from enum import Enum
 from functools import wraps
-from typing import Any, Callable, Collection, Dict, Iterable, Iterator, Mapping, Tuple, TypeVar
+from typing import (
+    Any,
+    Callable,
+    Collection,
+    Dict,
+    Iterable,
+    Iterator,
+    Mapping,
+    Tuple,
+    TypeVar,
+    Union,
+)
 
 from openai.types.image import Image
 from opentelemetry import context as context_api
@@ -15,6 +26,7 @@
     Choices,
     EmbeddingResponse,
     ImageResponse,
+    Message,
     ModelResponse,
 )
 from openinference.instrumentation import (
@@ -48,7 +60,7 @@ def is_iterable_of(lst: Iterable[object], tp: T) -> bool:
 
 
 def _get_attributes_from_message_param(
-    message: Mapping[str, Any],
+    message: Union[Mapping[str, Any], Message],
 ) -> Iterator[Tuple[str, AttributeValue]]:
     if not hasattr(message, "get"):
         return
@@ -153,10 +165,18 @@ def _instrument_func_type_image_generation(span: trace_api.Span, kwargs: Dict[st
 
 def _finalize_span(span: trace_api.Span, result: Any) -> None:
     if isinstance(result, ModelResponse):
-        if (choices := result.choices) and len(choices) > 0:
-            choice = choices[0]
-            if isinstance(choice, Choices) and (output := choice.message.content):
+        for idx, choice in enumerate(result.choices):
+            if not isinstance(choice, Choices):
+                continue
+
+            if idx == 0 and choice.message and (output := choice.message.content):
                 _set_span_attribute(span, SpanAttributes.OUTPUT_VALUE, output)
+
+            for key, value in _get_attributes_from_message_param(choice.message):
+                _set_span_attribute(
+                    span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value
+                )
+
     elif isinstance(result, EmbeddingResponse):
         if result_data := result.data:
             first_embedding = result_data[0]
diff --git a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py
index 2240eb027..6321a8d03 100644
--- a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py
+++ b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py
@@ -5,7 +5,7 @@
 import litellm
 import pytest
 from litellm import OpenAIChatCompletion  # type: ignore[attr-defined]
-from litellm.types.utils import EmbeddingResponse, ImageResponse, Usage
+from litellm.types.utils import EmbeddingResponse, ImageObject, ImageResponse, Usage
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import SimpleSpanProcessor
@@ -47,6 +47,7 @@ def test_oitracer(
 
 
 @pytest.mark.parametrize("use_context_attributes", [False, True])
+@pytest.mark.parametrize("n", [1, 5])
 def test_completion(
     tracer_provider: TracerProvider,
     in_memory_span_exporter: InMemorySpanExporter,
@@ -58,11 +59,13 @@
     prompt_template: str,
     prompt_template_version: str,
     prompt_template_variables: Dict[str, Any],
+    n: int,
 ) -> None:
     in_memory_span_exporter.clear()
     LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)
 
     input_messages = [{"content": "What's the capital of China?", "role": "user"}]
+    response = None
     if use_context_attributes:
         with using_attributes(
@@ -73,15 +76,17 @@
             prompt_template_version=prompt_template_version,
             prompt_template_variables=prompt_template_variables,
         ):
-            litellm.completion(
+            response = litellm.completion(
                 model="gpt-3.5-turbo",
                 messages=input_messages,
+                n=n,
                 mock_response="Beijing",
             )
     else:
-        litellm.completion(
+        response = litellm.completion(
             model="gpt-3.5-turbo",
             messages=input_messages,
+            n=n,
             mock_response="Beijing",
         )
 
@@ -94,6 +99,9 @@
     assert attributes.get(SpanAttributes.INPUT_VALUE) == json.dumps(input_messages)
     assert attributes.get(SpanAttributes.OUTPUT_VALUE) == "Beijing"
 
+    for i, choice in enumerate(response["choices"]):
+        _check_llm_message(SpanAttributes.LLM_OUTPUT_MESSAGES, i, attributes, choice.message)
+
     assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_PROMPT) == 10
     assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_COMPLETION) == 20
     assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_TOTAL) == 30
@@ -540,7 +548,7 @@ def test_image_generation_url(
 
     mock_response_image_gen = ImageResponse(
         created=1722359754,
-        data=[{"b64_json": None, "revised_prompt": None, "url": "https://dummy-url"}],
+        data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")],  # type: ignore
     )
 
     with patch.object(
@@ -610,7 +618,7 @@
 
     mock_response_image_gen = ImageResponse(
         created=1722359754,
-        data=[{"b64_json": "dummy_b64_json", "revised_prompt": None, "url": None}],
+        data=[ImageObject(b64_json="dummy_b64_json", revised_prompt=None, url=None)],  # type: ignore
     )
 
     with patch.object(
@@ -680,7 +688,7 @@ async def test_aimage_generation(
 
     mock_response_image_gen = ImageResponse(
         created=1722359754,
-        data=[{"b64_json": None, "revised_prompt": None, "url": "https://dummy-url"}],
+        data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")],  # type: ignore
     )
     with patch.object(
         OpenAIChatCompletion, "aimage_generation", return_value=mock_response_image_gen
diff --git a/python/tox.ini b/python/tox.ini
index 387e96b00..61002a953 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -78,7 +78,7 @@ commands_pre =
     groq: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-groq[test]
     groq-latest: uv pip install -U groq 'httpx<0.28'
     litellm: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-litellm[test]
-    litellm-latest: uv pip install -U litellm 'httpx<0.28'
+    litellm-latest: uv pip install -U --only-binary=tokenizers litellm 'httpx<0.28' 'tokenizers<=0.20.3'
 ;    instructor: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-instructor[test]
 ;    instructor-latest: uv pip install -U instructor
     anthropic: uv pip uninstall -r test-requirements.txt
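
For context, here is a minimal sketch (not part of the patch) of what the reworked _finalize_span records once a completion returns several choices. It mirrors the test fixtures above; the flattened keys in the comments assume the usual OpenInference attribute strings ("llm.output_messages", "message.role", "message.content"), and the values shown are illustrative rather than captured output.

    import litellm
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

    from openinference.instrumentation.litellm import LiteLLMInstrumentor

    # Export spans to memory so the recorded attributes can be inspected directly.
    exporter = InMemorySpanExporter()
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))
    LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)

    # mock_response avoids a real API call; n=2 requests two choices.
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"content": "What's the capital of China?", "role": "user"}],
        n=2,
        mock_response="Beijing",
    )

    span = exporter.get_finished_spans()[0]
    # Every choice now lands under its own index, e.g.:
    #   llm.output_messages.0.message.role    -> "assistant"
    #   llm.output_messages.0.message.content -> "Beijing"
    #   llm.output_messages.1.message.role    -> "assistant"
    #   llm.output_messages.1.message.content -> "Beijing"
    print({k: v for k, v in span.attributes.items() if "llm.output_messages" in k})

Note that output.value still carries only the first choice's content, matching the idx == 0 guard in the loop.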
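
A second sketch shows why the widened Union[Mapping[str, Any], Message] signature needs no change in the function body: litellm's Message passes the hasattr(message, "get") guard because it supports mapping-style access, so dict message params and response Message objects share one extraction path. _get_attributes_from_message_param is a private helper, imported here purely for illustration.

    from litellm.types.utils import Message

    from openinference.instrumentation.litellm import _get_attributes_from_message_param

    # Both calls yield ("message.role", ...) and ("message.content", ...) pairs;
    # callers prepend the llm.output_messages.{idx}. prefix as in the diff above.
    print(dict(_get_attributes_from_message_param({"role": "user", "content": "hi"})))
    print(dict(_get_attributes_from_message_param(Message(role="assistant", content="Beijing"))))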