Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(llama-index): extract token counts for groq when streaming #1174

Merged
merged 3 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ test = [
"llama-index == 0.11.0",
"llama-index-core >= 0.11.0",
"llama-index-llms-openai",
"llama-index-llms-groq",
"pytest-vcr",
"llama-index-multi-modal-llms-openai>=0.1.7",
"openinference-instrumentation-openai",
"opentelemetry-sdk",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,16 @@ def _extract_token_counts(self, response: Union[ChatResponse, CompletionResponse
):
for k, v in _get_token_counts(usage):
self[k] = v
if (
(raw := getattr(response, "raw", None))
and (model_extra := getattr(raw, "model_extra", None))
and hasattr(model_extra, "get")
and (x_groq := model_extra.get("x_groq"))
and hasattr(x_groq, "get")
and (usage := x_groq.get("usage"))
):
for k, v in _get_token_counts(usage):
self[k] = v
# Look for token counts in additional_kwargs of the completion payload
# This is needed for non-OpenAI models
if additional_kwargs := getattr(response, "additional_kwargs", None):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "llama3-8b-8192",
"stream": true, "temperature": 0.1}'
headers: {}
method: POST
uri: https://api.groq.com/openai/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"x_groq":{"id":"req_01jf8nmy68fxxvgnfqz4m90h20"}}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
It"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"''s"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
nice"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
to"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
meet"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
you"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
Is"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
there"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
something"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
I"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
can"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
help"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
you"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
with"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
or"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
would"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
you"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
like"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
to"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"
chat"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"x_groq":{"id":"req_01jf8nmy68fxxvgnfqz4m90h20","usage":{"queue_time":0.004289418,"prompt_tokens":12,"prompt_time":0.001769862,"completion_tokens":26,"completion_time":0.021666667,"total_tokens":38,"total_time":0.023436529}}}
data: [DONE]
'
headers: {}
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from typing import Iterator

import pytest
from llama_index.core.base.llms.types import ChatMessage
from llama_index.llms.groq import Groq # type: ignore[import-untyped]
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from opentelemetry.trace import TracerProvider

from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from openinference.semconv.trace import SpanAttributes


@pytest.mark.vcr(
decode_compressed_response=True,
before_record_request=lambda _: _.headers.clear() or _,
before_record_response=lambda _: {**_, "headers": {}},
)
async def test_groq_astream_chat_token_count(
in_memory_span_exporter: InMemorySpanExporter,
) -> None:
result = await Groq(model="llama3-8b-8192").astream_chat([ChatMessage(content="Hello!")])
async for _ in result:
pass
span = in_memory_span_exporter.get_finished_spans()[0]
assert span.attributes
assert span.attributes.get(LLM_TOKEN_COUNT_TOTAL)
assert span.attributes.get(LLM_TOKEN_COUNT_COMPLETION)
assert span.attributes.get(LLM_TOKEN_COUNT_TOTAL)


@pytest.fixture(autouse=True)
def instrument(
tracer_provider: TracerProvider,
in_memory_span_exporter: InMemorySpanExporter,
) -> Iterator[None]:
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)
yield
LlamaIndexInstrumentor().uninstrument()


LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
2 changes: 1 addition & 1 deletion python/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ envlist =
py3{9,12}-ci-{mistralai,mistralai-latest}
py3{8,12}-ci-{openai,openai-latest}
py3{8,12}-ci-{vertexai,vertexai-latest}
py3{8,12}-ci-{llama_index,llama_index-latest}
py3{9,12}-ci-{llama_index,llama_index-latest}
py3{9,12}-ci-{dspy,dspy-latest}
py3{9,12}-ci-{langchain,langchain-latest}
; py3{9,12}-ci-{guardrails,guardrails-latest}
Expand Down
Loading