fix(llama-index): extract token counts for groq when streaming (#1174)
RogerHYang authored Dec 17, 2024
1 parent 702c6aa commit 0aafe9c
Showing 5 changed files with 174 additions and 1 deletion.
@@ -43,6 +43,8 @@ test = [
"llama-index == 0.11.0",
"llama-index-core >= 0.11.0",
"llama-index-llms-openai",
"llama-index-llms-groq",
"pytest-vcr",
"llama-index-multi-modal-llms-openai>=0.1.7",
"openinference-instrumentation-openai",
"opentelemetry-sdk",
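Two test-only dependencies are added in this hunk: llama-index-llms-groq provides the Groq LLM class exercised by the new test further down, and pytest-vcr provides the pytest.mark.vcr marker that replays the recorded Groq response from the new cassette instead of calling the live API.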
@@ -535,6 +535,16 @@ def _extract_token_counts(self, response: Union[ChatResponse, CompletionResponse
        ):
            for k, v in _get_token_counts(usage):
                self[k] = v
        if (
            (raw := getattr(response, "raw", None))
            and (model_extra := getattr(raw, "model_extra", None))
            and hasattr(model_extra, "get")
            and (x_groq := model_extra.get("x_groq"))
            and hasattr(x_groq, "get")
            and (usage := x_groq.get("usage"))
        ):
            for k, v in _get_token_counts(usage):
                self[k] = v
        # Look for token counts in additional_kwargs of the completion payload
        # This is needed for non-OpenAI models
        if additional_kwargs := getattr(response, "additional_kwargs", None):
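The new branch above handles Groq's streamed responses, where the usage numbers are not in the usual usage attribute but arrive under an x_groq key that ends up in the raw payload's model_extra (the recorded cassette below shows the final chunk carrying this block). A rough, self-contained sketch of the same traversal (the SimpleNamespace stand-ins and the print are illustrative only, not part of the instrumentation):

from types import SimpleNamespace

# Stand-in for the final streamed chunk: pydantic v2 models expose fields outside the
# declared schema via `model_extra`, which is where Groq's "x_groq" block ends up.
raw = SimpleNamespace(
    model_extra={
        "x_groq": {
            "usage": {"prompt_tokens": 12, "completion_tokens": 26, "total_tokens": 38},
        },
    },
)
response = SimpleNamespace(raw=raw)

# Same guarded walk as the instrumentation change: every step is checked so that a
# missing or oddly shaped payload simply falls through without raising.
if (
    (raw := getattr(response, "raw", None))
    and (model_extra := getattr(raw, "model_extra", None))
    and hasattr(model_extra, "get")
    and (x_groq := model_extra.get("x_groq"))
    and hasattr(x_groq, "get")
    and (usage := x_groq.get("usage"))
):
    print(usage)  # {'prompt_tokens': 12, 'completion_tokens': 26, 'total_tokens': 38}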
@@ -0,0 +1,118 @@
interactions:
- request:
    body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "llama3-8b-8192",
      "stream": true, "temperature": 0.1}'
    headers: {}
    method: POST
    uri: https://api.groq.com/openai/v1/chat/completions
  response:
    body:
      string: 'data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"x_groq":{"id":"req_01jf8nmy68fxxvgnfqz4m90h20"}}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" It"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"''s"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" nice"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" to"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" meet"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" Is"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" there"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" something"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" can"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" help"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" with"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" or"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" would"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" like"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" to"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" chat"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"x_groq":{"id":"req_01jf8nmy68fxxvgnfqz4m90h20","usage":{"queue_time":0.004289418,"prompt_tokens":12,"prompt_time":0.001769862,"completion_tokens":26,"completion_time":0.021666667,"total_tokens":38,"total_time":0.023436529}}}

        data: [DONE]

        '
    headers: {}
    status:
      code: 200
      message: OK
version: 1
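Only the last chunk of the recorded stream (the one with "finish_reason":"stop") carries the x_groq.usage block; the instrumentation change above reads exactly this block once the stream finishes. A throwaway sketch of pulling the counts straight off that SSE line (the payload here is abbreviated by hand and purely illustrative):

import json

# Hand-abbreviated copy of the final "data:" line above; only the fields needed here are kept.
sse_line = (
    'data: {"choices":[{"index":0,"delta":{},"finish_reason":"stop"}],'
    '"x_groq":{"usage":{"prompt_tokens":12,"completion_tokens":26,"total_tokens":38}}}'
)

payload = json.loads(sse_line.removeprefix("data: "))  # str.removeprefix needs Python 3.9+
usage = payload["x_groq"]["usage"]
print(usage["prompt_tokens"], usage["completion_tokens"], usage["total_tokens"])  # 12 26 38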
@@ -0,0 +1,43 @@
from typing import Iterator

import pytest
from llama_index.core.base.llms.types import ChatMessage
from llama_index.llms.groq import Groq # type: ignore[import-untyped]
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from opentelemetry.trace import TracerProvider

from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from openinference.semconv.trace import SpanAttributes


@pytest.mark.vcr(
    decode_compressed_response=True,
    before_record_request=lambda _: _.headers.clear() or _,
    before_record_response=lambda _: {**_, "headers": {}},
)
async def test_groq_astream_chat_token_count(
    in_memory_span_exporter: InMemorySpanExporter,
) -> None:
    result = await Groq(model="llama3-8b-8192").astream_chat([ChatMessage(content="Hello!")])
    async for _ in result:
        pass
    span = in_memory_span_exporter.get_finished_spans()[0]
    assert span.attributes
    assert span.attributes.get(LLM_TOKEN_COUNT_TOTAL)
    assert span.attributes.get(LLM_TOKEN_COUNT_COMPLETION)
    assert span.attributes.get(LLM_TOKEN_COUNT_PROMPT)


@pytest.fixture(autouse=True)
def instrument(
    tracer_provider: TracerProvider,
    in_memory_span_exporter: InMemorySpanExporter,
) -> Iterator[None]:
    LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)
    yield
    LlamaIndexInstrumentor().uninstrument()


LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
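The test requests tracer_provider and in_memory_span_exporter fixtures that are defined elsewhere (presumably a shared conftest.py, which is not part of this diff). A minimal sketch of what such fixtures typically look like, for orientation only:

# Hypothetical conftest.py fixtures matching the names the test requests; not part of this commit.
import pytest
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter


@pytest.fixture
def in_memory_span_exporter() -> InMemorySpanExporter:
    return InMemorySpanExporter()


@pytest.fixture
def tracer_provider(in_memory_span_exporter: InMemorySpanExporter) -> TracerProvider:
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter))
    return tracer_provider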
2 changes: 1 addition & 1 deletion python/tox.ini
@@ -8,7 +8,7 @@ envlist =
    py3{9,12}-ci-{mistralai,mistralai-latest}
    py3{8,12}-ci-{openai,openai-latest}
    py3{8,12}-ci-{vertexai,vertexai-latest}
    py3{8,12}-ci-{llama_index,llama_index-latest}
    py3{9,12}-ci-{llama_index,llama_index-latest}
    py3{9,12}-ci-{dspy,dspy-latest}
    py3{9,12}-ci-{langchain,langchain-latest}
    ; py3{9,12}-ci-{guardrails,guardrails-latest}
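The only change to the tox configuration is the Python floor for the llama_index environments, which moves from 3.8 to 3.9; the surrounding environments are unchanged.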
