feat: Cerebras Support (#2242)
tcapelle authored Aug 27, 2024
1 parent 3d96ce0 commit 51c2fcd
Showing 12 changed files with 430 additions and 1 deletion.
107 changes: 107 additions & 0 deletions docs/docs/guides/integrations/cerebras.md
@@ -0,0 +1,107 @@
# Cerebras

Weave automatically tracks and logs LLM calls made via the [Cerebras Cloud SDK](https://inference-docs.cerebras.ai/introduction).

## Traces

Tracking LLM calls is crucial for debugging and performance monitoring. Weave helps you do this by automatically capturing traces for the Cerebras Cloud SDK.

Here's an example of how to use Weave with Cerebras:

```python
import os
import weave
from cerebras.cloud.sdk import Cerebras

# Initialize the Weave project
weave.init("cerebras_speedster")

# Use the Cerebras SDK as usual
api_key = os.environ["CEREBRAS_API_KEY"]
model = "llama3.1-8b" # Cerebras model

client = Cerebras(api_key=api_key)

response = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": "What's the fastest land animal?"}],
)

print(response.choices[0].message.content)
```

Weave will now track and log all LLM calls made through the Cerebras SDK. You can view the traces in the Weave web interface, including details like token usage and response time.

[![cerebras_calls.png](imgs/cerebras_calls.png)](https://wandb.ai/capecape/cerebras_speedster/weave/traces)
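
The integration also patches the async client, so calls made with `AsyncCerebras` are traced in the same way. A minimal sketch, assuming the same project and model as above:

```python
import asyncio
import os

import weave
from cerebras.cloud.sdk import AsyncCerebras

weave.init("cerebras_speedster")

async_client = AsyncCerebras(api_key=os.environ["CEREBRAS_API_KEY"])

async def main() -> None:
    # Async completions are captured by the same integration
    response = await async_client.chat.completions.create(
        model="llama3.1-8b",
        messages=[{"role": "user", "content": "What's the fastest fish?"}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())
```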

## Wrapping with your own ops

Weave ops offer a powerful way to enhance reproducibility and traceability in your experiments by automatically versioning your code and capturing its inputs and outputs. Here's an example of how you can leverage Weave ops with the Cerebras SDK:

```python
import os
import weave
from cerebras.cloud.sdk import Cerebras

# Initialize the Weave project
weave.init("cerebras_speedster")

client = Cerebras(api_key=os.environ["CEREBRAS_API_KEY"])

# Weave will track the inputs, outputs and code of this function
@weave.op
def animal_speedster(animal: str, model: str) -> str:
"Find out how fast an animal can run"

response = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": f"How fast can a {animal} run?"}],
)
return response.choices[0].message.content

animal_speedster("cheetah", "llama3.1-8b")
animal_speedster("ostrich", "llama3.1-8b")
animal_speedster("human", "llama3.1-8b")
```

## Create a `Model` for easier experimentation

The [Model](/guides/core-types/models) class in Weave helps you organize and compare different iterations of your app. This is particularly useful when experimenting with Cerebras models. Here's an example:


```python
import os
import weave
from cerebras.cloud.sdk import Cerebras

# Initialize the Weave project
weave.init("cerebras_speedster")

client = Cerebras(api_key=os.environ["CEREBRAS_API_KEY"])

class AnimalSpeedModel(weave.Model):
model: str
temperature: float

@weave.op
def predict(self, animal: str) -> str:
"Predict the top speed of an animal"

response = client.chat.completions.create(
model=self.model,
messages=[{"role": "user", "content": f"What's the top speed of a {animal}?"}],
temperature=self.temperature
)
return response.choices[0].message.content

speed_model = AnimalSpeedModel(
model="llama3.1-8b",
temperature=0.7
)
result = speed_model.predict(animal="cheetah")
print(result)
```

With this setup, you can easily experiment with different models and parameters, all while keeping track of your Cerebras-powered inferences!
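
Because the configuration is captured with each call, trying a different setting is just another instance of the same class. A quick sketch with illustrative values:

```python
# A second configuration of the same Model, for side-by-side comparison
# (hypothetical parameter values)
precise_model = AnimalSpeedModel(
    model="llama3.1-8b",
    temperature=0.1,
)
print(precise_model.predict(animal="ostrich"))
```

Both versions are logged as separate traces, so you can compare their outputs in the Weave UI.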

[![cerebras_model.png](imgs/cerebras_model.png)](https://wandb.ai/capecape/cerebras_speedster/weave/traces)
1 change: 1 addition & 0 deletions docs/sidebars.ts
@@ -82,6 +82,7 @@ const sidebars: SidebarsConfig = {
items: [
"guides/integrations/openai",
"guides/integrations/anthropic",
"guides/integrations/cerebras",
"guides/integrations/cohere",
"guides/integrations/mistral",
"guides/integrations/together_ai",
2 changes: 1 addition & 1 deletion requirements.test.txt
@@ -43,6 +43,6 @@ chromadb>=0.5.0 # LangChain
pysqlite3-binary==0.5.3 # LangChain
cohere>=5.5.8 # Cohere
groq>=0.9.0 # Groq

cerebras-cloud-sdk # Cerebras
# Used for Integration Tests
semver
4 changes: 4 additions & 0 deletions weave/autopatch.py
@@ -7,6 +7,7 @@

def autopatch() -> None:
from .integrations.anthropic.anthropic_sdk import anthropic_patcher
from .integrations.cerebras.cerebras_sdk import cerebras_patcher
from .integrations.cohere.cohere_sdk import cohere_patcher
from .integrations.dspy.dspy_sdk import dspy_patcher
from .integrations.groq.groq_sdk import groq_patcher
@@ -24,11 +25,13 @@ def autopatch() -> None:
anthropic_patcher.attempt_patch()
groq_patcher.attempt_patch()
dspy_patcher.attempt_patch()
cerebras_patcher.attempt_patch()
cohere_patcher.attempt_patch()


def reset_autopatch() -> None:
from .integrations.anthropic.anthropic_sdk import anthropic_patcher
from .integrations.cerebras.cerebras_sdk import cerebras_patcher
from .integrations.cohere.cohere_sdk import cohere_patcher
from .integrations.dspy.dspy_sdk import dspy_patcher
from .integrations.groq.groq_sdk import groq_patcher
@@ -46,4 +49,5 @@ def reset_autopatch() -> None:
anthropic_patcher.undo_patch()
groq_patcher.undo_patch()
dspy_patcher.undo_patch()
cerebras_patcher.undo_patch()
cohere_patcher.undo_patch()
Empty file.
@@ -0,0 +1,86 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "What is the capital of France?"}],
"model": "llama3.1-8b"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, br
connection:
- keep-alive
content-length:
- '101'
content-type:
- application/json
host:
- api.cerebras.ai
user-agent:
- AsyncCerebras/Python 0.6.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.6.0
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.9
method: POST
uri: https://api.cerebras.ai/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-90621a67-4431-4371-8e09-93e033e8ed13","choices":[{"finish_reason":"stop","index":0,"message":{"content":"The
capital of France is Paris.","role":"assistant"}}],"created":1724146770,"model":"llama3.1-8b","system_fingerprint":"fp_70185065a4","object":"chat.completion","usage":{"prompt_tokens":17,"completion_tokens":8,"total_tokens":25},"time_info":{"queue_time":6.701e-05,"prompt_time":0.0015992357142857144,"completion_time":0.004545298285714286,"total_time":0.022367477416992188,"created":1724146770}}'
headers:
Alt-Svc:
- h3=":443"; ma=86400
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Tue, 20 Aug 2024 09:39:30 GMT
Transfer-Encoding:
- chunked
Via:
- 1.1 e1c8225b86f394718e093d7bbdef7fa2.cloudfront.net (CloudFront)
X-Amz-Cf-Id:
- ckJufp8z4gvcTynRlh0KRHsjG0q9LL9-uLqUgADe0mwUZ5HWGCuUVQ==
X-Amz-Cf-Pop:
- CDG52-P5
X-Amzn-Trace-Id:
- root=1-66c46452-38959c0a6d258a734a804ae5;sampled=1;lineage=822f479c:0
X-Cache:
- Miss from cloudfront
x-amzn-Remapped-content-length:
- '519'
x-amzn-Remapped-date:
- Tue, 20 Aug 2024 09:39:03 GMT
x-amzn-Remapped-server:
- uvicorn
x-amzn-RequestId:
- cd8cad17-a3f5-40bc-b428-b769bf420996
x-ratelimit-limit-requests-day:
- '28800'
x-ratelimit-limit-tokens-minute:
- '60000'
x-ratelimit-remaining-requests-day:
- '28800'
x-ratelimit-remaining-tokens-minute:
- '59950'
x-ratelimit-reset-requests-day:
- '51629.819103479385'
x-ratelimit-reset-tokens-minute:
- '29.819103479385376'
x-request-id:
- ckJufp8z4gvcTynRlh0KRHsjG0q9LL9-uLqUgADe0mwUZ5HWGCuUVQ==
status:
code: 200
message: OK
version: 1
@@ -0,0 +1,86 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "What is the capital of France?"}],
"model": "llama3.1-8b"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, br
connection:
- keep-alive
content-length:
- '101'
content-type:
- application/json
host:
- api.cerebras.ai
user-agent:
- Cerebras/Python 0.6.0
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.6.0
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.9
method: POST
uri: https://api.cerebras.ai/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-48b9a448-c1cc-4a11-9686-76dd8e744e02","choices":[{"finish_reason":"stop","index":0,"message":{"content":"The
capital of France is Paris.","role":"assistant"}}],"created":1724146768,"model":"llama3.1-8b","system_fingerprint":"fp_70185065a4","object":"chat.completion","usage":{"prompt_tokens":17,"completion_tokens":8,"total_tokens":25},"time_info":{"queue_time":6.615e-05,"prompt_time":0.0015992892857142858,"completion_time":0.004536781714285715,"total_time":0.02078700065612793,"created":1724146768}}'
headers:
Alt-Svc:
- h3=":443"; ma=86400
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Tue, 20 Aug 2024 09:39:28 GMT
Transfer-Encoding:
- chunked
Via:
- 1.1 e90132d2777b51acfb2fd86213866952.cloudfront.net (CloudFront)
X-Amz-Cf-Id:
- 5UD2_0bkNmZIqCzFZQJ51eWJpqqn_3TUueEW8n82e3VCib6M6Lf-Cg==
X-Amz-Cf-Pop:
- CDG52-P5
X-Amzn-Trace-Id:
- root=1-66c46450-1702385454adf3b15962ddd9;sampled=1;lineage=822f479c:0
X-Cache:
- Miss from cloudfront
x-amzn-Remapped-content-length:
- '518'
x-amzn-Remapped-date:
- Tue, 20 Aug 2024 09:39:03 GMT
x-amzn-Remapped-server:
- uvicorn
x-amzn-RequestId:
- 86a3b877-20d0-44cf-85ec-6a29da1bdc94
x-ratelimit-limit-requests-day:
- '28800'
x-ratelimit-limit-tokens-minute:
- '60000'
x-ratelimit-remaining-requests-day:
- '28800'
x-ratelimit-remaining-tokens-minute:
- '59975'
x-ratelimit-reset-requests-day:
- '51631.72441959381'
x-ratelimit-reset-tokens-minute:
- '31.724419593811035'
x-request-id:
- 5UD2_0bkNmZIqCzFZQJ51eWJpqqn_3TUueEW8n82e3VCib6M6Lf-Cg==
status:
code: 200
message: OK
version: 1
53 changes: 53 additions & 0 deletions weave/integrations/cerebras/cerebras_sdk.py
@@ -0,0 +1,53 @@
import importlib
import typing
from functools import wraps

import weave
from weave.trace.patcher import MultiPatcher, SymbolPatcher


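# Wrap a synchronous SDK method in a weave.op so each call is traced under a stable name.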
def create_wrapper_sync(
name: str,
) -> typing.Callable[[typing.Callable], typing.Callable]:
def wrapper(fn: typing.Callable) -> typing.Callable:
op = weave.op()(fn)
op.name = name # type: ignore
return op

return wrapper


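# Wrap an asynchronous SDK method; the inner _async_wrapper keeps the patched
# method a coroutine so weave.op records the awaited result.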
def create_wrapper_async(
name: str,
) -> typing.Callable[[typing.Callable], typing.Callable]:
def wrapper(fn: typing.Callable) -> typing.Callable:
def _fn_wrapper(fn: typing.Callable) -> typing.Callable:
@wraps(fn)
async def _async_wrapper(
*args: typing.Any, **kwargs: typing.Any
) -> typing.Any:
return await fn(*args, **kwargs)

return _async_wrapper

op = weave.op()(_fn_wrapper(fn))
op.name = name # type: ignore
return op

return wrapper


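# Patch both the sync and async chat-completion entry points of the Cerebras SDK.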
cerebras_patcher = MultiPatcher(
[
SymbolPatcher(
lambda: importlib.import_module("cerebras.cloud.sdk.resources.chat"),
"CompletionsResource.create",
create_wrapper_sync(name="cerebras.chat.completions.create"),
),
SymbolPatcher(
lambda: importlib.import_module("cerebras.cloud.sdk.resources.chat"),
"AsyncCompletionsResource.create",
create_wrapper_async(name="cerebras.chat.completions.create"),
),
]
)