feat: Cerebras Support (#2242)
tcapelle authored Aug 27, 2024
1 parent 3d96ce0 commit 51c2fcd
Showing 12 changed files with 430 additions and 1 deletion.
107 changes: 107 additions & 0 deletions docs/docs/guides/integrations/cerebras.md
@@ -0,0 +1,107 @@
# Cerebras

Weave automatically tracks and logs LLM calls made via the [Cerebras Cloud SDK](https://inference-docs.cerebras.ai/introduction).

## Traces

Tracking LLM calls is crucial for debugging and performance monitoring. Weave helps you do this by automatically capturing traces for the Cerebras Cloud SDK.

Here's an example of how to use Weave with Cerebras:

```python
import os
import weave
from cerebras.cloud.sdk import Cerebras

# Initialize the Weave project
weave.init("cerebras_speedster")

# Use the Cerebras SDK as usual
api_key = os.environ["CEREBRAS_API_KEY"]
model = "llama3.1-8b" # Cerebras model

client = Cerebras(api_key=api_key)

response = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": "What's the fastest land animal?"}],
)

print(response.choices[0].message.content)
```

Weave will now track and log all LLM calls made through the Cerebras SDK. You can view the traces in the Weave web interface, including details like token usage and response time.

[![cerebras_calls.png](imgs/cerebras_calls.png)](https://wandb.ai/capecape/cerebras_speedster/weave/traces)
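
The integration also patches the async client, so calls made with `AsyncCerebras` are traced in the same way. A minimal sketch, assuming the same project and model as above:

```python
import asyncio
import os

import weave
from cerebras.cloud.sdk import AsyncCerebras

weave.init("cerebras_speedster")

async_client = AsyncCerebras(api_key=os.environ["CEREBRAS_API_KEY"])

async def main() -> None:
    # Async completions are captured by the same integration
    response = await async_client.chat.completions.create(
        model="llama3.1-8b",
        messages=[{"role": "user", "content": "What's the fastest fish?"}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())
```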

## Wrapping with your own ops

Weave ops offer a powerful way to enhance reproducibility and traceability in your experiments by automatically versioning your code and capturing its inputs and outputs. Here's an example of how you can leverage Weave ops with the Cerebras SDK:

```python
import os
import weave
from cerebras.cloud.sdk import Cerebras

# Initialize the Weave project
weave.init("cerebras_speedster")

client = Cerebras(api_key=os.environ["CEREBRAS_API_KEY"])

# Weave will track the inputs, outputs and code of this function
@weave.op
def animal_speedster(animal: str, model: str) -> str:
"Find out how fast an animal can run"

response = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": f"How fast can a {animal} run?"}],
)
return response.choices[0].message.content

animal_speedster("cheetah", "llama3.1-8b")
animal_speedster("ostrich", "llama3.1-8b")
animal_speedster("human", "llama3.1-8b")
```

## Create a `Model` for easier experimentation

The [Model](/guides/core-types/models) class in Weave helps you organize and compare different iterations of your app. This is particularly useful when experimenting with Cerebras models. Here's an example:


```python
import os
import weave
from cerebras.cloud.sdk import Cerebras

# Initialize the Weave project
weave.init("cerebras_speedster")

client = Cerebras(api_key=os.environ["CEREBRAS_API_KEY"])

class AnimalSpeedModel(weave.Model):
model: str
temperature: float

@weave.op
def predict(self, animal: str) -> str:
"Predict the top speed of an animal"

response = client.chat.completions.create(
model=self.model,
messages=[{"role": "user", "content": f"What's the top speed of a {animal}?"}],
temperature=self.temperature
)
return response.choices[0].message.content

speed_model = AnimalSpeedModel(
model="llama3.1-8b",
temperature=0.7
)
result = speed_model.predict(animal="cheetah")
print(result)
```

With this setup, you can easily experiment with different models and parameters, all while keeping track of your Cerebras-powered inferences!
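
Because the configuration is captured with each call, trying a different setting is just another instance of the same class. A quick sketch with illustrative values:

```python
# A second configuration of the same Model, for side-by-side comparison
# (hypothetical parameter values)
precise_model = AnimalSpeedModel(
    model="llama3.1-8b",
    temperature=0.1,
)
print(precise_model.predict(animal="ostrich"))
```

Both versions are logged as separate traces, so you can compare their outputs in the Weave UI.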

[![cerebras_model.png](imgs/cerebras_model.png)](https://wandb.ai/capecape/cerebras_speedster/weave/traces)
1 change: 1 addition & 0 deletions docs/sidebars.ts
@@ -82,6 +82,7 @@ const sidebars: SidebarsConfig = {
items: [
"guides/integrations/openai",
"guides/integrations/anthropic",
"guides/integrations/cerebras",
"guides/integrations/cohere",
"guides/integrations/mistral",
"guides/integrations/together_ai",
2 changes: 1 addition & 1 deletion requirements.test.txt
@@ -43,6 +43,6 @@ chromadb>=0.5.0 # LangChain
pysqlite3-binary==0.5.3 # LangChain
cohere>=5.5.8 # Cohere
groq>=0.9.0 # Groq

cerebras-cloud-sdk # Cerebras
# Used for Integration Tests
semver
4 changes: 4 additions & 0 deletions weave/autopatch.py
@@ -7,6 +7,7 @@

def autopatch() -> None:
from .integrations.anthropic.anthropic_sdk import anthropic_patcher
from .integrations.cerebras.cerebras_sdk import cerebras_patcher
from .integrations.cohere.cohere_sdk import cohere_patcher
from .integrations.dspy.dspy_sdk import dspy_patcher
from .integrations.groq.groq_sdk import groq_patcher
@@ -24,11 +25,13 @@ def autopatch() -> None:
anthropic_patcher.attempt_patch()
groq_patcher.attempt_patch()
dspy_patcher.attempt_patch()
cerebras_patcher.attempt_patch()
cohere_patcher.attempt_patch()


def reset_autopatch() -> None:
from .integrations.anthropic.anthropic_sdk import anthropic_patcher
from .integrations.cerebras.cerebras_sdk import cerebras_patcher
from .integrations.cohere.cohere_sdk import cohere_patcher
from .integrations.dspy.dspy_sdk import dspy_patcher
from .integrations.groq.groq_sdk import groq_patcher
@@ -46,4 +49,5 @@ def reset_autopatch() -> None:
anthropic_patcher.undo_patch()
groq_patcher.undo_patch()
dspy_patcher.undo_patch()
cerebras_patcher.undo_patch()
cohere_patcher.undo_patch()
Empty file.
@@ -0,0 +1,86 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "What is the capital of France?"}],
"model": "llama3.1-8b"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, br
connection:
- keep-alive
content-length:
- '101'
content-type:
- application/json
host:
- api.cerebras.ai
user-agent:
- AsyncCerebras/Python 0.6.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.6.0
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.9
method: POST
uri: https://api.cerebras.ai/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-90621a67-4431-4371-8e09-93e033e8ed13","choices":[{"finish_reason":"stop","index":0,"message":{"content":"The
capital of France is Paris.","role":"assistant"}}],"created":1724146770,"model":"llama3.1-8b","system_fingerprint":"fp_70185065a4","object":"chat.completion","usage":{"prompt_tokens":17,"completion_tokens":8,"total_tokens":25},"time_info":{"queue_time":6.701e-05,"prompt_time":0.0015992357142857144,"completion_time":0.004545298285714286,"total_time":0.022367477416992188,"created":1724146770}}'
headers:
Alt-Svc:
- h3=":443"; ma=86400
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Tue, 20 Aug 2024 09:39:30 GMT
Transfer-Encoding:
- chunked
Via:
- 1.1 e1c8225b86f394718e093d7bbdef7fa2.cloudfront.net (CloudFront)
X-Amz-Cf-Id:
- ckJufp8z4gvcTynRlh0KRHsjG0q9LL9-uLqUgADe0mwUZ5HWGCuUVQ==
X-Amz-Cf-Pop:
- CDG52-P5
X-Amzn-Trace-Id:
- root=1-66c46452-38959c0a6d258a734a804ae5;sampled=1;lineage=822f479c:0
X-Cache:
- Miss from cloudfront
x-amzn-Remapped-content-length:
- '519'
x-amzn-Remapped-date:
- Tue, 20 Aug 2024 09:39:03 GMT
x-amzn-Remapped-server:
- uvicorn
x-amzn-RequestId:
- cd8cad17-a3f5-40bc-b428-b769bf420996
x-ratelimit-limit-requests-day:
- '28800'
x-ratelimit-limit-tokens-minute:
- '60000'
x-ratelimit-remaining-requests-day:
- '28800'
x-ratelimit-remaining-tokens-minute:
- '59950'
x-ratelimit-reset-requests-day:
- '51629.819103479385'
x-ratelimit-reset-tokens-minute:
- '29.819103479385376'
x-request-id:
- ckJufp8z4gvcTynRlh0KRHsjG0q9LL9-uLqUgADe0mwUZ5HWGCuUVQ==
status:
code: 200
message: OK
version: 1
@@ -0,0 +1,86 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "What is the capital of France?"}],
"model": "llama3.1-8b"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, br
connection:
- keep-alive
content-length:
- '101'
content-type:
- application/json
host:
- api.cerebras.ai
user-agent:
- Cerebras/Python 0.6.0
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.6.0
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.9
method: POST
uri: https://api.cerebras.ai/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-48b9a448-c1cc-4a11-9686-76dd8e744e02","choices":[{"finish_reason":"stop","index":0,"message":{"content":"The
capital of France is Paris.","role":"assistant"}}],"created":1724146768,"model":"llama3.1-8b","system_fingerprint":"fp_70185065a4","object":"chat.completion","usage":{"prompt_tokens":17,"completion_tokens":8,"total_tokens":25},"time_info":{"queue_time":6.615e-05,"prompt_time":0.0015992892857142858,"completion_time":0.004536781714285715,"total_time":0.02078700065612793,"created":1724146768}}'
headers:
Alt-Svc:
- h3=":443"; ma=86400
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Tue, 20 Aug 2024 09:39:28 GMT
Transfer-Encoding:
- chunked
Via:
- 1.1 e90132d2777b51acfb2fd86213866952.cloudfront.net (CloudFront)
X-Amz-Cf-Id:
- 5UD2_0bkNmZIqCzFZQJ51eWJpqqn_3TUueEW8n82e3VCib6M6Lf-Cg==
X-Amz-Cf-Pop:
- CDG52-P5
X-Amzn-Trace-Id:
- root=1-66c46450-1702385454adf3b15962ddd9;sampled=1;lineage=822f479c:0
X-Cache:
- Miss from cloudfront
x-amzn-Remapped-content-length:
- '518'
x-amzn-Remapped-date:
- Tue, 20 Aug 2024 09:39:03 GMT
x-amzn-Remapped-server:
- uvicorn
x-amzn-RequestId:
- 86a3b877-20d0-44cf-85ec-6a29da1bdc94
x-ratelimit-limit-requests-day:
- '28800'
x-ratelimit-limit-tokens-minute:
- '60000'
x-ratelimit-remaining-requests-day:
- '28800'
x-ratelimit-remaining-tokens-minute:
- '59975'
x-ratelimit-reset-requests-day:
- '51631.72441959381'
x-ratelimit-reset-tokens-minute:
- '31.724419593811035'
x-request-id:
- 5UD2_0bkNmZIqCzFZQJ51eWJpqqn_3TUueEW8n82e3VCib6M6Lf-Cg==
status:
code: 200
message: OK
version: 1
53 changes: 53 additions & 0 deletions weave/integrations/cerebras/cerebras_sdk.py
@@ -0,0 +1,53 @@
import importlib
import typing
from functools import wraps

import weave
from weave.trace.patcher import MultiPatcher, SymbolPatcher


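# Wrap a synchronous SDK method in a weave.op so each call is traced under a stable name.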
def create_wrapper_sync(
name: str,
) -> typing.Callable[[typing.Callable], typing.Callable]:
def wrapper(fn: typing.Callable) -> typing.Callable:
op = weave.op()(fn)
op.name = name # type: ignore
return op

return wrapper


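# Wrap an asynchronous SDK method; the inner _async_wrapper keeps the patched
# method a coroutine so weave.op records the awaited result.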
def create_wrapper_async(
name: str,
) -> typing.Callable[[typing.Callable], typing.Callable]:
def wrapper(fn: typing.Callable) -> typing.Callable:
def _fn_wrapper(fn: typing.Callable) -> typing.Callable:
@wraps(fn)
async def _async_wrapper(
*args: typing.Any, **kwargs: typing.Any
) -> typing.Any:
return await fn(*args, **kwargs)

return _async_wrapper

op = weave.op()(_fn_wrapper(fn))
op.name = name # type: ignore
return op

return wrapper


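# Patch both the sync and async chat-completion entry points of the Cerebras SDK.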
cerebras_patcher = MultiPatcher(
[
SymbolPatcher(
lambda: importlib.import_module("cerebras.cloud.sdk.resources.chat"),
"CompletionsResource.create",
create_wrapper_sync(name="cerebras.chat.completions.create"),
),
SymbolPatcher(
lambda: importlib.import_module("cerebras.cloud.sdk.resources.chat"),
"AsyncCompletionsResource.create",
create_wrapper_async(name="cerebras.chat.completions.create"),
),
]
)