Merge branch 'master' into feat/huggingface-inference
soumik12345 authored Nov 27, 2024
2 parents 43c9c95 + dc7e949 commit 6138efd
Showing 77 changed files with 2,296 additions and 467 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yaml
@@ -240,6 +240,7 @@ jobs:
'mistral1',
'notdiamond',
'openai',
'vertexai',
'scorers_tests',
'pandas-test',
'huggingface',
25 changes: 16 additions & 9 deletions docs/docs/guides/integrations/google-gemini.md
@@ -16,13 +16,28 @@ import os
import google.generativeai as genai
import weave

weave.init(project_name="google_ai_studio-test")
weave.init(project_name="google-ai-studio-test")

genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
model = genai.GenerativeModel("gemini-1.5-flash")
response = model.generate_content("Write a story about an AI and magic")
```

Weave will also automatically capture traces for the [Vertex AI APIs](https://cloud.google.com/vertex-ai/docs). To start tracking, call `weave.init(project_name="<YOUR-WANDB-PROJECT-NAME>")` and use the library as normal.

```python
import vertexai
import weave
from vertexai.generative_models import GenerativeModel

weave.init(project_name="vertex-ai-test")
vertexai.init(project="<YOUR-VERTEXAI-PROJECT-NAME>", location="<YOUR-VERTEXAI-PROJECT-LOCATION>")
model = GenerativeModel("gemini-1.5-flash-002")
response = model.generate_content(
    "What's a good name for a flower shop specialising in selling dried flower bouquets?"
)
```

## Track your own ops

Wrapping a function with `@weave.op` starts capturing inputs, outputs and app logic so you can debug how data flows through your app. You can deeply nest ops and build a tree of functions that you want to track. This also starts automatically versioning code as you experiment to capture ad-hoc details that haven't been committed to git.
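
As a minimal sketch of this, building on the Gemini snippet above (the op name and prompt are illustrative):

```python
import os

import google.generativeai as genai
import weave

weave.init(project_name="google-ai-studio-test")
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

@weave.op()
def generate_story(prompt: str) -> str:
    # Weave records the inputs, output, and latency of every call to this op.
    model = genai.GenerativeModel("gemini-1.5-flash")
    response = model.generate_content(prompt)
    return response.text

generate_story("Write a story about an AI and magic")
```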
@@ -97,11 +112,3 @@ Given a weave reference to any `weave.Model` object, you can spin up a fastapi s
```shell
weave serve weave:///your_entity/project-name/YourModel:<hash>
```
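
The reference points at a published `weave.Model`. As a rough sketch of what such a model can look like (hypothetical class and attribute names, assuming the usual `predict`-op pattern):

```python
import weave

class YourModel(weave.Model):
    # Attributes are captured and versioned along with the code.
    model_name: str
    temperature: float

    @weave.op()
    def predict(self, prompt: str) -> str:
        # Placeholder logic; a real model would call an LLM here.
        return f"[{self.model_name} @ {self.temperature}] {prompt}"

weave.init("your_entity/project-name")
ref = weave.publish(YourModel(model_name="gemini-1.5-flash", temperature=0.7), name="YourModel")
print(ref.uri())  # A weave:///your_entity/project-name/YourModel:<hash>-style URI
```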

-## Vertex API
-
-Full Weave support for the `Vertex AI SDK` python package is currently in development, however there is a way you can integrate Weave with the Vertex API.
-
-Vertex API supports OpenAI SDK compatibility ([docs](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/call-gemini-using-openai-library)), and if this is a way you build your application, Weave will automatically track your LLM calls via our [OpenAI](/guides/integrations/openai) SDK integration.
-
-\* Please note that some features may not fully work as Vertex API doesn't implement the full OpenAI SDK capabilities.
163 changes: 137 additions & 26 deletions docs/docs/guides/tools/playground.md
@@ -1,48 +1,159 @@
# Playground

-Evaluating LLM prompts and responses is challenging. The Playground tool enables you to quickly iterate on prompts: editing, retrying, and deleting messages. The LLM Playground is currently in preview.
+> **The LLM Playground is currently in preview.**
+
+Evaluating LLM prompts and responses is challenging. The Weave Playground is designed to simplify the process of iterating on LLM prompts and responses, making it easier to experiment with different models and prompts. With features like prompt editing, message retrying, and model comparison, Playground helps you quickly test and improve your LLM applications. Playground currently supports OpenAI, Anthropic, Gemini, and Groq.

## Features

- **Quick access:** Open the Playground from the W&B sidebar for a fresh session or from the Call page to test an existing project.
- **Message controls:** Edit, retry, or delete messages directly within the chat.
- **Flexible messaging:** Add new messages as either user or system inputs, and send them to the LLM.
- **Customizable settings:** Configure your preferred LLM provider and adjust model settings.
- **Multi-LLM support:** Switch between models, with team-level API key management.
- **Compare models:** Compare how different models respond to prompts.

Get started with the Playground to optimize your LLM interactions, streamline your prompt engineering process, and speed up your LLM application development.

- [Prerequisites](#prerequisites)
- [Add a provider API key](#add-a-provider-api-key)
- [Access the Playground](#access-the-playground)
- [Select an LLM](#select-an-llm)
- [Adjust LLM parameters](#adjust-llm-parameters)
- [Add a function](#add-a-function)
- [Retry, edit, and delete messages](#retry-edit-and-delete-messages)
- [Add a new message](#add-a-new-message)
- [Compare LLMs](#compare-llms)

## Prerequisites

Before you can use Playground, you must [add an API key](#add-a-provider-api-key) for your preferred LLM provider(s) and [open the Playground UI](#access-the-playground).

### Add a provider API key

Playground currently supports OpenAI, Anthropic, Gemini, and Groq models.
To use one of the available LLMs, your W&B admin must add the appropriate API key to your team secrets in W&B settings.

- OpenAI: `OPENAI_API_KEY`
- Anthropic: `ANTHROPIC_API_KEY`
- Gemini: `GOOGLE_API_KEY`
- Groq: `GEMMA_API_KEY`

### Access the Playground

There are two ways to access the Playground:

-1. From the sidebar, click **Playground**. This will open a fresh Playground page with a simple system prompt.
-2. From the Call page, click the **Open chat in playground** button from the call page's chat view.
+1. *Open a fresh Playground page with a simple system prompt*: In the sidebar, select **Playground**. Playground opens in the same tab.
+2. *Open Playground for a specific call*:
+   1. In the sidebar, select the **Traces** tab. A list of traces displays.
+   2. In the list of traces, click the name of the call that you want to view. The call's details page opens.
+   3. Click **Open chat in playground**. Playground opens in a new tab.

![Screenshot of Open in Playground button](imgs/open_chat_in_playground.png)

-## Retry, edit, and delete messages
+## Select an LLM

You can switch the LLM using the dropdown menu in the top left. Currently, the available models are:

- gpt-4o-mini
- claude-3-5-sonnet-20240620
- claude-3-5-sonnet-20241022
- claude-3-haiku-20240307
- claude-3-opus-20240229
- claude-3-sonnet-20240229
- gemini/gemini-1.5-flash-001
- gemini/gemini-1.5-flash-002
- gemini/gemini-1.5-flash-8b-exp-0827
- gemini/gemini-1.5-flash-8b-exp-0924
- gemini/gemini-1.5-flash-exp-0827
- gemini/gemini-1.5-flash-latest
- gemini/gemini-1.5-flash
- gemini/gemini-1.5-pro-001
- gemini/gemini-1.5-pro-002
- gemini/gemini-1.5-pro-exp-0801
- gemini/gemini-1.5-pro-exp-0827
- gemini/gemini-1.5-pro-latest
- gemini/gemini-1.5-pro
- gemini/gemini-pro
- gpt-3.5-turbo-0125
- gpt-3.5-turbo-1106
- gpt-3.5-turbo-16k
- gpt-3.5-turbo
- gpt-4-0125-preview
- gpt-4-0314
- gpt-4-0613
- gpt-4-1106-preview
- gpt-4-32k-0314
- gpt-4-turbo-2024-04-09
- gpt-4-turbo-preview
- gpt-4-turbo
- gpt-4
- gpt-4o-2024-05-13
- gpt-4o-2024-08-06
- gpt-4o-mini-2024-07-18
- gpt-4o
- groq/gemma-7b-it
- groq/gemma2-9b-it
- groq/llama-3.1-70b-versatile
- groq/llama-3.1-8b-instant
- groq/llama3-70b-8192
- groq/llama3-8b-8192
- groq/llama3-groq-70b-8192-tool-use-preview
- groq/llama3-groq-8b-8192-tool-use-preview
- groq/mixtral-8x7b-32768
- o1-mini-2024-09-12
- o1-mini
- o1-preview-2024-09-12
- o1-preview

## Adjust LLM parameters

You can experiment with different parameter values for your selected model. To adjust parameters, do the following:

1. In the upper right corner of the Playground UI, click **Chat settings** to open the parameter settings dropdown.
2. In the dropdown, adjust parameters as desired. You can also toggle Weave call tracking on or off, and [add a function](#add-a-function).
3. Click **Chat settings** to close the dropdown and save your changes.

-Once in the Playground, you can see the chat history.
-When hovering over a message, you will see three buttons: **Edit**, **Retry**, and **Delete**.
+![Screenshot of Playground settings](imgs/playground_settings.png)

-![Screenshot of Playground message buttons](imgs/playground_message_buttons.png)
+## Add a function

-1. **Retry**: Deletes all subsequent messages and retries the chat from the selected message.
-2. **Delete**: Removes the message from the chat.
-3. **Edit**: Allows you to modify the message content.
+You can test how different models use functions based on the input they receive from the user. To add a function for testing in Playground, do the following (an example function definition follows this list):

-![Screenshot of Playground editing](imgs/playground_message_editor.png)
+1. In the upper right corner of the Playground UI, click **Chat settings** to open the parameter settings dropdown.
+2. In the dropdown, click **+ Add function**.
+3. In the pop-up, add your function information.
+4. To save your changes and close the function pop-up, click the **x** in the upper right corner.
+5. Click **Chat settings** to close the settings dropdown and save your changes.
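
If your provider uses the OpenAI-style function-calling schema (an assumption; the exact format Playground accepts may vary by provider), a hypothetical weather-lookup function could be defined as:

```json
{
  "name": "get_current_weather",
  "description": "Get the current weather for a city",
  "parameters": {
    "type": "object",
    "properties": {
      "city": {"type": "string", "description": "City name, e.g. Berlin"},
      "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
    },
    "required": ["city"]
  }
}
```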

-## Adding new messages
+## Retry, edit, and delete messages

-To add a new message to the chat without sending it to the LLM, select the role (e.g., **User**) and click **Add**.
-To send a new message to the LLM, click the **Send** button or press **Command + Enter**.
+With Playground, you can retry, edit, and delete messages. To use this feature, hover over the message you want to edit, retry, or delete. Three buttons display: **Delete**, **Edit**, and **Retry**.

-![Screenshot of Playground sending a message](imgs/playground_chat_input.png)
+- **Delete**: Remove the message from the chat.
+- **Edit**: Modify the message content.
+- **Retry**: Delete all subsequent messages and retry the chat from the selected message.

-## Configuring the LLM
+![Screenshot of Playground message buttons](imgs/playground_message_buttons.png)
+![Screenshot of Playground editing](imgs/playground_message_editor.png)

-We currently support 4 LLM providers.
-To use each LLM, your team admin needs to add the relevant API key to your team's settings (found at **wandb.ai/[team-name]/settings**):
+## Add a new message

-- OpenAI: `OPENAI_API_KEY`
-- Anthropic: `ANTHROPIC_API_KEY`
-- Gemini: `GOOGLE_API_KEY`
-- Groq: `GEMMA_API_KEY`
+To add a new message to the chat, do the following:

-### Choosing the LLM and its settings
+1. In the chat box, select one of the available roles (**Assistant** or **User**).
+2. Click **+ Add**.
+3. To send a new message to the LLM, click the **Send** button. Alternatively, press the **Command** and **Enter** keys.

-Click the **Settings** button to open the settings drawer.
+![Screenshot of Playground sending a message](imgs/playground_chat_input.png)

-![Screenshot of Playground settings](imgs/playground_settings.png)
+## Compare LLMs

Playground allows you to compare LLMs. To perform a comparison, do the following:

-You can also switch the LLM using the dropdown menu in the top left.
1. In the Playground UI, click **Compare**. A second chat opens next to the original chat.
2. In the second chat, you can:
- [Select the LLM to compare](#select-an-llm)
- [Adjust parameters](#adjust-llm-parameters)
- [Add functions](#add-a-function)
3. In the message box, enter a message that you want to test with both models and press **Send**.
98 changes: 98 additions & 0 deletions docs/docs/guides/tracking/feedback.md
@@ -180,3 +180,101 @@ Additionally, you can use call() method to execute the operation and retrieve th
```
</TabItem>
</Tabs>

## Human annotation

Human annotations are supported in the Weave UI after a Human Annotation scorer has been configured; scorers are managed on the `Scorers` page in the navigation sidebar. Once a scorer is configured, annotations can be added while inspecting individual calls in the main Call or Evaluation table by selecting the marker icon in the call header (shown below).

![Marker icon in call header](./imgs/marker-icon.png)

### Creating a Human Annotation scorer

To create a scorer, click "create scorer" on the "Scorers" page (accessed via the navigation sidebar) and select the scorer type, in this case "Human annotation". Then fill out the subsequent form to configure the scorer, paying special attention to `Type`, which determines the kind of feedback that will be collected. Here is an example scorer configuration where a human labeler is asked to choose which type of document the LLM used:

![Human Annotation scorer form](./imgs/human-annotation-scorer-form.png)

This scorer will automatically show up in the "Feedback" sidebar with the options provided.

![Human Annotation scorer feedback sidebar](./imgs/full-feedback-sidebar.png)

Once labeled, the feedback can also be viewed in the calls table (refreshing the table may be required). The column can be ordered and filtered.

![Human Annotation scorer feedback in calls table](./imgs/feedback-in-the-table.png)

### Through the API

Human annotation scorers can also be configured through the API. Each scorer is its own object, which is created and updated independently. The following example creates two scorers: one for the temperature of the LLM call and one for the tone of the response. Import the `AnnotationSpec` class from `weave.flow.annotation_spec` and use the `save` method on the Weave client to create each scorer.

<Tabs groupId="programming-language">
<TabItem value="python" label="Python" default>
```python
import weave
from weave.flow.annotation_spec import AnnotationSpec

api = weave.init("feedback-example")

spec1 = AnnotationSpec(
    name="Temperature",
    description="The perceived temperature of the LLM call",
    field_schema={
        "type": "number",
        "minimum": -1,
        "maximum": 1,
    },
)
spec2 = AnnotationSpec(
    name="Tone",
    description="The tone of the LLM response",
    field_schema={
        "type": "string",
        "enum": ["Aggressive", "Neutral", "Polite", "N/A"],
    },
)
api.save(spec1, "temperature-scorer")
api.save(spec2, "tone-scorer")
```

</TabItem>
<TabItem value="typescript" label="TypeScript">
```plaintext
This feature is not available in TypeScript yet. Stay tuned!
```
</TabItem>
</Tabs>

### Modifying a Human Annotation scorer

Building on the previous example, the following code creates a new version of the temperature scorer by using the same object-id when saving.

<Tabs groupId="programming-language">
<TabItem value="python" label="Python" default>
```python
import weave
from weave.flow.annotation_spec import AnnotationSpec

api = weave.init("feedback-example")

# create a new version of the scorer
spec1 = AnnotationSpec(
    name="Temperature",
    description="The perceived temperature of the LLM call",
    field_schema={
        "type": "integer",  # <<- change type to integer
        "minimum": -1,
        "maximum": 1,
    },
)
api.save(spec1, "temperature-scorer")
```

</TabItem>
<TabItem value="typescript" label="TypeScript">
```plaintext
This feature is not available in TypeScript yet. Stay tuned!
```
</TabItem>
</Tabs>

The result is an updated object with a history of all versions. This history can be viewed in the scorers tab, under "Human annotations".

![Human Annotation scorer history](./imgs/human-annotation-scorer-history.png)
Binary file added docs/docs/guides/tracking/imgs/feedback-in-the-table.png
Binary file added docs/docs/guides/tracking/imgs/full-feedback-sidebar.png
Binary file added docs/docs/guides/tracking/imgs/human-annotation-scorer-form.png
Binary file added docs/docs/guides/tracking/imgs/human-annotation-scorer-history.png
Binary file added docs/docs/guides/tracking/imgs/marker-icon.png
1 change: 1 addition & 0 deletions noxfile.py
@@ -47,6 +47,7 @@ def lint(session):
"mistral1",
"notdiamond",
"openai",
"vertexai",
"scorers_tests",
"pandas-test",
"huggingface",
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -34,7 +34,7 @@ classifiers = [
requires-python = ">=3.9"
dynamic = ["version"]
dependencies = [
"pydantic>=2.0.0,<2.10.0", # Pinning to resolve issues caused in 2.10.0 release
"pydantic>=2.0.0",
"wandb>=0.17.1",
"packaging>=21.0", # For version parsing in integrations
"tenacity>=8.3.0,!=8.4.0", # Excluding 8.4.0 because it had a bug on import of AsyncRetrying
@@ -81,6 +81,7 @@ openai = ["openai>=1.0.0"]
pandas-test = ["pandas>=2.2.3"]
modal = ["modal", "python-dotenv"]
huggingface = ["huggingface-hub>=0.26.2"]
vertexai = ["vertexai>=1.70.0"]
test = [
"nox",
"pytest>=8.2.0",
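
Assuming these optional-dependency groups are published as extras of the `weave` package, the new Vertex AI integration can be installed together with its SDK in one step:

```shell
pip install "weave[vertexai]"
```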
2 changes: 1 addition & 1 deletion sdks/node/src/__tests__/login.test.ts
@@ -43,7 +43,7 @@ });
});
expect(mockSave).toHaveBeenCalled();
expect(console.log).toHaveBeenCalledWith(
-'Successfully logged in. Credentials saved for api.wandb.ai'
+'Successfully logged in. Credentials saved for api.wandb.ai'
);
});

12 changes: 9 additions & 3 deletions sdks/node/src/clientApi.ts
@@ -24,6 +24,9 @@ export async function login(apiKey: string, host?: string) {
console.warn('No host provided, using default host:', defaultHost);
host = defaultHost;
}
if (!apiKey) {
throw new Error('API key is required for login. Please provide a valid API key.');
}
const {traceBaseUrl} = getUrls(host);

// Test the connection to the traceServerApi
@@ -45,9 +48,12 @@
}

const netrc = new Netrc();
-netrc.setEntry({machine: host, login: 'user', password: apiKey});
-netrc.save();
-console.log(`Successfully logged in. Credentials saved for ${host}`);
+// Only save to netrc if host and a non-empty apiKey are provided
+if (host && apiKey.trim()) {
+  netrc.setEntry({machine: host, login: 'user', password: apiKey});
+  netrc.save();
+  console.log(`Successfully logged in. Credentials saved for ${host}`);
+}
}

/**