Commit 0bf7389

Merge branch 'master' into IvanKirchev/DocsFix
IvanKirchev authored Jan 14, 2025
2 parents 0a9f18e + 1701e88 commit 0bf7389
Showing 144 changed files with 7,885 additions and 3,180 deletions.
1 change: 1 addition & 0 deletions .github/CODEOWNERS
@@ -1,4 +1,5 @@
* @wandb/weave-team
+/docs/ @wandb/docs-team @wandb/weave-team
weave-js/src/common @wandb/fe-infra-reviewers
weave-js/src/components @wandb/fe-infra-reviewers @wandb/weave-team
weave-js/src/assets @wandb/fe-infra-reviewers @wandb/weave-team
28 changes: 26 additions & 2 deletions .github/workflows/notify-wandb-core.yaml
@@ -6,14 +6,38 @@ name: Notify wandb/core
on:
  push:
    branches:
-     - '**'
+     - "**"
  workflow_dispatch:

+permissions:
+  packages: write
+
jobs:
+  publish-package:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Configure npm for GitHub Packages
+        run: |
+          echo "//npm.pkg.github.com/:_authToken=${{ secrets.GITHUB_TOKEN }}" >> weave-js/.npmrc
+      - name: Publish package
+        run: |
+          cd weave-js
+          yarn install --frozen-lockfile
+          npm version 0.0.0-${{ github.sha }} --no-git-tag-version
+          yarn generate
+          cp package.json README.md .npmrc src/
+          cd src
+          if [ "${{ github.ref }}" = "refs/heads/master" ]; then
+            npm publish
+          else
+            npm publish --tag prerelease
+          fi
  check-which-tests-to-run:
    uses: ./.github/workflows/check-which-tests-to-run.yaml
  notify-wandb-core:
-    needs: check-which-tests-to-run
+    needs: [check-which-tests-to-run, publish-package]
    runs-on: ubuntu-latest
    steps:
      - name: Repository dispatch
69 changes: 36 additions & 33 deletions .github/workflows/test.yaml
@@ -83,9 +83,9 @@ jobs:
          WANDB_ENABLE_TEST_CONTAINER: true
          LOGGING_ENABLED: true
        ports:
-         - '8080:8080'
-         - '8083:8083'
-         - '9015:9015'
+         - "8080:8080"
+         - "8083:8083"
+         - "9015:9015"
        options: >-
          --health-cmd "wget -q -O /dev/null http://localhost:8080/healthz || exit 1"
          --health-interval=5s
@@ -165,7 +165,10 @@ jobs:
      - uses: actions/setup-node@v1
        if: steps.check_run.outputs.should_lint_and_compile == 'true'
        with:
-         node-version: '18.x'
+         node-version: "18.x"
+     - name: Configure npm for GitHub Packages
+       run: |
+         echo "//npm.pkg.github.com/:_authToken=${{ secrets.GITHUB_TOKEN }}" >> .npmrc
      - name: Run WeaveJS Lint and Compile
        if: steps.check_run.outputs.should_lint_and_compile == 'true'
        run: |
@@ -218,36 +221,36 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-       python-version-major: ['3']
+       python-version-major: ["3"]
        python-version-minor: [
-           '9',
-           '10',
-           '11',
-           '12',
-           '13',
+           "9",
+           "10",
+           "11",
+           "12",
+           "13",
            #
          ]
        nox-shard:
          [
-           'trace',
-           'trace_server',
-           'anthropic',
-           'cerebras',
-           'cohere',
-           'dspy',
-           'groq',
-           'google_ai_studio',
-           'instructor',
-           'langchain',
-           'litellm',
-           'llamaindex',
-           'mistral0',
-           'mistral1',
-           'notdiamond',
-           'openai',
-           'vertexai',
-           'scorers_tests',
-           'pandas-test',
+           "trace",
+           "trace_server",
+           "anthropic",
+           "cerebras",
+           "cohere",
+           "dspy",
+           "groq",
+           "google_ai_studio",
+           "instructor",
+           "langchain",
+           "litellm",
+           "llamaindex",
+           "mistral0",
+           "mistral1",
+           "notdiamond",
+           "openai",
+           "vertexai",
+           "scorers_tests",
+           "pandas-test",
          ]
      fail-fast: false
    services:
@@ -261,9 +264,9 @@ jobs:
          WANDB_ENABLE_TEST_CONTAINER: true
          LOGGING_ENABLED: true
        ports:
-         - '8080:8080'
-         - '8083:8083'
-         - '9015:9015'
+         - "8080:8080"
+         - "8083:8083"
+         - "9015:9015"
        options: >-
          --health-cmd "wget -q -O /dev/null http://localhost:8080/healthz || exit 1"
          --health-interval=5s
@@ -272,7 +275,7 @@
      weave_clickhouse:
        image: clickhouse/clickhouse-server
        ports:
-         - '8123:8123'
+         - "8123:8123"
        options: --health-cmd "wget -nv -O- 'http://localhost:8123/ping' || exit 1" --health-interval=5s --health-timeout=3s
    steps:
      - name: Checkout
18 changes: 0 additions & 18 deletions docs/docs/guides/cookbooks/prod_dashboard.md

This file was deleted.

1 change: 0 additions & 1 deletion docs/docs/guides/cookbooks/summarization/.gitignore

This file was deleted.

2 changes: 1 addition & 1 deletion docs/docs/guides/core-types/datasets.md
@@ -13,7 +13,7 @@ This guide will show you how to:

## Sample code

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
```python
import weave
28 changes: 28 additions & 0 deletions docs/docs/guides/core-types/env-vars.md
@@ -0,0 +1,28 @@
# Environment variables

Weave provides a set of environment variables to configure and optimize its behavior. You can set these variables in your shell or within scripts to control specific functionality.

```bash
# Example of setting environment variables in the shell
export WEAVE_PARALLELISM=10         # Controls the number of parallel workers
export WEAVE_PRINT_CALL_LINK=false  # Disables call link output
```

```python
# Example of setting environment variables in Python
import os

os.environ["WEAVE_PARALLELISM"] = "10"
os.environ["WEAVE_PRINT_CALL_LINK"] = "false"
```

## Environment variables reference

| Variable Name            | Description                                                                                                            |
|--------------------------|------------------------------------------------------------------------------------------------------------------------|
| WEAVE_CAPTURE_CODE       | If set to `false`, disables code capture for `weave.op`.                                                               |
| WEAVE_DEBUG_HTTP         | If set to `1`, turns on HTTP request and response logging for debugging.                                               |
| WEAVE_DISABLED           | If set to `true`, all tracing to Weave is disabled.                                                                    |
| WEAVE_PARALLELISM        | In evaluations, the number of examples to evaluate in parallel. `1` runs examples sequentially. Default value is `20`. |
| WEAVE_PRINT_CALL_LINK    | If set to `false`, suppresses call URL printing. Default value is `true`.                                              |
| WEAVE_TRACE_LANGCHAIN    | If set to `false`, explicitly disables global tracing for LangChain.                                                   |
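
For instance, a minimal sketch of turning tracing off entirely for a test run; the project name is hypothetical, and the variable must be set before `weave.init()` runs:

```python
import os

# Set before weave.init() so the setting takes effect.
os.environ["WEAVE_DISABLED"] = "true"

import weave

weave.init("my-team/my-project")  # hypothetical project; no traces are recorded

@weave.op
def add(a: int, b: int) -> int:
    return a + b

add(1, 2)  # runs normally, but nothing is sent to Weave
```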
4 changes: 2 additions & 2 deletions docs/docs/guides/core-types/media.md
@@ -9,7 +9,7 @@ Weave supports logging and displaying multiple first-class media types. Log imag

Logging type: `PIL.Image.Image`. Here is an example of logging an image with the OpenAI DALL-E API:

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>

```python
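# A hedged sketch alongside the folded DALL-E example: any weave.op that
# returns a PIL.Image.Image is logged the same way. The project name is
# hypothetical.
import weave
from PIL import Image

weave.init("my-project")

@weave.op
def make_image(width: int, height: int) -> Image.Image:
    # Returning a PIL image logs it as media in the trace.
    return Image.new("RGB", (width, height), color="purple")

make_image(64, 64)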
@@ -83,7 +83,7 @@ This image will be logged to Weave and automatically displayed in the UI. The fo

Logging type: `wave.Wave_read`. Here is an example of logging an audio file using OpenAI's speech generation API.

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>

```python
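# A hedged sketch alongside the folded OpenAI speech example: any weave.op
# returning a wave.Wave_read is logged as audio. The file path and project
# name are hypothetical.
import wave
import weave

weave.init("my-project")

@weave.op
def load_audio(path: str) -> wave.Wave_read:
    return wave.open(path, "rb")

load_audio("speech.wav")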
2 changes: 1 addition & 1 deletion docs/docs/guides/core-types/models.md
@@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';

# Models

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
A `Model` is a combination of data (which can include configuration, trained model weights, or other information) and code that defines how the model operates. By structuring your code to be compatible with this API, you benefit from a structured way to version your application so you can more systematically keep track of your experiments.
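
A minimal sketch of the pattern, with illustrative attribute names and a stubbed prediction step:

```python
import weave

class MyModel(weave.Model):
    # Configuration is captured and versioned with the model.
    model_name: str
    system_prompt: str

    @weave.op
    def predict(self, question: str) -> str:
        # Call your LLM here; stubbed for illustration.
        return f"[{self.model_name}] {self.system_prompt} -> {question}"

model = MyModel(model_name="demo-model", system_prompt="Answer concisely.")
model.predict("What is Weave?")
```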

12 changes: 6 additions & 6 deletions docs/docs/guides/evaluation/scorers.md
@@ -7,7 +7,7 @@ import TabItem from '@theme/TabItem';

In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics. They take the AI's output, analyze it, and return a dictionary of results. Scorers can use your input data as reference if needed and can also output extra information, such as explanations or reasonings from the evaluation.

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
Scorers are passed to a `weave.Evaluation` object during evaluation. There are two types of Scorers in Weave:

@@ -26,7 +26,7 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics.

### Function-based Scorers

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
These are functions decorated with `@weave.op` that return a dictionary. They're great for simple evaluations like:

@@ -68,7 +68,7 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics.

### Class-based Scorers

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
For more advanced evaluations, especially when you need to keep track of additional scorer metadata, try different prompts for your LLM-evaluators, or make multiple function calls, you can use the `Scorer` class.
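
For instance, a minimal sketch of a class-based scorer; the field and metric names are illustrative:

```python
import weave

class LengthScorer(weave.Scorer):
    # Scorer metadata, versioned along with the scorer itself.
    max_length: int = 100

    @weave.op
    def score(self, output: str) -> dict:
        return {"within_limit": len(output) <= self.max_length}
```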

@@ -139,7 +139,7 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics.

### Scorer Keyword Arguments

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
Scorers can access both the output from your AI system and the input data from the dataset row.
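
As a hedged sketch, a scorer whose parameters are matched against the dataset columns and the model output; the column name `label` is illustrative:

```python
import weave

@weave.op
def match_scorer(label: str, output: str) -> dict:
    # `label` is read from the dataset row; `output` comes from your AI system.
    return {"match": label == output}
```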

@@ -256,7 +256,7 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics.

### Final summarization of the scorer

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
During evaluation, the scorer will be computed for each row of your dataset. To provide a final score for the evaluation, we provide an `auto_summarize` depending on the return type of the output.
- Averages are computed for numerical columns
@@ -305,7 +305,7 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics.

## Predefined Scorers

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
**Installation**

26 changes: 26 additions & 0 deletions docs/docs/guides/integrations/azure.md
@@ -0,0 +1,26 @@
# Microsoft Azure

Weights & Biases integrates with Microsoft Azure OpenAI services, helping teams to manage, debug, and optimize their Azure AI workflows at scale. This guide introduces the W&B integration, what it means for Weave users, its key features, and how to get started.

## Key features

- **LLM evaluations**: Evaluate and monitor LLM-powered applications using Weave, optimized for Azure infrastructure.
- **Seamless integration**: Deploy W&B Models on a dedicated Azure tenant with built-in integrations for Azure AI Studio, Azure ML, Azure OpenAI Service, and other Azure AI services.
- **Enhanced performance**: Use Azure’s infrastructure to train and deploy models faster, with auto-scaling clusters and optimized resources.
- **Scalable experiment tracking**: Automatically log hyperparameters, metrics, and artifacts for Azure AI Studio and Azure ML runs.
- **LLM fine-tuning**: Fine-tune models with W&B Models.
- **Central repository for models and datasets**: Manage and version models and datasets with W&B Registry and Azure AI Studio.
- **Collaborative workspaces**: Support teamwork with shared workspaces, experiment commenting, and Microsoft Teams integration.
- **Governance framework**: Ensure security with fine-grained access controls, audit trails, and Microsoft Entra ID integration.

## Getting started

To use W&B with Azure, add the W&B integration via the [Azure Marketplace](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/weightsandbiasesinc1641502883483.weights_biases_for_azure?tab=Overview).

For a detailed guide describing how to integrate Azure OpenAI fine-tuning with W&B, see [Integrating Weights & Biases with Azure AI Services](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/weights-and-biases-integration).

## Learn more

- [Weights & Biases + Microsoft Azure Overview](https://wandb.ai/site/partners/azure)
- [How W&B and Microsoft Azure Are Empowering Enterprises](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/how-weights--biases-and-microsoft-azure-are-empowering-enterprises-to-fine-tune-/4303716)
- [Microsoft Azure OpenAI Service Documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/)
1 change: 1 addition & 0 deletions docs/docs/guides/integrations/index.md
@@ -15,6 +15,7 @@ LLM providers are the vendors that offer access to large language models for gen
- **[Cerebras](/guides/integrations/cerebras)**
- **[Cohere](/guides/integrations/cohere)**
- **[MistralAI](/guides/integrations/mistral)**
+- **[Microsoft Azure](/guides/integrations/azure)**
- **[Google Gemini](/guides/integrations/google-gemini)**
- **[Together AI](/guides/integrations/together_ai)**
- **[Groq](/guides/integrations/groq)**
6 changes: 3 additions & 3 deletions docs/docs/guides/integrations/nvidia_nim.md
@@ -9,7 +9,7 @@ Weave automatically tracks and logs LLM calls made via the [ChatNVIDIA](https://

It’s important to store traces of LLM applications in a central database, both during development and in production. You’ll use these traces for debugging and to help build a dataset of tricky examples to evaluate against while improving your application.

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
Weave can automatically capture traces for the [ChatNVIDIA python library](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/).

@@ -43,7 +43,7 @@ It’s important to store traces of LLM applications in a central database, both

## Track your own ops

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
Wrapping a function with `@weave.op` starts capturing inputs, outputs and app logic so you can debug how data flows through your app. You can deeply nest ops and build a tree of functions that you want to track. This also starts automatically versioning code as you experiment to capture ad-hoc details that haven't been committed to git.
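
A minimal sketch of nested ops, assuming an initialized project (the name is hypothetical):

```python
import weave

weave.init("my-project")

@weave.op
def tokenize(text: str) -> list[str]:
    return text.split()

@weave.op
def count_words(text: str) -> int:
    # Nested op call: both ops show up in the same trace tree.
    return len(tokenize(text))

count_words("trace this sentence")
```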

@@ -119,7 +119,7 @@

## Create a `Model` for easier experimentation

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
Organizing experimentation is difficult when there are many moving pieces. By using the [`Model`](/guides/core-types/models) class, you can capture and organize the experimental details of your app like your system prompt or the model you're using. This helps organize and compare different iterations of your app.

6 changes: 3 additions & 3 deletions docs/docs/guides/integrations/openai.md
@@ -7,7 +7,7 @@ import TabItem from '@theme/TabItem';

It’s important to store traces of LLM applications in a central database, both during development and in production. You’ll use these traces for debugging and to help build a dataset of tricky examples to evaluate against while improving your application.

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
Weave can automatically capture traces for the [openai python library](https://platform.openai.com/docs/libraries/python-library).
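
A minimal sketch of the pattern, assuming `OPENAI_API_KEY` is set; the project and model names are illustrative:

```python
import weave
from openai import OpenAI

weave.init("my-team/my-project")

client = OpenAI()
# Once weave.init() has run, this call is traced automatically.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hello."}],
)
print(response.choices[0].message.content)
```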

@@ -79,7 +79,7 @@ It’s important to store traces of LLM applications in a central database, both

## Track your own ops

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
Wrapping a function with `@weave.op` starts capturing inputs, outputs and app logic so you can debug how data flows through your app. You can deeply nest ops and build a tree of functions that you want to track. This also starts automatically versioning code as you experiment to capture ad-hoc details that haven't been committed to git.

@@ -249,7 +249,7 @@ Wrapping a function with `weave.op` starts capturing inputs, outputs and app log

## Create a `Model` for easier experimentation

-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
Organizing experimentation is difficult when there are many moving pieces. By using the [`Model`](/guides/core-types/models) class, you can capture and organize the experimental details of your app like your system prompt or the model you're using. This helps organize and compare different iterations of your app.

