Merge branch 'master' into DOCS-1086

wandb · Jan 22, 2025 · 5062f03 · 5062f03
2 parents 07897e2 + 100d145
commit 5062f03
Show file tree

Hide file tree

Showing 224 changed files with 10,426 additions and 3,276 deletions.
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -1,4 +1,5 @@
 * @wandb/weave-team
+/docs/ @wandb/docs-team @wandb/weave-team
 weave-js/src/common @wandb/fe-infra-reviewers
 weave-js/src/components @wandb/fe-infra-reviewers @wandb/weave-team
 weave-js/src/assets @wandb/fe-infra-reviewers @wandb/weave-team
diff --git a/.github/workflows/notify-wandb-core.yaml b/.github/workflows/notify-wandb-core.yaml
@@ -6,14 +6,38 @@ name: Notify wandb/core
 on:
   push:
     branches:
-      - '**'
+      - "**"
   workflow_dispatch:
 
+permissions:
+  packages: write
+
 jobs:
+  publish-package:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Configure npm for GitHub Packages
+        run: |
+          echo "//npm.pkg.github.com/:_authToken=${{ secrets.GITHUB_TOKEN }}" >> weave-js/.npmrc
+      - name: Publish package
+        run: |
+          cd weave-js
+          yarn install --frozen-lockfile
+          npm version 0.0.0-${{ github.sha }} --no-git-tag-version
+          yarn generate
+          cp package.json README.md .npmrc src/
+          cd src
+          if [ "${{ github.ref }}" = "refs/heads/master" ]; then
+            npm publish
+          else
+            npm publish --tag prerelease
+          fi
   check-which-tests-to-run:
     uses: ./.github/workflows/check-which-tests-to-run.yaml
   notify-wandb-core:
-    needs: check-which-tests-to-run
+    needs: [check-which-tests-to-run, publish-package]
     runs-on: ubuntu-latest
     steps:
       - name: Repository dispatch

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -83,9 +83,9 @@ jobs:
           WANDB_ENABLE_TEST_CONTAINER: true
           LOGGING_ENABLED: true
         ports:
-          - '8080:8080'
-          - '8083:8083'
-          - '9015:9015'
+          - "8080:8080"
+          - "8083:8083"
+          - "9015:9015"
         options: >-
           --health-cmd "wget -q -O /dev/null http://localhost:8080/healthz || exit 1"
           --health-interval=5s
@@ -165,7 +165,10 @@ jobs:
       - uses: actions/setup-node@v1
         if: steps.check_run.outputs.should_lint_and_compile == 'true'
         with:
-          node-version: '18.x'
+          node-version: "18.x"
+      - name: Configure npm for GitHub Packages
+        run: |
+          echo "//npm.pkg.github.com/:_authToken=${{ secrets.GITHUB_TOKEN }}" >> .npmrc
       - name: Run WeaveJS Lint and Compile
         if: steps.check_run.outputs.should_lint_and_compile == 'true'
         run: |
@@ -218,36 +221,37 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version-major: ['3']
+        python-version-major: ["3"]
         python-version-minor: [
-            '9',
-            '10',
-            '11',
-            '12',
-            '13',
+            "9",
+            "10",
+            "11",
+            "12",
+            "13",
             #
           ]
         nox-shard:
           [
-            'trace',
-            'trace_server',
-            'anthropic',
-            'cerebras',
-            'cohere',
-            'dspy',
-            'groq',
-            'google_ai_studio',
-            'instructor',
-            'langchain',
-            'litellm',
-            'llamaindex',
-            'mistral0',
-            'mistral1',
-            'notdiamond',
-            'openai',
-            'vertexai',
-            'scorers_tests',
-            'pandas-test',
+            "trace",
+            "trace_server",
+            "anthropic",
+            "bedrock",
+            "cerebras",
+            "cohere",
+            "dspy",
+            "groq",
+            "google_ai_studio",
+            "instructor",
+            "langchain",
+            "litellm",
+            "llamaindex",
+            "mistral0",
+            "mistral1",
+            "notdiamond",
+            "openai",
+            "vertexai",
+            "scorers_tests",
+            "pandas-test",
           ]
       fail-fast: false
     services:
@@ -261,9 +265,9 @@ jobs:
           WANDB_ENABLE_TEST_CONTAINER: true
           LOGGING_ENABLED: true
         ports:
-          - '8080:8080'
-          - '8083:8083'
-          - '9015:9015'
+          - "8080:8080"
+          - "8083:8083"
+          - "9015:9015"
         options: >-
           --health-cmd "wget -q -O /dev/null http://localhost:8080/healthz || exit 1"
           --health-interval=5s
@@ -272,13 +276,15 @@ jobs:
       weave_clickhouse:
         image: clickhouse/clickhouse-server
         ports:
-          - '8123:8123'
+          - "8123:8123"
         options: --health-cmd "wget -nv -O- 'http://localhost:8123/ping' || exit 1" --health-interval=5s --health-timeout=3s
     steps:
       - name: Checkout
         uses: actions/checkout@v3
       - name: Enable debug logging
         run: echo "ACTIONS_STEP_DEBUG=true" >> $GITHUB_ENV
+      - name: Install SQLite dev package
+        run: sudo apt update && sudo apt install -y libsqlite3-dev
       - name: Set up Python ${{ matrix.python-version-major }}.${{ matrix.python-version-minor }}
         uses: actions/setup-python@v5
         with:
@@ -305,6 +311,7 @@ jobs:
           WB_SERVER_HOST: http://wandbservice
           WF_CLICKHOUSE_HOST: weave_clickhouse
           WEAVE_SERVER_DISABLE_ECOSYSTEM: 1
+          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
           GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}

diff --git a/Makefile b/Makefile
@@ -14,6 +14,9 @@ docs:
 build:
 	uv build
 
+prerelease-dry-run:
+	uv run ./weave/scripts/prerelease_dry_run.py
+
 prepare-release: docs build
 
 synchronize-base-object-schemas:

diff --git a/dev_docs/RELEASE.md b/dev_docs/RELEASE.md
@@ -4,7 +4,9 @@ This document outlines how to publish a new Weave release to our public [PyPI pa
 
 1. Verify the head of master is ready for release and announce merge freeze to the Weave team while the release is being published (Either ask an admin on the Weave repo to place a freeze on https://www.mergefreeze.com/ or use the mergefreeze Slack app if it is set up or just post in Slack)
 
-2. You should also run through this [sample notebook](https://colab.research.google.com/drive/1DmkLzhFCFC0OoN-ggBDoG1nejGw2jQZy#scrollTo=29hJrcJQA7jZ) remember to install from master. You can also just run the [quickstart](http://wandb.me/weave_colab).
+2. Manual Verifications:
+   - Run `make prerelease-dry-run` to verify that the dry run script works.
+   - You should also run through this [sample notebook](https://colab.research.google.com/drive/1DmkLzhFCFC0OoN-ggBDoG1nejGw2jQZy#scrollTo=29hJrcJQA7jZ). Remember to install from master. You can also just run the [quickstart](http://wandb.me/weave_colab).
 
 3. To prepare a PATCH release, go to GitHub Actions and run the [bump-python-sdk-version](https://github.com/wandb/weave/actions/workflows/bump_version.yaml) workflow on master. This will:
 

diff --git a/docs/docs/guides/cookbooks/prod_dashboard.md b/docs/docs/guides/cookbooks/prod_dashboard.md
diff --git a/docs/docs/guides/cookbooks/summarization/.gitignore b/docs/docs/guides/cookbooks/summarization/.gitignore
diff --git a/docs/docs/guides/core-types/datasets.md b/docs/docs/guides/core-types/datasets.md
@@ -11,9 +11,9 @@ This guide will show you how to:
 - Download the latest version
 - Iterate over examples
 
-## Sample code
+## Quickstart
 
-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
   <TabItem value="python" label="Python" default>
     ```python
     import weave
@@ -68,3 +68,56 @@ This guide will show you how to:
 
   </TabItem>
 </Tabs>
+
+## Alternate constructors
+
+<Tabs groupId="programming-language" queryString>
+  <TabItem value="python" label="Python" default>
+  Datasets can also be constructed from common Weave objects like `Call`s, and popular Python objects like `pandas.DataFrame`s.
+  <Tabs groupId="use-case">
+    <TabItem value="from-calls" label="From Calls">
+    This can be useful if you want to create a dataset from specific calls.
+
+    ```python
+    @weave.op
+    def model(task: str) -> str:
+        return f"Now working on {task}"
+
+    res1, call1 = model.call(task="fetch")
+    res2, call2 = model.call(task="parse")
+
+    dataset = Dataset.from_calls([call1, call2])
+    # Now you can use the dataset to evaluate the model, etc.
+    ```
+    </TabItem>
+
+    <TabItem value="from-pandas" label="From Pandas">
+    You can also freely convert between `Dataset`s and `pandas.DataFrame`s.
+
+    ```python
+    import pandas as pd
+
+    df = pd.DataFrame([
+        {'id': '0', 'sentence': "He no likes ice cream.", 'correction': "He doesn't like ice cream."},
+        {'id': '1', 'sentence': "She goed to the store.", 'correction': "She went to the store."},
+        {'id': '2', 'sentence': "They plays video games all day.", 'correction': "They play video games all day."}
+    ])
+    dataset = Dataset.from_pandas(df)
+    df2 = dataset.to_pandas()
+
+    assert df.equals(df2)
+    ```
+
+    </TabItem>
+
+  </Tabs>
+
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+
+```typescript
+This feature is not available in TypeScript yet.  Stay tuned!
+```
+
+  </TabItem>
+</Tabs>
diff --git a/docs/docs/guides/core-types/env-vars.md b/docs/docs/guides/core-types/env-vars.md
@@ -0,0 +1,28 @@
+# Environment variables
+
+Weave provides a set of environment variables to configure and optimize its behavior. You can set these variables in your shell or within scripts to control specific functionality.
+
+```bash
+# Example of setting environment variables in the shell
+WEAVE_PARALLELISM=10  # Controls the number of parallel workers
+WEAVE_PRINT_CALL_LINK=false  # Disables call link output
+```
+
+```python
+# Example of setting environment variables in Python
+import os
+
+os.environ["WEAVE_PARALLELISM"] = "10"
+os.environ["WEAVE_PRINT_CALL_LINK"] = "false"
+```
+
+## Environment variables reference 
+
+| Variable Name            | Description                                                     |
+|--------------------------|-----------------------------------------------------------------|
+| WEAVE_CAPTURE_CODE      | Disable code capture for `weave.op` if set to `false`.                                    |
+| WEAVE_DEBUG_HTTP        | If set to `1`, turns on HTTP request and response logging for debugging.  |
+| WEAVE_DISABLED          | If set to `true`, all tracing to Weave is disabled.      |
+| WEAVE_PARALLELISM       | In evaluations, the number of examples to evaluate in parallel. `1` runs examples sequentially. Default value is `20`.    |
+| WEAVE_PRINT_CALL_LINK   | If set to `false`, call URL printing is suppressed. Default value is `false`.                            |
+| WEAVE_TRACE_LANGCHAIN   | When set to `false`, explicitly disables global tracing for LangChain.  |
diff --git a/docs/docs/guides/core-types/media.md b/docs/docs/guides/core-types/media.md
@@ -9,7 +9,7 @@ Weave supports logging and displaying multiple first class media types. Log imag
 
 Logging type: `PIL.Image.Image`. Here is an example of logging an image with the OpenAI DALL-E API:
 
-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
   <TabItem value="python" label="Python" default>
 
     ```python
@@ -83,7 +83,7 @@ This image will be logged to weave and automatically displayed in the UI. The fo
 
 Logging type: `wave.Wave_read`. Here is an example of logging an audio file using openai's speech generation API.
 
-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
   <TabItem value="python" label="Python" default>
 
     ```python

diff --git a/docs/docs/guides/core-types/models.md b/docs/docs/guides/core-types/models.md
@@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
 
 # Models
 
-<Tabs groupId="programming-language">
+<Tabs groupId="programming-language" queryString>
   <TabItem value="python" label="Python" default>
     A `Model` is a combination of data (which can include configuration, trained model weights, or other information) and code that defines how the model operates. By structuring your code to be compatible with this API, you benefit from a structured way to version your application so you can more systematically keep track of your experiments.