);
case 'enum':
diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ScorersPage/AnnotationScorerForm.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ScorersPage/AnnotationScorerForm.tsx
index 3b14910b12b3..bbdca3aea5a3 100644
--- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ScorersPage/AnnotationScorerForm.tsx
+++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ScorersPage/AnnotationScorerForm.tsx
@@ -28,7 +28,7 @@ const AnnotationScorerFormSchema = z.object({
}),
z.object({
type: z.literal('string'),
- max_length: z
+ maxLength: z
.number()
.optional()
.describe('Optional maximum length of the string'),
@@ -95,7 +95,7 @@ export const onAnnotationScorerSave = async (
val: {
name: data.Name,
description: data.Description,
- json_schema: {
+ field_schema: {
...data.Type,
type,
},
diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/generatedBaseObjectClasses.zod.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/generatedBaseObjectClasses.zod.ts
index 0b431394f6b4..da218f47a85c 100644
--- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/generatedBaseObjectClasses.zod.ts
+++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/generatedBaseObjectClasses.zod.ts
@@ -17,7 +17,7 @@ export type Config = z.infer
;
export const AnnotationSpecSchema = z.object({
description: z.union([z.null(), z.string()]).optional(),
- json_schema: z.record(z.string(), z.any()).optional(),
+ field_schema: z.record(z.string(), z.any()).optional(),
name: z.union([z.null(), z.string()]).optional(),
op_scope: z.union([z.array(z.string()), z.null()]).optional(),
unique_among_creators: z.boolean().optional(),
diff --git a/weave/trace_server/clickhouse_trace_server_batched.py b/weave/trace_server/clickhouse_trace_server_batched.py
index f28e204aa41d..67bce362890c 100644
--- a/weave/trace_server/clickhouse_trace_server_batched.py
+++ b/weave/trace_server/clickhouse_trace_server_batched.py
@@ -1330,7 +1330,7 @@ def cost_purge(self, req: tsi.CostPurgeReq) -> tsi.CostPurgeRes:
def feedback_create(self, req: tsi.FeedbackCreateReq) -> tsi.FeedbackCreateRes:
assert_non_null_wb_user_id(req)
- validate_feedback_create_req(req)
+ validate_feedback_create_req(req, self)
# Augment emoji with alias.
res_payload = {}
diff --git a/weave/trace_server/feedback.py b/weave/trace_server/feedback.py
index 7efedcd8b8c1..33d7c9c31cbf 100644
--- a/weave/trace_server/feedback.py
+++ b/weave/trace_server/feedback.py
@@ -3,6 +3,9 @@
from weave.trace_server import refs_internal as ri
from weave.trace_server import trace_server_interface as tsi
from weave.trace_server.errors import InvalidRequest
+from weave.trace_server.interface.base_object_classes.annotation_spec import (
+ AnnotationSpec,
+)
from weave.trace_server.interface.feedback_types import (
ANNOTATION_FEEDBACK_TYPE_PREFIX,
FEEDBACK_PAYLOAD_SCHEMAS,
@@ -38,7 +41,9 @@
)
-def validate_feedback_create_req(req: tsi.FeedbackCreateReq) -> None:
+def validate_feedback_create_req(
+ req: tsi.FeedbackCreateReq, trace_server: tsi.TraceServerInterface
+) -> None:
payload_schema = FEEDBACK_PAYLOAD_SCHEMAS.get(req.feedback_type)
if payload_schema:
try:
@@ -104,7 +109,25 @@ def validate_feedback_create_req(req: tsi.FeedbackCreateReq) -> None:
# Validate the ref formats (we could even query the DB to ensure they exist and are valid)
if req.annotation_ref:
- ensure_ref_is_valid(req.annotation_ref, (ri.InternalObjectRef,))
+ parsed = ensure_ref_is_valid(req.annotation_ref, (ri.InternalObjectRef,))
+ if parsed.project_id != req.project_id:
+ raise InvalidRequest(
+ f"Annotation ref {req.annotation_ref} does not match project id {req.project_id}"
+ )
+
+ # 2. Read the annotation spec
+ data = trace_server.refs_read_batch(
+ tsi.RefsReadBatchReq(refs=[req.annotation_ref])
+ )
+ if len(data.vals) == 0:
+ raise InvalidRequest(f"Annotation ref {req.annotation_ref} not found")
+
+ # 3. Validate the payload against the annotation spec
+ value = req.payload["value"]
+ spec = data.vals[0]
+ is_valid = AnnotationSpec.model_validate(spec).value_is_valid(value)
+ if not is_valid:
+ raise InvalidRequest("Feedback payload does not match annotation spec")
if req.runnable_ref:
ensure_ref_is_valid(req.runnable_ref, (ri.InternalOpRef, ri.InternalObjectRef))
if req.call_ref:
diff --git a/weave/trace_server/interface/base_object_classes/annotation_spec.py b/weave/trace_server/interface/base_object_classes/annotation_spec.py
index a5dc3e1c1695..a28e06d512c8 100644
--- a/weave/trace_server/interface/base_object_classes/annotation_spec.py
+++ b/weave/trace_server/interface/base_object_classes/annotation_spec.py
@@ -1,18 +1,21 @@
-from typing import Optional
+from typing import Any, Optional
import jsonschema
-from pydantic import Field, field_validator
+from pydantic import BaseModel, Field, create_model, field_validator, model_validator
+from pydantic.fields import FieldInfo
from weave.trace_server.interface.base_object_classes import base_object_def
+SUPPORTED_PRIMITIVES = (int, float, bool, str)
+
class AnnotationSpec(base_object_def.BaseObject):
- json_schema: dict = Field(
+ field_schema: dict[str, Any] = Field(
default={},
- description="Expected to be valid JSON Schema",
+ description="Expected to be valid JSON Schema. Can be provided as a dict, a Pydantic model class, a tuple of a primitive type and a Pydantic Field, or primitive type",
examples=[
# String feedback
- {"type": "string", "max_length": 100},
+ {"type": "string", "maxLength": 100},
# Number feedback
{"type": "number", "minimum": 0, "maximum": 100},
# Integer feedback
@@ -41,12 +44,74 @@ class AnnotationSpec(base_object_def.BaseObject):
],
)
- @field_validator("json_schema")
- def validate_json_schema(cls, v: dict) -> dict:
+ @model_validator(mode="before")
+ @classmethod
+ def preprocess_field_schema(cls, data: dict[str, Any]) -> dict[str, Any]:
+ if "field_schema" not in data:
+ return data
+
+ field_schema = data["field_schema"]
+
+ temp_field_tuple = None
+ # Handle Pydantic Field
+ if isinstance(field_schema, tuple):
+ if len(field_schema) != 2:
+ raise ValueError("Expected a tuple of length 2")
+ annotation, field = field_schema
+ if (
+ not isinstance(annotation, type)
+ ) or annotation not in SUPPORTED_PRIMITIVES:
+ raise TypeError("Expected annotation to be a primitive type")
+ if not isinstance(field, FieldInfo):
+ raise TypeError("Expected field to be a Pydantic Field")
+ temp_field_tuple = (annotation, field)
+ elif field_schema in SUPPORTED_PRIMITIVES:
+ temp_field_tuple = (field_schema, Field())
+
+ if temp_field_tuple is not None:
+ # Create a temporary model to leverage Pydantic's schema generation
+ TempModel = create_model("TempModel", field=temp_field_tuple)
+
+ schema = TempModel.model_json_schema()["properties"]["field"]
+
+ if (
+ "title" in schema and schema["title"] == "Field"
+ ): # default title for Field
+ schema.pop("title")
+
+ data["field_schema"] = schema
+ return data
+
+ # Handle Pydantic model
+ if isinstance(field_schema, type) and issubclass(field_schema, BaseModel):
+ data["field_schema"] = field_schema.model_json_schema() # type: ignore
+ return data
+
+ return data
+
+ @field_validator("field_schema")
+ def validate_field_schema(cls, schema: dict[str, Any]) -> dict[str, Any]:
+ # Validate the schema
try:
- jsonschema.validate(None, v)
+ jsonschema.validate(None, schema)
except jsonschema.exceptions.SchemaError as e:
raise e
except jsonschema.exceptions.ValidationError:
pass # we don't care that `None` does not conform
- return v
+ return schema
+
+ def value_is_valid(self, payload: Any) -> bool:
+ """
+ Validates a payload against this annotation spec's schema.
+
+ Args:
+ payload: The data to validate against the schema
+
+ Returns:
+ bool: True if validation succeeds, False otherwise
+ """
+ try:
+ jsonschema.validate(payload, self.field_schema)
+ except jsonschema.exceptions.ValidationError:
+ return False
+ return True
diff --git a/weave/trace_server/interface/base_object_classes/generated/generated_base_object_class_schemas.json b/weave/trace_server/interface/base_object_classes/generated/generated_base_object_class_schemas.json
index 0bcdcb589af6..a50c85688bd1 100644
--- a/weave/trace_server/interface/base_object_classes/generated/generated_base_object_class_schemas.json
+++ b/weave/trace_server/interface/base_object_classes/generated/generated_base_object_class_schemas.json
@@ -77,31 +77,36 @@
"default": null,
"title": "Description"
},
- "json_schema": {
+ "field_schema": {
"default": {},
- "description": "Expected to be valid JSON Schema",
+ "description": "Expected to be valid JSON Schema. Can be provided as a dict or a Pydantic model class",
"examples": [
{
"max_length": 100,
"type": "string"
},
{
- "max": 100,
- "min": 0,
+ "maximum": 100,
+ "minimum": 0,
"type": "number"
},
+ {
+ "maximum": 100,
+ "minimum": 0,
+ "type": "integer"
+ },
{
"type": "boolean"
},
{
- "options": [
+ "enum": [
"option1",
"option2"
],
- "type": "categorical"
+ "type": "string"
}
],
- "title": "Json Schema",
+ "title": "Field Schema",
"type": "object"
},
"unique_among_creators": {
diff --git a/weave/trace_server/sqlite_trace_server.py b/weave/trace_server/sqlite_trace_server.py
index 0ff9afa325dd..e18a06ce373c 100644
--- a/weave/trace_server/sqlite_trace_server.py
+++ b/weave/trace_server/sqlite_trace_server.py
@@ -974,7 +974,7 @@ def read_ref(r: ri.InternalObjectRef) -> Any:
def feedback_create(self, req: tsi.FeedbackCreateReq) -> tsi.FeedbackCreateRes:
assert_non_null_wb_user_id(req)
- validate_feedback_create_req(req)
+ validate_feedback_create_req(req, self)
# Augment emoji with alias.
res_payload = {}
From 1e4f7ed00ccc1ce3ddadf248185b99876523d317 Mon Sep 17 00:00:00 2001
From: Jamie Rasmussen <112953339+jamie-rasmussen@users.noreply.github.com>
Date: Mon, 25 Nov 2024 16:17:54 -0600
Subject: [PATCH 03/31] fix(ui): compare cells with boolean values appearing
empty (#3079)
---
.../Home/Browse3/compare/CompareGridCellValue.tsx | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/compare/CompareGridCellValue.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/compare/CompareGridCellValue.tsx
index 473f60cd8a13..654f2437ff69 100644
--- a/weave-js/src/components/PagePanelComponents/Home/Browse3/compare/CompareGridCellValue.tsx
+++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/compare/CompareGridCellValue.tsx
@@ -66,5 +66,9 @@ export const CompareGridCellValue = ({
return ;
}
+ if (valueType === 'boolean') {
+ return {value.toString()};
+ }
+
return {value}
;
};
From f9dd53d1b955db333ae309e852c68eda141a9d17 Mon Sep 17 00:00:00 2001
From: Griffin Tarpenning
Date: Mon, 25 Nov 2024 14:33:00 -0800
Subject: [PATCH 04/31] chore(weave): enable server side object-id validation
(#3074)
---
tests/trace/test_client_trace.py | 8 ++---
weave/trace_server/base_object_class_util.py | 2 +-
.../clickhouse_trace_server_batched.py | 4 +--
weave/trace_server/sqlite_trace_server.py | 4 +--
weave/trace_server/validation.py | 30 ++++---------------
5 files changed, 14 insertions(+), 34 deletions(-)
diff --git a/tests/trace/test_client_trace.py b/tests/trace/test_client_trace.py
index b0048536f15e..5b45abc8432d 100644
--- a/tests/trace/test_client_trace.py
+++ b/tests/trace/test_client_trace.py
@@ -31,6 +31,7 @@
from weave.trace.weave_client import sanitize_object_name
from weave.trace_server import trace_server_interface as tsi
from weave.trace_server.clickhouse_trace_server_batched import ENTITY_TOO_LARGE_PAYLOAD
+from weave.trace_server.errors import InvalidFieldError
from weave.trace_server.ids import generate_id
from weave.trace_server.refs_internal import extra_value_quoter
from weave.trace_server.sqlite_trace_server import SqliteTraceServer
@@ -39,7 +40,6 @@
WILDCARD_ARTIFACT_VERSION_AND_PATH,
extract_refs_from_values,
)
-from weave.trace_server.validation import SHOULD_ENFORCE_OBJ_ID_CHARSET
## Hacky interface compatibility helpers
@@ -2671,10 +2671,8 @@ def test_object_with_disallowed_keys(client):
}
}
)
-
- if SHOULD_ENFORCE_OBJ_ID_CHARSET:
- with pytest.raises(Exception):
- client.server.obj_create(create_req)
+ with pytest.raises(InvalidFieldError):
+ client.server.obj_create(create_req)
CHAR_LIMIT = 128
diff --git a/weave/trace_server/base_object_class_util.py b/weave/trace_server/base_object_class_util.py
index 969c378f32f7..1c52f766c0c1 100644
--- a/weave/trace_server/base_object_class_util.py
+++ b/weave/trace_server/base_object_class_util.py
@@ -29,7 +29,7 @@ def get_base_object_class(val: Any) -> Optional[str]:
return None
-def process_incoming_object(
+def process_incoming_object_val(
val: Any, req_base_object_class: Optional[str] = None
) -> tuple[dict, Optional[str]]:
"""
diff --git a/weave/trace_server/clickhouse_trace_server_batched.py b/weave/trace_server/clickhouse_trace_server_batched.py
index 67bce362890c..7dd19ba02ff3 100644
--- a/weave/trace_server/clickhouse_trace_server_batched.py
+++ b/weave/trace_server/clickhouse_trace_server_batched.py
@@ -45,7 +45,7 @@
from weave.trace_server import refs_internal as ri
from weave.trace_server import trace_server_interface as tsi
from weave.trace_server.actions_worker.dispatcher import execute_batch
-from weave.trace_server.base_object_class_util import process_incoming_object
+from weave.trace_server.base_object_class_util import process_incoming_object_val
from weave.trace_server.calls_query_builder import (
CallsQuery,
HardCodedFilter,
@@ -578,7 +578,7 @@ def ops_query(self, req: tsi.OpQueryReq) -> tsi.OpQueryRes:
return tsi.OpQueryRes(op_objs=objs)
def obj_create(self, req: tsi.ObjCreateReq) -> tsi.ObjCreateRes:
- val, base_object_class = process_incoming_object(
+ val, base_object_class = process_incoming_object_val(
req.obj.val, req.obj.set_base_object_class
)
diff --git a/weave/trace_server/sqlite_trace_server.py b/weave/trace_server/sqlite_trace_server.py
index e18a06ce373c..e14cef8041e6 100644
--- a/weave/trace_server/sqlite_trace_server.py
+++ b/weave/trace_server/sqlite_trace_server.py
@@ -14,7 +14,7 @@
from weave.trace_server import refs_internal as ri
from weave.trace_server import trace_server_interface as tsi
-from weave.trace_server.base_object_class_util import process_incoming_object
+from weave.trace_server.base_object_class_util import process_incoming_object_val
from weave.trace_server.emoji_util import detone_emojis
from weave.trace_server.errors import InvalidRequest
from weave.trace_server.feedback import (
@@ -611,7 +611,7 @@ def ops_query(self, req: tsi.OpQueryReq) -> tsi.OpQueryRes:
def obj_create(self, req: tsi.ObjCreateReq) -> tsi.ObjCreateRes:
conn, cursor = get_conn_cursor(self.db_path)
- val, base_object_class = process_incoming_object(
+ val, base_object_class = process_incoming_object_val(
req.obj.val, req.obj.set_base_object_class
)
json_val = json.dumps(val)
diff --git a/weave/trace_server/validation.py b/weave/trace_server/validation.py
index 705ef7d435a9..9784778aa3b3 100644
--- a/weave/trace_server/validation.py
+++ b/weave/trace_server/validation.py
@@ -2,25 +2,8 @@
from typing import Any, Literal, Optional
from weave.trace_server import refs_internal, validation_util
-from weave.trace_server.constants import (
- MAX_DISPLAY_NAME_LENGTH,
- MAX_OBJECT_NAME_LENGTH,
- MAX_OP_NAME_LENGTH,
-)
-from weave.trace_server.errors import InvalidRequest
-
-# Temporary flag to disable database-side validation of object ids.
-# We want to enable this be default, but we need to wait until >95% of users
-# are on weave>=0.51.1, when we can enforce the charset check on the db
-# side.
-#
-# Actions:
-# 1. (ETA: Sept 30) - Verify that 95% of users are on weave>=0.51.1, or
-# that 95% of new objects have the valid charset.
-# 2. Remove this flag (thereby setting this to True), and add a check to the
-# server-side validation code to ensure that the charset is valid.
-# 3. Release and deploy backend.
-SHOULD_ENFORCE_OBJ_ID_CHARSET = False
+from weave.trace_server.constants import MAX_DISPLAY_NAME_LENGTH, MAX_OP_NAME_LENGTH
+from weave.trace_server.errors import InvalidFieldError, InvalidRequest
def project_id_validator(s: str) -> str:
@@ -80,18 +63,17 @@ def _validate_object_name_charset(name: str) -> None:
invalid_chars = re.findall(r"[^\w._-]", name)
if invalid_chars:
invalid_char_set = list(set(invalid_chars))
- raise ValueError(
+ raise InvalidFieldError(
f"Invalid object name: {name}. Contains invalid characters: {invalid_char_set}. Please upgrade your `weave` package to `>0.51.0` to prevent this error."
)
if not name:
- raise ValueError("Object name cannot be empty")
+ raise InvalidFieldError("Object name cannot be empty")
def object_id_validator(s: str) -> str:
- if SHOULD_ENFORCE_OBJ_ID_CHARSET:
- _validate_object_name_charset(s)
- return validation_util.require_max_str_len(s, MAX_OBJECT_NAME_LENGTH)
+ _validate_object_name_charset(s)
+ return validation_util.require_max_str_len(s, 128)
def refs_list_validator(s: list[str]) -> list[str]:
From 3fe9db7414c9da2896c03a4eebfdfff6c4804796 Mon Sep 17 00:00:00 2001
From: Griffin Tarpenning
Date: Mon, 25 Nov 2024 14:33:09 -0800
Subject: [PATCH 05/31] perf(ui): second cost query ignore all expensive
columns (#3077)
---
.../Browse3/pages/CallPage/cost/costUtils.ts | 17 ++++++++++++++++-
.../Browse3/pages/CallsPage/callsTableQuery.ts | 4 ++--
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/cost/costUtils.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/cost/costUtils.ts
index e97897a5b805..0151016bb276 100644
--- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/cost/costUtils.ts
+++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/cost/costUtils.ts
@@ -102,7 +102,22 @@ export const addCostsToCallResults = (
return callResults.map(call => {
if (call.callId && costDict[call.callId]) {
- return {...call, ...costDict[call.callId]};
+ if (!call.traceCall) {
+ return call;
+ }
+ return {
+ ...call,
+ traceCall: {
+ ...call.traceCall,
+ summary: {
+ ...call.traceCall?.summary,
+ weave: {
+ ...call.traceCall?.summary?.weave,
+ costs: costDict[call.callId].traceCall?.summary?.weave?.costs,
+ },
+ },
+ },
+ };
}
return call;
});
diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/callsTableQuery.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/callsTableQuery.ts
index 9d52bdf8b4fa..90e8e35cfe6d 100644
--- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/callsTableQuery.ts
+++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/callsTableQuery.ts
@@ -96,6 +96,7 @@ export const useCallsForQuery = (
[calls.result]
);
+ const costCols = useMemo(() => ['id'], []);
const costs = useCalls(
entity,
project,
@@ -104,12 +105,11 @@ export const useCallsForQuery = (
undefined,
sortBy,
undefined,
- undefined,
+ costCols,
expandedColumns,
{
skip: calls.loading,
includeCosts: true,
- includeFeedback: true,
}
);
From c16ac3dcae10c8d02117898c361073af67e0988b Mon Sep 17 00:00:00 2001
From: Griffin Tarpenning
Date: Mon, 25 Nov 2024 14:48:46 -0800
Subject: [PATCH 06/31] chore(ui): improve cost query perf in trace tree too
(#3080)
---
.../Home/Browse3/pages/CallPage/CallTraceView.tsx | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/CallTraceView.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/CallTraceView.tsx
index 7dfff34bb97e..cfd403ea0f3e 100644
--- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/CallTraceView.tsx
+++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/CallTraceView.tsx
@@ -399,6 +399,7 @@ export const useCallFlattenedTraceTree = (
[traceCallsResult, call.traceId]
);
+ const costCols = useMemo(() => ['id'], []);
const costs = useCalls(
call.entity,
call.project,
@@ -407,7 +408,7 @@ export const useCallFlattenedTraceTree = (
undefined,
undefined,
undefined,
- columns,
+ costCols,
undefined,
{
skip: traceCalls.loading,
From 61a7067fc0841d2d6a45b9b0266a896ac86a4256 Mon Sep 17 00:00:00 2001
From: J2-D2-3PO <188380414+J2-D2-3PO@users.noreply.github.com>
Date: Mon, 25 Nov 2024 16:41:26 -0700
Subject: [PATCH 07/31] docs(weave): Update Playground docs (#3084)
---
docs/docs/guides/tools/playground.md | 163 ++++++++++++++++++++++-----
1 file changed, 137 insertions(+), 26 deletions(-)
diff --git a/docs/docs/guides/tools/playground.md b/docs/docs/guides/tools/playground.md
index db222c12c6bc..a3a0654f4468 100644
--- a/docs/docs/guides/tools/playground.md
+++ b/docs/docs/guides/tools/playground.md
@@ -1,48 +1,159 @@
# Playground
-Evaluating LLM prompts and responses is challenging. The Playground tool enables you to quickly iterate on prompts: editing, retrying, and deleting messages. The LLM Playground is currently in preview.
+> **The LLM Playground is currently in preview.**
+
+Evaluating LLM prompts and responses is challenging. The Weave Playground is designed to simplify the process of iterating on LLM prompts and responses, making it easier to experiment with different models and prompts. With features like prompt editing, message retrying, and model comparison, Playground helps you to quickly test and improve your LLM applications. Playground currently supports OpenAI, Anthropic, Gemini, and Groq.
+
+## Features
+
+- **Quick access:** Open the Playground from the W&B sidebar for a fresh session or from the Call page to test an existing project.
+- **Message controls:** Edit, retry, or delete messages directly within the chat.
+- **Flexible messaging:** Add new messages as either user or system inputs, and send them to the LLM.
+- **Customizable settings:** Configure your preferred LLM provider and adjust model settings.
+- **Multi-LLM support:** Switch between models, with team-level API key management.
+- **Compare models:** Compare how different models respond to prompts.
+
+Get started with the Playground to optimize your LLM interactions and streamline your prompt engineering process and LLM application development.
+
+- [Prerequisites](#prerequisites)
+ - [Add a provider API key](#add-a-provider-api-key)
+ - [Access the Playground](#access-the-playground)
+- [Select an LLM](#select-an-llm)
+- [Adjust LLM parameters](#adjust-llm-parameters)
+- [Add a function](#add-a-function)
+- [Retry, edit, and delete messages](#retry-edit-and-delete-messages)
+- [Add a new message](#add-a-new-message)
+- [Compare LLMs](#compare-llms)
+
+## Prerequisites
+
+Before you can use Playground, you must [add an API key](#add-a-provider-api-key) for your preferred LLM provider(s), and [open the Playground UI](#access-the-playground).
+
+### Add a provider API key
+
+Playground currently supports OpenAI, Anthropic, Gemini, and Groq models.
+To use one of the available LLMs, your W&B admin must add the appropriate API key to your team secrets in W&B settings.
+
+- OpenAI: `OPENAI_API_KEY`
+- Anthropic: `ANTHROPIC_API_KEY`
+- Gemini: `GOOGLE_API_KEY`
+- Groq: `GEMMA_API_KEY`
+
+### Access the Playground
There are two ways to access the Playground:
-1. From the sidebar, click **Playground**. This will open a fresh Playground page with a simple system prompt.
-2. From the Call page, click the **Open chat in playground** button from the call page's chat view.
+1. *Open a fresh Playground page with a simple system prompt*: In the sidebar, select **Playground**. Playground opens in the same tab.
+2. *Open Playground for a specific call*:
+ 1. In the sidebar, select the **Traces** tab. A list of traces displays.
+ 2. In the list of traces, click the name of the call that you want to view. The call's details page opens.
+ 3. Click **Open chat in playground**. Playground opens in a new tab.
![Screenshot of Open in Playground button](imgs/open_chat_in_playground.png)
-## Retry, edit, and delete messages
+## Select an LLM
+
+You can switch the LLM using the dropdown menu in the top left. Currently, the available models are:
+
+- gpt-4o-mini
+- claude-3-5-sonnet-20240620
+- claude-3-5-sonnet-20241022
+- claude-3-haiku-20240307
+- claude-3-opus-20240229
+- claude-3-sonnet-20240229
+- gemini/gemini-1.5-flash-001
+- gemini/gemini-1.5-flash-002
+- gemini/gemini-1.5-flash-8b-exp-0827
+- gemini/gemini-1.5-flash-8b-exp-0924
+- gemini/gemini-1.5-flash-exp-0827
+- gemini/gemini-1.5-flash-latest
+- gemini/gemini-1.5-flash
+- gemini/gemini-1.5-pro-001
+- gemini/gemini-1.5-pro-002
+- gemini/gemini-1.5-pro-exp-0801
+- gemini/gemini-1.5-pro-exp-0827
+- gemini/gemini-1.5-pro-latest
+- gemini/gemini-1.5-pro
+- gemini/gemini-pro
+- gpt-3.5-turbo-0125
+- gpt-3.5-turbo-1106
+- gpt-3.5-turbo-16k
+- gpt-3.5-turbo
+- gpt-4-0125-preview
+- gpt-4-0314
+- gpt-4-0613
+- gpt-4-1106-preview
+- gpt-4-32k-0314
+- gpt-4-turbo-2024-04-09
+- gpt-4-turbo-preview
+- gpt-4-turbo
+- gpt-4
+- gpt-4o-2024-05-13
+- gpt-4o-2024-08-06
+- gpt-4o-mini-2024-07-18
+- gpt-4o
+- groq/gemma-7b-it
+- groq/gemma2-9b-it
+- groq/llama-3.1-70b-versatile
+- groq/llama-3.1-8b-instant
+- groq/llama3-70b-8192
+- groq/llama3-8b-8192
+- groq/llama3-groq-70b-8192-tool-use-preview
+- groq/llama3-groq-8b-8192-tool-use-preview
+- groq/mixtral-8x7b-32768
+- o1-mini-2024-09-12
+- o1-mini
+- o1-preview-2024-09-12
+- o1-preview
+
+## Adjust LLM parameters
+
+You can experiment with different parameter values for your selected model. To adjust parameters, do the following:
+
+1. In the upper right corner of the Playground UI, click **Chat settings** to open the parameter settings dropdown.
+2. In the dropdown, adjust parameters as desired. You can also toggle Weave call tracking on or off, and [add a function](#add-a-function).
+3. Click **Chat settings** to close the dropdown and save your changes.
-Once in the Playground, you can see the chat history.
-When hovering over a message, you will see three buttons: **Edit**, **Retry**, and **Delete**.
+![Screenshot of Playground settings](imgs/playground_settings.png)
-![Screenshot of Playground message buttons](imgs/playground_message_buttons.png)
+## Add a function
-1. **Retry**: Deletes all subsequent messages and retries the chat from the selected message.
-2. **Delete**: Removes the message from the chat.
-3. **Edit**: Allows you to modify the message content.
+You can test how different models use functions based on the input they receive from the user. To add a function for testing in Playground, do the following:
-![Screenshot of Playground editing](imgs/playground_message_editor.png)
+1. In the upper right corner of the Playground UI, click **Chat settings** to open the parameter settings dropdown.
+2. In the dropdown, click **+ Add function**.
+3. In the pop-up, add your function information.
+4. To save your changes and close the function pop-up, click the **x** in the upper right corner.
+5. Click **Chat settings** to close the settings dropdown and save your changes.
-## Adding new messages
+## Retry, edit, and delete messages
-To add a new message to the chat without sending it to the LLM, select the role (e.g., **User**) and click **Add**.
-To send a new message to the LLM, click the **Send** button or press **Command + Enter**.
+With Playground, you can retry, edit, and delete messages. To use this feature, hover over the message you want to edit, retry, or delete. Three buttons display: **Delete**, **Edit**, and **Retry**.
-![Screenshot of Playground sending a message](imgs/playground_chat_input.png)
+- **Delete**: Remove the message from the chat.
+- **Edit**: Modify the message content.
+- **Retry**: Delete all subsequent messages and retry the chat from the selected message.
-## Configuring the LLM
+![Screenshot of Playground message buttons](imgs/playground_message_buttons.png)
+![Screenshot of Playground editing](imgs/playground_message_editor.png)
-We currently support 4 LLM providers.
-To use each LLM, your team admin needs to add the relevant API key to your team's settings (found at **wandb.ai/[team-name]/settings**):
+## Add a new message
-- OpenAI: `OPENAI_API_KEY`
-- Anthropic: `ANTHROPIC_API_KEY`
-- Gemini: `GOOGLE_API_KEY`
-- Groq: `GEMMA_API_KEY`
+To add a new message to the chat, do the following:
-### Choosing the LLM and its settings
+1. In the chat box, select one of the available roles (**Assistant** or **User**).
+2. Click **+ Add**.
+3. To send a new message to the LLM, click the **Send** button. Alternatively, press the **Command** and **Enter** keys.
-Click the **Settings** button to open the settings drawer.
+![Screenshot of Playground sending a message](imgs/playground_chat_input.png)
-![Screenshot of Playground settings](imgs/playground_settings.png)
+## Compare LLMs
+
+Playground allows you to compare LLMs. To perform a comparison, do the following:
-You can also switch the LLM using the dropdown menu in the top left.
+1. In the Playground UI, click **Compare**. A second chat opens next to the original chat.
+2. In the second chat, you can:
+ - [Select the LLM to compare](#select-an-llm)
+ - [Adjust parameters](#adjust-llm-parameters)
+ - [Add functions](#add-a-function)
+3. In the message box, enter a message that you want to test with both models and press **Send**.
From bf816117061920494d262aa91a6df70c0cb33ab5 Mon Sep 17 00:00:00 2001
From: Griffin Tarpenning
Date: Mon, 25 Nov 2024 15:43:32 -0800
Subject: [PATCH 08/31] chore(ui): fix error message in console, key prop in
jsx (#3082)
---
weave-js/src/components/FancyPage/FancyPageSidebarSection.tsx | 1 -
1 file changed, 1 deletion(-)
diff --git a/weave-js/src/components/FancyPage/FancyPageSidebarSection.tsx b/weave-js/src/components/FancyPage/FancyPageSidebarSection.tsx
index 709c3ccb7817..ead31d264a04 100644
--- a/weave-js/src/components/FancyPage/FancyPageSidebarSection.tsx
+++ b/weave-js/src/components/FancyPage/FancyPageSidebarSection.tsx
@@ -113,7 +113,6 @@ const FancyPageSidebarSection = (props: FancyPageSidebarSectionProps) => {
}
const baseLinkProps = {
- key: item.name,
onClick: () => {
item.onClick?.();
},
From 0a7f5cedb93ad46a03888f142bb6421a5e5936d1 Mon Sep 17 00:00:00 2001
From: Griffin Tarpenning
Date: Mon, 25 Nov 2024 15:47:29 -0800
Subject: [PATCH 09/31] chore(ui): recyle -> refresh button (#3085)
---
.../Home/Browse3/pages/CallsPage/CallsTableButtons.tsx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/CallsTableButtons.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/CallsTableButtons.tsx
index fa8e9205dca9..2d5322df584a 100644
--- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/CallsTableButtons.tsx
+++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/CallsTableButtons.tsx
@@ -484,7 +484,7 @@ export const RefreshButton: FC<{
size="medium"
onClick={onClick}
tooltip="Refresh"
- icon="randomize-reset-reload"
+ icon="reload-refresh"
/>
);
From fbabcc6fe174b07a8d68cf397ae1ba6b31497fa4 Mon Sep 17 00:00:00 2001
From: Martin Mark
Date: Mon, 25 Nov 2024 20:00:09 -0500
Subject: [PATCH 10/31] chore(ui): Visual update summary area on call views
(#3041)
* Split tables / updated style for SimpleKeyValueTable
* Process all non-null attributes, flattening nested objects
* Use ObjectViewer component instead of the table for the attributes
* Added back comments
* Prettier
* Reverted attributes change - removed model from `Details` table
---
.../Browse3/pages/CallPage/CallSummary.tsx | 78 ++++++++++++-------
.../Browse3/pages/common/SimplePageLayout.tsx | 77 +++++++++---------
2 files changed, 87 insertions(+), 68 deletions(-)
diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/CallSummary.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/CallSummary.tsx
index 8dd7b818670b..dfcdbd676937 100644
--- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/CallSummary.tsx
+++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/CallSummary.tsx
@@ -8,6 +8,7 @@ import {SmallRef} from '../../../Browse2/SmallRef';
import {SimpleKeyValueTable} from '../common/SimplePageLayout';
import {CallSchema} from '../wfReactInterface/wfDataModelHooksInterface';
import {CostTable} from './cost';
+import {ObjectViewerSection} from './ObjectViewerSection';
const SUMMARY_FIELDS_EXCLUDED_FROM_GENERAL_RENDER = [
'latency_s',
@@ -19,6 +20,7 @@ export const CallSummary: React.FC<{
call: CallSchema;
}> = ({call}) => {
const span = call.rawSpan;
+ // Process attributes, only filtering out null values and keys starting with '_'
const attributes = _.fromPairs(
Object.entries(span.attributes ?? {}).filter(
([k, a]) => !k.startsWith('_') && a != null
@@ -52,36 +54,56 @@ export const CallSummary: React.FC<{
)}
-