From ea104310367544fe5f6d595718a85ceaa4e6de50 Mon Sep 17 00:00:00 2001 From: Griffin Tarpenning Date: Mon, 6 Jan 2025 14:18:14 -0800 Subject: [PATCH 01/51] chore(weave): allow call.feedback.add() for annotations (#3323) --- tests/trace/test_annotation_feedback.py | 57 +++++++++++++++++++++++++ weave/trace/feedback.py | 34 ++++++++++++--- weave/trace/refs.py | 6 +++ 3 files changed, 92 insertions(+), 5 deletions(-) diff --git a/tests/trace/test_annotation_feedback.py b/tests/trace/test_annotation_feedback.py index 6a3d6b6589d2..bdb8d0c90e9d 100644 --- a/tests/trace/test_annotation_feedback.py +++ b/tests/trace/test_annotation_feedback.py @@ -456,3 +456,60 @@ class FeedbackModel(BaseModel): # Invalid cases should return False assert not enum_spec.value_is_valid("invalid_choice") assert not enum_spec.value_is_valid(123) + + +def test_annotation_feedback_sdk(client): + number_spec = AnnotationSpec( + name="Number Rating", + field_schema={ + "type": "number", + "minimum": 1, + "maximum": 5, + }, + ) + ref = weave.publish(number_spec, "number spec") + assert ref + + @weave.op() + def do_call(): + return 3 + + do_call() + do_call() + + calls = do_call.calls() + assert len(list(calls)) == 2 + + # Add annotation feedback + calls[0].feedback.add( + "wandb.annotation.number-spec", + {"value": 3}, + annotation_ref=ref.uri(), + ) + + # Query the feedback + feedback = calls[0].feedback.refresh() + assert len(feedback) == 1 + assert feedback[0].payload["value"] == 3 + assert feedback[0].annotation_ref == ref.uri() + + # no annotation_ref + with pytest.raises(ValueError): + calls[0].feedback.add("wandb.annotation.number_rating", {"value": 3}) + + # empty annotation_ref + with pytest.raises(ValueError): + calls[0].feedback.add( + "wandb.annotation.number_rating", {"value": 3}, annotation_ref="" + ) + + # invalid annotation_ref + with pytest.raises(ValueError): + calls[0].feedback.add("number_rating", {"value": 3}, annotation_ref="ssss") + + # no wandb.annotation prefix + with 
pytest.raises( + ValueError, + match="To add annotation feedback, feedback_type must conform to the format: 'wandb.annotation.'.", + ): + calls[0].feedback.add("number_rating", {"value": 3}, annotation_ref=ref.uri()) diff --git a/weave/trace/feedback.py b/weave/trace/feedback.py index 884c751c0594..77f07d2449c3 100644 --- a/weave/trace/feedback.py +++ b/weave/trace/feedback.py @@ -10,7 +10,7 @@ from weave.trace import util from weave.trace.context import weave_client_context as weave_client_context -from weave.trace.refs import parse_uri +from weave.trace.refs import parse_object_uri, parse_uri from weave.trace.rich import pydantic_util from weave.trace.rich.container import AbstractRichContainer from weave.trace.rich.refs import Refs @@ -188,7 +188,11 @@ def __init__(self, ref: str) -> None: self.weave_ref = ref def _add( - self, feedback_type: str, payload: dict[str, Any], creator: str | None + self, + feedback_type: str, + payload: dict[str, Any], + creator: str | None, + annotation_ref: str | None = None, ) -> str: freq = tsi.FeedbackCreateReq( project_id=f"{self.entity}/{self.project}", @@ -197,6 +201,14 @@ def _add( payload=payload, creator=creator, ) + if annotation_ref: + try: + parse_object_uri(annotation_ref) + except TypeError: + raise TypeError( + "annotation_ref must be a valid object ref, eg weave://///object/:" + ) + freq.annotation_ref = annotation_ref response = self.client.server.feedback_create(freq) self.feedbacks = None # Clear cache return response.id @@ -206,6 +218,7 @@ def add( feedback_type: str, payload: dict[str, Any] | None = None, creator: str | None = None, + annotation_ref: str | None = None, **kwargs: dict[str, Any], ) -> str: """Add feedback to the ref. @@ -213,12 +226,11 @@ def add( feedback_type: A string identifying the type of feedback. The "wandb." prefix is reserved. creator: The name to display for the originator of the feedback. 
""" - if feedback_type.startswith("wandb."): - raise ValueError('Feedback type cannot start with "wandb."') + _validate_feedback_type(feedback_type, annotation_ref) feedback = {} feedback.update(payload or {}) feedback.update(kwargs) - return self._add(feedback_type, feedback, creator) + return self._add(feedback_type, feedback, creator, annotation_ref) def add_reaction(self, emoji: str, creator: str | None = None) -> str: return self._add( @@ -258,6 +270,18 @@ def purge(self, feedback_id: str) -> None: self.feedbacks = None # Clear cache +def _validate_feedback_type(feedback_type: str, annotation_ref: str | None) -> None: + if feedback_type.startswith("wandb.") and not annotation_ref: + raise ValueError( + 'Feedback type cannot start with "wandb", it is reserved for annotation feedback. ' + "Provide an annotation_ref to add annotation feedback." + ) + elif not feedback_type.startswith("wandb.annotation.") and annotation_ref: + raise ValueError( + "To add annotation feedback, feedback_type must conform to the format: 'wandb.annotation.'." 
+ ) + + __docspec__ = [ Feedbacks, FeedbackQuery, diff --git a/weave/trace/refs.py b/weave/trace/refs.py index 9190368b5926..e3f635f90ced 100644 --- a/weave/trace/refs.py +++ b/weave/trace/refs.py @@ -305,3 +305,9 @@ def parse_op_uri(uri: str) -> OpRef: if not isinstance(parsed := parse_uri(uri), OpRef): raise TypeError(f"URI is not for an Op: {uri}") return parsed + + +def parse_object_uri(uri: str) -> ObjectRef: + if not isinstance(parsed := parse_uri(uri), ObjectRef): + raise TypeError(f"URI is not for an Object: {uri}") + return parsed From 6c53ac5e01d632f2efc9d89bdf5bc312702e4127 Mon Sep 17 00:00:00 2001 From: J2-D2-3PO <188380414+J2-D2-3PO@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:11:09 -0700 Subject: [PATCH 02/51] Fix bad headers and titles in NBs (#3330) --- .../reference/gen_notebooks/audio_with_weave.md | 17 +++++++++++++---- .../gen_notebooks/notdiamond_custom_routing.md | 10 +++++----- .../gen_notebooks/online_monitoring.md | 8 ++++---- .../gen_notebooks/weave_via_service_api.md | 6 ++++++ docs/notebooks/audio_with_weave.ipynb | 8 ++++---- docs/notebooks/notdiamond_custom_routing.ipynb | 8 ++++---- docs/notebooks/online_monitoring.ipynb | 8 ++++---- docs/notebooks/weave_via_service_api.ipynb | 10 +++++++++- 8 files changed, 49 insertions(+), 26 deletions(-) diff --git a/docs/docs/reference/gen_notebooks/audio_with_weave.md b/docs/docs/reference/gen_notebooks/audio_with_weave.md index 6b25bb332713..8bfa9d9edf57 100644 --- a/docs/docs/reference/gen_notebooks/audio_with_weave.md +++ b/docs/docs/reference/gen_notebooks/audio_with_weave.md @@ -1,6 +1,6 @@ - -## title: Log Audio With Weave - +--- +title: Log Audio With Weave +--- :::tip[This is a notebook] @@ -12,7 +12,7 @@ ::: -## + @@ -40,6 +40,15 @@ Start by installing the OpenAI (`openai`) and Weave (`weave`) dependencies, as w !pip install set-env-colab-kaggle-dotenv -q # for env var ``` + +```python +%%capture +# Temporary workaround to fix bug in openai: +# TypeError: Client.__init__() got an 
unexpected keyword argument 'proxies' +# See https://community.openai.com/t/error-with-openai-1-56-0-client-init-got-an-unexpected-keyword-argument-proxies/1040332/15 +!pip install "httpx<0.28" +``` + Next, load the required API keys for OpenAI and Weave. Here, we use set_env which is compatible with google colab's secret keys manager, and is an alternative to colab's specific `google.colab.userdata`. See: [here](https://pypi.org/project/set-env-colab-kaggle-dotenv/) for usage instructions. diff --git a/docs/docs/reference/gen_notebooks/notdiamond_custom_routing.md b/docs/docs/reference/gen_notebooks/notdiamond_custom_routing.md index c590adc1627e..0ea66194a3a4 100644 --- a/docs/docs/reference/gen_notebooks/notdiamond_custom_routing.md +++ b/docs/docs/reference/gen_notebooks/notdiamond_custom_routing.md @@ -1,6 +1,6 @@ - -## title: NotDiamond Custom Routing - +--- +title: NotDiamond Custom Routing +--- :::tip[This is a notebook] @@ -12,7 +12,7 @@ ::: -## + @@ -202,7 +202,7 @@ best_provider_model, nd_model = evaluate_router( ```python @weave.op() -def is_correct(score: int, model_output: dict) -> dict: +def is_correct(score: int, output: dict) -> dict: # We hack score, since we already have model responses return {"correct": score} diff --git a/docs/docs/reference/gen_notebooks/online_monitoring.md b/docs/docs/reference/gen_notebooks/online_monitoring.md index 8d5a7323a06b..6f5ecb888699 100644 --- a/docs/docs/reference/gen_notebooks/online_monitoring.md +++ b/docs/docs/reference/gen_notebooks/online_monitoring.md @@ -1,6 +1,6 @@ - -## title: Integrating with Weave - Production Dashboard - +--- +title: Integrating with Weave - Production Dashboard +--- :::tip[This is a notebook] @@ -12,7 +12,7 @@ ::: -## + diff --git a/docs/docs/reference/gen_notebooks/weave_via_service_api.md b/docs/docs/reference/gen_notebooks/weave_via_service_api.md index af8a27b2f060..e366fda0374b 100644 --- a/docs/docs/reference/gen_notebooks/weave_via_service_api.md +++ 
b/docs/docs/reference/gen_notebooks/weave_via_service_api.md @@ -1,3 +1,6 @@ +--- +title: Service API +--- :::tip[This is a notebook] @@ -9,6 +12,9 @@ ::: + + + # Use the Service API to Log and Query Traces In the following guide, you will learn how to use the Weave Service API to log traces. Specifically, you will use the Service API to: diff --git a/docs/notebooks/audio_with_weave.ipynb b/docs/notebooks/audio_with_weave.ipynb index 881b39a5d8fe..b0332b4047aa 100644 --- a/docs/notebooks/audio_with_weave.ipynb +++ b/docs/notebooks/audio_with_weave.ipynb @@ -4,10 +4,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## \n", "\n", "\n" diff --git a/docs/notebooks/notdiamond_custom_routing.ipynb b/docs/notebooks/notdiamond_custom_routing.ipynb index 8ae3041b69aa..56b41691ac91 100644 --- a/docs/notebooks/notdiamond_custom_routing.ipynb +++ b/docs/notebooks/notdiamond_custom_routing.ipynb @@ -4,10 +4,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## \n", "\n", "\n" diff --git a/docs/notebooks/online_monitoring.ipynb b/docs/notebooks/online_monitoring.ipynb index ce35061e8a89..503f4ccfe584 100644 --- a/docs/notebooks/online_monitoring.ipynb +++ b/docs/notebooks/online_monitoring.ipynb @@ -4,10 +4,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## \n", "\n", "\n" diff --git a/docs/notebooks/weave_via_service_api.ipynb b/docs/notebooks/weave_via_service_api.ipynb index 0122e431ea15..0221d1519557 100644 --- a/docs/notebooks/weave_via_service_api.ipynb +++ b/docs/notebooks/weave_via_service_api.ipynb @@ -5,7 +5,15 @@ "metadata": { "id": "xSqO08zH3qRH" }, - "source": [] + "source": [ + "\n", + "\n", + "" + ] }, { "cell_type": "markdown", From 7058efae69040a0d80beea80929bf8e59c04ac1b Mon Sep 17 00:00:00 2001 From: Tim Sweeney Date: Mon, 6 Jan 2025 20:42:45 -0800 Subject: [PATCH 03/51] chore(weave): Remove dead code paths from `Evaluation.evaluate` by implementing correct typing. 
(#3325) * Initial Type Refactor * lint * lint * comment * small adjustment * chore(weave): Extract common functions in `eval.py` into re-usable common modules. (#3328) * initial refactor * Larger Refactor * Comments! * Comments! --- weave/flow/eval.py | 395 +++++++++-------------------------- weave/flow/model.py | 127 ++++++++++- weave/scorers/base_scorer.py | 248 ++++++++++++++++++++-- weave/trace/isinstance.py | 6 +- weave/trace/op_caller.py | 50 +++++ 5 files changed, 503 insertions(+), 323 deletions(-) create mode 100644 weave/trace/op_caller.py diff --git a/weave/flow/eval.py b/weave/flow/eval.py index bf78dc06d85d..7400fc45ce38 100644 --- a/weave/flow/eval.py +++ b/weave/flow/eval.py @@ -1,10 +1,6 @@ import asyncio -import inspect import logging -import textwrap -import time import traceback -from collections.abc import Coroutine from datetime import datetime from typing import Any, Callable, Literal, Optional, Union, cast @@ -15,7 +11,12 @@ import weave from weave.flow import util from weave.flow.dataset import Dataset -from weave.flow.model import Model, get_infer_method +from weave.flow.model import ( + ApplyModelError, + Model, + PreprocessModelInput, + apply_model_async, +) from weave.flow.obj import Object from weave.flow.util import make_memorable_name from weave.scorers import ( @@ -26,6 +27,7 @@ get_scorer_attributes, transpose, ) +from weave.scorers.base_scorer import apply_scorer_async from weave.trace.context.weave_client_context import get_weave_client from weave.trace.env import get_weave_parallelism from weave.trace.errors import OpCallError @@ -49,31 +51,14 @@ def default_evaluation_display_name(call: Call) -> str: return f"eval-{date}-{unique_name}" -def async_call(func: Union[Callable, Op], *args: Any, **kwargs: Any) -> Coroutine: - is_async = False - if is_op(func): - func = as_op(func) - is_async = inspect.iscoroutinefunction(func.resolve_fn) - else: - is_async = inspect.iscoroutinefunction(func) - if is_async: - return func(*args, 
**kwargs) # type: ignore - return asyncio.to_thread(func, *args, **kwargs) - - -def async_call_op( - func: Op, *args: Any, **kwargs: Any -) -> Coroutine[Any, Any, tuple[Any, "Call"]]: - call_res = func.call(*args, __should_raise=True, **kwargs) - if inspect.iscoroutine(call_res): - return call_res - return asyncio.to_thread(lambda: call_res) - - class EvaluationResults(Object): rows: weave.Table +DatasetLike = Union[Dataset, list[dict]] +ScorerLike = Union[Callable, Op, Scorer] + + class Evaluation(Object): """ Sets up an evaluation which includes a set of scorers and a dataset. @@ -119,9 +104,9 @@ def function_to_evaluate(question: str): ``` """ - dataset: Union[Dataset, list] - scorers: Optional[list[Union[Callable, Op, Scorer]]] = None - preprocess_model_input: Optional[Callable] = None + dataset: DatasetLike + scorers: Optional[list[ScorerLike]] = None + preprocess_model_input: Optional[PreprocessModelInput] = None trials: int = 1 # Custom evaluation name for display in the UI. This is the same API as passing a @@ -140,7 +125,7 @@ def _update_display_name(self) -> "Evaluation": return self def model_post_init(self, __context: Any) -> None: - scorers: list[Union[Callable, Scorer, Op]] = [] + scorers: list[Union[Op, Scorer]] = [] for scorer in self.scorers or []: if isinstance(scorer, Scorer): pass @@ -149,9 +134,9 @@ def model_post_init(self, __context: Any) -> None: f"Scorer {scorer.__name__} must be an instance, not a class. Did you forget to instantiate?" ) elif callable(scorer) and not is_op(scorer): - scorer = weave.op()(scorer) + scorer = weave.op(scorer) elif is_op(scorer): - pass + scorer = as_op(scorer) else: raise ValueError(f"Invalid scorer: {scorer}") @@ -166,7 +151,11 @@ def model_post_init(self, __context: Any) -> None: logger, "Using 'model_output' key for compatibility with older scorers. 
Please update scorers to use 'output' parameter.", ) - self.scorers = scorers + + # I don't understand why we need a type ignore here, error: + # Incompatible types in assignment (expression has type "list[Op | Scorer]", variable has type "list[Callable[..., Any] | Op | Scorer] | None") + # This seems to be a bug in the type checker as the assignment is a valid subset of the type. + self.scorers = scorers # type: ignore if isinstance(self.dataset, list): self.dataset = Dataset(rows=self.dataset) @@ -174,265 +163,70 @@ def model_post_init(self, __context: Any) -> None: if self.name is None and self.dataset.name is not None: self.name = self.dataset.name + "-evaluation" # type: ignore + # _post_init_dataset and _post_init_scorers are a more tightly typed property. + # This is because the initialization code can accept lists and callables respectively, + # but after initialization, they are more tightly typed to the respective weave objects. + # Using these reduces casting below and allows us to have less logical branches + @property + def _post_init_dataset(self) -> Dataset: + if not weave_isinstance(self.dataset, Dataset): + raise TypeError( + f"Expected self.dataset to be converted to a Dataset in `model_post_init`. Found {str(type(self.dataset))}" + ) + return self.dataset + + @property + def _post_init_scorers(self) -> list[Union[Op, Scorer]]: + if not isinstance(self.scorers, list): + raise TypeError( + f"Expected self.scorers to be a list in `model_post_init`. Found {str(type(self.scorers))}" + ) + for scorer in self.scorers: + if not weave_isinstance(scorer, (Op, Scorer)) and not is_op(scorer): + raise TypeError( + f"Expected all elements in self.scorers to be an instance of Op or Scorer in `model_post_init`. 
Found {str(type(scorer))}" + ) + return cast(list[Union[Op, Scorer]], self.scorers) + @weave.op() - async def predict_and_score( - self, model: Union[Callable, Model], example: dict - ) -> dict: - if self.preprocess_model_input is None: - model_input = example - else: - model_input = self.preprocess_model_input(example) # type: ignore - - model_self = None - model_predict: Union[Callable, Model] - if callable(model): - model_predict = model - else: - model_self = model - model_predict = get_infer_method(model) - - model_predict_fn_name = ( - as_op(model_predict).name - if is_op(model_predict) - else model_predict.__name__ + async def predict_and_score(self, model: Union[Op, Model], example: dict) -> dict: + apply_model_result = await apply_model_async( + model, example, self.preprocess_model_input ) - predict_signature = inspect.signature(model_predict) - model_predict_arg_names = list(predict_signature.parameters.keys()) - - if isinstance(model_input, dict): - model_predict_args = { - k: v for k, v in model_input.items() if k in model_predict_arg_names + if isinstance(apply_model_result, ApplyModelError): + return { + self._output_key: None, + "scores": {}, + "model_latency": apply_model_result.model_latency, } - else: - if len(model_predict_arg_names) == 1: - model_predict_args = {model_predict_arg_names[0]: model_input} - else: - raise ValueError( - f"{model_predict} expects arguments: {model_predict_arg_names}, provide a preprocess_model_input function that returns a dict with those keys." 
- ) - try: - model_start_time = time.time() - model_call = None - if is_op(model_predict): - # I would expect this path to always be hit, but keeping the other - # path for backwards compatibility / safety - model_predict = as_op(model_predict) - if model_self is not None: - model_predict_args = { - **model_predict_args, - "self": model_self, - } - model_output, model_call = await async_call_op( - model_predict, **model_predict_args - ) - else: - # I would not expect this path to be hit, but keeping it for - # backwards compatibility / safety - model_output = await async_call(model_predict, **model_predict_args) - except OpCallError as e: - dataset_column_names = list(example.keys()) - dataset_column_names_str = ", ".join(dataset_column_names[:3]) - if len(dataset_column_names) > 3: - dataset_column_names_str += ", ..." - required_arg_names = [ - param.name - for param in predict_signature.parameters.values() - if param.default == inspect.Parameter.empty - ] - - message = textwrap.dedent( - f""" - Call error: {e} - - Options for resolving: - a. change {model_predict_fn_name} argument names to match a subset of dataset column names: {dataset_column_names_str} - b. change dataset column names to match expected {model_predict_fn_name} argument names: {required_arg_names} - c. 
construct Evaluation with a preprocess_model_input function that accepts a dataset example and returns a dict with keys expected by {model_predict_fn_name} - """ - ) - raise OpCallError(message) - except Exception as e: - print("model_output failed") - traceback.print_exc() - model_output = None - model_latency = time.time() - model_start_time - - scores = {} # TODO: Consider moving scorer setup and checks out of `predict_and_score` - scorers = cast(list[Union[Op, Scorer]], self.scorers or []) - for scorer in scorers: - scorer_self = None - if weave_isinstance(scorer, Scorer): - scorer_self = scorer - scorer_name, score_fn, _ = get_scorer_attributes(scorer) - score_signature = inspect.signature(score_fn) - score_arg_names = list(score_signature.parameters.keys()) - - # the actual kwarg name depends on the scorer - if "output" in score_arg_names: - score_output_name = "output" - elif "model_output" in score_arg_names: - score_output_name = "model_output" - else: - message = textwrap.dedent( - f""" - Scorer {scorer_name} must have an `output` or `model_output` argument, to receive the - output of the model function. - """ - ) - raise OpCallError(message) - - if isinstance(example, dict): - # The keys of `score_args` must match the argument names of the scorer's `score` method. - # If scorer.column_map is set, then user is indicating that the dataset column(s) - # being passed to the scorer have different names to the `score` functions' argument names. 
- # So we need to remap the dataset columns to the expected argument names in the scorer, - # - # column_map k:v pairs must be structured as `scorer param name : dataset column name` - # - # For instance, if the scorer expects "input" and "ground_truth" and we have a dataset - # with columns "question" and "answer", column_map should be defined as follows: - # {"input": "question", "ground_truth": "answer"} - # - # input: is the full row, we have access to it via example - # output: is the model output, we have access to it via model_output - score_arg_names = [ - param for param in score_arg_names if (param != "self") - ] - score_args = {} - - if isinstance(scorer, Scorer) and scorer.column_map is not None: - # Ensure that all keys in column_map are in score_arg_names - for key in scorer.column_map.keys(): - if key not in score_arg_names: - message = textwrap.dedent( - f""" - You have created `{scorer_name}(column_map={scorer.column_map}, ...)`. - - The `column_map` contains a key, `{key}`, which is not in the `score` methods' argument names. - `score` methods' argument names: {score_arg_names} - - Hint: - - Ensure that the keys in `column_map` match the scorer's argument names. - """ - ) - raise ValueError(message) - - for arg in score_arg_names: - if arg == "output" or arg == "model_output": - continue - if arg in example: - score_args[arg] = example[arg] - elif arg in scorer.column_map: - dataset_column_name = scorer.column_map[arg] - if dataset_column_name in example: - score_args[arg] = example[dataset_column_name] - else: - message = textwrap.dedent( - f""" - You have created `{scorer_name}(column_map={scorer.column_map}, ...)`. - - You are mapping `{arg}` to `{dataset_column_name}`, but `{dataset_column_name}` - was not found in the dataset columns. - - Available dataset columns: {list(example.keys())} - - Hint: - - Ensure that `column_map` maps the `score` methods' argument names to existing dataset column names. 
- """ - ) - raise ValueError(message) - else: - message = textwrap.dedent( - f""" - You have created `{scorer_name}(column_map={scorer.column_map}, ...)`. - - `score` method argument `{arg}` is not found in the dataset columns and is not mapped in `column_map`. - - Available dataset columns: {list(example.keys())} - `column_map`: {scorer.column_map} - - Hint: - Either: - - map the argument name to the dataset column using the scorers `column_map` attribute, in the form {{score_arg_name : dataset_column_name}} or - - rename a column in the dataset to `{arg}` or - - re-name the `{arg}` argument in your `score` method to match a dataset column name - """ - ) - raise ValueError(message) - else: - score_args = { - k: v for k, v in example.items() if k in score_arg_names - } + model_output = apply_model_result.model_output + model_call = apply_model_result.model_call + model_latency = apply_model_result.model_latency - else: - if len(score_arg_names) == 2: - score_args = {score_arg_names[0]: example} - else: - raise ValueError( - f"{score_fn} expects arguments: {score_arg_names}, provide a preprocess_model_input function that returns a dict with those keys." - ) - score_args[score_output_name] = model_output + scores = {} + scorers = self._post_init_scorers - try: - if is_op(score_fn) and model_call: - # I would expect this path to always be hit, but keeping the other - # path for backwards compatibility / safety - score_fn = as_op(score_fn) - if scorer_self is not None: - score_args = { - **score_args, - "self": scorer_self, - } - result, score_call = await async_call_op(score_fn, **score_args) - wc = get_weave_client() - if wc: - # Very important: if the score is generated from a Scorer subclass, - # then scorer_ref_uri will be None, and we will use the op_name from - # the score_call instead. 
- scorer_ref = get_ref(scorer_self) if scorer_self else None - scorer_ref_uri = scorer_ref.uri() if scorer_ref else None - wc._send_score_call(model_call, score_call, scorer_ref_uri) - - else: - # I would not expect this path to be hit, but keeping it for - # backwards compatibility / safety - result = await async_call(score_fn, **score_args) - except OpCallError as e: - dataset_column_names = list(example.keys()) - dataset_column_names_str = ", ".join(dataset_column_names[:3]) - if len(dataset_column_names) > 10: - dataset_column_names_str += ", ..." - required_arg_names = [ - param.name - for param in score_signature.parameters.values() - if param.default == inspect.Parameter.empty - ] - required_arg_names.remove(score_output_name) - - message = textwrap.dedent( - f""" - Call error: {e} - - If using the `Scorer` weave class, you can set the `scorer.column_map` - attribute to map scorer argument names to dataset columns. - - For example, if the `score` expects "output", "input" and "ground_truth" and we have a dataset - with columns "question" and "answer", `column_map` can be used to map the non-output parameter like so: - {{"input": "question", "ground_truth": "answer"}} - - scorer argument names: {score_arg_names} - dataset keys: {example.keys()} - scorer.column_map: {getattr(scorer, 'column_map', '{}')} - - Options for resolving: - a. if using the `Scorer` weave class, you can set the `scorer.column_map` attribute to map scorer argument names to dataset column names or - b. change the argument names the in the scoring function of {scorer_name} to match a subset of dataset column names: ({dataset_column_names_str}) or - c. 
change dataset column names to match expected {scorer_name} argument names: {required_arg_names} - """ - ) - raise OpCallError(message) + for scorer in scorers: + apply_scorer_result = await apply_scorer_async( + scorer, example, model_output + ) + result = apply_scorer_result.result + score_call = apply_scorer_result.score_call + + wc = get_weave_client() + if wc: + scorer_ref_uri = None + if weave_isinstance(scorer, Scorer): + # Very important: if the score is generated from a Scorer subclass, + # then scorer_ref_uri will be None, and we will use the op_name from + # the score_call instead. + scorer_ref = get_ref(scorer) + scorer_ref_uri = scorer_ref.uri() if scorer_ref else None + wc._send_score_call(model_call, score_call, scorer_ref_uri) + scorer_attributes = get_scorer_attributes(scorer) + scorer_name = scorer_attributes.scorer_name scores[scorer_name] = result return { @@ -449,9 +243,11 @@ async def summarize(self, eval_table: EvaluationResults) -> dict: for name, vals in cols.items(): if name == "scores": - scorers = self.scorers or [] + scorers = self._post_init_scorers for scorer in scorers: - scorer_name, _, summarize_fn = get_scorer_attributes(scorer) + scorer_attributes = get_scorer_attributes(scorer) + scorer_name = scorer_attributes.scorer_name + summarize_fn = scorer_attributes.summarize_fn scorer_stats = transpose(vals) score_table = scorer_stats[scorer_name] scored = summarize_fn(score_table) @@ -462,21 +258,17 @@ async def summarize(self, eval_table: EvaluationResults) -> dict: summary[name] = model_output_summary return summary - async def get_eval_results( - self, model: Union[Callable, Model] - ) -> EvaluationResults: + async def get_eval_results(self, model: Union[Op, Model]) -> EvaluationResults: if not is_valid_model(model): raise ValueError(INVALID_MODEL_ERROR) eval_rows = [] - start_time = time.time() - async def eval_example(example: dict) -> dict: try: eval_row = await self.predict_and_score(model, example) except OpCallError as e: 
raise e - except Exception as e: + except Exception: print("Predict and score failed") traceback.print_exc() return {self._output_key: None, "scores": {}} @@ -484,7 +276,7 @@ async def eval_example(example: dict) -> dict: n_complete = 0 # with console.status("Evaluating...") as status: - dataset = cast(Dataset, self.dataset) + dataset = self._post_init_dataset _rows = dataset.rows trial_rows = list(_rows) * self.trials async for example, eval_row in util.async_foreach( @@ -499,15 +291,16 @@ async def eval_example(example: dict) -> dict: eval_row = {self._output_key: None, "scores": {}} else: eval_row["scores"] = eval_row.get("scores", {}) - for scorer in self.scorers or []: - scorer_name, _, _ = get_scorer_attributes(scorer) + for scorer in self._post_init_scorers: + scorer_attributes = get_scorer_attributes(scorer) + scorer_name = scorer_attributes.scorer_name if scorer_name not in eval_row["scores"]: eval_row["scores"][scorer_name] = {} eval_rows.append(eval_row) return EvaluationResults(rows=weave.Table(eval_rows)) @weave.op(call_display_name=default_evaluation_display_name) - async def evaluate(self, model: Union[Callable, Model]) -> dict: + async def evaluate(self, model: Union[Op, Model]) -> dict: # The need for this pattern is quite unfortunate and highlights a gap in our # data model. As a user, I just want to pass a list of data `eval_rows` to # summarize. 
Under the hood, Weave should choose the appropriate storage @@ -528,12 +321,12 @@ async def evaluate(self, model: Union[Callable, Model]) -> dict: def evaluate( dataset: Union[Dataset, list], - model: Union[Callable, Model], - scores: Optional[list[Union[Callable, Scorer]]] = None, - preprocess_model_input: Optional[Callable] = None, + model: Union[Op, Model], + scorers: Optional[list[Union[Callable, Scorer]]] = None, + preprocess_model_input: Optional[PreprocessModelInput] = None, ) -> dict: eval = Evaluation( - dataset=dataset, scorers=scores, preprocess_model_input=preprocess_model_input + dataset=dataset, scorers=scorers, preprocess_model_input=preprocess_model_input ) return asyncio.run(eval.evaluate(model)) diff --git a/weave/flow/model.py b/weave/flow/model.py index 0cd23eabd546..0e7e9ba9e16a 100644 --- a/weave/flow/model.py +++ b/weave/flow/model.py @@ -1,6 +1,18 @@ -from typing import Callable +import inspect +import textwrap +import time +import traceback +from dataclasses import dataclass +from typing import Any, Callable, Optional, Union + +from rich import print from weave.flow.obj import Object +from weave.trace.errors import OpCallError +from weave.trace.isinstance import weave_isinstance +from weave.trace.op import Op, as_op, is_op +from weave.trace.op_caller import async_call_op +from weave.trace.weave_client import Call INFER_METHOD_NAMES = {"predict", "infer", "forward", "invoke"} @@ -45,10 +57,121 @@ def get_infer_method(self) -> Callable: ) -def get_infer_method(model: Model) -> Callable: +def get_infer_method(model: Model) -> Op: for name in INFER_METHOD_NAMES: if (infer_method := getattr(model, name, None)) is not None: + if not is_op(infer_method): + raise ValueError( + f"Model {model} must implement `{name}` as a weave.op() decorated function." 
+ ) return infer_method raise MissingInferenceMethodError( f"Missing a method with name in ({INFER_METHOD_NAMES})" ) + + +# Using `dataclass` because pydantic does not like `Call` as a property +@dataclass +class ApplyModelSuccess: + model_output: Any + model_call: Call + model_latency: float + + +@dataclass +class ApplyModelError: + model_latency: float + + +ApplyModelResult = Union[ApplyModelSuccess, ApplyModelError] +PreprocessModelInput = Callable[[dict], dict] + + +async def apply_model_async( + model: Union[Op, Model], + example: dict, + preprocess_model_input: Optional[PreprocessModelInput] = None, +) -> ApplyModelResult: + """Asynchronously applies a model (Model instance or Op) to a given example. + + This function optionally preprocesses the example, keeps only the keys that match the + prediction op's parameter names, and calls the op. It supports both `Model` instances and bare Ops. + + Args: + model: The model to apply, can be either a `Model` instance or a Weave Operation (Op) + example: The input data to process through the model + preprocess_model_input: A function that preprocesses the example before passing it to the model + + Returns: + ApplyModelResult: `ApplyModelSuccess` (output, call, latency), or `ApplyModelError` (latency only) if the model call raised + + Raises: + ValueError: If the model is neither a `Model` instance nor an Op + OpCallError: If the prediction op cannot be called with the arguments selected from the example + """ + if preprocess_model_input is None: + model_input = example + else: + model_input = preprocess_model_input(example) # type: ignore + + model_self = None + model_predict_op: Op + if is_op(model): + model_predict_op = as_op(model) + elif weave_isinstance(model, Model): + model_self = model + model_predict_op = get_infer_method(model) + else: + raise ValueError(f"Unknown model type: {model}") + + model_predict_fn_name = model_predict_op.name + + predict_signature = inspect.signature(model_predict_op) + model_predict_arg_names = list(predict_signature.parameters.keys()) + + model_predict_args = { + k: v for k, v in 
model_input.items() if k in model_predict_arg_names + } + try: + model_predict_op = as_op(model_predict_op) + if model_self is not None: + model_predict_args = { + **model_predict_args, + "self": model_self, + } + model_start_time = time.time() + model_output, model_call = await async_call_op( + model_predict_op, **model_predict_args + ) + except OpCallError as e: + dataset_column_names = list(example.keys()) + dataset_column_names_str = ", ".join(dataset_column_names[:3]) + if len(dataset_column_names) > 3: + dataset_column_names_str += ", ..." + required_arg_names = [ + param.name + for param in predict_signature.parameters.values() + if param.default == inspect.Parameter.empty + ] + + message = textwrap.dedent( + f""" + Call error: {e} + + Options for resolving: + a. change {model_predict_fn_name} argument names to match a subset of dataset column names: {dataset_column_names_str} + b. change dataset column names to match expected {model_predict_fn_name} argument names: {required_arg_names} + c. 
construct Evaluation with a preprocess_model_input function that accepts a dataset example and returns a dict with keys expected by {model_predict_fn_name} + """ + ) + raise OpCallError(message) + except Exception: + print("model_output failed") + traceback.print_exc() + return ApplyModelError(model_latency=time.time() - model_start_time) + + return ApplyModelSuccess( + model_output=model_output, + model_call=model_call, + model_latency=time.time() - model_start_time, + ) diff --git a/weave/scorers/base_scorer.py b/weave/scorers/base_scorer.py index 4ac27f1a76b1..462246914f5b 100644 --- a/weave/scorers/base_scorer.py +++ b/weave/scorers/base_scorer.py @@ -1,17 +1,20 @@ import inspect import textwrap from collections.abc import Sequence +from dataclasses import dataclass from numbers import Number -from typing import Any, Callable, Optional, Union +from typing import Any, Callable, Optional, Union, cast import numpy as np from pydantic import BaseModel, Field import weave from weave.flow.obj import Object +from weave.trace.errors import OpCallError from weave.trace.isinstance import weave_isinstance from weave.trace.op import Op, as_op, is_op -from weave.trace.weave_client import sanitize_object_name +from weave.trace.op_caller import async_call_op +from weave.trace.weave_client import Call, sanitize_object_name class Scorer(Object): @@ -115,28 +118,39 @@ def auto_summarize(data: list) -> Optional[dict[str, Any]]: return None +@dataclass +class ScorerAttributes: + scorer_name: str + score_op: Op + summarize_fn: Callable + + def get_scorer_attributes( - scorer: Union[Callable, Op, Scorer], -) -> tuple[str, Callable, Callable]: - score_fn: Union[Op, Callable[..., Any]] + scorer: Union[Op, Scorer], +) -> ScorerAttributes: + score_op: Op + scorer_name: str if weave_isinstance(scorer, Scorer): - scorer_name = scorer.name - if scorer_name is None: + if scorer.name: + scorer_name = scorer.name + else: scorer_name = scorer.__class__.__name__ try: - score_fn = scorer.score + 
if not is_op(scorer.score): + raise TypeError( + f"Scorer {scorer_name} must implement `score` as a weave.op() decorated function." + ) + score_op = scorer.score summarize_fn = scorer.summarize # type: ignore + except AttributeError: raise ValueError( f"Scorer {scorer_name} must implement score and summarize methods. Did you forget to wrap with @weave.op()?" ) - elif callable(scorer): - if is_op(scorer): - scorer = as_op(scorer) - scorer_name = scorer.name - else: - scorer_name = scorer.__name__ - score_fn = scorer + elif is_op(scorer): + scorer = as_op(scorer) + scorer_name = cast(str, scorer.name) + score_op = scorer summarize_fn = auto_summarize # type: ignore else: raise ValueError(f"Unknown scorer type: {scorer}") @@ -144,14 +158,210 @@ def get_scorer_attributes( if scorer_name: scorer_name = sanitize_object_name(scorer_name) - return (scorer_name, score_fn, summarize_fn) # type: ignore + return ScorerAttributes( + scorer_name=scorer_name, score_op=score_op, summarize_fn=summarize_fn + ) -def _has_oldstyle_scorers(scorers: list[Union[Callable, Op, Scorer]]) -> bool: +def _has_oldstyle_scorers(scorers: list[Union[Op, Scorer]]) -> bool: """Check if any scorers use the deprecated 'model_output' parameter.""" for scorer in scorers: - _, score_fn, _ = get_scorer_attributes(scorer) - score_signature = inspect.signature(score_fn) + scorer_attributes = get_scorer_attributes(scorer) + score_op = scorer_attributes.score_op + score_signature = inspect.signature(score_op) if "model_output" in score_signature.parameters: return True return False + + +# Using `dataclass` because pydantic does not like `Call` as a property +@dataclass +class ApplyScorerSuccess: + result: Any + score_call: Call + + +ApplyScorerResult = ApplyScorerSuccess + + +async def apply_scorer_async( + scorer: Union[Op, Scorer], example: dict, model_output: dict +) -> ApplyScorerResult: + """Apply a scoring function to model output and example data asynchronously. 
+ + This function handles the application of a scoring function to evaluate model outputs. + It supports both function-based scorers (Op) and class-based scorers (Scorer), + managing argument mapping and validation. + + Args: + scorer: Either an Op (function) or Scorer (class) that implements scoring logic + example: Dictionary containing the input example data with features to score against + model_output: Dictionary containing the model's output to be scored + + Returns: + ApplyScorerResult: Contains the scoring result and the Call object representing + the scoring operation + + Raises: + OpCallError: If there are issues with argument mapping or scorer execution + ValueError: If the column mapping configuration is invalid + """ + # For class-based scorers, we need to keep track of the instance + scorer_self = None + if weave_isinstance(scorer, Scorer): + scorer_self = scorer + + # Extract the core components of the scorer + scorer_attributes = get_scorer_attributes(scorer) + scorer_name = scorer_attributes.scorer_name + score_op = scorer_attributes.score_op + score_signature = inspect.signature(score_op) + score_arg_names = list(score_signature.parameters.keys()) + + # Determine which parameter name is used for model output + # Scorers must have either 'output' or 'model_output' (deprecated) parameter + if "output" in score_arg_names: + score_output_name = "output" + elif "model_output" in score_arg_names: + score_output_name = "model_output" + else: + message = textwrap.dedent( + f""" + Scorer {scorer_name} must have an `output` or `model_output` argument, to receive the + output of the model function. + """ + ) + raise OpCallError(message) + + # The keys of `score_args` must match the argument names of the scorer's `score` method. + # If scorer.column_map is set, then user is indicating that the dataset column(s) + # being passed to the scorer have different names to the `score` functions' argument names. 
+ # So we need to remap the dataset columns to the expected argument names in the scorer, + # + # column_map k:v pairs must be structured as `scorer param name : dataset column name` + # + # For instance, if the scorer expects "input" and "ground_truth" and we have a dataset + # with columns "question" and "answer", column_map should be defined as follows: + # {"input": "question", "ground_truth": "answer"} + # + # input: is the full row, we have access to it via example + # output: is the model output, we have access to it via model_output + # Remove 'self' from argument names if present (for class-based scorers) + score_arg_names = [param for param in score_arg_names if (param != "self")] + score_args = {} + + # Handle column mapping if provided + # This allows dataset columns to be mapped to scorer argument names + if isinstance(scorer, Scorer) and scorer.column_map is not None: + # Validate that all mapped columns exist in scorer signature + for key in scorer.column_map.keys(): + if key not in score_arg_names: + message = textwrap.dedent( + f""" + You have created `{scorer_name}(column_map={scorer.column_map}, ...)`. + + The `column_map` contains a key, `{key}`, which is not in the `score` methods' argument names. + `score` methods' argument names: {score_arg_names} + + Hint: + - Ensure that the keys in `column_map` match the scorer's argument names. + """ + ) + raise ValueError(message) + + # Build arguments dictionary using column mapping + for arg in score_arg_names: + if arg == "output" or arg == "model_output": + continue + if arg in example: + score_args[arg] = example[arg] + elif arg in scorer.column_map: + dataset_column_name = scorer.column_map[arg] + if dataset_column_name in example: + score_args[arg] = example[dataset_column_name] + else: + message = textwrap.dedent( + f""" + You have created `{scorer_name}(column_map={scorer.column_map}, ...)`. 
+ + You are mapping `{arg}` to `{dataset_column_name}`, but `{dataset_column_name}` + was not found in the dataset columns. + + Available dataset columns: {list(example.keys())} + + Hint: + - Ensure that `column_map` maps the `score` methods' argument names to existing dataset column names. + """ + ) + raise ValueError(message) + else: + message = textwrap.dedent( + f""" + You have created `{scorer_name}(column_map={scorer.column_map}, ...)`. + + `score` method argument `{arg}` is not found in the dataset columns and is not mapped in `column_map`. + + Available dataset columns: {list(example.keys())} + `column_map`: {scorer.column_map} + + Hint: + Either: + - map the argument name to the dataset column using the scorers `column_map` attribute, in the form {{score_arg_name : dataset_column_name}} or + - rename a column in the dataset to `{arg}` or + - re-name the `{arg}` argument in your `score` method to match a dataset column name + """ + ) + raise ValueError(message) + else: + # Without column mapping, directly match scorer arguments to example keys + score_args = {k: v for k, v in example.items() if k in score_arg_names} + + # Add the model output to the arguments + score_args[score_output_name] = model_output + + try: + # Execute the scoring operation + score_op = as_op(score_op) + if scorer_self is not None: + score_args = { + **score_args, + "self": scorer_self, + } + result, score_call = await async_call_op(score_op, **score_args) + except OpCallError as e: + # Provide detailed error message if scoring fails + dataset_column_names = list(example.keys()) + dataset_column_names_str = ", ".join(dataset_column_names[:3]) + if len(dataset_column_names) > 10: + dataset_column_names_str += ", ..." 
+ required_arg_names = [ + param.name + for param in score_signature.parameters.values() + if param.default == inspect.Parameter.empty + ] + required_arg_names.remove(score_output_name) + + message = textwrap.dedent( + f""" + Call error: {e} + + If using the `Scorer` weave class, you can set the `scorer.column_map` + attribute to map scorer argument names to dataset columns. + + For example, if the `score` expects "output", "input" and "ground_truth" and we have a dataset + with columns "question" and "answer", `column_map` can be used to map the non-output parameter like so: + {{"input": "question", "ground_truth": "answer"}} + + scorer argument names: {score_arg_names} + dataset keys: {example.keys()} + scorer.column_map: {getattr(scorer, 'column_map', '{}')} + + Options for resolving: + a. if using the `Scorer` weave class, you can set the `scorer.column_map` attribute to map scorer argument names to dataset column names or + b. change the argument names the in the scoring function of {scorer_name} to match a subset of dataset column names: ({dataset_column_names_str}) or + c. 
change dataset column names to match expected {scorer_name} argument names: {required_arg_names} + """ + ) + raise OpCallError(message) + + return ApplyScorerSuccess(result=result, score_call=score_call) diff --git a/weave/trace/isinstance.py b/weave/trace/isinstance.py index 809509373a91..16bb784c6845 100644 --- a/weave/trace/isinstance.py +++ b/weave/trace/isinstance.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Any, TypeVar from typing_extensions import TypeGuard @@ -8,7 +10,9 @@ C = TypeVar("C") -def weave_isinstance(obj: Any, cls: type[C]) -> TypeGuard[C]: +def weave_isinstance(obj: Any, cls: type[C] | tuple[type[C], ...]) -> TypeGuard[C]: + if isinstance(cls, tuple): + return any(weave_isinstance(obj, c) for c in cls) if isinstance(obj, cls): # type: ignore return True if isinstance(obj, ObjectRecord): diff --git a/weave/trace/op_caller.py b/weave/trace/op_caller.py new file mode 100644 index 000000000000..4903df0768a3 --- /dev/null +++ b/weave/trace/op_caller.py @@ -0,0 +1,50 @@ +import asyncio +import inspect +from collections.abc import Coroutine +from typing import Any, Callable, Union + +from weave.trace.op import Op, as_op, is_op +from weave.trace.weave_client import Call + + +def async_call(func: Union[Callable, Op], *args: Any, **kwargs: Any) -> Coroutine: + """For async functions, calls them directly. For sync functions, runs them in a thread. + This provides a common async interface for both sync and async functions. 
+ + Args: + func: The callable or Op to wrap + *args: Positional arguments to pass to the function + **kwargs: Keyword arguments to pass to the function + + Returns: + A coroutine that will execute the function + """ + is_async = False + if is_op(func): + func = as_op(func) + is_async = inspect.iscoroutinefunction(func.resolve_fn) + else: + is_async = inspect.iscoroutinefunction(func) + if is_async: + return func(*args, **kwargs) # type: ignore + return asyncio.to_thread(func, *args, **kwargs) + + +def async_call_op( + func: Op, *args: Any, **kwargs: Any +) -> Coroutine[Any, Any, tuple[Any, "Call"]]: + """Similar to async_call but specifically for Ops, handling the Weave tracing + functionality. For sync Ops, runs them in a thread. + + Args: + func: The Op to wrap + *args: Positional arguments to pass to the Op + **kwargs: Keyword arguments to pass to the Op + + Returns: + A coroutine that will execute the Op and return a tuple of (result, Call) + """ + call_res = func.call(*args, __should_raise=True, **kwargs) + if inspect.iscoroutine(call_res): + return call_res + return asyncio.to_thread(lambda: call_res) From 06abda96d2e7eab296f72f58169a847f16cc0374 Mon Sep 17 00:00:00 2001 From: Griffin Tarpenning Date: Tue, 7 Jan 2025 09:14:35 -0800 Subject: [PATCH 04/51] chore(ui): fix scorer create field cursor behavior (#3307) --- .../pages/ScorersPage/ZodSchemaForm.tsx | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ScorersPage/ZodSchemaForm.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ScorersPage/ZodSchemaForm.tsx index 94106a7a3b20..1a03082e9c47 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ScorersPage/ZodSchemaForm.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ScorersPage/ZodSchemaForm.tsx @@ -6,7 +6,7 @@ import { InputLabel, } from '@material-ui/core'; import {Button} from 
'@wandb/weave/components/Button'; -import React, {useEffect, useMemo, useState} from 'react'; +import React, {useCallback, useEffect, useMemo, useState} from 'react'; import {z} from 'zod'; import { @@ -167,11 +167,24 @@ const NestedForm: React.FC<{ const [currentValue, setCurrentValue] = useState( getNestedValue(config, currentPath) ); + + // Only update parent config on blur, for string fields + const handleBlur = useCallback(() => { + if (currentValue !== getNestedValue(config, currentPath)) { + updateConfig(currentPath, currentValue, config, setConfig); + } + }, [currentValue, currentPath, config, setConfig]); + const handleChange = useCallback((value: string) => { + setCurrentValue(value); + }, []); + + // set current value for non-string fields useEffect(() => { setCurrentValue(getNestedValue(config, currentPath)); }, [config, currentPath]); const unwrappedSchema = unwrapSchema(fieldSchema); + const isOptional = fieldSchema instanceof z.ZodOptional; if (unwrappedSchema instanceof z.ZodDiscriminatedUnion) { return ( @@ -294,7 +307,6 @@ const NestedForm: React.FC<{ } else if (isZodType(fieldSchema, s => s instanceof z.ZodBoolean)) { fieldType = 'checkbox'; } - const isOptional = fieldSchema instanceof z.ZodOptional; return ( updateConfig(currentPath, value, config, setConfig)} + onChange={handleChange} + onBlur={handleBlur} autoFocus={autoFocus} /> ); From e4003bf24516a0f7cae8c5bea9904efd63df373d Mon Sep 17 00:00:00 2001 From: Griffin Tarpenning Date: Tue, 7 Jan 2025 09:26:11 -0800 Subject: [PATCH 05/51] chore(weave): make annotation values in the traces table clickable (#3332) --- .../Home/Browse3/pages/CallsPage/callsTableColumns.tsx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/callsTableColumns.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/callsTableColumns.tsx index 93bd2ee030b1..4eee09ece141 100644 --- 
a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/callsTableColumns.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallsPage/callsTableColumns.tsx @@ -18,6 +18,7 @@ import {monthRoundedTime} from '../../../../../../common/util/time'; import {isWeaveObjectRef, parseRef} from '../../../../../../react'; import {makeRefCall} from '../../../../../../util/refs'; import {Timestamp} from '../../../../../Timestamp'; +import {CellValueString} from '../../../Browse2/CellValueString'; import { convertFeedbackFieldToBackendFilter, parseFeedbackType, @@ -372,6 +373,9 @@ function buildCallsTableColumns( if (typeof params.value === 'boolean') { return
{params.value ? 'true' : 'false'}
; } + if (typeof params.value === 'string') { + return ; + } return
{params.value}
; }, }; From 5a61b84dc74612796957a3de7d27fc12ce4bf7e4 Mon Sep 17 00:00:00 2001 From: Jamie Rasmussen <112953339+jamie-rasmussen@users.noreply.github.com> Date: Tue, 7 Jan 2025 11:51:54 -0600 Subject: [PATCH 06/51] chore(ui): replace circle icon (#3292) --- .../pages/CompareEvaluationsPage/ecpConstants.ts | 1 - .../EvaluationDefinition.tsx | 11 +---------- .../ExampleCompareSection.tsx | 14 ++++---------- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpConstants.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpConstants.ts index 1d09d64a13cc..71bf0731c4f6 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpConstants.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpConstants.ts @@ -2,7 +2,6 @@ import {MOON_300} from '../../../../../../common/css/color.styles'; export const EVAL_DEF_HEIGHT = 45; export const STANDARD_PADDING = 16; -export const CIRCLE_SIZE = '16px'; export const BOX_RADIUS = '6px'; export const STANDARD_BORDER = `1px solid ${MOON_300}`; export const PLOT_HEIGHT = 300; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/EvaluationDefinition.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/EvaluationDefinition.tsx index 1894398e553f..0de9421ba1a9 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/EvaluationDefinition.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/EvaluationDefinition.tsx @@ -1,5 +1,4 @@ import {Box} from '@material-ui/core'; -import {Circle} from 
'@mui/icons-material'; import React, {useMemo} from 'react'; import { @@ -14,7 +13,6 @@ import {SmallRef} from '../../../../../Browse2/SmallRef'; import {CallLink, ObjectVersionLink} from '../../../common/Links'; import {useWFHooks} from '../../../wfReactInterface/context'; import {ObjectVersionKey} from '../../../wfReactInterface/wfDataModelHooksInterface'; -import {CIRCLE_SIZE} from '../../ecpConstants'; import {EvaluationComparisonState} from '../../ecpState'; export const EvaluationCallLink: React.FC<{ @@ -33,14 +31,7 @@ export const EvaluationCallLink: React.FC<{ projectName={project} opName={evaluationCall.name} callId={props.callId} - icon={ - - } + icon={} color={MOON_800} /> ); diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleCompareSection/ExampleCompareSection.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleCompareSection/ExampleCompareSection.tsx index 4af9900f2dfc..de552332adbf 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleCompareSection/ExampleCompareSection.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleCompareSection/ExampleCompareSection.tsx @@ -1,5 +1,5 @@ import {Box, Tooltip} from '@material-ui/core'; -import {Circle, WarningAmberOutlined} from '@mui/icons-material'; +import {WarningAmberOutlined} from '@mui/icons-material'; import _ from 'lodash'; import React, {useCallback, useEffect, useMemo, useRef} from 'react'; import styled from 'styled-components'; @@ -16,6 +16,7 @@ import { WeaveObjectRef, } from '../../../../../../../../react'; import {Button} from '../../../../../../../Button'; +import {Icon} from '../../../../../../../Icon'; import {CellValue} from '../../../../../Browse2/CellValue'; import {NotApplicable} from '../../../../../Browse2/NotApplicable'; import {SmallRef} from 
'../../../../../Browse2/SmallRef'; @@ -31,7 +32,7 @@ import { DERIVED_SCORER_REF_PLACEHOLDER, resolvePeerDimension, } from '../../compositeMetricsUtil'; -import {CIRCLE_SIZE, SIGNIFICANT_DIGITS} from '../../ecpConstants'; +import {SIGNIFICANT_DIGITS} from '../../ecpConstants'; import {EvaluationComparisonState} from '../../ecpState'; import {MetricDefinition, MetricValueType} from '../../ecpTypes'; import { @@ -495,14 +496,7 @@ export const ExampleCompareSection: React.FC<{ projectName={trialProject} opName={trialOpName} callId={trialCallId} - icon={ - - } + icon={} color={MOON_800} /> From 2880ba84a2bbb14e8a9748f456e6b9f15d2d68cc Mon Sep 17 00:00:00 2001 From: Griffin Tarpenning Date: Tue, 7 Jan 2025 11:47:23 -0800 Subject: [PATCH 07/51] chore(weave): add annotation spec name to feedback grid (#3331) --- .../Home/Browse3/feedback/FeedbackGrid.tsx | 4 ++ .../Browse3/feedback/FeedbackGridInner.tsx | 42 ++++++++++++++++--- .../humanAnnotationTypes.ts | 3 ++ 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/FeedbackGrid.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/FeedbackGrid.tsx index 72e436c62bc7..5194af00fe31 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/FeedbackGrid.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/FeedbackGrid.tsx @@ -43,6 +43,9 @@ export const FeedbackGrid = ({ return getTsClient().registerOnFeedbackListener(weaveRef, query.refetch); // eslint-disable-next-line react-hooks/exhaustive-deps }, []); + const hasAnnotationFeedback = query.result?.some(f => + f.feedback_type.startsWith(ANNOTATION_PREFIX) + ); // Group by feedback on this object vs. 
descendent objects const grouped = useMemo(() => { @@ -127,6 +130,7 @@ export const FeedbackGrid = ({ ); diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/FeedbackGridInner.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/FeedbackGridInner.tsx index b8b2f4154c28..92b59d06e4d6 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/FeedbackGridInner.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/FeedbackGridInner.tsx @@ -1,4 +1,8 @@ -import {GridColDef, GridRowHeightParams} from '@mui/x-data-grid-pro'; +import { + GridColDef, + GridRenderCellParams, + GridRowHeightParams, +} from '@mui/x-data-grid-pro'; import React from 'react'; import {Timestamp} from '../../../../Timestamp'; @@ -9,16 +13,21 @@ import {Feedback} from '../pages/wfReactInterface/traceServerClientTypes'; import {StyledDataGrid} from '../StyledDataGrid'; import {FeedbackGridActions} from './FeedbackGridActions'; import {FeedbackTypeChip} from './FeedbackTypeChip'; -import {isHumanAnnotationType} from './StructuredFeedback/humanAnnotationTypes'; +import { + getHumanAnnotationNameFromFeedbackType, + isHumanAnnotationType, +} from './StructuredFeedback/humanAnnotationTypes'; type FeedbackGridInnerProps = { feedback: Feedback[]; currentViewerId: string | null; + showAnnotationName?: boolean; }; export const FeedbackGridInner = ({ feedback, currentViewerId, + showAnnotationName, }: FeedbackGridInnerProps) => { const columns: GridColDef[] = [ { @@ -31,6 +40,25 @@ export const FeedbackGridInner = ({ ), }, + ...(showAnnotationName + ? [ + { + field: 'annotation_name', + headerName: 'Name', + flex: 1, + renderCell: (params: GridRenderCellParams) => { + const feedbackType = params.row.feedback_type; + const annotationName = isHumanAnnotationType(feedbackType) + ? 
getHumanAnnotationNameFromFeedbackType(feedbackType) + : null; + if (!annotationName) { + return null; + } + return ; + }, + }, + ] + : []), { field: 'payload', headerName: 'Feedback', @@ -61,8 +89,8 @@ export const FeedbackGridInner = ({ { field: 'created_at', headerName: 'Timestamp', - minWidth: 120, - width: 120, + minWidth: 105, + width: 105, renderCell: params => ( ), @@ -70,7 +98,8 @@ export const FeedbackGridInner = ({ { field: 'id', headerName: 'ID', - width: 50, + width: 48, + minWidth: 48, display: 'flex', renderCell: params => , }, @@ -98,7 +127,8 @@ export const FeedbackGridInner = ({ { field: 'actions', headerName: '', - width: 50, + width: 36, + minWidth: 36, filterable: false, sortable: false, resizable: false, diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/StructuredFeedback/humanAnnotationTypes.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/StructuredFeedback/humanAnnotationTypes.ts index ce28f3113fec..83bb501d9f0c 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/StructuredFeedback/humanAnnotationTypes.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/feedback/StructuredFeedback/humanAnnotationTypes.ts @@ -26,3 +26,6 @@ export type HumanAnnotation = Feedback & {}; export const isHumanAnnotationType = (feedbackType: string) => feedbackType.startsWith(HUMAN_ANNOTATION_BASE_TYPE); + +export const getHumanAnnotationNameFromFeedbackType = (feedbackType: string) => + feedbackType.split('.').pop(); From 09796cd57d235df4ce3d1b3ba1f3ec65f7d39b5f Mon Sep 17 00:00:00 2001 From: J2-D2-3PO <188380414+J2-D2-3PO@users.noreply.github.com> Date: Tue, 7 Jan 2025 14:31:07 -0700 Subject: [PATCH 08/51] docs(weave): Add basic Azure integration page (#3112) --- docs/docs/guides/integrations/azure.md | 26 ++++++++++++++++++++++++++ docs/docs/guides/integrations/index.md | 1 + docs/sidebars.ts | 1 + 3 files changed, 28 insertions(+) create mode 100644 
docs/docs/guides/integrations/azure.md diff --git a/docs/docs/guides/integrations/azure.md b/docs/docs/guides/integrations/azure.md new file mode 100644 index 000000000000..87e2287afe3d --- /dev/null +++ b/docs/docs/guides/integrations/azure.md @@ -0,0 +1,26 @@ +# Microsoft Azure + +Weights & Biases integrates with Microsoft Azure OpenAI services, helping teams to manage, debug, and optimize their Azure AI workflows at scale. This guide introduces the W&B integration, what it means for Weave users, its key features, and how to get started. + +## Key features + +- **LLM evaluations**: Evaluate and monitor LLM-powered applications using Weave, optimized for Azure infrastructure. +- **Seamless integration**: Deploy W&B Models on a dedicated Azure tenant with built-in integrations for Azure AI Studio, Azure ML, Azure OpenAI Service, and other Azure AI services. +- **Enhanced performance**: Use Azure’s infrastructure to train and deploy models faster, with auto-scaling clusters and optimized resources. +- **Scalable experiment tracking**: Automatically log hyperparameters, metrics, and artifacts for Azure AI Studio and Azure ML runs. +- **LLM fine-tuning**: Fine-tune models with W&B Models. +- **Central repository for models and datasets**: Manage and version models and datasets with W&B Registry and Azure AI Studio. +- **Collaborative workspaces**: Support teamwork with shared workspaces, experiment commenting, and Microsoft Teams integration. +- **Governance framework**: Ensure security with fine-grained access controls, audit trails, and Microsoft Entra ID integration. + +## Getting started + +To use W&B with Azure, add the W&B integration via the [Azure Marketplace](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/weightsandbiasesinc1641502883483.weights_biases_for_azure?tab=Overview). 
+ +For a detailed guide describing how to integrate Azure OpenAI fine-tuning with W&B, see [Integrating Weights & Biases with Azure AI Services](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/weights-and-biases-integration). + +## Learn more + +- [Weights & Biases + Microsoft Azure Overview](https://wandb.ai/site/partners/azure) +- [How W&B and Microsoft Azure Are Empowering Enterprises](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/how-weights--biases-and-microsoft-azure-are-empowering-enterprises-to-fine-tune-/4303716) +- [Microsoft Azure OpenAI Service Documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/) diff --git a/docs/docs/guides/integrations/index.md b/docs/docs/guides/integrations/index.md index 78504dc154b4..d5352b601c1b 100644 --- a/docs/docs/guides/integrations/index.md +++ b/docs/docs/guides/integrations/index.md @@ -15,6 +15,7 @@ LLM providers are the vendors that offer access to large language models for gen - **[Cerebras](/guides/integrations/cerebras)** - **[Cohere](/guides/integrations/cohere)** - **[MistralAI](/guides/integrations/mistral)** +- **[Microsoft Azure](/guides/integrations/azure)** - **[Google Gemini](/guides/integrations/google-gemini)** - **[Together AI](/guides/integrations/together_ai)** - **[Groq](/guides/integrations/groq)** diff --git a/docs/sidebars.ts b/docs/sidebars.ts index b132e5ac6476..447e26e44bbb 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -96,6 +96,7 @@ const sidebars: SidebarsConfig = { "guides/integrations/cerebras", "guides/integrations/cohere", "guides/integrations/mistral", + "guides/integrations/azure", "guides/integrations/google-gemini", "guides/integrations/together_ai", "guides/integrations/groq", From fb3e3a7bc915e9c475b0e92e7561cf38ae0d7be4 Mon Sep 17 00:00:00 2001 From: wylerz Date: Tue, 7 Jan 2025 14:49:31 -0700 Subject: [PATCH 09/51] fix urls (#3327) --- sdks/node/src/urls.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/sdks/node/src/urls.ts b/sdks/node/src/urls.ts index e90033d21e00..db829106082e 100644 --- a/sdks/node/src/urls.ts +++ b/sdks/node/src/urls.ts @@ -9,7 +9,7 @@ export function getUrls(host?: string) { baseUrl: isDefault ? `https://api.wandb.ai` : `https://${resolvedHost}`, traceBaseUrl: isDefault ? `https://trace.wandb.ai` - : `https://${resolvedHost}`, + : `https://${resolvedHost}/traces`, domain: isDefault ? defaultDomain : resolvedHost, host: isDefault ? defaultHost : resolvedHost, }; From 2f48650c1c91f815a32e76d0047db169efe6b7f2 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 7 Jan 2025 15:33:18 -0800 Subject: [PATCH 10/51] chore(weave): add obj create and table update bindings to wf react interface (#3337) --- .../wfReactInterface/traceServerClient.ts | 6 ++ .../traceServerClientTypes.ts | 32 ++++++++++ .../traceServerDirectClient.ts | 9 +++ .../wfReactInterface/tsDataModelHooks.ts | 59 ++++++++++++++++++- .../wfDataModelHooksInterface.ts | 13 +++- 5 files changed, 116 insertions(+), 3 deletions(-) diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerClient.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerClient.ts index b3a15a8a5953..9b45049ffc7b 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerClient.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerClient.ts @@ -8,6 +8,8 @@ import { FeedbackCreateRes, FeedbackPurgeReq, FeedbackPurgeRes, + TableUpdateReq, + TableUpdateRes, TraceCallsDeleteReq, TraceCallUpdateReq, TraceObjCreateReq, @@ -115,6 +117,10 @@ export class TraceServerClient extends CachingTraceServerClient { return res; } + public tableUpdate(req: TableUpdateReq): Promise { + return super.tableUpdate(req); + } + public feedbackCreate(req: FeedbackCreateReq): Promise { const res = super.feedbackCreate(req).then(createRes => { const 
listeners = this.onFeedbackListeners[req.weave_ref] ?? []; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerClientTypes.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerClientTypes.ts index 2ad7efb25642..e039747042cb 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerClientTypes.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerClientTypes.ts @@ -347,3 +347,35 @@ export type ActionsExecuteBatchReq = { }; export type ActionsExecuteBatchRes = {}; + +export type TableUpdateSpec = TableAppendSpec | TablePopSpec | TableInsertSpec; + +export interface TableAppendSpec { + append: { + row: Record; + }; +} + +export interface TablePopSpec { + pop: { + index: number; + }; +} + +export interface TableInsertSpec { + insert: { + index: number; + row: Record; + }; +} + +export type TableUpdateReq = { + project_id: string; + base_digest: string; + updates: TableUpdateSpec[]; +}; + +export type TableUpdateRes = { + digest: string; + updated_row_digests: string[]; +}; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerDirectClient.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerDirectClient.ts index e162476920af..22d2ec797810 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerDirectClient.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/traceServerDirectClient.ts @@ -27,6 +27,8 @@ import { FeedbackPurgeRes, FeedbackQueryReq, FeedbackQueryRes, + TableUpdateReq, + TableUpdateRes, TraceCallReadReq, TraceCallReadRes, TraceCallSchema, @@ -253,6 +255,13 @@ export class DirectTraceServerClient { ); } + public tableUpdate(req: TableUpdateReq): Promise { + return this.makeRequest( + '/table/update', 
+ req + ); + } + public tableQuery(req: TraceTableQueryReq): Promise { return this.makeRequest( '/table/query', diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/tsDataModelHooks.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/tsDataModelHooks.ts index 8e2f42035a1f..02187b0f65be 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/tsDataModelHooks.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/tsDataModelHooks.ts @@ -1713,14 +1713,67 @@ export const traceCallToUICallSchema = ( }; }; +export const useObjCreate = (): (( + projectId: string, + objectId: string, + val: any, + baseObjectClass?: string +) => Promise) => { + const getTsClient = useGetTraceServerClientContext(); + + return useCallback( + ( + projectId: string, + objectId: string, + val: any, + baseObjectClass?: string + ) => { + return getTsClient() + .objCreate({ + obj: { + project_id: projectId, + object_id: objectId, + val, + builtin_object_class: baseObjectClass, + }, + }) + .then(res => { + return res.digest; + }); + }, + [getTsClient] + ); +}; + +export const useTableUpdate = (): (( + projectId: string, + digest: string, + updates: traceServerTypes.TableUpdateSpec[] +) => Promise) => { + const getTsClient = useGetTraceServerClientContext(); + + return useCallback( + ( + projectId: string, + baseDigest: string, + updates: traceServerTypes.TableUpdateSpec[] + ) => { + return getTsClient().tableUpdate({ + project_id: projectId, + base_digest: baseDigest, + updates, + }); + }, + [getTsClient] + ); +}; + /// Utility Functions /// export const convertISOToDate = (iso: string): Date => { return new Date(iso); }; -// Export // - export const tsWFDataModelHooks: WFDataModelHooksInterface = { useCall, useCalls, @@ -1728,6 +1781,7 @@ export const tsWFDataModelHooks: WFDataModelHooksInterface = { useCallsDeleteFunc, useCallUpdateFunc, useCallsExport, + 
useObjCreate, useOpVersion, useOpVersions, useObjectVersion, @@ -1738,6 +1792,7 @@ export const tsWFDataModelHooks: WFDataModelHooksInterface = { useFileContent, useTableRowsQuery, useTableQueryStats, + useTableUpdate, derived: { useChildCallsForCompare, useGetRefsType, diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/wfDataModelHooksInterface.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/wfDataModelHooksInterface.ts index f28c7be67144..6b08bbba26f4 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/wfDataModelHooksInterface.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/wfDataModelHooksInterface.ts @@ -14,7 +14,7 @@ import {WeaveKind} from '../../../../../../react'; import {KNOWN_BASE_OBJECT_CLASSES, OP_CATEGORIES} from './constants'; import {Query} from './traceServerClientInterface/query'; // TODO: This import is not ideal, should delete this whole interface import * as traceServerClientTypes from './traceServerClientTypes'; // TODO: This import is not ideal, should delete this whole interface -import {ContentType} from './traceServerClientTypes'; +import {ContentType, TableUpdateSpec} from './traceServerClientTypes'; export type OpCategory = (typeof OP_CATEGORIES)[number]; export type KnownBaseObjectClassType = @@ -214,6 +214,12 @@ export type WFDataModelHooksInterface = { expandedRefCols?: string[], includeFeedback?: boolean ) => Promise; + useObjCreate: () => ( + projectId: string, + objectId: string, + val: any, + baseObjectClass?: string + ) => Promise; useOpVersion: (key: OpVersionKey | null) => Loadable; useOpVersions: ( entity: string, @@ -270,6 +276,11 @@ export type WFDataModelHooksInterface = { key: FeedbackKey | null, sortBy?: traceServerClientTypes.SortBy[] ) => LoadableWithError & Refetchable; + useTableUpdate: () => ( + projectId: string, + baseDigest: string, + updates: 
traceServerClientTypes.TableUpdateSpec[] + ) => Promise; derived: { useChildCallsForCompare: ( entity: string, From f50697fe905c454c5d762c7701c3bc45db8ddc87 Mon Sep 17 00:00:00 2001 From: Griffin Tarpenning Date: Tue, 7 Jan 2025 16:27:42 -0800 Subject: [PATCH 11/51] chore(ui): add a loading state var to avoid "no results" on load (#3339) --- .../Home/Browse3/pages/CallPage/DataTableView.tsx | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/DataTableView.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/DataTableView.tsx index 9a954858926c..800b6a0fdf2b 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/DataTableView.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CallPage/DataTableView.tsx @@ -152,10 +152,14 @@ export const WeaveCHTable: FC<{ ); const [loadedRows, setLoadedRows] = useState>([]); + const [fetchQueryLoaded, setFetchQueryLoaded] = useState(false); useEffect(() => { - if (!fetchQuery.loading && fetchQuery.result) { - setLoadedRows(fetchQuery.result.rows); + if (!fetchQuery.loading) { + if (fetchQuery.result) { + setLoadedRows(fetchQuery.result.rows); + } + setFetchQueryLoaded(true); } }, [fetchQuery.loading, fetchQuery.result]); @@ -224,7 +228,7 @@ export const WeaveCHTable: FC<{ }}> Date: Wed, 8 Jan 2025 09:36:56 -0800 Subject: [PATCH 12/51] refactor(weave): dedicated component for dataset versions (#3338) --- .../Browse3/datasets/DatasetVersionPage.tsx | 158 ++++++++++++++++++ .../Home/Browse3/pages/ObjectVersionPage.tsx | 16 +- 2 files changed, 165 insertions(+), 9 deletions(-) create mode 100644 weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/DatasetVersionPage.tsx diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/DatasetVersionPage.tsx 
b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/DatasetVersionPage.tsx new file mode 100644 index 000000000000..f3aed14e9efd --- /dev/null +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/DatasetVersionPage.tsx @@ -0,0 +1,158 @@ +import Box from '@mui/material/Box'; +import React, {useMemo} from 'react'; + +import {Icon} from '../../../../Icon'; +import {LoadingDots} from '../../../../LoadingDots'; +import {Tailwind} from '../../../../Tailwind'; +import {WeaveCHTableSourceRefContext} from '../pages/CallPage/DataTableView'; +import {ObjectViewerSection} from '../pages/CallPage/ObjectViewerSection'; +import {objectVersionText} from '../pages/common/Links'; +import {ObjectVersionsLink} from '../pages/common/Links'; +import {CenteredAnimatedLoader} from '../pages/common/Loader'; +import { + ScrollableTabContent, + SimplePageLayoutWithHeader, +} from '../pages/common/SimplePageLayout'; +import {TabUseDataset} from '../pages/TabUseDataset'; +import {useWFHooks} from '../pages/wfReactInterface/context'; +import {objectVersionKeyToRefUri} from '../pages/wfReactInterface/utilities'; +import {ObjectVersionSchema} from '../pages/wfReactInterface/wfDataModelHooksInterface'; +import {CustomWeaveTypeProjectContext} from '../typeViews/CustomWeaveTypeDispatcher'; + +export const DatasetVersionPage: React.FC<{ + objectVersion: ObjectVersionSchema; +}> = ({objectVersion}) => { + const {useRootObjectVersions, useRefsData} = useWFHooks(); + const entityName = objectVersion.entity; + const projectName = objectVersion.project; + const objectName = objectVersion.objectId; + const objectVersionIndex = objectVersion.versionIndex; + + const objectVersions = useRootObjectVersions( + entityName, + projectName, + { + objectIds: [objectName], + }, + undefined, + true + ); + const objectVersionCount = (objectVersions.result ?? 
[]).length; + const refUri = objectVersionKeyToRefUri(objectVersion); + + const data = useRefsData([refUri]); + const viewerData = useMemo(() => { + if (data.loading) { + return {}; + } + return data.result?.[0] ?? {}; + }, [data.loading, data.result]); + + const viewerDataAsObject = useMemo(() => { + const dataIsPrimitive = + typeof viewerData !== 'object' || + viewerData === null || + Array.isArray(viewerData); + if (dataIsPrimitive) { + return {_result: viewerData}; + } + return viewerData; + }, [viewerData]); + + return ( + +
+
+ +
+ {objectVersionText(objectName, objectVersionIndex)} +
+ + } + headerContent={ + +
+
+

Name

+ +
+ {objectName} + {objectVersions.loading ? ( + + ) : ( + + ({objectVersionCount} version + {objectVersionCount !== 1 ? 's' : ''}) + + )} + +
+
+
+
+

Version

+

{objectVersionIndex}

+
+
+
+ } + tabs={[ + { + label: 'Rows', + content: ( + + + {data.loading ? ( + + ) : ( + + + + + + )} + + + ), + }, + { + label: 'Use', + content: ( + + + + + + ), + }, + ]} + /> + ); +}; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ObjectVersionPage.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ObjectVersionPage.tsx index 085587a64d8c..1b0bb491f1c0 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ObjectVersionPage.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/ObjectVersionPage.tsx @@ -7,6 +7,7 @@ import {Icon, IconName} from '../../../../Icon'; import {LoadingDots} from '../../../../LoadingDots'; import {Tailwind} from '../../../../Tailwind'; import {Tooltip} from '../../../../Tooltip'; +import {DatasetVersionPage} from '../datasets/DatasetVersionPage'; import {NotFoundPanel} from '../NotFoundPanel'; import {CustomWeaveTypeProjectContext} from '../typeViews/CustomWeaveTypeDispatcher'; import {WeaveCHTableSourceRefContext} from './CallPage/DataTableView'; @@ -27,7 +28,6 @@ import { } from './common/SimplePageLayout'; import {EvaluationLeaderboardTab} from './LeaderboardTab'; import {TabPrompt} from './TabPrompt'; -import {TabUseDataset} from './TabUseDataset'; import {TabUseModel} from './TabUseModel'; import {TabUseObject} from './TabUseObject'; import {TabUsePrompt} from './TabUsePrompt'; @@ -198,6 +198,10 @@ const ObjectVersionPageInner: React.FC<{ return ; } + if (isDataset) { + return ; + } + return ( + - ) : baseObjectClass === 'Dataset' ? ( - ) : baseObjectClass === 'Model' ? 
( Date: Wed, 8 Jan 2025 09:45:22 -0800 Subject: [PATCH 13/51] chore(app): Adding an optional name to label the number input (#3345) --- .../components/elements/NumberInput.tsx | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/weave-js/src/common/components/elements/NumberInput.tsx b/weave-js/src/common/components/elements/NumberInput.tsx index 03a2486d0603..3c180cc7a9ba 100644 --- a/weave-js/src/common/components/elements/NumberInput.tsx +++ b/weave-js/src/common/components/elements/NumberInput.tsx @@ -6,17 +6,18 @@ import clamp from '../../util/clamp'; interface NumberInputProps { className?: string; - value?: number; - placeholder?: string; - disabled?: boolean; - stepper?: boolean; - ticks?: number[]; - min?: number; - max?: number; containerStyle?: React.CSSProperties; + disabled?: boolean; inputStyle?: React.CSSProperties; - strideLength?: number; + label?: string; + max?: number; + min?: number; onChange: (newVal?: number) => void; + placeholder?: string; + stepper?: boolean; + strideLength?: number; + ticks?: number[]; + value?: number; } const NumberInput: React.FC = props => { @@ -79,11 +80,12 @@ const NumberInput: React.FC = props => { return (
{ focusedRef.current = true; From c3910fd008d3206fe323e7feb1f5daa91ef23890 Mon Sep 17 00:00:00 2001 From: brianlund-wandb Date: Wed, 8 Jan 2025 10:59:56 -0800 Subject: [PATCH 14/51] fix(weave): optimize finding next tick up or down (#3231) * optimize finding next tick up or down * addressing static analysic error * tslint issues * refactor logic to single function * removed expensive check * fixing linting issue * removing unnecessary comments --- .../components/elements/SliderInput.test.tsx | 123 ++++++++++++++++++ .../components/elements/SliderInput.tsx | 66 +++++++--- 2 files changed, 169 insertions(+), 20 deletions(-) create mode 100644 weave-js/src/common/components/elements/SliderInput.test.tsx diff --git a/weave-js/src/common/components/elements/SliderInput.test.tsx b/weave-js/src/common/components/elements/SliderInput.test.tsx new file mode 100644 index 000000000000..9e448568aa44 --- /dev/null +++ b/weave-js/src/common/components/elements/SliderInput.test.tsx @@ -0,0 +1,123 @@ +import {getClosestTick} from './SliderInput'; + +describe('getClosestTick', () => { + test('value within range, no ticks', () => { + const ticks = undefined; + const min = 1; + const max = 10; + const previous = 4; + const val = 3; + const actual = getClosestTick(val, previous, min, max, ticks); + expect(actual).toBe(3); + }); + test('lower below min coerced to min', () => { + const ticks = [2, 4, 6, 8, 10]; + const min = ticks[0]; + const max = ticks[ticks.length - 1]; + const previous = 2; + const val = 1; + const actual = getClosestTick(val, previous, min, max, ticks); + expect(actual).toBe(2); + }); + // not convinced this is a realistic test + test.skip('upper above non-inclusive max', () => { + const ticks = [2, 4, 6, 8, 10]; + const min = ticks[0]; + const max = 12; + const previous = 10; + const val = 12; + const actual = getClosestTick(val, previous, min, max, ticks); + expect(actual).toBe(10); + }); + test('upper above max coerced to max', () => { + const ticks = [2, 4, 6, 
8, 10]; + const min = ticks[0]; + const max = ticks[ticks.length - 1]; + const previous = 10; + const val = 11; + const actual = getClosestTick(val, previous, min, max, ticks); + expect(actual).toBe(10); + }); + test('lower previous value returns next greater', () => { + const ticks = [2, 4, 6, 8, 10]; + const min = ticks[0]; + const max = ticks[ticks.length - 1]; + const previous = 4; + const val = 5; + const actual = getClosestTick(val, previous, min, max, ticks); + expect(actual).toBe(6); + }); + test('lower previous value returns next greater, large step', () => { + const ticks = [2, 4, 60, 80, 10]; + const min = ticks[0]; + const max = ticks[ticks.length - 1]; + const previous = 4; + const val = 5; + const actual = getClosestTick(val, previous, min, max, ticks); + expect(actual).toBe(60); + }); + test('greater previous value returns next lesser', () => { + const ticks = [2, 4, 6, 8, 10]; + const min = ticks[0]; + const max = ticks[ticks.length - 1]; + const previous = 4; + const val = 3; + const actual = getClosestTick(val, previous, min, max, ticks); + expect(actual).toBe(2); + }); + test('greater previous value returns next lesser, consecutive', () => { + const ticks = [1, 2, 3, 4, 5, 6]; + const min = ticks[0]; + const max = ticks[ticks.length - 1]; + const previous = 4; + const val = 3; + const actual = getClosestTick(val, previous, min, max, ticks); + expect(actual).toBe(3); + }); + test('lower previous value returns next greater, consecutive', () => { + const ticks = [1, 2, 3, 4, 5, 6]; + const min = ticks[0]; + const max = ticks[ticks.length - 1]; + const previous = 3; + const val = 4; + const actual = getClosestTick(val, previous, min, max, ticks); + expect(actual).toBe(4); + }); + test('lower previous value returns next greater, erratic', () => { + const ticks = [1, 4, 5, 7, 9, 12]; + const min = ticks[0]; + const max = ticks[ticks.length - 1]; + const previous = 9; + const val = 10; + const actual = getClosestTick(val, previous, min, max, ticks); + 
expect(actual).toBe(12); + }); + test('greater previous value returns next lower, erratic', () => { + const ticks = [1, 2, 5, 7, 9, 12]; + const min = ticks[0]; + const max = ticks[ticks.length - 1]; + const previous = 5; + const val = 4; + const actual = getClosestTick(val, previous, min, max, ticks); + expect(actual).toBe(2); + }); + + // Modified logic, retaining for expected performance + test('large number of ticks', () => { + const ticks = []; + for (let i = 0; i < 10000000; i += 2) { + ticks.push(i); + } + + const min = ticks[0]; + const max = ticks[ticks.length - 1]; + const previous = 123456; + const val = 123457; + const start = global.window.performance.now(); + const actual = getClosestTick(val, previous, min, max, ticks); + const end = global.window.performance.now(); + const duration = end - start; + expect(actual).toBe(123458); + expect(duration).toBeLessThanOrEqual(1.0); + }); +}); diff --git a/weave-js/src/common/components/elements/SliderInput.tsx b/weave-js/src/common/components/elements/SliderInput.tsx index dce75615c93e..5c3d9d210f5a 100644 --- a/weave-js/src/common/components/elements/SliderInput.tsx +++ b/weave-js/src/common/components/elements/SliderInput.tsx @@ -24,8 +24,6 @@ export interface SliderInputProps { ticks?: number[]; disabled?: boolean; strideLength?: number; - // if true, the slider will be restricted to props.max, but the input will be unbounded (https://wandb.atlassian.net/browse/WB-5666) - allowGreaterThanMax?: boolean; onChange(value: number): void; } @@ -47,7 +45,6 @@ const SliderInput: React.FC = React.memo( ticks, disabled, strideLength, - allowGreaterThanMax, onChange, }) => { const [sliderValue, setSliderValue] = React.useState(value ?? 
0); @@ -71,19 +68,11 @@ const SliderInput: React.FC = React.memo( if (newVal == null || !_.isFinite(newVal)) { return; } - if (newVal > max && !allowGreaterThanMax) { - newVal = max; - } - if (newVal < min) { - newVal = min; - } - if (ticks != null) { - newVal = getClosestTick(ticks, newVal); - } + newVal = getClosestTick(newVal, sliderValue, min, max, ticks); setSliderValue(newVal); onChangeDebounced(newVal); }, - [ticks, min, max, allowGreaterThanMax, onChangeDebounced] + [ticks, min, max, sliderValue, onChangeDebounced] ); React.useEffect(() => { @@ -138,7 +127,7 @@ const SliderInput: React.FC = React.memo( strideLength={strideLength} disabled={disabled ?? false} min={min} - max={allowGreaterThanMax ? undefined : max} + max={max} value={value} ticks={ticks} onChange={update} @@ -172,17 +161,54 @@ const SliderInput: React.FC = React.memo( export default SliderInput; -function getClosestTick(ticks: number[], val: number): number { +export function getClosestTick( + val: number, + prev: number, + min: number, + max: number, + ticks?: number[] +): number { + // if min/max not in ticks, allow coercion to nearest value + if (val > max) { + return max; + } + if (val < min) { + return min; + } + if (ticks === null || ticks === undefined) { + return val; + } + let closest = val; + const increasing = val > prev; let minDiff = Number.MAX_VALUE; - for (const tick of ticks) { + // Binary search for the closest tick + let left = 0; + let right = ticks.length - 1; + + while (left <= right) { + const mid = Math.floor((left + right) / 2); + const tick = ticks[mid]; const diff = Math.abs(tick - val); - if (diff >= minDiff) { - break; + + // Only update closest if the tick is in the right direction + if ( + diff < minDiff && + ((increasing && tick >= val) || (!increasing && tick <= val)) + ) { + closest = tick; + if (closest === val) { + break; + } + minDiff = diff; + } + + if (tick < val) { + left = mid + 1; + } else { + right = mid - 1; } - closest = tick; - minDiff = diff; } 
return closest; From a4f9e53194cb8e383b23afd8d8b78917b2f27f9f Mon Sep 17 00:00:00 2001 From: Andrew Truong Date: Wed, 8 Jan 2025 14:35:32 -0500 Subject: [PATCH 15/51] feat(weave): Make redaction applied to captured code #3340 --- tests/trace/test_redaction.py | 17 +++++++++++++++++ weave/trace/op_type.py | 17 +++++++++++------ 2 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 tests/trace/test_redaction.py diff --git a/tests/trace/test_redaction.py b/tests/trace/test_redaction.py new file mode 100644 index 000000000000..954cb04b216c --- /dev/null +++ b/tests/trace/test_redaction.py @@ -0,0 +1,17 @@ +import weave + + +def test_code_capture_redacts_sensitive_values(client): + api_key = "123" + + @weave.op + def func(x: int) -> int: + cap = api_key + return x + 1 + + ref = weave.publish(func) + op = ref.get() + + captured_code = op.get_captured_code() + + assert 'api_key = "REDACTED"' in captured_code diff --git a/weave/trace/op_type.py b/weave/trace/op_type.py index faf6e8e8fd38..1e71beb6dd60 100644 --- a/weave/trace/op_type.py +++ b/weave/trace/op_type.py @@ -24,6 +24,7 @@ from weave.trace.mem_artifact import MemTraceFilesArtifact from weave.trace.op import Op, as_op, is_op from weave.trace.refs import ObjectRef +from weave.trace.sanitize import REDACTED_VALUE, should_redact from weave.trace_server.trace_server_interface_util import str_digest WEAVE_OP_PATTERN = re.compile(r"@weave\.op(\(\))?") @@ -445,17 +446,21 @@ def _get_code_deps( from weave.trace.serialize import to_json - json_val = to_json(var_value, client._project_id(), client) + # Redact sensitive values + if should_redact(var_name): + json_val = REDACTED_VALUE + else: + json_val = to_json(var_value, client._project_id(), client) except Exception as e: warnings.append( f"Serialization error for value of {var_name} needed by {fn}. 
Encountered:\n {e}" ) else: - code_paragraph = ( - f"{var_name} = " - + json.dumps(json_val, cls=RefJSONEncoder, indent=4) - + "\n" - ) + if should_redact(var_name): + json_str = f'"{REDACTED_VALUE}"' + else: + json_str = json.dumps(json_val, cls=RefJSONEncoder, indent=4) + code_paragraph = f"{var_name} = " + json_str + "\n" code_paragraph = code_paragraph.replace( f'"{RefJSONEncoder.SPECIAL_REF_TOKEN}', "" ) From 3bdbbbca0f966f8a3fa9eb52d210be0c3ab40db0 Mon Sep 17 00:00:00 2001 From: J2-D2-3PO <188380414+J2-D2-3PO@users.noreply.github.com> Date: Wed, 8 Jan 2025 12:46:28 -0700 Subject: [PATCH 16/51] docs(weave): Document autopatch_settings for PII nb (#3317) --- docs/docs/reference/gen_notebooks/pii.md | 235 +++++++++++++++----- docs/notebooks/pii.ipynb | 266 ++++++++++++++++++----- 2 files changed, 392 insertions(+), 109 deletions(-) diff --git a/docs/docs/reference/gen_notebooks/pii.md b/docs/docs/reference/gen_notebooks/pii.md index 001aca82a615..c4e7149d0413 100644 --- a/docs/docs/reference/gen_notebooks/pii.md +++ b/docs/docs/reference/gen_notebooks/pii.md @@ -15,22 +15,32 @@ title: Handling and Redacting PII -# How to use Weave with PII data: +# How to use Weave with PII data -In this tutorial, we'll demonstrate how to utilize Weave while ensuring your Personally Identifiable Information (PII) data remains private. Weave supports removing PII from LLM calls and preventing PII from being displayed in the Weave UI. +In this guide, you'll learn how to use W&B Weave while ensuring your Personally Identifiable Information (PII) data remains private. The guide demonstrates the following methods to identify, redact and anonymize PII data: -To detect and protect our PII data, we'll identify and redact PII data and optionally anonymize it with the following methods: 1. __Regular expressions__ to identify PII data and redact it. 2. __Microsoft's [Presidio](https://microsoft.github.io/presidio/)__, a python-based data protection SDK. 
This tool provides redaction and replacement functionalities. 3. __[Faker](https://faker.readthedocs.io/en/master/)__, a Python library to generate fake data, combined with Presidio to anonymize PII data. -Additionally, we'll make use of _Weave Ops input/output logging customization_ to seamlessly integrate PII redaction and anonymization into the workflow. See [here](https://weave-docs.wandb.ai/guides/tracking/ops/#customize-logged-inputs-and-outputs) for more information. +Additionally, you'll learn how to use _`weave.op` input/output logging customization_ and _`autopatch_settings`_ to integrate PII redaction and anonymization into the workflow. For more information, see [Customize logged inputs and outputs](https://weave-docs.wandb.ai/guides/tracking/ops/#customize-logged-inputs-and-outputs). -For this use-case, we will leverage Anthropic's Claude Sonnet to perform sentiment analysis while tracing the LLM calls using Weave's [Traces](https://wandb.github.io/weave/quickstart). Sonnet will receive a block of text and output one of the following sentiment classifications: _positive_, _negative_, or _neutral_. +To get started, do the following: -## Overview of Weave Ops Input/Output Logging Customization +1. Review the [Overview](#overview) section. +2. Complete the [prerequisites](#prerequisites). +3. Review the [available methods](#redaction-methods-overview) for identifying, redacting and anonymizing PII data. +4. [Apply the methods to Weave calls](#apply-the-methods-to-weave-calls). -Weave Ops support defining input and output postprocessing functions. These functions allow you to modify the data that is passed to your LLM call or logged to Weave, respectively. +## Overview + +The following section provides an overview of input and output logging using `weave.op`, as well as best practices for working with PII data in Weave. + +### Customize input and output logging using `weave.op` + +Weave Ops allow you to define input and output postprocessing functions. 
Using these functions, you can modify the data that is passed to your LLM call or logged to Weave. + +In the following example, two postprocessing functions are defined and passed as arguments to `weave.op()`. ```python from dataclasses import dataclass @@ -60,14 +70,33 @@ def some_llm_call(a: int, hide_me: str) -> CustomObject: return CustomObject(x=a, secret_password=hide_me) ``` -# Setup +### Best practices for using Weave with PII data + +Before using Weave with PII data, review the best practices for using Weave with PII data. + +#### During testing +- Log anonymized data to check PII detection +- Track PII handling processes with Weave Traces +- Measure anonymization performance without exposing real PII + +#### In production +- Never log raw PII +- Encrypt sensitive fields before logging + +#### Encryption tips +- Use reversible encryption for data you need to decrypt later +- Apply one-way hashing for unique IDs you don't need to reverse +- Consider specialized encryption for data you need to analyze while encrypted + +## Prerequisites -Let's install the required packages and set up our API keys. Your Weights & Biases API key can be found [here](https://wandb.ai/authorize), and your Anthropic API keys are [here](https://console.anthropic.com/settings/keys). +1. First, install the required packages. ```python %%capture # @title required python packages: +!pip install cryptography !pip install presidio_analyzer !pip install presidio_anonymizer !python -m spacy download en_core_web_lg # Presidio uses spacy NLP engine @@ -78,6 +107,11 @@ Let's install the required packages and set up our API keys. Your Weights & Bias !pip install cryptography # to encrypt our data ``` +2. Set up your API keys. You can find your API keys at the following links. + + - [W&B](https://wandb.ai/authorize) + - [Anthropic](https://console.anthropic.com/settings/keys). + ```python %%capture @@ -90,6 +124,8 @@ _ = set_env("ANTHROPIC_API_KEY") _ = set_env("WANDB_API_KEY") ``` +3. 
Initialize your Weave project. + ```python import weave @@ -99,7 +135,7 @@ WEAVE_PROJECT = "pii_cookbook" weave.init(WEAVE_PROJECT) ``` -Let's load our initial PII data. For demonstration purposes, we'll use a dataset containing 10 text blocks. A larger dataset with 1000 entries is available. +4. Load the demo PII dataset, which contains 10 text blocks. ```python @@ -112,11 +148,20 @@ pii_data = response.json() print('PII data first sample: "' + pii_data[0]["text"] + '"') ``` -# Redaction Methods Implementation +## Redaction methods overview -## Method 1: Regular Expression Filtering +Once you've completed the [prerequisites](#prerequisites), you can choose how to handle the PII in your data. -Our initial method is to use [regular expressions (regex)](https://docs.python.org/3/library/re.html) to identify PII data and redact it. It allows us to define patterns that can match various formats of sensitive information like phone numbers, email addresses, and social security numbers. By using regex, we can scan through large volumes of text and replace or redact information without the need for more complex NLP techniques. +To detect and protect our PII data, we'll identify and redact PII data and optionally anonymize it using the following methods: + +1. __Regular expressions__ to identify PII data and redact it. +2. __Microsoft [Presidio](https://microsoft.github.io/presidio/)__, a Python-based data protection SDK that provides redaction and replacement functionality. +3. __[Faker](https://faker.readthedocs.io/en/master/)__, a Python library for generating fake data. + + +### Method 1: Filter using regular expressions + +[Regular expressions (regex)](https://docs.python.org/3/library/re.html) are the simplest method to identify and redact PII data. Regex allows you to define patterns that can match various formats of sensitive information like phone numbers, email addresses, and social security numbers. 
Using regex, you can scan through large volumes of text and replace or redact information without the need for more complex NLP techniques. ```python @@ -181,15 +226,10 @@ print(f"Raw text:\n\t{test_text}") print(f"Redacted text:\n\t{cleaned_text}") ``` -## Method 2: Microsoft Presidio Redaction -Our next method involves complete removal of PII data using Presidio. This approach redacts PII and replaces it with a placeholder representing the PII type. - -For example: -`"My name is Alex"` becomes `"My name is "`. +### Method 2: Redact using Microsoft Presidio +The next method involves complete removal of PII data using [Microsoft Presidio](https://microsoft.github.io/presidio/). Presidio redacts PII and replaces it with a placeholder representing the PII type. For example, Presidio replaces `Alex` in `"My name is Alex"` with `<PERSON>`. -Presidio comes with a built-in [list of recognizable entities](https://microsoft.github.io/presidio/supported_entities/). We can select the ones that are important for our use case. In the below example, we redact names, phone numbers, locations, email addresses, and US Social Security Numbers. - -We'll then encapsulate the Presidio process into a function. +Presidio comes with built-in support for [common entities](https://microsoft.github.io/presidio/supported_entities/). In the below example, we redact all entities that are a `PHONE_NUMBER`, `PERSON`, `LOCATION`, `EMAIL_ADDRESS` or `US_SSN`. The Presidio process is encapsulated in a function. ```python @@ -229,17 +269,17 @@ print(f"Raw text:\n\t{text}") print(f"Redacted text:\n\t{anonymized_text}") ``` -## Method 3: Anonymization with Replacement using Fakr and Presidio +### Method 3: Anonymize with replacement using Faker and Presidio -Instead of redacting text, we can anonymize it by swapping PII (like names and phone numbers) with fake data generated using the [Faker](https://faker.readthedocs.io/en/master/) Python library. 
For example: +Instead of redacting text, you can anonymize it by using MS Presidio to swap PII like names and phone numbers with fake data generated using the [Faker](https://faker.readthedocs.io/en/master/) Python library. For example, suppose you have the following data: `"My name is Raphael and I like to fish. My phone number is 212-555-5555"` -might become +Once the data has been processed using Presidio and Faker, it might look like: `"My name is Katherine Dixon and I like to fish. My phone number is 667.431.7379"` -To effectively utilize Presidio, we must supply references to our custom operators. These operators will direct Presidio to the functions responsible for swapping PII with fake data. +To effectively use Presidio and Faker together, we must supply references to our custom operators. These operators will direct Presidio to the Faker functions responsible for swapping PII with fake data. ```python @@ -285,7 +325,7 @@ print(f"Raw text:\n\t{text_to_anonymize}") print(f"Anonymized text:\n\t{anonymized_results.text}") ``` -Let's consolidate our code into a single class and expand the list of entities to include the additional ones we identified earlier. +Let's consolidate our code into a single class and expand the list of entities to include the additional ones identified earlier. ```python @@ -348,19 +388,55 @@ print(f"Raw text:\n\t{text_to_anonymize}") print(f"Anonymized text:\n\t{anonymized_text}") ``` -# Applying the Methods to Weave Calls +### Method 4: Use `autopatch_settings` + +You can use `autopatch_settings` to configure PII handling directly during initialization for one or more of the supported LLM integrations. The advantages of this method are: -In these examples we will integrate our PII redaction and anonymization methods into Weave Models, and preview the results in Weave Traces. +1. PII handling logic is centralized and scoped at initialization, reducing the need for scattered custom logic. +2. 
PII processing workflows can be customized or disabled entirely for specific integrations. -We'll create a [Weave Model](https://wandb.github.io/weave/guides/core-types/models) which is a combination of data (which can include configuration, trained model weights, or other information) and code that defines how the model operates. +To use `autopatch_settings` to configure PII handling, define `postprocess_inputs` and/or `postprocess_output` in `op_settings` for any one of the supported LLM integrations. -In this model, we will include our predict function where the Anthropic API will be called. Additionally, we will include our postprocessing functions to ensure that our PII data is redacted or anonymized before it is sent to the LLM. +```python + +def postprocess(inputs: dict) -> dict: + if "SENSITIVE_KEY" in inputs: + inputs["SENSITIVE_KEY"] = "REDACTED" + return inputs + +client = weave.init( + ..., + autopatch_settings={ + "openai": { + "op_settings": { + "postprocess_inputs": postprocess, + "postprocess_output": ..., + } + }, + "anthropic": { + "op_settings": { + "postprocess_inputs": ..., + "postprocess_output": ..., + } + } + }, +) +``` -Once you run this code you will receive a links to the Weave project page as well as the specific trace (LLM calls)you ran. -## Regex Method +## Apply the methods to Weave calls -In the simplest case, we can use regex to identify and redact PII data in the original text. +In the following examples, we will integrate our PII redaction and anonymization methods into Weave Models and preview the results in Weave Traces. + +First, we'll create a [Weave Model](https://wandb.github.io/weave/guides/core-types/models). A Weave Model is a combination of information like configuration settings, model weights, and code that defines how the model operates. + +In our model, we will include our predict function where the Anthropic API will be called.
Anthropic's Claude Sonnet is used to perform sentiment analysis while tracing LLM calls using [Traces](https://wandb.github.io/weave/quickstart). Claude Sonnet will receive a block of text and output one of the following sentiment classifications: _positive_, _negative_, or _neutral_. Additionally, we will include our postprocessing functions to ensure that our PII data is redacted or anonymized before it is sent to the LLM. + +Once you run this code, you will receive links to the Weave project page, as well as the specific trace (LLM calls) you ran. + +### Regex method + +In the simplest case, we can use regex to identify and redact PII data from the original text. ```python @@ -420,9 +496,9 @@ for entry in pii_data: await model.predict(entry["text"]) ``` -## Presidio Redaction Method +### Presidio redaction method -Here we will use Presidio to identify and redact PII data in the original text. +Next, we will use Presidio to identify and redact PII data from the original text. ![](../../media/pii/redact.png) @@ -484,9 +560,9 @@ for entry in pii_data: await model.predict(entry["text"]) ``` -## Faker + Presidio Replacement Method +### Faker and Presidio replacement method -Here we will have Faker generate anonymized replacement PII data and use Presidio to identify and replace the PII data in the original text. +In this example, we use Faker to generate anonymized replacement PII data and use Presidio to identify and replace the PII data in the original text.
![](../../media/pii/replace.png) @@ -551,27 +627,81 @@ for entry in pii_data: await model.predict(entry["text"]) ``` -## Checklist for Safely Using Weave with PII Data +### `autopatch_settings` method -### During Testing -- Log anonymized data to check PII detection -- Track PII handling processes with Weave Traces -- Measure anonymization performance without exposing real PII +In the following example, we set `postprocess_inputs` for `anthropic` to the `postprocess_inputs_regex()` function at initialization. The `postprocess_inputs_regex` function applies the `redact_with_regex` method defined in [Method 1: Filter using regular expressions](#method-1-filter-using-regular-expressions). Now, `redact_with_regex` will be applied to all inputs to any `anthropic` models. -### In Production -- Never log raw PII -- Encrypt sensitive fields before logging -### Encryption Tips -- Use reversible encryption for data you need to decrypt later -- Apply one-way hashing for unique IDs you don't need to reverse -- Consider specialized encryption for data you need to analyze while encrypted +```python +import json +from typing import Any + +import anthropic + +import weave + +client = weave.init( + ..., + autopatch_settings={ + "anthropic": { + "op_settings": { + "postprocess_inputs": postprocess_inputs_regex, + } + } + }, +) + + +# Define an input postprocessing function that applies our regex redaction for the model prediction Weave Op +def postprocess_inputs_regex(inputs: dict[str, Any]) -> dict: + inputs["text_block"] = redact_with_regex(inputs["text_block"]) + return inputs + + +# Weave model / predict function +class sentiment_analysis_regex_pii_model(weave.Model): + model_name: str + system_prompt: str + temperature: int + + async def predict(self, text_block: str) -> dict: + client = anthropic.AsyncAnthropic() + response = await client.messages.create( + max_tokens=1024, + model=self.model_name, + system=self.system_prompt, + messages=[ + {"role": "user", "content": [{"type":
"text", "text": text_block}]} + ], + ) + result = response.content[0].text + if result is None: + raise ValueError("No response from model") + parsed = json.loads(result) + return parsed +``` + + +```python +# create our LLM model with a system prompt +model = sentiment_analysis_regex_pii_model( + name="claude-3-sonnet", + model_name="claude-3-5-sonnet-20240620", + system_prompt='You are a Sentiment Analysis classifier. You will be classifying text based on their sentiment. Your input will be a block of text. You will answer with one the following rating option["positive", "negative", "neutral"]. Your answer should be one word in json format: {classification}. Ensure that it is valid JSON.', + temperature=0, +) + +print("Model: ", model) +# for every block of text, anonymized first and then predict +for entry in pii_data: + await model.predict(entry["text"]) +``` + +### (Optional) Encrypt your data -
- (Optional) Encrypting our data ![](../../media/pii/encrypt.png) -In addition to anonymizing PII, we can add an extra layer of security by encrypting our data using the cryptography library's [Fernet](https://cryptography.io/en/latest/fernet/) symmetric encryption. This approach ensures that even if the anonymized data is intercepted, it remains unreadable without the encryption key. +In addition to anonymizing PII, you can add an extra layer of security by encrypting your data using the cryptography library's [Fernet](https://cryptography.io/en/latest/fernet/) symmetric encryption. This approach ensures that even if the anonymized data is intercepted, it remains unreadable without the encryption key. ```python import os @@ -666,4 +796,3 @@ for entry in pii_data: encrypted_input = EncryptedSentimentAnalysisInput.encrypt(entry["text"]) await model.predict(encrypted_input) ``` -
diff --git a/docs/notebooks/pii.ipynb b/docs/notebooks/pii.ipynb index ad4650105f99..956303eeca13 100644 --- a/docs/notebooks/pii.ipynb +++ b/docs/notebooks/pii.ipynb @@ -10,7 +10,6 @@ "---\n", "docusaurus_head_meta::end -->\n", "\n", - "" ] }, @@ -20,7 +19,7 @@ "id": "m752k2fWKDql" }, "source": [ - "# How to use Weave with PII data:" + "# How to use Weave with PII data" ] }, { @@ -29,25 +28,35 @@ "id": "C70egOGRLCgm" }, "source": [ - "In this tutorial, we'll demonstrate how to utilize Weave while ensuring your Personally Identifiable Information (PII) data remains private. Weave supports removing PII from LLM calls and preventing PII from being displayed in the Weave UI. \n", + "In this guide, you'll learn how to use W&B Weave while ensuring your Personally Identifiable Information (PII) data remains private. The guide demonstrates the following methods to identify, redact and anonymize PII data:\n", "\n", - "To detect and protect our PII data, we'll identify and redact PII data and optionally anonymize it with the following methods:\n", "1. __Regular expressions__ to identify PII data and redact it.\n", "2. __Microsoft's [Presidio](https://microsoft.github.io/presidio/)__, a python-based data protection SDK. This tool provides redaction and replacement functionalities.\n", "3. __[Faker](https://faker.readthedocs.io/en/master/)__, a Python library to generate fake data, combined with Presidio to anonymize PII data.\n", "\n", - "Additionally, we'll make use of _Weave Ops input/output logging customization_ to seamlessly integrate PII redaction and anonymization into the workflow. See [here](https://weave-docs.wandb.ai/guides/tracking/ops/#customize-logged-inputs-and-outputs) for more information.\n", + "Additionally, you'll learn how to use _`weave.op` input/output logging customization_ and _`autopatch_settings`_ to integrate PII redaction and anonymization into the workflow. 
For more information, see [Customize logged inputs and outputs](https://weave-docs.wandb.ai/guides/tracking/ops/#customize-logged-inputs-and-outputs).\n", "\n", - "For this use-case, we will leverage Anthropic's Claude Sonnet to perform sentiment analysis while tracing the LLM calls using Weave's [Traces](https://wandb.github.io/weave/quickstart). Sonnet will receive a block of text and output one of the following sentiment classifications: _positive_, _negative_, or _neutral_." + "To get started, do the following:\n", + "\n", + "1. Review the [Overview](#overview) section.\n", + "2. Complete the [prerequisites](#prerequisites).\n", + "3. Review the [available methods](#redaction-methods-overview) for identifying, redacting and anonymizing PII data.\n", + "4. [Apply the methods to Weave calls](#apply-the-methods-to-weave-calls)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Overview of Weave Ops Input/Output Logging Customization\n", + "## Overview \n", + "\n", + "The following section provides an overview of input and output logging using `weave.op`, as well as best practices for working with PII data in Weave.\n", + "\n", + "### Customize input and output logging using `weave.op`\n", + "\n", + "Weave Ops allow you to define input and output postprocessing functions. Using these functions, you can modify the data that is passed to your LLM call or logged to Weave.\n", "\n", - "Weave Ops support defining input and output postprocessing functions. These functions allow you to modify the data that is passed to your LLM call or logged to Weave, respectively." + "In the following example, two postprocessing functions are defined and passed as arguments to `weave.op()`." 
] }, { @@ -87,19 +96,43 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Setup\n", + "### Best practices for using Weave with PII data \n", + "\n", + "Before using Weave with PII data, review the best practices for using Weave with PII data.\n", + "\n", + "#### During testing\n", + "- Log anonymized data to check PII detection\n", + "- Track PII handling processes with Weave Traces\n", + "- Measure anonymization performance without exposing real PII\n", + "\n", + "#### In production\n", + "- Never log raw PII\n", + "- Encrypt sensitive fields before logging\n", + "\n", + "#### Encryption tips\n", + "- Use reversible encryption for data you need to decrypt later\n", + "- Apply one-way hashing for unique IDs you don't need to reverse\n", + "- Consider specialized encryption for data you need to analyze while encrypted" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", "\n", - "Let's install the required packages and set up our API keys. Your Weights & Biases API key can be found [here](https://wandb.ai/authorize), and your Anthropic API keys are [here](https://console.anthropic.com/settings/keys)." + "1. First, install the required packages. " ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%capture\n", "# @title required python packages:\n", + "!pip install cryptography\n", "!pip install presidio_analyzer\n", "!pip install presidio_anonymizer\n", "!python -m spacy download en_core_web_lg # Presidio uses spacy NLP engine\n", @@ -110,6 +143,16 @@ "!pip install cryptography # to encrypt our data" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. Set up your API keys. You can find your API keys at the following links.\n", + "\n", + " - [W&B](https://wandb.ai/authorize)\n", + " - [Anthropic](https://console.anthropic.com/settings/keys)." 
+ ] + }, { "cell_type": "code", "execution_count": 4, @@ -126,6 +169,13 @@ "_ = set_env(\"WANDB_API_KEY\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "3. Initialize your Weave project." + ] + }, { "cell_type": "code", "execution_count": 6, @@ -154,7 +204,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's load our initial PII data. For demonstration purposes, we'll use a dataset containing 10 text blocks. A larger dataset with 1000 entries is available." + "4. Load the demo PII dataset, which contains 10 text blocks. " ] }, { @@ -184,16 +234,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Redaction Methods Implementation" + "## Redaction methods overview\n", + "\n", + "Once you've completed the [setup](#setup), you can \n", + "\n", + "To detect and protect our PII data, we'll identify and redact PII data and optionally anonymize it using the following methods:\n", + "\n", + "1. __Regular expressions__ to identify PII data and redact it.\n", + "2. __Microsoft [Presidio](https://microsoft.github.io/presidio/)__, a Python-based data protection SDK that provides redaction and replacement functionality.\n", + "3. __[Faker](https://faker.readthedocs.io/en/master/)__, a Python library for generating fake data.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Method 1: Regular Expression Filtering\n", + "### Method 1: Filter using regular expressions\n", "\n", - "Our initial method is to use [regular expressions (regex)](https://docs.python.org/3/library/re.html) to identify PII data and redact it. It allows us to define patterns that can match various formats of sensitive information like phone numbers, email addresses, and social security numbers. By using regex, we can scan through large volumes of text and replace or redact information without the need for more complex NLP techniques. 
" + "[Regular expressions (regex)](https://docs.python.org/3/library/re.html) are the simplest method to identify and redact PII data. Regex allows you to define patterns that can match various formats of sensitive information like phone numbers, email addresses, and social security numbers. Using regex, you can scan through large volumes of text and replace or redact information without the need for more complex NLP techniques. " ] }, { @@ -287,15 +345,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Method 2: Microsoft Presidio Redaction\n", - "Our next method involves complete removal of PII data using Presidio. This approach redacts PII and replaces it with a placeholder representing the PII type. \n", - "\n", - "For example:\n", - "`\"My name is Alex\"` becomes `\"My name is \"`.\n", + "### Method 2: Redact using Microsoft Presidio \n", + "The next method involves complete removal of PII data using [Microsoft Presidio](https://microsoft.github.io/presidio/). Presidio redacts PII and replaces it with a placeholder representing the PII type. For example, Presidio replaces `Alex` in `\"My name is Alex\"` with ``.\n", "\n", - "Presidio comes with a built-in [list of recognizable entities](https://microsoft.github.io/presidio/supported_entities/). We can select the ones that are important for our use case. In the below example, we redact names, phone numbers, locations, email addresses, and US Social Security Numbers.\n", - "\n", - "We'll then encapsulate the Presidio process into a function." + "Presidio comes with a built-in support for [common entities](https://microsoft.github.io/presidio/supported_entities/). In the below example, we redact all entities that are a `PHONE_NUMBER`, `PERSON`, `LOCATION`, `EMAIL_ADDRESS` or `US_SSN`. The Presidio process is encapsulated in a function." 
] }, { @@ -366,17 +419,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Method 3: Anonymization with Replacement using Fakr and Presidio\n", + "### Method 3: Anonymize with replacement using Faker and Presidio\n", "\n", - "Instead of redacting text, we can anonymize it by swapping PII (like names and phone numbers) with fake data generated using the [Faker](https://faker.readthedocs.io/en/master/) Python library. For example:\n", + "Instead of redacting text, you can anonymize it by using MS Presidio to swap PII like names and phone numbers with fake data generated using the [Faker](https://faker.readthedocs.io/en/master/) Python library. For example, suppose you have the following data:\n", "\n", "`\"My name is Raphael and I like to fish. My phone number is 212-555-5555\"` \n", "\n", - "might become\n", + "Once the data has been processed using Presidio and Faker, it might look like:\n", "\n", "`\"My name is Katherine Dixon and I like to fish. My phone number is 667.431.7379\"`\n", "\n", - "To effectively utilize Presidio, we must supply references to our custom operators. These operators will direct Presidio to the functions responsible for swapping PII with fake data." + "To effectively use Presidio and Faker together, we must supply references to our custom operators. These operators will direct Presidio to the Faker functions responsible for swapping PII with fake data." ] }, { @@ -446,7 +499,7 @@ "id": "R4HJcskpSYdL" }, "source": [ - "Let's consolidate our code into a single class and expand the list of entities to include the additional ones we identified earlier." + "Let's consolidate our code into a single class and expand the list of entities to include the additional ones identified earlier." 
] }, { @@ -543,24 +596,64 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Applying the Methods to Weave Calls\n", + "### Method 4: Use `autopatch_settings` \n", + "\n", + "You can use `autopatch_settings` to configure PII handling directly during initialization for one or more of the supported LLM integrations. The advantages of this method are:\n", "\n", - "In these examples we will integrate our PII redaction and anonymization methods into Weave Models, and preview the results in Weave Traces.\n", + "1. PII handling logic is centralized and scoped at initialization, reducing the need for scattered custom logic.\n", + "2. PII processing workflows can be customized or disabled entirely for specific integrations.\n", "\n", - "We'll create a [Weave Model](https://wandb.github.io/weave/guides/core-types/models) which is a combination of data (which can include configuration, trained model weights, or other information) and code that defines how the model operates. \n", + "To use `autopatch_settings` to configure PII handling, define `postprocess_inputs` and/or `postprocess_output` in `op_settings` for any one of the supported LLM integrations. \n", "\n", - "In this model, we will include our predict function where the Anthropic API will be called. Additionally, we will include our postprocessing functions to ensure that our PII data is redacted or anonymized before it is sent to the LLM.\n", + "```python \n", + "\n", + "def postprocess(inputs: dict) -> dict:\n", + " if \"SENSITIVE_KEY\" in inputs:\n", + " inputs[\"SENSITIVE_KEY\"] = \"REDACTED\"\n", + " return inputs\n", "\n", - "Once you run this code you will receive a links to the Weave project page as well as the specific trace (LLM calls)you ran."
+ "client = weave.init(\n", + " ...,\n", + " autopatch_settings={\n", + " \"openai\": {\n", + " \"op_settings\": {\n", + " \"postprocess_inputs\": postprocess,\n", + " \"postprocess_output\": ...,\n", + " }\n", + " },\n", + " \"anthropic\": {\n", + " \"op_settings\": {\n", + " \"postprocess_inputs\": ...,\n", + " \"postprocess_output\": ...,\n", + " }\n", + " }\n", + " },\n", + ")\n", + "```\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Regex Method \n", + "## Apply the methods to Weave calls\n", + "\n", + "In the following examples, we will integrate our PII redaction and anonymization methods into Weave Models and preview the results in Weave Traces.\n", + "\n", + "First, we'll create a [Weave Model](https://wandb.github.io/weave/guides/core-types/models). A Weave Model is a combination of information like configuration settings, model weights, and code that defines how the model operates. \n", + "\n", + "In our model, we will include our predict function where the Anthropic API will be called. Anthropic's Claude Sonnet is used to perform sentiment analysis while tracing LLM calls using [Traces](https://wandb.github.io/weave/quickstart). Claude Sonnet will receive a block of text and output one of the following sentiment classifications: _positive_, _negative_, or _neutral_. Additionally, we will include our postprocessing functions to ensure that our PII data is redacted or anonymized before it is sent to the LLM.\n", "\n", - "In the simplest case, we can use regex to identify and redact PII data in the original text." + "Once you run this code, you will receive links to the Weave project page, as well as the specific trace (LLM calls) you ran." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regex method \n", + "\n", + "In the simplest case, we can use regex to identify and redact PII data from the original text."
] }, { @@ -651,9 +744,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Presidio Redaction Method\n", + "### Presidio redaction method\n", "\n", - "Here we will use Presidio to identify and redact PII data in the original text." + "Next, we will use Presidio to identify and redact PII data from the original text." ] }, { @@ -784,9 +877,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Faker + Presidio Replacement Method\n", + "### Faker and Presidio replacement method\n", "\n", - "Here we will have Faker generate anonymized replacement PII data and use Presidio to identify and replace the PII data in the original text.\n" + "In this example, we use Faker to generate anonymized replacement PII data and use Presidio to identify and replace the PII data in the original text.\n" ] }, { @@ -886,32 +979,94 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Checklist for Safely Using Weave with PII Data\n", + "### `autopatch_settings` method\n", "\n", - "### During Testing\n", - "- Log anonymized data to check PII detection\n", - "- Track PII handling processes with Weave Traces\n", - "- Measure anonymization performance without exposing real PII\n", + "In the following example, we set `postprocess_inputs` for `anthropic` to the `postprocess_inputs_regex()` function at initialization. The `postprocess_inputs_regex` function applies the `redact_with_regex` method defined in [Method 1: Filter using regular expressions](#method-1-filter-using-regular-expressions). Now, `redact_with_regex` will be applied to all inputs to any `anthropic` models."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from typing import Any\n", "\n", - "### In Production\n", - "- Never log raw PII\n", - "- Encrypt sensitive fields before logging\n", + "import anthropic\n", "\n", - "### Encryption Tips\n", - "- Use reversible encryption for data you need to decrypt later\n", - "- Apply one-way hashing for unique IDs you don't need to reverse\n", - "- Consider specialized encryption for data you need to analyze while encrypted" + "import weave\n", + "\n", + "client = weave.init(\n", + " ...,\n", + " autopatch_settings={\n", + " \"anthropic\": {\n", + " \"op_settings\": {\n", + " \"postprocess_inputs\": postprocess_inputs_regex,\n", + " }\n", + " }\n", + " },\n", + ")\n", + "\n", + "\n", + "# Define an input postprocessing function that applies our regex redaction for the model prediction Weave Op\n", + "def postprocess_inputs_regex(inputs: dict[str, Any]) -> dict:\n", + " inputs[\"text_block\"] = redact_with_regex(inputs[\"text_block\"])\n", + " return inputs\n", + "\n", + "\n", + "# Weave model / predict function\n", + "class sentiment_analysis_regex_pii_model(weave.Model):\n", + " model_name: str\n", + " system_prompt: str\n", + " temperature: int\n", + "\n", + " async def predict(self, text_block: str) -> dict:\n", + " client = anthropic.AsyncAnthropic()\n", + " response = await client.messages.create(\n", + " max_tokens=1024,\n", + " model=self.model_name,\n", + " system=self.system_prompt,\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": [{\"type\": \"text\", \"text\": text_block}]}\n", + " ],\n", + " )\n", + " result = response.content[0].text\n", + " if result is None:\n", + " raise ValueError(\"No response from model\")\n", + " parsed = json.loads(result)\n", + " return parsed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create our LLM model with a system 
prompt\n", + "model = sentiment_analysis_regex_pii_model(\n", + " name=\"claude-3-sonnet\",\n", + " model_name=\"claude-3-5-sonnet-20240620\",\n", + " system_prompt='You are a Sentiment Analysis classifier. You will be classifying text based on their sentiment. Your input will be a block of text. You will answer with one the following rating option[\"positive\", \"negative\", \"neutral\"]. Your answer should be one word in json format: {classification}. Ensure that it is valid JSON.',\n", + " temperature=0,\n", + ")\n", + "\n", + "print(\"Model: \", model)\n", + "# for every block of text, anonymized first and then predict\n", + "for entry in pii_data:\n", + " await model.predict(entry[\"text\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "
\n", - " (Optional) Encrypting our data \n", + "### (Optional) Encrypt your data \n", + "\n", "![](../../media/pii/encrypt.png)\n", "\n", - "In addition to anonymizing PII, we can add an extra layer of security by encrypting our data using the cryptography library's [Fernet](https://cryptography.io/en/latest/fernet/) symmetric encryption. This approach ensures that even if the anonymized data is intercepted, it remains unreadable without the encryption key.\n", + "In addition to anonymizing PII, you can add an extra layer of security by encrypting your data using the cryptography library's [Fernet](https://cryptography.io/en/latest/fernet/) symmetric encryption. This approach ensures that even if the anonymized data is intercepted, it remains unreadable without the encryption key.\n", "\n", "```python\n", "import os\n", @@ -1005,8 +1160,7 @@ "for entry in pii_data:\n", " encrypted_input = EncryptedSentimentAnalysisInput.encrypt(entry[\"text\"])\n", " await model.predict(encrypted_input)\n", - "```\n", - "
" + "```" ] } ], From a359d4ed09ba3078760b47f5fdaf6f5489b466b0 Mon Sep 17 00:00:00 2001 From: Griffin Tarpenning Date: Wed, 8 Jan 2025 12:28:02 -0800 Subject: [PATCH 17/51] chore(weave): refs read batch handles deleted objs silently (#3335) --- tests/trace/test_obj_delete.py | 30 ++++++++++--------- .../clickhouse_trace_server_batched.py | 5 +++- weave/trace_server/sqlite_trace_server.py | 3 ++ 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/tests/trace/test_obj_delete.py b/tests/trace/test_obj_delete.py index a2e1c3ad3b07..b1b1e9cee1b9 100644 --- a/tests/trace/test_obj_delete.py +++ b/tests/trace/test_obj_delete.py @@ -193,14 +193,15 @@ def test_read_deleted_object(client: WeaveClient): ) ) - with pytest.raises(weave.trace_server.errors.NotFoundError): - client.server.refs_read_batch( - tsi.RefsReadBatchReq( - project_id=client._project_id(), - object_ids=["obj_1"], - refs=[obj1_v2.uri()], - ) + ref_res = client.server.refs_read_batch( + tsi.RefsReadBatchReq( + project_id=client._project_id(), + object_ids=["obj_1"], + refs=[obj1_v2.uri()], ) + ) + assert len(ref_res.vals) == 1 + assert ref_res.vals[0] is None def test_op_versions(client: WeaveClient): @@ -250,11 +251,12 @@ def my_op(x: int) -> int: ) ) - with pytest.raises(weave.trace_server.errors.NotFoundError): - client.server.refs_read_batch( - tsi.RefsReadBatchReq( - project_id=client._project_id(), - object_ids=["my_op"], - refs=[op_ref.uri()], - ) + ref_res = client.server.refs_read_batch( + tsi.RefsReadBatchReq( + project_id=client._project_id(), + object_ids=["my_op"], + refs=[op_ref.uri()], ) + ) + assert len(ref_res.vals) == 1 + assert ref_res.vals[0] is None diff --git a/weave/trace_server/clickhouse_trace_server_batched.py b/weave/trace_server/clickhouse_trace_server_batched.py index c4800df64a3c..e5d8dfe9c460 100644 --- a/weave/trace_server/clickhouse_trace_server_batched.py +++ b/weave/trace_server/clickhouse_trace_server_batched.py @@ -1040,6 +1040,7 @@ def 
get_object_refs_root_val( conditions=conditions, object_id_conditions=object_id_conditions, parameters=parameters, + include_deleted=True, ) objs = self._select_objs_query(object_query_builder) found_digests = {obj.digest for obj in objs} @@ -1047,7 +1048,9 @@ def get_object_refs_root_val( raise NotFoundError( f"Ref read contains {len(ref_digests)} digests, but found {len(found_digests)} objects. Diff digests: {ref_digests - found_digests}" ) - for obj in objs: + # filter out deleted objects + valid_objects = [obj for obj in objs if obj.deleted_at is None] + for obj in valid_objects: root_val_cache[make_obj_cache_key(obj)] = json.loads(obj.val_dump) return [ diff --git a/weave/trace_server/sqlite_trace_server.py b/weave/trace_server/sqlite_trace_server.py index 59bd19580e82..5763bd3467ea 100644 --- a/weave/trace_server/sqlite_trace_server.py +++ b/weave/trace_server/sqlite_trace_server.py @@ -1040,10 +1040,13 @@ def read_ref(r: ri.InternalObjectRef) -> Any: objs = self._select_objs_query( r.project_id, conditions=conds, + include_deleted=True, ) if len(objs) == 0: raise NotFoundError(f"Obj {r.name}:{r.version} not found") obj = objs[0] + if obj.deleted_at is not None: + return None val = obj.val extra = r.extra for extra_index in range(0, len(extra), 2): From b98dd05f3eb668a6ff41888fd11b4d4a85babccd Mon Sep 17 00:00:00 2001 From: Marie Barr-Ramsey <126013019+mbarrramsey@users.noreply.github.com> Date: Wed, 8 Jan 2025 12:29:23 -0800 Subject: [PATCH 18/51] chore(weave): updating ghost buttons (#3322) --- weave-js/src/components/Button/Button.tsx | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/weave-js/src/components/Button/Button.tsx b/weave-js/src/components/Button/Button.tsx index 2e9d987e5e4a..d2eb0af3b48f 100644 --- a/weave-js/src/components/Button/Button.tsx +++ b/weave-js/src/components/Button/Button.tsx @@ -114,14 +114,23 @@ export const Button = React.forwardRef( 'bg-teal-500 text-white hover:bg-teal-450': isPrimary, 
'bg-teal-450': isPrimary && active, - // secondary & ghost + // secondary 'bg-oblivion/[0.05] dark:bg-moonbeam/[0.05]': isSecondary, - 'text-moon-800 dark:text-moon-200': isSecondary || isGhost, + 'text-moon-800 dark:text-moon-200': isSecondary, 'hover:bg-teal-300/[0.48] hover:text-teal-600 dark:hover:bg-teal-700/[0.48] dark:hover:text-teal-400': - isSecondary || isGhost, + isSecondary, + + // ghost + 'bg-transparent': isGhost, + 'text-moon-600 dark:text-moon-400': isGhost, + 'hover:bg-oblivion/[0.07] hover:text-moon-800 dark:hover:bg-moonbeam/[0.09] dark:hover:text-moon-200': + isGhost, + + // secondary or ghost 'bg-teal-300/[0.48] text-teal-600 dark:bg-teal-700/[0.48] dark:text-teal-400': (isSecondary || isGhost) && active, + /** @deprecated, use ghost instead */ // quiet 'text-moon-500': isQuiet, 'bg-oblivion/[0.05] text-moon-800 dark:bg-moonbeam/[0.05] dark:text-moon-200': From 74f543ddff9d200fbaaa3c2aaccc50179d3eff86 Mon Sep 17 00:00:00 2001 From: Nicholas Pun <182540099+nicholaspun-wandb@users.noreply.github.com> Date: Wed, 8 Jan 2025 15:54:38 -0800 Subject: [PATCH 19/51] fix(weave_query): prevent full panel crashes if a row contains an unresolved artifact (#3347) --- weave_query/weave_query/artifact_wandb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/weave_query/weave_query/artifact_wandb.py b/weave_query/weave_query/artifact_wandb.py index 02ee1cda0942..5874859a2194 100644 --- a/weave_query/weave_query/artifact_wandb.py +++ b/weave_query/weave_query/artifact_wandb.py @@ -163,6 +163,8 @@ def _convert_client_id_to_server_id(art_id: str) -> str: "clientID": art_id, }, ) + if not (res and res['clientIDMapping']): + raise errors.WeaveArtifactCollectionNotFound return b64_to_hex_id(res["clientIDMapping"]["serverID"]) From 1dc80c5db19697fcf90693ad059e49fdf08dc503 Mon Sep 17 00:00:00 2001 From: Andrew Truong Date: Wed, 8 Jan 2025 19:06:13 -0500 Subject: [PATCH 20/51] feat(weave): Add global post-processing options (#3336) --- 
tests/trace/test_postprocessing.py | 50 ++++++++++++++++++++++++++++++ weave/trace/api.py | 22 ++++++++++++- weave/trace/weave_client.py | 11 +++++++ 3 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 tests/trace/test_postprocessing.py diff --git a/tests/trace/test_postprocessing.py b/tests/trace/test_postprocessing.py new file mode 100644 index 000000000000..0d52289d8702 --- /dev/null +++ b/tests/trace/test_postprocessing.py @@ -0,0 +1,50 @@ +import pytest + +import weave +from weave.trace import api + + +def redact_keys(d: dict) -> dict: + for k in d: + if "key" in k.lower(): + d[k] = "REDACTED" + return d + + +def redact_output(s: str) -> str: + if "key" in str(s).lower(): + return "API KEY DETECTED IN STRING; REDACTED." + return s + + +# These globals are directly set here because we don't have a great way to test weave.init +@pytest.fixture +def apply_postprocessing(): + original_postprocess_inputs = api._global_postprocess_inputs + original_postprocess_output = api._global_postprocess_output + api._global_postprocess_inputs = redact_keys + api._global_postprocess_output = redact_output + yield + api._global_postprocess_inputs = original_postprocess_inputs + api._global_postprocess_output = original_postprocess_output + + +def test_global_postprocessing(client, apply_postprocessing) -> None: + @weave.op + def func(api_key: str, secret_key: str, name: str, age: int) -> str: + return ( + f"Hello, {name}! You are {age} years old. Also your api_key is {api_key}." + ) + + func(api_key="123", secret_key="456", name="John", age=30) + + calls = list(client.get_calls()) + call = calls[0] + + assert call.inputs == { + "api_key": "REDACTED", + "secret_key": "REDACTED", + "name": "John", + "age": 30, + } + assert call.output == "API KEY DETECTED IN STRING; REDACTED." 
diff --git a/weave/trace/api.py b/weave/trace/api.py index 3553805e66ba..7acb11781637 100644 --- a/weave/trace/api.py +++ b/weave/trace/api.py @@ -18,7 +18,7 @@ from weave.trace.context import call_context from weave.trace.context import weave_client_context as weave_client_context from weave.trace.context.call_context import get_current_call, require_current_call -from weave.trace.op import as_op, op +from weave.trace.op import PostprocessInputsFunc, PostprocessOutputFunc, as_op, op from weave.trace.refs import ObjectRef, parse_uri from weave.trace.settings import ( UserSettings, @@ -28,12 +28,17 @@ from weave.trace.table import Table from weave.trace_server.interface.builtin_object_classes import leaderboard +_global_postprocess_inputs: PostprocessInputsFunc | None = None +_global_postprocess_output: PostprocessOutputFunc | None = None + def init( project_name: str, *, settings: UserSettings | dict[str, Any] | None = None, autopatch_settings: AutopatchSettings | None = None, + global_postprocess_inputs: PostprocessInputsFunc | None = None, + global_postprocess_output: PostprocessOutputFunc | None = None, ) -> weave_client.WeaveClient: """Initialize weave tracking, logging to a wandb project. @@ -45,12 +50,27 @@ def init( Args: project_name: The name of the Weights & Biases project to log to. + settings: Configuration for the Weave client generally. + autopatch_settings: Configuration for autopatch integrations, e.g. openai + global_postprocess_inputs: A function that will be applied to all inputs of all ops. + global_postprocess_output: A function that will be applied to all outputs of all ops. + + NOTE: Global postprocessing settings are applied to all ops after each op's own + postprocessing. The order is always: + 1. Op-specific postprocessing + 2. Global postprocessing Returns: A Weave client. 
""" parse_and_apply_settings(settings) + global _global_postprocess_inputs + global _global_postprocess_output + + _global_postprocess_inputs = global_postprocess_inputs + _global_postprocess_output = global_postprocess_output + if should_disable_weave(): return weave_init.init_weave_disabled().client diff --git a/weave/trace/weave_client.py b/weave/trace/weave_client.py index 87a445383b57..6decbc12cffc 100644 --- a/weave/trace/weave_client.py +++ b/weave/trace/weave_client.py @@ -755,6 +755,8 @@ def create_call( Returns: The created Call object. """ + from weave.trace.api import _global_postprocess_inputs + if isinstance(op, str): if op not in self._anonymous_ops: self._anonymous_ops[op] = _build_anonymous_op(op) @@ -769,6 +771,9 @@ def create_call( else: inputs_postprocessed = inputs_redacted + if _global_postprocess_inputs: + inputs_postprocessed = _global_postprocess_inputs(inputs_postprocessed) + self._save_nested_objects(inputs_postprocessed) inputs_with_refs = map_to_refs(inputs_postprocessed) @@ -855,6 +860,8 @@ def finish_call( *, op: Op | None = None, ) -> None: + from weave.trace.api import _global_postprocess_output + ended_at = datetime.datetime.now(tz=datetime.timezone.utc) call.ended_at = ended_at original_output = output @@ -863,6 +870,10 @@ def finish_call( postprocessed_output = op.postprocess_output(original_output) else: postprocessed_output = original_output + + if _global_postprocess_output: + postprocessed_output = _global_postprocess_output(postprocessed_output) + self._save_nested_objects(postprocessed_output) call.output = map_to_refs(postprocessed_output) From 7ef61986109db1680a5f7d47775ba6d0e91798f6 Mon Sep 17 00:00:00 2001 From: Jamie Rasmussen <112953339+jamie-rasmussen@users.noreply.github.com> Date: Thu, 9 Jan 2025 09:58:42 -0600 Subject: [PATCH 21/51] chore(docs): Keep programming language selection state in URL (#3350) --- docs/docs/guides/core-types/datasets.md | 2 +- docs/docs/guides/core-types/media.md | 4 ++-- 
docs/docs/guides/core-types/models.md | 2 +- docs/docs/guides/evaluation/scorers.md | 12 ++++++------ docs/docs/guides/integrations/nvidia_nim.md | 6 +++--- docs/docs/guides/integrations/openai.md | 6 +++--- docs/docs/guides/tracking/costs.md | 8 ++++---- docs/docs/guides/tracking/feedback.md | 14 +++++++------- docs/docs/guides/tracking/objects.md | 4 ++-- docs/docs/guides/tracking/ops.md | 8 ++++---- docs/docs/quickstart.md | 4 ++-- docs/docs/tutorial-eval.md | 8 ++++---- docs/docs/tutorial-rag.md | 12 ++++++------ docs/docs/tutorial-tracing_2.md | 4 ++-- docs/docs/tutorial-weave_models.md | 6 +++--- 15 files changed, 50 insertions(+), 50 deletions(-) diff --git a/docs/docs/guides/core-types/datasets.md b/docs/docs/guides/core-types/datasets.md index 820178185408..30e571fb8a28 100644 --- a/docs/docs/guides/core-types/datasets.md +++ b/docs/docs/guides/core-types/datasets.md @@ -13,7 +13,7 @@ This guide will show you how to: ## Sample code - + ```python import weave diff --git a/docs/docs/guides/core-types/media.md b/docs/docs/guides/core-types/media.md index f097e3a0674f..0a0cfa6e2b75 100644 --- a/docs/docs/guides/core-types/media.md +++ b/docs/docs/guides/core-types/media.md @@ -9,7 +9,7 @@ Weave supports logging and displaying multiple first class media types. Log imag Logging type: `PIL.Image.Image`. Here is an example of logging an image with the OpenAI DALL-E API: - + ```python @@ -83,7 +83,7 @@ This image will be logged to weave and automatically displayed in the UI. The fo Logging type: `wave.Wave_read`. Here is an example of logging an audio file using openai's speech generation API. 
- + ```python diff --git a/docs/docs/guides/core-types/models.md b/docs/docs/guides/core-types/models.md index 83c16aa19a2c..1a9c70c4d7c7 100644 --- a/docs/docs/guides/core-types/models.md +++ b/docs/docs/guides/core-types/models.md @@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem'; # Models - + A `Model` is a combination of data (which can include configuration, trained model weights, or other information) and code that defines how the model operates. By structuring your code to be compatible with this API, you benefit from a structured way to version your application so you can more systematically keep track of your experiments. diff --git a/docs/docs/guides/evaluation/scorers.md b/docs/docs/guides/evaluation/scorers.md index 08cf5b1b64c0..e21be9a1c94e 100644 --- a/docs/docs/guides/evaluation/scorers.md +++ b/docs/docs/guides/evaluation/scorers.md @@ -7,7 +7,7 @@ import TabItem from '@theme/TabItem'; In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics. They take the AI's output, analyze it, and return a dictionary of results. Scorers can use your input data as reference if needed and can also output extra information, such as explanations or reasonings from the evaluation. - + Scorers are passed to a `weave.Evaluation` object during evaluation. There are two types of Scorers in weave: @@ -26,7 +26,7 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics. ### Function-based Scorers - + These are functions decorated with `@weave.op` that return a dictionary. They're great for simple evaluations like: @@ -68,7 +68,7 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics. ### Class-based Scorers - + For more advanced evaluations, especially when you need to keep track of additional scorer metadata, try different prompts for your LLM-evaluators, or make multiple function calls, you can use the `Scorer` class. 
@@ -139,7 +139,7 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics. ### Scorer Keyword Arguments - + Scorers can access both the output from your AI system and the input data from the dataset row. @@ -256,7 +256,7 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics. ### Final summarization of the scorer - + During evaluation, the scorer will be computed for each row of your dataset. To provide a final score for the evaluation we provide an `auto_summarize` depending on the returning type of the output. - Averages are computed for numerical columns @@ -305,7 +305,7 @@ In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics. ## Predefined Scorers - + **Installation** diff --git a/docs/docs/guides/integrations/nvidia_nim.md b/docs/docs/guides/integrations/nvidia_nim.md index 01007ee7e6a6..8aa26f4f3f35 100644 --- a/docs/docs/guides/integrations/nvidia_nim.md +++ b/docs/docs/guides/integrations/nvidia_nim.md @@ -9,7 +9,7 @@ Weave automatically tracks and logs LLM calls made via the [ChatNVIDIA](https:// It’s important to store traces of LLM applications in a central database, both during development and in production. You’ll use these traces for debugging and to help build a dataset of tricky examples to evaluate against while improving your application. - + Weave can automatically capture traces for the [ChatNVIDIA python library](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/). @@ -43,7 +43,7 @@ It’s important to store traces of LLM applications in a central database, both ## Track your own ops - + Wrapping a function with `@weave.op` starts capturing inputs, outputs and app logic so you can debug how data flows through your app. You can deeply nest ops and build a tree of functions that you want to track. This also starts automatically versioning code as you experiment to capture ad-hoc details that haven't been committed to git. 
@@ -119,7 +119,7 @@ Navigate to Weave and you can click `get_pokemon_data` in the UI to see the inpu ## Create a `Model` for easier experimentation - + Organizing experimentation is difficult when there are many moving pieces. By using the [`Model`](/guides/core-types/models) class, you can capture and organize the experimental details of your app like your system prompt or the model you're using. This helps organize and compare different iterations of your app. diff --git a/docs/docs/guides/integrations/openai.md b/docs/docs/guides/integrations/openai.md index 541732f5060f..b36951bc27b8 100644 --- a/docs/docs/guides/integrations/openai.md +++ b/docs/docs/guides/integrations/openai.md @@ -7,7 +7,7 @@ import TabItem from '@theme/TabItem'; It’s important to store traces of LLM applications in a central database, both during development and in production. You’ll use these traces for debugging and to help build a dataset of tricky examples to evaluate against while improving your application. - + Weave can automatically capture traces for the [openai python library](https://platform.openai.com/docs/libraries/python-library). @@ -79,7 +79,7 @@ It’s important to store traces of LLM applications in a central database, both ## Track your own ops - + Wrapping a function with `@weave.op` starts capturing inputs, outputs and app logic so you can debug how data flows through your app. You can deeply nest ops and build a tree of functions that you want to track. This also starts automatically versioning code as you experiment to capture ad-hoc details that haven't been committed to git. @@ -249,7 +249,7 @@ Wrapping a function with `weave.op` starts capturing inputs, outputs and app log ## Create a `Model` for easier experimentation - + Organizing experimentation is difficult when there are many moving pieces. 
By using the [`Model`](/guides/core-types/models) class, you can capture and organize the experimental details of your app like your system prompt or the model you're using. This helps organize and compare different iterations of your app. diff --git a/docs/docs/guides/tracking/costs.md b/docs/docs/guides/tracking/costs.md index cf130eabea2a..e94c89a40daf 100644 --- a/docs/docs/guides/tracking/costs.md +++ b/docs/docs/guides/tracking/costs.md @@ -5,7 +5,7 @@ import TabItem from '@theme/TabItem'; ## Adding a custom cost - + You can add a custom cost by using the [`add_cost`](/reference/python-sdk/weave/trace/weave.trace.weave_client#method-add_cost) method. The three required fields are `llm_id`, `prompt_token_cost`, and `completion_token_cost`. @@ -45,7 +45,7 @@ import TabItem from '@theme/TabItem'; ## Querying for costs - + You can query for costs by using the [`query_costs`](/reference/python-sdk/weave/trace/weave.trace.weave_client#method-query_costs) method. There are a few ways to query for costs, you can pass in a singular cost id, or a list of LLM model names. @@ -72,7 +72,7 @@ import TabItem from '@theme/TabItem'; ## Purging a custom cost - + You can purge a custom cost by using the [`purge_costs`](/reference/python-sdk/weave/trace/weave.trace.weave_client#method-purge_costs) method. You pass in a list of cost ids, and the costs with those ids are purged. @@ -95,7 +95,7 @@ import TabItem from '@theme/TabItem'; ## Calculating costs for a Project - + You can calculate costs for a project by using our `calls_query` and adding `include_costs=True` with a little bit of setup. diff --git a/docs/docs/guides/tracking/feedback.md b/docs/docs/guides/tracking/feedback.md index 68e27776d444..d3e416e24443 100644 --- a/docs/docs/guides/tracking/feedback.md +++ b/docs/docs/guides/tracking/feedback.md @@ -75,7 +75,7 @@ You can also get additional information for each feedback object in `client.get_ - `feedback_type`: The type of feedback (reaction, note, custom). 
- `payload`: The feedback payload - + ```python import weave @@ -115,7 +115,7 @@ You can add feedback to a call using the call's UUID. To use the UUID to get a p - `call.feedback.add_note("")`: Add a note. - `call.feedback.add("