-
Notifications
You must be signed in to change notification settings - Fork 73
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into bedrock-integration
- Loading branch information
Showing
20 changed files
with
843 additions
and
464 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Environment variables | ||
|
||
Weave provides a set of environment variables to configure and optimize its behavior. You can set these variables in your shell or within scripts to control specific functionality. | ||
|
||
```bash | ||
# Example of setting environment variables in the shell | ||
export WEAVE_PARALLELISM=10 # Controls the number of parallel workers | ||
export WEAVE_PRINT_CALL_LINK=false # Disables call link output | ||
``` | ||
|
||
```python | ||
# Example of setting environment variables in Python | ||
import os | ||
|
||
os.environ["WEAVE_PARALLELISM"] = "10" | ||
os.environ["WEAVE_PRINT_CALL_LINK"] = "false" | ||
``` | ||
|
||
## Environment variables reference | ||
|
||
| Variable Name | Description | | ||
|--------------------------|-----------------------------------------------------------------| | ||
| WEAVE_CAPTURE_CODE | Disable code capture for `weave.op` if set to `false`. | | ||
| WEAVE_DEBUG_HTTP | If set to `1`, turns on HTTP request and response logging for debugging. | | ||
| WEAVE_DISABLED | If set to `true`, all tracing to Weave is disabled. | | ||
| WEAVE_PARALLELISM | In evaluations, the number of examples to evaluate in parallel. `1` runs examples sequentially. Default value is `20`. | | ||
| WEAVE_PRINT_CALL_LINK | If set to `false`, call URL printing is suppressed. Default value is `true`. | ||
| WEAVE_TRACE_LANGCHAIN | When set to `false`, explicitly disables global tracing for LangChain. | | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,211 @@ | ||
from __future__ import annotations | ||
|
||
import pytest | ||
|
||
import weave | ||
from weave.scorers.base_scorer import ApplyScorerResult | ||
from weave.trace.op import OpCallError | ||
from weave.trace.refs import CallRef | ||
from weave.trace.weave_client import Call, Op, WeaveClient | ||
|
||
|
||
def do_assertions_for_scorer_op(
    apply_score_res: ApplyScorerResult,
    call: Call,
    score_fn: Op | weave.Scorer,
    client: WeaveClient,
):
    """Shared assertions for a successful ``call.apply_scorer(...)`` invocation.

    Verifies that the scorer produced a call with an id and a result of ``0``,
    and that ``call`` carries exactly one feedback entry whose type, runnable
    ref, call ref and payload all point back at that scorer run.

    Args:
        apply_score_res: Value returned by ``call.apply_scorer``.
        call: The call that was scored.
        score_fn: The scorer that was applied, either an op or a
            ``weave.Scorer`` instance.
        client: Client supplying the entity/project for the expected call ref.
    """
    score_call_id = apply_score_res.score_call.id
    assert score_call_id is not None
    assert apply_score_res.result == 0

    recorded_feedback = list(call.feedback)
    assert len(recorded_feedback) == 1
    feedback = recorded_feedback[0]

    # Ops are identified by their op name, Scorer objects by their class name.
    if isinstance(score_fn, Op):
        scorer_name = score_fn.name
    else:
        scorer_name = score_fn.__class__.__name__

    assert feedback.feedback_type == "wandb.runnable." + scorer_name
    assert feedback.runnable_ref == score_fn.ref.uri()

    expected_call_ref = CallRef(
        entity=client.entity,
        project=client.project,
        id=score_call_id,
    ).uri()
    assert feedback.call_ref == expected_call_ref
    assert feedback.payload == {"output": apply_score_res.result}
|
||
|
||
@pytest.mark.asyncio
async def test_scorer_op_no_context(client: WeaveClient):
    """A sync op scorer taking only ``x`` and ``output`` can be applied to a call.

    Also checks that a scorer whose parameter names do not line up with the
    scored call is rejected with ``OpCallError``.
    """

    @weave.op
    def predict(x):
        return x + 1

    @weave.op
    def score_fn(x, output):
        return output - x - 1

    predict_call = predict.call(1)[1]
    score_res = await predict_call.apply_scorer(score_fn)
    do_assertions_for_scorer_op(score_res, predict_call, score_fn, client)

    @weave.op
    def score_fn_with_incorrect_args(y, output):
        return output - y

    # ``predict`` has no ``y`` parameter — presumably that is why the scorer
    # call cannot be constructed.
    with pytest.raises(OpCallError):
        await predict_call.apply_scorer(score_fn_with_incorrect_args)
|
||
|
||
@pytest.mark.asyncio
async def test_scorer_op_with_context(client: WeaveClient):
    """A sync op scorer can receive extra values via ``additional_scorer_kwargs``.

    Also checks that supplying a kwarg under a name the scorer does not declare
    (while omitting one it does declare) raises ``OpCallError``.
    """

    @weave.op
    def predict(x):
        return x + 1

    @weave.op
    def score_fn(x, output, correct_answer):
        return output - correct_answer

    extra_kwargs = {"correct_answer": 2}

    predict_call = predict.call(1)[1]
    score_res = await predict_call.apply_scorer(
        score_fn, additional_scorer_kwargs=extra_kwargs
    )
    do_assertions_for_scorer_op(score_res, predict_call, score_fn, client)

    @weave.op
    def score_fn_with_incorrect_args(x, output, incorrect_arg):
        return output - incorrect_arg

    # The scorer wants ``incorrect_arg`` but only ``correct_answer`` is passed.
    with pytest.raises(OpCallError):
        await predict_call.apply_scorer(
            score_fn_with_incorrect_args,
            additional_scorer_kwargs={"correct_answer": 2},
        )
|
||
|
||
@pytest.mark.asyncio
async def test_async_scorer_op(client: WeaveClient):
    """An ``async`` op scorer behaves like its synchronous counterpart.

    Covers both the successful application and the ``OpCallError`` raised for
    a scorer whose parameter names do not match the scored call.
    """

    @weave.op
    def predict(x):
        return x + 1

    @weave.op
    async def score_fn(x, output):
        return output - x - 1

    predict_call = predict.call(1)[1]
    score_res = await predict_call.apply_scorer(score_fn)
    do_assertions_for_scorer_op(score_res, predict_call, score_fn, client)

    @weave.op
    async def score_fn_with_incorrect_args(y, output):
        return output - y

    # ``predict`` has no ``y`` parameter, so applying this scorer must fail.
    with pytest.raises(OpCallError):
        await predict_call.apply_scorer(score_fn_with_incorrect_args)
|
||
|
||
@pytest.mark.asyncio
async def test_scorer_obj_no_context(client: WeaveClient):
    """A ``weave.Scorer`` instance can be applied using only ``x`` and ``output``.

    Also checks that a Scorer whose ``score`` signature does not match the
    scored call raises ``OpCallError``.
    """

    @weave.op
    def predict(x):
        return x + 1

    class MyScorer(weave.Scorer):
        offset: int

        @weave.op
        def score(self, x, output):
            return output - x - self.offset

    # offset=1 cancels the +1 applied by ``predict``, giving a score of 0.
    good_scorer = MyScorer(offset=1)

    predict_call = predict.call(1)[1]
    score_res = await predict_call.apply_scorer(good_scorer)
    do_assertions_for_scorer_op(score_res, predict_call, good_scorer, client)

    class MyScorerWithIncorrectArgs(weave.Scorer):
        offset: int

        @weave.op
        def score(self, y, output):
            return output - y - self.offset

    # ``predict`` has no ``y`` parameter, so applying this scorer must fail.
    with pytest.raises(OpCallError):
        await predict_call.apply_scorer(MyScorerWithIncorrectArgs(offset=1))
|
||
|
||
@pytest.mark.asyncio
async def test_scorer_obj_with_context(client: WeaveClient):
    """A ``weave.Scorer`` instance can consume ``additional_scorer_kwargs``.

    Three scenarios are exercised in order:

    1. A Scorer whose ``score`` signature matches the call input plus the
       extra kwarg succeeds.
    2. A Scorer that declares a parameter (``y``) not provided by the call
       raises ``OpCallError``.
    3. The same mismatched signature succeeds once a ``column_map`` is given
       that pairs the scorer's parameter names with the available names.
    """

    @weave.op
    def predict(x):
        return x + 1

    class MyScorer(weave.Scorer):
        offset: int

        @weave.op
        def score(self, x, output, correct_answer):
            return output - correct_answer - self.offset

    matching_scorer = MyScorer(offset=0)

    first_call = predict.call(1)[1]
    first_res = await first_call.apply_scorer(
        matching_scorer, additional_scorer_kwargs={"correct_answer": 2}
    )
    do_assertions_for_scorer_op(first_res, first_call, matching_scorer, client)

    class MyScorerWithIncorrectArgs(weave.Scorer):
        offset: int

        @weave.op
        def score(self, y, output, incorrect_arg):
            return output - incorrect_arg - self.offset

    # ``incorrect_arg`` is supplied, but ``y`` is still not an input of
    # ``predict``.
    with pytest.raises(OpCallError):
        await first_call.apply_scorer(
            MyScorerWithIncorrectArgs(offset=0),
            additional_scorer_kwargs={"incorrect_arg": 2},
        )

    class MyScorerWithIncorrectArgsButCorrectColumnMapping(weave.Scorer):
        offset: int

        @weave.op
        def score(self, y, output, incorrect_arg):
            return output - incorrect_arg - self.offset

    # Keys are the scorer's parameter names; values are the names actually
    # available (the ``predict`` input and the additional kwarg).
    mapped_scorer = MyScorerWithIncorrectArgsButCorrectColumnMapping(
        offset=0, column_map={"y": "x", "incorrect_arg": "correct_answer"}
    )

    # A fresh call is scored so it carries exactly one feedback entry.
    second_call = predict.call(1)[1]
    second_res = await second_call.apply_scorer(
        mapped_scorer, additional_scorer_kwargs={"correct_answer": 2}
    )
    do_assertions_for_scorer_op(second_res, second_call, mapped_scorer, client)
|
||
|
||
@pytest.mark.asyncio
async def test_async_scorer_obj(client: WeaveClient):
    """A ``weave.Scorer`` whose ``score`` method is ``async`` can be applied."""

    @weave.op
    def predict(x):
        return x + 1

    class MyScorer(weave.Scorer):
        offset: int

        # NOTE(review): ``offset`` is declared but ``score`` subtracts a
        # literal 1 instead of ``self.offset`` — confirm this is intended.
        @weave.op
        async def score(self, x, output):
            return output - x - 1

    async_scorer = MyScorer(offset=0)

    predict_call = predict.call(1)[1]
    # NOTE(review): ``correct_answer`` is passed although ``score`` does not
    # declare it; the test expects the application to succeed anyway —
    # confirm that undeclared additional kwargs are meant to be tolerated.
    score_res = await predict_call.apply_scorer(
        async_scorer, additional_scorer_kwargs={"correct_answer": 2}
    )
    do_assertions_for_scorer_op(score_res, predict_call, async_scorer, client)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
45 changes: 45 additions & 0 deletions
45
...rc/components/PagePanelComponents/Home/Browse3/feedback/HumanFeedback/tsScorerFeedback.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import {RUNNABLE_FEEDBACK_TYPE_PREFIX} from '../StructuredFeedback/runnableFeedbackTypes'; | ||
|
||
/**
 * Prefix of a runnable (scorer) feedback field as it appears under the call
 * summary: `summary.weave.feedback.` followed by the runnable feedback type
 * prefix.
 */
export const RUNNABLE_FEEDBACK_IN_SUMMARY_PREFIX = `summary.weave.feedback.${RUNNABLE_FEEDBACK_TYPE_PREFIX}`;

/** Path segment, following the scorer name, under which the score is stored. */
export const RUNNABLE_FEEDBACK_OUTPUT_PART = 'payload.output';

/** Components extracted from a scorer feedback field name. */
export type ScorerFeedbackTypeParts = {
  /** First dot-delimited segment after the summary prefix. */
  scorerName: string;
  /**
   * Whatever follows `payload.output` in the field name; empty when the field
   * ends there, otherwise it still includes its leading separator.
   */
  scorePath: string;
};
|
||
/**
 * Splits a summary-level scorer feedback field into its scorer name and the
 * path below the scorer's output.
 *
 * The expected layout is
 * `<RUNNABLE_FEEDBACK_IN_SUMMARY_PREFIX>.<scorerName>.<RUNNABLE_FEEDBACK_OUTPUT_PART><scorePath>`.
 *
 * NOTE(review): the output part is matched with a plain `startsWith`, so a
 * segment such as `payload.outputFoo` is accepted with `scorePath === 'Foo'`;
 * confirm whether a `.` boundary should be required.
 *
 * @param inputField - Full field name to inspect.
 * @returns The parsed parts, or `null` when the field does not follow the
 *   layout above.
 */
export const parseScorerFeedbackField = (
  inputField: string
): ScorerFeedbackTypeParts | null => {
  const summaryPrefix = `${RUNNABLE_FEEDBACK_IN_SUMMARY_PREFIX}.`;
  if (!inputField.startsWith(summaryPrefix)) {
    return null;
  }

  // Everything after the prefix: "<scorerName>.<payload path>".
  const remainder = inputField.slice(summaryPrefix.length);
  const firstDot = remainder.indexOf('.');
  if (firstDot === -1) {
    return null;
  }

  const scorerName = remainder.slice(0, firstDot);
  const payloadPath = remainder.slice(firstDot + 1);
  if (!payloadPath.startsWith(RUNNABLE_FEEDBACK_OUTPUT_PART)) {
    return null;
  }

  return {
    scorerName,
    scorePath: payloadPath.slice(RUNNABLE_FEEDBACK_OUTPUT_PART.length),
  };
};
|
||
/**
 * Rewrites a summary-level scorer feedback field into the
 * `feedback.[<type>.<scorerName>].payload.output<scorePath>` form.
 *
 * @param field - Field name to convert.
 * @returns The rewritten field, or `field` unchanged when it is not a scorer
 *   feedback field according to `parseScorerFeedbackField`.
 */
export const convertScorerFeedbackFieldToBackendFilter = (
  field: string
): string => {
  const parts = parseScorerFeedbackField(field);
  if (parts === null) {
    return field;
  }
  // The bracketed segment carries the full feedback type (prefix + scorer).
  const feedbackType = `${RUNNABLE_FEEDBACK_TYPE_PREFIX}.${parts.scorerName}`;
  return `feedback.[${feedbackType}].${RUNNABLE_FEEDBACK_OUTPUT_PART}${parts.scorePath}`;
};
Oops, something went wrong.