Skip to content

Commit

Permalink
Merge branch 'master' into griffin/fix-delete-typing
Browse files Browse the repository at this point in the history
  • Loading branch information
gtarpenning authored Jan 22, 2025
2 parents 65b4027 + 8b3cbc2 commit f8c3a8f
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 45 deletions.
54 changes: 40 additions & 14 deletions docs/docs/guides/core-types/datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,25 +73,51 @@ This guide will show you how to:

<Tabs groupId="programming-language" queryString>
<TabItem value="python" label="Python" default>
Datasets can also be constructed from common Weave objects like `list[Call]`, which is useful if you want to run an evaluation on a handful of examples.
Datasets can also be constructed from common Weave objects like `Call`s, and popular Python objects like `pandas.DataFrame`s.
<Tabs groupId="use-case">
<TabItem value="from-calls" label="From Calls">
This can be useful if you want to create a dataset from specific examples.

```python
@weave.op
def model(task: str) -> str:
return f"Now working on {task}"
```python
@weave.op
def model(task: str) -> str:
return f"Now working on {task}"

res1, call1 = model.call(task="fetch")
res2, call2 = model.call(task="parse")
res1, call1 = model.call(task="fetch")
res2, call2 = model.call(task="parse")

dataset = Dataset.from_calls([call1, call2])
# Now you can use the dataset to evaluate the model, etc.
```
dataset = Dataset.from_calls([call1, call2])
# Now you can use the dataset to evaluate the model, etc.
```
</TabItem>

<TabItem value="from-pandas" label="From Pandas">
You can also freely convert between `Dataset`s and `pandas.DataFrame`s.

```python
import pandas as pd

df = pd.DataFrame([
{'id': '0', 'sentence': "He no likes ice cream.", 'correction': "He doesn't like ice cream."},
{'id': '1', 'sentence': "She goed to the store.", 'correction': "She went to the store."},
{'id': '2', 'sentence': "They plays video games all day.", 'correction': "They play video games all day."}
])
dataset = Dataset.from_pandas(df)
df2 = dataset.to_pandas()

assert df.equals(df2)
```

</TabItem>

</Tabs>

</TabItem>
<TabItem value="typescript" label="TypeScript">

```typescript
This feature is not available in TypeScript yet. Stay tuned!
```

```typescript
This feature is not available in TypeScript yet. Stay tuned!
```

</TabItem>
</Tabs>
36 changes: 36 additions & 0 deletions tests/integrations/pandas-test/test_pandas.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pandas as pd

import weave
from weave import Dataset


def test_op_save_with_global_df(client):
Expand All @@ -20,3 +21,38 @@ def my_op(a: str) -> str:
call = list(my_op.calls())[0]
assert call.inputs == {"a": "d"}
assert call.output == "a"


def test_dataset(client):
    """Check Dataset <-> pandas.DataFrame conversion in both directions.

    The ``client`` fixture is requested but not referenced in the body.
    """
    records = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]

    # Dataset -> DataFrame: each key becomes a column, in row order.
    dataset_from_rows = Dataset(rows=records)
    frame_from_dataset = dataset_from_rows.to_pandas()
    assert frame_from_dataset["a"].tolist() == [1, 3, 5]
    assert frame_from_dataset["b"].tolist() == [2, 4, 6]

    # DataFrame -> Dataset: the rows come back as the original records.
    frame_from_rows = pd.DataFrame(records)
    dataset_from_frame = Dataset.from_pandas(frame_from_rows)
    assert dataset_from_frame.rows == records

    # Both routes must agree with each other.
    assert frame_from_dataset.equals(frame_from_rows)
    assert dataset_from_rows.rows == dataset_from_frame.rows


def test_calls_to_dataframe(client):
    """Check that recorded op calls can be turned into a DataFrame via Dataset.

    Two calls of a small op are made, collected with ``greet.calls()``,
    wrapped with ``Dataset.from_calls`` and converted with ``to_pandas``.
    The ``inputs`` and ``output`` columns are then compared against the
    arguments and return values of those two calls.
    """

    @weave.op
    def greet(name: str, age: int) -> str:
        return f"Hello, {name}! You are {age} years old."

    greet("Alice", 30)
    greet("Bob", 25)

    expected_inputs = [
        {"name": "Alice", "age": 30},
        {"name": "Bob", "age": 25},
    ]
    expected_outputs = [
        "Hello, Alice! You are 30 years old.",
        "Hello, Bob! You are 25 years old.",
    ]

    recorded_calls = greet.calls()
    frame = Dataset.from_calls(recorded_calls).to_pandas()

    assert frame["inputs"].tolist() == expected_inputs
    assert frame["output"].tolist() == expected_outputs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import {
} from '@wandb/weave/common/css/color.styles';
import {WeaveObjectRef} from '@wandb/weave/react';
import React from 'react';
import {Link as LinkComp, useHistory} from 'react-router-dom';
import {Link as LinkComp} from 'react-router-dom';
import styled, {css} from 'styled-components';

import {TargetBlank} from '../../../../../../common/util/links';
Expand Down Expand Up @@ -161,7 +161,6 @@ export const ObjectVersionLink: React.FC<{
color?: string;
hideVersionSuffix?: boolean;
}> = props => {
const history = useHistory();
const {peekingRouter} = useWeaveflowRouteContext();
// const text = props.hideName
// ? props.version
Expand All @@ -177,15 +176,9 @@ export const ObjectVersionLink: React.FC<{
props.filePath,
props.refExtra
);
const onClick = () => {
history.push(to);
};

return (
<LinkWrapper
onClick={onClick}
fullWidth={props.fullWidth}
color={props.color}>
<LinkWrapper fullWidth={props.fullWidth} color={props.color}>
<LinkTruncater fullWidth={props.fullWidth}>
<Link
to={to}
Expand Down Expand Up @@ -244,7 +237,6 @@ export const OpVersionLink: React.FC<{
fullWidth?: boolean;
color?: string;
}> = props => {
const history = useHistory();
const {peekingRouter} = useWeaveflowRouteContext();
// const text = props.hideName
// ? props.version
Expand All @@ -256,14 +248,8 @@ export const OpVersionLink: React.FC<{
props.opName,
props.version
);
const onClick = () => {
history.push(to);
};
return (
<LinkWrapper
onClick={onClick}
fullWidth={props.fullWidth}
color={props.color}>
<LinkWrapper fullWidth={props.fullWidth} color={props.color}>
<LinkTruncater fullWidth={props.fullWidth}>
<Link $variant={props.variant} to={to}>
{text}
Expand All @@ -276,7 +262,6 @@ export const OpVersionLink: React.FC<{
export const CallRefLink: React.FC<{
callRef: WeaveObjectRef;
}> = props => {
const history = useHistory();
const {peekingRouter} = useWeaveflowRouteContext();
const callId = props.callRef.artifactName;
const to = peekingRouter.callUIUrl(
Expand All @@ -285,16 +270,13 @@ export const CallRefLink: React.FC<{
'',
callId
);
const onClick = () => {
history.push(to);
};

if (props.callRef.weaveKind !== 'call') {
return null;
}

return (
<LinkWrapper onClick={onClick}>
<LinkWrapper>
<LinkTruncater>
<Link
to={to}
Expand Down Expand Up @@ -327,7 +309,6 @@ export const CallLink: React.FC<{
color?: string;
isEval?: boolean;
}> = props => {
const history = useHistory();
const {peekingRouter} = useWeaveflowRouteContext();

const opName = opNiceName(props.opName);
Expand Down Expand Up @@ -360,15 +341,9 @@ export const CallLink: React.FC<{
showTraceTree,
showFeedbackExpand
);
const onClick = () => {
history.push(to);
};

return (
<LinkWrapper
onClick={onClick}
fullWidth={props.fullWidth}
color={props.color}>
<LinkWrapper fullWidth={props.fullWidth} color={props.color}>
<LinkTruncater fullWidth={props.fullWidth}>
<Link
$variant={props.variant}
Expand Down
18 changes: 17 additions & 1 deletion weave/flow/dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from collections.abc import Iterable, Iterator
from typing import Any
from typing import TYPE_CHECKING, Any

from pydantic import field_validator
from typing_extensions import Self
Expand All @@ -10,6 +10,9 @@
from weave.trace.vals import WeaveObject, WeaveTable
from weave.trace.weave_client import Call

if TYPE_CHECKING:
import pandas as pd


def short_str(obj: Any, limit: int = 25) -> str:
str_val = str(obj)
Expand Down Expand Up @@ -60,6 +63,19 @@ def from_calls(cls, calls: Iterable[Call]) -> Self:
rows = [call.to_dict() for call in calls]
return cls(rows=rows)

@classmethod
def from_pandas(cls, df: "pd.DataFrame") -> Self:
    """Build an instance of ``cls`` from the rows of a ``pandas.DataFrame``.

    Args:
        df: The frame to convert. It is turned into a list of per-row
            dicts with ``df.to_dict(orient="records")``.

    Returns:
        ``cls(rows=...)`` constructed from those per-row dicts.
    """
    return cls(rows=df.to_dict(orient="records"))

def to_pandas(self) -> "pd.DataFrame":
    """Return this dataset's rows as a ``pandas.DataFrame``.

    pandas is imported inside the method, so it is only needed when this
    method is actually called.

    Returns:
        The result of ``pd.DataFrame(self.rows)``.

    Raises:
        ImportError: If pandas cannot be imported. The original import
            failure is chained and available as ``__cause__``.
    """
    try:
        import pandas as pd
    except ImportError as exc:
        # Chain explicitly so the traceback shows why the import failed
        # (package missing vs. a broken installation) instead of reporting
        # it as an unrelated error raised during exception handling.
        raise ImportError("pandas is required to use this method") from exc

    return pd.DataFrame(self.rows)

@field_validator("rows", mode="before")
def convert_to_table(cls, rows: Any) -> weave.Table:
if not isinstance(rows, weave.Table):
Expand Down

0 comments on commit f8c3a8f

Please sign in to comment.