Skip to content

Commit

Permalink
Changed interface for looping over features
Browse files Browse the repository at this point in the history
  • Loading branch information
nnansters committed Dec 9, 2024
1 parent 2c7d796 commit 086b0e1
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 14 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ Each `Dataset` has `reference` and `monitoring` properties. Each of these expose
- `predicted_probabilities`: access the model's predicted probabilities. Only available for classification datasets. For binary classification datasets this will be a single
- `targets`: access the model targets as a `numpy.ndarray`
- `timestamps`: access the model timestamps as a `numpy.ndarray`
- `categorical_features`: access the model's categorical features as an `Iterable` of `numpy.ndarray`
- `continuous_features`: access the model's continuous features as an `Iterable` of `numpy.ndarray`
- `features`: access the model's features as an `Iterable` of `numpy.ndarray`
- `categorical_features`: access the model's categorical features. Loop over tuples containing the column name and its values.
- `continuous_features`: access the model's continuous features. Loop over tuples containing the column name and its values.
- `features`: access the model's features. Loop over tuples containing the column name and its values.

If any of these properties are not available, trying to access them will raise an `AssertionError`.

Expand All @@ -30,8 +30,8 @@ from nannyml_dataset.binary_classification import synthetic_car_loan # Import t
print(synthetic_car_loan.reference.timestamps) # Access some reference property
print(synthetic_car_loan.monitoring.timestamps) # Access some monitoring property

for col in synthetic_car_loan.reference.categorical_features: # Loop over reference categorical features
print(col) # You can do more useful stuff here, like setting up a univariate covariate shift monitor!
for name, values in synthetic_car_loan.reference.categorical_features: # Loop over reference categorical features
print(f"{name}\t\t{values}") # You can do more useful stuff here, like setting up a univariate covariate shift monitor!

```

Expand Down
20 changes: 11 additions & 9 deletions src/nannyml_datasets/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy.typing as npt
import requests

from typing import Iterable, List, Mapping, Optional
from typing import Iterable, List, Optional, Tuple


class _Dataset:
Expand Down Expand Up @@ -69,24 +69,26 @@ def timestamps(self) -> npt.NDArray:
return self.data[self.timestamps_column_name].to_numpy()

@property
def categorical_features(self) -> Iterable[npt.NDArray]:
def categorical_features(self) -> Iterable[Tuple[str, npt.NDArray]]:
return (
self.data[col].to_numpy() for col in self.categorical_feature_column_names
(col, self.data[col].to_numpy())
for col in self.categorical_feature_column_names
)

@property
def continuous_features(self) -> Iterable[npt.NDArray]:
def continuous_features(self) -> Iterable[Tuple[str, npt.NDArray]]:
return (
self.data[col].to_numpy() for col in self.continuous_feature_column_names
(col, self.data[col].to_numpy())
for col in self.continuous_feature_column_names
)

@property
def features(self) -> Mapping[str, npt.NDArray]:
return {
col: self.data[col].to_numpy()
def features(self) -> Iterable[Tuple[str, npt.NDArray]]:
return (
(col, self.data[col].to_numpy())
for col in self.categorical_feature_column_names
+ self.continuous_feature_column_names
}
)


class Dataset:
Expand Down

0 comments on commit 086b0e1

Please sign in to comment.