Skip to content

Commit

Permalink
Deal with non-decimal float values in the unseen values calculator
Browse files Browse the repository at this point in the history
  • Loading branch information
nnansters committed Aug 26, 2024
1 parent dde6003 commit 08fa87d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
3 changes: 2 additions & 1 deletion nannyml/data_quality/unseen/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@ def _convert_int_columns_to_categorical(
int_cols = list(
filter(
lambda c: c in column_names
and data[c].dtype in ('int_', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'),
and data[c].dtype in ('int_', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64')
or (data[c].dtype in ('float_', 'float16', 'float32', 'float64') and (data[c] % 1 == 0).all()),
data.columns,
)
)
Expand Down
14 changes: 14 additions & 0 deletions tests/data_quality/test_unseen.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,3 +251,17 @@ def test_input_dataframes_are_not_altered_by_calculator(): # noqa: D103
results = calc.calculate(monitored2) # noqa: F841
pd.testing.assert_frame_equal(monitored, monitored2)
pd.testing.assert_frame_equal(reference, reference2)


def test_int_values_are_treated_as_categorical():  # noqa: D103
    # Regression test: fitting an UnseenValuesCalculator on the 'repaid'
    # column of the synthetic car-loan data-quality reference set must not
    # raise InvalidArgumentsException.
    # NOTE(review): presumably 'repaid' holds whole-number values in a numeric
    # (possibly float) dtype, which is what the calculator should accept as
    # categorical -- confirm against the dataset.
    reference, _, _ = load_synthetic_car_loan_data_quality_dataset()
    calc = UnseenValuesCalculator(
        column_names=['repaid'],
        timestamp_column_name='timestamp',
        normalize=False,
    )
    try:
        calc.fit(reference)
    except InvalidArgumentsException as exc:
        # Surface the underlying reason instead of failing with an empty message.
        pytest.fail(f"fitting on whole-number column 'repaid' raised InvalidArgumentsException: {exc}")

0 comments on commit 08fa87d

Please sign in to comment.