diff --git a/tableone/preprocessors.py b/tableone/preprocessors.py
index ed8b18f..0e9f102 100644
--- a/tableone/preprocessors.py
+++ b/tableone/preprocessors.py
@@ -114,4 +114,20 @@ def handle_categorical_nulls(df: pd.DataFrame, null_value: str = 'None') -> pd.D
     Returns:
-    - pd.DataFrame: The modified DataFrame if not inplace, otherwise None.
+    - pd.DataFrame: A copy of ``df`` with nulls replaced by ``null_value``.
     """
-    return df.fillna(null_value)
+    # Work on a copy so the caller's DataFrame is not mutated, as was the case
+    # with the previous ``df.fillna(null_value)`` implementation.
+    df = df.copy()
+    for column in df.columns:
+        series = df[column]
+        if not series.isnull().any():
+            continue
+        if isinstance(series.dtype, pd.CategoricalDtype):
+            # fillna raises on a categorical unless the fill value is already
+            # one of its categories, so register null_value first.
+            if null_value not in series.cat.categories:
+                series = series.cat.add_categories(null_value)
+        # Replace the column with df[column] = ... rather than df.loc[:, column]:
+        # .loc tries to write into the existing column in place, and pandas
+        # (2.1+) deprecates doing so with values of an incompatible dtype.
+        df[column] = series.fillna(null_value)
+    return df
diff --git a/tests/unit/test_tableone.py b/tests/unit/test_tableone.py
index 601529b..42902a4 100644
--- a/tests/unit/test_tableone.py
+++ b/tests/unit/test_tableone.py
@@ -1242,3 +1242,21 @@ def test_mutual_exclusivity_of_continuous_and_categorical(self, data_sample):
 
         # Ensure that the error message matches the one produced by the code
         assert "Columns cannot be both categorical and continuous" in str(excinfo.value)
+
+    def test_null_values_correctly_handled_for_categorical_data_type(self):
+        """
+        Checks that null values are converted to a new category for categorical column type.
+
+        Issue raised by @eroell in https://github.com/tompollard/tableone/issues/177.
+        """
+        dummy_table = pd.DataFrame(
+            {
+                "age": [70, 80, 90, 85, 70],
+                "sex": ["m", "f", "m", "f", None]
+            }
+        )
+        dummy_table["sex"] = dummy_table["sex"].astype("category")
+        t = TableOne(dummy_table, include_null=True)
+
+        expected = '1 (20.0)'
+        assert t.tableone.loc["sex, n (%)", "None"]["Overall"] == expected