fix: updating docstrings in functions

DSCI-310-2024 · Apr 11, 2024 · ad585b7 · ad585b7
1 parent 550f39f
commit ad585b7
Show file tree

Hide file tree

Showing 3 changed files with 82 additions and 21 deletions.
diff --git a/src/py_predpurchase/function_classification_metrics.py b/src/py_predpurchase/function_classification_metrics.py
@@ -4,14 +4,28 @@
 def calculate_classification_metrics(y_true, y_pred):
     """
     Calculates classification metrics for model predictions including precision, 
-    recall, accuracy and F1 scores. 
+    recall, accuracy, and F1 scores. 
     
     Parameters:
-    - y_true: pd.Series, true target values in a dataset
-    - y_pred: pd.Series, predicted target values by the model.
+    ----------
+    y_true : array-like or pd.Series
+        True target values in a dataset.
+    y_pred : array-like or pd.Series
+        Predicted target values by the model.
     
     Returns:
-    - dict, containing precision, recall, accuracy, and F1 score.
+    ----------
+    dict
+        Contains precision, recall, accuracy, and F1 score.
+    
+    Examples:
+    --------
+
+    Assume `y_true` and `y_pred` are as follows:
+    >>> y_true = [0, 1, 2, 0, 1]
+    >>> y_pred = [0, 2, 1, 0, 0]
+    >>> calculate_classification_metrics(y_true, y_pred)
+
     """
 
     if not all(isinstance(y, (int, float, np.number)) for y in np.concatenate([y_true, y_pred])):

diff --git a/src/py_predpurchase/function_model_cross_val.py b/src/py_predpurchase/function_model_cross_val.py
@@ -12,15 +12,38 @@ def model_cross_validation(preprocessed_training_data, preprocessed_testing_data
 	using preprocessed and cleaned training and testing datasets. Random forests and Dummy hyperparameters are fixed for simplicity's sake.
 	
 	Parameters:
-	- preprocessed_training_data: DataFrame, cleaned and preprocessed training data 
-	- preprocessed_testing_data: DataFrame, cleaned and preprocessed testing data
-	- target: str target column name
-	- k: k value hyperparameter for KNearestNeighbours Int
-	- gamma: gamma value hyperparameter for SVM
-	
-	Returns:
-	- dictionary, containing cross validation results (mean and std of scores) from specified model
-	"""
+    ----------
+    preprocessed_training_data : DataFrame
+        Cleaned and preprocessed training data.
+    preprocessed_testing_data : DataFrame
+        Cleaned and preprocessed testing data.
+    target : str
+        Target column name in the dataset.
+    k : int
+        Hyperparameter 'k' value for KNearestNeighbours.
+    gamma : float
+        Hyperparameter 'gamma' value for SVM.
+
+    Returns:
+    ----------
+    dict
+        Contains cross-validation results (mean and std of scores) for each specified model.
+
+    Examples:
+    --------
+    Assuming the dataset is preprocessed and split into training and testing sets,
+    with 'target' as the target column:
+
+    >>> results = model_cross_validation(preprocessed_training_data, preprocessed_testing_data, 'target', k=5, gamma=0.1)
+    >>> pd.DataFrame(results)
+
+    This will output the cross-validation results for each model, displaying the mean and
+    standard deviation of the scores (train scores are also included).
+
+    Notes:
+    -------
+    The function assumes that the input data is already scaled and encoded.
+    """
 
 	train_data = preprocessed_training_data
 	test_data = preprocessed_testing_data

diff --git a/src/py_predpurchase/function_preprocessing.py b/src/py_predpurchase/function_preprocessing.py
@@ -8,16 +8,40 @@ def numerical_categorical_preprocess(X_train, X_test, y_train, y_test, numeric_f
     This function requires target data to be provided and includes it in the output DataFrames.
 
     Parameters:
-    - X_train: DataFrame, training feature data
-    - X_test: DataFrame, testing feature data
-    - y_train: DataFrame, training target data
-    - y_test: DataFrame, testing target data
-    - numeric_features: list, names of numeric features to scale
-    - categorical_features: list, names of categorical features to encode
-    
+    ----------
+    X_train : DataFrame
+        Training feature data.
+    X_test : DataFrame
+        Testing feature data.
+    y_train : DataFrame or Series
+        Training target data.
+    y_test : DataFrame or Series
+        Testing target data.
+    numeric_features : list
+        Names of numeric features to scale.
+    categorical_features : list
+        Names of categorical features to encode.
     
     Returns:
-    - Tuple containing preprocessed training and testing DataFrames including target data, and transformed column names
+    ----------
+    Tuple
+        Contains preprocessed training and testing DataFrames including target data, 
+        and transformed column names.
+   
+    Examples:
+    --------
+    Assume you want to transform the following features and your dataset has already been split
+    into training and testing sets:
+
+    >>> numeric_features = ['feature1', 'feature2']
+    >>> categorical_features = ['feature3', 'feature4']
+    >>> train_transformed, test_transformed, transformed_columns = numerical_categorical_preprocess(
+    ...     X_train, X_test, y_train, y_test, numeric_features, categorical_features)
+    
+    The function will scale 'feature1' and 'feature2' and one-hot encode 'feature3' and 'feature4',
+    storing the preprocessed data in 'train_transformed' and 'test_transformed'. The transformed
+    column names will be stored in 'transformed_columns'.
+    
     """