From a5ee069233d8fc04632c1e9a0aec27dce4aceffb Mon Sep 17 00:00:00 2001 From: KOSASIH <kosasihg88@gmail.com> Date: Tue, 29 Oct 2024 19:09:37 +0700 Subject: [PATCH] Create model_training.py --- src/ml/model_training.py | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 src/ml/model_training.py diff --git a/src/ml/model_training.py b/src/ml/model_training.py new file mode 100644 index 0000000..8ce9d64 --- /dev/null +++ b/src/ml/model_training.py @@ -0,0 +1,53 @@ +# src/ml/model_training.py + +import pandas as pd +from sklearn.model_selection import train_test_split +from sklearn.ensemble import RandomForestRegressor +from sklearn.preprocessing import StandardScaler +import joblib +import os + +class ModelTrainer: + """Class to train machine learning models.""" + + def __init__(self, model_path='models', test_size=0.2, random_state=42): + self.model_path = model_path + self.test_size = test_size + self.random_state = random_state + self.model = RandomForestRegressor() + + def load_data(self, file_path): + """Load dataset from a CSV file.""" + data = pd.read_csv(file_path) + return data + + def preprocess_data(self, data): + """Preprocess the data for training.""" + X = data.drop('target', axis=1) # Assuming 'target' is the label column + y = data['target'] + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.test_size, random_state=self.random_state) + scaler = StandardScaler() + X_train = scaler.fit_transform(X_train) + X_test = scaler.transform(X_test) + return X_train, X_test, y_train, y_test, scaler + + def train_model(self, X_train, y_train): + """Train the model.""" + self.model.fit(X_train, y_train) + + def save_model(self, model_name='model.pkl', scaler_name='scaler.pkl'): + """Save the trained model and scaler to disk.""" + os.makedirs(self.model_path, exist_ok=True) + joblib.dump(self.model, os.path.join(self.model_path, model_name)) + joblib.dump(self.scaler, os.path.join(self.model_path, scaler_name)) + + def run(self, data_file): + """Load data, preprocess, train, and save the model.""" + data = self.load_data(data_file) + X_train, X_test, y_train, y_test, self.scaler = self.preprocess_data(data) + self.train_model(X_train, y_train) + self.save_model() + +if __name__ == "__main__": + trainer = ModelTrainer() + trainer.run('data/training_data.csv') # Path to your training data