## utils.py
## main libraries
import numpy as np
import pandas as pd
from scipy.stats import boxcox
## for preprocessing and preparing
from sklearn import utils
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
## Load the raw dataset from sheet 'Sheet1' of the workbook
df = pd.read_excel('file.xlsx', sheet_name='Sheet1')
## Shuffle the rows with a fixed seed so every run sees the same order
df = utils.shuffle(df, random_state=123)
## Box-Cox transform AGE towards a normal distribution; keep the fitted
## lambda because process_batch() reuses it on new data
df['AGE'], lamd_age = boxcox(df['AGE'])
## Same Box-Cox treatment for RUT(in), again keeping the fitted lambda
## NOTE(review): both lambdas are fitted on the full dataset, before the
## train/test split below -- confirm this is intended
df['RUT(in)'], lamda_rut = boxcox(df['RUT(in)'])
## The dataset is very small, so the held-out set serves as both the
## validation and the test set; 0.20 of the data is enough for that
## Separate the features from the target column (DELTA)
X = df.drop(columns=['DELTA'], axis=1)
y = df['DELTA']
## Split into training (80%) and testing (20%) sets with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123, test_size=0.2)
## Standardize the features (mean=0, std=1); the scaler is fitted on the
## training features only and is reused by process_one()/process_batch()
scaler = StandardScaler()
scaler.fit(X_train.values)
def process_one(X_new):
    """Standardize prepared features with the module-level fitted scaler.

    Only scaling is applied here; unlike ``process_batch``, no Box-Cox
    transform is performed.
    NOTE(review): presumably callers Box-Cox transform AGE and RUT(in)
    themselves before calling this -- confirm against the call sites.

    Args:
        X_new: Features in the layout the scaler was fitted on, in a form
            accepted by ``scaler.transform``.

    Returns:
        The output of ``scaler.transform(X_new)``.
    """
    return scaler.transform(X_new)
def process_batch(X_new):
    """Box-Cox transform and standardize a batch of raw feature rows.

    Columns 0 and 4 are Box-Cox transformed with the lambdas fitted at
    module load (``lamd_age`` and ``lamda_rut``); columns 1-3 pass through
    unchanged. The five resulting columns are then standardized with the
    module-level ``scaler``.

    Args:
        X_new: 2-D array-like (ndarray, list of rows, DataFrame, ...) with
            at least five columns. Columns beyond the fifth are ignored.
            NOTE(review): the column order is assumed to match the training
            features (AGE first, RUT(in) fifth) -- confirm against the
            spreadsheet layout.

    Returns:
        The output of ``scaler.transform`` for the five prepared columns,
        one row per input row.
    """
    # Positional slicing below needs an ndarray; coercing here lets callers
    # pass lists or DataFrames as well.
    X_new = np.asarray(X_new)

    # Reuse the training-time lambdas so new data gets the same transform.
    age = boxcox(X_new[:, 0], lmbda=lamd_age)
    rut = boxcox(X_new[:, 4], lmbda=lamda_rut)

    # Reassemble in the original column order: age, cols 1-3, rut.
    X_new = np.column_stack((age, X_new[:, 1:4], rut))
    return scaler.transform(X_new)