-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess.py
64 lines (52 loc) · 1.77 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#-*- coding:utf-8 -*-
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import Binarizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Imputer
import numpy as np
def gen_datasets():
datasets = np.array(
[[1, 2, -2, 0],
[-1, 2, 0, 1],
[1, 0, 2, -2],
[2, 0, -2, -1],
[0, -1, 2, 0]], float)
return datasets
def main():
datasets = gen_datasets()
print "origin data:"
print datasets
#0均值,单位方差
standard_scaler = StandardScaler()
scaler_datasets = standard_scaler.fit_transform(datasets)
print scaler_datasets
print "-" * 80
min_max_scaler = MinMaxScaler()
scaler_datasets = min_max_scaler.fit_transform(datasets)
print scaler_datasets
print "-" * 80
max_abs_scaler = MaxAbsScaler()
scaler_datasets = max_abs_scaler.fit_transform(datasets)
print scaler_datasets
print "-" * 80
normalize = Normalizer(norm="l1")
normalize_datasets = normalize.fit_transform(datasets)
print normalize_datasets
print "-" * 80
binarizer = Binarizer(threshold=1.1)
binarizer_datasets = binarizer.fit_transform(datasets)
print binarizer_datasets
print "-" * 80
one_hot_encoder = OneHotEncoder()
one_hot_encoder_datasets = one_hot_encoder.fit_transform([[0, 1, 4], [1, 2, 0], [2, 3, 5]])
print one_hot_encoder_datasets.toarray()
print "-" * 80
imputer = Imputer(missing_values=0, strategy="median")
imputer_datasets = imputer.fit_transform(datasets)
print imputer_datasets
print imputer.statistics_
if __name__ == "__main__":
main()