-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLinearReg.py
75 lines (70 loc) · 3.26 KB
/
LinearReg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
class LinearReg:
def __init__(self):
self.X = None # training data
self.y = None # labels
self.learning_rate = 1e-2
self.regularization_rate = 1e-2
self.tol = 1e-3 # tolerance! amount of variation to error considered as convergence
self.max_iter = 200 # max iterations to perform seeking convergence (respect to specified tolerance)
self.batch_size = 10 # for mini-batch gradient descent
self.fitted = False # already fit ?
self.mae_ = np.inf # mean absolute error of the model
self.weights = [] # Linear_Regression params
self.verbose = 0
self.converged_ = 0 # did the model converge ( % tol)
def fit(self, X, y, learning_rate=1e-2, regularization_rate=1e-1, batch_size=10, max_iter=200,
tol=1e-2, verbose=0):
# will fit the model and return self
# ...
self.fitted = True
self.X = X
self.y = y
self.learning_rate = learning_rate
self.regularization_rate = regularization_rate
self.batch_size = batch_size
self.max_iter = max_iter
self.verbose = verbose
self.tol = tol
(n_samples, n_features) = X.shape
self.weights = np.zeros((n_features,)) # init weights
ex_error = np.inf # check convergence with
for iteration in range(max_iter):
i = 0 # iterate over batches
while i < n_samples:
X_batch = X[i:min(i + batch_size, n_samples), :]
y_predicted = np.dot(X_batch, self.weights) # E batch_size x n_features
y_real = y[i:min(i + batch_size, n_samples)]
self.weights[1:] *= 1 - (regularization_rate / n_samples) # regularization part
self.weights -= (learning_rate / n_samples) * np.dot(X_batch.T, y_predicted - y_real)
i = i + batch_size # next batch
# compute new cost
self.mae_ = (np.sum((np.dot(X, self.weights) - y) ** 2) + (
regularization_rate * np.sum(np.insert(self.weights[1:], 0, 0) ** 2))) / (2 * n_samples)
if verbose:
print('iteration ', iteration, ' error = ', self.mae_)
if self.mae_ + tol >= ex_error:
# if convergence before reaching max_iter
self.converged_ = 1
if verbose:
print('Convergence!')
return self
elif iteration + 1 >= max_iter and verbose:
print('Failure to converge after ', max_iter, ' iterations')
ex_error = self.mae_
return self
def score(self, X, y, tol=1):
# get accuracy of prediction over X & y
# score the output for the matrix of inputs
if not self.fitted:
raise RuntimeError('Model not fitted yet!')
y_pred = np.dot(X, self.weights)
d_avg = np.sum(np.abs(y_pred - y)) / len(y) # average distance from correct prediction
return np.exp(- (np.log(2) * d_avg) / tol), d_avg
def predict(self, X):
# predict the output for the matrix of inputs
if not self.fitted:
raise RuntimeError('Model not fitted yet!')
return np.dot(X, self.weights)