-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinear_regression.py
110 lines (72 loc) · 2.73 KB
/
linear_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# This is based on a guided course from the Coursera Project Network
# Univariate linear regression (profit vs. city population) fitted with
# batch gradient descent.
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import numpy as np
import pandas as pd
import seaborn as sns
# Default figure size for every plot in this script.
plt.rcParams['figure.figsize'] = (12, 8)
# NOTE(review): despite the file name, the columns used below are
# Population and Profit (both in units of 10,000, per the axis labels) --
# this looks like the classic city-profit dataset; confirm the file contents.
data = pd.read_csv("bike_sharing_data.txt")
data.head()
data.info()
# Eyeball the raw relationship before fitting anything.
ax = sns.scatterplot(x="Population", y="Profit", data=data)
ax.set_title("Profit in $10000 vs City Population in 10000s");
def cost_function(X, y, theta):
    """Mean-squared-error cost J(theta) = 1/(2m) * sum((X @ theta - y)**2)."""
    n_samples = len(y)
    residuals = X.dot(theta) - y
    return np.sum(residuals ** 2) / (2 * n_samples)
# Build the design matrix: a leading column of ones lets theta[0] act as
# the intercept term, so h(x) = theta0 + theta1 * population.
m = data.Population.values.size
X = np.append(np.ones((m, 1)), data.Population.values.reshape(m, 1), axis=1)
# Targets as an (m, 1) column vector to match X @ theta.
y = data.Profit.values.reshape(m, 1)
# Start from all-zero parameters and report the initial cost.
theta = np.zeros((2,1))
cost_function(X, y, theta)
def gradient_descent(X, y, theta, alpha, iterations):
    """Fit linear-regression parameters with batch gradient descent.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        Design matrix (first column of ones for the intercept).
    y : ndarray, shape (m, 1)
        Target values.
    theta : ndarray, shape (n, 1)
        Initial parameters. The caller's array is NOT modified.
    alpha : float
        Learning rate.
    iterations : int
        Number of full-batch gradient steps.

    Returns
    -------
    (theta, costs)
        The fitted parameters and the cost recorded after each step.
    """
    m = len(y)
    # Work on a float copy: the original `theta -= ...` updated the
    # caller's array in place, silently clobbering the argument.
    theta = theta.astype(float, copy=True)
    step = alpha / m  # loop-invariant scale factor
    costs = []
    for _ in range(iterations):
        # Gradient of J: X^T (X theta - y)
        error = X.T.dot(X.dot(theta) - y)
        theta = theta - step * error
        costs.append(cost_function(X, y, theta))
    return theta, costs
# Fit the model and print the learned hypothesis h(x) = theta0 + theta1*x1,
# with coefficients rounded to 2 decimals for display.
theta, costs = gradient_descent(X, y, theta, alpha=0.01, iterations=2000)
print("h(x) = {} + {}x1".format(str(round(theta[0,0], 2)),
str(round(theta[1,0], 2))))
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the '3d' projection

# Evaluate the cost over a grid of (theta_0, theta_1) candidates.
theta_0 = np.linspace(-10, 10, 100)
theta_1 = np.linspace(-1, 4, 100)
cost_values = np.zeros((len(theta_0), len(theta_1)))
for i in range(len(theta_0)):
    for j in range(len(theta_1)):
        # Must be a (2, 1) column vector: a flat (2,) vector makes
        # X.dot(t) - y broadcast (m,) - (m, 1) into an (m, m) matrix,
        # which yields wrong cost values.
        t = np.array([[theta_0[i]], [theta_1[j]]])
        cost_values[i, j] = cost_function(X, y, t)

fig = plt.figure(figsize=(12, 8))
# fig.gca(projection='3d') was removed in Matplotlib 3.6; add_subplot is
# the supported way to get a 3-D axes.
ax = fig.add_subplot(projection='3d')
# plot_surface requires 2-D coordinate grids; 'ij' indexing matches
# cost_values[i, j] == J(theta_0[i], theta_1[j]).
T0, T1 = np.meshgrid(theta_0, theta_1, indexing='ij')
surf = ax.plot_surface(T0, T1, cost_values, cmap='viridis')
fig.colorbar(surf, shrink=0.5, aspect=5)
# Raw strings: "\T" is an invalid escape sequence in a normal literal.
plt.xlabel(r"$\Theta_0$")
plt.ylabel(r"$\Theta_1$")
ax.set_zlabel(r"$J(\Theta)$")
ax.view_init(30, 330)
plt.show()
# Cost history: should decrease monotonically when alpha is well chosen.
plt.plot(costs)
plt.xlabel("Iterations")
plt.ylabel(r"$J(\Theta)$")  # raw string: "\T" is an invalid escape otherwise
plt.title("Values of the Cost Function over Iterations of Gradient Descent");
print(theta.shape)
print(theta)
theta = np.squeeze(theta)  # (2, 1) -> (2,) so theta[0] / theta[1] are scalars
# Overlay the fitted line h(x) = theta0 + theta1*x on the scatter plot.
ax = sns.scatterplot(x="Population", y="Profit", data=data)
x_value = list(range(5, 25))
y_value = [theta[0] + theta[1] * x for x in x_value]
# seaborn >= 0.12 removed positional data vectors; x=/y= are required.
sns.lineplot(x=x_value, y=y_value)
plt.xlabel("Population in 10000s")
plt.ylabel("Profit in $10,000s")
plt.title("Linear Regression Fit");
def predict(x, theta):
    """Return the linear-model hypothesis h(x) = theta^T x."""
    return theta.T.dot(x)
# Example predictions. Inputs are [1, population/10000] (the leading 1
# multiplies the intercept); multiplying by 10000 converts the predicted
# profit from units of $10,000 back to dollars.
y_pred_1 = predict(np.array([1, 4]), theta) * 10000
print("For a population of 40,000 people, the model predicts a profit of $" + str(round(y_pred_1, 0)))
y_pred_2 = predict(np.array([1, 8.3]), theta) * 10000
print("For a population of 83,000 people, the model predicts a profit of $" + str(round(y_pred_2, 0)))