By Fanyu

Reference:
https://www.paddlepaddle.org.cn/tutorials/projectdetail/4497936

Further reading:
https://aistudio.baidu.com/aistudio/education/group/info/25851
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
datafile = './housing.data'
data = np.fromfile(datafile, sep=' ')
data
array([6.320e-03, 1.800e+01, 2.310e+00, ..., 3.969e+02, 7.880e+00, 1.190e+01])
feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE','DIS',
'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
feature_num = len(feature_names)
data = data.reshape([data.shape[0] // feature_num, feature_num])
x = data[0]
print(x.shape)
print(x)
(14,)
[6.320e-03 1.800e+01 2.310e+00 0.000e+00 5.380e-01 6.575e+00 6.520e+01 4.090e+00 1.000e+00 2.960e+02 1.530e+01 3.969e+02 4.980e+00 2.400e+01]
ratio = 0.8
offset = int(data.shape[0] * ratio)
training_data = data[:offset]
training_data.shape
(404, 14)
maximums, minimums, avgs = \
    training_data.max(axis=0), \
    training_data.min(axis=0), \
    training_data.sum(axis=0) / training_data.shape[0]
for i in range(feature_num):
    data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])
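A quick sanity check (illustrative, not part of the tutorial): since the min/max statistics came from the training slice, every training-set feature should now lie within [0, 1], while test rows may fall slightly outside.

# Verify the min-max scaling on the training slice
assert np.all(data[:offset].min(axis=0) >= 0.0)
assert np.all(data[:offset].max(axis=0) <= 1.0)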
def load_data():
    # Load the data from file
    datafile = './housing.data'
    data = np.fromfile(datafile, sep=' ')
    # Each record has 14 items: the first 13 are influencing factors,
    # the 14th is the corresponding median house price (MEDV)
    feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \
                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
    feature_num = len(feature_names)
    # Reshape the raw 1-D data into shape [N, 14]
    data = data.reshape([data.shape[0] // feature_num, feature_num])
    # Split the original dataset into training and test sets:
    # 80% of the data for training, 20% for testing.
    # The two sets must be disjoint.
    ratio = 0.8
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]
    # Compute the max, min, and mean of the training set
    maximums, minimums, avgs = training_data.max(axis=0), training_data.min(axis=0), \
                               training_data.sum(axis=0) / training_data.shape[0]
    # Normalise the data (min-max scaling with training-set statistics)
    for i in range(feature_num):
        #print(maximums[i], minimums[i], avgs[i])
        data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])
    # Split into training and test sets
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data, test_data
training_data, test_data = load_data()
x = training_data[:,:-1]
y = training_data[:,-1:]
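For orientation, the shapes after the split (the full dataset has 506 rows, so the 80/20 split yields 404 training and 102 test rows):

print(training_data.shape, test_data.shape)  # (404, 14) (102, 14)
print(x.shape, y.shape)                      # (404, 13) (404, 1)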
w = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, -0.1, -0.2, -0.3, -0.4, 0.0]
w = np.array(w).reshape([13, 1])
x1 = x[0]
t = np.dot(x1, w)
print(t)
[0.69474855]
b = -0.2
z = t + b
print(z)
[0.49474855]
class Network(object):
    def __init__(self, num_of_weights):
        np.random.seed(0)
        self.w = np.random.randn(num_of_weights, 1)
        self.b = 0.

    def forward(self, x):
        z = np.dot(x, self.w) + self.b
        return z
net = Network(13)
x1, y1 = x[0], y[0]
z = net.forward(x1)
print(z)
[2.39362982]
class Network(object):
    def __init__(self, num_of_weights):
        np.random.seed(0)
        self.w = np.random.randn(num_of_weights, 1)
        self.b = 0.

    def forward(self, x):
        """
        Calculate y^ as z, a vector
        """
        z = np.dot(x, self.w) + self.b
        return z

    def loss(self, z, y):
        """
        Mean squared error: (1/N) * sum((y - y^)**2),
        i.e. (1/N) (y - y^)^T (y - y^).
        Returns a scalar.
        """
        error = z - y
        cost = error * error
        return np.mean(cost)
net = Network(13)
x1, y1 = x[0:3], y[0:3]
z = net.forward(x1)
print(f'predict: {z}')
loss = net.loss(z, y1)
print(f'loss: {loss}')
predict: [[2.39362982]
 [2.46752393]
 [2.02483479]]
loss: 3.3844969926127924
We aim to find $w$ and $b$ that minimise the loss function.

We rename $z := \hat{y} = Xw + b$. For the derivation it is convenient to take the loss as $\frac{1}{2N}\lVert y - \hat{y} \rVert^2$: the factor $\frac{1}{2}$ cancels the 2 produced by differentiating the square, and a constant scale on the gradient is absorbed into the learning rate anyway.

Then, calculate the gradient:

$$ \frac{\partial Loss}{\partial w} = -\frac{1}{N}X^T(y - Xw - b) $$

Concisely, the gradients would be

$$ \frac{\partial Loss}{\partial w} = \frac{1}{N}X^T (z - y) $$

$$ \frac{\partial Loss}{\partial b} = \frac{1}{N} \mathbf{1}^T (z - y) $$

net = Network(13)
w5 = np.arange(-160.0, 160.0, 1.0)
w9 = np.arange(-160.0, 160.0, 1.0)
losses = np.zeros([len(w5), len(w9)])
# Sweep the 5th and 9th entries of `w`
# and see how they affect the loss.
for i in range(len(w5)):
    for j in range(len(w9)):
        net.w[5] = w5[i]
        net.w[9] = w9[j]
        z = net.forward(x)
        loss = net.loss(z, y)
        losses[i, j] = loss
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
w5, w9 = np.meshgrid(w5, w9)
ax.plot_surface(w5, w9, losses, rstride=1, cstride=1, cmap='rainbow')
plt.show()
%time
z = net.forward(x)
gradient_w = (z - y)*x
print(f'Gradient_w shape {gradient_w.shape}')
print('-'*50)
gradient_w = np.mean(gradient_w, axis=0).reshape(-1,1)
print('gradient_w ', gradient_w.shape)
print('w ', net.w.shape)
print(gradient_w)
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 6.2 µs
Gradient_w shape (404, 13)
--------------------------------------------------
gradient_w  (13, 1)
w  (13, 1)
[[  4.6555403 ]
 [ 19.35268996]
 [ 55.88081118]
 [ 14.00266972]
 [ 47.98588869]
 [ 76.87210821]
 [ 94.8555119 ]
 [ 36.07579608]
 [ 45.44575958]
 [ 59.65733292]
 [ 83.65114918]
 [134.80387478]
 [ 38.93998153]]
The same result can be obtained in matrix form:
%time
# X^T (z-y) *(1/N)
np.dot( x.T, (z-y) ) / len(x)
CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.25 µs
array([[  4.6555403 ],
       [ 19.35268996],
       [ 55.88081118],
       [ 14.00266972],
       [ 47.98588869],
       [ 76.87210821],
       [ 94.8555119 ],
       [ 36.07579608],
       [ 45.44575958],
       [ 59.65733292],
       [ 83.65114918],
       [134.80387478],
       [ 38.93998153]])
z = net.forward(x)
gradient_w = np.dot( x.T, (z-y) ) / len(x)
gradient_b = np.mean(z-y)
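As a cross-check (an illustrative sketch, not from the tutorial), the analytic gradient can be compared against a central finite difference on a single weight. Recall that the code's gradient omits the factor 2 from differentiating the square, so the numerical derivative of the MSE should be twice gradient_w.

# Finite-difference check of dLoss/dw for one coordinate (illustrative)
eps = 1e-6
k = 5  # arbitrary weight index
w_orig = net.w[k, 0]
net.w[k, 0] = w_orig + eps
loss_plus = net.loss(net.forward(x), y)
net.w[k, 0] = w_orig - eps
loss_minus = net.loss(net.forward(x), y)
net.w[k, 0] = w_orig  # restore
numeric = (loss_plus - loss_minus) / (2 * eps)
print(numeric, 2 * gradient_w[k, 0])  # should agree to several decimals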
class Network(object):
    def __init__(self, num_of_weights):
        np.random.seed(0)
        self.w = np.random.randn(num_of_weights, 1)
        self.b = 0.

    def forward(self, x):
        """
        Calculate y^ as z, a vector
        """
        z = np.dot(x, self.w) + self.b
        return z

    def loss(self, z, y):
        """
        Mean squared error: (1/N) * sum((y - y^)**2),
        i.e. (1/N) (y - y^)^T (y - y^).
        Returns a scalar.
        """
        error = z - y
        cost = error * error
        return np.mean(cost)

    def gradient(self, x, y):
        z = self.forward(x)
        gradient_w = np.dot(x.T, (z - y)) / len(x)
        gradient_b = np.mean(z - y)
        return gradient_w, gradient_b
net = Network(13)
# Set [w5, w9] = [-100., -100.]
net.w[5] = -100.0
net.w[9] = -100.0
z = net.forward(x)
loss = net.loss(z, y)
gradient_w, gradient_b = net.gradient(x, y)
gradient_w5 = gradient_w[5][0]
gradient_w9 = gradient_w[9][0]
print('point {}, loss {}'.format([net.w[5][0], net.w[9][0]], loss))
print('gradient {}'.format([gradient_w5, gradient_w9]))
point [-100.0, -100.0], loss 7873.345739941162
gradient [-45.879682881232156, -35.50236884482908]
# In the [w5, w9] plane, move against the gradient to the next point P1
# Define the step size eta
eta = 0.1
# Update parameters w5 and w9
net.w[5] = net.w[5] - eta * gradient_w5
net.w[9] = net.w[9] - eta * gradient_w9
# Recompute z and the loss
z = net.forward(x)
loss = net.loss(z, y)
gradient_w, gradient_b = net.gradient(x, y)
gradient_w5 = gradient_w[5][0]
gradient_w9 = gradient_w[9][0]
print('point {}, loss {}'.format([net.w[5][0], net.w[9][0]], loss))
print('gradient {}'.format([gradient_w5, gradient_w9]))
point [-95.41203171187678, -96.44976311551709], loss 7214.694816482366
gradient [-43.88393299906906, -34.01927390849592]
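Repeating the same update keeps moving the point downhill; a minimal sketch continuing from the state above:

# Take a few more steps along [w5, w9] (illustrative continuation)
for step in range(5):
    net.w[5] = net.w[5] - eta * gradient_w5
    net.w[9] = net.w[9] - eta * gradient_w9
    z = net.forward(x)
    loss = net.loss(z, y)
    gradient_w, gradient_b = net.gradient(x, y)
    gradient_w5 = gradient_w[5][0]
    gradient_w9 = gradient_w[9][0]
    print('step {}, loss {}'.format(step, loss))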
class Network(object):
    def __init__(self, num_of_weights):
        np.random.seed(0)
        self.w = np.random.randn(num_of_weights, 1)
        self.b = 0.

    def forward(self, x):
        """
        Calculate y^ as z, a vector
        """
        z = np.dot(x, self.w) + self.b
        return z

    def loss(self, z, y):
        """
        Mean squared error: (1/N) * sum((y - y^)**2),
        i.e. (1/N) (y - y^)^T (y - y^).
        Returns a scalar.
        """
        error = z - y
        cost = error * error
        return np.mean(cost)

    def gradient(self, x, y):
        z = self.forward(x)
        gradient_w = np.dot(x.T, (z - y)) / len(x)
        gradient_b = np.mean(z - y)
        return gradient_w, gradient_b

    def update(self, gradient_w, gradient_b, learning_rate=0.01):
        self.w = self.w - gradient_w * learning_rate
        self.b = self.b - gradient_b * learning_rate

    def train(self, x, y, iterations=100, learning_rate=0.01):
        losses = []
        for i in range(iterations):
            z = self.forward(x)
            los = self.loss(z, y)
            gradient_w, gradient_b = self.gradient(x, y)
            self.update(gradient_w, gradient_b, learning_rate)
            losses.append(los)
            if i % 50 == 0:
                print(f'iter {i}, loss {los}')
        return losses
train_data, test_data = load_data()
x = train_data[:, :-1]
y = train_data[:, -1:]
# Create the network
net = Network(13)
num_iterations = 1000
# Start training
losses = net.train(x, y, iterations=num_iterations, learning_rate=0.01)
iter 0, loss 8.74595446663459
iter 50, loss 1.2774697388163774
iter 100, loss 0.8996702309578073
iter 150, loss 0.7517595081577436
iter 200, loss 0.6399726291386258
iter 250, loss 0.5520715543062921
iter 300, loss 0.4823859335591013
iter 350, loss 0.4267006173554154
iter 400, loss 0.38180958457268516
iter 450, loss 0.34527003891619995
iter 500, loss 0.3152184738991003
iter 550, loss 0.2902312865684321
iter 600, loss 0.26921908800787986
iter 650, loss 0.2513465505981325
iter 700, loss 0.2359716103268972
iter 750, loss 0.2225993398088164
iter 800, loss 0.210846942234815
iter 850, loss 0.20041717616705218
iter 900, loss 0.19107817252537598
iter 950, loss 0.18264809873308355
plt.style.use('fivethirtyeight')
# Plot how the loss evolves over the iterations
plot_x = np.arange(num_iterations)
plot_y = np.array(losses)
plt.plot(plot_x, plot_y)
plt.show()
Each iteration, a small random subset (mini-batch) of the full dataset is drawn to stand in for the whole, and the gradient and loss computed on that subset are used to update the parameters. This method is called Stochastic Gradient Descent (SGD). The core ideas are demonstrated below.
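A minimal sketch of the idea (the helper name iterate_minibatches is illustrative, not from the tutorial):

def iterate_minibatches(data, batch_size=10):
    """Yield shuffled mini-batches of rows from `data`."""
    indices = np.random.permutation(len(data))
    for k in range(0, len(data), batch_size):
        yield data[indices[k:k + batch_size]]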
train_data1 = train_data[0:10]
train_data1.shape
net = Network(13)
x = train_data1[:, :-1]
y = train_data1[:, -1:]
loss = net.train(x, y, iterations=1, learning_rate=0.01)
loss
iter 0, loss 4.497480200683045
[4.497480200683045]
train_data2 = train_data[10:20]
x = train_data2[:, :-1]
y = train_data2[:, -1:]
loss = net.train(x, y, iterations=1, learning_rate=0.01)
loss
iter 0, loss 5.849682302465981
[5.849682302465981]
batch_size = 10
n = len(train_data)
mini_batches = [train_data[k:k+batch_size] for k in range(0, n, batch_size)]
print('total number of mini_batches is ', len(mini_batches))
print('first mini_batch shape ', mini_batches[0].shape)
print('last mini_batch shape ', mini_batches[-1].shape)
total number of mini_batches is  41
first mini_batch shape  (10, 14)
last mini_batch shape  (4, 14)
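Note the last mini-batch holds only 4 rows, because 404 is not a multiple of 10. If a fixed batch size were required, one option (an assumption, not something the tutorial does) is to drop the ragged tail:

full_batches = [b for b in mini_batches if len(b) == batch_size]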
train_data, test_data = load_data()
np.random.shuffle(train_data)
batch_size = 10
n = len(train_data)
mini_batches = [train_data[k:k+batch_size] for k in range(0, n, batch_size)]
net = Network(13)
losses = []
for mini_batch in mini_batches:
    x = mini_batch[:, :-1]
    y = mini_batch[:, -1:]
    loss = net.train(x, y, iterations=1, learning_rate=0.01)
    losses.append(loss)
iter 0, loss 9.946450707104104
iter 0, loss 3.710141454098133
iter 0, loss 8.90097134135717
iter 0, loss 10.302473036750808
iter 0, loss 9.079455803350537
iter 0, loss 8.804273656633592
iter 0, loss 9.198792174424685
iter 0, loss 8.080998388087028
iter 0, loss 5.76514411032526
iter 0, loss 4.577177553495764
iter 0, loss 2.2217896373134973
iter 0, loss 3.6147784328648256
iter 0, loss 1.919047643798057
iter 0, loss 4.358638060844264
iter 0, loss 4.624079740459651
iter 0, loss 6.063130039231366
iter 0, loss 3.9135344259858735
iter 0, loss 4.428778947540845
iter 0, loss 2.097014153164939
iter 0, loss 1.3450357670026665
iter 0, loss 1.2112724108456703
iter 0, loss 2.326695147403235
iter 0, loss 2.4518210952957933
iter 0, loss 3.40091152519184
iter 0, loss 2.705886852714526
iter 0, loss 0.25962654579128774
iter 0, loss 1.3357956532173227
iter 0, loss 3.1656983410321393
iter 0, loss 1.1949026980085977
iter 0, loss 1.9223036330686938
iter 0, loss 0.34000546110035756
iter 0, loss 2.4757804572255226
iter 0, loss 0.7347460893710095
iter 0, loss 2.5681653345660913
iter 0, loss 2.9657167704988217
iter 0, loss 1.2424367621516734
iter 0, loss 2.714940976809956
iter 0, loss 1.8436159530720222
iter 0, loss 1.8795516295693937
iter 0, loss 1.4942678478573117
iter 0, loss 0.40674534963022
num_epochs = 10
for epoch_id in range(num_epochs):
    for iter_id, mini_batch in enumerate(mini_batches):
        print(f'epoch_id: {epoch_id}; iter_id: {iter_id}')
epoch_id: 0; iter_id: 0
epoch_id: 0; iter_id: 1
epoch_id: 0; iter_id: 2
...
epoch_id: 9; iter_id: 39
epoch_id: 9; iter_id: 40
class Network(object):
    def __init__(self, num_of_weights):
        np.random.seed(0)
        self.w = np.random.randn(num_of_weights, 1)
        self.b = 0.

    def forward(self, x):
        """
        Calculate y^ as z, a vector
        """
        z = np.dot(x, self.w) + self.b
        return z

    def loss(self, z, y):
        """
        Mean squared error: (1/N) * sum((y - y^)**2),
        i.e. (1/N) (y - y^)^T (y - y^).
        Returns a scalar.
        """
        error = z - y
        cost = error * error
        return np.mean(cost)

    def gradient(self, x, y):
        z = self.forward(x)
        gradient_w = np.dot(x.T, (z - y)) / len(x)
        gradient_b = np.mean(z - y)
        return gradient_w, gradient_b

    def update(self, gradient_w, gradient_b, learning_rate=0.01):
        self.w = self.w - gradient_w * learning_rate
        self.b = self.b - gradient_b * learning_rate

    def train(self, train_data, num_epochs=100, batch_size=10, learning_rate=0.01):
        losses = []
        n = len(train_data)
        for epoch_id in range(num_epochs):
            # Reshuffle before each epoch so the mini-batches differ across epochs
            np.random.shuffle(train_data)
            mini_batches = [train_data[k:k+batch_size] for k in range(0, n, batch_size)]
            for iter_id, mini_batch in enumerate(mini_batches):
                x = mini_batch[:, :-1]
                y = mini_batch[:, -1:]
                z = self.forward(x)
                los = self.loss(z, y)
                gradient_w, gradient_b = self.gradient(x, y)
                self.update(gradient_w, gradient_b, learning_rate)
                losses.append(los)
                print("Epoch {:3d} / iter {:3d}, loss = {:4f}".
                      format(epoch_id, iter_id, los))
        return losses
# Load the data
train_data, test_data = load_data()
# Create the network
net = Network(13)
# Start training
losses = net.train(train_data, num_epochs=50, batch_size=100, learning_rate=0.1)
Epoch   0 / iter   0, loss = 10.135404
Epoch   0 / iter   1, loss = 3.828966
Epoch   0 / iter   2, loss = 1.920760
Epoch   0 / iter   3, loss = 1.774041
Epoch   0 / iter   4, loss = 0.336600
...
Epoch  49 / iter   0, loss = 0.089814
Epoch  49 / iter   1, loss = 0.061669
Epoch  49 / iter   2, loss = 0.077089
Epoch  49 / iter   3, loss = 0.089054
Epoch  49 / iter   4, loss = 0.036948
plot_x = np.arange(len(losses))
plot_y = np.array(losses)
plt.plot(plot_x, plot_y)
plt.show()
net.w
array([[ 1.58905846],
       [ 0.44158883],
       [-0.10662307],
       [ 0.46245184],
       [ 0.73913199],
       [-0.4717203 ],
       [-0.06738263],
       [ 0.08392057],
       [-0.47044925],
       [-0.21357679],
       [-0.04075396],
       [ 1.06631383],
       [-0.27723575]])
# w and b have different shapes, so save them as an object array
np.save('w_and_b.npy', np.array([net.w, net.b], dtype=object))
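To reload the parameters later (allow_pickle=True is needed because the saved array holds Python objects):

w_loaded, b_loaded = np.load('w_and_b.npy', allow_pickle=True)
print(w_loaded.shape, b_loaded)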