1、導入頭文件
import numpy as np
2、前向傳播函數
- x:包含輸入數據的numpy數組,形狀為(N,d_1,...,d_k)
- w:形狀為(D,M)的一系列權重
- b:偏置,形狀為(M,)
def affine_forward(x, w, b):
    """Compute the forward pass of an affine (fully connected) layer.

    Args:
        x: Input array of shape (N, d_1, ..., d_k). Each of the N samples is
            flattened into a row vector of length D = d_1 * ... * d_k.
        w: Weight array of shape (D, M).
        b: Bias added to the (N, M) product via broadcasting, e.g. shape
            (M,) or (1, M).

    Returns:
        A tuple ``(out, cache)`` where ``out`` is the (N, M) result of
        ``x_row @ w + b`` and ``cache`` is the tuple ``(x, w, b)`` kept for
        the backward pass.
    """
    num_samples = x.shape[0]
    # Flatten every sample so the layer accepts inputs of any rank.
    x_row = x.reshape(num_samples, -1)  # (N, D)
    out = np.dot(x_row, w) + b  # (N, M)
    # The original (un-flattened) x is cached so the backward pass can
    # restore the input shape for dx.
    cache = (x, w, b)
    return out, cache
3、反向傳播函數
- dout:上游梯度,形狀為(N,M)
- cache:前向傳播時緩存的元組(x, w, b),其中:
- x:包含輸入數據的numpy數組,形狀為(N,d_1,...,d_k)
- w:形狀為(D,M)的一系列權重
- b:偏置,形狀為(M,)
def affine_backward(dout, cache):
    """Compute the backward pass of an affine (fully connected) layer.

    Args:
        dout: Upstream gradient of shape (N, M).
        cache: Tuple ``(x, w, b)`` as returned by ``affine_forward``, where
            x has shape (N, d_1, ..., d_k), w has shape (D, M) and b is the
            bias.

    Returns:
        A tuple ``(dx, dw, db)``:
        - dx: Gradient with respect to x, same shape as x.
        - dw: Gradient with respect to w, shape (D, M).
        - db: Gradient with respect to b. It has the same shape as b when
          b holds M elements (e.g. (M,) or (1, M)); otherwise it is (1, M).
    """
    x, w, b = cache
    num_samples = x.shape[0]

    # Gradient w.r.t. the flattened input, restored to the input's shape.
    dx = np.dot(dout, w.T).reshape(x.shape)  # (N, d_1, ..., d_k)

    x_row = x.reshape(num_samples, -1)  # (N, D)
    dw = np.dot(x_row.T, dout)  # (D, M)

    db = np.sum(dout, axis=0, keepdims=True)  # (1, M)
    # Return the gradient in the bias's own shape so that an in-place
    # update such as ``b += -lr * db`` works for a 1-D bias as well.
    if db.size == np.size(b):
        db = db.reshape(np.shape(b))
    return dx, dw, db
4、兩層神經網絡
def _softmax(scores):
    """Return row-wise softmax probabilities for a 2-D array of scores."""
    # Subtracting each row's maximum does not change the result but keeps
    # np.exp from overflowing on large scores.
    exp_scores = np.exp(scores - np.max(scores, axis=1, keepdims=True))
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)


def two_layer_netWork(num_iters=10000, hidden_dim=50, reg=0.001, epsilon=0.001):
    """Train and evaluate a two-layer network that classifies 2-D points by quadrant.

    The network is affine -> ReLU -> affine -> softmax. It is trained with
    plain gradient descent on four fixed points (one per quadrant) and then
    evaluated on four other points, printing the class probabilities and the
    predicted quadrant of each.

    Args:
        num_iters: Number of gradient-descent iterations.
        hidden_dim: Width of the hidden layer.
        reg: L2 regularisation strength applied to the weight gradients.
        epsilon: Learning rate.

    Returns:
        None. On every training iteration the probabilities assigned to the
        correct labels and the loss are printed; after training the test
        probabilities and predicted quadrants are printed.
    """
    # Training points, one in each of quadrants I, II, III, IV, and their
    # zero-based class labels.
    X = np.array([[2, 1],
                  [-1, 1],
                  [-1, -1],
                  [1, -1]])
    t = np.array([0, 1, 2, 3])
    np.random.seed(1)  # fixed seed so runs are reproducible

    input_dim = X.shape[1]  # each point is described by two coordinates
    # The number of classes comes from the labels, not from the number of
    # training samples (the two merely coincide for this data set).
    num_classes = int(np.max(t)) + 1

    W1 = np.random.randn(input_dim, hidden_dim)  # (2, hidden_dim)
    W2 = np.random.randn(hidden_dim, num_classes)  # (hidden_dim, num_classes)
    b1 = np.zeros((1, hidden_dim))
    b2 = np.zeros((1, num_classes))

    num_train = X.shape[0]
    rows = np.arange(num_train)

    for _ in range(num_iters):
        # Forward pass: affine -> ReLU -> affine -> softmax.
        pre_activation, fc_cache = affine_forward(X, W1, b1)
        H = np.maximum(0, pre_activation)
        Y, out_cache = affine_forward(H, W2, b2)
        probs = _softmax(Y)

        # Cross-entropy loss over the probabilities of the correct labels.
        # NOTE: the printed loss is the data loss only; the regularisation
        # term is applied to the gradients below but not added here.
        correct_probs = probs[rows, t]
        print(correct_probs)
        loss = -np.sum(np.log(correct_probs)) / num_train
        print("loss的值為:%f" % (loss))

        # Backward pass. The gradient of the mean cross-entropy w.r.t. the
        # scores is (probs - one_hot(t)) / N.
        dY = probs.copy()
        dY[rows, t] -= 1
        dY /= num_train
        dH, dW2, db2 = affine_backward(dY, out_cache)
        dH[H <= 0] = 0  # ReLU passes gradient only where it was active
        _, dW1, db1 = affine_backward(dH, fc_cache)

        # Add the L2 regularisation gradient, then take a descent step.
        dW2 += reg * W2
        dW1 += reg * W1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2
        W1 += -epsilon * dW1
        b1 += -epsilon * db1

    # Verification: run the same forward pass on unseen points.
    test = np.array([[2, 2], [-2, 2], [-2, -2], [2, -2]])
    hidden, _ = affine_forward(test, W1, b1)
    hidden = np.maximum(0, hidden)
    scores, _ = affine_forward(hidden, W2, b2)
    probs = _softmax(scores)
    print(probs)
    for point, point_probs in zip(test, probs):
        # Labels are zero-based; quadrants are numbered from 1.
        print(point, "所在的象限為", np.argmax(point_probs) + 1)
5、結果
[0.99929731 0.99738312 0.99416875 0.99332853]
loss的值為:0.003966
[[9.99999965e-01 2.60924736e-09 3.25828271e-08 4.07725468e-15]
[1.37643661e-05 9.99909902e-01 7.63013397e-05 3.24322873e-08]
[1.19985338e-12 2.28178352e-06 9.99915510e-01 8.22077698e-05]
[4.18966184e-07 1.07713698e-05 2.83298111e-05 9.99960480e-01]]
[2 2] 所在的象限為 1
[-2 2] 所在的象限為 2
[-2 -2] 所在的象限為 3
[ 2 -2] 所在的象限為 4