# 神经网络的学习

• 对于一般的机器学习，机器会根据样本的“特征”来调整参数，而特征是由人来设置的。
• 对于神经网络，我们直接将整个样本输入到网络，无需设置任何特征。

## 损失函数

### 均方误差

$E = \frac{1}{2} \sum_k (y_k - t_k)^2$
import numpy as np


def mean_squared_error(y, t):
    """Return the mean-squared-error loss 0.5 * sum((y - t)^2)."""
    diff = y - t
    return 0.5 * np.sum(diff * diff)


# Two predictions for the same one-hot target: y1 is more confident
# in the correct class, so it should get the lower loss.
y1 = np.array([0.1, 0.7, 0.2])
y2 = np.array([0.1, 0.5, 0.4])
t = np.array([0, 1, 0])

[mean_squared_error(t, t),
 mean_squared_error(y1, t),
 mean_squared_error(y2, t)]

OUTPUT
[0.0, 0.07000000000000002, 0.21000000000000002]


### 交叉熵误差

$E=-\sum_k t_k \log y_k$

batch 学习的交叉熵误差：

$E=-\frac{1}{N} \sum_n \sum_k t_{n,k} \log y_{n,k}$

import numpy as np


def cross_entropy_error(y, t):
    """Cross-entropy loss averaged over the batch.

    Accepts a single sample (1-D ``y``) or a batch (2-D ``y``).
    ``t`` may be one-hot encoded or an array of correct-class indices.
    """
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # One-hot labels are converted to correct-class indices.
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]

    # The tiny 1e-7 keeps log() away from -inf when a probability is 0.
    picked = y[np.arange(batch_size), t]
    return -np.sum(np.log(picked + 1e-7)) / batch_size


y1 = np.array([0.1, 0.7, 0.2])
y2 = np.array([0.1, 0.5, 0.4])
t = np.array([0, 1, 0])

[cross_entropy_error(t, t),
 cross_entropy_error(y1, t),
 cross_entropy_error(y2, t)]

OUTPUT
[-9.999999505838704e-08, 0.3566748010815999, 0.6931469805599654]


## 梯度

$w_0' = w_0 - \eta\frac{\partial L(f(x,w))}{\partial w_0}$

$\eta$ 称为学习率

def numerical_gradient(f, x):
    """Brute-force numerical gradient of ``f`` at ``x`` (central difference).

    Args:
        f: scalar-valued function of the array ``x``.
        x: numpy float array; each element is perturbed in place and
           restored before the function returns.

    Returns:
        Array with the same shape as ``x`` holding df/dx per element.
    """
    h = 1e-4  # 0.0001 — step size for the finite difference

    # BUG FIX: grad was never initialized, so `grad[idx] = ...` below
    # raised NameError on the first iteration.
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]

        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x+h)

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)

        # Slope between the two nearby points (central difference).
        grad[idx] = (fxh1 - fxh2) / (2 * h)

        x[idx] = tmp_val  # restore the original value
        it.iternext()

    # BUG FIX: the gradient was computed but never returned, so callers
    # such as `dW = numerical_gradient(f, net.W)` received None.
    return grad


import numpy as np


class simpleNet:
    """Minimal one-layer network: a 2x3 weight matrix scored with
    softmax + cross-entropy (``softmax`` and ``cross_entropy_error``
    are defined elsewhere in this file)."""

    def __init__(self):
        # Weights start from a standard normal distribution.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Class scores: the affine map x @ W."""
        return x.dot(self.W)

    def loss(self, x, t):
        """Cross-entropy loss of the softmax-ed scores against target t."""
        scores = self.predict(x)
        probs = softmax(scores)
        return cross_entropy_error(probs, t)

# One input sample and its one-hot target (class 2 is the correct one).
x = np.array([0.6, 0.9])
t = np.array([0, 0, 1])

net = simpleNet()

# numerical_gradient perturbs net.W in place, so this closure ignores
# its argument and always reads the current weights through `net`.
f = lambda w: net.loss(x, t)
dW = numerical_gradient(f, net.W)

print(net.loss(x, t))
print(dW)

OUTPUT
0.2698596081175344
[[ 0.11629091  0.02561713 -0.14190804]
[ 0.17443636  0.03842569 -0.21286206]]


eta = 1  # learning rate

print(net.W)
print(net.loss(x, t))

# Ten steps of plain gradient descent on the weights.
for _ in range(10):
    net.W = net.W - eta * numerical_gradient(f, net.W)

print(net.W)
print(net.loss(x, t))

OUTPUT
[[-0.52760346 -0.60826468  0.14784348]
[-0.53011149 -2.15726367  0.54289599]]
0.2698596081175344
[[-0.98917483 -0.74641069  0.74756086]
[-1.22246854 -2.36448269  1.44247207]]
0.044317052837686284


# 回到MNIST数据集

（此处省略 TwoLayerNet 的定义与训练代码。）下面是训练过程中每个 epoch 在训练集和测试集上的识别精度：
OUTPUT
train acc, test acc | 0.09863333333333334, 0.0958
train acc, test acc | 0.7995166666666667, 0.8065
train acc, test acc | 0.8767, 0.8792
train acc, test acc | 0.898, 0.9016
train acc, test acc | 0.9086333333333333, 0.9114
train acc, test acc | 0.9147666666666666, 0.9157
train acc, test acc | 0.91985, 0.9204
train acc, test acc | 0.9245, 0.9257
train acc, test acc | 0.9285666666666667, 0.93
train acc, test acc | 0.9315, 0.9325
train acc, test acc | 0.93455, 0.9357
train acc, test acc | 0.9379166666666666, 0.9383
train acc, test acc | 0.94035, 0.9403
train acc, test acc | 0.9420166666666666, 0.9408
train acc, test acc | 0.9448666666666666, 0.9442
train acc, test acc | 0.9472833333333334, 0.9468
train acc, test acc | 0.9488166666666666, 0.947