# 神经网络

## 从感知机到神经网络

• 输入层
• 中间层（隐藏层）
• 输出层

$y=h(a)\\ a=b+w_1x_1+w_2x_2\\ h(x)=\begin{cases} 0 & x\leq 0\\ 1 & x > 0 \end{cases}$

## 常见的激活函数

### 阶跃函数

$h(x)=\begin{cases} 0 & x\leq 0\\ 1 & x > 0 \end{cases}$
import numpy as np
import matplotlib.pylab as plt

def step_function(x):
    """Step activation: 1 where x > 0, else 0.

    Args:
        x: scalar or numpy array.

    Returns:
        numpy int array of 0/1 with the same shape as ``x``.
    """
    # NOTE: fixed indentation (the body was at column 0) and replaced the
    # deprecated ``np.int`` alias (removed in NumPy >= 1.24) with builtin int.
    return np.array(x > 0, dtype=int)

X = np.arange(-5.0, 5.0, 0.1)
Y = step_function(X)
plt.plot(X, Y)
plt.ylim(-0.1, 1.1)  # limit the y-axis range of the plot
plt.show()


### sigmoid 函数

$h(x)=\frac{1}{1+\exp(-x)}$
import numpy as np
import matplotlib.pylab as plt

def sigmoid(x):
    """Sigmoid activation: 1 / (1 + exp(-x)).

    Args:
        x: scalar or numpy array.

    Returns:
        Values in (0, 1), elementwise, same shape as ``x``.
    """
    # NOTE: fixed indentation — the body was at column 0 (SyntaxError).
    return 1 / (1 + np.exp(-x))

X = np.arange(-5.0, 5.0, 0.1)
Y = sigmoid(X)
plt.plot(X, Y)
plt.ylim(-0.1, 1.1)  # limit the y-axis range of the plot
plt.show()


### ReLU函数（Rectified Linear Unit）

$h(x)=\begin{cases} 0 & x\leq 0\\ x & x > 0 \end{cases}$
import numpy as np
import matplotlib.pylab as plt

def relu(x):
    """ReLU activation: max(0, x) elementwise.

    Args:
        x: scalar or numpy array.

    Returns:
        ``x`` with negative entries clamped to 0.
    """
    # NOTE: fixed indentation — the body was at column 0 (SyntaxError).
    return np.maximum(0, x)

x = np.arange(-5.0, 5.0, 0.1)
y = relu(x)
plt.plot(x, y)
plt.ylim(-1.0, 5.5)  # limit the y-axis range of the plot
plt.show()


## 利用矩阵乘法实现神经网络

$y_1=1x_1+2x_2\\ y_2=3x_1+4x_2\\ y_3=5x_1+6x_2\\$

\begin{align} Y&=X \cdot\; W\\ \begin{bmatrix} y_1 & y_2 & y_3 \end{bmatrix}&= \begin{bmatrix} x_1 & x_2 \end{bmatrix}\cdot \begin{bmatrix} 1 & 3 & 5\\ 2 & 4 & 6 \end{bmatrix}\\ 3&=2 \cdot\; 2\times 3 \end{align}

$Y=h(A)=h(XW+B)$

# Forward pass through a 3-layer network (x, W1..W3, b1..b3 defined elsewhere):
# each layer computes an affine transform followed by an activation.
a1 = np.dot(x, W1) + b1
z1 = sigmoid(a1)

a2 = np.dot(z1, W2) + b2
z2 = sigmoid(a2)

a3 = np.dot(z2, W3) + b3
y = softmax(a3)  # output layer uses softmax to produce class probabilities


## 输出层

$y_k = \frac{\exp(a_k)}{\sum_{i=1}^n \exp(a_i)}$

（使用指数函数是为了使负数 $a_i$ 也可以用于计算概率）

$y_k = \frac{\exp(a_k-C)}{\sum_{i=1}^n \exp(a_i-C)}$
import numpy as np

a=np.array([1010,1000,990])
np.exp(a)/np.sum(np.exp(a)) # exponential overflow: exp(1010) exceeds float64 range, giving inf/inf = nan

OUTPUT:
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:5: RuntimeWarning: overflow encountered in exp
"""
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:5: RuntimeWarning: invalid value encountered in true_divide
"""

array([nan, nan, nan])

c=np.max(a)
np.exp(a-c)/np.sum(np.exp(a-c)) # subtracting the max prevents exponential overflow without changing the result

OUTPUT:
array([9.99954600e-01, 4.53978686e-05, 2.06106005e-09])

def softmax(x):
    """Numerically-stable softmax.

    Args:
        x: 1-D array of scores, or 2-D array (batch, classes).

    Returns:
        Probabilities with the same shape as ``x``; each row sums to 1.
    """
    # NOTE: fixed indentation — both branches were flattened to column 0.
    if x.ndim == 2:
        # Transpose so the reduction runs over axis 0, then transpose back.
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T

    x = x - np.max(x)  # overflow countermeasure: shift by the max
    return np.exp(x) / np.sum(np.exp(x))


# 开始搭建神经网络

## 手写数字识别

MNIST 手写数字图像集是机器学习最常用的数据集之一。有句话说，如果你的网络在 MNIST 上跑不过，那就别指望它能用。

• normalize=True 正规化，即单个像素是 0~255（False） 还是 0~1（True）
• flatten=True 一维化，即是一张 28x28 的图片，还是一张 1x784 的长线
• one_hot_label=False one-hot标签，即标签是 0~9，还是 [0,0,1,0,0,0,0,0,0,0]

tf.keras.datasets.mnist 也有 load_data() 函数，但它不会对数据做任何处理。

import os
# import tensorflow as tf
import matplotlib.pyplot as plt

# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# tf.compat.v1.enable_eager_execution()

# print("TensorFlow Version:\t", tf.__version__)

## Load MNIST
# mnist = tf.keras.datasets.mnist

## Read in the data (load_mnist is the book's helper — defined elsewhere)
(x_train, y_train), (x_test, y_test) = load_mnist(flatten=False, normalize=True)

print(x_train.shape,
      y_train.shape,
      x_test.shape,
      y_test.shape
      )  # (60000, 1, 28, 28) (60000,) (10000, 1, 28, 28) (10000,)

fig, ax = plt.subplots(nrows=5, ncols=5, sharex='all', sharey='all')
ax = ax.flatten()
## Show the first 25 images in a 5x5 grid
# NOTE: fixed indentation — the loop body was at column 0 (SyntaxError).
for i in range(25):
    img = x_train[i].reshape(28, 28)
    ax[i].set_title(y_train[i])
    ax[i].imshow(img, cmap='Greys', interpolation='nearest')
ax[0].set_xticks([])  # shared axes: clearing ticks once clears them everywhere
ax[0].set_yticks([])
plt.tight_layout()
plt.show()

OUTPUT:
(60000, 1, 28, 28) (60000,) (10000, 1, 28, 28) (10000,)


• 输入 784
• 隐藏层1 50个神经元 784x50
• 隐藏层2 100个神经元 50x100
• 输出 100x10
def predict(network, x):
    """Forward pass of the 3-layer network (784 -> 50 -> 100 -> 10).

    Args:
        network: dict with weights 'W1'..'W3' and biases 'b1'..'b3'.
        x: one flattened image (784,) or a batch (N, 784).

    Returns:
        Softmax probabilities over the 10 digit classes.
    """
    # NOTE: fixed indentation — the body was at column 0 (SyntaxError).
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)

    return y


(x_train, y_train), (x_test, y_test) = load_mnist(flatten=True, normalize=True, one_hot_label=False)

# Random, untrained weights — accuracy should be near chance (~10%).
network = {}
network['W1'] = np.random.rand(784, 50)
network['W2'] = np.random.rand(50, 100)
network['W3'] = np.random.rand(100, 10)
network['b1'] = np.random.rand(50)
network['b2'] = np.random.rand(100)
network['b3'] = np.random.rand(10)

# NOTE: fixed indentation — the loop body was at column 0 (SyntaxError).
accuracy_cnt = 0
for i in range(len(x_test)):
    y = predict(network, x_test[i])
    p = np.argmax(y)  # index of the highest-probability class
    if p == y_test[i]:
        accuracy_cnt += 1

print("Accuracy:" + str(float(accuracy_cnt) / len(x_test)))


OUTPUT:
Accuracy:0.0958


batch_size = 100  # number of images processed per forward pass
accuracy_cnt = 0

# NOTE: fixed indentation — the loop body was at column 0 (SyntaxError).
for i in range(0, len(x_test), batch_size):
    x_batch = x_test[i:i+batch_size]
    y_batch = predict(network, x_batch)
    p = np.argmax(y_batch, axis=1)  # per-row argmax over the 10 classes
    accuracy_cnt += np.sum(p == y_test[i:i+batch_size])

print("Accuracy:" + str(float(accuracy_cnt) / len(x_test)))

OUTPUT:
Accuracy:0.0958