mirror of
https://github.com/apachecn/ailearning.git
synced 2026-02-10 05:45:40 +08:00
261 lines
5.8 KiB
Markdown
261 lines
5.8 KiB
Markdown
# Theano 实例:卷积神经网络
|
||
|
||
In [1]:
|
||
|
||
```py
|
||
import theano
|
||
import theano.tensor as T
|
||
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
|
||
import numpy as np
|
||
from load import mnist
|
||
|
||
# Module-level random stream; dropout() draws its binomial masks from it.
srng = RandomStreams()
|
||
|
||
```
|
||
|
||
```py
|
||
Using gpu device 1: Tesla C2075 (CNMeM is disabled)
|
||
|
||
```
|
||
|
||
从前一节导入有用的函数:
|
||
|
||
In [2]:
|
||
|
||
```py
|
||
def floatX(X):
    """Return ``X`` as a NumPy array whose dtype is ``theano.config.floatX``."""
    target_dtype = theano.config.floatX
    return np.asarray(X, dtype=target_dtype)
|
||
|
||
def init_weights(shape):
    """Create a Theano shared variable of the given shape with small random values.

    The initial values are standard-normal samples scaled by 0.01 and
    converted with ``floatX``.
    """
    initial_values = np.random.randn(*shape) * 0.01
    return theano.shared(floatX(initial_values))
|
||
|
||
def rectify(X):
    """Rectified linear activation: the element-wise maximum of ``X`` and zero."""
    activated = T.maximum(X, 0.)
    return activated
|
||
|
||
def softmax(X):
    """Row-wise softmax of a 2-D tensor.

    Each row has its maximum subtracted before exponentiation; the
    exponentials are then divided by their row sum.
    """
    # dimshuffle(0, 'x') turns the per-row vector into a column so that it
    # broadcasts against the rows of X.
    row_max = X.max(axis=1).dimshuffle(0, 'x')
    exponentials = T.exp(X - row_max)
    row_total = exponentials.sum(axis=1).dimshuffle(0, 'x')
    return exponentials / row_total
|
||
|
||
def dropout(X, p=0.):
    """Apply dropout to ``X`` with drop probability ``p``.

    If ``p`` is not positive, ``X`` is returned untouched. Otherwise ``X``
    is multiplied by a binomial sample drawn from ``srng`` with success
    probability ``1 - p`` and then divided by ``1 - p``.
    """
    if not p > 0:
        return X

    retain_prob = 1 - p
    keep_mask = srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
    X *= keep_mask
    X /= retain_prob
    return X
|
||
|
||
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    """Build the RMSprop update list for ``params`` with respect to ``cost``.

    For every parameter a shared accumulator keeps a running average of
    the squared gradient (decay ``rho``). The gradient is divided by the
    square root of that average plus ``epsilon`` before the step of size
    ``lr`` is taken.

    Returns a list of ``(shared_variable, new_value)`` pairs: for each
    parameter, first the accumulator update and then the parameter update.
    """
    gradients = T.grad(cost=cost, wrt=params)
    updates = []
    for param, grad in zip(params, gradients):
        # Accumulator starts as zeros with the same shape/dtype as the parameter.
        mean_square = theano.shared(param.get_value() * 0.)
        mean_square_new = rho * mean_square + (1 - rho) * grad ** 2
        scaled_grad = grad / T.sqrt(mean_square_new + epsilon)
        updates.extend([
            (mean_square, mean_square_new),
            (param, param - lr * scaled_grad),
        ])
    return updates
|
||
|
||
```
|
||
|
||
与前一节不同,我们使用卷积神经网络来实现这次的模型,为此,我们需要导入 2 维的卷积和池化函数:
|
||
|
||
In [3]:
|
||
|
||
```py
|
||
from theano.tensor.nnet.conv import conv2d
|
||
from theano.tensor.signal.downsample import max_pool_2d
|
||
|
||
```
|
||
|
||
`conv2d` 函数接受两个输入:
|
||
|
||
* 对应输入的 `4D` 张量,其形状如下:
|
||
|
||
`[mini-batch size, number of feature maps at layer m-1, image height, image width]`
|
||
|
||
* 对应参数矩阵的 `4D` 张量,其形状如下:
|
||
|
||
`[number of feature maps at layer m, number of feature maps at layer m-1, filter height, filter width]`
|
||
|
||
为了对图像使用卷积,我们需要将图像转化为原始的 `28 × 28` 大小,同时添加一维表示图像的通道数(黑白图像为 1):
|
||
|
||
In [4]:
|
||
|
||
```py
|
||
# Load MNIST with one-hot encoded labels.
trX, teX, trY, teY = mnist(onehot=True)

# Reshape both image sets to (N, 1, 28, 28); -1 lets NumPy infer N.
trX, teX = (images.reshape(-1, 1, 28, 28) for images in (trX, teX))
|
||
|
||
```
|
||
|
||
注意,在 `reshape` 方法中,某一维传入 `-1` 表示该维的大小将根据数组的总元素个数和其他维的大小自动计算。
|
||
|
||
模型首先进行三层卷积加池化操作,然后在第三层的输出后加一个全连接层,最后在第四层的输出后加上一个 `softmax` 层:
|
||
|
||
In [5]:
|
||
|
||
```py
|
||
def model(X, w, w2, w3, w4, p_drop_conv, p_drop_hidden, w_out=None):
    """Build the symbolic forward pass of the convolutional network.

    Three convolution + max-pooling stages are followed by a fully
    connected layer and a softmax output layer.

    Parameters:
        X: 4-D input tensor laid out as (batch, channels, height, width).
        w, w2, w3: filter tensors of the three convolution layers.
        w4: weight matrix of the fully connected layer.
        p_drop_conv: dropout probability applied to the outputs of the
            three convolution stages.
        p_drop_hidden: dropout probability applied to the fully connected
            layer.
        w_out: weight matrix of the softmax layer. When omitted, the
            module-level ``w_o`` is used, which is what this function
            always did before the parameter existed.

    Returns:
        The tuple ``(l1, l2, l3, l4, pyx)``: the outputs of the four hidden
        stages and the softmax class probabilities.

    The shape comments below assume a mini-batch of 128 images of size
    28 x 28 and the weight shapes used in this tutorial.
    """
    if w_out is None:
        # Backward compatibility: existing callers pass only w..w4 and rely
        # on the global output weights.
        w_out = w_o

    # X: 128 * 1 * 28 * 28
    # w: 32 * 1 * 3 * 3
    # full mode
    # l1a: 128 * 32 * (28 + 3 - 1) * (28 + 3 - 1)
    l1a = rectify(conv2d(X, w, border_mode='full'))
    # l1a: 128 * 32 * 30 * 30
    # ignore_border False
    # l1: 128 * 32 * (30 / 2) * (30 / 2)
    l1 = max_pool_2d(l1a, (2, 2), ignore_border=False)
    l1 = dropout(l1, p_drop_conv)

    # l1: 128 * 32 * 15 * 15
    # w2: 64 * 32 * 3 * 3
    # valid mode
    # l2a: 128 * 64 * (15 - 3 + 1) * (15 - 3 + 1)
    l2a = rectify(conv2d(l1, w2))
    # l2a: 128 * 64 * 13 * 13
    # l2: 128 * 64 * (13 / 2 + 1) * (13 / 2 + 1)
    l2 = max_pool_2d(l2a, (2, 2), ignore_border=False)
    l2 = dropout(l2, p_drop_conv)

    # l2: 128 * 64 * 7 * 7
    # w3: 128 * 64 * 3 * 3
    # l3a: 128 * 128 * (7 - 3 + 1) * (7 - 3 + 1)
    l3a = rectify(conv2d(l2, w3))
    # l3a: 128 * 128 * 5 * 5
    # l3b: 128 * 128 * (5 / 2 + 1) * (5 / 2 + 1)
    l3b = max_pool_2d(l3a, (2, 2), ignore_border=False)
    # l3b: 128 * 128 * 3 * 3
    # l3: 128 * (128 * 3 * 3)
    l3 = T.flatten(l3b, outdim=2)
    l3 = dropout(l3, p_drop_conv)

    # l3: 128 * (128 * 3 * 3)
    # w4: (128 * 3 * 3) * 625
    # l4: 128 * 625
    l4 = rectify(T.dot(l3, w4))
    l4 = dropout(l4, p_drop_hidden)

    # l4: 128 * 625
    # w_out: 625 * 10
    # pyx: 128 * 10
    pyx = softmax(T.dot(l4, w_out))
    return l1, l2, l3, l4, pyx
|
||
|
||
```
|
||
|
||
定义符号变量:
|
||
|
||
In [6]:
|
||
|
||
```py
|
||
# Symbolic inputs: a float32 4-D tensor for the images and a float32 matrix
# for the labels.
X, Y = T.ftensor4(), T.fmatrix()

# Convolution filters: (output maps, input maps, filter height, filter width).
w = init_weights((32, 1, 3, 3))
w2 = init_weights((64, 32, 3, 3))
w3 = init_weights((128, 64, 3, 3))

# Fully connected layer and softmax output layer.
w4 = init_weights((128 * 3 * 3, 625))
w_o = init_weights((625, 10))
|
||
|
||
```
|
||
|
||
使用带 `dropout` 的模型进行训练:
|
||
|
||
In [7]:
|
||
|
||
```py
|
||
# Training graph: dropout is active on the convolution stages and the hidden layer.
noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(
    X, w, w2, w3, w4, p_drop_conv=0.2, p_drop_hidden=0.5)
|
||
|
||
```
|
||
|
||
使用不带 `dropout` 的模型进行预测:
|
||
|
||
In [8]:
|
||
|
||
```py
|
||
# Prediction graph: same weights, both dropout probabilities set to zero.
l1, l2, l3, l4, py_x = model(
    X, w, w2, w3, w4, p_drop_conv=0., p_drop_hidden=0.)
# Predicted class = index of the largest probability in each row.
y_x = T.argmax(py_x, axis=1)
|
||
|
||
```
|
||
|
||
定义损失函数和迭代规则:
|
||
|
||
In [9]:
|
||
|
||
```py
|
||
# Mean categorical cross-entropy between the dropout model's output and the labels.
cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
# Every trainable weight, including the softmax layer's w_o.
params = [w, w2, w3, w4, w_o]
updates = RMSprop(cost=cost, params=params, lr=0.001)
|
||
|
||
```
|
||
|
||
开始训练:
|
||
|
||
In [10]:
|
||
|
||
```py
|
||
# Compile the training step (returns the cost and applies the RMSprop
# updates) and the prediction function.
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

batch_size = 128
for i in range(50):
    # Step through the training set one mini-batch at a time. Slicing clamps
    # at the end of the array, so the final (possibly shorter) batch is
    # trained on as well instead of being silently skipped.
    for start in range(0, len(trX), batch_size):
        end = start + batch_size
        # Use a separate name so the symbolic `cost` is not overwritten.
        batch_cost = train(trX[start:end], trY[start:end])
    # Report test-set accuracy after each pass over the training data.
    accuracy = np.mean(np.argmax(teY, axis=1) == predict(teX))
    print("iter {:03d}, {:.3f}".format(i + 1, accuracy))
|
||
|
||
```
|
||
|
||
```py
|
||
iter 001, 0.917
|
||
iter 002, 0.974
|
||
iter 003, 0.983
|
||
iter 004, 0.984
|
||
iter 005, 0.987
|
||
iter 006, 0.989
|
||
iter 007, 0.991
|
||
iter 008, 0.993
|
||
iter 009, 0.991
|
||
iter 010, 0.992
|
||
iter 011, 0.993
|
||
iter 012, 0.992
|
||
iter 013, 0.992
|
||
iter 014, 0.992
|
||
iter 015, 0.993
|
||
iter 016, 0.992
|
||
iter 017, 0.994
|
||
iter 018, 0.993
|
||
iter 019, 0.993
|
||
iter 020, 0.994
|
||
iter 021, 0.993
|
||
iter 022, 0.993
|
||
iter 023, 0.993
|
||
iter 024, 0.992
|
||
iter 025, 0.994
|
||
iter 026, 0.993
|
||
iter 027, 0.994
|
||
iter 028, 0.993
|
||
iter 029, 0.993
|
||
iter 030, 0.994
|
||
iter 031, 0.994
|
||
iter 032, 0.993
|
||
iter 033, 0.994
|
||
iter 034, 0.994
|
||
iter 035, 0.994
|
||
iter 036, 0.994
|
||
iter 037, 0.994
|
||
iter 038, 0.993
|
||
iter 039, 0.994
|
||
iter 040, 0.994
|
||
iter 041, 0.994
|
||
iter 042, 0.994
|
||
iter 043, 0.995
|
||
iter 044, 0.994
|
||
iter 045, 0.994
|
||
iter 046, 0.994
|
||
iter 047, 0.995
|
||
iter 048, 0.994
|
||
iter 049, 0.994
|
||
iter 050, 0.995
|
||
|
||
``` |