Implementing Backpropagation for a Convolutional Neural Network with the Theano Library
First, we define a convolutional layer class containing the forward pass and a function that builds the gradient-descent updates. Note that we never write a backward pass by hand: Theano's `T.grad` backpropagates the cost through the symbolic graph automatically.
```python
import numpy as np
import theano
import theano.tensor as T

class ConvLayer(object):
    def __init__(self, rng, input_shape, filter_shape):
        # input_shape:  (batch size, input channels, height, width)
        # filter_shape: (output channels, input channels, filter height, filter width)
        self.input_shape = input_shape
        self.filter_shape = filter_shape
        # Glorot/Xavier-style uniform initialization of the filters
        fan_in = np.prod(filter_shape[1:])
        fan_out = filter_shape[0] * np.prod(filter_shape[2:])
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            np.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )
        # One bias per output feature map
        self.b = theano.shared(
            np.zeros((filter_shape[0],), dtype=theano.config.floatX),
            borrow=True
        )
        self.params = [self.W, self.b]

    def convolve(self, input):
        # Forward pass: 'valid' convolution followed by a sigmoid nonlinearity
        conv_out = T.nnet.conv2d(
            input=input,
            filters=self.W,
            filter_shape=self.filter_shape,
            input_shape=self.input_shape
        )
        # Broadcast the bias over the batch and spatial dimensions
        return T.nnet.sigmoid(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    def get_cost_updates(self, cost, learning_rate):
        # Backward pass: T.grad symbolically backpropagates the cost
        # to this layer's parameters
        grads = T.grad(cost, self.params)
        return [(param, param - learning_rate * grad)
                for param, grad in zip(self.params, grads)]
```
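To see in isolation that `T.grad` really does yield the backward pass of a convolution, here is a minimal sketch (with hypothetical toy shapes chosen only for illustration):

```python
# Minimal sketch: T.grad backpropagates through conv2d automatically.
x = T.tensor4('x')                      # (batch, channels, height, width)
w = T.tensor4('w')                      # (filters, channels, fh, fw)
loss = T.sum(T.nnet.conv2d(x, w) ** 2)  # arbitrary scalar cost
grad_w = T.grad(loss, w)                # symbolic gradient w.r.t. the filters
f = theano.function([x, w], grad_w, allow_input_downcast=True)
print(f(np.random.rand(1, 1, 8, 8),
        np.random.rand(2, 1, 3, 3)).shape)  # (2, 1, 3, 3), same shape as w
```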
Next, we define a network class that stacks the convolutional layers, adds fully connected layers on top, and compiles training and prediction functions.
```python
class MLP(object):
    def __init__(self, rng, input_shape, filter_shapes, hidden_sizes, output_size):
        self.x = T.tensor4('x')
        self.y = T.ivector('y')  # integer class labels
        self.layers = []
        self.params = []
        # Stack the convolutional layers
        layer_input = self.x
        for filter_shape in filter_shapes:
            layer = ConvLayer(rng=rng, input_shape=input_shape, filter_shape=filter_shape)
            self.layers.append(layer)
            self.params += layer.params
            layer_input = layer.convolve(layer_input)
            # A 'valid' convolution shrinks each spatial dimension by (filter size - 1)
            input_shape = (input_shape[0], filter_shape[0],
                           input_shape[2] - filter_shape[2] + 1,
                           input_shape[3] - filter_shape[3] + 1)
        # Flatten the feature maps into a (batch size, features) matrix
        hidden_layer_input = layer_input.flatten(2)
        hidden_layer_size = input_shape[1] * input_shape[2] * input_shape[3]
        # Fully connected hidden layers with sigmoid activations
        for hidden_size in hidden_sizes:
            bound = np.sqrt(6. / (hidden_layer_size + hidden_size))
            W = theano.shared(
                np.asarray(
                    rng.uniform(low=-bound, high=bound,
                                size=(hidden_layer_size, hidden_size)),
                    dtype=theano.config.floatX
                ),
                borrow=True
            )
            b = theano.shared(np.zeros((hidden_size,), dtype=theano.config.floatX),
                              borrow=True)
            self.params += [W, b]
            hidden_layer_input = T.nnet.sigmoid(T.dot(hidden_layer_input, W) + b)
            hidden_layer_size = hidden_size
        # Softmax output layer
        bound = np.sqrt(6. / (hidden_layer_size + output_size))
        W = theano.shared(
            np.asarray(
                rng.uniform(low=-bound, high=bound,
                            size=(hidden_layer_size, output_size)),
                dtype=theano.config.floatX
            ),
            borrow=True
        )
        b = theano.shared(np.zeros((output_size,), dtype=theano.config.floatX),
                          borrow=True)
        self.params += [W, b]
        self.output = T.nnet.softmax(T.dot(hidden_layer_input, W) + b)
        self.prediction = T.argmax(self.output, axis=1)
        # Negative log-likelihood of the correct class
        self.cost = -T.mean(T.log(self.output)[T.arange(self.y.shape[0]), self.y])
        # Backpropagation: gradient-descent updates for every parameter,
        # convolutional and fully connected alike
        grads = T.grad(self.cost, self.params)
        self.updates = [(param, param - 0.1 * grad)
                        for param, grad in zip(self.params, grads)]
        self.train = theano.function(inputs=[self.x, self.y], outputs=self.cost,
                                     updates=self.updates, allow_input_downcast=True)
        self.predict = theano.function(inputs=[self.x], outputs=self.prediction,
                                       allow_input_downcast=True)
```
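Note that the updates are taken over `self.params` as a whole: differentiating the cost with respect to the fully connected weights as well as the convolutional filters ensures every layer is actually trained. With the configuration used below (28×28 inputs and two 5×5 valid convolutions), the feature maps shrink to 24×24 and then 20×20, so the flattened input to the first hidden layer has 50 × 20 × 20 = 20000 features.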
Finally, we can train and test the model with the following code. Setting the batch dimension of `input_shape` to `None` leaves it unspecified, so the same compiled functions can accept the 100-sample training batch and the 10-sample test batch.
```python
rng = np.random.RandomState(1234)
# Random stand-in data; substitute a real dataset (e.g. MNIST) in practice
train_x = np.random.rand(100, 1, 28, 28).astype(theano.config.floatX)
train_y = np.random.randint(0, 10, size=(100,)).astype(np.int32)
# Batch dimension is None so the graph is not tied to one batch size
mlp = MLP(rng=rng, input_shape=(None, 1, 28, 28),
          filter_shapes=[(20, 1, 5, 5), (50, 20, 5, 5)],
          hidden_sizes=[500], output_size=10)
for i in range(10):
    cost = mlp.train(train_x, train_y)
    print('Epoch %d, cost %f' % (i, cost))
test_x = np.random.rand(10, 1, 28, 28).astype(theano.config.floatX)
test_y = np.random.randint(0, 10, size=(10,)).astype(np.int32)
pred_y = mlp.predict(test_x)
accuracy = np.mean(pred_y == test_y)
print('Accuracy %f' % accuracy)
```
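Because both the training and test sets here are random noise, the reported accuracy will hover around chance level (about 10% for 10 classes); the snippet only demonstrates that the forward and backward passes run end to end.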