Implementing adaptive dropout for a ResNet with the theano library
Adaptive dropout adjusts the dropout probability according to the current state of the network, which can improve its generalization ability. Below is code that builds a ResNet with adaptive dropout using the theano library:
```python
import numpy as np
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams()

def dropout(x, p):
    """Inverted dropout: drop units with probability p, rescale the rest."""
    if p > 0:
        retain_prob = 1 - p
        x *= srng.binomial(x.shape, p=retain_prob, dtype=theano.config.floatX)
        x /= retain_prob
    return x

def conv_layer(x, w_shape, b_shape=None, stride=(1, 1), padding=(0, 0)):
    """2-D convolution with Glorot-uniform weight initialization."""
    fan_in = np.prod(w_shape[1:])
    fan_out = w_shape[0] * np.prod(w_shape[2:]) // np.prod(stride)
    w_bound = np.sqrt(6. / (fan_in + fan_out))
    w = theano.shared(
        np.random.uniform(low=-w_bound, high=w_bound,
                          size=w_shape).astype(theano.config.floatX),
        borrow=True, name='w')
    b = theano.shared(np.zeros(b_shape, dtype=theano.config.floatX),
                      borrow=True, name='b') if b_shape is not None else None
    conv_out = T.nnet.conv2d(x, w, border_mode=padding, subsample=stride)
    if b is not None:
        conv_out = conv_out + b.dimshuffle('x', 0, 'x', 'x')
    return conv_out, w, b

def resnet_layer(x, w_shape, b_shape=None, stride=(1, 1), padding=(0, 0), p=0.5):
    """A convolution followed by dropout."""
    conv_out, w, b = conv_layer(x, w_shape, b_shape, stride, padding)
    conv_out = dropout(conv_out, p)
    return conv_out, w, b

def resnet_block(x, n_layers, w_shape, b_shape=None, stride=(1, 1),
                 padding=(0, 0), p=0.5):
    """A stack of n_layers residual layers. The first layer may change the
    channel count and/or resolution; the shortcut is then projected with a
    strided 1x1 convolution so the shapes match."""
    for i in range(n_layers):
        shortcut = x
        if i == 0:
            x, w, b = resnet_layer(x, w_shape, b_shape, stride, padding, p)
            # Shapes are known at graph-construction time, so the check is
            # done on the filter shape and stride, not on symbolic tensors.
            if w_shape[0] != w_shape[1] or stride != (1, 1):
                shortcut, _, _ = conv_layer(shortcut,
                                            (w_shape[0], w_shape[1], 1, 1),
                                            stride=stride, padding=(0, 0))
        else:
            # Subsequent layers keep the channel count and resolution fixed.
            x, w, b = resnet_layer(x, (w_shape[0], w_shape[0]) + w_shape[2:],
                                   b_shape, stride=(1, 1), padding=padding, p=p)
        x = T.nnet.relu(x + shortcut)
    return x, w, b

def resnet(input_var=None, n=5, p=0.5):
    x = input_var if input_var is not None else T.tensor4('x')
    y = T.lvector('y')
    out = (x - 128) / 128  # normalize 8-bit pixels to roughly [-1, 1]
    out, w, b = resnet_layer(out, (16, 3, 3, 3), (16,),
                             stride=(1, 1), padding=(1, 1), p=p)
    # Stage 1: 16 feature maps
    for i in range(3):
        out, w, b = resnet_block(out, n, (16, 16, 3, 3), (16,),
                                 stride=(1, 1), padding=(1, 1), p=p)
    # Stage 2: 32 feature maps, downsample once
    out, w, b = resnet_block(out, n, (32, 16, 3, 3), (32,),
                             stride=(2, 2), padding=(1, 1), p=p)
    for i in range(3):
        out, w, b = resnet_block(out, n, (32, 32, 3, 3), (32,),
                                 stride=(1, 1), padding=(1, 1), p=p)
    # Stage 3: 64 feature maps, downsample once
    out, w, b = resnet_block(out, n, (64, 32, 3, 3), (64,),
                             stride=(2, 2), padding=(1, 1), p=p)
    for i in range(3):
        out, w, b = resnet_block(out, n, (64, 64, 3, 3), (64,),
                                 stride=(1, 1), padding=(1, 1), p=p)
    # Global average pooling, then a fully connected softmax classifier
    # (the pooled tensor is 2-D, so a dense layer is used, not a conv).
    out = T.mean(out, axis=(2, 3))
    fc_bound = np.sqrt(6. / (64 + 10))
    w_fc = theano.shared(
        np.random.uniform(low=-fc_bound, high=fc_bound,
                          size=(64, 10)).astype(theano.config.floatX),
        borrow=True, name='w_fc')
    b_fc = theano.shared(np.zeros((10,), dtype=theano.config.floatX),
                         borrow=True, name='b_fc')
    out = T.dot(out, w_fc) + b_fc
    p_y_given_x = T.nnet.softmax(out)
    y_pred = T.argmax(p_y_given_x, axis=1)
    # Returns: input variable, target variable, class probabilities, predictions
    return x, y, p_y_given_x, y_pred
```
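Since `resnet` returns the symbolic input, target, class probabilities, and predicted labels, training and prediction functions can be compiled from them. A minimal sketch, assuming the definitions above; the parameter-collection trick (scanning the graph for float shared variables) and the learning rate are illustrative choices, not part of the original answer:
```python
# Sketch: compile training and prediction functions from the graph above.
x, y, p_y_given_x, y_pred = resnet(n=5, p=0.3)

cost = T.mean(T.nnet.categorical_crossentropy(p_y_given_x, y))

# Collect trainable parameters by scanning the graph for float shared
# variables (this skips the integer RNG state of the random streams).
params = [v for v in theano.gof.graph.inputs([cost])
          if isinstance(v, theano.compile.SharedVariable)
          and v.dtype == theano.config.floatX]
grads = T.grad(cost, params)

lr = 0.01  # illustrative learning rate
updates = [(prm, prm - lr * g) for prm, g in zip(params, grads)]

train_fn = theano.function([x, y], cost, updates=updates)
predict_fn = theano.function([x], y_pred)
```
Note that this graph applies dropout at prediction time as well, so `predict_fn` is stochastic; for deterministic inference one would rebuild the graph with `p=0`.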
The code implements a CIFAR-style ResNet whose depth is controlled by `n`: an initial convolution, three stages of residual blocks (16, 32, and 64 feature maps, downsampling between stages), global average pooling, and a softmax classifier. The `resnet_layer` function implements a convolution plus dropout, `resnet_block` implements a stack of residual layers, and `resnet` assembles the whole network. Note that in `resnet_block`, when a block changes the number of channels or the spatial resolution, the shortcut must be projected with a strided 1×1 convolution so the shapes match.
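One caveat: the code above fixes `p` per layer at graph-construction time (the Python-level `if p > 0` runs only once), so the rate cannot react to the network's state during training. One way to make it adjustable at run time is to hold the rate in a shared variable; a hypothetical sketch, not part of the original answer:
```python
# Sketch: a dropout whose rate lives in a shared variable so the host
# training loop can adapt it between updates (e.g. anneal it when the
# validation loss plateaus). `p_shared` and the schedule are illustrative.
p_shared = theano.shared(np.float32(0.5), name='p')

def adaptive_dropout(x):
    retain_prob = 1.0 - p_shared          # symbolic retain probability
    mask = srng.binomial(x.shape, p=retain_prob, dtype=theano.config.floatX)
    return x * mask / retain_prob         # inverted dropout scaling

# The outer training loop can then adjust the rate without recompiling:
# p_shared.set_value(np.float32(new_p))
```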