自编码器提取数据集特征代码
时间: 2023-11-12 12:04:41 浏览: 85
特征码提取
4星 · 用户满意度95%
以下是使用PyTorch实现自编码器提取数据集特征的示例代码:
```python
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from tqdm import tqdm
# Autoencoder model definition
class Autoencoder(nn.Module):
    """Convolutional autoencoder for 1x28x28 (MNIST) images.

    The encoder compresses an image to a 4x4x4 feature map; the decoder
    mirrors it back to a 1x28x28 reconstruction squashed into [0, 1]
    by a final Sigmoid (matching ToTensor-normalized inputs).
    """

    def __init__(self):
        super().__init__()
        # Encoder: shrink spatial size while reducing channels 1 -> 16 -> 8 -> 4.
        encoder_layers = [
            nn.Conv2d(1, 16, 3, stride=3, padding=1),   # -> 16x10x10
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 8, 3, stride=2, padding=1),   # -> 8x5x5
            nn.ReLU(inplace=True),
            nn.Conv2d(8, 4, 2, stride=1, padding=0),    # -> 4x4x4
            nn.ReLU(inplace=True),
        ]
        self.encoder = nn.Sequential(*encoder_layers)
        # Decoder: transposed convolutions undoing each encoder stage.
        decoder_layers = [
            nn.ConvTranspose2d(4, 8, 2, stride=1, padding=0),                    # -> 8x5x5
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(8, 16, 3, stride=2, padding=1, output_padding=1),  # -> 16x10x10
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 1, 3, stride=3, padding=1),                   # -> 1x28x28
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode then decode; returns the reconstruction of ``x``."""
        latent = self.encoder(x)
        return self.decoder(latent)
# Load the MNIST training set (downloaded to data/ on first run).
train_dataset = MNIST(root='data/', train=True, transform=ToTensor(), download=True)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

# Initialize the autoencoder and its optimizer.
model = Autoencoder()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Create the loss once, outside the loop (the original rebuilt nn.MSELoss()
# on every batch, which is needless allocation).
criterion = nn.MSELoss()

# Train the autoencoder to reconstruct its input.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for img, _ in tqdm(train_loader):  # labels are unused for reconstruction
        optimizer.zero_grad()
        output = model(img)
        loss = criterion(output, img)
        loss.backward()
        optimizer.step()

# Feature extraction: use a NON-shuffled loader so that row i of `features`
# corresponds to sample i of the dataset. (The original iterated the shuffled
# train_loader, producing features in random, unrecoverable order.)
eval_loader = DataLoader(train_dataset, batch_size=128, shuffle=False)
feature_extractor = nn.Sequential(model.encoder, nn.Flatten())
model.eval()  # no dropout/batchnorm here today, but correct if the model grows them
features = []
with torch.no_grad():
    for img, _ in tqdm(eval_loader):
        features.append(feature_extractor(img))
# Shape: (num_samples, 64) — the 4x4x4 encoder output flattened per sample.
features = torch.cat(features, dim=0)
```
这里使用了一个简单的卷积自编码器作为特征提取器,编码器将MNIST手写数字图像压缩为4x4x4的特征图。训练自编码器时使用均方误差(MSE)损失函数重建输入图像,优化器为Adam。训练完成后,仅取编码器部分提取特征,并用`nn.Flatten`将每个4x4x4特征图展平为64维的一维特征向量。
阅读全文