RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 32 but got size 64 for tensor number 1 in the list.
根据提供的代码和错误信息 `RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 32 but got size 64 for tensor number 1 in the list.`,可以推测出问题可能出现在以下几个地方:
输入维度不匹配:在生成器或判别器的前向传播过程中,某个张量的尺寸与预期不符。具体来说,可能是某个卷积层或全连接层的输入尺寸不正确。
数据加载器的问题:在 `DataLoader` 中,批次大小(batch size)被设置为 32,但某些张量的尺寸被期望为 64。这通常发生在数据预处理或模型定义中。
可能的原因及解决方法
1. 检查数据预处理
确保数据预处理步骤中的图像尺寸与模型期望的输入尺寸一致。例如,`transforms.Resize((64, 64))` 将图像调整为 64x64 大小,确保所有后续操作都与此尺寸兼容。
# Preprocessing pipeline: resize to 64x64, convert to a tensor,
# then normalize single-channel pixel values from [0, 1] to [-1, 1].
_preprocess_steps = [
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
]
transform = transforms.Compose(_preprocess_steps)
2. 检查生成器和判别器的定义
确保生成器和判别器的输入和输出尺寸一致。特别是,检查嵌入层(embedding layer)的输出尺寸是否与后续层的输入尺寸匹配。
生成器
生成器的输入是噪声向量 `z` 和类别标签 `labels`,它们被拼接在一起后传递给线性层。确保嵌入层的输出尺寸与噪声向量的尺寸相加后的结果与线性层的输入尺寸一致。
class Generator(nn.Module):
    """Conditional GAN generator: maps (noise, class label) to a 1x64x64 image.

    The integer class label is embedded into a latent_dim-wide vector and
    concatenated with the noise vector, so the fully connected stem takes
    an input of width latent_dim * 2.
    """

    def __init__(self, latent_dim, num_classes, img_shape):
        super(Generator, self).__init__()
        self.latent_dim = latent_dim
        self.num_classes = num_classes
        self.img_shape = img_shape
        # Label embedding has the same width as the noise vector.
        self.embedding = nn.Embedding(num_classes, latent_dim)

        # Fully connected stem projected onto a 512-channel 4x4 feature map.
        layers = [
            nn.Linear(latent_dim * 2, 512 * 4 * 4),
            nn.BatchNorm1d(512 * 4 * 4),
            nn.LeakyReLU(0.2, inplace=True),
            Reshape((512, 4, 4)),
        ]
        # Four stride-2 upsampling stages: 4 -> 8 -> 16 -> 32 -> 64 pixels.
        for ch_in, ch_out in ((512, 256), (256, 128), (128, 64), (64, 32)):
            layers += [
                nn.ConvTranspose2d(ch_in, ch_out, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(ch_out),
                nn.LeakyReLU(0.2, inplace=True),
            ]
        # Two stride-1 refinement convolutions down to a single channel,
        # squashed to [-1, 1] by Tanh.
        layers += [
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.LeakyReLU(0.2, inplace=True),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=1, padding=1),
            nn.Tanh(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, z, labels):
        """Generate images from noise z (B, latent_dim) and integer labels (B,)."""
        label_vec = self.embedding(labels)
        joined = torch.cat((z, label_vec), dim=1)
        return self.model(joined)
判别器
判别器的输入是图像 `img` 和类别标签 `labels`,它们被拼接在一起后传递给卷积层。确保嵌入层的输出尺寸与图像的通道数相加后的结果与卷积层的输入尺寸一致。
class Discriminator(nn.Module):
    """Conditional GAN discriminator: scores a 1x64x64 image given its label.

    The label embedding is reshaped into a one-channel image plane and
    stacked with the input image, so the first convolution sees 2 channels.
    """

    def __init__(self, img_shape, num_classes):
        super(Discriminator, self).__init__()
        self.img_shape = img_shape
        self.num_classes = num_classes
        # Embedding width equals the full pixel count so it can be viewed
        # as a one-channel plane of the same spatial size in forward().
        self.embedding = nn.Embedding(num_classes, int(np.prod(img_shape)))

        def down(ch_in, ch_out, stride):
            # One Conv -> LeakyReLU -> Dropout stage.
            return [
                nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Dropout2d(0.25),
            ]

        # Four stride-2 stages (64 -> 32 -> 16 -> 8 -> 4 pixels), one
        # stride-1 stage, then a linear head with a sigmoid score.
        layers = (
            down(1 + 1, 16, 2)  # image channel + label channel
            + down(16, 32, 2)
            + down(32, 64, 2)
            + down(64, 128, 2)
            + down(128, 256, 1)
        )
        layers += [
            Flatten(),
            nn.Linear(256 * 4 * 4, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, img, labels):
        """Return a validity score in (0, 1) for img (B, 1, H, W) and labels (B,)."""
        label_plane = self.embedding(labels)
        label_plane = label_plane.view(label_plane.size(0), 1, *self.img_shape[1:])
        stacked = torch.cat((img, label_plane), dim=1)
        return self.model(stacked)
3. 调试和验证
在训练过程中,可以在关键位置打印张量的形状,以确保每个步骤的输出尺寸符合预期。
def train_cgan(generator, discriminator, dataloader, latent_dim, num_classes, n_epochs=200001, lr=0.00001):
    """Train a conditional GAN with BCE loss and Adam optimizers.

    Args:
        generator: conditional generator, called as generator(z, labels).
        discriminator: conditional discriminator, called as
            discriminator(imgs, labels), returning values in (0, 1).
        dataloader: yields (imgs, labels) batches.
        latent_dim: width of the noise vector z.
        num_classes: number of label classes sampled when saving images.
        n_epochs: total number of training epochs.
        lr: Adam learning rate shared by both networks.

    NOTE(review): relies on module-level globals `device`, `os`, `np`,
    `Image` (PIL) and writes to a hard-coded Windows path — confirm these
    exist in the surrounding script.
    """
    criterion = nn.BCELoss()
    optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
    optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))
    # Epochs at which sample images are written to disk.
    save_image_iterations = [1000, 5000, 15000, 30000, 50000, 100000, 200000]
    for epoch in range(n_epochs):
        for i, (imgs, labels) in enumerate(dataloader):
            real_imgs = imgs.to(device)
            labels = labels.to(device)
            # BCE targets: 1 for real images, 0 for generated ones.
            valid = torch.ones(imgs.size(0), 1).to(device)
            fake = torch.zeros(imgs.size(0), 1).to(device)
            z = torch.randn(imgs.size(0), latent_dim).to(device)
            # --- Train the discriminator ---
            optimizer_D.zero_grad()
            real_validity = discriminator(real_imgs, labels)
            d_real_loss = criterion(real_validity, valid)
            # detach() keeps generator gradients out of the discriminator step.
            gen_imgs = generator(z, labels).detach()
            fake_validity = discriminator(gen_imgs, labels)
            d_fake_loss = criterion(fake_validity, fake)
            d_loss = (d_real_loss + d_fake_loss) / 2
            d_loss.backward()
            optimizer_D.step()
            # --- Train the generator ---
            optimizer_G.zero_grad()
            gen_imgs = generator(z, labels)
            g_validity = discriminator(gen_imgs, labels)
            g_loss = criterion(g_validity, valid)
            g_loss.backward()
            optimizer_G.step()
            # Per-batch debug output.
            print(f"Epoch [{epoch + 1}/{n_epochs}], Batch [{i + 1}/{len(dataloader)}], D Loss: {d_loss.item()}, G Loss: {g_loss.item()}")
            # Save sample images at selected epochs.
            # NOTE(review): this check sits inside the batch loop, so at a
            # save epoch it fires once per batch rather than once per epoch —
            # probably unintended; verify.
            if (epoch + 1) in save_image_iterations or (epoch == n_epochs - 1 and i == len(dataloader) - 1):
                with torch.no_grad():
                    for label_idx in range(num_classes):
                        # Five fixed-label samples per class.
                        fixed_conditions = torch.tensor([label_idx] * 5).to(device)
                        fixed_z = torch.randn(5, latent_dim).to(device)
                        generated_images = generator(fixed_z, fixed_conditions)
                        generated_images = generated_images.cpu().detach().numpy()
                        output_dir = 'D:/wjy1/generated_images'
                        os.makedirs(output_dir, exist_ok=True)
                        for j in range(len(generated_images)):
                            img = generated_images[j][0]
                            # Map Tanh output [-1, 1] to 8-bit grayscale [0, 255].
                            img = ((img + 1) / 2.0 * 255).astype(np.uint8)
                            img = Image.fromarray(img, mode='L')
                            img.save(os.path.join(output_dir, f'image_epoch_{epoch + 1}_label_{fixed_conditions[j].item()}_idx{j}.png'))
                    print(f"Saved images at epoch {epoch + 1}")
        # Report completion of each epoch.
        print(f"Completed epoch [{epoch + 1}/{n_epochs}]")
通过以上步骤,你应该能够找到并修复导致 `RuntimeError` 的原因。如果问题仍然存在,请提供更多详细的错误日志和上下文信息,以便进一步诊断。
相关推荐

















