LR(逻辑回归)为什么不采用 MSE 而是采用交叉熵损失?
这里的 LR 指逻辑回归(Logistic Regression),而不是学习率(Learning Rate)。学习率本身并不是一种损失函数,而是梯度下降等优化算法中的一个重要超参数,它控制着每次更新参数的步长。MSE(Mean Squared Error,均方误差)和交叉熵损失(Cross-Entropy Loss)则都是用于训练机器学习模型的损失函数:MSE 主要用于回归任务,交叉熵主要用于分类任务。逻辑回归选择交叉熵而不是 MSE,主要有以下几点原因:
1. **对错误预测的惩罚更强**:交叉熵是对数形式的,当模型以接近0或1的概率给出错误预测时,损失 $-\log \hat{y}$ 会趋于无穷大;而在概率输出下,MSE 的单样本损失最大只有1,惩罚有限。因此对于概率接近0或1的错误预测结果,交叉熵的惩罚效果更明显,有助于模型更好地分辨不同类别的边界。
2. **概率解释**:最小化交叉熵等价于对标签的伯努利分布做极大似然估计。在 Sigmoid 输出下,交叉熵对参数的梯度为 $(\hat{y} - y)\,x$,直接对应于模型预测的概率与真实标签之间的差异:预测偏差越大,参数更新幅度越大,这有助于理解模型的学习过程。
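3. **稳定性**:如果对 Sigmoid 输出使用 MSE,梯度中会多出一个因子 $\sigma'(z) = \hat{y}(1 - \hat{y})$,当输出饱和(接近0或1)时该因子趋近于0,梯度随之消失;而交叉熵的梯度中这一因子恰好被抵消,因此在深度学习中交叉熵损失有助于防止梯度消失问题。此外,逻辑回归的交叉熵损失关于参数是凸函数,而 MSE 与 Sigmoid 组合后是非凸的,所以交叉熵在训练过程中更容易收敛。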
### 交叉熵损失函数与其它组件的组合
#### 交叉熵损失函数与Sigmoid激活函数结合
import torch
import torch.nn as nn
# Binary classifier whose last layer is a Sigmoid, so the model emits
# probabilities in [0, 1] -- the input that nn.BCELoss expects.
# NOTE(review): the layers of the original snippet were lost (the call was left
# unclosed); the sizes below (20 input features, 1 output) are illustrative
# placeholders -- replace them with the real architecture.
model = nn.Sequential(
    nn.Linear(20, 1),
    nn.Sigmoid(),
)
criterion = nn.BCELoss()  # Binary Cross Entropy Loss for binary classification with Sigmoid activation.
#### 交叉熵损失函数与Softmax激活函数结合
对于多分类场景,则更倾向于采用`nn.CrossEntropyLoss()`,它实际上集成了Log Softmax操作和NLL(Negative Log Likelihood)损失计算于一体。因此,在定义模型时无需显式添加Softmax层[^1]。
# Multi-class classifier that outputs raw logits.
# NOTE(review): the layers of the original snippet were lost (the call was left
# unclosed); the sizes below (20 inputs, 64 hidden units, 10 classes) are
# illustrative placeholders -- replace them with the real architecture.
model = nn.Sequential(
    nn.Linear(20, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    # No need to add a softmax layer here because it's included within the loss function.
)
criterion = nn.CrossEntropyLoss()  # For multi-class classification without explicit Softmax output.
#### 添加L2正则化项
# L2 regularization is applied through Adam's ``weight_decay`` argument.
# ``model``, ``learning_rate`` and ``l2_lambda`` are not defined in this
# snippet and must be provided by the surrounding code.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=l2_lambda,
)
#### 应用Huber损失替代MSE
import tensorflow as tf


def cross_entropy_loss(y_true, y_pred):
    """Return the mean softmax cross-entropy between labels and logits.

    Args:
        y_true: Label distribution (e.g. one-hot) with the same shape as
            ``y_pred``; classes are on the last axis.
        y_pred: Raw, unnormalized logits.

    Returns:
        A scalar tensor: the cross-entropy averaged over all remaining axes.
    """
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
    return tf.reduce_mean(cross_entropy)


def boundary_loss(y_true, y_pred):
    """Return the mean squared difference of Laplacian edge maps.

    Both tensors are filtered with a 3x3 Laplacian kernel and the filtered
    results are compared with a mean squared error.

    Args:
        y_true: Target tensor, assumed to be NHWC -- TODO confirm with caller.
        y_pred: Prediction tensor with the same shape as ``y_true``. It is
            filtered as given (no softmax is applied here).

    Returns:
        A scalar tensor with the boundary loss.
    """
    laplacian = tf.constant([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=tf.float32)
    # The convolution ops need a 4-D filter; a bare 3x3 tensor is rejected.
    # Tile the kernel once per channel and use a depthwise convolution so each
    # channel is filtered independently (identical to conv2d for one channel).
    channels = y_pred.shape[-1]
    kernel = tf.tile(laplacian[:, :, tf.newaxis, tf.newaxis], [1, 1, channels, 1])
    # The kernel is float32, so bring both inputs to the same dtype.
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    true_edges = tf.nn.depthwise_conv2d(y_true, kernel, strides=[1, 1, 1, 1], padding='SAME')
    pred_edges = tf.nn.depthwise_conv2d(y_pred, kernel, strides=[1, 1, 1, 1], padding='SAME')
    return tf.reduce_mean(tf.square(true_edges - pred_edges))


def total_loss(y_true, y_pred):
    """Return cross-entropy loss plus 0.5 times the boundary loss."""
    return cross_entropy_loss(y_true, y_pred) + 0.5 * boundary_loss(y_true, y_pred)


# Build the model. ``...`` is a placeholder left by the original snippet and
# must be replaced with a real Keras model before ``compile`` can be called.
model = ...

# Compile the model with the combined loss.
model.compile(optimizer='adam', loss=total_loss, metrics=['accuracy'])
import torch
import torch.nn as nn
import torch.nn.functional as F
def cross_entropy_loss(y_true, y_pred):
    """Return the mean cross-entropy between logits and targets.

    Args:
        y_true: Targets in any form accepted by ``F.cross_entropy`` (for
            example integer class indices).
        y_pred: Raw, unnormalized logits with the class dimension at index 1.

    Returns:
        A scalar tensor with the cross-entropy averaged over the batch.
    """
    # PyTorch takes (input, target), the reverse of this function's
    # (y_true, y_pred) signature. Log-softmax is applied internally, so
    # ``y_pred`` must not already be softmax-normalized.
    return F.cross_entropy(y_pred, y_true)
def boundary_loss(y_true, y_pred):
    """Return the MSE between Laplacian edge maps of target and prediction.

    Each channel of both tensors is filtered with a 3x3 Laplacian kernel
    (zero padding of 1, so the spatial size is kept) and the filtered maps
    are compared with a mean squared error.

    Args:
        y_true: Target tensor, either with the same ``(N, C, H, W)`` layout as
            ``y_pred`` or as class indices with one dimension fewer
            (``(N, H, W)``), which are one-hot encoded first.
        y_pred: Prediction tensor of shape ``(N, C, H, W)``. It is filtered as
            given (no softmax is applied here).

    Returns:
        A scalar tensor with the boundary loss.
    """
    channels = y_pred.shape[1]

    # Class-index targets cannot be convolved directly: expand them to a
    # one-hot map laid out like ``y_pred``.
    if y_true.dim() == y_pred.dim() - 1:
        y_true = F.one_hot(y_true.long(), num_classes=channels).movedim(-1, 1)
    # conv2d requires input and kernel to share a dtype.
    y_true = y_true.to(dtype=y_pred.dtype)

    # Create the kernel on the prediction's device/dtype so the loss also
    # works on GPU and with non-float32 tensors.
    laplacian = torch.tensor(
        [[0, 1, 0], [1, -4, 1], [0, 1, 0]],
        dtype=y_pred.dtype,
        device=y_pred.device,
    )
    # One copy of the kernel per channel with ``groups=channels`` filters every
    # channel independently; for a single channel this is a plain convolution.
    kernel = laplacian.view(1, 1, 3, 3).repeat(channels, 1, 1, 1)

    true_edges = F.conv2d(y_true, kernel, padding=1, groups=channels)
    pred_edges = F.conv2d(y_pred, kernel, padding=1, groups=channels)
    return F.mse_loss(true_edges, pred_edges)
def total_loss(y_true, y_pred, boundary_weight=0.5):
    """Return the cross-entropy loss plus a weighted boundary loss.

    Args:
        y_true: Targets; passed unchanged to both ``cross_entropy_loss`` and
            ``boundary_loss``, so they must be valid for both.
        y_pred: Model predictions (logits), passed unchanged to both losses.
        boundary_weight: Multiplier applied to the boundary term. Defaults to
            0.5, the value that was previously hard-coded.

    Returns:
        ``cross_entropy_loss + boundary_weight * boundary_loss``.
    """
    classification_term = cross_entropy_loss(y_true, y_pred)
    boundary_term = boundary_loss(y_true, y_pred)
    return classification_term + boundary_weight * boundary_term
# Build the model
class Model(nn.Module):
    """Small CNN classifier: two 3x3 convolutions, one 2x2 max-pool, two linear layers.

    Args:
        in_channels: Number of channels of the input images (default 3).
        num_classes: Number of output logits (default 10).
        input_size: Height/width of the (square) input images (default 16).
            The single 2x2 pooling halves it, so the defaults give the
            original ``32 * 8 * 8`` features for the first linear layer.
    """

    def __init__(self, in_channels=3, num_classes=10, input_size=16):
        super().__init__()
        pooled_size = input_size // 2  # spatial size after the one 2x2 max-pool
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * pooled_size * pooled_size, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, features)``, a wrong input size then fails in ``fc1``
        # instead of being silently folded into the batch dimension.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
# Instantiate the network and the objects used for training.
model = Model()
loss_fn = total_loss
# ``metrics`` is only a list of metric names; nothing in the visible code
# consumes it.
metrics = ["accuracy"]
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)