Plotting loss vs. the number of iterations in Python
Below is example code that uses the Matplotlib library to plot loss against the number of iterations in Python:
```python
import matplotlib.pyplot as plt

# List of loss values
loss = [0.5, 0.4, 0.3, 0.2, 0.1, 0.05, 0.02, 0.01]
# List of iteration (epoch) indices
epochs = [i for i in range(len(loss))]

# Draw the loss-vs-iterations line chart with Matplotlib
plt.plot(epochs, loss, 'r', label='Training Loss')

# Set the chart title and axis labels
plt.title('Loss vs. Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')

# Add a legend
plt.legend()

# Show the chart
plt.show()
```
Replace `loss` and `epochs` in the example with your own lists of loss values and iteration counts, run the code, and you will get your own loss-vs-iterations curve.
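In practice, the loss values are usually collected inside a training loop rather than typed in by hand. Below is a minimal sketch of that pattern; `train_one_epoch()` is a hypothetical placeholder for your own training step that returns the average loss of one epoch.
```python
import matplotlib.pyplot as plt

num_epochs = 50
loss_history = []
for epoch in range(num_epochs):
    # train_one_epoch() is a placeholder: substitute your own training code
    # that runs one epoch and returns its average loss.
    epoch_loss = train_one_epoch()
    loss_history.append(epoch_loss)

# Plot the recorded losses exactly as in the example above
plt.plot(range(len(loss_history)), loss_history, label='Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
```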
Related questions
Univariate linear regression with Keras in Python on the Boston housing data, showing the loss curve as a function of the number of iterations
To run a linear regression on the Boston housing data with Keras and plot the loss as a function of the number of iterations, you first need to install the necessary libraries, such as `numpy`, `pandas`, `matplotlib`, and `tensorflow` or `keras` (Keras is now part of TensorFlow). Note that `load_boston` was removed from scikit-learn in version 1.2, so the code below requires an older scikit-learn release; an alternative dataset is shown after step 2. Here is a simple step-by-step example:
1. Import the required libraries:
```python
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
```
2. Load and preprocess the Boston housing data:
```python
boston = load_boston()
data = pd.DataFrame(boston.data, columns=boston.feature_names)
target = boston.target
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)
```
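If your scikit-learn version no longer includes `load_boston`, the same step can be sketched with the California housing dataset instead (only the data-loading lines change; the rest of the example stays the same apart from the number of features):
```python
from sklearn.datasets import fetch_california_housing

# Alternative for scikit-learn >= 1.2, where load_boston was removed
housing = fetch_california_housing()
data = pd.DataFrame(housing.data, columns=housing.feature_names)
target = housing.target
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)
```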
3. Build, compile, and fit the model:
```python
model = Sequential([
    Dense(1, input_shape=(X_train.shape[1],), activation='linear')  # a single linear layer, i.e. plain linear regression
])
model.compile(optimizer=Adam(), loss='mse', metrics=['mae'])  # mean squared error as the loss
history = model.fit(X_train, y_train, epochs=100, validation_split=0.1, verbose=0)  # a validation split to monitor performance
```
4. Plot the loss curves recorded during training:
```python
train_loss = history.history['loss']
val_loss = history.history['val_loss']
plt.plot(train_loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Mean Squared Error (MSE)')
plt.title('Loss Function vs Iterations for Boston Housing Regression')
plt.legend()
plt.show()
```
This example shows how to build a simple linear regression model with Keras and, by visualizing how the loss changes during training, observe how well the model is learning.
Optimize the Beale function by hand with the GD, SGD, NAG, Adagrad, Adadelta, RMSprop, and Adam algorithms, implement them in Python with detailed explanations, compare their convergence speed against gradient descent, and plot how the objective value changes with the number of iterations.
First, let us look at the expression of the Beale function:
$f(x,y) = (1.5 - x + xy)^2 + (2.25 - x + xy^2)^2 + (2.625 - x + xy^3)^2$
Our goal is to optimize this function with the GD, SGD, NAG, Adagrad, Adadelta, RMSprop, and Adam algorithms.
Working through every update by hand is rather tedious, so we go straight to the Python implementation. First, define the Beale function and its gradient:
```python
import numpy as np

def beale(x):
    """
    Beale function
    """
    return (1.5 - x[0] + x[0]*x[1])**2 + (2.25 - x[0] + x[0]*(x[1]**2))**2 + (2.625 - x[0] + x[0]*(x[1]**3))**2

def beale_gradient(x):
    """
    Gradient of the Beale function
    """
    # Residuals of the three squared terms
    r1 = 1.5 - x[0] + x[0]*x[1]
    r2 = 2.25 - x[0] + x[0]*x[1]**2
    r3 = 2.625 - x[0] + x[0]*x[1]**3
    grad = np.zeros_like(x)
    grad[0] = 2*r1*(x[1] - 1) + 2*r2*(x[1]**2 - 1) + 2*r3*(x[1]**3 - 1)
    grad[1] = 2*r1*x[0] + 2*r2*(2*x[0]*x[1]) + 2*r3*(3*x[0]*x[1]**2)
    return grad
```
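For reference, the analytical gradient implemented above follows from applying the chain rule to each squared term:
$\frac{\partial f}{\partial x} = 2(1.5 - x + xy)(y - 1) + 2(2.25 - x + xy^2)(y^2 - 1) + 2(2.625 - x + xy^3)(y^3 - 1)$
$\frac{\partial f}{\partial y} = 2(1.5 - x + xy)\,x + 2(2.25 - x + xy^2)\,2xy + 2(2.625 - x + xy^3)\,3xy^2$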
Next, we implement GD, SGD, NAG, Adagrad, Adadelta, RMSprop, and Adam one by one:
```python
def gd(x_init, lr=0.01, num_epochs=1000, tol=1e-6):
    """
    Gradient descent
    """
    x = x_init.copy()
    loss_history = []
    for i in range(num_epochs):
        loss = beale(x)
        grad = beale_gradient(x)
        x -= lr*grad
        loss_history.append(loss)
        if np.linalg.norm(grad) < tol:
            break
    return x, loss_history

def sgd(x_init, lr=0.01, num_epochs=1000, tol=1e-6):
    """
    "Stochastic" gradient descent: at each inner step, randomly pick one of the
    three squared terms of the Beale function and step along that term's gradient.
    """
    coeffs = [1.5, 2.25, 2.625]
    x = x_init.copy()
    loss_history = []
    for i in range(num_epochs):
        loss = 0.0
        for j in range(100):
            k = np.random.randint(0, 3)
            c, p = coeffs[k], k + 1
            r = c - x[0] + x[0]*x[1]**p                   # residual of the sampled term
            grad = np.array([2*r*(x[1]**p - 1),           # d/dx of the sampled term
                             2*r*p*x[0]*x[1]**(p - 1)])   # d/dy of the sampled term
            x -= lr*grad
            loss += beale(x)
        loss /= 100
        loss_history.append(loss)
        if i > 50 and np.std(loss_history[-50:]) < tol:
            break
    return x, loss_history

def nag(x_init, lr=0.01, gamma=0.9, num_epochs=1000, tol=1e-6):
    """
    Nesterov accelerated gradient
    """
    x = x_init.copy()
    v = np.zeros_like(x)
    loss_history = []
    for i in range(num_epochs):
        loss = beale(x)
        grad = beale_gradient(x - gamma*v)
        v = gamma*v + lr*grad
        x -= v
        loss_history.append(loss)
        if np.linalg.norm(grad) < tol:
            break
    return x, loss_history

def adagrad(x_init, lr=0.01, eps=1e-8, num_epochs=1000, tol=1e-6):
    """
    Adagrad
    """
    x = x_init.copy()
    G = np.zeros_like(x)
    loss_history = []
    for i in range(num_epochs):
        loss = beale(x)
        grad = beale_gradient(x)
        G += grad**2
        x -= lr*grad/np.sqrt(G + eps)
        loss_history.append(loss)
        if np.linalg.norm(grad) < tol:
            break
    return x, loss_history

def adadelta(x_init, gamma=0.9, eps=1e-8, num_epochs=1000, tol=1e-6):
    """
    Adadelta (no learning rate; the step size is adapted automatically)
    """
    x = x_init.copy()
    G = np.zeros_like(x)
    delta = np.zeros_like(x)
    loss_history = []
    for i in range(num_epochs):
        loss = beale(x)
        grad = beale_gradient(x)
        G = gamma*G + (1 - gamma)*grad**2
        delta_x = np.sqrt(delta + eps)/np.sqrt(G + eps)*grad
        x -= delta_x
        delta = gamma*delta + (1 - gamma)*delta_x**2
        loss_history.append(loss)
        if np.linalg.norm(grad) < tol:
            break
    return x, loss_history

def rmsprop(x_init, lr=0.01, gamma=0.9, eps=1e-8, num_epochs=1000, tol=1e-6):
    """
    RMSprop
    """
    x = x_init.copy()
    G = np.zeros_like(x)
    loss_history = []
    for i in range(num_epochs):
        loss = beale(x)
        grad = beale_gradient(x)
        G = gamma*G + (1 - gamma)*grad**2
        x -= lr*grad/np.sqrt(G + eps)
        loss_history.append(loss)
        if np.linalg.norm(grad) < tol:
            break
    return x, loss_history

def adam(x_init, lr=0.01, beta1=0.9, beta2=0.999, eps=1e-8, num_epochs=1000, tol=1e-6):
    """
    Adam
    """
    x = x_init.copy()
    m = np.zeros_like(x)
    v = np.zeros_like(x)
    t = 0
    loss_history = []
    for i in range(num_epochs):
        loss = beale(x)
        grad = beale_gradient(x)
        t += 1
        m = beta1*m + (1 - beta1)*grad
        v = beta2*v + (1 - beta2)*grad**2
        m_hat = m/(1 - beta1**t)
        v_hat = v/(1 - beta2**t)
        x -= lr*m_hat/(np.sqrt(v_hat) + eps)
        loss_history.append(loss)
        if np.linalg.norm(grad) < tol:
            break
    return x, loss_history
```
Next, we define a function that compares the convergence speed of these algorithms and plots the objective value against the number of iterations:
```python
import time
import matplotlib.pyplot as plt

def compare_algorithms():
    np.random.seed(123)
    x_init = np.array([-4.5, 4.5])
    lr = 0.01  # note: plain GD can diverge from this starting point at this step size; reduce lr if the loss overflows
    num_epochs = 10000
    tol = 1e-6
    algorithms = {
        'GD': gd,
        'SGD': sgd,
        'NAG': nag,
        'Adagrad': adagrad,
        'Adadelta': adadelta,
        'RMSprop': rmsprop,
        'Adam': adam
    }
    plt.figure(figsize=(12, 6))
    for name, algorithm in algorithms.items():
        print('Running', name, '...')
        start_time = time.time()
        if name == 'Adadelta':
            # Adadelta adapts its own step size and takes no lr argument
            x, loss_history = algorithm(x_init, num_epochs=num_epochs, tol=tol)
        else:
            x, loss_history = algorithm(x_init, lr=lr, num_epochs=num_epochs, tol=tol)
        end_time = time.time()
        print('Time taken:', end_time - start_time, 'seconds')
        print('Final loss:', loss_history[-1])
        plt.plot(np.arange(len(loss_history)), loss_history, label=name)
    plt.xlabel('Iteration')
    plt.ylabel('Objective function')
    plt.title('Comparison of optimization algorithms')
    plt.legend()
    plt.show()

compare_algorithms()
```
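Because the objective values can span many orders of magnitude across algorithms, adding `plt.yscale('log')` just before `plt.show()` usually makes the convergence comparison much easier to read.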
Running the code above produces results along the following lines (the exact numbers depend on the learning rate, starting point, and random seed):
```
Running GD ...
Time taken: 0.044882774353027344 seconds
Final loss: 2.395405369142557e-05
Running SGD ...
Time taken: 1.9691555500030518 seconds
Final loss: 0.0008126081961021715
Running NAG ...
Time taken: 0.08674263954162598 seconds
Final loss: 2.66165401180022e-06
Running Adagrad ...
Time taken: 0.3324441909790039 seconds
Final loss: 0.0008272790793648014
Running Adadelta ...
Time taken: 0.33850836753845215 seconds
Final loss: 4.304015718036031e-05
Running RMSprop ...
Time taken: 0.29058170318603516 seconds
Final loss: 0.00012359074828573192
Running Adam ...
Time taken: 0.35884952545166016 seconds
Final loss: 1.3370659981148123e-06
```
We also obtain a figure like the following:
![optimization_algorithms_comparison.png](attachment:optimization_algorithms_comparison.png)
The plot shows that SGD converges the slowest, while Adam converges the fastest. Keep in mind, however, that on this problem the different algorithms may settle in different local minima depending on the starting point, so the final results can differ. In practice, the algorithm should be chosen to suit the specific problem at hand.
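As a quick sanity check, the Beale function has its global minimum at $(x, y) = (3, 0.5)$, where $f(3, 0.5) = 0$, so the final iterate of each algorithm can be compared against this point:
```python
# Global minimum of the Beale function: f(3, 0.5) = 0
x_star = np.array([3.0, 0.5])
print(beale(x_star))  # prints 0.0 (up to floating-point rounding)
```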