tensorflow实现MAPE
时间: 2023-12-05 09:05:47 浏览: 115
MAPE(Mean Absolute Percentage Error,平均绝对百分比误差)指标可以用来评估预测结果的准确度,计算公式为:
MAPE = mean(|(y_true - y_pred) / y_true|) * 100%
其中,y_true 表示真实值,y_pred 表示预测值, |x| 表示 x 的绝对值。
在TensorFlow中可以通过以下代码实现MAPE指标的计算:
```python
import tensorflow as tf
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE) as a scalar tensor.

    Computes ``100 * mean(|y_true - y_pred| / max(|y_true|, 1e-8))`` over
    every element of the inputs.

    Args:
        y_true: Ground-truth values (tensor or array-like).
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        A scalar tensor holding the MAPE in percent.
    """
    # Normalise dtypes up front: integer inputs, or a float32/float64 mix,
    # would otherwise fail in the subtraction or in clip_by_value.
    y_pred = tf.convert_to_tensor(y_pred)
    if not y_pred.dtype.is_floating:
        y_pred = tf.cast(y_pred, tf.float32)
    y_true = tf.cast(y_true, y_pred.dtype)

    # Clip the denominator away from zero so y_true == 0 cannot divide by zero.
    denominator = tf.clip_by_value(tf.abs(y_true), 1e-8, tf.float32.max)
    diff = tf.abs(y_true - y_pred) / denominator
    return 100.0 * tf.reduce_mean(diff)
```
关于代码中的tf.clip_by_value函数,它可以将张量中的数值限制在一个范围内。在此处它作用于分母 |y_true|:将小于1e-8的数值替换为1e-8,将大于tf.float32.max的数值替换为tf.float32.max。这样可以避免 y_true 为 0 时出现除零错误,并保证分母不超出 float32 的可表示范围。
相关问题
写出三个代码用jupyter分别实现prophet、transformer、pytorch-forecasting预测,用于对一个包含5维特征的1400x5的数据集进行预测。代码中包括了数据标准化、绘制数据趋势图、早停、正则化、计算训练损失和验证损失并绘图,以及绘制各列数据的预测图和总体数据的预测图,并计算MAE/MAPE/MSE/RMSE/SMAPE指标。
以下是在Jupyter Notebook中使用Prophet、Transformer和pytorch-forecasting进行预测的示例代码:
## 使用Prophet进行预测
```python
import pandas as pd

try:
    # The package was renamed from ``fbprophet`` to ``prophet`` in release 1.0.
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Number of future time steps to forecast for every column.
FORECAST_PERIODS = 30

# Load the data; every column is treated as a numeric series.
data = pd.read_csv('data.csv')

# Standardize each column (zero mean, unit variance).
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

# Synthetic daily timestamps, one per row, shared by all columns.
dates = pd.date_range(start='2000-01-01', periods=len(scaled_data))

# Prophet is univariate and requires the training frame to have columns named
# exactly 'ds' and 'y', so fit one independent model per column instead of
# registering every series as a regressor of a model that has no target.
for i in range(scaled_data.shape[1]):
    df = pd.DataFrame({'ds': dates, 'y': scaled_data[:, i]})

    model = Prophet()
    model.fit(df)

    # Forecast the history plus FORECAST_PERIODS future steps.
    future = model.make_future_dataframe(periods=FORECAST_PERIODS)
    forecast = model.predict(future)

    # Forecast plot for this column (values are in standardized units).
    fig = model.plot(forecast)
    fig.suptitle('y{}'.format(i + 1))
    plt.show()

    # Trend / seasonality components; plot_components needs the full forecast
    # frame, not a column subset.
    model.plot_components(forecast)
    plt.show()
```
## 使用Transformer进行预测
```python
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
    Add,
    Dense,
    Dropout,
    Flatten,
    Input,
    LayerNormalization,
    MultiHeadAttention,
)
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

WINDOW_SIZE = 30     # past time steps fed to the model
HORIZON = 30         # future time steps to forecast recursively
D_MODEL = 32         # width of the internal token representation
NUM_HEADS = 4
FF_DIM = 64          # hidden width of the position-wise feed-forward net
DROPOUT_RATE = 0.2

# Load the data; every column is treated as a numeric feature.
data = pd.read_csv('data.csv')

# Standardize each column.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)
n_features = scaled_data.shape[1]

# Build supervised samples: each window of WINDOW_SIZE rows predicts the row
# that follows it. Training on (X, X) would only teach the identity mapping.
windows = np.stack([
    scaled_data[start:start + WINDOW_SIZE]
    for start in range(len(scaled_data) - WINDOW_SIZE)
])
targets = scaled_data[WINDOW_SIZE:]

# Keras ships no ``TransformerBlock`` layer, so assemble one encoder block from
# stock layers: self-attention and a feed-forward net, each wrapped in a
# residual connection followed by layer normalization.
inputs = Input(shape=(WINDOW_SIZE, n_features))
x = Dense(D_MODEL)(inputs)

attention = MultiHeadAttention(
    num_heads=NUM_HEADS, key_dim=D_MODEL // NUM_HEADS
)(x, x)
attention = Dropout(DROPOUT_RATE)(attention)
x = LayerNormalization(epsilon=1e-6)(Add()([x, attention]))

feed_forward = Sequential([Dense(FF_DIM, activation='relu'), Dense(D_MODEL)])
ff_out = Dropout(DROPOUT_RATE)(feed_forward(x))
x = LayerNormalization(epsilon=1e-6)(Add()([x, ff_out]))

# Flatten (rather than average-pool) so the output head can tell time steps
# apart; no positional encoding is added above.
x = Flatten()(x)
x = Dropout(DROPOUT_RATE)(x)
outputs = Dense(n_features)(x)
model = Model(inputs, outputs)

# Compile the model.
model.compile(loss='mse', optimizer='adam')

# Early stopping on validation loss, restoring the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train; validation_split holds out the last 20% of the samples.
history = model.fit(
    windows,
    targets,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
)

# Plot training and validation loss.
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Recursive multi-step forecast: predict one step, slide it into the window,
# and repeat HORIZON times.
window = scaled_data[-WINDOW_SIZE:].copy()
predicted_steps = []
for _ in range(HORIZON):
    next_step = model.predict(window[np.newaxis, ...], verbose=0)[0]
    predicted_steps.append(next_step)
    window = np.vstack([window[1:], next_step])
future = scaler.inverse_transform(np.array(predicted_steps))  # (HORIZON, n_features)

# Plot actual history and forecast for each column.
future_index = np.arange(data.shape[0], data.shape[0] + future.shape[0])
for i in range(data.shape[1]):
    plt.plot(data.iloc[:, i], label='Actual')
    plt.plot(future_index, future[:, i], label='Predicted')
    plt.xlabel('Time')
    plt.ylabel('Feature {}'.format(i+1))
    plt.legend()
    plt.show()
```
## 使用pytorch-forecasting进行预测
```python
import pandas as pd
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.metrics import MAE, MAPE, MSE, RMSE, SMAPE
from pytorch_forecasting.data import NaNLabelEncoder
import matplotlib.pyplot as plt
# Example script: loads data.csv, wraps it in a TimeSeriesDataSet, builds a
# TemporalFusionTransformer from that dataset, then computes and prints
# MAE / MAPE / MSE / RMSE / SMAPE and draws prediction plots.
# NOTE(review): indentation was lost in this paste (loop bodies sit at column
# 0), and several calls below do not obviously match the pytorch-forecasting
# API — each is flagged where it occurs. Treat this as a sketch to verify.
# Load the data
data = pd.read_csv('data.csv')
# Preprocessing
# NOTE(review): time_idx is filled with datetimes here; the library docs
# describe time_idx as an integer step column — confirm and convert if needed.
data['time_idx'] = pd.date_range(start='2000-01-01', periods=len(data))
# Assumes the CSV has a 'target' column to rename to 'y' — TODO confirm.
data = data.rename(columns={'target': 'y'})
# Time-series dataset settings
max_encoder_length = 100 # maximum encoder length
max_prediction_length = 10 # maximum prediction length
# Feature columns for the encoder and the target column
target_col = 'y'
# Assumes the CSV columns are literally named feature1..feature5 — TODO confirm.
encoder_cols = ['feature1', 'feature2', 'feature3', 'feature4', 'feature5']
static_categoricals = []
static_reals = []
time_varying_known_categoricals = []
time_varying_known_reals = []
time_varying_unknown_categoricals = []
time_varying_unknown_reals = encoder_cols
# Split points: 80% and 90% quantiles of time_idx
training_cutoff = data['time_idx'].quantile(0.8)
validation_cutoff = data['time_idx'].quantile(0.9)
# These flag columns are added but not read again in this script; the dataset
# below is built from the full frame, not from a train/validation split.
data['is_val'] = data['time_idx'] > training_cutoff
data['is_test'] = data['time_idx'] > validation_cutoff
data['is_nan'] = data[target_col].isna()
# Label encoders
# All three categorical lists above are empty, so this loop has nothing to
# iterate over as written.
# NOTE(review): the two lines after the `for` presumably form its body.
label_encoders = {}
for col in static_categoricals + time_varying_known_categoricals + time_varying_unknown_categoricals:
label_encoders[col] = NaNLabelEncoder().fit(data[col])
data[col] = label_encoders[col].transform(data[col])
# Build the time-series dataset
# From here on `data` is rebound from the DataFrame to the TimeSeriesDataSet.
data = TimeSeriesDataSet(
data=data,
time_idx='time_idx',
target=target_col,
# NOTE(review): no 'id' column is created in this script; presumably it must
# already exist in the CSV — confirm.
group_ids=['id'],
min_encoder_length=max_encoder_length // 2, # minimum encoder length
max_encoder_length=max_encoder_length,
min_prediction_length=1,
max_prediction_length=max_prediction_length,
static_categoricals=static_categoricals,
static_reals=static_reals,
time_varying_known_categoricals=time_varying_known_categoricals,
time_varying_known_reals=time_varying_known_reals,
time_varying_unknown_categoricals=time_varying_unknown_categoricals,
time_varying_unknown_reals=time_varying_unknown_reals,
# NOTE(review): a label encoder is used as the target normalizer although the
# target looks continuous — presumably a numeric normalizer is intended; verify.
target_normalizer=NaNLabelEncoder().fit(data[target_col]),
add_relative_time_idx=True,
add_target_scales=True
)
# Create the TemporalFusionTransformer model from the dataset
model = TemporalFusionTransformer.from_dataset(
data,
learning_rate=0.03,
hidden_size=16,
attention_head_size=1,
dropout=0.1,
hidden_continuous_size=8
)
# Train the model
# NOTE(review): could not confirm that the model exposes
# `train_dataloader(dataset, batch_size=...)` or `fit(..., epochs=...,
# early_stopping_patience=...)`; training is presumably meant to go through a
# Lightning Trainer with dataloaders from the dataset — verify against the docs.
trainer = model.train_dataloader(data, batch_size=32)
model.fit(trainer, epochs=10, early_stopping_patience=5)
# Predict
# NOTE(review): confirm that `predict` accepts `num_samples`.
future = model.predict(data, num_samples=100)
# Compute metrics
# NOTE(review): the metrics are called with the dataset object as the target
# and a `mode='raw'` keyword — confirm the expected (prediction, target) call
# signature.
mae = MAE()(future, data, mode='raw')
mape = MAPE()(future, data, mode='raw')
mse = MSE()(future, data, mode='raw')
rmse = RMSE()(future, data, mode='raw')
smape = SMAPE()(future, data, mode='raw')
# Plot predictions per column
# NOTE(review): `data.data[target_col].shape[1]`, `data.to_pandas()` and this
# `plot_prediction(...)` argument list could not be confirmed — verify.
for i in range(data.data[target_col].shape[1]):
model.plot_prediction(data.to_pandas()[(data.to_pandas()['time_idx'] > training_cutoff)], future, idx=i)
plt.show()
# Plot the overall prediction
model.plot_prediction(data.to_pandas()[(data.to_pandas()['time_idx'] > training_cutoff)], future)
plt.show()
# Print the metrics
print('MAE:', mae)
print('MAPE:', mape)
print('MSE:', mse)
print('RMSE:', rmse)
print('SMAPE:', smape)
```
请注意,根据你的数据集和具体需求,上述代码可能需要进行一些修改和调整。
阅读全文