使用GRU模型预测纽约地铁交通量的具体实例及代码
时间: 2023-06-18 08:05:24 浏览: 193
基于深度学习方法的车辆上牌量预测
下面是使用GRU模型预测纽约地铁交通量的示例代码:
首先,我们需要导入必要的库和数据集:
```python
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, GRU
from sklearn.preprocessing import MinMaxScaler
# Load the dataset; the 'datetime' column is parsed as timestamps and
# becomes the row index
data = pd.read_csv(
    'nyc_subway.csv',
    index_col='datetime',
    parse_dates=['datetime'],
)
```
接着,我们对数据进行预处理,并将其划分为训练集和测试集:
```python
# Preprocessing: sum the records into hourly buckets
data = data.resample('H').sum()
# Collapse the summed rain value into a 0/1 "did it rain this hour" flag
data['rain'] = data['rain'].gt(0).astype(int)
# Calendar features derived from the timestamp index
data['hour'] = data.index.hour
data['weekday'] = data.index.weekday
data['is_weekend'] = data['weekday'].isin([5, 6]).astype(int)
# Chronological split: first 80% of rows for training, the rest for testing
train_size = int(len(data) * 0.8)
train_data = data.iloc[:train_size]
test_data = data.iloc[train_size:]
```
然后,我们使用`MinMaxScaler`进行数据归一化:
```python
# Normalisation: fit the scaler on the training rows only, then apply that
# same fitted transform to both the training and the test rows
scaler = MinMaxScaler()
scaler.fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)
```
接着,我们定义一个函数来将数据转换为有监督学习问题:
```python
def to_supervised(data, n_input, n_out=1):
    """Slice a time-ordered array into sliding-window (input, target) samples.

    Each sample pairs ``n_input`` consecutive rows (the model input) with the
    ``n_out`` rows that immediately follow them (the target). Consecutive
    windows are shifted by one row.

    Args:
        data: Array-like that can be sliced along its first (time) axis.
        n_input: Number of rows in each input window.
        n_out: Number of rows in each target window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. For 2-D ``data`` with F columns,
        ``X`` has shape ``(samples, n_input, F)`` and ``y`` has shape
        ``(samples, n_out, F)``. ``y`` keeps every column, so a caller that
        predicts a single series must select that column itself. Both arrays
        are empty when ``data`` is too short to hold one full window.
    """
    X, y = [], []
    # The last usable window ends exactly at len(data); counting the windows
    # up front includes it (a strict `out_end < len(data)` test would drop it).
    n_windows = len(data) - n_input - n_out + 1
    for in_start in range(max(n_windows, 0)):
        in_end = in_start + n_input
        out_end = in_end + n_out
        X.append(data[in_start:in_end])
        y.append(data[in_end:out_end])
    return np.array(X), np.array(y)
```
然后,我们将训练集和测试集转换为有监督学习问题:
```python
n_input = 24  # rows per input window (24 hourly rows after resampling)
n_out = 1     # rows per target window
# NOTE(review): assumes the traffic-volume series is the first column of the
# CSV (and therefore of the scaled arrays) -- confirm and adjust if it is not.
target_col = 0
train_X, train_y = to_supervised(train_data, n_input, n_out)
test_X, test_y = to_supervised(test_data, n_input, n_out)
# to_supervised returns every feature column in y. Keep only the series being
# predicted so the targets have shape (samples, n_out) and line up with the
# model's single-unit output instead of broadcasting against it.
train_y = train_y[:, :, target_col]
test_y = test_y[:, :, target_col]
```
接着,我们使用`Sequential`模型搭建GRU模型,并对其进行训练:
```python
# Build the network: one 100-unit GRU layer followed by a single-unit dense
# output, optimised with Adam on mean squared error
model = Sequential([
    GRU(100, activation='relu', input_shape=(n_input, train_X.shape[2])),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')
# Train without shuffling so the windows are seen in chronological order;
# the test windows are passed as validation data to report loss per epoch
history = model.fit(
    train_X,
    train_y,
    epochs=50,
    batch_size=32,
    validation_data=(test_X, test_y),
    verbose=2,
    shuffle=False,
)
```
最后,我们使用训练好的模型对测试集进行预测,并计算其均方根误差(RMSE):
```python
# Predict on the test windows
y_pred = model.predict(test_X)
# Bring the targets to the same 2-D shape as the predictions before
# subtracting; a (samples, 1) prediction minus a (samples, n_out, features)
# target would otherwise broadcast to (samples, samples, features).
# NOTE(review): when test_y still carries every feature column this keeps the
# leading column(s) -- assumes the traffic series comes first; confirm.
y_true = np.asarray(test_y).reshape(len(test_y), -1)[:, :y_pred.shape[1]]
# Root mean squared error, in the scaler's normalised units (the values are
# never inverse-transformed back to raw counts)
rmse = np.sqrt(np.mean((y_pred - y_true) ** 2))
print('RMSE: %.3f' % rmse)
```
完整代码如下:
```python
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, GRU
from sklearn.preprocessing import MinMaxScaler
# Load the dataset; the 'datetime' column is parsed as timestamps and
# becomes the row index
data = pd.read_csv(
    'nyc_subway.csv',
    index_col='datetime',
    parse_dates=['datetime'],
)
# Preprocessing: sum the records into hourly buckets
data = data.resample('H').sum()
# Collapse the summed rain value into a 0/1 "did it rain this hour" flag
data['rain'] = data['rain'].gt(0).astype(int)
# Calendar features derived from the timestamp index
data['hour'] = data.index.hour
data['weekday'] = data.index.weekday
data['is_weekend'] = data['weekday'].isin([5, 6]).astype(int)
# Chronological split: first 80% of rows for training, the rest for testing
train_size = int(len(data) * 0.8)
train_data = data.iloc[:train_size]
test_data = data.iloc[train_size:]
# Normalisation: fit the scaler on the training rows only, then apply that
# same fitted transform to both the training and the test rows
scaler = MinMaxScaler()
scaler.fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)
# 将数据转换为有监督学习问题
def to_supervised(data, n_input, n_out=1):
    """Slice a time-ordered array into sliding-window (input, target) samples.

    Each sample pairs ``n_input`` consecutive rows (the model input) with the
    ``n_out`` rows that immediately follow them (the target). Consecutive
    windows are shifted by one row.

    Args:
        data: Array-like that can be sliced along its first (time) axis.
        n_input: Number of rows in each input window.
        n_out: Number of rows in each target window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. For 2-D ``data`` with F columns,
        ``X`` has shape ``(samples, n_input, F)`` and ``y`` has shape
        ``(samples, n_out, F)``. ``y`` keeps every column, so a caller that
        predicts a single series must select that column itself. Both arrays
        are empty when ``data`` is too short to hold one full window.
    """
    X, y = [], []
    # The last usable window ends exactly at len(data); counting the windows
    # up front includes it (a strict `out_end < len(data)` test would drop it).
    n_windows = len(data) - n_input - n_out + 1
    for in_start in range(max(n_windows, 0)):
        in_end = in_start + n_input
        out_end = in_end + n_out
        X.append(data[in_start:in_end])
        y.append(data[in_end:out_end])
    return np.array(X), np.array(y)
n_input = 24  # rows per input window (24 hourly rows after resampling)
n_out = 1     # rows per target window
# NOTE(review): assumes the traffic-volume series is the first column of the
# CSV (and therefore of the scaled arrays) -- confirm and adjust if it is not.
target_col = 0
train_X, train_y = to_supervised(train_data, n_input, n_out)
test_X, test_y = to_supervised(test_data, n_input, n_out)
# to_supervised returns every feature column in y. Keep only the series being
# predicted so the targets have shape (samples, n_out) and line up with the
# model's single-unit output instead of broadcasting against it.
train_y = train_y[:, :, target_col]
test_y = test_y[:, :, target_col]
# Build the GRU model: one 100-unit GRU layer and a single-unit dense output
model = Sequential()
model.add(GRU(100, activation='relu', input_shape=(n_input, train_X.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# Train without shuffling so the windows are seen in chronological order;
# the test windows are passed as validation data to report loss per epoch
history = model.fit(train_X, train_y, epochs=50, batch_size=32, validation_data=(test_X, test_y), verbose=2, shuffle=False)
# Predict on the test windows
y_pred = model.predict(test_X)
# Root mean squared error between (samples, 1) predictions and (samples, 1)
# targets, in the scaler's normalised units (the values are never
# inverse-transformed back to raw counts)
rmse = np.sqrt(np.mean((y_pred - test_y) ** 2))
print('RMSE: %.3f' % rmse)
```
阅读全文