使用LSTM预测出租车旅行时间的Python代码
时间: 2023-09-08 14:08:19 浏览: 90
一种基于LSTM的时间序列预测Python示例
以下是使用LSTM模型预测出租车旅行时间的Python代码示例:
```python
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM
# Load the raw trip records.
data = pd.read_csv('taxi.csv')

# Keep only the pickup/dropoff coordinates, the passenger count and the
# prediction target.
selected_columns = [
    'pickup_longitude',
    'pickup_latitude',
    'dropoff_longitude',
    'dropoff_latitude',
    'passenger_count',
    'trip_duration',
]
data = data[selected_columns]

# Rescale the target by 1/60 (seconds -> minutes).
data['trip_duration'] = data['trip_duration'] / 60

# Outlier removal: keep trips shorter than 120 minutes whose four coordinates
# are all non-zero. One combined mask selects the same rows as filtering on
# each condition in turn.
valid_rows = (
    (data['trip_duration'] < 120)
    & (data['pickup_longitude'] != 0)
    & (data['pickup_latitude'] != 0)
    & (data['dropoff_longitude'] != 0)
    & (data['dropoff_latitude'] != 0)
)
data = data[valid_rows]

# Positional 80/20 split: the first 80% of rows train, the remainder tests.
train_size = int(len(data) * 0.8)
train = data.iloc[:train_size]
test = data.iloc[train_size:]

# Z-score normalisation. The statistics come from the training rows only and
# are reused for the test rows so no test information leaks into the scaling.
train_mean = train.mean()
train_std = train.std()
train, test = ((part - train_mean) / train_std for part in (train, test))
# 构建训练集和测试集
def create_dataset(X, y, time_steps=1):
    """Slice features and targets into sliding windows for sequence models.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``X``; its target is
    ``y`` at row ``i + time_steps``, i.e. the row immediately after the window.

    Args:
        X: pandas DataFrame (or Series) of features, indexed positionally.
        y: pandas Series of targets with the same number of rows as ``X``.
        time_steps: Number of consecutive rows in each window.

    Returns:
        A tuple ``(Xs, ys)`` of NumPy arrays. ``Xs`` has shape
        ``(n_windows, time_steps, *X.shape[1:])`` and ``ys`` has shape
        ``(n_windows,)``, where ``n_windows = len(X) - time_steps``.
        When the input has no more rows than ``time_steps`` both arrays are
        empty but ``Xs`` keeps its trailing dimensions, so callers can still
        read the window length and feature count from ``Xs.shape``.
    """
    n_windows = len(X) - time_steps
    if n_windows <= 0:
        # Not enough rows for a single window: return correctly shaped
        # empties instead of a shapeless (0,) array.
        empty_shape = (0, time_steps) + tuple(X.shape[1:])
        return np.empty(empty_shape), np.empty((0,))

    Xs = [X.iloc[start:start + time_steps].values for start in range(n_windows)]
    ys = [y.iloc[start + time_steps] for start in range(n_windows)]
    return np.array(Xs), np.array(ys)
# Each sample is a window of this many consecutive rows.
time_steps = 10

# Separate the feature columns from the target, then cut both splits into
# sliding windows.
train_features = train.drop(columns='trip_duration')
test_features = test.drop(columns='trip_duration')
X_train, y_train = create_dataset(train_features, train['trip_duration'], time_steps)
X_test, y_test = create_dataset(test_features, test['trip_duration'], time_steps)

# Two stacked LSTM layers followed by a single-unit regression head. The first
# LSTM returns the full sequence so the second one can consume it; the second
# returns only its final state.
n_features = X_train.shape[2]
model = Sequential([
    LSTM(64, input_shape=(time_steps, n_features), return_sequences=True),
    LSTM(32, return_sequences=False),
    Dense(1),
])
model.compile(loss='mse', optimizer='adam')

# Train on the windowed training data.
model.fit(X_train, y_train, epochs=10, batch_size=16, verbose=1)
# Predict the (normalised) trip duration for every test window.
y_pred = model.predict(X_test)

# Flatten the predictions to 1-D so they line up element-wise with y_test.
# Leaving them as a column vector of shape (N, 1) would make the subtraction
# below broadcast against y_test's shape (N,) into an (N, N) matrix, and the
# RMSE would then average over every prediction/target pairing instead of the
# N matching pairs.
y_pred = y_pred.reshape(-1)

# Undo the z-score normalisation using the training-set statistics of the
# target column, so the error is reported in the target's original scale.
duration_mean = train_mean['trip_duration']
duration_std = train_std['trip_duration']
y_pred = (y_pred * duration_std) + duration_mean
y_test = (y_test * duration_std) + duration_mean

# Root-mean-squared error over matching prediction/target pairs.
rmse = np.sqrt(np.mean((y_pred - y_test) ** 2))
print('RMSE:', rmse)
```
需要注意的是,本示例中使用的数据集是从Kaggle上下载的纽约市出租车行程数据集,包含了一些特征,如出发地经纬度、目的地经纬度、乘客数量等。实际应用中,需要根据具体情况选择特征。此外,本示例中使用的是LSTM模型,也可以尝试使用其他的深度学习模型进行预测。
阅读全文