Python实现LSTM时间序列分类代码,数据集为多个Excel文件,每个文件包含一个时间序列,每个时间序列有多个特征。
时间: 2023-11-30 14:36:02 浏览: 95
以下是一个简单的LSTM时间序列分类代码,用于将多个Excel文件中的时间序列数据分类为正常或异常。
首先,我们需要导入必要的库和模块:
```python
import numpy as np
import pandas as pd
import os
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.metrics import confusion_matrix
```
然后,我们需要定义一些常量和函数:
```python
# Constants
# Number of consecutive time steps in each window; passed to get_data,
# get_labels and create_model by the script below.
SEQUENCE_LENGTH = 50
# Class label that get_labels assigns to every window.
NORMAL = 0
# Class label for anomalous windows (not assigned anywhere in the code shown here).
ANOMALY = 1
# 定义函数
def create_model(sequence_length, n_features):
    """Build and compile a single-layer LSTM binary classifier.

    Args:
        sequence_length: Number of time steps in each input window.
        n_features: Number of features recorded at each time step.

    Returns:
        A compiled ``Sequential`` model consisting of LSTM(64) ->
        Dropout(0.5) -> Dense(1, sigmoid), optimised with Adam on binary
        cross-entropy and reporting accuracy.
    """
    model = Sequential([
        LSTM(units=64, input_shape=(sequence_length, n_features)),
        Dropout(0.5),
        # A single sigmoid unit produces one score in [0, 1] per window,
        # which get_prediction later thresholds into a class.
        Dense(units=1, activation='sigmoid'),
    ])
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model
def get_data(data, sequence_length):
    """Slice a time series into overlapping fixed-length windows.

    Window ``i`` covers rows ``i`` to ``i + sequence_length - 1``; there are
    ``len(data) - sequence_length`` windows in total, matching the number of
    labels produced by ``get_labels``.

    Args:
        data: Array-like of shape ``(n_steps, ...)``; the first axis is time.
        sequence_length: Number of rows in each window.

    Returns:
        Array of shape ``(n_windows, sequence_length, ...)``. When the series
        is too short to yield any window the result is an empty array that
        still has the trailing dimensions, so it can be concatenated with the
        windows of other series.
    """
    data = np.asarray(data)
    n_windows = len(data) - sequence_length
    if n_windows <= 0:
        # np.array([]) would have shape (0,), which np.concatenate rejects
        # when mixed with 3-D window arrays from other files.
        return np.empty((0, sequence_length) + data.shape[1:], dtype=data.dtype)
    return np.array(
        [data[start:start + sequence_length] for start in range(n_windows)]
    )
def get_labels(data, sequence_length, label=None):
    """Return one class label per window produced by ``get_data``.

    Args:
        data: The time series the windows were cut from; only its length
            is used.
        sequence_length: Number of rows in each window.
        label: Class label assigned to every window of this series.
            Defaults to ``NORMAL`` (the original behaviour); pass
            ``ANOMALY`` for a series known to be anomalous.

    Returns:
        1-D integer array of length ``max(len(data) - sequence_length, 0)``
        filled with ``label``.
    """
    if label is None:
        label = NORMAL
    n_windows = max(len(data) - sequence_length, 0)
    # np.full keeps the label's integer dtype even when n_windows is 0, so an
    # empty result does not upcast the other labels to float on concatenation.
    return np.full(n_windows, label)
def get_prediction(model, data, threshold=0.5):
    """Turn the model's scores into boolean class predictions.

    Args:
        model: Object with a ``predict(data)`` method returning one score
            per sample, shaped ``(n_samples,)`` or ``(n_samples, 1)``.
        data: Input windows forwarded unchanged to ``model.predict``.
        threshold: Scores strictly greater than this are predicted as the
            positive class. Defaults to 0.5.

    Returns:
        1-D boolean array of length ``n_samples``; ``True`` where the score
        exceeds ``threshold``.
    """
    scores = np.asarray(model.predict(data))
    # Collapse the trailing unit axis so the result lines up with 1-D labels.
    scores = scores.reshape(scores.shape[0])
    return scores > threshold
def evaluate(model, X_test, y_test):
    """Compute accuracy, precision, recall and F1 on a test set.

    ``ANOMALY`` is treated as the positive class.

    Args:
        model: Trained model accepted by ``get_prediction``.
        X_test: Test windows.
        y_test: True labels (``NORMAL`` / ``ANOMALY``), one per window.

    Returns:
        Tuple ``(accuracy, precision, recall, f1_score)`` of floats. A metric
        whose denominator is zero (for example precision when nothing was
        predicted positive) is reported as 0.0.
    """
    # Cast the boolean predictions to ints so they share a label space
    # with y_test.
    y_pred = np.asarray(get_prediction(model, X_test)).astype(int)
    # Pin the label order: without `labels`, a test set containing a single
    # class yields a 1x1 matrix and the 4-way unpacking below fails.
    cm = confusion_matrix(y_test, y_pred, labels=[NORMAL, ANOMALY])
    tn, fp, fn, tp = (int(count) for count in cm.ravel())

    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total else 0.0
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    if precision + recall:
        f1_score = 2 * precision * recall / (precision + recall)
    else:
        f1_score = 0.0
    return accuracy, precision, recall, f1_score
```
接下来,我们需要遍历每个Excel文件,读取数据并将其转换为模型的输入格式:
```python
# Load every .xlsx workbook in the data directory as one time series
# (a 2-D array taken from the DataFrame's values).
data_path = 'path/to/your/data'
# os.listdir returns entries in arbitrary order; sort so that the positional
# train/test split further down is reproducible between runs.
data_files = sorted(os.listdir(data_path))
data = []
for file in data_files:
    # Skip non-workbooks and the "~$..." lock files Excel creates while a
    # workbook is open (they also end in .xlsx but are not readable).
    if not file.endswith('.xlsx') or file.startswith('~$'):
        continue
    df = pd.read_excel(os.path.join(data_path, file))
    data.append(df.values)
# Convert every loaded series into fixed-length windows plus one label per
# window, then stack the windows of all files into single arrays.
if not data:
    raise ValueError('No .xlsx files were loaded from ' + repr(data_path))
n_features = data[0].shape[1]
X_data = []
y_data = []
for d in data:
    # A series with at most SEQUENCE_LENGTH rows yields no window; skip it so
    # an empty, differently-shaped array cannot break the concatenation.
    if len(d) <= SEQUENCE_LENGTH:
        continue
    X_data.append(get_data(d, SEQUENCE_LENGTH))
    # NOTE: get_labels marks every window as NORMAL by default; supply real
    # labels here before training a meaningful classifier.
    y_data.append(get_labels(d, SEQUENCE_LENGTH))
if not X_data:
    raise ValueError(
        'Every series is too short for SEQUENCE_LENGTH=' + str(SEQUENCE_LENGTH)
    )
X_data = np.concatenate(X_data)
y_data = np.concatenate(y_data)
```
然后,我们需要将数据拆分为训练集和测试集:
```python
# Positional split: the first 80% of the windows are used for training and
# the remaining 20% are held out for testing (no shuffling).
split = int(len(X_data) * 0.8)
X_train, X_test = X_data[:split], X_data[split:]
y_train, y_test = y_data[:split], y_data[split:]
```
接下来,我们需要创建模型并训练它:
```python
# Build the classifier for the configured window size and fit it on the
# training windows, holding out 10% of them for validation.
model = create_model(sequence_length=SEQUENCE_LENGTH, n_features=n_features)
model.fit(
    x=X_train,
    y=y_train,
    epochs=50,
    batch_size=64,
    validation_split=0.1,
)
```
最后,我们可以使用测试集评估模型的性能:
```python
# Score the trained model on the held-out windows and print each metric.
metrics = evaluate(model, X_test, y_test)
accuracy, precision, recall, f1_score = metrics
for caption, value in zip(
    ('Accuracy:', 'Precision:', 'Recall:', 'F1 score:'), metrics
):
    print(caption, value)
```
这就是一个简单的LSTM时间序列分类代码,用于将多个Excel文件中的时间序列数据分类为正常或异常。注意:示例中的 get_labels 会把所有窗口都标记为正常(NORMAL),实际使用时需要为每个文件或窗口提供真实的正常/异常标签,否则模型只能学到单一类别,评估指标也没有意义。你可以根据自己的需求进行修改和优化。
阅读全文