# Convert the DataFrame values to a nested list and use them as x-axis values.
data_list1 = data1.values.tolist()
print(data_list1)
x = data_list1  # x-axis values

# Scatter plot of the turnover rate.
plt.scatter(x, hs)
# Use a font that can render the Chinese labels below.
plt.rcParams['font.sans-serif'] = ['FangSong']
plt.title('换手率散点图')
# Axis title and tick formatting.
plt.ylabel('换手率%')
plt.xticks(rotation=90)
# Clamp the y-axis to the percentage range.
plt.ylim(0, 101)
plt.legend('', title='2008030101陈冰钿')
plt.show()
时间: 2023-12-27 20:04:55 浏览: 63
这段代码主要用于绘制散点图,其中:
- `data1` 是一个 pandas.DataFrame 类型的数据,通过 `values.tolist()` 将其转换为列表类型 `data_list1`;
- `x` 取 `data_list1`,用于绘制横坐标;
- `hs` 是纵坐标的值;
- `plt.scatter(x, hs)` 绘制散点图;
- `plt.rcParams['font.sans-serif'] = ['FangSong']` 设置绘图字体为仿宋;
- `plt.title('换手率散点图')` 设置图表标题;
- `plt.ylabel('换手率%')` 设置纵坐标标题;
- `plt.xticks(rotation=90)` 将横坐标标签旋转90度;
- `plt.ylim(0, 101)` 设置纵坐标轴限制;
- `plt.legend('', title='2008030101陈冰钿')` 添加图例,其中 `''` 表示图例文本为空,`title` 参数为图例标题;
- `plt.show()` 显示图表。
需要注意的是,这段代码中没有给出 `hs` 的定义,可能在其他部分的代码中给出了。
相关问题
写一段python代码,读取一个csv文件,该文件没有表头,第一列为时间编号,编号以1,2,3……这样自然数的形式出现,第二列为充电量数值,将该数据集的前90%划分为训练集,其余部分设置为测试集,利用L1/L2正则化改进的lstm模型预测该时间序列,并打印rmse作为评价指标,并分别绘制训练集的真实值和预测值图片以及测试集的真实值和预测值图片,两张图片的横坐标单位为time/h,纵坐标单位为kwh
好的,下面是代码实现,需要安装pandas、numpy、matplotlib、sklearn、tensorflow等库:
```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras import regularizers

# Read the headerless csv (col 0: time index, col 1: charged energy in kWh).
data = pd.read_csv('data.csv', header=None)
values = data.values[:, 1].astype('float32')

# Scale the series into [0, 1] before feeding it to the LSTM.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values.reshape(-1, 1))

# 90/10 chronological train/test split.
train_size = int(len(scaled) * 0.9)
# NOTE(review): `train` and `test` are never referenced again — the split is
# redone further down on the supervised-learning frame; confirm and remove.
train, test = scaled[:train_size, :], scaled[train_size:, :]
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a (possibly multivariate) series as a supervised-learning table.

    Each output row pairs the ``n_in`` lagged observations with the ``n_out``
    forecast steps.  Rows containing NaNs produced by the shifting are dropped
    when ``dropnan`` is true.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    frame = pd.DataFrame(data)
    shifted, headers = [], []
    # Lag columns: t-n_in ... t-1.
    for lag in range(n_in, 0, -1):
        shifted.append(frame.shift(lag))
        headers.extend('var%d(t-%d)' % (v + 1, lag) for v in range(n_vars))
    # Forecast columns: t, t+1, ... t+n_out-1.
    for step in range(n_out):
        shifted.append(frame.shift(-step))
        if step:
            headers.extend('var%d(t+%d)' % (v + 1, step) for v in range(n_vars))
        else:
            headers.extend('var%d(t)' % (v + 1) for v in range(n_vars))
    table = pd.concat(shifted, axis=1)
    table.columns = headers
    if dropnan:
        table.dropna(inplace=True)
    return table
n_lag = 3       # number of lagged time steps fed to the LSTM
n_features = 1  # univariate series

# Frame the series for supervised learning (drops the first n_lag rows).
reframed = series_to_supervised(scaled, n_lag, 1)

# Split into inputs/outputs.
# FIX: `reframed` has n_lag fewer rows than `scaled`, so the 90% boundary
# computed on `scaled` must be shifted by n_lag to keep the train/test
# split aligned with the original series (the original code used
# `train_size` directly, leaking n_lag test rows into the training set).
values = reframed.values
n_train = train_size - n_lag
train_X, train_y = values[:n_train, :-1], values[:n_train, -1]
test_X, test_y = values[n_train:, :-1], values[n_train:, -1]

# Reshape to the 3D layout the LSTM expects: [samples, timesteps, features].
train_X = train_X.reshape((train_X.shape[0], n_lag, n_features))
test_X = test_X.reshape((test_X.shape[0], n_lag, n_features))

# LSTM with combined L1/L2 (elastic-net) weight regularisation.
model = Sequential()
model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2]),
               kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.01)))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')

# Fit without shuffling: ordering matters for a time series.
history = model.fit(train_X, train_y, epochs=50, batch_size=72,
                    validation_data=(test_X, test_y), verbose=2, shuffle=False)

# Predict the test set and undo the MinMax scaling.
yhat = model.predict(test_X)
inv_yhat = scaler.inverse_transform(yhat)
inv_y = scaler.inverse_transform(test_y.reshape(-1, 1))

# Root-mean-squared error on the original (kWh) scale.
rmse = np.sqrt(np.mean((inv_y - inv_yhat) ** 2))
print('Test RMSE: %.3f' % rmse)

# Plot actual vs. predicted for the training and test sets.
train_predict = scaler.inverse_transform(model.predict(train_X))
# FIX: inverse_transform expects shape (n, 1); the original passed [train_y]
# (shape (1, n)), which raises ValueError for a single-feature scaler.
train_actual = scaler.inverse_transform(train_y.reshape(-1, 1))
test_predict = inv_yhat
test_actual = inv_y

plt.figure(figsize=(10, 6))
plt.subplot(2, 1, 1)
plt.plot(train_actual.flatten(), label='Train Actual')
plt.plot(train_predict.flatten(), label='Train Predict')
# Axis units required by the task statement.
plt.xlabel('time/h')
plt.ylabel('kwh')
plt.legend()
plt.subplot(2, 1, 2)
plt.plot(test_actual.flatten(), label='Test Actual')
plt.plot(test_predict.flatten(), label='Test Predict')
plt.xlabel('time/h')
plt.ylabel('kwh')
plt.legend()
plt.tight_layout()
plt.show()
```
注:该代码适用于只有一列数据的csv文件,如果有多列数据需要进行修改。
其中,L1正则化和L2正则化分别通过`regularizers.l1_l2(l1=0.01, l2=0.01)`实现,参数`l1`和`l2`分别表示L1正则化和L2正则化的权重。RMSE是评价指标,通过`np.sqrt(np.mean((inv_y - inv_yhat)**2))`计算,其中`inv_y`和`inv_yhat`分别表示真实值和预测值的反归一化结果。训练集和测试集的真实值和预测值图片通过matplotlib库绘制。
写一段python代码,读取一个csv文件,该文件没有表头,第一列为时间编号,编号以1,2,3……这样自然数的形式出现,第二列为充电量数值,将该数据集的前90%划分为训练集,其余部分设置为测试集,采用简单的遗传算法对lstm模型进行优化,并基于优化后的模型对时间序列进行预测,并打印rmse作为评价指标,将训练集和测试集分别绘制在两张图上,两张图片的横坐标单位为time/h,纵坐标单位为kwh
以下是一段读取csv文件、划分训练集和测试集、使用遗传算法优化LSTM模型并对时间序列进行预测的Python代码。需要使用pytorch、pandas、numpy、deap和matplotlib库。
```python
# FIX: `random.seed(1)` is called later in this script but `random` was
# never imported, raising NameError; import it here with the stdlib group.
import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from deap import algorithms, base, creator, tools

# Read the headerless csv (col 0: time index, col 1: charged energy in kWh).
df = pd.read_csv('data.csv', header=None)

# First 90% of the rows form the training set, the remainder the test set.
train_size = int(len(df) * 0.9)
train_data, test_data = df[:train_size], df[train_size:]
# 定义LSTM模型
class LSTM(nn.Module):
    """Single-layer LSTM with a linear head; returns the final step's prediction."""

    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        # Shape the sequence as (seq_len, batch=1, features) for the LSTM.
        seq_len = len(input)
        recurrent_out, _ = self.lstm(input.view(seq_len, 1, -1))
        # Project every time step, then keep only the last prediction.
        projected = self.linear(recurrent_out.view(seq_len, -1))
        return projected[-1]
# --- Genetic-algorithm scaffolding (DEAP) ---
# Single minimisation objective: the RMSE returned by evaluate() below.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)
toolbox = base.Toolbox()
# Each gene is a float drawn uniformly from [0, 1).
toolbox.register("attr_float", np.random.uniform, low=0, high=1)
# An individual holds 3 genes; a population is a plain list of individuals.
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=3)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
def evaluate(individual):
    """GA fitness: train an LSTM sized by the individual's genes, return test RMSE.

    NOTE(review): several issues to confirm/fix before this can run:
    - pandas Series are passed straight into the torch model; nn.LSTM needs a
      FloatTensor, so these calls raise at runtime as written.
    - int(individual[i] * ...) can evaluate to 0, producing invalid layer sizes.
    - the loss target is a sequence while forward() returns only the last
      step's prediction — the shapes do not match.
    - test_data keeps its original integer index, so test_data[1][:i+1] is a
      label-based slice that is empty for small i — verify the intent.
    """
    # Decode the three [0, 1) genes into layer sizes.
    input_size = int(individual[0] * len(train_data))
    hidden_size = int(individual[1] * 100)
    output_size = int(individual[2] * 10)
    lstm = LSTM(input_size, hidden_size, output_size)
    optimizer = torch.optim.Adam(lstm.parameters(), lr=0.01)
    loss_fn = nn.MSELoss()
    # Train on a prefix of the training series, predicting one step ahead.
    for epoch in range(100):
        optimizer.zero_grad()
        y_pred = lstm(train_data[1][:input_size])
        loss = loss_fn(y_pred, train_data[1][1:input_size+1])
        loss.backward()
        optimizer.step()
    # Score on the held-out series; DEAP expects a tuple fitness.
    y_true = test_data[1].tolist()
    y_pred = [lstm(test_data[1][:i+1]).item() for i in range(len(test_data))]
    rmse = np.sqrt(np.mean((np.array(y_true) - np.array(y_pred)) ** 2))
    return rmse,
# Register the GA operators: two-point crossover, Gaussian mutation,
# and tournament selection of size 3.
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.1, indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)

# Run a simple GA: population of 10 for 10 generations, keeping the best
# individual in the hall of fame.
import random  # FIX: `random` was used below without ever being imported (NameError)
random.seed(1)
pop = toolbox.population(n=10)
hof = tools.HallOfFame(1)
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("min", np.min)
pop, log = algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2, ngen=10,
                               halloffame=hof, stats=stats)
# Retrain an LSTM sized by the best individual found by the GA, then score it.
# NOTE(review): as in evaluate(), pandas Series are fed directly to the torch
# model — these calls raise at runtime until the data is converted to tensors.
lstm = LSTM(int(hof[0][0] * len(df)), int(hof[0][1] * 100), int(hof[0][2] * 10))
optimizer = torch.optim.Adam(lstm.parameters(), lr=0.01)
loss_fn = nn.MSELoss()
for epoch in range(100):
    optimizer.zero_grad()
    y_pred = lstm(df[1][:train_size])
    loss = loss_fn(y_pred, df[1][1:train_size+1])
    loss.backward()
    optimizer.step()
# Test-set RMSE as the evaluation metric.
y_true = df[1][train_size:].tolist()
y_pred = [lstm(df[1][:i+train_size]).item() for i in range(len(df)-train_size)]
rmse = np.sqrt(np.mean((np.array(y_true) - np.array(y_pred)) ** 2))
print('RMSE: %f' % rmse)
# Plot the raw train/test series.
# NOTE(review): the task statement asks for predicted curves on two separate
# figures; this plots only the actual series on one figure — confirm intent.
import matplotlib.pyplot as plt
plt.plot(train_data[0], train_data[1], label='Train')
plt.plot(test_data[0], test_data[1], label='Test')
plt.xlabel('Time/h')
plt.ylabel('kwh')
plt.legend()
plt.show()
```
阅读全文