dataset = data.Global_active_power.values.astype('float32') # Reshape the numpy array into a 2D array with 1 column dataset = np.reshape(dataset, (-1, 1)) # Create an instance of the MinMaxScaler class to scale the values between 0 and 1 scaler = MinMaxScaler(feature_range=(0, 1)) # Fit the MinMaxScaler to the transformed data and transform the values dataset = scaler.fit_transform(dataset) # Split the transformed data into a training set (80%) and a test set (20%)
时间: 2024-03-14 15:49:45 浏览: 74
这段代码是用于数据预处理的,主要包括以下几个步骤:
1. 将原始数据集中的Global_active_power列提取出来,并将数据类型转换为float32。
2. 将提取出来的数据集变成一个二维数组,其中只有一列。
3. 利用MinMaxScaler将数据集中的数值缩放到0-1之间。
4. 将缩放后的数据集按照8:2的比例分割成训练集和测试集。
这些预处理步骤可以帮助我们更好地训练模型和评估模型的性能。
相关问题
修改一下这段代码在pycharm中的实现,import pandas as pd import numpy as np from sklearn.model_selection import train_test_split import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim #from torchvision import datasets,transforms import torch.utils.data as data #from torch .nn:utils import weight_norm import matplotlib.pyplot as plt from sklearn.metrics import precision_score from sklearn.metrics import recall_score from sklearn.metrics import f1_score from sklearn.metrics import cohen_kappa_score data_ = pd.read_csv(open(r"C:\Users\zhangjinyue\Desktop\rice.csv"),header=None) data_ = np.array(data_).astype('float64') train_data =data_[:,:520] train_Data =np.array(train_data).astype('float64') train_labels=data_[:,520] train_labels=np.array(train_data).astype('float64') train_data,train_data,train_labels,train_labels=train_test_split(train_data,train_labels,test_size=0.33333) train_data=torch.Tensor(train_data) train_data=torch.LongTensor(train_labels) train_data=train_data.reshape(-1,1,20,26) train_data=torch.Tensor(train_data) train_data=torch.LongTensor(train_labels) train_data=train_data.reshape(-1,1,20,26) start_epoch=1 num_epoch=1 BATCH_SIZE=70 Ir=0.001 classes=('0','1','2','3','4','5') device=torch.device("cuda"if torch.cuda.is_available()else"cpu") torch.backends.cudnn.benchmark=True best_acc=0.0 train_dataset=data.TensorDataset(train_data,train_labels) test_dataset=data.TensorDataset(train_data,train_labels) train_loader=torch.utills.data.DataLoader(dtaset=train_dataset,batch_size=BATCH_SIZE,shuffle=True) test_loader=torch.utills.data.DataLoader(dtaset=train_dataset,batch_size=BATCH_SIZE,shuffle=True)
修改后的代码如下所示:
```python
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score, cohen_kappa_score
data_ = pd.read_csv(r"C:\Users\zhangjinyue\Desktop\rice.csv", header=None)
data_ = np.array(data_).astype('float64')
train_data = data_[:, :520]
train_labels = data_[:, 520]
train_data, _, train_labels, _ = train_test_split(train_data, train_labels, test_size=0.33333)
train_data = torch.Tensor(train_data)
train_labels = torch.LongTensor(train_labels)
train_data = train_data.reshape(-1, 1, 20, 26)
start_epoch = 1
num_epoch = 1
BATCH_SIZE = 70
Ir = 0.001
classes = ('0', '1', '2', '3', '4', '5')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.benchmark = True
best_acc = 0.0
train_dataset = data.TensorDataset(train_data, train_labels)
test_dataset = data.TensorDataset(train_data, train_labels)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
```
请注意,我做了以下修改:
1. 移除了不需要的导入语句。
2. 修复了变量名拼写错误。
3. 移除了重复的代码行。
4. 修正了 `torch.utils.data.DataLoader` 的拼写错误。
5. 修正了数据集分割时的变量名错误。
请根据你的实际需求进一步调整代码。
import numpy import numpy as np import matplotlib.pyplot as plt import math import torch from torch import nn from torch.utils.data import DataLoader, Dataset import os os.environ['KMP_DUPLICATE_LIB_OK']='True' dataset = [] for data in np.arange(0, 3, .01): data = math.sin(data * math.pi) dataset.append(data) dataset = np.array(dataset) dataset = dataset.astype('float32') max_value = np.max(dataset) min_value = np.min(dataset) scalar = max_value - min_value print(scalar) dataset = list(map(lambda x: x / scalar, dataset)) def create_dataset(dataset, look_back=3): dataX, dataY = [], [] for i in range(len(dataset) - look_back): a = dataset[i:(i + look_back)] dataX.append(a) dataY.append(dataset[i + look_back]) return np.array(dataX), np.array(dataY) data_X, data_Y = create_dataset(dataset) train_X, train_Y = data_X[:int(0.8 * len(data_X))], data_Y[:int(0.8 * len(data_Y))] test_X, test_Y = data_Y[int(0.8 * len(data_X)):], data_Y[int(0.8 * len(data_Y)):] train_X = train_X.reshape(-1, 1, 3).astype('float32') train_Y = train_Y.reshape(-1, 1, 3).astype('float32') test_X = test_X.reshape(-1, 1, 3).astype('float32') train_X = torch.from_numpy(train_X) train_Y = torch.from_numpy(train_Y) test_X = torch.from_numpy(test_X) class RNN(nn.Module): def __init__(self, input_size, hidden_size, output_size=1, num_layer=2): super(RNN, self).__init__() self.input_size = input_size self.hidden_size = hidden_size self.output_size = output_size self.num_layer = num_layer self.rnn = nn.RNN(input_size, hidden_size, batch_first=True) self.linear = nn.Linear(hidden_size, output_size) def forward(self, x): out, h = self.rnn(x) out = self.linear(out[0]) return out net = RNN(3, 20) criterion = nn.MSELoss(reduction='mean') optimizer = torch.optim.Adam(net.parameters(), lr=1e-2) train_loss = [] test_loss = [] for e in range(1000): pred = net(train_X) loss = criterion(pred, train_Y) optimizer.zero_grad() # 反向传播 loss.backward() optimizer.step() if (e + 1) % 100 == 0: print('Epoch:{},loss:{:.10f}'.format(e + 1, loss.data.item())) train_loss.append(loss.item()) plt.plot(train_loss, label='train_loss') plt.legend() plt.show()请适当修改代码,并写出预测值和真实值的代码
以下是适当修改后的代码以及预测值和真实值的代码:
```
import numpy as np
import matplotlib.pyplot as plt
import math
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
dataset = []
for data in np.arange(0, 3, .01):
data = math.sin(data * math.pi)
dataset.append(data)
dataset = np.array(dataset)
dataset = dataset.astype('float32')
max_value = np.max(dataset)
min_value = np.min(dataset)
scalar = max_value - min_value
print(scalar)
dataset = list(map(lambda x: x / scalar, dataset))
def create_dataset(dataset, look_back=3):
dataX, dataY = [], []
for i in range(len(dataset) - look_back):
a = dataset[i:(i + look_back)]
dataX.append(a)
dataY.append(dataset[i + look_back])
return np.array(dataX), np.array(dataY)
data_X, data_Y = create_dataset(dataset)
train_X, train_Y = data_X[:int(0.8 * len(data_X))], data_Y[:int(0.8 * len(data_Y))]
test_X, test_Y = data_Y[int(0.8 * len(data_X)):], data_Y[int(0.8 * len(data_Y)):]
train_X = train_X.reshape(-1, 1, 3).astype('float32')
train_Y = train_Y.reshape(-1, 1, 1).astype('float32')
test_X = test_X.reshape(-1, 1, 3).astype('float32')
train_X = torch.from_numpy(train_X)
train_Y = torch.from_numpy(train_Y)
test_X = torch.from_numpy(test_X)
class RNN(nn.Module):
def __init__(self, input_size, hidden_size, output_size=1, num_layer=2):
super(RNN, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.num_layer = num_layer
self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
self.linear = nn.Linear(hidden_size, output_size)
def forward(self, x):
out, h = self.rnn(x)
out = self.linear(out[:, -1, :])
return out
net = RNN(3, 20)
criterion = nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)
train_loss = []
test_loss = []
for e in range(1000):
pred = net(train_X)
loss = criterion(pred, train_Y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (e + 1) % 100 == 0:
print('Epoch:{},loss:{:.10f}'.format(e + 1, loss.data.item()))
train_loss.append(loss.item())
plt.plot(train_loss, label='train_loss')
plt.legend()
plt.show()
# 预测值和真实值的代码
net.eval()
pred_test = net(test_X)
pred_test = pred_test.detach().numpy() * scalar
test_Y = test_Y.reshape(-1, 1) * scalar
plt.plot(pred_test, label='predict')
plt.plot(test_Y, label='true')
plt.legend()
plt.show()
```
阅读全文