def preprocess(data, window): data_corr = data.corr()['Power'] key = [] for i in data_corr.keys(): if (data_corr[i] > 0.6): key.append(i) data = data[key] data = data.dropna(subset=['Power']) #数据预处理: 删除功率为空的数据组 data = data.fillna(0) # 数据预处理:将缺失值NAN值赋0 data[data < 0] = 0 #数据预处理:对<0的功率值赋为0 scaler = MinMaxScaler() # data[key] = scaler.fit_transform(data[key].to_numpy()) train_x = [] train_y = [] L = len(data) for i in range(L - window): train_seq = data[key][i:i + window] train_x.append(train_seq) train_label = data['Power'][i + window:i + window + 1] train_y.append(train_label) train_x = torch.from_numpy(np.array(train_x)) train_y = torch.from_numpy(np.array(train_y)) train_x = torch.tensor(train_x, dtype=torch.float) train_y = torch.tensor(train_y, dtype=torch.float) print(train_y.type()) return train_x, train_y
时间: 2024-04-27 18:19:41 浏览: 86
这段代码是一个数据预处理的函数,其中包含以下步骤:
1. 首先计算 DataFrame 数据中 'Power' 列与其他列的相关系数,筛选出相关系数大于 0.6 的列作为特征列;
2. 删除 'Power' 列中包含缺失值(NaN)的行;
3. 将 DataFrame 中的缺失值用 0 填充;
4. 对 DataFrame 中小于 0 的功率值赋为 0;
5. 使用 MinMaxScaler 进行特征缩放;
6. 将数据按照窗口大小 window 进行切分,每个窗口内包含 window 个连续的特征值和一个对应的功率值,作为训练数据;
7. 将训练数据转换为 PyTorch 张量,并返回训练数据集 train_x 和 train_y。
需要注意的是,该函数中使用了 PyTorch 中的张量(tensor)作为数据类型,并且使用了 MinMaxScaler 对特征进行缩放。
相关问题
修改以下代码使其能够输出模型预测结果: def open_image(self): file_dialog = QFileDialog() file_paths, _ = file_dialog.getOpenFileNames(self, "选择图片", "", "Image Files (*.png *.jpg *.jpeg)") if file_paths: self.display_images(file_paths) def preprocess_images(self, image_paths): data_transform = transforms.Compose([ transforms.CenterCrop(150), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) self.current_image_paths = [] images = [] for image_path in image_paths: image = Image.open(image_path) image = data_transform(image) image = torch.unsqueeze(image, dim=0) images.append(image) self.current_image_paths.append(image_path) return images def predict_images(self): if not self.current_image_paths: return for i, image_path in enumerate(self.current_image_paths): image = self.preprocess_image(image_path) output = self.model(image) predicted_class = self.class_dict[output.argmax().item()] self.result_labels[i].setText(f"Predicted Class: {predicted_class}") self.progress_bar.setValue((i+1)*20) def display_images(self, image_paths): for i, image_path in enumerate(image_paths): image = QImage(image_path) image = image.scaled(300, 300, Qt.KeepAspectRatio) if i == 0: self.image_label_1.setPixmap(QPixmap.fromImage(image)) elif i == 1: self.image_label_2.setPixmap(QPixmap.fromImage(image)) elif i == 2: self.image_label_3.setPixmap(QPixmap.fromImage(image)) elif i == 3: self.image_label_4.setPixmap(QPixmap.fromImage(image)) elif i == 4: self.image_label_5.setPixmap(QPixmap.fromImage(image))
请注意,代码中的 preprocess_images 函数和 predict_images 函数中的 self.preprocess_image(image_path) 需要修改为 self.preprocess_images([image_path])。修改后的代码如下:
```
def open_image(self):
file_dialog = QFileDialog()
file_paths, _ = file_dialog.getOpenFileNames(self, "选择图片", "", "Image Files (*.png *.jpg *.jpeg)")
if file_paths:
self.display_images(file_paths)
def preprocess_images(self, image_paths):
data_transform = transforms.Compose([
transforms.CenterCrop(150),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
self.current_image_paths = []
images = []
for image_path in image_paths:
image = Image.open(image_path)
image = data_transform(image)
image = torch.unsqueeze(image, dim=0)
images.append(image)
self.current_image_paths.append(image_path)
return images
def predict_images(self):
if not self.current_image_paths:
return
for i, image_path in enumerate(self.current_image_paths):
image = self.preprocess_images([image_path])
output = self.model(image[0])
predicted_class = self.class_dict[output.argmax().item()]
self.result_labels[i].setText(f"Predicted Class: {predicted_class}")
self.progress_bar.setValue((i+1)*20)
def display_images(self, image_paths):
for i, image_path in enumerate(image_paths):
image = QImage(image_path)
image = image.scaled(300, 300, Qt.KeepAspectRatio)
if i == 0:
self.image_label_1.setPixmap(QPixmap.fromImage(image))
elif i == 1:
self.image_label_2.setPixmap(QPixmap.fromImage(image))
elif i == 2:
self.image_label_3.setPixmap(QPixmap.fromImage(image))
elif i == 3:
self.image_label_4.setPixmap(QPixmap.fromImage(image))
elif i == 4:
self.image_label_5.setPixmap(QPixmap.fromImage(image))
```
gan_args = batch_size, learning_rate, noise_dim, 24, 2, (0, 1), dim def preprocess(data, seq_len): ori_data = data[::-1] scaler = MinMaxScaler().fit(ori_data) ori_data = scaler.transform(ori_data) temp_data = [] for i in range(0, len(ori_data) - seq_len): _x = ori_data[i:i + seq_len] temp_data.append(_x) idx = np.random.permutation(len(temp_data)) data = [] for i in range(len(temp_data)): data.append(temp_data[idx[i]]) return data
这段代码是一个用于数据预处理的函数。它接受两个参数:data和seq_len。data是原始数据,seq_len是序列的长度。
首先,代码将原始数据进行了反转([::-1]),然后使用MinMaxScaler对数据进行归一化处理。归一化后的数据存储在ori_data中。
接下来,代码创建了一个空列表temp_data,并通过循环将长度为seq_len的子序列添加到temp_data中。
然后,通过随机重排列的方式对temp_data进行打乱。这里使用了np.random.permutation函数生成一个打乱顺序的索引数组idx。
最后,通过遍历idx,将打乱后的数据按照新的顺序添加到data列表中。
最终,函数返回data,即经过预处理后的数据。