帮我用csv的库改写以下代码,而不用pandas库: data = pd.read_csv('.\\csv文件\Louban_0.csv','r', header=None)
时间: 2024-05-25 22:18:06 浏览: 79
import csv
with open('.\\csv文件\Louban_0.csv', 'r') as file:
data = list(csv.reader(file))
# header参数在csv.reader()中没有,如果需要去掉标题行,可以使用data[1:]来获取除标题行以外的数据。
相关问题
import seaborn as sns import pandas as pd import matplotlib.pyplot as plt from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LogisticRegression df = pd.read_csv('heart.csv') corrmat = df.corr() top_corr_features = corrmat.index plt.figure(figsize=(16,16)) sns.heatmap(df[top_corr_features].corr(),annot=True,cmap="RdYlGn") plt.show() sns.set_style('whitegrid') sns.countplot(x='target',data=df,palette='RdBu_r') plt.show() dataset = pd.get_dummies(df, columns=['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']) columns_to_scale = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak'] scaler = StandardScaler() dataset[columns_to_scale] = scaler.fit_transform(dataset[columns_to_scale]) dataset.head() y = dataset['target'] X = dataset.drop(['target'], axis=1) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) logreg = LogisticRegression() logreg.fit(X_train, y_train) print("Training accuracy: {:.3f}".format(logreg.score(X_train, y_train))) print("Test accuracy: {:.3f}".format(logreg.score(X_test, y_test)))改写为ROC代码
以下是将 Logistic Regression 模型的评估指标改为 ROC 曲线的代码:
```python
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
df = pd.read_csv('heart.csv')
# 查看特征相关性
corrmat = df.corr()
top_corr_features = corrmat.index
plt.figure(figsize=(16,16))
sns.heatmap(df[top_corr_features].corr(),annot=True,cmap="RdYlGn")
plt.show()
# 查看数据分布
sns.set_style('whitegrid')
sns.countplot(x='target',data=df,palette='RdBu_r')
plt.show()
# 对数据进行 One-hot 编码和标准化
dataset = pd.get_dummies(df, columns=['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal'])
columns_to_scale = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
dataset[columns_to_scale] = scaler.fit_transform(dataset[columns_to_scale])
dataset.head()
# 划分数据集
y = dataset['target']
X = dataset.drop(['target'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
# 训练模型
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
# 评估模型
y_train_pred = logreg.predict_proba(X_train)[:, 1]
y_test_pred = logreg.predict_proba(X_test)[:, 1]
fpr_train, tpr_train, thresholds_train = roc_curve(y_train, y_train_pred)
fpr_test, tpr_test, thresholds_test = roc_curve(y_test, y_test_pred)
roc_auc_train = auc(fpr_train, tpr_train)
roc_auc_test = auc(fpr_test, tpr_test)
# 绘制 ROC 曲线
plt.figure()
plt.plot(fpr_train, tpr_train, color='darkorange', lw=2, label='Train ROC curve (area = %0.2f)' % roc_auc_train)
plt.plot(fpr_test, tpr_test, color='navy', lw=2, label='Test ROC curve (area = %0.2f)' % roc_auc_test)
plt.plot([0, 1], [0, 1], color='black', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.show()
```
在这个代码中,我们使用 `roc_curve` 函数计算训练集和测试集的 FPR 和 TPR,然后使用 `auc` 函数计算 ROC 曲线下的面积。最后,我们使用 `matplotlib` 库绘制 ROC 曲线。
将冒号后面的代码改写成一个nn.module类:data1 = pd.read_csv("终极1.csv", usecols=[17], encoding='gb18030') df = data1.fillna(method='ffill') data = df.values.reshape(-1, 1) scaler = MinMaxScaler(feature_range=(0, 1)) data = scaler.fit_transform(data) train_size = int(len(data) * 0.8) test_size = len(data) - train_size train, test = data[0:train_size, :], data[train_size:len(data), :] def create_dataset(dataset, look_back=1): dataX, dataY = [], [] for i in range(len(dataset)-look_back-1): a = dataset[i:(i+look_back), 0] dataX.append(a) dataY.append(dataset[i + look_back, 0]) return np.array(dataX), np.array(dataY) look_back = 30 trainX, trainY = create_dataset(train, look_back) testX, testY = create_dataset(test, look_back) trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1])) model = Sequential() model.add(LSTM(50, input_shape=(1, look_back), return_sequences=True)) model.add(LSTM(50)) model.add(Dense(1)) model.compile(loss='mean_squared_error', optimizer='adam') model.fit(trainX, trainY, epochs=6, batch_size=1, verbose=2) trainPredict = model.predict(trainX) testPredict = model.predict(testX) trainPredict = scaler.inverse_transform(trainPredict) trainY = scaler.inverse_transform([trainY]) testPredict = scaler.inverse_transform(testPredict) testY = scaler.inverse_transform([testY])
以下是将代码改写为nn.module类的示例:
```python
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
class LSTMModel(nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
super(LSTMModel, self).__init__()
self.hidden_dim = hidden_dim
self.num_layers = num_layers
self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
self.fc = nn.Linear(hidden_dim, output_dim)
def forward(self, x):
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
out = self.fc(out[:, -1, :])
return out
# 读取数据
data1 = pd.read_csv("终极1.csv", usecols=[17], encoding='gb18030')
df = data1.fillna(method='ffill')
data = df.values.reshape(-1, 1)
# 数据归一化
scaler = MinMaxScaler(feature_range=(0, 1))
data = scaler.fit_transform(data)
# 划分数据集
train_size = int(len(data) * 0.8)
test_size = len(data) - train_size
train, test = data[0:train_size, :], data[train_size:len(data), :]
# 创建数据集
def create_dataset(dataset, look_back=1):
dataX, dataY = [], []
for i in range(len(dataset)-look_back-1):
a = dataset[i:(i+look_back), 0]
dataX.append(a)
dataY.append(dataset[i + look_back, 0])
return np.array(dataX), np.array(dataY)
look_back = 30
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))
# 模型训练
input_dim = 1
hidden_dim = 50
output_dim = 1
num_layers = 2
model = LSTMModel(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 6
for epoch in range(num_epochs):
outputs = model(trainX)
optimizer.zero_grad()
loss = criterion(outputs, trainY)
loss.backward()
optimizer.step()
if epoch % 1 == 0:
print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
# 预测结果
trainPredict = model(trainX)
testPredict = model(testX)
trainPredict = scaler.inverse_transform(trainPredict.detach().numpy())
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict.detach().numpy())
testY = scaler.inverse_transform([testY])
```