import pandas as pd import numpy as np import scipy.stats as stats import seaborn as sns from sklearn.metrics import RocCurveDisplay from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix,accuracy_score from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LogisticRegression from sklearn import tree from sklearn.decomposition import PCA import matplotlib.pyplot as plt from sklearn.tree import DecisionTreeClassifier请在此基础上续写代码块,要求是(1) 读入数据后,选取自变量"sysBP", "diaBP","age","totChol","BMI", "heartRate", "glucose"记为X,因变量"TenYearCHD"记为y,组成新的数据集。¶
import pandas as pd import numpy as np import scipy.stats as stats import seaborn as sns from sklearn.metrics import RocCurveDisplay from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix,accuracy_score from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LogisticRegression from sklearn import tree from sklearn.decomposition import PCA import matplotlib.pyplot as plt from sklearn.tree import DecisionTreeClassifier
读入数据
data = pd.read_csv("heart.csv")
选取自变量
X = data[["sysBP", "diaBP", "age", "totChol", "BMI", "heartRate", "glucose"]]
因变量
y = data["TenYearCHD"]
组成新的数据集
new_data = pd.concat([X, y], axis=1)
打印新的数据集
print(new_data.head())
相关推荐


















