X = data['trestbps'].values.reshape(-1, 1)
时间: 2024-04-28 07:26:18 浏览: 8
这段代码是用来从heart.csv数据集中选择'trestbps'这一列,并将其转化为NumPy数组。同时,由于KNN算法的输入数据必须是二维数组,因此使用reshape()函数将一维数组转换为二维数组。其中,参数-1表示自动计算数组的长度,而1表示数组的列数为1,即二维数组中每个元素只有一个特征。这样做是为了将'trestbps'这一特征变成一个二维的NumPy数组,以便于后续的特征缩放和建模。
相关问题
新数据前面多了一列无用的,每列用逗号隔开,改代码data = pd.read_csv('/home/w123/Documents/data-analysis/40-0-data/ratio/40-0-ratio.txt') y = data.iloc[:, :-1].values.reshape(-1, 1) X = data.iloc[:, -1].values.reshape(-1, 1) regressor = LinearRegression() regressor.fit(X, y) y_pred = regressor.predict(X) print("Regression Function: y = {:.2f} + {:.2f}x".format(regressor.intercept_[0], regressor.coef_[0][0])) plt.scatter(X, y, color='blue') plt.plot(X, y_pred, color='red') data2 = pd.read_csv('/home/w123/Documents/data-analysis/40-0-data/ratio/40-5-ratio.txt') y2 = data2.iloc[:, :-1].values.reshape(-1, 1) X2 = data2.iloc[:, -1].values.reshape(-1, 1) regressor2 = LinearRegression() regressor2.fit(X2, y2) y2_pred = regressor2.predict(X2) print("Regression Function: y = {:.2f} + {:.2f}x".format(regressor2.intercept_[0], regressor2.coef_[0][0])) plt.scatter(X2, y2, color='green') plt.plot(X2, y2_pred, color='orange') plt.legend(['Regression Line 2', 'Observations 2']) #3 data3 = pd.read_csv('/home/w123/Documents/data-analysis/40-0-data/ratio/40-10-ratio.txt') y3 = data3.iloc[:, :-1].values.reshape(-1, 1) X3 = data3.iloc[:, -1].values.reshape(-1, 1) regressor3 = LinearRegression() regressor3.fit(X3, y3) y3_pred = regressor3.predict(X3) print("Regression Function: y = {:.2f} + {:.2f}x".format(regressor3.intercept_[0], regressor.coef_[0][0])) plt.scatter(X3, y3, color='purple') plt.plot(X3, y3_pred, color='yellow') plt.title('Linear Regression') plt.xlabel('Independent Variable') plt.ylabel('Dependent Variable') plt.legend(['Regression Line 1', 'Observations 1', 'Regression Line 2', 'Observations 2', 'Regression Line 3', 'Observations 3']) plt.show()
你想问如何处理新数据前面多出来的一列无用的数据,对吗?可以使用`usecols`参数来指定读取哪些列,例如`usecols=[1]`来只读取第二列数据。修改后的代码如下:
```
data = pd.read_csv('/home/w123/Documents/data-analysis/40-0-data/ratio/40-0-ratio.txt', usecols=[1, 2])
y = data.iloc[:, :-1].values.reshape(-1, 1)
X = data.iloc[:, -1].values.reshape(-1, 1)
regressor = LinearRegression()
regressor.fit(X, y)
y_pred = regressor.predict(X)
print("Regression Function: y = {:.2f} + {:.2f}x".format(regressor.intercept_[0], regressor.coef_[0][0]))
plt.scatter(X, y, color='blue')
plt.plot(X, y_pred, color='red')
data2 = pd.read_csv('/home/w123/Documents/data-analysis/40-0-data/ratio/40-5-ratio.txt', usecols=[1, 2])
y2 = data2.iloc[:, :-1].values.reshape(-1, 1)
X2 = data2.iloc[:, -1].values.reshape(-1, 1)
regressor2 = LinearRegression()
regressor2.fit(X2, y2)
y2_pred = regressor2.predict(X2)
print("Regression Function: y = {:.2f} + {:.2f}x".format(regressor2.intercept_[0], regressor2.coef_[0][0]))
plt.scatter(X2, y2, color='green')
plt.plot(X2, y2_pred, color='orange')
plt.legend(['Regression Line 2', 'Observations 2'])
data3 = pd.read_csv('/home/w123/Documents/data-analysis/40-0-data/ratio/40-10-ratio.txt', usecols=[1, 2])
y3 = data3.iloc[:, :-1].values.reshape(-1, 1)
X3 = data3.iloc[:, -1].values.reshape(-1, 1)
regressor3 = LinearRegression()
regressor3.fit(X3, y3)
y3_pred = regressor3.predict(X3)
print("Regression Function: y = {:.2f} + {:.2f}x".format(regressor3.intercept_[0], regressor.coef_[0][0]))
plt.scatter(X3, y3, color='purple')
plt.plot(X3, y3_pred, color='yellow')
plt.title('Linear Regression')
plt.xlabel('Independent Variable')
plt.ylabel('Dependent Variable')
plt.legend(['Regression Line 1', 'Observations 1', 'Regression Line 2', 'Observations 2', 'Regression Line 3', 'Observations 3'])
plt.show()
```
import pandas as pd import matplotlib.pyplot as plt from sklearn.linear_model import LinearRegression data = pd.read_csv('/home/w123/Documents/fatigue_detecting-master/TXT-data/5.14/2/Eye aspect ratio.txt') y = data.iloc[:, :-1].values.reshape(-1, 1) X = data.iloc[:, -1].values.reshape(-1, 1) regressor = LinearRegression() regressor.fit(X, y) y_pred = regressor.predict(X) print("Regression Function: y = {:.2f} + {:.2f}x".format(regressor.intercept_[0], regressor.coef_[0][0])) plt.scatter(X, y, color='blue') plt.plot(X, y_pred, color='red') plt.title('Linear Regression') plt.xlabel('Independent Variable') plt.ylabel('Dependent Variable') plt.legend(['Regression Line', 'Observations']) # 画第二个图 data2 = pd.read_csv('/home/w123/Documents/fatigue_detecting-master/TXT-data/5.14/2/Another file.txt') y2 = data2.iloc[:, :-1].values.reshape(-1, 1) X2 = data2.iloc[:, -1].values.reshape(-1, 1) regressor2 = LinearRegression() regressor2.fit(X2, y2) y_pred2 = regressor2.predict(X2) print("Regression Function: y = {:.2f} + {:.2f}x".format(regressor2.intercept_[0], regressor2.coef_[0][0])) plt.scatter(X2, y2, color='green') plt.plot(X2, y_pred2, color='orange') plt.legend(['Regression Line 2', 'Observations 2']) plt.show()再加一个文件
你需要将第三个文件的路径传递给程序,然后对其进行类似的处理。以下是修改后的代码:
```
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# 读取第一个文件
data = pd.read_csv('/home/w123/Documents/fatigue_detecting-master/TXT-data/5.14/2/Eye aspect ratio.txt')
y = data.iloc[:, :-1].values.reshape(-1, 1)
X = data.iloc[:, -1].values.reshape(-1, 1)
regressor = LinearRegression()
regressor.fit(X, y)
y_pred = regressor.predict(X)
print("Regression Function 1: y = {:.2f} + {:.2f}x".format(regressor.intercept_[0], regressor.coef_[0][0]))
# 画第一个图
plt.scatter(X, y, color='blue')
plt.plot(X, y_pred, color='red')
# 读取第二个文件
data2 = pd.read_csv('/home/w123/Documents/fatigue_detecting-master/TXT-data/5.14/2/Another file.txt')
y2 = data2.iloc[:, :-1].values.reshape(-1, 1)
X2 = data2.iloc[:, -1].values.reshape(-1, 1)
regressor2 = LinearRegression()
regressor2.fit(X2, y2)
y_pred2 = regressor2.predict(X2)
print("Regression Function 2: y = {:.2f} + {:.2f}x".format(regressor2.intercept_[0], regressor2.coef_[0][0]))
# 画第二个图
plt.scatter(X2, y2, color='green')
plt.plot(X2, y_pred2, color='orange')
# 读取第三个文件
data3 = pd.read_csv('/path/to/third/file.txt')
y3 = data3.iloc[:, :-1].values.reshape(-1, 1)
X3 = data3.iloc[:, -1].values.reshape(-1, 1)
regressor3 = LinearRegression()
regressor3.fit(X3, y3)
y_pred3 = regressor3.predict(X3)
print("Regression Function 3: y = {:.2f} + {:.2f}x".format(regressor3.intercept_[0], regressor3.coef_[0][0]))
# 画第三个图
plt.scatter(X3, y3, color='purple')
plt.plot(X3, y_pred3, color='yellow')
plt.title('Linear Regression')
plt.xlabel('Independent Variable')
plt.ylabel('Dependent Variable')
plt.legend(['Regression Line 1', 'Observations 1', 'Regression Line 2', 'Observations 2', 'Regression Line 3', 'Observations 3'])
plt.show()
```
请将`/path/to/third/file.txt`替换为实际的第三个文件路径。