fit_time = time.time()
时间: 2024-06-21 18:03:14 浏览: 3
`time.time()` 是 Python 标准库 `time` 模块中的函数,用于获取当前时间戳;语句 `fit_time = time.time()` 则是把这个时间戳赋给变量 `fit_time`。该函数返回一个浮点数,表示自 Unix 纪元(1970年1月1日00:00:00 UTC)以来经过的秒数。在机器学习中,通常用它来计算模型的训练时间:在训练开始前记录一次时间戳,训练结束后再取一次,两者之差就是训练耗时,例如 `t0 = time.time(); model.fit(X); fit_time = time.time() - t0`。
相关问题
import numpy as np import matplotlib.pyplot as plt %matplotlib inline from sklearn.datasets import load_digits data, labels = load_digits(return_X_y=True) (n_samples, n_features), n_digits = data.shape, np.unique(labels).size print(f"# 类别数: {n_digits}; # 样本数: {n_samples}; # 特征数: {n_features}") print(data[:2]) from time import time from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.cluster import KMeans kmeans=KMeans(n_clusters=10, random_state=42) ### 创建管道并训练,记录训练时间 t0 = time() estimator = make_pipeline(StandardScaler(), kmeans).fit(data) fit_time = time() - t0 print("训练时间:", fit_time) ### 通过惯性(inertia)聚类的性能 print(estimator) print(estimator[-1].inertia_) result1={"fit-time":fit_time,"inertia:":estimator[-1].inertia_ } from sklearn.decomposition import PCA ### ??编程使用PCA分解,得到10个主成分,放到变量 components 中--------------------------- pca = PCA(n_components=10) components = pca.fit_transform(data) ###------------------------------------------------------------------------- ### 创建KMeas对象 kmeans=KMeans(n_clusters=10, init="k-means++", random_state=42) ### 创建管道并训练,记录训练时间 t0 = time() estimator = make_pipeline(StandardScaler(), kmeans).fit(data) fit_time = time() - t0 print("训练时间:", fit_time) ### 通过惯性(inertia)聚类的性能 print(estimator) print(estimator[-1].inertia_) result2={"fit-time":fit_time,"inertia:":estimator[-1].inertia_ } from sklearn.decomposition import PCA ### ??编程 选择保持 98%的信息的PCA模型,用名为pca的变量表示 ---------- pca = PCA(n_components=0.98) ###------------------------------------------------------------------- ###创建KMeas对象 kmeans=KMeans(n_clusters=10, random_state=42) ###??编程 创建一个 标准化+PCA降维+KMeas聚类的管道并训练,记录训练时间 t0 = time() estimator = make_pipeline(StandardScaler(), pca, kmeans).fit(data) ##增加pca预处理 fit_time = time() - t0 print("训练时间:", fit_time) ### 通过惯性(inertia)聚类的性能 print(estimator) print(estimator[-1].inertia_) result3={"fit-time":fit_time,"inertia:":estimator[-1].inertia_ }可以选择不同的KMeans的参数对digits进行聚类,比较实验结果,并选择一个针对此问题的最好模型
这份代码是用数字数据集(load_digits)进行 KMeans 聚类和 PCA 降维后再聚类的实验,可以通过调整 KMeans 的参数来比较不同实验结果,最终选择最好的模型。
第一部分是载入数据集并展示基本信息:
```python
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets import load_digits
# Load the digits dataset as a feature matrix and a label vector.
data, labels = load_digits(return_X_y=True)
# Dataset dimensions and the number of distinct label values.
n_samples, n_features = data.shape
n_digits = np.unique(labels).size
print(f"# 类别数: {n_digits}; # 样本数: {n_samples}; # 特征数: {n_features}")
# Show the first two rows of the feature matrix.
print(data[:2])
```
第二部分是使用 KMeans 进行聚类,并计算惯性(inertia):
```python
from time import time
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
# K-means estimator with 10 clusters and a fixed random seed.
kmeans = KMeans(n_clusters=10, random_state=42)

# Build the scaling + clustering pipeline and fit it; both steps are timed.
t0 = time()
estimator = make_pipeline(StandardScaler(), kmeans)
estimator.fit(data)
fit_time = time() - t0
print("训练时间:", fit_time)

# Report the fitted pipeline and the inertia of its last step (the KMeans model).
print(estimator)
print(estimator[-1].inertia_)
# NOTE(review): the "inertia:" key carries a trailing colon; it is kept exactly
# as in the original so the result dictionaries stay comparable.
result1 = {
    "fit-time": fit_time,
    "inertia:": estimator[-1].inertia_,
}
```
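第三部分先用 PCA 分解出 10 个主成分(结果保存在变量 components 中),再创建以 k-means++ 初始化的 KMeans 进行聚类。注意:下面的管道仍然是"标准化 + KMeans"并直接在原始 data 上训练,并没有用到 pca 或 components,因此其结果应与第二部分基本相同: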
```python
from sklearn.decomposition import PCA
# Fit a 10-component PCA on the data; `components` receives the value
# returned by fit_transform.
pca = PCA(n_components=10)
components = pca.fit_transform(data)
# NOTE(review): neither `pca` nor `components` is used below -- the pipeline is
# fitted on `data` directly. Confirm whether the PCA result was meant to feed
# the clustering step (e.g. as the KMeans `init`).

# K-means estimator with explicit k-means++ initialisation.
kmeans = KMeans(n_clusters=10, init="k-means++", random_state=42)

# Build the scaling + clustering pipeline and fit it; both steps are timed.
t0 = time()
estimator = make_pipeline(StandardScaler(), kmeans)
estimator.fit(data)
fit_time = time() - t0
print("训练时间:", fit_time)

# Report the fitted pipeline and the inertia of its last step (the KMeans model).
print(estimator)
print(estimator[-1].inertia_)
# NOTE(review): the "inertia:" key carries a trailing colon; kept as in the original.
result2 = {
    "fit-time": fit_time,
    "inertia:": estimator[-1].inertia_,
}
```
第四部分是选择保持 98% 信息的 PCA 模型,并使用 KMeans 进行聚类:
```python
from sklearn.decomposition import PCA
# PCA configured with n_components=0.98, i.e. keep enough components to
# retain 98% of the variance.
pca = PCA(n_components=0.98)

# K-means estimator with 10 clusters and a fixed random seed.
kmeans = KMeans(n_clusters=10, random_state=42)

# Pipeline: standardise -> PCA reduction -> K-means; construction and fit are timed.
t0 = time()
estimator = make_pipeline(StandardScaler(), pca, kmeans)
estimator.fit(data)
fit_time = time() - t0
print("训练时间:", fit_time)

# Report the fitted pipeline and the inertia of its last step (the KMeans model).
print(estimator)
print(estimator[-1].inertia_)
# NOTE(review): the "inertia:" key carries a trailing colon; kept as in the original.
result3 = {
    "fit-time": fit_time,
    "inertia:": estimator[-1].inertia_,
}
```
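最后,可以比较 result1、result2、result3 中记录的训练时间(fit-time)和惯性(inertia),选择最合适的模型。需要注意,result3 的惯性是在 PCA 降维后的空间中计算的,与前两个结果不在同一特征空间,不能直接按数值大小比较。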
import time import numpy as np import matplotlib.pyplot as plt from sklearn.cluster import MiniBatchKMeans, KMeans from sklearn.metrics.pairwise import pairwise_distances_argmin from sklearn.datasets import make_blobs # Generate sample data np.random.seed(0) batch_size = 45 centers = [[1, 1], [-1, -1], [1, -1]] n_clusters = len(centers) X, labels_true = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7) # Compute clustering with Means k_means = KMeans(init='k-means++', n_clusters=3, n_init=10) t0 = time.time() k_means.fit(X) t_batch = time.time() - t0 # Compute clustering with MiniBatchKMeans mbk = MiniBatchKMeans(init='k-means++', n_clusters=3, batch_size=batch_size, n_init=10, max_no_improvement=10, verbose=0) t0 = time.time() mbk.fit(X) t_mini_batch = time.time() - t0 # Plot result fig = plt.figure(figsize=(8, 3)) fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9) colors = ['#4EACC5', '#FF9C34', '#4E9A06'] # We want to have the same colors for the same cluster from the # MiniBatchKMeans and the KMeans algorithm. Let's pair the cluster centers per # closest one. k_means_cluster_centers = k_means.cluster_centers_ order = pairwise_distances_argmin(k_means.cluster_centers_, mbk.cluster_centers_) mbk_means_cluster_centers = mbk.cluster_centers_[order] k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers) mbk_means_labels = pairwise_distances_argmin(X, mbk_means_cluster_centers) # KMeans for k, col in zip(range(n_clusters), colors): my_members = k_means_labels == k cluster_center = k_means_cluster_centers[k] plt.plot(X[my_members, 0], X[my_members, 1], 'w', markerfacecolor=col, marker='.') plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=6) plt.title('KMeans') plt.xticks(()) plt.yticks(()) plt.show() 这段代码每一句在干什么
这段代码主要是使用Scikit-learn库中的KMeans和MiniBatchKMeans算法对生成的样本数据进行聚类,并通过可视化方式展示聚类结果。
具体每一句代码的作用如下:
1. 导入所需的库和模块
```
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import MiniBatchKMeans, KMeans
from sklearn.metrics.pairwise import pairwise_distances_argmin
from sklearn.datasets import make_blobs
```
2. 生成样本数据
```
# Seed NumPy's global random generator so make_blobs (called without an
# explicit random_state) produces the same samples on every run.
np.random.seed(0)
# Mini-batch size passed to MiniBatchKMeans further down.
batch_size = 45
# True blob centres; the number of clusters follows from them.
centers = [[1, 1], [-1, -1], [1, -1]]
n_clusters = len(centers)
# Draw 3000 samples around the centres with standard deviation 0.7.
X, labels_true = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7)
```
3. 使用KMeans算法进行聚类
```
# Configure K-means: k-means++ initialisation, 3 clusters, n_init=10.
k_means = KMeans(
    init='k-means++',
    n_clusters=3,
    n_init=10,
)
# Fit on X and store the elapsed wall-clock time in t_batch.
t0 = time.time()
k_means.fit(X)
t_batch = time.time() - t0
```
4. 使用MiniBatchKMeans算法进行聚类
```
# Configure MiniBatchKMeans with the batch size defined earlier.
mbk = MiniBatchKMeans(
    init='k-means++',
    n_clusters=3,
    batch_size=batch_size,
    n_init=10,
    max_no_improvement=10,
    verbose=0,
)
# Fit on X and store the elapsed wall-clock time in t_mini_batch.
t0 = time.time()
mbk.fit(X)
t_mini_batch = time.time() - t0
```
5. 可视化聚类结果
```
# Create the figure and tighten its margins.
fig = plt.figure(figsize=(8, 3))
fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ['#4EACC5', '#FF9C34', '#4E9A06']

# Pair each KMeans centre with its closest MiniBatchKMeans centre so that the
# same cluster gets the same index (and therefore colour) under both models.
k_means_cluster_centers = k_means.cluster_centers_
order = pairwise_distances_argmin(k_means.cluster_centers_, mbk.cluster_centers_)
mbk_means_cluster_centers = mbk.cluster_centers_[order]

# Assign every sample to its nearest centre under each model.
k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers)
# Computed for the MiniBatchKMeans model; not plotted in this snippet.
mbk_means_labels = pairwise_distances_argmin(X, mbk_means_cluster_centers)

# Plot the KMeans result: members of each cluster as dots in the cluster's
# colour, and the cluster centre as a larger black-edged circle.
for k, col in zip(range(n_clusters), colors):
    my_members = k_means_labels == k
    cluster_center = k_means_cluster_centers[k]
    plt.plot(X[my_members, 0], X[my_members, 1], 'w',
             markerfacecolor=col, marker='.')
    plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=6)

# Title the plot, hide the axis ticks, and display it.
plt.title('KMeans')
plt.xticks(())
plt.yticks(())
plt.show()
```
以上就是这段代码的每一句话的作用。
相关推荐
![rar](https://img-home.csdnimg.cn/images/20210720083606.png)
![rar](https://img-home.csdnimg.cn/images/20210720083606.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)