scipy.sparse.linalg svds

scipy.sparse.linalg.svds是scipy中用于计算稀疏矩阵的奇异值分解（Singular Value Decomposition，SVD）的函数。SVD是一种重要的矩阵分解方法，可以将一个矩阵分解为三个矩阵的乘积，其中一个矩阵是对角矩阵，对角线上的元素称为奇异值。SVD在很多领域都有广泛的应用，例如图像处理、推荐系统、自然语言处理等。scipy.sparse.linalg.svds函数可以用于计算稀疏矩阵的前k个奇异值和对应的奇异向量。 #### 引用[.reference_title] - *1* *2* *3* [python之scipy库详解](https://blog.csdn.net/RosebudTT/article/details/105979939)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v91^control,239^v3^insert_chatgpt"}} ] [.reference_item] [ .reference_list ]

import pandas as pd import numpy as np # 计算用户对歌曲的播放比例 triplet_dataset_sub_song_merged_sum_df = triplet_dataset_sub_song_mergedpd[['user', 'listen_count']].groupby('user').sum().reset_index() triplet_dataset_sub_song_merged_sum_df.rename(columns={'listen_count': 'total_listen_count'}, inplace=True) triplet_dataset_sub_song_merged = pd.merge(triplet_dataset_sub_song_mergedpd, triplet_dataset_sub_song_merged_sum_df) triplet_dataset_sub_song_mergedpd['fractional_play_count'] = triplet_dataset_sub_song_mergedpd['listen_count'] / triplet_dataset_sub_song_merged['total_listen_count'] # 将用户和歌曲编码为数字 small_set = triplet_dataset_sub_song_mergedpd user_codes = small_set.user.drop_duplicates().reset_index() song_codes = small_set.song.drop_duplicates().reset_index() user_codes.rename(columns={'index': 'user_index'}, inplace=True) song_codes.rename(columns={'index': 'song_index'}, inplace=True) song_codes['so_index_value'] = list(song_codes.index) user_codes['us_index_value'] = list(user_codes.index) small_set = pd.merge(small_set, song_codes, how='left') small_set = pd.merge(small_set, user_codes, how='left') # 将数据转换为稀疏矩阵形式 from scipy.sparse import coo_matrix mat_candidate = small_set[['us_index_value', 'so_index_value', 'fractional_play_count']] data_array = mat_candidate.fractional_play_count.values row_array = mat_candidate.us_index_value.values col_array = mat_candidate.so_index_value.values data_sparse = coo_matrix((data_array, (row_array, col_array)), dtype=float) # 使用SVD方法进行矩阵分解并进行推荐 from scipy.sparse import csc_matrix from scipy.sparse.linalg import svds import math as mt def compute_svd(urm, K): U, s, Vt = svds(urm, K) dim = (len(s), len(s)) S = np.zeros(dim, dtype=np.float32) for i in range(0, len(s)): S[i, i] = mt.sqrt(s[i]) U = csc_matrix(U, dtype=np.float32) S = csc_matrix(S, dtype=np.float32) Vt = csc_matrix(Vt, dtype=np.float32) return U, S, Vt def compute_estimated_matrix(urm, U, S, Vt, uTest, K, test): rightTerm = S * Vt max_recommendation = 250 estimatedRatings = np.zeros(shape=(MAX_UID, MAX_PID), dtype=np.float16) recomendRatings = np.zeros(shape=(MAX_UID, max_recommendation), dtype=np.float16) for userTest in uTest: prod = U[userTest, :] * rightTerm estimatedRatings[userTest, :] = prod.todense() recomendRatings[userTest, :] = (-estimatedRatings[userTest, :]).argsort()[:max_recommendation] return recomendRatings K = 50 urm = data_sparse MAX_PID = urm.shape[1] MAX_UID = urm.shape[0] U, S, Vt = compute_svd(urm, K) uTest = [4, 5, 6, 7, 8, 73, 23] # uTest=[1b5bb32767963cbc215d27a24fef1aa01e933025] uTest_recommended_items = compute_estimated_matrix(urm, U, S, Vt 继续将这段代码输出完整

, 并进行解释： ``` import pandas as pd import numpy as np # 计算用户对歌曲的播放比例 triplet_dataset_sub_song_merged_sum_df = triplet_dataset_sub_song_merged[['user', 'listen_count']].groupby('user').sum().reset_index() triplet_dataset_sub_song_merged_sum_df.rename(columns={'listen_count': 'total_listen_count'}, inplace=True) triplet_dataset_sub_song_merged = pd.merge(triplet_dataset_sub_song_merged, triplet_dataset_sub_song_merged_sum_df) triplet_dataset_sub_song_merged['fractional_play_count'] = triplet_dataset_sub_song_merged['listen_count'] / triplet_dataset_sub_song_merged['total_listen_count'] # 将用户和歌曲编码为数字 small_set = triplet_dataset_sub_song_merged user_codes = small_set.user.drop_duplicates().reset_index() song_codes = small_set.song.drop_duplicates().reset_index() user_codes.rename(columns={'index': 'user_index'}, inplace=True) song_codes.rename(columns={'index': 'song_index'}, inplace=True) song_codes['so_index_value'] = list(song_codes.index) user_codes['us_index_value'] = list(user_codes.index) small_set = pd.merge(small_set, song_codes, how='left') small_set = pd.merge(small_set, user_codes, how='left') # 将数据转换为稀疏矩阵形式 from scipy.sparse import coo_matrix mat_candidate = small_set[['us_index_value', 'so_index_value', 'fractional_play_count']] data_array = mat_candidate.fractional_play_count.values row_array = mat_candidate.us_index_value.values col_array = mat_candidate.so_index_value.values data_sparse = coo_matrix((data_array, (row_array, col_array)), dtype=float) # 使用SVD方法进行矩阵分解并进行推荐 from scipy.sparse import csc_matrix from scipy.sparse.linalg import svds import math as mt def compute_svd(urm, K): U, s, Vt = svds(urm, K) dim = (len(s), len(s)) S = np.zeros(dim, dtype=np.float32) for i in range(0, len(s)): S[i, i] = mt.sqrt(s[i]) U = csc_matrix(U, dtype=np.float32) S = csc_matrix(S, dtype=np.float32) Vt = csc_matrix(Vt, dtype=np.float32) return U, S, Vt def compute_estimated_matrix(urm, U, S, Vt, uTest, K, test): rightTerm = S * Vt max_recommendation = 250 estimatedRatings = np.zeros(shape=(MAX_UID, MAX_PID), dtype=np.float16) recomendRatings = np.zeros(shape=(MAX_UID, max_recommendation), dtype=np.float16) for userTest in uTest: prod = U[userTest, :] * rightTerm estimatedRatings[userTest, :] = prod.todense() recomendRatings[userTest, :] = (-estimatedRatings[userTest, :]).argsort()[:max_recommendation] return recomendRatings K = 50 urm = data_sparse MAX_PID = urm.shape[1] MAX_UID = urm.shape[0] U, S, Vt = compute_svd(urm, K) uTest = [4, 5, 6, 7, 8, 73, 23] # uTest=[1b5bb32767963cbc215d27a24fef1aa01e933025] uTest_recommended_items = compute_estimated_matrix(urm, U, S, Vt, uTest, K, test) ``` 这段代码实现了一个基于SVD方法的推荐系统，具体步骤如下： 1. 读入数据，计算每个用户对每首歌曲的播放比例。 2. 将用户和歌曲编码为数字，转换为稀疏矩阵形式。 3. 使用SVD方法进行矩阵分解，得到用户和歌曲的隐向量。 4. 对于给定的测试用户，使用隐向量和分解后的矩阵计算出该用户对每首歌曲的预测评分。 5. 根据预测评分，为该用户推荐最高的250首歌曲。其中，SVD方法是一种矩阵分解的方法，可以将一个大矩阵分解为多个小矩阵，这些小矩阵可以表示出原始矩阵中的潜在特征（即隐向量）。通过计算用户和歌曲的隐向量，可以获得它们之间的相似度，从而进行推荐。

from scipy.sparse.linalg import eigsh, LinearOperator from scipy.sparse import isspmatrix, is_pydata_spmatrix class SVDRecommender: def init(self, k=50, ncv=None, tol=0, which='LM', v0=None, maxiter=None, return_singular_vectors=True, solver='arpack'): self.k = k self.ncv = ncv self.tol = tol self.which = which self.v0 = v0 self.maxiter = maxiter self.return_singular_vectors = return_singular_vectors self.solver = solver def svds(self, A): largest = self.which == 'LM' if not largest and self.which != 'SM': raise ValueError("which must be either 'LM' or 'SM'.") if not (isinstance(A, LinearOperator) or isspmatrix(A) or is_pydata_spmatrix(A)): A = np.asarray(A) n, m = A.shape if self.k <= 0 or self.k >= min(n, m): raise ValueError("k must be between 1 and min(A.shape), k=%d" % self.k) if isinstance(A, LinearOperator): if n > m: X_dot = A.matvec X_matmat = A.matmat XH_dot = A.rmatvec XH_mat = A.rmatmat else: X_dot = A.rmatvec X_matmat = A.rmatmat XH_dot = A.matvec XH_mat = A.matmat dtype = getattr(A, 'dtype', None) if dtype is None: dtype = A.dot(np.zeros([m, 1])).dtype else: if n > m: X_dot = X_matmat = A.dot XH_dot = XH_mat = _herm(A).dot else: XH_dot = XH_mat = A.dot X_dot = X_matmat = _herm(A).dot def matvec_XH_X(x): return XH_dot(X_dot(x)) def matmat_XH_X(x): return XH_mat(X_matmat(x)) XH_X = LinearOperator(matvec=matvec_XH_X, dtype=A.dtype, matmat=matmat_XH_X, shape=(min(A.shape), min(A.shape))) eigvals, eigvec = eigsh(XH_X, k=self.k, tol=self.tol ** 2, maxiter=self.maxiter, ncv=self.ncv, which=self.which, v0=self.v0) eigvals = np.maximum(eigvals.real, 0) t = eigvec.dtype.char.lower() factor = {'f': 1E3, 'd': 1E6} cond = factor[t] * np.finfo(t).eps cutoff = cond * np.max(eigvals) above_cutoff = (eigvals > cutoff) nlarge = above_cutoff.sum() nsmall = self.k - nlarge slarge = np.sqrt(eigvals[above_cutoff]) s = np.zeros_like(eigvals) s[:nlarge] = slarge if not self.return_singular_vectors: return np.sort(s) if n > m: vlarge = eigvec[:, above_cutoff] ularge = X_matmat(vlarge) / slarge if self.return_singular_vectors != 'vh' else None vhlarge = _herm(vlarge) else: ularge = eigvec[:, above_cutoff] vhlarge = _herm(X_matmat(ularge) / slarge) if self.return_singular_vectors != 'u' else None u = _augmented_orthonormal_cols(ularge, nsmall) if ularge is not None else None vh = _augmented_orthonormal_rows(vhlarge, nsmall) if vhlarge is not None else None indexes_sorted = np.argsort(s) s = s[indexes_sorted] if u is not None: u = u[:, indexes_sorted] if vh is not None: vh = vh[indexes_sorted] return u, s, vh def _augmented_orthonormal_cols(U, n): if U.shape[0] <= n: return U Q, R = np.linalg.qr(U) return Q[:, :n] def _augmented_orthonormal_rows(V, n): if V.shape[1] <= n: return V Q, R = np.linalg.qr(V.T) return Q[:, :n].T def _herm(x): return np.conjugate(x.T)这段代码中使用的scipy包太旧了，导致会出现报错信息为：cannot import name 'is_pydata_spmatrix' from 'scipy.sparse' (D:\Anaconda\lib\site-packages\scipy\sparse_init.py)，将这段代码修改为使用最新版的scipy包

好的，将使用最新版的scipy包进行修改。首先需要将代码中的`is_pydata_spmatrix`替换为`isspmatrix_csr`。同时需要将代码开头的导入语句修改为`from scipy.sparse.linalg import eigsh, LinearOperator from scipy.sparse import isspmatrix_csr`。

scipy.sparse.linalg svds

相关推荐

scikit-sparse:稀疏矩阵工具扩展了scipy.sparse，但许可证不兼容

详解利用Python scipy.signal.filtfilt() 实现信号滤波

python统计函数库scipy.stats的用法解析

基于矩阵分解的模型代码实现

能否用python语言实现稀疏保持重构矩阵的计算

如果不使用sklearn.cross_decomposition库，代码又该怎么写呢？我听说还有个叫稀疏典型相关性分析的方法，你知道吗？这个的代码又该怎么写呢

低秩稀疏矩阵分解代码

使用python实现Latent factor model和Collaborative filtering model

格文纽曼算法解决社区规划问题python实现

Python库中还有哪些可以实现TF-IDF、TextRank和LSA三种算法的程序包,并通过实例实现关键词提取。

张量补全代码python

找出一个运用矩阵分解解决实际问题的实例,并编写程序实现。

稀疏贝叶斯学习的高效DOA估计代码怎么写

给我用python写一段程序，实现用RPCA算法对图像的分类

最新推荐

pre_o_1csdn63m9a1bs0e1rr51niuu33e.a

matlab建立计算力学课程的笔记和文件.zip

FT-Prog-v3.12.38.643-FTD USB 工作模式设定及eprom读写

matlab基于RRT和人工势场法混合算法的路径规划.zip

matlab基于matlab的两步定位软件定义接收机的开源GNSS直接位置估计插件模块.zip

zigbee-cluster-library-specification

管理建模和仿真的文件

实现实时数据湖架构：Kafka与Hive集成

2． 通过python绘制y=e-xsin(2πx)图像

JSBSim Reference Manual

2．通过python绘制y=e-xsin(2πx)图像