scipy.sparse.linalg svds
时间: 2024-04-24 14:24:51 浏览: 16
scipy.sparse.linalg.svds是scipy中用于计算稀疏矩阵的奇异值分解(Singular Value Decomposition,SVD)的函数。SVD是一种重要的矩阵分解方法,可以将一个矩阵分解为三个矩阵的乘积,其中一个矩阵是对角矩阵,对角线上的元素称为奇异值。SVD在很多领域都有广泛的应用,例如图像处理、推荐系统、自然语言处理等。scipy.sparse.linalg.svds函数可以用于计算稀疏矩阵的前k个奇异值和对应的奇异向量。
#### 引用[.reference_title]
- *1* *2* *3* [python之scipy库详解](https://blog.csdn.net/RosebudTT/article/details/105979939)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v91^control,239^v3^insert_chatgpt"}} ] [.reference_item]
[ .reference_list ]
相关问题
import pandas as pd import numpy as np # 计算用户对歌曲的播放比例 triplet_dataset_sub_song_merged_sum_df = triplet_dataset_sub_song_mergedpd[['user', 'listen_count']].groupby('user').sum().reset_index() triplet_dataset_sub_song_merged_sum_df.rename(columns={'listen_count': 'total_listen_count'}, inplace=True) triplet_dataset_sub_song_merged = pd.merge(triplet_dataset_sub_song_mergedpd, triplet_dataset_sub_song_merged_sum_df) triplet_dataset_sub_song_mergedpd['fractional_play_count'] = triplet_dataset_sub_song_mergedpd['listen_count'] / triplet_dataset_sub_song_merged['total_listen_count'] # 将用户和歌曲编码为数字 small_set = triplet_dataset_sub_song_mergedpd user_codes = small_set.user.drop_duplicates().reset_index() song_codes = small_set.song.drop_duplicates().reset_index() user_codes.rename(columns={'index': 'user_index'}, inplace=True) song_codes.rename(columns={'index': 'song_index'}, inplace=True) song_codes['so_index_value'] = list(song_codes.index) user_codes['us_index_value'] = list(user_codes.index) small_set = pd.merge(small_set, song_codes, how='left') small_set = pd.merge(small_set, user_codes, how='left') # 将数据转换为稀疏矩阵形式 from scipy.sparse import coo_matrix mat_candidate = small_set[['us_index_value', 'so_index_value', 'fractional_play_count']] data_array = mat_candidate.fractional_play_count.values row_array = mat_candidate.us_index_value.values col_array = mat_candidate.so_index_value.values data_sparse = coo_matrix((data_array, (row_array, col_array)), dtype=float) # 使用SVD方法进行矩阵分解并进行推荐 from scipy.sparse import csc_matrix from scipy.sparse.linalg import svds import math as mt def compute_svd(urm, K): U, s, Vt = svds(urm, K) dim = (len(s), len(s)) S = np.zeros(dim, dtype=np.float32) for i in range(0, len(s)): S[i, i] = mt.sqrt(s[i]) U = csc_matrix(U, dtype=np.float32) S = csc_matrix(S, dtype=np.float32) Vt = csc_matrix(Vt, dtype=np.float32) return U, S, Vt def compute_estimated_matrix(urm, U, S, Vt, uTest, K, test): rightTerm = S * Vt max_recommendation = 250 estimatedRatings = np.zeros(shape=(MAX_UID, MAX_PID), dtype=np.float16) recomendRatings = np.zeros(shape=(MAX_UID, max_recommendation), dtype=np.float16) for userTest in uTest: prod = U[userTest, :] * rightTerm estimatedRatings[userTest, :] = prod.todense() recomendRatings[userTest, :] = (-estimatedRatings[userTest, :]).argsort()[:max_recommendation] return recomendRatings K = 50 urm = data_sparse MAX_PID = urm.shape[1] MAX_UID = urm.shape[0] U, S, Vt = compute_svd(urm, K) uTest = [4, 5, 6, 7, 8, 73, 23] # uTest=[1b5bb32767963cbc215d27a24fef1aa01e933025] uTest_recommended_items = compute_estimated_matrix(urm, U, S, Vt 继续将这段代码输出完整
, 并进行解释:
```
import pandas as pd
import numpy as np
# 计算用户对歌曲的播放比例
triplet_dataset_sub_song_merged_sum_df = triplet_dataset_sub_song_merged[['user', 'listen_count']].groupby('user').sum().reset_index()
triplet_dataset_sub_song_merged_sum_df.rename(columns={'listen_count': 'total_listen_count'}, inplace=True)
triplet_dataset_sub_song_merged = pd.merge(triplet_dataset_sub_song_merged, triplet_dataset_sub_song_merged_sum_df)
triplet_dataset_sub_song_merged['fractional_play_count'] = triplet_dataset_sub_song_merged['listen_count'] / triplet_dataset_sub_song_merged['total_listen_count']
# 将用户和歌曲编码为数字
small_set = triplet_dataset_sub_song_merged
user_codes = small_set.user.drop_duplicates().reset_index()
song_codes = small_set.song.drop_duplicates().reset_index()
user_codes.rename(columns={'index': 'user_index'}, inplace=True)
song_codes.rename(columns={'index': 'song_index'}, inplace=True)
song_codes['so_index_value'] = list(song_codes.index)
user_codes['us_index_value'] = list(user_codes.index)
small_set = pd.merge(small_set, song_codes, how='left')
small_set = pd.merge(small_set, user_codes, how='left')
# 将数据转换为稀疏矩阵形式
from scipy.sparse import coo_matrix
mat_candidate = small_set[['us_index_value', 'so_index_value', 'fractional_play_count']]
data_array = mat_candidate.fractional_play_count.values
row_array = mat_candidate.us_index_value.values
col_array = mat_candidate.so_index_value.values
data_sparse = coo_matrix((data_array, (row_array, col_array)), dtype=float)
# 使用SVD方法进行矩阵分解并进行推荐
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import svds
import math as mt
def compute_svd(urm, K):
U, s, Vt = svds(urm, K)
dim = (len(s), len(s))
S = np.zeros(dim, dtype=np.float32)
for i in range(0, len(s)):
S[i, i] = mt.sqrt(s[i])
U = csc_matrix(U, dtype=np.float32)
S = csc_matrix(S, dtype=np.float32)
Vt = csc_matrix(Vt, dtype=np.float32)
return U, S, Vt
def compute_estimated_matrix(urm, U, S, Vt, uTest, K, test):
rightTerm = S * Vt
max_recommendation = 250
estimatedRatings = np.zeros(shape=(MAX_UID, MAX_PID), dtype=np.float16)
recomendRatings = np.zeros(shape=(MAX_UID, max_recommendation), dtype=np.float16)
for userTest in uTest:
prod = U[userTest, :] * rightTerm
estimatedRatings[userTest, :] = prod.todense()
recomendRatings[userTest, :] = (-estimatedRatings[userTest, :]).argsort()[:max_recommendation]
return recomendRatings
K = 50
urm = data_sparse
MAX_PID = urm.shape[1]
MAX_UID = urm.shape[0]
U, S, Vt = compute_svd(urm, K)
uTest = [4, 5, 6, 7, 8, 73, 23] # uTest=[1b5bb32767963cbc215d27a24fef1aa01e933025]
uTest_recommended_items = compute_estimated_matrix(urm, U, S, Vt, uTest, K, test)
```
这段代码实现了一个基于SVD方法的推荐系统,具体步骤如下:
1. 读入数据,计算每个用户对每首歌曲的播放比例。
2. 将用户和歌曲编码为数字,转换为稀疏矩阵形式。
3. 使用SVD方法进行矩阵分解,得到用户和歌曲的隐向量。
4. 对于给定的测试用户,使用隐向量和分解后的矩阵计算出该用户对每首歌曲的预测评分。
5. 根据预测评分,为该用户推荐最高的250首歌曲。
其中,SVD方法是一种矩阵分解的方法,可以将一个大矩阵分解为多个小矩阵,这些小矩阵可以表示出原始矩阵中的潜在特征(即隐向量)。通过计算用户和歌曲的隐向量,可以获得它们之间的相似度,从而进行推荐。
from scipy.sparse.linalg import eigsh, LinearOperator from scipy.sparse import isspmatrix, is_pydata_spmatrix class SVDRecommender: def init(self, k=50, ncv=None, tol=0, which='LM', v0=None, maxiter=None, return_singular_vectors=True, solver='arpack'): self.k = k self.ncv = ncv self.tol = tol self.which = which self.v0 = v0 self.maxiter = maxiter self.return_singular_vectors = return_singular_vectors self.solver = solver def svds(self, A): largest = self.which == 'LM' if not largest and self.which != 'SM': raise ValueError("which must be either 'LM' or 'SM'.") if not (isinstance(A, LinearOperator) or isspmatrix(A) or is_pydata_spmatrix(A)): A = np.asarray(A) n, m = A.shape if self.k <= 0 or self.k >= min(n, m): raise ValueError("k must be between 1 and min(A.shape), k=%d" % self.k) if isinstance(A, LinearOperator): if n > m: X_dot = A.matvec X_matmat = A.matmat XH_dot = A.rmatvec XH_mat = A.rmatmat else: X_dot = A.rmatvec X_matmat = A.rmatmat XH_dot = A.matvec XH_mat = A.matmat dtype = getattr(A, 'dtype', None) if dtype is None: dtype = A.dot(np.zeros([m, 1])).dtype else: if n > m: X_dot = X_matmat = A.dot XH_dot = XH_mat = _herm(A).dot else: XH_dot = XH_mat = A.dot X_dot = X_matmat = _herm(A).dot def matvec_XH_X(x): return XH_dot(X_dot(x)) def matmat_XH_X(x): return XH_mat(X_matmat(x)) XH_X = LinearOperator(matvec=matvec_XH_X, dtype=A.dtype, matmat=matmat_XH_X, shape=(min(A.shape), min(A.shape))) eigvals, eigvec = eigsh(XH_X, k=self.k, tol=self.tol ** 2, maxiter=self.maxiter, ncv=self.ncv, which=self.which, v0=self.v0) eigvals = np.maximum(eigvals.real, 0) t = eigvec.dtype.char.lower() factor = {'f': 1E3, 'd': 1E6} cond = factor[t] * np.finfo(t).eps cutoff = cond * np.max(eigvals) above_cutoff = (eigvals > cutoff) nlarge = above_cutoff.sum() nsmall = self.k - nlarge slarge = np.sqrt(eigvals[above_cutoff]) s = np.zeros_like(eigvals) s[:nlarge] = slarge if not self.return_singular_vectors: return np.sort(s) if n > m: vlarge = eigvec[:, above_cutoff] ularge = X_matmat(vlarge) / slarge if self.return_singular_vectors != 'vh' else None vhlarge = _herm(vlarge) else: ularge = eigvec[:, above_cutoff] vhlarge = _herm(X_matmat(ularge) / slarge) if self.return_singular_vectors != 'u' else None u = _augmented_orthonormal_cols(ularge, nsmall) if ularge is not None else None vh = _augmented_orthonormal_rows(vhlarge, nsmall) if vhlarge is not None else None indexes_sorted = np.argsort(s) s = s[indexes_sorted] if u is not None: u = u[:, indexes_sorted] if vh is not None: vh = vh[indexes_sorted] return u, s, vh def _augmented_orthonormal_cols(U, n): if U.shape[0] <= n: return U Q, R = np.linalg.qr(U) return Q[:, :n] def _augmented_orthonormal_rows(V, n): if V.shape[1] <= n: return V Q, R = np.linalg.qr(V.T) return Q[:, :n].T def _herm(x): return np.conjugate(x.T)这段代码中使用的scipy包太旧了,导致会出现报错信息为:cannot import name 'is_pydata_spmatrix' from 'scipy.sparse' (D:\Anaconda\lib\site-packages\scipy\sparse_init.py),将这段代码修改为使用最新版的scipy包
好的,将使用最新版的scipy包进行修改。首先需要将代码中的`is_pydata_spmatrix`替换为`isspmatrix_csr`。同时需要将代码开头的导入语句修改为`from scipy.sparse.linalg import eigsh, LinearOperator from scipy.sparse import isspmatrix_csr`。