csc_matrix 类的源码
时间: 2023-11-09 21:05:52 浏览: 123
csc_matrix 是 SciPy(scipy.sparse)中的一种稀疏矩阵类,它使用压缩列存储格式(Compressed Sparse Column,CSC)来表示稀疏矩阵。下面是一份简化的 csc_matrix 示意实现(并非 SciPy 官方源码的逐字拷贝):
```python
class csc_matrix(spmatrix):
    """Sparse matrix stored in Compressed Sparse Column (CSC) form.

    Three parallel arrays describe the matrix:

    * ``indptr``  -- one entry per column plus one; the stored entries of
      column ``j`` live at positions ``indptr[j]:indptr[j + 1]`` of the two
      arrays below.
    * ``indices`` -- the row index of every stored entry.
    * ``data``    -- the value of every stored entry.

    The constructor accepts another CSC matrix, a sparse matrix in a
    different format, a ``(data, indices, indptr)`` tuple, an integer ``n``
    (empty ``n x n`` matrix), a shape tuple (empty matrix of that shape), or
    anything ``coo_matrix`` can convert.
    """

    # ``__eq__`` is delegated to ``_eq_dense``, so instances are not hashable.
    # Python sets this implicitly once ``__eq__`` is defined; it is spelled
    # out here to make the intent visible.
    __hash__ = None

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        """Build a CSC matrix from ``arg1``.

        Parameters
        ----------
        arg1 : sparse matrix, tuple, int, shape or array-like
            See the class docstring for the accepted forms.
        shape : tuple of int, optional
            Only consulted for the ``(data, indices, indptr)`` form; when it
            is omitted there, the shape is inferred from the arrays.
        dtype : data-type, optional
            Requested value type.
        copy : bool, optional
            Copy the input arrays instead of referencing them.

        Raises
        ------
        TypeError
            ``arg1`` cannot be converted and carries no ``shape`` attribute.
        ValueError
            ``arg1`` has a shape but is not in a recognised format.
        """
        _data_matrix.__init__(self)

        # Already CSC: adopt it, copying only when explicitly requested.
        if isspmatrix_csc(arg1):
            if issubclass(arg1.__class__, self.__class__) and copy:
                arg1 = arg1.copy()
            self._set_self(arg1)
            return

        # Any other sparse format has to be converted properly; its index
        # arrays do not have CSC meaning and must not be reused verbatim.
        if isspmatrix(arg1):
            converted = arg1.tocsc()
            if dtype is not None:
                converted = converted.astype(dtype)
            self._set_self(converted)
            return

        # Raw CSC arrays. Every method of this class that builds a new matrix
        # uses this form, so the constructor has to understand it.
        if isinstance(arg1, tuple) and len(arg1) == 3:
            self._init_from_arrays(arg1, shape, dtype, copy)
            return

        if dtype is None:
            # NOTE(review): as written this appears to select float whenever
            # the caller gives no dtype -- confirm against getdtype().
            dtype = getdtype(float, arg1)

        # Empty matrices. These checks must run before anything that needs
        # ``arg1.shape``: plain ints and tuples have no such attribute.
        if isintlike(arg1):
            n = int(arg1)
            self._init_empty((n, n), dtype)
            return
        if isshape(arg1):
            self._init_empty(tuple(arg1), dtype)
            return

        # Everything else goes through COO, which determines the shape from
        # the input itself.
        # NOTE(review): ``shape`` is not forwarded here, as in the original.
        from .coo import coo_matrix
        try:
            converted = coo_matrix(arg1, dtype=dtype).tocsc()
        except TypeError:
            # Keep the historical exception types for unusable input.
            if shape is None and not hasattr(arg1, 'shape'):
                raise TypeError('expected dimension')
            raise ValueError("unrecognized format: {!r}".format(arg1))
        self._set_self(converted)

    def _init_from_arrays(self, arrays, shape, dtype, copy):
        """Initialise from a ``(data, indices, indptr)`` tuple."""
        data, indices, indptr = arrays
        if copy:
            self.data = np.array(data, dtype=dtype)
            self.indices = np.array(indices)
            self.indptr = np.array(indptr)
        else:
            self.data = np.asarray(data, dtype=dtype)
            self.indices = np.asarray(indices)
            self.indptr = np.asarray(indptr)
        if shape is None:
            # Smallest shape that can hold every stored entry.
            n_rows = int(self.indices.max()) + 1 if self.indices.size else 0
            shape = (n_rows, self.indptr.size - 1)
        self._shape = tuple(shape)
        self._check()

    def _init_empty(self, shape, dtype):
        """Initialise a matrix of ``shape`` that stores no entries."""
        idx_dtype = get_index_dtype((), maxval=max(shape))
        self._shape = shape
        self.data = np.zeros(0, dtype=dtype)
        self.indices = np.zeros(0, dtype=idx_dtype)
        # All column pointers are 0: every column is empty.
        self.indptr = np.zeros(shape[1] + 1, dtype=idx_dtype)
        self._check()

    def _get_row_slice(self, i, cslice):
        """Return ``self[i, cslice]`` as a ``1 x k`` CSC matrix.

        ``cslice`` must be a slice with step 1 (or ``None``).

        Raises
        ------
        ValueError
            The slice has a step other than 1.
        IndexError
            ``i`` is outside the matrix.
        """
        M, N = self.shape
        if i < 0:
            i += M
        if cslice.step not in (1, None):
            raise ValueError('slicing with step != 1 not supported')
        if i < 0 or i >= M:
            raise IndexError('index out of bounds')

        # slice.indices() fills in None, resolves negatives and clamps to N.
        start, stop, _ = cslice.indices(N)
        ncols = max(stop - start, 0)
        stop = start + ncols

        indptr = self.indptr
        lo, hi = indptr[start], indptr[stop]
        # Output column (0-based within the slice) of each stored entry.
        per_col = np.diff(indptr[start:stop + 1])
        col_of = np.repeat(np.arange(ncols), per_col)
        # Row i can occur anywhere inside a column, so scan the whole range.
        hit = self.indices[lo:hi] == i

        idx_dtype = get_index_dtype((self.indices, indptr),
                                    maxval=max(self.shape))
        row_data = self.data[lo:hi][hit]
        # The result has a single row, so every stored entry sits in row 0.
        row_indices = np.zeros(len(row_data), dtype=idx_dtype)
        new_indptr = np.zeros(ncols + 1, dtype=idx_dtype)
        new_indptr[1:] = np.cumsum(np.bincount(col_of[hit], minlength=ncols))
        return csc_matrix((row_data, row_indices, new_indptr),
                          shape=(1, ncols), dtype=self.dtype)

    def _get_col_slice(self, j, rslice):
        """Return ``self[rslice, j]`` as a ``k x 1`` CSC matrix.

        ``rslice`` must be a slice with step 1 (or ``None``).

        Raises
        ------
        ValueError
            The slice has a step other than 1.
        IndexError
            ``j`` is outside the matrix.
        """
        M, N = self.shape
        if j < 0:
            j += N
        if rslice.step not in (1, None):
            raise ValueError('slicing with step != 1 not supported')
        if j < 0 or j >= N:
            raise IndexError('index out of bounds')

        start, stop, _ = rslice.indices(M)
        nrows = max(stop - start, 0)

        # Column j occupies indptr[j]:indptr[j + 1] of indices/data.
        lo, hi = self.indptr[j], self.indptr[j + 1]
        col_rows = self.indices[lo:hi]
        keep = (col_rows >= start) & (col_rows < stop)

        idx_dtype = get_index_dtype((self.indices, self.indptr), maxval=M)
        # Re-base the surviving row numbers so the slice starts at row 0.
        new_indices = (col_rows[keep] - start).astype(idx_dtype)
        new_indptr = np.array([0, len(new_indices)], dtype=idx_dtype)
        return csc_matrix((self.data[lo:hi][keep], new_indices, new_indptr),
                          shape=(nrows, 1), dtype=self.dtype)

    def _mul_scalar(self, other):
        """Return a new matrix with every stored value multiplied by ``other``."""
        scaled = self.data * other
        # Keep the dtype NumPy promoted to, so that e.g. int * float is not
        # truncated back to int.
        return self.__class__((scaled, self.indices.copy(),
                               self.indptr.copy()), shape=self.shape,
                              dtype=scaled.dtype)

    def _mul_vector(self, other):
        """Return the dense product ``self @ other`` for a 1-D ``other``.

        Raises
        ------
        ValueError
            ``other`` does not have shape ``(n_cols,)``.
        """
        M, N = self.shape
        if other.shape != (N,):
            raise ValueError("dimension mismatch")
        result = np.zeros(M, dtype=upcast_char(self.dtype.char,
                                               other.dtype.char))
        # Same kernel naming scheme as _mul_multimatrix ("<format>_matvec").
        fn = getattr(_sparsetools, self.format + '_matvec')
        fn(M, N, self.indptr, self.indices, self.data, other, result)
        return result

    def _mul_multimatrix(self, other):
        """Multiply by a 2-D operand via the ``_sparsetools`` kernels.

        NOTE(review): only the operand handling was corrected here. The
        direct ``spmatrix(...)`` instantiation and the argument lists passed
        to the kernels are kept as found; their signatures are not visible in
        this file and should be confirmed.
        """
        M, K = self.shape
        _, N = other.shape
        result = spmatrix(dtype=self.dtype, shape=(M, N))
        if isspmatrix(other):
            o_data = other.data
            fn = getattr(_sparsetools, self.format + '_matmat_pass1')
            fn(M, K, N, self.indptr, self.indices,
               other.indptr, other.indices, o_data)
            fn = getattr(_sparsetools, self.format + '_matmat_pass2')
            fn(M, K, N, self.indptr, self.indices, self.data,
               other.indptr, other.indices, o_data, result.indptr,
               result.indices)
        else:
            # For an ndarray, ``.data`` is a raw buffer that cannot be sliced
            # as [:, j]; use the array itself.
            o_data = np.asarray(other)
            fn = getattr(_sparsetools, self.format + '_matvec')
            for j in range(N):
                fn(M, K, self.indptr, self.indices, self.data, o_data[:, j],
                   result.data, j)
        result.sum_duplicates()
        return result

    def _get_dense(self, i, j):
        """Return the scalar value at ``(i, j)``; zero if nothing is stored.

        Duplicate entries for the same position are summed, and the row
        indices inside a column do not need to be sorted.

        Raises
        ------
        IndexError
            ``(i, j)`` is outside the matrix.
        """
        M, N = self.shape
        if i < 0:
            i += M
        if j < 0:
            j += N
        if i < 0 or i >= M or j < 0 or j >= N:
            raise IndexError("index out of bounds")
        lo, hi = self.indptr[j], self.indptr[j + 1]
        hit = self.indices[lo:hi] == i
        # An empty selection sums to a zero of the matrix dtype, so no dense
        # M x N temporary is needed for the "nothing stored" case.
        return self.data[lo:hi][hit].sum(dtype=self.dtype)

    def _get_sparse(self, i, j):
        """Return the stored entries of column ``j`` wrapped in a ``lil_matrix``.

        NOTE(review): after the bounds check ``i`` is not used, and the
        result is declared with shape ``(1, N)``; confirm the intended
        contract. Only the undefined ``idx_dtype`` was fixed here.

        Raises
        ------
        IndexError
            ``(i, j)`` is outside the matrix.
        """
        from . import lil_matrix
        M, N = self.shape
        if i < 0:
            i += M
        if j < 0:
            j += N
        if i < 0 or i >= M or j < 0 or j >= N:
            raise IndexError("index out of bounds")
        lo, hi = self.indptr[j], self.indptr[j + 1]
        data = self.data[lo:hi]
        indices = self.indices[lo:hi]
        idx_dtype = get_index_dtype((self.indices, self.indptr),
                                    maxval=max(self.shape))
        indptr = np.array([0, len(data)], dtype=idx_dtype)
        return lil_matrix((data, indices, indptr), shape=(1, N))

    def __eq__(self, other):
        """Compare with ``other`` by delegating to ``_eq_dense``."""
        return self._eq_dense(other)

    def diagonal(self, k=0):
        """Return the ``k``-th diagonal as a 1-D array.

        ``k > 0`` selects a diagonal above the main one, ``k < 0`` one below
        it. Positions with no stored entry are zero; duplicates are summed.
        An empty array is returned when ``k`` lies outside the matrix.

        Raises
        ------
        ValueError
            The matrix is not square.
        """
        if self.shape[0] != self.shape[1]:
            raise ValueError("diagonal is only defined for square matrices")
        n = self.shape[0]
        length = n - abs(k)
        if length <= 0:
            return np.empty(0, dtype=self.dtype)

        nnz = self.indptr[-1]
        rows = self.indices[:nnz]
        # Column number of every stored entry, expanded from indptr.
        cols = np.repeat(np.arange(n), np.diff(self.indptr))
        on_diag = (cols - rows) == k
        # Position along the diagonal: the row for k >= 0, else the column.
        pos = rows[on_diag] if k >= 0 else cols[on_diag]

        diag = np.zeros(length, dtype=self.dtype)
        np.add.at(diag, pos, self.data[:nnz][on_diag])
        return diag

    def sum(self, axis=None, dtype=None, out=None):
        """Sum the stored values, optionally along one axis.

        Parameters
        ----------
        axis : {None, 0, 1}
            ``None`` sums everything (0-d array), ``0`` returns one total per
            column, ``1`` one total per row (both 1-D).
        dtype : data-type, optional
            Accumulator/result type; the matrix dtype must be safely
            castable to it.
        out : None
            Not supported for ``axis`` 0 or 1.

        Raises
        ------
        TypeError
            The matrix dtype cannot be safely cast to ``dtype``.
        ValueError
            ``axis`` is not ``None``, 0 or 1, or ``out`` is given together
            with an axis.
        """
        if dtype is not None and not np.can_cast(self.dtype, dtype):
            raise TypeError('Cannot upcast [%s] to [%s].' %
                            (self.dtype, dtype))
        if axis is None:
            return np.asarray(self.data.sum(dtype=dtype), dtype=dtype)
        if axis not in (0, 1):
            raise ValueError("axis out of bounds")
        if out is not None:
            raise ValueError("output array specified for reductions along "
                             "axis %d, but unsupported for csc_matrix" % axis)

        n_rows, n_cols = self.shape
        nnz = self.indptr[-1]
        values = self.data[:nnz]
        if dtype is None:
            # Use the accumulator type NumPy itself would pick for this data.
            acc_dtype = values[:0].sum().dtype
        else:
            acc_dtype = np.dtype(dtype)

        if axis == 0:
            totals = np.zeros(n_cols, dtype=acc_dtype)
            # Column number of every stored entry, expanded from indptr.
            owner = np.repeat(np.arange(n_cols), np.diff(self.indptr))
        else:
            totals = np.zeros(n_rows, dtype=acc_dtype)
            owner = self.indices[:nnz]
        # Unbuffered add: repeated indices accumulate instead of overwriting.
        np.add.at(totals, owner, values)
        return totals
```
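csc_matrix 以压缩列格式存储数据:用 indptr、indices、data 三个数组分别记录每一列的起止位置、各非零元素的行号和数值,因此按列访问和列切片的效率较高,而按行切片相对较慢。COO 格式只在构造时作为中间格式使用:无法直接识别的输入会先转换为 COO,再转换为 CSC。上面的代码只展示了构造、行/列切片、标量乘法、矩阵与向量/矩阵相乘、取单个元素、取对角线和求和等方法;加、减、除、取负、转置等运算由稀疏矩阵的基类提供,而求逆、行列式、特征值和特征向量并不是 csc_matrix 自身的方法,需要借助 scipy.sparse.linalg(例如 inv、eigs)或先转换为稠密矩阵后再计算。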
阅读全文