Implementing the EM Algorithm in Python and Finding the Optimal Solution
The EM (Expectation-Maximization) algorithm is an iterative method for computing maximum-likelihood estimates of probabilistic models with latent variables. Below is an example Python implementation of the EM algorithm, applied here to finding the optimal parameters of a Gaussian mixture model.
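For reference, each EM iteration for a Gaussian mixture alternates between the standard E-step and M-step updates:

```latex
% E-step: responsibility of component k for sample x_i
\gamma_{ik} = \frac{\pi_k \,\mathcal{N}(x_i \mid \mu_k, \Sigma_k)}
                   {\sum_{j=1}^{K} \pi_j \,\mathcal{N}(x_i \mid \mu_j, \Sigma_j)}

% M-step: re-estimate weights, means, and covariance matrices
N_k = \sum_{i=1}^{N} \gamma_{ik}, \qquad
\pi_k = \frac{N_k}{N}, \qquad
\mu_k = \frac{1}{N_k} \sum_{i=1}^{N} \gamma_{ik}\, x_i, \qquad
\Sigma_k = \frac{1}{N_k} \sum_{i=1}^{N} \gamma_{ik}\, (x_i - \mu_k)(x_i - \mu_k)^{\top}
```

The class below implements these two steps and iterates them until the log-likelihood converges: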
```python
import numpy as np

class EMAlgorithm:
    def __init__(self, n_components, max_iter=100, tol=1e-4):
        self.n_components = n_components
        self.max_iter = max_iter
        self.tol = tol

    def init_params(self, X):
        # Initialize model parameters: uniform weights, random means, identity covariances
        self.pi = np.ones(self.n_components) / self.n_components
        self.mu = np.random.randn(self.n_components, X.shape[1])
        self.sigma = np.array([np.eye(X.shape[1]) for _ in range(self.n_components)])

    def e_step(self, X):
        # E-step: posterior probability (responsibility) of each component for each sample
        log_prob = np.zeros((X.shape[0], self.n_components))
        for i in range(self.n_components):
            log_prob[:, i] = np.log(self.pi[i]) + self.log_gaussian(X, self.mu[i], self.sigma[i])
        # Log-sum-exp over components for numerical stability
        log_prob_max = np.max(log_prob, axis=1, keepdims=True)
        log_norm = log_prob_max + np.log(np.sum(np.exp(log_prob - log_prob_max), axis=1, keepdims=True))
        prob = np.exp(log_prob - log_norm)   # responsibilities
        log_likelihood = np.sum(log_norm)    # total data log-likelihood
        return prob, log_likelihood

    def m_step(self, X, prob):
        # M-step: update mixture weights, means, and covariance matrices
        Nk = np.sum(prob, axis=0)
        self.pi = Nk / np.sum(Nk)
        for i in range(self.n_components):
            self.mu[i] = np.sum(X * prob[:, i][:, np.newaxis], axis=0) / Nk[i]
            X_centered = X - self.mu[i]
            self.sigma[i] = np.dot(prob[:, i] * X_centered.T, X_centered) / Nk[i]

    def log_gaussian(self, X, mu, sigma):
        # Log density of a multivariate Gaussian evaluated at every row of X
        n_features = X.shape[1]
        log_det = np.log(np.linalg.det(sigma))
        log_norm = -0.5 * n_features * np.log(2 * np.pi) - 0.5 * log_det
        X_centered = X - mu
        sigma_inv = np.linalg.inv(sigma)
        log_exp = -0.5 * np.sum(np.dot(X_centered, sigma_inv) * X_centered, axis=1)
        return log_norm + log_exp

    def fit(self, X):
        self.init_params(X)
        prev_log_likelihood = -np.inf
        for _ in range(self.max_iter):
            prob, log_likelihood = self.e_step(X)
            self.m_step(X, prob)
            # Stop once the log-likelihood improvement falls below the tolerance
            if abs(log_likelihood - prev_log_likelihood) < self.tol:
                break
            prev_log_likelihood = log_likelihood

    def predict(self, X):
        # Assign each sample to the component with the highest posterior probability
        log_prob = np.zeros((X.shape[0], self.n_components))
        for i in range(self.n_components):
            log_prob[:, i] = np.log(self.pi[i]) + self.log_gaussian(X, self.mu[i], self.sigma[i])
        return np.argmax(log_prob, axis=1)
```
Next, we can use the EM algorithm to fit a two-component Gaussian mixture model and find its optimal parameters:
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

# Create a two-cluster toy dataset
X, y = make_blobs(n_samples=300, centers=2, random_state=0)

# Fit a two-component Gaussian mixture model with EM
em = EMAlgorithm(2)
em.fit(X)

# Plot the decision boundary on a grid covering the data
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))
Z = em.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, alpha=0.8)
plt.show()
```
Running the code above produces the optimal result: the fitted two-component Gaussian mixture model and a plot of its decision boundary.
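As an optional check, you can also print the parameters learned by EM; this uses only the `em.pi`, `em.mu`, and `em.sigma` attributes defined in the class above:

```python
# Mixture weights, component means, and covariance matrices estimated by EM
print("weights:\n", em.pi)
print("means:\n", em.mu)
print("covariances:\n", em.sigma)
```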