### 周志华《机器学习》课后习题解析
#### 训练集与测试集的划分方法
from math import comb
# Count the ways to pick 350 samples out of 500 for each of the two classes
# (positive and negative); the two choices are independent, so the per-class
# counts multiply.
_choices_per_class = comb(500, 350)
ways_to_choose_training_set = _choices_per_class * _choices_per_class
print(f"The total combinations are {ways_to_choose_training_set}")
#### 版本空间的概念应用实例
- 色泽=*, 根蒂=蜷缩, 敲声=* (共4种情况满足)
- 色泽=*, 根蒂=*, 敲声=浊响 (仅适用于一种情形)
- 色泽=*, 根蒂=蜷缩, 敲声=浊响 (同样对应单一状况)
《机器学习》(周志华)学习笔记 第十四章 概率图模型(课后习题)Python 实现
14.1 腐蚀图像分割
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import grey_erosion

try:
    # Newer SciPy releases ship the sample images in scipy.datasets
    # (NOTE(review): this loader may need the optional `pooch` package).
    from scipy.datasets import ascent
except ImportError:
    # Older SciPy releases only provide the loader under scipy.misc.
    from scipy.misc import ascent

img = ascent()  # load a grayscale sample image
selem = np.ones((50, 50))  # 50x50 structuring element
eroded = grey_erosion(img, footprint=selem)  # grayscale erosion with that element

# Show the original and the eroded image side by side.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
ax = axes.ravel()
ax[0].imshow(img, cmap=plt.cm.gray)
ax[0].set_title('Original image')
ax[1].imshow(eroded, cmap=plt.cm.gray)
ax[1].set_title('Eroded image')
# Without this call a plain script builds the figure but never displays it.
plt.show()
14.2 高斯混合模型
import numpy as np
from scipy.stats import norm
class GMM:
    """Gaussian mixture model with full covariance matrices, fitted by EM.

    Args:
        n_components: number of mixture components.
        max_iter: maximum number of EM iterations run by ``fit``.
        tol: ``fit`` stops once the largest absolute change of every
            parameter (weights, means, covariances) is below this value.
        reg_covar: small value added to the diagonal of every covariance
            estimate so the matrices stay invertible.

    Attributes set by ``fit``:
        weights: mixing proportions, shape (n_components,).
        means: component means, shape (n_components, n_features).
        covs: covariances, shape (n_components, n_features, n_features).
    """

    def __init__(self, n_components, max_iter=100, tol=1e-6, reg_covar=1e-6):
        self.n_components = n_components
        self.max_iter = max_iter
        self.tol = tol
        self.reg_covar = reg_covar

    @staticmethod
    def _log_gaussian(X, mean, cov):
        """Return the log-density of N(mean, cov) at every row of X."""
        n_features = X.shape[1]
        diff = X - mean
        _, logdet = np.linalg.slogdet(cov)
        # Squared Mahalanobis distance via a linear solve (no explicit inverse).
        maha = np.einsum("ij,ij->i", diff, np.linalg.solve(cov, diff.T).T)
        return -0.5 * (n_features * np.log(2.0 * np.pi) + logdet + maha)

    def _log_joint(self, X):
        """Return log(weight_j * N(x_i | mean_j, cov_j)), shape (n_samples, n_components)."""
        X = np.asarray(X, dtype=float)
        log_joint = np.empty((X.shape[0], self.n_components))
        for j in range(self.n_components):
            log_joint[:, j] = np.log(self.weights[j]) + self._log_gaussian(
                X, self.means[j], self.covs[j]
            )
        return log_joint

    def fit(self, X):
        """Estimate the mixture parameters from X with the EM algorithm.

        Args:
            X: array-like of shape (n_samples, n_features).

        Raises:
            ValueError: (from ``np.random.choice``) when there are fewer
                samples than components.
        """
        X = np.asarray(X, dtype=float)
        n_samples, n_features = X.shape
        k = self.n_components

        # Initialisation: uniform weights, means drawn from the data,
        # identity covariances.
        self.weights = np.full(k, 1.0 / k)
        self.means = X[np.random.choice(n_samples, k, replace=False)]
        self.covs = np.stack([np.eye(n_features) for _ in range(k)])

        tiny = 10 * np.finfo(float).eps
        for _ in range(self.max_iter):
            # E step: posterior responsibility of each component for each
            # sample, computed in log-space so far-away points do not
            # underflow to 0/0.
            log_joint = self._log_joint(X)
            log_joint -= log_joint.max(axis=1, keepdims=True)
            resp = np.exp(log_joint)
            resp /= resp.sum(axis=1, keepdims=True)

            # M step: re-estimate the parameters from the responsibilities.
            counts = resp.sum(axis=0) + tiny  # guards against empty components
            weights_new = resp.mean(axis=0)
            means_new = (resp.T @ X) / counts[:, None]
            covs_new = np.empty_like(self.covs)
            for j in range(k):
                diff = X - means_new[j]
                covs_new[j] = (resp[:, j] * diff.T) @ diff / counts[j]
                covs_new[j] += self.reg_covar * np.eye(n_features)

            # Measure the change BEFORE overwriting the old parameters.
            converged = (
                np.abs(weights_new - self.weights).max() < self.tol
                and np.abs(means_new - self.means).max() < self.tol
                and np.abs(covs_new - self.covs).max() < self.tol
            )
            self.weights = weights_new
            self.means = means_new
            self.covs = covs_new
            if converged:
                break

    def predict(self, X):
        """Return, for each row of X, the index of the most probable component."""
        # argmax of the log joint equals argmax of the weighted density.
        return self._log_joint(X).argmax(axis=1)
14.3 隐马尔可夫模型
import numpy as np
class HMM:
    """Discrete-observation hidden Markov model trained with Baum-Welch (EM).

    Args:
        n_states: number of hidden states.
        n_features: number of distinct observation symbols; observations
            are integers in ``range(n_features)``.

    Attributes set by ``fit``:
        pi: initial state distribution, shape (n_states,).
        A: transition matrix, shape (n_states, n_states), rows sum to 1.
        B: emission matrix, shape (n_states, n_features), rows sum to 1.
        log_likelihood_history: log-likelihood of the training sequence
            at the start of every EM iteration.
    """

    def __init__(self, n_states, n_features):
        self.n_states = n_states
        self.n_features = n_features

    @staticmethod
    def _random_stochastic(*shape):
        """Return a random positive array whose last axis sums to 1."""
        raw = np.random.uniform(0.5, 1.5, size=shape)
        return raw / raw.sum(axis=-1, keepdims=True)

    def _check_observations(self, X):
        """Return X as a 1-D integer array, validating the symbol range."""
        obs = np.asarray(X)
        if obs.ndim != 1 or obs.size == 0:
            raise ValueError("X must be a non-empty 1-D sequence of symbols")
        obs = obs.astype(int)
        if obs.min() < 0 or obs.max() >= self.n_features:
            raise ValueError("observation symbols must lie in range(n_features)")
        return obs

    def _forward(self, obs):
        """Scaled forward pass: return (alpha, scale), each alpha row sums to 1."""
        n_steps = len(obs)
        alpha = np.zeros((n_steps, self.n_states))
        scale = np.zeros(n_steps)
        for t in range(n_steps):
            prior = self.pi if t == 0 else alpha[t - 1] @ self.A
            alpha[t] = prior * self.B[:, obs[t]]
            scale[t] = alpha[t].sum()
            if scale[t] > 0:  # leave the row at zero for an impossible symbol
                alpha[t] /= scale[t]
        return alpha, scale

    def _backward(self, obs, scale):
        """Scaled backward pass using the scaling factors of the forward pass."""
        n_steps = len(obs)
        beta = np.ones((n_steps, self.n_states))
        for t in range(n_steps - 2, -1, -1):
            beta[t] = self.A @ (self.B[:, obs[t + 1]] * beta[t + 1])
            if scale[t + 1] > 0:
                beta[t] /= scale[t + 1]
        return beta

    def fit(self, X, max_iter=100, tol=1e-6):
        """Estimate pi, A and B from one observation sequence.

        Args:
            X: 1-D sequence of integer symbols in ``range(n_features)``.
            max_iter: maximum number of EM iterations.
            tol: stop when the log-likelihood changes by less than this.

        Raises:
            ValueError: if X is empty, not 1-D, or has out-of-range symbols.
        """
        obs = self._check_observations(X)

        # Random (not uniform) start: perfectly uniform parameters are a
        # fixed point of EM, because every state stays interchangeable.
        self.pi = self._random_stochastic(self.n_states)
        self.A = self._random_stochastic(self.n_states, self.n_states)
        self.B = self._random_stochastic(self.n_states, self.n_features)
        self.log_likelihood_history = []

        previous = -np.inf
        for _ in range(max_iter):
            # E step: scaled forward/backward probabilities.
            alpha, scale = self._forward(obs)
            beta = self._backward(obs, scale)
            with np.errstate(divide="ignore"):
                log_likelihood = np.log(scale).sum()
            self.log_likelihood_history.append(log_likelihood)

            # Convergence is judged on the log-likelihood improvement.
            if abs(log_likelihood - previous) < tol:
                break
            previous = log_likelihood

            # State posteriors gamma[t, i] = P(state_t = i | X).
            gamma = alpha * beta
            gamma /= gamma.sum(axis=1, keepdims=True)

            # M step.
            self.pi = gamma[0].copy()
            if len(obs) > 1:
                # xi_sum[i, j] = sum_t P(state_t = i, state_{t+1} = j | X)
                emitted = self.B[:, obs[1:]].T * beta[1:] / scale[1:, None]
                xi_sum = self.A * (alpha[:-1].T @ emitted)
                row_total = xi_sum.sum(axis=1, keepdims=True)
                # Rows with no expected transitions keep their old values.
                self.A = np.divide(
                    xi_sum, row_total, out=self.A.copy(), where=row_total > 0
                )
            state_total = gamma.sum(axis=0)[:, None]
            emit = np.zeros((self.n_states, self.n_features))
            for k in range(self.n_features):
                emit[:, k] = gamma[obs == k].sum(axis=0)
            self.B = np.divide(
                emit, state_total, out=self.B.copy(), where=state_total > 0
            )

    def predict(self, X):
        """Return the index of the most probable hidden state at the last step.

        The state is chosen from the forward probabilities of the final
        observation in X.
        """
        obs = self._check_observations(X)
        alpha, _ = self._forward(obs)
        return alpha[-1].argmax()
### 周志华《机器学习》课后习题答案
#### 表1.1中若只包含编号为1和4的两个样例,试给出相应的版本空间
# Python伪代码展示如何枚举可能的假设组合
def generate_version_space():
    """Enumerate the candidate hypotheses consistent with the positive example.

    A hypothesis is a list of six attribute constraints (sky, temperature,
    humidity, wind, water, forecast); ``'?'`` means "any value". A
    hypothesis is kept when every constraint is either ``'?'`` or equal to
    the corresponding value of the positive example.

    Returns:
        list[list[str]]: the consistent hypotheses, in enumeration order.
    """
    from itertools import product

    # Positive training example the hypotheses must cover.
    positive_example = ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
    # Candidate constraint values per attribute, in attribute order.
    candidate_values = [
        ['Sunny', '?'],            # sky
        ['Warm', '?'],             # temperature
        ['?', 'High', 'Normal'],   # humidity
        ['Strong', '?'],           # wind
        ['Warm', '?'],             # water
        ['Same', '?'],             # forecast
    ]

    version_space = []
    for candidate in product(*candidate_values):
        # Consistent only if no constraint contradicts the example
        # (e.g. humidity == 'High' is rejected).
        if all(c == '?' or c == v for c, v in zip(candidate, positive_example)):
            version_space.append(list(candidate))
    return version_space
#### 若数据包含噪声,则假设空间中有可能不存在与所有训练样本都一致的假设,在此情形下,试设计一种归纳偏好用于假设选择
面对含噪数据时,可能不存在与所有训练样本都一致的假设,此时可以引入某种归纳偏好,使学习过程更加稳健。例如:优先选择与尽可能多的训练样本一致(即训练误差最小)的假设;当多个假设的训练误差相同时,依据奥卡姆剃刀原理(Occam's Razor)选择其中最简单的一个。也可以采用最大似然估计(Maximum Likelihood Estimation)的思想,挑选使观测到的训练数据出现概率最大的假设(参数配置)。