构造是否打羽毛球的决策树算法python
时间: 2023-10-14 19:21:44 浏览: 68
以下是一个基于信息增益(ID3)的简单决策树实现,根据性别和年龄段预测一个人是否打羽毛球。
```
# Training set: each row is [gender, age bracket, plays badminton?].
# The class label is always the last element of a row.
training_data = [
    ['男', '20-30', '是'],
    ['女', '20-30', '是'],
    ['男', '30-40', '否'],
    ['女', '30-40', '是'],
    ['男', '40-50', '否'],
    ['女', '40-50', '否'],
]

# Feature names, listed in the same order as the feature columns of a row.
labels = ['性别', '年龄']
# Classifier: build a decision tree by repeatedly splitting on the
# feature with the highest information gain.
def _group_rows(data, column):
    """Group rows by the value found in *column*, keeping first-seen order."""
    groups = {}
    for row in data:
        groups.setdefault(row[column], []).append(row)
    return groups


def decision_tree(data, labels):
    """Recursively build a decision tree using information gain.

    Args:
        data: Rows of the form ``[feature_0, ..., feature_k, class_label]``.
            The class label is the last element, and ``row[j]`` is the
            value of the feature named ``labels[j]``.
        labels: Names of the feature columns available for splitting.

    Returns:
        A class label when the node is a leaf, otherwise a nested dict of
        the form ``{feature_name: {feature_value: subtree, ...}}``.

    Raises:
        ValueError: If *data* is empty, since no class can be derived.
    """
    if not data:
        raise ValueError("decision_tree() requires at least one training row")

    # Count how often each class label occurs in this node.
    class_counts = {}
    for row in data:
        class_counts[row[-1]] = class_counts.get(row[-1], 0) + 1
    majority_class = max(class_counts, key=class_counts.get)

    # Pure node: every row carries the same class.
    if len(class_counts) == 1:
        return majority_class
    # No feature left to split on: fall back to the most frequent class.
    if not labels:
        return majority_class

    # Pick the feature with the largest information gain. Index 0 stays the
    # default so a split still happens when no single feature has positive
    # gain (features may only be informative in combination).
    total = len(data)
    base_entropy = calc_entropy(data)
    splits = [_group_rows(data, column) for column in range(len(labels))]
    best_index = 0
    best_gain = 0
    for column, groups in enumerate(splits):
        remainder = sum(
            len(rows) / total * calc_entropy(rows) for rows in groups.values()
        )
        gain = base_entropy - remainder
        if gain > best_gain:
            best_index = column
            best_gain = gain

    best_label = labels[best_index]
    sub_labels = labels[:best_index] + labels[best_index + 1:]

    branches = {}
    for value, rows in splits[best_index].items():
        # Remove the used column from each row so the remaining columns
        # stay aligned with sub_labels in the recursive call.
        reduced = [row[:best_index] + row[best_index + 1:] for row in rows]
        branches[value] = decision_tree(reduced, sub_labels)
    return {best_label: branches}
# Shannon entropy of the class labels in a data set.
def calc_entropy(data):
    """Return the Shannon entropy, in bits, of the class labels in *data*.

    Args:
        data: Rows whose last element is the class label.

    Returns:
        The entropy of the class-label distribution. An empty data set or
        one containing a single class yields 0.
    """
    # Imported here because the script has no top-level ``import math``.
    import math

    class_counts = {}
    for row in data:
        class_counts[row[-1]] = class_counts.get(row[-1], 0) + 1

    total = len(data)
    entropy = 0.0
    for count in class_counts.values():
        prob = count / total
        # log2 is exact for powers of two, unlike log(prob, 2).
        entropy -= prob * math.log2(prob)
    return entropy
# Classify one sample with a tree produced by decision_tree.
def predict(tree, labels, data):
    """Walk *tree* from the root to a leaf and return the leaf's class label.

    Args:
        tree: Either a class label (leaf) or a nested dict of the form
            ``{feature_name: {feature_value: subtree, ...}}``.
        labels: Feature names, where ``labels[j]`` names the value at
            ``data[j]``.
        data: Feature values of the sample to classify.

    Returns:
        The class label stored at the leaf that the sample reaches.

    Raises:
        ValueError: If a feature name in the tree is not in *labels*.
        KeyError: If the sample has a feature value the tree has no branch for.
    """
    node = tree
    # Any non-dict node is a leaf, so class labels need not be strings.
    while isinstance(node, dict):
        feature_label = next(iter(node))
        feature_index = labels.index(feature_label)
        node = node[feature_label][data[feature_index]]
    return node
# Train on the sample data, show the resulting tree, then classify two samples.
tree = decision_tree(training_data, labels)
print(tree)
print(
    *(
        predict(tree, labels, sample)
        for sample in (['男', '20-30'], ['女', '40-50'])
    ),
    sep="\n",
)
```
输出:
```
{'年龄': {'20-30': '是', '30-40': {'性别': {'男': '否', '女': '是'}}, '40-50': '否'}}
是
否
```
阅读全文