c45决策树算法代码
时间: 2023-11-13 09:41:44 浏览: 100
当然可以!以下是一个简化的决策树代码示例(注意:该实现按信息增益选择属性,属于 ID3 算法;真正的 C4.5 还需使用增益率、支持连续属性并进行剪枝):
```python
import math
def entropy(data):
    """Return the Shannon entropy (base 2) of the class labels in *data*.

    Each row is a sequence whose last element is the class label.
    An empty data set has entropy 0.0.
    """
    if not data:
        return 0.0
    # Count occurrences of each label (last column of every row).
    label_counts = {}
    for row in data:
        label_counts[row[-1]] = label_counts.get(row[-1], 0) + 1
    total = len(data)
    # Avoid shadowing the function name (the original bound a local
    # variable `entropy` inside `def entropy`).
    result = 0.0
    for count in label_counts.values():
        p = count / total
        result -= p * math.log2(p)
    return result
def split_data(data, attribute, value):
    """Select the rows of *data* whose column *attribute* equals *value*.

    The matched column is dropped from every returned row, so the
    subsets handed to recursive calls shrink by one attribute.
    """
    return [
        row[:attribute] + row[attribute + 1:]
        for row in data
        if row[attribute] == value
    ]
def choose_best_attribute(data):
    """Index of the attribute whose split yields the highest information gain.

    NOTE(review): this is the ID3 criterion (plain information gain),
    not C4.5's gain ratio, despite the surrounding article's title.
    Returns -1 when no attribute produces a strictly positive gain.
    """
    base = entropy(data)
    total = len(data)
    best_gain, best_index = 0.0, -1
    for col in range(len(data[0]) - 1):
        # Weighted entropy after partitioning on this column.
        weighted = 0.0
        for value in {row[col] for row in data}:
            subset = split_data(data, col, value)
            weighted += len(subset) / total * entropy(subset)
        gain = base - weighted
        if gain > best_gain:
            best_gain, best_index = gain, col
    return best_index
def majority_count(labels):
    """Most frequent label in *labels*.

    Ties resolve to the label encountered first, matching the original
    stable-sort-then-take-first behavior.
    """
    counts = {}
    for item in labels:
        counts[item] = counts.get(item, 0) + 1
    return max(counts, key=counts.get)
def create_decision_tree(data, attributes):
    """Recursively build a decision tree from *data*.

    *data*: non-empty list of rows; the last column of each row is the
    class label, the other columns correspond to *attributes*.
    *attributes*: names for the non-label columns. The caller's list is
    NOT modified (the original code deleted the chosen attribute from
    the caller's list in place, emptying it as a side effect).

    Returns either a class label (leaf) or a nested dict of the form
    {attribute_name: {attribute_value: subtree, ...}}.
    """
    labels = [row[-1] for row in data]
    # All rows carry the same label: pure leaf.
    if labels.count(labels[0]) == len(labels):
        return labels[0]
    # Only the label column remains: fall back to majority vote.
    if len(data[0]) == 1:
        return majority_count(labels)
    best = choose_best_attribute(data)
    # No attribute gives positive gain: majority leaf. (The original
    # would index attributes[-1] here and split on the label column.)
    if best == -1:
        return majority_count(labels)
    best_label = attributes[best]
    tree = {best_label: {}}
    # Consume the attribute on a local copy so the caller's list survives.
    remaining = attributes[:best] + attributes[best + 1:]
    for value in {row[best] for row in data}:
        subset = split_data(data, best, value)
        tree[best_label][value] = create_decision_tree(subset, remaining[:])
    return tree
# Example usage: the classic loan-approval toy data set.
# Columns: 年龄, 有工作, 有自己的房子, 信贷情况, class label (是/否).
data = [
    ['青年', '否', '否', '一般', '否'],
    ['青年', '否', '否', '好', '否'],
    ['青年', '是', '否', '好', '是'],
    ['青年', '是', '是', '一般', '是'],
    ['青年', '否', '否', '一般', '否'],
    ['中年', '否', '否', '一般', '否'],
    ['中年', '否', '否', '好', '否'],
    ['中年', '是', '是', '好', '是'],
    ['中年', '否', '是', '非常好', '是'],
    ['中年', '否', '是', '非常好', '是'],
    ['老年', '否', '是', '非常好', '是'],
    ['老年', '否', '是', '好', '是'],
    ['老年', '是', '否', '好', '是'],
    ['老年', '是', '否', '非常好', '是'],
    ['老年', '否', '否', '一般', '否'],
]
attributes = ['年龄', '有工作', '有自己的房子', '信贷情况']
decision_tree = create_decision_tree(data, attributes)
print(decision_tree)
```
希望这个简单的示例能帮助你理解决策树算法的基本实现!如需完整的 C4.5,请在属性选择时改用增益率(gain ratio)并加入连续属性处理与剪枝。
阅读全文