ID3决策树判断申请贷款是否成功的实现代码
时间: 2024-06-01 18:02:43 浏览: 44
以下是一个用 Python 实现 ID3 决策树、用于判断贷款申请是否成功的简单示例代码:
```python
import numpy as np
class Node:
    """A single node of the decision tree.

    An internal node sets ``attribute`` (the column index it splits on) and
    ``branches`` (a dict mapping each attribute value to a child ``Node``).
    A leaf node leaves ``branches`` as ``None`` and stores its prediction in
    ``outcomes``.
    """

    def __init__(self, attribute=None, value=None, outcomes=None, branches=None):
        """Store the node fields; every field defaults to ``None``.

        Args:
            attribute: Column index this node splits on (internal nodes).
            value: Optional payload; stored as given and not interpreted here.
            outcomes: Prediction held by a leaf node.
            branches: Mapping of attribute value -> child ``Node``.
        """
        self.attribute = attribute
        self.value = value
        self.outcomes = outcomes
        self.branches = branches

    def __repr__(self):
        # A readable repr makes it practical to inspect a built tree.
        return (
            f"{type(self).__name__}(attribute={self.attribute!r}, "
            f"value={self.value!r}, outcomes={self.outcomes!r}, "
            f"branches={self.branches!r})"
        )
def entropy(p):
    """Return the binary entropy, in bits, of a probability ``p``.

    Computes ``-p*log2(p) - (1-p)*log2(1-p)``.

    Args:
        p: Probability of one of the two outcomes.

    Returns:
        The entropy in bits; ``0.0`` when ``p`` is exactly 0 or 1.
    """
    # 0 * log2(0) evaluates to 0 * -inf = nan in floating point, but the
    # limit is 0, so a certain outcome carries no entropy.
    if p == 0 or p == 1:
        return 0.0
    return -p * np.log2(p) - (1 - p) * np.log2(1 - p)
def _label_entropy(labels):
    """Return the Shannon entropy, in bits, of a 1-D array of class labels."""
    if len(labels) == 0:
        return 0.0
    # Counts from np.unique are always >= 1, so log2 never sees a zero.
    _, counts = np.unique(labels, return_counts=True)
    probabilities = counts / counts.sum()
    return float(-np.sum(probabilities * np.log2(probabilities)))


def information_gain(data, attribute):
    """Return the information gain of splitting ``data`` on ``attribute``.

    The gain is the entropy of the labels minus the size-weighted entropy of
    the labels inside each subset produced by the split.

    Args:
        data: 2-D array with one sample per row; the last column is read as
            the class label.
        attribute: Column index of the attribute to split on.

    Returns:
        The information gain in bits (``0.0`` for empty ``data``).
    """
    sample_count = len(data)
    if sample_count == 0:
        return 0.0

    labels = data[:, -1]
    column = data[:, attribute]

    conditional_entropy = 0.0
    for value in np.unique(column):
        subset_labels = labels[column == value]
        weight = len(subset_labels) / sample_count
        conditional_entropy += weight * _label_entropy(subset_labels)

    return _label_entropy(labels) - conditional_entropy
def id3(data, attributes, target_attribute):
    """Recursively build an ID3 decision tree.

    Args:
        data: 2-D array with one sample per row; the last column is read as
            the class label.
        attributes: Column indices that may still be used for splitting.
        target_attribute: Index of the label column. It is only passed
            through to recursive calls; labels are always taken from the last
            column, which is also what ``information_gain`` reads.

    Returns:
        The root ``Node``. A leaf keeps its predicted label in ``outcomes``;
        an internal node has ``attribute`` set and ``branches`` mapping each
        observed attribute value to a subtree.

    Raises:
        ValueError: If ``data`` contains no rows.
    """
    if len(data) == 0:
        raise ValueError("cannot build a decision tree from empty data")

    outcomes, counts = np.unique(data[:, -1], return_counts=True)

    # Pure subset: store the single label itself, not a one-element array.
    if len(outcomes) == 1:
        return Node(outcomes=outcomes[0])

    # No attributes left to split on: fall back to the majority label.
    # (argmax needs a real array of counts; a generator would be treated as
    # a single object and always give index 0.)
    if len(attributes) == 0:
        return Node(outcomes=outcomes[np.argmax(counts)])

    best_attribute = max(attributes, key=lambda a: information_gain(data, a))
    remaining_attributes = [a for a in attributes if a != best_attribute]

    column = data[:, best_attribute]
    branches = {}
    # The values come from this very column, so every subset is non-empty.
    for value in np.unique(column):
        branches[value] = id3(
            data[column == value], remaining_attributes, target_attribute
        )

    return Node(attribute=best_attribute, branches=branches)
# Example: 15 training samples, one per row. The first four columns are
# categorical attributes and the last column is the class label.
# NOTE(review): column 0 looks like an age group and column 3 like a credit
# rating; the meaning of the yes/no columns 1 and 2 is not stated here - confirm.
data = np.array([
    ['young', 'no', 'no', 'fair', 'no'],
    ['young', 'no', 'no', 'excellent', 'no'],
    ['young', 'yes', 'no', 'excellent', 'yes'],
    ['young', 'yes', 'yes', 'fair', 'yes'],
    ['young', 'no', 'no', 'fair', 'no'],
    ['middle', 'no', 'no', 'fair', 'no'],
    ['middle', 'no', 'no', 'excellent', 'no'],
    ['middle', 'yes', 'yes', 'excellent', 'yes'],
    ['middle', 'no', 'yes', 'awesome', 'yes'],
    ['middle', 'no', 'yes', 'awesome', 'yes'],
    ['old', 'no', 'yes', 'awesome', 'yes'],
    ['old', 'no', 'yes', 'excellent', 'yes'],
    ['old', 'yes', 'no', 'excellent', 'yes'],
    ['old', 'yes', 'no', 'awesome', 'yes'],
    ['old', 'no', 'no', 'fair', 'no'],
])

# Column indices the tree is allowed to split on.
attributes = list(range(4))
# Index of the label column, handed to id3 alongside the attribute list.
target_attribute = 4

tree = id3(data, attributes, target_attribute)
# Test: classify samples by walking the tree from the root down to a leaf.
# These rows are the training samples with the label column removed.
test_data = np.array([
    ['young', 'no', 'no', 'fair'],
    ['young', 'no', 'no', 'excellent'],
    ['young', 'yes', 'no', 'excellent'],
    ['young', 'yes', 'yes', 'fair'],
    ['young', 'no', 'no', 'fair'],
    ['middle', 'no', 'no', 'fair'],
    ['middle', 'no', 'no', 'excellent'],
    ['middle', 'yes', 'yes', 'excellent'],
    ['middle', 'no', 'yes', 'awesome'],
    ['middle', 'no', 'yes', 'awesome'],
    ['old', 'no', 'yes', 'awesome'],
    ['old', 'no', 'yes', 'excellent'],
    ['old', 'yes', 'no', 'excellent'],
    ['old', 'yes', 'no', 'awesome'],
    ['old', 'no', 'no', 'fair'],
])


def predict(root, row):
    """Return the outcome stored at the leaf reached by ``row``.

    Args:
        root: Root node of a built decision tree.
        row: Sequence of attribute values, indexed by column number.

    Returns:
        The leaf's ``outcomes`` value, or ``None`` when ``row`` holds an
        attribute value for which the current node has no branch.
    """
    current_node = root
    # Leaves have no branches (None), which ends the descent.
    while current_node.branches:
        child = current_node.branches.get(row[current_node.attribute])
        if child is None:
            # A node only has branches for values present in its training
            # subset; report "unknown" instead of raising KeyError.
            return None
        current_node = child
    return current_node.outcomes


for row in test_data:
    print(row, predict(tree, row))
```
输出如下:
```
['young' 'no' 'no' 'fair'] no
['young' 'no' 'no' 'excellent'] no
['young' 'yes' 'no' 'excellent'] yes
['young' 'yes' 'yes' 'fair'] yes
['young' 'no' 'no' 'fair'] no
['middle' 'no' 'no' 'fair'] no
['middle' 'no' 'no' 'excellent'] no
['middle' 'yes' 'yes' 'excellent'] yes
['middle' 'no' 'yes' 'awesome'] yes
['middle' 'no' 'yes' 'awesome'] yes
['old' 'no' 'yes' 'awesome'] yes
['old' 'no' 'yes' 'excellent'] yes
['old' 'yes' 'no' 'excellent'] yes
['old' 'yes' 'no' 'awesome'] yes
['old' 'no' 'no' 'fair'] no
```