使用 Python 实现 ID3 算法
时间: 2023-11-11 09:02:31 浏览: 30
好的,下面是使用 Python 实现 ID3 算法的代码:
```python
import pandas as pd
import numpy as np
import math
class Node:
    """A single node of the binary decision tree.

    A node is treated as a leaf when ``predict`` is not ``None``; otherwise
    it is an internal node that sends a sample to ``left`` when
    ``sample[feature] <= threshold`` and to ``right`` otherwise.
    """

    def __init__(
        self,
        feature=None,
        threshold=None,
        left=None,
        right=None,
        value=None,
        predict=None,
    ):
        # Column index of the feature tested at this node (internal nodes).
        self.feature = feature
        # Split point: samples with feature value <= threshold go left.
        self.threshold = threshold
        # Child nodes for the "<= threshold" and "> threshold" branches.
        self.left = left
        self.right = right
        # NOTE(review): `value` is not read by any code visible in this file;
        # it is kept so existing callers passing it keep working.
        self.value = value
        # Class label returned by a leaf; None marks an internal node.
        self.predict = predict

    def __repr__(self):
        if self.predict is not None:
            return f"Node(predict={self.predict!r})"
        return f"Node(feature={self.feature!r}, threshold={self.threshold!r})"
class DecisionTree:
    """Decision-tree classifier with a minimal fit/predict interface."""

    def __init__(self):
        # Root node of the fitted tree; stays None until fit() is called.
        self.tree = None

    def fit(self, X, y):
        """Build the tree from training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like with one label per row of X.
        """
        # build_tree slices with X[:, feature] and boolean masks, which only
        # works on NumPy arrays, so coerce lists / DataFrames up front.
        self.tree = build_tree(np.asarray(X), np.asarray(y))

    def predict(self, X):
        """Return a list with the predicted label for every sample in X.

        Raises:
            RuntimeError: if called before fit().
        """
        if self.tree is None:
            raise RuntimeError("DecisionTree.predict() called before fit()")
        return [predict_one(x, self.tree) for x in X]
def build_tree(X, y):
    """Recursively build a binary decision tree and return its root Node.

    Args:
        X: 2-D array-like of shape (n_samples, n_features).
        y: 1-D array-like with one label per row of X.

    Returns:
        A leaf Node when all labels agree or no useful split exists,
        otherwise an internal Node with two recursively built children.

    Raises:
        ValueError: if y is empty.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if len(y) == 0:
        raise ValueError("cannot build a decision tree from an empty dataset")

    if same_class(y):
        return Node(predict=y[0])

    feature, threshold = select_feature(X, y)
    if feature is None:
        # No split improves purity (e.g. identical rows with different
        # labels). Fall back to a majority-vote leaf instead of crashing on
        # X[:, None]; ties resolve to the smallest label (np.unique sorts).
        labels, counts = np.unique(y, return_counts=True)
        return Node(predict=labels[np.argmax(counts)])

    left_idx = X[:, feature] <= threshold
    # Use the complement (as select_feature does) so rows that fail the
    # comparison, such as NaN, go right like in predict_one and are not lost.
    right_idx = ~left_idx
    left = build_tree(X[left_idx], y[left_idx])
    right = build_tree(X[right_idx], y[right_idx])
    return Node(feature=feature, threshold=threshold, left=left, right=right)
def predict_one(x, tree):
    """Return the label predicted for one sample.

    Args:
        x: Indexable sample; ``x[node.feature]`` is compared to the node's
            threshold.
        tree: Root node of the (sub)tree to evaluate.

    Returns:
        The ``predict`` value of the leaf the sample ends up in.
    """
    # Walk down iteratively so very deep trees cannot hit the recursion limit.
    node = tree
    while node.predict is None:
        if x[node.feature] <= node.threshold:
            node = node.left
        else:
            node = node.right
    return node.predict
def same_class(y):
    """Return True when y contains exactly one distinct label.

    An empty y has zero distinct labels and therefore yields False.
    """
    return len(set(y)) == 1
def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in y.

    A pure or empty label set yields 0.0.
    """
    _, counts = np.unique(y, return_counts=True)
    # Normalise by the total count so the probabilities always sum to 1.
    p = counts / counts.sum()
    # sum(p * log2(1/p)) equals -sum(p * log2(p)) but gives +0.0, not -0.0,
    # for a pure label set.
    return np.sum(p * np.log2(1.0 / p))
def select_feature(X, y):
    """Find the (feature, threshold) split with the highest information gain.

    Every distinct value of every feature column is tried as a threshold,
    splitting the samples into ``<= threshold`` and the rest.

    Args:
        X: 2-D array-like of shape (n_samples, n_features).
        y: 1-D array-like with one label per row of X.

    Returns:
        A ``(feature_index, threshold)`` tuple, or ``(None, None)`` when no
        split has a strictly positive information gain.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    best_feature, best_threshold, best_gain = None, None, 0
    n_samples = len(y)
    # The parent entropy is the same for every candidate; compute it once.
    parent_entropy = entropy(y)

    for feature in range(X.shape[1]):
        column = X[:, feature]
        for threshold in np.unique(column):
            left_idx = column <= threshold
            left_y = y[left_idx]
            right_y = y[~left_idx]
            # A split that leaves one side empty separates nothing.
            if len(left_y) == 0 or len(right_y) == 0:
                continue
            gain = (
                parent_entropy
                - (len(left_y) / n_samples) * entropy(left_y)
                - (len(right_y) / n_samples) * entropy(right_y)
            )
            # Strict ">" keeps the first best split and rejects zero gain.
            if gain > best_gain:
                best_feature = feature
                best_threshold = threshold
                best_gain = gain
    return best_feature, best_threshold
```
以上是用 Python 实现的 ID3 风格决策树代码:它与经典 ID3 一样以信息增益选择划分,但采用二分阈值划分(特征值 ≤ 阈值的样本进入左子树,其余进入右子树),因此可以直接处理数值型特征。