Decision tree Python code
Below is an example of a decision tree implemented in Python:
```python
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
# Load the dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create the decision tree classifier
clf = DecisionTreeClassifier()

# Train the model
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Print the test-set accuracy
print("Accuracy:", clf.score(X_test, y_test))
```
The code above uses the decision tree classifier from the sklearn library. It first loads the Iris dataset and splits it into a training set and a test set, then creates a DecisionTreeClassifier object clf and fits it on the training data. Finally, it predicts on the test set and prints the accuracy.
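If you also want to see the rules the tree has learned, scikit-learn provides export_text (and plot_tree) for inspecting a fitted tree. A minimal sketch, assuming the clf and iris objects from the example above:
```python
from sklearn.tree import export_text

# Print the learned splits as indented if/else rules
# (assumes `clf` has already been fitted on the Iris data above)
rules = export_text(clf, feature_names=list(iris.feature_names))
print(rules)
```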
Related questions
Decision tree Python code
Below is a basic decision tree example in Python:
```python
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
# Load the data
data = pd.read_csv("data.csv")

# Define the features and the target variable
X = data[['feature1', 'feature2', 'feature3', 'feature4']]
y = data['target']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Create the decision tree classifier
clf = DecisionTreeClassifier()

# Train the model
clf = clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Evaluate model performance
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
```
The code above assumes the data file is named "data.csv" and contains the feature columns "feature1", "feature2", "feature3", and "feature4", plus a target column "target". It uses the DecisionTreeClassifier class from the sklearn library to create a decision tree classifier and the train_test_split() function to split the data into training and test sets. After training, predict() is applied to the test set and accuracy_score() computes the model's accuracy.
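A default DecisionTreeClassifier grows until its leaves are pure and can easily overfit a small dataset. As a hedged sketch (the hyperparameter values below are illustrative, not tuned for your data), you can limit tree growth and estimate generalization with cross-validation on the same X and y defined above:
```python
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# Constrain the tree so it cannot simply memorize the training data;
# the specific values here are examples, not tuned settings
clf = DecisionTreeClassifier(
    criterion='entropy',  # split on information gain instead of Gini impurity
    max_depth=4,          # cap the depth of the tree
    min_samples_leaf=5,   # require at least 5 samples in each leaf
    random_state=1,
)

# 5-fold cross-validation accuracy on the full dataset
scores = cross_val_score(clf, X, y, cv=5)
print("CV accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std()))
```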
C4.5 decision tree Python code
C4.5 is a classic decision tree algorithm for classification tasks; it extends ID3 by handling continuous attributes and using the gain ratio as its split criterion. Below is a simple Python sketch of a decision tree in the C4.5 style (for brevity, it splits on plain information gain rather than the gain ratio):
```python
import numpy as np
from collections import Counter

class Node:
    def __init__(self, feature=None, threshold=None, label=None, left=None, right=None):
        self.feature = feature      # index of the feature used for the split
        self.threshold = threshold  # threshold value of the split
        self.label = label          # class label (set on leaf nodes only)
        self.left = left            # left subtree (feature value <= threshold)
        self.right = right          # right subtree (feature value > threshold)

class C45DecisionTree:
    def __init__(self, min_samples_split=2, max_depth=float('inf'), min_info_gain=1e-7):
        self.min_samples_split = min_samples_split  # minimum number of samples required to split
        self.max_depth = max_depth                  # maximum depth of the tree
        self.min_info_gain = min_info_gain          # minimum information gain required to split

    def _calculate_entropy(self, y):
        # Shannon entropy of the label distribution
        counter = Counter(y)
        probs = [count / len(y) for count in counter.values()]
        return -sum(p * np.log2(p) for p in probs)

    def _calculate_info_gain(self, X, y, feature, threshold):
        # Information gain of splitting on `feature` at `threshold`
        left_mask = X[:, feature] <= threshold
        right_mask = X[:, feature] > threshold
        left_entropy = self._calculate_entropy(y[left_mask]) if left_mask.any() else 0.0
        right_entropy = self._calculate_entropy(y[right_mask]) if right_mask.any() else 0.0
        weighted_entropy = (left_entropy * left_mask.sum() + right_entropy * right_mask.sum()) / len(y)
        return self._calculate_entropy(y) - weighted_entropy

    def _split(self, X, y):
        # Search every feature and candidate threshold for the best split
        m, n = X.shape
        best_info_gain = self.min_info_gain
        best_feature = None
        best_threshold = None
        for feature in range(n):
            thresholds = np.unique(X[:, feature])
            for threshold in thresholds:
                info_gain = self._calculate_info_gain(X, y, feature, threshold)
                if info_gain > best_info_gain:
                    best_info_gain = info_gain
                    best_feature = feature
                    best_threshold = threshold
        return best_feature, best_threshold

    def _build_tree(self, X, y, depth):
        # Stop if the node is pure
        if len(set(y)) == 1:
            return Node(label=y[0])
        # Stop if the depth or sample-count limit is reached
        if depth >= self.max_depth or len(X) < self.min_samples_split:
            most_common_label = Counter(y).most_common(1)[0][0]
            return Node(label=most_common_label)
        feature, threshold = self._split(X, y)
        # Stop if no split achieves the minimum information gain
        if feature is None or threshold is None:
            most_common_label = Counter(y).most_common(1)[0][0]
            return Node(label=most_common_label)
        left_mask = X[:, feature] <= threshold
        right_mask = X[:, feature] > threshold
        left_subtree = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        right_subtree = self._build_tree(X[right_mask], y[right_mask], depth + 1)
        return Node(feature=feature, threshold=threshold, left=left_subtree, right=right_subtree)

    def fit(self, X, y):
        self.tree = self._build_tree(np.asarray(X), np.asarray(y), 0)

    def _predict_single(self, x, node):
        # Walk down the tree until a leaf is reached
        if node.label is not None:
            return node.label
        if x[node.feature] <= node.threshold:
            return self._predict_single(x, node.left)
        return self._predict_single(x, node.right)

    def predict(self, X):
        return [self._predict_single(x, self.tree) for x in np.asarray(X)]
```
This code implements a basic decision tree in the spirit of C4.5: it computes entropy and information gain, selects the best split feature and threshold, and builds the tree recursively (note that it uses plain information gain rather than C4.5's gain ratio). Use the `fit` method to train the model and the `predict` method to make predictions.
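As a quick usage sketch (assuming the C45DecisionTree class above; the Iris data is used here purely as example input):
```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load a small example dataset and split it
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.3, random_state=42)

# Train the hand-written tree and evaluate it on the held-out data
tree = C45DecisionTree(max_depth=5)
tree.fit(X_train, y_train)
y_pred = tree.predict(X_test)
print("Accuracy:", np.mean(np.array(y_pred) == y_test))
```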