信息增益 python代码
时间: 2023-06-08 07:05:55 浏览: 127
python实现求特征选择的信息增益
好的，下面是用 Python 计算信息增益并据此选择最优划分特征的代码（假设数据集的每一行是一个样本，最后一个元素为类别标签）：
```python
import math
def entropy(data):
    """Return the Shannon entropy, in bits, of the class labels in *data*.

    Args:
        data: A sequence of rows. Only the last element of each row is
            read; it is treated as that row's class label.

    Returns:
        The entropy as a float. An empty data set yields 0.0.
    """
    total = len(data)

    # Tally how many rows carry each label.
    label_counts = {}
    for row in data:
        label = row[-1]
        label_counts[label] = label_counts.get(label, 0) + 1

    # H = -sum(p * log2(p)) over the observed labels. Every count is >= 1,
    # so p > 0 and the logarithm is always defined.
    result = 0.0
    for count in label_counts.values():
        prob = count / total
        result -= prob * math.log2(prob)
    return result
def split_data(data, axis, value):
    """Select the rows whose feature at *axis* equals *value*, dropping that column.

    Args:
        data: A sequence of rows (lists, tuples, or any sliceable sequence).
        axis: Index of the feature column to test and remove. It should be
            a non-negative index: with a negative value the two slices
            below no longer bracket a single column.
        value: The feature value a row must have to be kept.

    Returns:
        A new list of new lists: each matching row without its *axis*
        element. The input rows are not modified.
    """
    # Build each reduced row from two slices converted to lists, so that
    # tuple rows work as well as list rows.
    return [
        list(row[:axis]) + list(row[axis + 1:])
        for row in data
        if row[axis] == value
    ]
def choose_best_feature(data):
    """Return the index of the feature with the largest information gain.

    For every feature column (all columns except the last, which holds the
    class label) the data set is partitioned by that feature's distinct
    values, and the weighted entropy of the partitions is subtracted from
    the entropy of the whole data set.

    Args:
        data: A sequence of equally long rows whose last element is the
            class label.

    Returns:
        The column index of the best feature, or -1 when *data* is empty
        or no feature yields a strictly positive information gain.
    """
    if not data:
        # Nothing to split on; reuse the "no feature" sentinel.
        return -1

    num_rows = float(len(data))
    num_features = len(data[0]) - 1  # the last column is the label
    base_entropy = entropy(data)
    best_info_gain = 0.0
    best_feature = -1

    for i in range(num_features):
        unique_vals = {example[i] for example in data}

        # Conditional entropy of the labels given feature i.
        new_entropy = 0.0
        for value in unique_vals:
            sub_data = split_data(data, i, value)
            prob = len(sub_data) / num_rows
            new_entropy += prob * entropy(sub_data)

        info_gain = base_entropy - new_entropy
        # Strict comparison: on ties the earliest feature is kept.
        if info_gain > best_info_gain:
            best_info_gain = info_gain
            best_feature = i

    return best_feature
```
希望能够解答您的问题。
阅读全文