import re
def fuzzy_search(string, pattern):
    """Find the first subsequence-style match of *pattern* inside *string*.

    Each character of *pattern* is regex-escaped and the characters are joined
    with the lazy wildcard ``.*?``, so ``'hlo'`` becomes ``h.*?l.*?o``. The
    search is case-insensitive.

    Args:
        string: Text to search in.
        pattern: Characters that must appear in this order (not necessarily
            adjacent) in *string*.

    Returns:
        The matched substring, starting at the first occurrence of the first
        pattern character that can be completed, or ``None`` when the
        characters do not occur in order. An empty *pattern* matches the empty
        string.
    """
    # Escape per character so regex metacharacters in the pattern are literal.
    regex = '.*?'.join(map(re.escape, pattern))
    match = re.search(regex, string, re.IGNORECASE)
    return match.group(0) if match else None
# Example usage
string = 'hello world'
pattern = 'hlo'
result = fuzzy_search(string, pattern)
print(result) # prints: hello
该函数接受两个参数:`string`和`pattern`。`string`是要搜索的字符串,`pattern`是要查找的模式。它先用`re.escape`函数转义模式中的每个字符,使其中的正则特殊字符按字面匹配,再用非贪婪通配符`.*?`把这些字符依次连接起来,形成一个正则表达式(例如模式`'hlo'`会变成`h.*?l.*?o`)。然后,它使用`re.search`函数并带上`re.IGNORECASE`标志,在字符串中不区分大小写地查找匹配项。如果找到了匹配项,则返回匹配到的子串,否则返回`None`。在上面的示例中,模式`'hlo'`与字符串`'hello world'`中的`'hello'`匹配。
import re
def fuzzy_query(keyword, data_list):
    """Return the items of *data_list* that contain *keyword*, ignoring case.

    Args:
        keyword: A regular expression (a plain substring works as long as it
            has no regex metacharacters). It is compiled as-is, so an invalid
            expression raises ``re.error``.
        data_list: Iterable of strings to filter.

    Returns:
        A new list with every matching item, in the original order. The list
        is empty when nothing matches.
    """
    # Compile once; the same pattern is applied to every item.
    pattern = re.compile(keyword, re.IGNORECASE)
    return [item for item in data_list if pattern.search(item)]
# Sample data
data_list = ['apple', 'banana', 'orange', 'grape', 'watermelon']
# Fuzzy query with the keyword 'an'
result = fuzzy_query('an', data_list)
print(result) # expected: ['banana', 'orange']
# Fuzzy query with the keyword 'e'
result = fuzzy_query('e', data_list)
print(result) # expected: ['apple', 'orange', 'grape', 'watermelon'] ('orange' also contains an 'e')
import numpy as np
def triangular_membership(x, a, b, c):
    """Return the membership degree of *x* in the triangular fuzzy set (a, b, c).

    The set rises linearly from 0 at *a* to 1 at the peak *b* and falls
    linearly back to 0 at *c*.

    Args:
        x: Value to evaluate.
        a: Left foot of the triangle (membership 0).
        b: Peak of the triangle (membership 1).
        c: Right foot of the triangle (membership 0).

    Returns:
        A degree in [0, 1]. Values at or outside the feet (``x <= a`` or
        ``x >= c``) get 0.0, which also means a degenerate edge (``a == b`` or
        ``b == c``) never causes a division by zero.
    """
    if x <= a or x >= c:
        return 0.0
    if x <= b:
        # Rising edge: a < x <= b, so b - a is strictly positive here.
        return (x - a) / (b - a)
    # Falling edge: b < x < c, so c - b is strictly positive here.
    return (c - x) / (c - b)
class Node:
    """A node of the decision tree.

    Internal nodes carry a split description (``feature_index``,
    ``threshold``, ``operator``) and two children; leaf nodes carry only
    ``label``. A node is treated as a leaf when ``label`` is not ``None``.
    """

    def __init__(self, feature_index=None, threshold=None, operator=None, left=None, right=None, label=None):
        self.feature_index = feature_index  # index of the feature this node splits on
        self.threshold = threshold  # split threshold
        self.operator = operator  # split operator ('<=' or '>')
        self.left = left  # left child node
        self.right = right  # right child node
        self.label = label  # class label (leaf nodes only)
class FuzzyDecisionTree:
    """Binary decision tree that routes samples by fuzzy-set membership degree.

    Every internal node looks at one feature and one triangular fuzzy set whose
    breakpoints are percentiles of the training values that reached the node:

    * operator ``'<='`` uses the "low" set with breakpoints at the 0th, 33rd
      and 67th percentiles;
    * operator ``'>'`` uses the "high" set, rising from the 67th percentile to
      the maximum and treated as fully satisfied beyond the maximum.

    A sample goes to the left child when its membership degree is strictly
    greater than the node's ``threshold`` (a degree cut between 0 and 1) and to
    the right child otherwise. The breakpoints learned during ``fit`` are kept
    on each internal node as ``fuzzy_breakpoints`` so that ``predict`` does not
    depend on the distribution of the data being predicted.

    Class labels must be non-negative integers because leaves are labelled
    with ``np.argmax(np.bincount(y))``.
    """

    def __init__(self, max_depth=5, min_samples_split=2, min_impurity_decrease=0):
        self.max_depth = max_depth  # maximum depth of the tree
        self.min_samples_split = min_samples_split  # minimum samples needed to split a node
        # A node whose impurity is at or below this value becomes a leaf. It
        # also selects the impurity measure: Gini when <= 0, entropy otherwise.
        self.min_impurity_decrease = min_impurity_decrease
        self.tree = None  # root Node, set by fit()

    def _gini_index(self, y):
        """Return the Gini impurity ``1 - sum(p_c ** 2)`` of the labels *y*."""
        _, counts = np.unique(y, return_counts=True)
        proportions = counts / len(y)
        return float(1 - np.sum(proportions ** 2))

    def _entropy(self, y):
        """Return the Shannon entropy (base 2) of the labels *y*."""
        _, counts = np.unique(y, return_counts=True)
        proportions = counts / len(y)
        # Only classes that occur are counted, so log2 never sees a zero.
        return float(-np.sum(proportions * np.log2(proportions)))

    def _impurity(self, y):
        """Return the node impurity: Gini if min_impurity_decrease <= 0, else entropy."""
        if self.min_impurity_decrease <= 0:
            return self._gini_index(y)
        return self._entropy(y)

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in *y* (non-negative integers)."""
        return np.argmax(np.bincount(y))

    @staticmethod
    def _fuzzy_breakpoints(column, operator):
        """Return the (a, b, c) triangle breakpoints for *operator* on *column*."""
        if operator == '<=':
            a, b, c = np.percentile(column, [0, 33, 67])
        else:
            a, b = np.percentile(column, [67, 100])
            # np.percentile only accepts q in [0, 100], so the right foot is
            # obtained by mirroring the rising edge instead of asking for a
            # percentile beyond the maximum.
            c = b + (b - a)
        return float(a), float(b), float(c)

    @staticmethod
    def _membership(value, operator, breakpoints):
        """Return the membership degree of *value* in the operator's fuzzy set."""
        a, b, c = breakpoints
        if operator == '>':
            # "High" is a right shoulder: anything past the peak is fully high.
            value = min(value, b)
        return triangular_membership(value, a, b, c)

    def _memberships(self, column, operator, breakpoints):
        """Return the membership degree of every value in *column* as an array."""
        return np.array(
            [self._membership(value, operator, breakpoints) for value in column],
            dtype=float,
        )

    def _find_best_split(self, X, y):
        """Find the (feature, operator, degree cut) with the lowest child impurity.

        Candidate cuts are the midpoints between consecutive distinct
        membership degrees, so every accepted split sends at least one sample
        to each side.

        Returns:
            ``(feature_index, threshold, operator, impurity)`` where impurity
            is the sample-weighted impurity of the two children. When no
            feature admits a split the first three are ``None`` and impurity
            is ``inf``.
        """
        n_samples, n_features = X.shape
        best_feature_index, best_threshold, best_operator = None, None, None
        best_impurity = float('inf')
        for feature_index in range(n_features):
            column = X[:, feature_index]
            for operator in ('<=', '>'):
                breakpoints = self._fuzzy_breakpoints(column, operator)
                degrees = self._memberships(column, operator, breakpoints)
                levels = np.unique(degrees)
                for threshold in (levels[:-1] + levels[1:]) / 2:
                    goes_left = degrees > threshold
                    n_left = int(goes_left.sum())
                    if n_left == 0 or n_left == n_samples:
                        # Guards against a midpoint rounding onto a level.
                        continue
                    impurity = (
                        n_left * self._impurity(y[goes_left])
                        + (n_samples - n_left) * self._impurity(y[~goes_left])
                    ) / n_samples
                    if impurity < best_impurity:
                        best_feature_index = feature_index
                        best_threshold = float(threshold)
                        best_operator = operator
                        best_impurity = impurity
        return best_feature_index, best_threshold, best_operator, best_impurity

    def _create_tree(self, X, y, depth):
        """Recursively build the subtree for the samples (X, y) at *depth*."""
        n_samples = X.shape[0]
        # Too few samples or depth limit reached: majority-vote leaf.
        if n_samples < self.min_samples_split or depth >= self.max_depth:
            return Node(label=self._majority_label(y))
        # Node is already pure enough: majority-vote leaf.
        if self._impurity(y) <= self.min_impurity_decrease:
            return Node(label=self._majority_label(y))
        feature_index, threshold, operator, _ = self._find_best_split(X, y)
        # No feature separates the samples: majority-vote leaf.
        if feature_index is None or threshold is None:
            return Node(label=self._majority_label(y))
        column = X[:, feature_index]
        breakpoints = self._fuzzy_breakpoints(column, operator)
        goes_left = self._memberships(column, operator, breakpoints) > threshold
        node = Node(
            feature_index=feature_index,
            threshold=threshold,
            operator=operator,
            left=self._create_tree(X[goes_left], y[goes_left], depth + 1),
            right=self._create_tree(X[~goes_left], y[~goes_left], depth + 1),
        )
        # Remember the training-time fuzzy set so predict() reuses it.
        node.fuzzy_breakpoints = breakpoints
        return node

    def fit(self, X, y):
        """Build the tree from samples *X* (2-D, numeric) and integer labels *y*.

        Raises:
            ValueError: if *X* is not two-dimensional, is empty, or its length
                differs from that of *y*.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] == 0:
            raise ValueError("cannot fit on an empty dataset")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        self.tree = self._create_tree(X, y, 0)

    def predict(self, X):
        """Return a list with the predicted label of every row of *X*.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.tree is None:
            raise RuntimeError("FuzzyDecisionTree is not fitted; call fit() first")
        X = np.asarray(X)
        y_pred = []
        for sample in X:
            node = self.tree
            while node.label is None:
                degree = self._membership(
                    sample[node.feature_index], node.operator, node.fuzzy_breakpoints
                )
                node = node.left if degree > node.threshold else node.right
            y_pred.append(node.label)
        return y_pred