TextRank算法使用的Python代码
时间: 2024-02-04 10:02:03 浏览: 115
以下是使用Python实现TextRank算法的示例代码:
```python
import numpy as np
import networkx as nx
# 定义TextRank算法类
class TextRank:
    """Rank sentences with TextRank and build an extractive summary.

    Sentences are nodes of a graph whose edge weights are the word-overlap
    similarity ``|A & B| / (log|A| + log|B|)``; a damped power iteration over
    the row-normalised weights yields one score per sentence.

    Args:
        sentences: Sequence of sentence strings to rank.
        d: Damping factor of the random walk.
        min_diff: Stop iterating once the summed absolute change of the
            scores between two iterations is at most this value.
        steps: Maximum number of iterations.
        tokenizer: Optional callable mapping a sentence to an iterable of
            tokens. Defaults to whitespace splitting (``str.split``). Text
            written without spaces (e.g. Chinese) needs a real tokenizer
            here, otherwise every sentence is a single token and all
            similarities are zero.
    """

    def __init__(self, sentences, d=0.85, min_diff=1e-5, steps=10, tokenizer=None):
        self.sentences = sentences
        self.d = d  # damping factor
        self.min_diff = min_diff  # convergence threshold
        self.steps = steps  # maximum number of iterations
        self.n_sentences = len(sentences)  # number of sentences
        self.tokenizer = str.split if tokenizer is None else tokenizer

    @staticmethod
    def _overlap_similarity(tokens_a, tokens_b):
        """Return the TextRank overlap similarity of two token sets."""
        if not tokens_a or not tokens_b:
            # log(0) is undefined; an empty sentence is similar to nothing.
            return 0.0
        denominator = np.log(len(tokens_a)) + np.log(len(tokens_b))
        if denominator <= 0:
            # Both sets hold exactly one token: log(1) + log(1) == 0.
            # Report "no similarity" instead of dividing by zero.
            return 0.0
        return len(tokens_a & tokens_b) / denominator

    def get_similarity_matrix(self):
        """Return the row-normalised sentence similarity matrix.

        Returns:
            An ``(n, n)`` float array with a zero diagonal. Every row that
            has at least one non-zero similarity sums to 1; rows without any
            similarity stay all-zero.
        """
        n = self.n_sentences
        # Tokenise each sentence once instead of once per pair.
        token_sets = [set(self.tokenizer(sentence)) for sentence in self.sentences]

        matrix = np.zeros((n, n))
        for i in range(n):
            for j in range(i + 1, n):
                weight = self._overlap_similarity(token_sets[i], token_sets[j])
                matrix[i, j] = weight
                matrix[j, i] = weight

        # Normalise each row; rows summing to zero are left untouched.
        row_sums = matrix.sum(axis=1, keepdims=True)
        np.divide(matrix, row_sums, out=matrix, where=row_sums != 0)
        return matrix

    def get_text_rank(self):
        """Return the TextRank score of every sentence.

        Returns:
            A 1-D float array of length ``n_sentences`` (empty when there
            are no sentences).
        """
        n = self.n_sentences
        if n == 0:
            return np.zeros(0)

        transition = self.get_similarity_matrix().T
        teleport = (1 - self.d) / n
        scores = np.full(n, 1.0 / n)
        for _ in range(self.steps):
            updated = teleport + self.d * transition.dot(scores)
            delta = np.abs(updated - scores).sum()
            # Keep the newest estimate even on the iteration that converges.
            scores = updated
            if delta <= self.min_diff:
                break
        return scores

    def get_top_sentences(self, n=3):
        """Return the ``n`` highest-scoring sentences, best first.

        Ties are broken in favour of the sentence that appears earlier.
        """
        scores = self.get_text_rank()
        # A stable sort makes the order of equally scored sentences
        # deterministic (document order).
        top_idx = np.argsort(-scores, kind="stable")[:n]
        return [self.sentences[i] for i in top_idx]

    def get_summary(self, n=3):
        """Return the top ``n`` sentences concatenated into one string.

        Sentences are separated by "。"; the separator is omitted after a
        sentence that already ends with "。" so punctuation is not doubled.
        """
        top_sentences = self.get_top_sentences(n)
        last = len(top_sentences) - 1
        parts = []
        for position, sentence in enumerate(top_sentences):
            parts.append(sentence)
            if position != last and not sentence.endswith("。"):
                parts.append("。")
        return "".join(parts)
# 示例
# Sample document: one string per sentence.
sentences = [
    "Python是一种高级编程语言。",
    "它已经成为数据科学、机器学习和人工智能等领域的主流编程语言。",
    "Python有一个简单易学的语法,使其成为初学者和专业人士的首选。",
    "Python还有大量的第三方库,可以大大加速开发过程。",
    "你可以使用Python进行Web开发、游戏开发、科学计算等。",
    "总之,Python是一种非常强大、灵活且易于使用的编程语言。",
]

# Rank the sentences and print a summary built from the three best ones.
text_rank = TextRank(sentences)
summary = text_rank.get_summary(n=3)
print(summary)
```
运行结果(示意;中文句子需先分词,否则按空格切分无法得到有意义的相似度):
```
Python是一种高级编程语言。它已经成为数据科学、机器学习和人工智能等领域的主流编程语言。总之,Python是一种非常强大、灵活且易于使用的编程语言。
```
阅读全文