如何使用 TextBlob 进行英文文本情感分析?需要完成以下步骤:第一,分词并去除停用词,对分词结果进行词频统计并获取词频 top-N 的词;第二,提取特征词,计算特征值;第三,绘制词云图;第四,进行情感分析并分别计算正面、中性、负面的情感得分;第五,绘制情感分析占比图、直方图和波动图。
时间: 2024-06-11 08:08:11 浏览: 159
sentiments-analysis-with-textblob:使用Textblob进行情感分析
具体实现过程如下:
1. 分词并去除停用词,对分词结果进行词频统计并获取词频 top-N 的词(本段代码同时绘制词云图)
```
from textblob import TextBlob
import nltk
from nltk.corpus import stopwords
from collections import Counter
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Load NLTK's English stop-word list as a set so membership tests are cheap.
# A fresh NLTK install ships without corpora, in which case
# stopwords.words() raises LookupError; fetch the corpus once and retry.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))
# Tokenize the text and remove stop words
def tokenize(text):
    """Split *text* into lowercase word tokens with stop words removed.

    The text is lowercased and tokenized with ``nltk.word_tokenize``; tokens
    that are not purely alphabetic (punctuation, numbers) or that appear in
    the module-level ``stop_words`` set are dropped.

    NOTE: ``nltk.word_tokenize`` needs NLTK's Punkt tokenizer data to be
    installed; it raises ``LookupError`` otherwise.

    Args:
        text: The raw English text to tokenize.

    Returns:
        A list of the remaining tokens, in their original order.
    """
    tokens = nltk.word_tokenize(text.lower())
    return [
        token
        for token in tokens
        if token.isalpha() and token not in stop_words
    ]
# Count word frequencies
def word_frequency(tokens, top_n=10):
    """Return the ``top_n`` most frequent tokens together with their counts.

    Args:
        tokens: Iterable of hashable tokens.
        top_n: Maximum number of entries to keep. Defaults to 10.

    Returns:
        A dict mapping token -> count, ordered from most to least frequent.
        Empty input yields an empty dict.
    """
    word_counts = Counter(tokens)
    return dict(word_counts.most_common(top_n))
# Sample input text
text = "This is a sample text for sentiment analysis. We will use TextBlob for this purpose."

# Tokenize (stop words removed), then keep only the most frequent words
tokens = tokenize(text)
top_words = word_frequency(tokens)

# Show the top-N word counts
print(top_words)

# Build the word cloud from the frequency table and display it without axes
wordcloud = WordCloud(width=800, height=400, background_color='white')
wordcloud.generate_from_frequencies(top_words)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
```
2. 提取特征词,计算特征值
```
from textblob import TextBlob
import nltk
from nltk.corpus import stopwords
from collections import Counter
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Load NLTK's English stop-word list as a set so membership tests are cheap.
# A fresh NLTK install ships without corpora, in which case
# stopwords.words() raises LookupError; fetch the corpus once and retry.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))
# Tokenize the text and remove stop words
def tokenize(text):
    """Split *text* into lowercase word tokens with stop words removed.

    The text is lowercased and tokenized with ``nltk.word_tokenize``; tokens
    that are not purely alphabetic (punctuation, numbers) or that appear in
    the module-level ``stop_words`` set are dropped.

    NOTE: ``nltk.word_tokenize`` needs NLTK's Punkt tokenizer data to be
    installed; it raises ``LookupError`` otherwise.

    Args:
        text: The raw English text to tokenize.

    Returns:
        A list of the remaining tokens, in their original order.
    """
    tokens = nltk.word_tokenize(text.lower())
    return [
        token
        for token in tokens
        if token.isalpha() and token not in stop_words
    ]
# Count word frequencies
def word_frequency(tokens, top_n=10):
    """Return the ``top_n`` most frequent tokens together with their counts.

    Args:
        tokens: Iterable of hashable tokens.
        top_n: Maximum number of entries to keep. Defaults to 10.

    Returns:
        A dict mapping token -> count, ordered from most to least frequent.
        Empty input yields an empty dict.
    """
    word_counts = Counter(tokens)
    return dict(word_counts.most_common(top_n))
# Compute the sentiment feature values of the text
def extract_features(text):
    """Return TextBlob's sentiment scores for *text*.

    Despite the name, no feature words are extracted here: the "features"
    are the two values of ``TextBlob(text).sentiment``. Per TextBlob's
    documentation, polarity lies in [-1.0, 1.0] and subjectivity in
    [0.0, 1.0].

    Args:
        text: The English text to score.

    Returns:
        A plain ``(polarity, subjectivity)`` tuple.
    """
    sentiment = TextBlob(text).sentiment
    return (sentiment.polarity, sentiment.subjectivity)
# Sample input text
text = "This is a sample text for sentiment analysis. We will use TextBlob for this purpose."

# Tokenize (stop words removed), then keep only the most frequent words
tokens = tokenize(text)
top_words = word_frequency(tokens)

# Show the top-N word counts
print(top_words)

# Build the word cloud from the frequency table and display it without axes
wordcloud = WordCloud(width=800, height=400, background_color='white')
wordcloud.generate_from_frequencies(top_words)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

# Score the whole text and report both values, one per line
polarity, subjectivity = extract_features(text)
print(f'Polarity: {polarity:.2f}', f'Subjectivity: {subjectivity:.2f}', sep='\n')
```
3. 进行情感分析并分别计算正面、中性、负面的情感得分
```
from textblob import TextBlob
import nltk
from nltk.corpus import stopwords
from collections import Counter
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Load NLTK's English stop-word list as a set so membership tests are cheap.
# A fresh NLTK install ships without corpora, in which case
# stopwords.words() raises LookupError; fetch the corpus once and retry.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))
# Tokenize the text and remove stop words
def tokenize(text):
    """Split *text* into lowercase word tokens with stop words removed.

    The text is lowercased and tokenized with ``nltk.word_tokenize``; tokens
    that are not purely alphabetic (punctuation, numbers) or that appear in
    the module-level ``stop_words`` set are dropped.

    NOTE: ``nltk.word_tokenize`` needs NLTK's Punkt tokenizer data to be
    installed; it raises ``LookupError`` otherwise.

    Args:
        text: The raw English text to tokenize.

    Returns:
        A list of the remaining tokens, in their original order.
    """
    tokens = nltk.word_tokenize(text.lower())
    return [
        token
        for token in tokens
        if token.isalpha() and token not in stop_words
    ]
# Count word frequencies
def word_frequency(tokens, top_n=10):
    """Return the ``top_n`` most frequent tokens together with their counts.

    Args:
        tokens: Iterable of hashable tokens.
        top_n: Maximum number of entries to keep. Defaults to 10.

    Returns:
        A dict mapping token -> count, ordered from most to least frequent.
        Empty input yields an empty dict.
    """
    word_counts = Counter(tokens)
    return dict(word_counts.most_common(top_n))
# Compute the sentiment feature values of the text
def extract_features(text):
    """Return TextBlob's sentiment scores for *text*.

    Despite the name, no feature words are extracted here: the "features"
    are the two values of ``TextBlob(text).sentiment``. Per TextBlob's
    documentation, polarity lies in [-1.0, 1.0] and subjectivity in
    [0.0, 1.0].

    Args:
        text: The English text to score.

    Returns:
        A plain ``(polarity, subjectivity)`` tuple.
    """
    sentiment = TextBlob(text).sentiment
    return (sentiment.polarity, sentiment.subjectivity)
# Classify the overall sentiment of the text
def analyze_sentiment(text):
    """Classify *text* as positive, negative or neutral.

    The label is derived from the sign of ``TextBlob(text).sentiment.polarity``;
    no numeric score is returned.

    Args:
        text: The English text to classify.

    Returns:
        ``'positive'`` when polarity is above zero, ``'negative'`` when it is
        below zero, and ``'neutral'`` when it is exactly zero.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 'positive'
    if polarity < 0:
        return 'negative'
    return 'neutral'
# Sample input text
text = "This is a sample text for sentiment analysis. We will use TextBlob for this purpose."

# Tokenize (stop words removed), then keep only the most frequent words
tokens = tokenize(text)
top_words = word_frequency(tokens)

# Show the top-N word counts
print(top_words)

# Build the word cloud from the frequency table and display it without axes
wordcloud = WordCloud(width=800, height=400, background_color='white')
wordcloud.generate_from_frequencies(top_words)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

# Score the whole text and report both values, one per line
polarity, subjectivity = extract_features(text)
print(f'Polarity: {polarity:.2f}', f'Subjectivity: {subjectivity:.2f}', sep='\n')

# Label the whole text as positive / negative / neutral and report it
sentiment = analyze_sentiment(text)
print(f'Sentiment: {sentiment}')
```
4. 绘制情感分析占比图、直方图和波动图
```
from textblob import TextBlob
import nltk
from nltk.corpus import stopwords
from collections import Counter
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Load NLTK's English stop-word list as a set so membership tests are cheap.
# A fresh NLTK install ships without corpora, in which case
# stopwords.words() raises LookupError; fetch the corpus once and retry.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))
# Tokenize the text and remove stop words
def tokenize(text):
    """Split *text* into lowercase word tokens with stop words removed.

    The text is lowercased and tokenized with ``nltk.word_tokenize``; tokens
    that are not purely alphabetic (punctuation, numbers) or that appear in
    the module-level ``stop_words`` set are dropped.

    NOTE: ``nltk.word_tokenize`` needs NLTK's Punkt tokenizer data to be
    installed; it raises ``LookupError`` otherwise.

    Args:
        text: The raw English text to tokenize.

    Returns:
        A list of the remaining tokens, in their original order.
    """
    tokens = nltk.word_tokenize(text.lower())
    return [
        token
        for token in tokens
        if token.isalpha() and token not in stop_words
    ]
# Count word frequencies
def word_frequency(tokens, top_n=10):
    """Return the ``top_n`` most frequent tokens together with their counts.

    Args:
        tokens: Iterable of hashable tokens.
        top_n: Maximum number of entries to keep. Defaults to 10.

    Returns:
        A dict mapping token -> count, ordered from most to least frequent.
        Empty input yields an empty dict.
    """
    word_counts = Counter(tokens)
    return dict(word_counts.most_common(top_n))
# Compute the sentiment feature values of the text
def extract_features(text):
    """Return TextBlob's sentiment scores for *text*.

    Despite the name, no feature words are extracted here: the "features"
    are the two values of ``TextBlob(text).sentiment``. Per TextBlob's
    documentation, polarity lies in [-1.0, 1.0] and subjectivity in
    [0.0, 1.0].

    Args:
        text: The English text to score.

    Returns:
        A plain ``(polarity, subjectivity)`` tuple.
    """
    sentiment = TextBlob(text).sentiment
    return (sentiment.polarity, sentiment.subjectivity)
# Classify the overall sentiment of the text
def analyze_sentiment(text):
    """Classify *text* as positive, negative or neutral.

    The label is derived from the sign of ``TextBlob(text).sentiment.polarity``;
    no numeric score is returned.

    Args:
        text: The English text to classify.

    Returns:
        ``'positive'`` when polarity is above zero, ``'negative'`` when it is
        below zero, and ``'neutral'`` when it is exactly zero.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 'positive'
    if polarity < 0:
        return 'negative'
    return 'neutral'
# Sample input text
text = "This is a sample text for sentiment analysis. We will use TextBlob for this purpose."

# Tokenize (stop words removed), then keep only the most frequent words
tokens = tokenize(text)
top_words = word_frequency(tokens)

# Show the top-N word counts
print(top_words)

# Build the word cloud from the frequency table and display it without axes
wordcloud = WordCloud(width=800, height=400, background_color='white')
wordcloud.generate_from_frequencies(top_words)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

# Whole-document sentiment scores
polarity, subjectivity = extract_features(text)
print(f'Polarity: {polarity:.2f}')
print(f'Subjectivity: {subjectivity:.2f}')

# Whole-document sentiment label
sentiment = analyze_sentiment(text)
print(f'Sentiment: {sentiment}')

# The charts below are per sentence. Plotting only the single whole-document
# score gives a one-wedge pie, a one-bar histogram and a line plot with
# nothing visible, even though its x-axis is labelled "Sentence".
# Fall back to the whole text if the sentence splitter returns nothing.
sentence_texts = [str(sentence) for sentence in TextBlob(text).sentences] or [text]
scores = [extract_features(sentence)[0] for sentence in sentence_texts]
sentence_sentiments = Counter(analyze_sentiment(sentence) for sentence in sentence_texts)

# Pie chart: share of positive / neutral / negative sentences
labels = ['Positive', 'Neutral', 'Negative']
sizes = [sentence_sentiments[label.lower()] for label in labels]
plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
plt.axis('equal')
plt.show()

# Histogram: distribution of per-sentence polarity scores
plt.hist(scores, bins=10)
plt.xlabel('Polarity')
plt.ylabel('Frequency')
plt.title('Sentiment Analysis')
plt.show()

# Line chart: polarity from sentence to sentence. Markers keep the data
# visible even when there is only one sentence.
sentence_numbers = list(range(1, len(scores) + 1))
plt.plot(sentence_numbers, scores, marker='o')
plt.xticks(sentence_numbers)
plt.xlabel('Sentence')
plt.ylabel('Polarity')
plt.title('Sentiment Analysis')
plt.show()
```
阅读全文