# Posted: 2024-03-20 17:44:11 | Views: 60
!pip install numpy
!pip install scikit-learn
!pip install nltk
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
def load_glove_vectors(glove_file):
    """Load word embeddings from a GloVe-style text file.

    Every useful line holds a token followed by its whitespace-separated
    float components. Blank lines and lines that contain a token but no
    components are skipped, so a trailing empty line no longer raises
    ``IndexError`` and no zero-length vector ends up in the result.

    Args:
        glove_file: Path of the embeddings file; it is read as UTF-8.

    Returns:
        A dict mapping each word to a 1-D ``float32`` NumPy array.
    """
    word_vectors = {}
    with open(glove_file, encoding="utf8") as f:
        for line in f:
            values = line.split()
            # Fewer than two fields means there is no vector to store.
            if len(values) < 2:
                continue
            word_vectors[values[0]] = np.asarray(values[1:], dtype='float32')
    return word_vectors
# Load the pre-trained embeddings once at module level; text_to_vector reads
# the resulting `word_vectors` mapping as a global.
glove_file = "glove.6B.100d.txt" # download the GloVe file beforehand
word_vectors = load_glove_vectors(glove_file)
def prepare_data():
    """Return the hard-coded toy sentiment dataset, split into texts and labels.

    Returns:
        A tuple ``(train_corpus, train_labels, test_corpus, test_labels)``.
        Each element is a list; the corpus lists hold the sentences and the
        label lists hold the matching ``"positive"`` / ``"negative"`` strings
        in the same order.
    """
    # Prepare the data: (sentence, label) pairs.
    train_data = [
        ("I love this sandwich.", "positive"),
        ("This is an amazing place!", "positive"),
        ("I feel very good about these beers.", "positive"),
        ("This is my best work.", "positive"),
        ("What an awesome view", "positive"),
        ("I do not like this restaurant", "negative"),
        ("I am tired of this stuff.", "negative"),
        ("I can't deal with this", "negative"),
        ("He is my sworn enemy!", "negative"),
        ("My boss is horrible.", "negative"),
    ]
    test_data = [
        ("The beer was good.", "positive"),
        ("I do not enjoy my job", "negative"),
        ("I ain't feeling dandy today.", "negative"),
        ("I feel amazing!", "positive"),
        ("Gary is a friend of mine.", "positive"),
        ("I can't believe I'm doing this.", "negative"),
    ]

    train_corpus = [text for text, _ in train_data]
    train_labels = [label for _, label in train_data]
    test_corpus = [text for text, _ in test_data]
    test_labels = [label for _, label in test_data]
    return train_corpus, train_labels, test_corpus, test_labels
# Materialize the dataset once; these module-level lists feed the
# vectorization and training steps further down.
train_corpus, train_labels, test_corpus, test_labels = prepare_data()
_STOP_WORDS_CACHE = {}


def _english_stop_words():
    """Return NLTK's English stop words as a frozenset, building it only once."""
    if 'english' not in _STOP_WORDS_CACHE:
        try:
            words = stopwords.words('english')
        except LookupError:
            # The corpus is not installed yet: fetch it and retry once.
            nltk.download('stopwords')
            words = stopwords.words('english')
        _STOP_WORDS_CACHE['english'] = frozenset(words)
    return _STOP_WORDS_CACHE['english']


def _tokenize(text):
    """Tokenize *text* with NLTK, fetching the tokenizer data if it is missing."""
    try:
        return word_tokenize(text)
    except LookupError:
        # Depending on the NLTK version the tokenizer needs either the
        # 'punkt' or the 'punkt_tab' resource; request both, then retry.
        nltk.download('punkt')
        nltk.download('punkt_tab')
        return word_tokenize(text)


def text_to_vector(text):
    """Convert *text* to the average of its words' embedding vectors.

    The text is lower-cased, tokenized and stripped of English stop words.
    The vectors of the remaining tokens found in the module-level
    ``word_vectors`` mapping are averaged.

    Args:
        text: The sentence to embed.

    Returns:
        A 1-D float NumPy array. Its length is the dimension of the loaded
        embeddings (100 when ``word_vectors`` is empty). It is all zeros
        when no token of *text* has an embedding.
    """
    stop_words = _english_stop_words()
    tokens = [w for w in _tokenize(text.lower()) if w not in stop_words]

    # Take the dimension from the loaded embeddings rather than hard-coding
    # 100, so other GloVe files (50d, 200d, 300d, ...) work unchanged.
    dim = len(next(iter(word_vectors.values()))) if word_vectors else 100
    vector = np.zeros(dim)

    count = 0
    for word in tokens:
        if word in word_vectors:
            vector += word_vectors[word]
            count += 1
    # Guard against division by zero when no token was recognized.
    if count != 0:
        vector /= count
    return vector
def corpus_to_vectors(corpus):
    """Embed every text in *corpus* and collect the results in one NumPy array.

    Args:
        corpus: Iterable of strings; each is passed to ``text_to_vector``.

    Returns:
        ``np.array`` built from the per-text vectors, in corpus order.
    """
    embedded = []
    for document in corpus:
        embedded.append(text_to_vector(document))
    return np.array(embedded)
# Turn both splits into feature arrays of averaged word embeddings.
X_train = corpus_to_vectors(train_corpus)
X_test = corpus_to_vectors(test_corpus)
from sklearn.svm import SVC
def train_classifier(X_train, y_train):
    """Fit and return a linear-kernel SVC with probability estimates enabled.

    Args:
        X_train: Training feature vectors.
        y_train: Labels aligned with ``X_train``.

    Returns:
        The fitted ``SVC`` instance.
    """
    # SVC.fit returns the estimator itself, so the fitted model can be
    # returned straight from the call chain.
    return SVC(kernel='linear', probability=True).fit(X_train, y_train)
# Train the SVM on the embedded training sentences.
classifier = train_classifier(X_train, train_labels)
def test_classifier(classifier, X_test, y_test):
    """Score *classifier* on a held-out set.

    Args:
        classifier: Fitted model exposing ``predict``.
        X_test: Feature vectors to classify.
        y_test: True labels aligned with ``X_test``.

    Returns:
        The value of ``accuracy_score`` for the true labels against the
        model's predictions.
    """
    return accuracy_score(y_test, classifier.predict(X_test))
# Evaluate on the held-out sentences and report the result.
accuracy = test_classifier(classifier, X_test, test_labels)
print("Accuracy:", accuracy)