BiLSTM-CRF 命名实体识别的 Python 实现代码
时间: 2023-08-01 11:06:40 浏览: 184
以下是一个简单的 BiLSTM-CRF 命名实体识别的 Python 实现代码(基于 TensorFlow 1.x,使用了 `tf.placeholder` 和 `tf.contrib` 接口):
```python
import numpy as np
import tensorflow as tf
class BiLSTM_CRF:
    """BiLSTM-CRF sequence tagger built as a TensorFlow 1.x static graph.

    The constructor builds the whole graph: an embedding lookup, a
    bidirectional LSTM, a linear projection to per-tag scores, and a CRF
    layer whose negative log-likelihood is the training loss.

    NOTE: the graph uses ``tf.placeholder``, ``tf.variable_scope`` and
    ``tf.contrib``; it expects ``tf`` to be a TensorFlow 1.x module that
    still ships those APIs.
    """

    def __init__(self, vocab_size, tag_to_id, embedding_dim, hidden_dim):
        """Build the model graph.

        Args:
            vocab_size: Number of rows in the word-embedding matrix.
            tag_to_id: Mapping whose length gives the number of tags.
            embedding_dim: Width of each word embedding.
            hidden_dim: Number of units in each LSTM direction.
        """
        self.vocab_size = vocab_size
        self.tag_to_id = tag_to_id
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.num_tags = len(tag_to_id)

        # Inputs: padded id matrices plus the true length of every row.
        self.word_ids = tf.placeholder(
            tf.int32, shape=[None, None], name="word_ids")
        self.sequence_lengths = tf.placeholder(
            tf.int32, shape=[None], name="sequence_lengths")
        self.labels = tf.placeholder(
            tf.int32, shape=[None, None], name="labels")

        with tf.variable_scope("words"):
            self.word_embeddings = tf.get_variable(
                "word_embeddings", [vocab_size, embedding_dim])
            self.embedded_words = tf.nn.embedding_lookup(
                self.word_embeddings, self.word_ids)

        with tf.variable_scope("bi-lstm"):
            cell_fw = tf.contrib.rnn.LSTMCell(hidden_dim)
            cell_bw = tf.contrib.rnn.LSTMCell(hidden_dim)
            (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                self.embedded_words,
                sequence_length=self.sequence_lengths,
                dtype=tf.float32)
            # Concatenate both directions on the feature axis.
            self.lstm_output = tf.concat([output_fw, output_bw], axis=-1)

        with tf.variable_scope("proj"):
            W = tf.get_variable(
                "W",
                shape=[2 * hidden_dim, self.num_tags],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.get_variable(
                "b",
                shape=[self.num_tags],
                initializer=tf.zeros_initializer())
            # Flatten to 2-D for the matmul, then restore the
            # (batch, time, num_tags) layout using the dynamic time length.
            flat_output = tf.reshape(self.lstm_output, [-1, 2 * hidden_dim])
            self.logits = tf.matmul(flat_output, W) + b
            self.logits = tf.reshape(
                self.logits,
                [-1, tf.shape(self.word_ids)[1], self.num_tags])

        with tf.variable_scope("crf"):
            self.transition_params = tf.get_variable(
                "transition_params",
                shape=[self.num_tags, self.num_tags],
                initializer=tf.contrib.layers.xavier_initializer())
            log_likelihood, self.transition_params = (
                tf.contrib.crf.crf_log_likelihood(
                    self.logits,
                    self.labels,
                    self.sequence_lengths,
                    self.transition_params))
            self.loss = tf.reduce_mean(-log_likelihood)

        with tf.variable_scope("train_step"):
            self.train_op = tf.train.AdamOptimizer().minimize(self.loss)

    def train(self, train_data, dev_data, epochs, batch_size):
        """Train for ``epochs`` passes and evaluate on ``dev_data`` each epoch.

        A new session is opened for the duration of the call and closed when
        it returns.

        Args:
            train_data: Training sentences in the format accepted by
                ``generate_batches``.
            dev_data: Held-out sentences passed to ``evaluate`` every epoch.
            epochs: Number of passes over ``train_data``.
            batch_size: Number of sentences per training batch.
        """
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for epoch in range(epochs):
                print("Epoch {}/{}".format(epoch + 1, epochs))
                # Stays None when train_data yields no batches, so the
                # report below cannot hit an unbound name.
                loss = None
                for batch in self.generate_batches(train_data, batch_size):
                    word_ids, labels, sequence_lengths = batch
                    feed_dict = {
                        self.word_ids: word_ids,
                        self.labels: labels,
                        self.sequence_lengths: sequence_lengths,
                    }
                    _, loss = sess.run(
                        [self.train_op, self.loss], feed_dict=feed_dict)
                if loss is not None:
                    # Loss of the last batch of the epoch.
                    print("Train loss: {}".format(loss))
                self.evaluate(sess, dev_data)

    def evaluate(self, sess, data):
        """Decode ``data`` with Viterbi and print token-level metrics.

        Sentences are decoded one at a time (batch size 1). Every token
        counts once as a prediction and once as a gold label, so the two
        denominators are always equal and the printed precision, recall and
        F1 all coincide with the accuracy.

        Args:
            sess: Session holding the trained variables.
            data: Sentences in the format accepted by ``generate_batches``.
        """
        correct_preds, total_correct, total_preds = 0., 0., 0.
        for batch in self.generate_batches(data, 1):
            word_ids, labels, sequence_lengths = batch
            feed_dict = {
                self.word_ids: word_ids,
                self.labels: labels,
                self.sequence_lengths: sequence_lengths,
            }
            logits, transition_params = sess.run(
                [self.logits, self.transition_params], feed_dict=feed_dict)
            # generate_batches yields a plain list, which has no .tolist();
            # np.asarray accepts both lists and arrays.
            lengths = np.asarray(sequence_lengths).tolist()
            for logit, sequence_length, label in zip(logits, lengths, labels):
                # Drop the padded positions before decoding and scoring.
                logit = logit[:sequence_length]
                viterbi_seq, _ = tf.contrib.crf.viterbi_decode(
                    logit, transition_params)
                viterbi_seq = np.array(viterbi_seq)
                label = label[:sequence_length]
                correct_preds += np.sum(np.equal(viterbi_seq, label))
                total_preds += len(viterbi_seq)
                total_correct += len(label)

        # Guard the denominator so empty data reports zeros instead of
        # raising ZeroDivisionError.
        accuracy = correct_preds / total_preds if total_preds > 0 else 0
        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        print("Accuracy: {:.2f}, Precision: {:.2f}, Recall: {:.2f}, F1: {:.2f}".format(100 * accuracy, 100 * p, 100 * r, 100 * f1))

    def generate_batches(self, data, batch_size):
        """Yield zero-padded mini-batches from ``data``.

        Args:
            data: Sliceable sequence of sentences. Each sentence is indexable
                with a list of word ids at position 0 and a list of label ids
                at position 1.
            batch_size: Maximum number of sentences per batch; the last batch
                may be smaller.

        Yields:
            Tuples ``(word_ids, labels, sequence_lengths)``. Each row of
            ``word_ids`` and ``labels`` is right-padded with 0 up to the
            longest word sequence in the batch. ``sequence_lengths`` holds
            the unpadded word-sequence lengths.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        for start_index in range(0, len(data), batch_size):
            batch = data[start_index:start_index + batch_size]
            word_ids = [sentence[0] for sentence in batch]
            labels = [sentence[1] for sentence in batch]
            sequence_lengths = [len(words) for words in word_ids]
            max_length = max(sequence_lengths)
            word_ids = [
                words + [0] * (max_length - len(words)) for words in word_ids
            ]
            labels = [
                tags + [0] * (max_length - len(tags)) for tags in labels
            ]
            yield word_ids, labels, sequence_lengths
```
这段代码使用 TensorFlow(1.x 接口)实现了一个双向 LSTM 加 CRF 的模型,用于命名实体识别。输入数据是句子的单词 ID 序列和对应的标签 ID 序列,输出是预测的标签序列。训练时使用 Adam 优化器进行参数优化,并以 CRF 的负对数似然作为模型的损失。评估时使用 Viterbi 解码得到预测标签,并输出准确率、精确率、召回率和 F1 值;需要注意的是,这些指标都是按词(token)统计的,因此数值相同,并不是实体级别的评估结果。