基于CNN+MFCC的语音情感识别的python代码
时间: 2023-09-28 21:07:32 浏览: 77
以下是基于CNN+MFCC的语音情感识别的Python代码:
```python
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras import layers
# Load dataset
def load_data():
    """Load pre-computed feature matrices and label vectors from .npy files.

    Expects X_train.npy, X_test.npy, y_train.npy and y_test.npy in the
    current working directory.

    Returns:
        Tuple of (X_train, X_test, y_train, y_test) numpy arrays.
    """
    train_features = np.load('X_train.npy')
    test_features = np.load('X_test.npy')
    train_labels = np.load('y_train.npy')
    test_labels = np.load('y_test.npy')
    return train_features, test_features, train_labels, test_labels
# Extract MFCC features
def extract_features(file_path, n_mfcc=40):
    """Extract a fixed-length MFCC feature vector from an audio file.

    Args:
        file_path: Path to the audio file to analyze.
        n_mfcc: Number of MFCC coefficients to compute (default 40,
            matching the original implementation).

    Returns:
        1-D numpy array of shape (n_mfcc,): each MFCC coefficient
        averaged over all time frames.
    """
    # NOTE: the original wrapped these calls in `tf.device('/CPU:0')`, but
    # librosa/numpy operations are not TensorFlow ops, so the device context
    # had no effect — removed.
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    # Collapse the time axis: mean of each coefficient across all frames.
    mfccs_scaled = np.mean(mfccs.T, axis=0)
    return mfccs_scaled
# Build CNN model
def build_model(input_shape, num_classes):
    """Construct a small 2-D CNN classifier over MFCC feature maps.

    Args:
        input_shape: (height, width) of one input sample; a trailing
            channel dimension of 1 is added by the Reshape layer.
        num_classes: Number of emotion categories to predict.

    Returns:
        An uncompiled tf.keras.Sequential model ending in a softmax layer.
    """
    return tf.keras.Sequential([
        # Add the single-channel axis expected by Conv2D.
        layers.Reshape((*input_shape, 1), input_shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])
# Train model
def train_model(X_train, y_train, X_test, y_test, epochs=30, batch_size=32):
    """Build, compile and fit the CNN on the training data.

    Args:
        X_train: Training features, expected shape (samples, height, width).
        y_train: Integer class labels for X_train.
        X_test: Held-out features used as validation data during fitting.
        y_test: Integer class labels for X_test.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        The fitted tf.keras model.
    """
    height, width = X_train.shape[1], X_train.shape[2]
    num_classes = np.unique(y_train).size
    model = build_model((height, width), num_classes)
    # Integer labels -> sparse categorical cross-entropy.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))
    return model
# Test model
def test_model(model, X_test, y_test):
    """Evaluate the fitted model on the test set and print loss/accuracy."""
    results = model.evaluate(X_test, y_test)
    for label, value in zip(('Test loss:', 'Test accuracy:'), results):
        print(label, value)
# Predict emotion
def predict_emotion(model, file_path):
    """Predict the emotion class index for a single audio file.

    NOTE(review): extract_features returns a 1-D vector, while the CNN
    built by build_model expects 2-D feature maps — confirm the feature
    pipeline matches the layout the model was trained on.

    Args:
        model: A fitted tf.keras model.
        file_path: Path to the audio file to classify.

    Returns:
        Integer index of the highest-probability emotion class.
    """
    # Run inference explicitly on CPU to avoid occupying the GPU.
    with tf.device('/CPU:0'):
        feature_batch = np.expand_dims(extract_features(file_path), axis=0)
        predicted_class = np.argmax(model.predict(feature_batch))
    return predicted_class
# Main function
if __name__ == '__main__':
    # End-to-end demo: load data, train, evaluate, then classify one file.
    X_train, X_test, y_train, y_test = load_data()
    trained_model = train_model(X_train, y_train, X_test, y_test)
    test_model(trained_model, X_test, y_test)
    predicted = predict_emotion(trained_model, 'test.wav')
    print('Predicted emotion:', predicted)
```
其中,`load_data()`函数用于加载数据集(训练集和测试集),`extract_features()`函数用于提取MFCC特征,`build_model()`函数用于构建CNN模型,`train_model()`函数用于训练模型,`test_model()`函数用于测试模型,`predict_emotion()`函数用于预测语音情感。在主函数中,我们首先加载数据集,然后训练模型,并测试模型的性能。最后,我们使用模型预测一段语音文件的情感。注意:训练时代码没有显式指定设备,TensorFlow 会在检测到 GPU 时自动使用 GPU 加速;而在预测情感时,代码通过 `tf.device('/CPU:0')` 显式指定在 CPU 上运行推理,这样可以节省 GPU 资源。