CNN实现语音识别代码
时间: 2023-09-01 08:07:52 浏览: 111
以下是使用CNN实现语音识别的Python代码示例:
```
import numpy as np
import librosa
import math
import os
import scipy.io.wavfile as wav
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Reshape
from keras.optimizers import Adam
from keras.utils import to_categorical
# 加载数据
def load_data(data_path):
    """Load every audio file under ``data_path`` and compute its MFCC features.

    ``data_path`` is expected to contain one sub-directory per class; the
    sub-directory name is used as the label of every file inside it.

    Args:
        data_path: Root directory holding one sub-directory per class.

    Returns:
        A tuple ``(data, labels)``. ``data`` stacks one 40-coefficient MFCC
        matrix per file (clips with fewer frames than the longest one are
        zero-padded along the frame axis so the matrices can be stacked).
        ``labels`` holds the class directory name of each file, in the same
        order. Both arrays are empty when no audio file is found.
    """
    data = []
    labels = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for c in sorted(os.listdir(data_path)):
        class_path = os.path.join(data_path, c)
        if not os.path.isdir(class_path):
            # Skip stray files sitting next to the class directories.
            continue
        for file in sorted(os.listdir(class_path)):
            file_path = os.path.join(class_path, file)
            # Resample everything to 16 kHz while loading.
            signal, sr = librosa.load(file_path, sr=16000)
            # The audio must be passed as `y=`: librosa >= 0.10 made it
            # keyword-only and raises TypeError on a positional argument.
            mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40)
            data.append(mfccs)
            labels.append(c)

    if data:
        # Clips of different duration yield a different number of frames;
        # right-pad with zeros so np.array can build one regular array.
        max_frames = max(m.shape[1] for m in data)
        data = [
            np.pad(m, ((0, 0), (0, max_frames - m.shape[1])), mode="constant")
            for m in data
        ]
    return np.array(data), np.array(labels)
# 创建CNN模型
def create_model(input_shape, num_classes):
    """Build the (uncompiled) CNN classifier.

    The network is two Conv2D + MaxPooling2D stages followed by a dense
    head with dropout and a softmax output.

    Args:
        input_shape: Shape of a single input sample, passed to the first
            convolution layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    layers = [
        # Convolutional feature extractor.
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Classifier head.
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]
    return Sequential(layers)
# 训练模型
def train_model(model, X_train, Y_train, X_val, Y_val, num_epochs, batch_size):
    """Compile ``model`` and fit it on the training set.

    The model is compiled with categorical cross-entropy loss, the Adam
    optimizer (learning rate 0.001) and the accuracy metric.

    Args:
        model: Model to compile and train (modified in place).
        X_train: Training inputs.
        Y_train: One-hot encoded training targets.
        X_val: Validation inputs, evaluated after each epoch.
        Y_val: One-hot encoded validation targets.
        num_epochs: Number of passes over the training data.
        batch_size: Number of samples per gradient update.

    Returns:
        The history object returned by ``model.fit``.
    """
    # `lr` is the deprecated spelling and is rejected by current Keras
    # releases; `learning_rate` is the supported keyword.
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )
    return model.fit(
        X_train,
        Y_train,
        batch_size=batch_size,
        epochs=num_epochs,
        verbose=1,
        validation_data=(X_val, Y_val),
    )
# 预测
def predict(model, signal, sr):
    """Classify one audio signal with a trained model.

    Args:
        model: Trained model whose input is a batch of single-channel MFCC
            matrices.
        signal: Audio samples of the clip to classify.
        sr: Sampling rate of ``signal``.

    Returns:
        Array with the index of the highest-scoring class (one entry, since
        a single clip is predicted).
    """
    # The audio must be passed as `y=`: librosa >= 0.10 made it keyword-only.
    mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40)

    # A clip of different duration than the training clips has a different
    # number of frames; pad or crop to the width the model was built with.
    expected = getattr(model, "input_shape", None)
    if isinstance(expected, tuple) and len(expected) == 4 and expected[2] is not None:
        num_frames = expected[2]
        if mfccs.shape[1] < num_frames:
            mfccs = np.pad(
                mfccs, ((0, 0), (0, num_frames - mfccs.shape[1])), mode="constant"
            )
        else:
            mfccs = mfccs[:, :num_frames]

    mfccs = np.expand_dims(mfccs, axis=-1)  # trailing channel axis
    mfccs = np.expand_dims(mfccs, axis=0)  # leading batch axis
    prediction = model.predict(mfccs)
    predicted_class = np.argmax(prediction, axis=1)
    return predicted_class
# Load the data.
data_path = "data/train"
X, Y = load_data(data_path)
# The model is built with a trailing channel axis of size 1 (see input_shape
# below) and predict() adds the same axis, so the training data needs it too.
if X.ndim == 3:
    X = X[..., np.newaxis]
# Labels are class directory names; to_categorical needs integer indices.
# class_names[i] is the name of class index i.
class_names, Y = np.unique(Y, return_inverse=True)
# Convert the labels to one-hot encoding.
Y = to_categorical(Y, num_classes=len(class_names))
# load_data emits samples grouped class by class, so a plain tail split would
# put only the last class(es) in the validation set. Shuffle first, with a
# fixed seed to keep the split reproducible.
order = np.random.default_rng(0).permutation(X.shape[0])
X, Y = X[order], Y[order]
# Split into training and validation sets.
val_split = 0.2
num_val = math.floor(X.shape[0] * val_split)
num_train = X.shape[0] - num_val
X_train, X_val = X[:num_train], X[num_train:]
Y_train, Y_val = Y[:num_train], Y[num_train:]
# Create the model.
input_shape = (X_train.shape[1], X_train.shape[2], 1)
num_classes = Y_train.shape[1]
model = create_model(input_shape, num_classes)
# Train the model.
num_epochs = 10
batch_size = 32
train_model(model, X_train, Y_train, X_val, Y_val, num_epochs, batch_size)
# Test the model on a single file.
test_file = "data/test/yes/0a7c2a8d_nohash_0.wav"
signal, sr = librosa.load(test_file, sr=16000)
predicted_class = predict(model, signal, sr)
print(predicted_class)
# Also show the class name behind the predicted index.
print(class_names[predicted_class])
```
注意:上述代码仅为示例,它假设 data/train 目录下每个类别对应一个子目录,子目录名即为类别标签。实际应用中,可能需要对模型结构、超参数等进行调整以达到更好的性能。