plp语音特征提取python代码
时间: 2023-09-11 15:04:54 浏览: 412
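以下是使用Python实现语音特征提取的示例代码。注意:该代码实际实现的是MFCC(梅尔频率倒谱系数)的提取流程;预加重、分帧、加窗和功率谱计算等前端步骤与PLP相同,但代码中不包含PLP特有的临界频带(Bark尺度)分析、等响度预加重、强度-响度压缩和线性预测分析: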
```python
import numpy as np
import scipy.signal as signal
def pre_emp(signal, coeff=0.95):
    """Apply a first-order pre-emphasis (high-pass) difference to a signal.

    Each output sample is ``signal[n + 1] - coeff * signal[n]``, so the
    result is one sample shorter than the input: the first input sample has
    no predecessor and produces no output of its own.

    Args:
        signal: Samples as a NumPy array or any array-like (for example a
            plain list); converted with ``np.asarray``.
        coeff: Weight applied to the previous sample.

    Returns:
        A NumPy array holding ``len(signal) - 1`` samples (empty when the
        input has fewer than two samples).
    """
    # np.asarray lets plain Python sequences through; list slices do not
    # support the element-wise arithmetic below.
    samples = np.asarray(signal)
    return samples[1:] - coeff * samples[:-1]
def framing(signal, fs, frame_len=0.025, frame_shift=0.01):
    """Split a 1-D signal into overlapping frames, zero-padding the tail.

    Args:
        signal: 1-D array-like of samples.
        fs: Sampling rate in Hz.
        frame_len: Frame duration in seconds.
        frame_shift: Distance between consecutive frame starts, in seconds.

    Returns:
        A float array of shape ``(num_frames, frame_size)`` where
        ``frame_size = int(frame_len * fs)``. Every input sample falls in at
        least one frame; the last frame is zero-padded when the signal does
        not fill it. A signal shorter than one frame yields a single padded
        frame.

    Raises:
        ValueError: If the frame length or frame shift is shorter than one
            sample at the given sampling rate.
    """
    frame_size = int(frame_len * fs)
    frame_step = int(frame_shift * fs)
    if frame_size <= 0 or frame_step <= 0:
        raise ValueError(
            "frame_len and frame_shift must each span at least one sample"
        )

    samples = np.asarray(signal, dtype=float)
    signal_len = len(samples)

    # Number of hops needed after the first frame so the final frame reaches
    # the last sample (integer ceiling division; never negative).
    excess = max(signal_len - frame_size, 0)
    num_hops = -(-excess // frame_step)
    # The padded buffer holds num_hops + 1 frames, so that many must be
    # extracted; taking only num_hops frames would drop the signal's tail.
    num_frames = num_hops + 1

    pad_signal = np.zeros(num_hops * frame_step + frame_size)
    pad_signal[:signal_len] = samples

    # Row i of `indices` is the sample positions of frame i.
    frame_starts = frame_step * np.arange(num_frames)
    indices = frame_starts[:, np.newaxis] + np.arange(frame_size)
    return pad_signal[indices]
def hamming_window(frame):
    """Multiply a frame, or a batch of frames, by a Hamming window.

    The window length is taken from the last axis, so both a single 1-D
    frame and a 2-D ``(num_frames, frame_size)`` array are windowed
    sample-wise along each frame.

    Args:
        frame: Array-like whose last axis indexes the samples of a frame.

    Returns:
        A NumPy array of the same shape as ``frame`` with the window applied.
    """
    samples = np.asarray(frame)
    # Sizing the window by the last axis (not len(), which is the first
    # axis) makes the window broadcast across a batch of frames.
    return samples * np.hamming(samples.shape[-1])
def power_spectrum(frame, nfft):
    """Return the squared magnitude of the real FFT of ``frame``.

    Args:
        frame: Real-valued samples; the transform runs along the last axis.
        nfft: FFT length passed to ``np.fft.rfft``.

    Returns:
        Array of squared spectrum magnitudes, one per non-negative frequency
        bin produced by ``np.fft.rfft``.
    """
    spectrum = np.fft.rfft(frame, n=nfft)
    magnitude = np.abs(spectrum)
    return np.square(magnitude)
def mel_filter_bank(nfilt, nfft, fs):
    """Build a bank of triangular filters spaced evenly on the Mel scale.

    Args:
        nfilt: Number of triangular filters.
        nfft: FFT length used to map frequencies onto spectrum bins.
        fs: Sampling rate in Hz; the filters span 0 Hz to ``fs / 2``.

    Returns:
        Array of shape ``(nfilt, floor(nfft / 2 + 1))``; row ``i`` holds the
        weights of filter ``i`` over the FFT bins.
    """
    # nfilt + 2 band edges, equally spaced in Mel between 0 Hz and fs / 2.
    mel_floor = 0
    mel_ceiling = 2595 * np.log10(1 + (fs / 2) / 700)  # Hz -> Mel
    mel_edges = np.linspace(mel_floor, mel_ceiling, nfilt + 2)
    hz_edges = 700 * (10 ** (mel_edges / 2595) - 1)  # Mel -> Hz

    # FFT bin position of every band edge (whole numbers stored as floats).
    edge_bins = np.floor((nfft + 1) * hz_edges / fs)

    num_bins = int(np.floor(nfft / 2 + 1))
    fbank = np.zeros((nfilt, num_bins))

    # Each filter uses three consecutive edges: left foot, peak, right foot.
    edge_triples = zip(edge_bins[:-2], edge_bins[1:-1], edge_bins[2:])
    for row, (left, center, right) in enumerate(edge_triples):
        rising = np.arange(int(left), int(center))
        falling = np.arange(int(center), int(right))
        # Linear ramp up to the peak, then linear ramp back down.
        fbank[row, rising] = (rising - left) / (center - left)
        fbank[row, falling] = (right - falling) / (right - center)
    return fbank
def mfcc(signal, fs, nfilt=40, nfft=512):
    """Extract MFCC-style cepstral features from a signal.

    Pipeline: pre-emphasis, framing, Hamming windowing, power spectrum,
    Mel filter bank, log compression, recursive filtering, and a type-2
    orthonormal DCT of which the first 13 coefficients are kept.

    Args:
        signal: 1-D array of audio samples.
        fs: Sampling rate in Hz.
        nfilt: Number of Mel filters.
        nfft: Currently unused. The FFT length is derived from the frame
            size (next power of two); the parameter is kept so existing
            callers keep working.

    Returns:
        Array of shape ``(num_frames, min(13, nfilt))``.
    """
    # Imported locally under their own names: inside this function the
    # `signal` parameter shadows the module-level `scipy.signal` alias, and
    # `dct` is not imported at the top of the file.
    from scipy.fft import dct
    from scipy.signal import lfilter

    pre_emphasized_signal = pre_emp(signal)
    frames = framing(pre_emphasized_signal, fs)
    frame_size = frames.shape[1]
    # Not in-place, so the array returned by framing() is left untouched.
    windowed = frames * np.hamming(frame_size)

    # Next power of two >= frame size, as an int (np.fft.rfft rejects a
    # float length).
    n_fft = 1 << (frame_size - 1).bit_length()
    mag_frames = np.absolute(np.fft.rfft(windowed, n_fft))
    pow_frames = (1.0 / n_fft) * np.square(mag_frames)

    fb = mel_filter_bank(nfilt, n_fft, fs)
    feat = np.dot(pow_frames, fb.T)
    # Replace exact zeros so the logarithm below stays finite.
    feat = np.where(feat == 0, np.finfo(float).eps, feat)
    feat = 20 * np.log10(feat)

    # NOTE(review): lfilter runs along its default last axis, i.e. across
    # the filter-bank channels of each frame, exactly as originally written.
    # Confirm this recursive filtering step is intended.
    feat = lfilter([1], [1, -0.97], feat)

    # Keep only the first 13 cepstral coefficients.
    return dct(feat, type=2, axis=1, norm='ortho')[:, :13]
```
上述代码中,`pre_emp`函数实现预加重,`framing`函数实现分帧,`hamming_window`函数实现加窗,`power_spectrum`函数实现功率谱计算,`mel_filter_bank`函数用于构造Mel滤波器组,`mfcc`函数实现MFCC特征提取。其中,`nfilt`表示Mel滤波器的个数,`nfft`表示FFT点数;`type`、`axis`和`norm`是`dct`函数的参数:`type`表示DCT类型(此处取2),`axis`表示进行DCT的轴,`norm='ortho'`表示进行正交归一化。
阅读全文