计算MFCC特征的自相关系数实现代码
时间: 2024-05-06 20:15:51 浏览: 134
以下是使用Python实现计算MFCC特征的自相关系数的代码:
```python
import numpy as np
from scipy.fftpack import dct
def _frame_signal(emphasized_signal, frame_length, frame_step):
    """Split a 1-D signal into overlapping frames, zero-padding the tail."""
    signal_length = len(emphasized_signal)
    # There is always one frame; extra frames are added until the last
    # sample of the signal is covered, so no trailing samples are dropped.
    overhang = max(signal_length - frame_length, 0)
    num_frames = 1 + int(np.ceil(float(overhang) / frame_step))
    pad_signal_length = (num_frames - 1) * frame_step + frame_length
    padding = np.zeros(pad_signal_length - signal_length)
    pad_signal = np.append(emphasized_signal, padding)
    # Row r holds the sample indices r*frame_step ... r*frame_step + frame_length - 1.
    frame_starts = np.arange(num_frames)[:, None] * frame_step
    indices = frame_starts + np.arange(frame_length)[None, :]
    return pad_signal[indices]


def _mel_filterbank(sample_rate, nfilt, nfft):
    """Build an (nfilt, nfft // 2 + 1) matrix of triangular mel filters."""
    low_freq_mel = 0
    high_freq_mel = 2595 * np.log10(1 + (sample_rate / 2) / 700)  # Hz -> mel
    # nfilt triangles need nfilt + 2 edge points, equally spaced in mel.
    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = 700 * (10 ** (mel_points / 2595) - 1)  # mel -> Hz
    fft_bins = np.floor((nfft + 1) * hz_points / sample_rate).astype(int)

    fbank = np.zeros((nfilt, nfft // 2 + 1))
    for m in range(1, nfilt + 1):
        left, center, right = fft_bins[m - 1], fft_bins[m], fft_bins[m + 1]
        # Guards skip degenerate (zero-width) slopes instead of dividing by 0.
        if center > left:
            rising = np.arange(left, center)
            fbank[m - 1, left:center] = (rising - left) / float(center - left)
        if right > center:
            falling = np.arange(center, right)
            fbank[m - 1, center:right] = (right - falling) / float(right - center)
    return fbank


def mfcc(signal, sample_rate=16000, num_ceps=13, nfilt=26, nfft=512):
    """Return the flattened cross-product matrix of mean-normalised MFCCs.

    The signal is pre-emphasised (coefficient 0.97), cut into 25 ms frames
    with a 10 ms stride, Hamming-windowed, converted to a power spectrum,
    passed through a triangular mel filter bank, log-scaled and
    DCT-transformed. DCT coefficient 0 is discarded and the next
    ``num_ceps`` coefficients are kept. After subtracting the per-coefficient
    mean over frames, entry ``(i, j)`` of the result matrix is the mean over
    frames of ``mfccs[:, i] * mfccs[:, j]``.

    Args:
        signal: Array-like of audio samples (expected to be 1-D).
        sample_rate: Sampling rate in Hz.
        num_ceps: Number of cepstral coefficients to keep. The DCT yields
            ``nfilt`` coefficients and the first is skipped, so at most
            ``nfilt - 1`` are available.
        nfilt: Number of mel filters.
        nfft: FFT size. Frames longer than ``nfft`` are truncated by
            ``np.fft.rfft``, so it should be at least the frame length.

    Returns:
        1-D array holding the row-major flattening of the
        ``(num_ceps, num_ceps)`` matrix described above.

    Raises:
        ValueError: If ``signal`` is empty or ``sample_rate`` is too small
            to produce a frame length and stride of at least one sample.
    """
    signal = np.asarray(signal, dtype=float)
    if signal.size == 0:
        raise ValueError("signal must contain at least one sample")

    # Pre-emphasis: y[t] = x[t] - 0.97 * x[t - 1], first sample kept as is.
    pre_emphasis = 0.97
    emphasized_signal = np.append(
        signal[0], signal[1:] - pre_emphasis * signal[:-1]
    )

    # Framing
    frame_size = 0.025
    frame_stride = 0.01
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    if frame_length < 1 or frame_step < 1:
        raise ValueError("sample_rate is too low for 25 ms / 10 ms framing")
    frames = _frame_signal(emphasized_signal, frame_length, frame_step)

    # Windowing (frames is a fresh array, so in-place scaling is safe).
    frames *= np.hamming(frame_length)

    # Fourier transform and power spectrum
    mag_frames = np.absolute(np.fft.rfft(frames, nfft))
    pow_frames = (1.0 / nfft) * mag_frames ** 2

    # Mel filter-bank energies in dB
    fbank = _mel_filterbank(sample_rate, nfilt, nfft)
    filtered_frames = np.dot(pow_frames, fbank.T)
    # Replace exact zeros so the logarithm below stays finite.
    filtered_frames = np.where(
        filtered_frames == 0, np.finfo(float).eps, filtered_frames
    )
    filtered_frames = 20 * np.log10(filtered_frames)

    # MFCCs: drop DCT coefficient 0, keep the following num_ceps ones.
    mfccs = dct(filtered_frames, type=2, axis=1, norm='ortho')[:, 1:(num_ceps + 1)]

    # Mean normalisation over frames
    mfccs -= (np.mean(mfccs, axis=0) + 1e-8)

    # Mean over frames of every pairwise coefficient product. A single
    # matrix product yields the full (num_ceps, num_ceps) result regardless
    # of how many frames there are.
    mfccs_ac = np.dot(mfccs.T, mfccs) / mfccs.shape[0]
    return mfccs_ac.flatten()
```
在该代码中,我们首先对输入信号依次进行预加重、分帧、加窗、傅里叶变换(求功率谱)和梅尔滤波器组处理,再取对数并做离散余弦变换(DCT),得到MFCC系数。然后,我们对MFCC系数在帧方向上做均值归一化,并对每一对系数维度计算其在所有帧上乘积的平均值(即这里所说的"自相关系数",严格来说是各阶MFCC系数之间的协方差)。最后,我们将结果展平为一维数组并返回。
阅读全文