def __init__(self, json_dir, n_src=2, sample_rate=8000, segment=4.0):
    """Load mixture/source metadata from JSON and drop too-short utterances.

    Args:
        json_dir: Directory containing ``mix.json`` and one ``s<k>.json``
            file per source (``s1.json`` ... ``s<n_src>.json``).
        n_src: Number of source JSON files to load.
        sample_rate: Sample rate used to convert ``segment`` to a sample
            count and to report the dropped duration.
        segment: Segment length in seconds. ``None`` disables filtering
            (``self.like_test`` is then ``True``).

    Each JSON entry is indexed with ``[1]`` and compared against the
    segment length in samples, so it is presumably a
    ``(path, n_samples)`` pair -- confirm against the JSON producer.
    """
    super().__init__()
    # Task setting
    self.json_dir = json_dir
    self.sample_rate = sample_rate
    self.seg_len = None if segment is None else int(segment * sample_rate)
    self.n_src = n_src
    self.like_test = self.seg_len is None

    # Load json files
    mix_json = os.path.join(json_dir, "mix.json")
    sources_json = [
        os.path.join(json_dir, f"s{n + 1}.json") for n in range(n_src)
    ]
    with open(mix_json, "r") as f:
        mix_infos = json.load(f)
    sources_infos = []
    for src_json in sources_json:
        with open(src_json, "r") as f:
            sources_infos.append(json.load(f))

    # Filter out short utterances only when segment is specified
    orig_len = len(mix_infos)
    drop_utt, drop_len = 0, 0
    if not self.like_test:
        # Walk backward so deleting index i never shifts an index that
        # is still to be visited.
        for i in range(len(mix_infos) - 1, -1, -1):
            if mix_infos[i][1] < self.seg_len:
                drop_utt += 1
                drop_len += mix_infos[i][1]
                del mix_infos[i]
                # Keep every source list aligned with the mixture list.
                for src_inf in sources_infos:
                    del src_inf[i]

    # samples / sample_rate = seconds; seconds / 3600 = hours.
    # (The previous divisor of 36000 under-reported the hours by 10x.)
    print(
        "Drop {} utts({:.2f} h) from {} (shorter than {} samples)".format(
            drop_utt, drop_len / sample_rate / 3600, orig_len, self.seg_len
        )
    )
    self.mix = mix_infos
    self.sources = sources_infos
时间: 2024-02-10 17:21:12 浏览: 199
这是一个 Python 类的初始化函数,看起来是用于处理音频混合数据和其源数据的。具体来说,它的输入参数包括一个 JSON 目录、音频源的数量、采样率和段长度等。它会从指定的 JSON 文件中读取混合音频和其源音频的信息,并对其中长度太短的音频进行过滤。最终,该函数会将处理好的音频混合数据和其源数据存储在该类的 mix 和 sources 属性中。
相关问题
class ShiftModule(nn.Module):
    """Depthwise 1D convolution applied along the segment axis.

    The input to ``forward`` is a ``(nt, c, h, w)`` tensor whose leading
    axis is split into ``nt // n_segment`` groups of ``n_segment`` entries.
    A kernel-size-3 convolution with one kernel per channel
    (``groups == channels``) is run along the segment axis.

    Args:
        input_channels: Number of channels ``c`` of the input.
        n_segment: Length of the segment axis.
        n_div: Divisor used to size a fold:
            ``fold = input_channels // n_div``.
        mode: Weight initialisation.
            ``'shift'``: first ``fold`` channels take the next segment,
            the following ``fold`` channels take the previous segment,
            and the remaining channels are passed through.
            ``'fixed'``: every channel is passed through.
            ``'norm'``: the default ``nn.Conv1d`` initialisation is kept.
            Any other value also leaves the default initialisation.

    NOTE: the convolution is built with ``n_div * fold`` channels, which
    equals ``input_channels`` only when ``input_channels`` is divisible
    by ``n_div``.
    """

    def __init__(self, input_channels, n_segment=8, n_div=8, mode='shift'):
        super().__init__()
        self.input_channels = input_channels
        self.n_segment = n_segment
        self.fold_div = n_div
        self.fold = self.input_channels // self.fold_div

        n_channels = self.fold_div * self.fold
        self.conv = nn.Conv1d(
            n_channels,
            n_channels,
            kernel_size=3,
            padding=1,
            groups=n_channels,
            bias=False,
        )

        if mode in ('shift', 'fixed', 'norm'):
            self.conv.weight.requires_grad = True

        if mode == 'shift':
            first, second = self.fold, 2 * self.fold
            kernel = self.conv.weight.data
            kernel.zero_()
            kernel[:first, 0, 2] = 1  # shift left
            kernel[first:second, 0, 0] = 1  # shift right
            if second < self.input_channels:
                kernel[second:, 0, 1] = 1  # fixed
        elif mode == 'fixed':
            kernel = self.conv.weight.data
            kernel.zero_()
            kernel[:, 0, 1] = 1  # fixed

    def forward(self, x):
        """Convolve ``x`` of shape ``(nt, c, h, w)`` along the segment axis.

        Returns a tensor with the same ``(nt, c, h, w)`` shape.
        """
        nt, c, h, w = x.size()
        seg = self.n_segment
        n_batch = nt // seg

        # (nt, c, h, w) -> (n_batch, h, w, c, seg) -> (n_batch*h*w, c, seg)
        stacked = x.view(n_batch, seg, c, h, w).permute(0, 3, 4, 2, 1)
        flat = stacked.contiguous().view(n_batch * h * w, c, seg)

        mixed = self.conv(flat)  # (n_batch*h*w, c, seg)

        # (n_batch*h*w, c, seg) -> (n_batch, seg, c, h, w) -> (nt, c, h, w)
        restored = mixed.view(n_batch, h, w, c, seg).permute(0, 4, 3, 1, 2)
        return restored.contiguous().view(nt, c, h, w)
这是一个名为ShiftModule的类,继承自nn.Module。它的构造函数有四个参数:input_channels表示输入通道数;n_segment表示时间分段数,默认为8;n_div是通道划分的除数(每个fold包含 input_channels // n_div 个通道),默认为8;mode表示权重的初始化模式,默认为shift。
在初始化函数__init__中,首先调用父类的构造函数,然后将参数赋值给类的属性:fold_div直接取n_div的值,fold = input_channels // fold_div,即每个fold包含的通道数。然后创建一个1D卷积层,输入通道数为fold_div*fold,输出通道数也为fold_div*fold,卷积核大小为3,padding为1;groups为fold_div*fold(等于通道数),即逐通道(depthwise)卷积,每个通道拥有各自独立的卷积核;bias为False,表示不使用偏置。
如果mode为shift,则将卷积层的权重设置为可训练,并初始化为0,然后把前fold个通道的卷积核下标2处的权重设置为1(向左移位),把接下来fold个通道的卷积核下标0处的权重设置为1(向右移位),其余通道的卷积核下标1(中心位置)处的权重设置为1(保持不变)。
CHUNK = 1024 FORMAT = pyaudio.paInt16 CHANNELS = 2 RATE = 44100 RECORED_SECONDS = 0.5 TERMINATE = False def run(self): p = pyaudio.PyAudio() while True: try: self.sock.connect(self.ADDR) break except: time.sleep(3) continue print('音频客户端已经连接...') self.stream = self.p.open(format = FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) while self.stream.is_active(): frames = [] for i in range(0,int(RATE / CHUNK * RECORED_SECONDS)): data = self.stream.read(CHUNK) frames.append(data) # 将音频数据转换为PyDub的音频片段 audio_segment = AudioSegment( data=b''.join(frames), sample_width=p.get_sample_size(FORMAT), frame_rate=RATE, channels=CHANNELS ) # 变声 audio_segment = audio_segment.speedup( playback_speed=SPEEDUP_FACTOR, chunk_size=CHUNK, crossfade=10 修改上述代码,实现变音功能
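要实现变音功能,需要注意 PyDub 的 `AudioSegment` 本身并没有内置的 `pitch_shift` 方法,需要自行实现:常见做法是用 `_spawn` 覆盖 `frame_rate`(按 2^(半音数/12) 缩放)来改变音高,再用 `set_frame_rate` 转换回原采样率。实现之后,可以在上述代码中对 `audio_segment` 变量进行变调处理,例如: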
```
def pitch_shift(segment, semitones):
    """Return ``segment`` with its pitch moved by ``semitones`` semitones.

    pydub's AudioSegment has no ``pitch_shift`` method, so the shift is
    done by reinterpreting the raw samples at a scaled frame rate and then
    converting back to the original frame rate. This also changes the
    duration by the same factor.
    """
    # One semitone corresponds to a frequency ratio of 2 ** (1 / 12).
    shifted_rate = int(segment.frame_rate * (2.0 ** (semitones / 12.0)))
    shifted = segment._spawn(
        segment.raw_data, overrides={"frame_rate": shifted_rate}
    )
    # Restore the original frame rate so downstream playback and
    # streaming parameters stay unchanged.
    return shifted.set_frame_rate(segment.frame_rate)


# Speed change
audio_segment = audio_segment.speedup(
    playback_speed=SPEEDUP_FACTOR,
    chunk_size=CHUNK,
    crossfade=10
)
# Pitch change
audio_segment = pitch_shift(audio_segment, semitones=SHIFT_SEMITONES)
```
其中,`SHIFT_SEMITONES` 是需要变化的半音数,可以是正数或负数,例如 `SHIFT_SEMITONES = 2` 表示将音高升高两个半音。
阅读全文