Explain this code:

```python
def load_data_set(self, train_file, test_file):
    train_dict = defaultdict(list)
    test_dict = defaultdict(list)
    train_user_items_dict = defaultdict(list)
    train_item_users_dict = defaultdict(list)
    test_user_items_dict = defaultdict(list)
    test_item_users_dict = defaultdict(list)
    train_data = []
    test_data = []
    with open(train_file) as f_train:
        with open(test_file) as f_test:
            for l in f_train:
                uid, iid = l.strip().split('\t')
                train_dict[int(uid)].append(int(iid) - self.n_users)
                train_user_items_dict[int(uid)].append(int(iid))
                train_item_users_dict[int(iid)].append(int(uid))
                train_data.append((int(uid), int(iid)))
            for l in f_test.readlines():
                uid, iid = l.strip().split('\t')
                test_dict[int(uid)].append(int(iid) - self.n_users)
                test_user_items_dict[int(uid)].append(int(iid))
                test_item_users_dict[int(iid)].append(int(uid))
                test_data.append((int(uid), int(iid)))
    return train_data, test_data, train_dict, test_dict, train_user_items_dict, train_item_users_dict, test_user_items_dict, test_item_users_dict
```
This is Python code. The load_data_set function reads interaction records from train_file and test_file (one tab-separated `uid\titem_id` pair per line) and organizes them into several dictionaries while splitting the records into a training set and a test set. Specifically, train_dict and test_dict map each user ID to a list of item indices re-based by subtracting self.n_users (useful when users and items share one global ID space); train_user_items_dict and test_user_items_dict map each user to the list of raw item IDs they interacted with; train_item_users_dict and test_item_users_dict map each item to the list of users who interacted with it; and train_data and test_data store the raw records as (user ID, item ID) tuples. The code uses collections.defaultdict(list) so that appending to a not-yet-seen key creates the list automatically instead of raising a KeyError, which is more convenient than a plain dict.
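To see why defaultdict helps here, consider this minimal sketch (the toy interaction records are made up for illustration):

```python
from collections import defaultdict

user_items = defaultdict(list)  # missing keys start as an empty list
for uid, iid in [(0, 5), (0, 7), (1, 5)]:  # toy interaction records
    user_items[uid].append(iid)  # no KeyError on a first-seen uid

print(dict(user_items))  # {0: [5, 7], 1: [5]}
```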
Related questions
Python code for AI speech recognition
Below is an outline of the AI speech-recognition Python code distilled from the provided 《实验指导.docx》 document:
### 1. Unzip the dataset
```python
!unzip -q data/data300576/recordings.zip -d work  # extract into work/ so work/recordings matches the paths below
```
### 2. Split the dataset
```python
import os
import random

# Collect the relative paths of all audio files
recordings = ['recordings/' + name for name in os.listdir('work/recordings')]
total = []
# The spoken digit is the first character of the file name
# ('recordings/' is 11 characters, so index 11 is that digit)
for recording in recordings:
    label = int(recording[11])
    total.append(f'{recording}\t{label}\n')  # newline so each record is one line
# Create the train / dev / test files
train = open('work/train.tsv', 'w', encoding='UTF-8')
dev = open('work/dev.tsv', 'w', encoding='UTF-8')
test = open('work/test.tsv', 'w', encoding='UTF-8')
# Shuffle the records
random.shuffle(total)
# 90% of everything except the last 100 records goes to training
split_num = int((len(total) - 100) * 0.9)
# Write the training set
for line in total[:split_num]:
    train.write(line)
# Write the validation set
for line in total[split_num:-100]:
    dev.write(line)
# Write the test set (the final 100 records)
for line in total[-100:]:
    test.write(line)
# Close the files
train.close()
dev.close()
test.close()
```
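If you want to verify the split, a quick check like the following (an illustrative snippet, not part of the lab guide) prints the record count per file:

```python
# Illustrative sanity check: count records in each split
for split in ('train', 'dev', 'test'):
    with open(f'work/{split}.tsv', encoding='UTF-8') as f:
        print(split, sum(1 for _ in f))  # test should report 100
```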
### 3. Audio data preprocessing
```python
import random
import numpy as np
import scipy.io.wavfile as wav
from python_speech_features import mfcc, delta

def get_mfcc(data, fs):
    # Extract 13-dimensional MFCC features, shape (T, 13)
    wav_feature = mfcc(data, fs)
    # First-order delta features
    d_mfcc_feat = delta(wav_feature, 1)
    # Second-order delta features
    d_mfcc_feat2 = delta(wav_feature, 2)
    # Stack static + delta + delta-delta along a new leading axis: (3, T, 13)
    feature = np.concatenate([
        wav_feature.reshape(1, -1, 13),
        d_mfcc_feat.reshape(1, -1, 13),
        d_mfcc_feat2.reshape(1, -1, 13)
    ], axis=0)
    # Truncate or zero-pad the time axis to a fixed length of 64 frames
    if feature.shape[1] > 64:
        feature = feature[:, :64, :]
    else:
        feature = np.pad(feature, ((0, 0), (0, 64 - feature.shape[1]), (0, 0)), 'constant')
    # Rearrange to (13, 3, 64) and add a batch axis: (1, 13, 3, 64)
    feature = feature.transpose((2, 0, 1))
    feature = feature[np.newaxis, :]
    return feature

def loader(tsv):
    # Read a tsv of "audio_path\tlabel" lines and return [feature, label] pairs
    datas = []
    with open(tsv, 'r', encoding='UTF-8') as f:
        for line in f:
            audio, label = line.strip().split('\t')
            fs, signal = wav.read('work/' + audio)
            feature = get_mfcc(signal, fs)
            datas.append([feature, int(label)])
    return datas

def reader(datas, batch_size, is_random=True):
    # Generator that yields (features, labels) mini-batches
    features = []
    labels = []
    if is_random:
        random.shuffle(datas)
    for data in datas:
        feature, label = data
        features.append(feature)
        labels.append(label)
        if len(labels) == batch_size:
            features = np.concatenate(features, axis=0).reshape(-1, 13, 3, 64).astype('float32')
            labels = np.array(labels).reshape(-1, 1).astype('int64')
            yield features, labels
            features = []
            labels = []
```
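To confirm the shapes the model below expects, a small smoke test like this can be run once work/train.tsv exists (this snippet is illustrative, not from the original lab guide):

```python
# Illustrative check: peek at one mini-batch from the reader
train_datas = loader('work/train.tsv')
features, labels = next(reader(train_datas, batch_size=4))
print(features.shape, labels.shape)  # expected: (4, 13, 3, 64) (4, 1)
```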
### 4. Build the model
```python
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, Conv2D
from paddle.fluid.layers import softmax_with_cross_entropy, accuracy, reshape

class Audio(fluid.dygraph.Layer):
    def __init__(self):
        super(Audio, self).__init__()
        # Input is (N, 13, 3, 64); the 13 MFCC coefficients act as channels
        self.conv1 = Conv2D(13, 16, 3, 1, 1)                  # -> (N, 16, 3, 64)
        self.conv2 = Conv2D(16, 16, (3, 2), (1, 2), (1, 0))   # halves the time axis -> (N, 16, 3, 32)
        self.conv3 = Conv2D(16, 32, 3, 1, 1)                  # -> (N, 32, 3, 32)
        self.conv4 = Conv2D(32, 32, (3, 2), (1, 2), (1, 0))   # -> (N, 32, 3, 16)
        self.conv5 = Conv2D(32, 64, 3, 1, 1)                  # -> (N, 64, 3, 16)
        self.conv6 = Conv2D(64, 64, (3, 2), 2)                # -> (N, 64, 1, 8)
        self.fc1 = Linear(8 * 64, 128)
        self.fc2 = Linear(128, 10)  # 10 spoken digits

    def forward(self, inputs, labels=None):
        out = self.conv1(inputs)
        out = self.conv2(out)
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.conv5(out)
        out = self.conv6(out)
        out = reshape(out, [-1, 8 * 64])  # flatten (N, 64, 1, 8) to (N, 512)
        out = self.fc1(out)
        out = self.fc2(out)
        if labels is not None:
            # Training/eval path: return loss and accuracy
            loss = softmax_with_cross_entropy(out, labels)
            acc = accuracy(out, labels)
            return loss, acc
        else:
            # Inference path: return raw logits
            return out
```
### 5. Inspect the network structure
```python
import paddle
audio_network = Audio()
paddle.summary(audio_network, input_size=[(64, 13, 3, 64)], dtypes=['float32'])
```
### 6. Model training
```python
import numpy as np
import paddle.fluid as fluid
from visualdl import LogWriter
from paddle.fluid.optimizer import Adam
from paddle.fluid.dygraph import to_variable, save_dygraph

writer = LogWriter(logdir="./log/train")
train_datas = loader('work/train.tsv')
dev_datas = loader('work/dev.tsv')
place = fluid.CPUPlace()
epochs = 10

with fluid.dygraph.guard(place):
    model = Audio()
    optimizer = Adam(learning_rate=0.001, parameter_list=model.parameters())
    global_step = 0
    max_acc = 0
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_reader = reader(train_datas, batch_size=64)
        for step, data in enumerate(train_reader):
            signal, label = [to_variable(_) for _ in data]
            loss, acc = model(signal, label)
            # Log every 20 steps
            if step % 20 == 0:
                print(f'train epoch: {epoch} step: {step}, loss: {loss.numpy().mean()}, acc: {acc.numpy()}')
                writer.add_scalar(tag='train_loss', step=global_step, value=loss.numpy().mean())
                writer.add_scalar(tag='train_acc', step=global_step, value=acc.numpy())
            global_step += 1
            loss.backward()
            optimizer.minimize(loss)
            model.clear_gradients()
        # Evaluation phase on the dev set
        model.eval()
        dev_reader = reader(dev_datas, batch_size=64, is_random=False)
        accs = []
        losses = []
        for data in dev_reader:
            signal, label = [to_variable(_) for _ in data]
            loss, acc = model(signal, label)
            losses.append(loss.numpy().mean())
            accs.append(acc.numpy())
        avg_acc = np.array(accs).mean()
        avg_loss = np.array(losses).mean()
        # Keep the checkpoint with the best dev accuracy
        if avg_acc > max_acc:
            max_acc = avg_acc
            print(f'the best accuracy: {max_acc}')
            print('saving the best model')
            save_dygraph(optimizer.state_dict(), 'best_model')
            save_dygraph(model.state_dict(), 'best_model')
        print(f'dev epoch: {epoch}, loss: {avg_loss}, acc: {avg_acc}')
        writer.add_scalar(tag='dev_loss', step=epoch, value=avg_loss)
        writer.add_scalar(tag='dev_acc', step=epoch, value=avg_acc)
    print(f'the best accuracy: {max_acc}')
    print('saving the final model')
    save_dygraph(optimizer.state_dict(), 'final_model')
    save_dygraph(model.state_dict(), 'final_model')
```
### 7. Model testing
```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable, load_dygraph

test_datas = loader('work/test.tsv')
print(f'{len(test_datas)} data in test set')

with fluid.dygraph.guard(fluid.CPUPlace()):
    model = Audio()
    model.eval()
    params_dict, _ = load_dygraph('best_model')
    model.set_dict(params_dict)
    test_reader = reader(test_datas, batch_size=100, is_random=False)
    accs = []
    for data in test_reader:
        signal, label = [to_variable(_) for _ in data]
        _, acc = model(signal, label)
        accs.append(acc.numpy())
    avg_acc = np.array(accs).mean()
    print(f'test acc: {avg_acc}')
```
### 8. Recognize speech with the trained model
```python
import numpy as np
import webrtcvad
import scipy.io.wavfile as wav
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable, load_dygraph

def vad(file_path, mode=3):
    # webrtcvad expects 16-bit mono PCM at 8/16/32/48 kHz, in 10/20/30 ms frames
    samp_rate, signal_data = wav.read(file_path)
    vad = webrtcvad.Vad(mode=mode)
    frame_len = int(samp_rate * 0.02)  # 20 ms frames
    # Zero-pad so the signal splits into whole frames
    pad = (frame_len - signal_data.shape[0] % frame_len) % frame_len
    signal = np.pad(signal_data, (0, pad), 'constant')
    lens = signal.shape[0]
    signals = np.split(signal, lens // frame_len)
    audio = []
    audios = []
    # Group consecutive speech frames into separate utterances
    for signal_item in signals:
        if vad.is_speech(signal_item.tobytes(), samp_rate):
            audio.append(signal_item)
        elif len(audio) > 0:
            audios.append(np.concatenate(audio, 0))
            audio = []
    return audios, samp_rate

audios, samp_rate = vad('data/audio.wav')
# Extract MFCC features for each utterance (get_mfcc is defined in step 3)
features = []
for audio in audios:
    feature = get_mfcc(audio, samp_rate)
    features.append(feature)
features = np.concatenate(features, 0).astype('float32')

with fluid.dygraph.guard(place=fluid.CPUPlace()):
    model = Audio()
    params_dict, _ = load_dygraph('final_model')
    model.set_dict(params_dict)
    model.eval()
    features = to_variable(features)
    out = model(features)
    result = ' '.join([str(num) for num in np.argmax(out.numpy(), 1).tolist()])
    print(f'The recognized digits are: {result}')
```
The code above covers the full pipeline from data preparation and preprocessing through model building, training, and testing to real-world use. Hope it helps!
A PyTorch-based fruit image recognition system using the Fruits 360 dataset, with the trained model applied in an image-recognition GUI and GPU-accelerated training — concrete code implementation
Sure — here is an example of a fruit image recognition system implemented in PyTorch. It uses the Fruits 360 dataset, plugs the trained model into a simple PyQt5 image-recognition GUI, and trains on the GPU when one is available so you get accurate results faster.
First, install PyTorch (with torchvision, which the data-loading code below uses) and PyQt5:
```
pip install torch torchvision
pip install PyQt5
```
Next, we define a function that loads the Fruits 360 dataset with torchvision and wraps the training and test splits in DataLoaders.
```python
import torch
import torchvision
import torchvision.transforms as transforms

def load_data():
    transform = transforms.Compose(
        [transforms.Resize((64, 64)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    trainset = torchvision.datasets.ImageFolder(root='./fruits-360/Training',
                                                transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                              shuffle=True, num_workers=2)
    testset = torchvision.datasets.ImageFolder(root='./fruits-360/Test',
                                               transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                             shuffle=False, num_workers=2)
    return trainloader, testloader
```
Then we define the training function, which moves the model and data to the GPU (when available) to speed up training.
```python
import torch.nn as nn
import torch.optim as optim

def train_model(trainloader):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = Net()  # Net must be defined elsewhere; see the sketch at the end of this answer
    net = net.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    for epoch in range(10):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # print statistics
            running_loss += loss.item()
            if i % 200 == 199:  # print every 200 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 200))
                running_loss = 0.0
    print('Finished Training')
    return net
```
Next, a function to evaluate the trained model's accuracy on the images in the test set.
```python
def test_model(net, testloader):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the %d test images: %d %%' % (
        total, 100 * correct / total))
```
Finally, we apply the trained model: the code below loads the saved weights and uses PyQt5 to build a simple GUI where we can open an image and have the model classify it.
```python
from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
from PyQt5.QtCore import *
from PIL import Image
import numpy as np
import torch
import sys

class App(QWidget):
    def __init__(self):
        super().__init__()
        self.title = 'Fruit Recognition'
        self.left = 10
        self.top = 10
        self.width = 640
        self.height = 480
        self.initUI()

    def initUI(self):
        self.setWindowTitle(self.title)
        self.setGeometry(self.left, self.top, self.width, self.height)
        # create a label
        self.label = QLabel(self)
        self.label.setGeometry(QRect(30, 30, 400, 400))
        self.label.setAlignment(Qt.AlignCenter)
        # create a button
        button = QPushButton('Open', self)
        button.setGeometry(QRect(500, 30, 100, 30))
        button.clicked.connect(self.open_image)
        self.show()

    def open_image(self):
        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        file_name, _ = QFileDialog.getOpenFileName(self, "Open Image", "",
                                                   "Images (*.png *.xpm *.jpg *.bmp);;All Files (*)",
                                                   options=options)
        if file_name:
            # Preprocess the image the same way as during training
            image = Image.open(file_name).convert('RGB')
            image = image.resize((64, 64))
            image = np.array(image).transpose((2, 0, 1))
            image = (image / 255 - 0.5) / 0.5  # match Normalize((0.5, ...), (0.5, ...))
            image = torch.from_numpy(image).type(torch.FloatTensor)
            image = image.unsqueeze(0)
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            net = Net()
            net.load_state_dict(torch.load('fruits_model.pth', map_location=device))
            net = net.to(device)
            net.eval()
            with torch.no_grad():
                outputs = net(image.to(device))
            _, predicted = torch.max(outputs.data, 1)
            self.label.setPixmap(QPixmap(file_name).scaled(400, 400, Qt.KeepAspectRatio))
            self.label.setAlignment(Qt.AlignCenter)
            # show the prediction in the window title (setPixmap would overwrite label text)
            self.setWindowTitle('This is a ' + classes[predicted.item()] + '!')
# Fruits 360 class names (ImageFolder assigns label indices in sorted folder-name order)
classes = ('Apple Braeburn', 'Apple Golden 1', 'Apple Golden 2', 'Apple Golden 3', 'Apple Granny Smith', 'Apple Red 1', 'Apple Red 2', 'Apple Red 3', 'Apple Red Delicious', 'Apple Red Yellow 1', 'Apple Red Yellow 2', 'Apricot', 'Avocado', 'Banana', 'Beetroot', 'Blueberry', 'Cactus fruit', 'Cantaloupe 1', 'Cantaloupe 2', 'Carambula', 'Cauliflower', 'Cherry 1', 'Cherry 2', 'Cherry Rainier', 'Cherry Wax Black', 'Cherry Wax Red', 'Cherry Wax Yellow', 'Chestnut', 'Clementine', 'Cocos', 'Dates', 'Eggplant', 'Fig', 'Ginger Root', 'Granadilla', 'Grape Blue', 'Grape Pink', 'Grape White', 'Grape White 2', 'Grape White 3', 'Grape White 4', 'Grapefruit Pink', 'Grapefruit White', 'Guava', 'Hazelnut', 'Huckleberry', 'Kaki', 'Kiwi', 'Kohlrabi', 'Kumquats', 'Lemon', 'Lemon Meyer', 'Limes', 'Lychee', 'Mandarine', 'Mango', 'Mangostan', 'Maracuja', 'Melon Piel de Sapo', 'Mulberry', 'Nectarine', 'Orange', 'Papaya', 'Passion Fruit', 'Peach', 'Peach Flat', 'Pear', 'Pear Abate', 'Pear Monster', 'Pear Williams', 'Pepino', 'Pepper Green', 'Pepper Red', 'Pepper Yellow', 'Physalis', 'Physalis with Husk', 'Pineapple', 'Pineapple Mini', 'Pitahaya Red', 'Plum', 'Plum 2', 'Plum 3', 'Pomegranate', 'Pomelo Sweetie', 'Potato Red', 'Potato Red Washed', 'Potato Sweet', 'Potato White', 'Quince', 'Rambutan', 'Raspberry', 'Redcurrant', 'Salak', 'Strawberry', 'Tamarillo', 'Tangelo', 'Tomato 1', 'Tomato 2', 'Tomato 3', 'Tomato 4', 'Tomato Cherry Red', 'Tomato Maroon', 'Tomato Yellow', 'Walnut')

if __name__ == '__main__':
    trainloader, testloader = load_data()
    net = train_model(trainloader)
    test_model(net, testloader)
    torch.save(net.state_dict(), 'fruits_model.pth')
    app = QApplication(sys.argv)
    ex = App()
    sys.exit(app.exec_())
```
Note that this example assumes a neural network model named Net; you can swap in any architecture you like (one minimal sketch follows).
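Since Net is not defined in the snippets above, here is one minimal sketch that fits the 64×64 inputs and the classes tuple used in this example; the architecture and layer sizes are illustrative assumptions, not the original author's model:

```python
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self, num_classes=104):  # 104 = len(classes) in the snippet above
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)   # 3x64x64 -> 16x64x64
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)  # 16x32x32 -> 32x32x32
        self.pool = nn.MaxPool2d(2, 2)                # halves each spatial dim
        self.fc1 = nn.Linear(32 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # -> 16x32x32
        x = self.pool(F.relu(self.conv2(x)))  # -> 32x16x16
        x = x.view(x.size(0), -1)             # flatten to (N, 8192)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
```

With a definition like this in scope, the `Net()` calls in train_model and open_image construct a model whose output dimension matches the 104-entry class list.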
That completes a PyTorch-based fruit image recognition system. Use this example as a starting point and modify or extend it as needed.