def train():
    """Train the model for ``epochs`` epochs and track the best validation PCK.

    The function takes no arguments and returns nothing. It relies on names
    defined elsewhere in the module: ``learning_rate``, ``epochs``, ``model``,
    ``train_loader``, ``cuda``, ``optimizer``, ``sum_mse_loss``,
    ``save_images``, ``save_dir``, ``eval``, ``lr_decay_epoch`` and
    ``update_lr``.

    Per epoch it:
      * runs one optimisation pass over ``train_loader``;
      * records the loss of the last prediction stage for every batch;
      * calls the module's ``eval(epoch, mode='valid')`` and saves
        ``best_model.pth`` whenever the returned PCK improves;
      * saves ``final_epoch.pth`` and halves the learning rate every
        ``lr_decay_epoch`` epochs.

    NOTE(review): the original source was collapsed onto a single line, so the
    nesting below is a reconstruction that preserves statement order. Confirm
    (a) that ``save_images`` is meant to run once per epoch on the last batch
    and (b) that the ``final_epoch.pth`` save and LR decay belong inside the
    epoch loop (the decay test on ``epoch`` only has an effect there).
    """
    print('Start training ===========================================>')
    best_epo = -1
    max_pck = -1
    cur_lr = learning_rate
    print('Learning Rate: {}'.format(learning_rate))

    for epoch in range(1, epochs + 1):
        print('Epoch[{}/{}] ==============>'.format(epoch, epochs))
        model.train()
        train_loss = []  # plain Python floats, one per batch

        for step, (img, label, img_name, _w, _h) in enumerate(train_loader):
            # Repeat the label along a new dim=1 so it lines up with the
            # stacked predictions (original author's note: bz * 6 * 21 * 46 * 46).
            label = torch.stack([label] * 6, dim=1)
            if cuda:
                img = img.cuda()
                label = label.cuda()

            optimizer.zero_grad()
            # Original author's note: (FloatTensor.cuda) size (bz, 6, 21, 46, 46).
            pred_maps = model(img)
            loss = sum_mse_loss(pred_maps, label)  # total loss over dim=1 entries
            loss.backward()
            optimizer.step()

            if step % 100 == 0:
                print('STEP: {} LOSS {}'.format(step, loss.item()))

            # Loss of the last entry along dim=1 only, used for the epoch
            # summary. Computed without autograd and stored as a float so the
            # list does not keep every batch's computation graph alive.
            with torch.no_grad():
                loss_final = sum_mse_loss(pred_maps[:, -1, ...].cpu(),
                                          label[:, -1, ...].cpu())
            train_loss.append(loss_final.item())

        # Save sample images from the last processed batch. Skipped when the
        # loader yielded nothing, because label/pred_maps would be unbound.
        if train_loss:
            save_images(label[:, -1, ...].cpu(), pred_maps[:, -1, ...].cpu(),
                        epoch, img_name, save_dir)

        # Evaluate the model after each epoch. ``eval`` here is the module's
        # own function (it shadows the builtin of the same name).
        eval_loss, cur_pck = eval(epoch, mode='valid')
        print('EPOCH {} Valid PCK {}'.format(epoch, cur_pck))

        # Guard against an empty loader instead of dividing by zero.
        if train_loss:
            mean_train_loss = sum(train_loss) / len(train_loss)
        else:
            mean_train_loss = float('nan')
        print('EPOCH {} TRAIN_LOSS {}'.format(epoch, mean_train_loss))
        print('EPOCH {} VALID_LOSS {}'.format(epoch, eval_loss))

        if cur_pck > max_pck:
            torch.save(model.state_dict(), os.path.join(save_dir, 'best_model.pth'))
            max_pck = cur_pck
            best_epo = epoch
        print('Current Best EPOCH is : {}\n**************\n'.format(best_epo))

        # Overwritten every epoch, so after the loop it holds the last epoch.
        torch.save(model.state_dict(), os.path.join(save_dir, 'final_epoch.pth'))

        # Halve the learning rate every lr_decay_epoch epochs.
        if epoch % lr_decay_epoch == 0:
            cur_lr /= 2
            update_lr(optimizer, cur_lr)

    print('Train Done!')
    print('Best epoch is {}'.format(best_epo))
时间: 2023-05-29 10:06:50 浏览: 335
ruby中的双等号==问题详解
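train()是一个函数，具体实现的内容需要根据上下文和代码细节来确定。一般来说，train()函数用于训练机器学习模型，包括读取数据、构建模型、设置优化器和损失函数、进行训练迭代等步骤。在训练过程中，train()函数通常需要不断调用其他函数来执行具体的操作，如计算梯度、更新参数等。就上面这段代码而言：每个 epoch 先调用 model.train() 切换到训练模式，然后遍历 train_loader，把 label 沿 dim=1 复制 6 份后与模型输出 pred_maps 一起送入 sum_mse_loss 计算总损失，再依次执行 optimizer.zero_grad()、loss.backward() 和 optimizer.step() 完成一次参数更新，并且每 100 步打印一次损失；同时只取 dim=1 上最后一项的预测与标签另算一个损失，用于统计该 epoch 的平均训练损失。每个 epoch 训练结束后调用 eval(epoch, mode='valid') 得到验证损失和 PCK，当 PCK 高于历史最好值时把模型参数保存为 best_model.pth；此外还会保存 final_epoch.pth，并在 epoch 能被 lr_decay_epoch 整除时把学习率减半后通过 update_lr 写回优化器。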
阅读全文