The steps to load a .trt file with the TensorRT C++ API are as follows:
1. Include the necessary headers. Only the runtime header is needed to load an already-serialized engine; the ONNX parser headers are only required when building an engine from an ONNX model:
```cpp
#include "NvInfer.h"
#include <cuda_runtime_api.h>
#include <fstream>
#include <iostream>
#include <vector>
```
2. Create an `IRuntime` object:
```cpp
nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
```
Here, `gLogger` is the logging object required by the API and must be defined beforehand.
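TensorRT does not ship a ready-made logger instance, so one is defined by deriving from `nvinfer1::ILogger`. A minimal sketch (the class name is ours; depending on the TensorRT version, the `noexcept` qualifier on the override may or may not be required):
```cpp
#include "NvInfer.h"
#include <iostream>

// Minimal logger sketch: print warnings and errors, drop info/verbose output.
class Logger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        // Severity values are ordered: kINTERNAL_ERROR < kERROR < kWARNING < kINFO < kVERBOSE.
        if (severity <= Severity::kWARNING)
            std::cout << msg << std::endl;
    }
} gLogger;
```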
3. Deserialize an `ICudaEngine` from the file:
```cpp
std::ifstream trt_file("model.trt", std::ios::binary);
if (!trt_file.good())
{
    std::cerr << "Failed to load TRT file: model.trt" << std::endl;
    return -1;
}
// Determine the file size, then read the serialized engine into memory.
trt_file.seekg(0, trt_file.end);
const size_t model_size = trt_file.tellg();
trt_file.seekg(0, trt_file.beg);
std::vector<char> model_data(model_size);
trt_file.read(model_data.data(), model_size);
nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_data.data(), model_size);
if (!engine)
{
    std::cerr << "Failed to deserialize engine" << std::endl;
    return -1;
}
```
Here, `model.trt` is the serialized TensorRT engine file. The two-argument form of `deserializeCudaEngine` is used above; the third (plugin-factory) parameter from older releases was removed in TensorRT 8.
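For reference, such a file is typically produced by serializing a built engine (or with the `trtexec` tool via `--saveEngine`). A minimal sketch of the serialization side, assuming an already-built `engine`:
```cpp
// Sketch: writing a built engine out as model.trt.
nvinfer1::IHostMemory* serialized = engine->serialize();
std::ofstream out("model.trt", std::ios::binary);
out.write(static_cast<const char*>(serialized->data()), serialized->size());
serialized->destroy();
```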
4. Create an `IExecutionContext` object:
```cpp
nvinfer1::IExecutionContext* context = engine->createExecutionContext();
```
5. Allocate device memory for the input and output bindings:
```cpp
// Look up the binding slots by tensor name (the names depend on the model).
const int input_index = engine->getBindingIndex("input");
const int output_index = engine->getBindingIndex("output");
// Allocate a device buffer for each binding.
void* input_memory;
cudaMalloc(&input_memory, input_size);
void* output_memory;
cudaMalloc(&output_memory, output_size);
```
Here, `input_size` and `output_size` are the input and output buffer sizes in bytes, and `"input"`/`"output"` must match the actual tensor names in the model.
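Rather than hard-coding these sizes, they can be derived from the engine's binding dimensions. A sketch, assuming a static-shape engine with float32 tensors (the helper name is ours):
```cpp
// Sketch: compute a binding's byte size from its dimensions
// (assumes static shapes and float32 data).
size_t bindingSize(const nvinfer1::ICudaEngine& engine, int index)
{
    const nvinfer1::Dims dims = engine.getBindingDimensions(index);
    size_t size = sizeof(float);
    for (int i = 0; i < dims.nbDims; ++i)
        size *= dims.d[i];
    return size;
}
```
It would be used as, e.g., `const size_t input_size = bindingSize(*engine, input_index);`.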
6. Run inference:
```cpp
// The bindings array is indexed by binding slot, so place each buffer
// at its binding index rather than assuming a fixed order.
void* bindings[2];
bindings[input_index] = input_memory;
bindings[output_index] = output_memory;
context->execute(1, bindings);
```
Here, `1` is the batch size; this assumes the input data has already been copied into `input_memory` (e.g. with `cudaMemcpy(..., cudaMemcpyHostToDevice)`). Note that `execute()` with a batch-size argument applies to implicit-batch engines; engines built with an explicit batch dimension (the usual case for ONNX models) use `executeV2(bindings)` instead.
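For better throughput, inference is often run asynchronously on a CUDA stream. A minimal sketch using `enqueueV2` (explicit-batch engines); `host_input` and `host_output` are hypothetical pointers to host-side buffers:
```cpp
// Sketch: asynchronous copy-infer-copy on a CUDA stream.
cudaStream_t stream;
cudaStreamCreate(&stream);

// host_input / host_output are hypothetical host buffers for the tensors.
cudaMemcpyAsync(input_memory, host_input, input_size, cudaMemcpyHostToDevice, stream);
context->enqueueV2(bindings, stream, nullptr);
cudaMemcpyAsync(host_output, output_memory, output_size, cudaMemcpyDeviceToHost, stream);

// Block until all queued work on the stream has finished.
cudaStreamSynchronize(stream);
cudaStreamDestroy(stream);
```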
7. Copy the output back to the host:
```cpp
std::vector<float> output_data(output_size / sizeof(float));
cudaMemcpy(output_data.data(), output_memory, output_size, cudaMemcpyDeviceToHost);
```
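The CUDA calls above all return `cudaError_t` values that are silently discarded here. A common pattern is a small checking macro; a sketch (the macro name is ours):
```cpp
#include <cuda_runtime_api.h>
#include <cstdio>
#include <cstdlib>

// Hypothetical helper: abort with a readable message if a CUDA call fails.
#define CUDA_CHECK(call)                                               \
    do {                                                               \
        const cudaError_t err = (call);                                \
        if (err != cudaSuccess) {                                      \
            std::fprintf(stderr, "CUDA error %s at %s:%d\n",           \
                         cudaGetErrorString(err), __FILE__, __LINE__); \
            std::exit(EXIT_FAILURE);                                   \
        }                                                              \
    } while (0)
```
Usage: `CUDA_CHECK(cudaMemcpy(output_data.data(), output_memory, output_size, cudaMemcpyDeviceToHost));`.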
8. Release resources. The `std::vector` buffers free themselves, so only the device memory and the TensorRT objects need explicit cleanup, in reverse order of creation:
```cpp
cudaFree(input_memory);
cudaFree(output_memory);
context->destroy();
engine->destroy();
runtime->destroy();
```
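As an aside, `destroy()` is deprecated from TensorRT 8 onward; recent releases expect plain `delete` instead:
```cpp
// TensorRT 8+: destroy() is deprecated in favor of ordinary delete.
delete context;
delete engine;
delete runtime;
```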