/* * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef TRT_INFERENCE_H_ #define TRT_INFERENCE_H_ #include #include #include "NvInfer.h" #include "NvCaffeParser.h" #include "NvOnnxParser.h" #include using namespace nvinfer1; using namespace nvcaffeparser1; using namespace nvonnxparser; using namespace std; // Model Index #define GOOGLENET_SINGLE_CLASS 0 #define GOOGLENET_THREE_CLASS 1 #define RESNET_THREE_CLASS 2 class Logger; class Profiler; class TRT_Context { public: //net related parameter int getNetWidth() const; int getNetHeight() const; uint32_t getBatchSize() const; int getChannel() const; int getModelClassCnt() const; void* getScales() const; void* getOffsets() const; // Buffer is allocated in TRT_Conxtex, // Expose this interface for inputing data void*& getBuffer(const int& index); float*& getInputBuf(); uint32_t getNumTrtInstances() const; //0 fp16 1 fp32 2 int8 void setMode(const int& mode); void setBatchSize(const uint32_t& batchsize); void setDumpResult(const bool& dump_result); void setTrtProfilerEnabled(const bool& enable_trt_profiler); int getFilterNum() const; void setFilterNum(const unsigned int& filter_num); TRT_Context(); void setModelIndex(int modelIndex); void buildTrtContext(const string& deployfile, const string& modelfile, bool bUseCPUBuf = false, bool isOnnxModel = false); void doInference( queue< vector >* rectList_queue, float *input = NULL); void destroyTrtContext(bool bUseCPUBuf = false); ~TRT_Context(); private: int net_width; int net_height; int filter_num; void **buffers; float *input_buf; float *output_cov_buf; float *output_bbox_buf; void* offset_gpu; void* scales_gpu; float helnet_scale[4]; IRuntime *runtime; ICudaEngine *engine; IExecutionContext *context; uint32_t *pResultArray; int channel; //input file's channel int num_bindings; int trtinstance_num; //inference channel num int batch_size; int mode; bool dump_result; ofstream fstream; bool enable_trt_profiler; bool is_onnx_model; IHostMemory *trtModelStream{nullptr}; vector outputs; string result_file; Logger *pLogger; Profiler *pProfiler; int frame_num; uint64_t elapsed_frame_num; uint64_t elapsed_time; int inputIndex; int outputIndex; int outputIndexBBOX; Dims3 inputDims; Dims3 outputDims; Dims3 outputDimsBBOX; size_t inputSize; size_t outputSize; size_t outputSizeBBOX; struct { const int classCnt; float THRESHOLD[3]; const char *INPUT_BLOB_NAME; const char *OUTPUT_BLOB_NAME; const char *OUTPUT_BBOX_NAME; const int STRIDE; const int WORKSPACE_SIZE; int offsets[3]; float input_scale[3]; float bbox_output_scales[4]; const int ParseFunc_ID; } *g_pModelNetAttr, gModelNetAttr[4] = { { // GOOGLENET_SINGLE_CLASS 1, {0.8, 0, 0}, "data", "coverage", "bboxes", 4, 450 * 1024 * 1024, {0, 0, 0}, {1.0f, 1.0f, 1.0f}, {1, 1, 1, 1}, 0 }, { // GOOGLENET_THREE_CLASS 3, {0.6, 0.6, 1.0}, //People, Motorbike, Car "data", "Layer16_cov", "Layer16_bbox", 16, 110 * 1024 * 1024, {124, 117, 104}, {1.0f, 1.0f, 1.0f}, {-640, -368, 640, 368}, 0 }, { // RESNET_THREE_CLASS 4, {0.1, 0.1, 0.1}, //People, Motorbike, Car "data", "Layer7_cov", "Layer7_bbox", 16, 110 * 1024 * 1024, {0, 0, 0}, {0.0039215697906911373, 0.0039215697906911373, 0.0039215697906911373}, {-640, -368, 640, 368}, 1 }, }; enum Mode_type{ MODE_FP16 = 0, MODE_FP32 = 1, MODE_INT8 = 2 }; int parseNet(const string& deployfile); void parseBbox(vector* rectList, int batch_th); void ParseResnet10Bbox(vector* rectList, int batch_th); void allocateMemory(bool bUseCPUBuf); void releaseMemory(bool bUseCPUBuf); void caffeToTRTModel(const string& deployfile, const string& modelfile); void onnxToTRTModel(const string& modelfile); }; #endif