diff --git a/CMakeLists.txt b/CMakeLists.txt index fa99c6ff78..6cee4ef726 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -300,10 +300,16 @@ if(WITH_GPU) include_directories(${CUDA_DIRECTORY}/include) if(WIN32) find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib/x64) + add_definitions(-DENABLE_NVJPEG) else() find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64) + if(NOT BUILD_ON_JETSON) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib64) + add_definitions(-DENABLE_NVJPEG) + endif() endif() - list(APPEND DEPEND_LIBS ${CUDA_LIB}) + list(APPEND DEPEND_LIBS ${CUDA_LIB} ${NVJPEG_LIB}) # build CUDA source files in fastdeploy, CUDA source files include CUDA preprocessing, TRT plugins, etc. enable_language(CUDA) diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index c79001c28e..5c2c5b7338 100644 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -169,21 +169,25 @@ if(ENABLE_POROS_BACKEND) endif() if(WITH_GPU) - if (NOT CUDA_DIRECTORY) + if(NOT CUDA_DIRECTORY) set(CUDA_DIRECTORY "/usr/local/cuda") endif() if(WIN32) find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib/x64) else() find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64) + if(NOT BUILD_ON_JETSON) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib64) + endif() endif() if(NOT CUDA_LIB) message(FATAL_ERROR "[FastDeploy] Cannot find library cudart in ${CUDA_DIRECTORY}, Please define CUDA_DIRECTORY, e.g -DCUDA_DIRECTORY=/path/to/cuda") endif() - list(APPEND FASTDEPLOY_LIBS ${CUDA_LIB}) + list(APPEND FASTDEPLOY_LIBS ${CUDA_LIB} ${NVJPEG_LIB}) list(APPEND FASTDEPLOY_INCS ${CUDA_DIRECTORY}/include) - if (ENABLE_TRT_BACKEND) + if(ENABLE_TRT_BACKEND) if(BUILD_ON_JETSON) find_library(TRT_INFER_LIB nvinfer /usr/lib/aarch64-linux-gnu/) find_library(TRT_ONNX_LIB nvonnxparser /usr/lib/aarch64-linux-gnu/) diff --git a/fastdeploy/core/fd_tensor.cc b/fastdeploy/core/fd_tensor.cc index 
c21caf3e97..8fd921043b 100644 --- a/fastdeploy/core/fd_tensor.cc +++ b/fastdeploy/core/fd_tensor.cc @@ -245,12 +245,13 @@ void FDTensor::PrintInfo(const std::string& prefix) const { bool FDTensor::ReallocFn(size_t nbytes) { if (device == Device::GPU) { #ifdef WITH_GPU - size_t original_nbytes = Nbytes(); + size_t original_nbytes = nbytes_allocated; if (nbytes > original_nbytes) { if (buffer_ != nullptr) { FDDeviceFree()(buffer_); } FDDeviceAllocator()(&buffer_, nbytes); + nbytes_allocated = nbytes; } return buffer_ != nullptr; #else @@ -262,12 +263,13 @@ bool FDTensor::ReallocFn(size_t nbytes) { } else { if (is_pinned_memory) { #ifdef WITH_GPU - size_t original_nbytes = Nbytes(); + size_t original_nbytes = nbytes_allocated; if (nbytes > original_nbytes) { if (buffer_ != nullptr) { FDDeviceHostFree()(buffer_); } FDDeviceHostAllocator()(&buffer_, nbytes); + nbytes_allocated = nbytes; } return buffer_ != nullptr; #else @@ -278,6 +280,7 @@ bool FDTensor::ReallocFn(size_t nbytes) { #endif } buffer_ = realloc(buffer_, nbytes); + nbytes_allocated = nbytes; return buffer_ != nullptr; } } @@ -299,6 +302,7 @@ void FDTensor::FreeFn() { } } buffer_ = nullptr; + nbytes_allocated = 0; } } @@ -380,7 +384,7 @@ FDTensor::FDTensor(const FDTensor& other) device_id(other.device_id) { // Copy buffer if (other.buffer_ == nullptr) { - buffer_ = nullptr; + FreeFn(); } else { size_t nbytes = Nbytes(); FDASSERT(ReallocFn(nbytes), @@ -396,7 +400,8 @@ FDTensor::FDTensor(FDTensor&& other) dtype(other.dtype), external_data_ptr(other.external_data_ptr), device(other.device), - device_id(other.device_id) { + device_id(other.device_id), + nbytes_allocated(other.nbytes_allocated) { other.name = ""; // Note(zhoushunjie): Avoid double free. 
other.buffer_ = nullptr; @@ -435,6 +440,7 @@ FDTensor& FDTensor::operator=(FDTensor&& other) { dtype = other.dtype; device = other.device; device_id = other.device_id; + nbytes_allocated = other.nbytes_allocated; other.name = ""; // Note(zhoushunjie): Avoid double free. diff --git a/fastdeploy/core/fd_tensor.h b/fastdeploy/core/fd_tensor.h index 5584f1b305..95a603dd88 100644 --- a/fastdeploy/core/fd_tensor.h +++ b/fastdeploy/core/fd_tensor.h @@ -54,6 +54,11 @@ struct FASTDEPLOY_DECL FDTensor { // other devices' data std::vector temporary_cpu_buffer; + // The number of bytes allocated so far. + // When resizing GPU memory, we will free and realloc the memory only if the + // required size is larger than this value. + size_t nbytes_allocated = 0; + // Get data buffer pointer void* MutableData(); diff --git a/fastdeploy/vision/classification/ppcls/model.cc b/fastdeploy/vision/classification/ppcls/model.cc old mode 100755 new mode 100644 index 6868c9c62d..d52eeace90 --- a/fastdeploy/vision/classification/ppcls/model.cc +++ b/fastdeploy/vision/classification/ppcls/model.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "fastdeploy/vision/classification/ppcls/model.h" + #include "fastdeploy/utils/unique_ptr.h" namespace fastdeploy { @@ -23,7 +24,8 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file, const std::string& params_file, const std::string& config_file, const RuntimeOption& custom_option, - const ModelFormat& model_format) : preprocessor_(config_file) { + const ModelFormat& model_format) + : preprocessor_(config_file) { if (model_format == ModelFormat::PADDLE) { valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, Backend::LITE}; @@ -32,15 +34,14 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file, valid_ascend_backends = {Backend::LITE}; valid_kunlunxin_backends = {Backend::LITE}; valid_ipu_backends = {Backend::PDINFER}; - }else if (model_format == ModelFormat::SOPHGO) { + } else if (model_format == ModelFormat::SOPHGO) { valid_sophgonpu_backends = {Backend::SOPHGOTPU}; - } - else { + } else { valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; valid_rknpu_backends = {Backend::RKNPU2}; } - + runtime_option = custom_option; runtime_option.model_format = model_format; runtime_option.model_file = model_file; @@ -48,8 +49,9 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file, initialized = Initialize(); } -std::unique_ptr PaddleClasModel::Clone() const { - std::unique_ptr clone_model = utils::make_unique(PaddleClasModel(*this)); +std::unique_ptr PaddleClasModel::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PaddleClasModel(*this)); clone_model->SetRuntime(clone_model->CloneRuntime()); return clone_model; } @@ -71,17 +73,30 @@ bool PaddleClasModel::Predict(cv::Mat* im, ClassifyResult* result, int topk) { } bool PaddleClasModel::Predict(const cv::Mat& im, ClassifyResult* result) { + FDMat mat = WrapMat(im); + return Predict(mat, result); +} + +bool PaddleClasModel::BatchPredict(const std::vector& images, + std::vector* results) { 
+ std::vector mats = WrapMat(images); + return BatchPredict(mats, results); +} + +bool PaddleClasModel::Predict(const FDMat& mat, ClassifyResult* result) { std::vector results; - if (!BatchPredict({im}, &results)) { + std::vector mats = {mat}; + if (!BatchPredict(mats, &results)) { return false; } *result = std::move(results[0]); return true; } -bool PaddleClasModel::BatchPredict(const std::vector& images, std::vector* results) { - std::vector fd_images = WrapMat(images); - if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { +bool PaddleClasModel::BatchPredict(const std::vector& mats, + std::vector* results) { + std::vector fd_mats = mats; + if (!preprocessor_.Run(&fd_mats, &reused_input_tensors_)) { FDERROR << "Failed to preprocess the input image." << std::endl; return false; } @@ -92,7 +107,8 @@ bool PaddleClasModel::BatchPredict(const std::vector& images, std::vect } if (!postprocessor_.Run(reused_output_tensors_, results)) { - FDERROR << "Failed to postprocess the inference results by runtime." << std::endl; + FDERROR << "Failed to postprocess the inference results by runtime." 
+ << std::endl; return false; } diff --git a/fastdeploy/vision/classification/ppcls/model.h b/fastdeploy/vision/classification/ppcls/model.h index 5971147fb5..b5ef8a60ca 100644 --- a/fastdeploy/vision/classification/ppcls/model.h +++ b/fastdeploy/vision/classification/ppcls/model.h @@ -75,6 +75,23 @@ class FASTDEPLOY_DECL PaddleClasModel : public FastDeployModel { virtual bool BatchPredict(const std::vector& imgs, std::vector* results); + /** \brief Predict the classification result for an input image + * + * \param[in] mat The input mat + * \param[in] result The output classification result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const FDMat& mat, ClassifyResult* result); + + /** \brief Predict the classification results for a batch of input images + * + * \param[in] mats, The input mat list + * \param[in] results The output classification result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector& mats, + std::vector* results); + /// Get preprocessor reference of PaddleClasModel virtual PaddleClasPreprocessor& GetPreprocessor() { return preprocessor_; diff --git a/fastdeploy/vision/common/image_decoder/image_decoder.cc b/fastdeploy/vision/common/image_decoder/image_decoder.cc new file mode 100644 index 0000000000..085d234c24 --- /dev/null +++ b/fastdeploy/vision/common/image_decoder/image_decoder.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/common/image_decoder/image_decoder.h" + +#include "opencv2/imgcodecs.hpp" + +namespace fastdeploy { +namespace vision { + +ImageDecoder::ImageDecoder(ImageDecoderLib lib) { + if (lib == ImageDecoderLib::NVJPEG) { +#ifdef ENABLE_NVJPEG + nvjpeg::init_decoder(nvjpeg_params_); +#endif + } + lib_ = lib; +} + +ImageDecoder::~ImageDecoder() { + if (lib_ == ImageDecoderLib::NVJPEG) { +#ifdef ENABLE_NVJPEG + nvjpeg::destroy_decoder(nvjpeg_params_); +#endif + } +} + +bool ImageDecoder::Decode(const std::string& img_name, FDMat* mat) { + std::vector mats(1); + mats[0] = std::move(*mat); + if (!BatchDecode({img_name}, &mats)) { + return false; + } + *mat = std::move(mats[0]); + return true; +} + +bool ImageDecoder::BatchDecode(const std::vector& img_names, + std::vector* mats) { + if (lib_ == ImageDecoderLib::OPENCV) { + return ImplByOpenCV(img_names, mats); + } else if (lib_ == ImageDecoderLib::NVJPEG) { + return ImplByNvJpeg(img_names, mats); + } + return true; +} + +bool ImageDecoder::ImplByOpenCV(const std::vector& img_names, + std::vector* mats) { + for (size_t i = 0; i < img_names.size(); ++i) { + cv::Mat im = cv::imread(img_names[i]); + (*mats)[i].SetMat(im); + (*mats)[i].layout = Layout::HWC; + (*mats)[i].SetWidth(im.cols); + (*mats)[i].SetHeight(im.rows); + (*mats)[i].SetChannels(im.channels()); + } + return true; +} + +bool ImageDecoder::ImplByNvJpeg(const std::vector& img_names, + std::vector* mats) { +#ifdef ENABLE_NVJPEG + nvjpeg_params_.batch_size = img_names.size(); + std::vector output_imgs(nvjpeg_params_.batch_size); + std::vector widths(nvjpeg_params_.batch_size); + std::vector heights(nvjpeg_params_.batch_size); + // TODO(wangxinyu): support other output format + nvjpeg_params_.fmt = NVJPEG_OUTPUT_BGRI; + double total; + nvjpeg_params_.stream = (*mats)[0].Stream(); + + std::vector output_buffers; + for (size_t i = 0; i < 
mats->size(); ++i) { + FDASSERT((*mats)[i].output_cache != nullptr, + "The output_cache of FDMat was not set."); + output_buffers.push_back((*mats)[i].output_cache); + } + + if (nvjpeg::process_images(img_names, nvjpeg_params_, total, output_imgs, + output_buffers, widths, heights)) { + // If nvJPEG decode failed, will fallback to OpenCV, + // e.g. png format is not supported by nvJPEG + FDWARNING << "nvJPEG decode failed, falling back to OpenCV for this batch" + << std::endl; + return ImplByOpenCV(img_names, mats); + } + + for (size_t i = 0; i < mats->size(); ++i) { + (*mats)[i].mat_type = ProcLib::CUDA; + (*mats)[i].layout = Layout::HWC; + (*mats)[i].SetTensor(output_buffers[i]); + } +#else + FDASSERT(false, "FastDeploy didn't compile with NVJPEG."); +#endif + return true; +} + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/image_decoder/image_decoder.h b/fastdeploy/vision/common/image_decoder/image_decoder.h new file mode 100644 index 0000000000..7c7b2d2979 --- /dev/null +++ b/fastdeploy/vision/common/image_decoder/image_decoder.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "fastdeploy/utils/utils.h" +#include "fastdeploy/vision/common/processors/mat.h" +#include "fastdeploy/vision/common/image_decoder/nvjpeg_decoder.h" + +namespace fastdeploy { +namespace vision { + +enum class FASTDEPLOY_DECL ImageDecoderLib { OPENCV, NVJPEG }; + +class FASTDEPLOY_DECL ImageDecoder { + public: + explicit ImageDecoder(ImageDecoderLib lib = ImageDecoderLib::OPENCV); + + ~ImageDecoder(); + + bool Decode(const std::string& img_name, FDMat* mat); + + bool BatchDecode(const std::vector& img_names, + std::vector* mats); + + private: + bool ImplByOpenCV(const std::vector& img_names, + std::vector* mats); + bool ImplByNvJpeg(const std::vector& img_names, + std::vector* mats); + ImageDecoderLib lib_ = ImageDecoderLib::OPENCV; +#ifdef ENABLE_NVJPEG + nvjpeg::decode_params_t nvjpeg_params_; +#endif +}; + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.cc b/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.cc new file mode 100644 index 0000000000..ad121ee3fb --- /dev/null +++ b/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.cc @@ -0,0 +1,363 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Part of the following code in this file refs to +// https://github.com/CVCUDA/CV-CUDA/blob/release_v0.2.x/samples/common/NvDecoder.cpp +// +// Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Licensed under the Apache-2.0 license +// \brief +// \author NVIDIA + +#ifdef ENABLE_NVJPEG +#include "fastdeploy/vision/common/image_decoder/nvjpeg_decoder.h" + +namespace fastdeploy { +namespace vision { +namespace nvjpeg { + +#define CHECK_CUDA(call) \ + { \ + cudaError_t _e = (call); \ + if (_e != cudaSuccess) { \ + std::cout << "CUDA Runtime failure: '#" << _e << "' at " << __FILE__ \ + << ":" << __LINE__ << std::endl; \ + exit(1); \ + } \ + } + +#define CHECK_NVJPEG(call) \ + { \ + nvjpegStatus_t _e = (call); \ + if (_e != NVJPEG_STATUS_SUCCESS) { \ + std::cout << "NVJPEG failure: '#" << _e << "' at " << __FILE__ << ":" \ + << __LINE__ << std::endl; \ + exit(1); \ + } \ + } + +static int dev_malloc(void** p, size_t s) { return (int)cudaMalloc(p, s); } + +static int dev_free(void* p) { return (int)cudaFree(p); } + +static int host_malloc(void** p, size_t s, unsigned int f) { + return (int)cudaHostAlloc(p, s, f); +} + +static int host_free(void* p) { return (int)cudaFreeHost(p); } + +static int read_images(const FileNames& image_names, FileData& raw_data, + std::vector& raw_len) { + for (size_t i = 0; i < image_names.size(); ++i) { + if (image_names.size() == 0) { + std::cerr << "No valid images left in the input list, exit" << std::endl; + return EXIT_FAILURE; + } + + // Read an image from disk. 
+ std::ifstream input(image_names[i].c_str(), + std::ios::in | std::ios::binary | std::ios::ate); + if (!(input.is_open())) { + std::cerr << "Cannot open image: " << image_names[i] << std::endl; + FDASSERT(false, "Read file error."); + continue; + } + + // Get the size + long unsigned int file_size = input.tellg(); + input.seekg(0, std::ios::beg); + // resize if buffer is too small + if (raw_data[i].size() < file_size) { + raw_data[i].resize(file_size); + } + if (!input.read(raw_data[i].data(), file_size)) { + std::cerr << "Cannot read from file: " << image_names[i] << std::endl; + // image_names.erase(cur_iter); + FDASSERT(false, "Read file error."); + continue; + } + raw_len[i] = file_size; + } + return EXIT_SUCCESS; +} + +// prepare buffers for RGBi output format +static int prepare_buffers(FileData& file_data, std::vector& file_len, + std::vector& img_width, + std::vector& img_height, + std::vector& ibuf, + std::vector& isz, + std::vector& output_buffers, + const FileNames& current_names, + decode_params_t& params) { + int widths[NVJPEG_MAX_COMPONENT]; + int heights[NVJPEG_MAX_COMPONENT]; + int channels; + nvjpegChromaSubsampling_t subsampling; + + for (long unsigned int i = 0; i < file_data.size(); i++) { + nvjpegStatus_t status = nvjpegGetImageInfo( + params.nvjpeg_handle, (unsigned char*)file_data[i].data(), file_len[i], + &channels, &subsampling, widths, heights); + if (status != NVJPEG_STATUS_SUCCESS) { + std::cout << "NVJPEG failure: #" << status << " in nvjpegGetImageInfo." 
+ << std::endl; + return EXIT_FAILURE; + } + + img_width[i] = widths[0]; + img_height[i] = heights[0]; + + int mul = 1; + // in the case of interleaved RGB output, write only to single channel, but + // 3 samples at once + if (params.fmt == NVJPEG_OUTPUT_RGBI || params.fmt == NVJPEG_OUTPUT_BGRI) { + channels = 1; + mul = 3; + } else if (params.fmt == NVJPEG_OUTPUT_RGB || + params.fmt == NVJPEG_OUTPUT_BGR) { + // in the case of rgb create 3 buffers with sizes of original image + channels = 3; + widths[1] = widths[2] = widths[0]; + heights[1] = heights[2] = heights[0]; + } else { + FDASSERT(false, "Unsupport NVJPEG output format: %d", params.fmt); + } + + output_buffers[i]->Resize({heights[0], widths[0], mul * channels}, + FDDataType::UINT8, "output_cache", Device::GPU); + + uint8_t* cur_buffer = reinterpret_cast(output_buffers[i]->Data()); + + // realloc output buffer if required + for (int c = 0; c < channels; c++) { + int aw = mul * widths[c]; + int ah = heights[c]; + size_t sz = aw * ah; + ibuf[i].pitch[c] = aw; + if (sz > isz[i].pitch[c]) { + ibuf[i].channel[c] = cur_buffer; + cur_buffer = cur_buffer + sz; + isz[i].pitch[c] = sz; + } + } + } + return EXIT_SUCCESS; +} + +static void create_decoupled_api_handles(decode_params_t& params) { + CHECK_NVJPEG(nvjpegDecoderCreate(params.nvjpeg_handle, NVJPEG_BACKEND_DEFAULT, + &params.nvjpeg_decoder)); + CHECK_NVJPEG(nvjpegDecoderStateCreate(params.nvjpeg_handle, + params.nvjpeg_decoder, + &params.nvjpeg_decoupled_state)); + + CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL, + &params.pinned_buffers[0])); + CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL, + &params.pinned_buffers[1])); + CHECK_NVJPEG(nvjpegBufferDeviceCreate(params.nvjpeg_handle, NULL, + &params.device_buffer)); + + CHECK_NVJPEG( + nvjpegJpegStreamCreate(params.nvjpeg_handle, &params.jpeg_streams[0])); + CHECK_NVJPEG( + nvjpegJpegStreamCreate(params.nvjpeg_handle, &params.jpeg_streams[1])); + + 
CHECK_NVJPEG(nvjpegDecodeParamsCreate(params.nvjpeg_handle, + &params.nvjpeg_decode_params)); +} + +static void destroy_decoupled_api_handles(decode_params_t& params) { + CHECK_NVJPEG(nvjpegDecodeParamsDestroy(params.nvjpeg_decode_params)); + CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[0])); + CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[1])); + CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[0])); + CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[1])); + CHECK_NVJPEG(nvjpegBufferDeviceDestroy(params.device_buffer)); + CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_decoupled_state)); + CHECK_NVJPEG(nvjpegDecoderDestroy(params.nvjpeg_decoder)); +} + +int decode_images(const FileData& img_data, const std::vector& img_len, + std::vector& out, decode_params_t& params, + double& time) { + CHECK_CUDA(cudaStreamSynchronize(params.stream)); + + std::vector batched_bitstreams; + std::vector batched_bitstreams_size; + std::vector batched_output; + + // bit-streams that batched decode cannot handle + std::vector otherdecode_bitstreams; + std::vector otherdecode_bitstreams_size; + std::vector otherdecode_output; + + if (params.hw_decode_available) { + for (int i = 0; i < params.batch_size; i++) { + // extract bitstream meta data to figure out whether a bit-stream can be + // decoded + nvjpegJpegStreamParseHeader(params.nvjpeg_handle, + (const unsigned char*)img_data[i].data(), + img_len[i], params.jpeg_streams[0]); + int isSupported = -1; + nvjpegDecodeBatchedSupported(params.nvjpeg_handle, params.jpeg_streams[0], + &isSupported); + + if (isSupported == 0) { + batched_bitstreams.push_back((const unsigned char*)img_data[i].data()); + batched_bitstreams_size.push_back(img_len[i]); + batched_output.push_back(out[i]); + } else { + otherdecode_bitstreams.push_back( + (const unsigned char*)img_data[i].data()); + otherdecode_bitstreams_size.push_back(img_len[i]); + otherdecode_output.push_back(out[i]); + } + } + } else { + for (int i
= 0; i < params.batch_size; i++) { + otherdecode_bitstreams.push_back( + (const unsigned char*)img_data[i].data()); + otherdecode_bitstreams_size.push_back(img_len[i]); + otherdecode_output.push_back(out[i]); + } + } + + if (batched_bitstreams.size() > 0) { + CHECK_NVJPEG(nvjpegDecodeBatchedInitialize( + params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.size(), 1, + params.fmt)); + + CHECK_NVJPEG(nvjpegDecodeBatched( + params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.data(), + batched_bitstreams_size.data(), batched_output.data(), params.stream)); + } + + if (otherdecode_bitstreams.size() > 0) { + CHECK_NVJPEG(nvjpegStateAttachDeviceBuffer(params.nvjpeg_decoupled_state, + params.device_buffer)); + int buffer_index = 0; + CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params, + params.fmt)); + for (int i = 0; i < params.batch_size; i++) { + CHECK_NVJPEG(nvjpegJpegStreamParse(params.nvjpeg_handle, + otherdecode_bitstreams[i], + otherdecode_bitstreams_size[i], 0, 0, + params.jpeg_streams[buffer_index])); + + CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer( + params.nvjpeg_decoupled_state, params.pinned_buffers[buffer_index])); + + CHECK_NVJPEG(nvjpegDecodeJpegHost( + params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, params.nvjpeg_decode_params, + params.jpeg_streams[buffer_index])); + + CHECK_CUDA(cudaStreamSynchronize(params.stream)); + + CHECK_NVJPEG(nvjpegDecodeJpegTransferToDevice( + params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, params.jpeg_streams[buffer_index], + params.stream)); + + buffer_index = 1 - buffer_index; // switch pinned buffer in pipeline mode + // to avoid an extra sync + + CHECK_NVJPEG( + nvjpegDecodeJpegDevice(params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, + &otherdecode_output[i], params.stream)); + } + } + return EXIT_SUCCESS; +} + +double process_images(const FileNames& image_names, decode_params_t& params, + 
double& total, std::vector& iout, + std::vector& output_buffers, + std::vector& widths, std::vector& heights) { + FDASSERT(image_names.size() == params.batch_size, + "Number of images and batch size must be equal."); + // vector for storing raw files and file lengths + FileData file_data(params.batch_size); + std::vector file_len(params.batch_size); + FileNames current_names(params.batch_size); + // we wrap over image files to process total_images of files + auto file_iter = image_names.begin(); + + // output buffer sizes, for convenience + std::vector isz(params.batch_size); + + for (long unsigned int i = 0; i < iout.size(); i++) { + for (int c = 0; c < NVJPEG_MAX_COMPONENT; c++) { + iout[i].channel[c] = NULL; + iout[i].pitch[c] = 0; + isz[i].pitch[c] = 0; + } + } + + if (read_images(image_names, file_data, file_len)) { + return EXIT_FAILURE; + } + + if (prepare_buffers(file_data, file_len, widths, heights, iout, isz, + output_buffers, image_names, params)) { + return EXIT_FAILURE; + } + + double time; + if (decode_images(file_data, file_len, iout, params, time)) { + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +void init_decoder(decode_params_t& params) { + params.hw_decode_available = true; + nvjpegDevAllocator_t dev_allocator = {&dev_malloc, &dev_free}; + nvjpegPinnedAllocator_t pinned_allocator = {&host_malloc, &host_free}; + nvjpegStatus_t status = + nvjpegCreateEx(NVJPEG_BACKEND_HARDWARE, &dev_allocator, &pinned_allocator, + NVJPEG_FLAGS_DEFAULT, &params.nvjpeg_handle); + if (status == NVJPEG_STATUS_ARCH_MISMATCH) { + std::cout << "Hardware Decoder not supported. 
" + "Falling back to default backend" + << std::endl; + CHECK_NVJPEG(nvjpegCreateEx(NVJPEG_BACKEND_DEFAULT, &dev_allocator, + &pinned_allocator, NVJPEG_FLAGS_DEFAULT, + &params.nvjpeg_handle)); + params.hw_decode_available = false; + } else { + CHECK_NVJPEG(status); + } + + CHECK_NVJPEG( + nvjpegJpegStateCreate(params.nvjpeg_handle, &params.nvjpeg_state)); + + create_decoupled_api_handles(params); +} + +void destroy_decoder(decode_params_t& params) { + destroy_decoupled_api_handles(params); + CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_state)); + CHECK_NVJPEG(nvjpegDestroy(params.nvjpeg_handle)); +} + +} // namespace nvjpeg +} // namespace vision +} // namespace fastdeploy + +#endif // ENABLE_NVJPEG diff --git a/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.h b/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.h new file mode 100644 index 0000000000..65307ced89 --- /dev/null +++ b/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.h @@ -0,0 +1,69 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Part of the following code in this file refs to +// https://github.com/CVCUDA/CV-CUDA/blob/release_v0.2.x/samples/common/NvDecoder.h +// +// Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// Licensed under the Apache-2.0 license +// \brief +// \author NVIDIA + +#pragma once + +#ifdef ENABLE_NVJPEG +#include "fastdeploy/core/fd_tensor.h" + +#include +#include + + +namespace fastdeploy { +namespace vision { +namespace nvjpeg { + +typedef std::vector FileNames; +typedef std::vector> FileData; + +struct decode_params_t { + int batch_size; + nvjpegJpegState_t nvjpeg_state; + nvjpegHandle_t nvjpeg_handle; + cudaStream_t stream; + + // used with decoupled API + nvjpegJpegState_t nvjpeg_decoupled_state; + nvjpegBufferPinned_t pinned_buffers[2]; // 2 buffers for pipelining + nvjpegBufferDevice_t device_buffer; + nvjpegJpegStream_t jpeg_streams[2]; // 2 streams for pipelining + nvjpegDecodeParams_t nvjpeg_decode_params; + nvjpegJpegDecoder_t nvjpeg_decoder; + + nvjpegOutputFormat_t fmt; + bool hw_decode_available; +}; + +void init_decoder(decode_params_t& params); +void destroy_decoder(decode_params_t& params); + +double process_images(const FileNames& image_names, decode_params_t& params, + double& total, std::vector& iout, + std::vector& output_buffers, + std::vector& widths, std::vector& heights); + +} // namespace nvjpeg +} // namespace vision +} // namespace fastdeploy + +#endif // ENABLE_NVJPEG diff --git a/fastdeploy/vision/common/processors/manager.cc b/fastdeploy/vision/common/processors/manager.cc index 45b29866bf..070354da11 100644 --- a/fastdeploy/vision/common/processors/manager.cc +++ b/fastdeploy/vision/common/processors/manager.cc @@ -77,6 +77,16 @@ bool ProcessorManager::Run(std::vector* images, } (*images)[i].input_cache = &input_caches_[i]; (*images)[i].output_cache = &output_caches_[i]; + if ((*images)[i].mat_type == ProcLib::CUDA) { + // Make a copy of the input data ptr, so that the original data ptr of + // FDMat won't be modified. 
+ auto fd_tensor = std::make_shared(); + fd_tensor->SetExternalData( + (*images)[i].Tensor()->shape, (*images)[i].Tensor()->Dtype(), + (*images)[i].Tensor()->Data(), (*images)[i].Tensor()->device, + (*images)[i].Tensor()->device_id); + (*images)[i].SetTensor(fd_tensor); + } } bool ret = Apply(&image_batch, outputs); diff --git a/fastdeploy/vision/common/processors/manager.h b/fastdeploy/vision/common/processors/manager.h index 6c119ff56b..48b5575c42 100644 --- a/fastdeploy/vision/common/processors/manager.h +++ b/fastdeploy/vision/common/processors/manager.h @@ -35,6 +35,10 @@ class FASTDEPLOY_DECL ProcessorManager { bool CudaUsed(); +#ifdef WITH_GPU + cudaStream_t Stream() const { return stream_; } +#endif + void SetStream(FDMat* mat) { #ifdef WITH_GPU mat->SetStream(stream_); @@ -56,7 +60,7 @@ class FASTDEPLOY_DECL ProcessorManager { int DeviceId() { return device_id_; } - /** \brief Process the input image and prepare input tensors for runtime + /** \brief Process the input images and prepare input tensors for runtime * * \param[in] images The input image data list, all the elements are returned by cv::imread() * \param[in] outputs The output tensors which will feed in runtime diff --git a/fastdeploy/vision/common/processors/mat.cc b/fastdeploy/vision/common/processors/mat.cc index f56d0b585c..b78f574362 100644 --- a/fastdeploy/vision/common/processors/mat.cc +++ b/fastdeploy/vision/common/processors/mat.cc @@ -37,7 +37,7 @@ cv::Mat* Mat::GetOpenCVMat() { #ifdef WITH_GPU FDASSERT(cudaStreamSynchronize(stream) == cudaSuccess, "[ERROR] Error occurs while sync cuda stream."); - cpu_mat = CreateZeroCopyOpenCVMatFromTensor(fd_tensor); + cpu_mat = CreateZeroCopyOpenCVMatFromTensor(*fd_tensor); mat_type = ProcLib::OPENCV; device = Device::CPU; return &cpu_mat; @@ -59,29 +59,53 @@ void* Mat::Data() { "fcv::Mat."); #endif } else if (device == Device::GPU) { - return fd_tensor.Data(); + return fd_tensor->Data(); } return cpu_mat.ptr(); } FDTensor* Mat::Tensor() { if 
(mat_type == ProcLib::OPENCV) { - ShareWithTensor(&fd_tensor); + ShareWithTensor(fd_tensor.get()); } else if (mat_type == ProcLib::FLYCV) { #ifdef ENABLE_FLYCV cpu_mat = ConvertFlyCVMatToOpenCV(fcv_mat); mat_type = ProcLib::OPENCV; - ShareWithTensor(&fd_tensor); + ShareWithTensor(fd_tensor.get()); #else FDASSERT(false, "FastDeploy didn't compiled with FlyCV!"); #endif } - return &fd_tensor; + return fd_tensor.get(); } void Mat::SetTensor(FDTensor* tensor) { - fd_tensor.SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), - tensor->device, tensor->device_id); + fd_tensor->SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), + tensor->device, tensor->device_id); + device = tensor->device; + if (layout == Layout::HWC) { + height = tensor->Shape()[0]; + width = tensor->Shape()[1]; + channels = tensor->Shape()[2]; + } else if (layout == Layout::CHW) { + channels = tensor->Shape()[0]; + height = tensor->Shape()[1]; + width = tensor->Shape()[2]; + } +} + +void Mat::SetTensor(std::shared_ptr& tensor) { + fd_tensor = tensor; + device = tensor->device; + if (layout == Layout::HWC) { + height = tensor->Shape()[0]; + width = tensor->Shape()[1]; + channels = tensor->Shape()[2]; + } else if (layout == Layout::CHW) { + channels = tensor->Shape()[0]; + height = tensor->Shape()[1]; + width = tensor->Shape()[2]; + } } void Mat::ShareWithTensor(FDTensor* tensor) { @@ -134,7 +158,7 @@ void Mat::PrintInfo(const std::string& flag) { #ifdef WITH_GPU FDASSERT(cudaStreamSynchronize(stream) == cudaSuccess, "[ERROR] Error occurs while sync cuda stream."); - cv::Mat tmp_mat = CreateZeroCopyOpenCVMatFromTensor(fd_tensor); + cv::Mat tmp_mat = CreateZeroCopyOpenCVMatFromTensor(*fd_tensor); cv::Scalar mean = cv::mean(tmp_mat); for (int i = 0; i < Channels(); ++i) { std::cout << mean[i] << " "; @@ -157,7 +181,7 @@ FDDataType Mat::Type() { "fcv::Mat."); #endif } else if (mat_type == ProcLib::CUDA || mat_type == ProcLib::CVCUDA) { - return fd_tensor.Dtype(); + return 
fd_tensor->Dtype(); } return OpenCVDataTypeToFD(cpu_mat.type()); } @@ -262,6 +286,10 @@ FDTensor* CreateCachedGpuInputTensor(Mat* mat) { #ifdef WITH_GPU FDTensor* src = mat->Tensor(); if (src->device == Device::GPU) { + if (src->Data() == mat->output_cache->Data()) { + std::swap(mat->input_cache, mat->output_cache); + std::swap(mat->input_cache->name, mat->output_cache->name); + } return src; } else if (src->device == Device::CPU) { // Mats on CPU, we need copy these tensors from CPU to GPU diff --git a/fastdeploy/vision/common/processors/mat.h b/fastdeploy/vision/common/processors/mat.h index c29fdd4b2b..13ae76abdb 100644 --- a/fastdeploy/vision/common/processors/mat.h +++ b/fastdeploy/vision/common/processors/mat.h @@ -49,7 +49,6 @@ struct FASTDEPLOY_DECL Mat { #endif Mat(const Mat& mat) = default; - // Move assignment Mat& operator=(const Mat& mat) = default; // Move constructor @@ -96,6 +95,8 @@ struct FASTDEPLOY_DECL Mat { // Set fd_tensor void SetTensor(FDTensor* tensor); + void SetTensor(std::shared_ptr& tensor); + private: int channels; int height; @@ -109,7 +110,7 @@ struct FASTDEPLOY_DECL Mat { #endif // Currently, fd_tensor is only used by CUDA and CV-CUDA, // OpenCV and FlyCV are not using it. 
- FDTensor fd_tensor; + std::shared_ptr fd_tensor = std::make_shared(); public: FDDataType Type(); diff --git a/fastdeploy/vision/common/processors/mat_batch.cc b/fastdeploy/vision/common/processors/mat_batch.cc index b737035882..f625d6d4db 100644 --- a/fastdeploy/vision/common/processors/mat_batch.cc +++ b/fastdeploy/vision/common/processors/mat_batch.cc @@ -27,7 +27,7 @@ void FDMatBatch::SetStream(cudaStream_t s) { FDTensor* FDMatBatch::Tensor() { if (has_batched_tensor) { - return &fd_tensor; + return fd_tensor.get(); } FDASSERT(CheckShapeConsistency(mats), "Mats shapes are not consistent.") // Each mat has its own tensor, @@ -45,12 +45,12 @@ FDTensor* FDMatBatch::Tensor() { num_bytes, device, false); } SetTensor(input_cache); - return &fd_tensor; + return fd_tensor.get(); } void FDMatBatch::SetTensor(FDTensor* tensor) { - fd_tensor.SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), - tensor->device, tensor->device_id); + fd_tensor->SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), + tensor->device, tensor->device_id); has_batched_tensor = true; } diff --git a/fastdeploy/vision/common/processors/mat_batch.h b/fastdeploy/vision/common/processors/mat_batch.h index ed5b408c33..090d8bb591 100644 --- a/fastdeploy/vision/common/processors/mat_batch.h +++ b/fastdeploy/vision/common/processors/mat_batch.h @@ -29,7 +29,7 @@ struct FASTDEPLOY_DECL FDMatBatch { // MatBatch is intialized with a list of mats, // the data is stored in the mats separately. // Call Tensor() function to get a batched 4-dimension tensor. 
- explicit FDMatBatch(std::vector* _mats) { + explicit FDMatBatch(std::vector* _mats) { mats = _mats; layout = FDMatBatchLayout::NHWC; mat_type = ProcLib::OPENCV; @@ -44,7 +44,7 @@ struct FASTDEPLOY_DECL FDMatBatch { #ifdef WITH_GPU cudaStream_t stream = nullptr; #endif - FDTensor fd_tensor; + std::shared_ptr fd_tensor = std::make_shared(); public: // When using CV-CUDA/CUDA, please set input/output cache, diff --git a/fastdeploy/vision/common/processors/normalize_and_permute.cu b/fastdeploy/vision/common/processors/normalize_and_permute.cu index fd482e9d67..7f6320ba48 100644 --- a/fastdeploy/vision/common/processors/normalize_and_permute.cu +++ b/fastdeploy/vision/common/processors/normalize_and_permute.cu @@ -81,7 +81,7 @@ bool NormalizeAndPermute::ImplByCuda(FDMatBatch* mat_batch) { // Prepare output tensor mat_batch->output_cache->Resize(src->Shape(), FDDataType::FP32, - "output_cache", Device::GPU); + "batch_output_cache", Device::GPU); // NHWC -> NCHW std::swap(mat_batch->output_cache->shape[1], mat_batch->output_cache->shape[3]); diff --git a/tutorials/README.md b/tutorials/README.md index 05c4c74a8a..8e0f3095c1 100644 --- a/tutorials/README.md +++ b/tutorials/README.md @@ -1,13 +1,9 @@ English | [中文](README_CN.md) - # Tutorials - - This directory provides some tutorials for FastDeploy. For other model deployment, please refer to the example [FastDeploy/examples](../examples) directly. - - Intel independent graphics card/integrated graphics card deployment [see intel_gpu](intel_gpu) - - Model multithreaded call [see multi_thread](multi_thread) +- Image decoding, including hardware decoding, e.g.
nvJPEG [see image_decoder](image_decoder) diff --git a/tutorials/README_CN.md b/tutorials/README_CN.md index 771bb89342..efdd904b9a 100644 --- a/tutorials/README_CN.md +++ b/tutorials/README_CN.md @@ -7,3 +7,4 @@ - Intel独立显卡/集成显卡部署 [见intel_gpu](intel_gpu) - 模型多线程调用 [见multi_thread](multi_thread) +- 图片解码(含nvJPEG硬解码) [见image_decoder](image_decoder) diff --git a/tutorials/image_decoder/README.md b/tutorials/image_decoder/README.md new file mode 100644 index 0000000000..659afee9cf --- /dev/null +++ b/tutorials/image_decoder/README.md @@ -0,0 +1,16 @@ +English | [中文](README_CN.md) + +# Image Decoder + +Currently, the following image decoder libraries are supported: +- OpenCV +- nvJPEG (requires an NVIDIA GPU; not supported on Jetson) + +## Example + +- [C++ Example](cpp) +- Python API (WIP) + +## nvJPEG vs. OpenCV performance benchmark + +Refer to: https://github.com/PaddlePaddle/FastDeploy/pull/1288#issuecomment-1427749772 diff --git a/tutorials/image_decoder/README_CN.md b/tutorials/image_decoder/README_CN.md new file mode 100644 index 0000000000..d6fcfee768 --- /dev/null +++ b/tutorials/image_decoder/README_CN.md @@ -0,0 +1,16 @@ +简体中文 | [English](README.md) + +# Image Decoder + +图片解码库,目前支持以下图片解码库: +- OpenCV +- nvJPEG (依赖NVIDIA GPU,不支持Jetson) + +## 示例代码 + +- [C++示例](cpp) +- Python API仍在开发中...
+ +## nvJPEG和OpenCV性能对比数据 + +参见:https://github.com/PaddlePaddle/FastDeploy/pull/1288#issuecomment-1427749772 diff --git a/tutorials/image_decoder/cpp/CMakeLists.txt b/tutorials/image_decoder/cpp/CMakeLists.txt new file mode 100644 index 0000000000..d1f90095e2 --- /dev/null +++ b/tutorials/image_decoder/cpp/CMakeLists.txt @@ -0,0 +1,11 @@ +PROJECT(image_decoder C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +include_directories(${FASTDEPLOY_INCS}) + +add_executable(image_decoder ${PROJECT_SOURCE_DIR}/main.cc) +target_link_libraries(image_decoder ${FASTDEPLOY_LIBS}) diff --git a/tutorials/image_decoder/cpp/README.md b/tutorials/image_decoder/cpp/README.md new file mode 100644 index 0000000000..1a2198b0f9 --- /dev/null +++ b/tutorials/image_decoder/cpp/README.md @@ -0,0 +1,23 @@ +English | [中文](README_CN.md) + +# Image Decoder C++ Example + +1. [Build FastDeploy](../docs/cn/build_and_install) or download [FastDeploy prebuilt library](../docs/cn/build_and_install/download_prebuilt_libraries.md) + +2. Build example +```bash +mkdir build +cd build + +# [PATH-TO-FASTDEPLOY] is the install directory of FastDeploy +cmake .. -DFASTDEPLOY_INSTALL_DIR=[PATH-TO-FASTDEPLOY] +make -j + +# Download the test image +wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg + +# OpenCV decoder +./image_decoder ILSVRC2012_val_00000010.jpeg 0 +# nvJPEG +./image_decoder ILSVRC2012_val_00000010.jpeg 1 +``` diff --git a/tutorials/image_decoder/cpp/README_CN.md b/tutorials/image_decoder/cpp/README_CN.md new file mode 100644 index 0000000000..a62c26630e --- /dev/null +++ b/tutorials/image_decoder/cpp/README_CN.md @@ -0,0 +1,23 @@ +简体中文 | [English](README.md) + +# Image Decoder C++示例 + +1.
[编译FastDeploy](../docs/cn/build_and_install), 或直接下载[FastDeploy预编译库](../docs/cn/build_and_install/download_prebuilt_libraries.md) + +2. 编译示例 +```bash +mkdir build +cd build + +# [PATH-TO-FASTDEPLOY]需替换为FastDeploy的安装路径 +cmake .. -DFASTDEPLOY_INSTALL_DIR=[PATH-TO-FASTDEPLOY] +make -j + +# 下载测试图片 +wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg + +# OpenCV解码 +./image_decoder ILSVRC2012_val_00000010.jpeg 0 +# nvJPEG +./image_decoder ILSVRC2012_val_00000010.jpeg 1 +``` diff --git a/tutorials/image_decoder/cpp/main.cc b/tutorials/image_decoder/cpp/main.cc new file mode 100644 index 0000000000..2193aa8e76 --- /dev/null +++ b/tutorials/image_decoder/cpp/main.cc @@ -0,0 +1,57 @@ +#include "fastdeploy/vision/common/image_decoder/image_decoder.h" + +namespace fdvis = fastdeploy::vision; +namespace fd = fastdeploy; + +void OpenCVImageDecode(const std::string& img_name) { + fdvis::FDMat mat; + auto img_decoder = new fdvis::ImageDecoder(); + img_decoder->Decode(img_name, &mat); + mat.PrintInfo(""); + delete img_decoder; +} + +void NvJpegImageDecode(const std::string& img_name) { + std::vector mats(1); + std::vector caches(1); + + cudaStream_t stream; + cudaStreamCreate(&stream); + // For nvJPEG decoder, we need set stream and output cache for the FDMat + for (size_t i = 0; i < mats.size(); i++) { + mats[i].output_cache = &caches[i]; + mats[i].SetStream(stream); + } + auto img_decoder = new fdvis::ImageDecoder(fdvis::ImageDecoderLib::NVJPEG); + + // This is batch decode API, for single image decode API, + // please refer to OpenCVImageDecode() + img_decoder->BatchDecode({img_name}, &mats); + + for (size_t i = 0; i < mats.size(); i++) { + std::cout << "Mat type: " << mats[i].mat_type << ", " + << "DataType=" << mats[i].Type() << ", " + << "Channel=" << mats[i].Channels() << ", " + << "Height=" << mats[i].Height() << ", " + << "Width=" << mats[i].Width() << std::endl; + } + + cudaStreamDestroy(stream); +} + +int main(int
argc, char* argv[]) { + if (argc < 3) { + std::cout << "Usage: image_decoder path/to/image run_option, " + "e.g ./image_decoder ./test.jpeg 0" + << std::endl; + std::cout << "Run_option 0: OpenCV; 1: nvJPEG " << std::endl; + return -1; + } + + if (std::atoi(argv[2]) == 0) { + OpenCVImageDecode(argv[1]); + } else if (std::atoi(argv[2]) == 1) { + NvJpegImageDecode(argv[1]); + } + return 0; +} \ No newline at end of file