From c797d3114e8d0e9bdf57f4b0d14180c1969c12b4 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com> Date: Sat, 14 Jan 2023 20:41:36 +0800 Subject: [PATCH] [Other] Add Function For Aligning Face With Five Points (#1124) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 更新5点人脸对齐的代码 * 更新代码格式 * 解决comment * update example * 更新注释 Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com> --- .../vision/facedet/scrfd/cpp/CMakeLists.txt | 11 +- .../vision/facedet/scrfd/cpp/README_CN.md | 14 +- .../scrfd/cpp/infer_with_face_align.cc | 115 +++++++++++++ .../{infer.cc => infer_without_face_align.cc} | 0 fastdeploy/vision/utils/face_align.cc | 151 ++++++++++++++++++ fastdeploy/vision/utils/utils.h | 26 ++- 6 files changed, 304 insertions(+), 13 deletions(-) create mode 100644 examples/vision/facedet/scrfd/cpp/infer_with_face_align.cc rename examples/vision/facedet/scrfd/cpp/{infer.cc => infer_without_face_align.cc} (100%) create mode 100644 fastdeploy/vision/utils/face_align.cc diff --git a/examples/vision/facedet/scrfd/cpp/CMakeLists.txt b/examples/vision/facedet/scrfd/cpp/CMakeLists.txt index 93540a7e83..4151638789 100644 --- a/examples/vision/facedet/scrfd/cpp/CMakeLists.txt +++ b/examples/vision/facedet/scrfd/cpp/CMakeLists.txt @@ -1,14 +1,15 @@ PROJECT(infer_demo C CXX) CMAKE_MINIMUM_REQUIRED (VERSION 3.10) -# 指定下载解压后的fastdeploy库路径 option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) -# 添加FastDeploy依赖头文件 + include_directories(${FASTDEPLOY_INCS}) -add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) -# 添加FastDeploy库依赖 -target_link_libraries(infer_demo ${FASTDEPLOY_LIBS}) +add_executable(infer_with_face_align_demo ${PROJECT_SOURCE_DIR}/infer_with_face_align.cc) +target_link_libraries(infer_with_face_align_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_without_face_align_demo ${PROJECT_SOURCE_DIR}/infer_without_face_align.cc) +target_link_libraries(infer_without_face_align_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/facedet/scrfd/cpp/README_CN.md b/examples/vision/facedet/scrfd/cpp/README_CN.md index 1c01173b24..b4e0257630 100644 --- a/examples/vision/facedet/scrfd/cpp/README_CN.md +++ b/examples/vision/facedet/scrfd/cpp/README_CN.md @@ -23,13 +23,21 @@ make -j wget https://bj.bcebos.com/paddlehub/fastdeploy/scrfd_500m_bnkps_shape640x640.onnx wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg +# SCRFD +# CPU推理 +./infer_without_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 0 +# GPU推理 +./infer_without_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 1 +# GPU上TensorRT推理 +./infer_without_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 2 +# SCRFD + FaceAlign # CPU推理 -./infer_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 0 +./infer_with_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 0 # GPU推理 -./infer_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 1 +./infer_with_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 1 # GPU上TensorRT推理 -./infer_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 2 +./infer_with_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 2 ``` 运行完成可视化结果如下图所示 diff --git a/examples/vision/facedet/scrfd/cpp/infer_with_face_align.cc b/examples/vision/facedet/scrfd/cpp/infer_with_face_align.cc new file mode 100644 index 0000000000..1a32ac67c8 --- /dev/null +++ b/examples/vision/facedet/scrfd/cpp/infer_with_face_align.cc @@ -0,0 +1,115 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +void CpuInfer(const std::string& model_file, const std::string& image_file) { + auto model = fastdeploy::vision::facedet::SCRFD(model_file); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::FaceDetectionResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + std::cout << res.Str() << std::endl; + + auto vis_im_list = + fastdeploy::vision::utils::AlignFaceWithFivePoints(im, res); + if (!vis_im_list.empty()) { + cv::imwrite("vis_result.jpg", vis_im_list[0]); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; + } +} + +void GpuInfer(const std::string& model_file, const std::string& image_file) { + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + auto model = fastdeploy::vision::facedet::SCRFD(model_file, "", option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::FaceDetectionResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + std::cout << res.Str() << std::endl; + + auto vis_im_list = + fastdeploy::vision::utils::AlignFaceWithFivePoints(im, res); + if (!vis_im_list.empty()) { + cv::imwrite("vis_result.jpg", vis_im_list[0]); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; + } +} + +void TrtInfer(const std::string& model_file, const std::string& image_file) { + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + option.UseTrtBackend(); + option.SetTrtInputShape("images", {1, 3, 640, 640}); + auto model = fastdeploy::vision::facedet::SCRFD(model_file, "", option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::FaceDetectionResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + std::cout << res.Str() << std::endl; + + auto vis_im_list = + fastdeploy::vision::utils::AlignFaceWithFivePoints(im, res); + if (!vis_im_list.empty()) { + cv::imwrite("vis_result.jpg", vis_im_list[0]); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; + } +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model path/to/image run_option, " + "e.g ./infer_model scrfd_500m_bnkps_shape640x640.onnx ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend." + << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + CpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + GpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 2) { + TrtInfer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/facedet/scrfd/cpp/infer.cc b/examples/vision/facedet/scrfd/cpp/infer_without_face_align.cc similarity index 100% rename from examples/vision/facedet/scrfd/cpp/infer.cc rename to examples/vision/facedet/scrfd/cpp/infer_without_face_align.cc diff --git a/fastdeploy/vision/utils/face_align.cc b/fastdeploy/vision/utils/face_align.cc new file mode 100644 index 0000000000..63dcc43972 --- /dev/null +++ b/fastdeploy/vision/utils/face_align.cc @@ -0,0 +1,151 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// reference: +// https://github.com/deepinsight/insightface/blob/master/recognition/_tools_/cpp_align/face_align.h +#include "fastdeploy/vision/utils/utils.h" + +namespace fastdeploy { +namespace vision { +namespace utils { + +cv::Mat MeanAxis0(const cv::Mat& src) { + int num = src.rows; + int dim = src.cols; + cv::Mat output(1, dim, CV_32F); + for (int i = 0; i < dim; i++) { + float sum = 0; + for (int j = 0; j < num; j++) { + sum += src.at(j, i); + } + output.at(0, i) = sum / num; + } + return output; +} + +cv::Mat ElementwiseMinus(const cv::Mat& A, const cv::Mat& B) { + cv::Mat output(A.rows, A.cols, A.type()); + assert(B.cols == A.cols); + if (B.cols == A.cols) { + for (int i = 0; i < A.rows; i++) { + for (int j = 0; j < B.cols; j++) { + output.at(i, j) = A.at(i, j) - B.at(0, j); + } + } + } + return output; +} + +cv::Mat VarAxis0(const cv::Mat& src) { + cv::Mat temp_ = ElementwiseMinus(src, MeanAxis0(src)); + cv::multiply(temp_, temp_, temp_); + return MeanAxis0(temp_); +} + +int MatrixRank(cv::Mat M) { + cv::Mat w, u, vt; + cv::SVD::compute(M, w, u, vt); + cv::Mat1b non_zero_singular_values = w > 0.0001; + int rank = countNonZero(non_zero_singular_values); + return rank; +} + +cv::Mat SimilarTransform(cv::Mat& dst, cv::Mat& src) { + int num = dst.rows; + int dim = dst.cols; + cv::Mat src_mean = MeanAxis0(dst); + cv::Mat dst_mean = MeanAxis0(src); + cv::Mat src_demean = ElementwiseMinus(dst, src_mean); + cv::Mat dst_demean = ElementwiseMinus(src, dst_mean); + cv::Mat A = (dst_demean.t() * src_demean) / static_cast(num); + cv::Mat d(dim, 1, CV_32F); + d.setTo(1.0f); + if (cv::determinant(A) < 0) { + d.at(dim - 1, 0) = -1; + } + cv::Mat T = cv::Mat::eye(dim + 1, dim + 1, CV_32F); + cv::Mat U, S, V; + cv::SVD::compute(A, S, U, V); + int rank = MatrixRank(A); + if (rank == 0) { + assert(rank == 0); + } else if (rank == dim - 1) { + if (cv::determinant(U) * cv::determinant(V) > 0) { + T.rowRange(0, dim).colRange(0, dim) = U * V; + } else { + int s = d.at(dim - 1, 0) = -1; + d.at(dim - 1, 0) = -1; + + T.rowRange(0, dim).colRange(0, dim) = U * V; + cv::Mat diag_ = cv::Mat::diag(d); + cv::Mat twp = diag_ * V; // np.dot(np.diag(d), V.T) + cv::Mat B = cv::Mat::zeros(3, 3, CV_8UC1); + cv::Mat C = B.diag(0); + T.rowRange(0, dim).colRange(0, dim) = U * twp; + d.at(dim - 1, 0) = s; + } + } else { + cv::Mat diag_ = cv::Mat::diag(d); + cv::Mat twp = diag_ * V.t(); // np.dot(np.diag(d), V.T) + cv::Mat res = U * twp; // U + T.rowRange(0, dim).colRange(0, dim) = -U.t() * twp; + } + cv::Mat var_ = VarAxis0(src_demean); + float val = cv::sum(var_).val[0]; + cv::Mat res; + cv::multiply(d, S, res); + float scale = 1.0 / val * cv::sum(res).val[0]; + T.rowRange(0, dim).colRange(0, dim) = + -T.rowRange(0, dim).colRange(0, dim).t(); + cv::Mat temp1 = T.rowRange(0, dim).colRange(0, dim); // T[:dim, :dim] + cv::Mat temp2 = src_mean.t(); // src_mean.T + cv::Mat temp3 = temp1 * temp2; // np.dot(T[:dim, :dim], src_mean.T) + cv::Mat temp4 = scale * temp3; + T.rowRange(0, dim).colRange(dim, dim + 1) = -(temp4 - dst_mean.t()); + T.rowRange(0, dim).colRange(0, dim) *= scale; + return T; +} + +std::vector AlignFaceWithFivePoints( + cv::Mat& image, FaceDetectionResult& result, + std::vector> std_landmarks, + std::array output_size) { + FDASSERT(std_landmarks.size() == 5, "The landmarks.size() must be 5.") + FDASSERT(!image.empty(), "The input_image can't be empty.") + std::vector output_images(result.boxes.size()); + if (result.boxes.empty()) { + FDWARNING << "The result is empty." << std::endl; + return output_images; + } + + cv::Mat src(5, 2, CV_32FC1, std_landmarks.data()); + for (int i = 0; i < result.landmarks.size(); i += 5) { + cv::Mat dst(5, 2, CV_32FC1, result.landmarks.data() + i); + cv::Mat m = SimilarTransform(dst, src); + cv::Mat map_matrix; + cv::Rect map_matrix_r = cv::Rect(0, 0, 3, 2); + cv::Mat(m, map_matrix_r).copyTo(map_matrix); + cv::Mat cropped_image_aligned; + cv::warpAffine(image, cropped_image_aligned, map_matrix, + {output_size[0], output_size[1]}); + if (cropped_image_aligned.empty()) { + FDWARNING << "croppedImageAligned is empty." << std::endl; + } + output_images.push_back(cropped_image_aligned); + } + return output_images; +} +} // namespace utils +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/utils/utils.h b/fastdeploy/vision/utils/utils.h index c36d8d0369..9f5106c4ad 100644 --- a/fastdeploy/vision/utils/utils.h +++ b/fastdeploy/vision/utils/utils.h @@ -70,16 +70,32 @@ void SortDetectionResult(DetectionResult* output); void SortDetectionResult(FaceDetectionResult* result); // L2 Norm / cosine similarity (for face recognition, ...) -FASTDEPLOY_DECL std::vector L2Normalize( - const std::vector& values); +FASTDEPLOY_DECL std::vector +L2Normalize(const std::vector& values); FASTDEPLOY_DECL float CosineSimilarity(const std::vector& a, const std::vector& b, bool normalized = true); -bool CropImageByBox(Mat& src_im, Mat* dst_im, - const std::vector& box, std::vector* center, - std::vector* scale, const float expandratio = 0.3); +/** \brief Do face align for model with five points. + * + * \param[in] image The original image + * \param[in] result FaceDetectionResult + * \param[in] std_landmarks Standard face template + * \param[in] output_size The size of output mat + */ +FASTDEPLOY_DECL std::vector AlignFaceWithFivePoints( + cv::Mat& image, FaceDetectionResult& result, + std::vector> std_landmarks = {{38.2946f, 51.6963f}, + {73.5318f, 51.5014f}, + {56.0252f, 71.7366f}, + {41.5493f, 92.3655f}, + {70.7299f, 92.2041f}}, + std::array output_size = {112, 112}); + +bool CropImageByBox(Mat& src_im, Mat* dst_im, const std::vector& box, + std::vector* center, std::vector* scale, + const float expandratio = 0.3); /** * Function: for keypoint detection model, fine positioning of keypoints in