diff --git a/amd_openvx/openvx/ago/ago_util.cpp b/amd_openvx/openvx/ago/ago_util.cpp
index 545de099d9..cbfc9b65d7 100644
--- a/amd_openvx/openvx/ago/ago_util.cpp
+++ b/amd_openvx/openvx/ago/ago_util.cpp
@@ -1769,7 +1769,8 @@ int agoGetDataFromDescription(AgoContext * acontext, AgoGraph * agraph, AgoData
if (data->u.tensor.data_type != VX_TYPE_BOOL && data->u.tensor.data_type != VX_TYPE_INT16 &&
data->u.tensor.data_type != VX_TYPE_UINT8 && data->u.tensor.data_type != VX_TYPE_UINT16 &&
data->u.tensor.data_type != VX_TYPE_FLOAT32 && data->u.tensor.data_type != VX_TYPE_FLOAT16 &&
- data->u.tensor.data_type != VX_TYPE_INT64 && data->u.tensor.data_type != VX_TYPE_INT32)
+ data->u.tensor.data_type != VX_TYPE_INT64 && data->u.tensor.data_type != VX_TYPE_INT32 &&
+ data->u.tensor.data_type != VX_TYPE_UINT32 && data->u.tensor.data_type != VX_TYPE_INT8)
{
agoAddLogEntry(&data->ref, VX_FAILURE, "ERROR: agoGetDataFromDescription: invalid data_type for tensor: %s\n", data_type);
return -1;
diff --git a/amd_openvx/openvx/api/vx_api.cpp b/amd_openvx/openvx/api/vx_api.cpp
index a5b5ecc3a4..4bd11eb15a 100644
--- a/amd_openvx/openvx/api/vx_api.cpp
+++ b/amd_openvx/openvx/api/vx_api.cpp
@@ -9820,6 +9820,21 @@ VX_API_ENTRY vx_status VX_API_CALL vxQueryTensor(vx_tensor tensor, vx_enum attri
#endif
#endif
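+ // Illustrative usage sketch (assumes "tensor" is a valid vx_tensor created by the application):
+ //     vx_uint8 * host_buffer = NULL;
+ //     vxQueryTensor(tensor, VX_TENSOR_BUFFER_HOST, &host_buffer, sizeof(host_buffer));
+ //     host_buffer remains NULL while no host buffer is associated with the tensor.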
+ case VX_TENSOR_BUFFER_HOST:
+ if (size == sizeof(vx_uint8 *)) {
+ if (data->buffer) {
+ *(vx_uint8 **)ptr = data->buffer;
+ }
+ else {
+ *(vx_uint8 **)ptr = NULL;
+ }
+ status = VX_SUCCESS;
+ }
+ break;
default:
status = VX_ERROR_NOT_SUPPORTED;
break;
diff --git a/amd_openvx_extensions/amd_rpp/CMakeLists.txt b/amd_openvx_extensions/amd_rpp/CMakeLists.txt
index 2397baceca..ac91da67b0 100644
--- a/amd_openvx_extensions/amd_rpp/CMakeLists.txt
+++ b/amd_openvx_extensions/amd_rpp/CMakeLists.txt
@@ -115,7 +115,7 @@ list(APPEND SOURCES
source/image/RotatebatchPD.cpp
source/image/SaturationbatchPD.cpp
source/image/ScalebatchPD.cpp
- source/image/SequenceRearrange.cpp
+ source/image/SequenceRearrangebatchPD.cpp
source/image/SnowbatchPD.cpp
source/image/SobelbatchPD.cpp
source/image/SubtractbatchPD.cpp
@@ -128,11 +128,38 @@ list(APPEND SOURCES
source/image/VignettebatchPD.cpp
source/image/WarpAffinebatchPD.cpp
source/image/WarpPerspectivebatchPD.cpp
+ source/tensor/Blend.cpp
+ source/tensor/Blur.cpp
source/tensor/Brightness.cpp
+ source/tensor/ColorTemperature.cpp
+ source/tensor/ColorTwist.cpp
+ source/tensor/Contrast.cpp
source/tensor/Copy.cpp
+ source/tensor/Crop.cpp
source/tensor/CropMirrorNormalize.cpp
+ source/tensor/Exposure.cpp
+ source/tensor/FishEye.cpp
+ source/tensor/Flip.cpp
+ source/tensor/Fog.cpp
+ source/tensor/GammaCorrection.cpp
+ source/tensor/Glitch.cpp
+ source/tensor/Hue.cpp
+ source/tensor/Jitter.cpp
+ source/tensor/LensCorrection.cpp
+ source/tensor/Noise.cpp
source/tensor/Nop.cpp
+ source/tensor/Pixelate.cpp
+ source/tensor/Rain.cpp
source/tensor/Resize.cpp
+ source/tensor/ResizeCrop.cpp
+ source/tensor/ResizeCropMirror.cpp
+ source/tensor/ResizeMirrorNormalize.cpp
+ source/tensor/Rotate.cpp
+ source/tensor/Saturation.cpp
+ source/tensor/SequenceRearrange.cpp
+ source/tensor/Snow.cpp
+ source/tensor/Vignette.cpp
+ source/tensor/WarpAffine.cpp
source/kernel_rpp.cpp
source/internal_publishKernels.cpp
)
diff --git a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
index 5ddc9b5ec2..853051bcac 100644
--- a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
+++ b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
@@ -108,7 +108,7 @@ vx_status Resizetensor_Register(vx_context);
vx_status RotatebatchPD_Register(vx_context);
vx_status SaturationbatchPD_Register(vx_context);
vx_status ScalebatchPD_Register(vx_context);
-vx_status SequenceRearrange_Register(vx_context);
+vx_status SequenceRearrangebatchPD_Register(vx_context);
vx_status SnowbatchPD_Register(vx_context);
vx_status SobelbatchPD_Register(vx_context);
vx_status SubtractbatchPD_Register(vx_context);
@@ -122,101 +122,160 @@ vx_status VignettebatchPD_Register(vx_context);
vx_status WarpAffinebatchPD_Register(vx_context);
vx_status WarpPerspectivebatchPD_Register(vx_context);
-//tensor
+vx_status Blend_Register(vx_context);
+vx_status Blur_Register(vx_context);
vx_status Brightness_Register(vx_context);
+vx_status ColorTemperature_Register(vx_context);
+vx_status ColorTwist_Register(vx_context);
+vx_status Contrast_Register(vx_context);
vx_status Copy_Register(vx_context);
+vx_status Crop_Register(vx_context);
vx_status CropMirrorNormalize_Register(vx_context);
+vx_status Exposure_Register(vx_context);
+vx_status FishEye_Register(vx_context);
+vx_status Flip_Register(vx_context);
+vx_status Fog_Register(vx_context);
+vx_status GammaCorrection_Register(vx_context);
+vx_status Glitch_Register(vx_context);
+vx_status Hue_Register(vx_context);
+vx_status Jitter_Register(vx_context);
+vx_status LensCorrection_Register(vx_context);
+vx_status Noise_Register(vx_context);
vx_status Nop_Register(vx_context);
+vx_status Pixelate_Register(vx_context);
+vx_status Rain_Register(vx_context);
vx_status Resize_Register(vx_context);
+vx_status ResizeCrop_Register(vx_context);
+vx_status ResizeCropMirror_Register(vx_context);
+vx_status ResizeMirrorNormalize_Register(vx_context);
+vx_status Rotate_Register(vx_context);
+vx_status Saturation_Register(vx_context);
+vx_status SequenceRearrange_Register(vx_context);
+vx_status Snow_Register(vx_context);
+vx_status Vignette_Register(vx_context);
+vx_status WarpAffine_Register(vx_context);
// kernel names
-#define VX_KERNEL_RPP_NOPBATCHPD_NAME "org.rpp.NopbatchPD"
-#define VX_KERNEL_RPP_COPYBATCHPD_NAME "org.rpp.CopybatchPD"
-#define VX_KERNEL_RPP_BRIGHTNESSBATCHPD_NAME "org.rpp.BrightnessbatchPD"
-#define VX_KERNEL_RPP_GAMMACORRECTIONBATCHPD_NAME "org.rpp.GammaCorrectionbatchPD"
-#define VX_KERNEL_RPP_BLENDBATCHPD_NAME "org.rpp.BlendbatchPD"
-#define VX_KERNEL_RPP_BLURBATCHPD_NAME "org.rpp.BlurbatchPD"
-#define VX_KERNEL_RPP_CONTRASTBATCHPD_NAME "org.rpp.ContrastbatchPD"
-#define VX_KERNEL_RPP_PIXELATEBATCHPD_NAME "org.rpp.PixelatebatchPD"
-#define VX_KERNEL_RPP_JITTERBATCHPD_NAME "org.rpp.JitterbatchPD"
-#define VX_KERNEL_RPP_SNOWBATCHPD_NAME "org.rpp.SnowbatchPD"
-#define VX_KERNEL_RPP_NOISEBATCHPD_NAME "org.rpp.NoisebatchPD"
-#define VX_KERNEL_RPP_RANDOMSHADOWBATCHPD_NAME "org.rpp.RandomShadowbatchPD"
-#define VX_KERNEL_RPP_FOGBATCHPD_NAME "org.rpp.FogbatchPD"
-#define VX_KERNEL_RPP_RAINBATCHPD_NAME "org.rpp.RainbatchPD"
-#define VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPD_NAME "org.rpp.RandomCropLetterBoxbatchPD"
-#define VX_KERNEL_RPP_EXPOSUREBATCHPD_NAME "org.rpp.ExposurebatchPD"
-#define VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPD_NAME "org.rpp.HistogramBalancebatchPD"
-#define VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPD_NAME "org.rpp.AbsoluteDifferencebatchPD"
-#define VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPD_NAME "org.rpp.AccumulateWeightedbatchPD"
-#define VX_KERNEL_RPP_ACCUMULATEBATCHPD_NAME "org.rpp.AccumulatebatchPD"
-#define VX_KERNEL_RPP_ADDBATCHPD_NAME "org.rpp.AddbatchPD"
-#define VX_KERNEL_RPP_SUBTRACTBATCHPD_NAME "org.rpp.SubtractbatchPD"
-#define VX_KERNEL_RPP_MAGNITUDEBATCHPD_NAME "org.rpp.MagnitudebatchPD"
-#define VX_KERNEL_RPP_MULTIPLYBATCHPD_NAME "org.rpp.MultiplybatchPD"
-#define VX_KERNEL_RPP_PHASEBATCHPD_NAME "org.rpp.PhasebatchPD"
-#define VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPD_NAME "org.rpp.AccumulateSquaredbatchPD"
-#define VX_KERNEL_RPP_BITWISEANDBATCHPD_NAME "org.rpp.BitwiseANDbatchPD"
-#define VX_KERNEL_RPP_BITWISENOTBATCHPD_NAME "org.rpp.BitwiseNOTbatchPD"
-#define VX_KERNEL_RPP_EXCLUSIVEORBATCHPD_NAME "org.rpp.ExclusiveORbatchPD"
-#define VX_KERNEL_RPP_INCLUSIVEORBATCHPD_NAME "org.rpp.InclusiveORbatchPD"
-#define VX_KERNEL_RPP_HISTOGRAM_NAME "org.rpp.Histogram"
-#define VX_KERNEL_RPP_THRESHOLDINGBATCHPD_NAME "org.rpp.ThresholdingbatchPD"
-#define VX_KERNEL_RPP_MAXBATCHPD_NAME "org.rpp.MaxbatchPD"
-#define VX_KERNEL_RPP_MINBATCHPD_NAME "org.rpp.MinbatchPD"
-#define VX_KERNEL_RPP_MINMAXLOC_NAME "org.rpp.MinMaxLoc"
-#define VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPD_NAME "org.rpp.HistogramEqualizebatchPD"
-#define VX_KERNEL_RPP_MEANSTDDEV_NAME "org.rpp.MeanStddev"
-#define VX_KERNEL_RPP_FLIPBATCHPD_NAME "org.rpp.FlipbatchPD"
-#define VX_KERNEL_RPP_RESIZEBATCHPD_NAME "org.rpp.ResizebatchPD"
-#define VX_KERNEL_RPP_RESIZECROPBATCHPD_NAME "org.rpp.ResizeCropbatchPD"
-#define VX_KERNEL_RPP_ROTATEBATCHPD_NAME "org.rpp.RotatebatchPD"
-#define VX_KERNEL_RPP_WARPAFFINEBATCHPD_NAME "org.rpp.WarpAffinebatchPD"
-#define VX_KERNEL_RPP_FISHEYEBATCHPD_NAME "org.rpp.FisheyebatchPD"
-#define VX_KERNEL_RPP_LENSCORRECTIONBATCHPD_NAME "org.rpp.LensCorrectionbatchPD"
-#define VX_KERNEL_RPP_SCALEBATCHPD_NAME "org.rpp.ScalebatchPD"
-#define VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD_NAME "org.rpp.WarpPerspectivebatchPD"
-#define VX_KERNEL_RPP_DILATEBATCHPD_NAME "org.rpp.DilatebatchPD"
-#define VX_KERNEL_RPP_ERODEBATCHPD_NAME "org.rpp.ErodebatchPD"
-#define VX_KERNEL_RPP_HUEBATCHPD_NAME "org.rpp.HuebatchPD"
-#define VX_KERNEL_RPP_SATURATIONBATCHPD_NAME "org.rpp.SaturationbatchPD"
-#define VX_KERNEL_RPP_COLORTEMPERATUREBATCHPD_NAME "org.rpp.ColorTemperaturebatchPD"
-#define VX_KERNEL_RPP_VIGNETTEBATCHPD_NAME "org.rpp.VignettebatchPD"
-#define VX_KERNEL_RPP_CHANNELEXTRACTBATCHPD_NAME "org.rpp.ChannelExtractbatchPD"
-#define VX_KERNEL_RPP_CHANNELCOMBINEBATCHPD_NAME "org.rpp.ChannelCombinebatchPD"
-#define VX_KERNEL_RPP_LOOKUPTABLEBATCHPD_NAME "org.rpp.LookUpTablebatchPD"
-#define VX_KERNEL_RPP_BOXFILTERBATCHPD_NAME "org.rpp.BoxFilterbatchPD"
-#define VX_KERNEL_RPP_SOBELBATCHPD_NAME "org.rpp.SobelbatchPD"
-#define VX_KERNEL_RPP_MEDIANFILTERBATCHPD_NAME "org.rpp.MedianFilterbatchPD"
-#define VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPD_NAME "org.rpp.CustomConvolutionbatchPD"
-#define VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPD_NAME "org.rpp.NonMaxSupressionbatchPD"
-#define VX_KERNEL_RPP_GAUSSIANFILTERBATCHPD_NAME "org.rpp.GaussianFilterbatchPD"
-#define VX_KERNEL_RPP_NONLINEARFILTERBATCHPD_NAME "org.rpp.NonLinearFilterbatchPD"
-#define VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPD_NAME "org.rpp.LocalBinaryPatternbatchPD"
-#define VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPD_NAME "org.rpp.DataObjectCopybatchPD"
-#define VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPD_NAME "org.rpp.GaussianImagePyramidbatchPD"
-#define VX_KERNEL_RPP_LAPLACIANIMAGEPYRAMID_NAME "org.rpp.LaplacianImagePyramid"
-#define VX_KERNEL_RPP_CANNYEDGEDETECTOR_NAME "org.rpp.CannyEdgeDetector"
-#define VX_KERNEL_RPP_HARRISCORNERDETECTOR_NAME "org.rpp.HarrisCornerDetector"
-#define VX_KERNEL_RPP_FASTCORNERDETECTOR_NAME "org.rpp.FastCornerDetector"
-#define VX_KERNEL_RPP_REMAP_NAME "org.rpp.remap"
-#define VX_KERNEL_RPP_TENSORADD_NAME "org.rpp.TensorAdd"
-#define VX_KERNEL_RPP_TENSORSUBTRACT_NAME "org.rpp.TensorSubtract"
-#define VX_KERNEL_RPP_TENSORMULTIPLY_NAME "org.rpp.TensorMultiply"
-#define VX_KERNEL_RPP_TENSORMATRIXMULTIPLY_NAME "org.rpp.TensorMatrixMultiply"
-#define VX_KERNEL_RPP_TENSORLOOKUP_NAME "org.rpp.TensorLookup"
-#define VX_KERNEL_RPP_COLORTWISTBATCHPD_NAME "org.rpp.ColorTwistbatchPD"
-#define VX_KERNEL_RPP_CROPMIRRORNORMALIZEBATCHPD_NAME "org.rpp.CropMirrorNormalizebatchPD"
-#define VX_KERNEL_RPP_CROPPD_NAME "org.rpp.CropPD"
-#define VX_KERNEL_RPP_RESIZECROPMIRRORPD_NAME "org.rpp.ResizeCropMirrorPD"
-#define VX_KERNEL_RPP_RESIZEMIRRORNORMALIZETENSOR_NAME "org.rpp.ResizeMirrorNormalizeTensor"
-#define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange"
-#define VX_KERNEL_RPP_RESIZETENSOR_NAME "org.rpp.Resizetensor"
+#define VX_KERNEL_RPP_NOPBATCHPD_NAME "org.rpp.NopbatchPD"
+#define VX_KERNEL_RPP_COPYBATCHPD_NAME "org.rpp.CopybatchPD"
+#define VX_KERNEL_RPP_BRIGHTNESSBATCHPD_NAME "org.rpp.BrightnessbatchPD"
+#define VX_KERNEL_RPP_GAMMACORRECTIONBATCHPD_NAME "org.rpp.GammaCorrectionbatchPD"
+#define VX_KERNEL_RPP_BLENDBATCHPD_NAME "org.rpp.BlendbatchPD"
+#define VX_KERNEL_RPP_BLURBATCHPD_NAME "org.rpp.BlurbatchPD"
+#define VX_KERNEL_RPP_CONTRASTBATCHPD_NAME "org.rpp.ContrastbatchPD"
+#define VX_KERNEL_RPP_PIXELATEBATCHPD_NAME "org.rpp.PixelatebatchPD"
+#define VX_KERNEL_RPP_JITTERBATCHPD_NAME "org.rpp.JitterbatchPD"
+#define VX_KERNEL_RPP_SNOWBATCHPD_NAME "org.rpp.SnowbatchPD"
+#define VX_KERNEL_RPP_NOISEBATCHPD_NAME "org.rpp.NoisebatchPD"
+#define VX_KERNEL_RPP_RANDOMSHADOWBATCHPD_NAME "org.rpp.RandomShadowbatchPD"
+#define VX_KERNEL_RPP_FOGBATCHPD_NAME "org.rpp.FogbatchPD"
+#define VX_KERNEL_RPP_RAINBATCHPD_NAME "org.rpp.RainbatchPD"
+#define VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPD_NAME "org.rpp.RandomCropLetterBoxbatchPD"
+#define VX_KERNEL_RPP_EXPOSUREBATCHPD_NAME "org.rpp.ExposurebatchPD"
+#define VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPD_NAME "org.rpp.HistogramBalancebatchPD"
+#define VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPD_NAME "org.rpp.AbsoluteDifferencebatchPD"
+#define VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPD_NAME "org.rpp.AccumulateWeightedbatchPD"
+#define VX_KERNEL_RPP_ACCUMULATEBATCHPD_NAME "org.rpp.AccumulatebatchPD"
+#define VX_KERNEL_RPP_ADDBATCHPD_NAME "org.rpp.AddbatchPD"
+#define VX_KERNEL_RPP_SUBTRACTBATCHPD_NAME "org.rpp.SubtractbatchPD"
+#define VX_KERNEL_RPP_MAGNITUDEBATCHPD_NAME "org.rpp.MagnitudebatchPD"
+#define VX_KERNEL_RPP_MULTIPLYBATCHPD_NAME "org.rpp.MultiplybatchPD"
+#define VX_KERNEL_RPP_PHASEBATCHPD_NAME "org.rpp.PhasebatchPD"
+#define VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPD_NAME "org.rpp.AccumulateSquaredbatchPD"
+#define VX_KERNEL_RPP_BITWISEANDBATCHPD_NAME "org.rpp.BitwiseANDbatchPD"
+#define VX_KERNEL_RPP_BITWISENOTBATCHPD_NAME "org.rpp.BitwiseNOTbatchPD"
+#define VX_KERNEL_RPP_EXCLUSIVEORBATCHPD_NAME "org.rpp.ExclusiveORbatchPD"
+#define VX_KERNEL_RPP_INCLUSIVEORBATCHPD_NAME "org.rpp.InclusiveORbatchPD"
+#define VX_KERNEL_RPP_HISTOGRAM_NAME "org.rpp.Histogram"
+#define VX_KERNEL_RPP_THRESHOLDINGBATCHPD_NAME "org.rpp.ThresholdingbatchPD"
+#define VX_KERNEL_RPP_MAXBATCHPD_NAME "org.rpp.MaxbatchPD"
+#define VX_KERNEL_RPP_MINBATCHPD_NAME "org.rpp.MinbatchPD"
+#define VX_KERNEL_RPP_MINMAXLOC_NAME "org.rpp.MinMaxLoc"
+#define VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPD_NAME "org.rpp.HistogramEqualizebatchPD"
+#define VX_KERNEL_RPP_MEANSTDDEV_NAME "org.rpp.MeanStddev"
+#define VX_KERNEL_RPP_FLIPBATCHPD_NAME "org.rpp.FlipbatchPD"
+#define VX_KERNEL_RPP_RESIZEBATCHPD_NAME "org.rpp.ResizebatchPD"
+#define VX_KERNEL_RPP_RESIZECROPBATCHPD_NAME "org.rpp.ResizeCropbatchPD"
+#define VX_KERNEL_RPP_ROTATEBATCHPD_NAME "org.rpp.RotatebatchPD"
+#define VX_KERNEL_RPP_WARPAFFINEBATCHPD_NAME "org.rpp.WarpAffinebatchPD"
+#define VX_KERNEL_RPP_FISHEYEBATCHPD_NAME "org.rpp.FisheyebatchPD"
+#define VX_KERNEL_RPP_LENSCORRECTIONBATCHPD_NAME "org.rpp.LensCorrectionbatchPD"
+#define VX_KERNEL_RPP_SCALEBATCHPD_NAME "org.rpp.ScalebatchPD"
+#define VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD_NAME "org.rpp.WarpPerspectivebatchPD"
+#define VX_KERNEL_RPP_DILATEBATCHPD_NAME "org.rpp.DilatebatchPD"
+#define VX_KERNEL_RPP_ERODEBATCHPD_NAME "org.rpp.ErodebatchPD"
+#define VX_KERNEL_RPP_HUEBATCHPD_NAME "org.rpp.HuebatchPD"
+#define VX_KERNEL_RPP_SATURATIONBATCHPD_NAME "org.rpp.SaturationbatchPD"
+#define VX_KERNEL_RPP_COLORTEMPERATUREBATCHPD_NAME "org.rpp.ColorTemperaturebatchPD"
+#define VX_KERNEL_RPP_VIGNETTEBATCHPD_NAME "org.rpp.VignettebatchPD"
+#define VX_KERNEL_RPP_CHANNELEXTRACTBATCHPD_NAME "org.rpp.ChannelExtractbatchPD"
+#define VX_KERNEL_RPP_CHANNELCOMBINEBATCHPD_NAME "org.rpp.ChannelCombinebatchPD"
+#define VX_KERNEL_RPP_LOOKUPTABLEBATCHPD_NAME "org.rpp.LookUpTablebatchPD"
+#define VX_KERNEL_RPP_BOXFILTERBATCHPD_NAME "org.rpp.BoxFilterbatchPD"
+#define VX_KERNEL_RPP_SOBELBATCHPD_NAME "org.rpp.SobelbatchPD"
+#define VX_KERNEL_RPP_MEDIANFILTERBATCHPD_NAME "org.rpp.MedianFilterbatchPD"
+#define VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPD_NAME "org.rpp.CustomConvolutionbatchPD"
+#define VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPD_NAME "org.rpp.NonMaxSupressionbatchPD"
+#define VX_KERNEL_RPP_GAUSSIANFILTERBATCHPD_NAME "org.rpp.GaussianFilterbatchPD"
+#define VX_KERNEL_RPP_NONLINEARFILTERBATCHPD_NAME "org.rpp.NonLinearFilterbatchPD"
+#define VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPD_NAME "org.rpp.LocalBinaryPatternbatchPD"
+#define VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPD_NAME "org.rpp.DataObjectCopybatchPD"
+#define VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPD_NAME "org.rpp.GaussianImagePyramidbatchPD"
+#define VX_KERNEL_RPP_LAPLACIANIMAGEPYRAMID_NAME "org.rpp.LaplacianImagePyramid"
+#define VX_KERNEL_RPP_CANNYEDGEDETECTOR_NAME "org.rpp.CannyEdgeDetector"
+#define VX_KERNEL_RPP_HARRISCORNERDETECTOR_NAME "org.rpp.HarrisCornerDetector"
+#define VX_KERNEL_RPP_FASTCORNERDETECTOR_NAME "org.rpp.FastCornerDetector"
+#define VX_KERNEL_RPP_REMAP_NAME "org.rpp.remap"
+#define VX_KERNEL_RPP_TENSORADD_NAME "org.rpp.TensorAdd"
+#define VX_KERNEL_RPP_TENSORSUBTRACT_NAME "org.rpp.TensorSubtract"
+#define VX_KERNEL_RPP_TENSORMULTIPLY_NAME "org.rpp.TensorMultiply"
+#define VX_KERNEL_RPP_TENSORMATRIXMULTIPLY_NAME "org.rpp.TensorMatrixMultiply"
+#define VX_KERNEL_RPP_TENSORLOOKUP_NAME "org.rpp.TensorLookup"
+#define VX_KERNEL_RPP_COLORTWISTBATCHPD_NAME "org.rpp.ColorTwistbatchPD"
+#define VX_KERNEL_RPP_CROPMIRRORNORMALIZEBATCHPD_NAME "org.rpp.CropMirrorNormalizebatchPD"
+#define VX_KERNEL_RPP_CROPPD_NAME "org.rpp.CropPD"
+#define VX_KERNEL_RPP_RESIZECROPMIRRORPD_NAME "org.rpp.ResizeCropMirrorPD"
+#define VX_KERNEL_RPP_RESIZEMIRRORNORMALIZETENSOR_NAME "org.rpp.ResizeMirrorNormalizeTensor"
+#define VX_KERNEL_RPP_SEQUENCEREARRANGEBATCHPD_NAME "org.rpp.SequenceRearrangebatchPD"
+#define VX_KERNEL_RPP_RESIZETENSOR_NAME "org.rpp.Resizetensor"
//tensor
+#define VX_KERNEL_RPP_BLEND_NAME "org.rpp.Blend"
+#define VX_KERNEL_RPP_BLUR_NAME "org.rpp.Blur"
+#define VX_KERNEL_RPP_BRIGHTNESS_NAME "org.rpp.Brightness"
+#define VX_KERNEL_RPP_COLORTEMPERATURE_NAME "org.rpp.ColorTemperature"
+#define VX_KERNEL_RPP_COLORTWIST_NAME "org.rpp.ColorTwist"
+#define VX_KERNEL_RPP_CONTRAST_NAME "org.rpp.Contrast"
+#define VX_KERNEL_RPP_COPY_NAME "org.rpp.Copy"
+#define VX_KERNEL_RPP_CROP_NAME "org.rpp.Crop"
+#define VX_KERNEL_RPP_CROPMIRRORNORMALIZE_NAME "org.rpp.CropMirrorNormalize"
+#define VX_KERNEL_RPP_EXPOSURE_NAME "org.rpp.Exposure"
+#define VX_KERNEL_RPP_FISHEYE_NAME "org.rpp.FishEye"
+#define VX_KERNEL_RPP_FLIP_NAME "org.rpp.Flip"
+#define VX_KERNEL_RPP_FOG_NAME "org.rpp.Fog"
+#define VX_KERNEL_RPP_GAMMACORRECTION_NAME "org.rpp.GammaCorrection"
+#define VX_KERNEL_RPP_GLITCH_NAME "org.rpp.Glitch"
+#define VX_KERNEL_RPP_HUE_NAME "org.rpp.Hue"
+#define VX_KERNEL_RPP_JITTER_NAME "org.rpp.Jitter"
+#define VX_KERNEL_RPP_LENSCORRECTION_NAME "org.rpp.LensCorrection"
+#define VX_KERNEL_RPP_NOISE_NAME "org.rpp.Noise"
+#define VX_KERNEL_RPP_NOP_NAME "org.rpp.Nop"
+#define VX_KERNEL_RPP_PIXELATE_NAME "org.rpp.Pixelate"
+#define VX_KERNEL_RPP_RAIN_NAME "org.rpp.Rain"
+#define VX_KERNEL_RPP_RESIZE_NAME "org.rpp.Resize"
+#define VX_KERNEL_RPP_RESIZECROP_NAME "org.rpp.ResizeCrop"
+#define VX_KERNEL_RPP_RESIZECROPMIRROR_NAME "org.rpp.ResizeCropMirror"
+#define VX_KERNEL_RPP_RESIZEMIRRORNORMALIZE_NAME "org.rpp.ResizeMirrorNormalize"
+#define VX_KERNEL_RPP_ROTATE_NAME "org.rpp.Rotate"
+#define VX_KERNEL_RPP_SATURATION_NAME "org.rpp.Saturation"
+#define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange"
+#define VX_KERNEL_RPP_SNOW_NAME "org.rpp.Snow"
+#define VX_KERNEL_RPP_VIGNETTE_NAME "org.rpp.Vignette"
+#define VX_KERNEL_RPP_WARPAFFINE_NAME "org.rpp.WarpAffine"
#define VX_KERNEL_RPP_BRIGHTNESS_NAME "org.rpp.Brightness"
#define VX_KERNEL_RPP_COPY_NAME "org.rpp.Copy"
#define VX_KERNEL_RPP_CROPMIRRORNORMALIZE_NAME "org.rpp.CropMirrorNormalize"
#define VX_KERNEL_RPP_NOP_NAME "org.rpp.Nop"
#define VX_KERNEL_RPP_RESIZE_NAME "org.rpp.Resize"
+
#endif //_AMDVX_EXT__PUBLISH_KERNELS_H_
diff --git a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
index e36b7745d6..41da96b0aa 100644
--- a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
+++ b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
@@ -102,7 +102,7 @@ extern "C"
VX_KERNEL_RPP_SCALEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x45,
VX_KERNEL_RPP_SATURATIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x46,
VX_KERNEL_RPP_SOBELBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x47,
- VX_KERNEL_RPP_SEQUENCEREARRANGE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x48,
+ VX_KERNEL_RPP_SEQUENCEREARRANGEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x48,
VX_KERNEL_RPP_THRESHOLDINGBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x49,
VX_KERNEL_RPP_TENSORADD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4a,
VX_KERNEL_RPP_TENSORSUBTRACT = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4b,
@@ -115,12 +115,39 @@ extern "C"
VX_KERNEL_RPP_RESIZETENSOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x52,
VX_KERNEL_RPP_RESIZEMIRRORNORMALIZETENSOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x53,
- //tensor
- VX_KERNEL_RPP_BRIGHTNESS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x54,
- VX_KERNEL_RPP_COPY = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x55,
- VX_KERNEL_RPP_CROPMIRRORNORMALIZE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x56,
- VX_KERNEL_RPP_NOP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x57,
- VX_KERNEL_RPP_RESIZE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x58,
+ //tensor
+ VX_KERNEL_RPP_BLEND = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x54,
+ VX_KERNEL_RPP_BLUR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x55,
+ VX_KERNEL_RPP_BRIGHTNESS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x56,
+ VX_KERNEL_RPP_COLORTEMPERATURE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x57,
+ VX_KERNEL_RPP_COLORTWIST = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x58,
+ VX_KERNEL_RPP_CONTRAST = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x59,
+ VX_KERNEL_RPP_COPY = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x5a,
+ VX_KERNEL_RPP_CROP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x5b,
+ VX_KERNEL_RPP_CROPMIRRORNORMALIZE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x5c,
+ VX_KERNEL_RPP_EXPOSURE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x5d,
+ VX_KERNEL_RPP_FISHEYE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x5e,
+ VX_KERNEL_RPP_FLIP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x5f,
+ VX_KERNEL_RPP_FOG = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x60,
+ VX_KERNEL_RPP_GAMMACORRECTION = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x61,
+ VX_KERNEL_RPP_GLITCH = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x62,
+ VX_KERNEL_RPP_HUE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x63,
+ VX_KERNEL_RPP_JITTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x64,
+ VX_KERNEL_RPP_LENSCORRECTION = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x65,
+ VX_KERNEL_RPP_NOISE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x66,
+ VX_KERNEL_RPP_NOP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x67,
+ VX_KERNEL_RPP_PIXELATE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x68,
+ VX_KERNEL_RPP_RAIN = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x69,
+ VX_KERNEL_RPP_RESIZE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x6a,
+ VX_KERNEL_RPP_RESIZECROP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x6b,
+ VX_KERNEL_RPP_RESIZECROPMIRROR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x6c,
+ VX_KERNEL_RPP_RESIZEMIRRORNORMALIZE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x6d,
+ VX_KERNEL_RPP_ROTATE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x6e,
+ VX_KERNEL_RPP_SATURATION = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x6f,
+ VX_KERNEL_RPP_SEQUENCEREARRANGE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x70,
+ VX_KERNEL_RPP_SNOW = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x71,
+ VX_KERNEL_RPP_VIGNETTE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x72,
+ VX_KERNEL_RPP_WARPAFFINE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x73
};
#ifdef __cplusplus
diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
index e4276789ae..2a4c75ebe2 100644
--- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
+++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
@@ -36,6 +36,11 @@ THE SOFTWARE.
*/
#ifndef dimof
+/*! \def dimof(x)
+ * \brief A macro to get the number of elements in an array.
+ * \param [in] x The array whose size is to be determined.
+ * \return The number of elements in the array.
+ */
#define dimof(x) (sizeof(x) / sizeof(x[0]))
#endif
@@ -43,10 +48,26 @@ THE SOFTWARE.
#if _WIN32
#define SHARED_PUBLIC __declspec(dllexport)
#else
+/*! \def SHARED_PUBLIC
+ * \brief A macro to specify public visibility for shared library symbols.
+ */
#define SHARED_PUBLIC __attribute__((visibility("default")))
#endif
#endif
+/*! \brief Creates a node in a graph using a predefined kernel structure.
+ * \param [in] graph The handle to the graph.
+ * \param [in] kernelenum The enum value representing the kernel to be used.
+ * \param [in] params An array of parameter references for the kernel.
+ * \param [in] num The number of parameters in the params array.
+ * \return A handle to the created node.
+ */
vx_node vxCreateNodeByStructure(vx_graph graph, vx_enum kernelenum, vx_reference params[], vx_uint32 num);
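+
+/* A minimal usage sketch (illustrative only; kernelEnum and the entries of params[] are application-provided
+ * and must match the parameter list that the chosen kernel was published with):
+ *     vx_reference params[] = { (vx_reference)pSrc, (vx_reference)pDst };
+ *     vx_node node = vxCreateNodeByStructure(graph, kernelEnum, params, dimof(params));
+ */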
#ifdef __cplusplus
@@ -1343,7 +1358,7 @@ extern "C"
* \returns A node reference \ref vx_node. Any possible errors preventing a
* successful creation should be checked using \ref vxGetStatus.
*/
- SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SequenceRearrange(vx_graph graph, vx_image pSrc, vx_image pDst, vx_array newOrder, vx_uint32 newSequenceLength, vx_uint32 sequenceLength, vx_uint32 sequenceCount);
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SequenceRearrangebatchPD(vx_graph graph, vx_image pSrc, vx_image pDst, vx_array newOrder, vx_uint32 newSequenceLength, vx_uint32 sequenceLength, vx_uint32 sequenceCount);
/*!
* \brief [Graph] Creates a RPP Resize Tensor function node.
@@ -1364,81 +1379,503 @@
SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Resizetensor(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_int32 interpolation_type, vx_uint32 nbatchSize);
// Tensor Augmentations
- /*! \brief [Graph] Creates a RPP Brightness function node.
+ /*! \brief [Graph] Creates a Brightness function node.
* \ingroup group_amd_rpp
* \param [in] graph The handle to the graph.
- * \param [in] pSrc The input tensor data.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
* \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
- * \param [out] pDst The output tensor data.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
* \param [in] pAlpha The input array in \ref VX_TYPE_FLOAT32 format containing the alpha data.
* \param [in] pBeta The input array in \ref VX_TYPE_FLOAT32 format containing the beta data.
* \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
* \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
* \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
- * \return vx_node.
- * \returns A node reference \ref vx_node. Any possible errors preventing a
- * successful creation should be checked using \ref vxGetStatus.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
*/
- SHARED_PUBLIC vx_node VX_API_CALL vxRppBrightness(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAlpha, vx_array pBeta, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppBrightness(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAlpha, vx_array pBeta, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
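+
+ /* A minimal usage sketch (illustrative only; the graph, the pSrc/pSrcRoi/pDst tensors, the pAlpha/pBeta arrays
+ * and the inputLayout/outputLayout/roiType scalars are assumed to be created by the application beforehand):
+ *     vx_node node = vxExtRppBrightness(graph, pSrc, pSrcRoi, pDst, pAlpha, pBeta, inputLayout, outputLayout, roiType);
+ *     vx_status status = vxGetStatus((vx_reference)node);  // check node creation before vxVerifyGraph
+ */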
- /*! \brief [Graph] Creates a RPP Copy function node.
+ /*! \brief [Graph] Creates a Copy function node.
* \ingroup group_amd_rpp
* \param [in] graph The handle to the graph.
* \param [in] pSrc The input tensor data.
* \param [out] pDst The output tensor data.
- * \return vx_node.
- * \returns A node reference \ref vx_node. Any possible errors preventing a
- * successful creation should be checked using \ref vxGetStatus.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
*/
- SHARED_PUBLIC vx_node VX_API_CALL vxRppCopy(vx_graph graph, vx_tensor pSrc, vx_tensor pDst);
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppCopy(vx_graph graph, vx_tensor pSrc, vx_tensor pDst);
- /*! \brief [Graph] Creates a RPP CropMirrorNormalize function node.
+ /*! \brief [Graph] Creates a CropMirrorNormalize function node.
* \ingroup group_amd_rpp
* \param [in] graph The handle to the graph.
- * \param [in] pSrc The input tensor data.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
* \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
- * \param [out] pDst The output tensor data.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
* \param [in] pMultiplier The input array in \ref VX_TYPE_FLOAT32 format containing the multiplier data.
* \param [in] pOffset The input array in \ref VX_TYPE_FLOAT32 format containing the offset data.
- * \param [in] pFlip The input array in \ref VX_TYPE_INT32 format containing the flip data.
+ * \param [in] pMirror The input array in \ref VX_TYPE_INT32 format containing the mirror flag data.
* \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
* \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
* \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
- * \return vx_node.
- * \returns A node reference \ref vx_node. Any possible errors preventing a
- * successful creation should be checked using \ref vxGetStatus.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
*/
- SHARED_PUBLIC vx_node VX_API_CALL vxRppCropMirrorNormalize(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pMultiplier, vx_array pOffset, vx_array pFlip, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppCropMirrorNormalize(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pMultiplier, vx_array pOffset, vx_array pMirror, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
- /*! \brief [Graph] Creates a RPP Nop function node.
+ /*! \brief [Graph] Creates a Nop function node.
* \ingroup group_amd_rpp
* \param [in] graph The handle to the graph.
* \param [in] pSrc The input tensor data.
* \param [out] pDst The output tensor data.
- * \return vx_node.
- * \returns A node reference \ref vx_node. Any possible errors preventing a
- * successful creation should be checked using \ref vxGetStatus.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
*/
- SHARED_PUBLIC vx_node VX_API_CALL vxRppNop(vx_graph graph, vx_tensor pSrc, vx_tensor pDst);
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppNop(vx_graph graph, vx_tensor pSrc, vx_tensor pDst);
- /*! \brief [Graph] Creates a RPP vxRppResize function node.
+ /*! \brief [Graph] Creates a Resize function node.
* \ingroup group_amd_rpp
* \param [in] graph The handle to the graph.
- * \param [in] pSrc The input tensor data.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
* \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
- * \param [out] pDst The output tensor data.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pDstWidth The input array in \ref VX_TYPE_UINT32 format containing the output width data.
+ * \param [in] pDstHeight The input array in \ref VX_TYPE_UINT32 format containing the output height data.
+ * \param [in] interpolationType The resize interpolation type in \ref VX_TYPE_INT32 format containing the type of interpolation.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppResize(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pDstWidth, vx_array pDstHeight, vx_scalar interpolationType, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*!
+ * \brief [Graph] Creates a SequenceRearrange function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pNewOrder The rearrange order in \ref VX_TYPE_UINT32 containing the order in which frames are copied.
+ * \param [in] layout The layout in \ref VX_TYPE_INT32 denotes the layout of input and output tensor.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSequenceRearrange(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_array pNewOrder, vx_scalar layout);
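+
+ /* A minimal usage sketch (illustrative only; the pSrc/pDst tensors, the pNewOrder array and the layout scalar
+ * are assumed to be created by the application beforehand):
+ *     vx_node node = vxExtRppSequenceRearrange(graph, pSrc, pDst, pNewOrder, layout);
+ *     vx_status status = vxGetStatus((vx_reference)node);
+ */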
+
+ /*! \brief [Graph] Creates a Blend function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc1 The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrc2 The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pShift The input array in \ref VX_TYPE_FLOAT32 format containing the shift data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppBlend(vx_graph graph, vx_tensor pSrc1, vx_tensor pSrc2, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pShift, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Blur function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pKernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppBlur(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pKernelSize, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a ColorTemperature function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pAdjustValue The input array in \ref VX_TYPE_FLOAT32 format containing the adjustment value data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppColorTemperature(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAdjustValue, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a ColorTwist function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pAlpha The input array in \ref VX_TYPE_FLOAT32 format containing the alpha data.
+ * \param [in] pBeta The input array in \ref VX_TYPE_FLOAT32 format containing the beta data.
+ * \param [in] pHue The input array in \ref VX_TYPE_FLOAT32 format containing the hue data.
+ * \param [in] pSat The input array in \ref VX_TYPE_FLOAT32 format containing the saturation data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppColorTwist(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAlpha, vx_array pBeta, vx_array pHue, vx_array pSat, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Contrast function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pContrastFactor The input array in \ref VX_TYPE_FLOAT32 format containing the contrast factor data.
+ * \param [in] pContrastCenter The input array in \ref VX_TYPE_FLOAT32 format containing the contrast center data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppContrast(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pContrastFactor, vx_array pContrastCenter, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Crop function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppCrop(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates an Exposure function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pExposureFactor The input array in \ref VX_TYPE_FLOAT32 format containing the exposure factor data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppExposure(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pExposureFactor, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a FishEye function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppFishEye(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Flip function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pHflag The input array in \ref VX_TYPE_UINT32 format containing the horizontal flag data.
+ * \param [in] pVflag The input array in \ref VX_TYPE_UINT32 format containing the vertical flag data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppFlip(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pHflag, vx_array pVflag, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Fog function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pFogValue The input array in \ref VX_TYPE_FLOAT32 format containing the fog value data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppFog(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pFogValue, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a GammaCorrection function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pGamma The input array in \ref VX_TYPE_FLOAT32 format containing the gamma data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppGammaCorrection(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pGamma, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Glitch function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pXoffsetR The input array in \ref VX_TYPE_UINT32 format containing the x offset for r-channel data.
+ * \param [in] pYoffsetR The input array in \ref VX_TYPE_UINT32 format containing the y offset for r-channel data.
+ * \param [in] pXoffsetG The input array in \ref VX_TYPE_UINT32 format containing the x offset for g-channel data.
+ * \param [in] pYoffsetG The input array in \ref VX_TYPE_UINT32 format containing the y offset for g-channel data.
+ * \param [in] pXoffsetB The input array in \ref VX_TYPE_UINT32 format containing the x offset for b-channel data.
+ * \param [in] pYoffsetB The input array in \ref VX_TYPE_UINT32 format containing the y offset for b-channel data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppGlitch(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pXoffsetR, vx_array pYoffsetR, vx_array pXoffsetG, vx_array pYoffsetG, vx_array pXoffsetB, vx_array pYoffsetB, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Hue function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pHueShift The input array in \ref VX_TYPE_FLOAT32 format containing the hue shift data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppHue(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pHueShift, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Jitter function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pKernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data.
+ * \param [in] seed The input scalar in \ref VX_TYPE_UINT32 contains the seed value.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppJitter(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pKernelSize, vx_scalar seed, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a LensCorrection function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pStrength The input array in \ref VX_TYPE_FLOAT32 format containing the strength value data.
+ * \param [in] pZoom The input array in \ref VX_TYPE_FLOAT32 format containing the zoom value data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppLensCorrection(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pStrength, vx_array pZoom, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Noise function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pNoiseProb The input array in \ref VX_TYPE_FLOAT32 format containing the noise probability data.
+ * \param [in] pSaltProb The input array in \ref VX_TYPE_FLOAT32 format containing the salt probability data.
+ * \param [in] pSaltValue The input array in \ref VX_TYPE_FLOAT32 format containing the salt value data.
+ * \param [in] pPepperValue The input array in \ref VX_TYPE_FLOAT32 format containing the pepper value data.
+ * \param [in] seed The input scalar in \ref VX_TYPE_UINT32 contains the seed value.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppNoise(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pNoiseProb, vx_array pSaltProb, vx_array pSaltValue, vx_array pPepperValue, vx_scalar seed, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Rain function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pRainValue The input array in \ref VX_TYPE_FLOAT32 format containing the rain value data.
+ * \param [in] pRainWidth The input array in \ref VX_TYPE_UINT32 format containing the rain width data.
+ * \param [in] pRainHeight The input array in \ref VX_TYPE_UINT32 format containing the rain height data.
+ * \param [in] pRainTransperancy The input array in \ref VX_TYPE_FLOAT32 format containing the rain transparency data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppRain(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pRainValue, vx_array pRainWidth, vx_array pRainHeight, vx_array pRainTransperancy, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a ResizeCrop function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [in] pCropTensor The input tensor of batch size in unsigned int containing the crop coordinates for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 format data.
* \param [in] pDstWidth The input array in \ref VX_TYPE_UINT32 format containing the output width data.
* \param [in] pDstHeight The input array in \ref VX_TYPE_UINT32 format containing the output height data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppResizeCrop(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pCropTensor, vx_tensor pDst, vx_array pDstWidth, vx_array pDstHeight, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a ResizeCropMirror function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pDstWidth The input array in \ref VX_TYPE_UINT32 format containing the output width data.
+ * \param [in] pDstHeight The input array in \ref VX_TYPE_UINT32 format containing the output height data.
+ * \param [in] pMirror The input array in \ref VX_TYPE_INT32 format containing the mirror data.
* \param [in] interpolationType The resize interpolation type in \ref VX_TYPE_INT32 format containing the type of interpolation.
* \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
* \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
* \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppResizeCropMirror(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pDstWidth, vx_array pDstHeight, vx_array pMirror, vx_scalar interpolationType, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a ResizeMirrorNormalize function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pDstWidth The input array in \ref VX_TYPE_UINT32 format containing the output width data.
+ * \param [in] pDstHeight The input array in \ref VX_TYPE_UINT32 format containing the output height data.
+ * \param [in] interpolationType The resize interpolation type in \ref VX_TYPE_INT32 format containing the type of interpolation.
+ * \param [in] pMean The input array in \ref VX_TYPE_FLOAT32 format containing the mean data.
+ * \param [in] pStdDev The input array in \ref VX_TYPE_FLOAT32 format containing the std-dev data.
+ * \param [in] pMirror The input array in \ref VX_TYPE_INT32 format containing the mirror data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppResizeMirrorNormalize(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pDstWidth, vx_array pDstHeight, vx_scalar interpolationType, vx_array pMean, vx_array pStdDev, vx_array pMirror, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Rotate function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pAngle The input array in \ref VX_TYPE_FLOAT32 format containing the angle data.
+ * \param [in] interpolationType The resize interpolation type in \ref VX_TYPE_INT32 format containing the type of interpolation.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppRotate(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAngle, vx_scalar interpolationType, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Saturation function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSaturationFactor The input array in \ref VX_TYPE_FLOAT32 format containing the saturation factor data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSaturation(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pSaturationFactor, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Snow function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pSnowValue The input array in \ref VX_TYPE_FLOAT32 format containing the snow value data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSnow(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pSnowValue, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Pixelate function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppPixelate(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Vignette function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pStdDev The input array in VX_TYPE_FLOAT32 format containing the standard deviation data.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppVignette(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pStdDev, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*! \brief [Graph] Creates a Warp-Affine function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data.
+ * \param [in] pAffineArray The input array in \ref VX_TYPE_FLOAT32 format containing the affine transformation data.
+ * \param [in] interpolationType The resize interpolation type in \ref VX_TYPE_INT32 format containing the type of interpolation.
+ * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+ * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type.
+ * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ */
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppWarpAffine(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAffineArray, vx_scalar interpolationType, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
+
+ /*!
+ * \brief [Graph] Creates a Tensor SequenceRearrange function node.
+ * \ingroup group_amd_rpp
+ * \param [in] graph The handle to the graph.
+ * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 format data.
+ * \param [in] pNewOrder The input array in \ref VX_TYPE_UINT32 format containing the new order in which frames are copied.
+ * \param [in] layout The layout in \ref VX_TYPE_INT32 denotes the layout of input and output tensor.
* \return vx_node.
* \returns A node reference \ref vx_node. Any possible errors preventing a
* successful creation should be checked using \ref vxGetStatus.
*/
- SHARED_PUBLIC vx_node VX_API_CALL vxRppResize(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pDstWidth, vx_array pDstHeight, vx_scalar interpolationType, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
-
+ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSequenceRearrange(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_array pNewOrder, vx_scalar layout);
#ifdef __cplusplus
}
#endif
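
As a usage reference for the declarations above, the following is a minimal, illustrative sketch of wiring one of the new tensor nodes (vxExtRppSaturation) into an OpenVX graph. It is not code from this repository: the extension header name, the vxLoadKernels module string, the tensor dimension ordering, the ROI tensor shape, and the numeric values used for the layout and ROI-type scalars are assumptions chosen for the example.

#include <VX/vx.h>
#include "vx_ext_rpp.h"   // assumed header exposing the vxExtRpp* declarations above

int main() {
    vx_context context = vxCreateContext();
    vxLoadKernels(context, "vx_rpp");                 // module name is an assumption
    vx_graph graph = vxCreateGraph(context);

    // Batch of 2 images, assumed NHWC ordering with illustrative sizes.
    vx_size imgDims[4] = {2, 224, 224, 3};
    vx_tensor pSrc = vxCreateTensor(context, 4, imgDims, VX_TYPE_UINT8, 0);
    vx_tensor pDst = vxCreateTensor(context, 4, imgDims, VX_TYPE_UINT8, 0);

    // One xywh ROI (4 unsigned values) per image; the shape is an assumption.
    vx_size roiDims[2] = {2, 4};
    vx_tensor pSrcRoi = vxCreateTensor(context, 2, roiDims, VX_TYPE_UINT32, 0);

    // One saturation factor per image in the batch.
    vx_float32 factors[2] = {1.2f, 0.8f};
    vx_array pSaturationFactor = vxCreateArray(context, VX_TYPE_FLOAT32, 2);
    vxAddArrayItems(pSaturationFactor, 2, factors, sizeof(vx_float32));

    // Layout / ROI-type selectors; the value 0 is a placeholder, not a documented enumerant.
    vx_int32 layoutValue = 0, roiValue = 0;
    vx_scalar inputLayout  = vxCreateScalar(context, VX_TYPE_INT32, &layoutValue);
    vx_scalar outputLayout = vxCreateScalar(context, VX_TYPE_INT32, &layoutValue);
    vx_scalar roiType      = vxCreateScalar(context, VX_TYPE_INT32, &roiValue);

    vx_node node = vxExtRppSaturation(graph, pSrc, pSrcRoi, pDst, pSaturationFactor,
                                      inputLayout, outputLayout, roiType);
    if (vxGetStatus((vx_reference)node) == VX_SUCCESS && vxVerifyGraph(graph) == VX_SUCCESS) {
        vxProcessGraph(graph);                        // populate pSrc before this in real use
    }

    vxReleaseGraph(&graph);
    vxReleaseContext(&context);
    return 0;
}

The same calling pattern applies to every declaration in this hunk: vx_tensor objects for pSrc/pSrcRoi/pDst, vx_array objects holding one value per image in the batch, and VX_TYPE_INT32 scalars for the layout and ROI-type selectors.
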
diff --git a/amd_openvx_extensions/amd_rpp/source/image/SequenceRearrange.cpp b/amd_openvx_extensions/amd_rpp/source/image/SequenceRearrangebatchPD.cpp
similarity index 93%
rename from amd_openvx_extensions/amd_rpp/source/image/SequenceRearrange.cpp
rename to amd_openvx_extensions/amd_rpp/source/image/SequenceRearrangebatchPD.cpp
index a37681d699..3a30863f3d 100644
--- a/amd_openvx_extensions/amd_rpp/source/image/SequenceRearrange.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/image/SequenceRearrangebatchPD.cpp
@@ -1,20 +1,20 @@
/*
-SequenceRearrangeright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a SequenceRearrange
+Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
-to use, SequenceRearrange, modify, merge, publish, distribute, sublicense, and/or sell
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
-The above SequenceRearrangeright notice and this permission notice shall be included in
+The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR SequenceRearrangeRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
@@ -22,7 +22,7 @@ THE SOFTWARE.
#include "internal_publishKernels.h"
-struct SequenceRearrangeLocalData
+struct SequenceRearrangebatchPDLocalData
{
vxRppHandle *handle;
RppiSize dimensions;
@@ -42,9 +42,8 @@ struct SequenceRearrangeLocalData
#endif
};
-static vx_status VX_CALLBACK validateSequenceRearrange(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
+static vx_status VX_CALLBACK validateSequenceRearrangebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
{
- // check scalar alpha and beta type
vx_status status = VX_SUCCESS;
vx_enum scalar_type;
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
@@ -76,9 +75,9 @@ static vx_status VX_CALLBACK validateSequenceRearrange(vx_node node, const vx_re
return status;
}
-static vx_status VX_CALLBACK processSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num)
+static vx_status VX_CALLBACK processSequenceRearrangebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num)
{
- SequenceRearrangeLocalData *data = NULL;
+ SequenceRearrangebatchPDLocalData *data = NULL;
vx_status return_status = VX_SUCCESS;
STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
vx_df_image df_image = VX_DF_IMAGE_VIRT;
@@ -226,9 +225,9 @@ static vx_status VX_CALLBACK processSequenceRearrange(vx_node node, const vx_ref
return return_status;
}
-static vx_status VX_CALLBACK initializeSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num)
+static vx_status VX_CALLBACK initializeSequenceRearrangebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num)
{
- SequenceRearrangeLocalData *data = new SequenceRearrangeLocalData;
+ SequenceRearrangebatchPDLocalData *data = new SequenceRearrangebatchPDLocalData;
memset(data, 0, sizeof(*data));
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height)));
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width)));
@@ -249,22 +248,22 @@ static vx_status VX_CALLBACK initializeSequenceRearrange(vx_node node, const vx_
return VX_SUCCESS;
}
-static vx_status VX_CALLBACK uninitializeSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num)
+static vx_status VX_CALLBACK uninitializeSequenceRearrangebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num)
{
return VX_SUCCESS;
}
-vx_status SequenceRearrange_Register(vx_context context)
+vx_status SequenceRearrangebatchPD_Register(vx_context context)
{
vx_status status = VX_SUCCESS;
// add kernel to the context with callbacks
- vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SequenceRearrange",
- VX_KERNEL_RPP_SEQUENCEREARRANGE,
- processSequenceRearrange,
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SequenceRearrangebatchPD",
+ VX_KERNEL_RPP_SEQUENCEREARRANGEBATCHPD,
+ processSequenceRearrangebatchPD,
7,
- validateSequenceRearrange,
- initializeSequenceRearrange,
- uninitializeSequenceRearrange);
+ validateSequenceRearrangebatchPD,
+ initializeSequenceRearrangebatchPD,
+ uninitializeSequenceRearrangebatchPD);
ERROR_CHECK_OBJECT(kernel);
AgoTargetAffinityInfo affinity;
vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
diff --git a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
index 7b341c61d2..cdb195372c 100644
--- a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
@@ -124,15 +124,43 @@ vx_status get_kernels_to_publish()
STATUS_ERROR_CHECK(ADD_KERNEL(CopybatchPD_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(NopbatchPD_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(ResizeMirrorNormalizeTensor_Register));
- STATUS_ERROR_CHECK(ADD_KERNEL(SequenceRearrange_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(SequenceRearrangebatchPD_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(Resizetensor_Register));
//tensor
+ STATUS_ERROR_CHECK(ADD_KERNEL(Blend_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Blur_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(Brightness_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(ColorTemperature_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(ColorTwist_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Contrast_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(Copy_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Crop_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(CropMirrorNormalize_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Exposure_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(FishEye_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Flip_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Fog_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(GammaCorrection_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Glitch_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Hue_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Jitter_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(LensCorrection_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Noise_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(Nop_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Pixelate_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Rain_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(Resize_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(ResizeCrop_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(ResizeCropMirror_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(ResizeMirrorNormalize_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Rotate_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Saturation_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(SequenceRearrange_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Snow_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(Vignette_Register));
+ STATUS_ERROR_CHECK(ADD_KERNEL(WarpAffine_Register));
+
return status;
}
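
Each of the Xxx_Register entries added above is expected to follow the same vxAddUserKernel pattern shown for SequenceRearrangebatchPD_Register earlier in this patch. The sketch below outlines that shape for Saturation_Register as a hypothetical example only: the kernel name string, the callback names, and the per-parameter declarations are assumptions (the callbacks are assumed to be defined in the same translation unit), and the AMD affinity/GPU attribute setup seen in the SequenceRearrangebatchPD_Register hunk is omitted.

vx_status Saturation_Register(vx_context context) {
    vx_status status = VX_SUCCESS;
    // Eight parameters, matching createNode(graph, VX_KERNEL_RPP_SATURATION, params, 8).
    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Saturation",
                                       VX_KERNEL_RPP_SATURATION,
                                       processSaturation, 8,
                                       validateSaturation,
                                       initializeSaturation,
                                       uninitializeSaturation);
    ERROR_CHECK_OBJECT(kernel);
    // Parameter order mirrors the params[] array built in vxExtRppSaturation.
    STATUS_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));  // pSrc
    STATUS_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));  // pSrcRoi
    STATUS_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); // pDst
    STATUS_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));   // pSaturationFactor
    STATUS_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));  // inputLayout
    STATUS_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));  // outputLayout
    STATUS_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));  // roiType
    STATUS_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));  // deviceType
    STATUS_ERROR_CHECK(vxFinalizeKernel(kernel));
    return status;
}
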
diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
index 61b10edb97..367db7ab2e 100644
--- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
@@ -1833,6 +1833,7 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_CopybatchPD(vx_graph graph, vx_ima
}
return node;
}
+
//Creating node for Pixelate effect
VX_API_CALL vx_node VX_API_CALL vxExtrppNode_NopbatchPD(vx_graph graph, vx_image pSrc, vx_image pDst)
{
@@ -1851,33 +1852,33 @@ VX_API_CALL vx_node VX_API_CALL vxExtrppNode_NopbatchPD(vx_graph graph, vx_image
return node;
}
-VX_API_CALL vx_node VX_API_CALL vxExtrppNode_SequenceRearrange(vx_graph graph,vx_image pSrc,vx_image pDst, vx_array newOrder, vx_uint32 newSequenceLength, vx_uint32 sequenceLength, vx_uint32 sequenceCount)
+VX_API_CALL vx_node VX_API_CALL vxExtrppNode_SequenceRearrangebatchPD(vx_graph graph, vx_image pSrc, vx_image pDst, vx_array pNewOrder, vx_uint32 newSequenceLength, vx_uint32 sequenceLength, vx_uint32 sequenceCount)
{
- vx_node node = NULL;
- vx_context context = vxGetContext((vx_reference)graph);
- if(vxGetStatus((vx_reference)context) == VX_SUCCESS) {
- vx_uint32 dev_type = getGraphAffinity(graph);
- vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type);
- vx_scalar NEWSEQUENCELENGTH = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &newSequenceLength);
- vx_scalar SEQUENCELENGTH = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &sequenceLength);
- vx_scalar SEQUENCECOUNT = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &sequenceCount);
- vx_reference params[] = {
- (vx_reference) pSrc,
- (vx_reference) pDst,
- (vx_reference) newOrder,
- (vx_reference) NEWSEQUENCELENGTH,
- (vx_reference) SEQUENCELENGTH,
- (vx_reference) SEQUENCECOUNT,
- (vx_reference) DEV_TYPE
- };
- node = createNode(graph, VX_KERNEL_RPP_SEQUENCEREARRANGE, params, 7);
- }
- return node;
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if(vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_scalar newSequenceLengthScalar = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &newSequenceLength);
+ vx_scalar sequenceLengthScalar = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &sequenceLength);
+ vx_scalar sequenceCountScalar = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &sequenceCount);
+ vx_reference params[] = {
+ (vx_reference) pSrc,
+ (vx_reference) pDst,
+ (vx_reference) pNewOrder,
+ (vx_reference) newSequenceLengthScalar,
+ (vx_reference) sequenceLengthScalar,
+ (vx_reference) sequenceCountScalar,
+ (vx_reference) deviceType
+ };
+ node = createNode(graph, VX_KERNEL_RPP_SEQUENCEREARRANGEBATCHPD, params, 7);
+ }
+ return node;
}
//tensor
-VX_API_ENTRY vx_node VX_API_CALL vxRppBrightness(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAlpha, vx_array pBeta, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppBrightness(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAlpha, vx_array pBeta, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
vx_node node = NULL;
vx_context context = vxGetContext((vx_reference)graph);
if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
@@ -1898,7 +1899,7 @@ VX_API_ENTRY vx_node VX_API_CALL vxRppBrightness(vx_graph graph, vx_tensor pSrc,
return node;
}
-VX_API_ENTRY vx_node VX_API_CALL vxRppCopy(vx_graph graph, vx_tensor pSrc, vx_tensor pDst) {
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppCopy(vx_graph graph, vx_tensor pSrc, vx_tensor pDst) {
vx_node node = NULL;
vx_context context = vxGetContext((vx_reference)graph);
if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
@@ -1913,7 +1914,7 @@ VX_API_ENTRY vx_node VX_API_CALL vxRppCopy(vx_graph graph, vx_tensor pSrc, vx_te
return node;
}
-VX_API_ENTRY vx_node VX_API_CALL vxRppCropMirrorNormalize(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pMultiplier, vx_array pOffset, vx_array pFlip, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppCropMirrorNormalize(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pMultiplier, vx_array pOffset, vx_array pMirror, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
vx_node node = NULL;
vx_context context = vxGetContext((vx_reference)graph);
if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
@@ -1925,7 +1926,7 @@ VX_API_ENTRY vx_node VX_API_CALL vxRppCropMirrorNormalize(vx_graph graph, vx_ten
(vx_reference)pDst,
(vx_reference)pMultiplier,
(vx_reference)pOffset,
- (vx_reference)pFlip,
+ (vx_reference)pMirror,
(vx_reference)inputLayout,
(vx_reference)outputLayout,
(vx_reference)roiType,
@@ -1935,7 +1936,7 @@ VX_API_ENTRY vx_node VX_API_CALL vxRppCropMirrorNormalize(vx_graph graph, vx_ten
return node;
}
-VX_API_CALL vx_node VX_API_CALL vxRppNop(vx_graph graph, vx_tensor pSrc, vx_tensor pDst) {
+VX_API_CALL vx_node VX_API_CALL vxExtRppNop(vx_graph graph, vx_tensor pSrc, vx_tensor pDst) {
vx_node node = NULL;
vx_context context = vxGetContext((vx_reference)graph);
if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
@@ -1950,7 +1951,7 @@ VX_API_CALL vx_node VX_API_CALL vxRppNop(vx_graph graph, vx_tensor pSrc, vx_tens
return node;
}
-VX_API_ENTRY vx_node VX_API_CALL vxRppResize(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pDstWidth, vx_array pDstHeight, vx_scalar interpolationType, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppResize(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pDstWidth, vx_array pDstHeight, vx_scalar interpolationType, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
vx_node node = NULL;
vx_context context = vxGetContext((vx_reference)graph);
if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
@@ -1972,6 +1973,573 @@ VX_API_ENTRY vx_node VX_API_CALL vxRppResize(vx_graph graph, vx_tensor pSrc, vx_
return node;
}
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppBlend(vx_graph graph, vx_tensor pSrc1, vx_tensor pSrc2, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pShift, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc1,
+ (vx_reference)pSrc2,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pShift,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_BLEND, params, 9);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppBlur(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pKernelSize, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pKernelSize,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_BLUR, params, 8);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppColorTwist(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAlpha, vx_array pBeta, vx_array pHue, vx_array pSat, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pAlpha,
+ (vx_reference)pBeta,
+ (vx_reference)pHue,
+ (vx_reference)pSat,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_COLORTWIST, params, 11);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppContrast(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pContrastFactor, vx_array pContrastCenter, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pContrastFactor,
+ (vx_reference)pContrastCenter,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_CONTRAST, params, 9);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppColorTemperature(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAdjustValue, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pAdjustValue,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_COLORTEMPERATURE, params, 8);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppCrop(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_CROP, params, 7);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppExposure(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pExposureFactor, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pExposureFactor,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_EXPOSURE, params, 8);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppFishEye(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_FISHEYE, params, 7);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppFlip(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pHorizontalFlag, vx_array pVerticalFlag, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pHorizontalFlag,
+ (vx_reference)pVerticalFlag,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_FLIP, params, 9);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppFog(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pFogValue, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pFogValue,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_FOG, params, 8);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppGammaCorrection(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pGamma, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pGamma,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_GAMMACORRECTION, params, 8);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppGlitch(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pXoffsetR, vx_array pYoffsetR, vx_array pXoffsetG, vx_array pYoffsetG, vx_array pXoffsetB, vx_array pYoffsetB, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pXoffsetR,
+ (vx_reference)pYoffsetR,
+ (vx_reference)pXoffsetG,
+ (vx_reference)pYoffsetG,
+ (vx_reference)pXoffsetB,
+ (vx_reference)pYoffsetB,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_GLITCH, params, 13);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppHue(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pHueShift, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pHueShift,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_HUE, params, 8);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppJitter(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pKernelSize, vx_scalar seed, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pKernelSize,
+ (vx_reference)seed,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_JITTER, params, 9);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppLensCorrection(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pStrength, vx_array pZoom, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pStrength,
+ (vx_reference)pZoom,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_LENSCORRECTION, params, 9);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppNoise(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pNoiseProb, vx_array pSaltProb, vx_array pSaltValue, vx_array pPepperValue, vx_scalar seed, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pNoiseProb,
+ (vx_reference)pSaltProb,
+ (vx_reference)pSaltValue,
+ (vx_reference)pPepperValue,
+ (vx_reference)seed,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_NOISE, params, 12);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppPixelate(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_PIXELATE, params, 7);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppRain(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pRainValue, vx_array pRainWidth, vx_array pRainHeight, vx_array pRainTransperancy, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pRainValue,
+ (vx_reference)pRainWidth,
+ (vx_reference)pRainHeight,
+ (vx_reference)pRainTransperancy,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_RAIN, params, 11);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppResizeCrop(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pCropTensor, vx_tensor pDst, vx_array pDstWidth, vx_array pDstHeight, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pCropTensor,
+ (vx_reference)pDst,
+ (vx_reference)pDstWidth,
+ (vx_reference)pDstHeight,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_RESIZECROP, params, 10);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppResizeCropMirror(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pDstWidth, vx_array pDstHeight, vx_array pMirror, vx_scalar interpolationType, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pDstWidth,
+ (vx_reference)pDstHeight,
+ (vx_reference)pMirror,
+ (vx_reference)interpolationType,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_RESIZECROPMIRROR, params, 11);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppResizeMirrorNormalize(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pDstWidth, vx_array pDstHeight, vx_scalar interpolationType, vx_array pMean, vx_array pStdDev, vx_array pMirror, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pDstWidth,
+ (vx_reference)pDstHeight,
+ (vx_reference)interpolationType,
+ (vx_reference)pMean,
+ (vx_reference)pStdDev,
+ (vx_reference)pMirror,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_RESIZEMIRRORNORMALIZE, params, 13);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppRotate(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAngle, vx_scalar interpolationType, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pAngle,
+ (vx_reference)interpolationType,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_ROTATE, params, 9);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppSaturation(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pSaturationFactor, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pSaturationFactor,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_SATURATION, params, 8);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppSnow(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pSnowValue, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pSnowValue,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_SNOW, params, 8);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppVignette(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pStdDev, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pStdDev,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_VIGNETTE, params, 8);
+ }
+ return node;
+}
+
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppWarpAffine(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAffineArray, vx_scalar interpolationType, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pSrcRoi,
+ (vx_reference)pDst,
+ (vx_reference)pAffineArray,
+ (vx_reference)interpolationType,
+ (vx_reference)inputLayout,
+ (vx_reference)outputLayout,
+ (vx_reference)roiType,
+ (vx_reference)deviceType};
+ node = createNode(graph, VX_KERNEL_RPP_WARPAFFINE, params, 9);
+ }
+ return node;
+}
+
+VX_API_CALL vx_node VX_API_CALL vxExtRppSequenceRearrange(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_array pNewOrder, vx_scalar layout) {
+ vx_node node = NULL;
+ vx_context context = vxGetContext((vx_reference)graph);
+ if(vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+ vx_uint32 devType = getGraphAffinity(graph);
+ vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType);
+ vx_reference params[] = {
+ (vx_reference)pSrc,
+ (vx_reference)pDst,
+ (vx_reference)pNewOrder,
+ (vx_reference)layout,
+ (vx_reference)deviceType
+ };
+ node = createNode(graph, VX_KERNEL_RPP_SEQUENCEREARRANGE, params, 5);
+ }
+ return node;
+}
+
RpptDataType getRpptDataType(vx_enum vxDataType) {
switch(vxDataType) {
case vx_type_e::VX_TYPE_FLOAT32:
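
On the caller side, the new tensor vxExtRppSequenceRearrange defined above takes only the two sequence tensors, the new frame order, and a layout scalar. A small, hypothetical helper shows one way to drive it; the header name, the layout value semantics, and the assumption that the sequence tensors are already created as VX_TYPE_UINT8 tensors (per the header documentation) are not from this repository.

#include <VX/vx.h>
#include "vx_ext_rpp.h"   // assumed header exposing vxExtRppSequenceRearrange

// Hypothetical helper: builds the pNewOrder array and layout scalar, then adds the node.
// 'newOrder' lists the order in which frames are copied; 'layoutValue' is the integer
// layout selector whose enumerant values are an assumption here.
static vx_node addSequenceRearrangeNode(vx_context context, vx_graph graph,
                                        vx_tensor pSrcSequence, vx_tensor pDstSequence,
                                        const vx_uint32 *newOrder, vx_size numFrames,
                                        vx_int32 layoutValue) {
    vx_array pNewOrder = vxCreateArray(context, VX_TYPE_UINT32, numFrames);
    vxAddArrayItems(pNewOrder, numFrames, newOrder, sizeof(vx_uint32));
    vx_scalar layout = vxCreateScalar(context, VX_TYPE_INT32, &layoutValue);
    return vxExtRppSequenceRearrange(graph, pSrcSequence, pDstSequence, pNewOrder, layout);
}

As with the other factories in this patch, the returned node should be checked with vxGetStatus before the graph is verified.
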
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Blend.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Blend.cpp
new file mode 100644
index 0000000000..f0d6686c1e
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Blend.cpp
@@ -0,0 +1,243 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct BlendLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc1;
+ RppPtr_t pSrc2;
+ RppPtr_t pDst;
+ vx_float32 *pAlpha;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshBlend(vx_node node, const vx_reference *parameters, vx_uint32 num, BlendLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pAlpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc1, sizeof(data->pSrc1)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &data->pSrc2, sizeof(data->pSrc2)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc1, sizeof(data->pSrc1)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pSrc2, sizeof(data->pSrc2)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pAlpha[index + f] = data->pAlpha[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateBlend(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Blend: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Blend: tensor: #3 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processBlend(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ BlendLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshBlend(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_blend_gpu(data->pSrc1, data->pSrc2, data->pSrcDesc, data->pDst, data->pDstDesc, data->pAlpha, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_blend_host(data->pSrc1, data->pSrc2, data->pSrcDesc, data->pDst, data->pDstDesc, data->pAlpha, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeBlend(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ BlendLocalData *data = new BlendLocalData;
+ memset(data, 0, sizeof(BlendLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
+
+ data->pAlpha = new vx_float32[data->pSrcDesc->n];
+ refreshBlend(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeBlend(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ BlendLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pAlpha;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Blend_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Blend",
+ VX_KERNEL_RPP_BLEND,
+ processBlend,
+ 9,
+ validateBlend,
+ initializeBlend,
+ uninitializeBlend);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
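
Note (outside the diff): Blend_Register publishes "org.rpp.Blend" with nine parameters in the order fixed by validateBlend (src1, src2, ROI tensor, dst, alpha array, input layout, output layout, ROI type, device type). A hedged sketch of wiring the node through the generic-node API, assuming all objects already exist with compatible shapes:

    #include <VX/vx.h>

    // Creates the Blend node by name; the parameter order mirrors Blend_Register above.
    vx_node addBlendNode(vx_graph graph, vx_tensor src1, vx_tensor src2, vx_tensor roi, vx_tensor dst,
                         vx_array alpha, vx_scalar inLayout, vx_scalar outLayout,
                         vx_scalar roiType, vx_scalar deviceType) {
        vx_context context = vxGetContext((vx_reference)graph);
        vx_kernel kernel = vxGetKernelByName(context, "org.rpp.Blend");
        vx_node node = vxCreateGenericNode(graph, kernel);
        vx_reference params[9] = {(vx_reference)src1, (vx_reference)src2, (vx_reference)roi,
                                  (vx_reference)dst, (vx_reference)alpha, (vx_reference)inLayout,
                                  (vx_reference)outLayout, (vx_reference)roiType, (vx_reference)deviceType};
        for (vx_uint32 i = 0; i < 9; i++)
            vxSetParameterByIndex(node, i, params[i]);
        vxReleaseKernel(&kernel);  // the node holds its own reference to the kernel
        return node;
    }
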
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Blur.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Blur.cpp
new file mode 100644
index 0000000000..f2e0caa26b
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Blur.cpp
@@ -0,0 +1,259 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct BlurLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_uint32 *pKernelSize;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+ RppiSize *pSrcDimensions;
+ RppiSize maxSrcDimensions;
+};
+
+static vx_status VX_CALLBACK refreshBlur(vx_node node, const vx_reference *parameters, vx_uint32 num, BlurLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pKernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ // Fill width and height array with ROI data required by RPP batchPD kernels
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+ data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth;
+ data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight;
+ }
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pKernelSize[index + f] = data->pKernelSize[n];
+ data->pSrcDimensions[index + f] = data->pSrcDimensions[n];
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateBlur(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Blur: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Blur: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processBlur(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ BlurLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshBlur(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_blur_u8_pln1_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pKernelSize, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_blur_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pKernelSize, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_blur_u8_pln1_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pKernelSize, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_blur_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pKernelSize, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeBlur(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ BlurLocalData *data = new BlurLocalData;
+ memset(data, 0, sizeof(BlurLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
+
+ data->maxSrcDimensions.height = data->pSrcDesc->h;
+ data->maxSrcDimensions.width = data->pSrcDesc->w;
+ data->pKernelSize = new vx_uint32[data->pSrcDesc->n];
+ data->pSrcDimensions = new RppiSize[data->pSrcDesc->n];
+
+ refreshBlur(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeBlur(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ BlurLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pKernelSize;
+ delete[] data->pSrcDimensions;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Blur_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Blur",
+ VX_KERNEL_RPP_BLUR,
+ processBlur,
+ 8,
+ validateBlur,
+ initializeBlur,
+ uninitializeBlur);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
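
Note (outside the diff): unlike Blend, the Blur node falls back to the legacy rppi batchPD API, so refreshBlur rebuilds per-image RppiSize entries from the ROI tensor and, for NFHWC/NFCHW inputs, broadcasts each per-sequence kernel size to every frame. The batch loop walks backwards so each sequence's source slot is read before it can be overwritten. A standalone illustration of that replication pattern, with plain arrays in place of the RPP structures and illustrative sizes:

    #include <cstdio>

    int main() {
        const unsigned batch = 2, frames = 3;              // N and F of an NFHWC tensor
        unsigned kernelSize[batch * frames] = {3, 5};      // one value per sequence, remaining slots zero
        for (int n = batch - 1; n >= 0; n--) {             // backwards: read slot n before it is overwritten
            unsigned index = n * frames;
            for (unsigned f = 0; f < frames; f++)
                kernelSize[index + f] = kernelSize[n];     // broadcast the sequence value to its frames
        }
        for (unsigned i = 0; i < batch * frames; i++)
            printf("%u ", kernelSize[i]);                  // prints: 3 3 3 5 5 5
        printf("\n");
        return 0;
    }
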
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Brightness.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Brightness.cpp
index 8a392f5f1e..f3d430263c 100644
--- a/amd_openvx_extensions/amd_rpp/source/tensor/Brightness.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Brightness.cpp
@@ -1,5 +1,5 @@
/*
-Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -24,7 +24,7 @@ THE SOFTWARE.
struct BrightnessLocalData {
vxRppHandle *handle;
- Rpp32u deviceType;
+ vx_uint32 deviceType;
RppPtr_t pSrc;
RppPtr_t pDst;
vx_float32 *pAlpha;
@@ -59,18 +59,17 @@ static vx_status VX_CALLBACK refreshBrightness(vx_node node, const vx_reference
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
}
data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
- if((data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW)) {
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
- for(int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
unsigned index = n * num_of_frames;
- for(int f = 0; f < num_of_frames; f++) {
+ for (unsigned f = 0; f < num_of_frames; f++) {
data->pAlpha[index + f] = data->pAlpha[n];
data->pBeta[index + f] = data->pBeta[n];
data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
}
}
}
-
return status;
}
@@ -79,34 +78,34 @@ static vx_status VX_CALLBACK validateBrightness(vx_node node, const vx_reference
vx_enum scalar_type;
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_INT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_INT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_INT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_UINT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type);
- // Check for input parameters
+ // Check for input tensor
size_t num_tensor_dims;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
if(num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Brightness: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
- // Check for output parameters
+ // Check for output tensor
vx_uint8 tensor_fixed_point_position;
size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
- vx_enum tensor_type;
+ vx_enum tensor_dtype;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
if(num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Brightness: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
return status;
}
@@ -135,13 +134,13 @@ static vx_status VX_CALLBACK initializeBrightness(vx_node node, const vx_referen
BrightnessLocalData *data = new BrightnessLocalData;
memset(data, 0, sizeof(BrightnessLocalData));
- vx_enum input_tensor_type, output_tensor_type;
- int roi_type, input_layout, output_layout;
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
- data->roiType = (roi_type == 0) ? RpptRoiType::XYWH : RpptRoiType::LTRB;
+ data->roiType = static_cast<RpptRoiType>(roi_type);
data->inputLayout = static_cast<vxTensorLayout>(input_layout);
data->outputLayout = static_cast<vxTensorLayout>(output_layout);
@@ -149,8 +148,8 @@ static vx_status VX_CALLBACK initializeBrightness(vx_node node, const vx_referen
data->pSrcDesc = new RpptDesc;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_type, sizeof(input_tensor_type)));
- data->pSrcDesc->dataType = getRpptDataType(input_tensor_type);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
data->pSrcDesc->offsetInBytes = 0;
fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
@@ -158,13 +157,13 @@ static vx_status VX_CALLBACK initializeBrightness(vx_node node, const vx_referen
data->pDstDesc = new RpptDesc;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_type, sizeof(output_tensor_type)));
- data->pDstDesc->dataType = getRpptDataType(output_tensor_type);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
data->pDstDesc->offsetInBytes = 0;
fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
- data->pAlpha = static_cast<vx_float32 *>(malloc(sizeof(vx_float32) * data->pSrcDesc->n));
- data->pBeta = static_cast<vx_float32 *>(malloc(sizeof(vx_float32) * data->pSrcDesc->n));
+ data->pAlpha = new vx_float32[data->pSrcDesc->n];
+ data->pBeta = new vx_float32[data->pSrcDesc->n];
refreshBrightness(node, parameters, num, data);
STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
@@ -174,17 +173,17 @@ static vx_status VX_CALLBACK initializeBrightness(vx_node node, const vx_referen
static vx_status VX_CALLBACK uninitializeBrightness(vx_node node, const vx_reference *parameters, vx_uint32 num) {
BrightnessLocalData *data;
STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
- if (data->pAlpha != nullptr) free(data->pAlpha);
- if (data->pBeta != nullptr) free(data->pBeta);
- delete(data->pSrcDesc);
- delete(data->pDstDesc);
+ delete[] data->pAlpha;
+ delete[] data->pBeta;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
- delete (data);
+ delete data;
return VX_SUCCESS;
}
//! \brief The kernel target support callback.
-// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
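
Note (outside the diff): after this change the Brightness kernel reads its layout and ROI-type scalars as VX_TYPE_INT32 and the device affinity as VX_TYPE_UINT32, and the ROI type is now cast directly to RpptRoiType instead of the old 0 -> XYWH mapping, so callers must pass values that already match the RPP enumerators. A hedged sketch of building those scalars, assuming vx_ext_amd.h provides the affinity macro and that 0 is the intended layout/ROI-type encoding on the caller's side:

    #include <VX/vx.h>
    #include "vx_ext_amd.h"  // assumed include providing AGO_TARGET_AFFINITY_CPU

    struct BrightnessScalars { vx_scalar inLayout, outLayout, roiType, deviceType; };

    // Builds parameters 5-8 of the updated Brightness kernel; the integer encodings
    // below are placeholders and must match vxTensorLayout / RpptRoiType in this build.
    BrightnessScalars makeBrightnessScalars(vx_context context) {
        vx_int32 in_layout = 0, out_layout = 0, roi_type = 0;  // assumed encodings
        vx_uint32 device = AGO_TARGET_AFFINITY_CPU;
        BrightnessScalars s;
        s.inLayout = vxCreateScalar(context, VX_TYPE_INT32, &in_layout);
        s.outLayout = vxCreateScalar(context, VX_TYPE_INT32, &out_layout);
        s.roiType = vxCreateScalar(context, VX_TYPE_INT32, &roi_type);
        s.deviceType = vxCreateScalar(context, VX_TYPE_UINT32, &device);
        return s;
    }
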
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ColorTemperature.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ColorTemperature.cpp
new file mode 100644
index 0000000000..52f2e01077
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/ColorTemperature.cpp
@@ -0,0 +1,240 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct ColorTemperatureLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_int32 *pAdjustmentValue;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshColorTemperature(vx_node node, const vx_reference *parameters, vx_uint32 num, ColorTemperatureLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_int32), data->pAdjustmentValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pAdjustmentValue[index + f] = data->pAdjustmentValue[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateColorTemperature(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ColorTemperature: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ColorTemperature: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processColorTemperature(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_ERROR_NOT_IMPLEMENTED;
+ ColorTemperatureLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshColorTemperature(node, parameters, num, data);
+ // rppt_color_temperature not available in RPP TOT, will be enabled once support is added
+ /*if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_color_temperature_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pAdjustmentValue, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_color_temperature_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pAdjustmentValue, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }*/
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeColorTemperature(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ColorTemperatureLocalData *data = new ColorTemperatureLocalData;
+ memset(data, 0, sizeof(ColorTemperatureLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
+
+ data->pAdjustmentValue = new vx_int32[data->pSrcDesc->n];
+ refreshColorTemperature(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeColorTemperature(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ColorTemperatureLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pAdjustmentValue;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status ColorTemperature_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ColorTemperature",
+ VX_KERNEL_RPP_COLORTEMPERATURE,
+ processColorTemperature,
+ 8,
+ validateColorTemperature,
+ initializeColorTemperature,
+ uninitializeColorTemperature);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
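
Note (outside the diff): processColorTemperature currently hard-returns VX_ERROR_NOT_IMPLEMENTED with the rppt_color_temperature_* calls left commented out until RPP exposes them. A hedged sketch of how that body could be guarded once the API lands; RPP_HAS_COLOR_TEMPERATURE is a hypothetical build flag and the call arguments are the ones already written (commented) in the file:

    // Drop-in sketch for the body of processColorTemperature, after the refresh call.
    #if RPP_HAS_COLOR_TEMPERATURE  // hypothetical guard, defined when RPP ships the API
        if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
            rpp_status = rppt_color_temperature_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc,
                                                     data->pAdjustmentValue, data->pSrcRoi, data->roiType,
                                                     data->handle->rppHandle);
            return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
        }
    #else
        return_status = VX_ERROR_NOT_IMPLEMENTED;  // current behaviour in this PR
    #endif
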
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ColorTwist.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ColorTwist.cpp
new file mode 100644
index 0000000000..74151008ef
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/ColorTwist.cpp
@@ -0,0 +1,257 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct ColorTwistLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pAlpha;
+ vx_float32 *pBeta;
+ vx_float32 *pHue;
+ vx_float32 *pSat;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshColorTwist(vx_node node, const vx_reference *parameters, vx_uint32 num, ColorTwistLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pAlpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pBeta, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pHue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pSat, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pAlpha[index + f] = data->pAlpha[n];
+ data->pBeta[index + f] = data->pBeta[n];
+ data->pHue[index + f] = data->pHue[n];
+ data->pSat[index + f] = data->pSat[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateColorTwist(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #10 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ColorTwist: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ColorTwist: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+
+ return status;
+}
+
+static vx_status VX_CALLBACK processColorTwist(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ ColorTwistLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshColorTwist(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU)
+ {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_color_twist_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pSrcDesc, data->pAlpha, data->pBeta, data->pHue, data->pSat, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_color_twist_host(data->pSrc, data->pSrcDesc, data->pDst, data->pSrcDesc, data->pAlpha, data->pBeta, data->pHue, data->pSat, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeColorTwist(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ColorTwistLocalData *data = new ColorTwistLocalData;
+ memset(data, 0, sizeof(ColorTwistLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
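+ // Per-image parameter buffers, one entry per image in the batch (pSrcDesc->n);
+ // refreshColorTwist refills them from the input arrays before every launch.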
+ data->pAlpha = new vx_float32[data->pSrcDesc->n];
+ data->pBeta = new vx_float32[data->pSrcDesc->n];
+ data->pHue = new vx_float32[data->pSrcDesc->n];
+ data->pSat = new vx_float32[data->pSrcDesc->n];
+ refreshColorTwist(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeColorTwist(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ColorTwistLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pAlpha;
+ delete[] data->pBeta;
+ delete[] data->pHue;
+ delete[] data->pSat;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status ColorTwist_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ColorTwist",
+ VX_KERNEL_RPP_COLORTWIST,
+ processColorTwist,
+ 11,
+ validateColorTwist,
+ initializeColorTwist,
+ uninitializeColorTwist);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
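+ // When built with HIP, enable GPU buffer access for this kernel so the process callback can
+ // query device pointers (VX_TENSOR_BUFFER_HIP) for its tensors directly.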
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Contrast.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Contrast.cpp
new file mode 100644
index 0000000000..258d572c5f
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Contrast.cpp
@@ -0,0 +1,245 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct ContrastLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pContrastFactor;
+ vx_float32 *pContrastCenter;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshContrast(vx_node node, const vx_reference *parameters, vx_uint32 num, ContrastLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pContrastFactor, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pContrastCenter, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
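+ // Fetch backend-specific buffer pointers: HIP device buffers when running on the GPU,
+ // host buffers on the CPU; the OpenCL backend is not implemented for this tensor path.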
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pContrastFactor[index + f] = data->pContrastFactor[n];
+ data->pContrastCenter[index + f] = data->pContrastCenter[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateContrast(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Contrast: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Contrast: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processContrast(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ ContrastLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshContrast(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_contrast_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pContrastFactor, data->pContrastCenter, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_contrast_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pContrastFactor, data->pContrastCenter, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeContrast(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ContrastLocalData *data = new ContrastLocalData;
+ memset(data, 0, sizeof(ContrastLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
+ data->pContrastFactor = new vx_float32[data->pSrcDesc->n];
+ data->pContrastCenter = new vx_float32[data->pSrcDesc->n];
+ refreshContrast(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeContrast(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ContrastLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pContrastFactor;
+ delete[] data->pContrastCenter;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Contrast_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Contrast",
+ VX_KERNEL_RPP_CONTRAST,
+ processContrast,
+ 9,
+ validateContrast,
+ initializeContrast,
+ uninitializeContrast);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Copy.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Copy.cpp
index 469fab350c..d3d084cf6b 100644
--- a/amd_openvx_extensions/amd_rpp/source/tensor/Copy.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Copy.cpp
@@ -1,5 +1,5 @@
/*
-Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -24,7 +24,7 @@ THE SOFTWARE.
struct CopyLocalData {
vxRppHandle handle;
- Rpp32u deviceType;
+ vx_uint32 deviceType;
RppPtr_t pSrc;
RppPtr_t pDst;
size_t tensorSize;
@@ -49,20 +49,20 @@ static vx_status VX_CALLBACK validateCopy(vx_node node, const vx_reference param
vx_enum scalar_type;
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_UINT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #2 type=%d (must be size)\n", scalar_type);
- // Check for output parameters
+ // Check for output tensor
vx_uint8 tensor_fixed_point_position;
size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
size_t num_tensor_dims;
- vx_enum tensor_type;
+ vx_enum tensor_dtype;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
return status;
}
@@ -94,21 +94,21 @@ static vx_status VX_CALLBACK initializeCopy(vx_node node, const vx_reference *pa
STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream)));
#endif
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
- vx_enum input_tensor_type, output_tensor_type;
+ vx_enum input_tensor_dtype, output_tensor_dtype;
vx_size num_of_dims;
size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_of_dims, sizeof(vx_size)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, tensor_dims, sizeof(vx_size) * num_of_dims));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_type, sizeof(input_tensor_type)));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &output_tensor_type, sizeof(output_tensor_type)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
data->tensorSize = 1;
- for(int i = 0; i < num_of_dims; i++)
+ for (unsigned i = 0; i < num_of_dims; i++)
data->tensorSize *= tensor_dims[i];
- if (input_tensor_type == vx_type_e::VX_TYPE_FLOAT32 && output_tensor_type == vx_type_e::VX_TYPE_FLOAT32) {
+ if (input_tensor_dtype == vx_type_e::VX_TYPE_FLOAT32 && output_tensor_dtype == vx_type_e::VX_TYPE_FLOAT32) {
data->tensorSize *= sizeof(vx_float32);
- } else if (input_tensor_type == vx_type_e::VX_TYPE_FLOAT16 && output_tensor_type == vx_type_e::VX_TYPE_FLOAT16) {
+ } else if (input_tensor_dtype == vx_type_e::VX_TYPE_FLOAT16 && output_tensor_dtype == vx_type_e::VX_TYPE_FLOAT16) {
#if defined(AMD_FP16_SUPPORT)
data->tensorSize *= sizeof(vx_float16);
#else
@@ -128,7 +128,7 @@ static vx_status VX_CALLBACK uninitializeCopy(vx_node node, const vx_reference *
}
//! \brief The kernel target support callback.
-// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Crop.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Crop.cpp
new file mode 100644
index 0000000000..a5dc92a49d
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Crop.cpp
@@ -0,0 +1,230 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct CropLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshCrop(vx_node node, const vx_reference *parameters, vx_uint32 num, CropLocalData *data) {
+ vx_status status = VX_SUCCESS;
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
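+ // Crop takes no per-image factors, so for sequence layouts only the ROIs are replicated
+ // across the frames of each sequence.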
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateCrop(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Crop: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Crop: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+
+ return status;
+}
+
+static vx_status VX_CALLBACK processCrop(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ CropLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshCrop(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_crop_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_crop_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeCrop(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ CropLocalData *data = new CropLocalData;
+ memset(data, 0, sizeof(CropLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+ refreshCrop(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeCrop(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ CropLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Crop_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Crop",
+ VX_KERNEL_RPP_CROP,
+ processCrop,
+ 7,
+ validateCrop,
+ initializeCrop,
+ uninitializeCrop);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/CropMirrorNormalize.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/CropMirrorNormalize.cpp
index 08a1b2adcf..ed29ecc1bd 100644
--- a/amd_openvx_extensions/amd_rpp/source/tensor/CropMirrorNormalize.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/CropMirrorNormalize.cpp
@@ -1,5 +1,5 @@
/*
-Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -24,7 +24,7 @@ THE SOFTWARE.
struct CropMirrorNormalizeLocalData {
vxRppHandle *handle;
- Rpp32u deviceType;
+ vx_uint32 deviceType;
RppPtr_t pSrc;
RppPtr_t pDst;
vx_float32 *pMultiplier;
@@ -62,12 +62,12 @@ static vx_status VX_CALLBACK refreshCropMirrorNormalize(vx_node node, const vx_r
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
}
data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
- if((data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW)) {
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
- for(int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
unsigned index = n * num_of_frames;
- for(int f = 0; f < num_of_frames; f++) {
- for(int c = 0; c < data->pSrcDesc->c; c++) {
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ for (unsigned c = 0; c < data->pSrcDesc->c; c++) {
int dst_ind = (index + f) * data->pSrcDesc->c + c;
int src_ind = n * data->pSrcDesc->c + c;
data->pMultiplier[dst_ind] = data->pMultiplier[src_ind];
@@ -78,7 +78,6 @@ static vx_status VX_CALLBACK refreshCropMirrorNormalize(vx_node node, const vx_r
}
}
}
-
return status;
}
@@ -87,34 +86,34 @@ static vx_status VX_CALLBACK validateCropMirrorNormalize(vx_node node, const vx_
vx_enum scalar_type;
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_INT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be a boolean size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be a boolean size)\n", scalar_type);
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_INT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_INT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type);
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_UINT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type);
- // Check for input parameters
+ // Check for input tensor
size_t num_tensor_dims;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
if(num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: CropMirrorNormalize: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
- // Check for output parameters
+ // Check for output tensor
vx_uint8 tensor_fixed_point_position;
size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
- vx_enum tensor_type;
+ vx_enum tensor_dtype;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
if(num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: CropMirrorNormalize: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
return status;
@@ -146,13 +145,13 @@ static vx_status VX_CALLBACK initializeCropMirrorNormalize(vx_node node, const v
CropMirrorNormalizeLocalData *data = new CropMirrorNormalizeLocalData;
memset(data, 0, sizeof(CropMirrorNormalizeLocalData));
- vx_enum input_tensor_type, output_tensor_type;
- int roi_type, input_layout, output_layout;
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
- data->roiType = (roi_type == 0) ? RpptRoiType::XYWH : RpptRoiType::LTRB;
+ data->roiType = static_cast<RpptRoiType>(roi_type);
data->inputLayout = static_cast<vxTensorLayout>(input_layout);
data->outputLayout = static_cast<vxTensorLayout>(output_layout);
@@ -160,8 +159,8 @@ static vx_status VX_CALLBACK initializeCropMirrorNormalize(vx_node node, const v
data->pSrcDesc = new RpptDesc;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_type, sizeof(input_tensor_type)));
- data->pSrcDesc->dataType = getRpptDataType(input_tensor_type);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
data->pSrcDesc->offsetInBytes = 0;
fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
@@ -169,14 +168,14 @@ static vx_status VX_CALLBACK initializeCropMirrorNormalize(vx_node node, const v
data->pDstDesc = new RpptDesc;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2],VX_TENSOR_DATA_TYPE, &output_tensor_type, sizeof(output_tensor_type)));
- data->pDstDesc->dataType = getRpptDataType(output_tensor_type);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2],VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
data->pDstDesc->offsetInBytes = 0;
fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
- data->pMultiplier = static_cast<vx_float32 *>(malloc(sizeof(vx_float32) * data->pSrcDesc->n * data->pSrcDesc->c));
- data->pOffset = static_cast<vx_float32 *>(malloc(sizeof(vx_float32) * data->pSrcDesc->n * data->pSrcDesc->c));
- data->pMirror = static_cast<vx_uint32 *>(malloc(sizeof(vx_uint32) * data->pSrcDesc->n));
+ data->pMultiplier = new vx_float32[data->pSrcDesc->n * data->pSrcDesc->c];
+ data->pOffset = new vx_float32[data->pSrcDesc->n * data->pSrcDesc->c];
+ data->pMirror = new vx_uint32[data->pSrcDesc->n];
refreshCropMirrorNormalize(node, parameters, num, data);
STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
@@ -186,18 +185,18 @@ static vx_status VX_CALLBACK initializeCropMirrorNormalize(vx_node node, const v
static vx_status VX_CALLBACK uninitializeCropMirrorNormalize(vx_node node, const vx_reference *parameters, vx_uint32 num) {
CropMirrorNormalizeLocalData *data;
STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
- if (data->pMultiplier != nullptr) free(data->pMultiplier);
- if (data->pOffset != nullptr) free(data->pOffset);
- if (data->pMirror != nullptr) free(data->pMirror);
- delete(data->pSrcDesc);
- delete(data->pDstDesc);
+ delete[] data->pMultiplier;
+ delete[] data->pOffset;
+ delete[] data->pMirror;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
- delete (data);
+ delete data;
return VX_SUCCESS;
}
//! \brief The kernel target support callback.
-// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Exposure.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Exposure.cpp
new file mode 100644
index 0000000000..70cc47899c
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Exposure.cpp
@@ -0,0 +1,240 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct ExposureLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pExposureFactor;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshExposure(vx_node node, const vx_reference *parameters, vx_uint32 num, ExposureLocalData *data) {
+ vx_status status = VX_SUCCESS;
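+ // One exposure factor per image in the batch, copied from the input vx_array on each refresh.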
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pExposureFactor, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pExposureFactor[index + f] = data->pExposureFactor[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateExposure(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Exposure: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Exposure: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processExposure(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+
+ ExposureLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshExposure(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_exposure_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pExposureFactor, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_exposure_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pExposureFactor, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeExposure(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ExposureLocalData *data = new ExposureLocalData;
+ memset(data, 0, sizeof(ExposureLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
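+ // One exposure factor per image described by pSrcDesc; refreshExposure fills it from the input array and replicates values across frames for sequence layouts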
+ data->pExposureFactor = new vx_float32[data->pSrcDesc->n];
+ refreshExposure(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeExposure(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ExposureLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pExposureFactor;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Exposure_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Exposure",
+ VX_KERNEL_RPP_EXPOSURE,
+ processExposure,
+ 8,
+ validateExposure,
+ initializeExposure,
+ uninitializeExposure);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
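+ // When built with HIP and the context affinity is GPU, enable direct GPU buffer access for this kernel's tensor arguments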
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
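+ // Parameters: 0 src tensor, 1 ROI tensor, 2 dst tensor, 3 exposure-factor array, 4 input layout, 5 output layout, 6 ROI type, 7 device type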
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/FishEye.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/FishEye.cpp
new file mode 100644
index 0000000000..003a423386
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/FishEye.cpp
@@ -0,0 +1,252 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct FishEyeLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+ RppiSize *pSrcDimensions;
+ RppiSize maxSrcDimensions;
+};
+
+static vx_status VX_CALLBACK refreshFishEye(vx_node node, const vx_reference *parameters, vx_uint32 num, FishEyeLocalData *data) {
+ vx_status status = VX_SUCCESS;
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ // Fill width and height array with ROI data required by RPP batchPD kernels
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+ data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth;
+ data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight;
+ }
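+ // For sequence layouts, duplicate each sequence's dimensions across all of its frames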
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pSrcDimensions[index + f] = data->pSrcDimensions[n];
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateFishEye(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: FishEye: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: FishEye: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processFishEye(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ FishEyeLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshFishEye(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
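+ // FishEye dispatches to the RPP u8 batchPD GPU kernels: pln1 for single-channel inputs, pkd3 otherwise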
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_fisheye_u8_pln1_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_fisheye_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_fisheye_u8_pln1_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_fisheye_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeFishEye(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ FishEyeLocalData *data = new FishEyeLocalData;
+ memset(data, 0, sizeof(FishEyeLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
+ data->maxSrcDimensions.height = data->pSrcDesc->h;
+ data->maxSrcDimensions.width = data->pSrcDesc->w;
+ data->pSrcDimensions = new RppiSize[data->pSrcDesc->n];
+ refreshFishEye(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeFishEye(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ FishEyeLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pSrcDimensions;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status FishEye_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FishEye",
+ VX_KERNEL_RPP_FISHEYE,
+ processFishEye,
+ 7,
+ validateFishEye,
+ initializeFishEye,
+ uninitializeFishEye);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Flip.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Flip.cpp
new file mode 100644
index 0000000000..ab494d0c94
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Flip.cpp
@@ -0,0 +1,246 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct FlipLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_uint32 *pHorizontalFlag;
+ vx_uint32 *pVerticalFlag;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshFlip(vx_node node, const vx_reference *parameters, vx_uint32 num, FlipLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pHorizontalFlag, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pVerticalFlag, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
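+ // For sequence layouts, replicate the per-sequence flip flags and ROI across every frame of that sequence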
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pHorizontalFlag[index + f] = data->pHorizontalFlag[n];
+ data->pVerticalFlag[index + f] = data->pVerticalFlag[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateFlip(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Flip: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Flip: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processFlip(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ FlipLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshFlip(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_flip_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pHorizontalFlag, data->pVerticalFlag, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_flip_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pHorizontalFlag, data->pVerticalFlag, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeFlip(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ FlipLocalData *data = new FlipLocalData;
+ memset(data, 0, sizeof(FlipLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
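+ // One horizontal and one vertical flip flag per image described by pSrcDesc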
+ data->pHorizontalFlag = new vx_uint32[data->pSrcDesc->n];
+ data->pVerticalFlag = new vx_uint32[data->pSrcDesc->n];
+ refreshFlip(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeFlip(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ FlipLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pHorizontalFlag;
+ delete[] data->pVerticalFlag;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Flip_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Flip",
+ VX_KERNEL_RPP_FLIP,
+ processFlip,
+ 9,
+ validateFlip,
+ initializeFlip,
+ uninitializeFlip);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
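+ // Parameters: 0 src tensor, 1 ROI tensor, 2 dst tensor, 3 horizontal-flip array, 4 vertical-flip array, 5 input layout, 6 output layout, 7 ROI type, 8 device type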
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Fog.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Fog.cpp
new file mode 100644
index 0000000000..b2621ea37c
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Fog.cpp
@@ -0,0 +1,259 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct FogLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pFogValue;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+ RppiSize *pSrcDimensions;
+ RppiSize maxSrcDimensions;
+};
+
+static vx_status VX_CALLBACK refreshFog(vx_node node, const vx_reference *parameters, vx_uint32 num, FogLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pFogValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ // Fill width and height array with ROI data required by RPP batchPD kernels
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+ data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth;
+ data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight;
+ }
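+ // For sequence layouts, duplicate each sequence's fog value and dimensions across all of its frames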
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pFogValue[index + f] = data->pFogValue[n];
+ data->pSrcDimensions[index + f] = data->pSrcDimensions[n];
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateFog(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Fog: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Fog: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processFog(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+
+ FogLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshFog(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
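+ // Fog dispatches to the RPP u8 batchPD GPU kernels, choosing pln1 or pkd3 based on the channel count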
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_fog_u8_pln1_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pFogValue, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_fog_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pFogValue, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_fog_u8_pln1_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pFogValue, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_fog_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pFogValue, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeFog(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ FogLocalData *data = new FogLocalData;
+ memset(data, 0, sizeof(FogLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
+ data->maxSrcDimensions.height = data->pSrcDesc->h;
+ data->maxSrcDimensions.width = data->pSrcDesc->w;
+ data->pSrcDimensions = new RppiSize[data->pSrcDesc->n];
+ data->pFogValue = new vx_float32[data->pSrcDesc->n];
+ refreshFog(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeFog(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ FogLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pFogValue;
+ delete[] data->pSrcDimensions;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Fog_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Fog",
+ VX_KERNEL_RPP_FOG,
+ processFog,
+ 8,
+ validateFog,
+ initializeFog,
+ uninitializeFog);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/GammaCorrection.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/GammaCorrection.cpp
new file mode 100644
index 0000000000..9d0590448f
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/GammaCorrection.cpp
@@ -0,0 +1,239 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct GammaCorrectionLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pGamma;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshGammaCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num, GammaCorrectionLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pGamma, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
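+ // For sequence layouts, replicate the per-sequence gamma value and ROI across every frame of that sequence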
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pGamma[index + f] = data->pGamma[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateGammaCorrection(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: GammaCorrection: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: GammaCorrection: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processGammaCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ GammaCorrectionLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshGammaCorrection(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_gamma_correction_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pGamma, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_gamma_correction_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pGamma, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeGammaCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ GammaCorrectionLocalData *data = new GammaCorrectionLocalData;
+ memset(data, 0, sizeof(GammaCorrectionLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
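+ // pGamma holds one value per image in the batch (pSrcDesc->n); refreshGammaCorrection fills it and expands per-sequence values for NFHWC/NFCHW inputs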
+ data->pGamma = new vx_float32[data->pSrcDesc->n];
+ refreshGammaCorrection(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeGammaCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ GammaCorrectionLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pGamma;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status GammaCorrection_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GammaCorrection",
+ VX_KERNEL_RPP_GAMMACORRECTION,
+ processGammaCorrection,
+ 8,
+ validateGammaCorrection,
+ initializeGammaCorrection,
+ uninitializeGammaCorrection);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
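+ // Kernel parameters: 0 src tensor, 1 ROI tensor, 2 dst tensor, 3 gamma array,
+ // 4 input layout, 5 output layout, 6 ROI type, 7 device type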
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Glitch.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Glitch.cpp
new file mode 100644
index 0000000000..cadcf32980
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Glitch.cpp
@@ -0,0 +1,270 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct GlitchLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_uint32 *pXOffsetR;
+ vx_uint32 *pYOffsetR;
+ vx_uint32 *pXOffsetG;
+ vx_uint32 *pYOffsetG;
+ vx_uint32 *pXOffsetB;
+ vx_uint32 *pYOffsetB;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshGlitch(vx_node node, const vx_reference *parameters, vx_uint32 num, GlitchLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pXOffsetR, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pYOffsetR, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pXOffsetG, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pYOffsetG, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pXOffsetB, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pYOffsetB, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
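+ // Fetch ROI, source and destination buffer pointers from host or HIP memory depending on the node's target affinity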
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
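+ // For sequence inputs (NFHWC/NFCHW layouts), replicate each sequence's channel offsets and ROI across all of its frames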
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pXOffsetR[index + f] = data->pXOffsetR[n];
+ data->pYOffsetR[index + f] = data->pYOffsetR[n];
+ data->pXOffsetG[index + f] = data->pXOffsetG[n];
+ data->pYOffsetG[index + f] = data->pYOffsetG[n];
+ data->pXOffsetB[index + f] = data->pXOffsetB[n];
+ data->pYOffsetB[index + f] = data->pYOffsetB[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateGlitch(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #10 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #11 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #12 type=%d (must be a VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Glitch: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Glitch: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processGlitch(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_ERROR_NOT_IMPLEMENTED;
+ GlitchLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshGlitch(node, parameters, num, data);
+ // rppt_glitch is not available in RPP TOT, will be enabled once support is added
+ /* if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_glitch_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pXOffsetR, data->pYOffsetR, data->pXOffsetG, data->pYOffsetG, data->pXOffsetB, data->pYOffsetB, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_glitch_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pXOffsetR, data->pYOffsetR, data->pXOffsetG, data->pYOffsetG, data->pXOffsetB, data->pYOffsetB, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }*/
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeGlitch(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ GlitchLocalData *data = new GlitchLocalData;
+ memset(data, 0, sizeof(GlitchLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
+
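+ // One x/y offset per R, G and B channel for each image in the batch; refreshGlitch expands these for sequence layouts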
+ data->pXOffsetR = new vx_uint32[data->pSrcDesc->n];
+ data->pYOffsetR = new vx_uint32[data->pSrcDesc->n];
+ data->pXOffsetG = new vx_uint32[data->pSrcDesc->n];
+ data->pYOffsetG = new vx_uint32[data->pSrcDesc->n];
+ data->pXOffsetB = new vx_uint32[data->pSrcDesc->n];
+ data->pYOffsetB = new vx_uint32[data->pSrcDesc->n];
+ refreshGlitch(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeGlitch(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ GlitchLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pXOffsetR;
+ delete[] data->pYOffsetR;
+ delete[] data->pXOffsetG;
+ delete[] data->pYOffsetG;
+ delete[] data->pXOffsetB;
+ delete[] data->pYOffsetB;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Glitch_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Glitch",
+ VX_KERNEL_RPP_GLITCH,
+ processGlitch,
+ 13,
+ validateGlitch,
+ initializeGlitch,
+ uninitializeGlitch);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
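+ // Kernel parameters: 0 src tensor, 1 ROI tensor, 2 dst tensor, 3-8 x/y offset arrays for R, G and B,
+ // 9 input layout, 10 output layout, 11 ROI type, 12 device type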
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Hue.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Hue.cpp
new file mode 100644
index 0000000000..69d824ecf6
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Hue.cpp
@@ -0,0 +1,254 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct HueLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pHueShift;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+ RppiSize *pSrcDimensions;
+ RppiSize maxSrcDimensions;
+};
+
+static vx_status VX_CALLBACK refreshHue(vx_node node, const vx_reference *parameters, vx_uint32 num, HueLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pHueShift, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ // Fill width and height array with ROI data required by RPP batchPD kernels
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+ data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth;
+ data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight;
+ }
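+ // For sequence inputs (NFHWC/NFCHW layouts), replicate each sequence's hue shift and dimensions across all of its frames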
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pHueShift[index + f] = data->pHueShift[n];
+ data->pSrcDimensions[index + f] = data->pSrcDimensions[n];
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateHue(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be a VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Hue: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Hue: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processHue(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+
+ HueLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshHue(node, parameters, num, data);
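+ // The hueRGB batchPD kernels below operate on 3-channel data, so single-channel tensors are rejected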
+ if (data->pSrcDesc->c == 1)
+ return VX_ERROR_NOT_SUPPORTED;
+
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppi_hueRGB_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pHueShift, data->pSrcDesc->n, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppi_hueRGB_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pHueShift, data->pSrcDesc->n, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeHue(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ HueLocalData *data = new HueLocalData;
+ memset(data, 0, sizeof(HueLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
+
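+ // The batchPD path needs per-image dimensions plus the maximum width/height taken from the source descriptor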
+ data->maxSrcDimensions.height = data->pSrcDesc->h;
+ data->maxSrcDimensions.width = data->pSrcDesc->w;
+ data->pSrcDimensions = new RppiSize[data->pSrcDesc->n];
+ data->pHueShift = new vx_float32[data->pSrcDesc->n];
+ refreshHue(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeHue(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ HueLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pHueShift;
+ delete[] data->pSrcDimensions;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Hue_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Hue",
+ VX_KERNEL_RPP_HUE,
+ processHue,
+ 8,
+ validateHue,
+ initializeHue,
+ uninitializeHue);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Jitter.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Jitter.cpp
new file mode 100644
index 0000000000..9ccd680cd4
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Jitter.cpp
@@ -0,0 +1,241 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct JitterLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_uint32 *pKernelSize;
+ vx_uint32 seed;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshJitter(vx_node node, const vx_reference *parameters, vx_uint32 num, JitterLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pKernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
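+ // For sequence inputs (NFHWC/NFCHW layouts), replicate each sequence's kernel size and ROI across all of its frames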
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pKernelSize[index + f] = data->pKernelSize[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateJitter(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be a VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Jitter: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Jitter: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processJitter(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_ERROR_NOT_IMPLEMENTED;
+ JitterLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshJitter(node, parameters, num, data);
+ // rppt_jitter is not available in RPP TOT, will be enabled once support is added
+ /* if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_HIP
+ rpp_status = rppt_jitter_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pKernelSize, data->seed, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_jitter_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pKernelSize, data->seed, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ } */
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeJitter(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ JitterLocalData *data = new JitterLocalData;
+ memset(data, 0, sizeof(JitterLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->seed, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
+
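+ // One jitter kernel size per image in the batch; refreshJitter expands per-sequence values for NFHWC/NFCHW inputs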
+ data->pKernelSize = new vx_uint32[data->pSrcDesc->n];
+ refreshJitter(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeJitter(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ JitterLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pKernelSize;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Jitter_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Jitter",
+ VX_KERNEL_RPP_JITTER,
+ processJitter,
+ 9,
+ validateJitter,
+ initializeJitter,
+ uninitializeJitter);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
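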
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/LensCorrection.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/LensCorrection.cpp
new file mode 100644
index 0000000000..6c44cb10e0
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/LensCorrection.cpp
@@ -0,0 +1,265 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct LensCorrectionLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pStrength;
+ vx_float32 *pZoom;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+ RppiSize *pSrcDimensions;
+ RppiSize maxSrcDimensions;
+};
+
+static vx_status VX_CALLBACK refreshLensCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num, LensCorrectionLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pStrength, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pZoom, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ // Fill width and height array with ROI data required by RPP batchPD kernels
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+ data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth;
+ data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight;
+ }
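+ // For sequence inputs (NFHWC/NFCHW layouts), replicate each sequence's strength, zoom and dimensions across all of its frames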
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pStrength[index + f] = data->pStrength[n];
+ data->pZoom[index + f] = data->pZoom[n];
+ data->pSrcDimensions[index + f] = data->pSrcDimensions[n];
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateLensCorrection(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be a VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: LensCorrection: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: LensCorrection: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
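+// Execution maps this tensor node onto the RPP lens-correction batchPD kernels; the planar
+// (pln1) or packed (pkd3) variant is selected from the channel count in the source descriptor.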
+static vx_status VX_CALLBACK processLensCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+
+ LensCorrectionLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshLensCorrection(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_lens_correction_u8_pln1_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pStrength, data->pZoom, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_lens_correction_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pStrength, data->pZoom, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_lens_correction_u8_pln1_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pStrength, data->pZoom, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_lens_correction_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pStrength, data->pZoom, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
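+// One-time node setup: read the layout/ROI-type/device scalars, build the RPP descriptors for
+// the input and output tensors, and allocate the per-image host arrays (strength, zoom,
+// source dimensions) sized to pSrcDesc->n.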
+static vx_status VX_CALLBACK initializeLensCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ LensCorrectionLocalData *data = new LensCorrectionLocalData;
+ memset(data, 0, sizeof(LensCorrectionLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
+ data->maxSrcDimensions.height = data->pSrcDesc->h;
+ data->maxSrcDimensions.width = data->pSrcDesc->w;
+ data->pSrcDimensions = new RppiSize[data->pSrcDesc->n];
+ data->pStrength = new vx_float32[data->pSrcDesc->n];
+ data->pZoom = new vx_float32[data->pSrcDesc->n];
+ refreshLensCorrection(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeLensCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ LensCorrectionLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pStrength;
+ delete[] data->pZoom;
+ delete[] data->pSrcDimensions;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status LensCorrection_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LensCorrection",
+ VX_KERNEL_RPP_LENSCORRECTION,
+ processLensCorrection,
+ 9,
+ validateLensCorrection,
+ initializeLensCorrection,
+ uninitializeLensCorrection);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Noise.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Noise.cpp
new file mode 100644
index 0000000000..3c0aa2f9bd
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Noise.cpp
@@ -0,0 +1,266 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct NoiseLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pNoiseProb;
+ vx_float32 *pSaltProb;
+ vx_float32 *pSaltValue;
+ vx_float32 *pPepperValue;
+ vx_uint32 seed;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
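+// Refresh re-reads the per-image noise/salt probabilities and salt/pepper values, picks up the
+// src/dst/ROI buffer pointers for the active backend and, for sequence layouts, replicates the
+// per-image values and ROIs across frames.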
+static vx_status VX_CALLBACK refreshNoise(vx_node node, const vx_reference *parameters, vx_uint32 num, NoiseLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pNoiseProb, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pSaltProb, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pSaltValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pPepperValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pNoiseProb[index + f] = data->pNoiseProb[n];
+ data->pSaltProb[index + f] = data->pSaltProb[n];
+ data->pSaltValue[index + f] = data->pSaltValue[n];
+ data->pPepperValue[index + f] = data->pPepperValue[n];
+ data->pSrcRoi[index + f].xywhROI.xy.x = data->pSrcRoi[n].xywhROI.xy.x;
+ data->pSrcRoi[index + f].xywhROI.xy.y = data->pSrcRoi[n].xywhROI.xy.y;
+ data->pSrcRoi[index + f].xywhROI.roiWidth = data->pSrcRoi[n].xywhROI.roiWidth;
+ data->pSrcRoi[index + f].xywhROI.roiHeight = data->pSrcRoi[n].xywhROI.roiHeight;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateNoise(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #10 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #11 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Noise: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Noise: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
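+// Execution dispatches to the RPP salt-and-pepper noise tensor kernel (HIP or host) based on
+// the node's device affinity; OpenCL is not implemented for the tensor path.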
+static vx_status VX_CALLBACK processNoise(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ NoiseLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshNoise(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_salt_and_pepper_noise_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pNoiseProb, data->pSaltProb, data->pSaltValue, data->pPepperValue, data->seed, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_salt_and_pepper_noise_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pNoiseProb, data->pSaltProb, data->pSaltValue, data->pPepperValue, data->seed, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeNoise(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ NoiseLocalData *data = new NoiseLocalData;
+ memset(data, 0, sizeof(NoiseLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->seed, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
+ data->pNoiseProb = new vx_float32[data->pSrcDesc->n];
+ data->pSaltProb = new vx_float32[data->pSrcDesc->n];
+ data->pSaltValue = new vx_float32[data->pSrcDesc->n];
+ data->pPepperValue = new vx_float32[data->pSrcDesc->n];
+ refreshNoise(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeNoise(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ NoiseLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pNoiseProb;
+ delete[] data->pSaltProb;
+ delete[] data->pSaltValue;
+ delete[] data->pPepperValue;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Noise_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Noise",
+ VX_KERNEL_RPP_NOISE,
+ processNoise,
+ 12,
+ validateNoise,
+ initializeNoise,
+ uninitializeNoise);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Nop.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Nop.cpp
index eb0afba977..374fc7808b 100644
--- a/amd_openvx_extensions/amd_rpp/source/tensor/Nop.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Nop.cpp
@@ -1,5 +1,5 @@
/*
-Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -24,7 +24,7 @@ THE SOFTWARE.
struct NopLocalData {
vxRppHandle handle;
- Rpp32u deviceType;
+ vx_uint32 deviceType;
RppPtr_t pSrc;
RppPtr_t pDst;
};
@@ -51,20 +51,20 @@ static vx_status VX_CALLBACK validateNop(vx_node node, const vx_reference parame
vx_enum scalar_type;
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_UINT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #2 type=%d (must be size)\n", scalar_type);
- // Check for output parameters
+ // Check for output tensor
vx_uint8 tensor_fixed_point_position;
size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
size_t num_tensor_dims;
- vx_enum tensor_type;
+ vx_enum tensor_dtype;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
return status;
}
@@ -94,7 +94,7 @@ static vx_status VX_CALLBACK uninitializeNop(vx_node node, const vx_reference *p
}
//! \brief The kernel target support callback.
-// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Pixelate.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Pixelate.cpp
new file mode 100644
index 0000000000..b515057f3e
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Pixelate.cpp
@@ -0,0 +1,234 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct PixelateLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
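+// Pixelate takes no per-image parameters, so refresh only picks up the src/dst/ROI buffer
+// pointers for the active backend and replicates the ROI across frames for sequence layouts.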
+static vx_status VX_CALLBACK refreshPixelate(vx_node node, const vx_reference *parameters, vx_uint32 num, PixelateLocalData *data) {
+ vx_status status = VX_SUCCESS;
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validatePixelate(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Pixelate: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Pixelate: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processPixelate(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_ERROR_NOT_IMPLEMENTED;
+ PixelateLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshPixelate(node, parameters, num, data);
+ // rppt_pixelate is not available in RPP TOT, will be enabled once support is added
+ /* if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_pixelate_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_pixelate_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ } */
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializePixelate(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ PixelateLocalData *data = new PixelateLocalData;
+ memset(data, 0, sizeof(PixelateLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
+ refreshPixelate(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializePixelate(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ PixelateLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Pixelate_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Pixelate",
+ VX_KERNEL_RPP_PIXELATE,
+ processPixelate,
+ 7,
+ validatePixelate,
+ initializePixelate,
+ uninitializePixelate);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Rain.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Rain.cpp
new file mode 100644
index 0000000000..a02e726496
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Rain.cpp
@@ -0,0 +1,277 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct RainLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pRainValue;
+ vx_uint32 *pRainWidth;
+ vx_uint32 *pRainHeight;
+ vx_float32 *pRainTransperancy;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+ RppiSize *pSrcDimensions;
+ RppiSize maxSrcDimensions;
+};
+
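+// Refresh re-reads the per-image rain value, width, height and transparency arrays, picks up
+// the src/dst/ROI buffer pointers for the active backend, and fills the width/height arrays
+// required by the RPP batchPD kernels (replicated across frames for sequence layouts).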
+static vx_status VX_CALLBACK refreshRain(vx_node node, const vx_reference *parameters, vx_uint32 num, RainLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pRainValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pRainWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pRainHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pRainTransperancy, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ // Fill width and height array with ROI data required by RPP batchPD kernels
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+ data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth;
+ data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight;
+ }
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pRainValue[index + f] = data->pRainValue[n];
+ data->pRainWidth[index + f] = data->pRainWidth[n];
+ data->pRainHeight[index + f] = data->pRainHeight[n];
+ data->pRainTransperancy[index + f] = data->pRainTransperancy[n];
+ data->pSrcDimensions[index + f] = data->pSrcDimensions[n];
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateRain(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #10 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Rain: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Rain: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
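+// Like the other batchPD-backed nodes, execution selects the pln1 (single-channel) or pkd3
+// (packed 3-channel) RPP rain kernel from the source descriptor's channel count.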
+static vx_status VX_CALLBACK processRain(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ RainLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshRain(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_rain_u8_pln1_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pRainValue, data->pRainWidth, data->pRainHeight, data->pRainTransperancy, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_rain_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pRainValue, data->pRainWidth, data->pRainHeight, data->pRainTransperancy, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_rain_u8_pln1_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pRainValue, data->pRainWidth, data->pRainHeight, data->pRainTransperancy, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_rain_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pRainValue, data->pRainWidth, data->pRainHeight, data->pRainTransperancy, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeRain(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RainLocalData *data = new RainLocalData;
+ memset(data, 0, sizeof(RainLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
+ data->pRainValue = new vx_float32[data->pSrcDesc->n];
+ data->pRainWidth = new vx_uint32[data->pSrcDesc->n];
+ data->pRainHeight = new vx_uint32[data->pSrcDesc->n];
+ data->pRainTransperancy = new vx_float32[data->pSrcDesc->n];
+ data->pSrcDimensions = new RppiSize[data->pSrcDesc->n];
+
+ data->maxSrcDimensions.height = data->pSrcDesc->h;
+ data->maxSrcDimensions.width = data->pSrcDesc->w;
+ refreshRain(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeRain(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RainLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pRainHeight;
+ delete[] data->pRainWidth;
+ delete[] data->pRainTransperancy;
+ delete[] data->pRainValue;
+ delete[] data->pSrcDimensions;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Rain_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Rain",
+ VX_KERNEL_RPP_RAIN,
+ processRain,
+ 11,
+ validateRain,
+ initializeRain,
+ uninitializeRain);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Resize.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Resize.cpp
index afd120d5eb..755b69a1a5 100644
--- a/amd_openvx_extensions/amd_rpp/source/tensor/Resize.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Resize.cpp
@@ -1,5 +1,5 @@
/*
-Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -24,7 +24,7 @@ THE SOFTWARE.
struct ResizeLocalData {
vxRppHandle *handle;
- Rpp32u deviceType;
+ vx_uint32 deviceType;
RppPtr_t pSrc;
RppPtr_t pDst;
vx_uint32 *pResizeHeight;
@@ -38,14 +38,14 @@ struct ResizeLocalData {
size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
RpptImagePatch *pDstImgSize;
- Rpp32s interpolationType;
+ RpptInterpolationType interpolationType;
};
static vx_status VX_CALLBACK refreshResize(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeLocalData *data) {
vx_status status = VX_SUCCESS;
STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pResizeWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pResizeHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
- for (int i = 0; i < data->pSrcDesc->n; i++) {
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
data->pDstImgSize[i].width = data->pResizeWidth[i];
data->pDstImgSize[i].height = data->pResizeHeight[i];
}
@@ -64,11 +64,11 @@ static vx_status VX_CALLBACK refreshResize(vx_node node, const vx_reference *par
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
}
data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
- if((data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW)) {
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
- for(int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
unsigned index = n * num_of_frames;
- for(int f = 0; f < num_of_frames; f++) {
+ for (unsigned f = 0; f < num_of_frames; f++) {
data->pDstImgSize[index + f] = data->pDstImgSize[n];
data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
}
@@ -82,37 +82,37 @@ static vx_status VX_CALLBACK validateResize(vx_node node, const vx_reference par
vx_enum scalar_type;
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_INT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_INT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be a boolean size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be a boolean size)\n", scalar_type);
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_INT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_INT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type);
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
if (scalar_type != VX_TYPE_UINT32)
- return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type);
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type);
- // Check for input parameters
+ // Check for input tensor
size_t num_tensor_dims;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
if(num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Resize: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
- // Check for output parameters
+ // Check for output tensor
vx_uint8 tensor_fixed_point_position;
size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
- vx_enum tensor_type;
+ vx_enum tensor_dtype;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
if(num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Resize: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
return status;
}
@@ -127,11 +127,11 @@ static vx_status VX_CALLBACK processResize(vx_node node, const vx_reference *par
#if ENABLE_OPENCL
return_status = VX_ERROR_NOT_IMPLEMENTED;
#elif ENABLE_HIP
- rpp_status = rppt_resize_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pDstImgSize, (RpptInterpolationType)data->interpolationType, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ rpp_status = rppt_resize_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pDstImgSize, data->interpolationType, data->pSrcRoi, data->roiType, data->handle->rppHandle);
return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
#endif
} else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
- rpp_status = rppt_resize_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pDstImgSize, (RpptInterpolationType)data->interpolationType, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ rpp_status = rppt_resize_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pDstImgSize, data->interpolationType, data->pSrcRoi, data->roiType, data->handle->rppHandle);
return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
}
return return_status;
@@ -141,23 +141,24 @@ static vx_status VX_CALLBACK initializeResize(vx_node node, const vx_reference *
ResizeLocalData *data = new ResizeLocalData;
memset(data, 0, sizeof(ResizeLocalData));
- vx_enum input_tensor_type, output_tensor_type;
- int roi_type, input_layout, output_layout;
- STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->interpolationType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout, interpolation_type;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &interpolation_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
- data->roiType = (roi_type == 0) ? RpptRoiType::XYWH : RpptRoiType::LTRB;
+ data->roiType = static_cast<RpptRoiType>(roi_type);
data->inputLayout = static_cast<vxTensorLayout>(input_layout);
data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+ data->interpolationType = static_cast<RpptInterpolationType>(interpolation_type);
// Querying for input tensor
data->pSrcDesc = new RpptDesc;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_type, sizeof(input_tensor_type)));
- data->pSrcDesc->dataType = getRpptDataType(input_tensor_type);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
data->pSrcDesc->offsetInBytes = 0;
fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
@@ -165,18 +166,18 @@ static vx_status VX_CALLBACK initializeResize(vx_node node, const vx_reference *
data->pDstDesc = new RpptDesc;
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
- STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2],VX_TENSOR_DATA_TYPE, &output_tensor_type, sizeof(output_tensor_type)));
- data->pDstDesc->dataType = getRpptDataType(output_tensor_type);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2],VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
data->pDstDesc->offsetInBytes = 0;
fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
#if ENABLE_HIP
hipHostMalloc(&data->pDstImgSize, data->pSrcDesc->n * sizeof(RpptImagePatch));
#else
- data->pDstImgSize = static_cast<RpptImagePatch *>(calloc(data->pSrcDesc->n, sizeof(RpptImagePatch)));
+ data->pDstImgSize = new RpptImagePatch[data->pSrcDesc->n];
#endif
- data->pResizeWidth = static_cast<vx_uint32 *>(malloc(sizeof(vx_uint32) * data->pSrcDesc->n));
- data->pResizeHeight = static_cast<vx_uint32 *>(malloc(sizeof(vx_uint32) * data->pSrcDesc->n));
+ data->pResizeWidth = new vx_uint32[data->pSrcDesc->n];
+ data->pResizeHeight = new vx_uint32[data->pSrcDesc->n];
refreshResize(node, parameters, num, data);
STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
@@ -186,22 +187,22 @@ static vx_status VX_CALLBACK initializeResize(vx_node node, const vx_reference *
static vx_status VX_CALLBACK uninitializeResize(vx_node node, const vx_reference *parameters, vx_uint32 num) {
ResizeLocalData *data;
STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
- if (data->pResizeWidth != nullptr) free(data->pResizeWidth);
- if (data->pResizeHeight != nullptr) free(data->pResizeHeight);
+ delete[] data->pResizeWidth;
+ delete[] data->pResizeHeight;
#if ENABLE_HIP
if (data->pDstImgSize != nullptr) hipHostFree(data->pDstImgSize);
#else
- if (data->pDstImgSize != nullptr) free(data->pDstImgSize);
+ delete[] data->pDstImgSize;
#endif
- delete(data->pSrcDesc);
- delete(data->pDstDesc);
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
- delete (data);
+ delete data;
return VX_SUCCESS;
}
//! \brief The kernel target support callback.
-// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ResizeCrop.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ResizeCrop.cpp
new file mode 100644
index 0000000000..d433bba723
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/ResizeCrop.cpp
@@ -0,0 +1,297 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct ResizeCropLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_uint32 *pDstBatchWidth;
+ vx_uint32 *pDstBatchHeight;
+ vx_uint32 *pX1;
+ vx_uint32 *pY1;
+ vx_uint32 *pX2;
+ vx_uint32 *pY2;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptROI *pCropRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+ RppiSize *pSrcDimensions;
+ RppiSize maxSrcDimensions;
+ RppiSize *pDstDimensions;
+ RppiSize maxDstDimensions;
+};
+
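+// Refreshes per-image parameters before each launch: copies the destination width/height arrays, reads the source/crop ROI and data buffers, and derives per-image crop coordinates and source/destination dimensions (replicated per frame for sequence layouts).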
+static vx_status VX_CALLBACK refreshResizeCrop(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeCropLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pDstBatchWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pDstBatchHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr, *crop_roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &crop_roi_tensor_ptr, sizeof(crop_roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &crop_roi_tensor_ptr, sizeof(crop_roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ data->pCropRoi = reinterpret_cast<RpptROI *>(crop_roi_tensor_ptr);
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+ data->pX1[i] = data->pCropRoi[i].xywhROI.xy.x;
+ data->pY1[i] = data->pCropRoi[i].xywhROI.xy.y;
+ data->pX2[i] = data->pCropRoi[i].xywhROI.xy.x + data->pCropRoi[i].xywhROI.roiWidth;
+ data->pY2[i] = data->pCropRoi[i].xywhROI.xy.y + data->pCropRoi[i].xywhROI.roiHeight;
+ data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth;
+ data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight;
+ data->pDstDimensions[i].width = data->pDstBatchWidth[i];
+ data->pDstDimensions[i].height = data->pDstBatchHeight[i];
+ }
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pX1[index + f] = data->pX1[n];
+ data->pY1[index + f] = data->pY1[n];
+ data->pX2[index + f] = data->pX2[n];
+ data->pY2[index + f] = data->pY2[n];
+ data->pSrcDimensions[index + f] = data->pSrcDimensions[n];
+ data->pDstDimensions[index + f] = data->pDstDimensions[n];
+ }
+ }
+ }
+ return status;
+}
+
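+// Validates the scalar parameter types and input/output tensor dimensionality, then propagates the output tensor metadata.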
+static vx_status VX_CALLBACK validateResizeCrop(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ResizeCrop: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ResizeCrop: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
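+// Dispatches the RPP resize-crop batchPD kernel (PLN1 or PKD3 variant) on the host or HIP backend based on the node's device affinity.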
+static vx_status VX_CALLBACK processResizeCrop(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ vx_int32 output_format_toggle = 0;
+
+ ResizeCropLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshResizeCrop(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_resize_crop_u8_pln1_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pDstDimensions, data->maxDstDimensions, data->pX1, data->pX2, data->pY1, data->pY2, output_format_toggle, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_resize_crop_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pDstDimensions, data->maxDstDimensions, data->pX1, data->pX2, data->pY1, data->pY2, output_format_toggle, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_resize_crop_u8_pln1_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pDstDimensions, data->maxDstDimensions, data->pX1, data->pX2, data->pY1, data->pY2, output_format_toggle, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_resize_crop_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pDstDimensions, data->maxDstDimensions, data->pX1, data->pX2, data->pY1, data->pY2, output_format_toggle, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
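+// Reads the layout/ROI-type/device scalars, builds the RPP source and destination descriptors from tensor queries, allocates per-image buffers, and creates the RPP handle.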
+static vx_status VX_CALLBACK initializeResizeCrop(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ResizeCropLocalData *data = new ResizeCropLocalData;
+ memset(data, 0, sizeof(ResizeCropLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
+ data->pDstBatchWidth = new vx_uint32[data->pSrcDesc->n];
+ data->pDstBatchHeight = new vx_uint32[data->pSrcDesc->n];
+ data->pSrcDimensions = new RppiSize[data->pSrcDesc->n];
+ data->pDstDimensions = new RppiSize[data->pSrcDesc->n];
+ data->pX1 = new vx_uint32[data->pSrcDesc->n];
+ data->pY1 = new vx_uint32[data->pSrcDesc->n];
+ data->pX2 = new vx_uint32[data->pSrcDesc->n];
+ data->pY2 = new vx_uint32[data->pSrcDesc->n];
+ data->maxSrcDimensions.height = data->pSrcDesc->h;
+ data->maxSrcDimensions.width = data->pSrcDesc->w;
+ data->maxDstDimensions.height = data->pDstDesc->h;
+ data->maxDstDimensions.width = data->pDstDesc->w;
+ refreshResizeCrop(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeResizeCrop(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ResizeCropLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pDstBatchWidth;
+ delete[] data->pDstBatchHeight;
+ delete[] data->pSrcDimensions;
+ delete[] data->pDstDimensions;
+ delete[] data->pX1;
+ delete[] data->pY1;
+ delete[] data->pX2;
+ delete[] data->pY2;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+ return VX_SUCCESS;
+}
+
+vx_status ResizeCrop_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeCrop",
+ VX_KERNEL_RPP_RESIZECROP,
+ processResizeCrop,
+ 10,
+ validateResizeCrop,
+ initializeResizeCrop,
+ uninitializeResizeCrop);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ResizeCropMirror.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ResizeCropMirror.cpp
new file mode 100644
index 0000000000..465663a981
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/ResizeCropMirror.cpp
@@ -0,0 +1,272 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct ResizeCropMirrorLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_uint32 *pResizeHeight;
+ vx_uint32 *pResizeWidth;
+ vx_uint32 *pMirror;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+ RpptImagePatch *pDstImgSize;
+ RpptInterpolationType interpolationType;
+};
+
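+// Refreshes per-image resize sizes and mirror flags from the node parameters and re-reads the ROI and data buffers before each launch (replicated per frame for sequence layouts).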
+static vx_status VX_CALLBACK refreshResizeCropMirror(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeCropMirrorLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pResizeWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pResizeHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pMirror, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+ data->pDstImgSize[i].width = data->pResizeWidth[i];
+ data->pDstImgSize[i].height = data->pResizeHeight[i];
+ }
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pMirror[index + f] = data->pMirror[n];
+ data->pDstImgSize[index + f] = data->pDstImgSize[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateResizeCropMirror(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be a boolean size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #10 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ResizeCropMirror: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ResizeCropMirror: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
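+// Dispatches rppt_resize_crop_mirror to the host or HIP backend based on the node's device affinity.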
+static vx_status VX_CALLBACK processResizeCropMirror(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ ResizeCropMirrorLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshResizeCropMirror(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_resize_crop_mirror_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pDstImgSize, data->interpolationType, data->pMirror, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_resize_crop_mirror_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pDstImgSize, data->interpolationType, data->pMirror, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
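+// Reads the interpolation/layout/ROI-type/device scalars, builds the RPP tensor descriptors, allocates the destination image-size and parameter buffers, and creates the RPP handle.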
+static vx_status VX_CALLBACK initializeResizeCropMirror(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ResizeCropMirrorLocalData *data = new ResizeCropMirrorLocalData;
+ memset(data, 0, sizeof(ResizeCropMirrorLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, inputLayout, outputLayout, interpolation_type;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &interpolation_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &inputLayout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &outputLayout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(inputLayout);
+ data->outputLayout = static_cast<vxTensorLayout>(outputLayout);
+ data->interpolationType = static_cast<RpptInterpolationType>(interpolation_type);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
+#if ENABLE_HIP
+ hipHostMalloc(&data->pDstImgSize, data->pSrcDesc->n * sizeof(RpptImagePatch));
+#else
+ data->pDstImgSize = new RpptImagePatch[data->pSrcDesc->n];
+#endif
+ data->pResizeWidth = new vx_uint32[data->pSrcDesc->n];
+ data->pResizeHeight = new vx_uint32[data->pSrcDesc->n];
+ data->pMirror = new vx_uint32[data->pSrcDesc->n];
+ refreshResizeCropMirror(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeResizeCropMirror(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ResizeCropMirrorLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pResizeWidth;
+ delete[] data->pResizeHeight;
+ delete[] data->pMirror;
+#if ENABLE_HIP
+ if (data->pDstImgSize != nullptr) hipHostFree(data->pDstImgSize);
+#else
+ delete[] data->pDstImgSize;
+#endif
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status ResizeCropMirror_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeCropMirror",
+ VX_KERNEL_RPP_RESIZECROPMIRROR,
+ processResizeCropMirror,
+ 11,
+ validateResizeCropMirror,
+ initializeResizeCropMirror,
+ uninitializeResizeCropMirror);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ResizeMirrorNormalize.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ResizeMirrorNormalize.cpp
new file mode 100644
index 0000000000..55cf6e207a
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/ResizeMirrorNormalize.cpp
@@ -0,0 +1,290 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct ResizeMirrorNormalizeLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ vx_uint32 *pResizeHeight;
+ vx_uint32 *pResizeWidth;
+ vx_float32 *pMean;
+ vx_float32 *pStdDev;
+ vx_uint32 *pMirror;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; // will have NHWC info
+ size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+ RpptInterpolationType interpolationType;
+ RpptImagePatch *pDstImgSize;
+};
+
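+// Refreshes per-image resize sizes, per-channel mean/std-dev values and mirror flags, and re-reads the ROI and data buffers before each launch (replicated per frame for sequence layouts).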
+static vx_status VX_CALLBACK refreshResizeMirrorNormalize(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeMirrorNormalizeLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pResizeWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pResizeHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->inputTensorDims[0] * data->pSrcDesc->c, sizeof(vx_float32), data->pMean, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->inputTensorDims[0] * data->pSrcDesc->c, sizeof(vx_float32), data->pStdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->pMirror, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+ data->pDstImgSize[i].width = data->pResizeWidth[i];
+ data->pDstImgSize[i].height = data->pResizeHeight[i];
+ }
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ data->pDstImgSize[index + f] = data->pDstImgSize[n];
+ data->pMirror[index + f] = data->pMirror[n];
+ for (unsigned c = 0; c < data->pSrcDesc->c; c++) {
+ int dst_ind = (index + f) * data->pSrcDesc->c + c;
+ int src_ind = n * data->pSrcDesc->c + c;
+ data->pMean[dst_ind] = data->pMean[src_ind];
+ data->pStdDev[dst_ind] = data->pStdDev[src_ind];
+ }
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateResizeMirrorNormalize(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be a boolean size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #10 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #11 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #12 type=%d (must be size)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ResizeMirrorNormalize: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ResizeMirrorNormalize: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+
+ return status;
+}
+
+static vx_status VX_CALLBACK processResizeMirrorNormalize(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ ResizeMirrorNormalizeLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshResizeMirrorNormalize(node, parameters, num, data);
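+ // Run the RPP resize-mirror-normalize kernel on the backend selected by the node affinity: HIP for GPU builds, host otherwise (OpenCL is not implemented)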
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_resize_mirror_normalize_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pDstImgSize, RpptInterpolationType::BILINEAR,
+ data->pMean, data->pStdDev, data->pMirror, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_resize_mirror_normalize_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pDstImgSize, RpptInterpolationType::BILINEAR,
+ data->pMean, data->pStdDev, data->pMirror, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeResizeMirrorNormalize(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ ResizeMirrorNormalizeLocalData *data = new ResizeMirrorNormalizeLocalData;
+ memset(data, 0, sizeof(ResizeMirrorNormalizeLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout, interpolation_type;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &interpolation_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+ data->interpolationType = static_cast<RpptInterpolationType>(interpolation_type);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->outputTensorDims);
+
+ data->pResizeWidth = new vx_uint32[data->pSrcDesc->n];
+ data->pResizeHeight = new vx_uint32[data->pSrcDesc->n];
+ data->pMean = new vx_float32[data->pSrcDesc->n * data->pSrcDesc->c];
+ data->pStdDev = new vx_float32[data->pSrcDesc->n * data->pSrcDesc->c];
+ data->pMirror = new vx_uint32[data->pSrcDesc->n];
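+ // On HIP builds pDstImgSize is allocated as pinned host memory (hipHostMalloc) so the GPU path can read it; a plain heap allocation is used otherwise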
+#if ENABLE_HIP
+ hipHostMalloc(&data->pDstImgSize, data->pSrcDesc->n * sizeof(RpptImagePatch));
+#else
+ data->pDstImgSize = new RpptImagePatch[data->pSrcDesc->n];
+#endif
+ refreshResizeMirrorNormalize(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeResizeMirrorNormalize(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+ ResizeMirrorNormalizeLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pResizeWidth;
+ delete[] data->pResizeHeight;
+ delete[] data->pMean;
+ delete[] data->pStdDev;
+ delete[] data->pMirror;
+#if ENABLE_HIP
+ if (data->pDstImgSize != nullptr) hipHostFree(data->pDstImgSize);
+#else
+ delete[] data->pDstImgSize;
+#endif
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status ResizeMirrorNormalize_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeMirrorNormalize",
+ VX_KERNEL_RPP_RESIZEMIRRORNORMALIZE,
+ processResizeMirrorNormalize,
+ 13,
+ validateResizeMirrorNormalize,
+ initializeResizeMirrorNormalize,
+ uninitializeResizeMirrorNormalize);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_OPENCL || ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Rotate.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Rotate.cpp
new file mode 100644
index 0000000000..38dd98a002
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Rotate.cpp
@@ -0,0 +1,243 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct RotateLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pAngle;
+ RpptInterpolationType interpolationType;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshRotate(vx_node node, const vx_reference *parameters, vx_uint32 num, RotateLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pAngle, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
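+ // For sequence layouts (NFHWC/NFCHW), replicate each batch entry's angle and ROI across all frames of that sequence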
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pAngle[index + f] = data->pAngle[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateRotate(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be a VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Rotate: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Rotate: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processRotate(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ RotateLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshRotate(node, parameters, num, data);
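+ // Run the RPP rotate kernel on the backend selected by the node affinity: HIP for GPU builds, host otherwise (OpenCL is not implemented)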
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_rotate_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pAngle, data->interpolationType, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_rotate_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pAngle, data->interpolationType, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeRotate(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RotateLocalData *data = new RotateLocalData;
+ memset(data, 0, sizeof(RotateLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout, interpolation_type;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &interpolation_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+ data->interpolationType = static_cast<RpptInterpolationType>(interpolation_type);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
+
+ data->pAngle = new vx_float32[data->pSrcDesc->n];
+ refreshRotate(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeRotate(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RotateLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pAngle;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Rotate_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Rotate",
+ VX_KERNEL_RPP_ROTATE,
+ processRotate,
+ 9,
+ validateRotate,
+ initializeRotate,
+ uninitializeRotate);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Saturation.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Saturation.cpp
new file mode 100644
index 0000000000..d5d8672ca7
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Saturation.cpp
@@ -0,0 +1,255 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct SaturationLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pSaturationFactor;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+ RppiSize *pSrcDimensions;
+ RppiSize maxSrcDimensions;
+};
+
+static vx_status VX_CALLBACK refreshSaturation(vx_node node, const vx_reference *parameters, vx_uint32 num, SaturationLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pSaturationFactor, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ // Fill width and height array with ROI data required by RPP batchPD kernels
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+ data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth;
+ data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight;
+ }
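+ // For sequence layouts (NFHWC/NFCHW), replicate each batch entry's saturation factor and dimensions across all frames of that sequence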
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pSaturationFactor[index + f] = data->pSaturationFactor[n];
+ data->pSrcDimensions[index + f] = data->pSrcDimensions[n];
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateSaturation(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be a VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Saturation: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Saturation: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processSaturation(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ SaturationLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshSaturation(node, parameters, num, data);
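+ // The rppi saturation batchPD kernels only handle packed 3-channel (RGB) data, so single-channel inputs are rejected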
+ if (data->pSrcDesc->c == 1)
+ return VX_ERROR_NOT_SUPPORTED;
+
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSaturationFactor, data->inputTensorDims[0], data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSaturationFactor, data->inputTensorDims[0], data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeSaturation(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ SaturationLocalData *data = new SaturationLocalData;
+ memset(data, 0, sizeof(SaturationLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
+
+ data->pSaturationFactor = new vx_float32[data->pSrcDesc->n];
+ data->pSrcDimensions = new RppiSize[data->pSrcDesc->n];
+
+ data->maxSrcDimensions.height = data->pSrcDesc->h;
+ data->maxSrcDimensions.width = data->pSrcDesc->w;
+ refreshSaturation(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeSaturation(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ SaturationLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pSaturationFactor;
+ delete[] data->pSrcDimensions;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Saturation_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Saturation",
+ VX_KERNEL_RPP_SATURATION,
+ processSaturation,
+ 8,
+ validateSaturation,
+ initializeSaturation,
+ uninitializeSaturation);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/SequenceRearrange.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/SequenceRearrange.cpp
new file mode 100644
index 0000000000..74e862e215
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/SequenceRearrange.cpp
@@ -0,0 +1,253 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct SequenceRearrangeLocalData {
+ vxRppHandle *handle;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_uint32 deviceType;
+ vx_uint32 newSequenceLength;
+ vx_uint32 sequenceLength;
+ vx_uint32 *pNewOrder;
+ vxTensorLayout layout;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+#if ENABLE_OPENCL
+ cl_mem pClSrc;
+ cl_mem pClDst;
+#endif
+};
+
+static vx_status VX_CALLBACK refreshSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num, SequenceRearrangeLocalData *data) {
+ vx_status status = VX_SUCCESS;
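+ // Read the user-requested frame order for each sequence from the new-order array parameter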
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->newSequenceLength, sizeof(vx_uint32), data->pNewOrder, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_OPENCL, &data->pClSrc, sizeof(data->pClSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_OPENCL, &data->pClDst, sizeof(data->pClDst)));
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateSequenceRearrange(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be a VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims != 5) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: SequenceRearrange: tensor: #0 dimensions=%lu (must be equal to 5)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims != 5) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: SequenceRearrange: tensor: #1 dimensions=%lu (must be equal to 5)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+
+ return status;
+}
+
+static vx_status VX_CALLBACK processSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ SequenceRearrangeLocalData *data = NULL;
+ vx_status status = VX_SUCCESS;
+
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshSequenceRearrange(node, parameters, num, data);
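+ // Rearrangement is done with raw frame-sized copies (nStride bytes per frame) on the selected backend; no RPP kernel is needed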
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ cl_command_queue handle = data->handle->cmdq;
+ for (unsigned sequence_cnt = 0; sequence_cnt < data->pSrcDesc->n; sequence_cnt++) {
+ unsigned src_sequence_start_address = sequence_cnt * data->pSrcDesc->strides.nStride * data->sequenceLength;
+ unsigned dst_sequence_start_address = sequence_cnt * data->pDstDesc->strides.nStride * data->newSequenceLength;
+ for (unsigned dst_index = 0; dst_index < data->newSequenceLength; dst_index++) {
+ unsigned src_index = data->pNewOrder[dst_index];
+ if (src_index >= data->sequenceLength)
+ return ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequenceLength - 1);
+ auto dst_offset = dst_sequence_start_address + (dst_index * data->pSrcDesc->strides.nStride);
+ auto src_offset = src_sequence_start_address + (src_index * data->pDstDesc->strides.nStride);
+ if (clEnqueueCopyBuffer(handle, data->pClSrc, data->pClDst, src_offset, dst_offset, data->pSrcDesc->strides.nStride, 0, NULL, NULL) != CL_SUCCESS)
+ return VX_FAILURE;
+ }
+ }
+#elif ENABLE_HIP
+ for (unsigned sequence_cnt = 0; sequence_cnt < data->pSrcDesc->n; sequence_cnt++) {
+ unsigned src_sequence_start_address = sequence_cnt * data->pSrcDesc->strides.nStride * data->sequenceLength;
+ unsigned dst_sequence_start_address = sequence_cnt * data->pDstDesc->strides.nStride * data->newSequenceLength;
+ for (unsigned dst_index = 0; dst_index < (data->newSequenceLength); dst_index++) {
+ unsigned src_index = data->pNewOrder[dst_index];
+ if (src_index >= data->sequenceLength)
+ return ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequenceLength - 1);
+ auto dst_address = static_cast<unsigned char *>(data->pDst) + dst_sequence_start_address + (dst_index * data->pSrcDesc->strides.nStride);
+ auto src_address = static_cast<unsigned char *>(data->pSrc) + src_sequence_start_address + (src_index * data->pDstDesc->strides.nStride);
+ hipError_t status = hipMemcpyDtoD(dst_address, src_address, data->pSrcDesc->strides.nStride);
+ if (status != hipSuccess)
+ return VX_FAILURE;
+ }
+ }
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ for (unsigned sequence_cnt = 0; sequence_cnt < data->pSrcDesc->n; sequence_cnt++) {
+ unsigned src_sequence_start_address = sequence_cnt * data->pSrcDesc->strides.nStride * data->sequenceLength;
+ unsigned dst_sequence_start_address = sequence_cnt * data->pDstDesc->strides.nStride * data->newSequenceLength;
+ for (unsigned dst_index = 0; dst_index < (data->newSequenceLength); dst_index++) {
+ unsigned src_index = data->pNewOrder[dst_index];
+ if (src_index >= data->sequenceLength)
+ return ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequenceLength - 1);
+ auto dst_address = static_cast<unsigned char *>(data->pDst) + dst_sequence_start_address + (dst_index * data->pSrcDesc->strides.nStride);
+ auto src_address = static_cast<unsigned char *>(data->pSrc) + src_sequence_start_address + (src_index * data->pDstDesc->strides.nStride);
+ memcpy(dst_address, src_address, data->pSrcDesc->strides.nStride);
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK initializeSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ SequenceRearrangeLocalData *data = new SequenceRearrangeLocalData;
+ memset(data, 0, sizeof(SequenceRearrangeLocalData));
+
+ int layout;
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->layout = static_cast<vxTensorLayout>(layout);
+
+ vx_size in_num_of_dims, out_num_of_dims;
+ size_t in_tensor_dims[RPP_MAX_TENSOR_DIMS], out_tensor_dims[RPP_MAX_TENSOR_DIMS];
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &in_num_of_dims, sizeof(vx_size)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, in_tensor_dims, sizeof(vx_size) * in_num_of_dims));
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->layout, in_tensor_dims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &out_num_of_dims, sizeof(vx_size)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, out_tensor_dims, sizeof(vx_size) * out_num_of_dims));
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->layout, out_tensor_dims);
+
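+ // dims[0] holds the number of sequences in the batch and dims[1] the number of frames per sequence for both tensors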
+ data->pSrcDesc->n = in_tensor_dims[0];
+ data->sequenceLength = in_tensor_dims[1];
+
+ data->pDstDesc->n = out_tensor_dims[0];
+ data->newSequenceLength = out_tensor_dims[1];
+ data->pNewOrder = new vx_uint32[data->newSequenceLength];
+ refreshSequenceRearrange(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ SequenceRearrangeLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pNewOrder;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes
+#if ENABLE_OPENCL
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+#endif
+ return VX_SUCCESS;
+}
+
+vx_status SequenceRearrange_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SequenceRearrange",
+ VX_KERNEL_RPP_SEQUENCEREARRANGE,
+ processSequenceRearrange,
+ 5,
+ validateSequenceRearrange,
+ initializeSequenceRearrange,
+ uninitializeSequenceRearrange);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_OPENCL || ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Snow.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Snow.cpp
new file mode 100644
index 0000000000..05fab19e8c
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Snow.cpp
@@ -0,0 +1,258 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct SnowLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pSnowValue;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+ RppiSize *pSrcDimensions;
+ RppiSize maxSrcDimensions;
+};
+
+static vx_status VX_CALLBACK refreshSnow(vx_node node, const vx_reference *parameters, vx_uint32 num, SnowLocalData *data) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pSnowValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
+ // Fill width and height array with ROI data required by RPP batchPD kernels
+ for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+ data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth;
+ data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight;
+ }
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) { // For NFHWC and NFCHW formats
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pSnowValue[index + f] = data->pSnowValue[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateSnow(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be a VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be a VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Snow: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Snow: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
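+ // Propagate the queried output tensor attributes to the meta format so graph validation can check downstream consumers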
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processSnow(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ SnowLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshSnow(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
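+ // Pick the RPP batchPD GPU kernel by channel count: PLN1 (planar single-channel) or PKD3 (packed 3-channel)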
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_snow_u8_pln1_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSnowValue, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_snow_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSnowValue, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ if (data->pSrcDesc->c == 1) {
+ rpp_status = rppi_snow_u8_pln1_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSnowValue, data->pSrcDesc->n, data->handle->rppHandle);
+ } else {
+ rpp_status = rppi_snow_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSnowValue, data->pSrcDesc->n, data->handle->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeSnow(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ SnowLocalData *data = new SnowLocalData;
+ memset(data, 0, sizeof(SnowLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
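+ // Scalar parameters: #4 input layout, #5 output layout, #6 ROI type, #7 device affinity (CPU/GPU)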
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
+
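+ // The batchPD API needs per-image snow values, per-image source dimensions, and the maximum source dimensions taken from the tensor descriptor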
+ data->pSnowValue = new vx_float32[data->pSrcDesc->n];
+ data->pSrcDimensions = new RppiSize[data->pSrcDesc->n];
+ data->maxSrcDimensions.height = data->pSrcDesc->h;
+ data->maxSrcDimensions.width = data->pSrcDesc->w;
+ refreshSnow(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeSnow(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ SnowLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pSnowValue;
+ delete[] data->pSrcDimensions;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO: currently the node sets the same affinity as the context. This needs to change when hybrid modes are supported in the same graph.
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Snow_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Snow",
+ VX_KERNEL_RPP_SNOW,
+ processSnow,
+ 8,
+ validateSnow,
+ initializeSnow,
+ uninitializeSnow);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
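+ // Kernel parameters: #0 src tensor, #1 ROI tensor, #2 dst tensor, #3 snow value array, #4 input layout, #5 output layout, #6 ROI type, #7 device type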
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Vignette.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Vignette.cpp
new file mode 100644
index 0000000000..dbbc4156f8
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/Vignette.cpp
@@ -0,0 +1,240 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct VignetteLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pStdDev;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshVignette(vx_node node, const vx_reference *parameters, vx_uint32 num, VignetteLocalData *data) {
+ vx_status status = VX_SUCCESS;
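+ // Copy the per-image vignette standard deviation values (one vx_float32 per batch entry) from the input array parameter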
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pStdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
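+ // For sequence inputs, replicate each sequence's std-dev value and ROI across all of its frames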
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ data->pStdDev[index + f] = data->pStdDev[n];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateVignette(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Vignette: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Vignette: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processVignette(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_ERROR_NOT_IMPLEMENTED;
+ VignetteLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshVignette(node, parameters, num, data);
+ // rppt_vignette is not available in RPP TOT, will be enabled once support is added
+ /* if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ rpp_status = rppt_vignette_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pStdDev, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_vignette_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pStdDev, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ } */
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeVignette(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ VignetteLocalData *data = new VignetteLocalData;
+ memset(data, 0, sizeof(VignetteLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout;
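+ // Scalar parameters: #4 input layout, #5 output layout, #6 ROI type, #7 device affinity (CPU/GPU)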
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
+
+ data->pStdDev = new vx_float32[data->pSrcDesc->n];
+ refreshVignette(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeVignette(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ VignetteLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pStdDev;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO: currently the node sets the same affinity as the context. This needs to change when hybrid modes are supported in the same graph.
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status Vignette_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Vignette",
+ VX_KERNEL_RPP_VIGNETTE,
+ processVignette,
+ 8,
+ validateVignette,
+ initializeVignette,
+ uninitializeVignette);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
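+ // Kernel parameters: #0 src tensor, #1 ROI tensor, #2 dst tensor, #3 std-dev array, #4 input layout, #5 output layout, #6 ROI type, #7 device type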
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/WarpAffine.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/WarpAffine.cpp
new file mode 100644
index 0000000000..4cd24f7a18
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/WarpAffine.cpp
@@ -0,0 +1,254 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+#define AFFINE_MATRIX_SIZE 6
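+// Each affine transform is described by 6 floats per image (a 2x3 affine matrix)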
+
+struct WarpAffineLocalData {
+ vxRppHandle *handle;
+ vx_uint32 deviceType;
+ RppPtr_t pSrc;
+ RppPtr_t pDst;
+ vx_float32 *pAffine;
+ RpptInterpolationType interpolationType;
+ RpptDescPtr pSrcDesc;
+ RpptDescPtr pDstDesc;
+ RpptROI *pSrcRoi;
+ RpptRoiType roiType;
+ vxTensorLayout inputLayout;
+ vxTensorLayout outputLayout;
+ size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+ size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+static vx_status VX_CALLBACK refreshWarpAffine(vx_node node, const vx_reference *parameters, vx_uint32 num, WarpAffineLocalData *data) {
+ vx_status status = VX_SUCCESS;
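+ // Copy one affine matrix (AFFINE_MATRIX_SIZE floats) per batch entry from the input array parameter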
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0] * AFFINE_MATRIX_SIZE, sizeof(vx_float32), data->pAffine, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+ void *roi_tensor_ptr;
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+ }
+ data->pSrcRoi = reinterpret_cast<RpptROI *>(roi_tensor_ptr);
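+ // For sequence inputs, replicate each sequence's affine matrix and ROI across all of its frames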
+ if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) {
+ unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F'
+ for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) {
+ unsigned index = n * num_of_frames;
+ for (unsigned f = 0; f < num_of_frames; f++) {
+ int var = (index + f) * AFFINE_MATRIX_SIZE;
+ int var2 = n * AFFINE_MATRIX_SIZE;
+ data->pAffine[var] = data->pAffine[var2];
+ data->pAffine[var + 1] = data->pAffine[var2 + 1];
+ data->pAffine[var + 2] = data->pAffine[var2 + 2];
+ data->pAffine[var + 3] = data->pAffine[var2 + 3];
+ data->pAffine[var + 4] = data->pAffine[var2 + 4];
+ data->pAffine[var + 5] = data->pAffine[var2 + 5];
+ data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI;
+ }
+ }
+ }
+ return status;
+}
+
+static vx_status VX_CALLBACK validateWarpAffine(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_INT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+
+ // Check for input tensor
+ size_t num_tensor_dims;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4)
+ return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: WarpAffine: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+
+ // Check for output tensor
+ vx_uint8 tensor_fixed_point_position;
+ size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+ vx_enum tensor_dtype;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ if (num_tensor_dims < 4) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: WarpAffine: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims);
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_dtype, sizeof(tensor_dtype)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+ return status;
+}
+
+static vx_status VX_CALLBACK processWarpAffine(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ WarpAffineLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ refreshWarpAffine(node, parameters, num, data);
+ if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+ return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
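+ // rppt_warp_affine_gpu consumes the per-image affine matrices and ROIs along with the selected interpolation type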
+ rpp_status = rppt_warp_affine_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pAffine, data->interpolationType, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+ } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+ rpp_status = rppt_warp_affine_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pAffine, data->interpolationType, data->pSrcRoi, data->roiType, data->handle->rppHandle);
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
+}
+
+static vx_status VX_CALLBACK initializeWarpAffine(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ WarpAffineLocalData *data = new WarpAffineLocalData;
+ memset(data, 0, sizeof(WarpAffineLocalData));
+
+ vx_enum input_tensor_dtype, output_tensor_dtype;
+ vx_int32 roi_type, input_layout, output_layout, interpolation_type;
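+ // Scalar parameters: #4 interpolation type, #5 input layout, #6 output layout, #7 ROI type, #8 device affinity (CPU/GPU)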
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &interpolation_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ data->roiType = static_cast<RpptRoiType>(roi_type);
+ data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+ data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+ data->interpolationType = static_cast<RpptInterpolationType>(interpolation_type);
+
+ // Querying for input tensor
+ data->pSrcDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
+ data->pSrcDesc->dataType = getRpptDataType(input_tensor_dtype);
+ data->pSrcDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pSrcDesc, data->inputLayout, data->inputTensorDims);
+
+ // Querying for output tensor
+ data->pDstDesc = new RpptDesc;
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->ouputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+ STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
+ data->pDstDesc->dataType = getRpptDataType(output_tensor_dtype);
+ data->pDstDesc->offsetInBytes = 0;
+ fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims);
+
+ data->pAffine = new vx_float32[AFFINE_MATRIX_SIZE * data->pSrcDesc->n];
+ refreshWarpAffine(node, parameters, num, data);
+ STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+ STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeWarpAffine(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+ WarpAffineLocalData *data;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ delete[] data->pAffine;
+ delete data->pSrcDesc;
+ delete data->pDstDesc;
+ STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+ delete data;
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+ return VX_SUCCESS;
+}
+
+vx_status WarpAffine_Register(vx_context context) {
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpAffine",
+ VX_KERNEL_RPP_WARPAFFINE,
+ processWarpAffine,
+ 9,
+ validateWarpAffine,
+ initializeWarpAffine,
+ uninitializeWarpAffine);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+ vx_bool enableBufferAccess = vx_false_e;
+#endif
+ amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+ if (kernel) {
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
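+ // Kernel parameters: #0 src tensor, #1 ROI tensor, #2 dst tensor, #3 affine matrix array, #4 interpolation type, #5 input layout, #6 output layout, #7 ROI type, #8 device type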
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+ PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+ }
+ if (status != VX_SUCCESS) {
+ exit:
+ vxRemoveKernel(kernel);
+ return VX_FAILURE;
+ }
+
+ return status;
+}
diff --git a/rocAL/rocAL/source/augmentations/node_sequence_rearrange.cpp b/rocAL/rocAL/source/augmentations/node_sequence_rearrange.cpp
index 30ec3b3779..3167514c8f 100644
--- a/rocAL/rocAL/source/augmentations/node_sequence_rearrange.cpp
+++ b/rocAL/rocAL/source/augmentations/node_sequence_rearrange.cpp
@@ -41,7 +41,7 @@ void SequenceRearrangeNode::create_node()
status = vxAddArrayItems(_sequence_array, _new_sequence_length, _new_order.data(), sizeof(vx_uint32));
if(status != VX_SUCCESS)
THROW("Adding array items failed: "+ TOSTR(status))
- _node = vxExtrppNode_SequenceRearrange(_graph->get(), _inputs[0]->handle(), _outputs[0]->handle(), _sequence_array, _new_sequence_length, _sequence_length, _sequence_count);
+ _node = vxExtrppNode_SequenceRearrangebatchPD(_graph->get(), _inputs[0]->handle(), _outputs[0]->handle(), _sequence_array, _new_sequence_length, _sequence_length, _sequence_count);
if((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS)
THROW("Adding the sequence rearrange (vxExtrppNode_SequenceRearrange) node failed: "+ TOSTR(status))
}