Skip to content

Commit

Permalink
ocl: implement alpha blending
Browse files Browse the repository at this point in the history
 * import cl image from VASurfaceID in ocl vpp base class
 * add opencl kernel for alpha blending
  • Loading branch information
dspmeng committed Jan 13, 2016
1 parent e42f012 commit 37defed
Show file tree
Hide file tree
Showing 8 changed files with 358 additions and 7 deletions.
13 changes: 11 additions & 2 deletions examples/blend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@
#include <va/va.h>
#include <va/va_x11.h>

struct timespec start, end;
#define PERF_START(block) clock_gettime(CLOCK_REALTIME, &start);
#define PERF_STOP(block) clock_gettime(CLOCK_REALTIME, &end); \
INFO(#block " used %f ms\n", \
(end.tv_sec - start.tv_sec) * 1000 \
+ (end.tv_nsec - start.tv_nsec) / 1E6);

using namespace YamiMediaCodec;
using namespace std;

Expand Down Expand Up @@ -103,6 +110,7 @@ class Blend
memset(&m_dest->crop, 0, sizeof(VideoRect));
m_scaler->process(frame, m_dest);

PERF_START(blend);
//blend it
for (int i = 0; i < m_blendSurfaces.size(); i++) {
m_bumpBoxes[i]->getPos(m_dest->crop.x, m_dest->crop.y, m_dest->crop.width, m_dest->crop.height);
Expand All @@ -111,6 +119,7 @@ class Blend
SharedPtr<VideoFrame>& src = m_blendSurfaces[i];
m_blender->process(src, m_dest);
}
PERF_STOP(blend);

//display it on screen
memcpy(&m_dest->crop, &frame->crop, sizeof(VideoRect));
Expand All @@ -121,7 +130,6 @@ class Blend
ERROR("vaPutSurface return %d", status);
break;
}

}
return true;
}
Expand Down Expand Up @@ -244,7 +252,8 @@ class Blend

bool createDestSurface(uint32_t targetWidth, uint32_t targetHeight)
{
m_dest = createSurface(VA_RT_FORMAT_YUV420, VA_FOURCC_NV12, targetWidth, targetHeight );
m_dest = createSurface(VA_RT_FORMAT_YUV420, VA_FOURCC_NV12, targetWidth, targetHeight);
m_dest->fourcc = YAMI_FOURCC_NV12;
return m_dest;
}

Expand Down
72 changes: 72 additions & 0 deletions ocl/kernels/blend.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* blend.cl - alpha blending opencl kernel
*
* Copyright (C) 2015 Intel Corporation
* Author: Jia Meng<[email protected]>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301 USA
*/

__kernel void blend(__write_only image2d_t dst_y,
__write_only image2d_t dst_uv,
__read_only image2d_t bg_y,
__read_only image2d_t bg_uv,
__read_only image2d_t fg,
uint crop_x, uint crop_y, uint crop_w, uint crop_h)
{
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
int i;
int id_x = get_global_id(0);
int id_y = get_global_id(1) * 2;
int id_z = id_x;
int id_w = id_y;

float4 y1, y2;
float4 y1_dst, y2_dst, y_dst;
float4 uv, uv_dst;
float4 rgba[4];

id_x += crop_x;
id_y += crop_y;
y1 = read_imagef(bg_y, sampler, (int2)(id_x, id_y));
y2 = read_imagef(bg_y, sampler, (int2)(id_x, id_y + 1));
uv = read_imagef(bg_uv, sampler, (int2)(id_x, id_y / 2));

rgba[0] = read_imagef(fg, sampler, (int2)(id_z, id_w));
rgba[1] = read_imagef(fg, sampler, (int2)(id_z + 1, id_w + 1));
rgba[2] = read_imagef(fg, sampler, (int2)(id_z , id_w));
rgba[3] = read_imagef(fg, sampler, (int2)(id_z + 1, id_w + 1));

y_dst = 0.299 * (float4) (rgba[0].x, rgba[1].x, rgba[2].x, rgba[3].x);
y_dst = mad(0.587, (float4) (rgba[0].y, rgba[1].y, rgba[2].y, rgba[3].y), y_dst);
y_dst = mad(0.114, (float4) (rgba[0].z, rgba[1].z, rgba[2].z, rgba[3].z), y_dst);
y_dst *= (float4) (rgba[0].w, rgba[1].w, rgba[2].w, rgba[3].w);
y1_dst.x = mad(1 - rgba[0].w, y1.x, y_dst.x);
y1_dst.y = mad(1 - rgba[1].w, y1.y, y_dst.y);
y2_dst.x = mad(1 - rgba[2].w, y2.x, y_dst.z);
y2_dst.y = mad(1 - rgba[3].w, y2.y, y_dst.w);

uv_dst.x = rgba[0].w * (-0.14713 * rgba[0].x - 0.28886 * rgba[0].y + 0.43600 * rgba[0].z + 0.5);
uv_dst.y = rgba[0].w * ( 0.61500 * rgba[0].x - 0.51499 * rgba[0].y - 0.10001 * rgba[0].z + 0.5);
uv_dst.x = mad(1 - rgba[0].w, uv.x, uv_dst.x);
uv_dst.y = mad(1 - rgba[0].w, uv.y, uv_dst.y);

if (id_z <= crop_w && id_w <= crop_h) {
write_imagef(dst_y, (int2)(id_x, id_y), y1_dst);
write_imagef(dst_y, (int2)(id_x, id_y + 1), y2_dst);
write_imagef(dst_uv, (int2)(id_x, id_y / 2), uv_dst);
}
}
36 changes: 35 additions & 1 deletion ocl/oclcontext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,11 @@ class OclDevice {
public:
static SharedPtr<OclDevice> getInstance();
bool createKernel(cl_context context, const char* name, cl_kernel& kernel);
YamiStatus createImageFromFdIntel(cl_context context, const cl_import_image_info_intel* info, cl_mem* mem);

private:
typedef cl_mem (*OclCreateImageFromFdIntel)(cl_context, const cl_import_image_info_intel*, cl_int*);

OclDevice();
bool init();
bool loadKernel_l(cl_context context, const char* name, cl_kernel& kernel);
Expand All @@ -54,6 +57,7 @@ class OclDevice {
//all operations need procted by m_lock
cl_platform_id m_platform;
cl_device_id m_device;
OclCreateImageFromFdIntel m_oclCreateImageFromFdIntel;
friend OclContext;

DISALLOW_COPY_AND_ASSIGN(OclDevice)
Expand Down Expand Up @@ -105,8 +109,14 @@ bool OclContext::createKernel(const char* name, cl_kernel& kernel)
return m_device->createKernel(m_context, name, kernel);
}

YamiStatus
OclContext::createImageFromFdIntel(const cl_import_image_info_intel* info, cl_mem* mem)
{
return m_device->createImageFromFdIntel(m_context, info, mem);
}

OclDevice::OclDevice()
:m_platform(0), m_device(0)
:m_platform(0), m_device(0), m_oclCreateImageFromFdIntel(0)
{
}

Expand All @@ -133,6 +143,18 @@ bool OclDevice::init()
status = clGetDeviceIDs(m_platform, CL_DEVICE_TYPE_GPU, 1, &m_device, NULL);
if (!checkOclStatus(status, "clGetDeviceIDs"))
return false;

#ifdef CL_VERSION_1_2
m_oclCreateImageFromFdIntel = (OclCreateImageFromFdIntel)
clGetExtensionFunctionAddressForPlatform(m_platform, "clCreateImageFromFdINTEL");
#else
m_oclCreateImageFromFdIntel = (OclCreateImageFromFdIntel)
clGetExtensionFunctionAddress("clCreateImageFromFdINTEL");
#endif
if (!m_oclCreateImageFromFdIntel) {
ERROR("failed to get extension function createImageFromFdIntel");
return false;
}
return true;
}

Expand Down Expand Up @@ -196,6 +218,18 @@ bool OclDevice::createKernel(cl_context context, const char* name, cl_kernel& ke
return loadKernel_l(context, name, kernel);
}


YamiStatus OclDevice::createImageFromFdIntel(cl_context context, const cl_import_image_info_intel* info, cl_mem* mem)
{
cl_int status;
*mem = m_oclCreateImageFromFdIntel(context, info, &status);
if (checkOclStatus(status, "clCreateImageFromFdINTEL")) {
return YAMI_SUCCESS;
} else {
return YAMI_FAIL;
}
}

bool checkOclStatus(cl_int status, const char* msg)
{
/* todo add more description error here*/
Expand Down
3 changes: 2 additions & 1 deletion ocl/oclcontext.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class OclContext
public:
static SharedPtr<OclContext> create();
bool createKernel(const char* name, cl_kernel& kernel);
YamiStatus createImageFromFdIntel(const cl_import_image_info_intel* info, cl_mem* mem);
~OclContext();

cl_context m_context;
Expand All @@ -52,4 +53,4 @@ bool checkOclStatus(cl_int status, const char* err);

}

#endif //oclcontext_h
#endif //oclcontext_h
142 changes: 142 additions & 0 deletions vpp/oclpostprocess_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
#include "oclpostprocess_base.h"
#include "common/log.h"
#include "ocl/oclcontext.h"
#include "vaapi/vaapiutils.h"
#include <va/va_drmcommon.h>

namespace YamiMediaCodec{

Expand Down Expand Up @@ -56,12 +58,152 @@ YamiStatus OclPostProcessBase::ensureContext(const char* kernalName)
return YAMI_SUCCESS;
}

SharedPtr<OclPostProcessBase::OclImage>
OclPostProcessBase::createCLImage(const SharedPtr<VideoFrame>& frame,
const cl_image_format& fmt)
{
SharedPtr<OclImage> clImage(new OclImage(m_display));
VASurfaceID surfaceId = (VASurfaceID)frame->surface;
VABufferInfo bufferInfo;
cl_import_image_info_intel importInfo;
uint32_t height[3], i;

VAImage image;
if (!checkVaapiStatus(vaDeriveImage(m_display, surfaceId, &image), "DeriveImage")) {
clImage.reset();
goto done;
}

clImage->m_imageId = image.image_id;
clImage->m_bufId = image.buf;
bufferInfo.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;
if (!checkVaapiStatus(vaAcquireBufferHandle(m_display, image.buf, &bufferInfo),
"AcquireBufferHandle")) {
clImage.reset();
goto done;
}

switch (image.format.fourcc) {
case VA_FOURCC_RGBA:
height[0] = image.height;
break;
case VA_FOURCC_NV12:
height[0] = image.height;
height[1] = image.height / 2;
break;
default:
ERROR("unsupported format");
clImage.reset();
goto done;
}
clImage->m_format = image.format.fourcc;

clImage->m_numPlanes = image.num_planes;
for (i = 0; i < image.num_planes; i++) {
importInfo.fd = bufferInfo.handle;
importInfo.type = CL_MEM_OBJECT_IMAGE2D;
importInfo.fmt.image_channel_order = fmt.image_channel_order;
importInfo.fmt.image_channel_data_type = fmt.image_channel_data_type;
importInfo.row_pitch = image.pitches[i];
importInfo.offset = image.offsets[i];
importInfo.width = image.width;
importInfo.height = height[i];
importInfo.size = importInfo.row_pitch * importInfo.height;
if (YAMI_SUCCESS != m_context->createImageFromFdIntel(&importInfo, &clImage->m_mem[i])) {
clImage.reset();
goto done;
}
}

done:
return clImage;
}

uint32_t OclPostProcessBase::getPixelSize(const cl_image_format& fmt)
{
uint32_t size = 0;

switch (fmt.image_channel_order) {
case CL_R:
case CL_A:
case CL_Rx:
size = 1;
break;
case CL_RG:
case CL_RA:
case CL_RGx:
size = 2;
break;
case CL_RGB:
case CL_RGBx:
size = 3;
break;
case CL_RGBA:
case CL_BGRA:
case CL_ARGB:
size = 4;
break;
case CL_INTENSITY:
case CL_LUMINANCE:
size = 1;
break;
default:
ERROR("invalid image channel order: %u", fmt.image_channel_order);
return 0;
}

switch (fmt.image_channel_data_type) {
case CL_UNORM_INT8:
case CL_SNORM_INT8:
case CL_SIGNED_INT8:
case CL_UNSIGNED_INT8:
size *= 1;
break;
case CL_SNORM_INT16:
case CL_UNORM_INT16:
case CL_UNORM_SHORT_565:
case CL_UNORM_SHORT_555:
case CL_SIGNED_INT16:
case CL_UNSIGNED_INT16:
case CL_HALF_FLOAT:
size *= 2;
break;
case CL_UNORM_INT24:
size *= 3;
break;
case CL_SIGNED_INT32:
case CL_UNSIGNED_INT32:
case CL_UNORM_INT_101010:
case CL_FLOAT:
size *= 4;
break;
default:
ERROR("invalid image channel data type: %d", fmt.image_channel_data_type);
return 0;
}

return size;
}

OclPostProcessBase::~OclPostProcessBase()
{
if (m_kernel) {
checkOclStatus(clReleaseKernel(m_kernel), "ReleaseKernel");
}
}

OclPostProcessBase::OclImage::OclImage(VADisplay d)
: m_numPlanes(0), m_display(d), m_imageId(VA_INVALID_ID), m_bufId(VA_INVALID_ID)
{
memset(m_mem, 0, sizeof(m_mem));
}

OclPostProcessBase::OclImage::~OclImage()
{
for (int i = 0; i < m_numPlanes; i++)
checkOclStatus(clReleaseMemObject(m_mem[i]), "ReleaseMemObject");

checkVaapiStatus(vaReleaseBufferHandle(m_display, m_bufId), "ReleaseBufferHandle");
checkVaapiStatus(vaDestroyImage(m_display, m_imageId), "DestroyImage");
}
}
20 changes: 20 additions & 0 deletions vpp/oclpostprocess_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,27 @@ class OclPostProcessBase : public IVideoPostProcess {
const SharedPtr<VideoFrame>& dest) = 0;
OclPostProcessBase();
virtual ~OclPostProcessBase();

protected:
struct OclImage {
friend class OclPostProcessBase;
OclImage(VADisplay d);
~OclImage();

cl_mem m_mem[3];
unsigned int m_numPlanes;
unsigned int m_format;

private:
VADisplay m_display;
VAImageID m_imageId;
VABufferID m_bufId;
};

SharedPtr<OclImage> createCLImage(const SharedPtr<VideoFrame>& frame,
const cl_image_format& fmt);
uint32_t getPixelSize(const cl_image_format& fmt);

VADisplay m_display;
cl_kernel m_kernel;
SharedPtr<OclContext> m_context;
Expand Down
Loading

0 comments on commit 37defed

Please sign in to comment.