Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OGL interop #108

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Orochi/Orochi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,8 @@ namespace CU4ORO
#define cudaGraphicsSubResourceGetMappedArray cudaGraphicsSubResourceGetMappedArray_oro
#define cudaGraphicsUnmapResources cudaGraphicsUnmapResources_oro
#define cudaGraphicsUnregisterResource cudaGraphicsUnregisterResource_oro
// OpenGL interop: route the CUDA GL registration calls to the `_oro`-suffixed symbols.
#define cudaGraphicsGLRegisterImage cudaGraphicsGLRegisterImage_oro
#define cudaGraphicsGLRegisterBuffer cudaGraphicsGLRegisterBuffer_oro
#define cudaHostAlloc cudaHostAlloc_oro
#define cudaHostGetDevicePointer cudaHostGetDevicePointer_oro
#define cudaHostGetFlags cudaHostGetFlags_oro
Expand Down Expand Up @@ -1850,6 +1852,19 @@ oroError_t OROAPI oroGraphicsUnregisterResource(oroGraphicsResource_t resource)
hipGraphicsUnregisterResource(resource) );
return oroErrorUnknown;
}
// Registers an OpenGL buffer object as a graphics resource.
// Forwards `resource`, `buffer` and `flags` either to the CUDA wrapper
// (CU4ORO::hipGraphicsGLRegisterBuffer_cu4oro) or to hipGraphicsGLRegisterBuffer.
// NOTE(review): __ORO_FUNC is defined outside this view; presumably it selects the
// backend and returns its result, making the trailing return a fallback — confirm.
oroError_t OROAPI oroGraphicsGLRegisterBuffer(oroGraphicsResource** resource, GLuint buffer, unsigned int flags)
{
__ORO_FUNC( CU4ORO::hipGraphicsGLRegisterBuffer_cu4oro( __ORO_FORCE_CAST( CU4ORO::hipGraphicsResource**, resource ), buffer, flags ),
hipGraphicsGLRegisterBuffer( resource, buffer, flags ) );
return oroErrorUnknown;
}
// Registers an OpenGL image (texture or renderbuffer name plus its `target`) as a graphics resource.
// Forwards the arguments either to the CUDA wrapper
// (CU4ORO::hipGraphicsGLRegisterImage_cu4oro) or to hipGraphicsGLRegisterImage.
// NOTE(review): __ORO_FUNC is defined outside this view; presumably it selects the
// backend and returns its result, making the trailing return a fallback — confirm.
oroError_t OROAPI oroGraphicsGLRegisterImage(oroGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags)
{
__ORO_FUNC( CU4ORO::hipGraphicsGLRegisterImage_cu4oro( __ORO_FORCE_CAST( CU4ORO::hipGraphicsResource**, resource ), image, target, flags ),
hipGraphicsGLRegisterImage( resource, image, target, flags ) );
return oroErrorUnknown;
}

oroError_t OROAPI oroHostAlloc(void ** ptr, size_t size, unsigned int flags)
{
__ORO_FUNC(
Expand Down
2 changes: 2 additions & 0 deletions Orochi/Orochi.h
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,8 @@ oroError_t OROAPI oroGraphicsResourceGetMappedPointer(void ** devPtr, size_t * s
oroError_t OROAPI oroGraphicsSubResourceGetMappedArray(oroArray_t * array, oroGraphicsResource_t resource, unsigned int arrayIndex, unsigned int mipLevel);
oroError_t OROAPI oroGraphicsUnmapResources(int count, oroGraphicsResource_t * resources, oroStream_t stream);
oroError_t OROAPI oroGraphicsUnregisterResource(oroGraphicsResource_t resource);
// OpenGL interop: register a GL buffer object / GL image so it can be used through the oroGraphics* resource API.
// `resource` receives the handle; release it with oroGraphicsUnregisterResource.
// NOTE(review): `flags` presumably takes the oroGraphicsRegisterFlags* values — confirm.
oroError_t OROAPI oroGraphicsGLRegisterBuffer( oroGraphicsResource** resource, GLuint buffer, unsigned int flags );
oroError_t OROAPI oroGraphicsGLRegisterImage( oroGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags );
oroError_t OROAPI oroHostAlloc(void ** ptr, size_t size, unsigned int flags);
oroError_t OROAPI oroHostFree(void * ptr);
oroError_t OROAPI oroHostGetDevicePointer(void ** devPtr, void * hstPtr, unsigned int flags);
Expand Down
8 changes: 8 additions & 0 deletions Orochi/nvidia_hip_runtime_api_oro.h
Original file line number Diff line number Diff line change
Expand Up @@ -3306,6 +3306,14 @@ inline static hipError_t hipGraphicsUnregisterResource_cu4oro(hipGraphicsResourc
return hipCUDAErrorTohipError(cudaGraphicsUnregisterResource(resource));
}

// CUDA-backed implementation of hipGraphicsGLRegisterImage: forwards the
// arguments to cudaGraphicsGLRegisterImage and converts the CUDA status
// into the matching hipError_t.
inline static hipError_t hipGraphicsGLRegisterImage_cu4oro( hipGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags ) {
	const auto cudaStatus = cudaGraphicsGLRegisterImage( resource, image, target, flags );
	return hipCUDAErrorTohipError( cudaStatus );
}

// CUDA-backed implementation of hipGraphicsGLRegisterBuffer: forwards the
// arguments to cudaGraphicsGLRegisterBuffer and converts the CUDA status
// into the matching hipError_t.
inline static hipError_t hipGraphicsGLRegisterBuffer_cu4oro( hipGraphicsResource** resource, GLuint buffer, unsigned int flags ) {
	const auto cudaStatus = cudaGraphicsGLRegisterBuffer( resource, buffer, flags );
	return hipCUDAErrorTohipError( cudaStatus );
}

#if CUDA_VERSION >= CUDA_11020
// ========================== HIP Stream Ordered Memory Allocator =================================
inline static hipError_t hipDeviceGetDefaultMemPool_cu4oro(hipMemPool_t* mem_pool, int device) {
Expand Down
152 changes: 152 additions & 0 deletions Test/OpenGL/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#define GLEW_STATIC
#include <contrib/glew/include/glew/glew.h>
#include <contrib/glfw/include/GLFW/glfw3.h>

#include <Orochi/Orochi.h>

#include <assert.h>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string_view>

/// Aborts the process when `result` reports an Orochi runtime failure.
/// On failure the error name and description are written to std::cerr first;
/// placeholder text is used when the runtime supplies no string.
inline void checkError( const oroError result )
{
	if( result == oroSuccess )
	{
		return;
	}

	// Error name, with a placeholder when the lookup yields no string.
	const char* rawName = oroGetErrorName( result );
	std::string_view name{ "NoErrorName" };
	if( rawName != nullptr )
	{
		name = rawName;
	}
	std::cerr << "oroError '" << name << "': ";

	// Human-readable description, with a placeholder when none is provided.
	const char* rawDescription = nullptr;
	oroGetErrorString( result, &rawDescription );
	std::string_view description{ "No description." };
	if( rawDescription != nullptr )
	{
		description = rawDescription;
	}
	std::cerr << description << '\n' << std::flush;

	std::abort();
}

/// Aborts the process when `result` reports a runtime-compilation (orortc) failure.
/// On failure the error description is written to std::cerr first.
inline void checkError( const orortcResult result )
{
	if( result != ORORTC_SUCCESS )
	{
		// Constructing a std::string_view from a null pointer is undefined behaviour,
		// so guard the lookup the same way the oroError overload does.
		const char* errorDescriptionPtr = orortcGetErrorString( result );
		const std::string_view errorDescription{ ( errorDescriptionPtr != nullptr ) ? errorDescriptionPtr : "No description." };
		std::cerr << "orortcError: " << errorDescription << '\n' << std::flush;
		std::abort();
	}
}

#define ERROR_CHECK( e ) checkError( e )

int main( int argc, char** argv )
{
GLFWwindow* window;
if( !glfwInit() ) return 0;

glfwWindowHint( GLFW_RED_BITS, 32 );
glfwWindowHint( GLFW_GREEN_BITS, 32 );
glfwWindowHint( GLFW_BLUE_BITS, 32 );

glfwWindowHint( GLFW_RESIZABLE, GL_FALSE );

window = glfwCreateWindow( 1280, 720, "orochiOglInterop", NULL, NULL );
if( !window )
{
glfwTerminate();
return 0;
}
glfwMakeContextCurrent( window );

if( glewInit() != GLEW_OK )
{
glfwTerminate();
return 0;
}

oroDevice m_device = 0;
oroCtx m_ctx = nullptr;
oroStream m_stream = nullptr;

{
const int deviceIndex = 0;
oroApi api = (oroApi)( ORO_API_CUDA | ORO_API_HIP );
int a = oroInitialize( api, 0 );
assert( a == 0 );

ERROR_CHECK( oroInit( 0 ) );
ERROR_CHECK( oroDeviceGet( &m_device, deviceIndex ) );
ERROR_CHECK( oroCtxCreate( &m_ctx, 0, m_device ) );
ERROR_CHECK( oroCtxSetCurrent( m_ctx ) );
ERROR_CHECK( oroStreamCreate( &m_stream ) );
}

{
const uint32_t imageSize = 64u;
GLuint texture = 0;
oroGraphicsResource_t oroResource = nullptr;
oroArray_t interopArray = nullptr;
oroTextureObject_t interopTexture = nullptr;

// Work around for AMD driver crash when calling `oroGLRegister*` functions
const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP;
if( isAmd )
{
uint32_t deviceCount = 16;
int glDevices[16];
ERROR_CHECK( hipGLGetDevices( &deviceCount, glDevices, deviceCount, hipGLDeviceListAll ) );
}

// Create texture object
glGenTextures( 1, &texture );
glBindTexture( GL_TEXTURE_2D, texture );
glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8, imageSize, imageSize, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glBindTexture( GL_TEXTURE_2D, 0 );

// Register framebuffer attachment that will be passed to Oro
{
ERROR_CHECK( oroGraphicsGLRegisterImage( &oroResource, texture, GL_TEXTURE_2D, oroGraphicsRegisterFlagsReadOnly ) );
oroDeviceSynchronize();
ERROR_CHECK( oroGetLastError() );
// Map buffer objects to get device pointers
ERROR_CHECK( oroGraphicsMapResources( 1, &oroResource, 0 ) );
oroDeviceSynchronize();
ERROR_CHECK( oroGetLastError() );
ERROR_CHECK( oroGraphicsSubResourceGetMappedArray( &interopArray, oroResource, 0, 0 ) );
oroDeviceSynchronize();
ERROR_CHECK( oroGetLastError() );

// Create texture interop object to be passed to kernel
{
oroChannelFormatDesc desc;
ERROR_CHECK( oroGetChannelDesc( &desc, interopArray ) );
oroDeviceSynchronize();
ERROR_CHECK( oroGetLastError() );

oroResourceDesc texRes;
memset( &texRes, 0, sizeof( oroResourceDesc ) );

texRes.resType = oroResourceTypeArray;
texRes.res.array.array = interopArray;

oroTextureDesc texDescr;
memset( &texDescr, 0, sizeof( oroTextureDesc ) );

texDescr.normalizedCoords = false;
texDescr.filterMode = oroFilterModePoint;
texDescr.addressMode[0] = oroAddressModeWrap;
texDescr.readMode = oroReadModeElementType;

ERROR_CHECK( oroCreateTextureObject( &interopTexture, &texRes, &texDescr, NULL ) );
}
}

ERROR_CHECK( oroGraphicsUnmapResources( 1, &oroResource, 0 ) );
ERROR_CHECK( oroDestroyTextureObject( interopTexture ) );
ERROR_CHECK( oroGraphicsUnregisterResource( oroResource ) );

glDeleteTextures( 1, &texture );
}

ERROR_CHECK( oroStreamDestroy( m_stream ) );
ERROR_CHECK( oroCtxDestroy( m_ctx ) );


std::cout << "Success!\n";
return 0;
}

17 changes: 17 additions & 0 deletions Test/OpenGL/premake5.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Console sample that exercises the Orochi OpenGL interop functions.
project "OpenGL"
    kind "ConsoleApp"

    targetdir "../../dist/bin/%{cfg.buildcfg}"
    location "../../build/"

    -- GLEW/GLFW/OpenGL link settings are only provided for Windows.
    if os.istarget("windows") then
        links{ "version" }
        libdirs{ "../../contrib/glew", "../../contrib/glfw/" }
        links{ "glew32s", "glfw3", "opengl32" }
    end

    includedirs { "../../" }
    files { "../../Orochi/**.h", "../../Orochi/**.cpp" }
    -- The contrib sources were listed twice; one entry is enough.
    files { "../../contrib/**.h", "../../contrib/**.cpp" }
    files { "*.cpp" }
4 changes: 2 additions & 2 deletions Test/Texture/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
#include <Orochi/Orochi.h>
#include <Test/Common.h>
#include <iostream>
#include "../../UnitTest/contrib/stb/stb_image_write.h"
#include "../../UnitTest/contrib/stb/stb_image.h"
#include <contrib/stb/stb_image_write.h>
#include <contrib/stb/stb_image.h>
#include "../../UnitTest/demoErrorCodes.h"


Expand Down
80 changes: 80 additions & 0 deletions UnitTest/basicTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
#include "basicTests.h"
#include "common.h"

#if defined(_WIN32)
#define GLEW_STATIC
#include "contrib/glew/include/glew/glew.h"
#include "contrib/glfw/include/GLFW/glfw3.h"
#endif

TEST_F( OroTestBase, init )
{

Expand Down Expand Up @@ -970,6 +976,80 @@ TEST_F( OroTestBase, ManagedMemory )
}
o.unloadKernelCache();
}
#if defined( _WIN32 )
#if 0 // fails on headless execution
// Registers an OpenGL buffer object with Orochi and unregisters it again.
// Needs a window system to create the GL context.
TEST_F( OroTestBase, glRegisterBuffer )
{
	ASSERT_EQ( glfwInit(), GLFW_TRUE );

	// Shuts GLFW down on every exit path, including the early returns taken by
	// failing ASSERT_* checks. glfwTerminate also destroys any window still alive.
	struct GlfwTerminator
	{
		~GlfwTerminator() { glfwTerminate(); }
	} glfwTerminator;

	GLFWwindow* window = glfwCreateWindow( 1, 1, "glRegisterBuffer", nullptr, nullptr );
	// Window creation can fail (e.g. no display); stop before using a null context.
	ASSERT_TRUE( window != nullptr );
	glfwMakeContextCurrent( window );
	ASSERT_EQ( glewInit(), GLEW_OK );

	const uint32_t dataSize = 1024u;
	GLuint buf = 0;
	oroGraphicsResource* oroResource = nullptr;

	// Work around for AMD driver crash when calling `oroGLRegister*` functions
	const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP;
	if( isAmd )
	{
		uint32_t deviceCount = 16;
		int glDevices[16];
		OROCHECK( hipGLGetDevices( &deviceCount, glDevices, deviceCount, hipGLDeviceListAll ) );
	}

	// Create buffer object
	glGenBuffers( 1, &buf );
	glBindBuffer( GL_ARRAY_BUFFER, buf );
	glBufferData( GL_ARRAY_BUFFER, dataSize, nullptr, GL_DYNAMIC_DRAW );
	glBindBuffer( GL_ARRAY_BUFFER, 0 );

	OROCHECK( oroGraphicsGLRegisterBuffer( &oroResource, buf, oroGraphicsRegisterFlagsNone ) );
	OROCHECK( oroGraphicsUnregisterResource( oroResource ) );

	glDeleteBuffers( 1, &buf );
	buf = 0;

	glfwDestroyWindow( window );
	window = nullptr;
	// GLFW itself is terminated by glfwTerminator when the test body exits.
}

// Registers an OpenGL 2D texture with Orochi and unregisters it again.
// Needs a window system to create the GL context.
TEST_F( OroTestBase, glRegisterImage )
{
	ASSERT_EQ( glfwInit(), GLFW_TRUE );

	// Shuts GLFW down on every exit path, including the early returns taken by
	// failing ASSERT_* checks. glfwTerminate also destroys any window still alive.
	struct GlfwTerminator
	{
		~GlfwTerminator() { glfwTerminate(); }
	} glfwTerminator;

	GLFWwindow* window = glfwCreateWindow( 1, 1, "glRegisterImage", nullptr, nullptr );
	// Window creation can fail (e.g. no display); stop before using a null context.
	ASSERT_TRUE( window != nullptr );
	glfwMakeContextCurrent( window );
	ASSERT_EQ( glewInit(), GLEW_OK );

	const uint32_t imageSize = 64u;
	GLuint texture = 0;
	oroGraphicsResource* oroResource = nullptr;

	// Work around for AMD driver crash when calling `oroGLRegister*` functions
	const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP;
	if( isAmd )
	{
		uint32_t deviceCount = 16;
		int glDevices[16];
		OROCHECK( hipGLGetDevices( &deviceCount, glDevices, deviceCount, hipGLDeviceListAll ) );
	}

	// Create texture object
	glGenTextures( 1, &texture );
	glBindTexture( GL_TEXTURE_2D, texture );
	glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8, imageSize, imageSize, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr );
	glBindTexture( GL_TEXTURE_2D, 0 );

	OROCHECK( oroGraphicsGLRegisterImage( &oroResource, texture, GL_TEXTURE_2D, oroGraphicsRegisterFlagsNone ) );
	OROCHECK( oroGraphicsUnregisterResource( oroResource ) );

	glDeleteTextures( 1, &texture );
	texture = 0;

	glfwDestroyWindow( window );
	window = nullptr;
	// GLFW itself is terminated by glfwTerminator when the test body exits.
}
#endif
#endif

8 changes: 5 additions & 3 deletions UnitTest/premake5.lua
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ project "Unittest"
location "../build/"

if os.istarget("windows") then
libdirs{ "../contrib/glew", "../contrib/glfw/" }
links{ "glew32s", "glfw3", "opengl32" }
links{ "version" }
end
if os.istarget("linux") then
Expand All @@ -15,10 +17,10 @@ project "Unittest"
files { "*.cpp", "*.h" }
removefiles { "moduleTestFunc.cpp", "moduleTestKernel.cpp" }
files { "../contrib/**.h", "../contrib/**.cpp" }
files { "../UnitTest/contrib/**.h", "../UnitTest/contrib/**.cpp" }
files { "../contrib/**.h", "../contrib/**.cpp" }

files { "../UnitTest/contrib/gtest-1.6.0/gtest-all.cc" }
externalincludedirs{ "../UnitTest/contrib/gtest-1.6.0/" }
files { "../contrib/gtest-1.6.0/gtest-all.cc" }
externalincludedirs{ "../contrib/gtest-1.6.0/" }
defines { "GTEST_HAS_TR1_TUPLE=0" }
if _OPTIONS["kernelcompile"] then
os.execute( "cd ./bitcodes/ && generate_bitcodes.bat" )
Expand Down
8 changes: 7 additions & 1 deletion contrib/cuew/include/cuew.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ extern "C" {



/* Minimal OpenGL type aliases needed by the GL interop prototypes in this header. */
/* NOTE(review): OpenGL headers such as glew.h define the same names - confirm the
   definitions agree (and that repeated typedefs are accepted) when both are included. */
typedef unsigned int GLuint;
typedef unsigned int GLenum;



Expand Down Expand Up @@ -762,6 +764,8 @@ typedef cudaError_t CUDAAPI tcudaUserObjectCreate(cudaUserObject_t * object_out,
typedef cudaError_t CUDAAPI tcudaUserObjectRelease(cudaUserObject_t object, unsigned int count);
typedef cudaError_t CUDAAPI tcudaUserObjectRetain(cudaUserObject_t object, unsigned int count);
typedef cudaError_t CUDAAPI tcudaWaitExternalSemaphoresAsync_v2(const cudaExternalSemaphore_t * extSemArray, const struct cudaExternalSemaphoreWaitParams * paramsArray, unsigned int numExtSems, cudaStream_t stream);
/* Function types for the CUDA OpenGL interop registration entry points. */
typedef cudaError_t CUDAAPI tcudaGraphicsGLRegisterImage( struct cudaGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags );
typedef cudaError_t CUDAAPI tcudaGraphicsGLRegisterBuffer( struct cudaGraphicsResource** resource, GLuint buffer, unsigned int flags );
typedef nvrtcResult CUDAAPI tnvrtcAddNameExpression(nvrtcProgram prog, const char *const name_expression);
typedef nvrtcResult CUDAAPI tnvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char *const * options);
typedef nvrtcResult CUDAAPI tnvrtcCreateProgram(nvrtcProgram * prog, const char * src, const char * name, int numHeaders, const char *const * headers, const char *const * includeNames);
Expand Down Expand Up @@ -1470,7 +1474,9 @@ extern tcudaUserObjectCreate *cudaUserObjectCreate_oro;
extern tcudaUserObjectRelease *cudaUserObjectRelease_oro;
extern tcudaUserObjectRetain *cudaUserObjectRetain_oro;
extern tcudaWaitExternalSemaphoresAsync_v2 *cudaWaitExternalSemaphoresAsync_v2_oro;
extern tnvrtcAddNameExpression *nvrtcAddNameExpression_oro;
/* Function pointers for the CUDA OpenGL interop registration entry points. */
/* NOTE(review): presumably filled in when the CUDA runtime library is loaded - confirm in the loader source. */
extern tcudaGraphicsGLRegisterImage* cudaGraphicsGLRegisterImage_oro;
extern tcudaGraphicsGLRegisterBuffer* cudaGraphicsGLRegisterBuffer_oro;
extern tnvrtcAddNameExpression* nvrtcAddNameExpression_oro;
extern tnvrtcCompileProgram *nvrtcCompileProgram_oro;
extern tnvrtcCreateProgram *nvrtcCreateProgram_oro;
extern tnvrtcDestroyProgram *nvrtcDestroyProgram_oro;
Expand Down
Loading
Loading