-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.cu
299 lines (261 loc) · 12.3 KB
/
main.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
#include <iostream>
#include <string>
#include <sstream>
#include <glad/glad.h>
#include <cuda_gl_interop.h>
#include <GLFW/glfw3.h>
#include "cuda_check_error.h"
#include "timer.h"
#define THREADS_PER_BLOCK 128
int windowWidth = 800, windowHeight = 480;
using namespace std;
//A custom, lightweight CUDA implementation of a 2D vector with floats.
class CUDAVec2{
public:
__device__ CUDAVec2(float x = 0, float y = 0){this->x = x; this->y=y;}
__device__ CUDAVec2(CUDAVec2& v){x = v.x; y=v.y;}
float x, y;
//Get the magnitude of this vector.
__device__ float mag(){return hypotf(x,y);}
//Subtract an amount from this vector.
__device__ void subtract(CUDAVec2 amount){x-=amount.x;y-=amount.y;}
//Subtract an amount from this vector.
__device__ void subtract(float x, float y){this->x-=x;this->y-=y;}
};
/**
* @brief Updates a surface per-pixel, much like a GLSL shader on an OpenGL texture.
* This kernel makes a rotating ring of circles on a colorful background.
*
* @param surface the output/input CUDA surface ID. This is where color data can be read from and written to.
* @param textureWidth the width of the surface in pixels.
* @param textureHeight the height of the surface in pixels.
* @param i the frame index. This is used for animations.
*/
__global__ void update_surface(cudaSurfaceObject_t surface, int textureWidth, int textureHeight, int i)
{
//Get the pixel index
int xPx = threadIdx.x + blockIdx.x * blockDim.x;
int yPx = threadIdx.y + blockIdx.y * blockDim.y;
//Don't do any computation if this thread is outside of the surface bounds.
if(xPx >= textureWidth)
return;
if(yPx >= textureHeight)
return;
//Map the coordinates between 0 and 1, and make them floating-point.
CUDAVec2 normalizedCoordinates((float)xPx / textureWidth, (float)yPx / textureHeight);
//This animation repeats at t=400, i.e. the circles make one full rotation at i=400.
i%=400;
float theta = (float)i/63.661977236758134307553505349006f;
bool inCircle = false; //Used to set the color of the pixel.
//Create a bunch of circles and check if this pixel is inside any one of them.
int numCircles = 10;
//For each circle
for(int i = 0; i < numCircles; i++){
//Each circle is unique by its direction relative to the center of the surface (normalized coordinates (0.5,0.5))
theta+=(float)6.283185307179586476925286766559f/numCircles;
//Calculate the center of the circle.
CUDAVec2 circlePosition(0.5+0.4*cos(theta), 0.5+0.4*sin(theta));
//Get the distance from this pixel to the center of the circle
CUDAVec2 v (normalizedCoordinates);
v.subtract(circlePosition);
float r = v.mag();
//If the distance from any circle's center is less than 0.1, it is in that circle.
if(r<0.1){
inCircle = true;
break;
}
}//End for loop
//Calculate the pretty colors
float red = normalizedCoordinates.x;
float green = 1.f - normalizedCoordinates.y;
float blue = 1.f-normalizedCoordinates.x;
float alpha = 1.f;
//Invert the colors if inside a circle
if(inCircle){
red = 1.f - red;
green = 1.f - green;
blue = 1.f - blue;
}
//Convert each value to an unsigned byte
uchar4 pixel = { (uint8_t)(red*255),
(uint8_t)(green*255),
(uint8_t)(blue*255),
(uint8_t)(alpha*255)};
//Write to the surface.
surf2Dwrite(pixel, surface, xPx * sizeof(uchar4), yPx);
}
static void init_opengl(int w, int h) {
glViewport(0, 0, w, h); // use a screen size of windowWidth x windowHeight
glMatrixMode(GL_PROJECTION); // Make a simple 2D projection on the entire window
glLoadIdentity();
glOrtho(0.0, w, h, 0.0, 0.0, 100.0);
glMatrixMode(GL_MODELVIEW); // Set the matrix mode to object modeling
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
glClearDepth(0.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear the window
}
/**
* A helper function for terminating the program
*/
void terminate(int errorCode) {
cout << "Closing application";
//Close GLFW
glfwTerminate();
//Exit
exit(errorCode);
}
static void key_callback(GLFWwindow* window, int key, int scancode, int action, int mods) {
if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) {
glfwSetWindowShouldClose(window, GLFW_TRUE);
}
}
static void resize_callback(GLFWwindow* window, int new_width, int new_height) {
glViewport(0, 0, windowWidth = new_width, windowHeight = new_height);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(0.0, windowWidth, windowHeight, 0.0, 0.0, 100.0);
glMatrixMode(GL_MODELVIEW);
}
static void error_callback(int error, const char* description) {
fprintf(stderr, "Error: %s\n", description);
}
int main(int argc, char **argv)
{
// Initialize GLFW, and GLAD, in exactly the same way as project 4.
GLFWwindow* window;
glfwSetErrorCallback(error_callback);
if (!glfwInit()) {
exit(EXIT_FAILURE);
}
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 2);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
window = glfwCreateWindow(windowWidth, windowHeight, "Simple CUDA + GLFW example", NULL, NULL);
if (!window) {
glfwTerminate();
exit(EXIT_FAILURE);
}
glfwSetKeyCallback(window, key_callback);
glfwSetWindowSizeCallback(window, resize_callback);
glfwMakeContextCurrent(window);
glfwSwapInterval(1);
if (!gladLoadGL()) {
cerr << "GLAD failed to initialize :(";
terminate(1);
}
init_opengl(windowWidth, windowHeight);
// End GLAD and GLFW setup
// Create the OpenGL texture that will be displayed with GLAD and GLFW
GLuint textureID;
glGenTextures(1, &textureID);
// Bind to our texture handle
glBindTexture(GL_TEXTURE_2D, textureID);
// Set texture interpolation methods for minification and magnification
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Set texture clamping method
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
// Create the texture and its attributes
int textureWidth = windowWidth, textureHeight = windowHeight;
unsigned char* data = new unsigned char[textureWidth*textureHeight*4];
glTexImage2D(GL_TEXTURE_2D, // Type of texture
0, // Pyramid level (for mip-mapping) - 0 is the top level
GL_RGBA, // Internal colour format to convert to
windowWidth, // Image width i.e. 640 for Kinect in standard mode
windowHeight, // Image height i.e. 480 for Kinect in standard mode
0, // Border width in pixels (can either be 1 or 0)
GL_RGBA, // Input image format (i.e. GL_RGB, GL_RGBA, GL_BGR etc.)
GL_UNSIGNED_BYTE, // Image data type.
data); // The actual image data itself
//Note that the type of this texture is an RGBA UNSIGNED_BYTE type. When CUDA surfaces
//are synchronized with OpenGL textures, the surfaces will be of the same type.
//They won't know or care about their data types though, for they are all just byte arrays
//at heart. So be careful to ensure that any CUDA kernel that handles a CUDA surface
//uses it as an appropriate type. You will see that the update_surface kernel (defined
//above) treats each pixel as four unsigned bytes along the X-axis: one for red, green, blue,
//and alpha respectively.
//Create the CUDA array and texture reference
cudaArray *bitmap_d;
cudaGraphicsResource *cudaTextureID;
//Register the GL texture with the CUDA graphics library. A new cudaGraphicsResource is created, and its address is placed in cudaTextureID.
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__OPENGL.html#group__CUDART__OPENGL_1g80d12187ae7590807c7676697d9fe03d
cudaGraphicsGLRegisterImage(&cudaTextureID, textureID, GL_TEXTURE_2D,
cudaGraphicsRegisterFlagsNone);
cudaCheckError();
//Map graphics resources for access by CUDA.
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP_1gad8fbe74d02adefb8e7efb4971ee6322
cudaGraphicsMapResources(1, &cudaTextureID, 0);
cudaCheckError();
//Get the location of the array of pixels that was mapped by the previous function and place that address in bitmap_d
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP_1g0dd6b5f024dfdcff5c28a08ef9958031
cudaGraphicsSubResourceGetMappedArray(&bitmap_d, cudaTextureID, 0, 0);
cudaCheckError();
//Create a CUDA resource descriptor. This is used to get and set attributes of CUDA resources.
//This one will tell CUDA how we want the bitmap_surface to be configured.
//Documentation for the struct: https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaResourceDesc.html#structcudaResourceDesc
struct cudaResourceDesc resDesc;
//Clear it with 0s so that some flags aren't arbitrarily left at 1s
memset(&resDesc, 0, sizeof(resDesc));
//Set the resource type to be an array for convenient processing in the CUDA kernel.
//List of resTypes: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g067b774c0e639817a00a972c8e2c203c
resDesc.resType = cudaResourceTypeArray;
//Bind the new descriptor with the bitmap created earlier.
resDesc.res.array.array = bitmap_d;
//Create a new CUDA surface ID reference.
//This is really just an unsigned long long.
//Docuentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1gbe57cf2ccbe7f9d696f18808dd634c0a
cudaSurfaceObject_t bitmap_surface = 0;
//Create the surface with the given description. That surface ID is placed in bitmap_surface.
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__SURFACE__OBJECT.html#group__CUDART__SURFACE__OBJECT_1g958899474ab2c5f40d233b524d6c5a01
cudaCreateSurfaceObject(&bitmap_surface, &resDesc);
cudaCheckError();
//Set the number of blocks to call the kernel with.
dim3 blocks((unsigned int)ceil((float)textureWidth / THREADS_PER_BLOCK), textureHeight);
//Frame counter
int i = 0;
while (!glfwWindowShouldClose(window)) {
//Start the timer
Timer stopwatch;
//Update the texture with the CUDA kernel
update_surface<<<blocks, THREADS_PER_BLOCK>>>(bitmap_surface, textureWidth, textureHeight, i++);
cudaCheckError();
//Synchronize the CUDA surface with the OpenGL texture, so the texture has the same data as the newly written surface.
cudaDeviceSynchronize();
cudaCheckError();
//Print elapsed time occasionally to the title
if(i%10==0){
float fps = 1.f/stopwatch.getElapsedSeconds();
ostringstream myString;
myString << "Simple CUDA + GLFW example (";
myString.precision(2);
myString << std::fixed << fps;
myString << " FPS)";
glfwSetWindowTitle(window, myString.str().c_str());
}
//Render the results on the screen buffer
//Clear color and depth buffers
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
//Begin drawing
glMatrixMode(GL_MODELVIEW); //Operate on model-view matrix
//Enable the use of 2D textures
glEnable(GL_TEXTURE_2D);
//Bind to our texture handle
glBindTexture(GL_TEXTURE_2D, textureID);
//Draw a quad with the synchronized texture
glBegin(GL_QUADS);
glTexCoord2i(0, 0); glVertex2i(0, 0);
glTexCoord2i(0, 1); glVertex2i(0, windowHeight);
glTexCoord2i(1, 1); glVertex2i(windowWidth, windowHeight);
glTexCoord2i(1, 0); glVertex2i(windowWidth, 0);
glEnd();
//Disable the use of 2D textures.
glDisable(GL_TEXTURE_2D);
//Blit rendered contents on the screen and poll events.
glfwSwapBuffers(window);
glfwPollEvents();
}
glfwDestroyWindow(window);
glfwTerminate();
exit(EXIT_SUCCESS);
return 0;
}