-
Notifications
You must be signed in to change notification settings - Fork 0
/
theSecondMain.cpp
151 lines (119 loc) · 4.29 KB
/
theSecondMain.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#include <stdlib.h>
#include <sys/types.h>
#include <time.h>

#include <cstdio>
#include <iostream>
#include <vector>

#include <CL/cl.h>
int main(){
cl_platform_id platform;
cl_context context;
cl_command_queue queue;
cl_device_id device;
cl_int error;
//platform
if (clGetPlatformIDs(1,&platform, NULL) != CL_SUCCESS) {
std::cout << "Error getting platform id\n";
exit(error);
}
std::cout << "PLatform Id :" << platform <<"\n";
// Device
if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL)!= CL_SUCCESS) {
std::cout << "Error getting device ids\n";
exit(error);
}
std::cout << "Device Id :" << device <<"\n";
// Context
context = clCreateContext(0, 1, &device, NULL, NULL, &error);
if (error != CL_SUCCESS) {
std::cout << "Error creating context\n";
exit(error);
}
//std::cout << "Context :"<< context <<"\n";
//FOR PROFILING
cl_event timing_event;
cl_ulong time_start, time_end;
float read_time;
// Command-queue
queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &error);
if (error != CL_SUCCESS) {
std::cout << "Error creating command queue\n";
exit(error);
}
// Initialize of memory
const int size = 10000;
float* src_a_h = new float[size];
float* src_b_h = new float[size];
float* res_h = new float[size];
// Initialize both vectors
for (int i = 0; i < size; i++) {
src_a_h[i] = src_b_h[i] = (float) i;
}
// Creating buffers for the arrays
const int mem_size = sizeof(float)*size;
// Allocates a buffer of size mem_size and copies mem_size bytes from src_a_h and others
cl_mem src_a_d = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_a_h, &error);
cl_mem src_b_d = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_b_h, &error);
cl_mem res_d = clCreateBuffer(context, CL_MEM_WRITE_ONLY, mem_size, NULL, &error);
cl_program program;
FILE *program_handle;
char* program_buffer;
size_t program_size;
program_handle = fopen("vector_add_gpu.cl", "r");
fseek(program_handle, 0, SEEK_END);
program_size = ftell(program_handle);
rewind(program_handle);
//std::cout<<program_size<<"\n";
program_buffer = (char*)malloc(program_size + 1);
program_buffer[program_size] = '\0';
fread(program_buffer, sizeof(char), program_size,program_handle);
fclose(program_handle);
program = clCreateProgramWithSource(context, 1,(const char**)&program_buffer, &program_size, &error);
// Builds the program
error = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
// Shows the log
char* build_log;
size_t log_size;
// First call to know the proper size
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
build_log = new char[log_size+1];
// Second call to get the log
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
build_log[log_size] = '\0';
std::cout << build_log <<"\n";
delete[] build_log;
// Extracting the kernel
cl_kernel vector_add_kernel = clCreateKernel(program, "vector_add_gpu", &error);
//assert(error == CL_SUCCESS);
error = clSetKernelArg(vector_add_kernel, 0, sizeof(cl_mem), &src_a_d);
error |= clSetKernelArg(vector_add_kernel, 1, sizeof(cl_mem), &src_b_d);
error |= clSetKernelArg(vector_add_kernel, 2, sizeof(cl_mem), &res_d);
error |= clSetKernelArg(vector_add_kernel, 3, sizeof(size_t), &size);
//assert(error == CL_SUCCESS);
// Launching kernel
const size_t work_units_per_kernel = (size_t)size;
//const size_t global_units = ((size/work_units_per_kernel) + 1)*(work_units_per_kernel);
clEnqueueNDRangeKernel(queue, vector_add_kernel, 1, NULL, &work_units_per_kernel, NULL, 0, NULL, NULL);
//getting global work size
clEnqueueReadBuffer(queue, res_d, CL_TRUE, 0, mem_size, res_h, 0, NULL, &timing_event);
clGetEventProfilingInfo(timing_event, CL_PROFILING_COMMAND_START,
sizeof(time_start), &time_start, NULL);
clGetEventProfilingInfo(timing_event, CL_PROFILING_COMMAND_END,
sizeof(time_end), &time_end, NULL);
read_time = time_end - time_start;
printf("..............%f(miliseconds).................\n\n",(read_time/1000000));
for(int i = 0; i < size; i++)
{
std::cout << res_h[i]<<"\t";
}
// Cleaning up
delete[] src_a_h;
delete[] src_b_h;
delete[] res_h;
//delete[] check;
clReleaseKernel(vector_add_kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);
clReleaseMemObject(src_a_d);
clReleaseMemObject(src_b_d);
clReleaseMemObject(res_d);
return 0;
} // Main Ends Here