Skip to content

Commit

Permalink
Updated VectorizedComputeBenchmark and ComputeLib
Browse files Browse the repository at this point in the history
Fixed the loadProgram() function so that it actually works for loading OpenCL code.
Added res/clprograms/benchmarks.cl, which contains all OpenCL benchmark kernels in place of the inline Strings.
Removed all remaining context sharing code.
  • Loading branch information
goofyseeker311 committed Dec 26, 2024
1 parent c0aa7db commit 51b33c6
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 52 deletions.
28 changes: 2 additions & 26 deletions src/fi/jkauppa/vectorizedcomputebenchmark/ComputeLib.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.lwjgl.PointerBuffer;
import org.lwjgl.opencl.CL;
import org.lwjgl.opencl.CL30;
import org.lwjgl.opencl.CL12GL;
import org.lwjgl.opencl.CLCapabilities;
import org.lwjgl.opencl.CLContextCallback;
import org.lwjgl.system.MemoryStack;
Expand All @@ -28,11 +27,7 @@ public ComputeLib() {
for (int i=0;i<devicelist.length;i++) {
long device = devicelist[i];
Device devicedata = devicemap.get(device);
System.out.print("OpenCL device["+i+"]: "+devicedata.devicename+" ["+devicedata.plaformopenclversion+"]");
if (devicedata.platformcontextsharing) {
System.out.print(" (OpenGL context sharing supported)");
}
System.out.println();
System.out.println("OpenCL device["+i+"]: "+devicedata.devicename+" ["+devicedata.plaformopenclversion+"]");
}
}

Expand Down Expand Up @@ -119,22 +114,6 @@ public void removeBuffer(long vmem) {
CL30.clReleaseMemObject(vmem);
}

/**
 * Creates an OpenCL memory object from an existing OpenGL buffer object, using
 * the OpenCL context stored for the given device.
 *
 * @param device   device handle used as the key into {@code devicemap}
 * @param glbuffer OpenGL buffer object name handed to {@code clCreateFromGLBuffer}
 * @return the handle returned by {@code clCreateFromGLBuffer}; when the call
 *         reports an error code other than {@code CL_SUCCESS}, that code is
 *         printed to {@code System.err} and the handle is still returned as-is
 */
public long createSharedGLBuffer(long device, int glbuffer) {
    // try-with-resources pops the stack frame even when the device lookup or
    // the native call throws, so the thread-local stack is never left unbalanced.
    try (MemoryStack clStack = MemoryStack.stackPush()) {
        IntBuffer errcode_ret = clStack.callocInt(1);
        Device devicedata = devicemap.get(device);
        long context = devicedata.context;
        long buffer = CL12GL.clCreateFromGLBuffer(context, CL30.CL_MEM_READ_WRITE, glbuffer, errcode_ret);
        // The error code lives in stack memory, so it must be read before the frame is popped.
        int errcode = errcode_ret.get(0);
        if (errcode != CL30.CL_SUCCESS) {
            System.err.println("clCreateFromGLBuffer failed with OpenCL error code " + errcode);
        }
        return buffer;
    }
}
/**
 * Enqueues an acquire of one OpenGL-backed memory object on the given command queue.
 * Both trailing arguments of the underlying call are passed as {@code null}
 * (no event wait list, no completion event) and the call's result is not inspected.
 *
 * @param queue command queue handle the acquire is enqueued on
 * @param vmem  memory object handle to acquire
 */
public void acquireSharedGLBuffer(long queue, long vmem) {
    final long commandQueue = queue;
    final long memObject = vmem;
    CL12GL.clEnqueueAcquireGLObjects(commandQueue, memObject, null, null);
}
/**
 * Enqueues a release of one OpenGL-backed memory object on the given command queue.
 * Both trailing arguments of the underlying call are passed as {@code null}
 * (no event wait list, no completion event) and the call's result is not inspected.
 *
 * @param queue command queue handle the release is enqueued on
 * @param vmem  memory object handle to release
 */
public void releaseSharedGLBuffer(long queue, long vmem) {
    final long commandQueue = queue;
    final long memObject = vmem;
    CL12GL.clEnqueueReleaseGLObjects(commandQueue, memObject, null, null);
}

public static String loadProgram(String filename, boolean loadresourcefromjar) {
String k = null;
if (filename!=null) {
Expand All @@ -146,10 +125,10 @@ public static String loadProgram(String filename, boolean loadresourcefromjar) {
}else {
textfilestream = new BufferedInputStream(new FileInputStream(textfile));
}
textfilestream.reset();
byte[] bytes = new byte[textfilestream.available()];
DataInputStream dataInputStream = new DataInputStream(textfilestream);
dataInputStream.readFully(bytes);
k = new String(bytes);
textfilestream.close();
} catch (Exception ex) {ex.printStackTrace();}
}
Expand Down Expand Up @@ -224,7 +203,6 @@ public static class Device {
public String platformname = null;
public CLCapabilities plaformcaps = null;
public String plaformopenclversion = null;
public boolean platformcontextsharing = false;
public String devicename = null;
}

Expand All @@ -247,7 +225,6 @@ private TreeMap<Long,Device> initClDevices() {

IntBuffer errcode_ret = clStack.callocInt(1);
int errcode_ret_int = 1;
boolean contextsharing = false;
PointerBuffer clCtxProps = clStack.mallocPointer(3);
clCtxProps.put(0, CL30.CL_CONTEXT_PLATFORM).put(1, platform).put(2, 0);
long context = CL30.clCreateContext(clCtxProps, device, (CLContextCallback)null, MemoryUtil.NULL, errcode_ret);
Expand All @@ -262,7 +239,6 @@ private TreeMap<Long,Device> initClDevices() {
devicedesc.plaformcaps = platformcaps;
devicedesc.plaformopenclversion = getClPlatformInfo(platform, CL30.CL_PLATFORM_VERSION).trim();
devicedesc.devicename = getClDeviceInfo(device, CL30.CL_DEVICE_NAME).trim();
devicedesc.platformcontextsharing = contextsharing;
devicesinit.put(device, devicedesc);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,14 @@ public class VectorizedComputeBenchmark {
private int nc;
private int re;

// OpenCL C source of the "loopsmmult" kernel, assembled as a single string with
// no line breaks between statements.
// Each work item converts its global id to float, accumulates (id+x)*y over a
// nested 72 x 128 loop, and stores the sum in c[global id].
private final String clLoopsSource =
"kernel void loopsmmult(global float *c) {"
+ "unsigned int xid = get_global_id(0);"
+ "float id = (float)xid;"
+ "float loopsum = 0.0f;"
+ "for (int y=0;y<72;y++) {"
+ "for (int x=0;x<128;x++) {"
+ "loopsum += (id+x)*y;"
+ "}"
+ "}"
+ "c[xid] = loopsum;"
+"}";

// OpenCL C source of the "loopsfill" kernel, assembled as a single string with
// no line breaks between statements.
// Each work item writes five consecutive floats starting at img[global id * 5]:
// four 0.0f values followed by INFINITY in the fifth slot.
private final String clFillSource =
"kernel void loopsfill(global float *img) {"
+ "unsigned int xid = get_global_id(0);"
+ "img[xid*5+0] = 0.0f;"
+ "img[xid*5+1] = 0.0f;"
+ "img[xid*5+2] = 0.0f;"
+ "img[xid*5+3] = 0.0f;"
+ "img[xid*5+4] = INFINITY;"
+"}";

/**
 * Stores the three benchmark settings on the new instance.
 *
 * @param vde value stored in {@code de}
 * @param vnc value stored in {@code nc}
 * @param vre value stored in {@code re}
 */
public VectorizedComputeBenchmark(int vde, int vnc, int vre) {
    // Parameter names differ from the field names, so unqualified assignment is unambiguous.
    de = vde;
    nc = vnc;
    re = vre;
}

public static void main(String[] args) {
System.out.println("VectorizedComputeBenchmark v1.0.1");
System.out.println("VectorizedComputeBenchmark v1.0.2");
int de = 0;
int nc = 100000000;
int re = 1000;
Expand All @@ -60,9 +37,11 @@ public void run() {
String devicename = devicedata.devicename;
System.out.println("Using device["+de+"]: "+devicename);

String clSource = ComputeLib.loadProgram("res/clprograms/benchmarks.cl", true);
long program = computelib.compileProgram(device, clSource);

if (true) {
long[] cbuf = {computelib.createBuffer(device, nc)};
long program = computelib.compileProgram(device, clLoopsSource);
float ctimedif = computelib.runProgram(device, queue, program, "loopsmmult", cbuf, new int[]{0}, new int[]{nc}, re, true)/re;
float tflops = (nc*3.0f*128.0f*72.0f*(1000.0f/ctimedif))/1000000000000.0f;
System.out.println(String.format("%.4f",ctimedif).replace(",", ".")+"ms\t"+String.format("%.3f",tflops).replace(",", ".")+"tflops\t device: "+devicename);
Expand All @@ -72,7 +51,6 @@ public void run() {
long ncmem = computelib.createBuffer(device, 1);
long[] cbuf = {computelib.createBuffer(device, nc*5), computelib.createBuffer(device, 1)};
computelib.writeBufferi(device, queue, ncmem, new int[]{nc});
long program = computelib.compileProgram(device, clFillSource);
float ctimedif = computelib.runProgram(device, queue, program, "loopsfill", cbuf, new int[]{0}, new int[]{nc}, re, true)/re;
float gbytes = (nc*5.0f*4.0f*(1000.0f/ctimedif))/1000000000.0f;
System.out.println(String.format("%.4f",ctimedif).replace(",", ".")+"ms\t"+String.format("%.3f",gbytes).replace(",", ".")+"GB/s\t device: "+devicename);
Expand Down
20 changes: 20 additions & 0 deletions src/res/clprograms/benchmarks.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
 * loopsmmult: each work item converts its global id to float, accumulates
 * (id + col) * row over a nested 72 x 128 loop, and writes the sum to the
 * output element at its global id.
 *
 * c: output buffer, one float per work item (indexed by global id).
 */
__kernel void loopsmmult(__global float *c) {
    const unsigned int gid = get_global_id(0);
    const float gidf = (float)gid;
    float acc = 0.0f;
    for (int row = 0; row < 72; row++) {
        for (int col = 0; col < 128; col++) {
            acc += (gidf + col) * row;
        }
    }
    c[gid] = acc;
}

/*
 * loopsfill: each work item writes five consecutive floats starting at
 * img[global id * 5] -- four zeros followed by INFINITY in the fifth slot.
 *
 * img: output buffer, five floats per work item.
 */
__kernel void loopsfill(__global float *img) {
    const unsigned int gid = get_global_id(0);
    /* Unsigned 32-bit multiply, same arithmetic as indexing with gid*5 directly. */
    const unsigned int base = gid * 5;
    img[base + 0] = 0.0f;
    img[base + 1] = 0.0f;
    img[base + 2] = 0.0f;
    img[base + 3] = 0.0f;
    img[base + 4] = INFINITY;
}

0 comments on commit 51b33c6

Please sign in to comment.