Ks.cu
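
// GPU side of a 0/1 knapsack dynamic-programming solver.
// Each kernel launch processes one item and updates one row of a
// two-row DP table kept in device memory; the host wrapper below
// launches the kernels in sequence and times the pass with CUDA events.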
#include "Ks.cuh"
#include <iostream>
#include<stdio.h>
#include<algorithm>
using namespace std;
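
// Integer ceiling division: the number of blocks of size b needed to cover a work items.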
unsigned int iDivUp(unsigned int a, unsigned int b)
{
    return (a % b != 0) ? (a / b + 1) : (a / b);
}
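
// Maps a requested thread count onto a 1-D launch configuration:
// at most 512 threads per block, with enough blocks to cover the rest.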
class CThreadScaler
{
private:
    int Dg;
    int Db;
public:
    CThreadScaler(int NumThreads)
    {
        Db = min(512, NumThreads);
        if (Db > 0)
        {
            Dg = iDivUp(NumThreads, Db);
        }
        else
        {
            Dg = 0;
        }
    }
    int Grids()
    {
        return Dg;
    }
    int Blocks()
    {
        return Db;
    }
};
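
// Global 1-D thread index; used as the capacity value w handled by this thread.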
static __inline__ __device__ int CudaGetTargetID()
{
    return blockDim.x * blockIdx.x + threadIdx.x;
}
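
// One DP step of the 0/1 knapsack recurrence for item i:
//   M[i][w] = max(M[i-1][w], V[i] + M[i-1][w - W[i]])   for W[i] <= w
// Only two rows of the table are kept, laid out back to back in M with an
// offset of Capacity. Even item indices read the row at offset Capacity and
// write the row at offset 0; odd item indices do the opposite, so the rows
// ping-pong between consecutive launches.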
__global__ void knapsackKernel(int *V, int *W, int *M, int Capacity, int i)
{
    int w = CudaGetTargetID();
    if (w > Capacity)
        return;
    if (i % 2 != 0)
    {
        // Odd item: read the row at offset 0, write the row at offset Capacity.
        if (W[i] <= w)
            M[Capacity + w] = max(V[i] + M[w - W[i]], M[w]);
        else
            M[Capacity + w] = M[w];
    }
    else
    {
        // Even item: read the row at offset Capacity, write the row at offset 0.
        if (W[i] <= w)
            M[w] = max(V[i] + M[Capacity + (w - W[i])], M[Capacity + w]);
        else
            M[w] = M[Capacity + w];
    }
    // No block-level synchronisation is needed: each thread writes its own
    // cell, and the host synchronises between kernel launches.
}
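
// Host wrapper: launches knapsackKernel once per item (the DP rows depend on
// each other, so items must be processed sequentially) and reports the total
// GPU time measured with CUDA events.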
void CudaFunctionCall(int *Value, int *Weight, int *Matrix, int NumofItems, int Capacity)
{
    CudaSafeCall(cudaSetDevice(0));
    CThreadScaler TS(Capacity + 1); // one thread per capacity value 0..Capacity

    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord(start);
    for (int i = 0; i < NumofItems; i++)
    {
        knapsackKernel<<<TS.Grids(), TS.Blocks()>>>(Value, Weight, Matrix, Capacity, i);
        CudaSafeCall(cudaDeviceSynchronize());
    }
    cudaEventRecord(stop);
    CudaSafeCall(cudaGetLastError());
    cudaEventSynchronize(stop);

    float milliseconds = 0;
    cudaEventElapsedTime(&milliseconds, start, stop);
    printf("Parallel Time in ms:%f\n", milliseconds);

    cudaEventDestroy(start);
    cudaEventDestroy(stop);
}