Skip to content

Commit

Permalink
updated overlay functions
Browse files Browse the repository at this point in the history
  • Loading branch information
dusty-nv committed Jul 24, 2020
1 parent 62cec40 commit 8b399c1
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 19 deletions.
13 changes: 12 additions & 1 deletion c/detectNet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,17 @@ bool detectNet::Overlay( void* input, void* output, uint32_t width, uint32_t hei
return false;
}

// if input and output are different images, copy the input to the output first
// then overlay the bounding boxes, etc. on top of the output image
if( input != output )
{
if( CUDA_FAILED(cudaMemcpy(output, input, imageFormatSize(format, width, height), cudaMemcpyDeviceToDevice)) )
{
LogError(LOG_TRT "detectNet -- Overlay() failed to copy input image to output image\n");
return false;
}
}

// bounding box overlay
if( flags & OVERLAY_BOX )
{
Expand Down Expand Up @@ -1102,7 +1113,7 @@ bool detectNet::Overlay( void* input, void* output, uint32_t width, uint32_t hei
}
}

font->OverlayText(input, format, width, height, labels, make_float4(255,255,255,255));
font->OverlayText(output, format, width, height, labels, make_float4(255,255,255,255));
}

PROFILER_END(PROFILER_VISUALIZE);
Expand Down
25 changes: 7 additions & 18 deletions c/detectNet.cu
Original file line number Diff line number Diff line change
Expand Up @@ -95,29 +95,18 @@ cudaError_t launchDetectionOverlay( T* input, T* output, uint32_t width, uint32_
if( !input || !output || width == 0 || height == 0 || !detections || numDetections == 0 || !colors )
return cudaErrorInvalidValue;

// if input and output are the same image, then we can use the faster method
// which draws 1 box per kernel, but doesn't copy pixels that aren't inside boxes
if( input == output )
// this assumes that the output already has the input image copied to it,
// which, if input != output, is done first by detectNet::Overlay()
for( int n=0; n < numDetections; n++ )
{
for( int n=0; n < numDetections; n++ )
{
const int boxWidth = (int)detections[n].Width();
const int boxHeight = (int)detections[n].Height();
const int boxWidth = (int)detections[n].Width();
const int boxHeight = (int)detections[n].Height();

// launch kernel
const dim3 blockDim(8, 8);
const dim3 gridDim(iDivUp(boxWidth,blockDim.x), iDivUp(boxHeight,blockDim.y));

gpuDetectionOverlayBox<T><<<gridDim, blockDim>>>(input, output, width, height, (int)detections[n].Left, (int)detections[n].Top, boxWidth, boxHeight, colors[detections[n].ClassID]);
}
}
else
{
// launch kernel
const dim3 blockDim(8, 8);
const dim3 gridDim(iDivUp(width,blockDim.x), iDivUp(height,blockDim.y));
const dim3 gridDim(iDivUp(boxWidth,blockDim.x), iDivUp(boxHeight,blockDim.y));

gpuDetectionOverlay<T><<<gridDim, blockDim>>>(input, output, width, height, detections, numDetections, colors);
gpuDetectionOverlayBox<T><<<gridDim, blockDim>>>(input, output, width, height, (int)detections[n].Left, (int)detections[n].Top, boxWidth, boxHeight, colors[detections[n].ClassID]);
}

return cudaGetLastError();
Expand Down

0 comments on commit 8b399c1

Please sign in to comment.