diff --git a/libvmaf/src/feature/cuda/integer_adm_cuda.c b/libvmaf/src/feature/cuda/integer_adm_cuda.c index cdf9f482c..9dec3c11c 100644 --- a/libvmaf/src/feature/cuda/integer_adm_cuda.c +++ b/libvmaf/src/feature/cuda/integer_adm_cuda.c @@ -149,7 +149,6 @@ void adm_dwt2_s123_combined_device(AdmStateCuda *s,const int32_t *d_i4_scale, in const int BLOCK_Y = (h + 1) / 2; void * args_vert[] = {&d_i4_scale, &tmp_buf, &w, &h, &img_stride, &*p}; - const int num_threads = 128; switch (scale) { case 1: CHECK_CUDA(cuLaunchKernel(s->func_dwt_s123_combined_vert_kernel_0_0_int32_t, diff --git a/libvmaf/src/feature/cuda/integer_psnr_cuda.c b/libvmaf/src/feature/cuda/integer_psnr_cuda.c index d121c7c74..d29bdabcb 100644 --- a/libvmaf/src/feature/cuda/integer_psnr_cuda.c +++ b/libvmaf/src/feature/cuda/integer_psnr_cuda.c @@ -23,9 +23,8 @@ #include #include +#include "common.h" #include "cuda_helper.cuh" -#include "libvmaf/vmaf_cuda_state.h" -#include "picture_cuda.h" #include "feature_collector.h" #include "feature_extractor.h" @@ -231,13 +230,8 @@ static int extract_fex_cuda(VmafFeatureExtractor *fex, CHECK_CUDA(cuEventRecord(s->event, vmaf_cuda_picture_get_stream(ref_pic))); // This event ensures the input buffer is consumed CHECK_CUDA(cuStreamWaitEvent(s->str, s->event, CU_EVENT_WAIT_DEFAULT)); - // CHECK_CUDA(cuCtxPushCurrent(fex->cu_state->ctx)); - // CHECK_CUDA(cuEventDestroy(s->event)); - // CHECK_CUDA(cuEventCreate(&s->event, CU_EVENT_DEFAULT)); - // CHECK_CUDA(cuCtxPopCurrent(NULL)); - // Download sad - // CHECK_CUDA(cuStreamSynchronize(s->host_stream)); + // Download SSE CHECK_CUDA(cuMemcpyDtoHAsync(s->sse_host, (CUdeviceptr)s->sse_device->data, sizeof(uint64_t) * 3, s->str)); CHECK_CUDA(cuEventRecord(s->finished, s->str)); diff --git a/libvmaf/src/libvmaf.c b/libvmaf/src/libvmaf.c index cc167376f..6e0574aca 100644 --- a/libvmaf/src/libvmaf.c +++ b/libvmaf/src/libvmaf.c @@ -593,7 +593,7 @@ static int translate_picture_device(VmafContext *vmaf, VmafPicture *pic, return err; } - err = vmaf_cuda_picture_download_async(pic, pic_host, 0x3); + err = vmaf_cuda_picture_download_async(pic, pic_host, 0xF); if (err) { vmaf_log(VMAF_LOG_LEVEL_ERROR, "problem moving cuda pic into host buffer\n");