remove redundant memory traffic #7100

Open · wants to merge 5 commits into master
6 changes: 3 additions & 3 deletions src/convolutional_layer.c
@@ -711,7 +711,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
         if (train) l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
     }

-    l.output_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
+    l.output_gpu = cuda_make_array_init2zero(total_batch*out_h*out_w*n);
     if (train) l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n);

     if(binary){
@@ -755,9 +755,9 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
         }

         if (train) {
-            l.x_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
+            l.x_gpu = cuda_make_array_init2zero(total_batch*out_h*out_w*n);
 #ifndef CUDNN
-            l.x_norm_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
+            l.x_norm_gpu = cuda_make_array_init2zero(total_batch*out_h*out_w*n);
 #endif // CUDNN
         }
     }
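Why the host source can be dropped here: darknet callocs l.output on the host, so the old cuda_make_array() call spent a full host-to-device transfer just to ship zeros up the PCIe bus; the replacement produces the same zeroed device buffer without any host traffic. A self-contained sketch of the two patterns (plain CUDA runtime, not darknet code):

/* sketch.cu -- illustrates the traffic being removed; not part of the PR */
#include <cuda_runtime.h>
#include <stdlib.h>

int main(void) {
    size_t n = 1 << 20;                 /* arbitrary element count */
    size_t size = n * sizeof(float);
    float *d_buf;
    if (cudaMalloc((void **)&d_buf, size) != cudaSuccess) return 1;

    /* old pattern: seed the device buffer from a zeroed host buffer over PCIe */
    float *h_buf = (float *)calloc(n, sizeof(float));
    cudaMemcpy(d_buf, h_buf, size, cudaMemcpyHostToDevice);
    free(h_buf);

    /* new pattern: zero the buffer on the device; no host traffic at all */
    cudaMemsetAsync(d_buf, 0, size, 0);
    cudaDeviceSynchronize();

    cudaFree(d_buf);
    return 0;
}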
17 changes: 17 additions & 0 deletions src/dark_cuda.c
@@ -487,6 +487,23 @@ float *cuda_make_array(float *x, size_t n)
     return x_gpu;
 }

+float *cuda_make_array_init2zero(size_t n) {
+    float *x_gpu;
+    size_t size = sizeof(float) * n;
+    cudaError_t status = cudaMalloc((void **)&x_gpu, size);
+    // managed-memory alternative, kept from experimentation:
+    // cudaError_t status = cudaMallocManaged((void **)&x_gpu, size, cudaMemAttachGlobal);
+    // status = cudaMemAdvise(x_gpu, size, cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId);
+    if (status != cudaSuccess)
+        fprintf(stderr, " Try to set subdivisions=64 in your cfg-file. \n");
+    CHECK_CUDA(status);
+    // unlike cuda_make_array(), no host source is copied up; zero on device instead
+    // status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
+    status = cudaMemsetAsync(x_gpu, 0, size, 0);
+    CHECK_CUDA(status);
+    if (!x_gpu) error("Cuda malloc failed", DARKNET_LOC);
+    return x_gpu;
+}
+
 void **cuda_make_array_pointers(void **x, size_t n)
 {
     void **x_gpu;
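One subtlety in the helper above: cudaMemsetAsync(..., 0) is issued on the legacy default stream and returns immediately, yet no explicit synchronization is needed before the buffer is used, because work submitted to the default stream executes in submission order. A small standalone sketch of that ordering guarantee (illustrative, not PR code):

/* default_stream_order.cu -- why the async memset needs no explicit sync */
#include <cuda_runtime.h>
#include <stdio.h>

__global__ void add_one(float *buf, size_t n) {
    size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) buf[i] += 1.0f;   /* reads the zeros written by the memset */
}

int main(void) {
    const size_t n = 1024;
    float *d_buf, h_first = -1.0f;
    if (cudaMalloc((void **)&d_buf, n * sizeof(float)) != cudaSuccess) return 1;

    cudaMemsetAsync(d_buf, 0, n * sizeof(float), 0);   /* default stream */
    add_one<<<(n + 255) / 256, 256>>>(d_buf, n);       /* same stream: runs after */
    cudaMemcpy(&h_first, d_buf, sizeof(float), cudaMemcpyDeviceToHost);

    printf("%f\n", h_first);   /* prints 1.000000: the memset ran first */
    cudaFree(d_buf);
    return 0;
}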
1 change: 1 addition & 0 deletions src/dark_cuda.h
@@ -78,6 +78,7 @@ extern "C" {
     float *cuda_make_array_pinned_preallocated(float *x, size_t n);
     float *cuda_make_array_pinned(float *x, size_t n);
     float *cuda_make_array(float *x, size_t n);
+    float *cuda_make_array_init2zero(size_t n);
     void **cuda_make_array_pointers(void **x, size_t n);
     int *cuda_make_int_array(size_t n);
     int *cuda_make_int_array_new_api(int *x, size_t n);
4 changes: 2 additions & 2 deletions src/route_layer.c
@@ -35,8 +35,8 @@ route_layer make_route_layer(int batch, int n, int *input_layers, int *input_siz
     l.forward_gpu = forward_route_layer_gpu;
     l.backward_gpu = backward_route_layer_gpu;

-    l.delta_gpu = cuda_make_array(l.delta, outputs*batch);
-    l.output_gpu = cuda_make_array(l.output, outputs*batch);
+    l.delta_gpu = cuda_make_array_init2zero(outputs*batch);
+    l.output_gpu = cuda_make_array_init2zero(outputs*batch);
 #endif
     return l;
 }
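For the route layer the zero fill is more than an optimization detail: in darknet, a layer's delta buffer is typically accumulated into by downstream backward passes with an axpy-style update, so it must hold defined zeros before the first accumulation. A plain-C sketch of that pattern (a stand-in for the GPU axpy, assuming darknet's usual accumulation scheme; not darknet source):

/* delta_accum.c -- why delta buffers must start zeroed */
#include <stdio.h>

/* stand-in for darknet's axpy: y[i] += a * x[i] */
static void axpy(size_t n, float a, const float *x, float *y) {
    for (size_t i = 0; i < n; ++i) y[i] += a * x[i];
}

int main(void) {
    float upstream[4] = {1, 2, 3, 4};
    float delta[4] = {0};            /* what cuda_make_array_init2zero provides */

    axpy(4, 1.0f, upstream, delta);  /* += needs a defined starting value */
    printf("%f %f %f %f\n", delta[0], delta[1], delta[2], delta[3]);
    return 0;
}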