From 15e16f2810847b097267fb83509063db62afcfec Mon Sep 17 00:00:00 2001 From: Hanno Schwalm Date: Tue, 14 Jan 2025 06:11:28 +0100 Subject: [PATCH] Implement module-in-focus process cache The standard pixelpipe cache offers *input* data for a module to be fully processed if all parameters including blending stuff match. This commit implements a single line cache per pixelpipe only effective for a module being in focus *and* doing some blending. Data are possibly read from this cache instead of the possibly costly module->process/_cl. Possibly write to that cache if data were not valid before. Fixes #18180 --- src/develop/imageop.c | 7 ++ src/develop/pixelpipe_hb.c | 211 ++++++++++++++++++++++++++++--------- src/develop/pixelpipe_hb.h | 3 + 3 files changed, 171 insertions(+), 50 deletions(-) diff --git a/src/develop/imageop.c b/src/develop/imageop.c index 1bbe28910b35..e3566f5203b7 100644 --- a/src/develop/imageop.c +++ b/src/develop/imageop.c @@ -2358,6 +2358,13 @@ void dt_iop_request_focus(dt_iop_module_t *module) dev->gui_module = module; dev->focus_hash = TRUE; + dt_free_align(dev->full.pipe->bcache_data); + dev->full.pipe->bcache_data = NULL; + dt_free_align(dev->preview_pipe->bcache_data); + dev->preview_pipe->bcache_data = NULL; + dt_free_align(dev->preview2.pipe->bcache_data); + dev->preview2.pipe->bcache_data = NULL; + /* lets lose the focus of previous focus module*/ if(out_focus_module) { diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c index 4aab0d1544e5..5340e8651ce4 100644 --- a/src/develop/pixelpipe_hb.c +++ b/src/develop/pixelpipe_hb.c @@ -248,9 +248,9 @@ gboolean dt_dev_pixelpipe_init_cached(dt_dev_pixelpipe_t *pipe, pipe->bypass_blendif = FALSE; pipe->input_timestamp = 0; pipe->levels = IMAGEIO_RGB | IMAGEIO_INT8; - dt_pthread_mutex_init(&(pipe->mutex), NULL); - dt_pthread_mutex_init(&(pipe->backbuf_mutex), NULL); - dt_pthread_mutex_init(&(pipe->busy_mutex), NULL); + dt_pthread_mutex_init(&pipe->mutex, NULL); + dt_pthread_mutex_init(&pipe->backbuf_mutex, NULL); + dt_pthread_mutex_init(&pipe->busy_mutex, NULL); pipe->icc_type = DT_COLORSPACE_NONE; pipe->icc_filename = NULL; pipe->icc_intent = DT_INTENT_LAST; @@ -262,7 +262,8 @@ gboolean dt_dev_pixelpipe_init_cached(dt_dev_pixelpipe_t *pipe, pipe->input_profile_info = NULL; pipe->output_profile_info = NULL; pipe->runs = 0; - + pipe->bcache_data = NULL; + pipe->bcache_hash = 0; return dt_dev_pixelpipe_cache_init(pipe, entries, size, memlimit); } @@ -323,6 +324,7 @@ void dt_dev_pixelpipe_cleanup(dt_dev_pixelpipe_t *pipe) dt_dev_pixelpipe_cleanup_nodes(pipe); // so now it's safe to clean up cache: dt_dev_pixelpipe_cache_cleanup(pipe); + dt_free_align(pipe->bcache_data); pipe->icc_type = DT_COLORSPACE_NONE; g_free(pipe->icc_filename); @@ -334,7 +336,7 @@ void dt_dev_pixelpipe_cleanup(dt_dev_pixelpipe_t *pipe) pipe->backbuf_height = 0; dt_pthread_mutex_unlock(&pipe->backbuf_mutex); - dt_pthread_mutex_destroy(&(pipe->backbuf_mutex)); + dt_pthread_mutex_destroy(&pipe->backbuf_mutex); pipe->output_imgid = NO_IMGID; @@ -343,8 +345,8 @@ void dt_dev_pixelpipe_cleanup(dt_dev_pixelpipe_t *pipe) g_list_free_full(pipe->forms, (void (*)(void *))dt_masks_free_form); pipe->forms = NULL; } - dt_pthread_mutex_destroy(&(pipe->busy_mutex)); - dt_pthread_mutex_destroy(&(pipe->mutex)); + dt_pthread_mutex_destroy(&pipe->busy_mutex); + dt_pthread_mutex_destroy(&pipe->mutex); } void dt_dev_pixelpipe_cleanup_nodes(dt_dev_pixelpipe_t *pipe) @@ -1083,7 +1085,7 @@ static void _collect_histogram_on_CPU(dt_dev_pixelpipe_t *pipe, if((dev->gui_attached || !(piece->request_histogram & DT_REQUEST_ONLY_IN_GUI)) && (piece->request_histogram & DT_REQUEST_ON)) { - _histogram_collect(piece, input, roi_in, &(piece->histogram), piece->histogram_max); + _histogram_collect(piece, input, roi_in, &piece->histogram, piece->histogram_max); *pixelpipe_flow |= (PIXELPIPE_FLOW_HISTOGRAM_ON_CPU); *pixelpipe_flow &= ~(PIXELPIPE_FLOW_HISTOGRAM_NONE | PIXELPIPE_FLOW_HISTOGRAM_ON_GPU); @@ -1102,6 +1104,51 @@ static void _collect_histogram_on_CPU(dt_dev_pixelpipe_t *pipe, } } +/* About the module-in-focus blending cache + Processing a piece in the pixelpipe is basically + a) call the module->process + b) possibly blend output of (a) to pipe output + As (a) can be very demanding and it's output does not change if we only change any + blending parameter we check for this situation if + - there is a module in focus and + - we process any pipe writing to screen. + + The validating hash is almost dt_dev_pixelpipe_cache_hash() except it + does *not* include the blending paramters for the module in focus. +*/ +static inline dt_hash_t _piece_process_hash(const dt_dev_pixelpipe_iop_t *piece, + const dt_iop_roi_t *roi, + const dt_iop_module_t *module) +{ + dt_hash_t phash = dt_dev_pixelpipe_cache_hash(piece->pipe->image.id, roi, piece->pipe, module->iop_order -1); + phash = dt_hash(phash, roi, sizeof(dt_iop_roi_t)); + phash = dt_hash(phash, &module->so->op, strlen(module->so->op)); + phash = dt_hash(phash, &module->instance, sizeof(module->instance)); + phash = dt_hash(phash, module->params, module->params_size); + return phash; +} + +static inline gboolean _piece_fast_blend(const dt_dev_pixelpipe_iop_t *piece, + const dt_iop_module_t *module) +{ + return (piece->pipe->type & DT_DEV_PIXELPIPE_SCREEN) + && module->dev + && module->dev->gui_attached + && module == module->dev->gui_module + && dt_dev_modulegroups_test_activated(darktable.develop) + && _transform_for_blend(module, piece); +} + +static inline float *_get_fast_blendcache(const size_t nfloats, + const dt_hash_t phash, + dt_dev_pixelpipe_t *pipe) +{ + dt_free_align(pipe->bcache_data); + pipe->bcache_data = dt_alloc_align_float(nfloats); + pipe->bcache_hash = phash; + return pipe->bcache_data; +} + static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe, dt_develop_t *dev, float *input, @@ -1150,14 +1197,14 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe, if(cst_from != cst_to) dt_print_pipe(DT_DEBUG_PIPE, "transform colorspace", - piece->pipe, module, DT_DEVICE_CPU, roi_in, NULL, " %s -> %s `%s'", + pipe, module, DT_DEVICE_CPU, roi_in, NULL, " %s -> %s `%s'", dt_iop_colorspace_to_name(cst_from), dt_iop_colorspace_to_name(cst_to), work_profile ? dt_colorspaces_get_name(work_profile->type, work_profile->filename) : "no work profile"); // transform to module input colorspace - dt_ioppr_transform_image_colorspace - (module, input, input, roi_in->width, roi_in->height, cst_from, + dt_ioppr_transform_image_colorspace( + module, input, input, roi_in->width, roi_in->height, cst_from, cst_to, &input_format->cst, work_profile); if(dt_atomic_get_int(&pipe->shutdown)) @@ -1178,23 +1225,40 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe, /* process module on cpu. use tiling if needed and possible. */ const gboolean pfm_dump = darktable.dump_pfm_pipe - && (piece->pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT)); + && (pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT)); if(pfm_dump) dt_dump_pipe_pfm(module->op, input, roi_in->width, roi_in->height, in_bpp, - TRUE, dt_dev_pixelpipe_type_to_str(piece->pipe->type)); + TRUE, dt_dev_pixelpipe_type_to_str(pipe->type)); + + const gboolean relevant = _piece_fast_blend(piece, module); + const dt_hash_t phash = relevant ? _piece_process_hash(piece, roi_out, module) : 0; + const size_t nfloats = bpp * roi_out->width * roi_out->height / sizeof(float); + const gboolean bcaching = relevant ? pipe->bcache_data && phash == pipe->bcache_hash : FALSE; if(!fitting && piece->process_tiling_ready) { dt_print_pipe(DT_DEBUG_PIPE, - "process tiles", - piece->pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s", + bcaching ? "from focus cache" : "process tiles", + pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s", dt_iop_colorspace_to_name(cst_to), cst_to != cst_out ? " -> " : "", cst_to != cst_out ? dt_iop_colorspace_to_name(cst_out) : ""); - module->process_tiling(module, piece, input, *output, roi_in, roi_out, in_bpp); + if(bcaching) + { + dt_iop_image_copy(*output, pipe->bcache_data, nfloats); + } + else + { + module->process_tiling(module, piece, input, *output, roi_in, roi_out, in_bpp); + if(relevant) + { + float *cache = _get_fast_blendcache(nfloats, phash, pipe); + if(cache) dt_iop_image_copy(cache, *output, nfloats); + } + } *pixelpipe_flow |= (PIXELPIPE_FLOW_PROCESSED_ON_CPU | PIXELPIPE_FLOW_PROCESSED_WITH_TILING); *pixelpipe_flow &= ~(PIXELPIPE_FLOW_PROCESSED_ON_GPU); @@ -1202,8 +1266,8 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe, else { dt_print_pipe(DT_DEBUG_PIPE, - "process", - piece->pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s%s %.fMB", + bcaching ? "from focus cache" : "process", + pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s%s %.fMB", dt_iop_colorspace_to_name(cst_to), cst_to != cst_out ? " -> " : "", cst_to != cst_out ? dt_iop_colorspace_to_name(cst_out) : "", @@ -1213,7 +1277,7 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe, 1e-6 * (tiling->factor * (m_width * m_height * m_bpp) + tiling->overhead)); // this code section is for simplistic benchmarking via --bench-module - if((piece->pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT)) + if((pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT)) && darktable.bench_module) { if(dt_str_commasubstring(darktable.bench_module, module->op)) @@ -1222,8 +1286,8 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe, dt_times_t end; const int old_muted = darktable.unmuted; darktable.unmuted = 0; - const gboolean full = piece->pipe->type & DT_DEV_PIXELPIPE_FULL; - const int counter = (piece->pipe->type & DT_DEV_PIXELPIPE_FULL) ? 100 : 50; + const gboolean full = pipe->type & DT_DEV_PIXELPIPE_FULL; + const int counter = full ? 100 : 50; const float mpix = (roi_out->width * roi_out->height) / 1.0e6; if(module->process_plain) @@ -1240,7 +1304,20 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe, darktable.unmuted = old_muted; } } - module->process(module, piece, input, *output, roi_in, roi_out); + + if(bcaching) + { + dt_iop_image_copy(*output, pipe->bcache_data, nfloats); + } + else + { + module->process(module, piece, input, *output, roi_in, roi_out); + if(relevant) + { + float *cache = _get_fast_blendcache(nfloats, phash, pipe); + if(cache) dt_iop_image_copy(cache, *output, nfloats); + } + } *pixelpipe_flow |= (PIXELPIPE_FLOW_PROCESSED_ON_CPU); *pixelpipe_flow &= ~(PIXELPIPE_FLOW_PROCESSED_ON_GPU @@ -1253,7 +1330,7 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe, roi_out->width, roi_out->height, bpp, FALSE, dt_dev_pixelpipe_type_to_str(piece->pipe->type)); _dump_pipe_pfm_diff(module->op, input, roi_in, in_bpp, *output, roi_out, bpp, - dt_dev_pixelpipe_type_to_str(piece->pipe->type)); + dt_dev_pixelpipe_type_to_str(pipe->type)); } // and save the output colorspace @@ -1545,7 +1622,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, module->modify_roi_in(module, piece, roi_out, &roi_in); if((darktable.unmuted & DT_DEBUG_PIPE) && memcmp(roi_out, &roi_in, sizeof(dt_iop_roi_t))) dt_print_pipe(DT_DEBUG_PIPE, - "modify roi IN", piece->pipe, module, DT_DEVICE_NONE, roi_out, &roi_in, "ID=%i", + "modify roi IN", pipe, module, DT_DEVICE_NONE, roi_out, &roi_in, "ID=%i", pipe->image.id); // recurse to get actual data of input buffer @@ -1752,7 +1829,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, { dt_print_pipe(DT_DEBUG_OPENCL | DT_DEBUG_PIPE, "no input cl_mem", - piece->pipe, module, pipe->devid, &roi_in, roi_out); + pipe, module, pipe->devid, &roi_in, roi_out); success_opencl = FALSE; } @@ -1784,7 +1861,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, { dt_print_pipe(DT_DEBUG_OPENCL | DT_DEBUG_PIPE, "no output cl_mem", - piece->pipe, module, pipe->devid, &roi_in, roi_out); + pipe, module, pipe->devid, &roi_in, roi_out); success_opencl = FALSE; } } @@ -1797,12 +1874,12 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, { if(cst_from != cst_to) dt_print_pipe(DT_DEBUG_PIPE, - "transform colorspace", piece->pipe, module, pipe->devid, &roi_in, NULL, " %s -> %s `%s'", + "transform colorspace", pipe, module, pipe->devid, &roi_in, NULL, " %s -> %s `%s'", dt_iop_colorspace_to_name(cst_from), dt_iop_colorspace_to_name(cst_to), work_profile ? dt_colorspaces_get_name(work_profile->type, work_profile->filename) : "no work profile"); success_opencl = dt_ioppr_transform_image_colorspace_cl( - module, piece->pipe->devid, + module, pipe->devid, cl_mem_input, cl_mem_input, roi_in.width, roi_in.height, input_cst_cl, @@ -1822,7 +1899,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, const size_t outbufsize = bpp * roi_out->width * roi_out->height; _histogram_collect_cl(pipe->devid, piece, cl_mem_input, - &roi_in, &(piece->histogram), + &roi_in, &piece->histogram, piece->histogram_max, *output, outbufsize); pixelpipe_flow |= (PIXELPIPE_FLOW_HISTOGRAM_ON_GPU); pixelpipe_flow &= ~(PIXELPIPE_FLOW_HISTOGRAM_NONE @@ -1851,16 +1928,20 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, meaningful messages in case of error */ if(success_opencl) { + const gboolean relevant = _piece_fast_blend(piece, module); + const dt_hash_t phash = relevant ? _piece_process_hash(piece, roi_out, module) : 0; + const gboolean bcaching = relevant ? pipe->bcache_data && phash == pipe->bcache_hash : FALSE; + dt_print_pipe(DT_DEBUG_PIPE, - "process", - piece->pipe, module, pipe->devid, &roi_in, roi_out, "%s%s%s %.1fMB", + bcaching ? "from focus cache" : "process", + pipe, module, pipe->devid, &roi_in, roi_out, "%s%s%s %.1fMB", dt_iop_colorspace_to_name(cst_to), cst_to != cst_out ? " -> " : "", cst_to != cst_out ? dt_iop_colorspace_to_name(cst_out) : "", 1e-6 * (tiling.factor_cl * (m_width * m_height * m_bpp) + tiling.overhead)); // this code section is for simplistic benchmarking via --bench-module - if((piece->pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT)) + if((pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT)) && darktable.bench_module) { if(dt_str_commasubstring(darktable.bench_module, module->op)) @@ -1869,9 +1950,9 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, dt_times_t end; const int old_muted = darktable.unmuted; darktable.unmuted = 0; - const gboolean full = piece->pipe->type & DT_DEV_PIXELPIPE_FULL; + const gboolean full = pipe->type & DT_DEV_PIXELPIPE_FULL; const float mpix = (roi_out->width * roi_out->height) / 1.0e6; - const int counter = (piece->pipe->type & DT_DEV_PIXELPIPE_FULL) ? 100 : 50; + const int counter = (pipe->type & DT_DEV_PIXELPIPE_FULL) ? 100 : 50; gboolean success = TRUE; dt_get_times(&bench); for(int i = 0; i < counter; i++) @@ -1898,20 +1979,33 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, } } const gboolean pfm_dump = darktable.dump_pfm_pipe - && (piece->pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT)) + && (pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT)) && dt_str_commasubstring(darktable.dump_pfm_pipe, module->op); if(pfm_dump) dt_opencl_dump_pipe_pfm(module->op, pipe->devid, cl_mem_input, TRUE, dt_dev_pixelpipe_type_to_str(piece->pipe->type)); - const cl_int err = module->process_cl(module, piece, cl_mem_input, *cl_mem_output, - &roi_in, roi_out); + cl_int err = CL_SUCCESS; + + if(bcaching) + { + err = dt_opencl_write_host_to_device(pipe->devid, pipe->bcache_data, *cl_mem_output, roi_out->width, roi_out->height, out_bpp); + } + else + { + err = module->process_cl(module, piece, cl_mem_input, *cl_mem_output, &roi_in, roi_out); + if(relevant && (err == CL_SUCCESS)) + { + float *cache = _get_fast_blendcache(out_bpp * roi_out->width * roi_out->height / sizeof(float), phash, piece->pipe); + if(cache) err = dt_opencl_read_host_from_device(pipe->devid, cache, *cl_mem_output, roi_out->width, roi_out->height, out_bpp); + } + } success_opencl = (err == CL_SUCCESS); if(!success_opencl) dt_print_pipe(DT_DEBUG_OPENCL, - "Error: process", piece->pipe, module, pipe->devid, &roi_in, roi_out, + "Error: process", pipe, module, pipe->devid, &roi_in, roi_out, "device=%i (%s), %s", pipe->devid, darktable.opencl->dev[pipe->devid].cname, cl_errstr(err)); @@ -1919,9 +2013,9 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, { if(pfm_dump) dt_opencl_dump_pipe_pfm(module->op, pipe->devid, *cl_mem_output, - FALSE, dt_dev_pixelpipe_type_to_str(piece->pipe->type)); + FALSE, dt_dev_pixelpipe_type_to_str(pipe->type)); - if((piece->pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT)) + if((pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT)) && darktable.dump_diff_pipe) { const int ch = dt_opencl_get_image_element_size(cl_mem_input) / sizeof(float); @@ -2005,12 +2099,12 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, { success_opencl = dt_ioppr_transform_image_colorspace_cl( - module, piece->pipe->devid, cl_mem_input, cl_mem_input, + module, pipe->devid, cl_mem_input, cl_mem_input, roi_in.width, roi_in.height, input_cst_cl, blend_cst, &input_cst_cl, work_profile); success_opencl &= dt_ioppr_transform_image_colorspace_cl( - module, piece->pipe->devid, *cl_mem_output, *cl_mem_output, + module, pipe->devid, *cl_mem_output, *cl_mem_output, roi_out->width, roi_out->height, pipe->dsc.cst, blend_cst, &pipe->dsc.cst, work_profile); @@ -2036,9 +2130,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, /* process blending */ if(success_opencl) { - success_opencl = dt_develop_blend_process_cl( - module, piece, cl_mem_input, *cl_mem_output, - &roi_in, roi_out); + success_opencl = dt_develop_blend_process_cl(module, piece, cl_mem_input, *cl_mem_output, &roi_in, roi_out); pixelpipe_flow |= (PIXELPIPE_FLOW_BLENDED_ON_GPU); pixelpipe_flow &= ~(PIXELPIPE_FLOW_BLENDED_ON_CPU); } @@ -2091,7 +2183,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, { if(cst_from != cst_to) dt_print_pipe(DT_DEBUG_PIPE, - "transform colorspace", piece->pipe, module, pipe->devid, &roi_in, NULL, " %s -> %s", + "transform colorspace", pipe, module, pipe->devid, &roi_in, NULL, " %s -> %s", dt_iop_colorspace_to_name(cst_from), dt_iop_colorspace_to_name(cst_to)); dt_ioppr_transform_image_colorspace( @@ -2116,18 +2208,37 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe, meaningful messages in case of error */ if(success_opencl) { + const gboolean relevant = _piece_fast_blend(piece, module); + const dt_hash_t phash = relevant ? _piece_process_hash(piece, roi_out, module) : 0; + const gboolean bcaching = relevant ? pipe->bcache_data && phash == pipe->bcache_hash : FALSE; dt_print_pipe(DT_DEBUG_PIPE, - "process tiled", - piece->pipe, module, pipe->devid, &roi_in, roi_out, "%s%s%s", + bcaching ? "from focus cache" : "process tiled", + pipe, module, pipe->devid, &roi_in, roi_out, "%s%s%s", dt_iop_colorspace_to_name(cst_to), cst_to != cst_out ? " -> " : "", cst_to != cst_out ? dt_iop_colorspace_to_name(cst_out) : ""); - const cl_int err = module->process_tiling_cl(module, piece, input, *output, &roi_in, roi_out, in_bpp); + + cl_int err = CL_SUCCESS; + + if(bcaching) + { + err = dt_opencl_write_host_to_device(pipe->devid, pipe->bcache_data, *cl_mem_output, roi_out->width, roi_out->height, out_bpp); + } + else + { + err = module->process_tiling_cl(module, piece, input, *output, &roi_in, roi_out, in_bpp); + if(relevant && (err == CL_SUCCESS)) + { + float *cache = _get_fast_blendcache(out_bpp * roi_out->width * roi_out->height / sizeof(float), phash, piece->pipe); + if(cache) + err = dt_opencl_read_host_from_device(pipe->devid, cache, *cl_mem_output, roi_out->width, roi_out->height, out_bpp); + } + } success_opencl = (err == CL_SUCCESS); if(!success_opencl) dt_print_pipe(DT_DEBUG_OPENCL, - "Error: process_tiling", piece->pipe, module, pipe->devid, &roi_in, roi_out, + "Error: process_tiling", pipe, module, pipe->devid, &roi_in, roi_out, "device=%i (%s), %s", pipe->devid, darktable.opencl->dev[pipe->devid].cname, cl_errstr(err)); diff --git a/src/develop/pixelpipe_hb.h b/src/develop/pixelpipe_hb.h index d31f65910645..0e93725a7a42 100644 --- a/src/develop/pixelpipe_hb.h +++ b/src/develop/pixelpipe_hb.h @@ -195,6 +195,9 @@ typedef struct dt_dev_pixelpipe_t GList *forms; // the masks generated in the pipe for later reusal are inside dt_dev_pixelpipe_iop_t gboolean store_all_raster_masks; + // module blending cache + float *bcache_data; + dt_hash_t bcache_hash; } dt_dev_pixelpipe_t; struct dt_develop_t;