Skip to content

Commit d103632

Browse files
stduhpf (Stéphane du Hamel)
authored and committed
Tiling: fix edge cases for adaptive overlap
1 parent 8ab1afd commit d103632

File tree

1 file changed

+23
-4
lines changed

1 file changed

+23
-4
lines changed

ggml_extend.hpp

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -546,9 +546,20 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
546546
input_tile_size = tile_size * scale;
547547
output_tile_size = tile_size;
548548
}
549-
int num_tiles_x = (input_width - (int)(input_tile_size * tile_overlap_factor)) / (int)(input_tile_size * (1 - tile_overlap_factor));
549+
int tile_overlap = (input_tile_size * tile_overlap_factor);
550+
int non_tile_overlap = input_tile_size - tile_overlap;
551+
552+
int num_tiles_x = (input_width - tile_overlap) / non_tile_overlap;
553+
int overshoot_x = ((num_tiles_x + 1) * non_tile_overlap + tile_overlap) % input_width;
554+
555+
if ((overshoot_x != non_tile_overlap) && (overshoot_x <= num_tiles_x * (input_tile_size / 2 - tile_overlap))) {
556+
// if tiles don't fit perfectly using the desired overlap
557+
// and there is enough room to squeeze an extra tile without overlap becoming >0.5
558+
num_tiles_x++;
559+
}
560+
550561
float tile_overlap_factor_x = (float)(input_tile_size * num_tiles_x - input_width) / (float)(input_tile_size * (num_tiles_x - 1));
551-
if (num_tiles_x <= 1) {
562+
if (num_tiles_x <= 2) {
552563
if (input_width == input_tile_size) {
553564
num_tiles_x = 1;
554565
tile_overlap_factor_x = 0;
@@ -558,9 +569,17 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
558569
}
559570
}
560571

561-
int num_tiles_y = (input_height - (int)(input_tile_size * tile_overlap_factor)) / (int)(input_tile_size * (1 - tile_overlap_factor));
572+
int num_tiles_y = (input_height - tile_overlap) / non_tile_overlap;
573+
int overshoot_y = ((num_tiles_y + 1) * non_tile_overlap + tile_overlap) % input_height;
574+
575+
if ((overshoot_y != non_tile_overlap) && (overshoot_y <= num_tiles_y * (input_tile_size / 2 - tile_overlap))) {
576+
// if tiles don't fit perfectly using the desired overlap
577+
// and there is enough room to squeeze an extra tile without overlap becoming >0.5
578+
num_tiles_y++;
579+
}
580+
562581
float tile_overlap_factor_y = (float)(input_tile_size * num_tiles_y - input_height) / (float)(input_tile_size * (num_tiles_y - 1));
563-
if (num_tiles_y <= 1) {
582+
if (num_tiles_y <= 2) {
564583
if (input_height == input_tile_size) {
565584
num_tiles_y = 1;
566585
tile_overlap_factor_y = 0;

0 commit comments

Comments
 (0)