@@ -546,9 +546,20 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
546
546
input_tile_size = tile_size * scale;
547
547
output_tile_size = tile_size;
548
548
}
549
- int num_tiles_x = (input_width - (int )(input_tile_size * tile_overlap_factor)) / (int )(input_tile_size * (1 - tile_overlap_factor));
549
+ int tile_overlap = (input_tile_size * tile_overlap_factor);
550
+ int non_tile_overlap = input_tile_size - tile_overlap;
551
+
552
+ int num_tiles_x = (input_width - tile_overlap) / non_tile_overlap;
553
+ int overshoot_x = ((num_tiles_x + 1 ) * non_tile_overlap + tile_overlap) % input_width;
554
+
555
+ if ((overshoot_x != non_tile_overlap) && (overshoot_x <= num_tiles_x * (input_tile_size / 2 - tile_overlap))) {
556
+ // if tiles don't fit perfectly using the desired overlap
557
+ // and there is enough room to squeeze an extra tile without overlap becoming >0.5
558
+ num_tiles_x++;
559
+ }
560
+
550
561
float tile_overlap_factor_x = (float )(input_tile_size * num_tiles_x - input_width) / (float )(input_tile_size * (num_tiles_x - 1 ));
551
- if (num_tiles_x <= 1 ) {
562
+ if (num_tiles_x <= 2 ) {
552
563
if (input_width == input_tile_size) {
553
564
num_tiles_x = 1 ;
554
565
tile_overlap_factor_x = 0 ;
@@ -558,9 +569,17 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
558
569
}
559
570
}
560
571
561
- int num_tiles_y = (input_height - (int )(input_tile_size * tile_overlap_factor)) / (int )(input_tile_size * (1 - tile_overlap_factor));
572
+ int num_tiles_y = (input_height - tile_overlap) / non_tile_overlap;
573
+ int overshoot_y = ((num_tiles_y + 1 ) * non_tile_overlap + tile_overlap) % input_height;
574
+
575
+ if ((overshoot_y != non_tile_overlap) && (overshoot_y <= num_tiles_y * (input_tile_size / 2 - tile_overlap))) {
576
+ // if tiles don't fit perfectly using the desired overlap
577
+ // and there is enough room to squeeze an extra tile without overlap becoming >0.5
578
+ num_tiles_y++;
579
+ }
580
+
562
581
float tile_overlap_factor_y = (float )(input_tile_size * num_tiles_y - input_height) / (float )(input_tile_size * (num_tiles_y - 1 ));
563
- if (num_tiles_y <= 1 ) {
582
+ if (num_tiles_y <= 2 ) {
564
583
if (input_height == input_tile_size) {
565
584
num_tiles_y = 1 ;
566
585
tile_overlap_factor_y = 0 ;
0 commit comments