@@ -672,18 +672,25 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
     int tile_overlap_y     = (int32_t)(tile_size * tile_overlap_factor_y);
     int non_tile_overlap_y = tile_size - tile_overlap_y;
 
-    int input_tile_size  = tile_size;
-    int output_tile_size = tile_size;
+    int tile_size_x = tile_size < small_width ? tile_size : small_width;
+    int tile_size_y = tile_size < small_height ? tile_size : small_height;
+
+    int input_tile_size_x  = tile_size_x;
+    int input_tile_size_y  = tile_size_y;
+    int output_tile_size_x = tile_size_x;
+    int output_tile_size_y = tile_size_y;
 
     if (big_out) {
-        output_tile_size *= scale;
+        output_tile_size_x *= scale;
+        output_tile_size_y *= scale;
     } else {
-        input_tile_size *= scale;
+        input_tile_size_x *= scale;
+        input_tile_size_y *= scale;
     }
 
     struct ggml_init_params params = {};
-    params.mem_size += input_tile_size * input_tile_size * input->ne[2] * sizeof(float);     // input chunk
-    params.mem_size += output_tile_size * output_tile_size * output->ne[2] * sizeof(float);  // output chunk
+    params.mem_size += input_tile_size_x * input_tile_size_y * input->ne[2] * sizeof(float);     // input chunk
+    params.mem_size += output_tile_size_x * output_tile_size_y * output->ne[2] * sizeof(float);  // output chunk
     params.mem_size += 3 * ggml_tensor_overhead();
     params.mem_buffer = NULL;
     params.no_alloc   = false;
@@ -698,19 +705,19 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
     }
 
     // tiling
-    ggml_tensor* input_tile  = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, input_tile_size, input_tile_size, input->ne[2], 1);
-    ggml_tensor* output_tile = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, output_tile_size, output_tile_size, output->ne[2], 1);
-    int num_tiles = num_tiles_x * num_tiles_y;
+    ggml_tensor* input_tile  = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, input_tile_size_x, input_tile_size_y, input->ne[2], 1);
+    ggml_tensor* output_tile = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, output_tile_size_x, output_tile_size_y, output->ne[2], 1);
+    int num_tiles            = num_tiles_x * num_tiles_y;
     LOG_INFO("processing %i tiles", num_tiles);
     pretty_progress(0, num_tiles, 0.0f);
     int tile_count = 1;
     bool last_y = false, last_x = false;
     float last_time = 0.0f;
     for (int y = 0; y < small_height && !last_y; y += non_tile_overlap_y) {
         int dy = 0;
-        if (y + tile_size >= small_height) {
+        if (y + tile_size_y >= small_height) {
             int _y = y;
-            y      = small_height - tile_size;
+            y      = small_height - tile_size_y;
             dy     = _y - y;
             if (big_out) {
                 dy *= scale;
@@ -719,9 +726,9 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
         }
         for (int x = 0; x < small_width && !last_x; x += non_tile_overlap_x) {
             int dx = 0;
-            if (x + tile_size >= small_width) {
+            if (x + tile_size_x >= small_width) {
                 int _x = x;
-                x      = small_width - tile_size;
+                x      = small_width - tile_size_x;
                 dx     = _x - x;
                 if (big_out) {
                     dx *= scale;
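
Taken together, the change makes the tile geometry per-axis: tile_size_x and tile_size_y are clamped to the input's width and height, so on inputs smaller than the tile the edge repositioning (small_height - tile_size_y, small_width - tile_size_x) can no longer go negative. The following is a minimal standalone sketch (no ggml) of that clamp-and-reposition logic; the example dimensions, the main() driver, the printout, and the simplified last_x/last_y flag handling are illustrative assumptions, not part of the patch.

// Standalone sketch of the per-axis tiling geometry (illustrative only).
#include <algorithm>
#include <cstdio>

int main() {
    // Assumed example: an input narrower than the tile on the x axis.
    const int small_width  = 24;
    const int small_height = 48;
    const int tile_size    = 32;
    const float tile_overlap_factor = 0.5f;

    // Clamp each axis independently so a tile never exceeds the input.
    int tile_size_x = std::min(tile_size, small_width);
    int tile_size_y = std::min(tile_size, small_height);

    int tile_overlap     = (int)(tile_size * tile_overlap_factor);
    int non_tile_overlap = tile_size - tile_overlap;

    bool last_y = false;
    for (int y = 0; y < small_height && !last_y; y += non_tile_overlap) {
        int dy = 0;
        // Last row: slide the tile back so it ends at the border and record
        // the shift so the caller could offset the output accordingly.
        if (y + tile_size_y >= small_height) {
            int _y = y;
            y      = small_height - tile_size_y;
            dy     = _y - y;
            last_y = true;
        }
        bool last_x = false;
        for (int x = 0; x < small_width && !last_x; x += non_tile_overlap) {
            int dx = 0;
            if (x + tile_size_x >= small_width) {
                int _x = x;
                x      = small_width - tile_size_x;
                dx     = _x - x;
                last_x = true;
            }
            printf("tile at (%d,%d), size %dx%d, shift (%d,%d)\n",
                   x, y, tile_size_x, tile_size_y, dx, dy);
        }
    }
    return 0;
}

With the 24x48 example above, the clamp shrinks the tile to 24 on x and the sketch emits two tiles, at y = 0 and y = 16, instead of trying to place a 32-wide tile at a negative x offset.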