diff --git a/src/runtime/profiler_common.cpp b/src/runtime/profiler_common.cpp index b9002daf6881..aed1376b6087 100644 --- a/src/runtime/profiler_common.cpp +++ b/src/runtime/profiler_common.cpp @@ -333,6 +333,23 @@ WEAK void halide_profiler_memory_free(void *user_context, WEAK void halide_profiler_report_unlocked(void *user_context, halide_profiler_state *s) { StringStreamPrinter<1024> sstr(user_context); + int64_t (*compare_fs_fn)(halide_profiler_func_stats *a, halide_profiler_func_stats *b) = nullptr; + + const char *sort_str = getenv("HL_PROFILER_SORT"); + if (sort_str) { + if (!strcmp(sort_str, "time")) { + // Sort by descending time + compare_fs_fn = [](halide_profiler_func_stats *a, halide_profiler_func_stats *b) -> int64_t { + return (int64_t)b->time - (int64_t)a->time; + }; + } else if (!strcmp(sort_str, "name")) { + // Sort by ascending name + compare_fs_fn = [](halide_profiler_func_stats *a, halide_profiler_func_stats *b) -> int64_t { + return strcmp(a->name, b->name); + }; + } + } + for (halide_profiler_pipeline_stats *p = s->pipelines; p; p = (halide_profiler_pipeline_stats *)(p->next)) { float t = p->time / 1000000.0f; @@ -366,6 +383,9 @@ WEAK void halide_profiler_report_unlocked(void *user_context, halide_profiler_st } if (print_f_states) { + int f_stats_count = 0; + halide_profiler_func_stats **f_stats = (halide_profiler_func_stats **)__builtin_alloca(p->num_funcs * sizeof(halide_profiler_func_stats *)); + int max_func_name_length = 0; for (int i = 0; i < p->num_funcs; i++) { halide_profiler_func_stats *fs = p->funcs + i; @@ -376,8 +396,6 @@ WEAK void halide_profiler_report_unlocked(void *user_context, halide_profiler_st } for (int i = 0; i < p->num_funcs; i++) { - size_t cursor = 0; - sstr.clear(); halide_profiler_func_stats *fs = p->funcs + i; // The first func is always a catch-all overhead @@ -386,6 +404,25 @@ WEAK void halide_profiler_report_unlocked(void *user_context, halide_profiler_st continue; } + f_stats[f_stats_count++] = fs; + } + + if (compare_fs_fn) { + for (int i = 1; i < f_stats_count; i++) { + for (int j = i; j > 0 && compare_fs_fn(f_stats[j - 1], f_stats[j]) > 0; j--) { + auto *a = f_stats[j - 1]; + auto *b = f_stats[j]; + f_stats[j - 1] = b; + f_stats[j] = a; + } + } + } + + for (int i = 0; i < f_stats_count; i++) { + size_t cursor = 0; + sstr.clear(); + halide_profiler_func_stats *fs = f_stats[i]; + sstr << " " << fs->name << ": "; cursor += max_func_name_length + 5; while (sstr.size() < cursor) {