@@ -129,6 +129,11 @@ struct PlacedRootMonoItems<'tcx> {
129
129
codegen_units : Vec < CodegenUnit < ' tcx > > ,
130
130
131
131
internalization_candidates : FxHashSet < MonoItem < ' tcx > > ,
132
+
133
+ /// These must be obtained when the iterator in `partition` runs. They
134
+ /// can't be obtained later because some inlined functions might not be
135
+ /// reachable.
136
+ unique_inlined_stats : ( usize , usize ) ,
132
137
}
133
138
134
139
// The output CGUs are sorted by name.
@@ -147,7 +152,7 @@ where
147
152
// In the first step, we place all regular monomorphizations into their
148
153
// respective 'home' codegen unit. Regular monomorphizations are all
149
154
// functions and statics defined in the local crate.
150
- let PlacedRootMonoItems { mut codegen_units, internalization_candidates } = {
155
+ let PlacedRootMonoItems { mut codegen_units, internalization_candidates, unique_inlined_stats } = {
151
156
let _prof_timer = tcx. prof . generic_activity ( "cgu_partitioning_place_roots" ) ;
152
157
place_root_mono_items ( cx, mono_items)
153
158
} ;
@@ -156,15 +161,15 @@ where
156
161
cgu. create_size_estimate ( tcx) ;
157
162
}
158
163
159
- debug_dump ( tcx, "INITIAL PARTITIONING " , & codegen_units) ;
164
+ debug_dump ( tcx, "ROOTS " , & codegen_units, unique_inlined_stats ) ;
160
165
161
166
// Merge until we have at most `max_cgu_count` codegen units.
162
167
// `merge_codegen_units` is responsible for updating the CGU size
163
168
// estimates.
164
169
{
165
170
let _prof_timer = tcx. prof . generic_activity ( "cgu_partitioning_merge_cgus" ) ;
166
171
merge_codegen_units ( cx, & mut codegen_units) ;
167
- debug_dump ( tcx, "POST MERGING " , & codegen_units) ;
172
+ debug_dump ( tcx, "MERGE " , & codegen_units, unique_inlined_stats ) ;
168
173
}
169
174
170
175
// In the next step, we use the inlining map to determine which additional
@@ -180,7 +185,7 @@ where
180
185
cgu. create_size_estimate ( tcx) ;
181
186
}
182
187
183
- debug_dump ( tcx, "POST INLINING " , & codegen_units) ;
188
+ debug_dump ( tcx, "INLINE " , & codegen_units, unique_inlined_stats ) ;
184
189
185
190
// Next we try to make as many symbols "internal" as possible, so LLVM has
186
191
// more freedom to optimize.
@@ -224,7 +229,7 @@ where
224
229
// Ensure CGUs are sorted by name, so that we get deterministic results.
225
230
assert ! ( codegen_units. is_sorted_by( |a, b| Some ( a. name( ) . as_str( ) . cmp( b. name( ) . as_str( ) ) ) ) ) ;
226
231
227
- debug_dump ( tcx, "FINAL" , & codegen_units) ;
232
+ debug_dump ( tcx, "FINAL" , & codegen_units, unique_inlined_stats ) ;
228
233
229
234
codegen_units
230
235
}
@@ -250,10 +255,16 @@ where
250
255
let cgu_name_builder = & mut CodegenUnitNameBuilder :: new ( cx. tcx ) ;
251
256
let cgu_name_cache = & mut FxHashMap :: default ( ) ;
252
257
258
+ let mut num_unique_inlined_items = 0 ;
259
+ let mut unique_inlined_items_size = 0 ;
253
260
for mono_item in mono_items {
254
261
match mono_item. instantiation_mode ( cx. tcx ) {
255
262
InstantiationMode :: GloballyShared { .. } => { }
256
- InstantiationMode :: LocalCopy => continue ,
263
+ InstantiationMode :: LocalCopy => {
264
+ num_unique_inlined_items += 1 ;
265
+ unique_inlined_items_size += mono_item. size_estimate ( cx. tcx ) ;
266
+ continue ;
267
+ }
257
268
}
258
269
259
270
let characteristic_def_id = characteristic_def_id_of_mono_item ( cx. tcx , mono_item) ;
@@ -298,7 +309,11 @@ where
298
309
let mut codegen_units: Vec < _ > = codegen_units. into_values ( ) . collect ( ) ;
299
310
codegen_units. sort_by ( |a, b| a. name ( ) . as_str ( ) . cmp ( b. name ( ) . as_str ( ) ) ) ;
300
311
301
- PlacedRootMonoItems { codegen_units, internalization_candidates }
312
+ PlacedRootMonoItems {
313
+ codegen_units,
314
+ internalization_candidates,
315
+ unique_inlined_stats : ( num_unique_inlined_items, unique_inlined_items_size) ,
316
+ }
302
317
}
303
318
304
319
// This function requires the CGUs to be sorted by name on input, and ensures
@@ -812,31 +827,91 @@ fn default_visibility(tcx: TyCtxt<'_>, id: DefId, is_generic: bool) -> Visibilit
812
827
}
813
828
}
814
829
815
- fn debug_dump < ' a , ' tcx : ' a > ( tcx : TyCtxt < ' tcx > , label : & str , cgus : & [ CodegenUnit < ' tcx > ] ) {
830
+ fn debug_dump < ' a , ' tcx : ' a > (
831
+ tcx : TyCtxt < ' tcx > ,
832
+ label : & str ,
833
+ cgus : & [ CodegenUnit < ' tcx > ] ,
834
+ ( unique_inlined_items, unique_inlined_size) : ( usize , usize ) ,
835
+ ) {
816
836
let dump = move || {
817
837
use std:: fmt:: Write ;
818
838
819
- let num_cgus = cgus. len ( ) ;
820
- let num_items: usize = cgus. iter ( ) . map ( |cgu| cgu. items ( ) . len ( ) ) . sum ( ) ;
821
- let total_size: usize = cgus. iter ( ) . map ( |cgu| cgu. size_estimate ( ) ) . sum ( ) ;
822
- let max_size = cgus. iter ( ) . map ( |cgu| cgu. size_estimate ( ) ) . max ( ) . unwrap ( ) ;
823
- let min_size = cgus. iter ( ) . map ( |cgu| cgu. size_estimate ( ) ) . min ( ) . unwrap ( ) ;
824
- let max_min_size_ratio = max_size as f64 / min_size as f64 ;
839
+ let mut num_cgus = 0 ;
840
+ let mut all_cgu_sizes = Vec :: new ( ) ;
841
+
842
+ // Note: every unique root item is placed exactly once, so the number
843
+ // of unique root items always equals the number of placed root items.
844
+
845
+ let mut root_items = 0 ;
846
+ // unique_inlined_items is passed in above.
847
+ let mut placed_inlined_items = 0 ;
848
+
849
+ let mut root_size = 0 ;
850
+ // unique_inlined_size is passed in above.
851
+ let mut placed_inlined_size = 0 ;
852
+
853
+ for cgu in cgus. iter ( ) {
854
+ num_cgus += 1 ;
855
+ all_cgu_sizes. push ( cgu. size_estimate ( ) ) ;
856
+
857
+ for ( item, _) in cgu. items ( ) {
858
+ match item. instantiation_mode ( tcx) {
859
+ InstantiationMode :: GloballyShared { .. } => {
860
+ root_items += 1 ;
861
+ root_size += item. size_estimate ( tcx) ;
862
+ }
863
+ InstantiationMode :: LocalCopy => {
864
+ placed_inlined_items += 1 ;
865
+ placed_inlined_size += item. size_estimate ( tcx) ;
866
+ }
867
+ }
868
+ }
869
+ }
870
+
871
+ all_cgu_sizes. sort_unstable_by_key ( |& n| cmp:: Reverse ( n) ) ;
872
+
873
+ let unique_items = root_items + unique_inlined_items;
874
+ let placed_items = root_items + placed_inlined_items;
875
+ let items_ratio = placed_items as f64 / unique_items as f64 ;
876
+
877
+ let unique_size = root_size + unique_inlined_size;
878
+ let placed_size = root_size + placed_inlined_size;
879
+ let size_ratio = placed_size as f64 / unique_size as f64 ;
880
+
881
+ let mean_cgu_size = placed_size as f64 / num_cgus as f64 ;
882
+
883
+ assert_eq ! ( placed_size, all_cgu_sizes. iter( ) . sum:: <usize >( ) ) ;
825
884
826
885
let s = & mut String :: new ( ) ;
886
+ let _ = writeln ! ( s, "{label}" ) ;
827
887
let _ = writeln ! (
828
888
s,
829
- "{label} ({num_items} items, total_size={total_size}; {num_cgus} CGUs, \
830
- max_size={max_size}, min_size={min_size}, max_size/min_size={max_min_size_ratio:.1}):"
889
+ "- unique items: {unique_items} ({root_items} root + {unique_inlined_items} inlined), \
890
+ unique size: {unique_size} ({root_size} root + {unique_inlined_size} inlined)\n \
891
+ - placed items: {placed_items} ({root_items} root + {placed_inlined_items} inlined), \
892
+ placed size: {placed_size} ({root_size} root + {placed_inlined_size} inlined)\n \
893
+ - placed/unique items ratio: {items_ratio:.2}, \
894
+ placed/unique size ratio: {size_ratio:.2}\n \
895
+ - CGUs: {num_cgus}, mean size: {mean_cgu_size:.1}, sizes: {}",
896
+ list( & all_cgu_sizes) ,
831
897
) ;
898
+ let _ = writeln ! ( s) ;
899
+
832
900
for ( i, cgu) in cgus. iter ( ) . enumerate ( ) {
901
+ let name = cgu. name ( ) ;
902
+ let size = cgu. size_estimate ( ) ;
833
903
let num_items = cgu. items ( ) . len ( ) ;
834
- let _ = writeln ! (
835
- s,
836
- "- CGU[{i}] {} ({num_items} items, size={}):" ,
837
- cgu. name( ) ,
838
- cgu. size_estimate( )
839
- ) ;
904
+ let mean_size = size as f64 / num_items as f64 ;
905
+
906
+ let mut placed_item_sizes: Vec < _ > =
907
+ cgu. items ( ) . iter ( ) . map ( |( item, _) | item. size_estimate ( tcx) ) . collect ( ) ;
908
+ placed_item_sizes. sort_unstable_by_key ( |& n| cmp:: Reverse ( n) ) ;
909
+ let sizes = list ( & placed_item_sizes) ;
910
+
911
+ let _ = writeln ! ( s, "- CGU[{i}]" ) ;
912
+ let _ = writeln ! ( s, " - {name}, size: {size}" ) ;
913
+ let _ =
914
+ writeln ! ( s, " - items: {num_items}, mean size: {mean_size:.1}, sizes: {sizes}" , ) ;
840
915
841
916
for ( item, linkage) in cgu. items_in_deterministic_order ( tcx) {
842
917
let symbol_name = item. symbol_name ( tcx) . name ;
@@ -852,7 +927,43 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
852
927
let _ = writeln ! ( s) ;
853
928
}
854
929
855
- std:: mem:: take ( s)
930
+ return std:: mem:: take ( s) ;
931
+
932
// Converts a slice to a string, capturing repetitions to save space.
// E.g. `[4, 4, 4, 3, 2, 1, 1, 1, 1, 1]` -> "[4 (x3), 3, 2, 1 (x5)]".
//
// Only *adjacent* equal values are merged into a single `value (xN)` entry,
// so a caller that wants each distinct value reported once must pass a
// sorted slice. An empty slice yields "[]".
fn list(ns: &[usize]) -> String {
    use std::fmt::Write;

    let mut out = String::from("[");
    let mut rest = ns;

    // Peel one run of equal values off the front of `rest` per iteration.
    while let Some((&value, tail)) = rest.split_first() {
        let run_len = 1 + tail.iter().take_while(|&&n| n == value).count();

        // `out` holds only the opening bracket until the first entry is
        // written; every later entry needs a separator.
        if out.len() > 1 {
            out.push_str(", ");
        }

        // Writing into a `String` cannot fail, so the result is ignored.
        let _ = if run_len == 1 {
            write!(out, "{value}")
        } else {
            write!(out, "{value} (x{run_len})")
        };

        rest = &rest[run_len..];
    }

    out.push(']');
    out
}
856
967
} ;
857
968
858
969
debug ! ( "{}" , dump( ) ) ;
0 commit comments