Skip to content

Commit e3181b0

Browse files
committed
Auto merge of rust-lang#119912 - notriddle:notriddle/reexport-dedup, r=GuillaumeGomez
rustdoc-search: single result for items with multiple paths

Part of rust-lang#15723

Preview: https://notriddle.com/rustdoc-html-demo-9/reexport-dup/std/index.html?search=hashmap

This change uses the same "exact" paths as trait implementors and type alias inlining to track items with multiple reachable paths. This way, if you search for `vec`, you get only the `std` export of it, and not the one from `alloc`.

It still includes all the items in the search index so that you can search for them by all available paths. For example, try `core::option` and `std::option`, and notice that the results page doesn't show duplicates, but still shows all the items in their respective crates.
2 parents 3412f01 + 226c77d commit e3181b0

14 files changed

+330
-31
lines changed

src/librustdoc/formats/cache.rs

+12
Original file line numberDiff line numberDiff line change
@@ -346,16 +346,28 @@ impl<'a, 'tcx> DocFolder for CacheBuilder<'a, 'tcx> {
346346
{
347347
let desc =
348348
short_markdown_summary(&item.doc_value(), &item.link_names(self.cache));
349+
// For searching purposes, a re-export is a duplicate if:
350+
//
351+
// - It's either an inline, or a true re-export
352+
// - It's got the same name
353+
// - Both of them have the same exact path
354+
let defid = (match &*item.kind {
355+
&clean::ItemKind::ImportItem(ref import) => import.source.did,
356+
_ => None,
357+
})
358+
.or_else(|| item.item_id.as_def_id());
349359
// In case this is a field from a tuple struct, we don't add it into
350360
// the search index because its name is something like "0", which is
351361
// not useful for rustdoc search.
352362
self.cache.search_index.push(IndexItem {
353363
ty,
364+
defid,
354365
name: s,
355366
path: join_with_double_colon(path),
356367
desc,
357368
parent,
358369
parent_idx: None,
370+
exact_path: None,
359371
impl_id: if let Some(ParentStackItem::Impl { item_id, .. }) =
360372
self.cache.parent_stack.last()
361373
{

src/librustdoc/html/render/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,13 @@ pub(crate) enum RenderMode {
111111
#[derive(Debug)]
112112
pub(crate) struct IndexItem {
113113
pub(crate) ty: ItemType,
114+
pub(crate) defid: Option<DefId>,
114115
pub(crate) name: Symbol,
115116
pub(crate) path: String,
116117
pub(crate) desc: String,
117118
pub(crate) parent: Option<DefId>,
118119
pub(crate) parent_idx: Option<isize>,
120+
pub(crate) exact_path: Option<String>,
119121
pub(crate) impl_id: Option<DefId>,
120122
pub(crate) search_type: Option<IndexItemFunctionType>,
121123
pub(crate) aliases: Box<[Symbol]>,

src/librustdoc/html/render/search_index.rs

+138-16
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,13 @@ pub(crate) fn build_index<'tcx>(
5959
cache: &mut Cache,
6060
tcx: TyCtxt<'tcx>,
6161
) -> SerializedSearchIndex {
62+
// Maps from ID to position in the `crate_paths` array.
6263
let mut itemid_to_pathid = FxHashMap::default();
6364
let mut primitives = FxHashMap::default();
6465
let mut associated_types = FxHashMap::default();
65-
let mut crate_paths = vec![];
66+
67+
// item type, display path, re-exported internal path
68+
let mut crate_paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)> = vec![];
6669

6770
// Attach all orphan items to the type's definition if the type
6871
// has since been learned.
@@ -72,11 +75,13 @@ pub(crate) fn build_index<'tcx>(
7275
let desc = short_markdown_summary(&item.doc_value(), &item.link_names(cache));
7376
cache.search_index.push(IndexItem {
7477
ty: item.type_(),
78+
defid: item.item_id.as_def_id(),
7579
name: item.name.unwrap(),
7680
path: join_with_double_colon(&fqp[..fqp.len() - 1]),
7781
desc,
7882
parent: Some(parent),
7983
parent_idx: None,
84+
exact_path: None,
8085
impl_id,
8186
search_type: get_function_type_for_search(
8287
item,
@@ -126,17 +131,22 @@ pub(crate) fn build_index<'tcx>(
126131
map: &mut FxHashMap<F, isize>,
127132
itemid: F,
128133
lastpathid: &mut isize,
129-
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
134+
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
130135
item_type: ItemType,
131136
path: &[Symbol],
137+
exact_path: Option<&[Symbol]>,
132138
) -> RenderTypeId {
133139
match map.entry(itemid) {
134140
Entry::Occupied(entry) => RenderTypeId::Index(*entry.get()),
135141
Entry::Vacant(entry) => {
136142
let pathid = *lastpathid;
137143
entry.insert(pathid);
138144
*lastpathid += 1;
139-
crate_paths.push((item_type, path.to_vec()));
145+
crate_paths.push((
146+
item_type,
147+
path.to_vec(),
148+
exact_path.map(|path| path.to_vec()),
149+
));
140150
RenderTypeId::Index(pathid)
141151
}
142152
}
@@ -149,21 +159,32 @@ pub(crate) fn build_index<'tcx>(
149159
primitives: &mut FxHashMap<Symbol, isize>,
150160
associated_types: &mut FxHashMap<Symbol, isize>,
151161
lastpathid: &mut isize,
152-
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
162+
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
153163
) -> Option<RenderTypeId> {
154-
let Cache { ref paths, ref external_paths, .. } = *cache;
164+
let Cache { ref paths, ref external_paths, ref exact_paths, .. } = *cache;
155165
match id {
156166
RenderTypeId::DefId(defid) => {
157167
if let Some(&(ref fqp, item_type)) =
158168
paths.get(&defid).or_else(|| external_paths.get(&defid))
159169
{
170+
let exact_fqp = exact_paths
171+
.get(&defid)
172+
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
173+
// Re-exports only count if the name is exactly the same.
174+
// This is a size optimization, since it means we only need
175+
// to store the name once (and the path is re-used for everything
176+
// exported from this same module). It's also likely to Do
177+
// What I Mean, since if a re-export changes the name, it might
178+
// also be a change in semantic meaning.
179+
.filter(|fqp| fqp.last() == fqp.last());
160180
Some(insert_into_map(
161181
itemid_to_pathid,
162182
ItemId::DefId(defid),
163183
lastpathid,
164184
crate_paths,
165185
item_type,
166186
fqp,
187+
exact_fqp.map(|x| &x[..]).filter(|exact_fqp| exact_fqp != fqp),
167188
))
168189
} else {
169190
None
@@ -178,6 +199,7 @@ pub(crate) fn build_index<'tcx>(
178199
crate_paths,
179200
ItemType::Primitive,
180201
&[sym],
202+
None,
181203
))
182204
}
183205
RenderTypeId::Index(_) => Some(id),
@@ -188,6 +210,7 @@ pub(crate) fn build_index<'tcx>(
188210
crate_paths,
189211
ItemType::AssocType,
190212
&[sym],
213+
None,
191214
)),
192215
}
193216
}
@@ -199,7 +222,7 @@ pub(crate) fn build_index<'tcx>(
199222
primitives: &mut FxHashMap<Symbol, isize>,
200223
associated_types: &mut FxHashMap<Symbol, isize>,
201224
lastpathid: &mut isize,
202-
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
225+
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
203226
) {
204227
if let Some(generics) = &mut ty.generics {
205228
for item in generics {
@@ -296,7 +319,7 @@ pub(crate) fn build_index<'tcx>(
296319
}
297320
}
298321

299-
let Cache { ref paths, .. } = *cache;
322+
let Cache { ref paths, ref exact_paths, ref external_paths, .. } = *cache;
300323

301324
// Then, on parent modules
302325
let crate_items: Vec<&IndexItem> = search_index
@@ -311,14 +334,56 @@ pub(crate) fn build_index<'tcx>(
311334
lastpathid += 1;
312335

313336
if let Some(&(ref fqp, short)) = paths.get(&defid) {
314-
crate_paths.push((short, fqp.clone()));
337+
let exact_fqp = exact_paths
338+
.get(&defid)
339+
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
340+
.filter(|exact_fqp| {
341+
exact_fqp.last() == Some(&item.name) && *exact_fqp != fqp
342+
});
343+
crate_paths.push((short, fqp.clone(), exact_fqp.cloned()));
315344
Some(pathid)
316345
} else {
317346
None
318347
}
319348
}
320349
});
321350

351+
if let Some(defid) = item.defid
352+
&& item.parent_idx.is_none()
353+
{
354+
// If this is a re-export, retain the original path.
355+
// Associated items don't use this.
356+
// Their parent carries the exact fqp instead.
357+
let exact_fqp = exact_paths
358+
.get(&defid)
359+
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp));
360+
item.exact_path = exact_fqp.and_then(|fqp| {
361+
// Re-exports only count if the name is exactly the same.
362+
// This is a size optimization, since it means we only need
363+
// to store the name once (and the path is re-used for everything
364+
// exported from this same module). It's also likely to Do
365+
// What I Mean, since if a re-export changes the name, it might
366+
// also be a change in semantic meaning.
367+
if fqp.last() != Some(&item.name) {
368+
return None;
369+
}
370+
let path =
371+
if item.ty == ItemType::Macro && tcx.has_attr(defid, sym::macro_export) {
372+
// `#[macro_export]` always exports to the crate root.
373+
tcx.crate_name(defid.krate).to_string()
374+
} else {
375+
if fqp.len() < 2 {
376+
return None;
377+
}
378+
join_with_double_colon(&fqp[..fqp.len() - 1])
379+
};
380+
if path == item.path {
381+
return None;
382+
}
383+
Some(path)
384+
});
385+
}
386+
322387
// Omit the parent path if it is the same as that of the prior item.
323388
if lastpath == &item.path {
324389
item.path.clear();
@@ -356,7 +421,7 @@ pub(crate) fn build_index<'tcx>(
356421

357422
struct CrateData<'a> {
358423
items: Vec<&'a IndexItem>,
359-
paths: Vec<(ItemType, Vec<Symbol>)>,
424+
paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
360425
// The `String` is the alias name and the `Vec` is the list of elements with this alias.
361426
//
362427
// To be noted: the `usize` elements are indexes to `items`.
@@ -374,6 +439,7 @@ pub(crate) fn build_index<'tcx>(
374439
ty: ItemType,
375440
name: Symbol,
376441
path: Option<usize>,
442+
exact_path: Option<usize>,
377443
}
378444

379445
impl Serialize for Paths {
@@ -387,6 +453,10 @@ pub(crate) fn build_index<'tcx>(
387453
if let Some(ref path) = self.path {
388454
seq.serialize_element(path)?;
389455
}
456+
if let Some(ref path) = self.exact_path {
457+
assert!(self.path.is_some());
458+
seq.serialize_element(path)?;
459+
}
390460
seq.end()
391461
}
392462
}
@@ -409,43 +479,94 @@ pub(crate) fn build_index<'tcx>(
409479
mod_paths.insert(&item.path, index);
410480
}
411481
let mut paths = Vec::with_capacity(self.paths.len());
412-
for (ty, path) in &self.paths {
482+
for (ty, path, exact) in &self.paths {
413483
if path.len() < 2 {
414-
paths.push(Paths { ty: *ty, name: path[0], path: None });
484+
paths.push(Paths { ty: *ty, name: path[0], path: None, exact_path: None });
415485
continue;
416486
}
417487
let full_path = join_with_double_colon(&path[..path.len() - 1]);
488+
let full_exact_path = exact
489+
.as_ref()
490+
.filter(|exact| exact.last() == path.last() && exact.len() >= 2)
491+
.map(|exact| join_with_double_colon(&exact[..exact.len() - 1]));
492+
let exact_path = extra_paths.len() + self.items.len();
493+
let exact_path = full_exact_path.as_ref().map(|full_exact_path| match extra_paths
494+
.entry(full_exact_path.clone())
495+
{
496+
Entry::Occupied(entry) => *entry.get(),
497+
Entry::Vacant(entry) => {
498+
if let Some(index) = mod_paths.get(&full_exact_path) {
499+
return *index;
500+
}
501+
entry.insert(exact_path);
502+
if !revert_extra_paths.contains_key(&exact_path) {
503+
revert_extra_paths.insert(exact_path, full_exact_path.clone());
504+
}
505+
exact_path
506+
}
507+
});
418508
if let Some(index) = mod_paths.get(&full_path) {
419-
paths.push(Paths { ty: *ty, name: *path.last().unwrap(), path: Some(*index) });
509+
paths.push(Paths {
510+
ty: *ty,
511+
name: *path.last().unwrap(),
512+
path: Some(*index),
513+
exact_path,
514+
});
420515
continue;
421516
}
422517
// It means it comes from an external crate so the item and its path will be
423518
// stored into another array.
424519
//
425520
// `index` is put after the last `mod_paths`
426521
let index = extra_paths.len() + self.items.len();
427-
if !revert_extra_paths.contains_key(&index) {
428-
revert_extra_paths.insert(index, full_path.clone());
429-
}
430-
match extra_paths.entry(full_path) {
522+
match extra_paths.entry(full_path.clone()) {
431523
Entry::Occupied(entry) => {
432524
paths.push(Paths {
433525
ty: *ty,
434526
name: *path.last().unwrap(),
435527
path: Some(*entry.get()),
528+
exact_path,
436529
});
437530
}
438531
Entry::Vacant(entry) => {
439532
entry.insert(index);
533+
if !revert_extra_paths.contains_key(&index) {
534+
revert_extra_paths.insert(index, full_path);
535+
}
440536
paths.push(Paths {
441537
ty: *ty,
442538
name: *path.last().unwrap(),
443539
path: Some(index),
540+
exact_path,
444541
});
445542
}
446543
}
447544
}
448545

546+
// Direct exports use adjacent arrays for the current crate's items,
547+
// but re-exported exact paths don't.
548+
let mut re_exports = Vec::new();
549+
for (item_index, item) in self.items.iter().enumerate() {
550+
if let Some(exact_path) = item.exact_path.as_ref() {
551+
if let Some(path_index) = mod_paths.get(&exact_path) {
552+
re_exports.push((item_index, *path_index));
553+
} else {
554+
let path_index = extra_paths.len() + self.items.len();
555+
let path_index = match extra_paths.entry(exact_path.clone()) {
556+
Entry::Occupied(entry) => *entry.get(),
557+
Entry::Vacant(entry) => {
558+
entry.insert(path_index);
559+
if !revert_extra_paths.contains_key(&path_index) {
560+
revert_extra_paths.insert(path_index, exact_path.clone());
561+
}
562+
path_index
563+
}
564+
};
565+
re_exports.push((item_index, path_index));
566+
}
567+
}
568+
}
569+
449570
let mut names = Vec::with_capacity(self.items.len());
450571
let mut types = String::with_capacity(self.items.len());
451572
let mut full_paths = Vec::with_capacity(self.items.len());
@@ -501,6 +622,7 @@ pub(crate) fn build_index<'tcx>(
501622
crate_data.serialize_field("f", &functions)?;
502623
crate_data.serialize_field("D", &self.desc_index)?;
503624
crate_data.serialize_field("p", &paths)?;
625+
crate_data.serialize_field("r", &re_exports)?;
504626
crate_data.serialize_field("b", &self.associated_item_disambiguators)?;
505627
crate_data.serialize_field("c", &bitmap_to_string(&deprecated))?;
506628
crate_data.serialize_field("e", &bitmap_to_string(&self.empty_desc))?;

src/librustdoc/html/static/js/externs.js

+13-6
Original file line numberDiff line numberDiff line change
@@ -239,20 +239,27 @@ let FunctionType;
239239
* `doc` contains the description of the crate.
240240
*
241241
* `p` is a list of path/type pairs. It is used for parents and function parameters.
242+
* The first item is the type, the second is the name, the third is the visible path (if any) and
243+
* the fourth is the canonical path used for deduplication (if any).
244+
*
245+
* `r` is the canonical path used for deduplication of re-exported items.
246+
* It is not used for associated items like methods (that's the fourth element
247+
* of `p`) but is used for module items like free functions.
242248
*
243249
* `c` is an array of item indices that are deprecated.
244250
* @typedef {{
245251
* doc: string,
246252
* a: Object,
247253
* n: Array<string>,
248-
* t: String,
254+
* t: string,
249255
* d: Array<string>,
250-
* q: Array<[Number, string]>,
251-
* i: Array<Number>,
256+
* q: Array<[number, string]>,
257+
* i: Array<number>,
252258
* f: string,
253-
* p: Array<Object>,
254-
* b: Array<[Number, String]>,
255-
* c: Array<Number>
259+
* p: Array<[number, string] | [number, string, number] | [number, string, number, number]>,
260+
* b: Array<[number, String]>,
261+
* c: Array<number>,
262+
* r: Array<[number, number]>,
256263
* }}
257264
*/
258265
let RawSearchIndexCrate;

0 commit comments

Comments
 (0)