From 315d6ade12efab4c5e6f5683a4ee0abf2cf9ca78 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Thu, 30 Jan 2025 14:54:35 +0000 Subject: [PATCH] Add merging of `Interner`s (#13752) * Add merging of `Interner` instances This makes it possible to merge one interner into another, using a map-and-filter function to explain how the values should be considered afterwards. The main use-case for this is in DAG and circuit substitution methods, where we want to be able to map `PackedInstruction`s from a small circuit to new ones on a larger circuit, without having to unwrap and re-hash the interned slices once per instruction; instead, we do that once _per interner key_, and then have a no-hash direct lookup table to remap the interner keys directly. The methods here are designed to directly allow the reuse of the heap allocations used to track the interner key mappings, to help reduce the number of small-scale allocations done in substitution methods. The filtering part of the mapping is because one cannot typically be certain that an `Interner` contains the minimally necessary keys, especially for DAGs; it can well be that substitutions and bit removals cause old data to be present. * Use new `Interner` merging in `DAGCircuit::from_circuit` This converts one obvious candidate for the new interner-merging functionality. There are several others in the codebase (all uses of `DAGCircuit::extend`, for example, and several other full rebuilds), but the structure of their code wasn't designed with this in mind, so the modifications would typically be far more involved and more suitable for separate patches. 
Using a timing script: ```python import itertools from qiskit.circuit import QuantumCircuit from qiskit.converters import circuit_to_dag qc = QuantumCircuit(100, 100) for pair in itertools.permutations(qc.qubits, 2): qc.rz(0, pair[0]) qc.sx(pair[0]) qc.rz(0, pair[0]) qc.rz(0, pair[1]) qc.sx(pair[1]) qc.rz(0, pair[1]) qc.cx(*pair) %timeit circuit_to_dag(qc, copy_operations=False) ``` this patch showed an improvement from 18.5(6)ms to 14.4(5)ms on my machine, which is a 22% speedup, though admittedly this particular function was doing larger-scale allocation work that could be removed than other candidate replacements are. * Fix typo Co-authored-by: Kevin Hartman * Fix typos Co-authored-by: Raynel Sanchez <87539502+raynelfss@users.noreply.github.com> --------- Co-authored-by: Kevin Hartman Co-authored-by: Raynel Sanchez <87539502+raynelfss@users.noreply.github.com> --- crates/circuit/Cargo.toml | 2 +- crates/circuit/src/dag_circuit.rs | 160 +++++++--- crates/circuit/src/interner.rs | 501 ++++++++++++++++++++++++++++-- 3 files changed, 593 insertions(+), 70 deletions(-) diff --git a/crates/circuit/Cargo.toml b/crates/circuit/Cargo.toml index 457d64108254..373295408be1 100644 --- a/crates/circuit/Cargo.toml +++ b/crates/circuit/Cargo.toml @@ -38,7 +38,7 @@ features = ["rayon"] [dependencies.smallvec] workspace = true -features = ["union"] +features = ["union", "const_generics"] [features] cache_pygates = [] diff --git a/crates/circuit/src/dag_circuit.rs b/crates/circuit/src/dag_circuit.rs index ca1a9f59aa1d..d891dfe2af12 100644 --- a/crates/circuit/src/dag_circuit.rs +++ b/crates/circuit/src/dag_circuit.rs @@ -25,7 +25,7 @@ use crate::dag_node::{DAGInNode, DAGNode, DAGOpNode, DAGOutNode}; use crate::dot_utils::build_dot; use crate::error::DAGCircuitError; use crate::imports; -use crate::interner::{Interned, Interner}; +use crate::interner::{Interned, InternedMap, Interner}; use crate::operations::{Operation, OperationRef, Param, PyInstruction, StandardGate}; use 
crate::packed_instruction::{PackedInstruction, PackedOperation}; use crate::rustworkx_core_vnext::isomorphism; @@ -4810,6 +4810,76 @@ impl DAGCircuit { &self.vars } + /// Merge the `qargs` in a different [Interner] into this DAG, remapping the qubits. + /// + /// This is useful for simplifying the direct mapping of [PackedInstruction]s from one DAG to + /// another, like in substitution methods, or rebuilding a new DAG out of a lot of smaller ones. + /// See [Interner::merge_map_slice] for more information on the mapping function. + /// + /// The input [InternedMap] is cleared of its previous entries by this method, and then we + /// re-use the allocation. + pub fn merge_qargs_using( + &mut self, + other: &Interner<[Qubit]>, + map_fn: impl FnMut(&Qubit) -> Option, + map: &mut InternedMap<[Qubit]>, + ) { + // 4 is an arbitrary guess for the amount of stack space to allocate for mapping the + // `qargs`, but it doesn't matter if it's too short because it'll safely spill to the heap. + self.qargs_interner + .merge_map_slice_using::<4>(other, map_fn, map); + } + + /// Merge the `qargs` in a different [Interner] into this DAG, remapping the qubits. + /// + /// This is useful for simplifying the direct mapping of [PackedInstruction]s from one DAG to + /// another, like in substitution methods, or rebuilding a new DAG out of a lot of smaller ones. + /// See [Interner::merge_map_slice] for more information on the mapping function. + pub fn merge_qargs( + &mut self, + other: &Interner<[Qubit]>, + map_fn: impl FnMut(&Qubit) -> Option, + ) -> InternedMap<[Qubit]> { + let mut out = InternedMap::new(); + self.merge_qargs_using(other, map_fn, &mut out); + out + } + + /// Merge the `cargs` in a different [Interner] into this DAG, remapping the clbits. + /// + /// This is useful for simplifying the direct mapping of [PackedInstruction]s from one DAG to + /// another, like in substitution methods, or rebuilding a new DAG out of a lot of smaller ones. 
+ /// See [Interner::merge_map_slice] for more information on the mapping function. + /// + /// The input [InternedMap] is cleared of its previous entries by this method, and then we + /// re-use the allocation. + pub fn merge_cargs_using( + &mut self, + other: &Interner<[Clbit]>, + map_fn: impl FnMut(&Clbit) -> Option, + map: &mut InternedMap<[Clbit]>, + ) { + // 4 is an arbitrary guess for the amount of stack space to allocate for mapping the + // `cargs`, but it doesn't matter if it's too short because it'll safely spill to the heap. + self.cargs_interner + .merge_map_slice_using::<4>(other, map_fn, map); + } + + /// Merge the `cargs` in a different [Interner] into this DAG, remapping the clbits. + /// + /// This is useful for simplifying the direct mapping of [PackedInstruction]s from one DAG to + /// another, like in substitution methods, or rebuilding a new DAG out of a lot of smaller ones. + /// See [Interner::merge_map_slice] for more information on the mapping function. + pub fn merge_cargs( + &mut self, + other: &Interner<[Clbit]>, + map_fn: impl FnMut(&Clbit) -> Option, + ) -> InternedMap<[Clbit]> { + let mut out = InternedMap::new(); + self.merge_cargs_using(other, map_fn, &mut out); + out + } + /// Return an iterator of gate runs with non-conditional op nodes of given names pub fn collect_runs( &self, @@ -6383,10 +6453,24 @@ impl DAGCircuit { &self.op_names } - /// Extends the DAG with valid instances of [PackedInstruction] + /// Extends the DAG with valid instances of [PackedInstruction]. pub fn extend(&mut self, py: Python, iter: I) -> PyResult> where I: IntoIterator, + { + self.try_extend( + py, + iter.into_iter() + .map(|inst| -> Result { Ok(inst) }), + ) + } + + /// Extends the DAG with valid instances of [PackedInstruction], where the iterator produces the + /// results in a fallible manner. 
+ pub fn try_extend(&mut self, py: Python, iter: I) -> PyResult> + where + I: IntoIterator>, + PyErr: From, { // Create HashSets to keep track of each bit/var's last node let mut qubit_last_nodes: HashMap = HashMap::default(); @@ -6400,6 +6484,7 @@ impl DAGCircuit { // Store new nodes to return let mut new_nodes = Vec::with_capacity(iter.size_hint().1.unwrap_or_default()); for instr in iter { + let instr = instr?; let op_name = instr.op.name(); let (all_cbits, vars): (Vec, Option>) = { if self.may_have_additional_wires(py, &instr) { @@ -6571,8 +6656,8 @@ impl DAGCircuit { new_dag.metadata = qc.metadata.map(|meta| meta.unbind()); - // Add the qubits depending on order. - let qubit_map: Option> = if let Some(qubit_ordering) = qubit_order { + // Add the qubits depending on order, and produce the qargs map. + let qarg_map = if let Some(qubit_ordering) = qubit_order { let mut ordered_vec = Vec::from_iter((0..num_qubits as u32).map(Qubit)); qubit_ordering .into_iter() @@ -6587,7 +6672,11 @@ impl DAGCircuit { ordered_vec[qubit_index.index()] = new_dag.add_qubit_unchecked(py, &qubit)?; Ok(()) })?; - Some(ordered_vec) + // The `Vec::get` use is because an arbitrary interner might contain old references to + // bit instances beyond `num_qubits`, such as if it's from a DAG that had wires removed. + new_dag.merge_qargs(qc_data.qargs_interner(), |bit| { + ordered_vec.get(bit.index()).copied() + }) } else { qc_data .qubits() @@ -6597,11 +6686,11 @@ impl DAGCircuit { new_dag.add_qubit_unchecked(py, qubit.bind(py))?; Ok(()) })?; - None + new_dag.merge_qargs(qc_data.qargs_interner(), |bit| Some(*bit)) }; - // Add the clbits depending on order. - let clbit_map: Option> = if let Some(clbit_ordering) = clbit_order { + // Add the clbits depending on order, and produce the cargs map. 
+ let carg_map = if let Some(clbit_ordering) = clbit_order { let mut ordered_vec = Vec::from_iter((0..num_clbits as u32).map(Clbit)); clbit_ordering .into_iter() @@ -6616,7 +6705,11 @@ impl DAGCircuit { ordered_vec[clbit_index.index()] = new_dag.add_clbit_unchecked(py, &clbit)?; Ok(()) })?; - Some(ordered_vec) + // The `Vec::get` use is because an arbitrary interner might contain old references to + // bit instances beyond `num_clbits`, such as if it's from a DAG that had wires removed. + new_dag.merge_cargs(qc_data.cargs_interner(), |bit| { + ordered_vec.get(bit.index()).copied() + }) } else { qc_data .clbits() @@ -6626,7 +6719,7 @@ impl DAGCircuit { new_dag.add_clbit_unchecked(py, clbit.bind(py))?; Ok(()) })?; - None + new_dag.merge_cargs(qc_data.cargs_interner(), |bit| Some(*bit)) }; // Add all of the new vars. @@ -6655,57 +6748,24 @@ impl DAGCircuit { } } - // Pre-process and re-intern all indices again. - let instructions: Vec = qc_data - .iter() - .map(|instr| -> PyResult { - // Re-map the qubits - let new_qargs = if let Some(qubit_mapping) = &qubit_map { - let qargs = qc_data - .get_qargs(instr.qubits) - .iter() - .map(|bit| qubit_mapping[bit.index()]) - .collect(); - new_dag.qargs_interner.insert_owned(qargs) - } else { - new_dag - .qargs_interner - .insert(qc_data.get_qargs(instr.qubits)) - }; - // Remap the clbits - let new_cargs = if let Some(clbit_mapping) = &clbit_map { - let qargs = qc_data - .get_cargs(instr.clbits) - .iter() - .map(|bit| clbit_mapping[bit.index()]) - .collect(); - new_dag.cargs_interner.insert_owned(qargs) - } else { - new_dag - .cargs_interner - .insert(qc_data.get_cargs(instr.clbits)) - }; - // Copy the operations - + new_dag.try_extend( + py, + qc_data.iter().map(|instr| -> PyResult { Ok(PackedInstruction { op: if copy_op { instr.op.py_deepcopy(py, None)? 
} else { instr.op.clone() }, - qubits: new_qargs, - clbits: new_cargs, + qubits: qarg_map[instr.qubits], + clbits: carg_map[instr.clbits], params: instr.params.clone(), extra_attrs: instr.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] py_op: OnceLock::new(), }) - }) - .collect::>>()?; - - // Finally add all the instructions back - new_dag.extend(py, instructions)?; - + }), + )?; Ok(new_dag) } diff --git a/crates/circuit/src/interner.rs b/crates/circuit/src/interner.rs index b77ecb51fa98..17065c5c92e1 100644 --- a/crates/circuit/src/interner.rs +++ b/crates/circuit/src/interner.rs @@ -10,18 +10,19 @@ // copyright notice, and modified files need to carry a notice indicating // that they have been altered from the originals. -use std::borrow::Borrow; +use std::borrow::{Borrow, Cow}; use std::fmt; use std::hash::Hash; use std::marker::PhantomData; use indexmap::IndexSet; +use smallvec::SmallVec; /// A key to retrieve a value (by reference) from an interner of the same type. This is narrower /// than a true reference, at the cost that it is explicitly not lifetime bound to the interner it /// came from; it is up to the user to ensure that they never attempt to query an interner with a /// key from a different interner. -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Eq, PartialEq, Hash)] pub struct Interned { index: u32, // Storing the type of the interned value adds a small amount more type safety to the interner @@ -43,6 +44,128 @@ impl Copy for Interned {} unsafe impl Send for Interned {} unsafe impl Sync for Interned {} +/// A map of the indices from one interner to another. +/// +/// This is created by the interner-merging functions like [Interner::merge_map] and +/// [Interner::merge_map_slice]. +/// +/// This map can be indexed by the [Interned] keys of the smaller [Interner], and returns [Interned] +/// keys that work on the larger [Interner] (the one that expanded itself). 
+/// +/// The indexing implementation panics if asked for the new key for an object that was filtered out +/// during the merge. +#[derive(Clone, Debug, Default)] +pub struct InternedMap { + // We can use [Vec] here, because [Interner] keys are guaranteed to be consecutive integers + // counting from zero; it's effectively how an [Interner] does lookups from [Interned] already. + // The [Option] is to support filtering in the map; we don't use a hash-map because we expect + // filtering to only infrequently remove values. + map: Vec>>, + // We're pretending that we're a mapping type from [Interned] to [Interned]. + _other: PhantomData>, +} +impl InternedMap { + /// Create a new empty [InternedMap]. + /// + /// You can use this as a persistent allocation for repeated calls to [Interner::merge_map] or + /// related functions. + pub fn new() -> Self { + Self::with_capacity(0) + } + + /// Create a new empty [InternedMap] with pre-allocated capacity. + /// + /// You can use this as a persistent allocation for repeated calls to [Interner::merge_map] or + /// related functions. + pub fn with_capacity(cap: usize) -> Self { + Self { + map: Vec::with_capacity(cap), + _other: PhantomData, + } + } + + /// An iterator over the pairs of values in the map. + /// + /// The first item of the tuple is the keys that can be used to index the map, the second is the + /// result from mapping that key. + pub fn iter(&self) -> impl Iterator, Interned)> + '_ { + self.map.iter().enumerate().filter_map(|(key, value)| { + value.map(|value| { + ( + Interned { + index: key as u32, + _type: PhantomData, + }, + value, + ) + }) + }) + } +} +impl ::std::ops::Index> for InternedMap { + type Output = Interned; + + fn index(&self, index: Interned) -> &Self::Output { + // We could write a fallible [InternedMap::get] for handling filtered keys safely, but I + couldn't imagine a use-case for that. 
+ self.map[index.index as usize] + .as_ref() + .expect("lookup keys should not have been filtered out") + } +} +impl IntoIterator for InternedMap { + type Item = as Iterator>::Item; + type IntoIter = interned_map::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + Self::IntoIter::from(self) + } +} +// Private namespace to hide the types of the iterator in. +mod interned_map { + use super::*; + use std::{iter, vec}; + + pub struct IntoIter { + // This ugly type is to try and re-use as much of the built-in [Iterator]-adaptor structure + // as possible. We have to stop when we encounter what would be a [FilterMap] because we + // can't name the type of the mapping function. + iter: iter::Enumerate>>>, + _type: PhantomData, + } + impl From> for IntoIter { + fn from(val: InternedMap) -> Self { + Self { + iter: val.map.into_iter().enumerate(), + _type: PhantomData, + } + } + } + impl Iterator for IntoIter { + type Item = (Interned, Interned); + fn next(&mut self) -> Option { + for (key, value) in self.iter.by_ref() { + let Some(value) = value else { + continue; + }; + return Some(( + Interned { + index: key as u32, + _type: PhantomData, + }, + value, + )); + } + None + } + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } + } + impl ExactSizeIterator for IntoIter {} + impl iter::FusedIterator for IntoIter {} +} + /// An append-only data structure for interning generic Rust types. /// /// The interner can lookup keys using a reference type, and will create the corresponding owned @@ -95,6 +218,29 @@ where f.debug_tuple("Interner").field(&self.0).finish() } } +// We can choose either [FromIterator] or `FromIterator<::Owned>` as the +// implementation for [Interner], but we can't have both, because the blanket implementation of +// [ToOwned] for `T: Clone` would cause overlap. If somebody's constructing a new [Interner] from +// an iterator, chances are that they've either already got owned values, or there aren't going to +// be too many duplicates. 
+impl ::std::iter::FromIterator<::Owned> for Interner +where + T: ?Sized + ToOwned, + ::Owned: Hash + Eq + Default, +{ + fn from_iter(iter: I) -> Self + where + I: IntoIterator::Owned>, + { + let iter = iter.into_iter(); + let (min, _) = iter.size_hint(); + let mut out = Self::with_capacity(min + 1); + for x in iter { + out.insert_owned(x); + } + out + } +} impl Interner where @@ -138,7 +284,6 @@ where impl Interner where T: ?Sized + ToOwned, - ::Owned: Hash + Eq, { /// Retrieve a reference to the stored value for this key. pub fn get(&self, index: Interned) -> &T { @@ -150,6 +295,75 @@ where .borrow() } + /// The number of entries stored in the interner. + pub fn len(&self) -> usize { + self.0.len() + } + + /// Whether there are zero stored keys. + /// + /// This is always false, because we always contain a default key, but clippy complains if we + /// don't have it. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// An iterator over the [Interned] keys. + pub fn keys(&self) -> impl ExactSizeIterator> + '_ { + (0..self.len() as u32).map(|index| Interned { + index, + _type: PhantomData, + }) + } + + /// An iterator over the stored values. + pub fn values(&self) -> impl ExactSizeIterator + '_ { + self.0.iter().map(|x| x.borrow()) + } + + /// An iterator over pairs of the [Interned] keys and their associated values. + pub fn items(&self) -> impl ExactSizeIterator, &'_ T)> + '_ { + self.0.iter().enumerate().map(|(i, v)| { + ( + Interned { + index: i as u32, + _type: PhantomData, + }, + v.borrow(), + ) + }) + } +} + +impl Interner +where + T: ?Sized + ToOwned + Hash + Eq, +{ + /// Get the [Interned] key corresponding to the given borrowed example, if it has already been + /// stored. + /// + /// This method does not store `value` if it is not present. 
+ pub fn try_key(&self, value: &T) -> Option> { + self.0.get_index_of(value).map(|index| Interned { + index: index as u32, + _type: PhantomData, + }) + } + + /// Return whether this value is already in the [Interner]. + /// + /// Typically you want to use [try_key], which returns the key if present, or [insert], which + /// stores the value if it wasn't already present. + pub fn contains(&self, value: &T) -> bool { + self.try_key(value).is_some() + } +} + +impl Interner +where + T: ?Sized + ToOwned, + ::Owned: Hash + Eq, +{ /// Internal worker function that inserts an owned value assuming that the value didn't /// previously exist in the map. fn insert_new(&mut self, value: ::Owned) -> u32 { @@ -162,15 +376,34 @@ where index as u32 } + /// Get an interner key corresponding to the given owned type. If not already stored, the value + /// will be used as the key, otherwise it will be dropped. + /// + /// If you don't already have the owned value, use `insert`; this will only allocate if the + /// lookup fails. + pub fn insert_owned(&mut self, value: ::Owned) -> Interned { + let index = match self.0.get_index_of(&value) { + Some(index) => index as u32, + None => self.insert_new(value), + }; + Interned { + index, + _type: PhantomData, + } + } +} + +impl Interner +where + T: ?Sized + ToOwned + Hash + Eq, + ::Owned: Hash + Eq, +{ /// Get an interner key corresponding to the given referenced type. If not already stored, this /// function will allocate a new owned value to use as the storage. /// /// If you already have an owned value, use `insert_owned`, but in general this function will be /// more efficient *unless* you already had the value for other reasons. 
- pub fn insert(&mut self, value: &T) -> Interned - where - T: Hash + Eq, - { + pub fn insert(&mut self, value: &T) -> Interned { let index = match self.0.get_index_of(value) { Some(index) => index as u32, None => self.insert_new(value.to_owned()), @@ -181,26 +414,138 @@ where } } - /// Get an interner key corresponding to the given owned type. If not already stored, the value - /// will be used as the key, otherwise it will be dropped. + /// Get an interner key corresponding to the given [Cow]. /// - /// If you don't already have the owned value, use `insert`; this will only allocate if the - /// lookup fails. - pub fn insert_owned(&mut self, value: ::Owned) -> Interned { - let index = match self.0.get_index_of(&value) { - Some(index) => index as u32, - None => self.insert_new(value), - }; - Interned { - index, - _type: PhantomData, + /// If not already stored, the value will be used as the key, cloning if required. If it is + /// stored, the value is dropped. + #[inline] + pub fn insert_cow(&mut self, value: Cow) -> Interned { + match value { + Cow::Borrowed(value) => self.insert(value), + Cow::Owned(value) => self.insert_owned(value), + } + } + + /// Merge another interner into this one, re-using the output storage for the key mapping. + /// + /// The output mapping converts [Interned] indices from `other` to their new representations in + /// `self`. Strictly, the interners can be for different types, but in practice it likely makes + /// most sense for them to be the same. + pub fn merge_map_using( + &mut self, + other: &Interner, + mut map_fn: impl FnMut(&S) -> Option>, + target: &mut InternedMap, + ) where + S: ?Sized + ToOwned + Hash + Eq, + { + target.map.clear(); + target.map.reserve(other.0.len()); + for key in other.0.iter() { + target + .map + .push(map_fn(key.borrow()).map(|cow| self.insert_cow(cow))); + } + } + + /// Merge another interner into this one. 
+ /// + /// The output mapping converts [Interned] indices from `other` to their new representations in + /// `self`. Strictly, the interners can be for different types, but in practice it likely makes + /// most sense for them to be the same. + pub fn merge_map( + &mut self, + other: &Interner, + map_fn: impl FnMut(&S) -> Option>, + ) -> InternedMap + where + S: ?Sized + ToOwned + Hash + Eq, + { + let mut out = InternedMap::new(); + self.merge_map_using(other, map_fn, &mut out); + out + } +} + +impl Interner<[T]> +where + T: Hash + Eq + Clone, +{ + /// Merge another interner into this one, re-using the output storage for the key mapping. + /// + /// The mapping function is for scalar elements of the slice, as opposed to in [merge_map] where + /// it is for the entire key at once. This function makes it easier to avoid allocations when + /// mapping slice-based conversions (though if `T` is not [Copy] and you're expecting there to + /// be a lot of true insertions during the merge, there is a potential clone inefficiency). + /// + /// If the `scalar_map_fn` returns `None` for any element of a slice, that entire slice is + /// filtered out from the merge. The subsequent [InternedMap] will panic if the corresponding + /// [Interned] key is used as a lookup. + pub fn merge_map_slice_using( + &mut self, + // Actually, `other` could be [Interner<[S]>], but then you'd need to specify `S` whenever + // you want to set `N`, which is just an API annoyance since we'll probably never need the + // two interners to be different types. + other: &Self, + mut scalar_map_fn: impl FnMut(&T) -> Option, + target: &mut InternedMap<[T]>, + ) { + // Workspace for the mapping function. The aim here is that we're working on the stack, so + // the mapping doesn't need to make heap allocations. We could either guess (which the + // higher-level `merge_map_slice` does), or force the caller to tell us how much stack space + // to allocate. 
This method is lower level, so in this case we ask them to tell us; if + // they're optimizing to the point of re-using the return allocations, they probably have a + // good idea about the maximum slice size of the interner they'll be merging in. + let mut work = SmallVec::<[T; N]>::with_capacity(N); + target.map.clear(); + target.map.reserve(other.0.len()); + for slice in other.0.iter() { + let new_slice = 'slice: { + work.clear(); + work.reserve(slice.len()); + for value in slice { + let Some(scalar) = scalar_map_fn(value) else { + break 'slice None; + }; + work.push(scalar); + } + Some(work.as_slice()) + }; + target.map.push(new_slice.map(|slice| self.insert(slice))); } } + + /// Merge another interner into this one. + /// + /// If you need to call this many times in a row, see [merge_map_slice_using] for a version that + /// can re-use the allocations of the output mapping. + /// + /// The mapping function is for scalar elements of the slice, as opposed to in [merge_map] where + /// it is for the entire key at once. This function makes it easier to avoid allocations when + /// mapping slice-based conversions (though if `T` is not [Copy] and you're expecting there to + /// be a lot of true insertions during the merge, there is a potential clone inefficiency). + /// + /// If the `scalar_map_fn` returns `None` for any element of a slice, that entire slice is + /// filtered out from the merge. The subsequent [InternedMap] will panic if the corresponding + /// [Interned] key is used as a lookup. + pub fn merge_map_slice( + &mut self, + other: &Self, + scalar_map_fn: impl FnMut(&T) -> Option, + ) -> InternedMap<[T]> { + let mut out = InternedMap::new(); + // We're specifying the stack space here. This is just a guess, but it's not hugely + // important; we'll safely spill from the stack to the heap if needed, and this function is + // an API convenience at the cost of optimal allocation performance anyway. 
+ self.merge_map_slice_using::<4>(other, scalar_map_fn, &mut out); + out + } } #[cfg(test)] mod test { use super::*; + use hashbrown::{HashMap, HashSet}; #[test] fn default_key_exists() { @@ -215,4 +560,122 @@ mod test { assert_eq!(capacity.get_default(), capacity.get_default()); assert_eq!(capacity.get(capacity.get_default()), ""); } + + #[test] + fn can_merge_two_interners() { + let mut base = Interner::::from_iter(["hello", "world"].map(String::from)); + let other = Interner::::from_iter(["a", "world", "b"].map(String::from)); + + fn to_hashmap( + interner: &Interner, + ) -> HashMap, ::Owned> { + interner + .items() + .map(|(key, value)| (key, value.to_owned())) + .collect() + } + + let initial = to_hashmap(&base); + // Sanity check that we start off with the values we expect. + let expected = ["", "hello", "world"] + .map(String::from) + .into_iter() + .collect::>(); + assert_eq!( + expected, + HashSet::from_iter(base.values().map(String::from)) + ); + + let other_items = to_hashmap(&other); + let other_map = base.merge_map(&other, |x| Some(x.into())); + // All of the keys from the previously stored values must be the same. + assert_eq!( + initial, + initial + .iter() + .map(|(key, value)| (base.try_key(value).unwrap(), base.get(*key).to_owned())) + .collect::>(), + ); + // All of the keys from the merged-in map should now be present. + assert_eq!( + other_items, + other + .keys() + .map(|key| (key, base.get(other_map[key]).to_owned())) + .collect::>(), + ); + + // This interner is of a different type and will produce duplicate keys during the mapping. + let nums = Interner::<[u8]>::from_iter([vec![4], vec![1, 5], vec![2, 4], vec![3]]); + let map_fn = |x: &[u8]| x.iter().sum::().to_string(); + let num_map = base.merge_map(&nums, |x| Some(map_fn(x).into())); + // All of the keys from the previously stored values must be the same. 
+ assert_eq!( + initial, + initial + .iter() + .map(|(key, value)| (base.try_key(value).unwrap(), base.get(*key).to_owned())) + .collect::>(), + ); + // All of the keys from the merged-in map should now be present. + assert_eq!( + nums.items() + .map(|(key, value)| (key, map_fn(value))) + .collect::>(), + nums.keys() + .map(|key| (key, base.get(num_map[key]).to_owned())) + .collect(), + ); + } + + #[test] + fn can_merge_two_sliced_interners() { + let mut map = InternedMap::<[u8]>::new(); + let mut base = Interner::<[u8]>::from_iter([ + vec![], + vec![0], + vec![1], + vec![2], + vec![0, 1], + vec![1, 2], + ]); + let only_2q = Interner::<[u8]>::from_iter([vec![0], vec![1], vec![0, 1]]); + + // This is the identity map, so all the values should come out the same. + base.merge_map_slice_using::<2>(&only_2q, |x| Some(*x), &mut map); + let expected = [vec![], vec![0], vec![1], vec![0, 1]]; + let (small, big): (Vec<_>, Vec<_>) = expected + .iter() + .map(|x| { + let key = only_2q.try_key(x).unwrap(); + (only_2q.get(key).to_owned(), base.get(map[key]).to_owned()) + }) + .unzip(); + assert_eq!(small, big); + + // Map qubits [0, 1] to [2, 1]. This involves an insertion. + base.merge_map_slice_using::<2>(&only_2q, |x| [2u8, 1].get(*x as usize).copied(), &mut map); + let expected = HashSet::<(Vec, Vec)>::from([ + (vec![], vec![]), + (vec![0], vec![2]), + (vec![1], vec![1]), + (vec![0, 1], vec![2, 1]), + ]); + let actual = map + .iter() + .map(|(small, big)| (only_2q.get(small).to_owned(), base.get(big).to_owned())) + .collect::>(); + assert_eq!(expected, actual); + assert_eq!(&[2, 1], base.get(map[only_2q.try_key(&[0, 1]).unwrap()])); + + // Map qubit [0] to [3], and drop things involving 1. + base.merge_map_slice_using::<2>(&only_2q, |x| [3u8].get(*x as usize).copied(), &mut map); + let expected = HashSet::<(Vec, Vec)>::from([(vec![], vec![]), (vec![0], vec![3])]); + // For the last test, we'll also use the `into_iter` method. 
+ let actual = map + .into_iter() + .map(|(small, big)| (only_2q.get(small).to_owned(), base.get(big).to_owned())) + .collect::>(); + assert_eq!(expected, actual); + } }