From 14e027d8213c236a2ab03cc886c03572a94de883 Mon Sep 17 00:00:00 2001
From: Nour Yosri <noureldinyosri@gmail.com>
Date: Thu, 10 Oct 2024 18:19:17 -0700
Subject: [PATCH 01/11] Create KaliskiModInverse

---
 qualtran/bloqs/mod_arithmetic/__init__.py     |   1 +
 qualtran/bloqs/mod_arithmetic/mod_division.py | 532 ++++++++++++++++++
 .../bloqs/mod_arithmetic/mod_division_test.py |  58 ++
 3 files changed, 591 insertions(+)
 create mode 100644 qualtran/bloqs/mod_arithmetic/mod_division.py
 create mode 100644 qualtran/bloqs/mod_arithmetic/mod_division_test.py

diff --git a/qualtran/bloqs/mod_arithmetic/__init__.py b/qualtran/bloqs/mod_arithmetic/__init__.py
index ff0d3a3da..6e8b08f32 100644
--- a/qualtran/bloqs/mod_arithmetic/__init__.py
+++ b/qualtran/bloqs/mod_arithmetic/__init__.py
@@ -16,3 +16,4 @@
 from .mod_addition import CModAdd, CModAddK, CtrlScaleModAdd, ModAdd, ModAddK
 from .mod_multiplication import CModMulK, DirtyOutOfPlaceMontgomeryModMul, ModDbl
 from .mod_subtraction import CModNeg, CModSub, ModNeg, ModSub
+from .mod_division import KaliskiModInverse
\ No newline at end of file
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.py b/qualtran/bloqs/mod_arithmetic/mod_division.py
new file mode 100644
index 000000000..091f75ef9
--- /dev/null
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.py
@@ -0,0 +1,532 @@
+#  Copyright 2024 Google LLC
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from functools import cached_property
+from typing import Dict, Set, TYPE_CHECKING, Union, Optional
+
+import numpy as np
+import sympy
+from attrs import field, frozen
+
+from qualtran import (
+    Bloq,
+    bloq_example,
+    BloqBuilder,
+    BloqDocSpec,
+    QBit,
+    QAny,
+    QMontgomeryUInt,
+    Register,
+    Side,
+    Signature,
+    Soquet,
+    SoquetT,
+)
+from qualtran.bloqs.arithmetic.controlled_addition import CAdd
+from qualtran.bloqs.arithmetic.bitwise import BitwiseNot
+from qualtran.bloqs.arithmetic.addition import AddK
+from qualtran.bloqs.arithmetic.subtraction import Subtract
+from qualtran.bloqs.arithmetic.comparison import LinearDepthGreaterThan
+from qualtran.bloqs.basic_gates import CNOT, TwoBitCSwap, XGate
+from qualtran.bloqs.mcmt import And, MultiAnd
+from qualtran.bloqs.mod_arithmetic.mod_multiplication import ModDbl
+from qualtran.bloqs.swap_network import CSwapApprox
+from qualtran.resource_counting import BloqCountDictT, BloqCountT
+from qualtran.resource_counting._call_graph import SympySymbolAllocator
+from qualtran.symbolics import HasLength, is_symbolic
+
+if TYPE_CHECKING:
+    from qualtran.simulation.classical_sim import ClassicalValT
+    from qualtran.symbolics import SymbolicInt
+    from qualtran.resource_counting import BloqCountDictT
+
+@frozen
+class _KaliskiIterationStep1(Bloq):
+    bitsize: 'SymbolicInt'
+
+    @cached_property
+    def signature(self) -> 'Signature':
+        return Signature(
+            [
+                Register('v', QMontgomeryUInt(self.bitsize)),
+                Register('m', QBit()),
+                Register('f', QBit()),
+            ]
+        )
+
+    def on_classical_vals(self, v: int, m: int, f: int) -> Dict[str, 'ClassicalValT']:
+        m ^= f & (v == 0)
+        f ^= m
+        return {'v': v, 'm': m, 'f': f}
+
+    def build_composite_bloq(
+        self, bb: 'BloqBuilder', v: Soquet, m: Soquet, f: Soquet
+    ) -> Dict[str, 'SoquetT']:
+        v_arr = bb.split(v)
+        ctrls = np.concatenate([v_arr, [f]])
+        ctrls, junk, target = bb.add(MultiAnd(cvs=[0] * self.bitsize + [1]), ctrl=ctrls)
+        target, m = bb.add(CNOT(), ctrl=target, target=m)
+        ctrls = bb.add(
+            MultiAnd(cvs=[0] * self.bitsize + [1]).adjoint(), ctrl=ctrls, junk=junk, target=target
+        )
+        v_arr = ctrls[:-1]
+        f = ctrls[-1]
+        v = bb.join(v_arr)
+        m, f = bb.add(CNOT(), ctrl=m, target=f)
+        return {'v': v, 'm': m, 'f': f}
+
+    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
+        if is_symbolic(self.bitsize):
+            cvs = HasLength(self.bitsize)
+        else:
+            cvs = [0] * self.bitsize
+        return {
+            MultiAnd(cvs=cvs):1,
+            MultiAnd(cvs=cvs).adjoint(): 1,
+            CNOT(): 2,
+        }
+@frozen
+class _KaliskiIterationStep2(Bloq):
+    bitsize: 'SymbolicInt'
+
+    @cached_property
+    def signature(self) -> 'Signature':
+        return Signature(
+            [
+                Register('u', QMontgomeryUInt(self.bitsize)),
+                Register('v', QMontgomeryUInt(self.bitsize)),
+                Register('b', QBit()),
+                Register('a', QBit()),
+                Register('m', QBit()),
+                Register('f', QBit()),
+            ]
+        )
+
+    def on_classical_vals(
+        self, u: int, v: int, b: int, a: int, m: int, f: int
+    ) -> Dict[str, 'ClassicalValT']:
+        a ^= ((u & 1) == 0) & f
+        m ^= ((v & 1) == 0) & (a == 0) & f
+        b ^= a
+        b ^= m
+        return {'u': u, 'v': v, 'b': b, 'a': a, 'm': m, 'f': f}
+
+    def build_composite_bloq(
+        self, bb: 'BloqBuilder', u: Soquet, v: Soquet, b: Soquet, a: Soquet, m: Soquet, f: Soquet
+    ) -> Dict[str, 'SoquetT']:
+        u_arr = bb.split(u)
+        v_arr = bb.split(v)
+
+        (f, u_arr[-1]), c = bb.add(And(1, 0), ctrl=(f, u_arr[-1]))
+        c, a = bb.add(CNOT(), ctrl=c, target=a)
+        f, u_arr[-1] = bb.add(And(1, 0).adjoint(), ctrl=(f, u_arr[-1]), target=c)
+
+        (f, v_arr[-1], a), junk, c = bb.add(MultiAnd(cvs=(1, 0, 0)), ctrl=(f, v_arr[-1], a))
+        c, m = bb.add(CNOT(), ctrl=c, target=m)
+        f, v_arr[-1], a = bb.add(
+            MultiAnd(cvs=(1, 0, 0)).adjoint(), ctrl=(f, v_arr[-1], a), junk=junk, target=c
+        )
+
+        a, b = bb.add(CNOT(), ctrl=a, target=b)
+        m, b = bb.add(CNOT(), ctrl=m, target=b)
+        u = bb.join(u_arr)
+        v = bb.join(v_arr)
+        return {'u': u, 'v': v, 'b': b, 'a': a, 'm': m, 'f': f}
+
+    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
+        return {
+            And(1, 0): 1,
+            And(1, 0).adjoint(): 1,
+            CNOT(): 4,  
+            MultiAnd((1, 0, 0)): 1, 
+            MultiAnd((1, 0, 0)).adjoint(): 1, 
+        }
+
+@frozen
+class _KaliskiIterationStep3(Bloq):
+    bitsize: 'SymbolicInt'
+
+    @cached_property
+    def signature(self) -> 'Signature':
+        return Signature(
+            [
+                Register('u', QMontgomeryUInt(self.bitsize)),
+                Register('v', QMontgomeryUInt(self.bitsize)),
+                Register('b', QBit()),
+                Register('a', QBit()),
+                Register('m', QBit()),
+                Register('f', QBit()),
+            ]
+        )
+
+    def on_classical_vals(
+        self, u: int, v: int, b: int, a: int, m: int, f: int
+    ) -> Dict[str, 'ClassicalValT']:
+        c = (u > v) & (b == 0) & f
+        a ^= c
+        m ^= c
+        return {'u': u, 'v': v, 'b': b, 'a': a, 'm': m, 'f': f}
+
+    def build_composite_bloq(self, bb: 'BloqBuilder', u: Soquet, v: Soquet, b: Soquet, a: Soquet, m: Soquet, f: Soquet) -> Dict[str, 'SoquetT']:
+        greater_than = bb.allocate(1)
+        u, v, greater_than = bb.add(LinearDepthGreaterThan(self.bitsize, signed=False), a=u, b=v, target=greater_than)
+
+        (greater_than, f, b), junk, ctrl = bb.add(MultiAnd(cvs=(1, 1, 0)), ctrl=(greater_than, f, b))
+        
+        ctrl, a = bb.add(CNOT(), ctrl=ctrl, target=a)
+        ctrl, m = bb.add(CNOT(), ctrl=ctrl, target=m)
+
+        greater_than, f, b = bb.add(MultiAnd(cvs=(1, 1, 0)).adjoint(), ctrl=(greater_than, f, b), junk=junk, target=ctrl)
+        u, v, greater_than = bb.add(LinearDepthGreaterThan(self.bitsize), a=u, b=v, target=greater_than)
+        bb.free(greater_than)
+        return {
+            'u': u,
+            'v': v,
+            'b': b,
+            'a': a,
+            'm': m,
+            'f': f,
+        }
+
+    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
+        return {
+            LinearDepthGreaterThan(self.bitsize, signed=False): 2,
+            MultiAnd((1, 1, 0)): 1,
+            MultiAnd((1, 1, 0)).adjoint(): 1,
+            CNOT(): 2,
+        }
+
+@frozen
+class _KaliskiIterationStep4(Bloq):
+    bitsize: 'SymbolicInt'
+
+    @cached_property
+    def signature(self) -> 'Signature':
+        return Signature(
+            [
+                Register('u', QMontgomeryUInt(self.bitsize)),
+                Register('v', QMontgomeryUInt(self.bitsize)),
+                Register('r', QMontgomeryUInt(self.bitsize)),
+                Register('s', QMontgomeryUInt(self.bitsize)),
+                Register('a', QBit()),
+            ]
+        )
+
+    def on_classical_vals(
+        self, u: int, v: int, r: int, s: int, a: int
+    ) -> Dict[str, 'ClassicalValT']:
+        if a:
+            u, v = v, u
+            r, s = s, r
+        return {'u': u, 'v': v, 'r': r, 's': s, 'a': a}
+
+    def build_composite_bloq(
+        self, bb: 'BloqBuilder', u: Soquet, v: Soquet, r: Soquet, s: Soquet, a: Soquet
+    ) -> Dict[str, 'SoquetT']:
+        # CSwapApprox is a CSWAP with a phase flip.
+        # Since we are doing two SWAPs the overall phase is correct.
+        a, u, v = bb.add(CSwapApprox(self.bitsize), ctrl=a, x=u, y=v)
+        a, r, s = bb.add(CSwapApprox(self.bitsize), ctrl=a, x=r, y=s)
+        return {'u': u, 'v': v, 'r': r, 's': s, 'a': a}
+
+    def build_call_graph(self, ssa: SympySymbolAllocator) -> 'BloqCountDictT':
+        return {CSwapApprox(self.bitsize): 2}
+
+
+@frozen
+class _KaliskiIterationStep5(Bloq):
+    bitsize: 'SymbolicInt'
+
+    @cached_property
+    def signature(self) -> 'Signature':
+        return Signature(
+            [
+                Register('u', QMontgomeryUInt(self.bitsize)),
+                Register('v', QMontgomeryUInt(self.bitsize)),
+                Register('r', QMontgomeryUInt(self.bitsize)),
+                Register('s', QMontgomeryUInt(self.bitsize)),
+                Register('b', QBit()),
+                Register('f', QBit()),
+            ]
+        )
+
+    def on_classical_vals(
+        self, u: int, v: int, r: int, s: int, b: int, f: int
+    ) -> Dict[str, 'ClassicalValT']:
+        if f and b == 0:
+            v -= u
+            s += r
+        return {'u': u, 'v': v, 'r': r, 's': s, 'b': b, 'f': f}
+
+    def build_composite_bloq(
+        self, bb: 'BloqBuilder', u: Soquet, v: Soquet, r: Soquet, s: Soquet, b: Soquet, f: Soquet
+    ) -> Dict[str, 'SoquetT']:
+        (f, b), c = bb.add(And(1, 0), ctrl=(f, b))
+        v = bb.add(BitwiseNot(QMontgomeryUInt(self.bitsize)), x=v)
+        c, u, v = bb.add(CAdd(QMontgomeryUInt(self.bitsize)), ctrl=c, a=u, b=v)
+        v = bb.add(BitwiseNot(QMontgomeryUInt(self.bitsize)), x=v)
+        c, r, s = bb.add(CAdd(QMontgomeryUInt(self.bitsize)), ctrl=c, a=r, b=s)
+        f, b = bb.add(And(1, 0).adjoint(), ctrl=(f, b), target=c)
+        return {'u': u, 'v': v, 'r': r, 's': s, 'b': b, 'f': f}
+
+    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
+        return {
+            And(1, 0) : 1,
+            And(1, 0).adjoint(): 1,
+            BitwiseNot(QMontgomeryUInt(self.bitsize)): 2,
+            CAdd(QMontgomeryUInt(self.bitsize)): 2,
+        }
+
+@frozen
+class _KaliskiIterationStep6(Bloq):
+    bitsize: 'SymbolicInt'
+    mod: 'SymbolicInt'
+
+    @cached_property
+    def signature(self) -> 'Signature':
+        return Signature(
+            [
+                Register('u', QMontgomeryUInt(self.bitsize)),
+                Register('v', QMontgomeryUInt(self.bitsize)),
+                Register('r', QMontgomeryUInt(self.bitsize)),
+                Register('s', QMontgomeryUInt(self.bitsize)),
+                Register('b', QBit()),
+                Register('a', QBit()),
+                Register('m', QBit()),
+                Register('f', QBit()),
+            ]
+        )
+
+    def on_classical_vals(
+        self, u: int, v: int, r: int, s: int, b: int, a: int, m: int, f: int
+    ) -> Dict[str, 'ClassicalValT']:
+        b ^= m
+        b ^= a
+        if f:
+            v >>= 1
+        r = (2 * r) % self.mod
+        if a:
+            r, s = s, r
+            u, v = v, u
+        if s % 2 == 0:
+            a ^= 1
+        return {'u': u, 'v': v, 'r': r, 's': s, 'b': b, 'a': a, 'm': m, 'f': f}
+
+    def build_composite_bloq(
+        self,
+        bb: 'BloqBuilder',
+        u: Soquet,
+        v: Soquet,
+        r: Soquet,
+        s: Soquet,
+        b: Soquet,
+        a: Soquet,
+        m: Soquet,
+        f: Soquet,
+    ) -> Dict[str, 'SoquetT']:
+        m, b = bb.add(CNOT(), ctrl=m, target=b)
+        a, b = bb.add(CNOT(), ctrl=a, target=b)
+
+        # Controlled division by 2. The control bit is set only if the number is even, so the division becomes equivalent to a cyclic right shift.
+        v_arr = bb.split(v)
+        for i in reversed(range(self.bitsize - 1)):
+            f, v_arr[i], v_arr[i + 1] = bb.add(TwoBitCSwap(), ctrl=f, x=v_arr[i], y=v_arr[i + 1])
+        v = bb.join(v_arr)
+
+        r = bb.add(ModDbl(QMontgomeryUInt(self.bitsize), self.mod), x=r)
+
+        a, u, v = bb.add(CSwapApprox(self.bitsize), ctrl=a, x=u, y=v)
+        a, r, s = bb.add(CSwapApprox(self.bitsize), ctrl=a, x=r, y=s)
+
+        s_arr = bb.split(s)
+        s_arr[-1] = bb.add(XGate(), q=s_arr[-1])
+        s_arr[-1], a = bb.add(CNOT(), ctrl=s_arr[-1], target=a)
+        s_arr[-1] = bb.add(XGate(), q=s_arr[-1])
+        s = bb.join(s_arr)
+
+        return {'u': u, 'v': v, 'r': r, 's': s, 'b': b, 'a': a, 'm': m, 'f': f}
+
+    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
+        return {
+            CNOT(): 4,
+            XGate(): 2,
+            ModDbl(QMontgomeryUInt(self.bitsize), self.mod): 1,
+            CSwapApprox(self.bitsize): 2,
+            TwoBitCSwap(): self.bitsize - 1,
+        }
+
+@frozen
+class KaliskiIteration(Bloq):
+    bitsize: 'SymbolicInt'
+    mod: 'SymbolicInt'
+
+    @cached_property
+    def signature(self) -> 'Signature':
+        return Signature(
+            [
+                Register('u', QMontgomeryUInt(self.bitsize)),
+                Register('v', QMontgomeryUInt(self.bitsize)),
+                Register('r', QMontgomeryUInt(self.bitsize)),
+                Register('s', QMontgomeryUInt(self.bitsize)),
+                Register('m', QBit()),
+                Register('f', QBit()),
+            ]
+        )
+
+    def build_composite_bloq(
+        self,
+        bb: 'BloqBuilder',
+        u: Soquet,
+        v: Soquet,
+        r: Soquet,
+        s: Soquet,
+        m: Soquet,
+        f: Soquet,
+    ) -> Dict[str, 'SoquetT']:
+        a = bb.allocate(1)
+        b = bb.allocate(1)
+
+        v, m, f = bb.add(_KaliskiIterationStep1(self.bitsize), v=v, m=m, f=f)
+        u, v, b, a, m, f = bb.add(_KaliskiIterationStep2(self.bitsize), u=u, v=v, b=b, a=a, m=m, f=f)
+        u, v, b, a, m, f = bb.add(_KaliskiIterationStep3(self.bitsize), u=u, v=v, b=b, a=a, m=m, f=f)
+        u, v, r, s, a = bb.add(_KaliskiIterationStep4(self.bitsize), u=u, v=v, r=r, s=s, a=a)
+        u, v, r, s, b, f = bb.add(_KaliskiIterationStep5(self.bitsize), u=u, v=v, r=r, s=s, b=b, f=f)
+        u, v, r, s, b, a, m, f = bb.add(_KaliskiIterationStep6(self.bitsize, self.mod), u=u, v=v, r=r, s=s, b=b, a=a, m=m, f=f)
+
+        bb.free(a)
+        bb.free(b)
+        return {
+            'u': u, 'v': v, 'r': r, 's': s, 'm': m, 'f': f,
+        }
+
+
+@frozen
+class _KaliskiModInverseImpl(Bloq):
+    bitsize: 'SymbolicInt'
+    mod: 'SymbolicInt'
+
+    @cached_property
+    def signature(self) -> 'Signature':
+        return Signature(
+            [
+                Register('u', QMontgomeryUInt(self.bitsize)),
+                Register('v', QMontgomeryUInt(self.bitsize)),
+                Register('r', QMontgomeryUInt(self.bitsize)),
+                Register('s', QMontgomeryUInt(self.bitsize)),
+                Register('m', QAny(2*self.bitsize)),
+                Register('f', QBit()),
+            ]
+        )
+    
+
+    @cached_property
+    def _kaliski_iteration(self):
+        return KaliskiIteration(self.bitsize, self.mod)
+
+
+    def build_composite_bloq(
+        self,
+        bb: 'BloqBuilder',
+        u: Soquet,
+        v: Soquet,
+        r: Soquet,
+        s: Soquet,
+        m: Soquet,
+        f: Soquet,
+    ) -> Dict[str, 'SoquetT']:
+        f = bb.add(XGate(), q = f)
+        m_arr = bb.split(m)
+
+        for i in range(2*self.bitsize):
+            u, v, r, s, m_arr[i], f = bb.add(self._kaliski_iteration, u=u, v=v, r=r, s=s, m=m_arr[i], f=f)
+
+        r = bb.add(BitwiseNot(QMontgomeryUInt(self.bitsize)), x=r)
+        r = bb.add(AddK(self.bitsize, self.mod + 1, signed=False), x=r)
+
+        m = bb.join(m_arr)
+        return {
+            'u': u,
+            'v': v,
+            'r': r,
+            's': s,
+            'm': m,
+            'f': f,
+        }
+    
+    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
+        return {
+            self._kaliski_iteration:2*self.bitsize,
+            BitwiseNot(QMontgomeryUInt(self.bitsize)): 1,
+            AddK(self.bitsize, self.mod + 1, signed=False): 1,
+            XGate(): 1,
+        }
+
+@frozen
+class KaliskiModInverse(Bloq):
+    bitsize: 'SymbolicInt'
+    mod: 'SymbolicInt'
+    uncompute: bool = False
+
+    @cached_property
+    def signature(self) -> 'Signature':
+        side = Side.LEFT if self.uncompute else Side.RIGHT
+        return Signature(
+            [
+                Register('u', QMontgomeryUInt(self.bitsize)),
+                Register('v', QMontgomeryUInt(self.bitsize)),
+                Register('r', QMontgomeryUInt(self.bitsize)),
+                Register('s', QMontgomeryUInt(self.bitsize)),
+                Register('m', QAny(2*self.bitsize), side=side),
+                Register('f', QBit(), side=side),
+            ]
+        )
+    
+
+    def build_composite_bloq(
+        self,
+        bb: 'BloqBuilder',
+        u: Soquet,
+        v: Soquet,
+        r: Soquet,
+        s: Soquet,
+        m: Optional[Soquet] = None,
+        f: Optional[Soquet] = None,
+    ) -> Dict[str, 'SoquetT']:
+
+        if self.uncompute:
+            u, v, r, s, m, f = bb.add_from(_KaliskiModInverseImpl(self.bitsize, self.mod).adjoint(), u=u, v=v, r=r, s=s, m=m, f=f)
+            bb.free(m)
+            bb.free(f)
+            return {
+                'u': u,
+                'v': v,
+                'r': r,
+                's': s,
+            }
+
+        m = bb.allocate(2*self.bitsize)
+        # m = bb.split(m)
+        f = bb.allocate(1)
+        u, v, r, s, m, f = bb.add_from(_KaliskiModInverseImpl(self.bitsize, self.mod), u=u, v=v, r=r, s=s, m=m, f=f)
+        return {
+            'u': u,
+            'v': v,
+            'r': r,
+            's': s,
+            'm': m,
+            'f': f,
+        }
+
+    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
+        return _KaliskiModInverseImpl(self.bitsize, self.mod).build_call_graph(ssa)
\ No newline at end of file
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division_test.py b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
new file mode 100644
index 000000000..b5df73ba1
--- /dev/null
+++ b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
@@ -0,0 +1,58 @@
+#  Copyright 2024 Google LLC
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import pytest
+import math
+from qualtran import (
+    QMontgomeryUInt
+)
+
+from qualtran.resource_counting import get_cost_value, QECGatesCost
+from qualtran.resource_counting.generalizers import ignore_alloc_free, ignore_split_join
+from qualtran.bloqs.mod_arithmetic import KaliskiModInverse
+import qualtran.testing as qlt_testing
+
+
+@pytest.mark.parametrize('bitsize', [5, 6])
+@pytest.mark.parametrize('mod', [3, 5, 7, 11, 13, 15])
+def test_kaliski_mod_inverse_classical_action(bitsize, mod):
+    blq = KaliskiModInverse(bitsize, mod)
+    cblq = blq.decompose_bloq()
+    p2 = pow(2, bitsize, mod)
+    for x in range(1, mod):
+        if math.gcd(x, mod) != 1: continue
+        x_montgomery = (x * p2)%mod
+        inv_x = pow(x, -1, mod)
+        inv_x_montgomery = (inv_x * p2) % mod
+        res = blq.call_classically(u=mod, v=x_montgomery, r=0, s=1)
+        assert res == cblq.call_classically(u=mod, v=x_montgomery, r=0, s=1)
+        u, v, r, s = res[:4]
+    
+        # Invariants of the Kaliski algorithm.
+        assert u == 1
+        assert v == 0
+        assert s == mod
+        assert r == inv_x_montgomery
+
+@pytest.mark.parametrize('bitsize', [5, 6])
+@pytest.mark.parametrize('mod', [3, 5, 7, 11, 13, 15])
+def test_kaliski_mod_inverse_decomposition(bitsize, mod):
+    b = KaliskiModInverse(bitsize, mod)
+    qlt_testing.assert_valid_bloq_decomposition(b)
+
+@pytest.mark.parametrize('bitsize', [5, 6])
+@pytest.mark.parametrize('mod', [3, 5, 7, 11, 13, 15])
+def test_kaliski_mod_bloq_counts(bitsize, mod):
+    b = KaliskiModInverse(bitsize, mod)
+    qlt_testing.assert_equivalent_bloq_counts(b, [ignore_alloc_free, ignore_split_join])
\ No newline at end of file

From a57ff135d7c96a8c1bbd172fe67a09fe91fe9483 Mon Sep 17 00:00:00 2001
From: Nour Yosri <noureldinyosri@gmail.com>
Date: Thu, 10 Oct 2024 18:20:19 -0700
Subject: [PATCH 02/11] format

---
 qualtran/bloqs/mod_arithmetic/__init__.py     |   2 +-
 qualtran/bloqs/mod_arithmetic/mod_division.py | 167 ++++++++----------
 .../bloqs/mod_arithmetic/mod_division_test.py |  22 +--
 3 files changed, 91 insertions(+), 100 deletions(-)

diff --git a/qualtran/bloqs/mod_arithmetic/__init__.py b/qualtran/bloqs/mod_arithmetic/__init__.py
index 6e8b08f32..0ddb3fc2b 100644
--- a/qualtran/bloqs/mod_arithmetic/__init__.py
+++ b/qualtran/bloqs/mod_arithmetic/__init__.py
@@ -14,6 +14,6 @@
 
 from ._shims import ModInv
 from .mod_addition import CModAdd, CModAddK, CtrlScaleModAdd, ModAdd, ModAddK
+from .mod_division import KaliskiModInverse
 from .mod_multiplication import CModMulK, DirtyOutOfPlaceMontgomeryModMul, ModDbl
 from .mod_subtraction import CModNeg, CModSub, ModNeg, ModSub
-from .mod_division import KaliskiModInverse
\ No newline at end of file
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.py b/qualtran/bloqs/mod_arithmetic/mod_division.py
index 091f75ef9..7dcad00a4 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.py
@@ -13,7 +13,7 @@
 #  limitations under the License.
 
 from functools import cached_property
-from typing import Dict, Set, TYPE_CHECKING, Union, Optional
+from typing import Dict, Optional, Set, TYPE_CHECKING, Union
 
 import numpy as np
 import sympy
@@ -24,8 +24,8 @@
     bloq_example,
     BloqBuilder,
     BloqDocSpec,
-    QBit,
     QAny,
+    QBit,
     QMontgomeryUInt,
     Register,
     Side,
@@ -33,11 +33,11 @@
     Soquet,
     SoquetT,
 )
-from qualtran.bloqs.arithmetic.controlled_addition import CAdd
-from qualtran.bloqs.arithmetic.bitwise import BitwiseNot
 from qualtran.bloqs.arithmetic.addition import AddK
-from qualtran.bloqs.arithmetic.subtraction import Subtract
+from qualtran.bloqs.arithmetic.bitwise import BitwiseNot
 from qualtran.bloqs.arithmetic.comparison import LinearDepthGreaterThan
+from qualtran.bloqs.arithmetic.controlled_addition import CAdd
+from qualtran.bloqs.arithmetic.subtraction import Subtract
 from qualtran.bloqs.basic_gates import CNOT, TwoBitCSwap, XGate
 from qualtran.bloqs.mcmt import And, MultiAnd
 from qualtran.bloqs.mod_arithmetic.mod_multiplication import ModDbl
@@ -47,9 +47,10 @@
 from qualtran.symbolics import HasLength, is_symbolic
 
 if TYPE_CHECKING:
+    from qualtran.resource_counting import BloqCountDictT
     from qualtran.simulation.classical_sim import ClassicalValT
     from qualtran.symbolics import SymbolicInt
-    from qualtran.resource_counting import BloqCountDictT
+
 
 @frozen
 class _KaliskiIterationStep1(Bloq):
@@ -91,11 +92,9 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
             cvs = HasLength(self.bitsize)
         else:
             cvs = [0] * self.bitsize
-        return {
-            MultiAnd(cvs=cvs):1,
-            MultiAnd(cvs=cvs).adjoint(): 1,
-            CNOT(): 2,
-        }
+        return {MultiAnd(cvs=cvs): 1, MultiAnd(cvs=cvs).adjoint(): 1, CNOT(): 2}
+
+
 @frozen
 class _KaliskiIterationStep2(Bloq):
     bitsize: 'SymbolicInt'
@@ -148,11 +147,12 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         return {
             And(1, 0): 1,
             And(1, 0).adjoint(): 1,
-            CNOT(): 4,  
-            MultiAnd((1, 0, 0)): 1, 
-            MultiAnd((1, 0, 0)).adjoint(): 1, 
+            CNOT(): 4,
+            MultiAnd((1, 0, 0)): 1,
+            MultiAnd((1, 0, 0)).adjoint(): 1,
         }
 
+
 @frozen
 class _KaliskiIterationStep3(Bloq):
     bitsize: 'SymbolicInt'
@@ -178,26 +178,29 @@ def on_classical_vals(
         m ^= c
         return {'u': u, 'v': v, 'b': b, 'a': a, 'm': m, 'f': f}
 
-    def build_composite_bloq(self, bb: 'BloqBuilder', u: Soquet, v: Soquet, b: Soquet, a: Soquet, m: Soquet, f: Soquet) -> Dict[str, 'SoquetT']:
+    def build_composite_bloq(
+        self, bb: 'BloqBuilder', u: Soquet, v: Soquet, b: Soquet, a: Soquet, m: Soquet, f: Soquet
+    ) -> Dict[str, 'SoquetT']:
         greater_than = bb.allocate(1)
-        u, v, greater_than = bb.add(LinearDepthGreaterThan(self.bitsize, signed=False), a=u, b=v, target=greater_than)
+        u, v, greater_than = bb.add(
+            LinearDepthGreaterThan(self.bitsize, signed=False), a=u, b=v, target=greater_than
+        )
+
+        (greater_than, f, b), junk, ctrl = bb.add(
+            MultiAnd(cvs=(1, 1, 0)), ctrl=(greater_than, f, b)
+        )
 
-        (greater_than, f, b), junk, ctrl = bb.add(MultiAnd(cvs=(1, 1, 0)), ctrl=(greater_than, f, b))
-        
         ctrl, a = bb.add(CNOT(), ctrl=ctrl, target=a)
         ctrl, m = bb.add(CNOT(), ctrl=ctrl, target=m)
 
-        greater_than, f, b = bb.add(MultiAnd(cvs=(1, 1, 0)).adjoint(), ctrl=(greater_than, f, b), junk=junk, target=ctrl)
-        u, v, greater_than = bb.add(LinearDepthGreaterThan(self.bitsize), a=u, b=v, target=greater_than)
+        greater_than, f, b = bb.add(
+            MultiAnd(cvs=(1, 1, 0)).adjoint(), ctrl=(greater_than, f, b), junk=junk, target=ctrl
+        )
+        u, v, greater_than = bb.add(
+            LinearDepthGreaterThan(self.bitsize), a=u, b=v, target=greater_than
+        )
         bb.free(greater_than)
-        return {
-            'u': u,
-            'v': v,
-            'b': b,
-            'a': a,
-            'm': m,
-            'f': f,
-        }
+        return {'u': u, 'v': v, 'b': b, 'a': a, 'm': m, 'f': f}
 
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         return {
@@ -207,6 +210,7 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
             CNOT(): 2,
         }
 
+
 @frozen
 class _KaliskiIterationStep4(Bloq):
     bitsize: 'SymbolicInt'
@@ -282,12 +286,13 @@ def build_composite_bloq(
 
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         return {
-            And(1, 0) : 1,
+            And(1, 0): 1,
             And(1, 0).adjoint(): 1,
             BitwiseNot(QMontgomeryUInt(self.bitsize)): 2,
             CAdd(QMontgomeryUInt(self.bitsize)): 2,
         }
 
+
 @frozen
 class _KaliskiIterationStep6(Bloq):
     bitsize: 'SymbolicInt'
@@ -366,6 +371,7 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
             TwoBitCSwap(): self.bitsize - 1,
         }
 
+
 @frozen
 class KaliskiIteration(Bloq):
     bitsize: 'SymbolicInt'
@@ -385,30 +391,29 @@ def signature(self) -> 'Signature':
         )
 
     def build_composite_bloq(
-        self,
-        bb: 'BloqBuilder',
-        u: Soquet,
-        v: Soquet,
-        r: Soquet,
-        s: Soquet,
-        m: Soquet,
-        f: Soquet,
+        self, bb: 'BloqBuilder', u: Soquet, v: Soquet, r: Soquet, s: Soquet, m: Soquet, f: Soquet
     ) -> Dict[str, 'SoquetT']:
         a = bb.allocate(1)
         b = bb.allocate(1)
 
         v, m, f = bb.add(_KaliskiIterationStep1(self.bitsize), v=v, m=m, f=f)
-        u, v, b, a, m, f = bb.add(_KaliskiIterationStep2(self.bitsize), u=u, v=v, b=b, a=a, m=m, f=f)
-        u, v, b, a, m, f = bb.add(_KaliskiIterationStep3(self.bitsize), u=u, v=v, b=b, a=a, m=m, f=f)
+        u, v, b, a, m, f = bb.add(
+            _KaliskiIterationStep2(self.bitsize), u=u, v=v, b=b, a=a, m=m, f=f
+        )
+        u, v, b, a, m, f = bb.add(
+            _KaliskiIterationStep3(self.bitsize), u=u, v=v, b=b, a=a, m=m, f=f
+        )
         u, v, r, s, a = bb.add(_KaliskiIterationStep4(self.bitsize), u=u, v=v, r=r, s=s, a=a)
-        u, v, r, s, b, f = bb.add(_KaliskiIterationStep5(self.bitsize), u=u, v=v, r=r, s=s, b=b, f=f)
-        u, v, r, s, b, a, m, f = bb.add(_KaliskiIterationStep6(self.bitsize, self.mod), u=u, v=v, r=r, s=s, b=b, a=a, m=m, f=f)
+        u, v, r, s, b, f = bb.add(
+            _KaliskiIterationStep5(self.bitsize), u=u, v=v, r=r, s=s, b=b, f=f
+        )
+        u, v, r, s, b, a, m, f = bb.add(
+            _KaliskiIterationStep6(self.bitsize, self.mod), u=u, v=v, r=r, s=s, b=b, a=a, m=m, f=f
+        )
 
         bb.free(a)
         bb.free(b)
-        return {
-            'u': u, 'v': v, 'r': r, 's': s, 'm': m, 'f': f,
-        }
+        return {'u': u, 'v': v, 'r': r, 's': s, 'm': m, 'f': f}
 
 
 @frozen
@@ -424,54 +429,41 @@ def signature(self) -> 'Signature':
                 Register('v', QMontgomeryUInt(self.bitsize)),
                 Register('r', QMontgomeryUInt(self.bitsize)),
                 Register('s', QMontgomeryUInt(self.bitsize)),
-                Register('m', QAny(2*self.bitsize)),
+                Register('m', QAny(2 * self.bitsize)),
                 Register('f', QBit()),
             ]
         )
-    
 
     @cached_property
     def _kaliski_iteration(self):
         return KaliskiIteration(self.bitsize, self.mod)
 
-
     def build_composite_bloq(
-        self,
-        bb: 'BloqBuilder',
-        u: Soquet,
-        v: Soquet,
-        r: Soquet,
-        s: Soquet,
-        m: Soquet,
-        f: Soquet,
+        self, bb: 'BloqBuilder', u: Soquet, v: Soquet, r: Soquet, s: Soquet, m: Soquet, f: Soquet
     ) -> Dict[str, 'SoquetT']:
-        f = bb.add(XGate(), q = f)
+        f = bb.add(XGate(), q=f)
         m_arr = bb.split(m)
 
-        for i in range(2*self.bitsize):
-            u, v, r, s, m_arr[i], f = bb.add(self._kaliski_iteration, u=u, v=v, r=r, s=s, m=m_arr[i], f=f)
+        for i in range(2 * self.bitsize):
+            u, v, r, s, m_arr[i], f = bb.add(
+                self._kaliski_iteration, u=u, v=v, r=r, s=s, m=m_arr[i], f=f
+            )
 
         r = bb.add(BitwiseNot(QMontgomeryUInt(self.bitsize)), x=r)
         r = bb.add(AddK(self.bitsize, self.mod + 1, signed=False), x=r)
 
         m = bb.join(m_arr)
-        return {
-            'u': u,
-            'v': v,
-            'r': r,
-            's': s,
-            'm': m,
-            'f': f,
-        }
-    
+        return {'u': u, 'v': v, 'r': r, 's': s, 'm': m, 'f': f}
+
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         return {
-            self._kaliski_iteration:2*self.bitsize,
+            self._kaliski_iteration: 2 * self.bitsize,
             BitwiseNot(QMontgomeryUInt(self.bitsize)): 1,
             AddK(self.bitsize, self.mod + 1, signed=False): 1,
             XGate(): 1,
         }
 
+
 @frozen
 class KaliskiModInverse(Bloq):
     bitsize: 'SymbolicInt'
@@ -487,11 +479,10 @@ def signature(self) -> 'Signature':
                 Register('v', QMontgomeryUInt(self.bitsize)),
                 Register('r', QMontgomeryUInt(self.bitsize)),
                 Register('s', QMontgomeryUInt(self.bitsize)),
-                Register('m', QAny(2*self.bitsize), side=side),
+                Register('m', QAny(2 * self.bitsize), side=side),
                 Register('f', QBit(), side=side),
             ]
         )
-    
 
     def build_composite_bloq(
         self,
@@ -505,28 +496,26 @@ def build_composite_bloq(
     ) -> Dict[str, 'SoquetT']:
 
         if self.uncompute:
-            u, v, r, s, m, f = bb.add_from(_KaliskiModInverseImpl(self.bitsize, self.mod).adjoint(), u=u, v=v, r=r, s=s, m=m, f=f)
+            u, v, r, s, m, f = bb.add_from(
+                _KaliskiModInverseImpl(self.bitsize, self.mod).adjoint(),
+                u=u,
+                v=v,
+                r=r,
+                s=s,
+                m=m,
+                f=f,
+            )
             bb.free(m)
             bb.free(f)
-            return {
-                'u': u,
-                'v': v,
-                'r': r,
-                's': s,
-            }
-
-        m = bb.allocate(2*self.bitsize)
+            return {'u': u, 'v': v, 'r': r, 's': s}
+
+        m = bb.allocate(2 * self.bitsize)
         # m = bb.split(m)
         f = bb.allocate(1)
-        u, v, r, s, m, f = bb.add_from(_KaliskiModInverseImpl(self.bitsize, self.mod), u=u, v=v, r=r, s=s, m=m, f=f)
-        return {
-            'u': u,
-            'v': v,
-            'r': r,
-            's': s,
-            'm': m,
-            'f': f,
-        }
+        u, v, r, s, m, f = bb.add_from(
+            _KaliskiModInverseImpl(self.bitsize, self.mod), u=u, v=v, r=r, s=s, m=m, f=f
+        )
+        return {'u': u, 'v': v, 'r': r, 's': s, 'm': m, 'f': f}
 
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
-        return _KaliskiModInverseImpl(self.bitsize, self.mod).build_call_graph(ssa)
\ No newline at end of file
+        return _KaliskiModInverseImpl(self.bitsize, self.mod).build_call_graph(ssa)
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division_test.py b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
index b5df73ba1..04aecd6ef 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division_test.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
@@ -12,16 +12,15 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
-import pytest
 import math
-from qualtran import (
-    QMontgomeryUInt
-)
 
+import pytest
+
+import qualtran.testing as qlt_testing
+from qualtran import QMontgomeryUInt
+from qualtran.bloqs.mod_arithmetic import KaliskiModInverse
 from qualtran.resource_counting import get_cost_value, QECGatesCost
 from qualtran.resource_counting.generalizers import ignore_alloc_free, ignore_split_join
-from qualtran.bloqs.mod_arithmetic import KaliskiModInverse
-import qualtran.testing as qlt_testing
 
 
 @pytest.mark.parametrize('bitsize', [5, 6])
@@ -31,28 +30,31 @@ def test_kaliski_mod_inverse_classical_action(bitsize, mod):
     cblq = blq.decompose_bloq()
     p2 = pow(2, bitsize, mod)
     for x in range(1, mod):
-        if math.gcd(x, mod) != 1: continue
-        x_montgomery = (x * p2)%mod
+        if math.gcd(x, mod) != 1:
+            continue
+        x_montgomery = (x * p2) % mod
         inv_x = pow(x, -1, mod)
         inv_x_montgomery = (inv_x * p2) % mod
         res = blq.call_classically(u=mod, v=x_montgomery, r=0, s=1)
         assert res == cblq.call_classically(u=mod, v=x_montgomery, r=0, s=1)
         u, v, r, s = res[:4]
-    
+
         # Invariants of the Kaliski algorithm.
         assert u == 1
         assert v == 0
         assert s == mod
         assert r == inv_x_montgomery
 
+
 @pytest.mark.parametrize('bitsize', [5, 6])
 @pytest.mark.parametrize('mod', [3, 5, 7, 11, 13, 15])
 def test_kaliski_mod_inverse_decomposition(bitsize, mod):
     b = KaliskiModInverse(bitsize, mod)
     qlt_testing.assert_valid_bloq_decomposition(b)
 
+
 @pytest.mark.parametrize('bitsize', [5, 6])
 @pytest.mark.parametrize('mod', [3, 5, 7, 11, 13, 15])
 def test_kaliski_mod_bloq_counts(bitsize, mod):
     b = KaliskiModInverse(bitsize, mod)
-    qlt_testing.assert_equivalent_bloq_counts(b, [ignore_alloc_free, ignore_split_join])
\ No newline at end of file
+    qlt_testing.assert_equivalent_bloq_counts(b, [ignore_alloc_free, ignore_split_join])

From 1c3b0b90525e0640e832f0468e3c1aa0c65772ca Mon Sep 17 00:00:00 2001
From: Nour Yosri <noureldinyosri@gmail.com>
Date: Thu, 10 Oct 2024 18:37:13 -0700
Subject: [PATCH 03/11] change signature

---
 qualtran/bloqs/mod_arithmetic/mod_division.py | 40 +++++++++++--------
 .../bloqs/mod_arithmetic/mod_division_test.py | 13 ++----
 2 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.py b/qualtran/bloqs/mod_arithmetic/mod_division.py
index 7dcad00a4..2a0598369 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.py
@@ -34,7 +34,7 @@
     SoquetT,
 )
 from qualtran.bloqs.arithmetic.addition import AddK
-from qualtran.bloqs.arithmetic.bitwise import BitwiseNot
+from qualtran.bloqs.arithmetic.bitwise import BitwiseNot, XorK
 from qualtran.bloqs.arithmetic.comparison import LinearDepthGreaterThan
 from qualtran.bloqs.arithmetic.controlled_addition import CAdd
 from qualtran.bloqs.arithmetic.subtraction import Subtract
@@ -475,24 +475,14 @@ def signature(self) -> 'Signature':
         side = Side.LEFT if self.uncompute else Side.RIGHT
         return Signature(
             [
-                Register('u', QMontgomeryUInt(self.bitsize)),
-                Register('v', QMontgomeryUInt(self.bitsize)),
-                Register('r', QMontgomeryUInt(self.bitsize)),
-                Register('s', QMontgomeryUInt(self.bitsize)),
+                Register('x', QMontgomeryUInt(self.bitsize)),
                 Register('m', QAny(2 * self.bitsize), side=side),
                 Register('f', QBit(), side=side),
             ]
         )
 
     def build_composite_bloq(
-        self,
-        bb: 'BloqBuilder',
-        u: Soquet,
-        v: Soquet,
-        r: Soquet,
-        s: Soquet,
-        m: Optional[Soquet] = None,
-        f: Optional[Soquet] = None,
+        self, bb: 'BloqBuilder', x: Soquet, m: Optional[Soquet] = None, f: Optional[Soquet] = None
     ) -> Dict[str, 'SoquetT']:
 
         if self.uncompute:
@@ -509,13 +499,29 @@ def build_composite_bloq(
             bb.free(f)
             return {'u': u, 'v': v, 'r': r, 's': s}
 
+        u = bb.allocate(self.bitsize, QMontgomeryUInt(self.bitsize))
+        r = bb.allocate(self.bitsize, QMontgomeryUInt(self.bitsize))
+        s = bb.allocate(self.bitsize, QMontgomeryUInt(self.bitsize))
+        u = bb.add(XorK(QMontgomeryUInt(self.bitsize), self.mod), x=u)
+        s = bb.add(XorK(QMontgomeryUInt(self.bitsize), 1), x=s)
         m = bb.allocate(2 * self.bitsize)
         # m = bb.split(m)
         f = bb.allocate(1)
-        u, v, r, s, m, f = bb.add_from(
-            _KaliskiModInverseImpl(self.bitsize, self.mod), u=u, v=v, r=r, s=s, m=m, f=f
+        u, v, x, s, m, f = bb.add_from(
+            _KaliskiModInverseImpl(self.bitsize, self.mod), u=u, v=x, r=r, s=s, m=m, f=f
         )
-        return {'u': u, 'v': v, 'r': r, 's': s, 'm': m, 'f': f}
+
+        u = bb.add(XorK(QMontgomeryUInt(self.bitsize), 1), x=u)
+        s = bb.add(XorK(QMontgomeryUInt(self.bitsize), self.mod), x=s)
+
+        bb.free(u)
+        bb.free(v)
+        bb.free(s)
+
+        return {'x': x, 'm': m, 'f': f}
 
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
-        return _KaliskiModInverseImpl(self.bitsize, self.mod).build_call_graph(ssa)
+        return _KaliskiModInverseImpl(self.bitsize, self.mod).build_call_graph(ssa) | {
+            XorK(QMontgomeryUInt(self.bitsize), self.mod): 2,
+            XorK(QMontgomeryUInt(self.bitsize), 1): 2,
+        }
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division_test.py b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
index 04aecd6ef..d63d58a44 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division_test.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
@@ -35,15 +35,10 @@ def test_kaliski_mod_inverse_classical_action(bitsize, mod):
         x_montgomery = (x * p2) % mod
         inv_x = pow(x, -1, mod)
         inv_x_montgomery = (inv_x * p2) % mod
-        res = blq.call_classically(u=mod, v=x_montgomery, r=0, s=1)
-        assert res == cblq.call_classically(u=mod, v=x_montgomery, r=0, s=1)
-        u, v, r, s = res[:4]
-
-        # Invariants of the Kaliski algorithm.
-        assert u == 1
-        assert v == 0
-        assert s == mod
-        assert r == inv_x_montgomery
+        res = blq.call_classically(x=x_montgomery)
+        assert res == cblq.call_classically(x=x_montgomery)
+
+        assert res[0] == inv_x_montgomery
 
 
 @pytest.mark.parametrize('bitsize', [5, 6])

From cb7a9d13b1a130627a6897e8dc3bc329d7c0afc1 Mon Sep 17 00:00:00 2001
From: Nour Yosri <noureldinyosri@gmail.com>
Date: Thu, 10 Oct 2024 21:01:28 -0700
Subject: [PATCH 04/11] free

---
 qualtran/bloqs/mod_arithmetic/mod_division.py | 42 ++++++++++---------
 .../bloqs/mod_arithmetic/mod_division_test.py |  2 +-
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.py b/qualtran/bloqs/mod_arithmetic/mod_division.py
index 2a0598369..9b99acd94 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.py
@@ -442,6 +442,9 @@ def build_composite_bloq(
         self, bb: 'BloqBuilder', u: Soquet, v: Soquet, r: Soquet, s: Soquet, m: Soquet, f: Soquet
     ) -> Dict[str, 'SoquetT']:
         f = bb.add(XGate(), q=f)
+        u = bb.add(XorK(QMontgomeryUInt(self.bitsize), self.mod), x=u)
+        s = bb.add(XorK(QMontgomeryUInt(self.bitsize), 1), x=s)
+
         m_arr = bb.split(m)
 
         for i in range(2 * self.bitsize):
@@ -452,6 +455,9 @@ def build_composite_bloq(
         r = bb.add(BitwiseNot(QMontgomeryUInt(self.bitsize)), x=r)
         r = bb.add(AddK(self.bitsize, self.mod + 1, signed=False), x=r)
 
+        u = bb.add(XorK(QMontgomeryUInt(self.bitsize), 1), x=u)
+        s = bb.add(XorK(QMontgomeryUInt(self.bitsize), self.mod), x=s)
+
         m = bb.join(m_arr)
         return {'u': u, 'v': v, 'r': r, 's': s, 'm': m, 'f': f}
 
@@ -461,6 +467,8 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
             BitwiseNot(QMontgomeryUInt(self.bitsize)): 1,
             AddK(self.bitsize, self.mod + 1, signed=False): 1,
             XGate(): 1,
+            XorK(QMontgomeryUInt(self.bitsize), self.mod): 2,
+            XorK(QMontgomeryUInt(self.bitsize), 1): 2,
         }
 
 
@@ -477,51 +485,45 @@ def signature(self) -> 'Signature':
             [
                 Register('x', QMontgomeryUInt(self.bitsize)),
                 Register('m', QAny(2 * self.bitsize), side=side),
-                Register('f', QBit(), side=side),
             ]
         )
 
     def build_composite_bloq(
         self, bb: 'BloqBuilder', x: Soquet, m: Optional[Soquet] = None, f: Optional[Soquet] = None
     ) -> Dict[str, 'SoquetT']:
+        u = bb.allocate(self.bitsize, QMontgomeryUInt(self.bitsize))
+        r = bb.allocate(self.bitsize, QMontgomeryUInt(self.bitsize))
+        s = bb.allocate(self.bitsize, QMontgomeryUInt(self.bitsize))
+        f = bb.allocate(1)
 
         if self.uncompute:
-            u, v, r, s, m, f = bb.add_from(
+            u, x, r, s, m, f = bb.add_from(
                 _KaliskiModInverseImpl(self.bitsize, self.mod).adjoint(),
                 u=u,
-                v=v,
-                r=r,
+                v=r,
+                r=x,
                 s=s,
                 m=m,
                 f=f,
             )
+            bb.free(u)
+            bb.free(r)
+            bb.free(s)
             bb.free(m)
             bb.free(f)
-            return {'u': u, 'v': v, 'r': r, 's': s}
+            return {'x': x}
 
-        u = bb.allocate(self.bitsize, QMontgomeryUInt(self.bitsize))
-        r = bb.allocate(self.bitsize, QMontgomeryUInt(self.bitsize))
-        s = bb.allocate(self.bitsize, QMontgomeryUInt(self.bitsize))
-        u = bb.add(XorK(QMontgomeryUInt(self.bitsize), self.mod), x=u)
-        s = bb.add(XorK(QMontgomeryUInt(self.bitsize), 1), x=s)
         m = bb.allocate(2 * self.bitsize)
         # m = bb.split(m)
-        f = bb.allocate(1)
         u, v, x, s, m, f = bb.add_from(
             _KaliskiModInverseImpl(self.bitsize, self.mod), u=u, v=x, r=r, s=s, m=m, f=f
         )
 
-        u = bb.add(XorK(QMontgomeryUInt(self.bitsize), 1), x=u)
-        s = bb.add(XorK(QMontgomeryUInt(self.bitsize), self.mod), x=s)
-
         bb.free(u)
         bb.free(v)
         bb.free(s)
-
-        return {'x': x, 'm': m, 'f': f}
+        bb.free(f)
+        return {'x': x, 'm': m}
 
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
-        return _KaliskiModInverseImpl(self.bitsize, self.mod).build_call_graph(ssa) | {
-            XorK(QMontgomeryUInt(self.bitsize), self.mod): 2,
-            XorK(QMontgomeryUInt(self.bitsize), 1): 2,
-        }
+        return _KaliskiModInverseImpl(self.bitsize, self.mod).build_call_graph(ssa)
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division_test.py b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
index d63d58a44..3d65d94e7 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division_test.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
@@ -37,7 +37,7 @@ def test_kaliski_mod_inverse_classical_action(bitsize, mod):
         inv_x_montgomery = (inv_x * p2) % mod
         res = blq.call_classically(x=x_montgomery)
         assert res == cblq.call_classically(x=x_montgomery)
-
+        assert len(res) == 2
         assert res[0] == inv_x_montgomery
 
 

From 8c4ca8d6cfc9684dc0aff05477d9ebd2c26fa106 Mon Sep 17 00:00:00 2001
From: Nour Yosri <noureldinyosri@gmail.com>
Date: Mon, 21 Oct 2024 10:47:41 -0700
Subject: [PATCH 05/11] use half comp

---
 qualtran/bloqs/mod_arithmetic/mod_division.py | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.py b/qualtran/bloqs/mod_arithmetic/mod_division.py
index 9b99acd94..ead22e15f 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.py
@@ -35,9 +35,8 @@
 )
 from qualtran.bloqs.arithmetic.addition import AddK
 from qualtran.bloqs.arithmetic.bitwise import BitwiseNot, XorK
-from qualtran.bloqs.arithmetic.comparison import LinearDepthGreaterThan
+from qualtran.bloqs.arithmetic.comparison import LinearDepthHalfGreaterThan
 from qualtran.bloqs.arithmetic.controlled_addition import CAdd
-from qualtran.bloqs.arithmetic.subtraction import Subtract
 from qualtran.bloqs.basic_gates import CNOT, TwoBitCSwap, XGate
 from qualtran.bloqs.mcmt import And, MultiAnd
 from qualtran.bloqs.mod_arithmetic.mod_multiplication import ModDbl
@@ -181,9 +180,8 @@ def on_classical_vals(
     def build_composite_bloq(
         self, bb: 'BloqBuilder', u: Soquet, v: Soquet, b: Soquet, a: Soquet, m: Soquet, f: Soquet
     ) -> Dict[str, 'SoquetT']:
-        greater_than = bb.allocate(1)
-        u, v, greater_than = bb.add(
-            LinearDepthGreaterThan(self.bitsize, signed=False), a=u, b=v, target=greater_than
+        u, v, junk, greater_than = bb.add(
+            LinearDepthHalfGreaterThan(QMontgomeryUInt(self.bitsize)), a=u, b=v
         )
 
         (greater_than, f, b), junk, ctrl = bb.add(
@@ -196,15 +194,19 @@ def build_composite_bloq(
         greater_than, f, b = bb.add(
             MultiAnd(cvs=(1, 1, 0)).adjoint(), ctrl=(greater_than, f, b), junk=junk, target=ctrl
         )
-        u, v, greater_than = bb.add(
-            LinearDepthGreaterThan(self.bitsize), a=u, b=v, target=greater_than
+        u, v = bb.add(
+            LinearDepthHalfGreaterThan(QMontgomeryUInt(self.bitsize)).adjoint(),
+            a=u,
+            b=v,
+            c=junk,
+            target=greater_than,
         )
-        bb.free(greater_than)
         return {'u': u, 'v': v, 'b': b, 'a': a, 'm': m, 'f': f}
 
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         return {
-            LinearDepthGreaterThan(self.bitsize, signed=False): 2,
+            LinearDepthHalfGreaterThan(QMontgomeryUInt(self.bitsize)): 1,
+            LinearDepthHalfGreaterThan(QMontgomeryUInt(self.bitsize)).adjoint(): 1,
             MultiAnd((1, 1, 0)): 1,
             MultiAnd((1, 1, 0)).adjoint(): 1,
             CNOT(): 2,

From e24ef04b0055f390dc6d8b6de96005564e112b1b Mon Sep 17 00:00:00 2001
From: Nour Yosri <noureldinyosri@gmail.com>
Date: Mon, 21 Oct 2024 14:58:11 -0700
Subject: [PATCH 06/11] Add documentation

---
 .../qualtran_dev_tools/notebook_specs.py      |   5 +
 docs/bloqs/index.rst                          |   1 +
 .../bloqs/mod_arithmetic/mod_division.ipynb   | 169 ++++++++++++++++++
 qualtran/bloqs/mod_arithmetic/mod_division.py | 161 +++++++++++++++--
 .../bloqs/mod_arithmetic/mod_division_test.py |  35 +++-
 .../mod_arithmetic/mod_multiplication.py      |   2 +-
 qualtran/serialization/resolver_dict.py       |   3 +
 7 files changed, 357 insertions(+), 19 deletions(-)
 create mode 100644 qualtran/bloqs/mod_arithmetic/mod_division.ipynb

diff --git a/dev_tools/qualtran_dev_tools/notebook_specs.py b/dev_tools/qualtran_dev_tools/notebook_specs.py
index 6dc00babe..00bde2cb8 100644
--- a/dev_tools/qualtran_dev_tools/notebook_specs.py
+++ b/dev_tools/qualtran_dev_tools/notebook_specs.py
@@ -520,6 +520,11 @@
             qualtran.bloqs.mod_arithmetic.mod_multiplication._DIRTY_OUT_OF_PLACE_MONTGOMERY_MOD_MUL_DOC,
         ],
     ),
+    NotebookSpecV2(
+        title='Modular Divison',
+        module=qualtran.bloqs.mod_arithmetic.mod_division,
+        bloq_specs=[qualtran.bloqs.mod_arithmetic.mod_division._KALISKI_MOD_INVERSE_DOC],
+    ),
     NotebookSpecV2(
         title='Factoring RSA',
         module=qualtran.bloqs.factoring.rsa,
diff --git a/docs/bloqs/index.rst b/docs/bloqs/index.rst
index 16c591baa..7d7f1a27f 100644
--- a/docs/bloqs/index.rst
+++ b/docs/bloqs/index.rst
@@ -83,6 +83,7 @@ Bloqs Library
     mod_arithmetic/mod_addition.ipynb
     mod_arithmetic/mod_subtraction.ipynb
     mod_arithmetic/mod_multiplication.ipynb
+    mod_arithmetic/mod_division.ipynb
     factoring/rsa/rsa.ipynb
     factoring/ecc/ec_add.ipynb
     factoring/ecc/ecc.ipynb
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.ipynb b/qualtran/bloqs/mod_arithmetic/mod_division.ipynb
new file mode 100644
index 000000000..a01906ac3
--- /dev/null
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.ipynb
@@ -0,0 +1,169 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "1c5f2b28",
+   "metadata": {
+    "cq.autogen": "title_cell"
+   },
+   "source": [
+    "# Modular Divison"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8751aa36",
+   "metadata": {
+    "cq.autogen": "top_imports"
+   },
+   "outputs": [],
+   "source": [
+    "from qualtran import Bloq, CompositeBloq, BloqBuilder, Signature, Register\n",
+    "from qualtran import QBit, QInt, QUInt, QAny\n",
+    "from qualtran.drawing import show_bloq, show_call_graph, show_counts_sigma\n",
+    "from typing import *\n",
+    "import numpy as np\n",
+    "import sympy\n",
+    "import cirq"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d680443c",
+   "metadata": {
+    "cq.autogen": "KaliskiModInverse.bloq_doc.md"
+   },
+   "source": [
+    "## `KaliskiModInverse`\n",
+    "Compute modular multiplicative inverse -inplace- of numbers in montgomery form.\n",
+    "\n",
+    "Applies the transformation\n",
+    "$$\n",
+    "    \\ket{x} \\ket{0} \\rightarrow \\ket{x^{-1} 2^{2n} \\mod \\mathrm{mod}} \\ket{\\mathrm{garbage}}\n",
+    "$$\n",
+    "\n",
+    "#### Parameters\n",
+    " - `bitsize`: size of the number.\n",
+    " - `mod`: The integer modulus.\n",
+    " - `uncompute`: whether to compute or uncompute. \n",
+    "\n",
+    "#### Registers\n",
+    " - `x`: The register for which we compute the multiplicative inverse.\n",
+    " - `m`: A 2*bitsize register of intermediate values needed for uncomputation. \n",
+    "\n",
+    "#### References\n",
+    " - [Performance Analysis of a Repetition Cat Code Architecture: Computing 256-bit Elliptic Curve Logarithm in 9 Hours with 126 133 Cat Qubits](https://arxiv.org/abs/2302.06639).     Appendix C5.\n",
+    " - [How to compute a 256-bit elliptic curve private key with only 50 million Toffoli gates](https://arxiv.org/abs/2306.08585).     page 8.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f5917d72",
+   "metadata": {
+    "cq.autogen": "KaliskiModInverse.bloq_doc.py"
+   },
+   "outputs": [],
+   "source": [
+    "from qualtran.bloqs.mod_arithmetic import KaliskiModInverse"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d44329eb",
+   "metadata": {
+    "cq.autogen": "KaliskiModInverse.example_instances.md"
+   },
+   "source": [
+    "### Example Instances"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "31a37cf6",
+   "metadata": {
+    "cq.autogen": "KaliskiModInverse.kaliskimodinverse_example"
+   },
+   "outputs": [],
+   "source": [
+    "kaliskimodinverse_example = KaliskiModInverse(32, 10**9 + 7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "58c697e6",
+   "metadata": {
+    "cq.autogen": "KaliskiModInverse.kaliskimodinverse_symbolic"
+   },
+   "outputs": [],
+   "source": [
+    "n, p = sympy.symbols('n p')\n",
+    "kaliskimodinverse_symbolic = KaliskiModInverse(n, p)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9bf1e17c",
+   "metadata": {
+    "cq.autogen": "KaliskiModInverse.graphical_signature.md"
+   },
+   "source": [
+    "#### Graphical Signature"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eca3a706",
+   "metadata": {
+    "cq.autogen": "KaliskiModInverse.graphical_signature.py"
+   },
+   "outputs": [],
+   "source": [
+    "from qualtran.drawing import show_bloqs\n",
+    "show_bloqs([kaliskimodinverse_example, kaliskimodinverse_symbolic],\n",
+    "           ['`kaliskimodinverse_example`', '`kaliskimodinverse_symbolic`'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "69fd8906",
+   "metadata": {
+    "cq.autogen": "KaliskiModInverse.call_graph.md"
+   },
+   "source": [
+    "### Call Graph"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15c6fabe",
+   "metadata": {
+    "cq.autogen": "KaliskiModInverse.call_graph.py"
+   },
+   "outputs": [],
+   "source": [
+    "from qualtran.resource_counting.generalizers import ignore_split_join\n",
+    "kaliskimodinverse_example_g, kaliskimodinverse_example_sigma = kaliskimodinverse_example.call_graph(max_depth=1, generalizer=ignore_split_join)\n",
+    "show_call_graph(kaliskimodinverse_example_g)\n",
+    "show_counts_sigma(kaliskimodinverse_example_sigma)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.py b/qualtran/bloqs/mod_arithmetic/mod_division.py
index ead22e15f..6d24625ce 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.py
@@ -13,17 +13,18 @@
 #  limitations under the License.
 
 from functools import cached_property
-from typing import Dict, Optional, Set, TYPE_CHECKING, Union
+from typing import cast, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
 
 import numpy as np
 import sympy
-from attrs import field, frozen
+from attrs import frozen
 
 from qualtran import (
     Bloq,
     bloq_example,
     BloqBuilder,
     BloqDocSpec,
+    DecomposeTypeError,
     QAny,
     QBit,
     QMontgomeryUInt,
@@ -41,7 +42,7 @@
 from qualtran.bloqs.mcmt import And, MultiAnd
 from qualtran.bloqs.mod_arithmetic.mod_multiplication import ModDbl
 from qualtran.bloqs.swap_network import CSwapApprox
-from qualtran.resource_counting import BloqCountDictT, BloqCountT
+from qualtran.resource_counting import BloqCountDictT
 from qualtran.resource_counting._call_graph import SympySymbolAllocator
 from qualtran.symbolics import HasLength, is_symbolic
 
@@ -53,6 +54,8 @@
 
 @frozen
 class _KaliskiIterationStep1(Bloq):
+    """The first layer of operations in figure 15 of https://arxiv.org/pdf/2302.06639."""
+
     bitsize: 'SymbolicInt'
 
     @cached_property
@@ -73,6 +76,8 @@ def on_classical_vals(self, v: int, m: int, f: int) -> Dict[str, 'ClassicalValT'
     def build_composite_bloq(
         self, bb: 'BloqBuilder', v: Soquet, m: Soquet, f: Soquet
     ) -> Dict[str, 'SoquetT']:
+        if is_symbolic(self.bitsize):
+            raise DecomposeTypeError(f'symbolic decomposition is not supported for {self}')
         v_arr = bb.split(v)
         ctrls = np.concatenate([v_arr, [f]])
         ctrls, junk, target = bb.add(MultiAnd(cvs=[0] * self.bitsize + [1]), ctrl=ctrls)
@@ -88,14 +93,16 @@ def build_composite_bloq(
 
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         if is_symbolic(self.bitsize):
-            cvs = HasLength(self.bitsize)
+            cvs: Union[HasLength, List[int]] = HasLength(self.bitsize)
         else:
-            cvs = [0] * self.bitsize
+            cvs = [0] * int(self.bitsize)
         return {MultiAnd(cvs=cvs): 1, MultiAnd(cvs=cvs).adjoint(): 1, CNOT(): 2}
 
 
 @frozen
 class _KaliskiIterationStep2(Bloq):
+    """The second layer of operations in figure 15 of https://arxiv.org/pdf/2302.06639."""
+
     bitsize: 'SymbolicInt'
 
     @cached_property
@@ -154,6 +161,8 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
 
 @frozen
 class _KaliskiIterationStep3(Bloq):
+    """The third layer of operations in figure 15 of https://arxiv.org/pdf/2302.06639."""
+
     bitsize: 'SymbolicInt'
 
     @cached_property
@@ -215,6 +224,8 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
 
 @frozen
 class _KaliskiIterationStep4(Bloq):
+    """The fourth layer of operations in figure 15 of https://arxiv.org/pdf/2302.06639."""
+
     bitsize: 'SymbolicInt'
 
     @cached_property
@@ -252,6 +263,8 @@ def build_call_graph(self, ssa: SympySymbolAllocator) -> 'BloqCountDictT':
 
 @frozen
 class _KaliskiIterationStep5(Bloq):
+    """The fifth layer of operations in figure 15 of https://arxiv.org/pdf/2302.06639."""
+
     bitsize: 'SymbolicInt'
 
     @cached_property
@@ -297,6 +310,8 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
 
 @frozen
 class _KaliskiIterationStep6(Bloq):
+    """The sixth layer of operations in figure 15 of https://arxiv.org/pdf/2302.06639."""
+
     bitsize: 'SymbolicInt'
     mod: 'SymbolicInt'
 
@@ -342,6 +357,8 @@ def build_composite_bloq(
         m: Soquet,
         f: Soquet,
     ) -> Dict[str, 'SoquetT']:
+        if is_symbolic(self.bitsize, self.mod):
+            raise DecomposeTypeError(f'symbolic decomposition is not supported for {self}')
         m, b = bb.add(CNOT(), ctrl=m, target=b)
         a, b = bb.add(CNOT(), ctrl=a, target=b)
 
@@ -375,7 +392,9 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
 
 
 @frozen
-class KaliskiIteration(Bloq):
+class _KaliskiIteration(Bloq):
+    """A single full Kaliski iteration. See figure 15 of https://arxiv.org/pdf/2302.06639."""
+
     bitsize: 'SymbolicInt'
     mod: 'SymbolicInt'
 
@@ -417,9 +436,58 @@ def build_composite_bloq(
         bb.free(b)
         return {'u': u, 'v': v, 'r': r, 's': s, 'm': m, 'f': f}
 
+    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
+        return {  # each of the six step bloqs is counted once
+            _KaliskiIterationStep1(self.bitsize): 1,
+            _KaliskiIterationStep2(self.bitsize): 1,
+            _KaliskiIterationStep3(self.bitsize): 1,
+            _KaliskiIterationStep4(self.bitsize): 1,
+            _KaliskiIterationStep5(self.bitsize): 1,
+            _KaliskiIterationStep6(self.bitsize, self.mod): 1,
+        }
+
+    def on_classical_vals(
+        self, u: int, v: int, r: int, s: int, m: int, f: int
+    ) -> Dict[str, 'ClassicalValT']:
+        a = b = 0  # local scratch flags, both start cleared
+        assert m == 0  # the m bit must come in cleared
+        m ^= f & (v == 0)  # set m when f is set and v is already zero...
+        f ^= m  # ...and clear f in that case
+
+        a ^= f & (u % 2 == 0)  # a: f set and u even
+        m ^= f & (a == 0) & (v % 2 == 0)  # toggle m when f set, a clear and v even
+        b ^= a
+        b ^= m  # b now holds a XOR m
+
+        t = (u > v) & (b == 0) & f  # f set, b clear and u > v
+        a ^= t
+        m ^= t  # a and m toggle together, so a XOR m is unchanged
+
+        if a:  # a selects swapping (u, r) with (v, s)
+            u, v = v, u
+            r, s = s, r
+
+        if f and b == 0:  # subtract/add only while f is set and b is clear
+            v -= u
+            s += r
+
+        b ^= m
+        b ^= a  # XOR a and m back out of b, returning it to 0
+        if f:  # halve v while f is set; v must be even here
+            assert v % 2 == 0, f'{u=} {v=} {r=} {s=} {a=} {b=} {m=} {f=}'
+            v >>= 1
+        r = (r << 1) % self.mod  # double r modulo self.mod
+        if a:  # swap back under the same flag a
+            u, v = v, u
+            s, r = r, s
+        a ^= s == 0  # parses as a ^= (s == 0)
+        return {'u': u, 'v': v, 'r': r, 's': s, 'a': a, 'b': b, 'm': m, 'f': f}
+
 
 @frozen
 class _KaliskiModInverseImpl(Bloq):
+    """The full Kaliski algorithm. See appendix C5 of https://arxiv.org/pdf/2302.06639."""
+
     bitsize: 'SymbolicInt'
     mod: 'SymbolicInt'
 
@@ -438,7 +506,7 @@ def signature(self) -> 'Signature':
 
     @cached_property
     def _kaliski_iteration(self):
-        return KaliskiIteration(self.bitsize, self.mod)
+        return _KaliskiIteration(self.bitsize, self.mod)
 
     def build_composite_bloq(
         self, bb: 'BloqBuilder', u: Soquet, v: Soquet, r: Soquet, s: Soquet, m: Soquet, f: Soquet
@@ -476,6 +544,30 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
 
 @frozen
 class KaliskiModInverse(Bloq):
+    r"""Compute modular multiplicative inverse -inplace- of numbers in montgomery form.
+
+    Applies the transformation
+    $$
+        \ket{x} \ket{0} \rightarrow \ket{x^{-1} 2^{2n} \mod p} \ket{\mathrm{garbage}}
+    $$
+
+    Args:
+        bitsize: size of the number.
+        mod: The integer modulus.
+        uncompute: whether to compute or uncompute.
+
+    Registers:
+        x: The register for which we compute the multiplicative inverse.
+        m: A 2*bitsize register of intermediate values needed for uncomputation.
+
+    References:
+        [Performance Analysis of a Repetition Cat Code Architecture: Computing 256-bit Elliptic Curve Logarithm in 9 Hours with 126 133 Cat Qubits](https://arxiv.org/abs/2302.06639)
+            Appendix C5.
+
+        [How to compute a 256-bit elliptic curve private key with only 50 million Toffoli gates](https://arxiv.org/abs/2306.08585)
+            page 8.
+    """
+
     bitsize: 'SymbolicInt'
     mod: 'SymbolicInt'
     uncompute: bool = False
@@ -499,14 +591,18 @@ def build_composite_bloq(
         f = bb.allocate(1)
 
         if self.uncompute:
-            u, x, r, s, m, f = bb.add_from(
-                _KaliskiModInverseImpl(self.bitsize, self.mod).adjoint(),
-                u=u,
-                v=r,
-                r=x,
-                s=s,
-                m=m,
-                f=f,
+            assert m is not None
+            u, x, r, s, m, f = cast(
+                Tuple[Soquet, Soquet, Soquet, Soquet, Soquet, Soquet],
+                bb.add_from(
+                    _KaliskiModInverseImpl(self.bitsize, self.mod).adjoint(),
+                    u=u,
+                    v=r,
+                    r=x,
+                    s=s,
+                    m=m,
+                    f=f,
+                ),
             )
             bb.free(u)
             bb.free(r)
@@ -516,11 +612,14 @@ def build_composite_bloq(
             return {'x': x}
 
         m = bb.allocate(2 * self.bitsize)
-        # m = bb.split(m)
         u, v, x, s, m, f = bb.add_from(
             _KaliskiModInverseImpl(self.bitsize, self.mod), u=u, v=x, r=r, s=s, m=m, f=f
         )
 
+        assert isinstance(u, Soquet)
+        assert isinstance(v, Soquet)
+        assert isinstance(s, Soquet)
+        assert isinstance(f, Soquet)
         bb.free(u)
         bb.free(v)
         bb.free(s)
@@ -529,3 +628,33 @@ def build_composite_bloq(
 
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         return _KaliskiModInverseImpl(self.bitsize, self.mod).build_call_graph(ssa)
+
+    def on_classical_vals(self, x: int, m: int = 0) -> Dict[str, 'ClassicalValT']:
+        u, v, r, s, f = int(self.mod), x, 0, 1, 1
+        iteration = _KaliskiModInverseImpl(self.bitsize, self.mod)._kaliski_iteration
+        for _ in range(2 * int(self.bitsize)):
+            u, v, r, s, m_i, f = iteration.call_classically(u=u, v=v, r=r, s=s, m=0, f=f)
+            m = (m << 1) | m_i
+        assert u == 1
+        assert s == self.mod
+        assert f == 0
+        assert v == 0
+        return {'x': self.mod - r, 'm': m}
+
+
+@bloq_example
+def _kaliskimodinverse_example() -> KaliskiModInverse:
+    kaliskimodinverse_example = KaliskiModInverse(32, 10**9 + 7)
+    return kaliskimodinverse_example
+
+
+@bloq_example
+def _kaliskimodinverse_symbolic() -> KaliskiModInverse:
+    n, p = sympy.symbols('n p')
+    kaliskimodinverse_symbolic = KaliskiModInverse(n, p)
+    return kaliskimodinverse_symbolic
+
+
+_KALISKI_MOD_INVERSE_DOC = BloqDocSpec(
+    bloq_cls=KaliskiModInverse, examples=[_kaliskimodinverse_example, _kaliskimodinverse_symbolic]
+)
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division_test.py b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
index 3d65d94e7..aa39424ff 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division_test.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
@@ -15,10 +15,10 @@
 import math
 
 import pytest
+import sympy
 
 import qualtran.testing as qlt_testing
-from qualtran import QMontgomeryUInt
-from qualtran.bloqs.mod_arithmetic import KaliskiModInverse
+from qualtran.bloqs.mod_arithmetic.mod_division import _kaliskimodinverse_example, KaliskiModInverse
 from qualtran.resource_counting import get_cost_value, QECGatesCost
 from qualtran.resource_counting.generalizers import ignore_alloc_free, ignore_split_join
 
@@ -53,3 +53,34 @@ def test_kaliski_mod_inverse_decomposition(bitsize, mod):
 def test_kaliski_mod_bloq_counts(bitsize, mod):
     b = KaliskiModInverse(bitsize, mod)
     qlt_testing.assert_equivalent_bloq_counts(b, [ignore_alloc_free, ignore_split_join])
+
+
+def test_kaliski_symbolic_cost():
+    n, p = sympy.symbols('n p')
+    b = KaliskiModInverse(n, p)
+    cost = get_cost_value(b, QECGatesCost()).total_t_and_ccz_count()
+    # We have some T gates since we use CSwapApprox instead of n CSWAPs.
+    total_toff = (cost['n_t'] / 4 + cost['n_ccz']) * sympy.Integer(1)
+    total_toff = total_toff.expand()
+
+    # The toffoli cost from Litinski https://arxiv.org/abs/2306.08585 is 26n^2 + 2n.
+    # The cost of Kaliski is 2*n*(cost of an iteration) + (cost of computing $p - x$)
+    #
+    #   - The cost of computing $p-x$ in Litinski is 2n (Neg -> Add(p)). In our
+    #       construction this is just $n-1$ (BitwiseNot -> Add(p+1)).
+    #   - The cost of an iteration in Litinski is $13n$ since they ignore constants.
+    #       Our construction is exactly the same but we also count the constants
+    #       which amount to $3$, for a total cost of $13n + 3$.
+    # For example the cost of ModDbl is 2n+1. In their figure 8, they report
+    # it as just $2n$. ModDbl gets executed within the 2n loop so its contribution
+    # to the overall cost should be 4n^2 + 2n instead of just 4n^2.
+    assert total_toff == 26 * n**2 + 7 * n - 1
+
+
+def test_kaliskimodinverse_example(bloq_autotester):
+    bloq_autotester(_kaliskimodinverse_example)
+
+
+@pytest.mark.notebook
+def test_notebook():
+    qlt_testing.execute_notebook('mod_division')
diff --git a/qualtran/bloqs/mod_arithmetic/mod_multiplication.py b/qualtran/bloqs/mod_arithmetic/mod_multiplication.py
index 95f74edc7..c94f177ed 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_multiplication.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_multiplication.py
@@ -72,7 +72,7 @@ class ModDbl(Bloq):
     """
 
     dtype: Union[QUInt, QMontgomeryUInt]
-    mod: int = attrs.field()
+    mod: 'SymbolicInt' = attrs.field()
 
     @mod.validator
     def _validate_mod(self, attribute, value):
diff --git a/qualtran/serialization/resolver_dict.py b/qualtran/serialization/resolver_dict.py
index bc3be1a65..3117a2495 100644
--- a/qualtran/serialization/resolver_dict.py
+++ b/qualtran/serialization/resolver_dict.py
@@ -115,6 +115,7 @@
 import qualtran.bloqs.mean_estimation.complex_phase_oracle
 import qualtran.bloqs.mean_estimation.mean_estimation_operator
 import qualtran.bloqs.mod_arithmetic
+import qualtran.bloqs.mod_arithmetic.mod_division
 import qualtran.bloqs.mod_arithmetic.mod_multiplication
 import qualtran.bloqs.mod_arithmetic.mod_subtraction
 import qualtran.bloqs.multiplexers.apply_gate_to_lth_target
@@ -347,6 +348,8 @@
     "qualtran.bloqs.mod_arithmetic.mod_multiplication.CModMulK": qualtran.bloqs.mod_arithmetic.mod_multiplication.CModMulK,
     "qualtran.bloqs.mod_arithmetic.mod_multiplication.DirtyOutOfPlaceMontgomeryModMul": qualtran.bloqs.mod_arithmetic.mod_multiplication.DirtyOutOfPlaceMontgomeryModMul,
     "qualtran.bloqs.mod_arithmetic.mod_multiplication.SingleWindowModMul": qualtran.bloqs.mod_arithmetic.mod_multiplication.SingleWindowModMul,
+    "qualtran.bloqs.mod_arithmetic.mod_division.KaliskiModInverse": qualtran.bloqs.mod_arithmetic.mod_division.KaliskiModInverse,
+    "qualtran.bloqs.mod_arithmetic.mod_division._KaliskiIteration": qualtran.bloqs.mod_arithmetic.mod_division._KaliskiIteration,
     "qualtran.bloqs.factoring._factoring_shims.MeasureQFT": qualtran.bloqs.factoring._factoring_shims.MeasureQFT,
     "qualtran.bloqs.factoring.rsa.rsa_phase_estimate.RSAPhaseEstimate": qualtran.bloqs.factoring.rsa.rsa_phase_estimate.RSAPhaseEstimate,
     "qualtran.bloqs.factoring.rsa.rsa_mod_exp.ModExp": qualtran.bloqs.factoring.rsa.rsa_mod_exp.ModExp,

From 6bad0609f407ed2aacb04482f8e4391000507eb3 Mon Sep 17 00:00:00 2001
From: Nour Yosri <noureldinyosri@gmail.com>
Date: Mon, 21 Oct 2024 15:02:54 -0700
Subject: [PATCH 07/11] nit

---
 qualtran/bloqs/mod_arithmetic/mod_division.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.ipynb b/qualtran/bloqs/mod_arithmetic/mod_division.ipynb
index a01906ac3..96cc07c04 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division.ipynb
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.ipynb
@@ -40,7 +40,7 @@
     "\n",
     "Applies the transformation\n",
     "$$\n",
-    "    \\ket{x} \\ket{0} \\rightarrow \\ket{x^{-1} 2^{2n} \\mod \\mathrm{mod}} \\ket{\\mathrm{garbage}}\n",
+    "    \\ket{x} \\ket{0} \\rightarrow \\ket{x^{-1} 2^{2n} \\mod p} \\ket{\\mathrm{garbage}}\n",
     "$$\n",
     "\n",
     "#### Parameters\n",

From 6f2c4480cdb08d8d99a9940903063a02bb6cf0a5 Mon Sep 17 00:00:00 2001
From: Nour Yosri <noureldinyosri@gmail.com>
Date: Tue, 29 Oct 2024 10:59:52 -0700
Subject: [PATCH 08/11] address comments

---
 qualtran/bloqs/mod_arithmetic/mod_division.py      | 11 +++++++++++
 qualtran/bloqs/mod_arithmetic/mod_division_test.py | 11 ++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.py b/qualtran/bloqs/mod_arithmetic/mod_division.py
index 6d24625ce..0588ec8bb 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.py
@@ -449,6 +449,17 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
     def on_classical_vals(
         self, u: int, v: int, r: int, s: int, m: int, f: int
     ) -> Dict[str, 'ClassicalValT']:
+        """This is a classical encoding of figure 15 of https://arxiv.org/pdf/2302.06639.
+
+        The variables `m` and the local variables `a` and `b` translate into evaluating the if
+        conditions in `Algorithm 2 `. The meaning of the variables are:
+            - `a`: is `u` even?
+            - `b`: are both `u` and `v` even?
+            - `m`: is `u` odd and `v` even?
+            - `f`: classically once `f = 0` the algorithm terminates.
+        `a` and `b` are local and cleaned after each iteration. The variable `m` is kept and
+        is used in uncomputation.
+        """
         a = b = 0
         assert m == 0
         m ^= f & (v == 0)
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division_test.py b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
index aa39424ff..4cd0c5a7e 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division_test.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
@@ -18,6 +18,7 @@
 import sympy
 
 import qualtran.testing as qlt_testing
+from qualtran import QMontgomeryUInt
 from qualtran.bloqs.mod_arithmetic.mod_division import _kaliskimodinverse_example, KaliskiModInverse
 from qualtran.resource_counting import get_cost_value, QECGatesCost
 from qualtran.resource_counting.generalizers import ignore_alloc_free, ignore_split_join
@@ -28,17 +29,17 @@
 def test_kaliski_mod_inverse_classical_action(bitsize, mod):
     blq = KaliskiModInverse(bitsize, mod)
     cblq = blq.decompose_bloq()
-    p2 = pow(2, bitsize, mod)
+    dtype = QMontgomeryUInt(bitsize)
+    R = pow(2, bitsize, mod)
     for x in range(1, mod):
         if math.gcd(x, mod) != 1:
             continue
-        x_montgomery = (x * p2) % mod
-        inv_x = pow(x, -1, mod)
-        inv_x_montgomery = (inv_x * p2) % mod
+        x_montgomery = dtype.uint_to_montgomery(x, mod)
         res = blq.call_classically(x=x_montgomery)
         assert res == cblq.call_classically(x=x_montgomery)
         assert len(res) == 2
-        assert res[0] == inv_x_montgomery
+        assert res[0] == dtype.montgomery_inverse(x_montgomery, mod)
+        assert dtype.montgomery_product(res[0], x_montgomery, mod) == R
 
 
 @pytest.mark.parametrize('bitsize', [5, 6])

From e6c18d7054541cf909399e7a02dd86d263ac7b96 Mon Sep 17 00:00:00 2001
From: Nour Yosri <noureldinyosri@gmail.com>
Date: Tue, 29 Oct 2024 11:08:41 -0700
Subject: [PATCH 09/11] type

---
 qualtran/bloqs/mod_arithmetic/mod_division_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qualtran/bloqs/mod_arithmetic/mod_division_test.py b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
index 4cd0c5a7e..31c56d394 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division_test.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division_test.py
@@ -39,7 +39,7 @@ def test_kaliski_mod_inverse_classical_action(bitsize, mod):
         assert res == cblq.call_classically(x=x_montgomery)
         assert len(res) == 2
         assert res[0] == dtype.montgomery_inverse(x_montgomery, mod)
-        assert dtype.montgomery_product(res[0], x_montgomery, mod) == R
+        assert dtype.montgomery_product(int(res[0]), x_montgomery, mod) == R
 
 
 @pytest.mark.parametrize('bitsize', [5, 6])

From 2246805a2cfa06b0894c41dad3cf5affef759929 Mon Sep 17 00:00:00 2001
From: Nour Yosri <noureldinyosri@gmail.com>
Date: Tue, 29 Oct 2024 12:11:28 -0700
Subject: [PATCH 10/11] update classical action

---
 .../bloqs/mod_arithmetic/mod_division.ipynb   |  1 +
 qualtran/bloqs/mod_arithmetic/mod_division.py | 77 +++++++++----------
 2 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.ipynb b/qualtran/bloqs/mod_arithmetic/mod_division.ipynb
index 96cc07c04..fd34e136d 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division.ipynb
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.ipynb
@@ -54,6 +54,7 @@
     "\n",
     "#### References\n",
     " - [Performance Analysis of a Repetition Cat Code Architecture: Computing 256-bit Elliptic Curve Logarithm in 9 Hours with 126 133 Cat Qubits](https://arxiv.org/abs/2302.06639).     Appendix C5.\n",
+    " - [Improved quantum circuits for elliptic curve discrete logarithms](https://arxiv.org/abs/2001.09580).     Fig 7(b)\n",
     " - [How to compute a 256-bit elliptic curve private key with only 50 million Toffoli gates](https://arxiv.org/abs/2306.08585).     page 8.\n"
    ]
   },
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.py b/qualtran/bloqs/mod_arithmetic/mod_division.py
index 0588ec8bb..948d0df3d 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.py
@@ -449,50 +449,44 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
     def on_classical_vals(
         self, u: int, v: int, r: int, s: int, m: int, f: int
     ) -> Dict[str, 'ClassicalValT']:
-        """This is a classical encoding of figure 15 of https://arxiv.org/pdf/2302.06639.
-
-        The variables `m` and the local variables `a` and `b` translate into evaluating the if
-        conditions in `Algorithm 2 `. The meaning of the variables are:
-            - `a`: is `u` even?
-            - `b`: are both `u` and `v` even?
-            - `m`: is `u` odd and `v` even?
-            - `f`: classically once `f = 0` the algorithm terminates.
-        `a` and `b` are local and cleaned after each iteration. The variable `m` is kept and
-        is used in uncomputation.
-        """
-        a = b = 0
-        assert m == 0
-        m ^= f & (v == 0)
-        f ^= m
-
-        a ^= f & (u % 2 == 0)
-        m ^= f & (a == 0) & (v % 2 == 0)
-        b ^= a
-        b ^= m
-
-        t = (u > v) & (b == 0) & f
-        a ^= t
-        m ^= t
+        """This is the Kaliski algorithm as described in Fig7 of https://arxiv.org/pdf/2001.09580.
 
-        if a:
-            u, v = v, u
-            r, s = s, r
+        The following implementation merges together the pseudocode from Fig7 of https://arxiv.org/pdf/2001.09580
+        and the circuit in figure 15 of https://arxiv.org/pdf/2302.06639; This is in order to compute the values
+        of `f` and `m`.
 
-        if f and b == 0:
-            v -= u
-            s += r
 
-        b ^= m
-        b ^= a
-        if f:
-            assert v % 2 == 0, f'{u=} {v=} {r=} {s=} {a=} {b=} {m=} {f=}'
+        """
+        assert m == 0
+        if f == 0:
+            # When `f = 0` this means that the algorithm is nearly over and that we just need to
+            # double the value of `r`.
+            r = (r << 1) % self.mod
+        elif v == 0:
+            # `v = 0` is the termination condition of the algorithm and it means that the only
+            # remaining step is multiplying `r` by 2 raised to the number of remaining iterations.
+            # Classically this translates into a `r = (r * pow(2, k, p))%p` where k is the number
+            # of iterations left followed by a break statement.
+            m = u & 1
+            f = 0
+            r = (r << 1) % self.mod
+        else:
+            m = (u % 2 == 1) & (v % 2 == 0)
+            # Kaliski iteration as described in Fig7 of https://arxiv.org/pdf/2001.09580.
+            swap = (u % 2 == 0 and v % 2 == 1) or (u % 2 == 1 and v % 2 == 1 and u > v)
+            if swap:
+                u, v = v, u
+                r, s = s, r
+            if u % 2 == 1 and v % 2 == 1:
+                v -= u
+                s += r
+            assert v % 2 == 0, f'{u=} {v=} {swap=}'
             v >>= 1
-        r = (r << 1) % self.mod
-        if a:
-            u, v = v, u
-            s, r = r, s
-        a ^= s == 0
-        return {'u': u, 'v': v, 'r': r, 's': s, 'a': a, 'b': b, 'm': m, 'f': f}
+            r = (r << 1) % self.mod
+            if swap:
+                u, v = v, u
+                r, s = s, r
+        return {'u': u, 'v': v, 'r': r, 's': s, 'm': m, 'f': f}
 
 
 @frozen
@@ -575,6 +569,9 @@ class KaliskiModInverse(Bloq):
         [Performance Analysis of a Repetition Cat Code Architecture: Computing 256-bit Elliptic Curve Logarithm in 9 Hours with 126 133 Cat Qubits](https://arxiv.org/abs/2302.06639)
             Appendix C5.
 
+        [Improved quantum circuits for elliptic curve discrete logarithms](https://arxiv.org/abs/2001.09580)
+            Fig 7(b)
+
         [How to compute a 256-bit elliptic curve private key with only 50 million Toffoli gates](https://arxiv.org/abs/2306.08585)
             page 8.
     """

From aab7179ef4eef0ecf4fea4d3a2f838f31fd24665 Mon Sep 17 00:00:00 2001
From: Nour Yosri <noureldinyosri@gmail.com>
Date: Tue, 29 Oct 2024 12:13:10 -0700
Subject: [PATCH 11/11] nit

---
 qualtran/bloqs/mod_arithmetic/mod_division.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.py b/qualtran/bloqs/mod_arithmetic/mod_division.py
index 948d0df3d..d5bad9fc1 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.py
@@ -454,8 +454,6 @@ def on_classical_vals(
         The following implementation merges together the pseudocode from Fig7 of https://arxiv.org/pdf/2001.09580
         and the circuit in figure 15 of https://arxiv.org/pdf/2302.06639; This is in order to compute the values
         of `f` and `m`.
-
-
         """
         assert m == 0
         if f == 0: