diff --git a/internal/stats/latest.stats b/internal/stats/latest.stats
index 329496c6f7..37f16d35c6 100644
Binary files a/internal/stats/latest.stats and b/internal/stats/latest.stats differ
diff --git a/std/algebra/emulated/sw_emulated/point.go b/std/algebra/emulated/sw_emulated/point.go
index 0ed2f78307..6c73bb046c 100644
--- a/std/algebra/emulated/sw_emulated/point.go
+++ b/std/algebra/emulated/sw_emulated/point.go
@@ -582,7 +582,7 @@ func (c *Curve[B, S]) scalarMulGLV(Q *AffinePoint[B], s *emulated.Element[S], op
 	// 		B1 = Q+Φ(Q)
 	// 		B2 = -Q-Φ(Q)
 	// 		B3 = Q-Φ(Q)
-	// 		B4 = -QΦ(Q)
+	// 		B4 = -Q+Φ(Q)
 	//
 	// If we extend this by merging two iterations, we need to look up P and P'
 	// both from {B1, B2, B3, B4} and compute:
diff --git a/std/algebra/native/fields_bls12377/e12_pairing.go b/std/algebra/native/fields_bls12377/e12_pairing.go
index 08624038f1..c6f87c4a0a 100644
--- a/std/algebra/native/fields_bls12377/e12_pairing.go
+++ b/std/algebra/native/fields_bls12377/e12_pairing.go
@@ -22,16 +22,17 @@ func (e *E12) Square034(api frontend.API, x E12) *E12 {
 
 	c0.B0.Sub(api, x.C0.B0, x.C1.B0)
 	c0.B1.Neg(api, x.C1.B1)
-	c0.B2 = E2{0, 0}
 
 	c3.B0 = x.C0.B0
 	c3.B1.Neg(api, x.C1.B0)
 	c3.B2.Neg(api, x.C1.B1)
 
 	c2.Mul0By01(api, x.C0.B0, x.C1.B0, x.C1.B1)
-	c3.MulBy01(api, c0.B0, c0.B1).Add(api, c3, c2)
-	e.C1.B0.Add(api, c2.B0, c2.B0)
-	e.C1.B1.Add(api, c2.B1, c2.B1)
+	c3.MulBy01(api, c0.B0, c0.B1)
+	c3.B0.Add(api, c3.B0, c2.B0)
+	c3.B1.Add(api, c3.B1, c2.B1)
+	e.C1.B0.MulByFp(api, c2.B0, 2)
+	e.C1.B1.MulByFp(api, c2.B1, 2)
 
 	e.C0.B0 = c3.B0
 	e.C0.B1.Add(api, c3.B1, c2.B0)
@@ -49,8 +50,7 @@ func (e *E12) MulBy034(api frontend.API, c3, c4 E2) *E12 {
 	b := e.C1
 
 	b.MulBy01(api, c3, c4)
-
-	c3.Add(api, E2{A0: 1, A1: 0}, c3)
+	c3.A0 = api.Add(1, c3.A0)
 	d.Add(api, e.C0, e.C1)
 	d.MulBy01(api, c3, c4)
 
@@ -81,17 +81,19 @@ func Mul034By034(api frontend.API, d3, d4, c3, c4 E2) *[5]E2 {
 }
 
 func Mul01234By034(api frontend.API, x [5]E2, z3, z4 E2) *E12 {
-	var a, b, z1, z0, one E6
-	var zero E2
-	zero.SetZero()
-	one.SetOne()
+	var a, b, z1, z0 E6
 	c0 := &E6{B0: x[0], B1: x[1], B2: x[2]}
-	c1 := &E6{B0: x[3], B1: x[4], B2: zero}
-	a.Add(api, one, E6{B0: z3, B1: z4, B2: zero})
-	b.Add(api, *c0, *c1)
-	a.Mul(api, a, b)
+	a.B0.A0 = api.Add(z3.A0, 1)
+	a.B0.A1 = z3.A1
+	a.B1 = z4
+	a.B2.A0 = 0
+	a.B2.A1 = 0
+	b.B0.Add(api, c0.B0, x[3])
+	b.B1.Add(api, c0.B1, x[4])
+	b.B2 = c0.B2
+	b.MulBy01(api, a.B0, a.B1)
 	c := *Mul01By01(api, z3, z4, x[3], x[4])
-	z1.Sub(api, a, *c0)
+	z1.Sub(api, b, *c0)
 	z1.Sub(api, z1, c)
 	z0.MulByNonResidue(api, c)
 	z0.Add(api, z0, *c0)
@@ -103,12 +105,11 @@ func Mul01234By034(api frontend.API, x [5]E2, z3, z4 E2) *E12 {
 
 func (e *E12) MulBy01234(api frontend.API, x [5]E2) *E12 {
 	var a, b, c, z1, z0 E6
-	var zero E2
-	zero.SetZero()
 	c0 := &E6{B0: x[0], B1: x[1], B2: x[2]}
-	c1 := &E6{B0: x[3], B1: x[4], B2: zero}
 	a.Add(api, e.C0, e.C1)
-	b.Add(api, *c0, *c1)
+	b.B0.Add(api, x[0], x[3])
+	b.B1.Add(api, x[1], x[4])
+	b.B2 = x[2]
 	a.Mul(api, a, b)
 	b.Mul(api, e.C0, *c0)
 	c = e.C1
diff --git a/std/algebra/native/fields_bls12377/e2.go b/std/algebra/native/fields_bls12377/e2.go
index a203843157..fd6f99ecde 100644
--- a/std/algebra/native/fields_bls12377/e2.go
+++ b/std/algebra/native/fields_bls12377/e2.go
@@ -68,8 +68,8 @@ func (e *E2) Add(api frontend.API, e1, e2 E2) *E2 {
 
 // Double e2 elmt
 func (e *E2) Double(api frontend.API, e1 E2) *E2 {
-	e.A0 = api.Add(e1.A0, e1.A0)
-	e.A1 = api.Add(e1.A1, e1.A1)
+	e.A0 = api.Mul(e1.A0, 2)
+	e.A1 = api.Mul(e1.A1, 2)
 	return e
 }
 
diff --git a/std/algebra/native/sw_bls12377/g1.go b/std/algebra/native/sw_bls12377/g1.go
index c823e69c29..7b5936a8c4 100644
--- a/std/algebra/native/sw_bls12377/g1.go
+++ b/std/algebra/native/sw_bls12377/g1.go
@@ -186,29 +186,30 @@ func (P *G1Affine) ScalarMul(api frontend.API, Q G1Affine, s interface{}, opts .
 	}
 }
 
-// varScalarMul sets P = [s] Q and returns P.
+// varScalarMul sets P = [s]Q and returns P. It doesn't modify Q nor s.
+// It implements an optimized version based on algorithm 1 of [Halo] (see Section 6.2 and appendix C).
+//
+// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+// (0,0) is not on the curve but we conventionally take it as the
+// neutral/infinity point as per the [EVM].
+//
+// [Halo]: https://eprint.iacr.org/2019/1021.pdf
+// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf
 func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variable, opts ...algopts.AlgebraOption) *G1Affine {
 	cfg, err := algopts.NewConfig(opts...)
 	if err != nil {
 		panic(err)
 	}
-	// This method computes [s] Q. We use several methods to reduce the number
-	// of added constraints - first, instead of classical double-and-add, we use
-	// the optimized version from https://github.com/zcash/zcash/issues/3924
-	// which allows to omit computation of several intermediate values.
-	// Secondly, we use the GLV scalar multiplication to reduce the number
-	// iterations in the main loop. There is a small difference though - as
-	// two-bit select takes three constraints, then it takes as many constraints
-	// to compute ± Q ± Φ(Q) every iteration instead of selecting the value
-	// from a precomputed table. However, precomputing the table adds 12
-	// additional constraints and thus table-version is more expensive than
-	// addition-version.
 	var selector frontend.Variable
 	if cfg.CompleteArithmetic {
 		// if Q=(0,0) we assign a dummy (1,1) to Q and continue
 		selector = api.And(api.IsZero(Q.X), api.IsZero(Q.Y))
 		Q.Select(api, selector, G1Affine{X: 1, Y: 1}, Q)
 	}
+
+	// We use the endomorphism à la GLV to compute [s]Q as
+	// 		[s1]Q + [s2]Φ(Q)
+	//
 	// The context we are working is based on the `outer` curve. However, the
 	// points and the operations on the points are performed on the `inner`
 	// curve of the outer curve. We require some parameters from the inner
@@ -218,31 +219,24 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl
 	// the hints allow to decompose the scalar s into s1 and s2 such that
 	//     s1 + λ * s2 == s mod r,
 	// where λ is third root of one in 𝔽_r.
-	sd, err := api.Compiler().NewHint(decomposeScalarG1, 3, s)
+	sd, err := api.Compiler().NewHint(decomposeScalarG1Simple, 2, s)
 	if err != nil {
 		// err is non-nil only for invalid number of inputs
 		panic(err)
 	}
 	s1, s2 := sd[0], sd[1]
 
-	// when we split scalar, then s1, s2 < lambda by default. However, to have
-	// the high 1-2 bits of s1, s2 set, the hint functions compute the
-	// decomposition for
-	//     s + k*r (for some k)
-	// instead and omits the last reduction. Thus, to constrain s1 and s2, we
-	// have to assert that
-	//     s1 + λ * s2 == s + k*r
-	api.AssertIsEqual(api.Add(s1, api.Mul(s2, cc.lambda)), api.Add(s, api.Mul(cc.fr, sd[2])))
-
-	// As the decomposed scalars are not fully reduced, then in addition of
-	// having the high bit set, an overflow bit may also be set. Thus, the total
-	// number of bits may be one more than the bitlength of λ.
-	nbits := cc.lambda.BitLen() + 1
+	// s1 + λ * s2 == s
+	api.AssertIsEqual(
+		api.Add(s1, api.Mul(s2, cc.lambda)),
+		s,
+	)
 
+	// For BLS12 λ bitsize is 127 equal to half of r bitsize
+	nbits := cc.lambda.BitLen()
 	s1bits := api.ToBinary(s1, nbits)
 	s2bits := api.ToBinary(s2, nbits)
 
-	var Acc /*accumulator*/, B, B2 /*tmp vars*/ G1Affine
 	// precompute -Q, -Φ(Q), Φ(Q)
 	var tableQ, tablePhiQ [2]G1Affine
 	tableQ[1] = Q
@@ -250,45 +244,51 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl
 	cc.phi1(api, &tablePhiQ[1], &Q)
 	tablePhiQ[0].Neg(api, tablePhiQ[1])
 
-	// We now initialize the accumulator. Due to the way the scalar is
-	// decomposed, either the high bits of s1 or s2 are set and we can use the
-	// incomplete addition laws.
-
-	// Acc = Q + Φ(Q) = -Φ²(Q)
+	// we suppose that the first bits of the sub-scalars are 1 and set:
+	// 		Acc = Q + Φ(Q) = -Φ²(Q)
+	var Acc, B G1Affine
 	cc.phi2Neg(api, &Acc, &Q)
 
-	// However, we can not directly add step value conditionally as we may get
-	// to incomplete path of the addition formula. We either add or subtract
-	// step value from [2] Acc (instead of conditionally adding step value to
-	// Acc):
-	//     Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q)
-	// only y coordinate differs for negation, select on that instead.
-	// first bit
-	B.X = tableQ[0].X
-	B.Y = api.Select(s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y = api.Select(s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	// second bit
-	B.X = tableQ[0].X
-	B.Y = api.Select(s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y = api.Select(s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	B2.X = tablePhiQ[0].X
-	for i := nbits - 3; i > 0; i-- {
-		B.X = Q.X
-		B.Y = api.Select(s1bits[i], tableQ[1].Y, tableQ[0].Y)
-		B2.Y = api.Select(s2bits[i], tablePhiQ[1].Y, tablePhiQ[0].Y)
-		B.AddAssign(api, B2)
+	// At each iteration we need to compute:
+	// 		[2]Acc ± Q ± Φ(Q).
+	// We can compute [2]Acc and look up the (precomputed) point B from:
+	// 		B1 = +Q + Φ(Q)
+	B1 := Acc
+	// 		B2 = -Q - Φ(Q)
+	B2 := G1Affine{}
+	B2.Neg(api, B1)
+	// 		B3 = +Q - Φ(Q)
+	B3 := tableQ[1]
+	B3.AddAssign(api, tablePhiQ[0])
+	// 		B4 = -Q + Φ(Q)
+	B4 := G1Affine{}
+	B4.Neg(api, B3)
+	//
+	// Note that half the points are negatives of the other half,
+	// hence have the same X coordinates.
+
+	// However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen
+	// that Acc==B or -B. So we add the point H=(0,1) on BLS12-377 of order 2
+	// to it to avoid incomplete additions in the loop by forcing Acc to be
+	// different than the stored B.  Normally, the point H should be "killed
+	// out" by the first doubling in the loop and the result will remain
+	// unchanged. However, we are using affine coordinates that do not encode
+	// the infinity point. Given the affine formulae, doubling (0,1) results in
+	// (0,-1). Since the loop size N=nbits-1 is even we need to subtract
+	// [2^N]H = (0,1) from the result at the end.
+	//
+	// Acc = Q + Φ(Q) + H
+	Acc.AddAssign(api, G1Affine{X: 0, Y: 1})
+
+	for i := nbits - 1; i > 0; i-- {
+		B.X = api.Select(api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X)
+		B.Y = api.Lookup2(s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y)
+		// Acc = [2]Acc + B
 		Acc.DoubleAndAdd(api, &Acc, &B)
 	}
 
 	// i = 0
+	// subtract Q, resp. Φ(Q), from Acc if s1bits[0], resp. s2bits[0], is 0.
 	// When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. This means
 	// when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0).
 	if cfg.CompleteArithmetic {
@@ -304,6 +304,15 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl
 		Acc.Select(api, s2bits[0], Acc, tablePhiQ[0])
 	}
 
+	if cfg.CompleteArithmetic {
+		// subtract [2^N]H = (0,1) since we added H at the beginning
+		Acc.AddUnified(api, G1Affine{X: 0, Y: -1})
+		Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc)
+	} else {
+		// subtract [2^N]H = (0,1) since we added H at the beginning
+		Acc.AddAssign(api, G1Affine{X: 0, Y: -1})
+	}
+
 	P.X = Acc.X
 	P.Y = Acc.Y
 
@@ -555,7 +564,7 @@ func (P *G1Affine) jointScalarMulUnsafe(api frontend.API, Q, R G1Affine, s, t fr
 	return P
 }
 
-// scalarBitsMul computes s * p and returns it where sBits is the bit decomposition of s. It doesn't modify p nor sBits.
+// scalarBitsMul computes [s]Q and returns it where sBits is the bit decomposition of s. It doesn't modify Q nor sBits.
 // The method is similar to varScalarMul.
 func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits []frontend.Variable, opts ...algopts.AlgebraOption) *G1Affine {
 	cfg, err := algopts.NewConfig(opts...)
@@ -568,9 +577,19 @@ func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits []
 		selector = api.And(api.IsZero(Q.X), api.IsZero(Q.Y))
 		Q.Select(api, selector, G1Affine{X: 1, Y: 1}, Q)
 	}
+
+	// We use the endomorphism à la GLV to compute [s]Q as
+	// 		[s1]Q + [s2]Φ(Q)
+	//
+	// The context we are working is based on the `outer` curve. However, the
+	// points and the operations on the points are performed on the `inner`
+	// curve of the outer curve. We require some parameters from the inner
+	// curve.
 	cc := getInnerCurveConfig(api.Compiler().Field())
-	nbits := cc.lambda.BitLen() + 1
-	var Acc /*accumulator*/, B, B2 /*tmp vars*/ G1Affine
+
+	// For BLS12 λ bitsize is 127 equal to half of r bitsize
+	nbits := cc.lambda.BitLen()
+
 	// precompute -Q, -Φ(Q), Φ(Q)
 	var tableQ, tablePhiQ [2]G1Affine
 	tableQ[1] = Q
@@ -578,45 +597,51 @@ func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits []
 	cc.phi1(api, &tablePhiQ[1], &Q)
 	tablePhiQ[0].Neg(api, tablePhiQ[1])
 
-	// We now initialize the accumulator. Due to the way the scalar is
-	// decomposed, either the high bits of s1 or s2 are set and we can use the
-	// incomplete addition laws.
-
-	//     Acc = Q + Φ(Q)
-	Acc = tableQ[1]
-	Acc.AddAssign(api, tablePhiQ[1])
-
-	// However, we can not directly add step value conditionally as we may get
-	// to incomplete path of the addition formula. We either add or subtract
-	// step value from [2] Acc (instead of conditionally adding step value to
-	// Acc):
-	//     Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q)
-	// only y coordinate differs for negation, select on that instead.
-	B.X = tableQ[0].X
-	B.Y = api.Select(s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y = api.Select(s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	// second bit
-	B.X = tableQ[0].X
-	B.Y = api.Select(s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y = api.Select(s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	B2.X = tablePhiQ[0].X
-	for i := nbits - 3; i > 0; i-- {
-		B.X = Q.X
-		B.Y = api.Select(s1bits[i], tableQ[1].Y, tableQ[0].Y)
-		B2.Y = api.Select(s2bits[i], tablePhiQ[1].Y, tablePhiQ[0].Y)
-		B.AddAssign(api, B2)
+	// we suppose that the first bits of the sub-scalars are 1 and set:
+	// 		Acc = Q + Φ(Q) = -Φ²(Q)
+	var Acc, B G1Affine
+	cc.phi2Neg(api, &Acc, &Q)
+
+	// At each iteration we need to compute:
+	// 		[2]Acc ± Q ± Φ(Q).
+	// We can compute [2]Acc and look up the (precomputed) point B from:
+	// 		B1 = +Q + Φ(Q)
+	B1 := Acc
+	// 		B2 = -Q - Φ(Q)
+	B2 := G1Affine{}
+	B2.Neg(api, B1)
+	// 		B3 = +Q - Φ(Q)
+	B3 := tableQ[1]
+	B3.AddAssign(api, tablePhiQ[0])
+	// 		B4 = -Q + Φ(Q)
+	B4 := G1Affine{}
+	B4.Neg(api, B3)
+	//
+	// Note that half the points are negatives of the other half,
+	// hence have the same X coordinates.
+
+	// However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen
+	// that Acc==B or -B. So we add the point H=(0,1) on BLS12-377 of order 2
+	// to it to avoid incomplete additions in the loop by forcing Acc to be
+	// different than the stored B.  Normally, the point H should be "killed
+	// out" by the first doubling in the loop and the result will remain
+	// unchanged. However, we are using affine coordinates that do not encode
+	// the infinity point. Given the affine formulae, doubling (0,1) results in
+	// (0,-1). Since the loop size N=nbits-1 is even we need to subtract
+	// [2^N]H = (0,1) from the result at the end.
+	//
+	// Acc = Q + Φ(Q) + H
+	Acc.AddAssign(api, G1Affine{X: 0, Y: 1})
+
+	for i := nbits - 1; i > 0; i-- {
+		B.X = api.Select(api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X)
+		B.Y = api.Lookup2(s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y)
+		// Acc = [2]Acc + B
 		Acc.DoubleAndAdd(api, &Acc, &B)
 	}
 
 	// i = 0
+	// subtract Q, resp. Φ(Q), from Acc if s1bits[0], resp. s2bits[0], is 0.
 	// When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. This means
 	// when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0).
 	if cfg.CompleteArithmetic {
@@ -632,6 +657,16 @@ func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits []
 		Acc.Select(api, s2bits[0], Acc, tablePhiQ[0])
 	}
 
+	if cfg.CompleteArithmetic {
+		// subtract [2^N]H = (0,1) since we added H at the beginning
+		Acc.AddUnified(api, G1Affine{X: 0, Y: -1})
+		Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc)
+	} else {
+		// subtract [2^N]H = (0,1) since we added H at the beginning
+		Acc.AddAssign(api, G1Affine{X: 0, Y: -1})
+
+	}
+
 	P.X = Acc.X
 	P.Y = Acc.Y
 
diff --git a/std/algebra/native/sw_bls12377/g2.go b/std/algebra/native/sw_bls12377/g2.go
index d096da0e97..3b8e273dc5 100644
--- a/std/algebra/native/sw_bls12377/g2.go
+++ b/std/algebra/native/sw_bls12377/g2.go
@@ -194,30 +194,32 @@ func (P *g2AffP) ScalarMul(api frontend.API, Q g2AffP, s interface{}, opts ...al
 	}
 }
 
-// varScalarMul sets P = [s] Q and returns P.
+// varScalarMul sets P = [s]Q and returns P. It doesn't modify Q nor s.
+// It implements an optimized version based on algorithm 1 of [Halo] (see Section 6.2 and appendix C).
+//
+// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+// (0,0) is not on the curve but we conventionally take it as the
+// neutral/infinity point as per the [EVM].
+//
+// [Halo]: https://eprint.iacr.org/2019/1021.pdf
+// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf
 func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, opts ...algopts.AlgebraOption) *g2AffP {
 	cfg, err := algopts.NewConfig(opts...)
 	if err != nil {
 		panic(err)
 	}
-	// This method computes [s] Q. We use several methods to reduce the number
-	// of added constraints - first, instead of classical double-and-add, we use
-	// the optimized version from https://github.com/zcash/zcash/issues/3924
-	// which allows to omit computation of several intermediate values.
-	// Secondly, we use the GLV scalar multiplication to reduce the number
-	// iterations in the main loop. There is a small difference though - as
-	// two-bit select takes three constraints, then it takes as many constraints
-	// to compute ± Q ± Φ(Q) every iteration instead of selecting the value
-	// from a precomputed table. However, precomputing the table adds 12
-	// additional constraints and thus table-version is more expensive than
-	// addition-version.
 	var selector frontend.Variable
+	one := fields_bls12377.E2{A0: 1, A1: 0}
+	zero := fields_bls12377.E2{A0: 0, A1: 0}
 	if cfg.CompleteArithmetic {
 		// if Q=(0,0) we assign a dummy (1,1) to Q and continue
 		selector = api.And(Q.X.IsZero(api), Q.Y.IsZero(api))
-		one := fields_bls12377.E2{A0: 1, A1: 0}
 		Q.Select(api, selector, g2AffP{X: one, Y: one}, Q)
 	}
+
+	// We use the endomorphism à la GLV to compute [s]Q as
+	// 		[s1]Q + [s2]Φ(Q)
+	//
 	// The context we are working is based on the `outer` curve. However, the
 	// points and the operations on the points are performed on the `inner`
 	// curve of the outer curve. We require some parameters from the inner
@@ -227,31 +229,24 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o
 	// the hints allow to decompose the scalar s into s1 and s2 such that
 	//     s1 + λ * s2 == s mod r,
 	// where λ is third root of one in 𝔽_r.
-	sd, err := api.Compiler().NewHint(decomposeScalarG2, 3, s)
+	sd, err := api.Compiler().NewHint(decomposeScalarG1Simple, 2, s)
 	if err != nil {
 		// err is non-nil only for invalid number of inputs
 		panic(err)
 	}
 	s1, s2 := sd[0], sd[1]
 
-	// when we split scalar, then s1, s2 < lambda by default. However, to have
-	// the high 1-2 bits of s1, s2 set, the hint functions compute the
-	// decomposition for
-	//     s + k*r (for some k)
-	// instead and omits the last reduction. Thus, to constrain s1 and s2, we
-	// have to assert that
-	//     s1 + λ * s2 == s + k*r
-	api.AssertIsEqual(api.Add(s1, api.Mul(s2, cc.lambda)), api.Add(s, api.Mul(cc.fr, sd[2])))
-
-	// As the decomposed scalars are not fully reduced, then in addition of
-	// having the high bit set, an overflow bit may also be set. Thus, the total
-	// number of bits may be one more than the bitlength of λ.
-	nbits := cc.lambda.BitLen() + 1
+	// s1 + λ * s2 == s
+	api.AssertIsEqual(
+		api.Add(s1, api.Mul(s2, cc.lambda)),
+		s,
+	)
 
+	// For BLS12 λ bitsize is 127 equal to half of r bitsize
+	nbits := cc.lambda.BitLen()
 	s1bits := api.ToBinary(s1, nbits)
 	s2bits := api.ToBinary(s2, nbits)
 
-	var Acc, B, B1, B2, B3, B4 g2AffP
 	// precompute -Q, -Φ(Q), Φ(Q)
 	var tableQ, tablePhiQ [2]g2AffP
 	tableQ[1] = Q
@@ -259,49 +254,53 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o
 	cc.phi2(api, &tablePhiQ[1], &Q)
 	tablePhiQ[0].Neg(api, tablePhiQ[1])
 
-	// We now initialize the accumulator. Due to the way the scalar is
-	// decomposed, either the high bits of s1 or s2 are set and we can use the
-	// incomplete addition laws.
-
-	// Acc = Q + Φ(Q) = B1
+	// we suppose that the first bits of the sub-scalars are 1 and set:
+	// 		Acc = Q + Φ(Q) = -Φ²(Q)
+	var Acc, B g2AffP
 	cc.phi1Neg(api, &Acc, &Q)
-	B1 = Acc
-
-	// However, we can not directly add step value conditionally as we may get
-	// to incomplete path of the addition formula. We either add or subtract
-	// step value from [2] Acc (instead of conditionally adding step value to
-	// Acc):
-	//     Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q)
-	// only y coordinate differs for negation, select on that instead.
-	B.X = tableQ[0].X
-	B.Y.Select(api, s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y.Select(api, s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	// second bit
-	B.X = tableQ[0].X
-	B.Y.Select(api, s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y.Select(api, s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	// B2 = -Q-Φ(Q)
+
+	// At each iteration we need to compute:
+	// 		[2]Acc ± Q ± Φ(Q).
+	// We can compute [2]Acc and look up the (precomputed) point B from:
+	// 		B1 = +Q + Φ(Q)
+	B1 := Acc
+	// 		B2 = -Q - Φ(Q)
+	B2 := g2AffP{}
 	B2.Neg(api, B1)
-	// B3 = Q-Φ(Q)
-	B3 = tablePhiQ[0]
-	B3.AddAssign(api, tableQ[1])
-	// B4 = -Q+Φ(Q)
+	// 		B3 = +Q - Φ(Q)
+	B3 := tableQ[1]
+	B3.AddAssign(api, tablePhiQ[0])
+	// 		B4 = -Q + Φ(Q)
+	B4 := g2AffP{}
 	B4.Neg(api, B3)
-	for i := nbits - 3; i > 0; i-- {
+	//
+	// Note that half the points are negatives of the other half,
+	// hence have the same X coordinates.
+
+	// However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen
+	// that Acc==B or -B. So we add the base point G to it to avoid incomplete
+	// additions in the loop by forcing Acc to be different than the stored B.
+	// However we need at the end to subtract the resulting multiple of G,
+	// i.e. [2^(nbits-1)]G after the nbits-1 doublings, from the result.
+	//
+	// Acc = Q + Φ(Q) + G
+	points := getTwistPoints()
+	Acc.AddAssign(api,
+		g2AffP{
+			X: fields_bls12377.E2{A0: points.G2x[0], A1: points.G2x[1]},
+			Y: fields_bls12377.E2{A0: points.G2y[0], A1: points.G2y[1]},
+		},
+	)
+
+	for i := nbits - 1; i > 0; i-- {
 		B.X.Select(api, api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X)
 		B.Y.Lookup2(api, s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y)
+		// Acc = [2]Acc + B
 		Acc.DoubleAndAdd(api, &Acc, &B)
 	}
 
 	// i = 0
+	// subtract Q, resp. Φ(Q), from Acc if s1bits[0], resp. s2bits[0], is 0.
 	// When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. This means
 	// when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0).
 	if cfg.CompleteArithmetic {
@@ -309,7 +308,6 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o
 		Acc.Select(api, s1bits[0], Acc, tableQ[0])
 		tablePhiQ[0].AddUnified(api, Acc)
 		Acc.Select(api, s2bits[0], Acc, tablePhiQ[0])
-		zero := fields_bls12377.E2{A0: 0, A1: 0}
 		Acc.Select(api, selector, g2AffP{X: zero, Y: zero}, Acc)
 	} else {
 		tableQ[0].AddAssign(api, Acc)
@@ -318,6 +316,26 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o
 		Acc.Select(api, s2bits[0], Acc, tablePhiQ[0])
 	}
 
+	// subtract [2^(nbits-1)]G: the G added at the beginning was doubled nbits-1 times
+	B.X = fields_bls12377.E2{
+		A0: points.G2m[nbits-1][0],
+		A1: points.G2m[nbits-1][1],
+	}
+	B.Y = fields_bls12377.E2{
+		A0: points.G2m[nbits-1][2],
+		A1: points.G2m[nbits-1][3],
+	}
+	B.Y.Neg(api, B.Y)
+	if cfg.CompleteArithmetic {
+		Acc.AddUnified(api, B)
+	} else {
+		Acc.AddAssign(api, B)
+	}
+
+	if cfg.CompleteArithmetic {
+		Acc.Select(api, selector, g2AffP{X: zero, Y: zero}, Acc)
+	}
+
 	P.X = Acc.X
 	P.Y = Acc.Y
 
diff --git a/std/algebra/native/sw_bls12377/hints.go b/std/algebra/native/sw_bls12377/hints.go
index d34b321af9..d59ef955ef 100644
--- a/std/algebra/native/sw_bls12377/hints.go
+++ b/std/algebra/native/sw_bls12377/hints.go
@@ -1,6 +1,7 @@
 package sw_bls12377
 
 import (
+	"fmt"
 	"math/big"
 
 	"github.com/consensys/gnark-crypto/ecc"
@@ -10,6 +11,7 @@ import (
 func GetHints() []solver.Hint {
 	return []solver.Hint{
 		decomposeScalarG1,
+		decomposeScalarG1Simple,
 		decomposeScalarG2,
 	}
 }
@@ -18,44 +20,71 @@ func init() {
 	solver.RegisterHint(GetHints()...)
 }
 
-func decomposeScalarG1(scalarField *big.Int, inputs []*big.Int, res []*big.Int) error {
+func decomposeScalarG1Simple(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+	if len(inputs) != 1 {
+		return fmt.Errorf("expecting one input")
+	}
+	if len(outputs) != 2 {
+		return fmt.Errorf("expecting two outputs")
+	}
+	cc := getInnerCurveConfig(scalarField)
+	sp := ecc.SplitScalar(inputs[0], cc.glvBasis)
+	outputs[0].Set(&(sp[0]))
+	outputs[1].Set(&(sp[1]))
+
+	return nil
+}
+
+func decomposeScalarG1(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+	if len(inputs) != 1 {
+		return fmt.Errorf("expecting one input")
+	}
+	if len(outputs) != 3 {
+		return fmt.Errorf("expecting three outputs")
+	}
 	cc := getInnerCurveConfig(scalarField)
 	sp := ecc.SplitScalar(inputs[0], cc.glvBasis)
-	res[0].Set(&(sp[0]))
-	res[1].Set(&(sp[1]))
+	outputs[0].Set(&(sp[0]))
+	outputs[1].Set(&(sp[1]))
 	one := big.NewInt(1)
 	// add (lambda+1, lambda) until scalar compostion is over Fr to ensure that
 	// the high bits are set in decomposition.
-	for res[0].Cmp(cc.lambda) < 1 && res[1].Cmp(cc.lambda) < 1 {
-		res[0].Add(res[0], cc.lambda)
-		res[0].Add(res[0], one)
-		res[1].Add(res[1], cc.lambda)
+	for outputs[0].Cmp(cc.lambda) < 1 && outputs[1].Cmp(cc.lambda) < 1 {
+		outputs[0].Add(outputs[0], cc.lambda)
+		outputs[0].Add(outputs[0], one)
+		outputs[1].Add(outputs[1], cc.lambda)
 	}
 	// figure out how many times we have overflowed
-	res[2].Mul(res[1], cc.lambda).Add(res[2], res[0])
-	res[2].Sub(res[2], inputs[0])
-	res[2].Div(res[2], cc.fr)
+	outputs[2].Mul(outputs[1], cc.lambda).Add(outputs[2], outputs[0])
+	outputs[2].Sub(outputs[2], inputs[0])
+	outputs[2].Div(outputs[2], cc.fr)
 
 	return nil
 }
 
-func decomposeScalarG2(scalarField *big.Int, inputs []*big.Int, res []*big.Int) error {
+func decomposeScalarG2(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+	if len(inputs) != 1 {
+		return fmt.Errorf("expecting one input")
+	}
+	if len(outputs) != 3 {
+		return fmt.Errorf("expecting three outputs")
+	}
 	cc := getInnerCurveConfig(scalarField)
 	sp := ecc.SplitScalar(inputs[0], cc.glvBasis)
-	res[0].Set(&(sp[0]))
-	res[1].Set(&(sp[1]))
+	outputs[0].Set(&(sp[0]))
+	outputs[1].Set(&(sp[1]))
 	one := big.NewInt(1)
 	// add (lambda+1, lambda) until scalar compostion is over Fr to ensure that
 	// the high bits are set in decomposition.
-	for res[0].Cmp(cc.lambda) < 1 && res[1].Cmp(cc.lambda) < 1 {
-		res[0].Add(res[0], cc.lambda)
-		res[0].Add(res[0], one)
-		res[1].Add(res[1], cc.lambda)
+	for outputs[0].Cmp(cc.lambda) < 1 && outputs[1].Cmp(cc.lambda) < 1 {
+		outputs[0].Add(outputs[0], cc.lambda)
+		outputs[0].Add(outputs[0], one)
+		outputs[1].Add(outputs[1], cc.lambda)
 	}
 	// figure out how many times we have overflowed
-	res[2].Mul(res[1], cc.lambda).Add(res[2], res[0])
-	res[2].Sub(res[2], inputs[0])
-	res[2].Div(res[2], cc.fr)
+	outputs[2].Mul(outputs[1], cc.lambda).Add(outputs[2], outputs[0])
+	outputs[2].Sub(outputs[2], inputs[0])
+	outputs[2].Div(outputs[2], cc.fr)
 
 	return nil
 }
diff --git a/std/algebra/native/sw_bls12377/pairing.go b/std/algebra/native/sw_bls12377/pairing.go
index 2cdfbd9b84..fa9febf7c3 100644
--- a/std/algebra/native/sw_bls12377/pairing.go
+++ b/std/algebra/native/sw_bls12377/pairing.go
@@ -277,14 +277,14 @@ func doubleAndAddStep(api frontend.API, p1, p2 *g2AffP) (g2AffP, *lineEvaluation
 	d.Sub(api, p1.X, p2.X)
 	l1.DivUnchecked(api, n, d)
 
-	// x3 =lambda1**2-p1.x-p2.x
-	x3.Square(api, l1).
-		Sub(api, x3, p1.X).
-		Sub(api, x3, p2.X)
+	// x3 =lambda1**2-(p1.x+p2.x)
+	x3.Square(api, l1)
+	n.Add(api, p1.X, p2.X)
+	x3.Sub(api, x3, n)
 
-		// omit y3 computation
+	// omit y3 computation
 
-		// compute line1
+	// compute line1
 	line1.R0 = l1
 	line1.R1.Mul(api, l1, p1.X).Sub(api, line1.R1, p1.Y)
 
@@ -294,10 +294,10 @@ func doubleAndAddStep(api frontend.API, p1, p2 *g2AffP) (g2AffP, *lineEvaluation
 	l2.DivUnchecked(api, n, d)
 	l2.Add(api, l2, l1).Neg(api, l2)
 
-	// compute x4 = lambda2**2-x1-x3
-	x4.Square(api, l2).
-		Sub(api, x4, p1.X).
-		Sub(api, x4, x3)
+	// compute x4 = lambda2**2-(x1+x3)
+	x4.Square(api, l2)
+	n.Add(api, p1.X, x3)
+	x4.Sub(api, x4, n)
 
 	// compute y4 = lambda2*(x1 - x4)-y1
 	y4.Sub(api, p1.X, x4).
@@ -328,9 +328,9 @@ func doubleStep(api frontend.API, p1 *g2AffP) (g2AffP, *lineEvaluation) {
 	l.DivUnchecked(api, n, d)
 
 	// xr = lambda**2-2*p1.x
-	xr.Square(api, l).
-		Sub(api, xr, p1.X).
-		Sub(api, xr, p1.X)
+	xr.Square(api, l)
+	n.MulByFp(api, p1.X, 2)
+	xr.Sub(api, xr, n)
 
 	// yr = lambda*(p.x-xr)-p.y
 	yr.Sub(api, p1.X, xr).
@@ -359,9 +359,9 @@ func linesCompute(api frontend.API, p1, p2 *g2AffP) (*lineEvaluation, *lineEvalu
 	l1.DivUnchecked(api, n, d)
 
 	// x3 =lambda1**2-p1.x-p2.x
-	x3.Square(api, l1).
-		Sub(api, x3, p1.X).
-		Sub(api, x3, p2.X)
+	x3.Square(api, l1)
+	n.Add(api, p1.X, p2.X)
+	x3.Sub(api, x3, n)
 
 	// omit y3 computation
 	// compute line1
diff --git a/std/algebra/native/sw_bls12377/pairing2.go b/std/algebra/native/sw_bls12377/pairing2.go
index 05b00818af..f977ab916d 100644
--- a/std/algebra/native/sw_bls12377/pairing2.go
+++ b/std/algebra/native/sw_bls12377/pairing2.go
@@ -175,13 +175,16 @@ func (c *Curve) MultiScalarMul(P []*G1Affine, scalars []*Scalar, opts ...algopts
 		gamma := c.packScalarToVar(scalars[0])
 		// decompose gamma in the endomorphism eigenvalue basis and bit-decompose the sub-scalars
 		cc := getInnerCurveConfig(c.api.Compiler().Field())
-		sd, err := c.api.Compiler().NewHint(decomposeScalarG1, 3, gamma)
+		sd, err := c.api.Compiler().NewHint(decomposeScalarG1Simple, 2, gamma)
 		if err != nil {
 			panic(err)
 		}
 		gamma1, gamma2 := sd[0], sd[1]
-		c.api.AssertIsEqual(c.api.Add(gamma1, c.api.Mul(gamma2, cc.lambda)), c.api.Add(gamma, c.api.Mul(cc.fr, sd[2])))
-		nbits := cc.lambda.BitLen() + 1
+		c.api.AssertIsEqual(
+			c.api.Add(gamma1, c.api.Mul(gamma2, cc.lambda)),
+			gamma,
+		)
+		nbits := cc.lambda.BitLen()
 		gamma1Bits := c.api.ToBinary(gamma1, nbits)
 		gamma2Bits := c.api.ToBinary(gamma2, nbits)
 
diff --git a/std/algebra/native/sw_bls24315/g1.go b/std/algebra/native/sw_bls24315/g1.go
index 2f21900e78..d57481bd92 100644
--- a/std/algebra/native/sw_bls24315/g1.go
+++ b/std/algebra/native/sw_bls24315/g1.go
@@ -158,29 +158,30 @@ func (P *G1Affine) ScalarMul(api frontend.API, Q G1Affine, s interface{}, opts .
 	}
 }
 
-// varScalarMul sets P = [s] Q and returns P.
+// varScalarMul sets P = [s]Q and returns P. It doesn't modify Q nor s.
+// It implements an optimized version based on algorithm 1 of [Halo] (see Section 6.2 and appendix C).
+//
+// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+// (0,0) is not on the curve but we conventionally take it as the
+// neutral/infinity point as per the [EVM].
+//
+// [Halo]: https://eprint.iacr.org/2019/1021.pdf
+// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf
 func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variable, opts ...algopts.AlgebraOption) *G1Affine {
 	cfg, err := algopts.NewConfig(opts...)
 	if err != nil {
 		panic(err)
 	}
-	// This method computes [s] Q. We use several methods to reduce the number
-	// of added constraints - first, instead of classical double-and-add, we use
-	// the optimized version from https://github.com/zcash/zcash/issues/3924
-	// which allows to omit computation of several intermediate values.
-	// Secondly, we use the GLV scalar multiplication to reduce the number
-	// iterations in the main loop. There is a small difference though - as
-	// two-bit select takes three constraints, then it takes as many constraints
-	// to compute ± Q ± Φ(Q) every iteration instead of selecting the value
-	// from a precomputed table. However, precomputing the table adds 12
-	// additional constraints and thus table-version is more expensive than
-	// addition-version.
 	var selector frontend.Variable
 	if cfg.CompleteArithmetic {
 		// if Q=(0,0) we assign a dummy (1,1) to Q and continue
 		selector = api.And(api.IsZero(Q.X), api.IsZero(Q.Y))
 		Q.Select(api, selector, G1Affine{X: 1, Y: 1}, Q)
 	}
+
+	// We use the endomorphism à la GLV to compute [s]Q as
+	// 		[s1]Q + [s2]Φ(Q)
+	//
 	// The context we are working is based on the `outer` curve. However, the
 	// points and the operations on the points are performed on the `inner`
 	// curve of the outer curve. We require some parameters from the inner
@@ -190,31 +191,23 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl
 	// the hints allow to decompose the scalar s into s1 and s2 such that
 	//     s1 + λ * s2 == s mod r,
 	// where λ is third root of one in 𝔽_r.
-	sd, err := api.Compiler().NewHint(decomposeScalarG1, 3, s)
+	sd, err := api.Compiler().NewHint(decomposeScalarG1Simple, 3, s)
 	if err != nil {
 		// err is non-nil only for invalid number of inputs
 		panic(err)
 	}
 	s1, s2 := sd[0], sd[1]
 
-	// when we split scalar, then s1, s2 < lambda by default. However, to have
-	// the high 1-2 bits of s1, s2 set, the hint functions compute the
-	// decomposition for
-	//     s + k*r (for some k)
-	// instead and omits the last reduction. Thus, to constrain s1 and s2, we
-	// have to assert that
-	//     s1 + λ * s2 == s + k*r
-	api.AssertIsEqual(api.Add(s1, api.Mul(s2, cc.lambda)), api.Add(s, api.Mul(cc.fr, sd[2])))
-
-	// As the decomposed scalars are not fully reduced, then in addition of
-	// having the high bit set, an overflow bit may also be set. Thus, the total
-	// number of bits may be one more than the bitlength of λ.
-	nbits := cc.lambda.BitLen() + 1
+	// s1 + λ * s2 == s mod r
+	api.AssertIsEqual(
+		api.Add(s1, api.Mul(s2, cc.lambda)),
+		api.Add(s, api.Mul(cc.fr, sd[2])),
+	)
 
+	nbits := 127
 	s1bits := api.ToBinary(s1, nbits)
 	s2bits := api.ToBinary(s2, nbits)
 
-	var Acc /*accumulator*/, B, B2 /*tmp vars*/ G1Affine
 	// precompute -Q, -Φ(Q), Φ(Q)
 	var tableQ, tablePhiQ [2]G1Affine
 	tableQ[1] = Q
@@ -222,45 +215,51 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl
 	cc.phi1(api, &tablePhiQ[1], &Q)
 	tablePhiQ[0].Neg(api, tablePhiQ[1])
 
-	// We now initialize the accumulator. Due to the way the scalar is
-	// decomposed, either the high bits of s1 or s2 are set and we can use the
-	// incomplete addition laws.
-
-	// Acc = Q + Φ(Q) = -Φ²(Q)
+	// we suppose that the first bits of the sub-scalars are 1 and set:
+	// 		Acc = Q + Φ(Q) = -Φ²(Q)
+	var Acc, B G1Affine
 	cc.phi2Neg(api, &Acc, &Q)
 
-	// However, we can not directly add step value conditionally as we may get
-	// to incomplete path of the addition formula. We either add or subtract
-	// step value from [2] Acc (instead of conditionally adding step value to
-	// Acc):
-	//     Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q)
-	// only y coordinate differs for negation, select on that instead.
-	// first bit
-	B.X = tableQ[0].X
-	B.Y = api.Select(s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y = api.Select(s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	// second bit
-	B.X = tableQ[0].X
-	B.Y = api.Select(s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y = api.Select(s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	B2.X = tablePhiQ[0].X
-	for i := nbits - 3; i > 0; i-- {
-		B.X = Q.X
-		B.Y = api.Select(s1bits[i], tableQ[1].Y, tableQ[0].Y)
-		B2.Y = api.Select(s2bits[i], tablePhiQ[1].Y, tablePhiQ[0].Y)
-		B.AddAssign(api, B2)
+	// At each iteration we need to compute:
+	// 		[2]Acc ± Q ± Φ(Q).
+	// We can compute [2]Acc and look up the (precomputed) point B from:
+	// 		B1 = +Q + Φ(Q)
+	B1 := Acc
+	// 		B2 = -Q - Φ(Q)
+	B2 := G1Affine{}
+	B2.Neg(api, B1)
+	// 		B3 = +Q - Φ(Q)
+	B3 := tableQ[1]
+	B3.AddAssign(api, tablePhiQ[0])
+	// 		B4 = -Q + Φ(Q)
+	B4 := G1Affine{}
+	B4.Neg(api, B3)
+	//
+	// Note that half the points are negatives of the other half,
+	// hence have the same X coordinates.
+
+	// However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen
+	// that Acc==B or -B. So we add the point H=(0,1) of the BLS24-315 curve
+	// to it to avoid incomplete additions in the loop by forcing Acc to be
+	// different than the stored B. H is not "killed out" by the doublings in
+	// the loop: given the affine formulae, doubling (0,1) results in (0,-1)
+	// and doubling (0,-1) results in (0,1), i.e. [2]H = -H and H has order 3.
+	// Each doubling thus only flips the sign of the H component carried by
+	// Acc. Since the loop size N=nbits-1 is even we need to subtract
+	// [2^N]H = (0,1) from the result at the end.
+	//
+	// Acc = Q + Φ(Q) + H
+	Acc.AddAssign(api, G1Affine{X: 0, Y: 1})
+
+	for i := nbits - 1; i > 0; i-- {
+		B.X = api.Select(api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X)
+		B.Y = api.Lookup2(s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y)
+		// Acc = [2]Acc + B
 		Acc.DoubleAndAdd(api, &Acc, &B)
 	}
 
 	// i = 0
+	// subtract Q (resp. Φ(Q)) if bit 0 of s1 (resp. s2) is 0.
 	// When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. This means
 	// when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0).
 	if cfg.CompleteArithmetic {
@@ -276,6 +275,15 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl
 		Acc.Select(api, s2bits[0], Acc, tablePhiQ[0])
 	}
 
+	if cfg.CompleteArithmetic {
+		// subtract [2^N]H = (0,1) since we added H at the beginning
+		Acc.AddUnified(api, G1Affine{X: 0, Y: -1})
+		Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc)
+	} else {
+		// subtract [2^N]H = (0,1) since we added H at the beginning
+		Acc.AddAssign(api, G1Affine{X: 0, Y: -1})
+	}
+
 	P.X = Acc.X
 	P.Y = Acc.Y
 
@@ -424,8 +432,25 @@ func (P *G1Affine) ScalarMulBase(api frontend.API, s frontend.Variable, opts ...
 	return P.ScalarMul(api, generator, s, opts...)
 }
 
+func (P *G1Affine) jointScalarMul(api frontend.API, Q, R G1Affine, s, t frontend.Variable, opts ...algopts.AlgebraOption) *G1Affine {
+	cfg, err := algopts.NewConfig(opts...)
+	if err != nil {
+		panic(err)
+	}
+	if cfg.CompleteArithmetic {
+		// TODO @yelhousni: optimize
+		var tmp G1Affine
+		P.ScalarMul(api, Q, s, opts...)
+		tmp.ScalarMul(api, R, t, opts...)
+		P.AddUnified(api, tmp)
+	} else {
+		P.jointScalarMulUnsafe(api, Q, R, s, t)
+	}
+	return P
+}
+
 // P = [s]Q + [t]R using Shamir's trick
-func (P *G1Affine) jointScalarMul(api frontend.API, Q, R G1Affine, s, t frontend.Variable) *G1Affine {
+func (P *G1Affine) jointScalarMulUnsafe(api frontend.API, Q, R G1Affine, s, t frontend.Variable) *G1Affine {
 	cc := getInnerCurveConfig(api.Compiler().Field())
 
 	sd, err := api.Compiler().NewHint(decomposeScalarG1, 3, s)
@@ -510,12 +535,30 @@ func (P *G1Affine) jointScalarMul(api frontend.API, Q, R G1Affine, s, t frontend
 	return P
 }
 
-// scalarBitsMul computes s * p and returns it where sBits is the bit decomposition of s. It doesn't modify p nor sBits.
+// scalarBitsMul computes [s]Q = [s1]Q + [s2]Φ(Q) and returns it, where s1bits and s2bits are the bit decompositions of the sub-scalars s1 and s2. It doesn't modify Q, s1bits nor s2bits.
 // The method is similar to varScalarMul.
-func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits []frontend.Variable) *G1Affine {
+func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits []frontend.Variable, opts ...algopts.AlgebraOption) *G1Affine {
+	cfg, err := algopts.NewConfig(opts...)
+	if err != nil {
+		panic(err)
+	}
+	var selector frontend.Variable
+	if cfg.CompleteArithmetic {
+		// if Q=(0,0) we assign a dummy (1,1) to Q and continue
+		selector = api.And(api.IsZero(Q.X), api.IsZero(Q.Y))
+		Q.Select(api, selector, G1Affine{X: 1, Y: 1}, Q)
+	}
+
+	// We use the endomorphism à la GLV to compute [s]Q as
+	// 		[s1]Q + [s2]Φ(Q)
+	//
+	// The context we are working is based on the `outer` curve. However, the
+	// points and the operations on the points are performed on the `inner`
+	// curve of the outer curve. We require some parameters from the inner
+	// curve.
 	cc := getInnerCurveConfig(api.Compiler().Field())
-	nbits := cc.lambda.BitLen() + 1
-	var Acc /*accumulator*/, B, B2 /*tmp vars*/ G1Affine
+	nbits := 127
+
 	// precompute -Q, -Φ(Q), Φ(Q)
 	var tableQ, tablePhiQ [2]G1Affine
 	tableQ[1] = Q
@@ -523,48 +566,75 @@ func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits []
 	cc.phi1(api, &tablePhiQ[1], &Q)
 	tablePhiQ[0].Neg(api, tablePhiQ[1])
 
-	// We now initialize the accumulator. Due to the way the scalar is
-	// decomposed, either the high bits of s1 or s2 are set and we can use the
-	// incomplete addition laws.
-
-	//     Acc = Q + Φ(Q)
-	Acc = tableQ[1]
-	Acc.AddAssign(api, tablePhiQ[1])
-
-	// However, we can not directly add step value conditionally as we may get
-	// to incomplete path of the addition formula. We either add or subtract
-	// step value from [2] Acc (instead of conditionally adding step value to
-	// Acc):
-	//     Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q)
-	// only y coordinate differs for negation, select on that instead.
-	B.X = tableQ[0].X
-	B.Y = api.Select(s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y = api.Select(s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	// second bit
-	B.X = tableQ[0].X
-	B.Y = api.Select(s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y = api.Select(s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	B2.X = tablePhiQ[0].X
-	for i := nbits - 3; i > 0; i-- {
-		B.X = Q.X
-		B.Y = api.Select(s1bits[i], tableQ[1].Y, tableQ[0].Y)
-		B2.Y = api.Select(s2bits[i], tablePhiQ[1].Y, tablePhiQ[0].Y)
-		B.AddAssign(api, B2)
+	// we suppose that the first bits of the sub-scalars are 1 and set:
+	// 		Acc = Q + Φ(Q) = -Φ²(Q)
+	var Acc, B G1Affine
+	cc.phi2Neg(api, &Acc, &Q)
+
+	// At each iteration we need to compute:
+	// 		[2]Acc ± Q ± Φ(Q).
+	// We can compute [2]Acc and look up the (precomputed) point B from:
+	// 		B1 = +Q + Φ(Q)
+	B1 := Acc
+	// 		B2 = -Q - Φ(Q)
+	B2 := G1Affine{}
+	B2.Neg(api, B1)
+	// 		B3 = +Q - Φ(Q)
+	B3 := tableQ[1]
+	B3.AddAssign(api, tablePhiQ[0])
+	// 		B4 = -Q + Φ(Q)
+	B4 := G1Affine{}
+	B4.Neg(api, B3)
+	//
+	// Note that half the points are negatives of the other half,
+	// hence have the same X coordinates.
+
+	// However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen
+	// that Acc==B or -B. So we add the point H=(0,1) of the BLS24-315 curve
+	// to it to avoid incomplete additions in the loop by forcing Acc to be
+	// different than the stored B. H is not "killed out" by the doublings in
+	// the loop: given the affine formulae, doubling (0,1) results in (0,-1)
+	// and doubling (0,-1) results in (0,1), i.e. [2]H = -H and H has order 3.
+	// Each doubling thus only flips the sign of the H component carried by
+	// Acc. Since the loop size N=nbits-1 is even we need to subtract
+	// [2^N]H = (0,1) from the result at the end.
+	//
+	// Acc = Q + Φ(Q) + H
+	Acc.AddAssign(api, G1Affine{X: 0, Y: 1})
+
+	for i := nbits - 1; i > 0; i-- {
+		B.X = api.Select(api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X)
+		B.Y = api.Lookup2(s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y)
+		// Acc = [2]Acc + B
 		Acc.DoubleAndAdd(api, &Acc, &B)
 	}
 
-	tableQ[0].AddAssign(api, Acc)
-	Acc.Select(api, s1bits[0], Acc, tableQ[0])
-	tablePhiQ[0].AddAssign(api, Acc)
-	Acc.Select(api, s2bits[0], Acc, tablePhiQ[0])
+	// i = 0
+	// subtract Q (resp. Φ(Q)) if bit 0 of s1 (resp. s2) is 0.
+	// When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. This means
+	// when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0).
+	if cfg.CompleteArithmetic {
+		tableQ[0].AddUnified(api, Acc)
+		Acc.Select(api, s1bits[0], Acc, tableQ[0])
+		tablePhiQ[0].AddUnified(api, Acc)
+		Acc.Select(api, s2bits[0], Acc, tablePhiQ[0])
+		Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc)
+	} else {
+		tableQ[0].AddAssign(api, Acc)
+		Acc.Select(api, s1bits[0], Acc, tableQ[0])
+		tablePhiQ[0].AddAssign(api, Acc)
+		Acc.Select(api, s2bits[0], Acc, tablePhiQ[0])
+	}
+
+	if cfg.CompleteArithmetic {
+		// subtract [2^N]H = (0,1) since we added H at the beginning
+		Acc.AddUnified(api, G1Affine{X: 0, Y: -1})
+		Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc)
+	} else {
+		// subtract [2^N]H = (0,1) since we added H at the beginning
+		Acc.AddAssign(api, G1Affine{X: 0, Y: -1})
+
+	}
 
 	P.X = Acc.X
 	P.Y = Acc.Y
diff --git a/std/algebra/native/sw_bls24315/g1_test.go b/std/algebra/native/sw_bls24315/g1_test.go
index 629647bc96..4387f94c05 100644
--- a/std/algebra/native/sw_bls24315/g1_test.go
+++ b/std/algebra/native/sw_bls24315/g1_test.go
@@ -26,6 +26,7 @@ import (
 	"github.com/consensys/gnark/frontend"
 	"github.com/consensys/gnark/std/algebra/algopts"
 	"github.com/consensys/gnark/std/math/emulated"
+	"github.com/consensys/gnark/std/math/emulated/emparams"
 	"github.com/consensys/gnark/test"
 
 	bls24315 "github.com/consensys/gnark-crypto/ecc/bls24-315"
@@ -456,6 +457,114 @@ func TestVarScalarMulBaseG1(t *testing.T) {
 	assert.CheckCircuit(&circuit, test.WithValidAssignment(&witness), test.WithCurves(ecc.BW6_633), test.NoProverChecks())
 }
 
+type MultiScalarMulEdgeCasesTest struct {
+	Points  []G1Affine
+	Scalars []emulated.Element[ScalarField]
+	Res     G1Affine
+}
+
+func (c *MultiScalarMulEdgeCasesTest) Define(api frontend.API) error {
+	cr, err := NewCurve(api)
+	if err != nil {
+		return err
+	}
+	ps := make([]*G1Affine, len(c.Points))
+	for i := range c.Points {
+		ps[i] = &c.Points[i]
+	}
+	ss := make([]*emulated.Element[ScalarField], len(c.Scalars))
+	for i := range c.Scalars {
+		ss[i] = &c.Scalars[i]
+	}
+	res, err := cr.MultiScalarMul(ps, ss, algopts.WithCompleteArithmetic())
+	if err != nil {
+		return err
+	}
+	cr.AssertIsEqual(res, &c.Res)
+	return nil
+}
+
+func TestMultiScalarMulEdgeCases(t *testing.T) {
+	assert := test.NewAssert(t)
+	nbLen := 5
+	P := make([]bls24315.G1Affine, nbLen)
+	S := make([]fr.Element, nbLen)
+	for i := 0; i < nbLen; i++ {
+		S[i].SetRandom()
+		P[i].ScalarMultiplicationBase(S[i].BigInt(new(big.Int)))
+	}
+	var res, infinity bls24315.G1Affine
+	_, err := res.MultiExp(P, S, ecc.MultiExpConfig{})
+
+	assert.NoError(err)
+	cP := make([]G1Affine, len(P))
+	cS := make([]emulated.Element[ScalarField], len(S))
+
+	// s1 * (0,0) + s2 * (0,0) + s3 * (0,0) + s4 * (0,0)  + s5 * (0,0) == (0,0)
+	for i := range cP {
+		cP[i] = NewG1Affine(infinity)
+	}
+	for i := range cS {
+		cS[i] = NewScalar(S[i])
+	}
+	assignment1 := MultiScalarMulEdgeCasesTest{
+		Points:  cP,
+		Scalars: cS,
+		Res:     NewG1Affine(infinity),
+	}
+	err = test.IsSolved(&MultiScalarMulEdgeCasesTest{
+		Points:  make([]G1Affine, nbLen),
+		Scalars: make([]emulated.Element[ScalarField], nbLen),
+	}, &assignment1, ecc.BW6_633.ScalarField())
+	assert.NoError(err)
+
+	// 0 * P1 + 0 * P2 + 0 * P3 + 0 * P4 + 0 * P5 == (0,0)
+	for i := range cP {
+		cP[i] = NewG1Affine(P[i])
+	}
+	for i := range cS {
+		cS[i] = emulated.ValueOf[emparams.BLS24315Fr](0)
+	}
+	assignment2 := MultiScalarMulEdgeCasesTest{
+		Points:  cP,
+		Scalars: cS,
+		Res:     NewG1Affine(infinity),
+	}
+	err = test.IsSolved(&MultiScalarMulEdgeCasesTest{
+		Points:  make([]G1Affine, nbLen),
+		Scalars: make([]emulated.Element[ScalarField], nbLen),
+	}, &assignment2, ecc.BW6_633.ScalarField())
+	assert.NoError(err)
+
+	// s1 * (0,0) + s2 * P2 + s3 * (0,0) + s4 * P4 + 0 * P5 == s2 * P2 + s4 * P4
+	var res3 bls24315.G1Affine
+	res3.ScalarMultiplication(&P[1], S[1].BigInt(new(big.Int)))
+	res.ScalarMultiplication(&P[3], S[3].BigInt(new(big.Int)))
+	res3.Add(&res3, &res)
+	for i := range cP {
+		cP[i] = NewG1Affine(P[i])
+	}
+	cP[0].X = infinity.X
+	cP[0].Y = infinity.Y
+	cP[2].X = infinity.X
+	cP[2].Y = infinity.Y
+	for i := range cS {
+		cS[i] = NewScalar(S[i])
+	}
+	cS[4] = emulated.ValueOf[emparams.BLS24315Fr](0)
+
+	assignment3 := MultiScalarMulEdgeCasesTest{
+		Points:  cP,
+		Scalars: cS,
+		Res:     NewG1Affine(res3),
+	}
+	err = test.IsSolved(&MultiScalarMulEdgeCasesTest{
+		Points:  make([]G1Affine, nbLen),
+		Scalars: make([]emulated.Element[ScalarField], nbLen),
+	}, &assignment3, ecc.BW6_633.ScalarField())
+	assert.NoError(err)
+}
+
 type MultiScalarMulTest struct {
 	Points  []G1Affine
 	Scalars []emulated.Element[ScalarField]
@@ -516,6 +625,61 @@ func TestMultiScalarMul(t *testing.T) {
 	assert.NoError(err)
 }
 
+type g1JointScalarMulEdgeCases struct {
+	A, B G1Affine
+	C    G1Affine `gnark:",public"`
+	R, S frontend.Variable
+}
+
+func (circuit *g1JointScalarMulEdgeCases) Define(api frontend.API) error {
+	expected1 := G1Affine{}
+	expected2 := G1Affine{}
+	expected3 := G1Affine{}
+	expected4 := G1Affine{}
+	infinity := G1Affine{X: 0, Y: 0}
+	expected1.jointScalarMul(api, infinity, infinity, circuit.R, circuit.S, algopts.WithCompleteArithmetic())
+	expected2.jointScalarMul(api, circuit.A, circuit.B, big.NewInt(0), big.NewInt(0), algopts.WithCompleteArithmetic())
+	expected3.jointScalarMul(api, circuit.A, infinity, circuit.R, circuit.S, algopts.WithCompleteArithmetic())
+	expected4.jointScalarMul(api, circuit.A, circuit.B, circuit.R, big.NewInt(0), algopts.WithCompleteArithmetic())
+	_expected := G1Affine{}
+	_expected.ScalarMul(api, circuit.A, circuit.R, algopts.WithCompleteArithmetic())
+	expected1.AssertIsEqual(api, infinity)
+	expected2.AssertIsEqual(api, infinity)
+	expected3.AssertIsEqual(api, _expected)
+	expected4.AssertIsEqual(api, _expected)
+	return nil
+}
+
+func TestJointScalarMulG1EdgeCases(t *testing.T) {
+	// sample random point
+	_a := randomPointG1()
+	_b := randomPointG1()
+	var a, b, c bls24315.G1Affine
+	a.FromJacobian(&_a)
+	b.FromJacobian(&_b)
+
+	// create the cs
+	var circuit, witness g1JointScalarMulEdgeCases
+	var r, s fr.Element
+	_, _ = r.SetRandom()
+	_, _ = s.SetRandom()
+	witness.R = r.String()
+	witness.S = s.String()
+	// assign the inputs
+	witness.A.Assign(&a)
+	witness.B.Assign(&b)
+	// compute the result
+	var br, bs big.Int
+	_a.ScalarMultiplication(&_a, r.BigInt(&br))
+	_b.ScalarMultiplication(&_b, s.BigInt(&bs))
+	_a.AddAssign(&_b)
+	c.FromJacobian(&_a)
+	witness.C.Assign(&c)
+
+	assert := test.NewAssert(t)
+	assert.CheckCircuit(&circuit, test.WithValidAssignment(&witness), test.WithCurves(ecc.BW6_633))
+}
+
 type g1JointScalarMul struct {
 	A, B G1Affine
 	C    G1Affine `gnark:",public"`
@@ -616,3 +780,156 @@ func randomPointG1() bls24315.G1Jac {
 
 	return p1
 }
+
+type MultiScalarMulFoldedEdgeCasesTest struct {
+	Points  []G1Affine
+	Scalars []emulated.Element[ScalarField]
+	Res     G1Affine
+}
+
+func (c *MultiScalarMulFoldedEdgeCasesTest) Define(api frontend.API) error {
+	cr, err := NewCurve(api)
+	if err != nil {
+		return err
+	}
+	ps := make([]*G1Affine, len(c.Points))
+	for i := range c.Points {
+		ps[i] = &c.Points[i]
+	}
+	ss := make([]*emulated.Element[ScalarField], len(c.Scalars))
+	for i := range c.Scalars {
+		ss[i] = &c.Scalars[i]
+	}
+	res, err := cr.MultiScalarMul(ps, ss, algopts.WithFoldingScalarMul(), algopts.WithCompleteArithmetic())
+	if err != nil {
+		return err
+	}
+	cr.AssertIsEqual(res, &c.Res)
+	return nil
+}
+
+func TestMultiScalarMulFoldedEdgeCases(t *testing.T) {
+	assert := test.NewAssert(t)
+	nbLen := 5
+	P := make([]bls24315.G1Affine, nbLen)
+	S := make([]fr.Element, nbLen)
+	S[0].SetOne()
+	S[1].SetRandom()
+	S[2].Square(&S[1])
+	S[3].Mul(&S[1], &S[2])
+	S[4].Mul(&S[1], &S[3])
+	for i := 0; i < nbLen; i++ {
+		P[i].ScalarMultiplicationBase(S[i].BigInt(new(big.Int)))
+	}
+	var res, infinity bls24315.G1Affine
+	_, err := res.MultiExp(P, S, ecc.MultiExpConfig{})
+
+	assert.NoError(err)
+	cP := make([]G1Affine, len(P))
+	cS := make([]emulated.Element[ScalarField], len(S))
+
+	// s^0 * (0,0) + s^1 * (0,0) + s^2 * (0,0) + s^3 * (0,0)  + s^4 * (0,0) == (0,0)
+	for i := range cP {
+		cP[i] = NewG1Affine(infinity)
+	}
+	// s0 = s
+	S[0].Set(&S[1])
+	for i := range cS {
+		cS[i] = NewScalar(S[i])
+	}
+	assignment1 := MultiScalarMulFoldedEdgeCasesTest{
+		Points:  cP,
+		Scalars: cS,
+		Res:     NewG1Affine(infinity),
+	}
+	err = test.IsSolved(&MultiScalarMulFoldedEdgeCasesTest{
+		Points:  make([]G1Affine, nbLen),
+		Scalars: make([]emulated.Element[ScalarField], nbLen),
+	}, &assignment1, ecc.BW6_633.ScalarField())
+	assert.NoError(err)
+
+	// 0^0 * P1 + 0 * P2 + 0 * P3 + 0 * P4 + 0 * P5 == P1
+	for i := range cP {
+		cP[i] = NewG1Affine(P[i])
+	}
+	for i := range cS {
+		cS[i] = emulated.ValueOf[emparams.BLS24315Fr](0)
+	}
+
+	assignment3 := MultiScalarMulFoldedEdgeCasesTest{
+		Points:  cP,
+		Scalars: cS,
+		Res:     NewG1Affine(P[0]),
+	}
+	err = test.IsSolved(&MultiScalarMulFoldedEdgeCasesTest{
+		Points:  make([]G1Affine, nbLen),
+		Scalars: make([]emulated.Element[ScalarField], nbLen),
+	}, &assignment3, ecc.BW6_633.ScalarField())
+	assert.NoError(err)
+}
+
+type MultiScalarMulFoldedTest struct {
+	Points  []G1Affine
+	Scalars []emulated.Element[ScalarField]
+	Res     G1Affine
+}
+
+func (c *MultiScalarMulFoldedTest) Define(api frontend.API) error {
+	cr, err := NewCurve(api)
+	if err != nil {
+		return err
+	}
+	ps := make([]*G1Affine, len(c.Points))
+	for i := range c.Points {
+		ps[i] = &c.Points[i]
+	}
+	ss := make([]*emulated.Element[ScalarField], len(c.Scalars))
+	for i := range c.Scalars {
+		ss[i] = &c.Scalars[i]
+	}
+	res, err := cr.MultiScalarMul(ps, ss, algopts.WithFoldingScalarMul())
+	if err != nil {
+		return err
+	}
+	cr.AssertIsEqual(res, &c.Res)
+	return nil
+}
+
+func TestMultiScalarMulFolded(t *testing.T) {
+	assert := test.NewAssert(t)
+	nbLen := 4
+	P := make([]bls24315.G1Affine, nbLen)
+	S := make([]fr.Element, nbLen)
+	// [s^0]P0 + [s^1]P1 + [s^2]P2 + [s^3]P3 = P0 + [s]P1 + [s^2]P2 + [s^3]P3
+	S[0].SetOne()
+	S[1].SetRandom()
+	S[2].Square(&S[1])
+	S[3].Mul(&S[1], &S[2])
+	for i := 0; i < nbLen; i++ {
+		P[i].ScalarMultiplicationBase(S[i].BigInt(new(big.Int)))
+	}
+	var res bls24315.G1Affine
+	_, err := res.MultiExp(P, S, ecc.MultiExpConfig{})
+
+	assert.NoError(err)
+	cP := make([]G1Affine, len(P))
+	for i := range cP {
+		cP[i] = NewG1Affine(P[i])
+	}
+	cS := make([]emulated.Element[ScalarField], len(S))
+	// s0 = s
+	S[0].Set(&S[1])
+	for i := range cS {
+		cS[i] = NewScalar(S[i])
+	}
+	assignment := MultiScalarMulFoldedTest{
+		Points:  cP,
+		Scalars: cS,
+		Res:     NewG1Affine(res),
+	}
+	err = test.IsSolved(&MultiScalarMulFoldedTest{
+		Points:  make([]G1Affine, nbLen),
+		Scalars: make([]emulated.Element[ScalarField], nbLen),
+	}, &assignment, ecc.BW6_633.ScalarField())
+	assert.NoError(err)
+}
diff --git a/std/algebra/native/sw_bls24315/g2.go b/std/algebra/native/sw_bls24315/g2.go
index f8d64acf8d..85aa37cf8f 100644
--- a/std/algebra/native/sw_bls24315/g2.go
+++ b/std/algebra/native/sw_bls24315/g2.go
@@ -167,30 +167,34 @@ func (P *g2AffP) ScalarMul(api frontend.API, Q g2AffP, s interface{}, opts ...al
 	}
 }
 
-// varScalarMul sets P = [s] Q and returns P.
+// varScalarMul sets P = [s]Q and returns P. It doesn't modify Q nor s.
+// It implements an optimized version based on algorithm 1 of [Halo] (see Section 6.2 and appendix C).
+//
+// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+// (0,0) is not on the curve but we conventionally take it as the
+// neutral/infinity point as per the [EVM].
+//
+// [Halo]: https://eprint.iacr.org/2019/1021.pdf
+// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf
 func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, opts ...algopts.AlgebraOption) *g2AffP {
 	cfg, err := algopts.NewConfig(opts...)
 	if err != nil {
 		panic(err)
 	}
-	// This method computes [s] Q. We use several methods to reduce the number
-	// of added constraints - first, instead of classical double-and-add, we use
-	// the optimized version from https://github.com/zcash/zcash/issues/3924
-	// which allows to omit computation of several intermediate values.
-	// Secondly, we use the GLV scalar multiplication to reduce the number
-	// iterations in the main loop. There is a small difference though - as
-	// two-bit select takes three constraints, then it takes as many constraints
-	// to compute ± Q ± Φ(Q) every iteration instead of selecting the value
-	// from a precomputed table. However, precomputing the table adds 12
-	// additional constraints and thus table-version is more expensive than
-	// addition-version.
 	var selector frontend.Variable
+	oneE2 := fields_bls24315.E2{A0: 1, A1: 0}
+	zeroE2 := fields_bls24315.E2{A0: 0, A1: 0}
+	zeroE4 := fields_bls24315.E4{B0: zeroE2, B1: zeroE2}
+	oneE4 := fields_bls24315.E4{B0: oneE2, B1: zeroE2}
 	if cfg.CompleteArithmetic {
 		// if Q=(0,0) we assign a dummy (1,1) to Q and continue
 		selector = api.And(Q.X.IsZero(api), Q.Y.IsZero(api))
-		one := fields_bls24315.E4{B0: fields_bls24315.E2{A0: 1, A1: 0}, B1: fields_bls24315.E2{A0: 0, A1: 0}}
-		Q.Select(api, selector, g2AffP{X: one, Y: one}, Q)
+		Q.Select(api, selector, g2AffP{X: oneE4, Y: oneE4}, Q)
 	}
+
+	// We use the endomorphism à la GLV to compute [s]Q as
+	// 		[s1]Q + [s2]Φ(Q)
+	//
 	// The context we are working is based on the `outer` curve. However, the
 	// points and the operations on the points are performed on the `inner`
 	// curve of the outer curve. We require some parameters from the inner
@@ -200,31 +204,23 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o
 	// the hints allow to decompose the scalar s into s1 and s2 such that
 	//     s1 + λ * s2 == s mod r,
 	// where λ is third root of one in 𝔽_r.
-	sd, err := api.Compiler().NewHint(decomposeScalarG2, 3, s)
+	sd, err := api.Compiler().NewHint(decomposeScalarG1Simple, 3, s)
 	if err != nil {
 		// err is non-nil only for invalid number of inputs
 		panic(err)
 	}
 	s1, s2 := sd[0], sd[1]
 
-	// when we split scalar, then s1, s2 < lambda by default. However, to have
-	// the high 1-2 bits of s1, s2 set, the hint functions compute the
-	// decomposition for
-	//     s + k*r (for some k)
-	// instead and omits the last reduction. Thus, to constrain s1 and s2, we
-	// have to assert that
-	//     s1 + λ * s2 == s + k*r
-	api.AssertIsEqual(api.Add(s1, api.Mul(s2, cc.lambda)), api.Add(s, api.Mul(cc.fr, sd[2])))
-
-	// As the decomposed scalars are not fully reduced, then in addition of
-	// having the high bit set, an overflow bit may also be set. Thus, the total
-	// number of bits may be one more than the bitlength of λ.
-	nbits := cc.lambda.BitLen() + 1
+	// s1 + λ * s2 == s mod r
+	api.AssertIsEqual(
+		api.Add(s1, api.Mul(s2, cc.lambda)),
+		api.Add(s, api.Mul(cc.fr, sd[2])),
+	)
 
+	nbits := 127
 	s1bits := api.ToBinary(s1, nbits)
 	s2bits := api.ToBinary(s2, nbits)
 
-	var Acc, B, B1, B2, B3, B4 g2AffP
 	// precompute -Q, -Φ(Q), Φ(Q)
 	var tableQ, tablePhiQ [2]g2AffP
 	tableQ[1] = Q
@@ -232,49 +228,71 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o
 	cc.phi2(api, &tablePhiQ[1], &Q)
 	tablePhiQ[0].Neg(api, tablePhiQ[1])
 
-	// We now initialize the accumulator. Due to the way the scalar is
-	// decomposed, either the high bits of s1 or s2 are set and we can use the
-	// incomplete addition laws.
-
-	// Acc = Q + Φ(Q) = B1
+	// we suppose that the first bits of the sub-scalars are 1 and set:
+	// 		Acc = Q + Φ(Q) = -Φ²(Q)
+	var Acc, B g2AffP
 	cc.phi1Neg(api, &Acc, &Q)
-	B1 = Acc
-
-	// However, we can not directly add step value conditionally as we may get
-	// to incomplete path of the addition formula. We either add or subtract
-	// step value from [2] Acc (instead of conditionally adding step value to
-	// Acc):
-	//     Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q)
-	// only y coordinate differs for negation, select on that instead.
-	B.X = tableQ[0].X
-	B.Y.Select(api, s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y.Select(api, s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	// second bit
-	B.X = tableQ[0].X
-	B.Y.Select(api, s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y)
-	Acc.DoubleAndAdd(api, &Acc, &B)
-	B.X = tablePhiQ[0].X
-	B.Y.Select(api, s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y)
-	Acc.AddAssign(api, B)
-
-	// B2 = -Q-Φ(Q)
+
+	// At each iteration we need to compute:
+	// 		[2]Acc ± Q ± Φ(Q).
+	// We can compute [2]Acc and look up the (precomputed) point B from:
+	// 		B1 = +Q + Φ(Q)
+	B1 := Acc
+	// 		B2 = -Q - Φ(Q)
+	B2 := g2AffP{}
 	B2.Neg(api, B1)
-	// B3 = Q-Φ(Q)
-	B3 = tablePhiQ[0]
-	B3.AddAssign(api, tableQ[1])
-	// B4 = -Q+Φ(Q)
+	// 		B3 = +Q - Φ(Q)
+	B3 := tableQ[1]
+	B3.AddAssign(api, tablePhiQ[0])
+	// 		B4 = -Q + Φ(Q)
+	B4 := g2AffP{}
 	B4.Neg(api, B3)
-	for i := nbits - 3; i > 0; i-- {
+	//
+	// Note that half the points are negatives of the other half,
+	// hence have the same X coordinates.
+
+	// However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen
+	// that Acc==B or -B. So we add the base point G to it to avoid incomplete
+	// additions in the loop by forcing Acc to be different than the stored B.
+	// However we need at the end to subtract [2^nbits]G or conditionally
+	// [2^nbits]Φ²(G) from the result.
+	//
+	// Acc = Q + Φ(Q) + G
+	points := getTwistPoints()
+	Acc.AddAssign(api,
+		g2AffP{
+			X: fields_bls24315.E4{
+				B0: fields_bls24315.E2{
+					A0: points.G2x[0],
+					A1: points.G2x[1],
+				},
+				B1: fields_bls24315.E2{
+					A0: points.G2x[2],
+					A1: points.G2x[3],
+				},
+			},
+			Y: fields_bls24315.E4{
+				B0: fields_bls24315.E2{
+					A0: points.G2y[0],
+					A1: points.G2y[1],
+				},
+				B1: fields_bls24315.E2{
+					A0: points.G2y[2],
+					A1: points.G2y[3],
+				},
+			},
+		},
+	)
+
+	for i := nbits - 1; i > 0; i-- {
 		B.X.Select(api, api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X)
 		B.Y.Lookup2(api, s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y)
+		// Acc = [2]Acc + B
 		Acc.DoubleAndAdd(api, &Acc, &B)
 	}
 
 	// i = 0
+	// subtract Q (resp. Φ(Q)) if bit 0 of s1 (resp. s2) is 0.
 	// When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. This means
 	// when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0).
 	if cfg.CompleteArithmetic {
@@ -282,8 +300,7 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o
 		Acc.Select(api, s1bits[0], Acc, tableQ[0])
 		tablePhiQ[0].AddUnified(api, Acc)
 		Acc.Select(api, s2bits[0], Acc, tablePhiQ[0])
-		zero := fields_bls24315.E4{B0: fields_bls24315.E2{A0: 0, A1: 0}, B1: fields_bls24315.E2{A0: 0, A1: 0}}
-		Acc.Select(api, selector, g2AffP{X: zero, Y: zero}, Acc)
+		Acc.Select(api, selector, g2AffP{X: zeroE4, Y: zeroE4}, Acc)
 	} else {
 		tableQ[0].AddAssign(api, Acc)
 		Acc.Select(api, s1bits[0], Acc, tableQ[0])
@@ -291,6 +308,38 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o
 		Acc.Select(api, s2bits[0], Acc, tablePhiQ[0])
 	}
 
+	// subtract [2^nbits]G since we added G at the beginning
+	B.X = fields_bls24315.E4{
+		B0: fields_bls24315.E2{
+			A0: points.G2m[nbits-1][0],
+			A1: points.G2m[nbits-1][1],
+		},
+		B1: fields_bls24315.E2{
+			A0: points.G2m[nbits-1][2],
+			A1: points.G2m[nbits-1][3],
+		},
+	}
+	B.Y = fields_bls24315.E4{
+		B0: fields_bls24315.E2{
+			A0: points.G2m[nbits-1][4],
+			A1: points.G2m[nbits-1][5],
+		},
+		B1: fields_bls24315.E2{
+			A0: points.G2m[nbits-1][6],
+			A1: points.G2m[nbits-1][7],
+		},
+	}
+	B.Y.Neg(api, B.Y)
+	if cfg.CompleteArithmetic {
+		Acc.AddUnified(api, B)
+	} else {
+		Acc.AddAssign(api, B)
+	}
+
+	if cfg.CompleteArithmetic {
+		Acc.Select(api, selector, g2AffP{X: zeroE4, Y: zeroE4}, Acc)
+	}
+
 	P.X = Acc.X
 	P.Y = Acc.Y
 
diff --git a/std/algebra/native/sw_bls24315/hints.go b/std/algebra/native/sw_bls24315/hints.go
index 1269ecaca8..0404212674 100644
--- a/std/algebra/native/sw_bls24315/hints.go
+++ b/std/algebra/native/sw_bls24315/hints.go
@@ -1,6 +1,7 @@
 package sw_bls24315
 
 import (
+	"fmt"
 	"math/big"
 
 	"github.com/consensys/gnark-crypto/ecc"
@@ -10,6 +11,7 @@ import (
 func GetHints() []solver.Hint {
 	return []solver.Hint{
 		decomposeScalarG1,
+		decomposeScalarG1Simple,
 		decomposeScalarG2,
 	}
 }
@@ -18,6 +20,25 @@ func init() {
 	solver.RegisterHint(GetHints()...)
 }
 
+func decomposeScalarG1Simple(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+	if len(inputs) != 1 {
+		return fmt.Errorf("expecting one input")
+	}
+	if len(outputs) != 3 {
+		return fmt.Errorf("expecting three outputs")
+	}
+	cc := getInnerCurveConfig(scalarField)
+	sp := ecc.SplitScalar(inputs[0], cc.glvBasis)
+	outputs[0].Set(&(sp[0]))
+	outputs[1].Set(&(sp[1]))
+	// figure out how many times sp[0] + sp[1]*lambda has overflowed cc.fr relative to inputs[0]
+	outputs[2].Mul(outputs[1], cc.lambda).Add(outputs[2], outputs[0])
+	outputs[2].Sub(outputs[2], inputs[0])
+	outputs[2].Div(outputs[2], cc.fr)
+
+	return nil
+}
+
 func decomposeScalarG1(scalarField *big.Int, inputs []*big.Int, res []*big.Int) error {
 	cc := getInnerCurveConfig(scalarField)
 	sp := ecc.SplitScalar(inputs[0], cc.glvBasis)
diff --git a/std/algebra/native/sw_bls24315/pairing2.go b/std/algebra/native/sw_bls24315/pairing2.go
index d3830cca42..3bbbc4d041 100644
--- a/std/algebra/native/sw_bls24315/pairing2.go
+++ b/std/algebra/native/sw_bls24315/pairing2.go
@@ -107,7 +107,7 @@ func (c *Curve) jointScalarMul(P1, P2 *G1Affine, s1, s2 *Scalar, opts ...algopts
 	res := &G1Affine{}
 	varScalar1 := c.packScalarToVar(s1)
 	varScalar2 := c.packScalarToVar(s2)
-	res.jointScalarMul(c.api, *P1, *P2, varScalar1, varScalar2)
+	res.jointScalarMul(c.api, *P1, *P2, varScalar1, varScalar2, opts...)
 	return res
 }
 
@@ -119,7 +119,7 @@ func (c *Curve) ScalarMul(P *G1Affine, s *Scalar, opts ...algopts.AlgebraOption)
 		Y: P.Y,
 	}
 	varScalar := c.packScalarToVar(s)
-	res.ScalarMul(c.api, *P, varScalar)
+	res.ScalarMul(c.api, *P, varScalar, opts...)
 	return res
 }
 
@@ -128,7 +128,7 @@ func (c *Curve) ScalarMul(P *G1Affine, s *Scalar, opts ...algopts.AlgebraOption)
 func (c *Curve) ScalarMulBase(s *Scalar, opts ...algopts.AlgebraOption) *G1Affine {
 	res := new(G1Affine)
 	varScalar := c.packScalarToVar(s)
-	res.ScalarMulBase(c.api, varScalar)
+	res.ScalarMulBase(c.api, varScalar, opts...)
 	return res
 }
 
@@ -146,6 +146,10 @@ func (c *Curve) MultiScalarMul(P []*G1Affine, scalars []*Scalar, opts ...algopts
 	if err != nil {
 		return nil, fmt.Errorf("new config: %w", err)
 	}
+	addFn := c.Add
+	if cfg.CompleteArithmetic {
+		addFn = c.AddUnified
+	}
 	if !cfg.FoldMulti {
 		if len(P) != len(scalars) {
 			return nil, fmt.Errorf("mismatching points and scalars slice lengths")
@@ -160,7 +164,7 @@ func (c *Curve) MultiScalarMul(P []*G1Affine, scalars []*Scalar, opts ...algopts
 		}
 		for i := 1; i < n-1; i += 2 {
 			q := c.jointScalarMul(P[i-1], P[i], scalars[i-1], scalars[i], opts...)
-			res = c.Add(res, q)
+			res = addFn(res, q)
 		}
 		return res, nil
 	} else {
@@ -171,24 +175,27 @@ func (c *Curve) MultiScalarMul(P []*G1Affine, scalars []*Scalar, opts ...algopts
 		gamma := c.packScalarToVar(scalars[0])
 		// decompose gamma in the endomorphism eigenvalue basis and bit-decompose the sub-scalars
 		cc := getInnerCurveConfig(c.api.Compiler().Field())
-		sd, err := c.api.Compiler().NewHint(decomposeScalarG1, 3, gamma)
+		sd, err := c.api.Compiler().NewHint(decomposeScalarG1Simple, 3, gamma)
 		if err != nil {
 			panic(err)
 		}
 		gamma1, gamma2 := sd[0], sd[1]
-		c.api.AssertIsEqual(c.api.Add(gamma1, c.api.Mul(gamma2, cc.lambda)), c.api.Add(gamma, c.api.Mul(cc.fr, sd[2])))
-		nbits := cc.lambda.BitLen() + 1
+		c.api.AssertIsEqual(
+			c.api.Add(gamma1, c.api.Mul(gamma2, cc.lambda)),
+			c.api.Add(gamma, c.api.Mul(cc.fr, sd[2])),
+		)
+		nbits := 127
 		gamma1Bits := c.api.ToBinary(gamma1, nbits)
 		gamma2Bits := c.api.ToBinary(gamma2, nbits)
 
 		// points and scalars must be non-zero
 		var res G1Affine
-		res.scalarBitsMul(c.api, *P[len(P)-1], gamma1Bits, gamma2Bits)
+		res.scalarBitsMul(c.api, *P[len(P)-1], gamma1Bits, gamma2Bits, opts...)
 		for i := len(P) - 2; i > 0; i-- {
-			res = *c.Add(P[i], &res)
-			res.scalarBitsMul(c.api, res, gamma1Bits, gamma2Bits)
+			res = *addFn(P[i], &res)
+			res.scalarBitsMul(c.api, res, gamma1Bits, gamma2Bits, opts...)
 		}
-		res = *c.Add(P[0], &res)
+		res = *addFn(P[0], &res)
 		return &res, nil
 	}
 }
@@ -475,4 +482,4 @@ func (c *Curve) packScalarToVar(s *Scalar) frontend.Variable {
 }
 
 // ScalarField defines the [emulated.FieldParams] implementation on a one limb of the scalar field.
-type ScalarField = emparams.BLS12315Fr
+type ScalarField = emparams.BLS24315Fr
diff --git a/std/commitments/kzg/verifier.go b/std/commitments/kzg/verifier.go
index 25fd482544..74262a4c5c 100644
--- a/std/commitments/kzg/verifier.go
+++ b/std/commitments/kzg/verifier.go
@@ -424,22 +424,20 @@ func NewVerifier[FR emulated.FieldParams, G1El algebra.G1ElementT, G2El algebra.
 // commitment at point.
 func (v *Verifier[FR, G1El, G2El, GTEl]) CheckOpeningProof(commitment Commitment[G1El], proof OpeningProof[FR, G1El], point emulated.Element[FR], vk VerifyingKey[G1El, G2El]) error {
 
-	claimedValueG1 := v.curve.ScalarMul(&vk.G1, &proof.ClaimedValue)
-
-	// [f(α) - f(a)]G₁
-	fminusfaG1 := v.curve.Neg(claimedValueG1)
-	fminusfaG1 = v.curve.Add(fminusfaG1, &commitment.G1El)
-
-	// [-H(α)]G₁
-	negQuotientPoly := v.curve.Neg(&proof.Quotient)
+	// [f(a)]G₁ + [-a]([H(α)]G₁) = [f(a) - a*H(α)]G₁
+	pointNeg := v.scalarApi.Neg(&point)
+	totalG1, err := v.curve.MultiScalarMul([]*G1El{&vk.G1, &proof.Quotient}, []*emulated.Element[FR]{&proof.ClaimedValue, pointNeg})
+	if err != nil {
+		return fmt.Errorf("check opening proof: %w", err)
+	}
 
-	// [f(α) - f(a) + a*H(α)]G₁
-	totalG1 := v.curve.ScalarMul(&proof.Quotient, &point)
-	totalG1 = v.curve.Add(totalG1, fminusfaG1)
+	// [f(a) - a*H(α)]G₁ + [-f(α)]G₁ = [f(a) - f(α) - a*H(α)]G₁
+	commitmentNeg := v.curve.Neg(&commitment.G1El)
+	totalG1 = v.curve.Add(totalG1, commitmentNeg)
 
-	// e([f(α)-f(a)+aH(α)]G₁], G₂).e([-H(α)]G₁, [α]G₂) == 1
+	// e([f(a)-f(α)-a*H(α)]G₁, G₂).e([H(α)]G₁, [α]G₂) == 1
 	if err := v.pairing.PairingCheck(
-		[]*G1El{totalG1, negQuotientPoly},
+		[]*G1El{totalG1, &proof.Quotient},
 		[]*G2El{&vk.G2[0], &vk.G2[1]},
 	); err != nil {
 		return fmt.Errorf("pairing check: %w", err)
diff --git a/std/math/emulated/emparams/emparams.go b/std/math/emulated/emparams/emparams.go
index ebbf7d5d8c..b07fb6e96b 100644
--- a/std/math/emulated/emparams/emparams.go
+++ b/std/math/emulated/emparams/emparams.go
@@ -254,7 +254,7 @@ type BW6761Fr struct{ sixLimbPrimeField }
 
 func (fp BW6761Fr) Modulus() *big.Int { return ecc.BW6_761.ScalarField() }
 
-// BLS12315Fp provides type parametrization for field emulation:
+// BLS24315Fp provides type parametrization for field emulation:
 //   - limbs: 5
 //   - limb width: 64 bits
 //
@@ -264,11 +264,11 @@ func (fp BW6761Fr) Modulus() *big.Int { return ecc.BW6_761.ScalarField() }
 //	39705142709513438335025689890408969744933502416914749335064285505637884093126342347073617133569 (base 10)
 //
 // This is the base field of the BLS24-315 curve.
-type BLS12315Fp struct{ fiveLimbPrimeField }
+type BLS24315Fp struct{ fiveLimbPrimeField }
 
-func (fp BLS12315Fp) Modulus() *big.Int { return ecc.BLS24_315.BaseField() }
+func (fp BLS24315Fp) Modulus() *big.Int { return ecc.BLS24_315.BaseField() }
 
-// BLS12315Fr provides type parametrization for field emulation:
+// BLS24315Fr provides type parametrization for field emulation:
 //   - limbs: 4
 //   - limb width: 64 bits
 //
@@ -278,6 +278,6 @@ func (fp BLS12315Fp) Modulus() *big.Int { return ecc.BLS24_315.BaseField() }
 //	0x196deac24a9da12b25fc7ec9cf927a98c8c480ece644e36419d0c5fd00c00001 (base 10)
 //
 // This is the scalar field of the BLS24-315 curve.
-type BLS12315Fr struct{ fourLimbPrimeField }
+type BLS24315Fr struct{ fourLimbPrimeField }
 
-func (fr BLS12315Fr) Modulus() *big.Int { return ecc.BLS24_315.ScalarField() }
+func (fr BLS24315Fr) Modulus() *big.Int { return ecc.BLS24_315.ScalarField() }