// flipjump/stl/bit/div.fj

ns bit {
    // Complexity: n(5@+11)
    //   dst[:n], src[:n] = src[:n] / 10, src[:n] % 10.
    def div10 n, dst, src @ const_zero, done {
        // The quotient starts at 0; every cmp_sub_10 call below only toggles its own result bit.
        .zero n, dst

        // Top frame: there is no source bit above src[n-1], so a constant 0 bit plays the role of val4.
        .div10.cmp_sub_10 const_zero, src+dw*(n-4), dst+dw*(n-4)

        // Slide the 5-bit frame one bit towards the LSB per step:
        //   val4 = src[n-1-step], val = src[n-5-step : n-1-step], res = dst[n-5-step].
        // When all steps are done, src holds the remainder and dst holds the quotient.
        rep(n-4, step) .div10.cmp_sub_10 src+dw*(n-1-step), src+dw*(n-5-step), dst+dw*(n-5-step)
        ;done

      const_zero:
        // Read-only 0 bit: cmp_sub_10 only flips val4 when it reads as 1, so this bit is never modified.
        .bit 0
      done:
    }
    ns div10 {
        // Complexity: 4@+12
        //   if (val >= 10) {
        //     val -= 10;
        //     res  = !res;
        //   }
        // for val4:val[3,2,1,0] of length 5, and Assumes val <= 19.
        // This macro is getting called on consecutive 5-bits, shifting the 5-bit-frame 1 bit right at a time.
        //
        // Parameters:
        //   val4 - the most significant bit (bit 4) of the 5-bit frame.
        //   val  - the four lower bits of the frame: val[0] is at val, val[3] is at val+3*dw.
        //   res  - a single bit that gets toggled when 10 was subtracted.
        //
        // The subtraction is done by a decision tree over the bit pattern of the frame;
        // each label is named after the pattern (bits 4..0, x = don't care) that reaches it.
        // val[0] is never read nor changed, because subtracting 10 (0b01010) keeps the lowest bit.
        def cmp_sub_10 val4, val, res @ yes, no, _1xxxx, _1xx0x, _1xx1x, _01x1x, _0110x {
            // Dispatch on the pattern. Values below 10 (0..7, 8, 9) end at 'no' unchanged.
            ..if1 val4, _1xxxx          // 16..19
            ..if0 val+3*dw, no          // 0..7
            ..if1 val+  dw, _01x1x      // 10, 11, 14, 15
            ..if0 val+2*dw, no          // 8, 9

            // 12, 13 (0110x) -> 2, 3 (0001x): clear val[2] and val[3]; 'yes' then sets val[1].
          _0110x:
            ..not val+2*dw
            val+3*dw+dbit; yes          // raw op: flip val[3], then jump to yes

            // 10, 11, 14, 15 (01x1x) -> 0, 1, 4, 5 (00x0x): clear val[3]; 'yes' then clears val[1].
          _01x1x:
            val+3*dw+dbit; yes          // raw op: flip val[3], then jump to yes

            // 16..19 (100xx): clear val4 first, then fix the lower bits according to val[1].
          _1xxxx:
            ..not val4
            ..if val+dw, _1xx0x, _1xx1x
            // 16, 17 (1000x) -> 6, 7 (0011x): set val[2]; 'yes' then sets val[1].
          _1xx0x:
            val+2*dw+dbit; yes          // raw op: flip val[2], then jump to yes
            // 18, 19 (1001x) -> 8, 9 (0100x): set val[3]; 'yes' then clears val[1].
          _1xx1x:
            val+3*dw+dbit; yes          // raw op: flip val[3], then jump to yes

            // Common tail of every "val >= 10" path: val[1] always flips, and the result bit is toggled.
          yes:
            ..not val+dw
            ..not res
          no:
        }
    }


    //  Time Complexity: n^2(10@+20)
    // Space Complexity: n^2(11@+22)
    //   if b==0: goto end (do nothing)
    //   q = a/b  (signed division)
    //   r = a%b  (signed modulo - sign(r)==sign(a))
    // @NOTE: a,b are SIGNED numbers. If you want a division with unsigned ints, use the div macro.
    // @NOTE: this division implementation is WASTEFUL in space, yet saves running time, compared to div_loop.
    //
    // @NOTE: There is a better version: This one is slow, big, and doesn't error on b==0.
    //        Also it supports only one convention for the sign of the remainder.
    //        For a faster & better division see hex.div, hex.idiv.
    // q,a,b,r are bit[:n].
    def idiv n, a, b, q, r @ negative_a, negative_b, one_negative, neg_b_1, do_div, neg_b_2, neg_ans, end {
        // Signed division built on top of the unsigned .div:
        //   1. remember the signs of a and b,
        //   2. replace a and b by their absolute values (in place),
        //   3. divide unsigned,
        //   4. restore a and b, and fix the signs of r (follows a) and q (negative iff exactly one operand was negative).
        //
        // b==0: do nothing at all. This must be checked here, before any sign handling:
        // .div itself skips on b==0, but the sign fix-ups below would still negate r and q when a is negative.
        .if0 n, b, end

        // Save the sign bits (the most significant bit of each operand).
        .mov negative_a, a+dw*(n-1)
        .mov negative_b, b+dw*(n-1)
        // one_negative ends up as negative_a ^ negative_b (toggled once per negative operand).
        .zero one_negative

        .if0 negative_a, neg_b_1
        .not one_negative
        .neg n, a
      neg_b_1:
        .if0 negative_b, do_div
        .not one_negative
        .neg n, b
      do_div:
        .div n, a, b, q, r

        // Undo the temporary negation of a; the remainder takes the sign of a.
        .if0 negative_a, neg_b_2
        .neg n, a
        .neg n, r
      neg_b_2:
        // Undo the temporary negation of b.
        .if0 negative_b, neg_ans
        .neg n, b
      neg_ans:
        // The quotient is negative iff exactly one of a,b was negative.
        .if0 one_negative, end
        .neg n, q
        ;end

      negative_a:
        .bit
      negative_b:
        .bit
      one_negative:
        .bit
      end:
    }


    //  Time Complexity: n^2(10@+20)
    // Space Complexity: n^2(11@+22)
    //   if b==0: goto end (do nothing)
    //   q = a/b  (unsigned division)
    //   r = a%b  (unsigned modulo)
    // @NOTE: a,b are UNSIGNED numbers. If you want a division with signed ints, use the idiv macro.
    // @NOTE: this division implementation is WASTEFUL in space, yet saves running time, compared to div_loop.
    //
    // @NOTE: There is a better version: This one is slow, big, and doesn't error on b==0.
    //        For a faster & better division see hex.div, hex.idiv.
    // q,a,b,r are bit[:n].
    def div n, a, b, q, r @ quotient, remainder, done {
        // Division by zero is skipped silently: q and r are left as they were.
        .if0 n, b, done

        // Work on private buffers, so q and r are written only once, at the very end.
        // The remainder buffer is 2n bits wide: instead of shifting it left on every step,
        // each step works on an n-bit window that starts one bit lower than the previous one.
        .zero 2*n, remainder
        .zero   n, quotient

        // Unrolled long division, from the most significant bit of a down to bit 0.
        // Step number 'step' feeds a[n-1-step] into the window that starts at remainder[n-1-step],
        // and toggles quotient[n-1-step] if b could be subtracted from that window.
        rep(n, step) .div.div_step n,  a+dw*(n-1-step),  b,  remainder+dw*(n-1-step),  quotient+dw*(n-1-step)

        // The last window is remainder[0:n], which now holds a % b.
        .mov n, r, remainder
        .mov n, q, quotient
        ;done

      remainder:
        .vec 2*n
      quotient:
        .vec n
      done:
    }
    ns div {
        //  Time Complexity: n(10@+20)
        // Space Complexity: n(11@+22)
        // A single long-division step:
        //   R[0] ^= N
        //   if R[:n] >= D[:n]:
        //     R[:n] -= D[:n]
        //     Q = !Q
        //
        // R,D are bit[:n], while Q,N are single bits.
        def div_step n, N, D, R, Q @ subtract, done {
            // Bring the next dividend bit into the lowest bit of the remainder window.
            ..xor R, N
            // Skip the subtraction only when R < D; both R == D and R > D go on to subtract.
            ..cmp n, R, D, done, subtract, subtract
          subtract:
            ..sub n, R, D
            ..not Q
          done:
        }
    }


    //  Time Complexity: n^2(18@+18)
    // Space Complexity: n(37@+58)
    //   if b==0: goto end (do nothing)
    //   q = a/b  (signed division)
    //   r = a%b  (signed modulo - sign(r)==sign(a))
    // @NOTE: a,b are SIGNED numbers. If you want a division with unsigned ints, use the div_loop macro.
    // @NOTE: this division implementation saves space, yet is slower, compared to div.
    //
    // @NOTE: There is a better version: This one is slow, big, and doesn't error on b==0.
    //        Also it supports only one convention for the sign of the remainder.
    //        For a faster & better division see hex.div, hex.idiv.
    // q,a,b,r are bit[:n].
    def idiv_loop n, a, b, q, r @ negative_a, negative_b, one_negative, neg_b_1, do_div, neg_b_2, neg_ans, end {
        // Signed division built on top of the unsigned .div_loop:
        //   1. remember the signs of a and b,
        //   2. replace a and b by their absolute values (in place),
        //   3. divide unsigned,
        //   4. restore a and b, and fix the signs of r (follows a) and q (negative iff exactly one operand was negative).
        //
        // b==0: do nothing at all. This must be checked here, before any sign handling:
        // .div_loop itself skips on b==0, but the sign fix-ups below would still negate r and q when a is negative.
        .if0 n, b, end

        // Save the sign bits (the most significant bit of each operand).
        .mov negative_a, a+dw*(n-1)
        .mov negative_b, b+dw*(n-1)
        // one_negative ends up as negative_a ^ negative_b (toggled once per negative operand).
        .zero one_negative

        .if0 negative_a, neg_b_1
        .not one_negative
        .neg n, a
      neg_b_1:
        .if0 negative_b, do_div
        .not one_negative
        .neg n, b
      do_div:
        .div_loop n, a, b, q, r

        // Undo the temporary negation of a; the remainder takes the sign of a.
        .if0 negative_a, neg_b_2
        .neg n, a
        .neg n, r
      neg_b_2:
        // Undo the temporary negation of b.
        .if0 negative_b, neg_ans
        .neg n, b
      neg_ans:
        // The quotient is negative iff exactly one of a,b was negative.
        .if0 one_negative, end
        .neg n, q
        ;end

      negative_a:
        .bit
      negative_b:
        .bit
      one_negative:
        .bit
      end:
    }


    //  Time Complexity: n^2(18@+18)
    // Space Complexity: n(25@+22)
    //   if b==0: goto end (do nothing)
    //   q = a/b  (unsigned division)
    //   r = a%b  (unsigned modulo)
    // @NOTE: a,b are UNSIGNED numbers. If you want a division with signed ints, use the idiv_loop macro.
    // @NOTE: this division implementation saves space, yet is slower, compared to div.
    //
    // @NOTE: There is a better version: This one is slow, big, and doesn't error on b==0.
    //        For a faster division see hex.div, hex.idiv.
    // q,a,b,r are bit[:n].
    def div_loop n, a, b, q, r @ next_bit, subtract, advance, finish, dividend, quotient, remainder, mask, done {
        // Division by zero is skipped silently: q and r are left as they were.
        .if0 n, b, done

        // Work on private copies, so a is not consumed and q,r are written only once, at the end.
        .zero n, remainder
        .zero n, quotient
        .mov n, dividend, a
        // mask holds a single set bit marking the quotient bit of the current iteration; it starts at bit n-1.
        .zero n, mask
        .not mask+dw*(n-1)

      next_bit:
        // The loop is over once the mask bit has been shifted out (mask == 0).
        .if0 n, mask, finish                             //Comp: n(@+2)
        // remainder = (remainder << 1) | (top bit of the remaining dividend)
        .shl n, remainder                                //Comp: n(2@-1)
        .xor remainder, dividend+dw*(n-1)
        // Only remainder < b skips the subtraction.
        .cmp n, remainder, b, advance, subtract, subtract    //Comp: n(2@+4)  (Space=n(3@+6))
      subtract:
        .sub n, remainder, b                             //Comp: n(8@+16)
        // Set the current quotient bit (it is still 0, so xor with the mask sets it).
        .xor n, quotient, mask                           //Comp: n(@-1)
      advance:
        // Move on to the next lower quotient bit, and expose the next dividend bit at the top.
        .shr n, mask                                     //Comp: n(2@-1)
        .shl n, dividend                                 //Comp: n(2@-1)
        ;next_bit

      finish:
        .mov n, r, remainder
        .mov n, q, quotient
        ;done

      dividend:
        .vec n
      remainder:
        .vec n
      quotient:
        .vec n
      mask:
        .vec n

      done:
    }
}