From 9c1d1643cfd0107685e1ce61ff470bbfc7f4345e Mon Sep 17 00:00:00 2001
From: Ni Trieu
Date: Thu, 7 May 2020 03:11:14 -0700
Subject: [PATCH] added ntl

---
 .vs/nPSI/v16/Solution.VC.db-shm | Bin 32768 -> 32768 bytes
 CMakeLists.txt | 2 +-
 thirdparty/all_linux.get | 3 +-
 thirdparty/linux/mpir_patch/mpir.h | 2494 ------
 thirdparty/linux/mpir_patch/mpirxx.h | 3611 ---------
 thirdparty/linux/nasm.get | 7 -
 thirdparty/linux/ntl.get | 10 +-
 thirdparty/linux/ntl/README | 19 +
 .../linux/ntl/doc/BasicThreadPool.cpp.html | 387 +
 thirdparty/linux/ntl/doc/BasicThreadPool.txt | 377 +
 thirdparty/linux/ntl/doc/GF2.cpp.html | 251 +
 thirdparty/linux/ntl/doc/GF2.txt | 241 +
 thirdparty/linux/ntl/doc/GF2E.cpp.html | 419 +
 thirdparty/linux/ntl/doc/GF2E.txt | 409 +
 thirdparty/linux/ntl/doc/GF2EX.cpp.html | 899 +++
 thirdparty/linux/ntl/doc/GF2EX.txt | 889 ++
 .../linux/ntl/doc/GF2EXFactoring.cpp.html | 238 +
 thirdparty/linux/ntl/doc/GF2EXFactoring.txt | 228 +
 thirdparty/linux/ntl/doc/GF2X.cpp.html | 825 ++
 thirdparty/linux/ntl/doc/GF2X.txt | 815 ++
 .../linux/ntl/doc/GF2XFactoring.cpp.html | 127 +
 thirdparty/linux/ntl/doc/GF2XFactoring.txt | 117 +
 thirdparty/linux/ntl/doc/GF2XVec.cpp.html | 73 +
 thirdparty/linux/ntl/doc/GF2XVec.txt | 63 +
 thirdparty/linux/ntl/doc/HNF.cpp.html | 39 +
 thirdparty/linux/ntl/doc/HNF.txt | 29 +
 thirdparty/linux/ntl/doc/LLL.cpp.html | 442 +
 thirdparty/linux/ntl/doc/LLL.txt | 432 +
 thirdparty/linux/ntl/doc/Lazy.cpp.html | 101 +
 thirdparty/linux/ntl/doc/Lazy.txt | 91 +
 thirdparty/linux/ntl/doc/LazyTable.cpp.html | 95 +
 thirdparty/linux/ntl/doc/LazyTable.txt | 85 +
 thirdparty/linux/ntl/doc/RR.cpp.html | 606 ++
 thirdparty/linux/ntl/doc/RR.txt | 596 ++
 thirdparty/linux/ntl/doc/SmartPtr.cpp.html | 957 +++
 thirdparty/linux/ntl/doc/SmartPtr.txt | 947 +++
 thirdparty/linux/ntl/doc/ZZ.cpp.html | 1125 +++
 thirdparty/linux/ntl/doc/ZZ.txt | 1115 +++
 thirdparty/linux/ntl/doc/ZZVec.cpp.html | 78 +
 thirdparty/linux/ntl/doc/ZZVec.txt | 68 +
 thirdparty/linux/ntl/doc/ZZX.cpp.html | 608 ++
 thirdparty/linux/ntl/doc/ZZX.txt | 598 ++
 .../linux/ntl/doc/ZZXFactoring.cpp.html | 181 +
 thirdparty/linux/ntl/doc/ZZXFactoring.txt | 171 +
 thirdparty/linux/ntl/doc/ZZ_p.cpp.html | 421 +
 thirdparty/linux/ntl/doc/ZZ_p.txt | 411 +
 thirdparty/linux/ntl/doc/ZZ_pE.cpp.html | 402 +
 thirdparty/linux/ntl/doc/ZZ_pE.txt | 392 +
 thirdparty/linux/ntl/doc/ZZ_pEX.cpp.html | 900 +++
 thirdparty/linux/ntl/doc/ZZ_pEX.txt | 890 ++
 .../linux/ntl/doc/ZZ_pEXFactoring.cpp.html | 199 +
 thirdparty/linux/ntl/doc/ZZ_pEXFactoring.txt | 189 +
 thirdparty/linux/ntl/doc/ZZ_pX.cpp.html | 907 +++
 thirdparty/linux/ntl/doc/ZZ_pX.txt | 897 ++
 .../linux/ntl/doc/ZZ_pXFactoring.cpp.html | 208 +
 thirdparty/linux/ntl/doc/ZZ_pXFactoring.txt | 198 +
 thirdparty/linux/ntl/doc/arrow1.gif | Bin 0 -> 967 bytes
 thirdparty/linux/ntl/doc/arrow2.gif | Bin 0 -> 942 bytes
 thirdparty/linux/ntl/doc/arrow3.gif | Bin 0 -> 955 bytes
 thirdparty/linux/ntl/doc/config.txt | 666 ++
 thirdparty/linux/ntl/doc/conversions.txt | 183 +
 thirdparty/linux/ntl/doc/copying.txt | 361 +
 thirdparty/linux/ntl/doc/flags.txt | 40 +
 thirdparty/linux/ntl/doc/lzz_p.cpp.html | 432 +
 thirdparty/linux/ntl/doc/lzz_p.txt | 422 +
 thirdparty/linux/ntl/doc/lzz_pE.cpp.html | 400 +
 thirdparty/linux/ntl/doc/lzz_pE.txt | 390 +
 thirdparty/linux/ntl/doc/lzz_pEX.cpp.html | 894 ++
 thirdparty/linux/ntl/doc/lzz_pEX.txt | 884 ++
 .../linux/ntl/doc/lzz_pEXFactoring.cpp.html | 198 +
 thirdparty/linux/ntl/doc/lzz_pEXFactoring.txt | 188 +
 thirdparty/linux/ntl/doc/lzz_pX.cpp.html | 940 +++
 thirdparty/linux/ntl/doc/lzz_pX.txt | 930 +++
 .../linux/ntl/doc/lzz_pXFactoring.cpp.html | 187 +
 thirdparty/linux/ntl/doc/lzz_pXFactoring.txt | 177 +
 thirdparty/linux/ntl/doc/mat_GF2.cpp.html | 177 +
 thirdparty/linux/ntl/doc/mat_GF2.txt | 167 +
 thirdparty/linux/ntl/doc/mat_GF2E.cpp.html | 179 +
 thirdparty/linux/ntl/doc/mat_GF2E.txt | 169 +
 thirdparty/linux/ntl/doc/mat_RR.cpp.html | 152 +
 thirdparty/linux/ntl/doc/mat_RR.txt | 142 +
 thirdparty/linux/ntl/doc/mat_ZZ.cpp.html | 187 +
 thirdparty/linux/ntl/doc/mat_ZZ.txt | 177 +
 thirdparty/linux/ntl/doc/mat_ZZ_p.cpp.html | 171 +
 thirdparty/linux/ntl/doc/mat_ZZ_p.txt | 161 +
 thirdparty/linux/ntl/doc/mat_ZZ_pE.cpp.html | 176 +
 thirdparty/linux/ntl/doc/mat_ZZ_pE.txt | 166 +
 thirdparty/linux/ntl/doc/mat_lzz_p.cpp.html | 215 +
 thirdparty/linux/ntl/doc/mat_lzz_p.txt | 205 +
 thirdparty/linux/ntl/doc/mat_lzz_pE.cpp.html | 176 +
 thirdparty/linux/ntl/doc/mat_lzz_pE.txt | 166 +
 thirdparty/linux/ntl/doc/mat_poly_ZZ.cpp.html | 31 +
 thirdparty/linux/ntl/doc/mat_poly_ZZ.txt | 21 +
 .../linux/ntl/doc/mat_poly_ZZ_p.cpp.html | 30 +
 thirdparty/linux/ntl/doc/mat_poly_ZZ_p.txt | 20 +
 .../linux/ntl/doc/mat_poly_lzz_p.cpp.html | 31 +
 thirdparty/linux/ntl/doc/mat_poly_lzz_p.txt | 21 +
 thirdparty/linux/ntl/doc/matrix.cpp.html | 195 +
 thirdparty/linux/ntl/doc/matrix.txt | 185 +
 thirdparty/linux/ntl/doc/names.txt | 106 +
 thirdparty/linux/ntl/doc/pair.cpp.html | 89 +
 thirdparty/linux/ntl/doc/pair.txt | 79 +
 thirdparty/linux/ntl/doc/quad_float.cpp.html | 391 +
 thirdparty/linux/ntl/doc/quad_float.txt | 381 +
 thirdparty/linux/ntl/doc/sedscript.txt | 82 +
 thirdparty/linux/ntl/doc/tools.cpp.html | 208 +
 thirdparty/linux/ntl/doc/tools.txt | 198 +
 thirdparty/linux/ntl/doc/tour-ack.html | 79 +
 thirdparty/linux/ntl/doc/tour-changes.html | 2932 +++
 thirdparty/linux/ntl/doc/tour-ex1.html | 725 ++
 thirdparty/linux/ntl/doc/tour-ex2.html | 385 +
 thirdparty/linux/ntl/doc/tour-ex3.html | 361 +
 thirdparty/linux/ntl/doc/tour-ex4.html | 725 ++
 thirdparty/linux/ntl/doc/tour-ex5.html | 136 +
 thirdparty/linux/ntl/doc/tour-ex6.html | 137 +
 thirdparty/linux/ntl/doc/tour-ex7.html | 200 +
 thirdparty/linux/ntl/doc/tour-examples.html | 44 +
 thirdparty/linux/ntl/doc/tour-gf2x.html | 173 +
 thirdparty/linux/ntl/doc/tour-gmp.html | 301 +
 thirdparty/linux/ntl/doc/tour-impl.html | 476 ++
 thirdparty/linux/ntl/doc/tour-intro.html | 130 +
 thirdparty/linux/ntl/doc/tour-modules.html | 739 ++
 thirdparty/linux/ntl/doc/tour-roadmap.html | 185 +
 thirdparty/linux/ntl/doc/tour-struct.html | 1354 ++++
 thirdparty/linux/ntl/doc/tour-time.html | 117 +
 thirdparty/linux/ntl/doc/tour-tips.html | 132 +
 thirdparty/linux/ntl/doc/tour-unix.html | 546 ++
 thirdparty/linux/ntl/doc/tour-win.html | 287 +
 thirdparty/linux/ntl/doc/tour.html | 113 +
 thirdparty/linux/ntl/doc/vec_GF2.cpp.html | 242 +
 thirdparty/linux/ntl/doc/vec_GF2.txt | 232 +
 thirdparty/linux/ntl/doc/vec_GF2E.cpp.html | 112 +
 thirdparty/linux/ntl/doc/vec_GF2E.txt | 102 +
 thirdparty/linux/ntl/doc/vec_RR.cpp.html | 88 +
 thirdparty/linux/ntl/doc/vec_RR.txt | 78 +
 thirdparty/linux/ntl/doc/vec_ZZ.cpp.html | 87 +
 thirdparty/linux/ntl/doc/vec_ZZ.txt | 77 +
 thirdparty/linux/ntl/doc/vec_ZZ_p.cpp.html | 107 +
 thirdparty/linux/ntl/doc/vec_ZZ_p.txt | 97 +
 thirdparty/linux/ntl/doc/vec_ZZ_pE.cpp.html | 102 +
 thirdparty/linux/ntl/doc/vec_ZZ_pE.txt | 92 +
 thirdparty/linux/ntl/doc/vec_lzz_p.cpp.html | 103 +
 thirdparty/linux/ntl/doc/vec_lzz_p.txt | 93 +
 thirdparty/linux/ntl/doc/vec_lzz_pE.cpp.html | 102 +
 thirdparty/linux/ntl/doc/vec_lzz_pE.txt | 92 +
 thirdparty/linux/ntl/doc/vector.cpp.html | 464 ++
 thirdparty/linux/ntl/doc/vector.txt | 454 ++
 thirdparty/linux/ntl/doc/version.cpp.html | 37 +
 thirdparty/linux/ntl/doc/version.txt | 27 +
 thirdparty/linux/ntl/doc/xdouble.cpp.html | 215 +
 thirdparty/linux/ntl/doc/xdouble.txt | 205 +
 .../linux/ntl/include/NTL/BasicThreadPool.h | 725 ++
 thirdparty/linux/ntl/include/NTL/FFT.h | 210 +
 thirdparty/linux/ntl/include/NTL/FacVec.h | 27 +
 thirdparty/linux/ntl/include/NTL/GF2.h | 506 ++
 thirdparty/linux/ntl/include/NTL/GF2E.h | 494 ++
 thirdparty/linux/ntl/include/NTL/GF2EX.h | 1052 +++
 .../linux/ntl/include/NTL/GF2EXFactoring.h | 245 +
 thirdparty/linux/ntl/include/NTL/GF2X.h | 762 ++
 .../linux/ntl/include/NTL/GF2XFactoring.h | 60 +
 thirdparty/linux/ntl/include/NTL/GF2XVec.h | 62 +
 thirdparty/linux/ntl/include/NTL/HAVE_AVX.h | 3 +
 .../linux/ntl/include/NTL/HAVE_BUILTIN_CLZL.h | 3 +
 thirdparty/linux/ntl/include/NTL/HAVE_FMA.h | 3 +
 .../linux/ntl/include/NTL/HAVE_LL_TYPE.h | 3 +
 thirdparty/linux/ntl/include/NTL/HNF.h | 25 +
 thirdparty/linux/ntl/include/NTL/LLL.h | 149 +
 thirdparty/linux/ntl/include/NTL/Lazy.h | 206 +
 thirdparty/linux/ntl/include/NTL/LazyTable.h | 155 +
 thirdparty/linux/ntl/include/NTL/RR.h | 543 ++
 thirdparty/linux/ntl/include/NTL/SPMM_ASM.h | 202 +
 thirdparty/linux/ntl/include/NTL/SmartPtr.h | 1677 ++++
 thirdparty/linux/ntl/include/NTL/WordVector.h | 169 +
 thirdparty/linux/ntl/include/NTL/ZZ.h | 1585 ++++
 thirdparty/linux/ntl/include/NTL/ZZVec.h | 61 +
 thirdparty/linux/ntl/include/NTL/ZZX.h | 754 ++
 .../linux/ntl/include/NTL/ZZXFactoring.h | 65 +
 thirdparty/linux/ntl/include/NTL/ZZ_p.h | 552 ++
 thirdparty/linux/ntl/include/NTL/ZZ_pE.h | 532 ++
 thirdparty/linux/ntl/include/NTL/ZZ_pEX.h | 1050 +++
 .../linux/ntl/include/NTL/ZZ_pEXFactoring.h | 192 +
 thirdparty/linux/ntl/include/NTL/ZZ_pX.h | 1292 +++
 .../linux/ntl/include/NTL/ZZ_pXFactoring.h | 227 +
 thirdparty/linux/ntl/include/NTL/c_lip.h | 566 ++
 thirdparty/linux/ntl/include/NTL/config.h | 632 ++
 thirdparty/linux/ntl/include/NTL/config_log.h | 2 +
 thirdparty/linux/ntl/include/NTL/ctools.h | 480 ++
 thirdparty/linux/ntl/include/NTL/def_config.h | 632 ++
 thirdparty/linux/ntl/include/NTL/fileio.h | 59 +
 thirdparty/linux/ntl/include/NTL/g_lip.h | 592 ++
 thirdparty/linux/ntl/include/NTL/gmp_aux.h | 0
 thirdparty/linux/ntl/include/NTL/lip.h | 81 +
 thirdparty/linux/ntl/include/NTL/lzz_p.h | 511 ++
 thirdparty/linux/ntl/include/NTL/lzz_pE.h | 524 ++
 thirdparty/linux/ntl/include/NTL/lzz_pEX.h | 1046 +++
 .../linux/ntl/include/NTL/lzz_pEXFactoring.h | 194 +
 thirdparty/linux/ntl/include/NTL/lzz_pX.h | 1333 +++
 .../linux/ntl/include/NTL/lzz_pXFactoring.h | 255 +
 thirdparty/linux/ntl/include/NTL/mach_desc.h | 850 ++
 thirdparty/linux/ntl/include/NTL/mat_GF2.h | 186 +
 thirdparty/linux/ntl/include/NTL/mat_GF2E.h | 187 +
 thirdparty/linux/ntl/include/NTL/mat_RR.h | 162 +
 thirdparty/linux/ntl/include/NTL/mat_ZZ.h | 184 +
 thirdparty/linux/ntl/include/NTL/mat_ZZ_p.h | 168 +
 thirdparty/linux/ntl/include/NTL/mat_ZZ_pE.h | 178 +
 thirdparty/linux/ntl/include/NTL/mat_lzz_p.h | 216 +
 thirdparty/linux/ntl/include/NTL/mat_lzz_pE.h | 178 +
 .../linux/ntl/include/NTL/mat_poly_ZZ.h | 14 +
 .../linux/ntl/include/NTL/mat_poly_ZZ_p.h | 15 +
 .../linux/ntl/include/NTL/mat_poly_lzz_p.h | 15 +
 thirdparty/linux/ntl/include/NTL/matrix.h | 254 +
 thirdparty/linux/ntl/include/NTL/new.h | 11 +
 thirdparty/linux/ntl/include/NTL/pair.h | 90 +
 .../linux/ntl/include/NTL/pair_GF2EX_long.h | 18 +
 .../linux/ntl/include/NTL/pair_GF2X_long.h | 16 +
 .../linux/ntl/include/NTL/pair_ZZX_long.h | 16 +
 .../linux/ntl/include/NTL/pair_ZZ_pEX_long.h | 16 +
 .../linux/ntl/include/NTL/pair_ZZ_pX_long.h | 16 +
 .../linux/ntl/include/NTL/pair_lzz_pEX_long.h | 16 +
 .../linux/ntl/include/NTL/pair_lzz_pX_long.h | 16 +
 thirdparty/linux/ntl/include/NTL/quad_float.h | 325 +
 thirdparty/linux/ntl/include/NTL/sp_arith.h | 1193 +++
 thirdparty/linux/ntl/include/NTL/thread.h | 229 +
 thirdparty/linux/ntl/include/NTL/tools.h | 939 +++
 thirdparty/linux/ntl/include/NTL/vec_GF2.h | 214 +
 thirdparty/linux/ntl/include/NTL/vec_GF2E.h | 110 +
 .../linux/ntl/include/NTL/vec_GF2XVec.h | 14 +
 thirdparty/linux/ntl/include/NTL/vec_RR.h | 88 +
 thirdparty/linux/ntl/include/NTL/vec_ZZ.h | 90 +
 thirdparty/linux/ntl/include/NTL/vec_ZZVec.h | 14 +
 thirdparty/linux/ntl/include/NTL/vec_ZZ_p.h | 97 +
 thirdparty/linux/ntl/include/NTL/vec_ZZ_pE.h | 107 +
 thirdparty/linux/ntl/include/NTL/vec_double.h | 13 +
 thirdparty/linux/ntl/include/NTL/vec_long.h | 13 +
 thirdparty/linux/ntl/include/NTL/vec_lzz_p.h | 107 +
 thirdparty/linux/ntl/include/NTL/vec_lzz_pE.h | 107 +
 .../linux/ntl/include/NTL/vec_quad_float.h | 14 +
 thirdparty/linux/ntl/include/NTL/vec_ulong.h | 13 +
 .../linux/ntl/include/NTL/vec_vec_GF2.h | 14 +
 .../linux/ntl/include/NTL/vec_vec_GF2E.h | 13 +
 thirdparty/linux/ntl/include/NTL/vec_vec_RR.h | 13 +
 thirdparty/linux/ntl/include/NTL/vec_vec_ZZ.h | 13 +
 .../linux/ntl/include/NTL/vec_vec_ZZ_p.h | 13 +
 .../linux/ntl/include/NTL/vec_vec_ZZ_pE.h | 13 +
 .../linux/ntl/include/NTL/vec_vec_long.h | 14 +
 .../linux/ntl/include/NTL/vec_vec_lzz_p.h | 13 +
 .../linux/ntl/include/NTL/vec_vec_lzz_pE.h | 13 +
 .../linux/ntl/include/NTL/vec_vec_ulong.h | 14 +
 .../linux/ntl/include/NTL/vec_xdouble.h | 15 +
 thirdparty/linux/ntl/include/NTL/vector.h | 808 ++
 thirdparty/linux/ntl/include/NTL/version.h | 12 +
 thirdparty/linux/ntl/include/NTL/wizard_log.h | 21 +
 thirdparty/linux/ntl/include/NTL/xdouble.h | 281 +
 thirdparty/linux/ntl/src/BasicThreadPool.c | 32 +
 thirdparty/linux/ntl/src/BerlekampTest.c | 81 +
 thirdparty/linux/ntl/src/BerlekampTestIn | 2 +
 thirdparty/linux/ntl/src/BerlekampTestOut | 1 +
 thirdparty/linux/ntl/src/BitMatTest.c | 99 +
 thirdparty/linux/ntl/src/CanZassTest.c | 80 +
 thirdparty/linux/ntl/src/CanZassTestIn | 2 +
 thirdparty/linux/ntl/src/CanZassTestOut | 1 +
 thirdparty/linux/ntl/src/CharPolyTest.c | 19 +
 thirdparty/linux/ntl/src/CharPolyTestIn | 2 +
 thirdparty/linux/ntl/src/CharPolyTestOut | 1 +
 thirdparty/linux/ntl/src/CheckAVX.c | 63 +
 thirdparty/linux/ntl/src/CheckCLZL.c | 21 +
 thirdparty/linux/ntl/src/CheckCLZLAux.c | 2 +
 thirdparty/linux/ntl/src/CheckCompile.c | 2 +
 thirdparty/linux/ntl/src/CheckFMA.c | 65 +
 thirdparty/linux/ntl/src/CheckFeature.log | 8 +
 thirdparty/linux/ntl/src/CheckFlag.log | 2 +
 thirdparty/linux/ntl/src/CheckLL.c | 57 +
 thirdparty/linux/ntl/src/CheckLLAux.c | 4 +
 thirdparty/linux/ntl/src/CopyFeatures | 5 +
 thirdparty/linux/ntl/src/DIRNAME | 1 +
 thirdparty/linux/ntl/src/DispSettings.c | 199 +
 thirdparty/linux/ntl/src/DoConfig | 461 ++
 thirdparty/linux/ntl/src/ExceptionTest.c | 70 +
 thirdparty/linux/ntl/src/FFT.c | 2288 ++++
 thirdparty/linux/ntl/src/FacVec.c | 85 +
 thirdparty/linux/ntl/src/GF2.c | 41 +
 thirdparty/linux/ntl/src/GF2E.c | 177 +
 thirdparty/linux/ntl/src/GF2EX.c | 3457 ++++
 thirdparty/linux/ntl/src/GF2EXFactoring.c | 2163 +++
 thirdparty/linux/ntl/src/GF2EXTest.c | 133 +
 thirdparty/linux/ntl/src/GF2X.c | 2038 +++
 thirdparty/linux/ntl/src/GF2X1.c | 3705 +++++
 thirdparty/linux/ntl/src/GF2XFactoring.c | 964 +++
 thirdparty/linux/ntl/src/GF2XTest.c | 97 +
 thirdparty/linux/ntl/src/GF2XTimeTest.c | 149 +
 thirdparty/linux/ntl/src/GF2XVec.c | 94 +
 thirdparty/linux/ntl/src/G_LLL_FP.c | 1570 ++++
 thirdparty/linux/ntl/src/G_LLL_QP.c | 2063 +++++
 thirdparty/linux/ntl/src/G_LLL_RR.c | 1367 ++++
 thirdparty/linux/ntl/src/G_LLL_XD.c | 1320 +++
 thirdparty/linux/ntl/src/GetPID.c | 9 +
 thirdparty/linux/ntl/src/GetPID1.c | 9 +
 thirdparty/linux/ntl/src/GetPID2.c | 6 +
 thirdparty/linux/ntl/src/GetTime.c | 19 +
 thirdparty/linux/ntl/src/GetTime0.c | 126 +
 thirdparty/linux/ntl/src/GetTime1.c | 19 +
 thirdparty/linux/ntl/src/GetTime2.c | 21 +
 thirdparty/linux/ntl/src/GetTime3.c | 18 +
 thirdparty/linux/ntl/src/GetTime4.c | 35 +
 thirdparty/linux/ntl/src/GetTime5.c | 7 +
 thirdparty/linux/ntl/src/HNF.c | 128 +
 thirdparty/linux/ntl/src/InitSettings.c | 169 +
 thirdparty/linux/ntl/src/LLL.c | 706 ++
 thirdparty/linux/ntl/src/LLLTest.c | 139 +
 thirdparty/linux/ntl/src/LLLTestIn | 13 +
 thirdparty/linux/ntl/src/LLLTestOut | 28 +
 thirdparty/linux/ntl/src/LLL_FP.c | 1692 ++++
 thirdparty/linux/ntl/src/LLL_QP.c | 1994 +++++
 thirdparty/linux/ntl/src/LLL_RR.c | 1357 ++++
 thirdparty/linux/ntl/src/LLL_XD.c | 1227 +++
 thirdparty/linux/ntl/src/MakeCheckFeature | 26 +
 thirdparty/linux/ntl/src/MakeDesc.c | 1206 +++
 thirdparty/linux/ntl/src/MakeDescAux.c | 56 +
 thirdparty/linux/ntl/src/MakeGetPID | 26 +
 thirdparty/linux/ntl/src/MakeGetTime | 83 +
 thirdparty/linux/ntl/src/MatrixTest.c | 58 +
 thirdparty/linux/ntl/src/MatrixTestIn | 13 +
 thirdparty/linux/ntl/src/MatrixTestOut | 35 +
 thirdparty/linux/ntl/src/MoreFacTest.c | 66 +
 thirdparty/linux/ntl/src/MoreFacTestIn | 1161 +++
 thirdparty/linux/ntl/src/MulTimeTest.c | 161 +
 thirdparty/linux/ntl/src/NOTES | 49 +
 thirdparty/linux/ntl/src/Poly1TimeTest.c | 237 +
 thirdparty/linux/ntl/src/Poly2TimeTest.c | 186 +
 thirdparty/linux/ntl/src/Poly3TimeTest.c | 188 +
 thirdparty/linux/ntl/src/QuadTest.c | 108 +
 thirdparty/linux/ntl/src/QuadTestIn | 2 +
 thirdparty/linux/ntl/src/QuadTestOut | 18 +
 thirdparty/linux/ntl/src/QuickTest.c | 478 ++
 thirdparty/linux/ntl/src/RR.c | 2123 +++
 thirdparty/linux/ntl/src/RRTest.c | 27 +
 thirdparty/linux/ntl/src/RRTestIn | 22 +
 thirdparty/linux/ntl/src/RRTestOut | 3 +
 thirdparty/linux/ntl/src/RemoveProg | 12 +
 thirdparty/linux/ntl/src/ResetFeatures | 5 +
 thirdparty/linux/ntl/src/TestGetPID.c | 17 +
 thirdparty/linux/ntl/src/TestGetTime.c | 56 +
 thirdparty/linux/ntl/src/TestScript | 204 +
 thirdparty/linux/ntl/src/ThreadTest.c | 139 +
 thirdparty/linux/ntl/src/Timing.c | 194 +
 thirdparty/linux/ntl/src/VERSION_INFO | 1 +
 thirdparty/linux/ntl/src/WINDIR | 1 +
 thirdparty/linux/ntl/src/Wizard | 119 +
 thirdparty/linux/ntl/src/WizardAux | 360 +
 thirdparty/linux/ntl/src/WordVector.c | 398 +
 thirdparty/linux/ntl/src/ZZ.c | 2393 ++++
 thirdparty/linux/ntl/src/ZZVec.c | 85 +
 thirdparty/linux/ntl/src/ZZX.c | 972 +++
 thirdparty/linux/ntl/src/ZZX1.c | 2715 +++++
 thirdparty/linux/ntl/src/ZZXCharPoly.c | 79 +
 thirdparty/linux/ntl/src/ZZXFacTest.c | 77 +
 thirdparty/linux/ntl/src/ZZXFacTestIn | 159 +
 thirdparty/linux/ntl/src/ZZXFacTestOut | 2 +
 thirdparty/linux/ntl/src/ZZXFactoring.c | 3817 +++++
 thirdparty/linux/ntl/src/ZZ_p.c | 365 +
 thirdparty/linux/ntl/src/ZZ_pE.c | 147 +
 thirdparty/linux/ntl/src/ZZ_pEX.c | 3444 ++++
 thirdparty/linux/ntl/src/ZZ_pEXFactoring.c | 1594 ++++
 thirdparty/linux/ntl/src/ZZ_pEXTest.c | 57 +
 thirdparty/linux/ntl/src/ZZ_pX.c | 4036 +++++
 thirdparty/linux/ntl/src/ZZ_pX1.c | 2079 +++
 thirdparty/linux/ntl/src/ZZ_pXCharPoly.c | 77 +
 thirdparty/linux/ntl/src/ZZ_pXFactoring.c | 1916 +++
 thirdparty/linux/ntl/src/all | 0
 thirdparty/linux/ntl/src/c_lip_impl.h | 6774 ++++
 thirdparty/linux/ntl/src/cfile | 632 ++
 thirdparty/linux/ntl/src/cfileout | 632 ++
 thirdparty/linux/ntl/src/configure | 19 +
 thirdparty/linux/ntl/src/ctools.c | 92 +
 thirdparty/linux/ntl/src/def_makefile | 575 ++
 thirdparty/linux/ntl/src/dosify | 67 +
 thirdparty/linux/ntl/src/fileio.c | 134 +
 thirdparty/linux/ntl/src/g_lip_impl.h | 7057 ++++
 thirdparty/linux/ntl/src/gen_gmp_aux.c | 121 +
 thirdparty/linux/ntl/src/hfileout | 2 +
 thirdparty/linux/ntl/src/lip.c | 12 +
 thirdparty/linux/ntl/src/lzz_p.c | 402 +
 thirdparty/linux/ntl/src/lzz_pE.c | 149 +
 thirdparty/linux/ntl/src/lzz_pEX.c | 3445 ++++
 thirdparty/linux/ntl/src/lzz_pEXFactoring.c | 1594 ++++
 thirdparty/linux/ntl/src/lzz_pEXTest.c | 57 +
 thirdparty/linux/ntl/src/lzz_pX.c | 3330 ++++
 thirdparty/linux/ntl/src/lzz_pX1.c | 2485 ++++
 thirdparty/linux/ntl/src/lzz_pXCharPoly.c | 77 +
 thirdparty/linux/ntl/src/lzz_pXFactoring.c | 1925 +++
 thirdparty/linux/ntl/src/mach_desc.win | 631 ++
 thirdparty/linux/ntl/src/makefile | 575 ++
 thirdparty/linux/ntl/src/mat_GF2.c | 755 ++
 thirdparty/linux/ntl/src/mat_GF2E.c | 808 ++
 thirdparty/linux/ntl/src/mat_RR.c | 680 ++
 thirdparty/linux/ntl/src/mat_ZZ.c | 1337 +++
 thirdparty/linux/ntl/src/mat_ZZ_p.c | 859 ++
 thirdparty/linux/ntl/src/mat_ZZ_pE.c | 882 ++
 thirdparty/linux/ntl/src/mat_lzz_p.c | 7191 +++++
 thirdparty/linux/ntl/src/mat_lzz_pE.c | 884 ++
 thirdparty/linux/ntl/src/mat_lzz_pTest.c | 269 +
 thirdparty/linux/ntl/src/mat_poly_ZZ.c | 116 +
 thirdparty/linux/ntl/src/mat_poly_ZZ_p.c | 88 +
 thirdparty/linux/ntl/src/mat_poly_lzz_p.c | 87 +
 thirdparty/linux/ntl/src/mfile | 575 ++
 thirdparty/linux/ntl/src/mfileout | 575 ++
 thirdparty/linux/ntl/src/newnames.c | 173 +
 thirdparty/linux/ntl/src/ppscript | 14 +
 thirdparty/linux/ntl/src/quad_float.c | 951 +++
 thirdparty/linux/ntl/src/subset.c | 155 +
 thirdparty/linux/ntl/src/thread.c | 37 +
 thirdparty/linux/ntl/src/tools.c | 152 +
 thirdparty/linux/ntl/src/unixify | 30 +
 thirdparty/linux/ntl/src/vec_GF2.c | 650 ++
 thirdparty/linux/ntl/src/vec_GF2E.c | 227 +
 thirdparty/linux/ntl/src/vec_RR.c | 143 +
 thirdparty/linux/ntl/src/vec_ZZ.c | 144 +
 thirdparty/linux/ntl/src/vec_ZZ_p.c | 249 +
 thirdparty/linux/ntl/src/vec_ZZ_pE.c | 179 +
 thirdparty/linux/ntl/src/vec_lzz_p.c | 316 +
 thirdparty/linux/ntl/src/vec_lzz_pE.c | 178 +
 thirdparty/linux/ntl/src/xdouble.c | 999 +++
 422 files changed, 187723 insertions(+), 6122 deletions(-)
 delete mode 100644 thirdparty/linux/mpir_patch/mpir.h
 delete mode 100644 thirdparty/linux/mpir_patch/mpirxx.h
 delete mode 100644 thirdparty/linux/nasm.get
 create mode 100644 thirdparty/linux/ntl/README
 create mode 100644 thirdparty/linux/ntl/doc/BasicThreadPool.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/BasicThreadPool.txt
 create mode 100644 thirdparty/linux/ntl/doc/GF2.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/GF2.txt
 create mode 100644 thirdparty/linux/ntl/doc/GF2E.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/GF2E.txt
 create mode 100644 thirdparty/linux/ntl/doc/GF2EX.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/GF2EX.txt
 create mode 100644 thirdparty/linux/ntl/doc/GF2EXFactoring.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/GF2EXFactoring.txt
 create mode 100644 thirdparty/linux/ntl/doc/GF2X.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/GF2X.txt
 create mode 100644 thirdparty/linux/ntl/doc/GF2XFactoring.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/GF2XFactoring.txt
 create mode 100644 thirdparty/linux/ntl/doc/GF2XVec.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/GF2XVec.txt
 create mode 100644 thirdparty/linux/ntl/doc/HNF.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/HNF.txt
 create mode 100644 thirdparty/linux/ntl/doc/LLL.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/LLL.txt
 create mode 100644 thirdparty/linux/ntl/doc/Lazy.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/Lazy.txt
 create mode 100644 thirdparty/linux/ntl/doc/LazyTable.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/LazyTable.txt
 create mode 100644 thirdparty/linux/ntl/doc/RR.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/RR.txt
 create mode 100644 thirdparty/linux/ntl/doc/SmartPtr.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/SmartPtr.txt
 create mode 100644 thirdparty/linux/ntl/doc/ZZ.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/ZZ.txt
 create mode 100644 thirdparty/linux/ntl/doc/ZZVec.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/ZZVec.txt
 create mode 100644 thirdparty/linux/ntl/doc/ZZX.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/ZZX.txt
 create mode 100644 thirdparty/linux/ntl/doc/ZZXFactoring.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/ZZXFactoring.txt
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_p.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_p.txt
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_pE.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_pE.txt
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_pEX.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_pEX.txt
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_pEXFactoring.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_pEXFactoring.txt
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_pX.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_pX.txt
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_pXFactoring.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/ZZ_pXFactoring.txt
 create mode 100644 thirdparty/linux/ntl/doc/arrow1.gif
 create mode 100644 thirdparty/linux/ntl/doc/arrow2.gif
 create mode 100644 thirdparty/linux/ntl/doc/arrow3.gif
 create mode 100644 thirdparty/linux/ntl/doc/config.txt
 create mode 100644 thirdparty/linux/ntl/doc/conversions.txt
 create mode 100644 thirdparty/linux/ntl/doc/copying.txt
 create mode 100644 thirdparty/linux/ntl/doc/flags.txt
 create mode 100644 thirdparty/linux/ntl/doc/lzz_p.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/lzz_p.txt
 create mode 100644 thirdparty/linux/ntl/doc/lzz_pE.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/lzz_pE.txt
 create mode 100644 thirdparty/linux/ntl/doc/lzz_pEX.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/lzz_pEX.txt
 create mode 100644 thirdparty/linux/ntl/doc/lzz_pEXFactoring.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/lzz_pEXFactoring.txt
 create mode 100644 thirdparty/linux/ntl/doc/lzz_pX.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/lzz_pX.txt
 create mode 100644 thirdparty/linux/ntl/doc/lzz_pXFactoring.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/lzz_pXFactoring.txt
 create mode 100644 thirdparty/linux/ntl/doc/mat_GF2.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/mat_GF2.txt
 create mode 100644 thirdparty/linux/ntl/doc/mat_GF2E.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/mat_GF2E.txt
 create mode 100644 thirdparty/linux/ntl/doc/mat_RR.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/mat_RR.txt
 create mode 100644 thirdparty/linux/ntl/doc/mat_ZZ.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/mat_ZZ.txt
 create mode 100644 thirdparty/linux/ntl/doc/mat_ZZ_p.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/mat_ZZ_p.txt
 create mode 100644 thirdparty/linux/ntl/doc/mat_ZZ_pE.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/mat_ZZ_pE.txt
 create mode 100644 thirdparty/linux/ntl/doc/mat_lzz_p.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/mat_lzz_p.txt
 create mode 100644 thirdparty/linux/ntl/doc/mat_lzz_pE.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/mat_lzz_pE.txt
 create mode 100644 thirdparty/linux/ntl/doc/mat_poly_ZZ.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/mat_poly_ZZ.txt
 create mode 100644 thirdparty/linux/ntl/doc/mat_poly_ZZ_p.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/mat_poly_ZZ_p.txt
 create mode 100644 thirdparty/linux/ntl/doc/mat_poly_lzz_p.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/mat_poly_lzz_p.txt
 create mode 100644 thirdparty/linux/ntl/doc/matrix.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/matrix.txt
 create mode 100644 thirdparty/linux/ntl/doc/names.txt
 create mode 100644 thirdparty/linux/ntl/doc/pair.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/pair.txt
 create mode 100644 thirdparty/linux/ntl/doc/quad_float.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/quad_float.txt
 create mode 100644 thirdparty/linux/ntl/doc/sedscript.txt
 create mode 100644 thirdparty/linux/ntl/doc/tools.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/tools.txt
 create mode 100644 thirdparty/linux/ntl/doc/tour-ack.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-changes.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-ex1.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-ex2.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-ex3.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-ex4.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-ex5.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-ex6.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-ex7.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-examples.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-gf2x.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-gmp.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-impl.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-intro.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-modules.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-roadmap.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-struct.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-time.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-tips.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-unix.html
 create mode 100644 thirdparty/linux/ntl/doc/tour-win.html
 create mode 100644 thirdparty/linux/ntl/doc/tour.html
 create mode 100644 thirdparty/linux/ntl/doc/vec_GF2.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/vec_GF2.txt
 create mode 100644 thirdparty/linux/ntl/doc/vec_GF2E.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/vec_GF2E.txt
 create mode 100644 thirdparty/linux/ntl/doc/vec_RR.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/vec_RR.txt
 create mode 100644 thirdparty/linux/ntl/doc/vec_ZZ.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/vec_ZZ.txt
 create mode 100644 thirdparty/linux/ntl/doc/vec_ZZ_p.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/vec_ZZ_p.txt
 create mode 100644 thirdparty/linux/ntl/doc/vec_ZZ_pE.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/vec_ZZ_pE.txt
 create mode 100644 thirdparty/linux/ntl/doc/vec_lzz_p.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/vec_lzz_p.txt
 create mode 100644 thirdparty/linux/ntl/doc/vec_lzz_pE.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/vec_lzz_pE.txt
 create mode 100644 thirdparty/linux/ntl/doc/vector.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/vector.txt
 create mode 100644 thirdparty/linux/ntl/doc/version.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/version.txt
 create mode 100644 thirdparty/linux/ntl/doc/xdouble.cpp.html
 create mode 100644 thirdparty/linux/ntl/doc/xdouble.txt
 create mode 100644 thirdparty/linux/ntl/include/NTL/BasicThreadPool.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/FFT.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/FacVec.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/GF2.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/GF2E.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/GF2EX.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/GF2EXFactoring.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/GF2X.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/GF2XFactoring.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/GF2XVec.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/HAVE_AVX.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/HAVE_BUILTIN_CLZL.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/HAVE_FMA.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/HAVE_LL_TYPE.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/HNF.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/LLL.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/Lazy.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/LazyTable.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/RR.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/SPMM_ASM.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/SmartPtr.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/WordVector.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/ZZ.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/ZZVec.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/ZZX.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/ZZXFactoring.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/ZZ_p.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/ZZ_pE.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/ZZ_pEX.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/ZZ_pEXFactoring.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/ZZ_pX.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/ZZ_pXFactoring.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/c_lip.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/config.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/config_log.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/ctools.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/def_config.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/fileio.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/g_lip.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/gmp_aux.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/lip.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/lzz_p.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/lzz_pE.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/lzz_pEX.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/lzz_pEXFactoring.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/lzz_pX.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/lzz_pXFactoring.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mach_desc.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mat_GF2.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mat_GF2E.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mat_RR.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mat_ZZ.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mat_ZZ_p.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mat_ZZ_pE.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mat_lzz_p.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mat_lzz_pE.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mat_poly_ZZ.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mat_poly_ZZ_p.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/mat_poly_lzz_p.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/matrix.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/new.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/pair.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/pair_GF2EX_long.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/pair_GF2X_long.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/pair_ZZX_long.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/pair_ZZ_pEX_long.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/pair_ZZ_pX_long.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/pair_lzz_pEX_long.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/pair_lzz_pX_long.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/quad_float.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/sp_arith.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/thread.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/tools.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_GF2.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_GF2E.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_GF2XVec.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_RR.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_ZZ.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_ZZVec.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_ZZ_p.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_ZZ_pE.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_double.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_long.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_lzz_p.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_lzz_pE.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_quad_float.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_ulong.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_vec_GF2.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_vec_GF2E.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_vec_RR.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_vec_ZZ.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_vec_ZZ_p.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_vec_ZZ_pE.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_vec_long.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_vec_lzz_p.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_vec_lzz_pE.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_vec_ulong.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vec_xdouble.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/vector.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/version.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/wizard_log.h
 create mode 100644 thirdparty/linux/ntl/include/NTL/xdouble.h
 create mode 100644 thirdparty/linux/ntl/src/BasicThreadPool.c
 create mode 100644 thirdparty/linux/ntl/src/BerlekampTest.c
 create mode 100644 thirdparty/linux/ntl/src/BerlekampTestIn
 create mode 100644 thirdparty/linux/ntl/src/BerlekampTestOut
 create mode 100644 thirdparty/linux/ntl/src/BitMatTest.c
 create mode 100644 thirdparty/linux/ntl/src/CanZassTest.c
 create mode 100644 thirdparty/linux/ntl/src/CanZassTestIn
 create mode 100644 thirdparty/linux/ntl/src/CanZassTestOut
 create mode 100644 thirdparty/linux/ntl/src/CharPolyTest.c
 create mode 100644 thirdparty/linux/ntl/src/CharPolyTestIn
 create mode 100644 thirdparty/linux/ntl/src/CharPolyTestOut
 create mode 100644 thirdparty/linux/ntl/src/CheckAVX.c
 create mode 100644 thirdparty/linux/ntl/src/CheckCLZL.c
 create mode 100644 thirdparty/linux/ntl/src/CheckCLZLAux.c
 create mode 100644 thirdparty/linux/ntl/src/CheckCompile.c
 create mode 100644 thirdparty/linux/ntl/src/CheckFMA.c
 create mode 100644 thirdparty/linux/ntl/src/CheckFeature.log
 create mode 100644 thirdparty/linux/ntl/src/CheckFlag.log
 create mode 100644 thirdparty/linux/ntl/src/CheckLL.c
 create mode 100644 thirdparty/linux/ntl/src/CheckLLAux.c
 create mode 100644 thirdparty/linux/ntl/src/CopyFeatures
 create mode 100644 thirdparty/linux/ntl/src/DIRNAME
 create mode 100644 thirdparty/linux/ntl/src/DispSettings.c
 create mode 100644 thirdparty/linux/ntl/src/DoConfig
 create mode 100644 thirdparty/linux/ntl/src/ExceptionTest.c
 create mode 100644 thirdparty/linux/ntl/src/FFT.c
 create mode 100644 thirdparty/linux/ntl/src/FacVec.c
 create mode 100644 thirdparty/linux/ntl/src/GF2.c
 create mode 100644 thirdparty/linux/ntl/src/GF2E.c
 create mode 100644 thirdparty/linux/ntl/src/GF2EX.c
 create mode 100644 thirdparty/linux/ntl/src/GF2EXFactoring.c
 create mode 100644 thirdparty/linux/ntl/src/GF2EXTest.c
 create mode 100644 thirdparty/linux/ntl/src/GF2X.c
 create mode 100644 thirdparty/linux/ntl/src/GF2X1.c
 create mode 100644 thirdparty/linux/ntl/src/GF2XFactoring.c
 create mode 100644 thirdparty/linux/ntl/src/GF2XTest.c
 create mode 100644 thirdparty/linux/ntl/src/GF2XTimeTest.c
 create mode 100644 thirdparty/linux/ntl/src/GF2XVec.c
 create mode 100644 thirdparty/linux/ntl/src/G_LLL_FP.c
 create mode 100644 thirdparty/linux/ntl/src/G_LLL_QP.c
 create mode 100644 thirdparty/linux/ntl/src/G_LLL_RR.c
 create mode 100644 thirdparty/linux/ntl/src/G_LLL_XD.c
 create mode 100644 thirdparty/linux/ntl/src/GetPID.c
 create mode 100644 thirdparty/linux/ntl/src/GetPID1.c
 create mode 100644 thirdparty/linux/ntl/src/GetPID2.c
 create mode 100644 thirdparty/linux/ntl/src/GetTime.c
 create mode 100644 thirdparty/linux/ntl/src/GetTime0.c
 create mode 100644 thirdparty/linux/ntl/src/GetTime1.c
 create mode 100644 thirdparty/linux/ntl/src/GetTime2.c
 create mode 100644 thirdparty/linux/ntl/src/GetTime3.c
 create mode 100644 thirdparty/linux/ntl/src/GetTime4.c
 create mode 100644 thirdparty/linux/ntl/src/GetTime5.c
 create mode 100644 thirdparty/linux/ntl/src/HNF.c
 create mode 100644 thirdparty/linux/ntl/src/InitSettings.c
 create mode 100644 thirdparty/linux/ntl/src/LLL.c
 create mode 100644 thirdparty/linux/ntl/src/LLLTest.c
 create mode 100644 thirdparty/linux/ntl/src/LLLTestIn
 create mode 100644 thirdparty/linux/ntl/src/LLLTestOut
 create mode 100644 thirdparty/linux/ntl/src/LLL_FP.c
 create mode 100644 thirdparty/linux/ntl/src/LLL_QP.c
 create mode 100644 thirdparty/linux/ntl/src/LLL_RR.c
 create mode 100644 thirdparty/linux/ntl/src/LLL_XD.c
 create mode 100644 thirdparty/linux/ntl/src/MakeCheckFeature
 create mode 100644 thirdparty/linux/ntl/src/MakeDesc.c
 create mode 100644 thirdparty/linux/ntl/src/MakeDescAux.c
 create mode 100644 thirdparty/linux/ntl/src/MakeGetPID
 create mode 100644 thirdparty/linux/ntl/src/MakeGetTime
 create mode 100644 thirdparty/linux/ntl/src/MatrixTest.c
 create mode 100644 thirdparty/linux/ntl/src/MatrixTestIn
 create mode 100644 thirdparty/linux/ntl/src/MatrixTestOut
 create mode 100644 thirdparty/linux/ntl/src/MoreFacTest.c
 create mode 100644 thirdparty/linux/ntl/src/MoreFacTestIn
 create mode 100644 thirdparty/linux/ntl/src/MulTimeTest.c
 create mode 100644 thirdparty/linux/ntl/src/NOTES
 create mode 100644 thirdparty/linux/ntl/src/Poly1TimeTest.c
 create mode 100644 thirdparty/linux/ntl/src/Poly2TimeTest.c
 create mode 100644 thirdparty/linux/ntl/src/Poly3TimeTest.c
 create mode 100644 thirdparty/linux/ntl/src/QuadTest.c
 create mode 100644 thirdparty/linux/ntl/src/QuadTestIn
 create mode 100644 thirdparty/linux/ntl/src/QuadTestOut
 create mode 100644 thirdparty/linux/ntl/src/QuickTest.c
 create mode 100644 thirdparty/linux/ntl/src/RR.c
 create mode 100644 thirdparty/linux/ntl/src/RRTest.c
 create mode 100644 thirdparty/linux/ntl/src/RRTestIn
 create mode 100644 thirdparty/linux/ntl/src/RRTestOut
 create mode 100644 thirdparty/linux/ntl/src/RemoveProg
 create mode 100644 thirdparty/linux/ntl/src/ResetFeatures
 create mode 100644 thirdparty/linux/ntl/src/TestGetPID.c
 create mode 100644 thirdparty/linux/ntl/src/TestGetTime.c
 create mode 100644 thirdparty/linux/ntl/src/TestScript
 create mode 100644 thirdparty/linux/ntl/src/ThreadTest.c
 create mode 100644 thirdparty/linux/ntl/src/Timing.c
 create mode 100644 thirdparty/linux/ntl/src/VERSION_INFO
 create mode 100644 thirdparty/linux/ntl/src/WINDIR
 create mode 100644 thirdparty/linux/ntl/src/Wizard
 create mode 100644 thirdparty/linux/ntl/src/WizardAux
 create mode 100644 thirdparty/linux/ntl/src/WordVector.c
 create mode 100644 thirdparty/linux/ntl/src/ZZ.c
 create mode 100644 thirdparty/linux/ntl/src/ZZVec.c
 create mode 100644 thirdparty/linux/ntl/src/ZZX.c
 create mode 100644 thirdparty/linux/ntl/src/ZZX1.c
 create mode 100644 thirdparty/linux/ntl/src/ZZXCharPoly.c
 create mode 100644 thirdparty/linux/ntl/src/ZZXFacTest.c
 create mode 100644 thirdparty/linux/ntl/src/ZZXFacTestIn
 create mode 100644 thirdparty/linux/ntl/src/ZZXFacTestOut
 create mode 100644 thirdparty/linux/ntl/src/ZZXFactoring.c
 create mode 100644 thirdparty/linux/ntl/src/ZZ_p.c
 create mode 100644 thirdparty/linux/ntl/src/ZZ_pE.c
 create mode 100644 thirdparty/linux/ntl/src/ZZ_pEX.c
 create mode 100644 thirdparty/linux/ntl/src/ZZ_pEXFactoring.c
 create mode 100644 thirdparty/linux/ntl/src/ZZ_pEXTest.c
 create mode 100644 thirdparty/linux/ntl/src/ZZ_pX.c
 create mode 100644 thirdparty/linux/ntl/src/ZZ_pX1.c
 create mode 100644 thirdparty/linux/ntl/src/ZZ_pXCharPoly.c
 create mode 100644 thirdparty/linux/ntl/src/ZZ_pXFactoring.c
 create mode 100644 thirdparty/linux/ntl/src/all
 create mode 100644 thirdparty/linux/ntl/src/c_lip_impl.h
 create mode 100644 thirdparty/linux/ntl/src/cfile
 create mode 100644 thirdparty/linux/ntl/src/cfileout
 create mode 100644 thirdparty/linux/ntl/src/configure
 create mode 100644 thirdparty/linux/ntl/src/ctools.c
 create mode 100644 thirdparty/linux/ntl/src/def_makefile
 create mode 100644 thirdparty/linux/ntl/src/dosify
 create mode 100644 thirdparty/linux/ntl/src/fileio.c
 create mode 100644 thirdparty/linux/ntl/src/g_lip_impl.h
 create mode 100644 thirdparty/linux/ntl/src/gen_gmp_aux.c
 create mode 100644 thirdparty/linux/ntl/src/hfileout
 create mode 100644 thirdparty/linux/ntl/src/lip.c
 create mode 100644 thirdparty/linux/ntl/src/lzz_p.c
 create mode 100644 thirdparty/linux/ntl/src/lzz_pE.c
 create mode 100644 thirdparty/linux/ntl/src/lzz_pEX.c
 create mode 100644 thirdparty/linux/ntl/src/lzz_pEXFactoring.c
 create mode 100644 thirdparty/linux/ntl/src/lzz_pEXTest.c
 create mode 100644 thirdparty/linux/ntl/src/lzz_pX.c
 create mode 100644 thirdparty/linux/ntl/src/lzz_pX1.c
 create mode 100644 thirdparty/linux/ntl/src/lzz_pXCharPoly.c
 create mode 100644 thirdparty/linux/ntl/src/lzz_pXFactoring.c
 create mode 100644 thirdparty/linux/ntl/src/mach_desc.win
 create mode 100644 thirdparty/linux/ntl/src/makefile
 create mode 100644 thirdparty/linux/ntl/src/mat_GF2.c
 create mode 100644 thirdparty/linux/ntl/src/mat_GF2E.c
 create mode 100644 thirdparty/linux/ntl/src/mat_RR.c
 create mode 100644 thirdparty/linux/ntl/src/mat_ZZ.c
 create mode 100644 thirdparty/linux/ntl/src/mat_ZZ_p.c
 create mode 100644 thirdparty/linux/ntl/src/mat_ZZ_pE.c
 create mode 100644 thirdparty/linux/ntl/src/mat_lzz_p.c
 create mode 100644 thirdparty/linux/ntl/src/mat_lzz_pE.c
 create mode 100644 thirdparty/linux/ntl/src/mat_lzz_pTest.c
 create mode 100644 thirdparty/linux/ntl/src/mat_poly_ZZ.c
 create mode 100644 thirdparty/linux/ntl/src/mat_poly_ZZ_p.c
 create mode 100644 thirdparty/linux/ntl/src/mat_poly_lzz_p.c
 create mode 100644 thirdparty/linux/ntl/src/mfile
 create mode 100644 thirdparty/linux/ntl/src/mfileout
 create mode 100644 thirdparty/linux/ntl/src/newnames.c
 create mode 100644 thirdparty/linux/ntl/src/ppscript
 create mode 100644 thirdparty/linux/ntl/src/quad_float.c
 create mode 100644 thirdparty/linux/ntl/src/subset.c
 create mode 100644 thirdparty/linux/ntl/src/thread.c
 create mode 100644 thirdparty/linux/ntl/src/tools.c
 create mode 100644 thirdparty/linux/ntl/src/unixify
 create mode 100644 thirdparty/linux/ntl/src/vec_GF2.c
 create mode 100644 thirdparty/linux/ntl/src/vec_GF2E.c
 create mode 100644 thirdparty/linux/ntl/src/vec_RR.c
 create mode 100644 thirdparty/linux/ntl/src/vec_ZZ.c
 create mode 100644 thirdparty/linux/ntl/src/vec_ZZ_p.c
 create mode 100644 thirdparty/linux/ntl/src/vec_ZZ_pE.c
 create mode 100644 thirdparty/linux/ntl/src/vec_lzz_p.c
 create mode 100644 thirdparty/linux/ntl/src/vec_lzz_pE.c
 create mode 100644 thirdparty/linux/ntl/src/xdouble.c

diff --git a/.vs/nPSI/v16/Solution.VC.db-shm b/.vs/nPSI/v16/Solution.VC.db-shm
index 85d30e60523da8d0d8dd7d568142c5e73db308b3..bbdf749b5060e3f0e0c0a7a5ff26f3e6f53fdf8c 100644
GIT binary patch
[base85-encoded binary delta for the Visual Studio cache file omitted]
diff --git a/thirdparty/linux/mpir_patch/mpir.h b/thirdparty/linux/mpir_patch/mpir.h
deleted file mode 100644
-#include /* This is Bill Hart's fix, but I've applied it only */
-#include /* on Sun Studio */
-#endif
-
-#if defined (__cplusplus)
-
-#include <cstddef> /* for size_t */
-#include <iosfwd> /* for std::istream, std::ostream, std::string */
-#include <cstdio>
-#endif
-
-
-/* Instantiated by configure. */
-#if ! defined (__GMP_WITHIN_CONFIGURE)
-#define __GMP_BITS_PER_MP_LIMB 64
-#define GMP_LIMB_BITS 64
-#define GMP_NAIL_BITS 0
-#endif
-#define GMP_NUMB_BITS (GMP_LIMB_BITS - GMP_NAIL_BITS)
-#define GMP_NUMB_MASK ((~ __GMP_CAST (mp_limb_t, 0)) >> GMP_NAIL_BITS)
-#define GMP_NUMB_MAX GMP_NUMB_MASK
-#define GMP_NAIL_MASK (~ GMP_NUMB_MASK)
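As an editorial illustration (this sketch is not part of mpir.h or the patch): with the values configure instantiated above, GMP_LIMB_BITS is 64 and GMP_NAIL_BITS is 0, so the derived macros resolve to a full 64-bit numb. A minimal stand-alone re-computation of the same arithmetic:

#include <stdio.h>

int main(void)
{
    /* Mirrors the macro arithmetic above, assuming a 64-bit limb and
       zero nail bits (the configured values in this header). */
    unsigned long long limb_bits = 64, nail_bits = 0;
    unsigned long long numb_bits = limb_bits - nail_bits;   /* GMP_NUMB_BITS */
    unsigned long long numb_mask = ~0ULL >> nail_bits;      /* GMP_NUMB_MASK */
    unsigned long long nail_mask = ~numb_mask;              /* GMP_NAIL_MASK */

    /* With no nails, every bit of a limb carries numeric data. */
    printf("numb_bits=%llu numb_mask=%#llx nail_mask=%#llx\n",
           numb_bits, numb_mask, nail_mask);
    return 0;
}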
-
-
-#ifndef __GNU_MP__
-#define __GNU_MP__ 4
-
-#define __need_size_t /* tell gcc stddef.h we only want size_t */
-#if ! defined (__cplusplus)
-#include <stddef.h> /* for size_t */
-#endif
-#undef __need_size_t
-
-/* Instantiated by configure. */
-#if ! defined (__GMP_WITHIN_CONFIGURE)
-/* #undef _LONG_LONG_LIMB */
-#define __GMP_LIBGMP_DLL 0
-#endif
-
-/* #if defined(__GMP_WITHIN_CONFIGURE) && defined(_WIN64) */
-#ifdef __WIN64
-#define _LONG_LONG_LIMB 1
-#endif
-
-/* __STDC__ - some ANSI compilers define this only to 0, hence the use of
- "defined" and not "__STDC__-0". In particular Sun workshop C 5.0
- sets __STDC__ to 0, but requires "##" for token pasting.
-
- _AIX - gnu ansidecl.h asserts that all known AIX compilers are ANSI but
- don't always define __STDC__.
-
- __DECC - current versions of DEC C (5.9 for instance) for alpha are ANSI,
- but don't define __STDC__ in their default mode. Don't know if old
- versions might have been K&R, but let's not worry about that unless
- someone is still using one.
-
- _mips - gnu ansidecl.h says the RISC/OS MIPS compiler is ANSI in SVR4
- mode, but doesn't define __STDC__.
-
- _MSC_VER - Microsoft C is ANSI, but __STDC__ is undefined unless the /Za
- option is given (in which case it's 1).
-
- _WIN32 - tested for by gnu ansidecl.h, no doubt on the assumption that
- all w32 compilers are ansi.
-
- Note: This same set of tests is used by gen-psqr.c and
- demos/expr/expr-impl.h, so if anything needs adding, then be sure to
- update those too. */
-
-#if defined (__STDC__) \
- || defined (__cplusplus) \
- || defined (_AIX) \
- || defined (__DECC) \
- || (defined (__mips) && defined (_SYSTYPE_SVR4)) \
- || defined (_MSC_VER) \
- || defined (_WIN32)
-#define __GMP_HAVE_CONST 1
-#define __GMP_HAVE_PROTOTYPES 1
-#define __GMP_HAVE_TOKEN_PASTE 1
-#else
-#define __GMP_HAVE_CONST 0
-#define __GMP_HAVE_PROTOTYPES 0
-#define __GMP_HAVE_TOKEN_PASTE 0
-#endif
-
-
-#if __GMP_HAVE_CONST
-#define __gmp_const const
-#define __gmp_signed signed
-#else
-#define __gmp_const
-#define __gmp_signed
-#endif
-
-
-/* __GMP_DECLSPEC supports Windows DLL versions of libmpir, and is empty in
- all other circumstances.
-
- When compiling objects for libmpir, __GMP_DECLSPEC is an export directive,
- or when compiling for an application it's an import directive. The two
- cases are differentiated by __GMP_WITHIN_GMP defined by the GMP Makefiles
- (and not defined from an application).
-
- __GMP_DECLSPEC_XX is similarly used for libmpirxx. __GMP_WITHIN_GMPXX
- indicates when building libmpirxx, and in that case libmpirxx functions are
- exports, but libmpir functions which might get called are imports.
-
- libmp.la uses __GMP_DECLSPEC, just as if it were libmpir.la. libmpir and
- libmp don't call each other, so there's no conflict or confusion.
-
- Libtool DLL_EXPORT define is not used.
-
- There's no attempt to support GMP built both static and DLL. Doing so
- would mean applications would have to tell us which of the two is going
- to be used when linking, and that seems very tedious and error prone if
- using GMP by hand, and equally tedious from a package since autoconf and
- automake don't give much help.
-
- __GMP_DECLSPEC is required on all documented global functions and
- variables, the various internals in gmp-impl.h etc can be left unadorned.
- But internals used by the test programs or speed measuring programs
- should have __GMP_DECLSPEC, and certainly constants or variables must
- have it or the wrong address will be resolved.
-
- In gcc __declspec can go at either the start or end of a prototype.
-
- In Microsoft C __declspec must go at the start, or after the type like
- "void __declspec(...) *foo()". There's no __dllexport or anything to
- guard against someone foolish #defining dllexport. _export used to be
- available, but no longer.
-
- In Borland C _export still exists, but needs to go after the type, like
- "void _export foo();". Would have to change the __GMP_DECLSPEC syntax to
- make use of that. Probably more trouble than it's worth. */
-
-#if defined (__GNUC__)
-#define __GMP_DECLSPEC_EXPORT __declspec(__dllexport__)
-#define __GMP_DECLSPEC_IMPORT __declspec(__dllimport__)
-#endif
-#if defined (_MSC_VER) || defined (__BORLANDC__)
-#define __GMP_DECLSPEC_EXPORT __declspec(dllexport)
-#define __GMP_DECLSPEC_IMPORT __declspec(dllimport)
-#endif
-#ifdef __WATCOMC__
-#define __GMP_DECLSPEC_EXPORT __export
-#define __GMP_DECLSPEC_IMPORT __import
-#endif
-#ifdef __IBMC__
-#define __GMP_DECLSPEC_EXPORT _Export
-#define __GMP_DECLSPEC_IMPORT _Import
-#endif
-
-#if defined( _MSC_VER )
-# if defined( MSC_BUILD_DLL )
-# define __GMP_LIBGMP_DLL 1
-# define __GMP_WITHIN_GMP 1
-# define __GMP_WITHIN_GMPXX 1
-# elif defined( MSC_USE_DLL )
-# define __GMP_LIBGMP_DLL 1
-# endif
-#endif
-
-#if __GMP_LIBGMP_DLL
-#if __GMP_WITHIN_GMP
-/* compiling to go into a DLL libmpir */
-#define __GMP_DECLSPEC __GMP_DECLSPEC_EXPORT
-#else
-/* compiling to go into an application which will link to a DLL libmpir */
-#define __GMP_DECLSPEC __GMP_DECLSPEC_IMPORT
-#endif
-#else
-/* all other cases */
-#define __GMP_DECLSPEC
-#endif
-
-
-#ifdef __GMP_SHORT_LIMB
-typedef unsigned int mp_limb_t;
-typedef int mp_limb_signed_t;
-#else
-#ifdef _LONG_LONG_LIMB
-typedef unsigned long long int mp_limb_t;
-typedef long long int mp_limb_signed_t;
-#else
-typedef unsigned long int mp_limb_t;
-typedef long int mp_limb_signed_t;
-#endif
-#endif
-
-#ifdef _WIN64
-#define BITS_PER_UI BITS_PER_MP_LIMB
-typedef mp_limb_t mpir_ui;
-typedef mp_limb_signed_t mpir_si;
-typedef mpir_ui mp_bitcnt_t;
-#else
-#define BITS_PER_UI BITS_PER_ULONG
-typedef unsigned long mpir_ui;
-typedef long mpir_si;
-typedef mpir_ui mp_bitcnt_t;
-#endif
-#define GMP_UI_MAX ((mpir_ui)(~(mpir_ui)0))
-#define GMP_UI_HIBIT (GMP_UI_MAX ^ (GMP_UI_MAX >> 1))
-#define GMP_SI_MAX ((mpir_si)(GMP_UI_MAX ^ GMP_UI_HIBIT))
-#define GMP_SI_MIN ((mpir_si)GMP_UI_HIBIT)
-#define __GMP_BITCNT_MAX (~(mp_bitcnt_t)0)
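An editorial aside, not part of the header: the GMP_UI_/GMP_SI_ limits above are plain two's-complement identities. A minimal sketch with unsigned long standing in for mpir_ui (on a typical LP64 target this prints the familiar ULONG_MAX, LONG_MAX and LONG_MIN values):

#include <stdio.h>

int main(void)
{
    unsigned long ui_max = ~(unsigned long)0;      /* GMP_UI_MAX */
    unsigned long hibit  = ui_max ^ (ui_max >> 1); /* GMP_UI_HIBIT: top bit only */
    long si_max = (long)(ui_max ^ hibit);          /* GMP_SI_MAX: all bits but the top */
    long si_min = (long)hibit;                     /* GMP_SI_MIN, same cast as the macro */

    printf("ui_max=%lu si_max=%ld si_min=%ld\n", ui_max, si_max, si_min);
    return 0;
}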
-
-/* For reference, note that the name __mpz_struct gets into C++ mangled
- function names, which means although the "__" suggests an internal, we
- must leave this name for binary compatibility. */
-typedef struct
-{
- int _mp_alloc; /* Number of *limbs* allocated and pointed
- to by the _mp_d field. */
- int _mp_size; /* abs(_mp_size) is the number of limbs the
- last field points to. If _mp_size is
- negative this is a negative number. */
- mp_limb_t *_mp_d; /* Pointer to the limbs. */
-} __mpz_struct;
-
-#endif /* __GNU_MP__ */
-
-typedef __mpz_struct mpz_t[1];
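Editorial illustration (not from the header): the _mp_size convention described in the struct comments can be observed through the public API alone. A minimal sketch, assuming a program built against this mpir.h and linked with MPIR:

#include <stdio.h>
#include <mpir.h>

int main(void)
{
    mpz_t x;
    mpz_init_set_si(x, -5);  /* fits in one limb; the sign lives in _mp_size */

    printf("limbs used: %lu\n", (unsigned long)mpz_size(x));        /* abs(_mp_size) == 1 */
    printf("sign:       %d\n", mpz_sgn(x));                         /* -1, i.e. _mp_size < 0 */
    printf("low limb:   %lu\n", (unsigned long)mpz_getlimbn(x, 0)); /* magnitude 5 */

    mpz_clear(x);
    return 0;
}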
-
-typedef mp_limb_t * mp_ptr;
-typedef __gmp_const mp_limb_t * mp_srcptr;
-#if defined( _WIN64)
-#define __GMP_MP_SIZE_T_INT 0
-typedef long long int mp_size_t;
-typedef long int mp_exp_t;
-#else
-#define __GMP_MP_SIZE_T_INT 0
-typedef long int mp_size_t;
-typedef long int mp_exp_t;
-#endif
-
-typedef struct
-{
- __mpz_struct _mp_num;
- __mpz_struct _mp_den;
-} __mpq_struct;
-
-typedef __mpq_struct mpq_t[1];
-
-typedef struct
-{
- int _mp_prec; /* Max precision, in number of `mp_limb_t's.
- Set by mpf_init and modified by
- mpf_set_prec. The area pointed to by the
- _mp_d field contains `prec' + 1 limbs. */
- int _mp_size; /* abs(_mp_size) is the number of limbs the
- last field points to. If _mp_size is
- negative this is a negative number. */
- mp_exp_t _mp_exp; /* Exponent, in the base of `mp_limb_t'. */
- mp_limb_t *_mp_d; /* Pointer to the limbs. */
-} __mpf_struct;
-
-typedef __mpf_struct mpf_t[1];
-
-/* Available random number generation algorithms. */
-typedef enum
-{
- GMP_RAND_ALG_DEFAULT = 0,
- GMP_RAND_ALG_LC = GMP_RAND_ALG_DEFAULT /* Linear congruential. */
-} gmp_randalg_t;
-
-/* Random state struct. */
-typedef struct
-{
- mpz_t _mp_seed; /* _mp_d member points to state of the generator. */
- gmp_randalg_t _mp_alg; /* Currently unused. */
- union {
- void *_mp_lc; /* Pointer to function pointers structure. */
- } _mp_algdata;
-} __gmp_randstate_struct;
-typedef __gmp_randstate_struct gmp_randstate_t[1];
-
-/* Types for function declarations in gmp files. */
-/* ??? Should not pollute user name space with these ??? */
-typedef __gmp_const __mpz_struct *mpz_srcptr;
-typedef __mpz_struct *mpz_ptr;
-typedef __gmp_const __mpf_struct *mpf_srcptr;
-typedef __mpf_struct *mpf_ptr;
-typedef __gmp_const __mpq_struct *mpq_srcptr;
-typedef __mpq_struct *mpq_ptr;
-
-
-#if __GMP_LIBGMP_DLL
-#if __GMP_WITHIN_GMPXX
-/* compiling to go into a DLL libmpirxx */
-#define __GMP_DECLSPEC_XX __GMP_DECLSPEC_EXPORT
-#else
-/* compiling to go into an application which will link to a DLL libmpirxx */
-#define __GMP_DECLSPEC_XX __GMP_DECLSPEC_IMPORT
-#endif
-#else
-/* all other cases */
-#define __GMP_DECLSPEC_XX
-#endif
-
-
-#if __GMP_HAVE_PROTOTYPES
-#define __GMP_PROTO(x) x
-#else
-#define __GMP_PROTO(x) ()
-#endif
-
-#ifndef __MPN
-#if __GMP_HAVE_TOKEN_PASTE
-#define __MPN(x) __gmpn_##x
-#else
-#define __MPN(x) __gmpn_/**/x
-#endif
-#endif
-
-/* For reference, "defined(EOF)" cannot be used here. In g++ 2.95.4,
- <iostream> defines EOF but not FILE. */
-#if defined (FILE) \
- || defined (H_STDIO) \
- || defined (_H_STDIO) /* AIX */ \
- || defined (_STDIO_H) /* glibc, Sun, SCO */ \
- || defined (_STDIO_H_) /* BSD, OSF */ \
- || defined (__STDIO_H) /* Borland */ \
- || defined (__STDIO_H__) /* IRIX */ \
- || defined (_STDIO_INCLUDED) /* HPUX */ \
- || defined (_FILE_DEFINED) /* Microsoft */ \
- || defined (__STDIO__) /* Apple MPW MrC */ \
- || defined (_MSL_STDIO_H) /* Metrowerks */ \
- || defined (_STDIO_H_INCLUDED) /* QNX4 */ \
- || defined (_ISO_STDIO_ISO_H) /* Sun C++ */
-#define _GMP_H_HAVE_FILE 1
-#endif
-
-/* In ISO C, if a prototype involving "struct obstack *" is given without
- that structure defined, then the struct is scoped down to just the
- prototype, causing a conflict if it's subsequently defined for real. So
- only give prototypes if we've got obstack.h. */
-#if defined (_OBSTACK_H) /* glibc */
-#define _GMP_H_HAVE_OBSTACK 1
-#endif
-
-/* The prototypes for gmp_vprintf etc are provided only if va_list is
- available, via an application having included <stdarg.h> or <varargs.h>.
- Usually va_list is a typedef so can't be tested directly, but C99
- specifies that va_start is a macro (and it was normally a macro on past
- systems too), so look for that.
-
- <stdio.h> will define some sort of va_list for vprintf and vfprintf, but
- let's not bother trying to use that since it's not standard and since
- application uses for gmp_vprintf etc will almost certainly require the
- whole <stdarg.h> or <varargs.h> anyway. */
-
-#ifdef va_start
-#define _GMP_H_HAVE_VA_LIST 1
-#endif
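Editorial illustration: the va_start test above is what gates the gmp_vprintf family, so a program that includes <stdarg.h> before mpir.h can build varargs wrappers over it. A hypothetical logging helper (the wrapper name log_value is invented for this sketch):

#include <stdarg.h>
#include <mpir.h>

/* Forward fmt/args to gmp_vprintf, whose prototype is visible because
   <stdarg.h> defined va_start before mpir.h was read. */
static int log_value(const char *fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    int n = gmp_vprintf(fmt, ap);
    va_end(ap);
    return n;
}

int main(void)
{
    mpz_t z;
    mpz_init_set_ui(z, 12345u);
    log_value("z = %Zd\n", z);   /* %Zd formats an mpz_t */
    mpz_clear(z);
    return 0;
}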
*/ - -#if defined (__cplusplus) -#define __GMP_NOTHROW throw () -#else -#define __GMP_NOTHROW -#endif - -/* PORTME: What other compilers have a useful "extern inline"? "static - inline" would be an acceptable substitute if the compiler (or linker) - discards unused statics. */ - -/* gcc has __inline__ in all modes, including strict ansi. Give a prototype - for an inline too, so as to correctly specify "dllimport" on windows, in - case the function is called rather than inlined. */ - -#ifdef __GNUC__ -#if defined(__APPLE_CC__) && (__APPLE_CC__ != 1) /* FSF GCC sets this flag to 1 on Apple machines */ - -#if ! (__APPLE_CC__ >= 5465 && __STDC_VERSION__ >= 199901L) -#define __GMP_EXTERN_INLINE extern __inline__ -#define __GMP_INLINE_PROTOTYPES 1 -#endif - -#else /*GNU CC*/ - -#if defined(__GNUC_STDC_INLINE__) || defined (__GNUC_GNU_INLINE__) -#define __GMP_EXTERN_INLINE extern __inline__ __attribute__((__gnu_inline__)) -#else -#define __GMP_EXTERN_INLINE extern __inline__ -#endif -#define __GMP_INLINE_PROTOTYPES 1 - -#endif -#endif - -/* DEC C (eg. version 5.9) supports "static __inline foo()", even in -std1 - strict ANSI mode. Inlining is done even when not optimizing (ie. -O0 - mode, which is the default), but an unnecessary local copy of foo is - emitted unless -O is used. "extern __inline" is accepted, but the - "extern" appears to be ignored, ie. it becomes a plain global function - but which is inlined within its file. Don't know if all old versions of - DEC C supported __inline, but as a start let's do the right thing for - current versions. */ -#ifdef __DECC -#define __GMP_EXTERN_INLINE static __inline -#endif - -/* SCO OpenUNIX 8 cc supports "static inline foo()" but not in -Xc strict - ANSI mode (__STDC__ is 1 in that mode). Inlining only actually takes - place under -O. Without -O "foo" seems to be emitted whether it's used - or not, which is wasteful. "extern inline foo()" isn't useful, the - "extern" is apparently ignored, so foo is inlined if possible but also - emitted as a global, which causes multiple definition errors when - building a shared libmpir. */ -#ifdef __SCO_VERSION__ -#if __SCO_VERSION__ > 400000000 && __STDC__ != 1 \ - && ! defined (__GMP_EXTERN_INLINE) -#define __GMP_EXTERN_INLINE static inline -#endif -#endif - -#if defined _MSC_VER -#define __GMP_EXTERN_INLINE static __inline -#endif - -/* C++ always has "inline" and since it's a normal feature the linker should - discard duplicate non-inlined copies, or if it doesn't then that's a - problem for everyone, not just GMP. */ -#if defined (__cplusplus) && ! defined (__GMP_EXTERN_INLINE) -#define __GMP_EXTERN_INLINE inline -#endif - -/* Don't do any inlining within a configure run, since if the compiler ends - up emitting copies of the code into the object file it can end up - demanding the various support routines (like mpn_popcount) for linking, - making the "alloca" test and perhaps others fail. And on hppa ia64 a - pre-release gcc 3.2 was seen not respecting the "extern" in "extern - __inline__", triggering this problem too. */ -#if defined (__GMP_WITHIN_CONFIGURE) && ! __GMP_WITHIN_CONFIGURE_INLINE -#undef __GMP_EXTERN_INLINE -#endif - -/* By default, don't give a prototype when there's going to be an inline - version. Note in particular that Cray C++ objects to the combination of - prototype and inline. */ -#ifdef __GMP_EXTERN_INLINE -#ifndef __GMP_INLINE_PROTOTYPES -#define __GMP_INLINE_PROTOTYPES 0 -#endif -#else -#define __GMP_INLINE_PROTOTYPES 1 -#endif - - -#define __GMP_ABS(x) ((x) >= 0 ? 
(x) : -(x)) -#define __GMP_MAX(h,i) ((h) > (i) ? (h) : (i)) - -/* __GMP_USHRT_MAX is not "~ (unsigned short) 0" because short is promoted - to int by "~". */ -#define __GMP_UINT_MAX (~ (unsigned) 0) -#define __GMP_ULONG_MAX (~ (unsigned long) 0) -#define __GMP_USHRT_MAX ((unsigned short) ~0) - - -/* __builtin_expect is in gcc 3.0, and not in 2.95. */ -#if __GMP_GNUC_PREREQ (3,0) -#define __GMP_LIKELY(cond) __builtin_expect ((cond) != 0, 1) -#define __GMP_UNLIKELY(cond) __builtin_expect ((cond) != 0, 0) -#else -#define __GMP_LIKELY(cond) (cond) -#define __GMP_UNLIKELY(cond) (cond) -#endif - -/* Allow direct user access to numerator and denominator of a mpq_t object. */ -#define mpq_numref(Q) (&((Q)->_mp_num)) -#define mpq_denref(Q) (&((Q)->_mp_den)) - - -#if defined (__cplusplus) -extern "C" { -using std::FILE; -#endif - -#define mp_set_memory_functions __gmp_set_memory_functions -__GMP_DECLSPEC void mp_set_memory_functions __GMP_PROTO ((void *(*) (size_t), - void *(*) (void *, size_t, size_t), - void (*) (void *, size_t))) __GMP_NOTHROW; - -#define mp_get_memory_functions __gmp_get_memory_functions -__GMP_DECLSPEC void mp_get_memory_functions __GMP_PROTO ((void *(**) (size_t), - void *(**) (void *, size_t, size_t), - void (**) (void *, size_t))) __GMP_NOTHROW; - -#define mp_bits_per_limb __gmp_bits_per_limb -__GMP_DECLSPEC extern __gmp_const int mp_bits_per_limb; - -#define gmp_errno __gmp_errno -__GMP_DECLSPEC extern int gmp_errno; - -#define gmp_version __gmp_version -__GMP_DECLSPEC extern __gmp_const char * __gmp_const gmp_version; - -#define mpir_version __mpir_version -__GMP_DECLSPEC extern __gmp_const char * __gmp_const mpir_version; - - -/**************** Random number routines. ****************/ - -#define gmp_randinit_default __gmp_randinit_default -__GMP_DECLSPEC void gmp_randinit_default __GMP_PROTO ((gmp_randstate_t)); - -#define gmp_randinit_lc_2exp __gmp_randinit_lc_2exp -__GMP_DECLSPEC void gmp_randinit_lc_2exp __GMP_PROTO ((gmp_randstate_t, - mpz_srcptr, mpir_ui, - mp_bitcnt_t)); - -#define gmp_randinit_lc_2exp_size __gmp_randinit_lc_2exp_size -__GMP_DECLSPEC int gmp_randinit_lc_2exp_size __GMP_PROTO ((gmp_randstate_t, mp_bitcnt_t)); - -#define gmp_randinit_mt __gmp_randinit_mt -__GMP_DECLSPEC void gmp_randinit_mt __GMP_PROTO ((gmp_randstate_t)); - -#define gmp_randinit_set __gmp_randinit_set -__GMP_DECLSPEC void gmp_randinit_set __GMP_PROTO ((gmp_randstate_t, __gmp_const __gmp_randstate_struct *)); - -#define gmp_randseed __gmp_randseed -__GMP_DECLSPEC void gmp_randseed __GMP_PROTO ((gmp_randstate_t, mpz_srcptr)); - -#define gmp_randseed_ui __gmp_randseed_ui -__GMP_DECLSPEC void gmp_randseed_ui __GMP_PROTO ((gmp_randstate_t, mpir_ui)); - -#define gmp_randclear __gmp_randclear -__GMP_DECLSPEC void gmp_randclear __GMP_PROTO ((gmp_randstate_t)); - -#define gmp_urandomb_ui __gmp_urandomb_ui -__GMP_DECLSPEC mpir_ui gmp_urandomb_ui __GMP_PROTO ((gmp_randstate_t, mpir_ui)); - -#define gmp_urandomm_ui __gmp_urandomm_ui -__GMP_DECLSPEC mpir_ui gmp_urandomm_ui __GMP_PROTO ((gmp_randstate_t, mpir_ui)); - - -/**************** Formatted output routines. 
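
[Editor's note: a short usage sketch for the random-state API declared above; nothing here is from mpir.h itself. gmp_randinit_default selects the library's default generator, and gmp_urandomb_ui returns the requested number of uniform bits as an mpir_ui.]

#include <mpir.h>
#include <stdio.h>

int main(void)
{
    gmp_randstate_t st;           /* __gmp_randstate_struct[1], as defined above */
    gmp_randinit_default(st);
    gmp_randseed_ui(st, 12345u);  /* fixed seed, reproducible stream */
    printf("32 random bits: %lu\n", (unsigned long) gmp_urandomb_ui(st, 32));
    gmp_randclear(st);
    return 0;
}
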
****************/ - -#define gmp_asprintf __gmp_asprintf -__GMP_DECLSPEC int gmp_asprintf __GMP_PROTO ((char **, __gmp_const char *, ...)); - -#define gmp_fprintf __gmp_fprintf -#ifdef _GMP_H_HAVE_FILE -__GMP_DECLSPEC int gmp_fprintf __GMP_PROTO ((FILE *, __gmp_const char *, ...)); -#endif - -#define gmp_obstack_printf __gmp_obstack_printf -#if defined (_GMP_H_HAVE_OBSTACK) -__GMP_DECLSPEC int gmp_obstack_printf __GMP_PROTO ((struct obstack *, __gmp_const char *, ...)); -#endif - -#define gmp_obstack_vprintf __gmp_obstack_vprintf -#if defined (_GMP_H_HAVE_OBSTACK) && defined (_GMP_H_HAVE_VA_LIST) -__GMP_DECLSPEC int gmp_obstack_vprintf __GMP_PROTO ((struct obstack *, __gmp_const char *, va_list)); -#endif - -#define gmp_printf __gmp_printf -__GMP_DECLSPEC int gmp_printf __GMP_PROTO ((__gmp_const char *, ...)); - -#define gmp_snprintf __gmp_snprintf -__GMP_DECLSPEC int gmp_snprintf __GMP_PROTO ((char *, size_t, __gmp_const char *, ...)); - -#define gmp_sprintf __gmp_sprintf -__GMP_DECLSPEC int gmp_sprintf __GMP_PROTO ((char *, __gmp_const char *, ...)); - -#define gmp_vasprintf __gmp_vasprintf -#if defined (_GMP_H_HAVE_VA_LIST) -__GMP_DECLSPEC int gmp_vasprintf __GMP_PROTO ((char **, __gmp_const char *, va_list)); -#endif - -#define gmp_vfprintf __gmp_vfprintf -#if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST) -__GMP_DECLSPEC int gmp_vfprintf __GMP_PROTO ((FILE *, __gmp_const char *, va_list)); -#endif - -#define gmp_vprintf __gmp_vprintf -#if defined (_GMP_H_HAVE_VA_LIST) -__GMP_DECLSPEC int gmp_vprintf __GMP_PROTO ((__gmp_const char *, va_list)); -#endif - -#define gmp_vsnprintf __gmp_vsnprintf -#if defined (_GMP_H_HAVE_VA_LIST) -__GMP_DECLSPEC int gmp_vsnprintf __GMP_PROTO ((char *, size_t, __gmp_const char *, va_list)); -#endif - -#define gmp_vsprintf __gmp_vsprintf -#if defined (_GMP_H_HAVE_VA_LIST) -__GMP_DECLSPEC int gmp_vsprintf __GMP_PROTO ((char *, __gmp_const char *, va_list)); -#endif - - -/**************** Formatted input routines. ****************/ - -#define gmp_fscanf __gmp_fscanf -#ifdef _GMP_H_HAVE_FILE -__GMP_DECLSPEC int gmp_fscanf __GMP_PROTO ((FILE *, __gmp_const char *, ...)); -#endif - -#define gmp_scanf __gmp_scanf -__GMP_DECLSPEC int gmp_scanf __GMP_PROTO ((__gmp_const char *, ...)); - -#define gmp_sscanf __gmp_sscanf -__GMP_DECLSPEC int gmp_sscanf __GMP_PROTO ((__gmp_const char *, __gmp_const char *, ...)); - -#define gmp_vfscanf __gmp_vfscanf -#if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST) -__GMP_DECLSPEC int gmp_vfscanf __GMP_PROTO ((FILE *, __gmp_const char *, va_list)); -#endif - -#define gmp_vscanf __gmp_vscanf -#if defined (_GMP_H_HAVE_VA_LIST) -__GMP_DECLSPEC int gmp_vscanf __GMP_PROTO ((__gmp_const char *, va_list)); -#endif - -#define gmp_vsscanf __gmp_vsscanf -#if defined (_GMP_H_HAVE_VA_LIST) -__GMP_DECLSPEC int gmp_vsscanf __GMP_PROTO ((__gmp_const char *, __gmp_const char *, va_list)); -#endif - - -/**************** Integer (i.e. Z) routines. 
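
[Editor's note: an illustrative call for the gmp_printf family declared above, not part of mpir.h. The Z conversion consumes an mpz_t operand; the rest of the format string follows printf.]

#include <mpir.h>

int main(void)
{
    mpz_t n;
    mpz_init_set_str(n, "123456789012345678901234567890", 10);
    /* mpz_sizeinbase may overestimate the digit count by one */
    gmp_printf("n = %Zd (~%lu decimal digits)\n",
               n, (unsigned long) mpz_sizeinbase(n, 10));
    mpz_clear(n);
    return 0;
}
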
****************/ - -#define _mpz_realloc __gmpz_realloc -#define mpz_realloc __gmpz_realloc -__GMP_DECLSPEC void *_mpz_realloc __GMP_PROTO ((mpz_ptr, mp_size_t)); - -#define mpz_abs __gmpz_abs -#define __GMP_MPZ_ABS_MIN_ALLOC(x,y) (__GMP_ABS(y->_mp_size)) -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_abs) -__GMP_DECLSPEC void mpz_abs __GMP_PROTO ((mpz_ptr, mpz_srcptr)); -#endif - -#define __GMP_MPZ_ADD_MIN_ALLOC(x,y,z) (__GMP_MAX(__GMP_ABS(y->_mp_size),__GMP_ABS(z->_mp_size))+1) -#define mpz_add __gmpz_add -__GMP_DECLSPEC void mpz_add __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define __GMP_MPZ_ADD_UI_MIN_ALLOC(x,y,z) (__GMP_MAX(__GMP_ABS(y->_mp_size),1+(GMP_BITS_PER_UI-1)/GMP_NUMB_BITS)+1) -#define mpz_add_ui __gmpz_add_ui -__GMP_DECLSPEC void mpz_add_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_addmul __gmpz_addmul -__GMP_DECLSPEC void mpz_addmul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_addmul_ui __gmpz_addmul_ui -__GMP_DECLSPEC void mpz_addmul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_and __gmpz_and -__GMP_DECLSPEC void mpz_and __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_array_init __gmpz_array_init -__GMP_DECLSPEC void mpz_array_init __GMP_PROTO ((mpz_ptr, mp_size_t, mp_size_t)); - -#define mpz_bin_ui __gmpz_bin_ui -__GMP_DECLSPEC void mpz_bin_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_bin_uiui __gmpz_bin_uiui -__GMP_DECLSPEC void mpz_bin_uiui __GMP_PROTO ((mpz_ptr, mpir_ui, mpir_ui)); - -#define mpz_cdiv_q __gmpz_cdiv_q -__GMP_DECLSPEC void mpz_cdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_cdiv_q_2exp __gmpz_cdiv_q_2exp -__GMP_DECLSPEC void mpz_cdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t)); - -#define mpz_cdiv_q_ui __gmpz_cdiv_q_ui -__GMP_DECLSPEC mpir_ui mpz_cdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_cdiv_qr __gmpz_cdiv_qr -__GMP_DECLSPEC void mpz_cdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_cdiv_qr_ui __gmpz_cdiv_qr_ui -__GMP_DECLSPEC mpir_ui mpz_cdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_cdiv_r __gmpz_cdiv_r -__GMP_DECLSPEC void mpz_cdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_cdiv_r_2exp __gmpz_cdiv_r_2exp -__GMP_DECLSPEC void mpz_cdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t)); - -#define mpz_cdiv_r_ui __gmpz_cdiv_r_ui -__GMP_DECLSPEC mpir_ui mpz_cdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_cdiv_ui __gmpz_cdiv_ui -__GMP_DECLSPEC mpir_ui mpz_cdiv_ui __GMP_PROTO ((mpz_srcptr, mpir_ui)) __GMP_ATTRIBUTE_PURE; - -#define mpz_clear __gmpz_clear -__GMP_DECLSPEC void mpz_clear __GMP_PROTO ((mpz_ptr)); - -#define mpz_clears __gmpz_clears -__GMP_DECLSPEC void mpz_clears __GMP_PROTO ((mpz_ptr, ...)); - -#define mpz_clrbit __gmpz_clrbit -__GMP_DECLSPEC void mpz_clrbit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t)); - -#define mpz_cmp __gmpz_cmp -__GMP_DECLSPEC int mpz_cmp __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_cmp_d __gmpz_cmp_d -__GMP_DECLSPEC int mpz_cmp_d __GMP_PROTO ((mpz_srcptr, double)) __GMP_ATTRIBUTE_PURE; - -#define _mpz_cmp_si __gmpz_cmp_si -__GMP_DECLSPEC int _mpz_cmp_si __GMP_PROTO ((mpz_srcptr, mpir_si)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define _mpz_cmp_ui __gmpz_cmp_ui -__GMP_DECLSPEC int _mpz_cmp_ui __GMP_PROTO ((mpz_srcptr, mpir_ui)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_cmpabs 
__gmpz_cmpabs -__GMP_DECLSPEC int mpz_cmpabs __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_cmpabs_d __gmpz_cmpabs_d -__GMP_DECLSPEC int mpz_cmpabs_d __GMP_PROTO ((mpz_srcptr, double)) __GMP_ATTRIBUTE_PURE; - -#define mpz_cmpabs_ui __gmpz_cmpabs_ui -__GMP_DECLSPEC int mpz_cmpabs_ui __GMP_PROTO ((mpz_srcptr, mpir_ui)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_com __gmpz_com -__GMP_DECLSPEC void mpz_com __GMP_PROTO ((mpz_ptr, mpz_srcptr)); - -#define mpz_combit __gmpz_combit -__GMP_DECLSPEC void mpz_combit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t)); - -#define mpz_congruent_p __gmpz_congruent_p -__GMP_DECLSPEC int mpz_congruent_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE; - -#define mpz_congruent_2exp_p __gmpz_congruent_2exp_p -__GMP_DECLSPEC int mpz_congruent_2exp_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_congruent_ui_p __gmpz_congruent_ui_p -__GMP_DECLSPEC int mpz_congruent_ui_p __GMP_PROTO ((mpz_srcptr, mpir_ui, mpir_ui)) __GMP_ATTRIBUTE_PURE; - -#define mpz_divexact __gmpz_divexact -__GMP_DECLSPEC void mpz_divexact __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_divexact_ui __gmpz_divexact_ui -__GMP_DECLSPEC void mpz_divexact_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_divisible_p __gmpz_divisible_p -__GMP_DECLSPEC int mpz_divisible_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE; - -#define mpz_divisible_ui_p __gmpz_divisible_ui_p -__GMP_DECLSPEC int mpz_divisible_ui_p __GMP_PROTO ((mpz_srcptr, mpir_ui)) __GMP_ATTRIBUTE_PURE; - -#define mpz_divisible_2exp_p __gmpz_divisible_2exp_p -__GMP_DECLSPEC int mpz_divisible_2exp_p __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_dump __gmpz_dump -__GMP_DECLSPEC void mpz_dump __GMP_PROTO ((mpz_srcptr)); - -#define mpz_export __gmpz_export -__GMP_DECLSPEC void *mpz_export __GMP_PROTO ((void *, size_t *, int, size_t, int, size_t, mpz_srcptr)); - -#define mpz_fac_ui __gmpz_fac_ui -__GMP_DECLSPEC void mpz_fac_ui __GMP_PROTO ((mpz_ptr, mpir_ui)); - -#define mpz_2fac_ui __gmpz_2fac_ui -__GMP_DECLSPEC void mpz_2fac_ui __GMP_PROTO ((mpz_ptr, mpir_ui)); - -#define mpz_mfac_uiui __gmpz_mfac_uiui -__GMP_DECLSPEC void mpz_mfac_uiui __GMP_PROTO ((mpz_ptr, mpir_ui, mpir_ui)); - -#define mpz_primorial_ui __gmpz_primorial_ui -__GMP_DECLSPEC void mpz_primorial_ui __GMP_PROTO ((mpz_ptr, mpir_ui)); - -#define mpz_fdiv_q __gmpz_fdiv_q -__GMP_DECLSPEC void mpz_fdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_fdiv_q_2exp __gmpz_fdiv_q_2exp -__GMP_DECLSPEC void mpz_fdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t)); - -#define mpz_fdiv_q_ui __gmpz_fdiv_q_ui -__GMP_DECLSPEC mpir_ui mpz_fdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_fdiv_qr __gmpz_fdiv_qr -__GMP_DECLSPEC void mpz_fdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_fdiv_qr_ui __gmpz_fdiv_qr_ui -__GMP_DECLSPEC mpir_ui mpz_fdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_fdiv_r __gmpz_fdiv_r -__GMP_DECLSPEC void mpz_fdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_fdiv_r_2exp __gmpz_fdiv_r_2exp -__GMP_DECLSPEC void mpz_fdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t)); - -#define mpz_fdiv_r_ui __gmpz_fdiv_r_ui -__GMP_DECLSPEC mpir_ui mpz_fdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_fdiv_ui __gmpz_fdiv_ui 
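
[Editor's note: a quick sketch, not from mpir.h, of the rounding conventions behind the mpz_cdiv_*/mpz_fdiv_*/mpz_tdiv_* families above: c rounds the quotient toward +infinity, f toward -infinity, t toward zero, and the remainder takes the matching sign.]

#include <mpir.h>

int main(void)
{
    mpz_t q, r, n, d;
    mpz_inits(q, r, n, d, NULL);
    mpz_set_si(n, -7);
    mpz_set_si(d, 2);
    mpz_cdiv_qr(q, r, n, d);  gmp_printf("cdiv: q=%Zd r=%Zd\n", q, r);  /* q=-3 r=-1 */
    mpz_fdiv_qr(q, r, n, d);  gmp_printf("fdiv: q=%Zd r=%Zd\n", q, r);  /* q=-4 r= 1 */
    mpz_tdiv_qr(q, r, n, d);  gmp_printf("tdiv: q=%Zd r=%Zd\n", q, r);  /* q=-3 r=-1 */
    mpz_clears(q, r, n, d, NULL);
    return 0;
}
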
-__GMP_DECLSPEC mpir_ui mpz_fdiv_ui __GMP_PROTO ((mpz_srcptr, mpir_ui)) __GMP_ATTRIBUTE_PURE; - -#define mpz_fib_ui __gmpz_fib_ui -__GMP_DECLSPEC void mpz_fib_ui __GMP_PROTO ((mpz_ptr, mpir_ui)); - -#define mpz_fib2_ui __gmpz_fib2_ui -__GMP_DECLSPEC void mpz_fib2_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpir_ui)); - -#define mpz_fits_sint_p __gmpz_fits_sint_p -__GMP_DECLSPEC int mpz_fits_sint_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_fits_si_p __gmpz_fits_si_p -__GMP_DECLSPEC int mpz_fits_si_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_fits_slong_p __gmpz_fits_slong_p -__GMP_DECLSPEC int mpz_fits_slong_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_fits_sshort_p __gmpz_fits_sshort_p -__GMP_DECLSPEC int mpz_fits_sshort_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_fits_uint_p __gmpz_fits_uint_p -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_uint_p) -__GMP_DECLSPEC int mpz_fits_uint_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; -#endif - -#define mpz_fits_ui_p __gmpz_fits_ui_p -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ui_p) -__GMP_DECLSPEC int mpz_fits_ui_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; -#endif - -#define mpz_fits_ulong_p __gmpz_fits_ulong_p -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ulong_p) -__GMP_DECLSPEC int mpz_fits_ulong_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; -#endif - -#define mpz_fits_ushort_p __gmpz_fits_ushort_p -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ushort_p) -__GMP_DECLSPEC int mpz_fits_ushort_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; -#endif - -#define mpz_gcd __gmpz_gcd -__GMP_DECLSPEC void mpz_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_gcd_ui __gmpz_gcd_ui -__GMP_DECLSPEC mpir_ui mpz_gcd_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_gcdext __gmpz_gcdext -__GMP_DECLSPEC void mpz_gcdext __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_get_d __gmpz_get_d -__GMP_DECLSPEC double mpz_get_d __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE; - -#define mpz_get_d_2exp __gmpz_get_d_2exp -__GMP_DECLSPEC double mpz_get_d_2exp __GMP_PROTO ((mpir_si *, mpz_srcptr)); - -#define mpz_get_si __gmpz_get_si -__GMP_DECLSPEC /* signed */ mpir_si mpz_get_si __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_get_str __gmpz_get_str -__GMP_DECLSPEC char *mpz_get_str __GMP_PROTO ((char *, int, mpz_srcptr)); - -#define mpz_get_ui __gmpz_get_ui -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_get_ui) -__GMP_DECLSPEC mpir_ui mpz_get_ui __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; -#endif - -#define mpz_getlimbn __gmpz_getlimbn -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_getlimbn) -__GMP_DECLSPEC mp_limb_t mpz_getlimbn __GMP_PROTO ((mpz_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; -#endif - -#define mpz_hamdist __gmpz_hamdist -__GMP_DECLSPEC mp_bitcnt_t mpz_hamdist __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_import __gmpz_import -__GMP_DECLSPEC void mpz_import __GMP_PROTO ((mpz_ptr, size_t, int, size_t, int, size_t, __gmp_const void *)); - -#define mpz_init __gmpz_init -__GMP_DECLSPEC void mpz_init __GMP_PROTO ((mpz_ptr)); - -#define mpz_init2 __gmpz_init2 -__GMP_DECLSPEC void mpz_init2 __GMP_PROTO 
((mpz_ptr, mp_bitcnt_t)); - -#define mpz_inits __gmpz_inits -__GMP_DECLSPEC void mpz_inits __GMP_PROTO ((mpz_ptr, ...)); - -#define mpz_init_set __gmpz_init_set -__GMP_DECLSPEC void mpz_init_set __GMP_PROTO ((mpz_ptr, mpz_srcptr)); - -#define mpz_init_set_d __gmpz_init_set_d -__GMP_DECLSPEC void mpz_init_set_d __GMP_PROTO ((mpz_ptr, double)); - -#define mpz_init_set_si __gmpz_init_set_si -__GMP_DECLSPEC void mpz_init_set_si __GMP_PROTO ((mpz_ptr, mpir_si)); - -#define mpz_init_set_str __gmpz_init_set_str -__GMP_DECLSPEC int mpz_init_set_str __GMP_PROTO ((mpz_ptr, __gmp_const char *, int)); - -#define mpz_init_set_ui __gmpz_init_set_ui -__GMP_DECLSPEC void mpz_init_set_ui __GMP_PROTO ((mpz_ptr, mpir_ui)); - -#define mpz_inp_raw __gmpz_inp_raw -#ifdef _GMP_H_HAVE_FILE -__GMP_DECLSPEC size_t mpz_inp_raw __GMP_PROTO ((mpz_ptr, FILE *)); -#endif - -#define mpz_inp_str __gmpz_inp_str -#ifdef _GMP_H_HAVE_FILE -__GMP_DECLSPEC size_t mpz_inp_str __GMP_PROTO ((mpz_ptr, FILE *, int)); -#endif - -#define mpz_invert __gmpz_invert -__GMP_DECLSPEC int mpz_invert __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_ior __gmpz_ior -__GMP_DECLSPEC void mpz_ior __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_jacobi __gmpz_jacobi -__GMP_DECLSPEC int mpz_jacobi __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE; - -#define mpz_kronecker mpz_jacobi /* alias */ - -#define mpz_kronecker_si __gmpz_kronecker_si -__GMP_DECLSPEC int mpz_kronecker_si __GMP_PROTO ((mpz_srcptr, mpir_si)) __GMP_ATTRIBUTE_PURE; - -#define mpz_kronecker_ui __gmpz_kronecker_ui -__GMP_DECLSPEC int mpz_kronecker_ui __GMP_PROTO ((mpz_srcptr, mpir_ui)) __GMP_ATTRIBUTE_PURE; - -#define mpz_si_kronecker __gmpz_si_kronecker -__GMP_DECLSPEC int mpz_si_kronecker __GMP_PROTO ((mpir_si, mpz_srcptr)) __GMP_ATTRIBUTE_PURE; - -#define mpz_ui_kronecker __gmpz_ui_kronecker -__GMP_DECLSPEC int mpz_ui_kronecker __GMP_PROTO ((mpir_ui, mpz_srcptr)) __GMP_ATTRIBUTE_PURE; - -#define mpz_lcm __gmpz_lcm -__GMP_DECLSPEC void mpz_lcm __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_lcm_ui __gmpz_lcm_ui -__GMP_DECLSPEC void mpz_lcm_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_legendre mpz_jacobi /* alias */ - -#define mpz_lucnum_ui __gmpz_lucnum_ui -__GMP_DECLSPEC void mpz_lucnum_ui __GMP_PROTO ((mpz_ptr, mpir_ui)); - -#define mpz_lucnum2_ui __gmpz_lucnum2_ui -__GMP_DECLSPEC void mpz_lucnum2_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpir_ui)); - -#define mpz_millerrabin __gmpz_millerrabin -__GMP_DECLSPEC int mpz_millerrabin __GMP_PROTO ((mpz_srcptr, int)) __GMP_ATTRIBUTE_PURE; - -#define mpz_miller_rabin __gmpz_miller_rabin -__GMP_DECLSPEC int mpz_miller_rabin __GMP_PROTO ((mpz_srcptr, int, gmp_randstate_t)) __GMP_ATTRIBUTE_PURE; - -#define mpz_mod __gmpz_mod -__GMP_DECLSPEC void mpz_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_mod_ui mpz_fdiv_r_ui /* same as fdiv_r because divisor unsigned */ - -#define __GMP_MPZ_MUL_MIN_ALLOC(x,y,z) (__GMP_ABS(y->_mp_size)+__GMP_ABS(z->_mp_size)+1) -#define mpz_mul __gmpz_mul -__GMP_DECLSPEC void mpz_mul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_mul_2exp __gmpz_mul_2exp -__GMP_DECLSPEC void mpz_mul_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t)); - -#define __GMP_MPZ_MUL_SI_MIN_ALLOC(x,y,z) (__GMP_ABS(y->_mp_size)+(GMP_BITS_PER_UI-1)/GMP_NUMB_BITS+1) -#define mpz_mul_si __gmpz_mul_si -__GMP_DECLSPEC void mpz_mul_si __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_si)); - -#define __GMP_MPZ_MUL_UI_MIN_ALLOC(x,y,z) 
(__GMP_ABS(y->_mp_size)+(GMP_BITS_PER_UI-1)/GMP_NUMB_BITS+1) -#define mpz_mul_ui __gmpz_mul_ui -__GMP_DECLSPEC void mpz_mul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_neg __gmpz_neg -#define __GMP_MPZ_NEG_MIN_ALLOC(x,y) (__GMP_ABS(y->_mp_size)) -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_neg) -__GMP_DECLSPEC void mpz_neg __GMP_PROTO ((mpz_ptr, mpz_srcptr)); -#endif - -#define mpz_nextprime __gmpz_nextprime -__GMP_DECLSPEC void mpz_nextprime __GMP_PROTO ((mpz_ptr, mpz_srcptr)); - -#define mpz_next_prime_candidate __gmpz_next_prime_candidate -__GMP_DECLSPEC void mpz_next_prime_candidate __GMP_PROTO ((mpz_ptr, mpz_srcptr, gmp_randstate_t)); - -#define mpz_out_raw __gmpz_out_raw -#ifdef _GMP_H_HAVE_FILE -__GMP_DECLSPEC size_t mpz_out_raw __GMP_PROTO ((FILE *, mpz_srcptr)); -#endif - -#define mpz_out_str __gmpz_out_str -#ifdef _GMP_H_HAVE_FILE -__GMP_DECLSPEC size_t mpz_out_str __GMP_PROTO ((FILE *, int, mpz_srcptr)); -#endif - -#define mpz_perfect_power_p __gmpz_perfect_power_p -__GMP_DECLSPEC int mpz_perfect_power_p __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE; - -#define mpz_perfect_square_p __gmpz_perfect_square_p -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_perfect_square_p) -__GMP_DECLSPEC int mpz_perfect_square_p __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE; -#endif - -#define mpz_popcount __gmpz_popcount -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_popcount) -__GMP_DECLSPEC mp_bitcnt_t mpz_popcount __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; -#endif - -#define mpz_pow_ui __gmpz_pow_ui -__GMP_DECLSPEC void mpz_pow_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_powm __gmpz_powm -__GMP_DECLSPEC void mpz_powm __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_powm_ui __gmpz_powm_ui -__GMP_DECLSPEC void mpz_powm_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui, mpz_srcptr)); - -#define mpz_probab_prime_p __gmpz_probab_prime_p -__GMP_DECLSPEC int mpz_probab_prime_p __GMP_PROTO ((mpz_srcptr, int)) __GMP_ATTRIBUTE_PURE; - -#define mpz_probable_prime_p __gmpz_probable_prime_p -__GMP_DECLSPEC int mpz_probable_prime_p __GMP_PROTO ((mpz_srcptr,gmp_randstate_t, int, mpir_ui)); - -#define mpz_likely_prime_p __gmpz_likely_prime_p -__GMP_DECLSPEC int mpz_likely_prime_p __GMP_PROTO ((mpz_srcptr,gmp_randstate_t, mpir_ui)); - -#define mpz_realloc2 __gmpz_realloc2 -__GMP_DECLSPEC void mpz_realloc2 __GMP_PROTO ((mpz_ptr, mp_bitcnt_t)); - -#define mpz_remove __gmpz_remove -__GMP_DECLSPEC mp_bitcnt_t mpz_remove __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_root __gmpz_root -__GMP_DECLSPEC int mpz_root __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_nthroot __gmpz_nthroot -__GMP_DECLSPEC void mpz_nthroot __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_rootrem __gmpz_rootrem -__GMP_DECLSPEC void mpz_rootrem __GMP_PROTO ((mpz_ptr,mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_rrandomb __gmpz_rrandomb -__GMP_DECLSPEC void mpz_rrandomb __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mp_bitcnt_t)); - -#define mpz_scan0 __gmpz_scan0 -__GMP_DECLSPEC mp_bitcnt_t mpz_scan0 __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_scan1 __gmpz_scan1 -__GMP_DECLSPEC mp_bitcnt_t mpz_scan1 __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define __GMP_MPZ_SET_MIN_ALLOC(x,y) __GMP_ABS(y->_mp_size) -#define mpz_set __gmpz_set -__GMP_DECLSPEC void mpz_set __GMP_PROTO ((mpz_ptr, mpz_srcptr)); - -#define 
mpz_set_d __gmpz_set_d -__GMP_DECLSPEC void mpz_set_d __GMP_PROTO ((mpz_ptr, double)); - -#define mpz_set_f __gmpz_set_f -__GMP_DECLSPEC void mpz_set_f __GMP_PROTO ((mpz_ptr, mpf_srcptr)); - -#define mpz_set_q __gmpz_set_q -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_set_q) -__GMP_DECLSPEC void mpz_set_q __GMP_PROTO ((mpz_ptr, mpq_srcptr)); -#endif - -#define __GMP_MPZ_SET_SI_MIN_ALLOC(x,y) (1+(GMP_BITS_PER_UI-1)/GMP_NUMB_BITS) -#define mpz_set_si __gmpz_set_si -__GMP_DECLSPEC void mpz_set_si __GMP_PROTO ((mpz_ptr, mpir_si)); - -#define mpz_set_str __gmpz_set_str -__GMP_DECLSPEC int mpz_set_str __GMP_PROTO ((mpz_ptr, __gmp_const char *, int)); - -#define __GMP_MPZ_SET_UI_MIN_ALLOC(x,y) (1+(GMP_BITS_PER_UI-1)/GMP_NUMB_BITS) -#define mpz_set_ui __gmpz_set_ui -__GMP_DECLSPEC void mpz_set_ui __GMP_PROTO ((mpz_ptr, mpir_ui)); - -#define mpz_setbit __gmpz_setbit -__GMP_DECLSPEC void mpz_setbit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t)); - -#define mpz_size __gmpz_size -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_size) -__GMP_DECLSPEC size_t mpz_size __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; -#endif - -#define mpz_sizeinbase __gmpz_sizeinbase -__GMP_DECLSPEC size_t mpz_sizeinbase __GMP_PROTO ((mpz_srcptr, int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_sqrt __gmpz_sqrt -__GMP_DECLSPEC void mpz_sqrt __GMP_PROTO ((mpz_ptr, mpz_srcptr)); - -#define mpz_sqrtrem __gmpz_sqrtrem -__GMP_DECLSPEC void mpz_sqrtrem __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr)); - -#define __GMP_MPZ_SUB_MIN_ALLOC(x,y,z) (__GMP_MAX(__GMP_ABS(y->_mp_size),__GMP_ABS(z->_mp_size))+1) -#define mpz_sub __gmpz_sub -__GMP_DECLSPEC void mpz_sub __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define __GMP_MPZ_SUB_UI_MIN_ALLOC(x,y,z) (__GMP_MAX(__GMP_ABS(y->_mp_size),1+(GMP_BITS_PER_UI-1)/GMP_NUMB_BITS)+1) -#define mpz_sub_ui __gmpz_sub_ui -__GMP_DECLSPEC void mpz_sub_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define __GMP_MPZ_UI_SUB_MIN_ALLOC(x,y,z) (__GMP_MAX(__GMP_ABS(z->_mp_size),1+(GMP_BITS_PER_UI-1)/GMP_NUMB_BITS)+1) -#define mpz_ui_sub __gmpz_ui_sub -__GMP_DECLSPEC void mpz_ui_sub __GMP_PROTO ((mpz_ptr, mpir_ui, mpz_srcptr)); - -#define mpz_submul __gmpz_submul -__GMP_DECLSPEC void mpz_submul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_submul_ui __gmpz_submul_ui -__GMP_DECLSPEC void mpz_submul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_swap __gmpz_swap -__GMP_DECLSPEC void mpz_swap __GMP_PROTO ((mpz_ptr, mpz_ptr)) __GMP_NOTHROW; - -#define mpz_tdiv_ui __gmpz_tdiv_ui -__GMP_DECLSPEC mpir_ui mpz_tdiv_ui __GMP_PROTO ((mpz_srcptr, mpir_ui)) __GMP_ATTRIBUTE_PURE; - -#define mpz_tdiv_q __gmpz_tdiv_q -__GMP_DECLSPEC void mpz_tdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_tdiv_q_2exp __gmpz_tdiv_q_2exp -__GMP_DECLSPEC void mpz_tdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t)); - -#define mpz_tdiv_q_ui __gmpz_tdiv_q_ui -__GMP_DECLSPEC mpir_ui mpz_tdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_tdiv_qr __gmpz_tdiv_qr -__GMP_DECLSPEC void mpz_tdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_tdiv_qr_ui __gmpz_tdiv_qr_ui -__GMP_DECLSPEC mpir_ui mpz_tdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_tdiv_r __gmpz_tdiv_r -__GMP_DECLSPEC void mpz_tdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - -#define mpz_tdiv_r_2exp __gmpz_tdiv_r_2exp -__GMP_DECLSPEC void mpz_tdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, 
mp_bitcnt_t)); - -#define mpz_tdiv_r_ui __gmpz_tdiv_r_ui -__GMP_DECLSPEC mpir_ui mpz_tdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpir_ui)); - -#define mpz_tstbit __gmpz_tstbit -__GMP_DECLSPEC int mpz_tstbit __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpz_ui_pow_ui __gmpz_ui_pow_ui -__GMP_DECLSPEC void mpz_ui_pow_ui __GMP_PROTO ((mpz_ptr, mpir_ui, mpir_ui)); - -#define mpz_urandomb __gmpz_urandomb -__GMP_DECLSPEC void mpz_urandomb __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mp_bitcnt_t)); - -#define mpz_urandomm __gmpz_urandomm -__GMP_DECLSPEC void mpz_urandomm __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mpz_srcptr)); - -#define mpz_xor __gmpz_xor -#define mpz_eor __gmpz_xor -__GMP_DECLSPEC void mpz_xor __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); - - -/****** Integer (i.e. Z) routines for intmax_t/uintmax_t types ******/ - -/* if stdint.h is available -- n.b: we do NOT include stdint.h ourselves */ -#if defined(INTMAX_MAX) - -#define __GMP_BITS_PER_UINTMAX (8*sizeof(uintmax_t)) - -#define mpz_get_ux __gmpz_get_ux -__GMP_DECLSPEC uintmax_t mpz_get_ux __GMP_PROTO ((mpz_srcptr)); - -#define mpz_get_sx __gmpz_get_sx -__GMP_DECLSPEC intmax_t mpz_get_sx __GMP_PROTO ((mpz_srcptr)); - -#define mpz_set_ux __gmpz_set_ux -__GMP_DECLSPEC void mpz_set_ux __GMP_PROTO ((mpz_ptr, uintmax_t)); - -#define mpz_set_sx __gmpz_set_sx -__GMP_DECLSPEC void mpz_set_sx __GMP_PROTO ((mpz_ptr, intmax_t)); - -#define mpz_init_set_ux __gmpz_init_set_ux -__GMP_DECLSPEC void mpz_init_set_ux __GMP_PROTO ((mpz_ptr, uintmax_t)); - -#define mpz_init_set_sx __gmpz_init_set_sx -__GMP_DECLSPEC void mpz_init_set_sx __GMP_PROTO ((mpz_ptr, intmax_t)); - -#endif - - -/**************** Rational (i.e. Q) routines. ****************/ - -#define mpq_abs __gmpq_abs -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_abs) -__GMP_DECLSPEC void mpq_abs __GMP_PROTO ((mpq_ptr, mpq_srcptr)); -#endif - -#define mpq_add __gmpq_add -__GMP_DECLSPEC void mpq_add __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); - -#define mpq_canonicalize __gmpq_canonicalize -__GMP_DECLSPEC void mpq_canonicalize __GMP_PROTO ((mpq_ptr)); - -#define mpq_clear __gmpq_clear -__GMP_DECLSPEC void mpq_clear __GMP_PROTO ((mpq_ptr)); - -#define mpq_clears __gmpq_clears -__GMP_DECLSPEC void mpq_clears __GMP_PROTO ((mpq_ptr, ...)); - -#define mpq_cmp __gmpq_cmp -__GMP_DECLSPEC int mpq_cmp __GMP_PROTO ((mpq_srcptr, mpq_srcptr)) __GMP_ATTRIBUTE_PURE; - -#define _mpq_cmp_si __gmpq_cmp_si -__GMP_DECLSPEC int _mpq_cmp_si __GMP_PROTO ((mpq_srcptr, mpir_si, mpir_ui)) __GMP_ATTRIBUTE_PURE; - -#define _mpq_cmp_ui __gmpq_cmp_ui -__GMP_DECLSPEC int _mpq_cmp_ui __GMP_PROTO ((mpq_srcptr, mpir_ui, mpir_ui)) __GMP_ATTRIBUTE_PURE; - -#define mpq_div __gmpq_div -__GMP_DECLSPEC void mpq_div __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); - -#define mpq_div_2exp __gmpq_div_2exp -__GMP_DECLSPEC void mpq_div_2exp __GMP_PROTO ((mpq_ptr, mpq_srcptr, mp_bitcnt_t)); - -#define mpq_equal __gmpq_equal -__GMP_DECLSPEC int mpq_equal __GMP_PROTO ((mpq_srcptr, mpq_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpq_get_num __gmpq_get_num -__GMP_DECLSPEC void mpq_get_num __GMP_PROTO ((mpz_ptr, mpq_srcptr)); - -#define mpq_get_den __gmpq_get_den -__GMP_DECLSPEC void mpq_get_den __GMP_PROTO ((mpz_ptr, mpq_srcptr)); - -#define mpq_get_d __gmpq_get_d -__GMP_DECLSPEC double mpq_get_d __GMP_PROTO ((mpq_srcptr)) __GMP_ATTRIBUTE_PURE; - -#define mpq_get_str __gmpq_get_str -__GMP_DECLSPEC char *mpq_get_str __GMP_PROTO ((char *, int, mpq_srcptr)); - -#define
mpq_init __gmpq_init -__GMP_DECLSPEC void mpq_init __GMP_PROTO ((mpq_ptr)); - -#define mpq_inits __gmpq_inits -__GMP_DECLSPEC void mpq_inits __GMP_PROTO ((mpq_ptr, ...)); - -#define mpq_inp_str __gmpq_inp_str -#ifdef _GMP_H_HAVE_FILE -__GMP_DECLSPEC size_t mpq_inp_str __GMP_PROTO ((mpq_ptr, FILE *, int)); -#endif - -#define mpq_inv __gmpq_inv -__GMP_DECLSPEC void mpq_inv __GMP_PROTO ((mpq_ptr, mpq_srcptr)); - -#define mpq_mul __gmpq_mul -__GMP_DECLSPEC void mpq_mul __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); - -#define mpq_mul_2exp __gmpq_mul_2exp -__GMP_DECLSPEC void mpq_mul_2exp __GMP_PROTO ((mpq_ptr, mpq_srcptr, mp_bitcnt_t)); - -#define mpq_neg __gmpq_neg -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_neg) -__GMP_DECLSPEC void mpq_neg __GMP_PROTO ((mpq_ptr, mpq_srcptr)); -#endif - -#define mpq_out_str __gmpq_out_str -#ifdef _GMP_H_HAVE_FILE -__GMP_DECLSPEC size_t mpq_out_str __GMP_PROTO ((FILE *, int, mpq_srcptr)); -#endif - -#define mpq_set __gmpq_set -__GMP_DECLSPEC void mpq_set __GMP_PROTO ((mpq_ptr, mpq_srcptr)); - -#define mpq_set_d __gmpq_set_d -__GMP_DECLSPEC void mpq_set_d __GMP_PROTO ((mpq_ptr, double)); - -#define mpq_set_den __gmpq_set_den -__GMP_DECLSPEC void mpq_set_den __GMP_PROTO ((mpq_ptr, mpz_srcptr)); - -#define mpq_set_f __gmpq_set_f -__GMP_DECLSPEC void mpq_set_f __GMP_PROTO ((mpq_ptr, mpf_srcptr)); - -#define mpq_set_num __gmpq_set_num -__GMP_DECLSPEC void mpq_set_num __GMP_PROTO ((mpq_ptr, mpz_srcptr)); - -#define mpq_set_si __gmpq_set_si -__GMP_DECLSPEC void mpq_set_si __GMP_PROTO ((mpq_ptr, mpir_si, mpir_ui)); - -#define mpq_set_str __gmpq_set_str -__GMP_DECLSPEC int mpq_set_str __GMP_PROTO ((mpq_ptr, __gmp_const char *, int)); - -#define mpq_set_ui __gmpq_set_ui -__GMP_DECLSPEC void mpq_set_ui __GMP_PROTO ((mpq_ptr, mpir_ui, mpir_ui)); - -#define mpq_set_z __gmpq_set_z -__GMP_DECLSPEC void mpq_set_z __GMP_PROTO ((mpq_ptr, mpz_srcptr)); - -#define mpq_sub __gmpq_sub -__GMP_DECLSPEC void mpq_sub __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); - -#define mpq_swap __gmpq_swap -__GMP_DECLSPEC void mpq_swap __GMP_PROTO ((mpq_ptr, mpq_ptr)) __GMP_NOTHROW; - - -/**************** Float (i.e. F) routines. 
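
[Editor's note: a usage sketch for the rational routines above, not from mpir.h. mpq_set_si stores the numerator and denominator exactly as given, so mpq_canonicalize must be called before the value is used by routines that expect canonical form.]

#include <mpir.h>

int main(void)
{
    mpq_t a, b, sum;
    mpq_inits(a, b, sum, NULL);
    mpq_set_si(a, 2, 4);   /* stored as 2/4 */
    mpq_canonicalize(a);   /* now 1/2: gcd removed, denominator positive */
    mpq_set_si(b, 1, 3);
    mpq_add(sum, a, b);
    gmp_printf("%Qd = %g\n", sum, mpq_get_d(sum));  /* 5/6 = 0.833333 */
    mpq_clears(a, b, sum, NULL);
    return 0;
}
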
****************/ - -#define mpf_abs __gmpf_abs -__GMP_DECLSPEC void mpf_abs __GMP_PROTO ((mpf_ptr, mpf_srcptr)); - -#define mpf_add __gmpf_add -__GMP_DECLSPEC void mpf_add __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); - -#define mpf_add_ui __gmpf_add_ui -__GMP_DECLSPEC void mpf_add_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpir_ui)); -#define mpf_ceil __gmpf_ceil -__GMP_DECLSPEC void mpf_ceil __GMP_PROTO ((mpf_ptr, mpf_srcptr)); - -#define mpf_clear __gmpf_clear -__GMP_DECLSPEC void mpf_clear __GMP_PROTO ((mpf_ptr)); - -#define mpf_clears __gmpf_clears -__GMP_DECLSPEC void mpf_clears __GMP_PROTO ((mpf_ptr, ...)); - -#define mpf_cmp __gmpf_cmp -__GMP_DECLSPEC int mpf_cmp __GMP_PROTO ((mpf_srcptr, mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_cmp_d __gmpf_cmp_d -__GMP_DECLSPEC int mpf_cmp_d __GMP_PROTO ((mpf_srcptr, double)) __GMP_ATTRIBUTE_PURE; - -#define mpf_cmp_si __gmpf_cmp_si -__GMP_DECLSPEC int mpf_cmp_si __GMP_PROTO ((mpf_srcptr, mpir_si)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_cmp_ui __gmpf_cmp_ui -__GMP_DECLSPEC int mpf_cmp_ui __GMP_PROTO ((mpf_srcptr, mpir_ui)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_div __gmpf_div -__GMP_DECLSPEC void mpf_div __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); - -#define mpf_div_2exp __gmpf_div_2exp -__GMP_DECLSPEC void mpf_div_2exp __GMP_PROTO ((mpf_ptr, mpf_srcptr, mp_bitcnt_t)); - -#define mpf_div_ui __gmpf_div_ui -__GMP_DECLSPEC void mpf_div_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpir_ui)); - -#define mpf_dump __gmpf_dump -__GMP_DECLSPEC void mpf_dump __GMP_PROTO ((mpf_srcptr)); - -#define mpf_eq __gmpf_eq -__GMP_DECLSPEC int mpf_eq __GMP_PROTO ((mpf_srcptr, mpf_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE; - -#define mpf_fits_sint_p __gmpf_fits_sint_p -__GMP_DECLSPEC int mpf_fits_sint_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_fits_si_p __gmpf_fits_si_p -__GMP_DECLSPEC int mpf_fits_si_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_fits_slong_p __gmpf_fits_slong_p -__GMP_DECLSPEC int mpf_fits_slong_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_fits_sshort_p __gmpf_fits_sshort_p -__GMP_DECLSPEC int mpf_fits_sshort_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_fits_uint_p __gmpf_fits_uint_p -__GMP_DECLSPEC int mpf_fits_uint_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_fits_ui_p __gmpf_fits_ui_p -__GMP_DECLSPEC int mpf_fits_ui_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_fits_ulong_p __gmpf_fits_ulong_p -__GMP_DECLSPEC int mpf_fits_ulong_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_fits_ushort_p __gmpf_fits_ushort_p -__GMP_DECLSPEC int mpf_fits_ushort_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_floor __gmpf_floor -__GMP_DECLSPEC void mpf_floor __GMP_PROTO ((mpf_ptr, mpf_srcptr)); - -#define mpf_get_d __gmpf_get_d -__GMP_DECLSPEC double mpf_get_d __GMP_PROTO ((mpf_srcptr)) __GMP_ATTRIBUTE_PURE; - -#define mpf_get_d_2exp __gmpf_get_d_2exp -__GMP_DECLSPEC double mpf_get_d_2exp __GMP_PROTO ((mpir_si *, mpf_srcptr)); - -#define mpf_get_default_prec __gmpf_get_default_prec -__GMP_DECLSPEC mp_bitcnt_t mpf_get_default_prec __GMP_PROTO ((void)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_get_prec __gmpf_get_prec -__GMP_DECLSPEC mp_bitcnt_t mpf_get_prec __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_get_si 
__gmpf_get_si -__GMP_DECLSPEC mpir_si mpf_get_si __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_get_str __gmpf_get_str -__GMP_DECLSPEC char *mpf_get_str __GMP_PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr)); - -#define mpf_get_ui __gmpf_get_ui -__GMP_DECLSPEC mpir_ui mpf_get_ui __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_init __gmpf_init -__GMP_DECLSPEC void mpf_init __GMP_PROTO ((mpf_ptr)); - -#define mpf_init2 __gmpf_init2 -__GMP_DECLSPEC void mpf_init2 __GMP_PROTO ((mpf_ptr, mp_bitcnt_t)); - -#define mpf_inits __gmpf_inits -__GMP_DECLSPEC void mpf_inits __GMP_PROTO ((mpf_ptr, ...)); - -#define mpf_init_set __gmpf_init_set -__GMP_DECLSPEC void mpf_init_set __GMP_PROTO ((mpf_ptr, mpf_srcptr)); - -#define mpf_init_set_d __gmpf_init_set_d -__GMP_DECLSPEC void mpf_init_set_d __GMP_PROTO ((mpf_ptr, double)); - -#define mpf_init_set_si __gmpf_init_set_si -__GMP_DECLSPEC void mpf_init_set_si __GMP_PROTO ((mpf_ptr, mpir_si)); - -#define mpf_init_set_str __gmpf_init_set_str -__GMP_DECLSPEC int mpf_init_set_str __GMP_PROTO ((mpf_ptr, __gmp_const char *, int)); - -#define mpf_init_set_ui __gmpf_init_set_ui -__GMP_DECLSPEC void mpf_init_set_ui __GMP_PROTO ((mpf_ptr, mpir_ui)); - -#define mpf_inp_str __gmpf_inp_str -#ifdef _GMP_H_HAVE_FILE -__GMP_DECLSPEC size_t mpf_inp_str __GMP_PROTO ((mpf_ptr, FILE *, int)); -#endif - -#define mpf_integer_p __gmpf_integer_p -__GMP_DECLSPEC int mpf_integer_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_mul __gmpf_mul -__GMP_DECLSPEC void mpf_mul __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); - -#define mpf_mul_2exp __gmpf_mul_2exp -__GMP_DECLSPEC void mpf_mul_2exp __GMP_PROTO ((mpf_ptr, mpf_srcptr, mp_bitcnt_t)); - -#define mpf_mul_ui __gmpf_mul_ui -__GMP_DECLSPEC void mpf_mul_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpir_ui)); - -#define mpf_neg __gmpf_neg -__GMP_DECLSPEC void mpf_neg __GMP_PROTO ((mpf_ptr, mpf_srcptr)); - -#define mpf_out_str __gmpf_out_str -#ifdef _GMP_H_HAVE_FILE -__GMP_DECLSPEC size_t mpf_out_str __GMP_PROTO ((FILE *, int, size_t, mpf_srcptr)); -#endif - -#define mpf_pow_ui __gmpf_pow_ui -__GMP_DECLSPEC void mpf_pow_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpir_ui)); - -#define mpf_random2 __gmpf_random2 -__GMP_DECLSPEC void mpf_random2 __GMP_PROTO ((mpf_ptr, mp_size_t, mp_exp_t)); - -#define mpf_rrandomb __gmpf_rrandomb -__GMP_DECLSPEC void mpf_rrandomb __GMP_PROTO ((mpf_ptr, gmp_randstate_t, mp_size_t, mp_exp_t)); - -#define mpf_reldiff __gmpf_reldiff -__GMP_DECLSPEC void mpf_reldiff __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); - -#define mpf_set __gmpf_set -__GMP_DECLSPEC void mpf_set __GMP_PROTO ((mpf_ptr, mpf_srcptr)); - -#define mpf_set_d __gmpf_set_d -__GMP_DECLSPEC void mpf_set_d __GMP_PROTO ((mpf_ptr, double)); - -#define mpf_set_default_prec __gmpf_set_default_prec -__GMP_DECLSPEC void mpf_set_default_prec __GMP_PROTO ((mp_bitcnt_t)) __GMP_NOTHROW; - -#define mpf_set_prec __gmpf_set_prec -__GMP_DECLSPEC void mpf_set_prec __GMP_PROTO ((mpf_ptr, mp_bitcnt_t)); - -#define mpf_set_prec_raw __gmpf_set_prec_raw -__GMP_DECLSPEC void mpf_set_prec_raw __GMP_PROTO ((mpf_ptr, mp_bitcnt_t)) __GMP_NOTHROW; - -#define mpf_set_q __gmpf_set_q -__GMP_DECLSPEC void mpf_set_q __GMP_PROTO ((mpf_ptr, mpq_srcptr)); - -#define mpf_set_si __gmpf_set_si -__GMP_DECLSPEC void mpf_set_si __GMP_PROTO ((mpf_ptr, mpir_si)); - -#define mpf_set_str __gmpf_set_str -__GMP_DECLSPEC int mpf_set_str __GMP_PROTO ((mpf_ptr, __gmp_const char *, int)); - -#define 
mpf_set_ui __gmpf_set_ui -__GMP_DECLSPEC void mpf_set_ui __GMP_PROTO ((mpf_ptr, mpir_ui)); - -#define mpf_set_z __gmpf_set_z -__GMP_DECLSPEC void mpf_set_z __GMP_PROTO ((mpf_ptr, mpz_srcptr)); - -#define mpf_size __gmpf_size -__GMP_DECLSPEC size_t mpf_size __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpf_sqrt __gmpf_sqrt -__GMP_DECLSPEC void mpf_sqrt __GMP_PROTO ((mpf_ptr, mpf_srcptr)); - -#define mpf_sqrt_ui __gmpf_sqrt_ui -__GMP_DECLSPEC void mpf_sqrt_ui __GMP_PROTO ((mpf_ptr, mpir_ui)); - -#define mpf_sub __gmpf_sub -__GMP_DECLSPEC void mpf_sub __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); - -#define mpf_sub_ui __gmpf_sub_ui -__GMP_DECLSPEC void mpf_sub_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpir_ui)); - -#define mpf_swap __gmpf_swap -__GMP_DECLSPEC void mpf_swap __GMP_PROTO ((mpf_ptr, mpf_ptr)) __GMP_NOTHROW; - -#define mpf_trunc __gmpf_trunc -__GMP_DECLSPEC void mpf_trunc __GMP_PROTO ((mpf_ptr, mpf_srcptr)); - -#define mpf_ui_div __gmpf_ui_div -__GMP_DECLSPEC void mpf_ui_div __GMP_PROTO ((mpf_ptr, mpir_ui, mpf_srcptr)); - -#define mpf_ui_sub __gmpf_ui_sub -__GMP_DECLSPEC void mpf_ui_sub __GMP_PROTO ((mpf_ptr, mpir_ui, mpf_srcptr)); - -#define mpf_urandomb __gmpf_urandomb -__GMP_DECLSPEC void mpf_urandomb __GMP_PROTO ((mpf_t, gmp_randstate_t, mp_bitcnt_t)); - - -/************ Low level positive-integer (i.e. N) routines. ************/ - -/* This is ugly, but we need to make user calls reach the prefixed function. */ - -#define mpn_add __MPN(add) -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add) -__GMP_DECLSPEC mp_limb_t mpn_add __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t)); -#endif - -#define mpn_add_1 __MPN(add_1) -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add_1) -__GMP_DECLSPEC mp_limb_t mpn_add_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)) __GMP_NOTHROW; -#endif - -#define mpn_add_n __MPN(add_n) -__GMP_DECLSPEC mp_limb_t mpn_add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); - -#define mpn_addmul_1 __MPN(addmul_1) -__GMP_DECLSPEC mp_limb_t mpn_addmul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); - -#define mpn_bdivmod __MPN(bdivmod) -__GMP_DECLSPEC mp_limb_t mpn_bdivmod __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mpir_ui)); - -#define mpn_divrem __MPN(divrem) -__GMP_DECLSPEC mp_limb_t mpn_divrem __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t)); - -#define mpn_mulmod_Bexpp1 __MPN(mulmod_Bexpp1) -__GMP_DECLSPEC int mpn_mulmod_Bexpp1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); - -#define mpn_mulmod_2expp1 __MPN(mulmod_2expp1_basecase) -__GMP_DECLSPEC int mpn_mulmod_2expp1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr,int,mpir_ui, mp_ptr)); - -#define mpn_mulmod_2expm1 __MPN(mulmod_2expm1) -__GMP_DECLSPEC void mpn_mulmod_2expm1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mpir_ui, mp_ptr)); - -#define mpn_cmp __MPN(cmp) -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_cmp) -__GMP_DECLSPEC int mpn_cmp __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; -#endif - -#define mpn_redc_1 __MPN(redc_1) -__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); - -#define mpn_redc_2 __MPN(redc_2) -__GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)); -#define mpn_redc_n __MPN(redc_n) -__GMP_DECLSPEC void mpn_redc_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)); - -#define
mpn_divexact_by3(dst,src,size) \ - mpn_divexact_by3c (dst, src, size, __GMP_CAST (mp_limb_t, 0)) - -#define mpn_divexact_by3c __MPN(divexact_by3c) -__GMP_DECLSPEC mp_limb_t mpn_divexact_by3c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); - -#define mpn_divmod_1(qp,np,nsize,dlimb) \ - mpn_divrem_1 (qp, __GMP_CAST (mp_size_t, 0), np, nsize, dlimb) - -#define mpn_divrem_1 __MPN(divrem_1) -__GMP_DECLSPEC mp_limb_t mpn_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)); - -#define mpn_divrem_2 __MPN(divrem_2) -__GMP_DECLSPEC mp_limb_t mpn_divrem_2 __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr)); - -#define mpn_invert __MPN(invert) -__GMP_DECLSPEC void mpn_invert __GMP_PROTO ((mp_ptr xp, mp_srcptr ap, mp_size_t n)); - -#define mpn_sb_divappr_q __MPN(sb_divappr_q) -__GMP_DECLSPEC mp_limb_t mpn_sb_divappr_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, mp_limb_t dip, mp_limb_t d1ip)); - -#define mpn_dc_bdiv_q_n __MPN(dc_bdiv_q_n) -__GMP_DECLSPEC void mpn_dc_bdiv_q_n __GMP_PROTO ((mp_ptr qp, mp_ptr wp, mp_ptr np, mp_srcptr dp, mp_size_t n, - mp_limb_t dinv, mp_ptr scratch)); - -#define mpn_inv_divappr_q_n __MPN(inv_divappr_q_n) -__GMP_DECLSPEC mp_limb_t mpn_inv_divappr_q_n __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, - mp_srcptr dip)); - -#define mpn_dc_divappr_q __MPN(dc_divappr_q) -__GMP_DECLSPEC mp_limb_t mpn_dc_divappr_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, - mp_size_t n, mp_limb_t dinv, mp_limb_t d1inv)); - -#define mpn_dc_div_q __MPN(dc_div_q) -__GMP_DECLSPEC mp_limb_t mpn_dc_div_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, mp_limb_t dinv, mp_limb_t d1inv)); - -#define mpn_inv_divappr_q __MPN(inv_divappr_q) -__GMP_DECLSPEC mp_limb_t mpn_inv_divappr_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t n, - mp_srcptr dinv)); - -#define mpn_inv_div_q __MPN(inv_div_q) -__GMP_DECLSPEC mp_limb_t mpn_inv_div_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, mp_srcptr dinv)); - -#define mpn_inv_div_qr __MPN(inv_div_qr) -__GMP_DECLSPEC mp_limb_t mpn_inv_div_qr __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, mp_srcptr dinv)); - -#define mpn_inv_div_qr_n __MPN(inv_div_qr_n) -__GMP_DECLSPEC mp_limb_t mpn_inv_div_qr_n __GMP_PROTO ((mp_ptr qp, mp_ptr np, - mp_srcptr dp, mp_size_t dn, mp_srcptr dinv)); - -#define mpn_dc_div_qr __MPN(dc_div_qr) -__GMP_DECLSPEC mp_limb_t mpn_dc_div_qr __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, mp_limb_t dinv, mp_limb_t d1inv)); - -#define mpn_dc_div_qr_n __MPN(dc_div_qr_n) -__GMP_DECLSPEC mp_limb_t mpn_dc_div_qr_n __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, - mp_limb_t dinv, mp_limb_t d1inv, mp_ptr tp)); - -#define mpn_sb_div_q __MPN(sb_div_q) -__GMP_DECLSPEC mp_limb_t mpn_sb_div_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, mp_limb_t dinv, mp_limb_t d1inv)); - -#define mpn_sb_bdiv_q __MPN(sb_bdiv_q) -__GMP_DECLSPEC void mpn_sb_bdiv_q __GMP_PROTO ((mp_ptr qp, mp_ptr wp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)); - -#define mpn_dc_bdiv_q __MPN(dc_bdiv_q) -__GMP_DECLSPEC void mpn_dc_bdiv_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)); - -#define mpn_dc_bdiv_qr __MPN(dc_bdiv_qr) -__GMP_DECLSPEC mp_limb_t mpn_dc_bdiv_qr __GMP_PROTO ((mp_ptr qp, mp_ptr np, 
mp_size_t nn, - mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)); - -#define mpn_dc_bdiv_qr_n __MPN(dc_bdiv_qr_n) -__GMP_DECLSPEC mp_limb_t mpn_dc_bdiv_qr_n __GMP_PROTO ((mp_ptr qp, mp_ptr np, - mp_srcptr dp, mp_size_t n, mp_limb_t dinv, mp_ptr tp)); - -#define mpn_sb_div_qr __MPN(sb_div_qr) -__GMP_DECLSPEC mp_limb_t mpn_sb_div_qr __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, mp_limb_t dinv, mp_limb_t d1inv)); - -#define mpn_sb_bdiv_qr __MPN(sb_bdiv_qr) -__GMP_DECLSPEC mp_limb_t mpn_sb_bdiv_qr __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)); - -#define mpn_tdiv_q __MPN(tdiv_q) -__GMP_DECLSPEC void mpn_tdiv_q __GMP_PROTO ((mp_ptr qp, mp_srcptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn)); - -#define mpn_divexact __MPN(divexact) -__GMP_DECLSPEC void mpn_divexact __GMP_PROTO ((mp_ptr qp, - mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)); - -#define mpn_gcd __MPN(gcd) -__GMP_DECLSPEC mp_size_t mpn_gcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); - -#define mpn_gcd_1 __MPN(gcd_1) -__GMP_DECLSPEC mp_limb_t mpn_gcd_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE; - -#define mpn_gcdext_1 __MPN(gcdext_1) -__GMP_DECLSPEC mp_limb_t mpn_gcdext_1 __GMP_PROTO ((mp_limb_signed_t *, mp_limb_signed_t *, mp_limb_t, mp_limb_t)); - -#define mpn_gcdext __MPN(gcdext) -__GMP_DECLSPEC mp_size_t mpn_gcdext __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); - -#define mpn_get_str __MPN(get_str) -__GMP_DECLSPEC size_t mpn_get_str __GMP_PROTO ((unsigned char *, int, mp_ptr, mp_size_t)); - -#define mpn_hamdist __MPN(hamdist) -__GMP_DECLSPEC mp_bitcnt_t mpn_hamdist __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpn_lshift __MPN(lshift) -__GMP_DECLSPEC mp_limb_t mpn_lshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int)); - -#define mpn_mod_1 __MPN(mod_1) -__GMP_DECLSPEC mp_limb_t mpn_mod_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE; - -#define mpn_mul __MPN(mul) -__GMP_DECLSPEC mp_limb_t mpn_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); - -#define mpn_mul_1 __MPN(mul_1) -__GMP_DECLSPEC mp_limb_t mpn_mul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); - -#define mpn_mul_n __MPN(mul_n) -__GMP_DECLSPEC void mpn_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); - -#define mpn_sqr __MPN(sqr) -__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); - -#define mpn_neg_n __MPN(neg_n) -#define mpn_neg __MPN(neg_n) -__GMP_DECLSPEC mp_limb_t mpn_neg_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); - -#define mpn_com_n __MPN(com_n) -#define mpn_com __MPN(com_n) -__GMP_DECLSPEC void mpn_com_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); - -#define mpn_perfect_square_p __MPN(perfect_square_p) -__GMP_DECLSPEC int mpn_perfect_square_p __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE; - -#define mpn_popcount __MPN(popcount) -__GMP_DECLSPEC mp_bitcnt_t mpn_popcount __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; - -#define mpn_pow_1 __MPN(pow_1) -__GMP_DECLSPEC mp_size_t mpn_pow_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr)); - -/* undocumented now, but retained here for upward compatibility */ -#define mpn_preinv_mod_1 __MPN(preinv_mod_1) -__GMP_DECLSPEC mp_limb_t mpn_preinv_mod_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE; - -#define 
mpn_random __MPN(random) -__GMP_DECLSPEC void mpn_random __GMP_PROTO ((mp_ptr, mp_size_t)); - -#define mpn_random2 __MPN(random2) -__GMP_DECLSPEC void mpn_random2 __GMP_PROTO ((mp_ptr, mp_size_t)); - -#define mpn_urandomb __MPN(urandomb) -__GMP_DECLSPEC void mpn_urandomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, mpir_ui)); - -#define mpn_urandomm __MPN(urandomm) -__GMP_DECLSPEC void mpn_urandomm __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_srcptr, mp_size_t)); - -#define mpn_randomb __MPN(randomb) -__GMP_DECLSPEC void mpn_randomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_size_t)); - -#define mpn_rrandom __MPN(rrandom) -__GMP_DECLSPEC void mpn_rrandom __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_size_t)); - -#define mpn_rshift __MPN(rshift) -__GMP_DECLSPEC mp_limb_t mpn_rshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int)); - -#define mpn_scan0 __MPN(scan0) -__GMP_DECLSPEC mp_bitcnt_t mpn_scan0 __GMP_PROTO ((mp_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE; - -#define mpn_scan1 __MPN(scan1) -__GMP_DECLSPEC mp_bitcnt_t mpn_scan1 __GMP_PROTO ((mp_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE; - -#define mpn_set_str __MPN(set_str) -__GMP_DECLSPEC mp_size_t mpn_set_str __GMP_PROTO ((mp_ptr, __gmp_const unsigned char *, size_t, int)); - -#define mpn_sqrtrem __MPN(sqrtrem) -__GMP_DECLSPEC mp_size_t mpn_sqrtrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t)); - -#define mpn_sub __MPN(sub) -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub) -__GMP_DECLSPEC mp_limb_t mpn_sub __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t)); -#endif - -#define mpn_sub_1 __MPN(sub_1) -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub_1) -__GMP_DECLSPEC mp_limb_t mpn_sub_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)) __GMP_NOTHROW; -#endif - -#define mpn_sub_n __MPN(sub_n) -__GMP_DECLSPEC mp_limb_t mpn_sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); - -#define mpn_submul_1 __MPN(submul_1) -__GMP_DECLSPEC mp_limb_t mpn_submul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); - -#define mpn_tdiv_qr __MPN(tdiv_qr) -__GMP_DECLSPEC void mpn_tdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); - -#define mpn_and_n __MPN(and_n) -__GMP_DECLSPEC void mpn_and_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); -#define mpn_andn_n __MPN(andn_n) -__GMP_DECLSPEC void mpn_andn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); -#define mpn_nand_n __MPN(nand_n) -__GMP_DECLSPEC void mpn_nand_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); -#define mpn_ior_n __MPN(ior_n) -__GMP_DECLSPEC void mpn_ior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); -#define mpn_iorn_n __MPN(iorn_n) -__GMP_DECLSPEC void mpn_iorn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); -#define mpn_nior_n __MPN(nior_n) -__GMP_DECLSPEC void mpn_nior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); -#define mpn_xor_n __MPN(xor_n) -__GMP_DECLSPEC void mpn_xor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); -#define mpn_xnor_n __MPN(xnor_n) -__GMP_DECLSPEC void mpn_xnor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); - -#define mpn_copyi __MPN(copyi) -__GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); -#define mpn_copyd __MPN(copyd) -__GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); -#define mpn_zero __MPN(zero) -__GMP_DECLSPEC void mpn_zero __GMP_PROTO ((mp_ptr, mp_size_t)); - -#ifndef mpn_sumdiff_n /* if not done with cpuvec 
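
[Editor's note: a minimal sketch, not from mpir.h, of the mpn calling convention used above: operands are bare limb arrays with the least-significant limb first, lengths are passed explicitly, and the carry out of mpn_add_n comes back as the return value. Assumes a default build with no nail bits.]

#include <mpir.h>
#include <stdio.h>

int main(void)
{
    mp_limb_t a[2] = { ~(mp_limb_t) 0, ~(mp_limb_t) 0 };  /* all-ones two-limb value */
    mp_limb_t b[2] = { 1, 0 };
    mp_limb_t s[2];
    mp_limb_t carry = mpn_add_n(s, a, b, 2);  /* s = a + b over 2 limbs */
    printf("carry=%lu s={%lu,%lu}\n",
           (unsigned long) carry, (unsigned long) s[0], (unsigned long) s[1]);
    return 0;
}
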
in a fat binary of in gmp-impl.h*/ -#define mpn_sumdiff_n __MPN(sumdiff_n) -__GMP_DECLSPEC mp_limb_t mpn_sumdiff_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); -#endif - -/**************** MPN API for FFT ****************/ - -#define mpn_mul_fft_main __MPN(mul_fft_main) -__GMP_DECLSPEC void mpn_mul_fft_main __GMP_PROTO ((mp_ptr r1, mp_srcptr i1, mp_size_t n1, mp_srcptr i2, mp_size_t n2)); - -#define mpn_mul_fft __MPN(mul_fft) -__GMP_DECLSPEC int mpn_mul_fft __GMP_PROTO((mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, int k)); - -/**************** mpz inlines ****************/ - -/* The following are provided as inlines where possible, but always exist as - library functions too, for binary compatibility. - - Within gmp itself this inlining generally isn't relied on, since it - doesn't get done for all compilers, whereas if something is worth - inlining then it's worth arranging always. - - There are two styles of inlining here. When the same bit of code is - wanted for the inline as for the library version, then __GMP_FORCE_foo - arranges for that code to be emitted and the __GMP_EXTERN_INLINE - directive suppressed, eg. mpz_fits_uint_p. When a different bit of code - is wanted for the inline than for the library version, then - __GMP_FORCE_foo arranges the inline to be suppressed, eg. mpz_abs. */ - -#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpz_abs) -__GMP_EXTERN_INLINE void -mpz_abs (mpz_ptr __gmp_w, mpz_srcptr __gmp_u) -{ - if (__gmp_w != __gmp_u) - mpz_set (__gmp_w, __gmp_u); - __gmp_w->_mp_size = __GMP_ABS (__gmp_w->_mp_size); -} -#endif - -#if GMP_NAIL_BITS == 0 -#define __GMPZ_FITS_UTYPE_P(z,maxval) \ - mp_size_t __gmp_n = z->_mp_size; \ - mp_ptr __gmp_p = z->_mp_d; \ - return (__gmp_n == 0 || (__gmp_n == 1 && __gmp_p[0] <= maxval)); -#else -#define __GMPZ_FITS_UTYPE_P(z,maxval) \ - mp_size_t __gmp_n = z->_mp_size; \ - mp_ptr __gmp_p = z->_mp_d; \ - return (__gmp_n == 0 || (__gmp_n == 1 && __gmp_p[0] <= maxval) \ - || (__gmp_n == 2 && __gmp_p[1] <= ((mp_limb_t) maxval >> GMP_NUMB_BITS))); -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_uint_p) -#if ! defined (__GMP_FORCE_mpz_fits_uint_p) -__GMP_EXTERN_INLINE -#endif -int -mpz_fits_uint_p (mpz_srcptr __gmp_z) __GMP_NOTHROW -{ - __GMPZ_FITS_UTYPE_P (__gmp_z, __GMP_UINT_MAX); -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_ui_p) -#if ! defined (__GMP_FORCE_mpz_fits_ui_p) -__GMP_EXTERN_INLINE -#endif -int -mpz_fits_ui_p (mpz_srcptr __gmp_z) __GMP_NOTHROW -{ - __GMPZ_FITS_UTYPE_P (__gmp_z, GMP_UI_MAX); -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_ulong_p) -#if ! defined (__GMP_FORCE_mpz_fits_ulong_p) -__GMP_EXTERN_INLINE -#endif -int -mpz_fits_ulong_p (mpz_srcptr __gmp_z) __GMP_NOTHROW -{ - __GMPZ_FITS_UTYPE_P (__gmp_z, __GMP_ULONG_MAX); -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_ushort_p) -#if ! defined (__GMP_FORCE_mpz_fits_ushort_p) -__GMP_EXTERN_INLINE -#endif -int -mpz_fits_ushort_p (mpz_srcptr __gmp_z) __GMP_NOTHROW -{ - __GMPZ_FITS_UTYPE_P (__gmp_z, __GMP_USHRT_MAX); -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_get_ui) -#if ! 
defined (__GMP_FORCE_mpz_get_ui) -__GMP_EXTERN_INLINE -#endif -mpir_ui -mpz_get_ui (mpz_srcptr __gmp_z) __GMP_NOTHROW -{ - mp_ptr __gmp_p = __gmp_z->_mp_d; - mp_size_t __gmp_n = __gmp_z->_mp_size; - mp_limb_t __gmp_l = __gmp_p[0]; - /* This is a "#if" rather than a plain "if" so as to avoid gcc warnings - about "<< GMP_NUMB_BITS" exceeding the type size, and to avoid Borland - C++ 6.0 warnings about condition always true for something like - "__GMP_ULONG_MAX < GMP_NUMB_MASK". */ -#if GMP_NAIL_BITS == 0 || defined (_LONG_LONG_LIMB) - /* limb==long and no nails, or limb==longlong, one limb is enough */ - return (mpir_ui)(__gmp_n != 0 ? __gmp_l : 0); -#else - /* limb==long and nails, need two limbs when available */ - __gmp_n = __GMP_ABS (__gmp_n); - if (__gmp_n <= 1) - return (mpir_ui)(__gmp_n != 0 ? __gmp_l : 0); - else - return (mpir_ui)(__gmp_l + (__gmp_p[1] << GMP_NUMB_BITS)); -#endif -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_getlimbn) -#if ! defined (__GMP_FORCE_mpz_getlimbn) -__GMP_EXTERN_INLINE -#endif -mp_limb_t -mpz_getlimbn (mpz_srcptr __gmp_z, mp_size_t __gmp_n) __GMP_NOTHROW -{ - mp_limb_t __gmp_result = 0; - if (__GMP_LIKELY (__gmp_n >= 0 && __gmp_n < __GMP_ABS (__gmp_z->_mp_size))) - __gmp_result = __gmp_z->_mp_d[__gmp_n]; - return __gmp_result; -} -#endif - -#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpz_neg) -__GMP_EXTERN_INLINE void -mpz_neg (mpz_ptr __gmp_w, mpz_srcptr __gmp_u) -{ - if (__gmp_w != __gmp_u) - mpz_set (__gmp_w, __gmp_u); - __gmp_w->_mp_size = - __gmp_w->_mp_size; -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_perfect_square_p) -#if ! defined (__GMP_FORCE_mpz_perfect_square_p) -__GMP_EXTERN_INLINE -#endif -int -mpz_perfect_square_p (mpz_srcptr __gmp_a) -{ - mp_size_t __gmp_asize; - int __gmp_result; - - __gmp_asize = __gmp_a->_mp_size; - __gmp_result = (__gmp_asize >= 0); /* zero is a square, negatives are not */ - if (__GMP_LIKELY (__gmp_asize > 0)) - __gmp_result = mpn_perfect_square_p (__gmp_a->_mp_d, __gmp_asize); - return __gmp_result; -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_popcount) -#if ! defined (__GMP_FORCE_mpz_popcount) -__GMP_EXTERN_INLINE -#endif -mp_bitcnt_t -mpz_popcount (mpz_srcptr __gmp_u) __GMP_NOTHROW -{ - mp_size_t __gmp_usize; - mp_bitcnt_t __gmp_result; - - __gmp_usize = __gmp_u->_mp_size; - __gmp_result = (__gmp_usize < 0 ? __GMP_BITCNT_MAX : 0); - if (__GMP_LIKELY (__gmp_usize > 0)) - __gmp_result = mpn_popcount (__gmp_u->_mp_d, __gmp_usize); - return __gmp_result; -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_set_q) -#if ! defined (__GMP_FORCE_mpz_set_q) -__GMP_EXTERN_INLINE -#endif -void -mpz_set_q (mpz_ptr __gmp_w, mpq_srcptr __gmp_u) -{ - mpz_tdiv_q (__gmp_w, mpq_numref (__gmp_u), mpq_denref (__gmp_u)); -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_size) -#if ! defined (__GMP_FORCE_mpz_size) -__GMP_EXTERN_INLINE -#endif -size_t -mpz_size (mpz_srcptr __gmp_z) __GMP_NOTHROW -{ - return __GMP_ABS (__gmp_z->_mp_size); -} -#endif - - -/**************** mpq inlines ****************/ - -#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpq_abs) -__GMP_EXTERN_INLINE void -mpq_abs (mpq_ptr __gmp_w, mpq_srcptr __gmp_u) -{ - if (__gmp_w != __gmp_u) - mpq_set (__gmp_w, __gmp_u); - __gmp_w->_mp_num._mp_size = __GMP_ABS (__gmp_w->_mp_num._mp_size); -} -#endif - -#if defined (__GMP_EXTERN_INLINE) && ! 
defined (__GMP_FORCE_mpq_neg) -__GMP_EXTERN_INLINE void -mpq_neg (mpq_ptr __gmp_w, mpq_srcptr __gmp_u) -{ - if (__gmp_w != __gmp_u) - mpq_set (__gmp_w, __gmp_u); - __gmp_w->_mp_num._mp_size = - __gmp_w->_mp_num._mp_size; -} -#endif - - -/**************** mpn inlines ****************/ - -/* The comments with __GMPN_ADD_1 below apply here too. - - The test for FUNCTION returning 0 should predict well. If it's assumed - {yp,ysize} will usually have a random number of bits then the high limb - won't be full and a carry out will occur a good deal less than 50% of the - time. - - ysize==0 isn't a documented feature, but is used internally in a few - places. - - Producing cout last stops it using up a register during the main part of - the calculation, though gcc (as of 3.0) on an "if (mpn_add (...))" - doesn't seem able to move the true and false legs of the conditional up - to the two places cout is generated. */ - -#define __GMPN_AORS(cout, wp, xp, xsize, yp, ysize, FUNCTION, TEST) \ - do { \ - mp_size_t __gmp_i; \ - mp_limb_t __gmp_x; \ - \ - /* ASSERT ((ysize) >= 0); */ \ - /* ASSERT ((xsize) >= (ysize)); */ \ - /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, xp, xsize)); */ \ - /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, yp, ysize)); */ \ - \ - __gmp_i = (ysize); \ - if (__gmp_i != 0) \ - { \ - if (FUNCTION (wp, xp, yp, __gmp_i)) \ - { \ - do \ - { \ - if (__gmp_i >= (xsize)) \ - { \ - (cout) = 1; \ - goto __gmp_done; \ - } \ - __gmp_x = (xp)[__gmp_i]; \ - } \ - while (TEST); \ - } \ - } \ - if ((wp) != (xp)) \ - __GMPN_COPY_REST (wp, xp, xsize, __gmp_i); \ - (cout) = 0; \ - __gmp_done: \ - ; \ - } while (0) - -#define __GMPN_ADD(cout, wp, xp, xsize, yp, ysize) \ - __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_add_n, \ - (((wp)[__gmp_i++] = (__gmp_x + 1) & GMP_NUMB_MASK) == 0)) -#define __GMPN_SUB(cout, wp, xp, xsize, yp, ysize) \ - __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_sub_n, \ - (((wp)[__gmp_i++] = (__gmp_x - 1) & GMP_NUMB_MASK), __gmp_x == 0)) - - -/* The use of __gmp_i indexing is designed to ensure a compile time src==dst - remains nice and clear to the compiler, so that __GMPN_COPY_REST can - disappear, and the load/add/store gets a chance to become a - read-modify-write on CISC CPUs. - - Alternatives: - - Using a pair of pointers instead of indexing would be possible, but gcc - isn't able to recognise compile-time src==dst in that case, even when the - pointers are incremented more or less together. Other compilers would - very likely have similar difficulty. - - gcc could use "if (__builtin_constant_p(src==dst) && src==dst)" or - similar to detect a compile-time src==dst. This works nicely on gcc - 2.95.x, it's not good on gcc 3.0 where __builtin_constant_p(p==p) seems - to be always false, for a pointer p. But the current code form seems - good enough for src==dst anyway. - - gcc on x86 as usual doesn't give particularly good flags handling for the - carry/borrow detection. It's tempting to want some multi instruction asm - blocks to help it, and this was tried, but in truth there's only a few - instructions to save and any gain is all too easily lost by register - juggling setting up for the asm. 
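// [editor's example] Observable effect of the carry propagation coded in
// __GMPN_AORS: mpn_add (the inline built from __GMPN_ADD further below) first
// adds the short operand with FUNCTION, then ripples the carry limb-by-limb
// through the rest of {xp,xsize} via the TEST loop. A sketch:

    #include <mpir.h>
    #include <cstdio>

    int main()
    {
        // the low two limbs of x are all one-bits, so adding the single
        // limb y = 1 must ripple a carry up into the third limb
        mp_limb_t x[3] = { ~(mp_limb_t) 0, ~(mp_limb_t) 0, 7 };
        mp_limb_t y[1] = { 1 };
        mp_limb_t w[3];

        mp_limb_t cout = mpn_add(w, x, 3, y, 1);
        // w is now {0, 0, 8} and cout is 0: the carry was absorbed before
        // running off the top limb, so the TEST loop terminated early
        std::printf("cout = %lu, w[2] = %lu\n",
                    (unsigned long) cout, (unsigned long) w[2]);
        return 0;
    }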
*/ - -#if GMP_NAIL_BITS == 0 -#define __GMPN_AORS_1(cout, dst, src, n, v, OP, CB) \ - do { \ - mp_size_t __gmp_i; \ - mp_limb_t __gmp_x, __gmp_r; \ - \ - /* ASSERT ((n) >= 1); */ \ - /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, n)); */ \ - \ - __gmp_x = (src)[0]; \ - __gmp_r = __gmp_x OP (v); \ - (dst)[0] = __gmp_r; \ - if (CB (__gmp_r, __gmp_x, (v))) \ - { \ - (cout) = 1; \ - for (__gmp_i = 1; __gmp_i < (n);) \ - { \ - __gmp_x = (src)[__gmp_i]; \ - __gmp_r = __gmp_x OP 1; \ - (dst)[__gmp_i] = __gmp_r; \ - ++__gmp_i; \ - if (!CB (__gmp_r, __gmp_x, 1)) \ - { \ - if ((src) != (dst)) \ - __GMPN_COPY_REST (dst, src, n, __gmp_i); \ - (cout) = 0; \ - break; \ - } \ - } \ - } \ - else \ - { \ - if ((src) != (dst)) \ - __GMPN_COPY_REST (dst, src, n, 1); \ - (cout) = 0; \ - } \ - } while (0) -#endif - -#if GMP_NAIL_BITS >= 1 -#define __GMPN_AORS_1(cout, dst, src, n, v, OP, CB) \ - do { \ - mp_size_t __gmp_i; \ - mp_limb_t __gmp_x, __gmp_r; \ - \ - /* ASSERT ((n) >= 1); */ \ - /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, n)); */ \ - \ - __gmp_x = (src)[0]; \ - __gmp_r = __gmp_x OP (v); \ - (dst)[0] = __gmp_r & GMP_NUMB_MASK; \ - if (__gmp_r >> GMP_NUMB_BITS != 0) \ - { \ - (cout) = 1; \ - for (__gmp_i = 1; __gmp_i < (n);) \ - { \ - __gmp_x = (src)[__gmp_i]; \ - __gmp_r = __gmp_x OP 1; \ - (dst)[__gmp_i] = __gmp_r & GMP_NUMB_MASK; \ - ++__gmp_i; \ - if (__gmp_r >> GMP_NUMB_BITS == 0) \ - { \ - if ((src) != (dst)) \ - __GMPN_COPY_REST (dst, src, n, __gmp_i); \ - (cout) = 0; \ - break; \ - } \ - } \ - } \ - else \ - { \ - if ((src) != (dst)) \ - __GMPN_COPY_REST (dst, src, n, 1); \ - (cout) = 0; \ - } \ - } while (0) -#endif - -#define __GMPN_ADDCB(r,x,y) ((r) < (y)) -#define __GMPN_SUBCB(r,x,y) ((x) < (y)) - -#define __GMPN_ADD_1(cout, dst, src, n, v) \ - __GMPN_AORS_1(cout, dst, src, n, v, +, __GMPN_ADDCB) -#define __GMPN_SUB_1(cout, dst, src, n, v) \ - __GMPN_AORS_1(cout, dst, src, n, v, -, __GMPN_SUBCB) - - -/* Compare {xp,size} and {yp,size}, setting "result" to positive, zero or - negative. size==0 is allowed. On random data usually only one limb will - need to be examined to get a result, so it's worth having it inline. */ -#define __GMPN_CMP(result, xp, yp, size) \ - do { \ - mp_size_t __gmp_i; \ - mp_limb_t __gmp_x, __gmp_y; \ - \ - /* ASSERT ((size) >= 0); */ \ - \ - (result) = 0; \ - __gmp_i = (size); \ - while (--__gmp_i >= 0) \ - { \ - __gmp_x = (xp)[__gmp_i]; \ - __gmp_y = (yp)[__gmp_i]; \ - if (__gmp_x != __gmp_y) \ - { \ - /* Cannot use __gmp_x - __gmp_y, may overflow an "int" */ \ - (result) = (__gmp_x > __gmp_y ? 1 : -1); \ - break; \ - } \ - } \ - } while (0) - - -#if defined (__GMPN_COPY) && ! defined (__GMPN_COPY_REST) -#define __GMPN_COPY_REST(dst, src, size, start) \ - do { \ - /* ASSERT ((start) >= 0); */ \ - /* ASSERT ((start) <= (size)); */ \ - __GMPN_COPY ((dst)+(start), (src)+(start), (size)-(start)); \ - } while (0) -#endif - -/* Copy {src,size} to {dst,size}, starting at "start". This is designed to - keep the indexing dst[j] and src[j] nice and simple for __GMPN_ADD_1, - __GMPN_ADD, etc. */ -#if ! defined (__GMPN_COPY_REST) -#define __GMPN_COPY_REST(dst, src, size, start) \ - do { \ - mp_size_t __gmp_j; \ - /* ASSERT ((size) >= 0); */ \ - /* ASSERT ((start) >= 0); */ \ - /* ASSERT ((start) <= (size)); */ \ - /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \ - for (__gmp_j = (start); __gmp_j < (size); __gmp_j++) \ - (dst)[__gmp_j] = (src)[__gmp_j]; \ - } while (0) -#endif - -/* Enhancement: Use some of the smarter code from gmp-impl.h. 
Maybe use - mpn_copyi if there's a native version, and if we don't mind demanding - binary compatibility for it (on targets which use it). */ - -#if ! defined (__GMPN_COPY) -#define __GMPN_COPY(dst, src, size) __GMPN_COPY_REST (dst, src, size, 0) -#endif - - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_add) -#if ! defined (__GMP_FORCE_mpn_add) -__GMP_EXTERN_INLINE -#endif -mp_limb_t -mpn_add (mp_ptr __gmp_wp, mp_srcptr __gmp_xp, mp_size_t __gmp_xsize, mp_srcptr __gmp_yp, mp_size_t __gmp_ysize) -{ - mp_limb_t __gmp_c; - __GMPN_ADD (__gmp_c, __gmp_wp, __gmp_xp, __gmp_xsize, __gmp_yp, __gmp_ysize); - return __gmp_c; -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_add_1) -#if ! defined (__GMP_FORCE_mpn_add_1) -__GMP_EXTERN_INLINE -#endif -mp_limb_t -mpn_add_1 (mp_ptr __gmp_dst, mp_srcptr __gmp_src, mp_size_t __gmp_size, mp_limb_t __gmp_n) __GMP_NOTHROW -{ - mp_limb_t __gmp_c; - __GMPN_ADD_1 (__gmp_c, __gmp_dst, __gmp_src, __gmp_size, __gmp_n); - return __gmp_c; -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_cmp) -#if ! defined (__GMP_FORCE_mpn_cmp) -__GMP_EXTERN_INLINE -#endif -int -mpn_cmp (mp_srcptr __gmp_xp, mp_srcptr __gmp_yp, mp_size_t __gmp_size) __GMP_NOTHROW -{ - int __gmp_result; - __GMPN_CMP (__gmp_result, __gmp_xp, __gmp_yp, __gmp_size); - return __gmp_result; -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_sub) -#if ! defined (__GMP_FORCE_mpn_sub) -__GMP_EXTERN_INLINE -#endif -mp_limb_t -mpn_sub (mp_ptr __gmp_wp, mp_srcptr __gmp_xp, mp_size_t __gmp_xsize, mp_srcptr __gmp_yp, mp_size_t __gmp_ysize) -{ - mp_limb_t __gmp_c; - __GMPN_SUB (__gmp_c, __gmp_wp, __gmp_xp, __gmp_xsize, __gmp_yp, __gmp_ysize); - return __gmp_c; -} -#endif - -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_sub_1) -#if ! defined (__GMP_FORCE_mpn_sub_1) -__GMP_EXTERN_INLINE -#endif -mp_limb_t -mpn_sub_1 (mp_ptr __gmp_dst, mp_srcptr __gmp_src, mp_size_t __gmp_size, mp_limb_t __gmp_n) __GMP_NOTHROW -{ - mp_limb_t __gmp_c; - __GMPN_SUB_1 (__gmp_c, __gmp_dst, __gmp_src, __gmp_size, __gmp_n); - return __gmp_c; -} -#endif - -#if defined (__cplusplus) -} -#endif - - -/* Allow faster testing for negative, zero, and positive. */ -#define mpz_sgn(Z) ((Z)->_mp_size < 0 ? -1 : (Z)->_mp_size > 0) -#define mpf_sgn(F) ((F)->_mp_size < 0 ? -1 : (F)->_mp_size > 0) -#define mpq_sgn(Q) ((Q)->_mp_num._mp_size < 0 ? -1 : (Q)->_mp_num._mp_size > 0) - -/* When using GCC, optimize certain common comparisons. */ -#if defined (__GNUC__) -#define mpz_cmp_ui(Z,UI) \ - (__builtin_constant_p (UI) && (UI) == 0 \ - ? mpz_sgn (Z) : _mpz_cmp_ui (Z,UI)) -#define mpz_cmp_si(Z,SI) \ - (__builtin_constant_p (SI) && (SI) == 0 ? mpz_sgn (Z) \ - : __builtin_constant_p (SI) && (SI) > 0 \ - ? _mpz_cmp_ui (Z, __GMP_CAST (unsigned long int, SI)) \ - : _mpz_cmp_si (Z,SI)) -#define mpq_cmp_ui(Q,NUI,DUI) \ - (__builtin_constant_p (NUI) && (NUI) == 0 \ - ? mpq_sgn (Q) : _mpq_cmp_ui (Q,NUI,DUI)) -#define mpq_cmp_si(q,n,d) \ - (__builtin_constant_p ((n) >= 0) && (n) >= 0 \ - ? mpq_cmp_ui (q, __GMP_CAST (unsigned long, n), d) \ - : _mpq_cmp_si (q, n, d)) -#else -#define mpz_cmp_ui(Z,UI) _mpz_cmp_ui (Z,UI) -#define mpz_cmp_si(Z,UI) _mpz_cmp_si (Z,UI) -#define mpq_cmp_ui(Q,NUI,DUI) _mpq_cmp_ui (Q,NUI,DUI) -#define mpq_cmp_si(q,n,d) _mpq_cmp_si(q,n,d) -#endif - - -/* Using "&" rather than "&&" means these can come out branch-free. Every - mpz_t has at least one limb allocated, so fetching the low limb is always - allowed. 
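// [editor's example] With gcc, the mpz_cmp_ui / mpz_cmp_si wrappers above
// collapse a comparison against a literal 0 into the branch-free mpz_sgn
// macro, i.e. a sign test on _mp_size with no library call. A sketch:

    #include <mpir.h>
    #include <cstdio>

    int main()
    {
        mpz_t z;
        mpz_init_set_si(z, -5);

        // the 0 is a compile-time constant, so under gcc this is mpz_sgn
        if (mpz_cmp_ui(z, 0) < 0)
            std::printf("negative, sgn = %d\n", mpz_sgn(z));   // sgn = -1

        mpz_clear(z);
        return 0;
    }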
*/ -#define mpz_odd_p(z) (((z)->_mp_size != 0) & __GMP_CAST (int, (z)->_mp_d[0])) -#define mpz_even_p(z) (! mpz_odd_p (z)) - - -/**************** C++ routines ****************/ - -#ifdef __cplusplus -__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpz_srcptr); -__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpq_srcptr); -__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpf_srcptr); -__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpz_ptr); -__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpq_ptr); -__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpf_ptr); -#endif - -/* Source-level compatibility with GMP 1. */ -#define mpz_mdiv mpz_fdiv_q -#define mpz_mdivmod mpz_fdiv_qr -#define mpz_mmod mpz_fdiv_r -#define mpz_mdiv_ui mpz_fdiv_q_ui -#define mpz_mdivmod_ui(q,r,n,d) \ - (((r) == 0) ? mpz_fdiv_q_ui (q,n,d) : mpz_fdiv_qr_ui (q,r,n,d)) -#define mpz_mmod_ui(r,n,d) \ - (((r) == 0) ? mpz_fdiv_ui (n,d) : mpz_fdiv_r_ui (r,n,d)) - -#define gmp_randinit(x,y,z) gmp_randinit_lc_2exp_size(x,z) - -typedef __mpz_struct MP_INT; /* gmp 1 source compatibility */ -typedef __mpq_struct MP_RAT; /* gmp 1 source compatibility */ - -#define mpz_div mpz_fdiv_q -#define mpz_divmod mpz_fdiv_qr -#define mpz_div_ui mpz_fdiv_q_ui -#define mpz_divmod_ui mpz_fdiv_qr_ui -#define mpz_div_2exp mpz_fdiv_q_2exp -#define mpz_mod_2exp mpz_fdiv_r_2exp - -enum -{ - GMP_ERROR_NONE = 0, - GMP_ERROR_UNSUPPORTED_ARGUMENT = 1, - GMP_ERROR_DIVISION_BY_ZERO = 2, - GMP_ERROR_SQRT_OF_NEGATIVE = 4, - GMP_ERROR_INVALID_ARGUMENT = 8 -}; - -/* Major version number is the value of __GNU_MP__ too, above and in mp.h. */ -#define __GNU_MP_VERSION 5 -#define __GNU_MP_VERSION_MINOR 1 -#define __GNU_MP_VERSION_PATCHLEVEL 3 -#define GMP_VERSION "5.1.3" -#define __GNU_MP_RELEASE (__GNU_MP_VERSION * 10000 + __GNU_MP_VERSION_MINOR * 100 + __GNU_MP_VERSION_PATCHLEVEL) - -#define __MPIR_VERSION 2 -#define __MPIR_VERSION_MINOR 7 -#define __MPIR_VERSION_PATCHLEVEL 0 -#if defined( _MSC_VER ) -#define _MSC_MPIR_VERSION "2.7.0" -#endif -#define __MPIR_RELEASE (__MPIR_VERSION * 10000 + __MPIR_VERSION_MINOR * 100 + __MPIR_VERSION_PATCHLEVEL) - -/* These are for programs like MPFR to use the same CC and CFLAGS as MPIR */ - -#if ! defined (__GMP_WITHIN_CONFIGURE) -/* No __GMP_CC here as --enable-gmpcompat option not selected */ - -#define __MPIR_CC "gcc -std=gnu99" -#define __MPIR_CFLAGS "-m64 -O2 -march=corei7-avx -mtune=corei7-avx" -#endif - -#define __GMP_H__ -#endif /* __GMP_H__ */ diff --git a/thirdparty/linux/mpir_patch/mpirxx.h b/thirdparty/linux/mpir_patch/mpirxx.h deleted file mode 100644 index eaaea2a87c..0000000000 --- a/thirdparty/linux/mpir_patch/mpirxx.h +++ /dev/null @@ -1,3611 +0,0 @@ -/* gmpxx.h -- C++ class wrapper for GMP types. -*- C++ -*- - -Copyright 2001, 2002, 2003, 2006, 2008, 2011, 2012 Free Software Foundation, -Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 3 of the License, or (at your -option) any later version. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with the GNU MP Library. 
If not, see http://www.gnu.org/licenses/.  */
-
-/* the C++ compiler must implement the following features:
-   - member templates
-   - partial specialization of templates
-   - namespace support
-   for g++, this means version 2.91 or higher
-   for other compilers, I don't know */
-#ifdef __GNUC__
-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 91)
-#error mpirxx.h requires g++ version 2.91 (egcs 1.1.2) or higher
-#endif
-#endif
-#ifndef __GMP_PLUSPLUS__
-#define __GMP_PLUSPLUS__
-
-#include <cstddef> /* for size_t */
-
-#include
-
-#include <cstring> /* for strlen */
-#include <limits> /* numeric_limits */
-#include
-#include
-#include
-#include
-#include <algorithm> /* swap */
-#include "mpir.h"
-
-#if defined( _MSC_VER ) && _MSC_VER >= 1700
-# define MSC_CXX_11 1
-#elif defined( __INTEL_COMPILER ) && __INTEL_COMPILER > 1310
-# define MSC_CXX_11 1
-#elif defined( __ICL ) && __ICL > 1310
-# define MSC_CXX_11 1
-#endif
-
-#if defined(LLONG_MAX) && defined(LONG_MAX)
-#if LLONG_MAX != LONG_MAX
-#define MPIRXX_HAVE_LLONG 1
-#endif
-#endif
-
-/* check availability of stdint.h -- note we do not include this ourselves */
-#if defined(INTMAX_MAX)
-# if defined(LONG_MAX) && defined(INTMAX_MAX) && INTMAX_MAX != LONG_MAX && (INTMAX_MAX != LLONG_MAX || !defined(MPIRXX_HAVE_LLONG))
-# define MPIRXX_INTMAX_T 1
-# endif
-# if defined(ULONG_MAX) && defined(UINTMAX_MAX) && UINTMAX_MAX != ULONG_MAX && (UINTMAX_MAX != ULLONG_MAX || !defined(MPIRXX_HAVE_LLONG))
-# define MPIRXX_UINTMAX_T 1
-# endif
-#endif
-
-// wrapper for gcc's __builtin_constant_p
-// __builtin_constant_p has been in gcc since forever,
-// but g++-3.4 miscompiles it.
-#if __GMP_GNUC_PREREQ(4, 2)
-#define __GMPXX_CONSTANT(X) __builtin_constant_p(X)
-#else
-#define __GMPXX_CONSTANT(X) false
-#endif
-
-// Use C++11 features
-#ifndef __GMPXX_USE_CXX11
-#if __cplusplus >= 201103L
-#define __GMPXX_USE_CXX11 1
-#else
-#define __GMPXX_USE_CXX11 0
-#endif
-#endif
-
-#if __GMPXX_USE_CXX11
-#define __GMPXX_NOEXCEPT noexcept
-#include <type_traits> // for common_type
-#else
-#define __GMPXX_NOEXCEPT
-#endif
-
-// Max allocations for plain types when converted to mpz_t
-#define __GMPZ_DBL_LIMBS (2 + DBL_MAX_EXP / GMP_NUMB_BITS)
-
-#if GMP_NAIL_BITS != 0 && !
defined _LONG_LONG_LIMB -#define __GMPZ_ULI_LIMBS 2 -#else -#define __GMPZ_ULI_LIMBS 1 -#endif - -inline void __mpz_set_ui_safe(mpz_ptr p, mpir_ui l) -{ - p->_mp_size = (l != 0); - p->_mp_d[0] = l & GMP_NUMB_MASK; -#if __GMPZ_ULI_LIMBS > 1 - l >>= GMP_NUMB_BITS; - p->_mp_d[1] = l; - p->_mp_size += (l != 0); -#endif -} - -inline void __mpz_set_si_safe(mpz_ptr p, mpir_si l) -{ - if(l < 0) - { - __mpz_set_ui_safe(p, -static_cast(l)); - mpz_neg(p, p); - } - else - __mpz_set_ui_safe(p, l); - // Note: we know the high bit of l is 0 so we could do slightly better -} - -// Fake temporary variables -#define __GMPXX_TMPZ_UI \ - mpz_t temp; \ - mp_limb_t limbs[__GMPZ_ULI_LIMBS]; \ - temp->_mp_d = limbs; \ - __mpz_set_ui_safe (temp, l) -#define __GMPXX_TMPZ_SI \ - mpz_t temp; \ - mp_limb_t limbs[__GMPZ_ULI_LIMBS]; \ - temp->_mp_d = limbs; \ - __mpz_set_si_safe (temp, l) -#define __GMPXX_TMPZ_D \ - mpz_t temp; \ - mp_limb_t limbs[__GMPZ_DBL_LIMBS]; \ - temp->_mp_d = limbs; \ - temp->_mp_alloc = __GMPZ_DBL_LIMBS; \ - mpz_set_d (temp, d) - -#define __GMPXX_TMPQ_UI \ - mpq_t temp; \ - mp_limb_t limbs[__GMPZ_ULI_LIMBS+1]; \ - mpq_numref(temp)->_mp_d = limbs; \ - __mpz_set_ui_safe (mpq_numref(temp), l); \ - mpq_denref(temp)->_mp_d = limbs + __GMPZ_ULI_LIMBS; \ - mpq_denref(temp)->_mp_size = 1; \ - mpq_denref(temp)->_mp_d[0] = 1 -#define __GMPXX_TMPQ_SI \ - mpq_t temp; \ - mp_limb_t limbs[__GMPZ_ULI_LIMBS+1]; \ - mpq_numref(temp)->_mp_d = limbs; \ - __mpz_set_si_safe (mpq_numref(temp), l); \ - mpq_denref(temp)->_mp_d = limbs + __GMPZ_ULI_LIMBS; \ - mpq_denref(temp)->_mp_size = 1; \ - mpq_denref(temp)->_mp_d[0] = 1 - -inline mpir_ui __gmpxx_abs_ui (mpir_si l) -{ - return l >= 0 ? static_cast(l) - : -static_cast(l); -} - -/**************** Function objects ****************/ -/* Any evaluation of a __gmp_expr ends up calling one of these functions - all intermediate functions being inline, the evaluation should optimize - to a direct call to the relevant function, thus yielding no overhead - over the C interface. */ - -struct __gmp_unary_plus -{ - static void eval(mpz_ptr z, mpz_srcptr w) { mpz_set(z, w); } - static void eval(mpq_ptr q, mpq_srcptr r) { mpq_set(q, r); } - static void eval(mpf_ptr f, mpf_srcptr g) { mpf_set(f, g); } -}; - -struct __gmp_unary_minus -{ - static void eval(mpz_ptr z, mpz_srcptr w) { mpz_neg(z, w); } - static void eval(mpq_ptr q, mpq_srcptr r) { mpq_neg(q, r); } - static void eval(mpf_ptr f, mpf_srcptr g) { mpf_neg(f, g); } -}; - -struct __gmp_unary_com -{ - static void eval(mpz_ptr z, mpz_srcptr w) { mpz_com(z, w); } -}; - -struct __gmp_binary_plus -{ - static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v) - { mpz_add(z, w, v); } - - static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l) - { - // Ideally, those checks should happen earlier so that the tree - // generated for a+0+b would just be sum(a,b). 
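// [editor's example] The __GMPXX_TMPZ_* macros above fake a stack-resident
// mpz_t: _mp_d is pointed at a local limb array, so a mixed-type operation
// needs no heap allocation. The same trick written out by hand for a one-limb
// value (assumes __GMPZ_ULI_LIMBS == 1, i.e. no nails and a limb at least as
// wide as mpir_ui; this leans on the internal representation exactly as the
// header does, so it is not application-level style):

    #include <mpir.h>

    int main()
    {
        mpz_t big, sum;
        mpz_init_set_str(big, "99999999999999999999", 10);
        mpz_init(sum);

        mpir_ui l = 42;
        mpz_t temp;                  // never mpz_init'ed...
        mp_limb_t limbs[1];
        temp->_mp_d = limbs;         // ...its limbs live on the stack
        temp->_mp_alloc = 1;
        temp->_mp_size = (l != 0);   // as in __mpz_set_ui_safe above
        temp->_mp_d[0] = l & GMP_NUMB_MASK;

        mpz_add(sum, big, temp);     // temp is read-only here; no realloc
        gmp_printf("%Zd\n", sum);

        mpz_clear(big);
        mpz_clear(sum);
        return 0;
    }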
- if (__GMPXX_CONSTANT(l) && l == 0) - { - if (z != w) mpz_set(z, w); - } - else - mpz_add_ui(z, w, l); - } - static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w) - { eval(z, w, l); } - static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l) - { - if (l >= 0) - eval(z, w, static_cast(l)); - else - mpz_sub_ui(z, w, -static_cast(l)); - } - static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w) - { eval(z, w, l); } - static void eval(mpz_ptr z, mpz_srcptr w, double d) - { __GMPXX_TMPZ_D; mpz_add (z, w, temp); } - static void eval(mpz_ptr z, double d, mpz_srcptr w) - { eval(z, w, d); } - - static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s) - { mpq_add(q, r, s); } - - static void eval(mpq_ptr q, mpq_srcptr r, mpir_ui l) - { - if (__GMPXX_CONSTANT(l) && l == 0) - { - if (q != r) mpq_set(q, r); - } - else - { - if (q == r) - mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l); - else - { - mpz_mul_ui(mpq_numref(q), mpq_denref(r), l); - mpz_add(mpq_numref(q), mpq_numref(q), mpq_numref(r)); - mpz_set(mpq_denref(q), mpq_denref(r)); - } - } - } - static void eval(mpq_ptr q, mpir_ui l, mpq_srcptr r) - { eval(q, r, l); } - static inline void eval(mpq_ptr q, mpq_srcptr r, mpir_si l); - // defined after __gmp_binary_minus - static void eval(mpq_ptr q, mpir_si l, mpq_srcptr r) - { eval(q, r, l); } - static void eval(mpq_ptr q, mpq_srcptr r, double d) - { - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - mpq_add(q, r, temp); - mpq_clear(temp); - } - static void eval(mpq_ptr q, double d, mpq_srcptr r) - { eval(q, r, d); } - - static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z) - { - if (q == r) - mpz_addmul(mpq_numref(q), mpq_denref(q), z); - else - { - mpz_mul(mpq_numref(q), mpq_denref(r), z); - mpz_add(mpq_numref(q), mpq_numref(q), mpq_numref(r)); - mpz_set(mpq_denref(q), mpq_denref(r)); - } - } - static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r) - { eval(q, r, z); } - - static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h) - { mpf_add(f, g, h); } - - static void eval(mpf_ptr f, mpf_srcptr g, mpir_ui l) - { mpf_add_ui(f, g, l); } - static void eval(mpf_ptr f, mpir_ui l, mpf_srcptr g) - { mpf_add_ui(f, g, l); } - static void eval(mpf_ptr f, mpf_srcptr g, mpir_si l) - { - if (l >= 0) - mpf_add_ui(f, g, l); - else - mpf_sub_ui(f, g, -static_cast(l)); - } - static void eval(mpf_ptr f, mpir_si l, mpf_srcptr g) - { eval(f, g, l); } - static void eval(mpf_ptr f, mpf_srcptr g, double d) - { - mpf_t temp; - mpf_init2(temp, 8*sizeof(double)); - mpf_set_d(temp, d); - mpf_add(f, g, temp); - mpf_clear(temp); - } - static void eval(mpf_ptr f, double d, mpf_srcptr g) - { eval(f, g, d); } -}; - -struct __gmp_binary_minus -{ - static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v) - { mpz_sub(z, w, v); } - - static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l) - { - if (__GMPXX_CONSTANT(l) && l == 0) - { - if (z != w) mpz_set(z, w); - } - else - mpz_sub_ui(z, w, l); - } - static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w) - { - if (__GMPXX_CONSTANT(l) && l == 0) - { - mpz_neg(z, w); - } - else - mpz_ui_sub(z, l, w); - } - static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l) - { - if (l >= 0) - eval(z, w, static_cast(l)); - else - mpz_add_ui(z, w, -static_cast(l)); - } - static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w) - { - if (l >= 0) - eval(z, static_cast(l), w); - else - { - mpz_add_ui(z, w, -static_cast(l)); - mpz_neg(z, z); - } - } - static void eval(mpz_ptr z, mpz_srcptr w, double d) - { __GMPXX_TMPZ_D; mpz_sub (z, w, temp); } - static void eval(mpz_ptr z, double d, mpz_srcptr w) - { 
__GMPXX_TMPZ_D; mpz_sub (z, temp, w); } - - static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s) - { mpq_sub(q, r, s); } - - static void eval(mpq_ptr q, mpq_srcptr r, mpir_ui l) - { - if (__GMPXX_CONSTANT(l) && l == 0) - { - if (q != r) mpq_set(q, r); - } - else - { - if (q == r) - mpz_submul_ui(mpq_numref(q), mpq_denref(q), l); - else - { - mpz_mul_ui(mpq_numref(q), mpq_denref(r), l); - mpz_sub(mpq_numref(q), mpq_numref(r), mpq_numref(q)); - mpz_set(mpq_denref(q), mpq_denref(r)); - } - } - } - static void eval(mpq_ptr q, mpir_ui l, mpq_srcptr r) - { eval(q, r, l); mpq_neg(q, q); } - static void eval(mpq_ptr q, mpq_srcptr r, mpir_si l) - { - if (l >= 0) - eval(q, r, static_cast(l)); - else - __gmp_binary_plus::eval(q, r, -static_cast(l)); - } - static void eval(mpq_ptr q, mpir_si l, mpq_srcptr r) - { eval(q, r, l); mpq_neg(q, q); } - static void eval(mpq_ptr q, mpq_srcptr r, double d) - { - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - mpq_sub(q, r, temp); - mpq_clear(temp); - } - static void eval(mpq_ptr q, double d, mpq_srcptr r) - { - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - mpq_sub(q, temp, r); - mpq_clear(temp); - } - - static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z) - { - if (q == r) - mpz_submul(mpq_numref(q), mpq_denref(q), z); - else - { - mpz_mul(mpq_numref(q), mpq_denref(r), z); - mpz_sub(mpq_numref(q), mpq_numref(r), mpq_numref(q)); - mpz_set(mpq_denref(q), mpq_denref(r)); - } - } - static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r) - { eval(q, r, z); mpq_neg(q, q); } - - static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h) - { mpf_sub(f, g, h); } - - static void eval(mpf_ptr f, mpf_srcptr g, mpir_ui l) - { mpf_sub_ui(f, g, l); } - static void eval(mpf_ptr f, mpir_ui l, mpf_srcptr g) - { mpf_ui_sub(f, l, g); } - static void eval(mpf_ptr f, mpf_srcptr g, mpir_si l) - { - if (l >= 0) - mpf_sub_ui(f, g, l); - else - mpf_add_ui(f, g, -static_cast(l)); - } - static void eval(mpf_ptr f, mpir_si l, mpf_srcptr g) - { - if (l >= 0) - mpf_sub_ui(f, g, l); - else - mpf_add_ui(f, g, -static_cast(l)); - mpf_neg(f, f); - } - static void eval(mpf_ptr f, mpf_srcptr g, double d) - { - mpf_t temp; - mpf_init2(temp, 8*sizeof(double)); - mpf_set_d(temp, d); - mpf_sub(f, g, temp); - mpf_clear(temp); - } - static void eval(mpf_ptr f, double d, mpf_srcptr g) - { - mpf_t temp; - mpf_init2(temp, 8*sizeof(double)); - mpf_set_d(temp, d); - mpf_sub(f, temp, g); - mpf_clear(temp); - } -}; - -// defined here so it can reference __gmp_binary_minus -inline void -__gmp_binary_plus::eval(mpq_ptr q, mpq_srcptr r, mpir_si l) -{ - if (l >= 0) - eval(q, r, static_cast(l)); - else - __gmp_binary_minus::eval(q, r, -static_cast(l)); -} - -struct __gmp_binary_lshift -{ - static void eval(mpz_ptr z, mpz_srcptr w, mp_bitcnt_t l) - { - if (__GMPXX_CONSTANT(l) && (l == 0)) - { - if (z != w) mpz_set(z, w); - } - else - mpz_mul_2exp(z, w, l); - } - static void eval(mpq_ptr q, mpq_srcptr r, mp_bitcnt_t l) - { - if (__GMPXX_CONSTANT(l) && (l == 0)) - { - if (q != r) mpq_set(q, r); - } - else - mpq_mul_2exp(q, r, l); - } - static void eval(mpf_ptr f, mpf_srcptr g, mp_bitcnt_t l) - { mpf_mul_2exp(f, g, l); } -}; - -struct __gmp_binary_rshift -{ - static void eval(mpz_ptr z, mpz_srcptr w, mp_bitcnt_t l) - { - if (__GMPXX_CONSTANT(l) && (l == 0)) - { - if (z != w) mpz_set(z, w); - } - else - mpz_fdiv_q_2exp(z, w, l); - } - static void eval(mpq_ptr q, mpq_srcptr r, mp_bitcnt_t l) - { - if (__GMPXX_CONSTANT(l) && (l == 0)) - { - if (q != r) mpq_set(q, r); - } - else - mpq_div_2exp(q, r, l); - 
} - static void eval(mpf_ptr f, mpf_srcptr g, mp_bitcnt_t l) - { mpf_div_2exp(f, g, l); } -}; - -struct __gmp_binary_multiplies -{ - static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v) - { mpz_mul(z, w, v); } - - static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l) - { -// gcc-3.3 doesn't have __builtin_ctzl. Don't bother optimizing for old gcc. -#if __GMP_GNUC_PREREQ(3, 4) - if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0) - { - if (l == 0) - { - z->_mp_size = 0; - } - else - { - __gmp_binary_lshift::eval(z, w, __builtin_ctzl(l)); - } - } - else -#endif - mpz_mul_ui(z, w, l); - } - static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w) - { eval(z, w, l); } - static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l) - { - if (__GMPXX_CONSTANT(l)) - { - if (l >= 0) - eval(z, w, static_cast(l)); - else - { - eval(z, w, -static_cast(l)); - mpz_neg(z, z); - } - } - else - mpz_mul_si (z, w, l); - } - static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w) - { eval(z, w, l); } - static void eval(mpz_ptr z, mpz_srcptr w, double d) - { __GMPXX_TMPZ_D; mpz_mul (z, w, temp); } - static void eval(mpz_ptr z, double d, mpz_srcptr w) - { eval(z, w, d); } - - static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s) - { mpq_mul(q, r, s); } - - static void eval(mpq_ptr q, mpq_srcptr r, mpir_ui l) - { -#if __GMP_GNUC_PREREQ(3, 4) - if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0) - { - if (l == 0) - { - mpq_set_ui(q, 0, 1); - } - else - { - __gmp_binary_lshift::eval(q, r, __builtin_ctzl(l)); - } - } - else -#endif - { - __GMPXX_TMPQ_UI; - mpq_mul (q, r, temp); - } - } - static void eval(mpq_ptr q, mpir_ui l, mpq_srcptr r) - { eval(q, r, l); } - static void eval(mpq_ptr q, mpq_srcptr r, mpir_si l) - { - if (__GMPXX_CONSTANT(l)) - { - if (l >= 0) - eval(q, r, static_cast(l)); - else - { - eval(q, r, -static_cast(l)); - mpq_neg(q, q); - } - } - else - { - __GMPXX_TMPQ_SI; - mpq_mul (q, r, temp); - } - } - static void eval(mpq_ptr q, mpir_si l, mpq_srcptr r) - { eval(q, r, l); } - static void eval(mpq_ptr q, mpq_srcptr r, double d) - { - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - mpq_mul(q, r, temp); - mpq_clear(temp); - } - static void eval(mpq_ptr q, double d, mpq_srcptr r) - { eval(q, r, d); } - - static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h) - { mpf_mul(f, g, h); } - - static void eval(mpf_ptr f, mpf_srcptr g, mpir_ui l) - { mpf_mul_ui(f, g, l); } - static void eval(mpf_ptr f, mpir_ui l, mpf_srcptr g) - { mpf_mul_ui(f, g, l); } - static void eval(mpf_ptr f, mpf_srcptr g, mpir_si l) - { - if (l >= 0) - mpf_mul_ui(f, g, l); - else - { - mpf_mul_ui(f, g, -static_cast(l)); - mpf_neg(f, f); - } - } - static void eval(mpf_ptr f, mpir_si l, mpf_srcptr g) - { eval(f, g, l); } - static void eval(mpf_ptr f, mpf_srcptr g, double d) - { - mpf_t temp; - mpf_init2(temp, 8*sizeof(double)); - mpf_set_d(temp, d); - mpf_mul(f, g, temp); - mpf_clear(temp); - } - static void eval(mpf_ptr f, double d, mpf_srcptr g) - { eval(f, g, d); } -}; - -struct __gmp_binary_divides -{ - static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v) - { mpz_tdiv_q(z, w, v); } - - static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l) - { -#if __GMP_GNUC_PREREQ(3, 4) - // Don't optimize division by 0... 
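// [editor's example] Net effect of the __builtin_constant_p checks in
// __gmp_binary_multiplies (and, just below, __gmp_binary_divides): a multiply
// or exact power-of-two divide by a literal such as 8 turns into a limb
// shift. What the selected calls compute, written out directly:

    #include <mpir.h>

    int main()
    {
        mpz_t a, b;
        mpz_init_set_ui(a, 12345);
        mpz_init(b);

        mpz_mul_2exp(b, a, 3);    // the constant-8 path: b = a << 3
        mpz_mul_ui(a, a, 8);      // the general path; same value
        // mpz_cmp(a, b) == 0 holds; division by 8 likewise becomes
        // mpz_tdiv_q_2exp (truncating, matching operator/ semantics)

        mpz_clear(a);
        mpz_clear(b);
        return 0;
    }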
- if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0 && l != 0) - { - if (l == 1) - { - if (z != w) mpz_set(z, w); - } - else - mpz_tdiv_q_2exp(z, w, __builtin_ctzl(l)); - // warning: do not use rshift (fdiv) - } - else -#endif - mpz_tdiv_q_ui(z, w, l); - } - static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w) - { - if (mpz_sgn(w) >= 0) - { - if (mpz_fits_ui_p(w)) - mpz_set_ui(z, l / mpz_get_ui(w)); - else - mpz_set_ui(z, 0); - } - else - { - mpz_neg(z, w); - if (mpz_fits_ui_p(z)) - { - mpz_set_ui(z, l / mpz_get_ui(z)); - mpz_neg(z, z); - } - else - mpz_set_ui(z, 0); - } - } - static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l) - { - if (l >= 0) - eval(z, w, static_cast(l)); - else - { - eval(z, w, -static_cast(l)); - mpz_neg(z, z); - } - } - static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w) - { - if (mpz_fits_si_p(w)) - mpz_set_si(z, l / mpz_get_si(w)); - else - { - /* if w is bigger than a long then the quotient must be zero, unless - l==LONG_MIN and w==-LONG_MIN in which case the quotient is -1 */ - mpz_set_si (z, (mpz_cmpabs_ui (w, (l >= 0 ? l : -l)) == 0 ? -1 : 0)); - } - } - static void eval(mpz_ptr z, mpz_srcptr w, double d) - { __GMPXX_TMPZ_D; mpz_tdiv_q (z, w, temp); } - static void eval(mpz_ptr z, double d, mpz_srcptr w) - { __GMPXX_TMPZ_D; mpz_tdiv_q (z, temp, w); } - - static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s) - { mpq_div(q, r, s); } - - static void eval(mpq_ptr q, mpq_srcptr r, mpir_ui l) - { -#if __GMP_GNUC_PREREQ(3, 4) - if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0 && l != 0) - __gmp_binary_rshift::eval(q, r, __builtin_ctzl(l)); - else -#endif - { - __GMPXX_TMPQ_UI; - mpq_div (q, r, temp); - } - } - static void eval(mpq_ptr q, mpir_ui l, mpq_srcptr r) - { __GMPXX_TMPQ_UI; mpq_div (q, temp, r); } - static void eval(mpq_ptr q, mpq_srcptr r, mpir_si l) - { - if (__GMPXX_CONSTANT(l)) - { - if (l >= 0) - eval(q, r, static_cast(l)); - else - { - eval(q, r, -static_cast(l)); - mpq_neg(q, q); - } - } - else - { - __GMPXX_TMPQ_SI; - mpq_div (q, r, temp); - } - } - static void eval(mpq_ptr q, mpir_si l, mpq_srcptr r) - { __GMPXX_TMPQ_SI; mpq_div (q, temp, r); } - static void eval(mpq_ptr q, mpq_srcptr r, double d) - { - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - mpq_div(q, r, temp); - mpq_clear(temp); - } - static void eval(mpq_ptr q, double d, mpq_srcptr r) - { - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - mpq_div(q, temp, r); - mpq_clear(temp); - } - - static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h) - { mpf_div(f, g, h); } - - static void eval(mpf_ptr f, mpf_srcptr g, mpir_ui l) - { mpf_div_ui(f, g, l); } - static void eval(mpf_ptr f, mpir_ui l, mpf_srcptr g) - { mpf_ui_div(f, l, g); } - static void eval(mpf_ptr f, mpf_srcptr g, mpir_si l) - { - if (l >= 0) - mpf_div_ui(f, g, l); - else - { - mpf_div_ui(f, g, -static_cast(l)); - mpf_neg(f, f); - } - } - static void eval(mpf_ptr f, mpir_si l, mpf_srcptr g) - { - if (l >= 0) - mpf_ui_div(f, l, g); - else - { - mpf_ui_div(f, -static_cast(l), g); - mpf_neg(f, f); - } - } - static void eval(mpf_ptr f, mpf_srcptr g, double d) - { - mpf_t temp; - mpf_init2(temp, 8*sizeof(double)); - mpf_set_d(temp, d); - mpf_div(f, g, temp); - mpf_clear(temp); - } - static void eval(mpf_ptr f, double d, mpf_srcptr g) - { - mpf_t temp; - mpf_init2(temp, 8*sizeof(double)); - mpf_set_d(temp, d); - mpf_div(f, temp, g); - mpf_clear(temp); - } -}; - -struct __gmp_binary_modulus -{ - static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v) - { mpz_tdiv_r(z, w, v); } - - static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l) - 
{ mpz_tdiv_r_ui(z, w, l); } - static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w) - { - if (mpz_sgn(w) >= 0) - { - if (mpz_fits_ui_p(w)) - mpz_set_ui(z, l % mpz_get_ui(w)); - else - mpz_set_ui(z, l); - } - else - { - mpz_neg(z, w); - if (mpz_fits_ui_p(z)) - mpz_set_ui(z, l % mpz_get_ui(z)); - else - mpz_set_ui(z, l); - } - } - static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l) - { - mpz_tdiv_r_ui (z, w, (l >= 0 ? l : -l)); - } - static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w) - { - if (mpz_fits_si_p(w)) - mpz_set_si(z, l % mpz_get_si(w)); - else - { - /* if w is bigger than a long then the remainder is l unchanged, - unless l==LONG_MIN and w==-LONG_MIN in which case it's 0 */ - mpz_set_si (z, mpz_cmpabs_ui (w, (l >= 0 ? l : -l)) == 0 ? 0 : l); - } - } - static void eval(mpz_ptr z, mpz_srcptr w, double d) - { __GMPXX_TMPZ_D; mpz_tdiv_r (z, w, temp); } - static void eval(mpz_ptr z, double d, mpz_srcptr w) - { __GMPXX_TMPZ_D; mpz_tdiv_r (z, temp, w); } -}; - -struct __gmp_binary_and -{ - static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v) - { mpz_and(z, w, v); } - - static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l) - { __GMPXX_TMPZ_UI; mpz_and (z, w, temp); } - static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w) - { eval(z, w, l); } - static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l) - { __GMPXX_TMPZ_SI; mpz_and (z, w, temp); } - static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w) - { eval(z, w, l); } - static void eval(mpz_ptr z, mpz_srcptr w, double d) - { __GMPXX_TMPZ_D; mpz_and (z, w, temp); } - static void eval(mpz_ptr z, double d, mpz_srcptr w) - { eval(z, w, d); } -}; - -struct __gmp_binary_ior -{ - static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v) - { mpz_ior(z, w, v); } - static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l) - { __GMPXX_TMPZ_UI; mpz_ior (z, w, temp); } - static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w) - { eval(z, w, l); } - static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l) - { __GMPXX_TMPZ_SI; mpz_ior (z, w, temp); } - static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w) - { eval(z, w, l); } - static void eval(mpz_ptr z, mpz_srcptr w, double d) - { __GMPXX_TMPZ_D; mpz_ior (z, w, temp); } - static void eval(mpz_ptr z, double d, mpz_srcptr w) - { eval(z, w, d); } -}; - -struct __gmp_binary_xor -{ - static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v) - { mpz_xor(z, w, v); } - static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l) - { __GMPXX_TMPZ_UI; mpz_xor (z, w, temp); } - static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w) - { eval(z, w, l); } - static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l) - { __GMPXX_TMPZ_SI; mpz_xor (z, w, temp); } - static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w) - { eval(z, w, l); } - static void eval(mpz_ptr z, mpz_srcptr w, double d) - { __GMPXX_TMPZ_D; mpz_xor (z, w, temp); } - static void eval(mpz_ptr z, double d, mpz_srcptr w) - { eval(z, w, d); } -}; - -struct __gmp_binary_equal -{ - static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) == 0; } - - static bool eval(mpz_srcptr z, mpir_ui l) - { return mpz_cmp_ui(z, l) == 0; } - static bool eval(mpir_ui l, mpz_srcptr z) - { return mpz_cmp_ui(z, l) == 0; } - static bool eval(mpz_srcptr z, mpir_si l) - { return mpz_cmp_si(z, l) == 0; } - static bool eval(mpir_si l, mpz_srcptr z) - { return mpz_cmp_si(z, l) == 0; } - static bool eval(mpz_srcptr z, double d) - { return mpz_cmp_d(z, d) == 0; } - static bool eval(double d, mpz_srcptr z) - { return mpz_cmp_d(z, d) == 0; } - - static bool eval(mpq_srcptr q, mpq_srcptr r) - { return 
mpq_equal(q, r) != 0; } - - static bool eval(mpq_srcptr q, mpir_ui l) - { return mpq_cmp_ui(q, l, 1) == 0; } - static bool eval(mpir_ui l, mpq_srcptr q) - { return mpq_cmp_ui(q, l, 1) == 0; } - static bool eval(mpq_srcptr q, mpir_si l) - { return mpq_cmp_si(q, l, 1) == 0; } - static bool eval(mpir_si l, mpq_srcptr q) - { return mpq_cmp_si(q, l, 1) == 0; } - static bool eval(mpq_srcptr q, double d) - { - bool b; - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - b = (mpq_equal(q, temp) != 0); - mpq_clear(temp); - return b; - } - static bool eval(double d, mpq_srcptr q) - { - return eval(q, d); - } - - static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) == 0; } - - static bool eval(mpf_srcptr f, mpir_ui l) - { return mpf_cmp_ui(f, l) == 0; } - static bool eval(mpir_ui l, mpf_srcptr f) - { return mpf_cmp_ui(f, l) == 0; } - static bool eval(mpf_srcptr f, mpir_si l) - { return mpf_cmp_si(f, l) == 0; } - static bool eval(mpir_si l, mpf_srcptr f) - { return mpf_cmp_si(f, l) == 0; } - static bool eval(mpf_srcptr f, double d) - { return mpf_cmp_d(f, d) == 0; } - static bool eval(double d, mpf_srcptr f) - { return mpf_cmp_d(f, d) == 0; } -}; - -struct __gmp_binary_less -{ - static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) < 0; } - - static bool eval(mpz_srcptr z, mpir_ui l) - { return mpz_cmp_ui(z, l) < 0; } - static bool eval(mpir_ui l, mpz_srcptr z) - { return mpz_cmp_ui(z, l) > 0; } - static bool eval(mpz_srcptr z, mpir_si l) - { return mpz_cmp_si(z, l) < 0; } - static bool eval(mpir_si l, mpz_srcptr z) - { return mpz_cmp_si(z, l) > 0; } - static bool eval(mpz_srcptr z, double d) - { return mpz_cmp_d(z, d) < 0; } - static bool eval(double d, mpz_srcptr z) - { return mpz_cmp_d(z, d) > 0; } - - static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) < 0; } - - static bool eval(mpq_srcptr q, mpir_ui l) - { return mpq_cmp_ui(q, l, 1) < 0; } - static bool eval(mpir_ui l, mpq_srcptr q) - { return mpq_cmp_ui(q, l, 1) > 0; } - static bool eval(mpq_srcptr q, mpir_si l) - { return mpq_cmp_si(q, l, 1) < 0; } - static bool eval(mpir_si l, mpq_srcptr q) - { return mpq_cmp_si(q, l, 1) > 0; } - static bool eval(mpq_srcptr q, double d) - { - bool b; - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - b = (mpq_cmp(q, temp) < 0); - mpq_clear(temp); - return b; - } - static bool eval(double d, mpq_srcptr q) - { - bool b; - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - b = (mpq_cmp(temp, q) < 0); - mpq_clear(temp); - return b; - } - - static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) < 0; } - - static bool eval(mpf_srcptr f, mpir_ui l) - { return mpf_cmp_ui(f, l) < 0; } - static bool eval(mpir_ui l, mpf_srcptr f) - { return mpf_cmp_ui(f, l) > 0; } - static bool eval(mpf_srcptr f, mpir_si l) - { return mpf_cmp_si(f, l) < 0; } - static bool eval(mpir_si l, mpf_srcptr f) - { return mpf_cmp_si(f, l) > 0; } - static bool eval(mpf_srcptr f, double d) - { return mpf_cmp_d(f, d) < 0; } - static bool eval(double d, mpf_srcptr f) - { return mpf_cmp_d(f, d) > 0; } -}; - -struct __gmp_binary_greater -{ - static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) > 0; } - - static bool eval(mpz_srcptr z, mpir_ui l) - { return mpz_cmp_ui(z, l) > 0; } - static bool eval(mpir_ui l, mpz_srcptr z) - { return mpz_cmp_ui(z, l) < 0; } - static bool eval(mpz_srcptr z, mpir_si l) - { return mpz_cmp_si(z, l) > 0; } - static bool eval(mpir_si l, mpz_srcptr z) - { return mpz_cmp_si(z, l) < 0; } - static bool eval(mpz_srcptr z, double d) - { return 
mpz_cmp_d(z, d) > 0; } - static bool eval(double d, mpz_srcptr z) - { return mpz_cmp_d(z, d) < 0; } - - static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) > 0; } - - static bool eval(mpq_srcptr q, mpir_ui l) - { return mpq_cmp_ui(q, l, 1) > 0; } - static bool eval(mpir_ui l, mpq_srcptr q) - { return mpq_cmp_ui(q, l, 1) < 0; } - static bool eval(mpq_srcptr q, mpir_si l) - { return mpq_cmp_si(q, l, 1) > 0; } - static bool eval(mpir_si l, mpq_srcptr q) - { return mpq_cmp_si(q, l, 1) < 0; } - static bool eval(mpq_srcptr q, double d) - { - bool b; - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - b = (mpq_cmp(q, temp) > 0); - mpq_clear(temp); - return b; - } - static bool eval(double d, mpq_srcptr q) - { - bool b; - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - b = (mpq_cmp(temp, q) > 0); - mpq_clear(temp); - return b; - } - - static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) > 0; } - - static bool eval(mpf_srcptr f, mpir_ui l) - { return mpf_cmp_ui(f, l) > 0; } - static bool eval(mpir_ui l, mpf_srcptr f) - { return mpf_cmp_ui(f, l) < 0; } - static bool eval(mpf_srcptr f, mpir_si l) - { return mpf_cmp_si(f, l) > 0; } - static bool eval(mpir_si l, mpf_srcptr f) - { return mpf_cmp_si(f, l) < 0; } - static bool eval(mpf_srcptr f, double d) - { return mpf_cmp_d(f, d) > 0; } - static bool eval(double d, mpf_srcptr f) - { return mpf_cmp_d(f, d) < 0; } -}; - -struct __gmp_unary_increment -{ - static void eval(mpz_ptr z) { mpz_add_ui(z, z, 1); } - static void eval(mpq_ptr q) - { mpz_add(mpq_numref(q), mpq_numref(q), mpq_denref(q)); } - static void eval(mpf_ptr f) { mpf_add_ui(f, f, 1); } -}; - -struct __gmp_unary_decrement -{ - static void eval(mpz_ptr z) { mpz_sub_ui(z, z, 1); } - static void eval(mpq_ptr q) - { mpz_sub(mpq_numref(q), mpq_numref(q), mpq_denref(q)); } - static void eval(mpf_ptr f) { mpf_sub_ui(f, f, 1); } -}; - -struct __gmp_abs_function -{ - static void eval(mpz_ptr z, mpz_srcptr w) { mpz_abs(z, w); } - static void eval(mpq_ptr q, mpq_srcptr r) { mpq_abs(q, r); } - static void eval(mpf_ptr f, mpf_srcptr g) { mpf_abs(f, g); } -}; - -struct __gmp_trunc_function -{ - static void eval(mpf_ptr f, mpf_srcptr g) { mpf_trunc(f, g); } -}; - -struct __gmp_floor_function -{ - static void eval(mpf_ptr f, mpf_srcptr g) { mpf_floor(f, g); } -}; - -struct __gmp_ceil_function -{ - static void eval(mpf_ptr f, mpf_srcptr g) { mpf_ceil(f, g); } -}; - -struct __gmp_sqrt_function -{ - static void eval(mpz_ptr z, mpz_srcptr w) { mpz_sqrt(z, w); } - static void eval(mpf_ptr f, mpf_srcptr g) { mpf_sqrt(f, g); } -}; - -struct __gmp_hypot_function -{ - static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h) - { - mpf_t temp; - mpf_init2(temp, mpf_get_prec(f)); - mpf_mul(temp, g, g); - mpf_mul(f, h, h); - mpf_add(f, f, temp); - mpf_sqrt(f, f); - mpf_clear(temp); - } - - static void eval(mpf_ptr f, mpf_srcptr g, mpir_ui l) - { - mpf_t temp; - mpf_init2(temp, mpf_get_prec(f)); - mpf_mul(temp, g, g); - mpf_set_ui(f, l); - mpf_mul(f, f, f); - mpf_add(f, f, temp); - mpf_sqrt(f, f); - mpf_clear(temp); - } - static void eval(mpf_ptr f, mpir_ui l, mpf_srcptr g) - { eval(f, g, l); } - static void eval(mpf_ptr f, mpf_srcptr g, mpir_si l) - { - mpf_t temp; - mpf_init2(temp, mpf_get_prec(f)); - mpf_mul(temp, g, g); - mpf_set_si(f, l); - mpf_mul(f, f, f); - mpf_add(f, f, temp); - mpf_sqrt(f, f); - mpf_clear(temp); - } - static void eval(mpf_ptr f, mpir_si l, mpf_srcptr g) - { eval(f, g, l); } - static void eval(mpf_ptr f, mpf_srcptr g, double d) - { - mpf_t temp; - 
mpf_init2(temp, mpf_get_prec(f)); - mpf_mul(temp, g, g); - mpf_set_d(f, d); - mpf_mul(f, f, f); - mpf_add(f, f, temp); - mpf_sqrt(f, f); - mpf_clear(temp); - } - static void eval(mpf_ptr f, double d, mpf_srcptr g) - { eval(f, g, d); } -}; - -struct __gmp_sgn_function -{ - static int eval(mpz_srcptr z) { return mpz_sgn(z); } - static int eval(mpq_srcptr q) { return mpq_sgn(q); } - static int eval(mpf_srcptr f) { return mpf_sgn(f); } -}; - -struct __gmp_cmp_function -{ - static int eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w); } - - static int eval(mpz_srcptr z, mpir_ui l) - { return mpz_cmp_ui(z, l); } - static int eval(mpir_ui l, mpz_srcptr z) - { return -mpz_cmp_ui(z, l); } - static int eval(mpz_srcptr z, mpir_si l) - { return mpz_cmp_si(z, l); } - static int eval(mpir_si l, mpz_srcptr z) - { return -mpz_cmp_si(z, l); } - static int eval(mpz_srcptr z, double d) - { return mpz_cmp_d(z, d); } - static int eval(double d, mpz_srcptr z) - { return -mpz_cmp_d(z, d); } - - static int eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r); } - - static int eval(mpq_srcptr q, mpir_ui l) - { return mpq_cmp_ui(q, l, 1); } - static int eval(mpir_ui l, mpq_srcptr q) - { return -mpq_cmp_ui(q, l, 1); } - static int eval(mpq_srcptr q, mpir_si l) - { return mpq_cmp_si(q, l, 1); } - static int eval(mpir_si l, mpq_srcptr q) - { return -mpq_cmp_si(q, l, 1); } - static int eval(mpq_srcptr q, double d) - { - int i; - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - i = mpq_cmp(q, temp); - mpq_clear(temp); - return i; - } - static int eval(double d, mpq_srcptr q) - { - int i; - mpq_t temp; - mpq_init(temp); - mpq_set_d(temp, d); - i = mpq_cmp(temp, q); - mpq_clear(temp); - return i; - } - - static int eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g); } - - static int eval(mpf_srcptr f, mpir_ui l) - { return mpf_cmp_ui(f, l); } - static int eval(mpir_ui l, mpf_srcptr f) - { return -mpf_cmp_ui(f, l); } - static int eval(mpf_srcptr f, mpir_si l) - { return mpf_cmp_si(f, l); } - static int eval(mpir_si l, mpf_srcptr f) - { return -mpf_cmp_si(f, l); } - static int eval(mpf_srcptr f, double d) - { return mpf_cmp_d(f, d); } - static int eval(double d, mpf_srcptr f) - { return -mpf_cmp_d(f, d); } -}; - -struct __gmp_rand_function -{ - static void eval(mpz_ptr z, gmp_randstate_t s, mp_bitcnt_t l) - { mpz_urandomb(z, s, l); } - static void eval(mpz_ptr z, gmp_randstate_t s, mpz_srcptr w) - { mpz_urandomm(z, s, w); } - static void eval(mpf_ptr f, gmp_randstate_t s, mp_bitcnt_t prec) - { mpf_urandomb(f, s, prec); } -}; - - -/**************** Auxiliary classes ****************/ - -/* this is much the same as gmp_allocated_string in gmp-impl.h - since gmp-impl.h is not publicly available, I redefine it here - I use a different name to avoid possible clashes */ - -extern "C" { - typedef void (*__gmp_freefunc_t) (void *, size_t); -} -struct __gmp_alloc_cstring -{ - char *str; - __gmp_alloc_cstring(char *s) { str = s; } - ~__gmp_alloc_cstring() - { - __gmp_freefunc_t freefunc; - mp_get_memory_functions (NULL, NULL, &freefunc); - (*freefunc) (str, std::strlen(str)+1); - } -}; - - -// general expression template class -template -class __gmp_expr; - - -// templates for resolving expression types -template -struct __gmp_resolve_ref -{ - typedef T ref_type; -}; - -template -struct __gmp_resolve_ref<__gmp_expr > -{ - typedef const __gmp_expr & ref_type; -}; - - -template -struct __gmp_resolve_expr; - -template <> -struct __gmp_resolve_expr -{ - typedef mpz_t value_type; - typedef mpz_ptr ptr_type; - typedef 
mpz_srcptr srcptr_type; -}; - -template <> -struct __gmp_resolve_expr -{ - typedef mpq_t value_type; - typedef mpq_ptr ptr_type; - typedef mpq_srcptr srcptr_type; -}; - -template <> -struct __gmp_resolve_expr -{ - typedef mpf_t value_type; - typedef mpf_ptr ptr_type; - typedef mpf_srcptr srcptr_type; -}; - -template <> -struct __gmp_resolve_expr -{ - typedef mpq_t value_type; -}; - -template <> -struct __gmp_resolve_expr -{ - typedef mpq_t value_type; -}; - -template <> -struct __gmp_resolve_expr -{ - typedef mpf_t value_type; -}; - -template <> -struct __gmp_resolve_expr -{ - typedef mpf_t value_type; -}; - -template <> -struct __gmp_resolve_expr -{ - typedef mpf_t value_type; -}; - -template <> -struct __gmp_resolve_expr -{ - typedef mpf_t value_type; -}; - -#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 ) -namespace std { - template - struct common_type <__gmp_expr, __gmp_expr > - { - private: - typedef typename __gmp_resolve_expr::value_type X; - public: - typedef __gmp_expr type; - }; - - template - struct common_type <__gmp_expr, __gmp_expr > - { - typedef __gmp_expr type; - }; - -#define __GMPXX_DECLARE_COMMON_TYPE(typ) \ - template \ - struct common_type <__gmp_expr, typ > \ - { \ - typedef __gmp_expr type; \ - }; \ - \ - template \ - struct common_type > \ - { \ - typedef __gmp_expr type; \ - } - - __GMPXX_DECLARE_COMMON_TYPE(signed char); - __GMPXX_DECLARE_COMMON_TYPE(unsigned char); - __GMPXX_DECLARE_COMMON_TYPE(signed int); - __GMPXX_DECLARE_COMMON_TYPE(unsigned int); - __GMPXX_DECLARE_COMMON_TYPE(signed short int); - __GMPXX_DECLARE_COMMON_TYPE(unsigned short int); - __GMPXX_DECLARE_COMMON_TYPE(signed long int); - __GMPXX_DECLARE_COMMON_TYPE(unsigned long int); - __GMPXX_DECLARE_COMMON_TYPE(float); - __GMPXX_DECLARE_COMMON_TYPE(double); -#undef __GMPXX_DECLARE_COMMON_TYPE -} -#endif - -// classes for evaluating unary and binary expressions -template -struct __gmp_unary_expr -{ - const T &val; - - __gmp_unary_expr(const T &v) : val(v) { } -private: - __gmp_unary_expr(); -}; - -template -struct __gmp_binary_expr -{ - typename __gmp_resolve_ref::ref_type val1; - typename __gmp_resolve_ref::ref_type val2; - - __gmp_binary_expr(const T &v1, const U &v2) : val1(v1), val2(v2) { } -private: - __gmp_binary_expr(); -}; - - - -/**************** Macros for in-class declarations ****************/ -/* This is just repetitive code that is easier to maintain if it's written - only once */ - -#define __GMPP_DECLARE_COMPOUND_OPERATOR(fun) \ - template \ - __gmp_expr & fun(const __gmp_expr &); -#ifdef MPIRXX_HAVE_LLONG -#define __GMPN_DECLARE_COMPOUND_OPERATOR(fun) \ - __gmp_expr & fun(signed char); \ - __gmp_expr & fun(unsigned char); \ - __gmp_expr & fun(signed int); \ - __gmp_expr & fun(unsigned int); \ - __gmp_expr & fun(signed short int); \ - __gmp_expr & fun(unsigned short int); \ - __gmp_expr & fun(signed long int); \ - __gmp_expr & fun(unsigned long int); \ - __gmp_expr & fun(signed long long int); \ - __gmp_expr & fun(unsigned long long int); \ - __gmp_expr & fun(float); \ - __gmp_expr & fun(double); \ - __gmp_expr & fun(long double); -#else -#define __GMPN_DECLARE_COMPOUND_OPERATOR(fun) \ - __gmp_expr & fun(signed char); \ - __gmp_expr & fun(unsigned char); \ - __gmp_expr & fun(signed int); \ - __gmp_expr & fun(unsigned int); \ - __gmp_expr & fun(signed short int); \ - __gmp_expr & fun(unsigned short int); \ - __gmp_expr & fun(signed long int); \ - __gmp_expr & fun(unsigned long int); \ - __gmp_expr & fun(float); \ - __gmp_expr & fun(double); \ - __gmp_expr & fun(long double); -#endif - 
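// [editor's example] How the __gmp_expr / __gmp_binary_expr machinery is
// meant to work, reduced to a toy: operator+ only records its operands, and
// the one real computation happens when the expression is assigned to (or
// constructed into) a concrete value. All names here (Int, AddExpr) are
// illustrative only, not part of the header:

    #include <cstdio>

    struct Int;                        // concrete wrapper, like mpz_class

    struct AddExpr {                   // like a __gmp_binary_expr node
        const Int &a, &b;
        AddExpr(const Int &x, const Int &y) : a(x), b(y) {}
    };

    struct Int {
        long v;
        explicit Int(long x) : v(x) {}
        Int(const AddExpr &e);         // single evaluation point
        Int &operator=(const AddExpr &e);
    };

    inline AddExpr operator+(const Int &a, const Int &b)
    { return AddExpr(a, b); }          // no arithmetic happens here

    inline Int::Int(const AddExpr &e) : v(e.a.v + e.b.v) {}
    inline Int &Int::operator=(const AddExpr &e)
    { v = e.a.v + e.b.v; return *this; }

    int main()
    {
        Int x(2), y(3);
        Int z = x + y;                 // folded straight into z, no temporary
        std::printf("%ld\n", z.v);     // 5
        return 0;
    }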
-#define __GMP_DECLARE_COMPOUND_OPERATOR(fun) \ -__GMPP_DECLARE_COMPOUND_OPERATOR(fun) \ -__GMPN_DECLARE_COMPOUND_OPERATOR(fun) - -#define __GMP_DECLARE_COMPOUND_OPERATOR_UI(fun) \ - __gmp_expr & fun(mp_bitcnt_t); - -#define __GMP_DECLARE_INCREMENT_OPERATOR(fun) \ - inline __gmp_expr & fun(); \ - inline __gmp_expr fun(int); - - -/**************** mpz_class -- wrapper for mpz_t ****************/ - -template <> -class __gmp_expr -{ -private: - typedef mpz_t value_type; - value_type mp; -public: - mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } - - // constructors and destructor - __gmp_expr() { mpz_init(mp); } - - __gmp_expr(const __gmp_expr &z) { mpz_init_set(mp, z.mp); } -#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 ) - __gmp_expr(__gmp_expr &&z) - { *mp = *z.mp; mpz_init(z.mp); } -#endif - template - __gmp_expr(const __gmp_expr &expr) - { mpz_init(mp); __gmp_set_expr(mp, expr); } - template - explicit __gmp_expr(const __gmp_expr &expr) - { mpz_init(mp); __gmp_set_expr(mp, expr); } - - __gmp_expr(signed char c) { mpz_init_set_si(mp, c); } - __gmp_expr(unsigned char c) { mpz_init_set_ui(mp, c); } - - __gmp_expr(signed int i) { mpz_init_set_si(mp, i); } - __gmp_expr(unsigned int i) { mpz_init_set_ui(mp, i); } - - __gmp_expr(signed short int s) { mpz_init_set_si(mp, s); } - __gmp_expr(unsigned short int s) { mpz_init_set_ui(mp, s); } - - __gmp_expr(signed long int l) { mpz_init_set_si(mp, l); } - __gmp_expr(unsigned long int l) { mpz_init_set_ui(mp, l); } - -#ifdef MPIRXX_HAVE_LLONG - __gmp_expr(signed long long int l) { mpz_init_set_si(mp, l); } - __gmp_expr(unsigned long long int l) { mpz_init_set_ui(mp, l); } -#endif - -#ifdef MPIRXX_INTMAX_T - __gmp_expr(intmax_t l) { mpz_init_set_sx(mp, l); } -#endif - -#ifdef MPIRXX_UINTMAX_T - __gmp_expr(uintmax_t l) { mpz_init_set_ux(mp, l); } -#endif - - __gmp_expr(float f) { mpz_init_set_d(mp, f); } - __gmp_expr(double d) { mpz_init_set_d(mp, d); } - // __gmp_expr(long double ld) { mpz_init_set_d(mp, ld); } - - explicit __gmp_expr(const char *s, int base = 0) - { - if (mpz_init_set_str (mp, s, base) != 0) - { - mpz_clear (mp); - throw std::invalid_argument ("mpz_set_str"); - } - } - explicit __gmp_expr(const std::string &s, int base = 0) - { - if (mpz_init_set_str(mp, s.c_str(), base) != 0) - { - mpz_clear (mp); - throw std::invalid_argument ("mpz_set_str"); - } - } - - explicit __gmp_expr(mpz_srcptr z) { mpz_init_set(mp, z); } - - ~__gmp_expr() { mpz_clear(mp); } - - void swap(__gmp_expr& z) __GMPXX_NOEXCEPT { std::swap(*mp, *z.mp); } - - // assignment operators - __gmp_expr & operator=(const __gmp_expr &z) - { mpz_set(mp, z.mp); return *this; } -#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 ) - __gmp_expr & operator=(__gmp_expr &&z) __GMPXX_NOEXCEPT - { swap(z); return *this; } -#endif - template - __gmp_expr & operator=(const __gmp_expr &expr) - { __gmp_set_expr(mp, expr); return *this; } - -__gmp_expr & operator=(signed char c) { mpz_set_si(mp, c); return *this; } -__gmp_expr & operator=(unsigned char c) { mpz_set_ui(mp, c); return *this; } - -__gmp_expr & operator=(signed int i) { mpz_set_si(mp, i); return *this; } -__gmp_expr & operator=(unsigned int i) { mpz_set_ui(mp, i); return *this; } - - __gmp_expr & operator=(signed short int s) - { mpz_set_si(mp, s); return *this; } - __gmp_expr & operator=(unsigned short int s) - { mpz_set_ui(mp, s); return *this; } - - __gmp_expr & operator=(signed long int l) - { mpz_set_si(mp, l); return *this; } - __gmp_expr & operator=(unsigned long int l) - { mpz_set_ui(mp, l); return *this; } - 
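Reviewer aside (the mpz_class body resumes right after this note): the constructors and assignment operators above give mpz_class its value semantics, including string parsing that throws std::invalid_argument on bad input. A usage sketch, assuming the header is installed as <mpirxx.h> and the program typically links with -lmpirxx -lmpir (with GNU GMP, <gmpxx.h> and -lgmpxx -lgmp are the analogues):

    #include <mpirxx.h>   // the header this patch removes from the mpir_patch tree
    #include <iostream>
    #include <stdexcept>

    int main() {
        mpz_class a("123456789012345678901234567890"); // base 0 auto-detects 0x/0 prefixes
        mpz_class b(42);
        mpz_class c = a * b + 1;          // expression template, evaluated on assignment
        std::cout << c.get_str(16) << "\n";

        try {
            mpz_class bad("12z");         // malformed digit string
        } catch (const std::invalid_argument &) {
            std::cout << "rejected, as the constructor above specifies\n";
        }
        return 0;
    }
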
-#ifdef MPIRXX_HAVE_LLONG
-  __gmp_expr & operator=(signed long long int i) { mpz_set_si(mp, i); return *this; }
-  __gmp_expr & operator=(unsigned long long int i) { mpz_set_ui(mp, i); return *this; }
-#endif
-
-#ifdef MPIRXX_INTMAX_T
-  __gmp_expr & operator=(intmax_t i) { mpz_set_sx(mp, i); return *this; }
-#endif
-
-#ifdef MPIRXX_UINTMAX_T
-  __gmp_expr & operator=(uintmax_t i) { mpz_set_ux(mp, i); return *this; }
-#endif
-
-  __gmp_expr & operator=(float f) { mpz_set_d(mp, f); return *this; }
-  __gmp_expr & operator=(double d) { mpz_set_d(mp, d); return *this; }
-  // __gmp_expr & operator=(long double ld)
-  // { mpz_set_ld(mp, ld); return *this; }
-
-  __gmp_expr & operator=(const char *s)
-  {
-    if (mpz_set_str (mp, s, 0) != 0)
-      throw std::invalid_argument ("mpz_set_str");
-    return *this;
-  }
-  __gmp_expr & operator=(const std::string &s)
-  {
-    if (mpz_set_str(mp, s.c_str(), 0) != 0)
-      throw std::invalid_argument ("mpz_set_str");
-    return *this;
-  }
-
-  // string input/output functions
-  int set_str(const char *s, int base)
-  { return mpz_set_str(mp, s, base); }
-  int set_str(const std::string &s, int base)
-  { return mpz_set_str(mp, s.c_str(), base); }
-  std::string get_str(int base = 10) const
-  {
-    __gmp_alloc_cstring temp(mpz_get_str(0, base, mp));
-    return std::string(temp.str);
-  }
-
-  // conversion functions
-  mpz_srcptr __get_mp() const { return mp; }
-  mpz_ptr __get_mp() { return mp; }
-  mpz_srcptr get_mpz_t() const { return mp; }
-  mpz_ptr get_mpz_t() { return mp; }
-
-  mpir_si get_si() const { return mpz_get_si(mp); }
-  mpir_ui get_ui() const { return mpz_get_ui(mp); }
-
-#ifdef MPIRXX_INTMAX_T
-  intmax_t get_sx() const { return mpz_get_sx(mp); }
-#endif
-#ifdef MPIRXX_UINTMAX_T
-  uintmax_t get_ux() const { return mpz_get_ux(mp); }
-#endif
-
-  double get_d() const { return mpz_get_d(mp); }
-
-  // bool fits_schar_p() const { return mpz_fits_schar_p(mp); }
-  // bool fits_uchar_p() const { return mpz_fits_uchar_p(mp); }
-  bool fits_sint_p() const { return mpz_fits_sint_p(mp); }
-  bool fits_uint_p() const { return mpz_fits_uint_p(mp); }
-  bool fits_si_p() const { return mpz_fits_si_p(mp); }
-  bool fits_ui_p() const { return mpz_fits_ui_p(mp); }
-  bool fits_sshort_p() const { return mpz_fits_sshort_p(mp); }
-  bool fits_ushort_p() const { return mpz_fits_ushort_p(mp); }
-  bool fits_slong_p() const { return mpz_fits_slong_p(mp); }
-  bool fits_ulong_p() const { return mpz_fits_ulong_p(mp); }
-  // bool fits_float_p() const { return mpz_fits_float_p(mp); }
-  // bool fits_double_p() const { return mpz_fits_double_p(mp); }
-  // bool fits_ldouble_p() const { return mpz_fits_ldouble_p(mp); }
-
-#if __GMPXX_USE_CXX11
-  explicit operator bool() const { return mp->_mp_size != 0; }
-#endif
-
-  // member operators
-  __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
-  __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
-  __GMP_DECLARE_COMPOUND_OPERATOR(operator*=)
-  __GMP_DECLARE_COMPOUND_OPERATOR(operator/=)
-  __GMP_DECLARE_COMPOUND_OPERATOR(operator%=)
-
-  __GMP_DECLARE_COMPOUND_OPERATOR(operator&=)
-  __GMP_DECLARE_COMPOUND_OPERATOR(operator|=)
-  __GMP_DECLARE_COMPOUND_OPERATOR(operator^=)
-
-  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=)
-  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=)
-
-  __GMP_DECLARE_INCREMENT_OPERATOR(operator++)
-  __GMP_DECLARE_INCREMENT_OPERATOR(operator--)
-};
-
-typedef __gmp_expr<mpz_t, mpz_t> mpz_class;
-
-
-/**************** mpq_class -- wrapper for mpq_t ****************/
-
-template <>
-class __gmp_expr<mpq_t, mpq_t>
-{
-private:
-  typedef mpq_t value_type;
-  value_type mp;
-public:
-  mp_bitcnt_t
get_prec() const { return mpf_get_default_prec(); } - void canonicalize() { mpq_canonicalize(mp); } - - // constructors and destructor - __gmp_expr() { mpq_init(mp); } - - __gmp_expr(const __gmp_expr &q) - { - mpz_init_set(mpq_numref(mp), mpq_numref(q.mp)); - mpz_init_set(mpq_denref(mp), mpq_denref(q.mp)); - } -#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 ) - __gmp_expr(__gmp_expr &&q) - { *mp = *q.mp; mpq_init(q.mp); } -#endif - template - __gmp_expr(const __gmp_expr &expr) - { mpq_init(mp); __gmp_set_expr(mp, expr); } - template - __gmp_expr(const __gmp_expr &expr) - { mpq_init(mp); __gmp_set_expr(mp, expr); } - template - explicit __gmp_expr(const __gmp_expr &expr) - { mpq_init(mp); __gmp_set_expr(mp, expr); } - - __gmp_expr(signed char c) { mpq_init(mp); mpq_set_si(mp, c, 1); } - __gmp_expr(unsigned char c) { mpq_init(mp); mpq_set_ui(mp, c, 1); } - - __gmp_expr(signed int i) { mpq_init(mp); mpq_set_si(mp, i, 1); } - __gmp_expr(unsigned int i) { mpq_init(mp); mpq_set_ui(mp, i, 1); } - - __gmp_expr(signed short int s) { mpq_init(mp); mpq_set_si(mp, s, 1); } - __gmp_expr(unsigned short int s) { mpq_init(mp); mpq_set_ui(mp, s, 1); } - - __gmp_expr(signed long int l) { mpq_init(mp); mpq_set_si(mp, l, 1); } - __gmp_expr(unsigned long int l) { mpq_init(mp); mpq_set_ui(mp, l, 1); } - -#ifdef MPIRXX_HAVE_LLONG - __gmp_expr(signed long long int l) { mpq_init(mp); mpq_set_si(mp, l, 1); } - __gmp_expr(unsigned long long int l) { mpq_init(mp); mpq_set_ui(mp, l, 1); } -#endif - - __gmp_expr(float f) { mpq_init(mp); mpq_set_d(mp, f); } - __gmp_expr(double d) { mpq_init(mp); mpq_set_d(mp, d); } - // __gmp_expr(long double ld) { mpq_init(mp); mpq_set_ld(mp, ld); } - - explicit __gmp_expr(const char *s, int base = 0) - { - mpq_init (mp); - // If s is the literal 0, we meant to call another constructor. - // If s just happens to evaluate to 0, we would crash, so whatever. 
- if (s == 0) - { - // Don't turn mpq_class(0,0) into 0 - mpz_set_si(mpq_denref(mp), base); - } - else if (mpq_set_str(mp, s, base) != 0) - { - mpq_clear (mp); - throw std::invalid_argument ("mpq_set_str"); - } - } - explicit __gmp_expr(const std::string &s, int base = 0) - { - mpq_init(mp); - if (mpq_set_str (mp, s.c_str(), base) != 0) - { - mpq_clear (mp); - throw std::invalid_argument ("mpq_set_str"); - } - } - explicit __gmp_expr(mpq_srcptr q) - { - mpz_init_set(mpq_numref(mp), mpq_numref(q)); - mpz_init_set(mpq_denref(mp), mpq_denref(q)); - } - - __gmp_expr(const mpz_class &num, const mpz_class &den) - { - mpz_init_set(mpq_numref(mp), num.get_mpz_t()); - mpz_init_set(mpq_denref(mp), den.get_mpz_t()); - } - - ~__gmp_expr() { mpq_clear(mp); } - - void swap(__gmp_expr& q) __GMPXX_NOEXCEPT { std::swap(*mp, *q.mp); } - - // assignment operators - __gmp_expr & operator=(const __gmp_expr &q) - { mpq_set(mp, q.mp); return *this; } -#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 ) - __gmp_expr & operator=(__gmp_expr &&q) __GMPXX_NOEXCEPT - { swap(q); return *this; } - __gmp_expr & operator=(mpz_class &&z)__GMPXX_NOEXCEPT - { get_num() = std::move(z); get_den() = 1u; return *this; } -#endif - template - __gmp_expr & operator=(const __gmp_expr &expr) - { __gmp_set_expr(mp, expr); return *this; } - - __gmp_expr & operator=(signed char c) - { mpq_set_si(mp, c, 1); return *this; } - __gmp_expr & operator=(unsigned char c) - { mpq_set_ui(mp, c, 1); return *this; } - - __gmp_expr & operator=(signed int i) { mpq_set_si(mp, i, 1); return *this; } - __gmp_expr & operator=(unsigned int i) - { mpq_set_ui(mp, i, 1); return *this; } - - __gmp_expr & operator=(signed short int s) - { mpq_set_si(mp, s, 1); return *this; } - __gmp_expr & operator=(unsigned short int s) - { mpq_set_ui(mp, s, 1); return *this; } - - __gmp_expr & operator=(signed long int l) - { mpq_set_si(mp, l, 1); return *this; } - __gmp_expr & operator=(unsigned long int l) - { mpq_set_ui(mp, l, 1); return *this; } - -#ifdef MPIRXX_HAVE_LLONG - __gmp_expr & operator=(signed long long int l) - { mpq_set_si(mp, l, 1); return *this; } - __gmp_expr & operator=(unsigned long long int l) - { mpq_set_ui(mp, l, 1); return *this; } -#endif - - __gmp_expr & operator=(float f) { mpq_set_d(mp, f); return *this; } - __gmp_expr & operator=(double d) { mpq_set_d(mp, d); return *this; } - // __gmp_expr & operator=(long double ld) - // { mpq_set_ld(mp, ld); return *this; } - - __gmp_expr & operator=(const char *s) - { - if (mpq_set_str (mp, s, 0) != 0) - throw std::invalid_argument ("mpq_set_str"); - return *this; - } - __gmp_expr & operator=(const std::string &s) - { - if (mpq_set_str(mp, s.c_str(), 0) != 0) - throw std::invalid_argument ("mpq_set_str"); - return *this; - } - - // string input/output functions - int set_str(const char *s, int base) - { return mpq_set_str(mp, s, base); } - int set_str(const std::string &s, int base) - { return mpq_set_str(mp, s.c_str(), base); } - std::string get_str(int base = 10) const - { - __gmp_alloc_cstring temp(mpq_get_str(0, base, mp)); - return std::string(temp.str); - } - - // conversion functions - - // casting a reference to an mpz_t to mpz_class & is a dirty hack, - // but works because the internal representation of mpz_class is - // exactly an mpz_t - const mpz_class & get_num() const - { return reinterpret_cast(*mpq_numref(mp)); } - mpz_class & get_num() - { return reinterpret_cast(*mpq_numref(mp)); } - const mpz_class & get_den() const - { return reinterpret_cast(*mpq_denref(mp)); } - mpz_class & get_den() - { 
return reinterpret_cast(*mpq_denref(mp)); } - - mpq_srcptr __get_mp() const { return mp; } - mpq_ptr __get_mp() { return mp; } - mpq_srcptr get_mpq_t() const { return mp; } - mpq_ptr get_mpq_t() { return mp; } - - mpz_srcptr get_num_mpz_t() const { return mpq_numref(mp); } - mpz_ptr get_num_mpz_t() { return mpq_numref(mp); } - mpz_srcptr get_den_mpz_t() const { return mpq_denref(mp); } - mpz_ptr get_den_mpz_t() { return mpq_denref(mp); } - - double get_d() const { return mpq_get_d(mp); } - -#if __GMPXX_USE_CXX11 - explicit operator bool() const { return mpq_numref(mp)->_mp_size != 0; } -#endif - - // compound assignments - __GMP_DECLARE_COMPOUND_OPERATOR(operator+=) - __GMP_DECLARE_COMPOUND_OPERATOR(operator-=) - __GMP_DECLARE_COMPOUND_OPERATOR(operator*=) - __GMP_DECLARE_COMPOUND_OPERATOR(operator/=) - - __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=) - __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=) - - __GMP_DECLARE_INCREMENT_OPERATOR(operator++) - __GMP_DECLARE_INCREMENT_OPERATOR(operator--) -}; - -typedef __gmp_expr mpq_class; - - -/**************** mpf_class -- wrapper for mpf_t ****************/ - -template <> -class __gmp_expr -{ -private: - typedef mpf_t value_type; - value_type mp; -public: - mp_bitcnt_t get_prec() const { return mpf_get_prec(mp); } - - void set_prec(mp_bitcnt_t prec) { mpf_set_prec(mp, prec); } - void set_prec_raw(mp_bitcnt_t prec) { mpf_set_prec_raw(mp, prec); } - - // constructors and destructor - __gmp_expr() { mpf_init(mp); } - - __gmp_expr(const __gmp_expr &f) - { mpf_init2(mp, f.get_prec()); mpf_set(mp, f.mp); } -#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 ) - __gmp_expr(__gmp_expr &&f) - { *mp = *f.mp; mpf_init2(f.mp, get_prec()); } -#endif - __gmp_expr(const __gmp_expr &f, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set(mp, f.mp); } - template - __gmp_expr(const __gmp_expr &expr) - { mpf_init2(mp, expr.get_prec()); __gmp_set_expr(mp, expr); } - template - __gmp_expr(const __gmp_expr &expr, mp_bitcnt_t prec) - { mpf_init2(mp, prec); __gmp_set_expr(mp, expr); } - - __gmp_expr(signed char c) { mpf_init_set_si(mp, c); } - __gmp_expr(signed char c, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_si(mp, c); } - __gmp_expr(unsigned char c) { mpf_init_set_ui(mp, c); } - __gmp_expr(unsigned char c, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_ui(mp, c); } - - __gmp_expr(signed int i) { mpf_init_set_si(mp, i); } - __gmp_expr(signed int i, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_si(mp, i); } - __gmp_expr(unsigned int i) { mpf_init_set_ui(mp, i); } - __gmp_expr(unsigned int i, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_ui(mp, i); } - - __gmp_expr(signed short int s) { mpf_init_set_si(mp, s); } - __gmp_expr(signed short int s, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_si(mp, s); } - __gmp_expr(unsigned short int s) { mpf_init_set_ui(mp, s); } - __gmp_expr(unsigned short int s, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_ui(mp, s); } - - __gmp_expr(signed long int l) { mpf_init_set_si(mp, l); } - __gmp_expr(signed long int l, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_si(mp, l); } - __gmp_expr(unsigned long int l) { mpf_init_set_ui(mp, l); } - __gmp_expr(unsigned long int l, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_ui(mp, l); } -#ifdef MPIRXX_HAVE_LLONG - __gmp_expr(signed long long int s) { mpf_init_set_si(mp, s); } - __gmp_expr(signed long long int s, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_si(mp, s); } - __gmp_expr(unsigned long long int s) { mpf_init_set_ui(mp, s); } - __gmp_expr(unsigned 
long long int s, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_ui(mp, s); } -#endif - - __gmp_expr(float f) { mpf_init_set_d(mp, f); } - __gmp_expr(float f, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_d(mp, f); } - __gmp_expr(double d) { mpf_init_set_d(mp, d); } - __gmp_expr(double d, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set_d(mp, d); } - // __gmp_expr(long double ld) { mpf_init_set_d(mp, ld); } - // __gmp_expr(long double ld, mp_bitcnt_t prec) - // { mpf_init2(mp, prec); mpf_set_d(mp, ld); } - - explicit __gmp_expr(const char *s) - { - if (mpf_init_set_str (mp, s, 0) != 0) - { - mpf_clear (mp); - throw std::invalid_argument ("mpf_set_str"); - } - } - __gmp_expr(const char *s, mp_bitcnt_t prec, int base = 0) - { - mpf_init2(mp, prec); - if (mpf_set_str(mp, s, base) != 0) - { - mpf_clear (mp); - throw std::invalid_argument ("mpf_set_str"); - } - } - explicit __gmp_expr(const std::string &s) - { - if (mpf_init_set_str(mp, s.c_str(), 0) != 0) - { - mpf_clear (mp); - throw std::invalid_argument ("mpf_set_str"); - } - } - __gmp_expr(const std::string &s, mp_bitcnt_t prec, int base = 0) - { - mpf_init2(mp, prec); - if (mpf_set_str(mp, s.c_str(), base) != 0) - { - mpf_clear (mp); - throw std::invalid_argument ("mpf_set_str"); - } - } - - explicit __gmp_expr(mpf_srcptr f) - { mpf_init2(mp, mpf_get_prec(f)); mpf_set(mp, f); } - __gmp_expr(mpf_srcptr f, mp_bitcnt_t prec) - { mpf_init2(mp, prec); mpf_set(mp, f); } - - ~__gmp_expr() { mpf_clear(mp); } - - void swap(__gmp_expr& f) __GMPXX_NOEXCEPT { std::swap(*mp, *f.mp); } - - // assignment operators - __gmp_expr & operator=(const __gmp_expr &f) - { mpf_set(mp, f.mp); return *this; } -#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 ) - __gmp_expr & operator=(__gmp_expr &&f) __GMPXX_NOEXCEPT - { swap(f); return *this; } -#endif - template - __gmp_expr & operator=(const __gmp_expr &expr) - { __gmp_set_expr(mp, expr); return *this; } - - __gmp_expr & operator=(signed char c) { mpf_set_si(mp, c); return *this; } - __gmp_expr & operator=(unsigned char c) { mpf_set_ui(mp, c); return *this; } - - __gmp_expr & operator=(signed int i) { mpf_set_si(mp, i); return *this; } - __gmp_expr & operator=(unsigned int i) { mpf_set_ui(mp, i); return *this; } - - __gmp_expr & operator=(signed short int s) - { mpf_set_si(mp, s); return *this; } - __gmp_expr & operator=(unsigned short int s) - { mpf_set_ui(mp, s); return *this; } - - __gmp_expr & operator=(signed long int l) - { mpf_set_si(mp, l); return *this; } - __gmp_expr & operator=(unsigned long int l) - { mpf_set_ui(mp, l); return *this; } - -#ifdef MPIRXX_HAVE_LLONG - __gmp_expr & operator=(signed long long int l) - { mpf_set_si(mp, l); return *this; } - __gmp_expr & operator=(unsigned long long int l) - { mpf_set_ui(mp, l); return *this; } -#endif - - __gmp_expr & operator=(float f) { mpf_set_d(mp, f); return *this; } - __gmp_expr & operator=(double d) { mpf_set_d(mp, d); return *this; } - // __gmp_expr & operator=(long double ld) - // { mpf_set_ld(mp, ld); return *this; } - - __gmp_expr & operator=(const char *s) - { - if (mpf_set_str (mp, s, 0) != 0) - throw std::invalid_argument ("mpf_set_str"); - return *this; - } - __gmp_expr & operator=(const std::string &s) - { - if (mpf_set_str(mp, s.c_str(), 0) != 0) - throw std::invalid_argument ("mpf_set_str"); - return *this; - } - - // string input/output functions - int set_str(const char *s, int base) - { return mpf_set_str(mp, s, base); } - int set_str(const std::string &s, int base) - { return mpf_set_str(mp, s.c_str(), base); } - std::string 
get_str(mp_exp_t &expo, int base = 10, size_t size = 0) const - { - __gmp_alloc_cstring temp(mpf_get_str(0, &expo, base, size, mp)); - return std::string(temp.str); - } - - // conversion functions - mpf_srcptr __get_mp() const { return mp; } - mpf_ptr __get_mp() { return mp; } - mpf_srcptr get_mpf_t() const { return mp; } - mpf_ptr get_mpf_t() { return mp; } - - mpir_si get_si() const { return mpf_get_si(mp); } - mpir_ui get_ui() const { return mpf_get_ui(mp); } - double get_d() const { return mpf_get_d(mp); } - - // bool fits_schar_p() const { return mpf_fits_schar_p(mp); } - // bool fits_uchar_p() const { return mpf_fits_uchar_p(mp); } - bool fits_sint_p() const { return mpf_fits_sint_p(mp); } - bool fits_uint_p() const { return mpf_fits_uint_p(mp); } - bool fits_si_p() const { return mpf_fits_si_p(mp); } - bool fits_ui_p() const { return mpf_fits_ui_p(mp); } - bool fits_sshort_p() const { return mpf_fits_sshort_p(mp); } - bool fits_ushort_p() const { return mpf_fits_ushort_p(mp); } - bool fits_slong_p() const { return mpf_fits_slong_p(mp); } - bool fits_ulong_p() const { return mpf_fits_ulong_p(mp); } - // bool fits_float_p() const { return mpf_fits_float_p(mp); } - // bool fits_double_p() const { return mpf_fits_double_p(mp); } - // bool fits_ldouble_p() const { return mpf_fits_ldouble_p(mp); } - -#if __GMPXX_USE_CXX11 - explicit operator bool() const { return mp->_mp_size != 0; } -#endif - - // compound assignments - __GMP_DECLARE_COMPOUND_OPERATOR(operator+=) - __GMP_DECLARE_COMPOUND_OPERATOR(operator-=) - __GMP_DECLARE_COMPOUND_OPERATOR(operator*=) - __GMP_DECLARE_COMPOUND_OPERATOR(operator/=) - - __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=) - __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=) - - __GMP_DECLARE_INCREMENT_OPERATOR(operator++) - __GMP_DECLARE_INCREMENT_OPERATOR(operator--) -}; - -typedef __gmp_expr mpf_class; - - - -/**************** User-defined literals ****************/ - -#if __GMPXX_USE_CXX11 -inline mpz_class operator"" _mpz(const char* s) -{ - return mpz_class(s); -} - -inline mpq_class operator"" _mpq(const char* s) -{ - mpq_class q; - q.get_num() = s; - return q; -} - -inline mpf_class operator"" _mpf(const char* s) -{ - return mpf_class(s); -} -#endif - -/**************** I/O operators ****************/ - -// these should (and will) be provided separately - -template -inline std::ostream & operator<< -(std::ostream &o, const __gmp_expr &expr) -{ - __gmp_expr const& temp(expr); - return o << temp.__get_mp(); -} - -template -inline std::istream & operator>>(std::istream &i, __gmp_expr &expr) -{ - return i >> expr.__get_mp(); -} - -/* -// you might want to uncomment this -inline std::istream & operator>>(std::istream &i, mpq_class &q) -{ - i >> q.get_mpq_t(); - q.canonicalize(); - return i; -} -*/ - - -/**************** Functions for type conversion ****************/ - -inline void __gmp_set_expr(mpz_ptr z, const mpz_class &w) -{ - mpz_set(z, w.get_mpz_t()); -} - -template -inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr &expr) -{ - expr.eval(z); -} - -template -inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr &expr) -{ - mpq_class const& temp(expr); - mpz_set_q(z, temp.get_mpq_t()); -} - -template -inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr &expr) -{ - mpf_class const& temp(expr); - mpz_set_f(z, temp.get_mpf_t()); -} - -inline void __gmp_set_expr(mpq_ptr q, const mpz_class &z) -{ - mpq_set_z(q, z.get_mpz_t()); -} - -template -inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr &expr) -{ - __gmp_set_expr(mpq_numref(q), expr); - 
mpz_set_ui(mpq_denref(q), 1); -} - -inline void __gmp_set_expr(mpq_ptr q, const mpq_class &r) -{ - mpq_set(q, r.get_mpq_t()); -} - -template -inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr &expr) -{ - expr.eval(q); -} - -template -inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr &expr) -{ - mpf_class const& temp(expr); - mpq_set_f(q, temp.get_mpf_t()); -} - -template -inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr &expr) -{ - mpz_class const& temp(expr); - mpf_set_z(f, temp.get_mpz_t()); -} - -template -inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr &expr) -{ - mpq_class const& temp(expr); - mpf_set_q(f, temp.get_mpq_t()); -} - -inline void __gmp_set_expr(mpf_ptr f, const mpf_class &g) -{ - mpf_set(f, g.get_mpf_t()); -} - -template -inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr &expr) -{ - expr.eval(f); -} - - -/* Temporary objects */ - -template -class __gmp_temp -{ - __gmp_expr val; - public: - template - __gmp_temp(U const& u, V) : val (u) {} - typename __gmp_resolve_expr::srcptr_type - __get_mp() const { return val.__get_mp(); } -}; - -template <> -class __gmp_temp -{ - mpf_class val; - public: - template - __gmp_temp(U const& u, mpf_ptr res) : val (u, mpf_get_prec(res)) {} - mpf_srcptr __get_mp() const { return val.__get_mp(); } -}; - -/**************** Specializations of __gmp_expr ****************/ -/* The eval() method of __gmp_expr evaluates the corresponding - expression and assigns the result to its argument, which is either an - mpz_t, mpq_t, or mpf_t as specified by the T argument. - Compound expressions are evaluated recursively (temporaries are created - to hold intermediate values), while for simple expressions the eval() - method of the appropriate function object (available as the Op argument - of either __gmp_unary_expr or __gmp_binary_expr) is - called. 
*/ - - -/**************** Unary expressions ****************/ -/* cases: - - simple: argument is mp*_class, that is, __gmp_expr - - compound: argument is __gmp_expr (with U not equal to T) */ - - -// simple expressions - -template -class __gmp_expr, Op> > -{ -private: - typedef __gmp_expr val_type; - - __gmp_unary_expr expr; -public: - explicit __gmp_expr(const val_type &val) : expr(val) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { Op::eval(p, expr.val.__get_mp()); } - const val_type & get_val() const { return expr.val; } - mp_bitcnt_t get_prec() const { return expr.val.get_prec(); } -}; - - -// compound expressions - -template -class __gmp_expr, Op> > -{ -private: - typedef __gmp_expr val_type; - - __gmp_unary_expr expr; -public: - explicit __gmp_expr(const val_type &val) : expr(val) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { expr.val.eval(p); Op::eval(p, p); } - const val_type & get_val() const { return expr.val; } - mp_bitcnt_t get_prec() const { return expr.val.get_prec(); } -}; - - -/**************** Binary expressions ****************/ -/* simple: - - arguments are both mp*_class - - one argument is mp*_class, one is a built-in type - compound: - - one is mp*_class, one is __gmp_expr - - one is __gmp_expr, one is built-in - - both arguments are __gmp_expr<...> */ - - -// simple expressions - -template -class __gmp_expr -, __gmp_expr, Op> > -{ -private: - typedef __gmp_expr val1_type; - typedef __gmp_expr val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { Op::eval(p, expr.val1.__get_mp(), expr.val2.__get_mp()); } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const - { - mp_bitcnt_t prec1 = expr.val1.get_prec(), - prec2 = expr.val2.get_prec(); - return (prec1 > prec2) ? 
prec1 : prec2; - } -}; - - -// simple expressions, T is a built-in numerical type - -template -class __gmp_expr, U, Op> > -{ -private: - typedef __gmp_expr val1_type; - typedef U val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { Op::eval(p, expr.val1.__get_mp(), expr.val2); } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const { return expr.val1.get_prec(); } -}; - -template -class __gmp_expr, Op> > -{ -private: - typedef U val1_type; - typedef __gmp_expr val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { Op::eval(p, expr.val1, expr.val2.__get_mp()); } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const { return expr.val2.get_prec(); } -}; - - -// compound expressions, one argument is a subexpression - -template -class __gmp_expr -, __gmp_expr, Op> > -{ -private: - typedef __gmp_expr val1_type; - typedef __gmp_expr val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { - if(p != expr.val1.__get_mp()) - { - __gmp_set_expr(p, expr.val2); - Op::eval(p, expr.val1.__get_mp(), p); - } - else - { - __gmp_temp temp(expr.val2, p); - Op::eval(p, expr.val1.__get_mp(), temp.__get_mp()); - } - } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const - { - mp_bitcnt_t prec1 = expr.val1.get_prec(), - prec2 = expr.val2.get_prec(); - return (prec1 > prec2) ? prec1 : prec2; - } -}; - -template -class __gmp_expr -, __gmp_expr, Op> > -{ -private: - typedef __gmp_expr val1_type; - typedef __gmp_expr val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { - if(p != expr.val2.__get_mp()) - { - __gmp_set_expr(p, expr.val1); - Op::eval(p, p, expr.val2.__get_mp()); - } - else - { - __gmp_temp temp(expr.val1, p); - Op::eval(p, temp.__get_mp(), expr.val2.__get_mp()); - } - } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const - { - mp_bitcnt_t prec1 = expr.val1.get_prec(), - prec2 = expr.val2.get_prec(); - return (prec1 > prec2) ? 
prec1 : prec2; - } -}; - -template -class __gmp_expr -, __gmp_expr, Op> > -{ -private: - typedef __gmp_expr val1_type; - typedef __gmp_expr val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { - if(p != expr.val1.__get_mp()) - { - __gmp_set_expr(p, expr.val2); - Op::eval(p, expr.val1.__get_mp(), p); - } - else - { - __gmp_temp temp(expr.val2, p); - Op::eval(p, expr.val1.__get_mp(), temp.__get_mp()); - } - } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const - { - mp_bitcnt_t prec1 = expr.val1.get_prec(), - prec2 = expr.val2.get_prec(); - return (prec1 > prec2) ? prec1 : prec2; - } -}; - -template -class __gmp_expr -, __gmp_expr, Op> > -{ -private: - typedef __gmp_expr val1_type; - typedef __gmp_expr val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { - if(p != expr.val2.__get_mp()) - { - __gmp_set_expr(p, expr.val1); - Op::eval(p, p, expr.val2.__get_mp()); - } - else - { - __gmp_temp temp(expr.val1, p); - Op::eval(p, temp.__get_mp(), expr.val2.__get_mp()); - } - } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const - { - mp_bitcnt_t prec1 = expr.val1.get_prec(), - prec2 = expr.val2.get_prec(); - return (prec1 > prec2) ? prec1 : prec2; - } -}; - - -// one argument is a subexpression, one is a built-in - -template -class __gmp_expr, V, Op> > -{ -private: - typedef __gmp_expr val1_type; - typedef V val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { - expr.val1.eval(p); - Op::eval(p, p, expr.val2); - } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const { return expr.val1.get_prec(); } -}; - -template -class __gmp_expr, Op> > -{ -private: - typedef U val1_type; - typedef __gmp_expr val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { - expr.val2.eval(p); - Op::eval(p, expr.val1, p); - } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const { return expr.val2.get_prec(); } -}; - - -// both arguments are subexpressions - -template -class __gmp_expr -, __gmp_expr, Op> > -{ -private: - typedef __gmp_expr val1_type; - typedef __gmp_expr val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { - __gmp_temp temp2(expr.val2, p); - expr.val1.eval(p); - Op::eval(p, p, temp2.__get_mp()); - } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const - { - mp_bitcnt_t prec1 = expr.val1.get_prec(), - prec2 = expr.val2.get_prec(); - return (prec1 > prec2) ? 
prec1 : prec2; - } -}; - -template -class __gmp_expr -, __gmp_expr, Op> > -{ -private: - typedef __gmp_expr val1_type; - typedef __gmp_expr val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { - __gmp_temp temp1(expr.val1, p); - expr.val2.eval(p); - Op::eval(p, temp1.__get_mp(), p); - } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const - { - mp_bitcnt_t prec1 = expr.val1.get_prec(), - prec2 = expr.val2.get_prec(); - return (prec1 > prec2) ? prec1 : prec2; - } -}; - -template -class __gmp_expr -, __gmp_expr, Op> > -{ -private: - typedef __gmp_expr val1_type; - typedef __gmp_expr val2_type; - - __gmp_binary_expr expr; -public: - __gmp_expr(const val1_type &val1, const val2_type &val2) - : expr(val1, val2) { } - void eval(typename __gmp_resolve_expr::ptr_type p) const - { - __gmp_temp temp2(expr.val2, p); - expr.val1.eval(p); - Op::eval(p, p, temp2.__get_mp()); - } - const val1_type & get_val1() const { return expr.val1; } - const val2_type & get_val2() const { return expr.val2; } - mp_bitcnt_t get_prec() const - { - mp_bitcnt_t prec1 = expr.val1.get_prec(), - prec2 = expr.val2.get_prec(); - return (prec1 > prec2) ? prec1 : prec2; - } -}; - - -/**************** Special cases ****************/ - -/* Some operations (i.e., add and subtract) with mixed mpz/mpq arguments - can be done directly without first converting the mpz to mpq. - Appropriate specializations of __gmp_expr are required. */ - - -#define __GMPZQ_DEFINE_EXPR(eval_fun) \ - \ -template <> \ -class __gmp_expr > \ -{ \ -private: \ - typedef mpz_class val1_type; \ - typedef mpq_class val2_type; \ - \ - __gmp_binary_expr expr; \ -public: \ - __gmp_expr(const val1_type &val1, const val2_type &val2) \ - : expr(val1, val2) { } \ - void eval(mpq_ptr q) const \ - { eval_fun::eval(q, expr.val1.get_mpz_t(), expr.val2.get_mpq_t()); } \ - const val1_type & get_val1() const { return expr.val1; } \ - const val2_type & get_val2() const { return expr.val2; } \ - mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \ -}; \ - \ -template <> \ -class __gmp_expr > \ -{ \ -private: \ - typedef mpq_class val1_type; \ - typedef mpz_class val2_type; \ - \ - __gmp_binary_expr expr; \ -public: \ - __gmp_expr(const val1_type &val1, const val2_type &val2) \ - : expr(val1, val2) { } \ - void eval(mpq_ptr q) const \ - { eval_fun::eval(q, expr.val1.get_mpq_t(), expr.val2.get_mpz_t()); } \ - const val1_type & get_val1() const { return expr.val1; } \ - const val2_type & get_val2() const { return expr.val2; } \ - mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \ -}; \ - \ -template \ -class __gmp_expr \ -, eval_fun> > \ -{ \ -private: \ - typedef mpz_class val1_type; \ - typedef __gmp_expr val2_type; \ - \ - __gmp_binary_expr expr; \ -public: \ - __gmp_expr(const val1_type &val1, const val2_type &val2) \ - : expr(val1, val2) { } \ - void eval(mpq_ptr q) const \ - { \ - mpq_class temp(expr.val2); \ - eval_fun::eval(q, expr.val1.get_mpz_t(), temp.get_mpq_t()); \ - } \ - const val1_type & get_val1() const { return expr.val1; } \ - const val2_type & get_val2() const { return expr.val2; } \ - mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \ -}; \ - \ -template \ -class __gmp_expr \ -, eval_fun> > \ -{ \ -private: \ - typedef mpq_class val1_type; \ - typedef __gmp_expr val2_type; \ - \ - 
__gmp_binary_expr expr; \ -public: \ - __gmp_expr(const val1_type &val1, const val2_type &val2) \ - : expr(val1, val2) { } \ - void eval(mpq_ptr q) const \ - { \ - mpz_class temp(expr.val2); \ - eval_fun::eval(q, expr.val1.get_mpq_t(), temp.get_mpz_t()); \ - } \ - const val1_type & get_val1() const { return expr.val1; } \ - const val2_type & get_val2() const { return expr.val2; } \ - mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \ -}; \ - \ -template \ -class __gmp_expr \ -, mpq_class, eval_fun> > \ -{ \ -private: \ - typedef __gmp_expr val1_type; \ - typedef mpq_class val2_type; \ - \ - __gmp_binary_expr expr; \ -public: \ - __gmp_expr(const val1_type &val1, const val2_type &val2) \ - : expr(val1, val2) { } \ - void eval(mpq_ptr q) const \ - { \ - mpz_class temp(expr.val1); \ - eval_fun::eval(q, temp.get_mpz_t(), expr.val2.get_mpq_t()); \ - } \ - const val1_type & get_val1() const { return expr.val1; } \ - const val2_type & get_val2() const { return expr.val2; } \ - mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \ -}; \ - \ -template \ -class __gmp_expr \ -, mpz_class, eval_fun> > \ -{ \ -private: \ - typedef __gmp_expr val1_type; \ - typedef mpz_class val2_type; \ - \ - __gmp_binary_expr expr; \ -public: \ - __gmp_expr(const val1_type &val1, const val2_type &val2) \ - : expr(val1, val2) { } \ - void eval(mpq_ptr q) const \ - { \ - mpq_class temp(expr.val1); \ - eval_fun::eval(q, temp.get_mpq_t(), expr.val2.get_mpz_t()); \ - } \ - const val1_type & get_val1() const { return expr.val1; } \ - const val2_type & get_val2() const { return expr.val2; } \ - mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \ -}; \ - \ -template \ -class __gmp_expr, __gmp_expr, eval_fun> > \ -{ \ -private: \ - typedef __gmp_expr val1_type; \ - typedef __gmp_expr val2_type; \ - \ - __gmp_binary_expr expr; \ -public: \ - __gmp_expr(const val1_type &val1, const val2_type &val2) \ - : expr(val1, val2) { } \ - void eval(mpq_ptr q) const \ - { \ - mpz_class temp1(expr.val1); \ - expr.val2.eval(q); \ - eval_fun::eval(q, temp1.get_mpz_t(), q); \ - } \ - const val1_type & get_val1() const { return expr.val1; } \ - const val2_type & get_val2() const { return expr.val2; } \ - mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \ -}; \ - \ -template \ -class __gmp_expr, __gmp_expr, eval_fun> > \ -{ \ -private: \ - typedef __gmp_expr val1_type; \ - typedef __gmp_expr val2_type; \ - \ - __gmp_binary_expr expr; \ -public: \ - __gmp_expr(const val1_type &val1, const val2_type &val2) \ - : expr(val1, val2) { } \ - void eval(mpq_ptr q) const \ - { \ - mpz_class temp2(expr.val2); \ - expr.val1.eval(q); \ - eval_fun::eval(q, q, temp2.get_mpz_t()); \ - } \ - const val1_type & get_val1() const { return expr.val1; } \ - const val2_type & get_val2() const { return expr.val2; } \ - mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \ -}; - - -__GMPZQ_DEFINE_EXPR(__gmp_binary_plus) -__GMPZQ_DEFINE_EXPR(__gmp_binary_minus) - - - -/**************** Macros for defining functions ****************/ -/* Results of operators and functions are instances of __gmp_expr. - T determines the numerical type of the expression: it can be either - mpz_t, mpq_t, or mpf_t. When the arguments of a binary - expression have different numerical types, __gmp_resolve_expr is used - to determine the "larger" type. 
- U is either __gmp_unary_expr or __gmp_binary_expr, - where V and W are the arguments' types -- they can in turn be - expressions, thus allowing to build compound expressions to any - degree of complexity. - Op is a function object that must have an eval() method accepting - appropriate arguments. - Actual evaluation of a __gmp_expr object is done when it gets - assigned to an mp*_class ("lazy" evaluation): this is done by calling - its eval() method. */ - - -// non-member unary operators and functions - -#define __GMP_DEFINE_UNARY_FUNCTION(fun, eval_fun) \ - \ -template \ -inline __gmp_expr, eval_fun> > \ -fun(const __gmp_expr &expr) \ -{ \ - return __gmp_expr, eval_fun> >(expr); \ -} - -#define __GMP_DEFINE_UNARY_TYPE_FUNCTION(type, fun, eval_fun) \ - \ -template \ -inline type fun(const __gmp_expr &expr) \ -{ \ - __gmp_expr const& temp(expr); \ - return eval_fun::eval(temp.__get_mp()); \ -} - - -// non-member binary operators and functions - -#define __GMPP_DEFINE_BINARY_FUNCTION(fun, eval_fun) \ - \ -template \ -inline __gmp_expr::value_type, \ -__gmp_binary_expr<__gmp_expr, __gmp_expr, eval_fun> > \ -fun(const __gmp_expr &expr1, const __gmp_expr &expr2) \ -{ \ - return __gmp_expr::value_type, \ - __gmp_binary_expr<__gmp_expr, __gmp_expr, eval_fun> > \ - (expr1, expr2); \ -} - -#define __GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, bigtype) \ - \ -template \ -inline __gmp_expr \ -, bigtype, eval_fun> > \ -fun(const __gmp_expr &expr, type t) \ -{ \ - return __gmp_expr \ - , bigtype, eval_fun> >(expr, t); \ -} \ - \ -template \ -inline __gmp_expr \ -, eval_fun> > \ -fun(type t, const __gmp_expr &expr) \ -{ \ - return __gmp_expr \ - , eval_fun> >(t, expr); \ -} - -#define __GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, type) \ -__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, mpir_si) - -#define __GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, type) \ -__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, mpir_ui) - -#define __GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, type) \ -__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, double) - -#define __GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, type) \ -__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, long double) - -#ifdef MPIRXX_HAVE_LLONG -#define __GMPN_DEFINE_BINARY_FUNCTION(fun, eval_fun) \ -__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed char) \ -__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned char) \ -__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed int) \ -__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned int) \ -__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed short int) \ -__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned short int) \ -__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed long int) \ -__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned long int) \ -__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed long long int) \ -__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned long long int) \ -__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, float) \ -__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, double) \ -__GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, long double) -#else -#define __GMPN_DEFINE_BINARY_FUNCTION(fun, eval_fun) \ -__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed char) \ -__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned char) \ -__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed int) \ -__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned int) \ -__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed short int) \ -__GMPNU_DEFINE_BINARY_FUNCTION(fun, 
eval_fun, unsigned short int) \ -__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed long int) \ -__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned long int) \ -__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, float) \ -__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, double) \ -__GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, long double) -#endif - -#define __GMP_DEFINE_BINARY_FUNCTION(fun, eval_fun) \ -__GMPP_DEFINE_BINARY_FUNCTION(fun, eval_fun) \ -__GMPN_DEFINE_BINARY_FUNCTION(fun, eval_fun) - - -#define __GMP_DEFINE_BINARY_FUNCTION_UI(fun, eval_fun) \ - \ -template \ -inline __gmp_expr \ -, mp_bitcnt_t, eval_fun> > \ -fun(const __gmp_expr &expr, mp_bitcnt_t l) \ -{ \ - return __gmp_expr, mpir_ui, eval_fun> >(expr, l); \ -} - - -#define __GMPP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \ - \ -template \ -inline type fun(const __gmp_expr &expr1, \ - const __gmp_expr &expr2) \ -{ \ - typedef typename __gmp_resolve_expr::value_type eval_type; \ - __gmp_expr const& temp1(expr1); \ - __gmp_expr const& temp2(expr2); \ - return eval_fun::eval(temp1.__get_mp(), temp2.__get_mp()); \ -} - -#define __GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, \ - type2, bigtype) \ - \ -template \ -inline type fun(const __gmp_expr &expr, type2 t) \ -{ \ - __gmp_expr const& temp(expr); \ - return eval_fun::eval(temp.__get_mp(), static_cast(t)); \ -} \ - \ -template \ -inline type fun(type2 t, const __gmp_expr &expr) \ -{ \ - __gmp_expr const& temp(expr); \ - return eval_fun::eval(static_cast(t), temp.__get_mp()); \ -} - -#define __GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \ -__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, \ - type2, mpir_si) - -#define __GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \ -__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, \ - type2, mpir_ui) - -#define __GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \ -__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2, double) - -#define __GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \ -__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2, long double) - -#ifdef MPIRXX_HAVE_LLONG -#define __GMPN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \ -__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed char) \ -__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned char) \ -__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed int) \ -__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned int) \ -__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed short int) \ -__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned short int) \ -__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed long int) \ -__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned long int) \ -__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed long long int) \ -__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned long long int) \ -__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, float) \ -__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, double) \ -__GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, long double) -#else -#define __GMPN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \ -__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed char) \ -__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned char) \ -__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed int) \ 
-__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned int) \ -__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed short int) \ -__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned short int) \ -__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed long int) \ -__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned long int) \ -__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, float) \ -__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, double) \ -__GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, long double) -#endif - -#define __GMP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \ -__GMPP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \ -__GMPN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) - - -// member operators - -#define __GMPP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun) \ - \ -template \ -inline type##_class & type##_class::fun(const __gmp_expr &expr) \ -{ \ - __gmp_set_expr(mp, __gmp_expr, eval_fun> >(*this, expr)); \ - return *this; \ -} - -#define __GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, \ - type2, bigtype) \ - \ -inline type##_class & type##_class::fun(type2 t) \ -{ \ - __gmp_set_expr(mp, __gmp_expr >(*this, t)); \ - return *this; \ -} - -#define __GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \ -__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, \ - type2, mpir_si) - -#define __GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \ -__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, \ - type2, mpir_ui) - -#define __GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \ -__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2, double) - -#define __GMPNLD_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \ -__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2, long double) - -#ifdef MPIRXX_HAVE_LLONG -#define __GMPN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun) \ -__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed char) \ -__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned char) \ -__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed int) \ -__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned int) \ -__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed short int) \ -__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned short int) \ -__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed long int) \ -__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned long int) \ -__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed long long int) \ -__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned long long int) \ -__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, float) \ -__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, double) \ -/* __GMPNLD_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, long double) */ -#else -#define __GMPN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun) \ -__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed char) \ -__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned char) \ -__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed int) \ -__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned int) \ -__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed short int) \ -__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned short int) \ -__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed long int) \ -__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned long int) \ 
-__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, float) \ -__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, double) \ -/* __GMPNLD_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, long double) */ -#endif - -#define __GMP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun) \ -__GMPP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun) \ -__GMPN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun) - -#define __GMPZ_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \ -__GMP_DEFINE_COMPOUND_OPERATOR(mpz, fun, eval_fun) - -#define __GMPQ_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \ -__GMP_DEFINE_COMPOUND_OPERATOR(mpq, fun, eval_fun) - -#define __GMPF_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \ -__GMP_DEFINE_COMPOUND_OPERATOR(mpf, fun, eval_fun) - - - -#define __GMP_DEFINE_COMPOUND_OPERATOR_UI(type, fun, eval_fun) \ - \ -inline type##_class & type##_class::fun(mpir_ui l) \ -{ \ - __gmp_set_expr(mp, __gmp_expr >(*this, l)); \ - return *this; \ -} - -#define __GMPZ_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \ -__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpz, fun, eval_fun) - -#define __GMPQ_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \ -__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpq, fun, eval_fun) - -#define __GMPF_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \ -__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpf, fun, eval_fun) - - - -#define __GMP_DEFINE_INCREMENT_OPERATOR(type, fun, eval_fun) \ - \ -inline type##_class & type##_class::fun() \ -{ \ - eval_fun::eval(mp); \ - return *this; \ -} \ - \ -inline type##_class type##_class::fun(int) \ -{ \ - type##_class temp(*this); \ - eval_fun::eval(mp); \ - return temp; \ -} - -#define __GMPZ_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \ -__GMP_DEFINE_INCREMENT_OPERATOR(mpz, fun, eval_fun) - -#define __GMPQ_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \ -__GMP_DEFINE_INCREMENT_OPERATOR(mpq, fun, eval_fun) - -#define __GMPF_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \ -__GMP_DEFINE_INCREMENT_OPERATOR(mpf, fun, eval_fun) - - - -/**************** Arithmetic operators and functions ****************/ - -// non-member operators and functions - -__GMP_DEFINE_UNARY_FUNCTION(operator+, __gmp_unary_plus) -__GMP_DEFINE_UNARY_FUNCTION(operator-, __gmp_unary_minus) -__GMP_DEFINE_UNARY_FUNCTION(operator~, __gmp_unary_com) - -__GMP_DEFINE_BINARY_FUNCTION(operator+, __gmp_binary_plus) -__GMP_DEFINE_BINARY_FUNCTION(operator-, __gmp_binary_minus) -__GMP_DEFINE_BINARY_FUNCTION(operator*, __gmp_binary_multiplies) -__GMP_DEFINE_BINARY_FUNCTION(operator/, __gmp_binary_divides) -__GMP_DEFINE_BINARY_FUNCTION(operator%, __gmp_binary_modulus) -__GMP_DEFINE_BINARY_FUNCTION(operator&, __gmp_binary_and) -__GMP_DEFINE_BINARY_FUNCTION(operator|, __gmp_binary_ior) -__GMP_DEFINE_BINARY_FUNCTION(operator^, __gmp_binary_xor) - -__GMP_DEFINE_BINARY_FUNCTION_UI(operator<<, __gmp_binary_lshift) -__GMP_DEFINE_BINARY_FUNCTION_UI(operator>>, __gmp_binary_rshift) - -__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator==, __gmp_binary_equal) -__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator!=, ! __gmp_binary_equal) -__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<, __gmp_binary_less) -__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<=, ! __gmp_binary_greater) -__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>, __gmp_binary_greater) -__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>=, ! 
__gmp_binary_less) - -__GMP_DEFINE_UNARY_FUNCTION(abs, __gmp_abs_function) -__GMP_DEFINE_UNARY_FUNCTION(trunc, __gmp_trunc_function) -__GMP_DEFINE_UNARY_FUNCTION(floor, __gmp_floor_function) -__GMP_DEFINE_UNARY_FUNCTION(ceil, __gmp_ceil_function) -__GMP_DEFINE_UNARY_FUNCTION(sqrt, __gmp_sqrt_function) -__GMP_DEFINE_BINARY_FUNCTION(hypot, __gmp_hypot_function) - -__GMP_DEFINE_UNARY_TYPE_FUNCTION(int, sgn, __gmp_sgn_function) -__GMP_DEFINE_BINARY_TYPE_FUNCTION(int, cmp, __gmp_cmp_function) - -template -void swap(__gmp_expr& x, __gmp_expr& y) __GMPXX_NOEXCEPT -{ x.swap(y); } - -// member operators for mpz_class - -__GMPZ_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus) -__GMPZ_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus) -__GMPZ_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies) -__GMPZ_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides) -__GMPZ_DEFINE_COMPOUND_OPERATOR(operator%=, __gmp_binary_modulus) - -__GMPZ_DEFINE_COMPOUND_OPERATOR(operator&=, __gmp_binary_and) -__GMPZ_DEFINE_COMPOUND_OPERATOR(operator|=, __gmp_binary_ior) -__GMPZ_DEFINE_COMPOUND_OPERATOR(operator^=, __gmp_binary_xor) - -__GMPZ_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift) -__GMPZ_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift) - -__GMPZ_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment) -__GMPZ_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement) - -// member operators for mpq_class - -__GMPQ_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus) -__GMPQ_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus) -__GMPQ_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies) -__GMPQ_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides) - -__GMPQ_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift) -__GMPQ_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift) - -__GMPQ_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment) -__GMPQ_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement) - -// member operators for mpf_class - -__GMPF_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus) -__GMPF_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus) -__GMPF_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies) -__GMPF_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides) - -__GMPF_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift) -__GMPF_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift) - -__GMPF_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment) -__GMPF_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement) - - - -/**************** Class wrapper for gmp_randstate_t ****************/ - -class __gmp_urandomb_value { }; -class __gmp_urandomm_value { }; - -template <> -class __gmp_expr -{ -private: - __gmp_randstate_struct *state; - mp_bitcnt_t bits; -public: - __gmp_expr(gmp_randstate_t s, mp_bitcnt_t l) : state(s), bits(l) { } - void eval(mpz_ptr z) const { __gmp_rand_function::eval(z, state, bits); } - mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } -}; - -template <> -class __gmp_expr -{ -private: - __gmp_randstate_struct *state; - mpz_class range; -public: - __gmp_expr(gmp_randstate_t s, const mpz_class &z) : state(s), range(z) { } - void eval(mpz_ptr z) const - { __gmp_rand_function::eval(z, state, range.get_mpz_t()); } - mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } -}; - -template <> -class __gmp_expr -{ -private: - __gmp_randstate_struct *state; - mp_bitcnt_t bits; -public: - __gmp_expr(gmp_randstate_t s, 
mp_bitcnt_t l) : state(s), bits(l) { } - void eval(mpf_ptr f) const - { - __gmp_rand_function::eval(f, state, - (bits>0) ? bits : mpf_get_prec(f)); - } - mp_bitcnt_t get_prec() const - { - if (bits == 0) - return mpf_get_default_prec(); - else - return bits; - } -}; - -extern "C" { - typedef void __gmp_randinit_default_t (gmp_randstate_t); - typedef void __gmp_randinit_lc_2exp_t (gmp_randstate_t, mpz_srcptr, mpir_ui, mp_bitcnt_t); - typedef int __gmp_randinit_lc_2exp_size_t (gmp_randstate_t, mp_bitcnt_t); -} - -class gmp_randclass -{ -private: - gmp_randstate_t state; - - // copy construction and assignment not allowed - gmp_randclass(const gmp_randclass &); - void operator=(const gmp_randclass &); -public: - // constructors and destructor - gmp_randclass(gmp_randalg_t alg, mp_bitcnt_t size) - { - switch (alg) - { - case GMP_RAND_ALG_LC: // no other cases for now - default: - gmp_randinit(state, alg, size); - break; - } - } - - // gmp_randinit_default - gmp_randclass(__gmp_randinit_default_t* f) { f(state); } - - // gmp_randinit_lc_2exp - gmp_randclass(__gmp_randinit_lc_2exp_t* f, - mpz_class z, mpir_ui l1, mp_bitcnt_t l2) - { f(state, z.get_mpz_t(), l1, l2); } - - // gmp_randinit_lc_2exp_size - gmp_randclass(__gmp_randinit_lc_2exp_size_t* f, - mp_bitcnt_t size) - { - if (f (state, size) == 0) - throw std::length_error ("gmp_randinit_lc_2exp_size"); - } - - ~gmp_randclass() { gmp_randclear(state); } - - // initialize - void seed(); // choose a random seed some way (?) - void seed(mpir_ui s) { gmp_randseed_ui(state, s); } - void seed(const mpz_class &z) { gmp_randseed(state, z.get_mpz_t()); } - - // get random number - __gmp_expr get_z_bits(mp_bitcnt_t l) - { return __gmp_expr(state, l); } - __gmp_expr get_z_bits(const mpz_class &z) - { return get_z_bits(z.get_ui()); } - // FIXME: z.get_bitcnt_t() ? 
- - __gmp_expr get_z_range(const mpz_class &z) - { return __gmp_expr(state, z); } - - __gmp_expr get_f(mp_bitcnt_t prec = 0) - { return __gmp_expr(state, prec); } -}; - - -/**************** Specialize std::numeric_limits ****************/ - -namespace std { - template <> class numeric_limits - { - public: - static const bool is_specialized = true; - static mpz_class min() { return mpz_class(); } - static mpz_class max() { return mpz_class(); } - static mpz_class lowest() { return mpz_class(); } - static const int digits = 0; - static const int digits10 = 0; - static const int max_digits10 = 0; - static const bool is_signed = true; - static const bool is_integer = true; - static const bool is_exact = true; - static const int radix = 2; - static mpz_class epsilon() { return mpz_class(); } - static mpz_class round_error() { return mpz_class(); } - static const int min_exponent = 0; - static const int min_exponent10 = 0; - static const int max_exponent = 0; - static const int max_exponent10 = 0; - static const bool has_infinity = false; - static const bool has_quiet_NaN = false; - static const bool has_signaling_NaN = false; - static const float_denorm_style has_denorm = denorm_absent; - static const bool has_denorm_loss = false; - static mpz_class infinity() { return mpz_class(); } - static mpz_class quiet_NaN() { return mpz_class(); } - static mpz_class signaling_NaN() { return mpz_class(); } - static mpz_class denorm_min() { return mpz_class(); } - static const bool is_iec559 = false; - static const bool is_bounded = false; - static const bool is_modulo = false; - static const bool traps = false; - static const bool tinyness_before = false; - static const float_round_style round_style = round_toward_zero; - }; - - template <> class numeric_limits - { - public: - static const bool is_specialized = true; - static mpq_class min() { return mpq_class(); } - static mpq_class max() { return mpq_class(); } - static mpq_class lowest() { return mpq_class(); } - static const int digits = 0; - static const int digits10 = 0; - static const int max_digits10 = 0; - static const bool is_signed = true; - static const bool is_integer = false; - static const bool is_exact = true; - static const int radix = 2; - static mpq_class epsilon() { return mpq_class(); } - static mpq_class round_error() { return mpq_class(); } - static const int min_exponent = 0; - static const int min_exponent10 = 0; - static const int max_exponent = 0; - static const int max_exponent10 = 0; - static const bool has_infinity = false; - static const bool has_quiet_NaN = false; - static const bool has_signaling_NaN = false; - static const float_denorm_style has_denorm = denorm_absent; - static const bool has_denorm_loss = false; - static mpq_class infinity() { return mpq_class(); } - static mpq_class quiet_NaN() { return mpq_class(); } - static mpq_class signaling_NaN() { return mpq_class(); } - static mpq_class denorm_min() { return mpq_class(); } - static const bool is_iec559 = false; - static const bool is_bounded = false; - static const bool is_modulo = false; - static const bool traps = false; - static const bool tinyness_before = false; - static const float_round_style round_style = round_toward_zero; - }; - - template <> class numeric_limits - { - public: - static const bool is_specialized = true; - static mpf_class min() { return mpf_class(); } - static mpf_class max() { return mpf_class(); } - static mpf_class lowest() { return mpf_class(); } - static const int digits = 0; - static const int digits10 = 0; - static const int 
max_digits10 = 0; - static const bool is_signed = true; - static const bool is_integer = false; - static const bool is_exact = false; - static const int radix = 2; - static mpf_class epsilon() { return mpf_class(); } - static mpf_class round_error() { return mpf_class(); } - static const int min_exponent = 0; - static const int min_exponent10 = 0; - static const int max_exponent = 0; - static const int max_exponent10 = 0; - static const bool has_infinity = false; - static const bool has_quiet_NaN = false; - static const bool has_signaling_NaN = false; - static const float_denorm_style has_denorm = denorm_absent; - static const bool has_denorm_loss = false; - static mpf_class infinity() { return mpf_class(); } - static mpf_class quiet_NaN() { return mpf_class(); } - static mpf_class signaling_NaN() { return mpf_class(); } - static mpf_class denorm_min() { return mpf_class(); } - static const bool is_iec559 = false; - static const bool is_bounded = false; - static const bool is_modulo = false; - static const bool traps = false; - static const bool tinyness_before = false; - static const float_round_style round_style = round_indeterminate; - }; -} - - -/**************** #undef all private macros ****************/ - -#undef __GMPP_DECLARE_COMPOUND_OPERATOR -#undef __GMPN_DECLARE_COMPOUND_OPERATOR -#undef __GMP_DECLARE_COMPOUND_OPERATOR -#undef __GMP_DECLARE_COMPOUND_OPERATOR_UI -#undef __GMP_DECLARE_INCREMENT_OPERATOR - -#undef __GMPZQ_DEFINE_EXPR - -#undef __GMP_DEFINE_UNARY_FUNCTION -#undef __GMP_DEFINE_UNARY_TYPE_FUNCTION - -#undef __GMPP_DEFINE_BINARY_FUNCTION -#undef __GMPNN_DEFINE_BINARY_FUNCTION -#undef __GMPNS_DEFINE_BINARY_FUNCTION -#undef __GMPNU_DEFINE_BINARY_FUNCTION -#undef __GMPND_DEFINE_BINARY_FUNCTION -#undef __GMPNLD_DEFINE_BINARY_FUNCTION -#undef __GMPN_DEFINE_BINARY_FUNCTION -#undef __GMP_DEFINE_BINARY_FUNCTION - -#undef __GMP_DEFINE_BINARY_FUNCTION_UI - -#undef __GMPP_DEFINE_BINARY_TYPE_FUNCTION -#undef __GMPNN_DEFINE_BINARY_TYPE_FUNCTION -#undef __GMPNS_DEFINE_BINARY_TYPE_FUNCTION -#undef __GMPNU_DEFINE_BINARY_TYPE_FUNCTION -#undef __GMPND_DEFINE_BINARY_TYPE_FUNCTION -#undef __GMPNLD_DEFINE_BINARY_TYPE_FUNCTION -#undef __GMPN_DEFINE_BINARY_TYPE_FUNCTION -#undef __GMP_DEFINE_BINARY_TYPE_FUNCTION - -#undef __GMPZ_DEFINE_COMPOUND_OPERATOR - -#undef __GMPP_DEFINE_COMPOUND_OPERATOR -#undef __GMPNN_DEFINE_COMPOUND_OPERATOR -#undef __GMPNS_DEFINE_COMPOUND_OPERATOR -#undef __GMPNU_DEFINE_COMPOUND_OPERATOR -#undef __GMPND_DEFINE_COMPOUND_OPERATOR -#undef __GMPNLD_DEFINE_COMPOUND_OPERATOR -#undef __GMPN_DEFINE_COMPOUND_OPERATOR -#undef __GMP_DEFINE_COMPOUND_OPERATOR - -#undef __GMPQ_DEFINE_COMPOUND_OPERATOR -#undef __GMPF_DEFINE_COMPOUND_OPERATOR - -#undef __GMP_DEFINE_COMPOUND_OPERATOR_UI -#undef __GMPZ_DEFINE_COMPOUND_OPERATOR_UI -#undef __GMPQ_DEFINE_COMPOUND_OPERATOR_UI -#undef __GMPF_DEFINE_COMPOUND_OPERATOR_UI - -#undef __GMP_DEFINE_INCREMENT_OPERATOR -#undef __GMPZ_DEFINE_INCREMENT_OPERATOR -#undef __GMPQ_DEFINE_INCREMENT_OPERATOR -#undef __GMPF_DEFINE_INCREMENT_OPERATOR - -#undef __GMPXX_CONSTANT - -#endif /* __GMP_PLUSPLUS__ */ diff --git a/thirdparty/linux/nasm.get b/thirdparty/linux/nasm.get deleted file mode 100644 index a8c4f16822..0000000000 --- a/thirdparty/linux/nasm.get +++ /dev/null @@ -1,7 +0,0 @@ -wget http://www.nasm.us/pub/nasm/releasebuilds/2.13.01/nasm-2.13.01.tar.bz2 -tar xjvf nasm-2.13.01.tar.bz2 -cd nasm-2.13.01 -./autogen.sh -./configure -make -make install \ No newline at end of file diff --git a/thirdparty/linux/ntl.get b/thirdparty/linux/ntl.get index 
4cb1dc6cb5..46ee3a6e1d 100644 --- a/thirdparty/linux/ntl.get +++ b/thirdparty/linux/ntl.get @@ -1,11 +1,5 @@ -wget http://www.shoup.net/ntl/ntl-9.11.0.tar.gz -tar -zxvf ntl-9.11.0.tar.gz -mv ntl-9.11.0 ntl -rm ntl-9.11.0.tar.gz - - cd ntl/src -./configure -make +make clean +make ./ntl.a mv ./ntl.a ./libntl.a \ No newline at end of file diff --git a/thirdparty/linux/ntl/README b/thirdparty/linux/ntl/README new file mode 100644 index 0000000000..e814a52663 --- /dev/null +++ b/thirdparty/linux/ntl/README @@ -0,0 +1,19 @@ +NTL -- a library for doing numbery theory -- version 9.11.0 +Release date: 2016.08.22 + +Author: Victor Shoup (victor@shoup.net) + +NTL is open-source software distributed under the terms of the +GNU General Public License. +See the file doc/copying.txt for complete details on the licensing +of NTL. + +Documentation is available in the file doc/tour.html, which can +be viewed with a web browser. + +For a detailed guide to installation, please see the appropriate +documentation: + * doc/tour-unix.html for unix systems + * doc/tour-win.html for Windows and other systems + +The latest version of NTL is available at http://www.shoup.net. diff --git a/thirdparty/linux/ntl/doc/BasicThreadPool.cpp.html b/thirdparty/linux/ntl/doc/BasicThreadPool.cpp.html new file mode 100644 index 0000000000..e153d837de --- /dev/null +++ b/thirdparty/linux/ntl/doc/BasicThreadPool.cpp.html @@ -0,0 +1,387 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/BasicThreadPool.cpp.html + + + + +
+
+/************************************************************************
+
+MODULE: BasicThreadPool
+
+SUMMARY:
+
+A simple thread pool class BasicThreadPool, as well as some higher-level macros
+which facilitate simple parallel for loops.
+
+
+***************************************************************************/
+
+
+// ********************** Simple parallel for loops **************************
+//
+// We begin with a description of the higher-level macros for writing simple
+// parallel for loops.  These facilities are activated only when NTL is
+// configured with NTL_THREAD_BOOST=on (which implies NTL_THREADS=on).
+// However, code that uses these facilities should still compile and run
+// correctly even when NTL_THREAD_BOOST=off, or even when NTL_THREADS=off, so
+// this is the simplest way to write parallel for loops across a range of
+// compile-time and run-time environments.  Note that if NTL_THREADS=on, C++11
+// features are required, but when NTL_THREADS=off, these features are not
+// required, so the code should compile on older C++ compilers.
+//
+// Here is a simple recipe for writing a parallel for loop.
+//
+// At the start of program execution, your program should execute
+
+   SetNumThreads(nt);
+
+// You can choose nt to be any positive integer, but for best results, it
+// should correspond to the number of available cores on your machine.
+// [NOTE: if NTL_THREAD_BOOST=off, this function is still defined, but does
+// nothing.]
+//
+// Now consider the following routine:
+
+   void mul(ZZ *x, const ZZ *a, const ZZ *b, long n)
+   {
+      for (long i = 0; i < n; i++)
+         mul(x[i], a[i], b[i]);
+   }
+
+// We can parallelize it as follows:
+
+   void mul(ZZ *x, const ZZ *a, const ZZ *b, long n)
+   {
+      NTL_EXEC_RANGE(n, first, last)
+
+         for (long i = first; i < last; i++)
+            mul(x[i], a[i], b[i]);
+
+      NTL_EXEC_RANGE_END
+   }
+
+// NTL_EXEC_RANGE and NTL_EXEC_RANGE_END are macros that just "do the right
+// thing".  If there are nt threads available, the interval [0..n) will be
+// partitioned into (up to)  nt subintervals, and a different thread will be
+// used to process each subinterval. You still have to write the for loop
+// yourself: the macro just declares and initializes variables "first" and
+// "last" (or whatever you want to call them) of type long that represent the
+// subinterval [first..last) to be processed by one thread.
+//
+// Note that the current thread participates as one of the nt available
+// threads, and that the current thread will wait for all other participating
+// threads to finish their tasks before proceeding.  The current thread can be
+// identified as the one with first == 0.
+//
+// Within the "body" of this construct, you can freely reference any variables
+// that are visible at this point.  This is implemented using the C++ lambda
+// feature (capturing all variables by reference).
+//
+// This construct will still work even if threads are disabled, in which case
+// it runs single-threaded with first=0 and last=n.
+//
+// Note that the code within the EXEC_RANGE body could call other routines that
+// themselves attempt to execute an EXEC_RANGE: if this happens, the latter
+// EXEC_RANGE will detect this and run single-threaded.
+//
+// You may wish to do other things within the EXEC_RANGE body than just execute
+// a loop.  One thing you may want to do is to declare variables.  Another
+// thing you may want to do is set up a local context for a ZZ_p modulus (or
+// other type of modulus).  Here is an example of doing this:
+
+
+   void mul(ZZ_p *x, const ZZ_p *a, const ZZ_p *b, long n)
+   {
+      ZZ_pContext context;
+      context.save();
+
+      NTL_EXEC_RANGE(n, first, last)
+      
+         context.restore();
+
+         for (long i = first; i < last; i++)
+            mul(x[i], a[i], b[i]);
+
+      NTL_EXEC_RANGE_END
+   }
+
+
+// Another useful function is AvailableThreads(), which will return the number
+// of available threads.  If threads or thread boosting is not enabled, this
+// will return 1.  Even if thread boosting is enabled, this may return 1 if for
+// whatever reason, the thread pool is not available for use (for example,
+// SetNumThreads was never called, or the thread pool is already active).
+//
+// A lower-level set of tools is available, which allows you to simply run a
+// specified number of threads.  Assuming nt <= AvailableThreads(), the code
+
+   NTL_EXEC_INDEX(nt, index)
+
+      ... code ...
+
+   NTL_EXEC_INDEX_END
+
+// will execute the body on nt different threads, each with a unique index in
+// the range [0..nt).  A variable named "index" (or whatever name you specify)
+// of type long will hold the given index.  Just as with EXEC_RANGE, the current
+// thread will participate as one of the nt threads, and will always be
+// assigned an index of 0.
+//
+// This tool is useful if you need to manage memory a bit more carefully.  For
+// example, the following code will compute an inner product using all
+// available threads:
+
+   ZZ InnerProd(const ZZ *a, const ZZ *b, long n)
+   {
+      PartitionInfo pinfo(n);
+
+      long cnt = pinfo.NumIntervals();
+
+      Vec<ZZ> acc;
+      acc.SetLength(cnt);
+
+      NTL_EXEC_INDEX(cnt, index)
+
+         long first, last;
+         pinfo.interval(first, last, index);
+
+         ZZ& sum = acc[index];
+         sum = 0;
+         for (long i = first; i < last; i++)
+            MulAddTo(sum, a[i], b[i]);
+
+      NTL_EXEC_INDEX_END
+
+      ZZ sum;
+      sum = 0;
+      for (long i = 0; i < cnt; i++)
+         sum += acc[i];
+
+      return sum;
+   }
+
+// This example also illustrates the class PartitionInfo, which is useful for
+// partitioning a large interval into smaller intervals (it is used internally
+// by EXEC_RANGE).  The constructor takes a single argument (in this example n)
+// and computes a partition of [0..n) into nearly equally sized subintervals.
+// The method NumIntervals() returns the number of subintervals, and the method
+// interval(first, last, index) sets first and last according to the endpoints
+// of the subinterval [first..last) with the given index.
+//
+// So in this example, cnt threads will run, each accumulating a sum into a
+// corresponding element of the vector acc, and afterwards, these elements are
+// summed.
+//
+// Note that if threads are not enabled or otherwise unavailable, the above
+// code will compile and run correctly (just using one thread).
+//
+// Finally, there is a "guarded" version of NTL_EXEC_RANGE called
+// NTL_GEXEC_RANGE.  This allows one to dynamically "guard" against parallel
+// execution. For example, on very small problems the runtime overhead of a
+// parallel for loop may not be worthwhile, or in other situations parallel
+// execution could cause incorrect behavior.  See below for details.
+
+
+// ************************** Thread Pools ******************************
+//
+// The above facilities are built on top of a more general thread pool class,
+// which you may use for your own purposes.
+//    
+// You create a thread pool by constructing a BasicThreadPool object.  For
+// example:
+
+   long nthreads = 4;
+   BasicThreadPool pool(nthreads);
+
+// creates a thread pool of 4 threads.  These threads will exist until the
+// destructor for pool is called.  
+//
+// The simplest way to use a thread pool is as follows.  Suppose you have a
+// task that consists of sz subtasks, indexed 0..sz-1.  Then you can write:
+
+   pool.exec_range(sz,
+      [&](long first, long last) {
+         for (long i = first; i < last; i++) {
+            ... code to process subtask i ...
+         }
+      }
+   );
+
+// The second argument to exec_range is a C++11 "lambda".  The "[&]" indicates
+// that all local variables in the calling context are captured by reference,
+// so the lambda body can reference all visible local variables directly.
+// C++11 provides other methods for capturing local variables.  The interval
+// [0..sz) is partitioned into subintervals of the form [first..last), which
+// are processed by the code in the supplied lambda.
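+//
+// As a concrete sketch (the per-subtask work here is purely illustrative):
+
+   Vec<long> results;
+   results.SetLength(sz);
+
+   pool.exec_range(sz,
+      [&](long first, long last) {
+         for (long i = first; i < last; i++)
+            results[i] = i*i;   // hypothetical work for subtask i
+      }
+   );
+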
+//
+// A lower-level interface is also provided.  One can write:
+
+   pool.exec_index(cnt,
+      [&](long index) {
+         ... code to process index i ...
+      }
+   );
+
+// This will activate exactly cnt threads with indices 0..cnt-1, and execute
+// the given code on each index.  The parameter cnt must not exceed nthreads,
+// otherwise an error is raised.
+
+
+// ====================================================================
+//
+// NOTES:
+//
+// When one activates a thread pool with nthreads threads, the *current* thread
+// (the one activating the pool) will also participate in the computation.
+// This means that the thread pool only contains nthreads-1 other threads.
+//
+// If, during an activation, any thread throws an exception, it will be caught
+// and rethrown in the activating thread when all the threads complete.  If
+// more than one thread throws an exception, the first one that is caught is
+// the one that is rethrown.
+//
+// Methods are also provided for adding, deleting, and moving threads in and
+// among thread pools.
+//
+// If NTL_THREADS=off, the corresponding header file may be included, but the
+// BasicThreadPool class is not defined.
+//
+// Unlike most classes in NTL, the BasicThreadPool is not relocatable and hence
+// cannot be used in a Vec.  One should first wrap it in a pointer class, such
+// as UniquePtr.
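+//
+// For example (an illustrative sketch, assuming UniquePtr's make() helper,
+// which forwards its arguments to the constructor):
+
+   Vec< UniquePtr<BasicThreadPool> > pools;
+   pools.SetLength(2);
+   pools[0].make(4);   // pool of 4 threads
+   pools[1].make(2);   // pool of 2 threads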
+
+
+
+// class BasicThreadPool: provides basic functionality for thread pools
+
+class BasicThreadPool {
+private:
+
+  BasicThreadPool(const BasicThreadPool&); // disabled
+  void operator=(const BasicThreadPool&); // disabled
+
+public:
+
+  explicit
+  BasicThreadPool(long nthreads);
+  // creates a pool with nthreads threads, including the current thread
+  // (so nthreads-1 other threads get created)
+
+  template<class Fct>
+  void exec_range(long sz, const Fct& fct);
+  // activate by range (see example usage above)
+
+  template<class Fct>
+  void exec_index(long cnt, const Fct& fct);
+  // activate by index (see example usage above)
+
+  void add(long n = 1);
+  // add n threads to the pool
+
+  long NumThreads() const;
+  // return number of threads (including current thread)
+
+  void remove(long n = 1);
+  // remove n threads from the pool
+  
+  void move(BasicThreadPool& other, long n = 1);
+  // move n threads from other pool to this pool
+
+  bool active() const;
+  // indicates an activation is in progress: invoking any of the methods
+  // exec_index, exec_range, add, remove, move, or the destructor
+  // while active will raise an error
+
+  template<class Fct>
+  static void relaxed_exec_range(BasicThreadPool *pool, long sz, const Fct& fct);
+  // similar to pool->exec_range(sz, fct), but will still work even
+  // if !pool or pool->active(), using just the current thread
+
+  template<class Fct>
+  static void relaxed_exec_index(BasicThreadPool *pool, long cnt, const Fct& fct);
+  // similar to pool->exec_index(cnt, fct), but will still work even
+  // if !pool or pool->active(), provided cnt <= 1, using just the current thread
+
+};
+
+
+
+
+// THREAD BOOSTING FEATURES:
+
+void SetNumThreads(long nt);
+// convenience routine to set NTL's thread pool.
+// If called more than once, the old thread pool is destroyed and
+// replaced by a new one.
+// If NTL_THREAD_BOOST=off, then this is still defined, but does nothing.
+
+long AvailableThreads();
+// Number of threads currently available to use in NTL's thread pool.  This is
+// always at least 1 (for the current thread).  
+// If NTL_THREAD_BOOST=off, then this is still defined, and always returns 1.
+
+BasicThreadPool *GetThreadPool();
+void ResetThreadPool(BasicThreadPool *pool = 0);
+BasicThreadPool *ReleaseThreadPool();
+// Routines to get and set NTL's thread pool.  The interfaces parallel NTL's
+// UniquePtr class, and indeed, behind the scenes, NTL's thread pool is stored
+// as a UniquePtr<BasicThreadPool>.
+// These are only declared when NTL_THREAD_BOOST=on.  
+
+
+#define NTL_EXEC_RANGE(sz, first, last) ...
+#define NTL_EXEC_RANGE_END ...
+#define NTL_EXEC_INDEX(cnt, index) ...
+#define NTL_EXEC_INDEX_END ...
+// convenience macros to implement "parallel for loops" using NTL's thread
+// pool.  See examples above for usage.  If NTL_THREAD_BOOST=off, then these
+// are still defined, and code will run on a single thread.
+
+
+#define NTL_GEXEC_RANGE(seq, sz, first, last) ...
+#define NTL_GEXEC_RANGE_END ...
+// "guarded" version of NTL_EXEC_RANGE: if seq evaluates to true, the code runs
+// on a single thread.  This is useful in avoiding situations where the
+// overhead of a parallel loop is too high.  If seq evaluates to the constant
+// true, a good compiler will optimize code to run on a single thread, with no
+// overhead.
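+//
+// For example (a sketch; the threshold 32 is an arbitrary choice):
+
+   void mul(ZZ *x, const ZZ *a, const ZZ *b, long n)
+   {
+      NTL_GEXEC_RANGE(n < 32, n, first, last)
+
+         for (long i = first; i < last; i++)
+            mul(x[i], a[i], b[i]);
+
+      NTL_GEXEC_RANGE_END
+   }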
+
+#define NTL_IMPORT(x)
+// To be used in conjunction with NTL_EXEC_RANGE and friends.  When
+// NTL_THREAD_BOOST=on, this will copy the variable named x from the enclosing
+// scope to a local copy.  This should only be used for types with cheap
+// copies, such as scalars and pointers.  In some situations, this allows the
+// compiler to optimize a bit more aggressively.  One or more of these may be
+// placed right after an NTL_EXEC_RANGE.
+// When NTL_THREAD_BOOST=off, this is still defined, and does nothing.
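+//
+// For example (a sketch; we import a pointer, which is cheap to copy):
+
+   void scale(ZZ *x, long n, const ZZ& c)
+   {
+      const ZZ *cp = &c;
+
+      NTL_EXEC_RANGE(n, first, last)
+      NTL_IMPORT(cp)
+
+         for (long i = first; i < last; i++)
+            mul(x[i], x[i], *cp);
+
+      NTL_EXEC_RANGE_END
+   }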
+
+
+// class PartitionInfo: A helper class to facilitate partitioning an interval
+// into subintervals.  NOTE: this class is available, even when
+// NTL_THREAD_BOOST=off.
+
+class PartitionInfo {
+public:
+
+   explicit
+   PartitionInfo(long sz, long nt = AvailableThreads());
+   // partitions [0..sz) into at most nt subintervals.  sz may be 0 or
+   // negative, in which case the number of subintervals is 0.
+
+   long NumIntervals() const;
+   // return the number of subintervals
+
+   void interval(long& first, long& last, long i) const;
+   // [first..last) is the ith interval, where i is in [0..NumIntervals()).  No
+   // range checking is performed.
+
+};
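+
+// For example (illustrative):
+
+   PartitionInfo pinfo(100, 4);   // partition [0..100) into at most 4 pieces
+
+   long first, last;
+   for (long i = 0; i < pinfo.NumIntervals(); i++) {
+      pinfo.interval(first, last, i);
+      // ... process the subinterval [first..last) ...
+   }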
+
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/BasicThreadPool.txt b/thirdparty/linux/ntl/doc/BasicThreadPool.txt new file mode 100644 index 0000000000..a125eddcab --- /dev/null +++ b/thirdparty/linux/ntl/doc/BasicThreadPool.txt @@ -0,0 +1,377 @@ + + +/************************************************************************ + +MODULE: BasicThreadPool + +SUMMARY: + +A simple thread pool class BasicThreadPool, as well as some higher-level macros +which facilitite simple parallel for loops. + + +***************************************************************************/ + + +// ********************** Simple parallel for loops ************************** +// +// We begin with a description of the higher-level macros for writing simple +// parallel for loops. These facilitaties are activated only when NTL is +// configured with NTL_THREAD_BOOST=on (which implies NTL_THREADS=on). +// However, code that uses these facilties should still compile and run +// correctly even when NTL_THREAD_BOOST=off, or even when NTL_THREADS=off, so +// this is the simplest way to write parallel for loops across a range of +// compile-time and run-time environments. Note that if NTL_THREADS=on, C++11 +// features are reqired, but when NTL_THREADS=off, these features are not +// required, so the code should compile on older C++ compilers. +// +// Here is a simple recipe for writing parallel for loop. +// +// At the start of program execution, your program should execute + + SetNumThreads(nt); + +// You can choose nt to be any positive integer, but for best results, it +// should correspond to the number of available cores on your machine. +// [NOTE: if NTL_THREAD_BOOST=off, this function is still defined, but does +// nothing.] +// +// Now consider the following routine: + + void mul(ZZ *x, const ZZ *a, const ZZ *b, long n) + { + for (long i = 0; i < n; i++) + mul(x[i], a[i], b[i]); + } + +// We can parallelize it as follows: + + void mul(ZZ *x, const ZZ *a, const ZZ *b, long n) + { + NTL_EXEC_RANGE(n, first, last) + + for (long i = first; i < last; i++) + mul(x[i], a[i], b[i]); + + NTL_EXEC_RANGE_END + } + +// NTL_EXEC_RANGE and NTL_EXEC_RANGE_END are macros that just "do the right +// thing". If there are nt threads available, the interval [0..n) will be +// partitioned into (up to) nt subintervals, and a different thread will be +// used to process each subinterval. You still have to write the for loop +// yourself: the macro just declares and initializes variables "first" and +// "last" (or whatever you want to call them) of type long that represent the +// subinterval [first..last) to be processed by one thread. +// +// Note that the current thread participates as one of the nt available +// threads, and that the current thread will wait for all other participating threads +// to finish their task before proceeding. The current thread can be identified +// as the one with first == 0. +// +// Withing the "body" of this construct, you can freely reference any variables +// that are visible at this point. This is implemented using the C++ lambda +// feature (capturing all variables by reference). +// +// This construct will still work even if threads are disabled, in which case +// it runs single-threaded with first=0 and last=n. +// +// Note that the code within the EXEC_RANGE body could call other routines that +// themselves attempt to execute an EXEC_RANGE: if this happens, the latter +// EXEC_RANGE will detect this and run single-threaded. 
+// +// You may wish to do other things within the EXEC_RANGE body than just execute +// a loop. One thing you may want to do is to declare variables. Another +// thing you may want to do is setup a local context for a ZZ_p modulus (or +// other type of modulus). Here is an example of doing this: + + + void mul(ZZ_p *x, const ZZ_p *a, const ZZ_p *b, long n) + { + ZZ_pContext context; + context.save(); + + NTL_EXEC_RANGE(n, first, last) + + context.restore(); + + for (long i = first; i < last; i++) + mul(x[i], a[i], b[i]); + + NTL_EXEC_RANGE_END + } + + +// Another useful function is AvailableThreads(), which will return the number +// of available threads. If threads or thread boosting is not enabled, this +// will return 1. Even if thread boosting is enabled, this may return 1 if for +// whatever reason, the thread pool is not available for use (for example, +// SetNumThreads was never called, or the thread pool is already active). +// +// A lower-level set of tools is available, which allow you to simply run a +// specified number of threads. Assuming nt <= AvailableThreads(), the code + + NTL_EXEC_INDEX(nt, index) + + ... code ... + + NTL_EXEC_INDEX_END + +// will execute the body on nt different threads, each with a unique index in +// the range [0..nt). A variable named "index" (or whatever name you specify) +// of type long will hold the given index. Just as with EXEC_RANGE, the current +// thread will participate as one of the nt threads, and will always be +// assigned an index of 0. +// +// This tool is useful if you need to manage memory a bit more carefully. For +// example, the following code will compute an inner product using all +// available threads: + + ZZ InnerProd(const ZZ *a, const ZZ *b, long n) + { + PartitionInfo pinfo(n); + + long cnt = pinfo.NumIntervals(); + + Vec acc; + acc.SetLength(cnt); + + NTL_EXEC_INDEX(cnt, index) + + long first, last; + pinfo.interval(first, last, index); + + ZZ& sum = acc[index]; + sum = 0; + for (long i = first; i < last; i++) + MulAddTo(sum, a[i], b[i]); + + NTL_EXEC_INDEX_END + + ZZ sum; + sum = 0; + for (long i = 0; i < cnt; i++) + sum += acc[i]; + + return sum; + } + +// This example also illustrates the class PartitionInfo, which is useful for +// partitioning a large interval into smaller intervals (it is used internally +// by EXEC_RANGE). The constructor takes a single argument (in this example n) +// and computes a partition of [0..n) into nearly equally sized subintervals. +// The method NumIntervals() returns the number of subintervals, and the method +// interval(first, last, index) sets first and last according to the endpoints +// of the subinterval [first..last) with the given index. +// +// So in this example, cnt threads will run, each accumulating a sum into a +// corresponding element of the vector acc, and afterwords, these elements are +// summed. +// +// Note that if threads are not enabled or otherwise unavailable, the above +// code will compile and run correctly (just using one thread). +// +// Finally, there is a "guarded" version of NTL_EXEC_RANGE called +// NTL_GEXEC_RANGE. This allows one to dynamically "guard" against parallel +// execution. For example, on very small problems the runtime overhead of a +// parallel for loop may not be worthwhile, or in other situations parallel +// execution could cause incorrect behavior. See below for details. 
+ + +// ************************** Thread Pools ****************************** +// +// The above facilities are built on top of a more general thread pool class, +// which you may use for your own purposes. +// +// You create a thread pool by constructing a BasicThreadPool object. For +// example: + + long nthreads = 4; + BasicThreadPool pool(nthreads); + +// creates a thread pool of 4 threads. These threads will exist until the +// destructor for pool is called. +// +// The simplest way to use a thread pools is as follows. Suppose you have a +// task that consists of sz subtasks, indexed 0..sz-1. Then you can write: + + pool.exec_range(sz, + [&](long first, long last) { + for (long i = first; i < last; i++) { + ... code to process subtask i ... + } + } + ); + +// The second argument to exec_range is a C++11 "lambda". The "[&]" indicates +// that all local variables in the calling context are captured by reference, +// so the lambda body can reference all visible local variables directly. +// C++11 provides other methods for capturing local variables. The interval +// [0..sz) is partitioned into subintervals of the form [first..last), which +// are processed by the code in the supplied lambda. +// +// A lower-level interface is also provided. One can write: + + pool.exec_index(cnt, + [&](long index) { + ... code to process index i ... + } + ); + +// This will activate exactly cnt threads with indices 0..cnt-1, and execute +// the given code on each index. The parameter cnt must not exceed nthreads, +// otherwise an error is raised. + + +// ==================================================================== +// +// NOTES: +// +// When one activates a thread pool with nthreads threads, the *current* thread +// (the one activating the pool) will also participate in the computation. +// This means that the thread pool only contains nthreads-1 other threads. +// +// If, during an activation, any thread throws an exception, it will be caught +// and rethrown in the activating thread when all the threads complete. If +// more than one thread throws an exception, the first one that is caught is +// the one that is rethrown. +// +// Methods are also provided for adding, deleting, and moving threads in and +// among thread pools. +// +// If NTL_THREADS=off, the corresponding header file may be included, but the +// BasicThreadPool class is not defined. +// +// Unlike most classes in NTL, the BasicThreadPool is not relocatable and hence +// cannot be used in a Vec. One should first wrap it in a pointer class, such +// as UniquePtr. 
+ + + +// class BasicThreadPool: provided basic functionality for thread pools + +class BasicThreadPool { +private: + + BasicThreadPool(const BasicThreadPool&); // disabled + void operator=(const BasicThreadPool&); // disabled + +public: + + explicit + BasicThreadPool(long nthreads); + // creates a pool with nthreads threads, including the current thread + // (so nthreads-1 other threads get created) + + template + void exec_range(long sz, const Fct& fct); + // activate by range (see example usage above) + + template + void exec_index(long cnt, const Fct& fct); + // activate by index (see example usage above) + + void add(long n = 1); + // add n threads to the pool + + long NumThreads() const; + // return number of threads (including current thread) + + void remove(long n = 1); + // remove n threads from the pool + + void move(BasicThreadPool& other, long n = 1) + // move n threads from other pool to this pool + + bool active() const; + // indicates an activation is in process: invoking any of the methods + // exec_index, exec_range, add, remove, move, or the destructor + // whie active will raise an error + + template + static void relaxed_exec_range(BasicThreadPool *pool, long sz, const Fct& fct); + // similar to pool->exec_range(sz, fct), but will still work even + // if !pool or pool->active(), using just the current thread + + template + static void relaxed_exec_index(BasicThreadPool *pool, long cnt, const Fct& fct); + // similar to pool->exec_index(cnt, fct), but will still work even + // if !pool or pool->active(), provided cnt <= 1, using just the current thread + +}; + + + + +// THREAD BOOSTING FEATURES: + +void SetNumThreads(long nt); +// convenience routine to set NTL's thread pool. +// If called more than once, the old thread pool is destroyed and +// replaced by a new one. +// If NTL_THREAD_BOOST=off, then this is still defined, but does nothing. + +long AvailableThreads(); +// Number of threads currently availble to use in NTL's thread pool. This is +// always at least 1 (for the current thread). +// If NTL_THREAD_BOOST=off, then this is still defined, and always returns 1. + +BasicThreadPool *GetThreadPool(); +void ResetThreadPool(BasicThreadPool *pool = 0); +BasicThreadPool *ReleaseThreadPool(); +// Routines to get and set NTL's thread pool. The interfaces parallel NTL's +// UniquePtr class, and indeed, behind the scenes, NTL's thread pool is stored +// as a UniquePtr. +// These are only declared when NTL_THREAD_BOOST=on. + + +#define NTL_EXEC_RANGE(sz, first, last) ... +#define NTL_EXEC_RANGE_END ... +#define NTL_EXEC_INDEX(cnt, index) ... +#define NTL_EXEC_INDEX_END ... +// convenience macros to implement "parallel for loops" using NTL's thread +// pool. See examples above for usage. If NTL_THREAD_BOOST=off, then these +// are still defined, and code will run on a single thread + + +#define NTL_GEXEC_RANGE(seq, sz, first, last) ... +#define NTL_GEXEC_RANGE_END ... +// "guarded" version of NTL_EXEC_RANGE: if seq evaluates to true, the code runs +// on a single thread. This is useful in avoiding situations where the +// overhead of a parallel loop is too high. If seq evaluates to the constant +// true, a good compiler will optimize code to run on a single thread, with no +// overhead. + +#define NTL_IMPORT(x) +// To be used in conjunction with NTL_EXEC_RANGE and friends. When +// NTL_THREAD_BOOST=on, this will copy the variable named x from the enclosing +// scope to a local copy. This should only be used for types with cheap +// copies, such as scalars and pointers. 
In some situations, this allows the +// compiler to optimize a bit more aggressively. One or more of these may be +// placed right after an NTL_EXEC_RANGE. +// When NTL_THREAD_BOOST=off, this is still defined, and does nothing. + + +// class PartitionInfo: A helper class to facilitate partitioning an interval +// into subintervals. NOTE: this class is available, even when +// NTL_THREAD_BOOST=off. + +class PartitionInfo { +public: + + explicit + PartitionInfo(long sz, long nt = AvailableThreads()); + // partitions [0..sz) into at most nt subintervals. sz may be 0 or + // negative, in which case the number of subintervals is 0. + + long NumIntervals() const; + // return the number of subintervals + + void interval(long& first, long& last, long i) const; + // [first..last) is the ith interval, where i in [0..NumInvervals()). No + // range checking is performed. + +}; + + + diff --git a/thirdparty/linux/ntl/doc/GF2.cpp.html b/thirdparty/linux/ntl/doc/GF2.cpp.html new file mode 100644 index 0000000000..b78a501e4f --- /dev/null +++ b/thirdparty/linux/ntl/doc/GF2.cpp.html @@ -0,0 +1,251 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/GF2.cpp.html + + + + +
+
+/**************************************************************************\
+
+MODULE: GF2
+
+SUMMARY:
+
+The class GF2 represents the field GF(2).
+Computationally speaking, it is not a particularly useful class.
+Its main use is to make the interfaces to the various finite
+field classes as uniform as possible.
+
+The header file for GF2 also declares the class ref_GF2, which
+is used to represent non-const references to GF2's, such as
+those obtained from indexing a vec_GF2, which "packs" GF2's
+into words.
+
+There are implicit conversions from ref_GF2 to const GF2
+and from GF2& to ref_GF2.  Therefore, if you want to declare
+a function that takes a non-const reference to a GF2, you
+should declare the parameter of type ref_GF2: this will
+allow you to pass variables of type GF2 as well as
+elements of vec_GF2's obtained through indexing.
+
+For all functions defined below which take a parameter of type
+GF2&, there is also a function that takes a parameter of type ref_GF2.
+Theoretically, we could have just defined the functions that take
+the ref_GF2 parameter type, because of the implicit conversion
+from GF2& to ref_GF2; however, for efficiency reasons, both
+flavors are actually provided.   It is recommended that higher-level
+functions use the ref_GF2 type exclusively.
+
+
+\**************************************************************************/
+
+#include <NTL/ZZ.h>
+#include <NTL/vector.h>
+
+
+class GF2 {
+public:
+  
+   GF2(); // initial value 0
+
+   GF2(const GF2& a); // copy constructor
+   explicit GF2(long a); // promotion constructor
+
+   GF2& operator=(const GF2& a); // assignment
+   GF2& operator=(long a); // assignment
+
+   // typedefs to aid in generic programming
+   typedef long rep_type;
+   typedef GF2Context context_type;
+   typedef GF2Bak bak_type;
+   typedef GF2Push push_type;
+   typedef GF2X poly_type;
+
+};
+
+
+long rep(GF2 a); // read-only access to representation of a
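+
+// A small usage sketch (illustrative):
+
+   GF2 a(1), b;        // a = 1, b = 0 by default
+   b = a + a;          // b = 0, since 1 + 1 = 0 in GF(2)
+   long r = rep(b);    // r = 0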
+
+
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+long operator==(GF2 a, GF2 b);
+long operator!=(GF2 a, GF2 b);
+
+long IsZero(GF2 a);  // test for 0
+long IsOne(GF2 a);  // test for 1
+
+// PROMOTIONS: operators ==, != promote long to GF2 on (a, b).
+
+
+/**************************************************************************\
+
+                                    Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2 operator+(GF2 a, GF2 b);
+GF2 operator-(GF2 a, GF2 b);
+
+GF2 operator-(GF2 a); // unary -
+
+GF2& operator+=(GF2& x, GF2 a);
+GF2& operator+=(GF2& x, long a);
+
+GF2& operator-=(GF2& x, GF2 a);
+GF2& operator-=(GF2& x, long a);
+
+GF2& operator++(GF2& x);  // prefix
+void operator++(GF2& x, int);  // postfix
+
+GF2& operator--(GF2& x);  // prefix
+void operator--(GF2& x, int);  // postfix
+
+// procedural versions:
+
+
+void add(GF2& x, GF2 a, GF2 b); // x = a + b
+void sub(GF2& x, GF2 a, GF2 b); // x = a - b
+void negate(GF2& x, GF2 a); // x = -a
+
+// PROMOTIONS: binary +, -, and procedures add, sub promote
+// from long to GF2 on (a, b).
+
+
+/**************************************************************************\
+
+                                  Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2 operator*(GF2 a, GF2 b);
+
+GF2& operator*=(GF2& x, GF2 a);
+GF2& operator*=(GF2& x, long a);
+
+// procedural versions:
+
+void mul(GF2& x, GF2 a, GF2 b); // x = a * b
+
+void sqr(GF2& x, GF2 a); // x = a^2
+GF2 sqr(GF2 a);
+
+// PROMOTIONS: operator * and procedure mul promote from long to GF2
+// on (a, b).
+
+
+/**************************************************************************\
+
+                                  Division
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2 operator/(GF2 a, GF2 b);
+
+GF2& operator/=(GF2& x, GF2 a);
+GF2& operator/=(GF2& x, long a);
+
+// procedural versions:
+
+void div(GF2& x, GF2 a, GF2 b);
+// x = a/b
+
+void inv(GF2& x, GF2 a);
+GF2 inv(GF2 a);
+// x = 1/a
+
+// PROMOTIONS: operator / and procedure div promote from long to GF2
+// on (a, b).
+
+
+/**************************************************************************\
+
+                                  Exponentiation
+
+\**************************************************************************/
+
+
+void power(GF2& x, GF2 a, long e); // x = a^e (e may be negative)
+GF2 power(GF2 a, long e);
+
+
+/**************************************************************************\
+
+                               Random Elements
+
+\**************************************************************************/
+
+
+void random(GF2& x);
+GF2 random_GF2();
+// x = random element in GF2.  Uses RandomBnd from ZZ.
+
+
+/**************************************************************************\
+
+                                Input/Output
+
+\**************************************************************************/
+
+
+ostream& operator<<(ostream& s, GF2 a);
+
+istream& operator>>(istream& s, GF2& x);
+// a ZZ is read and reduced mod 2
+
+
+/**************************************************************************\
+
+                               Miscellany
+
+\**************************************************************************/
+
+
+void clear(GF2& x); // x = 0
+void set(GF2& x); // x = 1
+
+void GF2::swap(GF2& x);
+void swap(GF2& x, GF2& y);
+// swap
+
+static GF2 GF2::zero();
+// GF2::zero() yields a read-only reference to zero
+
+static long GF2::modulus();
+// GF2::modulus() returns the value 2
+
+template<> class Vec<GF2>;
+// Forward declaration of the explicit specialization
+// of Vec<GF2>.  This specialization is defined in <NTL/vec_GF2.h>,
+// which must be included in source files that need to use Vec<GF2>.
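+//
+// For example (a sketch):
+//
+//    #include <NTL/vec_GF2.h>
+//
+//    Vec<GF2> v;
+//    v.SetLength(10);   // 10 bits, packed into machine words
+//    v[3] = 1;          // indexing yields a ref_GF2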
+
+GF2::GF2(INIT_NO_ALLOC_TYPE);
+// provided for consistency with other classes, initialize to zero
+
+GF2::GF2(INIT_ALLOC_TYPE);
+// provided for consistency with other classes, initialize to zero
+
+GF2::allocate();
+// provided for consistency with other classes, no action
+
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/GF2.txt b/thirdparty/linux/ntl/doc/GF2.txt new file mode 100644 index 0000000000..e0eb7d8182 --- /dev/null +++ b/thirdparty/linux/ntl/doc/GF2.txt @@ -0,0 +1,241 @@ + + +/**************************************************************************\ + +MODULE: GF2 + +SUMMARY: + +The class GF2 represents the field GF(2). +Computationally speaking, it is not a particularly useful class. +Its main use is to make the interfaces to the various finite +field classes as uniform as possible. + +The header file for GF2 also declares the class ref_GF2, which +use used to represent non-const references to GF2's, such as +those obtained from indexing a vec_GF2, which "packs" GF2's +into words. + +There are implicit conversions from ref_GF2 to const GF2 +and from GF2& to ref_GF2. Therefore, if you want to declare +a function that takes a non-const reference to a GF2, you +should declare the parameter of type ref_GF2: this will +allow you to pass variables of type GF2 as well as +elements of vec_GF2's obtained through indexing. + +For all functions defined below which take a parameter of type +GF2&, there is also a function that takes a parameter of type ref_GF2. +Theoretically, we could have just defined the functions that take +the ref_GF2 parameter type, because of the implicit conversion +from GF2& to ref_GF2; however, for efficiency reasons, both +flavors are actually provided. It is recommended that higher-level +functions use the ref_GF2 type exclusively. + + +\**************************************************************************/ + +#include +#include + + +class GF2 { +public: + + GF2(); // initial value 0 + + GF2(const GF2& a); // copy constructor + explicit GF2(long a); // promotion constructor + + GF2& operator=(const GF2& a); // assignment + GF2& operator=(long a); // assignment + + // typedefs to aid in generic programming + typedef long rep_type; + typedef GF2Context context_type; + typedef GF2Bak bak_type; + typedef GF2Push push_type; + typedef GF2X poly_type; + +}; + + +long rep(GF2 a); // read-only access to representation of a + + + + + +/**************************************************************************\ + + Comparison + +\**************************************************************************/ + + +long operator==(GF2 a, GF2 b); +long operator!=(GF2 a, GF2 b); + +long IsZero(GF2 a); // test for 0 +long IsOne(GF2 a); // test for 1 + +// PROMOTIONS: operators ==, != promote long to GF2 on (a, b). + + +/**************************************************************************\ + + Addition + +\**************************************************************************/ + +// operator notation: + +GF2 operator+(GF2 a, GF2 b); +GF2 operator-(GF2 a, GF2 b); + +GF2 operator-(GF2 a); // unary - + +GF2& operator+=(GF2& x, GF2 a); +GF2& operator+=(GF2& x, long a); + +GF2& operator-=(GF2& x, GF2 a); +GF2& operator-=(GF2& x, long a); + +GF2& operator++(GF2& x); // prefix +void operator++(GF2& x, int); // postfix + +GF2& operator--(GF2& x); // prefix +void operator--(GF2& x, int); // postfix + +// procedural versions: + + +void add(GF2& x, GF2 a, GF2 b); // x = a + b +void sub(GF2& x, GF2 a, GF2 b); // x = a - b +void negate(GF2& x, GF2 a); // x = -a + +// PROMOTIONS: binary +, -, and procedures add, sub promote +// from long to GF2 on (a, b). 
+ + +/**************************************************************************\ + + Multiplication + +\**************************************************************************/ + +// operator notation: + +GF2 operator*(GF2 a, GF2 b); + +GF2& operator*=(GF2& x, GF2 a); +GF2& operator*=(GF2& x, long a); + +// procedural versions: + +void mul(GF2& x, GF2 a, GF2 b); // x = a * b + +void sqr(GF2& x, GF2 a); // x = a^2 +GF2 sqr(GF2 a); + +// PROMOTIONS: operator * and procedure mul promote from long to GF2 +// on (a, b). + + +/**************************************************************************\ + + Division + +\**************************************************************************/ + +operator notation: + +GF2 operator/(z_p a, GF2 b); + +GF2& operator/=(GF2& x, GF2 a); +GF2& operator/=(GF2& x, long a); + +procedural versions: + +void div(GF2& x, GF2 a, GF2 b); +// x = a/b + +void inv(GF2& x, GF2 a); +GF2 inv(GF2 a); +// x = 1/a + +// PROMOTIONS: operator / and procedure div promote from long to GF2 +// on (a, b). + + +/**************************************************************************\ + + Exponentiation + +\**************************************************************************/ + + +void power(GF2& x, GF2 a, long e); // x = a^e (e may be negative) +GF2 power(GF2 a, long e); + + +/**************************************************************************\ + + Random Elements + +\**************************************************************************/ + + +void random(GF2& x); +GF2 random_GF2(); +// x = random element in GF2. Uses RandomBnd from ZZ. + + +/**************************************************************************\ + + Input/Output + +\**************************************************************************/ + + +ostream& operator<<(ostream& s, GF2 a); + +istream& operator>>(istream& s, GF2& x); +// a ZZ is read and reduced mod 2 + + +/**************************************************************************\ + + Miscellany + +\**************************************************************************/ + + +void clear(GF2& x); // x = 0 +void set(GF2& x); // x = 1 + +void GF2::swap(GF2& x); +void swap(GF2& x, GF2& y); +// swap + +static GF2 GF2::zero(); +// GF2::zero() yields a read-only reference to zero + +static long GF2::modulus(); +// GF2::modulus() returns the value 2 + +template<> class Vec; +// Forward declaration of the explicit specialization +// of Vec. This specialization is defined in , +// which must be included in source files that need to use Vec. + +GF2::GF2(INIT_NO_ALLOC_TYPE); +// provided for consistency with other classes, initialize to zero + +GF2::GF2(INIT_ALLOC_TYPE); +// provided for consistency with other classes, initialize to zero + +GF2::allocate(); +// provided for consistency with other classes, no action + + + diff --git a/thirdparty/linux/ntl/doc/GF2E.cpp.html b/thirdparty/linux/ntl/doc/GF2E.cpp.html new file mode 100644 index 0000000000..708a614ffd --- /dev/null +++ b/thirdparty/linux/ntl/doc/GF2E.cpp.html @@ -0,0 +1,419 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/GF2E.cpp.html + + + + +
+
+/**************************************************************************\
+
+MODULE: GF2E
+
+SUMMARY:
+
+The class GF2E is used to represent polynomials in F_2[X] modulo a
+polynomial P.  The modulus P may be any polynomial with deg(P) > 0,
+not necessarily irreducible.  
+
+Objects of the class GF2E are represented as a GF2X of degree < deg(P).
+
+An executing program maintains a "current modulus", which is set to P using
+GF2E::init(P).  The current modulus *must* be initialized before any operations
+on GF2E's are performed.  The modulus may be changed, and a mechanism is provided
+for saving and restoring a modulus (see classes GF2EPush and GF2EContext below).
+
+
+NOTE: if P is a trinomial X^n + X^k + 1, or a pentanomial
+X^n + X^k3 + X^k2 + X^k1 + 1, or of the form X^n + g, where
+g has low degree, then performance will be somewhat improved.
+Such polynomials are constructed by the routines
+BuildSparseIrred and BuildIrred in GF2XFactoring.
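+
+For example (a sketch; BuildSparseIrred is declared in GF2XFactoring):
+
+   GF2X P;
+   BuildSparseIrred(P, 128);   // a sparse irreducible polynomial of degree 128
+   GF2E::init(P);              // install P as the current modulus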
+
+
+\**************************************************************************/
+
+#include <NTL/GF2X.h>
+#include <NTL/SmartPtr.h>
+
+
+class GF2E {
+public:
+  
+   GF2E(); // initial value 0
+
+   GF2E(const GF2E& a); // copy constructor
+   explicit GF2E(GF2 a); // promotion constructor
+   explicit GF2E(long a); // promotion constructor
+  
+   GF2E& operator=(const GF2E& a); // assignment
+   GF2E& operator=(GF2 a); // assignment
+   GF2E& operator=(long a); // assignment
+  
+   ~GF2E(); // destructor
+
+   void init(const GF2X& P);
+   // GF2E::init(P) initializes the current modulus to P;
+   // required: deg(P) >= 1.
+  
+   static const GF2XModulus& modulus();
+   // GF2E::modulus() yields read-only reference to the current modulus
+
+   static long degree();
+   // GF2E::degree() returns deg(P)
+
+   // typedefs to aid generic programming
+   typedef GF2X rep_type;
+   typedef GF2EContext context_type;
+   typedef GF2EBak bak_type;
+   typedef GF2EPush push_type;
+   typedef GF2EX poly_type;
+
+};
+
+
+const GF2X& rep(const GF2E& a); // read-only access to representation of a
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+long operator==(const GF2E& a, const GF2E& b);
+long operator!=(const GF2E& a, const GF2E& b);
+
+long IsZero(const GF2E& a);  // test for 0
+long IsOne(const GF2E& a);  // test for 1
+
+// PROMOTIONS: ==, != promote {long, GF2} to GF2E on (a, b).
+
+
+/**************************************************************************\
+
+                                    Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2E operator+(const GF2E& a, const GF2E& b);
+
+GF2E operator-(const GF2E& a, const GF2E& b);
+GF2E operator-(const GF2E& a);
+
+GF2E& operator+=(GF2E& x, const GF2E& a);
+GF2E& operator+=(GF2E& x, GF2 a);
+GF2E& operator+=(GF2E& x, long a);
+
+GF2E& operator++(GF2E& x); // prefix
+void operator++(GF2E& x, int); // postfix
+
+GF2E& operator-=(GF2E& x, const GF2E& a);
+GF2E& operator-=(GF2E& x, GF2 a);
+GF2E& operator-=(GF2E& x, long a);
+
+GF2E& operator--(GF2E& x); // prefix
+void operator--(GF2E& x, int); // postfix
+
+// procedural versions:
+
+void add(GF2E& x, const GF2E& a, const GF2E& b); // x = a + b
+void sub(GF2E& x, const GF2E& a, const GF2E& b); // x = a - b = a + b
+void negate(GF2E& x, const GF2E& a); // x = - a = a
+
+// PROMOTIONS: +, -, add, sub promote {long, GF2} to GF2E on (a, b).
+
+
+/**************************************************************************\
+
+                                  Multiplication
+
+\**************************************************************************/
+
+
+// operator notation:
+
+GF2E operator*(const GF2E& a, const GF2E& b);
+
+GF2E& operator*=(GF2E& x, const GF2E& a);
+GF2E& operator*=(GF2E& x, GF2 a);
+GF2E& operator*=(GF2E& x, long a);
+
+// procedural versions:
+
+
+void mul(GF2E& x, const GF2E& a, const GF2E& b); // x = a * b
+
+void sqr(GF2E& x, const GF2E& a); // x = a^2
+GF2E sqr(const GF2E& a);
+
+// PROMOTIONS: *, mul promote {long, GF2} to GF2E on (a, b).
+
+
+/**************************************************************************\
+
+                                     Division
+
+\**************************************************************************/
+
+
+// operator notation:
+
+GF2E operator/(const GF2E& a, const GF2E& b);
+
+GF2E& operator/=(GF2E& x, const GF2E& a);
+GF2E& operator/=(GF2E& x, GF2 a);
+GF2E& operator/=(GF2E& x, long a);
+
+
+// procedural versions:
+
+void div(GF2E& x, const GF2E& a, const GF2E& b);
+// x = a/b.  If b is not invertible, an error is raised.
+
+void inv(GF2E& x, const GF2E& a);
+GF2E inv(const GF2E& a);
+// x = 1/a
+
+// PROMOTIONS: /, div promote {long, GF2} to GF2E on (a, b).
+
+
+/**************************************************************************\
+
+                                  Exponentiation
+
+\**************************************************************************/
+
+
+
+void power(GF2E& x, const GF2E& a, const ZZ& e);
+GF2E power(const GF2E& a, const ZZ& e);
+
+void power(GF2E& x, const GF2E& a, long e);
+GF2E power(const GF2E& a, long e);
+
+// x = a^e (e may be negative)
+
+
+
+/**************************************************************************\
+
+                               Random Elements
+
+\**************************************************************************/
+
+
+void random(GF2E& x);
+GF2E random_GF2E();
+// x = random element in GF2E.
+
+/**************************************************************************\
+
+                                  Traces
+
+\**************************************************************************/
+
+
+void trace(GF2& x, const GF2E& a);  // x = trace of a
+GF2 trace(const GF2E& a);
+
+
+
+/**************************************************************************\
+
+                                Input/Output
+
+\**************************************************************************/
+
+
+ostream& operator<<(ostream& s, const GF2E& a);
+
+istream& operator>>(istream& s, GF2E& x);
+// a GF2X is read and reduced mod P
+
+
+/**************************************************************************\
+
+                       Modulus Switching
+
+A class GF2EPush is provided for "backing up" the current modulus
+and installing a new one.
+
+Here is what you do to save the current modulus, temporarily
+set it to P, and automatically restore it:
+
+   {
+      GF2EPush push(P);
+
+      ...
+
+   }
+
+The constructor for push will save the current modulus, and install P as the
+current modulus.  The destructor for push will restore the old modulus when the
+scope enclosing it exits.  This is the so-called RAII (resource acquisition is
+initialization) paradigm.
+
+You could also do the following:
+
+   {
+      GF2EPush push; // just backup current modulus
+
+        ...
+
+      GF2E::init(P1); // install P1
+
+        ...
+
+      GF2E::init(P2); // install P2
+
+      // reinstall original modulus at close of scope
+   }
+
+      
+The GF2EPush interface is good for implementing simple stack-like
+modulus "context switching".  For more general context switching,
+see GF2EContext below.  There is also an older GF2EBak class
+that may also be useful.
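+
+For illustration, here is one possible way to use a GF2EContext to hand
+the current modulus from one scope to another (a sketch; the variable
+names and the use of BuildSparseIrred are ours):
+
+   GF2X P;
+   BuildSparseIrred(P, 128); // irreducible P of degree 128 (GF2XFactoring)
+   GF2E::init(P);
+
+   GF2EContext context;
+   context.save();           // context now holds the current modulus
+
+   // ... other code may call GF2E::init() with different moduli ...
+
+   context.restore();        // reinstall the saved modulus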
+
+..........................................................................
+
+It is critical that GF2E objects created under one GF2E modulus are not used in
+any non-trivial way "out of context", i.e., under a different (or undefined)
+GF2E modulus.  However, for ease of use, some operations may be safely
+performed out of context.  These safe operations include: the default and copy
+constructor, the destructor, and the assignment operator.  In addition, it is
+generally safe to read any GF2E object out of context (i.e., printing it out, or
+fetching its underlying representation using the rep() function).
+
+Any unsafe uses out of context are not in general checked, and may
+lead to unpredictable behavior.
+
+
+NOTE: the implementation of Vec<GF2E> is specialized to manage memory more
+efficiently than in the default implementation of Vec<T>.  Specifically,
+contiguous elements in a Vec<GF2E> are allocated in a contiguous region of
+memory.  This reduces the number of calls to the memory allocator, and --- more
+significantly --- leads to greater locality of reference.  A consequence of
+this implementation is that any calls to SetLength on a Vec<GF2E> object will
+need to use information about the current modulus, and so such calls should
+only be done "in context".  That said, it is still safe to construct a
+Vec<GF2E> using the default or copy constructor, and to assign or append one
+Vec<GF2E> to another "out of context".
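+
+As a small illustration of these rules (a sketch; P1 and P2 stand for two
+arbitrary moduli):
+
+   GF2E::init(P1);
+   Vec<GF2E> v;
+   v.SetLength(10);          // OK: "in context", uses the modulus P1
+
+   GF2E::init(P2);           // switch moduli
+   Vec<GF2E> w(v);           // safe out of context: copy constructor
+   w = v;                    // safe out of context: assignment
+   // mul(w[0], v[0], v[1]); // unsafe: arithmetic on P1-elements under P2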
+
+\**************************************************************************/
+
+
+// A convenient interface for common cases
+
+class GF2EPush {
+
+public:
+GF2EPush();  // backup current modulus
+explicit GF2EPush(const GF2X& P);
+explicit GF2EPush(const GF2EContext& context);
+  // backup current modulus and install the given one
+
+private:
+GF2EPush(const GF2EPush&); // disabled
+void operator=(const GF2EPush&); // disabled
+
+};
+
+
+
+// more general context switching:
+// A GF2EContext object has a modulus Q (possibly "null"),
+
+class GF2EContext {
+
+
+public:
+
+GF2EContext(); // Q = "null"
+explicit GF2EContext(const GF2X& P); // Q = P
+
+void save(); // Q = CurrentModulus
+void restore() const; // CurrentModulus = Q
+
+GF2EContext(const GF2EContext&);  // copy
+GF2EContext& operator=(const GF2EContext&); // assignment
+~GF2EContext(); // destructor
+
+
+};
+
+
+// An older interface:
+// To describe this logic, think of a GF2EBak object
+// as having two components: a modulus Q (possibly "null") and
+// an "auto-restore bit" b.
+
+
+class GF2EBak {
+public:
+
+
+   GF2EBak();  // Q = "null", b = 0
+
+   ~GF2EBak();  // if (b) CurrentModulus = Q
+
+   void save();  // Q = CurrentModulus, b = 1
+   void restore();  // CurrentModulus = Q, b = 0
+
+
+private:
+   GF2EBak(const GF2EBak&);  // copy disabled
+   void operator=(const GF2EBak&);  // assignment disabled
+};
+
+
+
+
+
+
+/**************************************************************************\
+
+                               Miscellany
+
+\**************************************************************************/
+
+void clear(GF2E& x); // x = 0
+void set(GF2E& x); // x = 1
+
+static const GF2E& GF2E::zero();
+// GF2E::zero() yields a read-only reference to zero
+
+static long GF2E::WordLength();
+// GF2E::WordLength() returns # of words needed to store a polynomial of
+// degree < GF2E::degree()
+
+void GF2E::swap(GF2E& x);
+void swap(GF2E& x, GF2E& y);
+// swap (done by "pointer swapping", if possible).
+
+static const ZZ& GF2E::cardinality();
+// yields the cardinality, i.e., 2^{GF2E::degree()}
+
+
+GF2E::GF2E(INIT_NO_ALLOC_TYPE);
+// special constructor: invoke as GF2E x(INIT_NO_ALLOC);
+// initializes x to 0, but allocates no space (this is now the default)
+
+GF2E::GF2E(INIT_ALLOC_TYPE);
+// special constructor: invoke as GF2E x(INIT_ALLOC);
+// initializes x to 0, but allocates space
+
+GF2E::allocate();
+// useful in conjunction with the INIT_NO_ALLOC constructor:
+// x.allocate() will pre-allocate space for x, using the
+// current modulus
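+
+// For illustration, a small sketch of these allocation controls (assumes
+// the current modulus has already been initialized):
+//
+//    GF2E x(INIT_NO_ALLOC);  // x = 0, no space allocated (the default)
+//    x.allocate();           // pre-allocate space under the current modulus
+//    GF2E y(INIT_ALLOC);     // y = 0, with space allocated up front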
+
+
+
+

diff --git a/thirdparty/linux/ntl/doc/GF2E.txt b/thirdparty/linux/ntl/doc/GF2E.txt
new file mode 100644
index 0000000000..6f96e1baeb
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/GF2E.txt
@@ -0,0 +1,409 @@
[GF2E.txt: plain-text version of the GF2E.cpp.html content above; duplicate body omitted]

diff --git a/thirdparty/linux/ntl/doc/GF2EX.cpp.html b/thirdparty/linux/ntl/doc/GF2EX.cpp.html
new file mode 100644
index 0000000000..333e3a740f
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/GF2EX.cpp.html
@@ -0,0 +1,899 @@
+/**************************************************************************\
+
+MODULE: GF2EX
+
+SUMMARY:
+
+The class GF2EX represents polynomials over GF2E,
+and so can be used, for example, for arithmetic in GF(2^n)[X].
+However, except where mathematically necessary (e.g., GCD computations),
+GF2E need not be a field.
+
+\**************************************************************************/
+
+#include <NTL/GF2E.h>
+#include <NTL/vec_GF2E.h>
+
+class GF2EX {
+public:
+
+   GF2EX(); // initial value 0
+
+   GF2EX(const GF2EX& a); // copy
+   explicit GF2EX(const GF2E& a);  // promotion
+   explicit GF2EX(GF2 a);
+   explicit GF2EX(long a);
+
+   GF2EX& operator=(const GF2EX& a); // assignment
+   GF2EX& operator=(const GF2E& a);
+   GF2EX& operator=(GF2 a);
+   GF2EX& operator=(long a);
+
+   ~GF2EX(); // destructor
+
+   GF2EX(INIT_MONO_TYPE, long i, const GF2E& c);
+   GF2EX(INIT_MONO_TYPE, long i, GF2 c);
+   GF2EX(INIT_MONO_TYPE, long i, long c);
+   // initialize to c*X^i, invoke as GF2EX(INIT_MONO, i, c)
+
+   GF2EX(INIT_MONO_TYPE, long i);
+   // initialize to X^i, invoke as GF2EX(INIT_MONO, i)
+
+
+
+
+   // typedefs to aid in generic programming
+
+   typedef GF2E coeff_type;
+   typedef GF2EXModulus modulus_type;
+   // ...
+
+};
+
+
+
+
+
+/**************************************************************************\
+
+                              Accessing coefficients
+
+The degree of a polynomial f is obtained as deg(f),
+where the zero polynomial, by definition, has degree -1.
+
+A polynomial f is represented as a coefficient vector.
+Coefficients may be accessed in one of two ways.
+
+The safe, high-level method is to call the function
+coeff(f, i) to get the coefficient of X^i in the polynomial f,
+and to call the function SetCoeff(f, i, a) to set the coefficient
+of X^i in f to the scalar a.
+
+One can also access the coefficients more directly via a lower level
+interface.  The coefficient of X^i in f may be accessed using
+subscript notation f[i].  In addition, one may write f.SetLength(n)
+to set the length of the underlying coefficient vector to n,
+and f.SetMaxLength(n) to allocate space for n coefficients,
+without changing the coefficient vector itself.
+
+After setting coefficients using this low-level interface,
+one must ensure that leading zeros in the coefficient vector
+are stripped afterwards by calling the function f.normalize().
+
+NOTE: the coefficient vector of f may also be accessed directly
+as f.rep; however, this is not recommended. Also, for a properly
+normalized polynomial f, we have f.rep.length() == deg(f)+1,
+and deg(f) >= 0  =>  f.rep[deg(f)] != 0.
+
+\**************************************************************************/
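+
+// For illustration, a small sketch of the two access methods described
+// above (assumes the GF2E modulus has been initialized):
+//
+//    GF2EX f, g;
+//
+//    SetCoeff(f, 3);        // high-level: f = X^3
+//    SetCoeff(f, 0, 1);     //             f = X^3 + 1
+//
+//    g.SetLength(4);        // low-level: room for X^0..X^3
+//    g[0] = 1;
+//    g[3] = 1;
+//    g.normalize();         // strip leading zeros; now g == f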
+
+
+
+long deg(const GF2EX& a);  // return deg(a); deg(0) == -1.
+
+const GF2E& coeff(const GF2EX& a, long i);
+// returns the coefficient of X^i, or zero if i not in range
+
+const GF2E& LeadCoeff(const GF2EX& a);
+// returns leading term of a, or zero if a == 0
+
+const GF2E& ConstTerm(const GF2EX& a);
+// returns constant term of a, or zero if a == 0
+
+void SetCoeff(GF2EX& x, long i, const GF2E& a);
+void SetCoeff(GF2EX& x, long i, GF2 a);
+void SetCoeff(GF2EX& x, long i, long a);
+// makes coefficient of X^i equal to a; error is raised if i < 0
+
+void SetCoeff(GF2EX& x, long i);
+// makes coefficient of X^i equal to 1;  error is raised if i < 0
+
+void SetX(GF2EX& x); // x is set to the monomial X
+
+long IsX(const GF2EX& a); // test if a = X
+
+
+
+
+GF2E& GF2EX::operator[](long i);
+const GF2E& GF2EX::operator[](long i) const;
+// indexing operators: f[i] is the coefficient of X^i ---
+// i should satisfy i >= 0 and i <= deg(f).
+// No range checking (unless NTL_RANGE_CHECK is defined).
+
+void GF2EX::SetLength(long n);
+// f.SetLength(n) sets the length of the underlying coefficient
+// vector to n --- after this call, indexing f[i] for i = 0..n-1
+// is valid.
+
+void GF2EX::normalize();  
+// f.normalize() strips leading zeros from coefficient vector of f
+
+void GF2EX::SetMaxLength(long n);
+// f.SetMaxLength(n) pre-allocates space for n coefficients.  The
+// polynomial that f represents is unchanged.
+
+
+
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+long operator==(const GF2EX& a, const GF2EX& b);
+long operator!=(const GF2EX& a, const GF2EX& b);
+
+long IsZero(const GF2EX& a); // test for 0
+long IsOne(const GF2EX& a); // test for 1
+
+// PROMOTIONS: ==, != promote {long,GF2,GF2E} to GF2EX on (a, b).
+
+/**************************************************************************\
+
+                                   Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2EX operator+(const GF2EX& a, const GF2EX& b);
+GF2EX operator-(const GF2EX& a, const GF2EX& b);
+GF2EX operator-(const GF2EX& a);
+
+GF2EX& operator+=(GF2EX& x, const GF2EX& a);
+GF2EX& operator+=(GF2EX& x, const GF2E& a);
+GF2EX& operator+=(GF2EX& x, GF2 a);
+GF2EX& operator+=(GF2EX& x, long a);
+
+
+GF2EX& operator++(GF2EX& x);  // prefix
+void operator++(GF2EX& x, int);  // postfix
+
+GF2EX& operator-=(GF2EX& x, const GF2EX& a);
+GF2EX& operator-=(GF2EX& x, const GF2E& a);
+GF2EX& operator-=(GF2EX& x, GF2 a);
+GF2EX& operator-=(GF2EX& x, long a);
+
+GF2EX& operator--(GF2EX& x);  // prefix
+void operator--(GF2EX& x, int);  // postfix
+
+// procedural versions:
+
+void add(GF2EX& x, const GF2EX& a, const GF2EX& b); // x = a + b
+void sub(GF2EX& x, const GF2EX& a, const GF2EX& b); // x = a - b
+void negate(GF2EX& x, const GF2EX& a); // x = - a
+
+// PROMOTIONS: +, -, add, sub promote {long,GF2,GF2E} to GF2EX on (a, b).
+
+
+
+/**************************************************************************\
+
+                               Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2EX operator*(const GF2EX& a, const GF2EX& b);
+
+GF2EX& operator*=(GF2EX& x, const GF2EX& a);
+GF2EX& operator*=(GF2EX& x, const GF2E& a);
+GF2EX& operator*=(GF2EX& x, GF2 a);
+GF2EX& operator*=(GF2EX& x, long a);
+
+
+// procedural versions:
+
+
+void mul(GF2EX& x, const GF2EX& a, const GF2EX& b); // x = a * b
+
+void sqr(GF2EX& x, const GF2EX& a); // x = a^2
+GF2EX sqr(const GF2EX& a);
+
+// PROMOTIONS: *, mul promote {long,GF2,GF2E} to GF2EX on (a, b).
+
+void power(GF2EX& x, const GF2EX& a, long e);  // x = a^e (e >= 0)
+GF2EX power(const GF2EX& a, long e);
+
+
+/**************************************************************************\
+
+                               Shift Operations
+
+LeftShift by n means multiplication by X^n
+RightShift by n means division by X^n
+
+A negative shift amount reverses the direction of the shift.
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2EX operator<<(const GF2EX& a, long n);
+GF2EX operator>>(const GF2EX& a, long n);
+
+GF2EX& operator<<=(GF2EX& x, long n);
+GF2EX& operator>>=(GF2EX& x, long n);
+
+// procedural versions:
+
+void LeftShift(GF2EX& x, const GF2EX& a, long n);
+GF2EX LeftShift(const GF2EX& a, long n);
+
+void RightShift(GF2EX& x, const GF2EX& a, long n);
+GF2EX RightShift(const GF2EX& a, long n);
+
+
+
+/**************************************************************************\
+
+                                  Division
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2EX operator/(const GF2EX& a, const GF2EX& b);
+GF2EX operator/(const GF2EX& a, const GF2E& b);
+GF2EX operator/(const GF2EX& a, GF2 b);
+GF2EX operator/(const GF2EX& a, long b);
+
+GF2EX operator%(const GF2EX& a, const GF2EX& b);
+
+GF2EX& operator/=(GF2EX& x, const GF2EX& a);
+GF2EX& operator/=(GF2EX& x, const GF2E& a);
+GF2EX& operator/=(GF2EX& x, GF2 a);
+GF2EX& operator/=(GF2EX& x, long a);
+
+GF2EX& operator%=(GF2EX& x, const GF2EX& a);
+
+// procedural versions:
+
+
+void DivRem(GF2EX& q, GF2EX& r, const GF2EX& a, const GF2EX& b);
+// q = a/b, r = a%b
+
+void div(GF2EX& q, const GF2EX& a, const GF2EX& b);
+void div(GF2EX& q, const GF2EX& a, const GF2E& b);
+void div(GF2EX& q, const GF2EX& a, GF2 b);
+void div(GF2EX& q, const GF2EX& a, long b);
+// q = a/b
+
+void rem(GF2EX& r, const GF2EX& a, const GF2EX& b);
+// r = a%b
+
+long divide(GF2EX& q, const GF2EX& a, const GF2EX& b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0
+
+long divide(const GF2EX& a, const GF2EX& b);
+// if b | a, returns 1; otherwise returns 0
+
+
+/**************************************************************************\
+
+                                   GCD's
+
+These routines are intended for use when GF2E is a field.
+
+\**************************************************************************/
+
+
+void GCD(GF2EX& x, const GF2EX& a, const GF2EX& b);
+GF2EX GCD(const GF2EX& a, const GF2EX& b);
+// x = GCD(a, b),  x is always monic (or zero if a==b==0).
+
+
+void XGCD(GF2EX& d, GF2EX& s, GF2EX& t, const GF2EX& a, const GF2EX& b);
+// d = gcd(a,b), a s + b t = d
+
+
+/**************************************************************************\
+
+                                  Input/Output
+
+I/O format:
+
+   [a_0 a_1 ... a_n],
+
+represents the polynomial a_0 + a_1*X + ... + a_n*X^n.
+
+On output, all coefficients will be polynomials of degree < GF2E::degree() and
+a_n not zero (the zero polynomial is [ ]).  On input, the coefficients
+are arbitrary polynomials which are reduced modulo GF2E::modulus(), and leading
+zeros stripped.
+
+\**************************************************************************/
+
+istream& operator>>(istream& s, GF2EX& x);
+ostream& operator<<(ostream& s, const GF2EX& a);
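+
+// For illustration (a sketch, not part of the interface): round-tripping
+// a polynomial through the format above, using std::stringstream.
+// With GF2E::degree() == 2, f = X + 1 prints as something like
+// [[1] [1]], since each coefficient is itself printed as a GF2X.
+//
+//    GF2EX f, g;
+//    SetCoeff(f, 1);     // f = X
+//    SetCoeff(f, 0, 1);  // f = X + 1
+//
+//    stringstream ss;
+//    ss << f;            // write f
+//    ss >> g;            // read it back; now g == f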
+
+
+/**************************************************************************\
+
+                              Some utility routines
+
+\**************************************************************************/
+
+
+void diff(GF2EX& x, const GF2EX& a); // x = derivative of a
+GF2EX diff(const GF2EX& a);
+
+void MakeMonic(GF2EX& x);
+// if x != 0 makes x into its monic associate; LeadCoeff(x) must be
+// invertible in this case
+
+void reverse(GF2EX& x, const GF2EX& a, long hi);
+GF2EX reverse(const GF2EX& a, long hi);
+
+void reverse(GF2EX& x, const GF2EX& a);
+GF2EX reverse(const GF2EX& a);
+
+// x = reverse of a[0]..a[hi] (hi >= -1);
+// hi defaults to deg(a) in second version
+
+void VectorCopy(vec_GF2E& x, const GF2EX& a, long n);
+vec_GF2E VectorCopy(const GF2EX& a, long n);
+// x = copy of coefficient vector of a of length exactly n.
+// input is truncated or padded with zeroes as appropriate.
+
+
+
+
+/**************************************************************************\
+
+                             Random Polynomials
+
+\**************************************************************************/
+
+void random(GF2EX& x, long n);
+GF2EX random_GF2EX(long n);
+// x = random polynomial of degree < n
+
+
+/**************************************************************************\
+
+                    Polynomial Evaluation and related problems
+
+\**************************************************************************/
+
+
+void BuildFromRoots(GF2EX& x, const vec_GF2E& a);
+GF2EX BuildFromRoots(const vec_GF2E& a);
+// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n = a.length()
+
+void eval(GF2E& b, const GF2EX& f, const GF2E& a);
+GF2E eval(const GF2EX& f, const GF2E& a);
+// b = f(a)
+
+void eval(GF2E& b, const GF2X& f, const GF2E& a);
+GF2E eval(const GF2X& f, const GF2E& a);
+// b = f(a); uses ModComp algorithm for GF2X
+
+void eval(vec_GF2E& b, const GF2EX& f, const vec_GF2E& a);
+vec_GF2E eval(const GF2EX& f, const vec_GF2E& a);
+//  b.SetLength(a.length()); b[i] = f(a[i]) for 0 <= i < a.length()
+
+void interpolate(GF2EX& f, const vec_GF2E& a, const vec_GF2E& b);
+GF2EX interpolate(const vec_GF2E& a, const vec_GF2E& b);
+// interpolates the polynomial f satisfying f(a[i]) = b[i].  
+
+/**************************************************************************\
+
+                       Arithmetic mod X^n
+
+Required: n >= 0; otherwise, an error is raised.
+
+\**************************************************************************/
+
+void trunc(GF2EX& x, const GF2EX& a, long n); // x = a % X^n
+GF2EX trunc(const GF2EX& a, long n);
+
+void MulTrunc(GF2EX& x, const GF2EX& a, const GF2EX& b, long n);
+GF2EX MulTrunc(const GF2EX& a, const GF2EX& b, long n);
+// x = a * b % X^n
+
+void SqrTrunc(GF2EX& x, const GF2EX& a, long n);
+GF2EX SqrTrunc(const GF2EX& a, long n);
+// x = a^2 % X^n
+
+void InvTrunc(GF2EX& x, const GF2EX& a, long n);
+GF2EX InvTrunc(const GF2EX& a, long n);
+// computes x = a^{-1} % X^n.  Must have ConstTerm(a) invertible.
+
+/**************************************************************************\
+
+                Modular Arithmetic (without pre-conditioning)
+
+Arithmetic mod f.
+
+All inputs and outputs are polynomials of degree less than deg(f), and
+deg(f) > 0.
+
+
+NOTE: if you want to do many computations with a fixed f, use the
+GF2EXModulus data structure and associated routines below for better
+performance.
+
+\**************************************************************************/
+
+void MulMod(GF2EX& x, const GF2EX& a, const GF2EX& b, const GF2EX& f);
+GF2EX MulMod(const GF2EX& a, const GF2EX& b, const GF2EX& f);
+// x = (a * b) % f
+
+void SqrMod(GF2EX& x, const GF2EX& a, const GF2EX& f);
+GF2EX SqrMod(const GF2EX& a, const GF2EX& f);
+// x = a^2 % f
+
+void MulByXMod(GF2EX& x, const GF2EX& a, const GF2EX& f);
+GF2EX MulByXMod(const GF2EX& a, const GF2EX& f);
+// x = (a * X) mod f
+
+void InvMod(GF2EX& x, const GF2EX& a, const GF2EX& f);
+GF2EX InvMod(const GF2EX& a, const GF2EX& f);
+// x = a^{-1} % f, error if a is not invertible
+
+long InvModStatus(GF2EX& x, const GF2EX& a, const GF2EX& f);
+// if (a, f) = 1, returns 0 and sets x = a^{-1} % f; otherwise,
+// returns 1 and sets x = (a, f)
+
+
+/**************************************************************************\
+
+                     Modular Arithmetic with Pre-Conditioning
+
+If you need to do a lot of arithmetic modulo a fixed f, build
+GF2EXModulus F for f.  This pre-computes information about f that
+speeds up subsequent computations.
+
+As an example, the following routine computes the product modulo f of a vector
+of polynomials.
+
+#include <NTL/GF2EX.h>
+
+void product(GF2EX& x, const vec_GF2EX& v, const GF2EX& f)
+{
+   GF2EXModulus F(f);
+   GF2EX res;
+   res = 1;
+   long i;
+   for (i = 0; i < v.length(); i++)
+      MulMod(res, res, v[i], F);
+   x = res;
+}
+
+NOTE: A GF2EX may be used wherever a GF2EXModulus is required,
+and a GF2EXModulus may be used wherever a GF2EX is required.
+
+
+\**************************************************************************/
+
+class GF2EXModulus {
+public:
+   GF2EXModulus(); // initially in an unusable state
+
+   GF2EXModulus(const GF2EX& f); // initialize with f, deg(f) > 0
+
+   GF2EXModulus(const GF2EXModulus&); // copy
+
+   GF2EXModulus& operator=(const GF2EXModulus&); // assignment
+
+   ~GF2EXModulus(); // destructor
+
+   operator const GF2EX& () const; // implicit read-only access to f
+
+   const GF2EX& val() const; // explicit read-only access to f
+};
+
+void build(GF2EXModulus& F, const GF2EX& f);
+// pre-computes information about f and stores it in F.  Must have
+// deg(f) > 0.  Note that the declaration GF2EXModulus F(f) is
+// equivalent to GF2EXModulus F; build(F, f).
+
+// In the following, f refers to the polynomial f supplied to the
+// build routine, and n = deg(f).
+
+
+long deg(const GF2EXModulus& F);  // return n=deg(f)
+
+void MulMod(GF2EX& x, const GF2EX& a, const GF2EX& b, const GF2EXModulus& F);
+GF2EX MulMod(const GF2EX& a, const GF2EX& b, const GF2EXModulus& F);
+// x = (a * b) % f; deg(a), deg(b) < n
+
+void SqrMod(GF2EX& x, const GF2EX& a, const GF2EXModulus& F);
+GF2EX SqrMod(const GF2EX& a, const GF2EXModulus& F);
+// x = a^2 % f; deg(a) < n
+
+void PowerMod(GF2EX& x, const GF2EX& a, const ZZ& e, const GF2EXModulus& F);
+GF2EX PowerMod(const GF2EX& a, const ZZ& e, const GF2EXModulus& F);
+
+void PowerMod(GF2EX& x, const GF2EX& a, long e, const GF2EXModulus& F);
+GF2EX PowerMod(const GF2EX& a, long e, const GF2EXModulus& F);
+
+// x = a^e % f; deg(a) < n (e may be negative).
+// Uses a sliding window algorithm.
+
+void PowerXMod(GF2EX& x, const ZZ& e, const GF2EXModulus& F);
+GF2EX PowerXMod(const ZZ& e, const GF2EXModulus& F);
+
+void PowerXMod(GF2EX& x, long e, const GF2EXModulus& F);
+GF2EX PowerXMod(long e, const GF2EXModulus& F);
+
+// x = X^e % f (e may be negative)
+
+void rem(GF2EX& x, const GF2EX& a, const GF2EXModulus& F);
+// x = a % f
+
+void DivRem(GF2EX& q, GF2EX& r, const GF2EX& a, const GF2EXModulus& F);
+// q = a/f, r = a%f
+
+void div(GF2EX& q, const GF2EX& a, const GF2EXModulus& F);
+// q = a/f
+
+// operator notation:
+
+GF2EX operator/(const GF2EX& a, const GF2EXModulus& F);
+GF2EX operator%(const GF2EX& a, const GF2EXModulus& F);
+
+GF2EX& operator/=(GF2EX& x, const GF2EXModulus& F);
+GF2EX& operator%=(GF2EX& x, const GF2EXModulus& F);
+
+
+
+/**************************************************************************\
+
+                             vectors of GF2EX's
+
+\**************************************************************************/
+
+typedef Vec<GF2EX> vec_GF2EX; // backward compatibility
+
+
+
+/**************************************************************************\
+
+                              Modular Composition
+
+Modular composition is the problem of computing g(h) mod f for
+polynomials f, g, and h.
+
+The algorithm employed is that of Brent & Kung (Fast algorithms for
+manipulating formal power series, JACM 25:581-595, 1978), which uses
+O(n^{1/2}) modular polynomial multiplications, and O(n^2) scalar
+operations.
+
+
+\**************************************************************************/
+
+void CompMod(GF2EX& x, const GF2EX& g, const GF2EX& h, const GF2EXModulus& F);
+GF2EX CompMod(const GF2EX& g, const GF2EX& h,
+                    const GF2EXModulus& F);
+
+// x = g(h) mod f; deg(h) < n
+
+void Comp2Mod(GF2EX& x1, GF2EX& x2, const GF2EX& g1, const GF2EX& g2,
+              const GF2EX& h, const GF2EXModulus& F);
+// xi = gi(h) mod f (i=1,2); deg(h) < n.
+
+
+void Comp3Mod(GF2EX& x1, GF2EX& x2, GF2EX& x3,
+              const GF2EX& g1, const GF2EX& g2, const GF2EX& g3,
+              const GF2EX& h, const GF2EXModulus& F);
+// xi = gi(h) mod f (i=1..3); deg(h) < n.
+
+
+
+/**************************************************************************\
+
+                     Composition with Pre-Conditioning
+
+If a single h is going to be used with many g's then you should build
+a GF2EXArgument for h, and then use the compose routine below.  The
+routine build computes and stores h, h^2, ..., h^m mod f.  After this
+pre-computation, composing a polynomial of degree roughly n with h
+takes n/m multiplies mod f, plus n^2 scalar multiplies.  Thus,
+increasing m increases the space requirement and the pre-computation
+time, but reduces the composition time.
+
+\**************************************************************************/
+
+
+struct GF2EXArgument {
+   vec_GF2EX H;
+};
+
+void build(GF2EXArgument& H, const GF2EX& h, const GF2EXModulus& F, long m);
+// Pre-Computes information about h.  m > 0, deg(h) < n.
+
+void CompMod(GF2EX& x, const GF2EX& g, const GF2EXArgument& H,
+             const GF2EXModulus& F);
+
+GF2EX CompMod(const GF2EX& g, const GF2EXArgument& H,
+                    const GF2EXModulus& F);
+
+extern long GF2EXArgBound;
+
+// Initially 0.  If this is set to a value greater than zero, then
+// composition routines will allocate a table of no more than about
+// GF2EXArgBound KB.  Setting this value affects all compose routines
+// and the power projection and minimal polynomial routines below,
+// and indirectly affects many routines in GF2EXFactoring.
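+
+// For illustration, a sketch of composing several polynomials with one
+// fixed h (names ours; assumes f, h, g1, g2 are already set up, with
+// deg(h) < deg(f)):
+//
+//    GF2EXModulus F(f);
+//    GF2EXArgument H;
+//    build(H, h, F, 16);     // store h, h^2, ..., h^16 mod f
+//
+//    GF2EX x1, x2;
+//    CompMod(x1, g1, H, F);  // x1 = g1(h) mod f
+//    CompMod(x2, g2, H, F);  // x2 = g2(h) mod f
+//
+// Setting GF2EXArgBound = 1024 (say) would instead cap such tables at
+// about 1 MB.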
+
+/**************************************************************************\
+
+                     power projection routines
+
+\**************************************************************************/
+
+void project(GF2E& x, const vec_GF2E& a, const GF2EX& b);
+GF2E project(const vec_GF2E& a, const GF2EX& b);
+// x = inner product of a with coefficient vector of b
+
+
+void ProjectPowers(vec_GF2E& x, const vec_GF2E& a, long k,
+                   const GF2EX& h, const GF2EXModulus& F);
+
+vec_GF2E ProjectPowers(const vec_GF2E& a, long k,
+                   const GF2EX& h, const GF2EXModulus& F);
+
+// Computes the vector
+
+//    project(a, 1), project(a, h), ..., project(a, h^{k-1} % f).  
+
+// This operation is the "transpose" of the modular composition operation.
+
+void ProjectPowers(vec_GF2E& x, const vec_GF2E& a, long k,
+                   const GF2EXArgument& H, const GF2EXModulus& F);
+
+vec_GF2E ProjectPowers(const vec_GF2E& a, long k,
+                   const GF2EXArgument& H, const GF2EXModulus& F);
+
+// same as above, but uses a pre-computed GF2EXArgument
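+
+// For illustration (names ours): assuming a, h, F are already set up
+// with a.length() <= deg(F) and deg(h) < deg(F),
+//
+//    GF2EXArgument H;
+//    build(H, h, F, m);
+//    vec_GF2E v = ProjectPowers(a, k, H, F);
+//
+// yields v[i] = project(a, h^i mod f) for i = 0..k-1.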
+
+class GF2EXTransMultiplier { /* ... */ };
+
+void build(GF2EXTransMultiplier& B, const GF2EX& b, const GF2EXModulus& F);
+
+
+
+void UpdateMap(vec_GF2E& x, const vec_GF2E& a,
+               const GF2EXTransMultiplier& B, const GF2EXModulus& F);
+
+vec_GF2E UpdateMap(const vec_GF2E& a,
+               const GF2EXTransMultiplier& B, const GF2EXModulus& F);
+
+// Computes the vector
+
+//    project(a, b), project(a, (b*X)%f), ..., project(a, (b*X^{n-1})%f)
+
+// Restriction: a.length() <= deg(F), deg(b) < deg(F).
+// This is "transposed" MulMod by B.
+// Input may have "high order" zeroes stripped.
+// Output always has high order zeroes stripped.
+
+
+/**************************************************************************\
+
+                              Minimum Polynomials
+
+These routines should be used only when GF2E is a field.
+
+All of these routines implement the algorithm from [Shoup, J. Symbolic
+Comp. 17:371-391, 1994] and [Shoup, J. Symbolic Comp. 20:363-397,
+1995], based on transposed modular composition and the
+Berlekamp/Massey algorithm.
+
+\**************************************************************************/
+
+
+void MinPolySeq(GF2EX& h, const vec_GF2E& a, long m);
+GF2EX MinPolySeq(const vec_GF2E& a, long m);
+// computes the minimum polynomial of a linearly generated sequence; m
+// is a bound on the degree of the polynomial; required: a.length() >=
+// 2*m
+
+
+void ProbMinPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F, long m);
+GF2EX ProbMinPolyMod(const GF2EX& g, const GF2EXModulus& F, long m);
+
+void ProbMinPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F);
+GF2EX ProbMinPolyMod(const GF2EX& g, const GF2EXModulus& F);
+
+// computes the monic minimal polynomial of (g mod f).  m = a bound on
+// the degree of the minimal polynomial; in the second version, this
+// argument defaults to n.  The algorithm is probabilistic, always
+// returns a divisor of the minimal polynomial, and returns a proper
+// divisor with probability at most m/2^{GF2E::degree()}.
+
+void MinPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F, long m);
+GF2EX MinPolyMod(const GF2EX& g, const GF2EXModulus& F, long m);
+
+void MinPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F);
+GF2EX MinPolyMod(const GF2EX& g, const GF2EXModulus& F);
+
+// same as above, but guarantees that result is correct
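+
+// For illustration (a sketch; recall these routines assume GF2E is a
+// field, i.e., the GF2E modulus is irreducible):
+//
+//    GF2EXModulus F(f);       // f with deg(f) = n > 0
+//    GF2EX g, h;
+//    random(g, deg(F));       // random g with deg(g) < n
+//    MinPolyMod(h, g, F);     // h = minimal polynomial of (g mod f)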
+
+void IrredPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F, long m);
+GF2EX IrredPolyMod(const GF2EX& g, const GF2EXModulus& F, long m);
+
+void IrredPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F);
+GF2EX IrredPolyMod(const GF2EX& g, const GF2EXModulus& F);
+
+// same as above, but assumes that f is irreducible, or at least that
+// the minimal poly of g is itself irreducible.  The algorithm is
+// deterministic (and is always correct).
+
+
+/**************************************************************************\
+
+           Composition and Minimal Polynomials in towers
+
+These are implementations of algorithms that will be described
+and analyzed in a forthcoming paper.
+
+GF2E need not be a field.
+
+\**************************************************************************/
+
+
+void CompTower(GF2EX& x, const GF2X& g, const GF2EXArgument& h,
+             const GF2EXModulus& F);
+
+GF2EX CompTower(const GF2X& g, const GF2EXArgument& h,
+             const GF2EXModulus& F);
+
+void CompTower(GF2EX& x, const GF2X& g, const GF2EX& h,
+             const GF2EXModulus& F);
+
+GF2EX CompTower(const GF2X& g, const GF2EX& h,
+             const GF2EXModulus& F);
+
+
+// x = g(h) mod f
+
+
+void ProbMinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F,
+                      long m);
+
+GF2X ProbMinPolyTower(const GF2EX& g, const GF2EXModulus& F, long m);
+
+void ProbMinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F);
+
+GF2X ProbMinPolyTower(const GF2EX& g, const GF2EXModulus& F);
+
+// Uses a probabilistic algorithm to compute the minimal
+// polynomial of (g mod f) over GF2.
+// The parameter m is a bound on the degree of the minimal polynomial
+// (default = deg(f)*GF2E::degree()).
+// In general, the result will be a divisor of the true minimal
+// polynomial.  For correct results, use the MinPoly routines below.
+
+
+
+void MinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F, long m);
+
+GF2X MinPolyTower(const GF2EX& g, const GF2EXModulus& F, long m);
+
+void MinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F);
+
+GF2X MinPolyTower(const GF2EX& g, const GF2EXModulus& F);
+
+// Same as above, but result is always correct.
+
+
+void IrredPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F, long m);
+
+GF2X IrredPolyTower(const GF2EX& g, const GF2EXModulus& F, long m);
+
+void IrredPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F);
+
+GF2X IrredPolyTower(const GF2EX& g, const GF2EXModulus& F);
+
+// Same as above, but assumes the minimal polynomial is
+// irreducible, and uses a slightly faster, deterministic algorithm.
+
+
+
+/**************************************************************************\
+
+                   Traces, norms, resultants
+
+\**************************************************************************/
+
+
+void TraceMod(GF2E& x, const GF2EX& a, const GF2EXModulus& F);
+GF2E TraceMod(const GF2EX& a, const GF2EXModulus& F);
+
+void TraceMod(GF2E& x, const GF2EX& a, const GF2EX& f);
+GF2E TraceMod(const GF2EX& a, const GF2EX& f);
+// x = Trace(a mod f); deg(a) < deg(f)
+
+
+void TraceVec(vec_GF2E& S, const GF2EX& f);
+vec_GF2E TraceVec(const GF2EX& f);
+// S[i] = Trace(X^i mod f), i = 0..deg(f)-1; 0 < deg(f)
+
+// The above trace routines implement the asymptotically fast trace
+// algorithm from [von zur Gathen and Shoup, Computational Complexity,
+// 1992].
+
+void NormMod(GF2E& x, const GF2EX& a, const GF2EX& f);
+GF2E NormMod(const GF2EX& a, const GF2EX& f);
+// x = Norm(a mod f); 0 < deg(f), deg(a) < deg(f)
+
+void resultant(GF2E& x, const GF2EX& a, const GF2EX& b);
+GF2E resultant(const GF2EX& a, const GF2EX& b);
+// x = resultant(a, b)
+
+// NormMod and resultant require that GF2E is a field.
+
+
+
+/**************************************************************************\
+
+                           Miscellany
+
+
+\**************************************************************************/
+
+
+void clear(GF2EX& x); // x = 0
+void set(GF2EX& x); // x = 1
+
+
+void GF2EX::kill();
+// f.kill() sets f to 0 and frees all memory held by f.  Equivalent to
+// f.rep.kill().
+
+GF2EX::GF2EX(INIT_SIZE_TYPE, long n);
+// GF2EX(INIT_SIZE, n) initializes to zero, but space is pre-allocated
+// for n coefficients
+
+static const GF2EX& GF2EX::zero();
+// GF2EX::zero() is a read-only reference to 0
+
+void GF2EX::swap(GF2EX& x);
+void swap(GF2EX& x, GF2EX& y);
+// swap (via "pointer swapping")
+
+GF2EX::GF2EX(long i, const GF2E& c);
+GF2EX::GF2EX(long i, GF2 c);
+GF2EX::GF2EX(long i, long c);
+// initialize to X^i*c, provided for backward compatibility
+
+
+

diff --git a/thirdparty/linux/ntl/doc/GF2EX.txt b/thirdparty/linux/ntl/doc/GF2EX.txt
new file mode 100644
index 0000000000..d0b37ace7c
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/GF2EX.txt
@@ -0,0 +1,889 @@
[GF2EX.txt: plain-text version of the GF2EX.cpp.html content above; duplicate body omitted]
+ +/**************************************************************************\ + + power projection routines + +\**************************************************************************/ + +void project(GF2E& x, const GF2EVector& a, const GF2EX& b); +GF2E project(const GF2EVector& a, const GF2EX& b); +// x = inner product of a with coefficient vector of b + + +void ProjectPowers(vec_GF2E& x, const vec_GF2E& a, long k, + const GF2EX& h, const GF2EXModulus& F); + +vec_GF2E ProjectPowers(const vec_GF2E& a, long k, + const GF2EX& h, const GF2EXModulus& F); + +// Computes the vector + +// project(a, 1), project(a, h), ..., project(a, h^{k-1} % f). + +// This operation is the "transpose" of the modular composition operation. + +void ProjectPowers(vec_GF2E& x, const vec_GF2E& a, long k, + const GF2EXArgument& H, const GF2EXModulus& F); + +vec_GF2E ProjectPowers(const vec_GF2E& a, long k, + const GF2EXArgument& H, const GF2EXModulus& F); + +// same as above, but uses a pre-computed GF2EXArgument + +class GF2EXTransMultiplier { /* ... */ }; + +void build(GF2EXTransMultiplier& B, const GF2EX& b, const GF2EXModulus& F); + + + +void UpdateMap(vec_GF2E& x, const vec_GF2E& a, + const GF2EXMultiplier& B, const GF2EXModulus& F); + +vec_GF2E UpdateMap(const vec_GF2E& a, + const GF2EXMultiplier& B, const GF2EXModulus& F); + +// Computes the vector + +// project(a, b), project(a, (b*X)%f), ..., project(a, (b*X^{n-1})%f) + +// Restriction: a.length() <= deg(F), deg(b) < deg(F). +// This is "transposed" MulMod by B. +// Input may have "high order" zeroes stripped. +// Output always has high order zeroes stripped. + + +/**************************************************************************\ + + Minimum Polynomials + +These routines should be used only when GF2E is a field. + +All of these routines implement the algorithm from [Shoup, J. Symbolic +Comp. 17:371-391, 1994] and [Shoup, J. Symbolic Comp. 20:363-397, +1995], based on transposed modular composition and the +Berlekamp/Massey algorithm. + +\**************************************************************************/ + + +void MinPolySeq(GF2EX& h, const vec_GF2E& a, long m); +GF2EX MinPolySeq(const vec_GF2E& a, long m); +// computes the minimum polynomial of a linealy generated sequence; m +// is a bound on the degree of the polynomial; required: a.length() >= +// 2*m + + +void ProbMinPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F, long m); +GF2EX ProbMinPolyMod(const GF2EX& g, const GF2EXModulus& F, long m); + +void ProbMinPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F); +GF2EX ProbMinPolyMod(const GF2EX& g, const GF2EXModulus& F); + +// computes the monic minimal polynomial if (g mod f). m = a bound on +// the degree of the minimal polynomial; in the second version, this +// argument defaults to n. The algorithm is probabilistic, always +// returns a divisor of the minimal polynomial, and returns a proper +// divisor with probability at most m/2^{GF2E::degree()}. 
+ +void MinPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F, long m); +GF2EX MinPolyMod(const GF2EX& g, const GF2EXModulus& F, long m); + +void MinPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F); +GF2EX MinPolyMod(const GF2EX& g, const GF2EXModulus& F); + +// same as above, but guarantees that result is correct + +void IrredPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F, long m); +GF2EX IrredPolyMod(const GF2EX& g, const GF2EXModulus& F, long m); + +void IrredPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F); +GF2EX IrredPolyMod(const GF2EX& g, const GF2EXModulus& F); + +// same as above, but assumes that f is irreducible, or at least that +// the minimal poly of g is itself irreducible. The algorithm is +// deterministic (and is always correct). + + +/**************************************************************************\ + + Composition and Minimal Polynomials in towers + +These are implementations of algorithms that will be described +and analyzed in a forthcoming paper. + +GF2E need not be a field. + +\**************************************************************************/ + + +void CompTower(GF2EX& x, const GF2X& g, const GF2EXArgument& h, + const GF2EXModulus& F); + +GF2EX CompTower(const GF2X& g, const GF2EXArgument& h, + const GF2EXModulus& F); + +void CompTower(GF2EX& x, const GF2X& g, const GF2EX& h, + const GF2EXModulus& F); + +GF2EX CompTower(const GF2X& g, const GF2EX& h, + const GF2EXModulus& F); + + +// x = g(h) mod f + + +void ProbMinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F, + long m); + +GF2X ProbMinPolyTower(const GF2EX& g, const GF2EXModulus& F, long m); + +void ProbMinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F); + +GF2X ProbMinPolyTower(const GF2EX& g, const GF2EXModulus& F); + +// Uses a probabilistic algorithm to compute the minimal +// polynomial of (g mod f) over GF2. +// The parameter m is a bound on the degree of the minimal polynomial +// (default = deg(f)*GF2E::degree()). +// In general, the result will be a divisor of the true minimimal +// polynomial. For correct results, use the MinPoly routines below. + + + +void MinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F, long m); + +GF2X MinPolyTower(const GF2EX& g, const GF2EXModulus& F, long m); + +void MinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F); + +GF2X MinPolyTower(const GF2EX& g, const GF2EXModulus& F); + +// Same as above, but result is always correct. + + +void IrredPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F, long m); + +GF2X IrredPolyTower(const GF2EX& g, const GF2EXModulus& F, long m); + +void IrredPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F); + +GF2X IrredPolyTower(const GF2EX& g, const GF2EXModulus& F); + +// Same as above, but assumes the minimal polynomial is +// irreducible, and uses a slightly faster, deterministic algorithm. 
+ + + +/**************************************************************************\ + + Traces, norms, resultants + +\**************************************************************************/ + + +void TraceMod(GF2E& x, const GF2EX& a, const GF2EXModulus& F); +GF2E TraceMod(const GF2EX& a, const GF2EXModulus& F); + +void TraceMod(GF2E& x, const GF2EX& a, const GF2EX& f); +GF2E TraceMod(const GF2EX& a, const GF2EXModulus& f); +// x = Trace(a mod f); deg(a) < deg(f) + + +void TraceVec(vec_GF2E& S, const GF2EX& f); +vec_GF2E TraceVec(const GF2EX& f); +// S[i] = Trace(X^i mod f), i = 0..deg(f)-1; 0 < deg(f) + +// The above trace routines implement the asymptotically fast trace +// algorithm from [von zur Gathen and Shoup, Computational Complexity, +// 1992]. + +void NormMod(GF2E& x, const GF2EX& a, const GF2EX& f); +GF2E NormMod(const GF2EX& a, const GF2EX& f); +// x = Norm(a mod f); 0 < deg(f), deg(a) < deg(f) + +void resultant(GF2E& x, const GF2EX& a, const GF2EX& b); +GF2E resultant(const GF2EX& a, const GF2EX& b); +// x = resultant(a, b) + +// NormMod and resultant require that GF2E is a field. + + + +/**************************************************************************\ + + Miscellany + + +\**************************************************************************/ + + +void clear(GF2EX& x) // x = 0 +void set(GF2EX& x); // x = 1 + + +void GF2EX::kill(); +// f.kill() sets f to 0 and frees all memory held by f. Equivalent to +// f.rep.kill(). + +GF2EX::GF2EX(INIT_SIZE_TYPE, long n); +// GF2EX(INIT_SIZE, n) initializes to zero, but space is pre-allocated +// for n coefficients + +static const GF2EX& zero(); +// GF2EX::zero() is a read-only reference to 0 + +void GF2EX::swap(GF2EX& x); +void swap(GF2EX& x, GF2EX& y); +// swap (via "pointer swapping") + +GF2EX::GF2EX(long i, const GF2E& c); +GF2EX::GF2EX(long i, GF2 c); +GF2EX::GF2EX(long i, long c); +// initialize to X^i*c, provided for backward compatibility + diff --git a/thirdparty/linux/ntl/doc/GF2EXFactoring.cpp.html b/thirdparty/linux/ntl/doc/GF2EXFactoring.cpp.html new file mode 100644 index 0000000000..d17551116c --- /dev/null +++ b/thirdparty/linux/ntl/doc/GF2EXFactoring.cpp.html @@ -0,0 +1,238 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/GF2EXFactoring.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: GF2EXFactoring
+
+SUMMARY:
+
+Routines are provided for factorization of polynomials over GF2E, as
+well as routines for related problems such as testing irreducibility
+and constructing irreducible polynomials of given degree.
+
+\**************************************************************************/
+
+#include <NTL/GF2EX.h>
+#include <NTL/pair_GF2EX_long.h>
+
+void SquareFreeDecomp(vec_pair_GF2EX_long& u, const GF2EX& f);
+vec_pair_GF2EX_long SquareFreeDecomp(const GF2EX& f);
+
+// Performs square-free decomposition.  f must be monic.  If f =
+// prod_i g_i^i, then u is set to a list of pairs (g_i, i).  The list
+// is in increasing order of i, with trivial terms (i.e., g_i = 1)
+// deleted.
+
+
+void FindRoots(vec_GF2E& x, const GF2EX& f);
+vec_GF2E FindRoots(const GF2EX& f);
+
+// f is monic, and has deg(f) distinct roots.  returns the list of
+// roots
+
+void FindRoot(GF2E& root, const GF2EX& f);
+GF2E FindRoot(const GF2EX& f);
+
+
+// finds a single root of f.  assumes that f is monic and splits into
+// distinct linear factors
+
+
+void SFBerlekamp(vec_GF2EX& factors, const GF2EX& f, long verbose=0);
+vec_GF2EX  SFBerlekamp(const GF2EX& f, long verbose=0);
+
+// Assumes f is square-free and monic.  returns list of factors of f.
+// Uses "Berlekamp" approach, as described in detail in [Shoup,
+// J. Symbolic Comp. 20:363-397, 1995].
+
+
+void berlekamp(vec_pair_GF2EX_long& factors, const GF2EX& f,
+               long verbose=0);
+
+vec_pair_GF2EX_long berlekamp(const GF2EX& f, long verbose=0);
+
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SFBerlekamp.
+
+
+
+void NewDDF(vec_pair_GF2EX_long& factors, const GF2EX& f, const GF2EX& h,
+         long verbose=0);
+
+vec_pair_GF2EX_long NewDDF(const GF2EX& f, const GF2EX& h,
+         long verbose=0);
+
+
+// This computes a distinct-degree factorization.  The input must be
+// monic and square-free.  factors is set to a list of pairs (g, d),
+// where g is the product of all irreducible factors of f of degree d.
+// Only nontrivial pairs (i.e., g != 1) are included.  The polynomial
+// h is assumed to be equal to X^{2^{GF2E::degree()}} mod f,
+// which can be computed efficiently using the function FrobeniusMap
+// (see below).
+// This routine  implements the baby step/giant step algorithm
+// of [Kaltofen and Shoup, STOC 1995],
+// further described in [Shoup, J. Symbolic Comp. 20:363-397, 1995].
+
+// NOTE: When factoring "large" polynomials,
+// this routine uses external files to store some intermediate
+// results, which are removed if the routine terminates normally.
+// These files are stored in the current directory under names of the
+// form tmp-*.
+// The definition of "large" is controlled by the variable
+
+      extern double GF2EXFileThresh;
+
+// which can be set by the user.  If the size of the tables
+// exceeds GF2EXFileThresh KB, external files are used.
+// Initial value is NTL_FILE_THRESH (defined in tools.h).
+
+
+
+void EDF(vec_GF2EX& factors, const GF2EX& f, const GF2EX& h,
+         long d, long verbose=0);
+
+vec_GF2EX EDF(const GF2EX& f, const GF2EX& h,
+         long d, long verbose=0);
+
+// Performs equal-degree factorization.  f is monic, square-free, and
+// all irreducible factors have the same degree.
+// h = X^{2^{GF2E::degree()}} mod f,
+// which can be computed efficiently using the function FrobeniusMap
+// (see below).
+// d = degree of irreducible factors of f.  
+// This routine implements the algorithm of [von zur Gathen and Shoup,
+// Computational Complexity 2:187-224, 1992]
+
+void RootEDF(vec_GF2EX& factors, const GF2EX& f, long verbose=0);
+vec_GF2EX RootEDF(const GF2EX& f, long verbose=0);
+
+// EDF for d==1
+
+
+void SFCanZass(vec_GF2EX& factors, const GF2EX& f, long verbose=0);
+vec_GF2EX SFCanZass(const GF2EX& f, long verbose=0);
+
+// Assumes f is monic and square-free.  returns list of factors of f.
+// Uses "Cantor/Zassenhaus" approach, using the routines NewDDF and
+// EDF above.
+
+
+void CanZass(vec_pair_GF2EX_long& factors, const GF2EX& f,
+             long verbose=0);
+
+vec_pair_GF2EX_long CanZass(const GF2EX& f, long verbose=0);
+
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SquareFreeDecomp and SFCanZass.
+
+// NOTE: these routines use modular composition.  The space
+// used for the required tables can be controlled by the variable
+// GF2EXArgBound (see GF2EX.txt).
+
+
+
+void mul(GF2EX& f, const vec_pair_GF2EX_long& v);
+GF2EX mul(const vec_pair_GF2EX_long& v);
+
+// multiplies polynomials, with multiplicities
+
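+// Example (an illustrative sketch, not part of the interface above):
+// given a monic polynomial f over an initialized GF2E, compute its
+// complete factorization and re-multiply it as a consistency check.
+
+   GF2EX f;
+   // ... assign a monic polynomial to f ...
+   vec_pair_GF2EX_long factors;
+   CanZass(factors, f);        // factors[i] = (irreducible factor, multiplicity)
+   GF2EX check = mul(factors); // check == f
+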
+
+/**************************************************************************\
+
+                            Irreducible Polynomials
+
+\**************************************************************************/
+
+long ProbIrredTest(const GF2EX& f, long iter=1);
+
+// performs a fast, probabilistic irreducibility test.  The test can
+// err only if f is reducible, and the error probability is bounded by
+// 2^{-iter*GF2E::degree()}.  This implements an algorithm from [Shoup,
+// J. Symbolic Comp. 17:371-391, 1994].
+
+long DetIrredTest(const GF2EX& f);
+
+// performs a recursive deterministic irreducibility test.  Fast in
+// the worst-case (when input is irreducible).  This implements an
+// algorithm from [Shoup, J. Symbolic Comp. 17:371-391, 1994].
+
+long IterIrredTest(const GF2EX& f);
+
+// performs an iterative deterministic irreducibility test, based on
+// DDF.  Fast on average (when f has a small factor).
+
+void BuildIrred(GF2EX& f, long n);
+GF2EX BuildIrred_GF2EX(long n);
+
+// Build a monic irreducible poly of degree n.
+
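+// Example (sketch): build a monic irreducible polynomial of degree 64
+// over the current GF2E (assumed initialized) and double-check it with
+// the tests above.
+
+   GF2EX f;
+   BuildIrred(f, 64);
+   long ok1 = ProbIrredTest(f, 10); // error prob. <= 2^{-10*GF2E::degree()}
+   long ok2 = DetIrredTest(f);      // both should return 1
+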
+void BuildRandomIrred(GF2EX& f, const GF2EX& g);
+GF2EX BuildRandomIrred(const GF2EX& g);
+
+// g is a monic irreducible polynomial.  Constructs a random monic
+// irreducible polynomial f of the same degree.
+
+void FrobeniusMap(GF2EX& h, const GF2EXModulus& F);
+GF2EX FrobeniusMap(const GF2EXModulus& F);
+
+// Computes h = X^{2^{GF2E::degree()}} mod F,
+// by either iterated squaring or modular
+// composition.  The latter method is based on a technique developed
+// in Kaltofen & Shoup (Faster polynomial factorization over high
+// algebraic extensions of finite fields, ISSAC 1997).  This method is
+// faster than iterated squaring when deg(F) is large relative to
+// GF2E::degree().
+
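+// Example (sketch): FrobeniusMap supplies the polynomial h expected by
+// NewDDF and EDF above; f is assumed to be monic and square-free.
+
+   GF2EXModulus F(f);
+   GF2EX h = FrobeniusMap(F); // h = X^{2^{GF2E::degree()}} mod f
+   vec_pair_GF2EX_long u;
+   NewDDF(u, f, h);           // u[i] = (product of degree-d factors, d)
+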
+
+long IterComputeDegree(const GF2EX& h, const GF2EXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial, and h =
+// X^{2^{GF2E::degree()}} mod f (see function FrobeniusMap above).
+// The common degree of the irreducible factors
+// of f is computed.  Uses a "baby step/giant step" algorithm, similar
+// to NewDDF.  Although asymptotically slower than RecComputeDegree
+// (below), it is faster for reasonably sized inputs.
+
+long RecComputeDegree(const GF2EX& h, const GF2EXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial, h = X^{2^{GF2E::degree()}}
+// mod f (see function FrobeniusMap above).  
+// The common degree of the irreducible factors of f is
+// computed. Uses a recursive algorithm similar to DetIrredTest.
+
+void TraceMap(GF2EX& w, const GF2EX& a, long d, const GF2EXModulus& F,
+              const GF2EX& h);
+
+GF2EX TraceMap(const GF2EX& a, long d, const GF2EXModulus& F,
+              const GF2EX& h);
+
+// Computes w = a+a^q+...+a^{q^{d-1}} mod f; it is assumed that d >= 0,
+// and h = X^q mod f, q a power of 2^{GF2E::degree()}.  This routine
+// implements an algorithm from [von zur Gathen and Shoup,
+// Computational Complexity 2:187-224, 1992].
+// If q = 2^{GF2E::degree()}, then h can be computed most efficiently
+// by using the function FrobeniusMap above.
+
+void PowerCompose(GF2EX& w, const GF2EX& h, long d, const GF2EXModulus& F);
+
+GF2EX PowerCompose(const GF2EX& h, long d, const GF2EXModulus& F);
+
+// Computes w = X^{q^d} mod f; it is assumed that d >= 0, and h = X^q
+// mod f, q a power of 2^{GF2E::degree()}.  This routine implements an
+// algorithm from [von zur Gathen and Shoup, Computational Complexity
+// 2:187-224, 1992].
+// If q = 2^{GF2E::degree()}, then h can be computed most efficiently
+// by using the function FrobeniusMap above.
+
+
+ diff --git a/thirdparty/linux/ntl/doc/GF2EXFactoring.txt b/thirdparty/linux/ntl/doc/GF2EXFactoring.txt new file mode 100644 index 0000000000..69f3d0438e --- /dev/null +++ b/thirdparty/linux/ntl/doc/GF2EXFactoring.txt @@ -0,0 +1,228 @@ + +/**************************************************************************\ + +MODULE: GF2EXFactoring + +SUMMARY: + +Routines are provided for factorization of polynomials over GF2E, as +well as routines for related problems such as testing irreducibility +and constructing irreducible polynomials of given degree. + +\**************************************************************************/ + +#include +#include + +void SquareFreeDecomp(vec_pair_GF2EX_long& u, const GF2EX& f); +vec_pair_GF2EX_long SquareFreeDecomp(const GF2EX& f); + +// Performs square-free decomposition. f must be monic. If f = +// prod_i g_i^i, then u is set to a list of pairs (g_i, i). The list +// is is increasing order of i, with trivial terms (i.e., g_i = 1) +// deleted. + + +void FindRoots(vec_GF2E& x, const GF2EX& f); +vec_GF2E FindRoots(const GF2EX& f); + +// f is monic, and has deg(f) distinct roots. returns the list of +// roots + +void FindRoot(GF2E& root, const GF2EX& f); +GF2E FindRoot(const GF2EX& f); + + +// finds a single root of f. assumes that f is monic and splits into +// distinct linear factors + + +void SFBerlekamp(vec_GF2EX& factors, const GF2EX& f, long verbose=0); +vec_GF2EX SFBerlekamp(const GF2EX& f, long verbose=0); + +// Assumes f is square-free and monic. returns list of factors of f. +// Uses "Berlekamp" approach, as described in detail in [Shoup, +// J. Symbolic Comp. 20:363-397, 1995]. + + +void berlekamp(vec_pair_GF2EX_long& factors, const GF2EX& f, + long verbose=0); + +vec_pair_GF2EX_long berlekamp(const GF2EX& f, long verbose=0); + + +// returns a list of factors, with multiplicities. f must be monic. +// Calls SFBerlekamp. + + + +void NewDDF(vec_pair_GF2EX_long& factors, const GF2EX& f, const GF2EX& h, + long verbose=0); + +vec_pair_GF2EX_long NewDDF(const GF2EX& f, const GF2EX& h, + long verbose=0); + + +// This computes a distinct-degree factorization. The input must be +// monic and square-free. factors is set to a list of pairs (g, d), +// where g is the product of all irreducible factors of f of degree d. +// Only nontrivial pairs (i.e., g != 1) are included. The polynomial +// h is assumed to be equal to X^{2^{GF2E::degree()}} mod f, +// which can be computed efficiently using the function FrobeniusMap +// (see below). +// This routine implements the baby step/giant step algorithm +// of [Kaltofen and Shoup, STOC 1995], +// further described in [Shoup, J. Symbolic Comp. 20:363-397, 1995]. + +// NOTE: When factoring "large" polynomials, +// this routine uses external files to store some intermediate +// results, which are removed if the routine terminates normally. +// These files are stored in the current directory under names of the +// form tmp-*. +// The definition of "large" is controlled by the variable + + extern double GF2EXFileThresh + +// which can be set by the user. If the sizes of the tables +// exceeds GF2EXFileThresh KB, external files are used. +// Initial value is NTL_FILE_THRESH (defined in tools.h). + + + +void EDF(vec_GF2EX& factors, const GF2EX& f, const GF2EX& h, + long d, long verbose=0); + +vec_GF2EX EDF(const GF2EX& f, const GF2EX& h, + long d, long verbose=0); + +// Performs equal-degree factorization. f is monic, square-free, and +// all irreducible factors have same degree. 
+// h = X^{2^{GF2E::degree()}} mod f, +// which can be computed efficiently using the function FrobeniusMap +// (see below). +// d = degree of irreducible factors of f. +// This routine implements the algorithm of [von zur Gathen and Shoup, +// Computational Complexity 2:187-224, 1992] + +void RootEDF(vec_GF2EX& factors, const GF2EX& f, long verbose=0); +vec_GF2EX RootEDF(const GF2EX& f, long verbose=0); + +// EDF for d==1 + + +void SFCanZass(vec_GF2EX& factors, const GF2EX& f, long verbose=0); +vec_GF2EX SFCanZass(const GF2EX& f, long verbose=0); + +// Assumes f is monic and square-free. returns list of factors of f. +// Uses "Cantor/Zassenhaus" approach, using the routines NewDDF and +// EDF above. + + +void CanZass(vec_pair_GF2EX_long& factors, const GF2EX& f, + long verbose=0); + +vec_pair_GF2EX_long CanZass(const GF2EX& f, long verbose=0); + + +// returns a list of factors, with multiplicities. f must be monic. +// Calls SquareFreeDecomp and SFCanZass. + +// NOTE: these routines use modular composition. The space +// used for the required tables can be controlled by the variable +// GF2EXArgBound (see GF2EX.txt). + + + +void mul(GF2EX& f, const vec_pair_GF2EX_long& v); +GF2EX mul(const vec_pair_GF2EX_long& v); + +// multiplies polynomials, with multiplicities + + +/**************************************************************************\ + + Irreducible Polynomials + +\**************************************************************************/ + +long ProbIrredTest(const GF2EX& f, long iter=1); + +// performs a fast, probabilistic irreduciblity test. The test can +// err only if f is reducible, and the error probability is bounded by +// 2^{-iter*GF2E::degree()}. This implements an algorithm from [Shoup, +// J. Symbolic Comp. 17:371-391, 1994]. + +long DetIrredTest(const GF2EX& f); + +// performs a recursive deterministic irreducibility test. Fast in +// the worst-case (when input is irreducible). This implements an +// algorithm from [Shoup, J. Symbolic Comp. 17:371-391, 1994]. + +long IterIrredTest(const GF2EX& f); + +// performs an iterative deterministic irreducibility test, based on +// DDF. Fast on average (when f has a small factor). + +void BuildIrred(GF2EX& f, long n); +GF2EX BuildIrred_GF2EX(long n); + +// Build a monic irreducible poly of degree n. + +void BuildRandomIrred(GF2EX& f, const GF2EX& g); +GF2EX BuildRandomIrred(const GF2EX& g); + +// g is a monic irreducible polynomial. Constructs a random monic +// irreducible polynomial f of the same degree. + +void FrobeniusMap(GF2EX& h, const GF2EXModulus& F); +GF2EX FrobeniusMap(const GF2EXModulus& F); + +// Computes h = X^{2^{GF2E::degree()}} mod F, +// by either iterated squaring or modular +// composition. The latter method is based on a technique developed +// in Kaltofen & Shoup (Faster polynomial factorization over high +// algebraic extensions of finite fields, ISSAC 1997). This method is +// faster than iterated squaring when deg(F) is large relative to +// GF2E::degree(). + + +long IterComputeDegree(const GF2EX& h, const GF2EXModulus& F); + +// f is assumed to be an "equal degree" polynomial, and h = +// X^{2^{GF2E::degree()}} mod f (see function FrobeniusMap above) +// The common degree of the irreducible factors +// of f is computed. Uses a "baby step/giant step" algorithm, similar +// to NewDDF. Although asymptotocally slower than RecComputeDegree +// (below), it is faster for reasonably sized inputs. 
+ +long RecComputeDegree(const GF2EX& h, const GF2EXModulus& F); + +// f is assumed to be an "equal degree" polynomial, h = X^{2^{GF2E::degree()}} +// mod f (see function FrobeniusMap above). +// The common degree of the irreducible factors of f is +// computed. Uses a recursive algorithm similar to DetIrredTest. + +void TraceMap(GF2EX& w, const GF2EX& a, long d, const GF2EXModulus& F, + const GF2EX& h); + +GF2EX TraceMap(const GF2EX& a, long d, const GF2EXModulus& F, + const GF2EX& h); + +// Computes w = a+a^q+...+^{q^{d-1}} mod f; it is assumed that d >= 0, +// and h = X^q mod f, q a power of 2^{GF2E::degree()}. This routine +// implements an algorithm from [von zur Gathen and Shoup, +// Computational Complexity 2:187-224, 1992]. +// If q = 2^{GF2E::degree()}, then h can be computed most efficiently +// by using the function FrobeniusMap above. + +void PowerCompose(GF2EX& w, const GF2EX& h, long d, const GF2EXModulus& F); + +GF2EX PowerCompose(const GF2EX& h, long d, const GF2EXModulus& F); + +// Computes w = X^{q^d} mod f; it is assumed that d >= 0, and h = X^q +// mod f, q a power of 2^{GF2E::degree()}. This routine implements an +// algorithm from [von zur Gathen and Shoup, Computational Complexity +// 2:187-224, 1992]. +// If q = 2^{GF2E::degree()}, then h can be computed most efficiently +// by using the function FrobeniusMap above. + diff --git a/thirdparty/linux/ntl/doc/GF2X.cpp.html b/thirdparty/linux/ntl/doc/GF2X.cpp.html new file mode 100644 index 0000000000..90a29d8255 --- /dev/null +++ b/thirdparty/linux/ntl/doc/GF2X.cpp.html @@ -0,0 +1,825 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/GF2X.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: GF2X
+
+SUMMARY:
+
+The class GF2X implements polynomial arithmetic modulo 2.
+
+Polynomial arithmetic is implemented using a combination of classical
+routines and Karatsuba.
+
+\**************************************************************************/
+
+#include <NTL/GF2.h>
+#include <NTL/vec_GF2.h>
+
+class GF2X {
+public:
+
+   GF2X(); // initial value 0
+
+   GF2X(const GF2X& a); // copy
+   explicit GF2X(long a); // promotion
+   explicit GF2X(GF2 a); // promotion
+
+   GF2X& operator=(const GF2X& a); // assignment
+   GF2X& operator=(GF2 a);
+   GF2X& operator=(long a);
+
+   ~GF2X(); // destructor
+
+   GF2X(INIT_MONO_TYPE, long i, GF2 c);
+   GF2X(INIT_MONO_TYPE, long i, long c);
+   // initialize to c*X^i, invoke as GF2X(INIT_MONO, i, c)
+
+   GF2X(INIT_MONO_TYPE, long i);
+   // initialize to X^i, invoke as GF2X(INIT_MONO, i)
+
+   // typedefs to aid in generic programming
+   typedef GF2 coeff_type;
+   typedef GF2E residue_type;
+   typedef GF2XModulus modulus_type;
+
+
+   // ...
+
+};
+
+
+
+/**************************************************************************\
+
+                              Accessing coefficients
+
+The degree of a polynomial f is obtained as deg(f),
+where the zero polynomial, by definition, has degree -1.
+
+A polynomial f is represented as a coefficient vector.
+Coefficients may be accessed in one of two ways.
+
+The safe, high-level method is to call the function
+coeff(f, i) to get the coefficient of X^i in the polynomial f,
+and to call the function SetCoeff(f, i, a) to set the coefficient
+of X^i in f to the scalar a.
+
+One can also access the coefficients more directly via a lower level
+interface.  The coefficient of X^i in f may be accessed using
+subscript notation f[i].  In addition, one may write f.SetLength(n)
+to set the length of the underlying coefficient vector to n,
+and f.SetMaxLength(n) to allocate space for n coefficients,
+without changing the coefficient vector itself.
+
+After setting coefficients using this low-level interface,
+one must ensure that leading zeros in the coefficient vector
+are stripped afterwards by calling the function f.normalize().
+
+NOTE: unlike other polynomial classes, the coefficient vector
+for GF2X has a special representation, packing coefficients into
+words.  This has two consequences.  First, when using the indexing
+notation on a non-const polynomial f, the return type is ref_GF2,
+rather than GF2&.  For the most part, a ref_GF2 may be used like
+a GF2& --- see GF2.txt for more details.  Second, when applying
+f.SetLength(n) to a polynomial f, this essentially has the effect
+of zeroing out the coefficients of X^i for i >= n.
+
+\**************************************************************************/
+
+long deg(const GF2X& a);  // return deg(a); deg(0) == -1.
+
+const GF2 coeff(const GF2X& a, long i);
+// returns the coefficient of X^i, or zero if i is not in range
+
+const GF2 LeadCoeff(const GF2X& a);
+// returns leading term of a, or zero if a == 0
+
+const GF2 ConstTerm(const GF2X& a);
+// returns constant term of a, or zero if a == 0
+
+void SetCoeff(GF2X& x, long i, GF2 a);
+void SetCoeff(GF2X& x, long i, long a);
+// makes coefficient of X^i equal to a; error is raised if i < 0
+
+void SetCoeff(GF2X& x, long i);
+// makes coefficient of X^i equal to 1;  error is raised if i < 0
+
+void SetX(GF2X& x); // x is set to the monomial X
+
+long IsX(const GF2X& a); // test if a = X
+
+
+
+
+ref_GF2 GF2X::operator[](long i);
+const GF2 GF2X::operator[](long i) const;
+// indexing operators: f[i] is the coefficient of X^i ---
+// i should satisfy i >= 0 and i <= deg(f)
+
+void GF2X::SetLength(long n);
+// f.SetLength(n) sets the length of the underlying coefficient
+// vector to n --- after this call, indexing f[i] for i = 0..n-1
+// is valid.
+
+void GF2X::normalize();  
+// f.normalize() strips leading zeros from coefficient vector of f
+
+void GF2X::SetMaxLength(long n);
+// f.SetMaxLength(n) pre-allocates space for n coefficients.  The
+// polynomial that f represents is unchanged.
+
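+// Example (sketch): the polynomial X^5 + X + 1 built twice, once via
+// the high-level interface and once via the low-level interface.
+
+   GF2X f, g;
+
+   SetCoeff(f, 5);               // high-level: f = X^5 + X + 1
+   SetCoeff(f, 1);
+   SetCoeff(f, 0);
+
+   g.SetLength(6);               // low-level: index directly, ...
+   g[5] = 1; g[1] = 1; g[0] = 1;
+   g.normalize();                // ... then strip leading zeros; g == f
+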
+
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+long operator==(const GF2X& a, const GF2X& b);
+long operator!=(const GF2X& a, const GF2X& b);
+
+long IsZero(const GF2X& a); // test for 0
+long IsOne(const GF2X& a); // test for 1
+
+// PROMOTIONS: operators ==, != promote {long, GF2} to GF2X on (a, b)
+
+
+/**************************************************************************\
+
+                                   Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2X operator+(const GF2X& a, const GF2X& b);
+GF2X operator-(const GF2X& a, const GF2X& b);
+
+GF2X operator-(const GF2X& a); // unary -
+
+GF2X& operator+=(GF2X& x, const GF2X& a);
+GF2X& operator+=(GF2X& x, GF2 a);
+GF2X& operator+=(GF2X& x, long a);
+
+GF2X& operator-=(GF2X& x, const GF2X& a);
+GF2X& operator-=(GF2X& x, GF2 a);
+GF2X& operator-=(GF2X& x, long a);
+
+GF2X& operator++(GF2X& x);  // prefix
+void operator++(GF2X& x, int);  // postfix
+
+GF2X& operator--(GF2X& x);  // prefix
+void operator--(GF2X& x, int);  // postfix
+
+// procedural versions:
+
+
+void add(GF2X& x, const GF2X& a, const GF2X& b); // x = a + b
+void sub(GF2X& x, const GF2X& a, const GF2X& b); // x = a - b
+void negate(GF2X& x, const GF2X& a); // x = -a
+
+// PROMOTIONS: binary +, - and procedures add, sub promote {long, GF2}
+// to GF2X on (a, b).
+
+
+/**************************************************************************\
+
+                               Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2X operator*(const GF2X& a, const GF2X& b);
+
+GF2X& operator*=(GF2X& x, const GF2X& a);
+GF2X& operator*=(GF2X& x, GF2 a);
+GF2X& operator*=(GF2X& x, long a);
+
+// procedural versions:
+
+void mul(GF2X& x, const GF2X& a, const GF2X& b); // x = a * b
+
+void sqr(GF2X& x, const GF2X& a); // x = a^2
+GF2X sqr(const GF2X& a);
+
+// PROMOTIONS: operator * and procedure mul promote {long, GF2} to GF2X
+// on (a, b).
+
+
+/**************************************************************************\
+
+                               Shift Operations
+
+LeftShift by n means multiplication by X^n
+RightShift by n means division by X^n
+
+A negative shift amount reverses the direction of the shift.
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2X operator<<(const GF2X& a, long n);
+GF2X operator>>(const GF2X& a, long n);
+
+GF2X& operator<<=(GF2X& x, long n);
+GF2X& operator>>=(GF2X& x, long n);
+
+// procedural versions:
+
+void LeftShift(GF2X& x, const GF2X& a, long n);
+GF2X LeftShift(const GF2X& a, long n);
+
+void RightShift(GF2X& x, const GF2X& a, long n);
+GF2X RightShift(const GF2X& a, long n);
+
+void MulByX(GF2X& x, const GF2X& a);
+GF2X MulByX(const GF2X& a);
+
+
+/**************************************************************************\
+
+                                  Division
+
+\**************************************************************************/
+
+// operator notation:
+
+GF2X operator/(const GF2X& a, const GF2X& b);
+GF2X operator%(const GF2X& a, const GF2X& b);
+
+GF2X& operator/=(GF2X& x, const GF2X& a);
+GF2X& operator/=(GF2X& x, GF2 a);
+GF2X& operator/=(GF2X& x, long a);
+
+GF2X& operator%=(GF2X& x, const GF2X& b);
+
+
+// procedural versions:
+
+
+void DivRem(GF2X& q, GF2X& r, const GF2X& a, const GF2X& b);
+// q = a/b, r = a%b
+
+void div(GF2X& q, const GF2X& a, const GF2X& b);
+// q = a/b
+
+void rem(GF2X& r, const GF2X& a, const GF2X& b);
+// r = a%b
+
+long divide(GF2X& q, const GF2X& a, const GF2X& b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0
+
+long divide(const GF2X& a, const GF2X& b);
+// if b | a, returns 1; otherwise returns 0
+
+// PROMOTIONS: operator / and procedure div promote {long, GF2} to GF2X
+// on (a, b).
+
+
+/**************************************************************************\
+
+                                   GCD's
+
+\**************************************************************************/
+
+
+void GCD(GF2X& x, const GF2X& a, const GF2X& b);
+GF2X GCD(const GF2X& a, const GF2X& b);
+// x = GCD(a, b) (zero if a==b==0).
+
+
+void XGCD(GF2X& d, GF2X& s, GF2X& t, const GF2X& a, const GF2X& b);
+// d = gcd(a,b), a s + b t = d
+
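+// Example (sketch): for given polynomials a and b, XGCD yields Bezout
+// coefficients, so the result can be verified directly.
+
+   GF2X d, s, t;
+   XGCD(d, s, t, a, b); // now a*s + b*t == d == GCD(a, b)
+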
+
+/**************************************************************************\
+
+                                  Input/Output
+
+I/O format:
+
+   [a_0 a_1 ... a_n],
+
+represents the polynomial a_0 + a_1*X + ... + a_n*X^n.
+
+On output, all coefficients will be 0 or 1, and
+a_n not zero (the zero polynomial is [ ]).  On input, the coefficients
+may be arbitrary integers which are reduced modulo 2, and leading zeros
+stripped.
+
+There is also a more compact hex I/O format.  To output in this
+format, set GF2X::HexOutput to a nonzero value.  On input, if the first
+non-blank character read is 'x' or 'X', then a hex format is assumed.
+
+
+\**************************************************************************/
+
+istream& operator>>(istream& s, GF2X& x);
+ostream& operator<<(ostream& s, const GF2X& a);
+
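+// Example (sketch): switching to the compact hex output format.
+
+   GF2X f;
+   SetCoeff(f, 8);      // f = X^8 + 1
+   SetCoeff(f, 0);
+   cout << f << "\n";   // prints [1 0 0 0 0 0 0 0 1]
+   GF2X::HexOutput = 1;
+   cout << f << "\n";   // prints the same polynomial in hex format
+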
+
+/**************************************************************************\
+
+                              Some utility routines
+
+\**************************************************************************/
+
+
+void diff(GF2X& x, const GF2X& a);
+GF2X diff(const GF2X& a);
+// x = derivative of a
+
+
+void reverse(GF2X& x, const GF2X& a, long hi);
+GF2X reverse(const GF2X& a, long hi);
+
+void reverse(GF2X& x, const GF2X& a);
+GF2X reverse(const GF2X& a);
+
+// x = reverse of a[0]..a[hi] (hi >= -1);
+// hi defaults to deg(a) in second version
+
+
+void VectorCopy(vec_GF2& x, const GF2X& a, long n);
+vec_GF2 VectorCopy(const GF2X& a, long n);
+// x = copy of coefficient vector of a of length exactly n.
+// input is truncated or padded with zeroes as appropriate.
+
+// Note that there is also a conversion routine from GF2X to vec_GF2
+// that makes the length of the vector match the number of coefficients
+// of the polynomial.
+
+long weight(const GF2X& a);
+// returns the # of nonzero coefficients in a
+
+void GF2XFromBytes(GF2X& x, const unsigned char *p, long n);
+GF2X GF2XFromBytes(const unsigned char *p, long n);
+// conversion from byte vector to polynomial.
+// x = sum(p[i]*X^(8*i), i = 0..n-1), where the bits of p[i] are interpreted
+// as a polynomial in the natural way (i.e., p[i] = 1 is interpreted as 1,
+// p[i] = 2 is interpreted as X, p[i] = 3 is interpreted as X+1, etc.).
+// In the unusual event that characters are wider than 8 bits,
+// only the low-order 8 bits of p[i] are used.
+
+void BytesFromGF2X(unsigned char *p, const GF2X& a, long n);
+// conversion from polynomial to byte vector.
+// p[0..n-1] are computed so that
+//     a = sum(p[i]*X^(8*i), i = 0..n-1) mod X^(8*n),
+// where the values p[i] are interpreted as polynomials as in GF2XFromBytes
+// above.
+
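+// Example (sketch): a round trip between a byte string and a polynomial.
+
+   unsigned char buf[2] = { 0x03, 0x01 }; // low byte = X+1, next byte = 1
+   GF2X f = GF2XFromBytes(buf, 2);        // f = X^8 + X + 1
+   unsigned char out[2];
+   BytesFromGF2X(out, f, 2);              // out[0] == 0x03, out[1] == 0x01
+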
+long NumBits(const GF2X& a);
+// returns number of bits of a, i.e., deg(a) + 1.
+
+long NumBytes(const GF2X& a);
+// returns number of bytes of a, i.e., floor((NumBits(a)+7)/8)
+
+
+
+
+/**************************************************************************\
+
+                             Random Polynomials
+
+\**************************************************************************/
+
+void random(GF2X& x, long n);
+GF2X random_GF2X(long n);
+// x = random polynomial of degree < n
+
+
+
+/**************************************************************************\
+
+                       Arithmetic mod X^n
+
+Required: n >= 0; otherwise, an error is raised.
+
+\**************************************************************************/
+
+void trunc(GF2X& x, const GF2X& a, long n); // x = a % X^n
+GF2X trunc(const GF2X& a, long n);
+
+void MulTrunc(GF2X& x, const GF2X& a, const GF2X& b, long n);
+GF2X MulTrunc(const GF2X& a, const GF2X& b, long n);
+// x = a * b % X^n
+
+void SqrTrunc(GF2X& x, const GF2X& a, long n);
+GF2X SqrTrunc(const GF2X& a, long n);
+// x = a^2 % X^n
+
+void InvTrunc(GF2X& x, const GF2X& a, long n);
+GF2X InvTrunc(const GF2X& a, long n);
+// computes x = a^{-1} % X^n.  Must have ConstTerm(a) invertible.
+
+/**************************************************************************\
+
+                Modular Arithmetic (without pre-conditioning)
+
+Arithmetic mod f.
+
+All inputs and outputs are polynomials of degree less than deg(f), and
+deg(f) > 0.
+
+NOTE: if you want to do many computations with a fixed f, use the
+GF2XModulus data structure and associated routines below for better
+performance.
+
+\**************************************************************************/
+
+void MulMod(GF2X& x, const GF2X& a, const GF2X& b, const GF2X& f);
+GF2X MulMod(const GF2X& a, const GF2X& b, const GF2X& f);
+// x = (a * b) % f
+
+void SqrMod(GF2X& x, const GF2X& a, const GF2X& f);
+GF2X SqrMod(const GF2X& a, const GF2X& f);
+// x = a^2 % f
+
+void MulByXMod(GF2X& x, const GF2X& a, const GF2X& f);
+GF2X MulByXMod(const GF2X& a, const GF2X& f);
+// x = (a * X) mod f
+
+void InvMod(GF2X& x, const GF2X& a, const GF2X& f);
+GF2X InvMod(const GF2X& a, const GF2X& f);
+// x = a^{-1} % f, error if a is not invertible
+
+long InvModStatus(GF2X& x, const GF2X& a, const GF2X& f);
+// if (a, f) = 1, returns 0 and sets x = a^{-1} % f; otherwise,
+// returns 1 and sets x = (a, f)
+
+
+// for modular exponentiation, see below
+
+
+
+/**************************************************************************\
+
+                     Modular Arithmetic with Pre-Conditioning
+
+If you need to do a lot of arithmetic modulo a fixed f, build
+GF2XModulus F for f.  This pre-computes information about f that
+speeds up subsequent computations.
+
+As an example, the following routine computes the product modulo f of a vector
+of polynomials.
+
+#include <NTL/GF2X.h>
+
+void product(GF2X& x, const vec_GF2X& v, const GF2X& f)
+{
+   GF2XModulus F(f);
+   GF2X res;
+   res = 1;
+   long i;
+   for (i = 0; i < v.length(); i++)
+      MulMod(res, res, v[i], F);
+   x = res;
+}
+
+
+Note that automatic conversions are provided so that a GF2X can
+be used wherever a GF2XModulus is required, and a GF2XModulus
+can be used wherever a GF2X is required.
+
+The GF2XModulus routines optimize several important special cases:
+
+  - f = X^n + X^k + 1, where k <= min((n+1)/2, n-NTL_BITS_PER_LONG)
+
+  - f = X^n + X^{k_3} + X^{k_2} + X^{k_1} + 1,
+      where k_3 <= min((n+1)/2, n-NTL_BITS_PER_LONG)
+
+  - f = X^n + g, where deg(g) is small
+
+
+\**************************************************************************/
+
+class GF2XModulus {
+public:
+   GF2XModulus(); // initially in an unusable state
+   ~GF2XModulus();
+
+   GF2XModulus(const GF2XModulus&);  // copy
+
+   GF2XModulus& operator=(const GF2XModulus&);   // assignment
+
+   GF2XModulus(const GF2X& f); // initialize with f, deg(f) > 0
+
+   operator const GF2X& () const;
+   // read-only access to f, implicit conversion operator
+
+   const GF2X& val() const;
+   // read-only access to f, explicit notation
+
+   long WordLength() const;
+   // returns word-length of residues
+};
+
+void build(GF2XModulus& F, const GF2X& f);
+// pre-computes information about f and stores it in F; deg(f) > 0.
+// Note that the declaration GF2XModulus F(f) is equivalent to
+// GF2XModulus F; build(F, f).
+
+// In the following, f refers to the polynomial f supplied to the
+// build routine, and n = deg(f).
+
+long deg(const GF2XModulus& F);  // return deg(f)
+
+void MulMod(GF2X& x, const GF2X& a, const GF2X& b, const GF2XModulus& F);
+GF2X MulMod(const GF2X& a, const GF2X& b, const GF2XModulus& F);
+// x = (a * b) % f; deg(a), deg(b) < n
+
+void SqrMod(GF2X& x, const GF2X& a, const GF2XModulus& F);
+GF2X SqrMod(const GF2X& a, const GF2XModulus& F);
+// x = a^2 % f; deg(a) < n
+
+void MulByXMod(GF2X& x, const GF2X& a, const GF2XModulus& F);
+GF2X MulByXMod(const GF2X& a, const GF2XModulus& F);
+// x = (a * X) mod F
+
+void PowerMod(GF2X& x, const GF2X& a, const ZZ& e, const GF2XModulus& F);
+GF2X PowerMod(const GF2X& a, const ZZ& e, const GF2XModulus& F);
+
+void PowerMod(GF2X& x, const GF2X& a, long e, const GF2XModulus& F);
+GF2X PowerMod(const GF2X& a, long e, const GF2XModulus& F);
+
+// x = a^e % f; deg(a) < n (e may be negative)
+
+void PowerXMod(GF2X& x, const ZZ& e, const GF2XModulus& F);
+GF2X PowerXMod(const ZZ& e, const GF2XModulus& F);
+
+void PowerXMod(GF2X& x, long e, const GF2XModulus& F);
+GF2X PowerXMod(long e, const GF2XModulus& F);
+
+// x = X^e % f (e may be negative)
+
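+// Example (sketch): X^e mod f for a huge exponent e, without ever
+// forming X^e itself; f is a previously constructed modulus.
+
+   GF2XModulus F(f);
+   ZZ e = power_ZZ(2, 100);  // e = 2^100
+   GF2X x = PowerXMod(e, F); // x = X^{2^100} mod f
+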
+
+void rem(GF2X& x, const GF2X& a, const GF2XModulus& F);
+// x = a % f
+
+void DivRem(GF2X& q, GF2X& r, const GF2X& a, const GF2XModulus& F);
+// q = a/f, r = a%f
+
+void div(GF2X& q, const GF2X& a, const GF2XModulus& F);
+// q = a/f
+
+// operator notation:
+
+GF2X operator/(const GF2X& a, const GF2XModulus& F);
+GF2X operator%(const GF2X& a, const GF2XModulus& F);
+
+GF2X& operator/=(GF2X& x, const GF2XModulus& F);
+GF2X& operator%=(GF2X& x, const GF2XModulus& F);
+
+
+/**************************************************************************\
+
+                             vectors of GF2X's
+
+\**************************************************************************/
+
+
+typedef Vec<GF2X> vec_GF2X; // backward compatibility
+
+
+/**************************************************************************\
+
+                              Modular Composition
+
+Modular composition is the problem of computing g(h) mod f for
+polynomials f, g, and h.
+
+The algorithm employed is that of Brent & Kung (Fast algorithms for
+manipulating formal power series, JACM 25:581-595, 1978), which uses
+O(n^{1/2}) modular polynomial multiplications, and O(n^2) scalar
+operations.
+
+
+
+\**************************************************************************/
+
+void CompMod(GF2X& x, const GF2X& g, const GF2X& h, const GF2XModulus& F);
+GF2X CompMod(const GF2X& g, const GF2X& h, const GF2XModulus& F);
+// x = g(h) mod f; deg(h) < n
+
+void Comp2Mod(GF2X& x1, GF2X& x2, const GF2X& g1, const GF2X& g2,
+              const GF2X& h, const GF2XModulus& F);
+// xi = gi(h) mod f (i=1,2), deg(h) < n.
+
+void Comp3Mod(GF2X& x1, GF2X& x2, GF2X& x3,
+              const GF2X& g1, const GF2X& g2, const GF2X& g3,
+              const GF2X& h, const GF2XModulus& F);
+// xi = gi(h) mod f (i=1..3), deg(h) < n
+
+
+/**************************************************************************\
+
+                     Composition with Pre-Conditioning
+
+If a single h is going to be used with many g's then you should build
+a GF2XArgument for h, and then use the compose routine below.  The
+routine build computes and stores h, h^2, ..., h^m mod f.  After this
+pre-computation, composing a polynomial of degree roughly n with h
+takes n/m multiplies mod f, plus n^2 scalar multiplies.  Thus,
+increasing m increases the space requirement and the pre-computation
+time, but reduces the composition time.
+
+\**************************************************************************/
+
+
+struct GF2XArgument {
+   vec_GF2X H;
+};
+
+void build(GF2XArgument& H, const GF2X& h, const GF2XModulus& F, long m);
+// Pre-Computes information about h.  m > 0, deg(h) < n
+
+void CompMod(GF2X& x, const GF2X& g, const GF2XArgument& H,
+             const GF2XModulus& F);
+
+GF2X CompMod(const GF2X& g, const GF2XArgument& H,
+             const GF2XModulus& F);
+
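+// Example (sketch): one pre-computation for h, amortized over many
+// compositions g[i](h) mod f; here f, h, and the vector g are assumed
+// to be given, with deg(h) < deg(f).
+
+   GF2XModulus F(f);
+   GF2XArgument H;
+   build(H, h, F, 32);             // stores h, h^2, ..., h^32 mod f
+   vec_GF2X x;
+   x.SetLength(g.length());
+   for (long i = 0; i < g.length(); i++)
+      CompMod(x[i], g[i], H, F);   // x[i] = g[i](h) mod f
+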
+
+extern long GF2XArgBound;
+
+// Initially 0.  If this is set to a value greater than zero, then
+// composition routines will allocate a table of no more than about
+// GF2XArgBound KB.  Setting this value affects all compose routines
+// and the power projection and minimal polynomial routines below,
+// and indirectly affects many routines in GF2XFactoring.
+
+/**************************************************************************\
+
+                     Power Projection routines
+
+\**************************************************************************/
+
+void project(GF2& x, const vec_GF2& a, const GF2X& b);
+GF2 project(const vec_GF2& a, const GF2X& b);
+// x = inner product of a with coefficient vector of b
+
+
+void ProjectPowers(vec_GF2& x, const vec_GF2& a, long k,
+                   const GF2X& h, const GF2XModulus& F);
+
+vec_GF2 ProjectPowers(const vec_GF2& a, long k,
+                   const GF2X& h, const GF2XModulus& F);
+
+// Computes the vector
+
+//   project(a, 1), project(a, h), ..., project(a, h^{k-1} % f).
+
+// Restriction: must have a.length() <= deg(F) and deg(h) < deg(F).
+// This operation is really the "transpose" of the modular composition
+// operation.
+
+void ProjectPowers(vec_GF2& x, const vec_GF2& a, long k,
+                   const GF2XArgument& H, const GF2XModulus& F);
+
+vec_GF2 ProjectPowers(const vec_GF2& a, long k,
+                   const GF2XArgument& H, const GF2XModulus& F);
+
+// same as above, but uses a pre-computed GF2XArgument
+
+
+// lower-level routines for transposed modular multiplication:
+
+class GF2XTransMultiplier { /* ... */ };
+
+void build(GF2XTransMultiplier& B, const GF2X& b, const GF2XModulus& F);
+
+// build a GF2XTransMultiplier to use in the following routine:
+
+void UpdateMap(vec_GF2& x, const vec_GF2& a, const GF2XTransMultiplier& B,
+         const GF2XModulus& F);
+
+vec_GF2 UpdateMap(const vec_GF2& a, const GF2XTransMultiplier& B,
+         const GF2XModulus& F);
+
+// Computes the vector
+
+//   project(a, b), project(a, (b*X)%f), ..., project(a, (b*X^{n-1})%f)
+
+// Restriction: must have a.length() <= deg(F) and deg(b) < deg(F).
+// This is really the transpose of modular multiplication.
+// Input may have "high order" zeroes stripped.
+// Output always has high order zeroes stripped.
+
+
+/**************************************************************************\
+
+                              Minimum Polynomials
+
+All of these routines implement the algorithm from [Shoup, J. Symbolic
+Comp. 17:371-391, 1994] and [Shoup, J. Symbolic Comp. 20:363-397,
+1995], based on transposed modular composition and the
+Berlekamp/Massey algorithm.
+
+\**************************************************************************/
+
+
+void MinPolySeq(GF2X& h, const vec_GF2& a, long m);
+// computes the minimum polynomial of a linearly generated sequence; m
+// is a bound on the degree of the polynomial; required: a.length() >=
+// 2*m
+
+void ProbMinPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F, long m);
+GF2X ProbMinPolyMod(const GF2X& g, const GF2XModulus& F, long m);
+
+void ProbMinPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F);
+GF2X ProbMinPolyMod(const GF2X& g, const GF2XModulus& F);
+
+// computes the monic minimal polynomial of (g mod f).  m = a bound on
+// the degree of the minimal polynomial; in the second version, this
+// argument defaults to n.  The algorithm is probabilistic; it always
+// returns a divisor of the minimal polynomial, possibly a proper divisor.
+
+void MinPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F, long m);
+GF2X MinPolyMod(const GF2X& g, const GF2XModulus& F, long m);
+
+void MinPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F);
+GF2X MinPolyMod(const GF2X& g, const GF2XModulus& F);
+
+// same as above, but guarantees that result is correct
+
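+// Example (sketch): computing the certified minimal polynomial of
+// (g mod f) for given f and g; the result h satisfies h(g) == 0 mod f.
+
+   GF2XModulus F(f);
+   GF2X h = MinPolyMod(g, F);
+   // sanity check: CompMod(h, g, F) is the zero polynomial
+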
+void IrredPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F, long m);
+GF2X IrredPolyMod(const GF2X& g, const GF2XModulus& F, long m);
+
+void IrredPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F);
+GF2X IrredPolyMod(const GF2X& g, const GF2XModulus& F);
+
+// same as above, but assumes that F is irreducible, or at least that
+// the minimal poly of g is itself irreducible.  The algorithm is
+// deterministic (and is always correct).
+
+
+/**************************************************************************\
+
+                                Traces
+
+\**************************************************************************/
+
+
+void TraceMod(GF2& x, const GF2X& a, const GF2XModulus& F);
+GF2 TraceMod(const GF2X& a, const GF2XModulus& F);
+
+void TraceMod(GF2& x, const GF2X& a, const GF2X& f);
+GF2 TraceMod(const GF2X& a, const GF2X& f);
+// x = Trace(a mod f); deg(a) < deg(f)
+
+
+void TraceVec(vec_GF2& S, const GF2X& f);
+vec_GF2 TraceVec(const GF2X& f);
+// S[i] = Trace(X^i mod f), i = 0..deg(f)-1; 0 < deg(f)
+
+// The above routines implement the asymptotically fast trace
+// algorithm from [von zur Gathen and Shoup, Computational Complexity,
+// 1992].
+
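+// Example (sketch): TraceVec gives Trace(X^i mod f) for all i in one
+// call, rather than deg(f) separate TraceMod calls; f and a (with
+// deg(a) < deg(f)) are assumed to be given.
+
+   vec_GF2 S = TraceVec(f); // S[i] = Trace(X^i mod f)
+   GF2 t = TraceMod(a, f);  // trace of a single element
+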
+
+/**************************************************************************\
+
+                           Miscellany
+
+\**************************************************************************/
+
+
+void clear(GF2X& x); // x = 0
+void set(GF2X& x); // x = 1
+
+
+void GF2X::kill();
+// f.kill() sets f to 0 and frees all memory held by f.  
+
+GF2X::GF2X(INIT_SIZE_TYPE, long n);
+// GF2X(INIT_SIZE, n) initializes to zero, but space is pre-allocated
+// for n coefficients
+
+static const GF2X& zero();
+// GF2X::zero() is a read-only reference to 0
+
+void GF2X::swap(GF2X& x);
+void swap(GF2X& x, GF2X& y);
+// swap (via "pointer swapping" -- if possible)
+
+GF2X::GF2X(long i, GF2 c);
+GF2X::GF2X(long i, long c);
+// initialize to c*X^i, provided for backward compatibility
+
+// SIZE INVARIANT: for any f in GF2X, deg(f)+1 < 2^(NTL_BITS_PER_LONG-4).
+
diff --git a/thirdparty/linux/ntl/doc/GF2X.txt b/thirdparty/linux/ntl/doc/GF2X.txt
new file mode 100644
index 0000000000..34daf2e262
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/GF2X.txt
@@ -0,0 +1,815 @@
+[815 lines: plain-text source of GF2X.cpp.html; the content duplicates the GF2X module documentation above verbatim.]
diff --git a/thirdparty/linux/ntl/doc/GF2XFactoring.cpp.html b/thirdparty/linux/ntl/doc/GF2XFactoring.cpp.html
new file mode 100644
index 0000000000..f702f49c07
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/GF2XFactoring.cpp.html
@@ -0,0 +1,127 @@
+/**************************************************************************\
+
+MODULE: GF2XFactoring
+
+SUMMARY:
+
+Routines are provided for factorization in F_2[X], as well as routines
+for related problems such as testing irreducibility and constructing
+irreducible polynomials of given degree.
+
+\**************************************************************************/
+
+#include <NTL/GF2X.h>
+#include <NTL/pair_GF2X_long.h>
+
+void SquareFreeDecomp(vec_pair_GF2X_long& u, const GF2X& f);
+vec_pair_GF2X_long SquareFreeDecomp(const GF2X& f);
+
+// Performs square-free decomposition.  f must be monic.  If f =
+// prod_i g_i^i, then u is set to a list of pairs (g_i, i).  The list
+// is in increasing order of i, with trivial terms (i.e., g_i = 1)
+// deleted.
+
+
+void DDF(vec_pair_GF2X_long& factors, const GF2X& f, long verbose=0);
+vec_pair_GF2X_long DDF(const GF2X& f, long verbose=0);
+
+// This computes a distinct-degree factorization.  The input must be
+// monic and square-free.  factors is set to a list of pairs (g, d),
+// where g is the product of all irreducible factors of f of degree d.
+// Only nontrivial pairs (i.e., g != 1) are included.
+
+
+
+void EDF(vec_GF2X& factors, const GF2X& f, long d, long verbose=0);
+vec_GF2X EDF(const GF2X& f, long d, long verbose=0);
+
+// Performs equal-degree factorization.  f must be monic and
+// square-free, with all irreducible factors of the same degree;
+// d is the common degree of the irreducible factors of f.
+
+void SFCanZass(vec_GF2X& factors, const GF2X& f, long verbose=0);
+vec_GF2X SFCanZass(const GF2X& f, long verbose=0);
+
+
+// Assumes f is monic and square-free.  Returns a list of the factors of f.
+
+
+void CanZass(vec_pair_GF2X_long& factors, const GF2X& f, long verbose=0);
+vec_pair_GF2X_long CanZass(const GF2X& f, long verbose=0);
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SquareFreeDecomp and SFCanZass.
+
+
+void mul(GF2X& f, const vec_pair_GF2X_long& v);
+GF2X mul(const vec_pair_GF2X_long& v);
+
+// multiplies polynomials, with multiplicities
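+
+// A small end-to-end sketch (illustrative only):
+
+   GF2X f;
+   SetCoeff(f, 4); SetCoeff(f, 1); SetCoeff(f, 0);  // f = X^4 + X + 1
+   GF2X g = sqr(f);                                 // g = f^2, monic
+   vec_pair_GF2X_long factors = CanZass(g);
+   GF2X h = mul(factors);                           // h == g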
+
+
+/**************************************************************************\
+
+                            Irreducible Polynomials
+
+\**************************************************************************/
+
+long IterIrredTest(const GF2X& f);
+
+// performs an iterative deterministic irreducibility test, based on
+// DDF.  Fast on average (when f has a small factor).
+
+void BuildSparseIrred(GF2X& f, long n);
+GF2X BuildSparseIrred_GF2X(long n);
+
+// Builds a monic irreducible polynomial of degree n.
+// If there is an irreducible trinomial X^n + X^k + 1,
+// then the one with minimal k is chosen.
+// Otherwise, if there is an irreducible pentanomial
+// X^n + X^k3 + X^k2 + X^k1 + 1, then the one with minimal
+// k3 is chosen (minimizing first k2 and then k1).
+// Otherwise, if there is neither an irreducible trinomial
+// nor an irreducible pentanomial, the result of BuildIrred (see
+// below) is used---this case is probably only of academic interest,
+// since it is a reasonable, but unproved, conjecture that such
+// trinomials or pentanomials exist for every n > 1.
+
+// For n <= 2048, the polynomial is constructed
+// by table lookup in a pre-computed table.
+
+// The GF2XModulus data structure and routines (and indirectly GF2E)
+// are optimized to deal with the output from BuildSparseIrred.
+
+void BuildIrred(GF2X& f, long n);
+GF2X BuildIrred_GF2X(long n);
+
+// Build a monic irreducible poly of degree n.  The polynomial
+// constructed is "canonical" in the sense that it is of the form
+// f=X^n + g, where the bits of g are those of the smallest
+// non-negative integer that makes f irreducible.
+
+// The GF2XModulus data structure and routines (and indirectly GF2E)
+// are optimized to deal with the output from BuildIrred.
+
+// Note that the output from BuildSparseIrred will generally yield
+// a "better" representation (in terms of efficiency) for
+// GF(2^n) than the output from BuildIrred.
+
+
+
+void BuildRandomIrred(GF2X& f, const GF2X& g);
+GF2X BuildRandomIrred(const GF2X& g);
+
+// g is a monic irreducible polynomial.  Constructs a random monic
+// irreducible polynomial f of the same degree.
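+
+// For example (illustrative sketch):
+
+   GF2X f = BuildSparseIrred_GF2X(256);  // sparse irreducible, deg 256
+   long ok = IterIrredTest(f);           // ok == 1
+   GF2X g = BuildRandomIrred(f);         // random irreducible, deg 256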
+
+
diff --git a/thirdparty/linux/ntl/doc/GF2XFactoring.txt b/thirdparty/linux/ntl/doc/GF2XFactoring.txt
new file mode 100644
index 0000000000..133e420ea4
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/GF2XFactoring.txt
@@ -0,0 +1,117 @@
+[117 lines: plain-text source of GF2XFactoring.cpp.html; the content duplicates the GF2XFactoring module documentation above verbatim.]
diff --git a/thirdparty/linux/ntl/doc/GF2XVec.cpp.html b/thirdparty/linux/ntl/doc/GF2XVec.cpp.html
new file mode 100644
index 0000000000..e888fb688b
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/GF2XVec.cpp.html
@@ -0,0 +1,73 @@
+/**************************************************************************\
+
+MODULE: GF2XVec
+
+SUMMARY:
+
+The class GF2XVec implements vectors of fixed-length GF2X's.  You can
+allocate a vector of GF2X's of a specified length, where the maximum
+size of each GF2X is also specified.  These parameters can be specified
+either with a constructor, or with SetSize.  It is an error to
+try to re-size a vector of non-zero length, or store a GF2X that doesn't fit.
+The space can be released with "kill", and then you are free to call SetSize
+again.  If you want more flexible---but less efficient---vectors, use
+vec_GF2X.
+
+\**************************************************************************/
+
+#include <NTL/GF2X.h>
+
+
+class GF2XVec {
+public:
+   GF2XVec();
+
+   GF2XVec& operator=(const GF2XVec&);
+   // first kill()'s destination (unless source and destination are
+   // identical)
+
+   GF2XVec(const GF2XVec&);
+   ~GF2XVec();
+
+   GF2XVec(long n, long d);
+   // sets length to n and max size of each element to d,
+   // where the size d measures the number of words
+
+   void SetSize(long n, long d);
+   // sets length to n and max size of each element to d,
+   // where the size d measures the number of words
+
+   long length() const;
+   // length of vector
+
+   long BaseSize() const;
+   // max size of each element
+
+   void kill();
+   // release space
+
+
+   GF2X* elts();
+   const GF2X* elts() const;
+   // pointer to first element
+
+   GF2X& operator[](long i);
+   const GF2X& operator[](long i) const;
+   // indexing operator; starts at 0; no range checking
+};
+
+
+void swap(GF2XVec& x, GF2XVec& y);
+// swaps x and y by swapping pointers
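+
+// A small usage sketch (illustrative only):
+
+   GF2XVec v(100, 2);       // 100 slots, each at most 2 words wide
+   GF2X a;
+   SetCoeff(a, 17);         // a = X^17, fits within 2 words
+   v[0] = a;
+   v.kill();                // release the space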
+
+
diff --git a/thirdparty/linux/ntl/doc/GF2XVec.txt b/thirdparty/linux/ntl/doc/GF2XVec.txt
new file mode 100644
index 0000000000..15594a6fa9
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/GF2XVec.txt
@@ -0,0 +1,63 @@
+[63 lines: plain-text source of GF2XVec.cpp.html; the content duplicates the GF2XVec module documentation above verbatim.]
diff --git a/thirdparty/linux/ntl/doc/HNF.cpp.html b/thirdparty/linux/ntl/doc/HNF.cpp.html
new file mode 100644
index 0000000000..b819a54261
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/HNF.cpp.html
@@ -0,0 +1,39 @@
+/**************************************************************************\
+
+MODULE: HNF
+
+SUMMARY:
+
+A routine for computing Hermite Normal Forms
+
+\**************************************************************************/
+
+
+#include <NTL/mat_ZZ.h>
+
+void HNF(mat_ZZ& W, const mat_ZZ& A, const ZZ& D);
+
+// The input matrix A is an n x m matrix of rank m (so n >= m), and D
+// is a multiple of the determinant of the lattice L spanned by the
+// rows of A.  W is computed as the Hermite Normal Form of A; that is,
+// W is the unique m x m matrix whose rows span L, such that
+
+//   - W is lower triangular,
+//   - the diagonal entries are positive,
+//   - any entry below the diagonal is a non-negative number
+//     strictly less than the diagonal entry in its column.
+
+// Currently, this is implemented using the algorithm of [P. Domich,
+// R. Kannan and L. Trotter, Math. Oper. Research 12:50-59, 1987].
+
+
diff --git a/thirdparty/linux/ntl/doc/HNF.txt b/thirdparty/linux/ntl/doc/HNF.txt
new file mode 100644
index 0000000000..791a5ecbdd
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/HNF.txt
@@ -0,0 +1,29 @@
+[29 lines: plain-text source of HNF.cpp.html; the content duplicates the HNF module documentation above verbatim.]
diff --git a/thirdparty/linux/ntl/doc/LLL.cpp.html b/thirdparty/linux/ntl/doc/LLL.cpp.html
new file mode 100644
index 0000000000..e37beee50c
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/LLL.cpp.html
@@ -0,0 +1,442 @@
+/**************************************************************************\
+
+MODULE: LLL
+
+SUMMARY:
+
+Routines are provided for lattice basis reduction, including both
+exact-arithmetic variants (slow but sure) and floating-point variants
+(fast but only approximate).
+
+For an introduction to the basics of LLL reduction, see
+[H. Cohen, A Course in Computational Algebraic Number Theory, Springer, 1993].
+
+The LLL algorithm was introduced in [A. K. Lenstra, H. W. Lenstra, and
+L. Lovasz, Math. Ann. 261 (1982), 515-534].
+
+\**************************************************************************/
+
+
+
+
+#include <NTL/mat_ZZ.h>
+
+
+
+/**************************************************************************\
+
+                         Exact Arithmetic Variants
+
+\**************************************************************************/
+
+
+
+
+long LLL(ZZ& det2, mat_ZZ& B, long verbose = 0);
+long LLL(ZZ& det2, mat_ZZ& B, mat_ZZ& U, long verbose = 0);
+
+long LLL(ZZ& det2, mat_ZZ& B, long a, long b, long verbose = 0);
+long LLL(ZZ& det2, mat_ZZ& B, mat_ZZ& U, long a, long b, long verbose = 0);
+
+
+// performs LLL reduction.
+
+// B is an m x n matrix, viewed as m rows of n-vectors.  m may be less
+// than, equal to, or greater than n, and the rows need not be
+// linearly independent.  B is transformed into an LLL-reduced basis,
+// and the return value is the rank r of B.  The first m-r rows of B
+// are zero.  
+
+// More specifically, elementary row transformations are performed on
+// B so that the non-zero rows of new-B form an LLL-reduced basis
+// for the lattice spanned by the rows of old-B.
+// The default reduction parameter is delta=3/4, which means
+// that the squared length of the first non-zero basis vector
+// is no more than 2^{r-1} times that of the shortest vector in
+// the lattice.
+
+// det2 is calculated as the *square* of the determinant
+// of the lattice---note that sqrt(det2) is in general an integer
+// only when r = n.
+
+// In the second version, U is set to the transformation matrix, so
+// that U is a unimodular m x m matrix with U * old-B = new-B.
+// Note that the first m-r rows of U form a basis (as a lattice)
+// for the kernel of old-B.
+
+// The third and fourth versions allow an arbitrary reduction
+// parameter delta=a/b, where 1/4 < a/b <= 1, where a and b are positive
+// integers.
+// For a basis reduced with parameter delta, the squared length
+// of the first non-zero basis vector is no more than
+// 1/(delta-1/4)^{r-1} times that of the shortest vector in the
+// lattice (see, e.g., the article by Schnorr and Euchner mentioned below).
+
+// The algorithm employed here is essentially the one in Cohen's book.
+
+
+// Some variations:
+
+long LLL_plus(vec_ZZ& D, mat_ZZ& B, long verbose = 0);
+long LLL_plus(vec_ZZ& D, mat_ZZ& B, mat_ZZ& U, long verbose = 0);
+
+long LLL_plus(vec_ZZ& D, mat_ZZ& B, long a, long b, long verbose = 0);
+long LLL_plus(vec_ZZ& D, mat_ZZ& B, mat_ZZ& U, long a, long b,
+              long verbose = 0);
+
+// These are variations that return a bit more information about the
+// reduced basis.  If r is the rank of B, then D is a vector of length
+// r+1, such that D[0] = 1, and for i = 1..r, D[i]/D[i-1] is equal to
+// the square of the length of the i-th vector of the Gram-Schmidt basis
+// corresponding to the (non-zero) rows of the LLL reduced basis B.
+// In particular, D[r] is equal to the value det2 computed by the
+// plain LLL routines.
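+
+// A typical call (illustrative sketch):
+
+   mat_ZZ B;
+   B.SetDims(3, 3);
+   // ... fill in B ...
+   ZZ det2;
+   mat_ZZ U;
+   long r = LLL(det2, B, U);  // B is now LLL-reduced; U*old-B == new-B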
+
+/**************************************************************************\
+
+                      Computing Images and Kernels
+
+\**************************************************************************/
+
+
+long image(ZZ& det2, mat_ZZ& B, long verbose = 0);
+long image(ZZ& det2, mat_ZZ& B, mat_ZZ& U, long verbose = 0);
+
+// This computes the image of B using a "cheap" version of the LLL:
+// it performs the usual "size reduction", but it only swaps
+// vectors when linear dependencies are found.
+// I haven't seen this described in the literature, but it works
+// fairly well in practice, and can also easily be shown
+// to run in a reasonable amount of time with reasonably bounded
+// numbers.
+
+// As in the above LLL routines, the return value is the rank r of B, and the
+// first m-r rows will be zero.  U is a unimodular m x m matrix with
+// U * old-B = new-B.  det2 has the same meaning as above.
+
+// Note that the first m-r rows of U form a basis (as a lattice)
+// for the kernel of old-B.
+// This is a reasonably practical algorithm for computing kernels.
+// One can also apply image() to the kernel to get somewhat
+// shorter basis vectors for the kernels (there are no linear
+// dependencies, but the size reduction may anyway help).
+// For even shorter kernel basis vectors, one can apply
+// LLL().
+
+
+/**************************************************************************\
+
+                    Finding a vector in a lattice
+
+\**************************************************************************/
+
+long LatticeSolve(vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& y, long reduce=0);
+
+// This tests if for given A and y, there exists x such that x*A = y;
+// if so, x is set to a solution, and the value 1 is returned;
+// otherwise, x is left unchanged, and the value 0 is returned.
+
+// The optional parameter reduce controls the 'quality' of the
+// solution vector;  if the rows of A are linearly dependent,
+// there are many solutions, if there are any at all.
+// The value of reduce controls the amount of effort that goes
+// into finding a 'short' solution vector x.
+
+//    reduce = 0: No particular effort is made to find a short solution.
+
+//    reduce = 1: A simple 'size reduction' algorithm is run on kernel(A);
+//                this is fast, and may yield somewhat shorter
+//                solutions than the default, but not necessarily
+//                very close at all to optimal.
+
+//    reduce = 2: the LLL algorithm is run on kernel(A);
+//                this may be significantly slower than the other options,
+//                but yields solutions that are provably close to optimal.
+//                More precisely, if kernel(A) has rank k,
+//                then the squared length of the obtained solution
+//                is no more than max(1, 2^(k-2)) times that of
+//                the optimal solution.  This makes use of slight
+//                variation of Babai's approximately nearest vector algorithm.
+
+// Of course, if the rows of A are linearly independent, then
+// the value of reduce is irrelevant: the solution, if it exists,
+// is unique.
+
+// Note that regardless of the value of reduce, the algorithm
+// runs in polynomial time, and hence the bit-length of the solution
+// vector is bounded by a polynomial in the bit-length of the inputs.
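+
+// For example (illustrative sketch; A and y set up as above):
+
+   vec_ZZ x;
+   if (LatticeSolve(x, A, y, 1)) {
+      // here x*A == y; reduce=1 ran size reduction on kernel(A)
+   }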
+
+
+
+
+/**************************************************************************\
+
+                   Floating Point Variants
+
+There are a number of floating point LLL variants available:
+you can choose the precision, the orthogonalization strategy,
+and the reduction condition.
+
+The wide variety of choices may seem a bit bewildering.
+See the discussion "How to choose?" below.
+
+*** Precision:
+
+  FP -- double
+  QP -- quad_float (quasi quadruple precision)
+        this is useful when roundoff errors can cause problems
+  XD -- xdouble (extended exponent doubles)
+        this is useful when numbers get too big
+  RR -- RR (arbitrary precision floating point)
+        this is useful for large precision and magnitudes
+
+  Generally speaking, the choice FP will be the fastest,
+  but may be prone to roundoff errors and/or overflow.
+  
+
+*** Orthogonalization Strategy:
+
+  -- Classical Gramm-Schmidt Orthogonalization.
+     This choice uses classical methods for computing
+     the Gramm-Schmidt orthogonalization.
+     It is fast but prone to stability problems.
+     This strategy was first proposed by Schnorr and Euchner
+     [C. P. Schnorr and M. Euchner, Proc. Fundamentals of Computation Theory,
+     LNCS 529, pp. 68-85, 1991].  
+     The version implemented here is substantially different, improving
+     both stability and performance.
+
+  -- Givens Orthogonalization.
+     This is a bit slower, but generally much more stable,
+     and is really the preferred orthogonalization strategy.
+     For a nice description of this, see Chapter 5 of  
+     [G. Golub and C. van Loan, Matrix Computations, 3rd edition,
+     Johns Hopkins Univ. Press, 1996].
+
+
+*** Reduction Condition:
+
+  -- LLL: the classical LLL reduction condition.
+
+  -- BKZ: Block Korkin-Zolotarev reduction.
+     This is slower, but yields a higher-quality basis,
+     i.e., one with shorter vectors.
+     See the Schnorr-Euchner paper for a description of this.
+     This basically generalizes the LLL reduction condition
+     from blocks of size 2 to blocks of larger size.
+
+
+************* Calling Syntax for LLL routines ***************
+
+long
+[G_]LLL_{FP,QP,XD,RR} (mat_ZZ& B, [ mat_ZZ& U, ] double delta = 0.99,
+                       long deep = 0, LLLCheckFct check = 0, long verbose = 0);
+
+* The [ ... ] notation indicates something optional,
+  and the { ... } indicates something that is chosen from
+  among several alternatives.
+
+* The return value is the rank of B (but see below if check != 0).
+
+* The optional prefix G_ indicates that Givens rotations are to be used;
+  otherwise, classical Gramm-Schmidt is used.
+
+* The choice FP, QP, XD, RR determines the precision used.
+
+* If the optional parameter U is given, then U is computed
+  as the transition matrix:
+
+     U * old_B = new_B
+
+* The optional argument "delta" is the reduction parameter, and may
+  be set so that 0.50 <= delta < 1.  Setting it close to 1 yields
+  shorter vectors, and also improves the stability, but increases the
+  running time.  Recommended value: delta = 0.99.
+
+* The optional parameter "deep" can be set to any positive integer,
+  which allows "deep insertions" of row k into row i, provided i <=
+  deep or k-i <= deep.  Larger values of deep will usually yield
+  shorter vectors, but the running time increases exponentially.
+
+  NOTE: use of "deep" is obsolete, and has been "deprecated".
+  It is recommended to use BKZ_FP to achieve higher-quality reductions.
+  Moreover, the Givens versions do not support "deep", and setting
+  deep != 0 will raise an error in this case.
+
+* The optional parameter "check" is a function that is invoked after
+  each size reduction with the current row as an argument.  If this
+  function returns a non-zero value, the LLL procedure is immediately
+  terminated.  Note that it is possible that some linear dependencies
+  remain undiscovered, so that the calculated rank value is in fact
+  too large.  In any case, zero rows discovered by the algorithm
+  will be placed at the beginning, as usual.
+
+  The check argument (if not zero) should be a routine taking
+  a const vec_ZZ& as an argument and returning a value of type long.
+  LLLCheckFct is defined via a typedef as:
+
+     typedef long (*LLLCheckFct)(const vec_ZZ&);
+
+  See the file subset.c for an example of the use of this feature.
+
+* The optional parameter "verbose" can be set to see all kinds of fun
+  things printed while the routine is executing.  A status report is
+  printed every once in a while, and the current basis is optionally
+  dumped to a file.  The behavior can be controlled with these global
+  variables:
+
+     extern char *LLLDumpFile;  // file to dump basis, 0 => no dump;
+                                // initially 0
+
+     extern double LLLStatusInterval; // seconds between status reports
+                                      // initially 900s = 15min
+
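+  A typical call (illustrative sketch):
+
+     mat_ZZ B;
+     // ... fill in B ...
+     long r = G_LLL_FP(B, 0.99);   // Givens rotations, double precision
+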
+
+
+ 
+************* Calling Syntax for BKZ routines ***************
+
+long
+[G_]BKZ_{FP,QP,QP1,XD,RR} (mat_ZZ& B, [ mat_ZZ& U, ] double delta=0.99,
+                          long BlockSize=10, long prune=0,
+                          LLLCheckFct check = 0, long verbose = 0);
+
+These functions are equivalent to the LLL routines above,
+except that Block Korkin-Zolotarev reduction is applied.
+We describe here only the differences in the calling syntax.
+
+* The optional parameter "BlockSize" specifies the size of the blocks
+  in the reduction.  High values yield shorter vectors, but the
+  running time increases exponentially with BlockSize.
+  BlockSize should be between 2 and the number of rows of B.
+
+* The optional parameter "prune" can be set to any positive number to
+  invoke the Volume Heuristic from [Schnorr and Horner, Eurocrypt
+  '95].  This can significantly reduce the running time, and hence
+  allow much bigger block size, but the quality of the reduction is
+  of course not as good in general.  Higher values of prune mean
+  better quality, and slower running time.  
+  When prune == 0, pruning is disabled.
+  Recommended usage: for BlockSize >= 30, set 10 <= prune <= 15.
+
+* The QP1 variant uses quad_float precision to compute Gramm-Schmidt,
+  but uses double precision in the search phase of the block reduction
+  algorithm.  This seems adequate for most purposes, and is faster
+  than QP, which uses quad_float precision uniformly throughout.
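+
+  A typical call (illustrative sketch):
+
+     mat_ZZ B;
+     // ... fill in B ...
+     long r = BKZ_FP(B, 0.99, 20);  // delta = 0.99, BlockSize = 20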
+
+
+******************** How to choose? *********************
+
+I think it is safe to say that nobody really understands
+how the LLL algorithm works.  The theoretical analyses are a long way
+from describing what "really" happens in practice.  Choosing the best
+variant for a certain application ultimately is a matter of trial
+and error.
+
+The first thing to try is LLL_FP.
+It is the fastest of the routines, and is adequate for many applications.
+
+If there are precision problems, you will most likely get
+a warning message, something like "warning--relaxing reduction".
+If there are overflow problems, you should get an error message
+saying that the numbers are too big.
+
+If either of these happens, the next thing to try is G_LLL_FP,
+which uses the somewhat slower, but more stable, Givens rotations.
+This approach also has the nice property that the numbers remain
+smaller, so there is less chance of an overflow.
+
+If you are still having precision problems with G_LLL_FP,
+try LLL_QP or G_LLL_QP, which use quad_float (quasi quadruple) precision.
+
+If you are still having overflow problems, try LLL_XD or G_LLL_XD.
+
+I haven't yet come across a case where one *really* needs the
+extra precision available in the RR variants.
+
+All of the above discussion applies to the BKZ variants as well.
+In addition, if you have a matrix with really big entries, you might try
+using G_LLL_FP or LLL_XD first to reduce the sizes of the numbers,
+before running one of the BKZ variants.
+
+Also, one shouldn't rule out using the "all integer" LLL routines.
+For some highly structured matrices, this is not necessarily
+much worse than some of the floating point versions, and can
+under certain circumstances even be better.
+
+
+******************** Implementation notes *********************
+
+For all the floating point variants, I use a "relaxed" size reduction
+condition.  Normally in LLL one makes all |\mu_{i,j}| <= 1/2.
+However, this can easily lead to infinite loops in floating point arithmetic.
+So I use the condition |\mu_{i,j}| <= 1/2 + fudge, where fudge is
+a very small number.  Even with this, one can fall into an infinite loop.
+To handle this situation, I added some logic that detects, at quite low cost,
+when an infinite loop has been entered.  When that happens, fudge
+is replaced by fudge*2, and a warning message "relaxing reduction condition"
+is printed.   We may do this relaxation several times.
+If fudge gets too big, we give up and abort, except that
+LLL_FP and BKZ_FP make one last attempt to recover:  they try to compute the
+Gram-Schmidt coefficients using RR and continue.  As described above,
+if you run into these problems, which you'll see in the error/warning
+messages, it is more effective to use the QP and/or Givens variants.
+
+For the Gram-Schmidt orthogonalization, lots of "bookkeeping" is done
+to avoid computing the same thing twice.
+
+For the Givens orthogonalization, we cannot do so many bookkeeping tricks.
+Instead, we "cache" a certain amount of information, which
+allows us to avoid computing certain things over and over again.
+
+There are many other hacks and tricks to speed things up even further.
+For example, if the matrix elements are small enough to fit in
+double precision floating point, the algorithms avoid almost
+all big integer arithmetic.  This is done in a dynamic, on-line
+fashion, so even if the numbers start out big, whenever they
+get small, we automatically switch to floating point arithmetic.
+
+\**************************************************************************/
+
+
+
+
+/**************************************************************************\
+
+                         Other Stuff
+
+\**************************************************************************/
+
+
+
+void ComputeGS(const mat_ZZ& B, mat_RR& mu, vec_RR& c);
+
+// Computes Gram-Schmidt data for B.  Assumes B is an m x n matrix of
+// rank m.  If { B^*(i) } is the orthogonal basis, then c(i) =
+// |B^*(i)|^2, and B^*(i) = B(i) - \sum_{j=1}^{i-1} mu(i,j) B^*(j).
+
+void NearVector(vec_ZZ& w, const mat_ZZ& B, const vec_ZZ& a);
+
+// Computes a vector w that is an approximation to the closest vector
+// in the lattice spanned by B to a, using the "closest plane"
+// algorithm from [Babai, Combinatorica 6:1-13, 1986].  B must be a
+// square matrix, and it is assumed that B is already LLL or BKZ
+// reduced (the better the reduction the better the approximation).
+// Note that arithmetic in RR is used with the current value of
+// RR::precision().
+
+// NOTE: Both of these routines use classical Gram-Schmidt
+// orthogonalization.
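+
+// Example (a hedged sketch; approx_cvp is a hypothetical helper):
+//
+//    void approx_cvp(vec_ZZ& w, mat_ZZ& B, const vec_ZZ& a)
+//    {
+//       LLL_FP(B, 0.99);        // B must be square and reduced first
+//       RR::SetPrecision(150);  // NearVector computes in RR at the
+//                               // current precision
+//       NearVector(w, B, a);    // w approximates the closest vector to a
+//    }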
+
+
+
+
diff --git a/thirdparty/linux/ntl/doc/LLL.txt b/thirdparty/linux/ntl/doc/LLL.txt
new file mode 100644
index 0000000000..5ced5e943f
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/LLL.txt
@@ -0,0 +1,432 @@
[LLL.txt: plain-text version of the LLL.cpp.html documentation; content identical]
diff --git a/thirdparty/linux/ntl/doc/Lazy.cpp.html b/thirdparty/linux/ntl/doc/Lazy.cpp.html
new file mode 100644
index 0000000000..bf5ec2b6ab
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/Lazy.cpp.html
@@ -0,0 +1,101 @@
+/***************************************************************************
+
+
+Lazy<T>: template class for lazy initialization of objects whose
+values do not change after initialization.
+In a multi-threaded environment, this makes use of "double checked locking"
+for an efficient, thread-safe solution.
+
+Usage:
+
+   Lazy<T> obj; // declaration of the lazy object
+
+    ...
+
+   do {
+      Lazy<T>::Builder builder(obj);
+      if (!builder()) break; // if we are not building, break out
+
+      UniquePtr<T> p;  // create a pointer
+
+         ...
+
+      builder.move(p); // move p into the object to complete the
+                       // initialization
+   } while(0);  // When this scope closes, the object is fully initialized.
+                // subsequent attempts to build the object will yield
+                // !builder.built()
+
+
+   T objCopy = *obj;   // *obj returns a read-only reference
+                       // one can also use -> operator
+
+It is important to follow this recipe carefully.  In particular,
+the builder must be enclosed in a scope, as its destructor
+plays a crucial role in finalizing the initialization.
+
+NOTE: if p is null in builder.move(p), the object is still considered
+built.
+
+****************************************************************************/
+
+template<class T>
+class Lazy {
+public:
+   Lazy();
+
+   Lazy(const Lazy&);
+   Lazy& operator=(const Lazy&);
+   // deep copies using T's copy constructor
+   // EXCEPTIONS: may throw (but provides strong ES guarantee)
+
+   const T& operator*()  const;     // pointer access
+   const T* operator->() const;
+   const T* get() const;
+
+   operator fake_null_type() const;
+   // allows test against 0
+  
+   ~Lazy();
+
+   void kill();  // destroy and reset
+
+   bool built() const; // test if already built
+
+   class Builder {
+      Builder(const Lazy&);
+      ~Builder();
+
+      bool operator()() const; // test if we are building
+
+      void move(UniquePtr<T>&);
+      // EXCEPTIONS: may throw an exception if the move is not allowed
+      // (i.e., not building or already moved).
+      // Provides strong ES guarantee.
+   };
+};
+  
+
+// EXCEPTIONS: except where noted, no exceptions are thrown
+
+// NOTE: For more on double-checked locking, see
+// http://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/
+
+// NOTE: when compiled with the NTL_THREADS option, the Lazy
+// class may contain data members from the standard library
+// that may not satisfy the requirements of the Vec class
+// (i.e., relocatability).  One can wrap it in a pointer
+// class (e.g., CopiedPtr) to deal with this.
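+
+// Example (a hedged sketch following the recipe above; Factorial20 is
+// a hypothetical helper that lazily caches 20!):
+//
+//    const ZZ& Factorial20()
+//    {
+//       static Lazy<ZZ> val;          // built at most once
+//       do {
+//          Lazy<ZZ>::Builder builder(val);
+//          if (!builder()) break;     // already built
+//
+//          UniquePtr<ZZ> p;
+//          p.make();
+//          *p = 1;
+//          for (long i = 2; i <= 20; i++) *p *= i;
+//
+//          builder.move(p);           // publish the value
+//       } while (0);
+//
+//       return *val;                  // read-only access
+//    }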
+
+
+
+
diff --git a/thirdparty/linux/ntl/doc/Lazy.txt b/thirdparty/linux/ntl/doc/Lazy.txt
new file mode 100644
index 0000000000..0d7d462fcb
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/Lazy.txt
@@ -0,0 +1,91 @@
[Lazy.txt: plain-text version of the Lazy.cpp.html documentation above; content identical]
diff --git a/thirdparty/linux/ntl/doc/LazyTable.cpp.html b/thirdparty/linux/ntl/doc/LazyTable.cpp.html
new file mode 100644
index 0000000000..4a26ff0bc0
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/LazyTable.cpp.html
@@ -0,0 +1,95 @@
+
+/***************************************************************************
+
+
+LazyTable<T,MAX>: template class for lazy initialization of objects whose
+values do not change after initialization.  In a multi-threaded environment,
+this makes use of "double checked locking" for an efficient, thread-safe
+solution.
+
+Usage:
+
+   LazyTable<T,MAX> tab; // declaration of the lazy table,
+                         // with max size == MAX
+
+    ...
+
+   do {
+      LazyTable<T,MAX>::Builder builder(tab, n); // request length n
+      long amt = builder.amt();
+      if (!amt) break;      
+
+      ... initialize elements i = n-amt..n-1
+          using builder.move(p), where p is a UniquePtr<T>
+          note that each move application appends one element
+                            
+   } while(0);    // When this scope closes,
+                  // the table is fully initialized to length n
+
+
+   const T* val = tab[i];  // read-only access to table elements 0..n-1
+                            
+
+It is important to follow this recipe carefully.  In particular,
+the builder must be enclosed in a scope, as its destructor
+plays a crucial role in finalizing the initialization.
+
+****************************************************************************/
+
+template<class T, long MAX>
+class LazyTable {
+public:
+   LazyTable();
+   ~LazyTable();
+
+   const T * const  operator[] (long i) const;
+   // element access -- currently no range checking
+
+
+   long length() const;
+   // current table length
+
+   class Builder {
+      Builder(const LazyTable&, long request);
+      // EXCEPTIONS: may throw an exception if request is out of range
+      // or if allocation of the table fails
+
+      ~Builder();
+
+      long amt() const;
+
+      void move(UniquePtr<T>& p);
+      // EXCEPTIONS: throws an exception if the move is not allowed.
+      // Provides strong ES guarantee.
+   };
+
+private:
+   LazyTable(const LazyTable&);             // disabled
+   LazyTable& operator=(const LazyTable&);
+
+};
+  
+
+
+// EXCEPTIONS: except where noted, no exceptions are thrown
+
+// NOTE: For more on double-checked locking, see
+// http://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/
+
+// NOTE: when compiled with the NTL_THREADS option, the LazyTable
+// class may contain data members from the standard library
+// that may not satisfy the requirements of the Vec class
+// (i.e., relocatability).  One can wrap it in a pointer
+// class (e.g., CopiedPtr) to deal with this.
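+
+// Example (a hedged sketch following the recipe above; SquareTable is
+// a hypothetical helper that lazily caches i*i for i = 0..99):
+//
+//    static LazyTable<ZZ, 100> tab;
+//
+//    const ZZ& SquareTable(long n)    // n in 0..99
+//    {
+//       do {
+//          LazyTable<ZZ, 100>::Builder builder(tab, n+1);
+//          long amt = builder.amt();
+//          if (!amt) break;
+//
+//          for (long i = n+1-amt; i <= n; i++) {  // fill i = n+1-amt..n
+//             UniquePtr<ZZ> p;
+//             p.make();
+//             *p = i; *p *= i;       // *p = i*i
+//             builder.move(p);       // each move appends one element
+//          }
+//       } while (0);
+//
+//       return *tab[n];
+//    }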
+
+
+
diff --git a/thirdparty/linux/ntl/doc/LazyTable.txt b/thirdparty/linux/ntl/doc/LazyTable.txt
new file mode 100644
index 0000000000..51e5b1bd0b
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/LazyTable.txt
@@ -0,0 +1,85 @@
[LazyTable.txt: plain-text version of the LazyTable.cpp.html documentation above; content identical]
diff --git a/thirdparty/linux/ntl/doc/RR.cpp.html b/thirdparty/linux/ntl/doc/RR.cpp.html
new file mode 100644
index 0000000000..cddd949e88
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/RR.cpp.html
@@ -0,0 +1,606 @@
+/**************************************************************************\
+
+MODULE: RR
+
+SUMMARY:
+
+The class RR is used to represent arbitrary-precision floating point
+numbers.
+
+The functions in this module guarantee very strong accuracy conditions
+which make it easy to reason about the behavior of programs using
+these functions.
+
+The arithmetic operations always round their results to p bits, where
+p is the current precision.  The current precision can be changed
+using RR::SetPrecision(), and can be read using RR::precision().  
+
+The minimum precision that can be set is 53 bits.
+The maximum precision is limited only by the word size of the machine.
+
+A convenience class RRPush is provided to automatically save and
+restore the current precision.
+
+All arithmetic operations are implemented so that the effect is as if the
+result was computed exactly, and then rounded to p bits.  If a number
+lies exactly half-way between two p-bit numbers, the "round to even"
+rule is used.  So in particular, the computed result will have a relative error
+of at most 2^{-p}.
+
+
+The above rounding rules apply to all arithmetic operations in this
+module, except for the following routines:
+
+* The transcendental functions:
+     log, exp, log10, expm1, log1p, pow, sin, cos, ComputePi
+
+* The power function
+
+* The input and ASCII-to-RR conversion functions when using "e"-notation
+
+For these functions, a very strong accuracy condition is still
+guaranteed: the computed result has a relative error of less than 2^{-p + 1}
+(and actually much closer to 2^{-p}).
+That is, it is as if the result were computed exactly, and then
+rounded to one of the two neighboring p-bit numbers (but not necessarily
+the closest).
+
+The behavior of all functions in this module is completely platform
+independent: you should get *exactly* the same results on any platform
+(the only exception to this rule is the random number generator).
+
+Note that because precision is variable, a number may be computed
+to a high precision p', and then be used as input to an arithmetic operation
+when the current precision is p < p'.  
+The above accuracy guarantees still apply; in particular,
+no rounding is done until *after* the operation is performed.  
+
+EXAMPLE: If x and y are computed to 200 bits of precision,
+and then the precision is set to 100 bits, then x-y will
+be computed correctly to 100 bits, even if, say, x and y agree
+in their high-order 50 bits.  If x and y had been rounded to
+100 bits before the subtraction, then the difference would
+only be accurate to 50 bits of precision.
+
+Note that the assignment operator and the copy constructor
+produce *exact* copies of their inputs---they are *never* rounded.
+This is a change in semantics from versions 2.0 and earlier
+in which assignment and copy rounded their outputs.
+This was deemed a design error and has been changed.
+
+If you want to force rounding to current precision, the easiest
+way to do this is with the RR to RR conversion routines:
+   conv(x, a);
+or
+   x = to_RR(a);
+This will round a to current precision and store the result in x.
+Note that writing
+   x = a + 0;
+or
+   x = a*1;
+also has the same effect.
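+
+For illustration, a minimal sketch of these semantics:
+
+   RR::SetPrecision(200);
+   RR x = to_RR(1)/to_RR(3);   // 1/3, correct to 200 bits
+
+   RR::SetPrecision(100);
+   RR y = x;                   // exact copy: NOT rounded to 100 bits
+   conv(y, x);                 // now y = x rounded to 100 bits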
+
+Unlike IEEE standard floating point, there are no "special values",
+like "infinity" or "not a number", nor are there any "denormalized
+numbers".  Overflow, underflow, or taking a square root of a negative
+number all result in an error being raised.
+
+An RR is represented as a mantissa/exponent pair (x, e), where x is a
+ZZ and e is a long.  The real number represented by (x, e) is x * 2^e.
+Zero is always represented as (0, 0).  For all other numbers, x is
+always odd.
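+
+For example, the value 12 = 3 * 2^2 is represented with mantissa 3 and
+exponent 2:
+
+   RR x = to_RR(12);
+   x.mantissa();   // == 3
+   x.exponent();   // == 2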
+
+
+CONVERSIONS AND PROMOTIONS:
+The complete set of conversion routines between RR and other types is
+documented in the file "conversions.txt". Conversion from any type
+to RR always rounds the result to the current precision.
+
+The basic operations also support the notion of "promotions",
+so that they promote a double to an RR.  For example, one can write
+   x = y + 1.5;
+where x and y are RR's. One should be aware that these promotions are
+always implemented using the double to RR conversion routine.
+
+
+SIZE INVARIANT: max(NumBits(x), |e|) < 2^(NTL_BITS_PER_LONG-4)
+
+\**************************************************************************/
+
+
+
+
+#include <NTL/ZZ.h>
+#include <NTL/xdouble.h>
+#include <NTL/quad_float.h>
+
+class RR {
+
+public:
+
+RR(); // = 0
+
+RR(const RR& a); // copy constructor
+
+
+explicit RR(double a);  // promotion constructor
+
+RR& operator=(const RR& a); // assignment operator
+
+// NOTE: the copy constructor and assignment operator
+// produce exact copies of their inputs, and do not round
+// to current precision.  
+
+RR& operator=(double a); // convert and assign
+
+~RR(); // destructor
+
+const ZZ& mantissa() const;  // read the mantissa
+long exponent() const;  // read the exponent
+
+static void SetPrecision(long p);  
+// set current precision to max(p, 53) bits.
+// The default is 150
+
+static long precision();  // read current value of precision
+
+static void SetOutputPrecision(long p);  
+// set the number of output decimal digits to max(p, 1).
+// The default is 10
+
+static long OutputPrecision();
+// read the current number of output decimal digits
+
+
+};
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+
+// standard comparison operators:
+
+long operator==(const RR& a, const RR& b);
+long operator!=(const RR& a, const RR& b);
+long operator<=(const RR& a, const RR& b);
+long operator>=(const RR& a, const RR& b);
+long operator <(const RR& a, const RR& b);
+long operator >(const RR& a, const RR& b);
+
+
+long IsZero(const RR& a); // test if 0
+long IsOne(const RR& a); // test if 1
+
+long sign(const RR& a);  // returns sign of a (+1, -1, 0)
+long compare(const RR& a, const RR& b); // returns sign(a-b);
+
+// PROMOTIONS: operators ==, ..., > and function compare
+// promote double to RR on (a, b).
+
+
+
+/**************************************************************************\
+
+                                  Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+RR operator+(const RR& a, const RR& b);
+RR operator-(const RR& a, const RR& b);
+RR operator-(const RR& a); // unary -
+
+RR& operator+=(RR& x, const RR& a);
+RR& operator+=(RR& x, double a);
+
+RR& operator-=(RR& x, const RR& a);
+RR& operator-=(RR& x, double a);
+
+RR& operator++(RR& x);  // prefix
+void operator++(RR& x, int);  // postfix
+
+RR& operator--(RR& x);  // prefix
+void operator--(RR& x, int);  // postfix
+
+
+// procedural versions:
+
+void add(RR& z, const RR& a, const RR& b); // z = a+b
+void sub(RR& z, const RR& a, const RR& b); // z = a-b
+void negate(RR& z, const RR& a); // z = -a
+
+// PROMOTIONS: operators +, -, and procedures add, sub promote double
+// to RR on (a, b).
+
+void abs(RR& z, const RR& a); // z = |a|
+RR fabs(const RR& a);  
+RR abs(const RR& a);
+
+
+/**************************************************************************\
+
+                                  Multiplication
+
+\**************************************************************************/
+
+
+// operator notation:
+
+RR operator*(const RR& a, const RR& b);
+
+RR& operator*=(RR& x, const RR& a);
+RR& operator*=(RR& x, double a);
+
+// procedural versions:
+
+
+void mul(RR& z, const RR& a, const RR& b); // z = a*b
+
+void sqr(RR& z, const RR& a); // z = a * a
+RR sqr(const RR& a);
+
+// PROMOTIONS: operator * and procedure mul promote double to RR on (a, b).
+
+
+/**************************************************************************\
+
+                               Division
+
+\**************************************************************************/
+
+
+// operator notation:
+
+RR operator/(const RR& a, const RR& b);
+
+RR& operator/=(RR& x, const RR& a);
+RR& operator/=(RR& x, double a);
+
+
+// procedural versions:
+
+
+void div(RR& z, const RR& a, const RR& b); // z = a/b
+
+void inv(RR& z, const RR& a); // z = 1 / a
+RR inv(const RR& a);
+
+// PROMOTIONS: operator / and procedure div promote double to RR on (a, b).
+
+
+
+/**************************************************************************\
+
+                       Transcendental functions
+
+\**************************************************************************/
+
+
+void exp(RR& res, const RR& x);  // e^x
+RR exp(const RR& x);
+
+void log(RR& res, const RR& x); // log(x) (natural log)
+RR log(const RR& x);
+
+void log10(RR& res, const RR& x); // log(x)/log(10)
+RR log10(const RR& x);
+
+void expm1(RR& res, const RR&  x);
+RR expm1(const RR& x);
+// e^(x)-1; more accurate than exp(x)-1 when |x| is small
+
+void log1p(RR& res, const RR& x);
+RR log1p(const RR& x);
+// log(1 + x); more accurate than computing log(1 + x) directly
+// when |x| is small
+
+void pow(RR& res, const RR& x, const RR& y);  // x^y
+RR pow(const RR& x, const RR& y);
+
+void sin(RR& res, const RR& x);  // sin(x); restriction: |x| < 2^1000
+RR sin(const RR& x);
+
+void cos(RR& res, const RR& x);  // cos(x); restriction: |x| < 2^1000
+RR cos(const RR& x);
+
+void ComputePi(RR& pi); // approximate pi to current precision
+RR ComputePi_RR();
+
+
+/**************************************************************************\
+
+                         Rounding to integer values        
+
+\**************************************************************************/
+
+
+/*** RR output ***/
+
+void trunc(RR& z, const RR& a);  // z = a, truncated to 0
+RR trunc(const RR& a);
+
+void floor(RR& z, const RR& a);  // z = a, truncated to -infinity
+RR floor(const RR& a);
+
+void ceil(RR& z, const RR& a);   // z = a, truncated to +infinity
+RR ceil(const RR& a);
+
+void round(RR& z, const RR& a);   // z = a, rounded to nearest integer
+RR round(const RR& a);            // ties are rounded to an even integer
+
+
+
+/*** ZZ output ***/
+
+void TruncToZZ(ZZ& z, const RR& a);  // z = a, truncated to 0
+ZZ TruncToZZ(const RR& a);
+
+void FloorToZZ(ZZ& z, const RR& a);  // z = a, truncated to -infinity
+ZZ FloorToZZ(const RR& a);           // same as RR to ZZ conversion
+
+void CeilToZZ(ZZ& z, const RR& a);   // z = a, truncated to +infinity
+ZZ CeilToZZ(const RR& a);
+
+void RoundToZZ(ZZ& z, const RR& a);   // z = a, rounded to nearest integer
+ZZ RoundToZZ(const RR& a);            // ties are rounded to an even integer
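+
+// For illustration, with x = to_RR(-2.5):
+//
+//    TruncToZZ(x) == -2     FloorToZZ(x) == -3
+//    CeilToZZ(x)  == -2     RoundToZZ(x) == -2   (ties go to even)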
+
+
+
+
+/**************************************************************************\
+
+                 Saving and restoring the current precision
+
+\**************************************************************************/
+
+
+class RRPush {
+public:
+   RRPush();  // saves the current precision
+   ~RRPush(); // restores the saved precision
+
+private:
+   RRPush(const RRPush&); // disable
+   void operator=(const RRPush&); // disable
+};
+
+
+// Example:
+//
+// {
+//    RRPush push;  // don't forget to declare a variable!!
+//    RR::SetPrecision(new_p);
+//    ...
+// } // old precision restored when scope is exited
+
+
+class RROutputPush {
+public:
+   RROutputPush();   // saves the current output precision
+   ~RROutputPush();  // restores the saved output precision
+
+private:
+   RROutputPush(const RROutputPush&); // disable
+   void operator=(const RROutputPush&); // disable
+};
+
+
+// Example:
+//
+// {
+//    RROutputPush push;  // don't forget to declare a variable!!
+//    RR::SetOutputPrecision(new_op);
+//    ...
+// } // old output precision restored when scope is exited
+
+
+
+
+/**************************************************************************\
+
+                                 Miscellaneous
+
+\**************************************************************************/
+
+
+void MakeRR(RR& z, const ZZ& a,  long e);
+RR MakeRR(const ZZ& a,  long e);
+// z = a*2^e, rounded to current precision
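+
+// For illustration:
+//
+//    RR x = MakeRR(to_ZZ(3), -1);  // x = 3 * 2^{-1} = 1.5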
+
+void random(RR& z);
+RR random_RR();
+// z = pseudo-random number in the range [0,1).
+// Note that the behaviour of this function is somewhat platform
+// dependent, because the underlying pseudo-random generator is.
+
+
+void SqrRoot(RR& z, const RR& a); // z = sqrt(a);
+RR SqrRoot(const RR& a);
+RR sqrt(const RR& a);
+
+void power(RR& z, const RR& a, long e); // z = a^e, e may be negative
+RR power(const RR& a, long e);
+
+void power2(RR& z, long e); // z = 2^e, e may be negative
+RR power2_RR(long e);
+
+
+void clear(RR& z);  // z = 0
+void set(RR& z);  // z = 1
+
+void RR::swap(RR& a);
+void swap(RR& a, RR& b);  
+// swap (pointer swap)
+
+
+
+/**************************************************************************\
+
+                               Input/Output
+Input Syntax:
+
+<number>: [ "-" ] <unsigned-number>
+<unsigned-number>: <dotted-number> [ <e-part> ] | <e-part>
+<dotted-number>: <digits> | <digits> "." <digits> | "." <digits> | <digits> "."
+<digits>: <digit> <digits> | <digit>
+<digit>: "0" | ... | "9"
+<e-part>: ( "E" | "e" ) [ "+" | "-" ] <digits>
+
+Examples of valid input:
+
+17 1.5 0.5 .5 5.  -.5  e10 e-10 e+10 1.5e10 .5e10 .5E10
+
+Note that the number of decimal digits of precision that are used
+for output can be set to any number p >= 1 by calling
+the routine RR::SetOutputPrecision(p).  The default value of p is 10.
+The current value of p is returned by a call to RR::OutputPrecision().
+
+
+\**************************************************************************/
+
+
+
+ostream& operator<<(ostream& s, const RR& a);
+istream& operator>>(istream& s, RR& x);
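+
+// For illustration, a minimal output sketch:
+//
+//    RR::SetOutputPrecision(25);
+//    cout << ComputePi_RR() << "\n";  // ~25 significant decimal digits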
+
+/**************************************************************************\
+
+
+            Specialized routines with explicit precision parameter
+
+These routines take an explicit precision parameter p.  The value of p may be
+any positive integer.  All results are computed to *precisely* p bits of
+precision, regardless of the current precision (as set by RR::SetPrecision).
+
+These routines are provided both for convenience and for situations where the
+computation must be done with a precision that may be less than 53.
+
+
+\**************************************************************************/
+
+
+
+
+void AddPrec(RR& z, const RR& a, const RR& b, long p); // z = a + b
+RR AddPrec(const RR& a, const RR& b, long p);
+
+void SubPrec(RR& z, const RR& a, const RR& b, long p); // z = a - b
+RR SubPrec(const RR& a, const RR& b, long p);
+
+void NegatePrec(RR& z, const RR& a, long p); // z = -a
+RR NegatePrec(const RR& a, long p);
+
+void AbsPrec(RR& z, const RR& a, long p); // z = |a|
+RR AbsPrec(const RR& a, long p);
+
+void MulPrec(RR& z, const RR& a, const RR& b, long p); // z = a*b
+RR MulPrec(const RR& a, const RR& b, long p);
+
+void SqrPrec(RR& z, const RR& a, long p); // z = a*a
+RR SqrPrec(const RR& a, long p);
+
+void DivPrec(RR& z, const RR& a, const RR& b, long p);  // z = a/b
+RR DivPrec(const RR& a, const RR& b, long p);
+
+void InvPrec(RR& z, const RR& a, long p);  // z = 1/a
+RR InvPrec(const RR& a, long p);
+
+void SqrRootPrec(RR& z, const RR& a, long p); // z = sqrt(a)
+RR SqrRootPrec(const RR& a, long p);
+
+void TruncPrec(RR& z, const RR& a, long p); // z = a, truncated to 0
+RR TruncPrec(const RR& a, long p);
+
+void FloorPrec(RR& z, const RR& a, long p); // z = a, truncated to -infinity
+RR FloorPrec(const RR& a, long p);
+
+void CeilPrec(RR& z, const RR& a, long p);  // z = a, truncated to +infinity
+RR CeilPrec(const RR& a, long p);
+
+void RoundPrec(RR& z, const RR& a, long p); // z = a,
+                                            // rounded to nearest integer,
+                                            // ties are rounded to an even
+                                            // integer
+RR RoundPrec(const RR& a, long p);
+
+void ConvPrec(RR& z, const RR& a, long p); // z = a
+RR ConvPrec(const RR& a, long p);
+
+void ConvPrec(RR& z, const ZZ& a, long p); // z = a
+RR ConvPrec(const ZZ& a, long p);
+
+void ConvPrec(RR& z, long a, long p); // z = a
+RR ConvPrec(long a, long p);
+
+void ConvPrec(RR& z, int a, long p); // z = a
+RR ConvPrec(int a, long p);
+
+void ConvPrec(RR& z, unsigned long a, long p); // z = a
+RR ConvPrec(unsigned long a, long p);
+
+void ConvPrec(RR& z, unsigned int a, long p); // z = a
+RR ConvPrec(unsigned int a, long p);
+
+void ConvPrec(RR& z, double a, long p); // z = a
+RR ConvPrec(double a, long p);
+
+void ConvPrec(RR& z, const xdouble& a, long p); // z = a
+RR ConvPrec(const xdouble& a, long p);
+
+void ConvPrec(RR& z, const quad_float& a, long p); // z = a
+RR ConvPrec(const quad_float& a, long p);
+
+void ConvPrec(RR& z, const char *s, long p); // read z from s
+RR ConvPrec(const char *s, long p);
+
+istream& InputPrec(RR& z, istream& s, long p);  // read z from s
+RR InputPrec(istream& s, long p);
+// The functional variant raises an error if the input
+// is missing or ill-formed, while the procedural form
+// does not.
+
+void MakeRRPrec(RR& z, const ZZ& a, long e, long p); // z = a*2^e
+RR MakeRRPrec(const ZZ& a, long e, long p);
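+
+// For illustration, a minimal sketch (independent of the precision set
+// via RR::SetPrecision):
+//
+//    RR q = DivPrec(to_RR(1), to_RR(7), 30);  // 1/7 to exactly 30 bits
+//    RR s = AddPrec(q, q, 24);                // q+q to exactly 24 bits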
+
+
+/**************************************************************************\
+
+COMPATIBILITY NOTES:
+
+ (1)  Prior to version 5.3, the documentation indicated that under certain
+      circumstances, the value of the current precision could be directly set
+      by setting the variable RR::prec.  Such usage is now considered
+      obsolete.  To perform computations using a precision of less than 53
+      bits, users should use the specialized routines AddPrec, SubPrec, etc.,
+      documented above.
+
+ (2)  The routine RoundToPrecision is obsolete, although for backward
+      compatibility, it is still declared (in both procedural and functional
+      forms), and is equivalent to ConvPrec.
+
+ (3)  In versions 2.0 and earlier, the assignment operator and copy
+      constructor for the class RR rounded their outputs to the current
+      precision.  This is no longer the case:  their outputs are now exact
+      copies of their inputs, regardless of the current precision.
+
+\**************************************************************************/
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/RR.txt b/thirdparty/linux/ntl/doc/RR.txt new file mode 100644 index 0000000000..5a61a83530 --- /dev/null +++ b/thirdparty/linux/ntl/doc/RR.txt @@ -0,0 +1,596 @@ + +/**************************************************************************\ + +MODULE: RR + +SUMMARY: + +The class RR is used to represent arbitrary-precision floating point +numbers. + +The functions in this module guarantee very strong accuracy conditions +which make it easy to reason about the behavior of programs using +these functions. + +The arithmetic operations always round their results to p bits, where +p is the current precision. The current precision can be changed +using RR::SetPrecision(), and can be read using RR::precision(). + +The minimum precision that can be set is 53 bits. +The maximum precision is limited only by the word size of the machine. + +A convenience class RRPush is provided to automatically save and +restore the current precision. + +All arithmetic operations are implemented so that the effect is as if the +result was computed exactly, and then rounded to p bits. If a number +lies exactly half-way between two p-bit numbers, the "round to even" +rule is used. So in particular, the computed result will have a relative error +of at most 2^{-p}. + + +The above rounding rules apply to all arithmetic operations in this +module, except for the following routines: + +* The transcendental functions: + log, exp, log10, expm1, log1p, pow, sin, cos, ComputePi + +* The power function + +* The input and ascii to RR conversion functions when using "e"-notation + +For these functions, a very strong accuracy condition is still +guaranteed: the computed result has a relative error of less than 2^{-p + 1} +(and actually much closer to 2^{-p}). +That is, it is as if the resulted were computed exactly, and then +rounded to one of the two neighboring p-bit numbers (but not necessarily +the closest). + +The behavior of all functions in this module is completely platform +independent: you should get *exactly* the same results on any platform +(the only exception to this rule is the random number generator). + +Note that because precision is variable, a number may be computed with +to a high precision p', and then be used as input to an arithmetic operation +when the current precision is p < p'. +The above accuracy guarantees still apply; in particular, +no rounding is done until *after* the operation is performed. + +EXAMPLE: If x and y are computed to 200 bits of precision, +and then the precision is set to 100 bits, then x-y will +be computed correctly to 100 bits, even if, say, x and y agree +in their high-order 50 bits. If x and y had been rounded to +100 bits before the subtraction, then the difference would +only be accurate to 50 bits of precision. + +Note that the assignment operator and the copy constructor +produce *exact* copies of their inputs---they are *never* rounded. +This is a change in semantics from versions 2.0 and earlier +in which assignment and copy rounded their outputs. +This was deemed a design error and has been changed. + +If you want to force rounding to current precision, the easiest +way to do this is with the RR to RR conversion routines: + conv(x, a); +or + x = to_RR(a); +This will round a to current precision and store the result in x. +Note that writing + x = a + 0; +or + x = a*1; +also has the same effect. 
+ +Unlike IEEE standard floating point, there are no "special values", +like "infinity" or "not a number", nor are there any "denormalized +numbers". Overflow, underflow, or taking a square root of a negative +number all result in an error being raised. + +An RR is represented as a mantissa/exponent pair (x, e), where x is a +ZZ and e is a long. The real number represented by (x, e) is x * 2^e. +Zero is always represented as (0, 0). For all other numbers, x is +always odd. + + +CONVERSIONS AND PROMOTIONS: +The complete set of conversion routines between RR and other types is +documented in the file "conversions.txt". Conversion from any type +to RR always rounds the result to the current precision. + +The basic operations also support the notion of "promotions", +so that they promote a double to an RR. For example, one can write + x = y + 1.5; +where x and y are RR's. One should be aware that these promotions are +always implemented using the double to RR conversion routine. + + +SIZE INVARIANT: max(NumBits(x), |e|) < 2^(NTL_BITS_PER_LONG-4) + +\**************************************************************************/ + + + + +#include +#include +#include + +class RR { + +public: + +RR(); // = 0 + +RR(const RR& a); // copy constructor + + +explicit RR(double a); // promotion constructor + +RR& operator=(const RR& a); // assignment operator + +// NOTE: the copy constructor and assignment operator +// produce exact copies of their inputs, and do not round +// to current precision. + +RR& operator=(double a); // convert and assign + +~RR(); // destructor + +const ZZ& mantissa() const; // read the mantissa +long exponent() const; // read the exponent + +static void SetPrecision(long p); +// set current precision to max(p, 53) bits. +// The default is 150 + +static long precision(); // read current value of precision + +static void SetOutputPrecision(long p); +// set the number of output decimal digits to max(p, 1). +// The default is 10 + +static long OutputPrecision(); +// read the current number of output decimal digits + + +}; + + + +/**************************************************************************\ + + Comparison + +\**************************************************************************/ + + + +// standard comparison operators: + +long operator==(const RR& a, const RR& b); +long operator!=(const RR& a, const RR& b); +long operator<=(const RR& a, const RR& b); +long operator>=(const RR& a, const RR& b); +long operator <(const RR& a, const RR& b); +long operator >(const RR& a, const RR& b); + + +long IsZero(const RR& a); // test if 0 +long IsOne(const RR& a); // test if 1 + +long sign(const RR& a); // returns sign of a (+1, -1, 0) +long compare(const RR& a, const RR& b); // returns sign(a-b); + +// PROMOTIONS: operators ==, ..., > and function compare +// promote double to RR on (a, b). 
+ + + +/**************************************************************************\ + + Addition + +\**************************************************************************/ + +// operator notation: + +RR operator+(const RR& a, const RR& b); +RR operator-(const RR& a, const RR& b); +RR operator-(const RR& a); // unary - + +RR& operator+=(RR& x, const RR& a); +RR& operator+=(RR& x, double a); + +RR& operator-=(RR& x, const RR& a); +RR& operator-=(RR& x, double a); + +RR& operator++(RR& x); // prefix +void operator++(RR& x, int); // postfix + +RR& operator--(RR& x); // prefix +void operator--(RR& x, int); // postfix + + +// procedural versions: + +void add(RR& z, const RR& a, const RR& b); // z = a+b +void sub(RR& z, const RR& a, const RR& b); // z = a-b +void negate(RR& z, const RR& a); // z = -a + +// PROMOTIONS: operators +, -, and procedures add, sub promote double +// to RR on (a, b). + +void abs(RR& z, const RR& a); // z = |a| +RR fabs(const RR& a); +RR abs(const RR& a); + + +/**************************************************************************\ + + Multiplication + +\**************************************************************************/ + + +// operator notation: + +RR operator*(const RR& a, const RR& b); + +RR& operator*=(RR& x, const RR& a); +RR& operator*=(RR& x, double a); + +// procedural versions: + + +void mul(RR& z, const RR& a, const RR& b); // z = a*b + +void sqr(RR& z, const RR& a); // z = a * a +RR sqr(const RR& a); + +// PROMOTIONS: operator * and procedure mul promote double to RR on (a, b). + + +/**************************************************************************\ + + Division + +\**************************************************************************/ + + +// operator notation: + +RR operator/(const RR& a, const RR& b); + +RR& operator/=(RR& x, const RR& a); +RR& operator/=(RR& x, double a); + + +// procedural versions: + + +void div(RR& z, const RR& a, const RR& b); z = a/b + +void inv(RR& z, const RR& a); // z = 1 / a +RR inv(const RR& a); + +// PROMOTIONS: operator / and procedure div promote double to RR on (a, b). 
+
+
+
+/**************************************************************************\
+
+                          Transcendental functions
+
+\**************************************************************************/
+
+
+void exp(RR& res, const RR& x); // e^x
+RR exp(const RR& x);
+
+void log(RR& res, const RR& x); // log(x) (natural log)
+RR log(const RR& x);
+
+void log10(RR& res, const RR& x); // log(x)/log(10)
+RR log10(const RR& x);
+
+void expm1(RR& res, const RR& x);
+RR expm1(const RR& x);
+// e^(x)-1; more accurate than exp(x)-1 when |x| is small
+
+void log1p(RR& res, const RR& x);
+RR log1p(const RR& x);
+// log(1 + x); more accurate than computing log(1 + x) directly
+// when |x| is small
+
+void pow(RR& res, const RR& x, const RR& y); // x^y
+RR pow(const RR& x, const RR& y);
+
+void sin(RR& res, const RR& x); // sin(x); restriction: |x| < 2^1000
+RR sin(const RR& x);
+
+void cos(RR& res, const RR& x); // cos(x); restriction: |x| < 2^1000
+RR cos(const RR& x);
+
+void ComputePi(RR& pi); // approximate pi to current precision
+RR ComputePi_RR();
+
+
+/**************************************************************************\
+
+                          Rounding to integer values
+
+\**************************************************************************/
+
+
+/*** RR output ***/
+
+void trunc(RR& z, const RR& a); // z = a, truncated to 0
+RR trunc(const RR& a);
+
+void floor(RR& z, const RR& a); // z = a, truncated to -infinity
+RR floor(const RR& a);
+
+void ceil(RR& z, const RR& a);  // z = a, truncated to +infinity
+RR ceil(const RR& a);
+
+void round(RR& z, const RR& a); // z = a, rounded to nearest integer
+RR round(const RR& a);          // ties are rounded to an even integer
+
+
+
+/*** ZZ output ***/
+
+void TruncToZZ(ZZ& z, const RR& a); // z = a, truncated to 0
+ZZ TruncToZZ(const RR& a);
+
+void FloorToZZ(ZZ& z, const RR& a); // z = a, truncated to -infinity
+ZZ FloorToZZ(const RR& a);          // same as RR to ZZ conversion
+
+void CeilToZZ(ZZ& z, const RR& a);  // z = a, truncated to +infinity
+ZZ CeilToZZ(const RR& a);
+
+void RoundToZZ(ZZ& z, const RR& a); // z = a, rounded to nearest integer
+ZZ RoundToZZ(const RR& a);          // ties are rounded to an even integer
+
+
+
+// @anchor{push}
+
+/**************************************************************************\
+
+                Saving and restoring the current precision
+
+\**************************************************************************/
+
+
+class RRPush {
+public:
+   RRPush();  // saves the current precision
+   ~RRPush(); // restores the saved precision
+
+private:
+   RRPush(const RRPush&); // disable
+   void operator=(const RRPush&); // disable
+};
+
+
+// Example:
+//
+// {
+//    RRPush push;  // don't forget to declare a variable!!
+//    RR::SetPrecision(new_p);
+//    ...
+// } // old precision restored when scope is exited
+
+
+class RROutputPush {
+public:
+   RROutputPush();  // saves the current output precision
+   ~RROutputPush(); // restores the saved output precision
+
+private:
+   RROutputPush(const RROutputPush&); // disable
+   void operator=(const RROutputPush&); // disable
+};
+
+
+// Example:
+//
+// {
+//    RROutputPush push;  // don't forget to declare a variable!!
+//    RR::SetOutputPrecision(new_op);
+//    ...
+// } // old output precision restored when scope is exited
+
+
+
+
+/**************************************************************************\
+
+                                Miscellaneous
+
+\**************************************************************************/
+
+
+void MakeRR(RR& z, const ZZ& a, long e);
+RR MakeRR(const ZZ& a, long e);
+// z = a*2^e, rounded to current precision
+
+void random(RR& z);
+RR random_RR();
+// z = pseudo-random number in the range [0,1).
+// Note that the behaviour of this function is somewhat platform
+// dependent, because the underlying pseudo-random generator is.
+
+
+void SqrRoot(RR& z, const RR& a); // z = sqrt(a);
+RR SqrRoot(const RR& a);
+RR sqrt(const RR& a);
+
+void power(RR& z, const RR& a, long e); // z = a^e, e may be negative
+RR power(const RR& a, long e);
+
+void power2(RR& z, long e); // z = 2^e, e may be negative
+RR power2_RR(long e);
+
+
+void clear(RR& z); // z = 0
+void set(RR& z);   // z = 1
+
+void RR::swap(RR& a);
+void swap(RR& a, RR& b);
+// swap (pointer swap)
+
+
+
+/**************************************************************************\
+
+                                 Input/Output
+
+Input Syntax:
+
+<num>: [ "-" ] <unum>
+<unum>: <dotnum> [ <e-part> ] | <e-part>
+<dotnum>: <digits> | <digits> "." <digits> | "." <digits> | <digits> "."
+<digits>: <digit> <digits> | <digit>
+<digit>: "0" | ... | "9"
+<e-part>: ( "E" | "e" ) [ "+" | "-" ] <digits>
+
+Examples of valid input:
+
+17 1.5 0.5 .5 5. -.5 e10 e-10 e+10 1.5e10 .5e10 .5E10
+
+Note that the number of decimal digits of precision that are used
+for output can be set to any number p >= 1 by calling
+the routine RR::SetOutputPrecision(p).  The default value of p is 10.
+The current value of p is returned by a call to RR::OutputPrecision().
+
+
+\**************************************************************************/
+
+
+
+ostream& operator<<(ostream& s, const RR& a);
+istream& operator>>(istream& s, RR& x);
+
+/**************************************************************************\
+
+
+            Specialized routines with explicit precision parameter
+
+These routines take an explicit precision parameter p.  The value of p may be
+any positive integer.  All results are computed to *precisely* p bits of
+precision, regardless of the current precision (as set by RR::SetPrecision).
+
+These routines are provided both for convenience and for situations where the
+computation must be done with a precision that may be less than 53.
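+
+For example (a small sketch; each result is computed to precisely 26
+bits, independent of the current precision):
+
+   RR a, b, z;
+   ...
+   AddPrec(z, a, b, 26);   // z = a + b, to precisely 26 bits
+   z = DivPrec(a, b, 26);  // same idea, functional form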
+
+
+\**************************************************************************/
+
+
+
+
+void AddPrec(RR& z, const RR& a, const RR& b, long p); // z = a + b
+RR AddPrec(const RR& a, const RR& b, long p);
+
+void SubPrec(RR& z, const RR& a, const RR& b, long p); // z = a - b
+RR SubPrec(const RR& a, const RR& b, long p);
+
+void NegatePrec(RR& z, const RR& a, long p); // z = -a
+RR NegatePrec(const RR& a, long p);
+
+void AbsPrec(RR& z, const RR& a, long p); // z = |a|
+RR AbsPrec(const RR& a, long p);
+
+void MulPrec(RR& z, const RR& a, const RR& b, long p); // z = a*b
+RR MulPrec(const RR& a, const RR& b, long p);
+
+void SqrPrec(RR& z, const RR& a, long p); // z = a*a
+RR SqrPrec(const RR& a, long p);
+
+void DivPrec(RR& z, const RR& a, const RR& b, long p); // z = a/b
+RR DivPrec(const RR& a, const RR& b, long p);
+
+void InvPrec(RR& z, const RR& a, long p); // z = 1/a
+RR InvPrec(const RR& a, long p);
+
+void SqrRootPrec(RR& z, const RR& a, long p); // z = sqrt(a)
+RR SqrRootPrec(const RR& a, long p);
+
+void TruncPrec(RR& z, const RR& a, long p); // z = a, truncated to 0
+RR TruncPrec(const RR& a, long p);
+
+void FloorPrec(RR& z, const RR& a, long p); // z = a, truncated to -infinity
+RR FloorPrec(const RR& a, long p);
+
+void CeilPrec(RR& z, const RR& a, long p); // z = a, truncated to +infinity
+RR CeilPrec(const RR& a, long p);
+
+void RoundPrec(RR& z, const RR& a, long p); // z = a,
+                                            // rounded to nearest integer,
+                                            // ties are rounded to an even
+                                            // integer
+RR RoundPrec(const RR& a, long p);
+
+void ConvPrec(RR& z, const RR& a, long p); // z = a
+RR ConvPrec(const RR& a, long p);
+
+void ConvPrec(RR& z, const ZZ& a, long p); // z = a
+RR ConvPrec(const ZZ& a, long p);
+
+void ConvPrec(RR& z, long a, long p); // z = a
+RR ConvPrec(long a, long p);
+
+void ConvPrec(RR& z, int a, long p); // z = a
+RR ConvPrec(int a, long p);
+
+void ConvPrec(RR& z, unsigned long a, long p); // z = a
+RR ConvPrec(unsigned long a, long p);
+
+void ConvPrec(RR& z, unsigned int a, long p); // z = a
+RR ConvPrec(unsigned int a, long p);
+
+void ConvPrec(RR& z, double a, long p); // z = a
+RR ConvPrec(double a, long p);
+
+void ConvPrec(RR& z, const xdouble& a, long p); // z = a
+RR ConvPrec(const xdouble& a, long p);
+
+void ConvPrec(RR& z, const quad_float& a, long p); // z = a
+RR ConvPrec(const quad_float& a, long p);
+
+void ConvPrec(RR& z, const char *s, long p); // read z from s
+RR ConvPrec(const char *s, long p);
+
+istream& InputPrec(RR& z, istream& s, long p); // read z from s
+RR InputPrec(istream& s, long p);
+// The functional variant raises an error if input
+// is missing or ill-formed, while the procedural form
+// does not.
+
+void MakeRRPrec(RR& z, const ZZ& a, long e, long p); // z = a*2^e
+RR MakeRRPrec(const ZZ& a, long e, long p);
+
+
+/**************************************************************************\
+
+COMPATIBILITY NOTES:
+
+  (1) Prior to version 5.3, the documentation indicated that under certain
+      circumstances, the value of the current precision could be directly set
+      by setting the variable RR::prec.  Such usage is now considered
+      obsolete.  To perform computations using a precision of less than 53
+      bits, users should use the specialized routines AddPrec, SubPrec, etc.,
+      documented above.
+
+  (2) The routine RoundToPrecision is obsolete, although for backward
+      compatibility, it is still declared (in both procedural and functional
+      forms), and is equivalent to ConvPrec.
+
+  (3) In versions 2.0 and earlier, the assignment operator and copy
+      constructor for the class RR rounded their outputs to the current
+      precision.  This is no longer the case: their outputs are now exact
+      copies of their inputs, regardless of the current precision.
+
+\**************************************************************************/
+
+
diff --git a/thirdparty/linux/ntl/doc/SmartPtr.cpp.html b/thirdparty/linux/ntl/doc/SmartPtr.cpp.html
new file mode 100644
index 0000000000..e4cd29e509
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/SmartPtr.cpp.html
@@ -0,0 +1,957 @@
+
+
+
+
+/****************************************************************************
+
+SmartPtr: a smart pointer class.
+
+Synopsis: provides a reference counted smart pointer, similar to shared_ptr
+in the standard library.  It is provided here to minimize reliance
+on the standard library, especially for older C++ compilers, which may
+not provide shared_ptr, or may provide it only in TR1, which gets messy.
+
+
+Examples:
+
+
+  SmartPtr<T> p1;         // initialize to null
+  SmartPtr<T> p1(0);
+
+  SmartPtr<T> p2 = 0;     // 0/nullptr implicitly converts to SmartPtr<T>
+
+  SmartPtr<T> p3(p1);     // copy constructor
+
+  T *rp;
+  SmartPtr<T> p4(rp);     // construct using raw pointer (explicit): better
+                          // to use MakeSmart below
+
+  p1 = MakeSmart<T>(...); // build new T object by invoking constructor
+                          // T(...) with pseudo-variadic templates.
+                          // This is safer and more efficient than
+                          // using the raw-pointer constructor
+                        
+  p1 = p2;                // assignment
+  p1 = 0;                 // assign null
+
+
+  if (!p1) ...            //  test for null
+  if (p1 == 0) ...
+
+  if (p1) ...             // test for not null ...
+  if (p1 != 0) ...
+
+  if (p1 == p2) ...       // test for equality
+  if (p1 != p2)
+
+  *p1                     // dereferencing
+  p1->...
+
+  p1.get();               // return the underlying raw pointer...dangerous!
+
+  p1.swap(p2);            // fast swap
+  swap(p1, p2);
+
+
+Automatic Conversions:
+
+If S is another class, SmartPtr<S> converts to SmartPtr<T> if S* converts to T*
+(for example, if S is a subclass of T).  Similarly, SmartPtr<S> and SmartPtr<T>
+may be compared if S* and T* may be compared.
+
+0/nullptr automatically converts to SmartPtr<T>.
+
+MakeSmart:
+
+One can write SmartPtr<T> p = MakeSmart<T>(x1, ..., xn), and this will create a
+smart pointer to an object constructed as T(x1, ..., xn).  Besides notational
+convenience, it also reduces the number of memory allocations from 2 to 1, as
+the data and control block can be allocated in one chunk of memory.
+
+This is implemented without reliance on C++11 features, which means that there
+are limitations.  First, the number n of arguments is limited to 9.  And
+second, all arguments are passed by const reference. However, you can work around
+this by using the helper function Fwd.  For example, if T has a 2-argument
+constructor where the second must be a non-const reference of some type, and x2
+is a variable of that type, you can write MakeSmart<T>(x1, Fwd(x2)), to forward
+that reference through all the template nonsense in a typesafe manner.
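+
+A minimal sketch of this (Widget is a hypothetical class whose second
+constructor argument is a non-const reference):
+
+   class Widget {
+   public:
+      Widget(int n, ostream& s);   // writes diagnostics to s
+      // ...
+   };
+
+   SmartPtr<Widget> w = MakeSmart<Widget>(17, Fwd(cout));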
+
+MakeRaw:
+
+One can also write T *p = MakeRaw<T>(x1, ..., xn) to create a
+raw pointer.  This is the same as writing T *p = new T(x1, ..., xn),
+except that error handling is determined by the NTL_EXCEPTION
+flag (on => bad_alloc exception is thrown, off => error message
+and abort).
+
+MakeRawArray:
+
+Another utility routine: one can write T *p = MakeRawArray<T>(n)
+to make a plain array of n T objects.  Error handling is the same
+as for MakeRaw.
+
+Dynamic casting:
+
+I've also supplied a dynamic cast operation for smart pointers.
+
+   SmartPtr<Derived> d = MakeSmart<Derived>(); // d points to Derived
+   SmartPtr<Base> b = d; // implicit upcast: OK
+
+   SmartPtr<Derived> d1 = DynamicCast<Derived>(b);
+      // downcast to a Derived object -- returns null for a bad cast
+
+DeleterPolicy:
+
+Normally, when the object pointed to by a SmartPtr needs to be destroyed, this is
+done by invoking delete on the raw pointer.  The user can override this
+behavior by specifying a "deleter policy", which is a class P that defines a
+static member function deleter, which is invoked as P::deleter(p).  Such a
+policy can be attached to a SmartPtr using a specialized constructor (see
+below).
+
+A deleter policy can be useful, for example, in realizing the PIMPL
+pattern, where the class T's definition is not visible.  The specified deleter
+can invoke a free-standing function that itself invokes delete.  A deleter
+policy can also be useful if memory is to be managed using some mechanism other
+than new/delete.
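+
+A minimal sketch of such a policy (MyType and free_MyType are hypothetical;
+free_MyType is assumed to dispose of a MyType*):
+
+   struct MyDeleterPolicy {
+      static void deleter(MyType *p) { free_MyType(p); }
+   };
+
+   SmartPtr<MyType> p(rp, ChoosePolicy<MyDeleterPolicy>());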
+
+
+Implementation notes:
+
+If NTL is compiled with the NTL_THREADS option, then the reference counting
+will be thread safe.
+
+The SmartPtrControl class hierarchy is used to make sure the right destructor
+is called when the ref count goes to zero.  This can be an issue for forward
+declared classes and for subclasses.  For example, if T is forward declared in
+a context where the ref count goes to zero, or if the object's actual type is a
+subclass of T and T's destructor was not declared virtual.  The implementation
+of SmartPtr guarantees correct behavior in these situations.
+
+The null tests p, !p, p == 0, are all effected via an implicit conversion from
+SmartPtr<T> to a funny pointer type (a pointer to a member function, which
+avoids other, unwanted implicit conversions: this is the so-called "safe bool
+idiom");
+
+Also, there is an implicit conversion from another funny pointer type to
+SmartPtr<T>, which is how the implicit conversion from 0/nullptr is achieved.
+
+In C++11 both of the above effects could perhaps be achieved more directly.
+The new "explict bool" operator can replace the "safe bool idiom", and
+the new nullptr_t type could be used to get the conversion from null to work.
+
+NOTES: See http://www.artima.com/cppsource/safebool.html for more on the "safe
+bool idiom".  
+
+
+*****************************************************************************/
+
+// The default "deleter policy"
+struct DefaultDeleterPolicy {
+
+   template<class T>
+   static void deleter(T *p) { delete p; }
+
+};
+
+// A tagging class, for better readability in invoking constructor.
+// Usage: SmartPtr<T> p(r, ChoosePolicy<MyDeleterPolicy>());
+template<class P>
+struct ChoosePolicy { };
+
+
+
+
+
+template<class T>
+class SmartPtr {
+public:
+   template<class Y> explicit SmartPtr(Y* p);
+   // construct smart pointer from raw pointer with deleter policy
+   // DefaultDeleterPolicy (so p should be allocated using new).
+  
+   // NOTE: Y* must convert to T*, but the original pointer is preserved
+   // so that when ref count drops to 0, the *original* object of type Y is destroyed.
+    
+   // EXCEPTIONS: a control block is dynamically allocated;
+   //    if this allocation fails, the object pointed to by p is destroyed
+   //    and a bad_alloc exception is thrown
+
+   template<class Y, class P> SmartPtr(Y* p, ChoosePolicy<P>);
+   // construct smart pointer from raw pointer with deleter policy P.
+
+   // NOTE: Y* must convert to T*, but the original pointer is preserved
+   // so that when ref count drops to 0, the *original* object of type Y is destroyed.
+
+   // EXCEPTIONS: a control block is dynamically allocated;
+   //    if this allocation fails, the object pointed to by p is destroyed
+   //    and a bad_alloc exception is thrown
+
+   SmartPtr();
+   // initial value null
+
+   SmartPtr(fake_null_type1);
+   // automatic conversion from 0/nullptr
+
+   ~SmartPtr();
+   // destructor
+
+   SmartPtr(const SmartPtr& other);
+   SmartPtr& operator=(const SmartPtr& other);
+   // copy and assignment
+
+   template<class Y> SmartPtr(const SmartPtr<Y>& other);
+   template<class Y> SmartPtr& operator=(const SmartPtr<Y>& other);
+   // copy and assignment
+
+   T& operator*()  const;
+   T* operator->() const;
+   // indirection
+
+   T* get() const;
+   // get underlying raw pointer
+
+   void swap(SmartPtr& other);
+
+   SmartPtr(fake_null_type);
+   // allows assignment and initialization from 0
+
+   operator fake_null_type() const;
+   // allows comparisons to 0
+
+   template<class Y> SmartPtr<Y> DynamicCast() const;
+};
+
+
+// free swap function
+template<class T> void swap(SmartPtr<T>& p, SmartPtr<T>& q);
+
+// free dynamic cast function
+template<class X, class Y> SmartPtr<X> DynamicCast(const SmartPtr<Y>& p);
+
+
+// Equality testing
+template<class X, class Y>
+bool operator==(const SmartPtr<X>& a, const SmartPtr<Y>& b);
+
+template<class X, class Y>
+bool operator!=(const SmartPtr<X>& a, const SmartPtr<Y>& b);
+
+// MakeSmart pseudo-variadic template
+template<class T, class X1, ..., class Xn>
+SmartPtr<T> MakeSmart(const X1& x1, ..., const Xn& xn);
+// EXCEPTIONS: may throw if dynamic construction of T(x1, ..., xn) fails
+
+
+// EXCEPTIONS: unless otherwise specified, the methods above
+// never throw an exception (under C++11 rules, if a destructor
+// is invoked that throws an exception, the program will terminate).
+
+
+/****************************************************************************
+
+Experimental: CloneablePtr<T> ... essentially the same interface as SmartPtr, but
+allows cloning of complete objects.  The differences:
+*  must construct using MakeCloneable
+*  a clone method is provided
+*  implicit conversion from CloneablePtr to SmartPtr is allowed
+
+Example:
+
+   CloneablePtr<Derived> d = MakeCloneable<Derived>();
+   // d points to Derived
+
+   CloneablePtr<Base> b = d; // implicit upcast: OK
+
+   CloneablePtr<Base> b1 = b.clone();
+   // clone of b, which is really a Derived object
+
+   CloneablePtr<Derived> d1 = DynamicCast<Derived>(b1);
+   // downcast to a Derived object -- returns null for a bad cast
+
+   SmartPtr<Base> b2 = d1;
+  
+
+
+Implementation:
+
+In the clone method, the object is constructed using the copy constructor for
+the type T, where T is the compile-time type with which the first smart pointer
+to this object was created, even if the pointer has been subsequently
+upcast to a base type S.  Such objects must have been initially created using
+the MakeCloneable function.  It turns out, this is hard to do in a completely
+standards-compliant way, because of the type erasure going on.  So I settled on
+the current method, which does some low-level pointer arithmetic.  Even with
+fancy things like multiple and virtual inheritance, it should work, under the
+assumption that if two objects have the same (runtime) type, then their memory
+layout is the same.  I don't think anything like that is guaranteed by the
+standard, but this seems reasonable, and it seems to work.  Like I said, it is
+experimental, and I would appreciate feedback from C++ gurus.
+
+Note that NTL does not use this feature, but I do have applications where this
+is convenient.
+
+
+**********************************************************************************/
+
+
+template<class T>
+class CloneablePtr {
+public:
+   CloneablePtr();
+   // initial value null
+
+   ~CloneablePtr();
+   // if ref count drops to zero, then delete referent
+
+   CloneablePtr(const CloneablePtr& other);
+   CloneablePtr& operator=(const CloneablePtr& other);
+   // copy and assignment
+
+   template<class Y> CloneablePtr(const CloneablePtr<Y>& other);
+   template<class Y> CloneablePtr& operator=(const CloneablePtr<Y>& other);
+   // copy and assignment
+
+   T& operator*()  const;
+   T* operator->() const;
+   // indirection
+
+   T* get() const;
+   // get underlying raw pointer
+
+   void swap(CloneablePtr& other);
+
+   CloneablePtr(fake_null_type);
+   // allows assignment and initialization from 0
+
+   operator fake_null_type() const;
+   // allows comparisons to 0
+
+   template<class Y> CloneablePtr<Y> DynamicCast() const;
+
+   CloneablePtr clone() const;
+   // construct a clone, using the copy constructor
+   // EXCEPTIONS: may throw if copy construction fails
+
+
+   template<class Y> operator SmartPtr<Y>();
+   // implicit conversion from CloneablePtr<T> to SmartPtr<Y>,
+   // allowed if T* converts implicitly to Y*.
+};
+
+
+// free swap function
+template<class T> void swap(CloneablePtr<T>& p, CloneablePtr<T>& q);
+
+// free dynamic cast function
+template<class X, class Y> CloneablePtr<X> DynamicCast(const CloneablePtr<Y>& p);
+
+
+// Equality testing
+template<class X, class Y>
+bool operator==(const CloneablePtr<X>& a, const CloneablePtr<Y>& b);
+
+template<class X, class Y>
+bool operator!=(const CloneablePtr<X>& a, const CloneablePtr<Y>& b);
+
+// MakeCloneable pseudo-variadic template
+template<class T, class X1, ..., class Xn>
+CloneablePtr<T> MakeCloneable(const X1& x1, ..., const Xn& xn);
+// EXCEPTIONS: may throw if dynamic construction of T(x1, ..., xn) fails
+
+
+// EXCEPTIONS: unless otherwise specified, the methods above
+// never throw an exception (under C++11 rules, if a destructor
+// is invoked that throws an exception, the program will terminate).
+
+
+
+
+
+
+/**********************************************************************
+
+UniquePtr<T> -- unique pointer to object with copying disabled.
+Useful for pointers inside classes so that we can
+automatically destruct them.  
+
+Constructors:
+   UniquePtr<T> p1;     // initialize with null
+   UniquePtr<T> p1(0);
+
+   T* rp;
+   UniquePtr<T> p1(rp); // construct using raw pointer (explicit)
+
+   p1 = 0;              // destroys p1's referent and assigns null
+
+   p1.make(...);        // destroys p1's referent and assigns
+                        // a fresh object constructed via T(...),
+                        // using pseudo-variadic templates
+
+   p1.reset(rp);        // destroys p1's referent and assigns rp
+
+   if (!p1) ...         // test for null
+   if (p1 == 0) ...
+
+   if (p1) ...          // test for nonnull
+   if (p1 != 0) ...
+
+   if (p1 == p2) ...    // test for equality
+   if (p1 != p2) ...  
+
+   *p1                  // dereferencing
+   p1->...
+
+
+   rp = p1.get();       // fetch raw pointer
+   rp = p1.release();   // fetch raw pointer, and set to null
+
+   p1.move(p2);         // move p2 to p1, destroying p1's referent
+                        //   if p1 != p2
+
+   p1.swap(p2);         // swap pointers
+   swap(p1, p2);
+
+
+DeleterPolicy:
+
+UniquePtr supports a "deleter policy", analogous to that used in SmartPtr.
+
+Normally, when the object pointed to by a UniquePtr needs to be destroyed, this is
+done by invoking delete on the raw pointer.  The user can override this
+behavior by specifying a "deleter policy", which is a class P that defines a
+static member function deleter, which is invoked as P::deleter(p).  
+
+Unlike with a SmartPtr, the deleter policy must be attached to the type.
+The default policy is DefaultDeleterPolicy, defined above.
+
+A deleter policy can be useful, for example, in realizing the PIMPL
+pattern, where the class T's definition is not visible.  The specified deleter
+can invoke a free-standing function that itself invokes delete.  A deleter
+policy can also be useful if memory is to be managed using some mechanism other
+than new/delete.
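+
+A minimal sketch (MyType is hypothetical; note that, unlike with
+SmartPtr, the policy is part of the UniquePtr type itself):
+
+   struct MyTypeDeleter {
+      static void deleter(MyType *p);
+      // e.g., defined in a source file where MyType is complete
+   };
+
+   UniquePtr<MyType, MyTypeDeleter> p;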
+
+  
+**********************************************************************/
+
+
+template<class T, class P=DefaultDeleterPolicy>
+class UniquePtr {
+public:  
+   explicit UniquePtr(T *p);
+   // construct UniquePtr from raw pointer (allocated with new)
+
+   UniquePtr();
+   // initial value is null
+
+   UniquePtr& operator=(fake_null_type1);
+   // allows assignment of 0; equivalent to calling reset()
+
+   ~UniquePtr();
+   // destroys referent by calling P::deleter
+
+   void reset(T* p = 0);
+   // reset underlying pointer to p, destroying original referent
+   // by calling P::deleter
+
+   template<class X1, ..., class Xn>
+   void make(const X1& x1, ..., const Xn& xn);
+   // pseudo-variadic template, roughly equivalent to
+   // reset(new T(x1, ..., xn))
+   // EXCEPTIONS: this may throw (but provides strong ES guarantee)
+
+   T& operator*()  const;
+   T* operator->() const;
+   // indirection
+
+   T* get() const;
+   // get raw pointer
+
+   T* release();
+   // returns raw pointer, and sets the raw pointer to null
+
+   void move(UniquePtr& other);
+   // move other to *this, destroying original referent
+   // by calling P::deleter
+
+   void swap(UniquePtr& other);
+   // swap raw pointers
+
+   operator fake_null_type() const;
+   // allows comparison with 0
+
+private:
+   UniquePtr(const UniquePtr&); // disabled
+   void operator=(const UniquePtr&); // disabled
+};
+
+
+// free swap function
+template<class T> void swap(UniquePtr<T>& p, UniquePtr<T>& q);
+
+
+
+// Equality testing
+
+template<class X, class P> bool operator==(const UniquePtr<X,P>& a, const UniquePtr<X,P>& b);
+template<class X, class P> bool operator!=(const UniquePtr<X,P>& a, const UniquePtr<X,P>& b);
+
+
+// EXCEPTIONS: unless otherwise specified, the methods above
+// never throw an exception (under C++11 rules, if a destructor
+// is invoked that throws an exception, the program will terminate).
+
+
+/**********************************************************************
+
+CopiedPtr<T> -- essentially the same interface and implementation as UniquePtr,
+with the following exceptions:
+
+ * copy constructor is defined: by default, it will create a copy
+   of the referent using T's copy constructor (but this behavior
+   can be overridden -- see below)
+
+ * assignment operator is defined (and implemented in terms of the
+   copy constructor)
+
+ * The policy managing a CopiedPtr specifies deleter and copier functions:
+   the deleter is used to delete objects and the copier is used for making
+   copies (see below).
+
+NOTE: this class is meant to replace the OptionalVal class, whose
+interface is not so nice.  For backwards compatibility, OptionalVal will
+be maintained, however.
+  
+**********************************************************************/
+
+
+// This class specifies the default copier
+struct DefaultCopierPolicy {
+
+   template<class T>
+   static T* copier(T *p) { return (p ?  MakeRaw<T>(*p) : 0); }
+
+};
+
+// This class specifies an alternative copier, which is meant
+// to perform "deep" copies on class heirarchies that support an
+// appropriate clone() method.
+struct CloningCopier {
+
+   template<class T>
+   static T* copier(T *p) { return (p ?  p->clone() : 0); }
+
+};
+
+struct DefaultCopiedPtrPolicy : DefaultDeleterPolicy, DefaultCopierPolicy { };
+struct CloningCopiedPtrPolicy : DefaultDeleterPolicy, CloningCopier { };
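+
+// Example (Base/Derived are hypothetical classes, where Base provides the
+// virtual clone() method that CloningCopier requires):
+//
+//    CopiedPtr<Base, CloningCopiedPtrPolicy> p1(MakeRaw<Derived>());
+//    CopiedPtr<Base, CloningCopiedPtrPolicy> p2(p1); // deep copy via clone()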
+
+
+
+template<class T, class P=DefaultCopiedPtrPolicy>
+class CopiedPtr {
+public:  
+   explicit CopiedPtr(T *p);
+   // construct CopiedPtr from raw pointer (allocated with new)
+
+   CopiedPtr();
+   // initial value is null
+
+   CopiedPtr(const CopiedPtr& other);
+   // copy constructor: creates a copy of other's referent
+   // by calling P::copier
+
+   void operator=(const CopiedPtr&);
+   // assignment: copies other's referent by calling P::copier,
+   // and destroys the original referent by calling P::deleter
+
+   CopiedPtr& operator=(fake_null_type1);
+   // allows assignment of 0; equivalent to calling reset()
+
+   ~CopiedPtr();
+   // destroys referent by calling P::deleter
+
+   void reset(T* p = 0);
+   // reset underlying pointer to p, destroying original referent
+   // by calling P::deleter
+
+   template<class X1, ..., class Xn>
+   void make(const X1& x1, ..., const Xn& xn);
+   // pseudo-variadic template, roughly equivalent to
+   // reset(new T(x1, ..., xn))
+   // EXCEPTIONS: this may throw (but provides strong ES guarantee)
+
+   T& operator*()  const;
+   T* operator->() const;
+   // indirection
+
+   T* get() const;
+   // get raw pointer
+
+   T* release();
+   // returns raw pointer, and sets the raw pointer to null
+
+   void move(CopiedPtr& other);
+   // move other to *this, destroying original referent
+   // by calling P::deleter
+
+
+   void swap(CopiedPtr& other);
+   // swap raw pointers
+
+   operator fake_null_type() const;
+   // allows comparison with 0
+
+};
+
+
+// free swap function
+template<class T> void swap(CopiedPtr<T>& p, CopiedPtr<T>& q);
+
+
+
+// Equality testing
+
+template<class X, class P> bool operator==(const CopiedPtr<X,P>& a, const CopiedPtr<X,P>& b);
+template<class X, class P> bool operator!=(const CopiedPtr<X,P>& a, const CopiedPtr<X,P>& b);
+
+
+// EXCEPTIONS: unless otherwise specified, the methods above
+// never throw an exception (under C++11 rules, if a destructor
+// is invoked that throws an exception, the program will terminate).
+
+
+
+
+/**********************************************************************
+
+UniqueArray<T> -- similar to UniquePtr, but for arrays.  These arrays cannot be
+resized -- for that, you should use the Vec class.
+
+Constructors:
+   UniqueArray<T> p1;     // initialize with null
+   UniqueArray<T> p1(0);
+
+   T* rp;
+   UniqueArray<T> p1(rp); // construct using raw pointer (explicit)
+
+   p1 = 0;              // destroys p1's referent and assigns null
+
+   p1.SetLength(n);     // destroys p1's referent and assigns
+                        // a fresh array allocated via new T[n]
+
+   p1.reset(rp);        // destroys p1's referent and assigns rp
+
+   if (!p1) ...         // test for null
+   if (p1 == 0) ...
+
+   if (p1) ...          // test for nonnull
+   if (p1 != 0) ...
+
+   if (p1 == p2) ...    // test for equality
+   if (p1 != p2) ...  
+
+   p1[i]                // array indexing
+
+   rp = p1.get();       // fetch raw pointer
+   rp = p1.release();   // fetch raw pointer, and set to null
+   p1.move(p2);         // move p2 to p1, destroying p1's referent
+                        //   if p1 != p2
+
+   p1.swap(p2);         // fast swap
+   swap(p1, p2);
+
+  
+**********************************************************************/
+
+
+template<class T>
+class UniqueArray {
+public:  
+   explicit UniqueArray(T *p);
+   // construct from raw pointer (allocated with new[])
+
+   UniqueArray();
+   // initially null
+
+   UniqueArray& operator=(fake_null_type1);
+   // allows assignment of 0
+
+   ~UniqueArray();
+
+   void reset(T* p = 0);
+   // reset with raw pointer, destroying referent
+
+   void SetLength(long n);
+   // destroys referent and allocates an array of size n
+   // EXCEPTIONS: this may throw (but provides strong ES guarantee)
+
+   T& operator[](long i) const;
+   // accesses ith element in the array (currently no range checking)
+
+   T* get() const;
+   // get raw pointer
+
+   T* elts() const;
+   // get raw pointer (for compatibility with the Vec class)
+
+   T* release();
+   // get raw pointer and reset to null
+
+   void move(UniqueArray& other);
+   // move raw pointer
+
+   void swap(UniqueArray& other);
+   // swap raw pointer
+
+   operator fake_null_type() const;
+   // allows comparison to 0
+
+private:
+   UniqueArray(const UniqueArray&); // disabled
+   void operator=(const UniqueArray&); // disabled
+
+};
+
+
+
+// free swap function
+template<class T> void swap(UniqueArray<T>& p, UniqueArray<T>& q);
+
+
+
+// Equality testing
+
+template<class X> bool operator==(const UniqueArray<X>& a, const UniqueArray<X>& b);
+template<class X> bool operator!=(const UniqueArray<X>& a, const UniqueArray<X>& b);
+
+
+
+
+/**********************************************************************
+
+Unique2DArray<T> -- unique pointer to array of arrays.
+
+This is very similar to UniqueArray< UniqueArray<T> >, except that
+we can retrofit old code that accepts objects of type T**.
+
+Constructors:
+   Unique2DArray<T> p1;     // initialize with null
+   Unique2DArray<T> p1(0);
+
+   p1 = 0;              // destroys p1's referent and assigns null
+   p1.reset();
+
+   p1.SetLength(n);     // destroys p1's referent and assigns
+                        // a fresh array of null pointers
+
+   p1.SetDims(n, m);    // creates an n x m array
+                
+   if (!p1) ...         // test for null
+   if (p1 == 0) ...
+
+   if (p1) ...          // test for nonnull
+   if (p1 != 0) ...
+
+   if (p1 == p2) ...    // test for equality
+   if (p1 != p2) ...  
+
+   p1[i]                // array indexing
+
+   T **rp;
+   rp = p1.get();       // fetch raw pointer
+   rp = p1.release();   // fetch raw pointer, and set to null
+   p1.move(p2);         // if p1 != p2 then:
+                        //    makes p1 point to p2's referent,
+                        //    setting p2 to null and destroying
+                        //    p1's referent
+
+   p1.swap(p2);         // fast swap
+   swap(p1, p2);
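+
+   Since get() yields a T**, a Unique2DArray can be passed directly to
+   legacy interfaces (legacy_fn is a hypothetical example):
+
+      void legacy_fn(T **mat, long n, long m);
+      legacy_fn(p1.get(), n, m);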
+
+  
+**********************************************************************/
+
+
+template<class T>
+class Unique2DArray {
+public:
+   typedef T *T_ptr;
+
+   Unique2DArray();
+   // initially null
+
+   Unique2DArray& operator=(fake_null_type1);
+   // allows initialization and assignment of 0
+
+   ~Unique2DArray();
+   // destroys the entire array and each row in the array
+
+   void reset();
+   // reset to null
+
+
+   void SetLength(long n);
+   // resets the array to a vector of length n,
+   // each entry initialized to null.
+   // EXCEPTIONS: may throw (provides strong ES guarantee)
+  
+   void SetDims(long n, long m);
+   // resets the array to a 2D array with n rows and m columns.
+   // EXCEPTIONS: may throw (provides strong ES guarantee)
+
+   void SetDimsFrom1(long n, long m);
+   // same as above, but only initializes rows 1..n-1.
+   // this helps with some legacy code.
+   // EXCEPTIONS: may throw (provides strong ES guarantee)
+
+   T_ptr& operator[](long i) const;
+   // array indexing, no range checking
+
+   T_ptr* get() const;
+   // return underlying pointer
+
+   T_ptr* release() { len = 0; return dp.release(); }
+   // return underlying pointer and reset to null
+
+
+   void move(Unique2DArray& other);
+   // move pointers
+
+   void swap(Unique2DArray& other);
+   // swap pointers
+
+   operator fake_null_type() const;
+   // allows comparison to 0
+
+
+private:
+
+   Unique2DArray(const Unique2DArray&); // disabled
+   void operator=(const Unique2DArray&); // disabled
+
+};
+
+
+// free swap function
+template<class T> void swap(Unique2DArray<T>& p, Unique2DArray<T>& q);
+
+
+
+// Equality testing
+
+template<class X> bool operator==(const Unique2DArray<X>& a, const Unique2DArray<X>& b);
+template<class X> bool operator!=(const Unique2DArray<X>& a, const Unique2DArray<X>& b);
+
+
+
+
+
+/**********************************************************************
+
+
+OptionalVal<T> -- unique pointer to object with copying enabled.
+
+NOTE: this class is deprecated; use CopiedPtr instead.
+It will, however, be maintained indefinitely for backward compatibility.
+
+Constructors:
+   OptionalVal<T> p1;     // initialize with null
+
+   T* rp;
+   OptionalVal<T> p1(rp); // construct using raw pointer (explicit)
+
+   OptionalVal<T> p2(p1); // construct a copy of p1's referent
+
+    
+
+   p1.make(...);        // destroys p1's referent and assigns
+                        // a fresh object constructed via T(...),
+                        // using pseudo-variadic templates
+
+   p1.reset(rp);        // destroys p1's referent and assigns rp
+
+   if (p1.exists()) ... // test for non-null
+
+   p1.val()             // dereference
+
+   rp = p1.get();       // fetch raw pointer
+   rp = p1.release();   // fetch raw pointer, and set to NULL
+   p1.move(p2);         // move p2 to p1, destroying p1's referent
+                        //   if p1 != p2
+
+   p1 = p2;             // deep copy, using T's copy constructor
+
+   p1.swap(p2);         // swap pointers
+   swap(p1, p2);
+
+  
+**********************************************************************/
+
+
+template<class T>
+class OptionalVal {
+public:  
+   explicit OptionalVal(T *p);
+   // initialize using raw pointer (allocated with new)
+
+   OptionalVal();
+   // initialize to null
+
+   OptionalVal(const OptionalVal& other);
+   // initialize using a deep copy (via T's copy constructor)
+
+   OptionalVal& operator=(const OptionalVal& other);
+   // assignment using a deep copy (via T's copy constructor)
+
+   ~OptionalVal();
+   // destroys the referent
+
+   void reset(T* p = 0);
+   // resets the referent
+
+   template<class X1, ..., class Xn>
+   void make(const X1& x1, ..., const Xn& xn);
+   // pseudo-variadic template.
+   // resets the referent to a new object T(x1, ..., xn)
+   // EXCEPTIONS: may throw an exception (but provides strong ES guarantee)
+
+   T& val() const;
+   // returns reference to referent
+   // if underlying pointer p is null, the indirection *p
+   // is undefined behavior, but most likely leads to program termination
+
+   bool exists() const;
+   // checks that underlying pointer is not null
+
+   T* get() const;
+   // returns underlying raw pointer
+
+   T* release();
+   // returns raw pointer, and sets the raw pointer to null
+
+   void move(OptionalVal& other);
+   // performs a (shallow) pointer move
+
+   void swap(OptionalVal& other);
+   // performs a (shallow) pointer swap
+
+};
+
+
+// free swap function
+template<class T> void swap(OptionalVal<T>& p, OptionalVal<T>& q);
+
+
+
+// EXCEPTIONS: unless otherwise specified, the methods above
+// never throw an exception (under C++11 rules, if a destructor
+// is invoked that throws an exception, the program will terminate).
+
+
+ diff --git a/thirdparty/linux/ntl/doc/SmartPtr.txt b/thirdparty/linux/ntl/doc/SmartPtr.txt new file mode 100644 index 0000000000..0bee4342dc --- /dev/null +++ b/thirdparty/linux/ntl/doc/SmartPtr.txt @@ -0,0 +1,947 @@ + + +/**************************************************************************** + +SmartPtr: a smart pointer class. + +Synopsis: provides a reference counted smart pointer, similar to shared_ptr +in the standard library. It is provided here to minimize reliance +on the standard library, especially for older C++ compilers, which may +not provide shared_ptr, or it may be in TR1, which gets messy. + + +Examples: + + + SmartPtr p1; // initialize to null + SmartPtr p1(0); + + SmartPtr p2 = 0; // 0/nullptr implicitly converts to SmartPtr + + SmartPtr p3(p1); // copy constructor + + T *rp; + SmartPtr p4(rp); // construct using raw pointer (explicit): better + // to use MakeSmart below + + p1 = MakeSmart(...); // build new T object by invoking constructor + // T(...) with pseudo-variadic templates. + // This is safer and more efficient that + // using the raw-pointer constructor + + p1 = p2; // assignment + p1 = 0; // assign null + + + if (!p1) ... // test for null + if (p1 == 0) ... + + if (p1) ... // test for not null ... + if (p1 != 0) ... + + if (p1 == p2) ... // test for equality + if (p1 != p2) + + *p1 // dereferencing + p1->... + + p1.get(); // return the underlying raw pointer...dangerous! + + p1.swap(p2); // fast swap + swap(p1, p2); + + +Automatic Conversions: + +If S is another class, SmartPtr converts to SmartPtr if S* converts to T* +(for example, if S is a subclass of T). Similarly, SmartPtr and SmartPtr +may be compared if S* and T* may be compared. + +0/nullptr automatically converts to SmartPtr. + +MakeSmart: + +One can write SmartPtr p = MakeSmart(x1, ..., xn), and this will create a +smart pointer to an object constructed as T(x1, ..., xn). Besides notational +convenience, it also reduces the number of memory allocations from 2 to 1, as +the data and control block can be allocated in one chunck of memory. + +This is implemented without reliance on C++11 features, which means that there +are limitations. First, the number n of arguments is limited to 9. And +second, all arguments are pass by const reference. However, you can work around +this by using the helper function Fwd. For example, if T has a 2-argument +constructor where the second must be a non-const reference of some type, and x2 +is a variable of that type, you can write MakeSmart(x1, Fwd(x2)), to forward +that reference through all the template nonsense in a typesafe manner. + +MakeRaw: + +One can also write T *p = MakeRaw(x1, ..., xn) to create a +raw pointer. This is the same as writing T *p = new T(x1, ..., xn), +except that error handling is determined by the NTL_EXCEPTION +flag (on => bad_alloc exception is thrown, off => error message +and abort). + +MakeRawArray: + +Another utility routine: one can write T *p = MakeRawArray(n) +to make a plain array of n T objects. Error handling is the same +as for MakeRaw. + +Dynamic casting: + +I've also supplied a dynamic cast operation for smart pointers. + + SmartPtr d = MakeSmart(); // d points to Derived + SmartPtr b = d; // implicit upcast: OK + + SmartPtr d1 = DynamicCast(b); + // downcast to a Derived object -- returns null for a bad cast + +DeleterPolicy: + +Normally, when the object pointed to a SmartPtr needs to be destroyed, this is +done by invoking delete on the raw pointer. 
The user can override this +behavior by specifying a "deleter policy", which is a class P that defines a +static member function deleter, which is invoked as P::deleter(p). Such a +policy can be attached to a SmartPtr using a specialized constructor (see +below). + +A deleter policy can be useful, for example, in realizing the PIPL +pattern, where the class T's definition is not visible. The specified deleter +can invoke a free-standing function that itself invokes delete. A deleter +policy can also be useful is memory is to be managed using some mechanism other +than new/delete. + + +Implementation notes: + +If NTL is compiled with the NTL_THREADS option, then the reference counting +will be thread safe. + +The SmartPtrControl class heirarchy is used to make sure the right destructor +is called when the ref count goes to zero. This can be an issue for forward +declared classes and for subclasses. For example, if T is forward declared in +a context where the ref count goes to zero, or if the object's actual type is a +subclass of T and T's destructor was not declared virtual. The implementation +of SmartPtr guarantees correct behavior in these situations. + +The null tests p, !p, p == 0, are all effected via an implicit conversion from +SmartPtr to a funny pointer type (a pointer to a member function, which +avoids other, unwanted implicit conversions: this is the so-called "safe bool +idiom"); + +Also, there is an implicit conversion from another funny pointer type to +SmartPtr, which is how the implicit conversion from 0/nullptr is achieved. + +In C++11 both of the above effects could perhaps be achieved more directly. +The new "explict bool" operator can replace the "safe bool idiom", and +the new nullptr_t type could be used to get the conversion from null to work. + +NOTES: See http://www.artima.com/cppsource/safebool.html for more on the "safe +bool idiom". + + +*****************************************************************************/ + +// The default "deleter policy" +struct DefaultDeleterPolicy { + + template + static void deleter(T *p) { delete p; } + +}; + +// A tagging class, for better readability in invoking constructor. +// Usage: SmartPtr p(r, ChoosePolicy()); +template +struct ChoosePolicy { }; + + + + + +template +class SmartPtr { +public: +public: + template explicit SmartPtr(Y* p); + // construct smart pointer from raw pointer with deleter policy + // DefaultDeleterPolicy (so p should be allocated using new). + + // NOTE: Y* must convert to T*, but upon the original pointer is preserved + // so that when ref count drops to 0, the *original* object of type Y is destroyed. + + // EXCEPTIONS: a control block is dynamically allocated; + // if this allocation fails, the object pointed to by p is destroyed + // and a bad_alloc exception is thrown + + template SmartPtr(Y* p, ChoosePolicy

); + // construct smart pointer from raw pointer with deleter policy P. + + // NOTE: Y* must convert to T*, but upon the original pointer is preserved + // so that when ref count drops to 0, the *original* object of type Y is destroyed. + + // EXCEPTIONS: a control block is dynamically allocated; + // if this allocation fails, the object pointed to by p is destroyed + // and a bad_alloc exception is thrown + + SmartPtr(); + // initial value null + + SmartPtr(fake_null_type1); + // automatic conversion from 0/nullptr + + ~SmartPtr(); + // destructor + + SmartPtr(const SmartPtr& other); + SmartPtr& operator=(const SmartPtr& other); + // copy and asignment + + template SmartPtr(const SmartPtr& other); + template SmartPtr& operator=(const SmartPtr& other); + // copy and asignment + + T& operator*() const; + T* operator->() const; + // indirection + + T* get() const; + // get underlying raw pointer + + void swap(SmartPtr& other); + + SmartPtr(fake_null_type); + // allows assignment and initialization from 0 + + operator fake_null_type() const; + // allows comparisons to 0 + + template SmartPtr DynamicCast() const; +}; + + +// free swap function +template void swap(SmartPtr& p, SmartPtr& q); + +// free dynamic cast function +template SmartPtr DynamicCast(const SmartPtr& p); + + +// Equality testing +template +bool operator==(const SmartPtr& a, const SmartPtr& b); + +template +bool operator!=(const SmartPtr& a, const SmartPtr& b); + +// MakeSmart psuedo-variadic template +template +SmartPtr MakeSmart(const X1& x1, ..., const Xn& xn); +// EXCEPTIONS: may throw if dynamic constrction of T(x1, ..., xn) fails + + +// EXCEPTIONS: unless otherwise specified, the methods above +// never throw an exception (under C++11 rules, if a destructor +// is invoked that throws an exception, the program will terminate). + + +/**************************************************************************** + +Experimantal: CloneablePtr ...essentially same interface as SmartPtr, but +allows cloning of complete objects. The differences: +* must construct using MakeCloneable +* a clone method is provided +* implicit conversion from CloneablePtr to SmartPtr is allowed + +Example: + + CloneablePtr d = MakeCloneable(); + // d points to Derived + + CloneablePtr b = d; // implicit upcast: OK + + CloneablePtr b1 = b.clone(); + // clone of b, which is really a Derived object + + CloneablePtr d1 = DynamicCast(b1); + // downcast to a Derived object -- returns null for a bad cast + + SmartPtr b2 = d1; + + + +Implementation: + +In the clone method, the object is constructed using the copy constructor for +the type T, where T is the compile-time type with which the first smart pointer +to this object was was created, even if the pointer has been subsequently +upcasted to a base type S. Such objects must have been initially created using +the MakeCloneable function. It turns out, this is hard to do in a completely +standards-compliant way, because of the type erasure going on. So I settled on +the current method, which does some low-level pointer arithmetic. Even with +fancy things like multiple and virtual inheritance, it should work, under the +assumption that if two objects have the same (runtime) type, then their memory +layout is the same. I don't think anything like that is guaranteed by the +standard, but this seems reasonable, and it seems to work. Like I said, it is +experimental, and I would appreciate feedback from C++ gurus. + +Note that NTL does not use this feature, but I do have applications where this +is convenient. 
+ + +**********************************************************************************/ + + +template +class CloneablePtr { +public: + CloneablePtr(); + // initial value null + + ~CloneablePtr(); + // if ref count drops to zero, then delete referent + + CloneablePtr(const CloneablePtr& other); + CloneablePtr& operator=(const CloneablePtr& other); + // copy and asignment + + template CloneablePtr(const CloneablePtr& other); + template CloneablePtr& operator=(const CloneablePtr& other); + // copy and asignment + + T& operator*() const; + T* operator->() const; + // indirection + + T* get() const; + // get underlying raw pointer + + void swap(CloneablePtr& other); + + CloneablePtr(fake_null_type); + // allows assignment and initialization from 0 + + operator fake_null_type() const; + // allows comparisons to 0 + + template CloneablePtr DynamicCast() const; + + CloneablePtr clone() const; + // construct a clone, using the copy constructor + // EXCEPTIONS: may throw if copy construction fails + + + template operator SmartPtr(); + // implicit conversion from CloneablePtr to SmartPtr, + // allowed if T* converts implicitly to Y*. +}; + + +// free swap function +template void swap(CloneablePtr& p, CloneablePtr& q); + +// free dynamic cast function +template CloneablePtr DynamicCast(const CloneablePtr& p); + + +// Equality testing +template +bool operator==(const CloneablePtr& a, const CloneablePtr& b); + +template +bool operator!=(const CloneablePtr& a, const CloneablePtr& b); + +// MakeCloneable psuedo-variadic template +template +CloneablePtr MakeCloneable(const X1& x1, ..., const Xn& xn); +// EXCEPTIONS: may throw if dynamic constrction of T(x1, ..., xn) fails + + +// EXCEPTIONS: unless otherwise specified, the methods above +// never throw an exception (under C++11 rules, if a destructor +// is invoked that throws an exception, the program will terminate). + + + + + + +/********************************************************************** + +UniquePtr -- unique pointer to object with copying disabled. +Useful for pointers inside classes so that we can +automatically destruct them. + +Constructors: + UniquePtr p1; // initialize with null + UniquePtr p1(0); + + T* rp; + UniquePtr p1(rp); // construct using raw pointer (explicit) + + p1 = 0; // destroy's p1's referent and assigns null + + p1.make(...); // destroy's p1's referent and assigns + // a fresh objected constructed via T(...), + // using psuedo-variadic templates + + p1.reset(rp); // destroy's p1's referent and assign rp + + if (!p1) ... // test for null + if (p1 == 0) ... + + if (p1) ... // test for nonnull + if (p1 != 0) ... + + if (p1 == p2) ... // test for equality + if (p1 != p2) ... + + *p1 // dereferencing + p1->... + + + rp = p1.get(); // fetch raw pointer + rp = p1.release(); // fetch raw pointer, and set to null + + p1.move(p2); // move p2 to p1, destroying p1's referent + // if p1 != p2 + + p1.swap(p2); // swap pointers + swap(p1, p2); + + +DeleterPolicy: + +UniquePtr supports a "deleter policy", analogous to that used in SmartPtr. + +Normally, when the object pointed to a UniquePtr needs to be destroyed, this is +done by invoking delete on the raw pointer. The user can override this +behavior by specifying a "deleter policy", which is a class P that defines a +static member function deleter, which is invoked as P::deleter(p). + +Unlike with a SmartPtr, the deleter policy must be attached to the type. +The default policy is the same DefaultDeleterPolicy, defined above. 
+ +A deleter policy can be useful, for example, in realizing the PIPL +pattern, where the class T's definition is not visible. The specified deleter +can invoke a free-standing function that itself invokes delete. A deleter +policy can also be useful is memory is to be managed using some mechanism other +than new/delete. + + +**********************************************************************/ + + +template +class UniquePtr { +public: + explicit UniquePtr(T *p); + // construct UniquePtr from raw pointer (allocated with new) + + UniquePtr(); + // initial value is null + + UniquePtr& operator=(fake_null_type1); + // allows assignment of 0; equivalent to calling reset() + + ~UniquePtr(); + // destroys referent by calling P::deleter + + void reset(T* p = 0); + // reset underlying pointer to p, destroying original referent + // by calling P::deleter + + template + void make(const X1& x1, ..., const Xn& xn); + // pseudo-variadic template, roughly equivalent to + // reset(new T(x1, ..., xn)) + // EXCEPTIONS: this may throw (but provides strong ES guarantee) + + T& operator*() const; + T* operator->() const; + // indirection + + T* get() const; + // get raw pointer + + T* release(); + // returns raw pointer, and sets the raw pointer to null + + void move(UniquePtr& other); + // move other to *this, destroying original referent + // by calling P::deleter + + void swap(UniquePtr& other); + // swap raw pointers + + operator fake_null_type() const; + // allows comparison with 0 + +private: + UniquePtr(const UniquePtr&); // disabled + void operator=(const UniquePtr&); // disabled +}; + + +// free swap function +template void swap(UniquePtr& p, UniquePtr& q); + + + +// Equality testing + +template bool operator==(const UniquePtr& a, const UniquePtr& b); +template bool operator!=(const UniquePtr& a, const UniquePtr& b); + + +// EXCEPTIONS: unless otherwise specified, the methods above +// never throw an exception (under C++11 rules, if a destructor +// is invoked that throws an exception, the program will terminate). + + +/********************************************************************** + +CopiedPtr -- essentially the same interface and implemetation as UniquePtr, +with the following exceptions: + + * copy constructor is defined: by default, it will create a copy + of the referrent using T's copy constructor (but this bahavior + can be overridden -- see below) + + * assignment operator is defined (and implemented in terms of the + copy constructor) + + * The policy managing a CopiedPtr specifier deleter and copier functions: + the deleter is used to delete objects and the copies is used for making + copies (see below). + +NOTE: this class is meant to replace the OptionalVal class, whose +interface is not so nice. For backwards compatibility, OptionalVal will +be maintained, however. + +**********************************************************************/ + + +// This class specifies the default copier +struct DefaultCopierPolicy { + + template + static T* copier(T *p) { return (p ? MakeRaw(*p) : 0); } + +}; + +// This class specifies an alternative copier, which is meant +// to perform "deep" copies on class heirarchies that support an +// appropriate clone() method. +struct CloningCopier { + + template + static T* copier(T *p) { return (p ? 
p->clone() : 0); } + +}; + +struct DefaultCopiedPtrPolicy : DefaultDeleterPolicy, DefaultCopierPolicy { }; +struct CloningCopiedPtrPolicy : DefaultDeleterPolicy, CloningCopier { }; + + + +template +class CopiedPtr { +public: + explicit CopiedPtr(T *p); + // construct CopiedPtr from raw pointer (allocated with new) + + CopiedPtr(); + // initial value is null + + CopiedPtr(const CopiedPtr& other); + // creates a copy of other's referent by calling P::copier, + // and destroys original referent by calling P::deleter + + void operator=(const CopiedPtr&); + + CopiedPtr& operator=(fake_null_type1); + // allows assignment of 0; equivalent to calling reset() + + ~CopiedPtr(); + // destroys referent by calling P::deleter + + void reset(T* p = 0); + // reset underlying pointer to p, destroying original referent + // by calling P::deleter + + template + void make(const X1& x1, ..., const Xn& xn); + // pseudo-variadic template, roughly equivalent to + // reset(new T(x1, ..., xn)) + // EXCEPTIONS: this may throw (but provides strong ES guarantee) + + T& operator*() const; + T* operator->() const; + // indirection + + T* get() const; + // get raw pointer + + T* release(); + // returns raw pointer, and sets the raw pointer to null + + void move(CopiedPtr& other); + // move other to *this, destroying original referent + // by calling P::deleter + + + void swap(CopiedPtr& other); + // swap raw pointers + + operator fake_null_type() const; + // allows comparison with 0 + +}; + + +// free swap function +template void swap(CopiedPtr& p, CopiedPtr& q); + + + +// Equality testing + +template bool operator==(const CopiedPtr& a, const CopiedPtr& b); +template bool operator!=(const CopiedPtr& a, const CopiedPtr& b); + + +// EXCEPTIONS: unless otherwise specified, the methods above +// never throw an exception (under C++11 rules, if a destructor +// is invoked that throws an exception, the program will terminate). + + + + +/********************************************************************** + +UniqueArray -- similar to UniquePtr, but for arrays. These arrays cannot be +resized -- for that, you should use the Vec class. + +Constructors: + UniqueArray p1; // initialize with null + UniqueArray p1(0); + + T* rp; + UniqueArray p1(rp); // construct using raw pointer (explicit) + + p1 = 0; // destroy's p1's referent and assigns null + + p1.SetLength(n); // destroy's p1's referent and assigns + // a fresh objected constructed via new T[n] + + p1.reset(rp); // destroy's p1's referent and assign rp + + if (!p1) ... // test for null + if (p1 == 0) ... + + if (p1) ... // test for nonnull + if (p1 != 0) ... + + if (p1 == p2) ... // test for equality + if (p1 != p2) ... 
+
+   p1[i]                  // array indexing
+
+   rp = p1.get();         // fetch raw pointer
+   rp = p1.release();     // fetch raw pointer, and set to null
+   p1.move(p2);           // move p2 to p1, destroying p1's referent
+                          // if p1 != p2
+
+   p1.swap(p2);           // fast swap
+   swap(p1, p2);
+
+
+**********************************************************************/
+
+
+template<class T>
+class UniqueArray {
+public:
+   explicit UniqueArray(T *p);
+   // construct from raw pointer (allocated with new[])
+
+   UniqueArray();
+   // initially null
+
+   UniqueArray& operator=(fake_null_type1);
+   // allows assignment of 0
+
+   ~UniqueArray();
+
+   void reset(T* p = 0);
+   // reset with raw pointer, destroying referent
+
+   void SetLength(long n);
+   // destroys referent and allocates an array of size n
+   // EXCEPTIONS: this may throw (but provides strong ES guarantee)
+
+   T& operator[](long i) const;
+   // accesses ith element in the array (currently no range checking)
+
+   T* get() const;
+   // get raw pointer
+
+   T* elts() const;
+   // get raw pointer (for compatibility with the Vec class)
+
+   T* release();
+   // get raw pointer and reset to null
+
+   void move(UniqueArray& other);
+   // move raw pointer
+
+   void swap(UniqueArray& other);
+   // swap raw pointer
+
+   operator fake_null_type() const;
+   // allows comparison to 0
+
+private:
+   UniqueArray(const UniqueArray&); // disabled
+   void operator=(const UniqueArray&); // disabled
+
+};
+
+
+
+// free swap function
+template<class T>
+void swap(UniqueArray<T>& p, UniqueArray<T>& q);
+
+
+
+// Equality testing
+
+template<class T>
+bool operator==(const UniqueArray<T>& a, const UniqueArray<T>& b);
+template<class T>
+bool operator!=(const UniqueArray<T>& a, const UniqueArray<T>& b);
+
+
+
+
+/**********************************************************************
+
+Unique2DArray -- unique pointer to array of arrays.
+
+This is very similar to UniqueArray< UniqueArray<T> >, except that
+we can retrofit old code that accepts objects of type T**.
+
+Constructors:
+   Unique2DArray<T> p1;   // initialize with null
+   Unique2DArray<T> p1(0);
+
+   p1 = 0;                // destroys p1's referent and assigns null
+   p1.reset();
+
+   p1.SetLength(n);       // destroys p1's referent and assigns
+                          // a fresh array of null pointers
+
+   p1.SetDims(n, m)       // creates an n x m array
+
+   if (!p1) ...           // test for null
+   if (p1 == 0) ...
+
+   if (p1) ...            // test for nonnull
+   if (p1 != 0) ...
+
+   if (p1 == p2) ...      // test for equality
+   if (p1 != p2) ...
+
+   p1[i]                  // array indexing
+
+   T **rp;
+   rp = p1.get();         // fetch raw pointer
+   rp = p1.release();     // fetch raw pointer, and set to null
+   p1.move(p2);           // if p1 != p2 then:
+                          //    makes p1 point to p2's referent,
+                          //    setting p2 to null and destroying
+                          //    p1's referent
+
+   p1.swap(p2);           // fast swap
+   swap(p1, p2);
+
+
+**********************************************************************/
+
+
+template<class T>
+class Unique2DArray {
+public:
+   typedef T *T_ptr;
+
+   Unique2DArray();
+   // initially null
+
+   Unique2DArray& operator=(fake_null_type1);
+   // allows initialization and assignment of 0
+
+   ~Unique2DArray();
+   // destroys the entire array and each row in the array
+
+   void reset();
+   // reset to null
+
+
+   void SetLength(long n);
+   // resets the array to a vector of length n,
+   // each entry initialized to null.
+   // EXCEPTIONS: may throw (provides strong ES guarantee)
+
+   void SetDims(long n, long m);
+   // resets the array to a 2D array with n rows and m columns.
+   // EXCEPTIONS: may throw (provides strong ES guarantee)
+
+   void SetDimsFrom1(long n, long m);
+   // same as above, but only initializes rows 1..n-1.
+   // this helps with some legacy code.
+   // EXCEPTIONS: may throw (provides strong ES guarantee)
+
+   T_ptr& operator[](long i) const;
+   // array indexing, no range checking
+
+   T_ptr* get() const;
+   // return underlying pointer
+
+   T_ptr* release() { len = 0; return dp.release(); }
+   // return underlying pointer and reset to null
+
+
+   void move(Unique2DArray& other);
+   // move pointers
+
+   void swap(Unique2DArray& other);
+   // swap pointers
+
+   operator fake_null_type() const;
+   // allows comparison to 0
+
+
+private:
+
+   Unique2DArray(const Unique2DArray&); // disabled
+   void operator=(const Unique2DArray&); // disabled
+
+};
+
+
+// free swap function
+template<class T>
+void swap(Unique2DArray<T>& p, Unique2DArray<T>& q);
+
+
+
+// Equality testing
+
+template<class T>
+bool operator==(const Unique2DArray<T>& a, const Unique2DArray<T>& b);
+template<class T>
+bool operator!=(const Unique2DArray<T>& a, const Unique2DArray<T>& b);
+
+
+
+
+
+/**********************************************************************
+
+
+OptionalVal -- unique pointer to object with copying enabled.
+
+NOTE: this class is deprecated; use CopiedPtr instead.
+It will, however, be maintained indefinitely for backward compatibility.
+
+Constructors:
+   OptionalVal<T> p1;     // initialize with null
+
+   T* rp;
+   OptionalVal<T> p1(rp); // construct using raw pointer (explicit)
+
+   OptionalVal<T> p2(p1); // construct a copy of p1's referent
+
+
+
+   p1.make(...);          // destroys p1's referent and assigns
+                          // a fresh object constructed via T(...),
+                          // using pseudo-variadic templates
+
+   p1.reset(rp);          // destroys p1's referent and assigns rp
+
+   if (p1.exists()) ...   // test for nonnull
+
+   p1.val()               // dereference
+
+   rp = p1.get();         // fetch raw pointer
+   rp = p1.release();     // fetch raw pointer, and set to null
+   p1.move(p2);           // move p2 to p1, destroying p1's referent
+                          // if p1 != p2
+
+   p1 = p2;               // deep copy, using T's copy constructor
+
+   p1.swap(p2);           // swap pointers
+   swap(p1, p2);
+
+
+**********************************************************************/
+
+
+template<class T>
+class OptionalVal {
+public:
+   explicit OptionalVal(T *p);
+   // initialize using raw pointer (allocated with new)
+
+   OptionalVal();
+   // initialize to null
+
+   OptionalVal(const OptionalVal& other);
+   // initialize using a deep copy (via T's copy constructor)
+
+   OptionalVal& operator=(const OptionalVal& other);
+   // assignment using a deep copy (via T's copy constructor)
+
+   ~OptionalVal();
+   // destroys the referent
+
+   void reset(T* p = 0);
+   // resets the referent
+
+   template<class X1, ..., class Xn>
+   void make(const X1& x1, ..., const Xn& xn);
+   // pseudo-variadic template.
+   // resets the referent to a new object T(x1, ..., xn)
+   // EXCEPTIONS: may throw an exception (but provides strong ES guarantee)
+
+   T& val() const;
+   // returns reference to referent
+   // if the underlying pointer p is null, the indirection *p
+   // is undefined behavior, but most likely leads to program termination
+
+   bool exists() const;
+   // checks that underlying pointer is not null
+
+   T* get() const;
+   // returns underlying raw pointer
+
+   T* release();
+   // returns raw pointer, and sets the raw pointer to null
+
+   void move(OptionalVal& other);
+   // performs a (shallow) pointer move
+
+   void swap(OptionalVal& other);
+   // performs a (shallow) pointer swap
+
+};
+
+
+// free swap function
+template<class T>
+void swap(OptionalVal<T>& p, OptionalVal<T>& q);
+
+
+
+// EXCEPTIONS: unless otherwise specified, the methods above
+// never throw an exception (under C++11 rules, if a destructor
+// is invoked that throws an exception, the program will terminate).
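+
+
+// For illustration only, a minimal usage sketch of the classes above.
+// The class Widget and its constructor argument are placeholders, not
+// part of this interface:
+
+   UniquePtr<Widget> up;
+   up.make(42);                  // roughly up.reset(new Widget(42))
+   Widget& w = *up;              // indirection; up still owns the Widget
+
+   CopiedPtr<Widget> cp1;
+   cp1.make(42);
+   CopiedPtr<Widget> cp2(cp1);   // deep copy of cp1's referent
+   cp1 = cp2;                    // assignment also copies the referent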
+
diff --git a/thirdparty/linux/ntl/doc/ZZ.cpp.html b/thirdparty/linux/ntl/doc/ZZ.cpp.html
new file mode 100644
index 0000000000..2e9b674102
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/ZZ.cpp.html
@@ -0,0 +1,1125 @@
+
+
+/**************************************************************************\
+
+MODULE: ZZ
+
+SUMMARY:
+
+The class ZZ is used to represent signed, arbitrary length integers.
+
+Routines are provided for all of the basic arithmetic operations, as
+well as for some more advanced operations such as primality testing.
+Space is automatically managed by the constructors and destructors.
+
+This module also provides routines for generating small primes, and
+fast routines for performing modular arithmetic on single-precision
+numbers.
+
+
+\**************************************************************************/
+
+#include <NTL/tools.h>
+
+
+class ZZ {
+public:
+
+
+   ZZ(); // initial value is 0
+
+   ZZ(const ZZ& a);  // copy constructor
+   explicit ZZ(long a);  // promotion constructor
+
+   ~ZZ(); // destructor
+
+   ZZ& operator=(const ZZ& a);  // assignment operator
+   ZZ& operator=(long a);  
+
+   // typedefs to aid in generic programming
+   typedef ZZ_p residue_type;
+   typedef ZZX poly_type;
+
+
+   // ...
+
+};
+
+
+// NOTE: A ZZ is represented as a sequence of "zzigits",
+// where each zzigit is between 0 and 2^{NTL_ZZ_NBITS}-1.
+
+// NTL_ZZ_NBITS is a macro defined in <NTL/ZZ.h>.
+
+// SIZE INVARIANT: the number of bits in a ZZ is always less than
+// 2^(NTL_BITS_PER_LONG-4).
+
+
+
+/**************************************************************************\
+
+                                 Comparison
+
+\**************************************************************************/
+
+
+
+// The usual comparison operators:
+  
+long operator==(const ZZ& a, const ZZ& b);
+long operator!=(const ZZ& a, const ZZ& b);
+long operator<(const ZZ& a, const ZZ& b);
+long operator>(const ZZ& a, const ZZ& b);
+long operator<=(const ZZ& a, const ZZ& b);
+long operator>=(const ZZ& a, const ZZ& b);
+
+// other stuff:
+
+long sign(const ZZ& a); // returns sign of a (-1, 0, +1)
+long IsZero(const ZZ& a); // test for 0
+long IsOne(const ZZ& a); // test for 1
+
+long compare(const ZZ& a, const ZZ& b); // returns sign of a-b (-1, 0, or 1).
+
+// PROMOTIONS: the comparison operators and the function compare
+// support promotion from long to ZZ on (a, b).
+
+
+/**************************************************************************\
+
+                                 Addition
+
+\**************************************************************************/
+
+
+// operator notation:
+
+ZZ operator+(const ZZ& a, const ZZ& b);
+ZZ operator-(const ZZ& a, const ZZ& b);
+ZZ operator-(const ZZ& a); // unary -
+
+ZZ& operator+=(ZZ& x, const ZZ& a);
+ZZ& operator+=(ZZ& x, long a);
+
+ZZ& operator-=(ZZ& x, const ZZ& a);
+ZZ& operator-=(ZZ& x, long a);
+
+ZZ& operator++(ZZ& x);  // prefix
+void operator++(ZZ& x, int);  // postfix
+
+ZZ& operator--(ZZ& x);  // prefix
+void operator--(ZZ& x, int);  // postfix
+
+
+
+// procedural versions:
+
+void add(ZZ& x, const ZZ& a, const ZZ& b); // x = a + b
+void sub(ZZ& x, const ZZ& a, const ZZ& b); // x = a - b
+void SubPos(ZZ& x, const ZZ& a, const ZZ& b); // x = a-b; assumes a >= b >= 0.
+void negate(ZZ& x, const ZZ& a); // x = -a
+
+void abs(ZZ& x, const ZZ& a); // x = |a|
+ZZ abs(const ZZ& a);
+
+// PROMOTIONS: binary +, -, as well as the procedural versions add, sub
+// support promotions from long to ZZ on (a, b).
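+
+// For illustration only, a minimal sketch contrasting the operator and
+// procedural forms (the names a, b, x are placeholders):
+
+   ZZ a, b, x;
+   a = 5; b = 7;
+   x = a + b;          // operator form: x = 12
+   add(x, a, b);       // procedural form, same result
+   sub(x, a, 3);       // promotion: the long value 3 is promoted to ZZ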
+
+
+/**************************************************************************\
+
+                             Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ operator*(const ZZ& a, const ZZ& b);
+
+ZZ& operator*=(ZZ& x, const ZZ& a);
+ZZ& operator*=(ZZ& x, long a);
+
+// procedural versions:
+
+void mul(ZZ& x, const ZZ& a, const ZZ& b); // x = a * b
+
+void sqr(ZZ& x, const ZZ& a); // x = a*a
+ZZ sqr(const ZZ& a);
+
+// PROMOTIONS: operator * and procedure mul support promotion
+// from long to ZZ on (a, b).
+
+/**************************************************************************\
+
+                            Combined Multiply and Add
+
+\**************************************************************************/
+
+
+void MulAddTo(ZZ& x, const ZZ& a, const ZZ& b); // x += a*b
+void MulAddTo(ZZ& x, const ZZ& a, long b);      // x += a*b
+
+
+void MulSubFrom(ZZ& x, const ZZ& a, const ZZ& b); // x -= a*b
+void MulSubFrom(ZZ& x, const ZZ& a, long b);      // x -= a*b
+
+// NOTE: these are provided for both convenience and efficiency.
+// The single-precision versions may be significantly
+// faster than the code sequence
+//   mul(tmp, a, b); add(x, x, tmp);
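+
+// For illustration only, a minimal sketch (names and values are
+// placeholders):
+
+   ZZ x, a, tmp;
+   a = 1000000; x = 1;
+   long b = 12345;
+   MulAddTo(x, a, b);       // x += a*b in one pass
+   mul(tmp, a, b);          // the same update, written as two steps
+   add(x, x, tmp);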
+
+
+
+/**************************************************************************\
+
+                                 Division
+
+\**************************************************************************/
+
+
+// operator notation:
+
+ZZ operator/(const ZZ& a, const ZZ& b);
+ZZ operator/(const ZZ& a, long  b);
+
+ZZ operator%(const ZZ& a, const ZZ& b);
+long operator%(const ZZ& a, long b);
+
+ZZ& operator/=(ZZ& x, const ZZ& b);
+ZZ& operator/=(ZZ& x, long b);
+
+ZZ& operator%=(ZZ& x, const ZZ& b);
+
+
+// procedural versions:
+
+void DivRem(ZZ& q, ZZ& r, const ZZ& a, const ZZ& b);
+// q = floor(a/b), r = a - b*q.
+// This implies that:
+//    |r| < |b|, and if r != 0, sign(r) = sign(b)
+
+void div(ZZ& q, const ZZ& a, const ZZ& b);
+// q = floor(a/b)
+
+void rem(ZZ& r, const ZZ& a, const ZZ& b);
+// q = floor(a/b), r = a - b*q
+
+
+// single-precision variants:
+
+long DivRem(ZZ& q, const ZZ& a, long b);
+// q = floor(a/b), r = a - b*q, return value is r.
+
+long rem(const ZZ& a, long b);
+// q = floor(a/b), r = a - b*q, return value is r.
+
+
+// divisibility testing:
+
+long divide(ZZ& q, const ZZ& a, const ZZ& b);
+long divide(ZZ& q, const ZZ& a, long b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0.
+
+long divide(const ZZ& a, const ZZ& b);
+long divide(const ZZ& a, long b);
+// if b | a, returns 1; otherwise returns 0.
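+
+// For illustration only, a sketch of the floor-division semantics
+// (names are placeholders):
+
+   ZZ a, b, q, r;
+   a = -7; b = 2;
+   DivRem(q, r, a, b);      // q = floor(-7/2) = -4, r = a - b*q = 1
+   if (divide(a, b)) {
+      // b | a would hold; not the case here
+   }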
+
+
+/**************************************************************************\
+
+                                    GCD's
+
+\**************************************************************************/
+
+
+void GCD(ZZ& d, const ZZ& a, const ZZ& b);
+ZZ GCD(const ZZ& a, const ZZ& b);
+
+// d = gcd(a, b) (which is always non-negative).  Uses a binary GCD
+// algorithm.
+
+
+
+void XGCD(ZZ& d, ZZ& s, ZZ& t, const ZZ& a, const ZZ& b);
+
+//  d = gcd(a, b) = a*s + b*t.
+
+// The coefficients s and t are defined according to the standard
+// Euclidean algorithm applied to |a| and |b|, with the signs then
+// adjusted according to the signs of a and b.
+
+// The implementation may or may not use Euclid's algorithm,
+// but the coefficients s and t are always computed as if
+// it did.
+
+
+// special-purpose single-precision variants:
+
+long GCD(long a, long b);
+// return value is gcd(a, b) (which is always non-negative)
+
+void XGCD(long& d, long& s, long& t, long a, long b);
+//  d = gcd(a, b) = a*s + b*t.
+
+//  The coefficients s and t are defined according to the standard
+//  Euclidean algorithm applied to |a| and |b|, with the signs then
+//  adjusted according to the signs of a and b.
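+
+// For illustration only, a sketch checking the XGCD identity
+// (names and values are placeholders):
+
+   ZZ a, b, d, s, t;
+   a = 252; b = 198;
+   XGCD(d, s, t, a, b);     // d = 18, and d == a*s + b*t holds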
+
+
+
+/**************************************************************************\
+
+                             Modular Arithmetic
+
+The following routines perform arithmetic mod n, where n > 1.
+
+All arguments (other than exponents) are assumed to be in the range
+0..n-1.  Some routines may check this and raise an error if this
+does not hold.  Others may not, and the behaviour is unpredictable
+in this case.
+
+\**************************************************************************/
+
+
+
+void AddMod(ZZ& x, const ZZ& a, const ZZ& b, const ZZ& n); // x = (a+b)%n
+ZZ AddMod(const ZZ& a, const ZZ& b, const ZZ& n);
+
+void SubMod(ZZ& x, const ZZ& a, const ZZ& b, const ZZ& n); // x = (a-b)%n
+ZZ SubMod(const ZZ& a, const ZZ& b, const ZZ& n);
+
+void NegateMod(ZZ& x, const ZZ& a, const ZZ& n); // x = -a % n
+ZZ NegateMod(const ZZ& a, const ZZ& n);
+
+void MulMod(ZZ& x, const ZZ& a, const ZZ& b, const ZZ& n); // x = (a*b)%n
+ZZ MulMod(const ZZ& a, const ZZ& b, const ZZ& n);
+
+void SqrMod(ZZ& x, const ZZ& a, const ZZ& n); // x = a^2 % n
+ZZ SqrMod(const ZZ& a, const ZZ& n);
+
+
+
+
+void InvMod(ZZ& x, const ZZ& a, const ZZ& n);
+ZZ InvMod(const ZZ& a, const ZZ& n);
+// x = a^{-1} mod n (0 <= x < n); an error is raised if the inverse
+// is not defined
+
+// If exceptions are enabled, an object of the following class
+// is thrown by the InvMod routine if the inverse of a mod n is
+// not defined. The methods get_a() and get_n() give read-only
+// access to the offending values of a and n.
+// This also happens for any indirect call to InvMod, via PowerMod,
+// or via inverse computations in ZZ_p.
+
+class InvModErrorObject : public ArithmeticErrorObject {
+public:
+   InvModErrorObject(const char *s, const ZZ& a, const ZZ& n);
+   const ZZ& get_a() const;
+   const ZZ& get_n() const;
+};
+
+long InvModStatus(ZZ& x, const ZZ& a, const ZZ& n);
+// if gcd(a,n) = 1, then return-value = 0, x = a^{-1} mod n;
+// otherwise, return-value = 1, x = gcd(a, n)
+
+void PowerMod(ZZ& x, const ZZ& a, const ZZ& e, const ZZ& n);
+ZZ PowerMod(const ZZ& a, const ZZ& e, const ZZ& n);
+
+void PowerMod(ZZ& x, const ZZ& a, long e, const ZZ& n);
+ZZ PowerMod(const ZZ& a, long e, const ZZ& n);
+
+// x = a^e % n (e may be negative)
+
+
+// PROMOTIONS: AddMod, SubMod, and MulMod (both procedural and functional
+// forms) support promotions from long to ZZ on (a, b).
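+
+// For illustration only, a sketch of modular exponentiation and
+// inversion (names and values are placeholders):
+
+   ZZ n, a, x;
+   n = 101; a = 7;
+   x = PowerMod(a, 99, n);       // x = 7^99 mod 101
+   if (InvModStatus(x, a, n) == 0) {
+      // x = a^{-1} mod 101
+   }
+   else {
+      // x = gcd(a, n) > 1; the inverse does not exist
+   }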
+
+
+
+
+ +
+/**************************************************************************\
+
+                        Single-precision modular arithmetic
+
+These routines implement single-precision modular arithmetic.  If n is
+the modulus, all inputs should be in the range 0..n-1.  The number n
+itself should be in the range 2..NTL_SP_BOUND-1.
+
+Most of these routines are, of course, implemented as fast inline
+functions.  No checking is done that inputs are in range.
+
+
+\**************************************************************************/
+
+
+
+
+long AddMod(long a, long b, long n); // return (a+b)%n
+
+long SubMod(long a, long b, long n); // return (a-b)%n
+
+long NegateMod(long a, long n); // return (-a)%n
+
+long MulMod(long a, long b, long n); // return (a*b)%n
+
+long MulMod(long a, long b, long n, mulmod_t ninv);
+// return (a*b)%n.  
+//
+// Usually faster than plain MulMod when n is fixed for many
+// invocations. The value ninv should be precomputed as
+//   mulmod_t ninv = PrepMulMod(n);
+
+mulmod_t PrepMulMod(long n);
+// Prepare auxiliary data for MulMod.
+
+long MulModPrecon(long a, long b, long n, mulmod_precon_t bninv);
+// return (a*b)%n.  
+//
+// Usually much faster than MulMod when both b and n are fixed for
+// many invocations.  The value bninv should be precomputed as
+//   mulmod_precon_t bninv = PrepMulModPrecon(b, n);
+// or as
+//   mulmod_precon_t bninv = PrepMulModPrecon(b, n, ninv);
+// where ninv = PrepMulMod(n).
+
+mulmod_precon_t PrepMulModPrecon(long b, long n);
+mulmod_precon_t PrepMulModPrecon(long b, long n, mulmod_t ninv);
+// Prepare auxiliary data for MulModPrecon.
+// In the second version, ninv = PrepMulMod(n).
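+
+// For illustration only, the intended precomputation pattern (the
+// modulus and values are placeholders):
+
+   long n = 12289;
+   long b = 3;
+   mulmod_precon_t bninv = PrepMulModPrecon(b, n);
+   long acc = 1;
+   for (long i = 0; i < 100; i++)
+      acc = MulModPrecon(acc, b, n, bninv);   // acc = 3^100 mod n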
+
+
+
+long InvMod(long a, long n);
+// computes a^{-1} mod n.  Error is raised if undefined.
+
+long InvModStatus(long& x, long a, long n);
+// if gcd(a,n) = 1, then return-value = 0, x = a^{-1} mod n;
+// otherwise, return-value = 1, x = gcd(a, n)
+
+long PowerMod(long a, long e, long n);
+// computes a^e mod n (e may be negative)
+
+// The following are vector versions of the MulMod routines
+// They each compute x[i] = (a[i] * b)% n   i = 0..k-1
+
+void VectorMulMod(long k, long *x, const long *a, long b, long n);
+
+void VectorMulMod(long k, long *x, const long *a, long b, long n,
+                  mulmod_t ninv);
+// ninv = PrepMulMod(n)
+
+void VectorMulModPrecon(long k, long *x, const long *a, long b, long n,
+                        mulmod_precon_t bninv);
+// bninv = PrepMulModPrecon(b, n)
+
+
+// The following is provided for legacy support, but is not generally
+// recommended:
+
+long MulDivRem(long& q, long a, long b, long n, muldivrem_t bninv);
+// return (a*b)%n, set q = (a*b)/n.  
+// The value bninv should be precomputed as
+//   muldivrem_t bninv = PrepMulDivRem(b, n);
+// or as
+//   muldivrem_t bninv = PrepMulDivRem(b, n, ninv);
+// where ninv = PrepMulMod(n).
+
+muldivrem_t PrepMulDivRem(long b, long n);
+muldivrem_t PrepMulDivRem(long b, long n, mulmod_t ninv);
+// Prepare auxiliary data for MulDivRem.
+// In the second version, ninv = PrepMulMod(n).
+
+// NOTE: despite the similarity in the interface to MulModPrecon,
+// this routine is typically implemented in a very different way,
+// and usually much less efficient.
+// It was initially designed for specialized, internal use
+// within NTL, but has been a part of the documented NTL
+// interface for some time, and remains so even after the
+// v9.0 upgrade.
+
+
+
+//
+// Compatibility notes:
+//
+// The types mulmod_t and muldivrem_t were introduced in NTL v9.0, as were the
+// functions PrepMulMod and PrepMulDivRem.  Prior to this, the built-in type
+// "double" played the role of these types, and the user was expected to
+// compute PrepMulMod(n) as 1/double(n) and PrepMulDivRem(b, n) as
+// double(b)/double(n).
+//
+// By abstracting these types, NTL is able to exploit a wider variety of
+// implementation strategies.  Some old client code may break, but the compiler
+// will easily find the code that needs to be updated, and the updates are
+// quite mechanical (unless the old code implicitly made use of the assumption
+// that NTL_SP_NBITS <= NTL_DOUBLE_PRECISION-3).
+//
+// It is highly recommended that old client codes be updated.  However, one may
+// build NTL with the configuration option NTL_LEGACY_SP_MULMOD=on, which will
+// cause the interfaces and implementations to revert to their pre-v9.0
+// definitions.  This option will also make the following (obsolete) function
+// visible:
+
+    long MulMod2(long a, long b, long n, double bninv);
+    // return (a*b)%n.  bninv = ((double) b)/((double) n).  This is faster
+    // if both n and b are fixed for many multiplications.
+    // Note: This is OBSOLETE -- use MulModPrecon.
+
+
+// As of v9.2 of NTL, this new interface allows for 60-bit moduli on most
+// 64-bit machines.  The requirement is that a working 128-bit integer type is
+// available.  For current versions of gcc, clang, and icc, this is available
+// via the types __int128_t and __uint128_t.  If this requirement is met (which
+// is verified during NTL installation), then a "long long" implementation for
+// MulMod is used.  In versions 9.0 and 9.1 of NTL, a "long double"
+// implementation was introduced, which utilized the 80-bit extended double
+// precision hardware on x86 machines.  This also allows for 60-bit moduli on
+// 64-bit machines.
+
+// If 128-bit integer types are not available, or if you build NTL with the
+// NTL_DISABLE_LONGLONG=on flag, NTL will attempt to use the extended double
+// precision hardware to still allow 60-bit moduli.  If that is not possible,
+// or if you build NTL with the NTL_DISABLE_LONGDOUBLE=on flag, then NTL will
+// fall back to its "classical" implementation (pre-9.0) that relies on
+// double-precision arithmetic and imposes a 50-bit limit on moduli.  
+
+// Note that on 64-bit machines, either the "long long" or "long double"
+// implementations could support 62-bit moduli, rather than 60-bit moduli.
+// However, the restriction to 60-bits speeds up a few things, and so seems
+// like a good trade off.  This is subject to change in the future.
+
+// Also note that all of these enhancements introduced since v9.0 are only
+// available to builds of NTL that use GMP.  Builds that don't use GMP will
+// still be restricted to 50-bit moduli on 64-bit machines.
+
+// On machines with 32-bit longs, moduli will be restricted to 30 bits,
+// regardless of the implementation, which will be based on "long long"
+// arithmetic (if a 64-bit integer type is available), or on double-precision
+// floating point (otherwise).
+
+// One can detect the new (v9) interface by testing if the macro
+// NTL_HAVE_MULMOD_T is defined.  The following code can be used to make
+// new-style NTL clients work with either older (pre-9.0) versions of NTL or
+// newer versions (post-9.0):
+
+
+   #ifndef NTL_HAVE_MULMOD_T
+      namespace NTL {
+         typedef double mulmod_t;
+         typedef double muldivrem_t;
+
+         static inline double PrepMulMod(long n)
+         { return double(1L)/double(n); }
+
+         static inline double PrepMulDivRem(long b, long n, double ninv)
+         { return double(b)*ninv; }
+
+         static inline double PrepMulDivRem(long b, long n)
+         { return double(b)/double(n); }
+
+         static inline double PrepMulModPrecon(long b, long n)
+         { return PrepMulModPrecon(b, n, PrepMulMod(n)); }
+      }
+   #endif
+
+
+
+
+
+/**************************************************************************\
+
+                               Shift Operations
+
+LeftShift by n means multiplication by 2^n
+RightShift by n means division by 2^n, with truncation toward zero
+  (so the sign is preserved).
+
+A negative shift amount reverses the direction of the shift.
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ operator<<(const ZZ& a, long n);
+ZZ operator>>(const ZZ& a, long n);
+
+ZZ& operator<<=(ZZ& x, long n);
+ZZ& operator>>=(ZZ& x, long n);
+
+// procedural versions:
+
+void LeftShift(ZZ& x, const ZZ& a, long n);
+ZZ LeftShift(const ZZ& a, long n);
+
+void RightShift(ZZ& x, const ZZ& a, long n);
+ZZ RightShift(const ZZ& a, long n);
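+
+// For illustration only (names and values are placeholders):
+
+   ZZ a, x;
+   a = -20;
+   x = a >> 2;              // truncation toward zero: x = -5
+   x = LeftShift(a, 3);     // multiplication by 2^3: x = -160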
+
+
+
+/**************************************************************************\
+
+                              Bits and Bytes
+
+\**************************************************************************/
+
+
+
+long MakeOdd(ZZ& x);
+// removes factors of 2 from x, returns the number of 2's removed
+// returns 0 if x == 0
+
+long NumTwos(const ZZ& x);
+// returns max e such that 2^e divides x if x != 0, and returns 0 if x == 0.
+
+long IsOdd(const ZZ& a); // test if a is odd
+
+long NumBits(const ZZ& a);
+long NumBits(long a);  
+// returns the number of bits in the binary representation of |a|;
+// NumBits(0) = 0
+
+
+long bit(const ZZ& a, long k);
+long bit(long a, long k);
+// returns bit k of |a|, position 0 being the low-order bit.
+// If  k < 0 or k >= NumBits(a), returns 0.
+
+
+void trunc(ZZ& x, const ZZ& a, long k);
+// x = low order k bits of |a|.
+// If k <= 0, x = 0.
+
+// two functional variants:
+ZZ trunc_ZZ(const ZZ& a, long k);  
+long trunc_long(const ZZ& a, long k);
+
+long SetBit(ZZ& x, long p);
+// returns original value of p-th bit of |x|, and replaces p-th bit of
+// x by 1 if it was zero; low order bit is bit 0; error if p < 0;
+// the sign of x is maintained
+
+long SwitchBit(ZZ& x, long p);
+// returns original value of p-th bit of |x|, and switches the value
+// of p-th bit of x; low order bit is bit 0; error if p < 0;
+// the sign of x is maintained
+
+long weight(const ZZ& a); // returns Hamming weight of |a|
+long weight(long a);
+
+// bit-wise Boolean operations, procedural form:
+
+void bit_and(ZZ& x, const ZZ& a, const ZZ& b); // x = |a| AND |b|
+void bit_or(ZZ& x, const ZZ& a, const ZZ& b); // x = |a| OR |b|
+void bit_xor(ZZ& x, const ZZ& a, const ZZ& b); // x = |a| XOR |b|
+
+// bit-wise Boolean operations, operator notation:
+
+ZZ operator&(const ZZ& a, const ZZ& b);
+ZZ operator|(const ZZ& a, const ZZ& b);
+ZZ operator^(const ZZ& a, const ZZ& b);
+
+// PROMOTIONS: the above bit-wise operations (both procedural
+// and operator forms) provide promotions from long to ZZ on (a, b).
+
+ZZ& operator&=(ZZ& x, const ZZ& b);
+ZZ& operator&=(ZZ& x, long b);
+
+ZZ& operator|=(ZZ& x, const ZZ& b);
+ZZ& operator|=(ZZ& x, long b);
+
+ZZ& operator^=(ZZ& x, const ZZ& b);
+ZZ& operator^=(ZZ& x, long b);
+
+
+
+// conversions between byte sequences and ZZ's
+
+void ZZFromBytes(ZZ& x, const unsigned char *p, long n);
+ZZ ZZFromBytes(const unsigned char *p, long n);
+// x = sum(p[i]*256^i, i=0..n-1).
+// NOTE: in the unusual event that a char is more than 8 bits,
+//       only the low order 8 bits of p[i] are used
+
+void BytesFromZZ(unsigned char *p, const ZZ& a, long n);
+// Computes p[0..n-1] such that abs(a) == sum(p[i]*256^i, i=0..n-1) mod 256^n.
+
+long NumBytes(const ZZ& a);
+long NumBytes(long a);
+// returns # of base 256 digits needed to represent abs(a).
+// NumBytes(0) == 0.
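+
+// For illustration only, a round-trip sketch (the buffer size is an
+// assumption; note that the sign of a is not encoded):
+
+   ZZ a, a2;
+   a = 123456789;
+   long n = NumBytes(a);
+   unsigned char buf[16];        // assumes n <= 16 here
+   BytesFromZZ(buf, a, n);
+   ZZFromBytes(a2, buf, n);      // a2 == abs(a)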
+
+
+ +
+/**************************************************************************\
+
+                            Pseudo-Random Numbers
+
+\**************************************************************************/
+
+
+// Routines for generating pseudo-random numbers.
+
+// These routines generate high quality, cryptographically strong
+// pseudo-random numbers.  They are implemented so that their behaviour
+// is completely independent of the underlying hardware and long
+// integer implementation.  Note, however, that other routines
+// throughout NTL use pseudo-random numbers, and because of this,
+// the word size of the machine can impact the sequence of numbers
+// seen by a client program.
+
+
+void SetSeed(const ZZ& s);
+void SetSeed(const unsigned char *data, long dlen);
+void SetSeed(const RandomStream& s);
+// Initializes generator with a "seed".
+
+// The first version hashes the binary representation of s to obtain a key for
+// a low-level RandomStream object (see below).
+
+// The second version does the same, hashing the first dlen bytes pointed to by
+// data to obtain a key for the RandomStream object.
+
+// The third version initializes the PRG state directly with the given
+// RandomStream object.
+
+// EXCEPTIONS: strong ES
+
+
+void RandomBnd(ZZ& x, const ZZ& n);
+ZZ RandomBnd(const ZZ& n);
+void RandomBnd(long& x, long n);
+long RandomBnd(long n);
+// x = pseudo-random number in the range 0..n-1, or 0 if n <= 0
+// EXCEPTIONS: strong ES
+
+void VectorRandomBnd(long k, long *x, long n);
+// equivalent to x[i] = RandomBnd(n) for i in [0..k), but faster
+// EXCEPTIONS: strong ES
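+
+// For illustration only, deterministic seeding followed by generation
+// (the seed bytes are placeholders):
+
+   const unsigned char seed[7] = { 'm','y','-','s','e','e','d' };
+   SetSeed(seed, 7);             // hash 7 bytes into a PRG key
+   ZZ n, r;
+   n = 1000;
+   r = RandomBnd(n);             // uniform in 0..999, reproducible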
+
+
+void RandomBits(ZZ& x, long l);
+ZZ RandomBits_ZZ(long l);
+void RandomBits(long& x, long l);
+long RandomBits_long(long l);
+// x = pseudo-random number in the range 0..2^l-1.
+// EXCEPTIONS: strong ES
+
+void RandomLen(ZZ& x, long l);
+ZZ RandomLen_ZZ(long l);
+void RandomLen(long& x, long l);
+long RandomLen_long(long l);
+// x = pseudo-random number with precisely l bits,
+// or 0 if l <= 0.
+// EXCEPTIONS: strong ES
+
+unsigned long RandomBits_ulong(long l);
+// returns a pseudo-random number in the range 0..2^l-1
+// EXCEPTIONS: strong ES
+
+unsigned long RandomWord();
+// returns a word filled with pseudo-random bits.
+// Equivalent to RandomBits_ulong(NTL_BITS_PER_LONG).
+// EXCEPTIONS: strong ES
+
+
+
+class RandomStream {
+// The low-level pseudo-random generator (PRG).
+// After initializing it with a key, one can effectively read an unbounded
+// stream of pseudorandom bytes.
+
+public:
+
+   explicit RandomStream(const unsigned char *key);
+   // key should point to an array of NTL_PRG_KEYLEN bytes
+   // EXCEPTIONS: nothrow
+
+   void get(unsigned char *res, long n);
+   // read the next n bytes from the stream and store to location pointed to by
+   // res
+   // EXCEPTIONS: throws a LogicError exception if n is negative
+
+   RandomStream(const RandomStream&); // default
+   RandomStream& operator=(const RandomStream&); // default
+   // EXCEPTIONS: nothrow
+};
+
+
+RandomStream& GetCurrentRandomStream();
+// get reference to the current PRG state. If SetSeed has not been called, it
+// is called with a default value (which should be unique to each
+// process/thread).  NOTE: this is a reference to a thread-local object, so
+// different threads will use different PRG's, and by default, each will be
+// initialized with a unique seed.
+// NOTE: using this reference, you can copy the current PRG state or assign a
+// different value to it; however, see the helper class RandomStreamPush below,
+// which may be more convenient.
+// EXCEPTIONS: strong ES
+
+
+
+class RandomStreamPush {
+// RAII for saving/restoring current PRG state
+public:
+   RandomStreamPush();   // save a copy of the current PRG state
+                     // EXCEPTIONS: strong ES
+
+   ~RandomStreamPush();  // restore the saved copy of the PRG state
+
+private:
+   RandomStreamPush(const RandomStreamPush&); // disable
+   void operator=(const RandomStreamPush&); // disable
+};
+
+
+void DeriveKey(unsigned char *key, long klen,  
+               const unsigned char *data, long dlen);
+// utility routine to derive from the byte string (data, dlen) a byte string
+// (key, klen).  Heuristically, if (data, dlen) has high entropy, then (key,
+// klen) should be pseudorandom.  This routine is also used internally to
+// derive PRG keys.
+// EXCEPTIONS: throws LogicError exception if klen < 0 or dlen < 0
+
+
+
+/**************************************************************************\
+
+             Incremental Chinese Remaindering
+
+\**************************************************************************/
+
+long CRT(ZZ& a, ZZ& p, const ZZ& A, const ZZ& P);
+long CRT(ZZ& a, ZZ& p, long A, long P);
+
+// 0 <= A < P, (p, P) = 1; computes a' such that a' = a mod p,
+// a' = A mod P, and -p*P/2 < a' <= p*P/2; sets a := a', p := p*P, and
+// returns 1 if a's value has changed, otherwise 0
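+
+// For illustration only, a typical incremental loop; here bound is a
+// caller-supplied ZZ and ResidueMod is a hypothetical helper returning
+// the target value mod q:
+
+   ZZ a, p;
+   a = 0; p = 1;
+   PrimeSeq seq;                  // see "Small Prime Generation" below
+   while (p <= bound) {
+      long q = seq.next();
+      long A = ResidueMod(q);     // hypothetical: 0 <= A < q
+      CRT(a, p, A, q);            // now a = target mod p, with p *= q
+   }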
+
+
+/**************************************************************************\
+
+                  Rational Reconstruction
+
+\**************************************************************************/
+
+long ReconstructRational(ZZ& a, ZZ& b, const ZZ& x, const ZZ& m,
+                         const ZZ& a_bound, const ZZ& b_bound);
+
+// 0 <= x < m, m > 2 * a_bound * b_bound,
+// a_bound >= 0, b_bound > 0
+
+// This routine either returns 0, leaving a and b unchanged,
+// or returns 1 and sets a and b so that
+//   (1) a = b x (mod m),
+//   (2) |a| <= a_bound, 0 < b <= b_bound, and
+//   (3) gcd(m, b) = gcd(a, b).
+
+// If there exist a, b satisfying (1), (2), and
+//   (3') gcd(m, b) = 1,
+// then a, b are uniquely determined if we impose the additional
+// condition that gcd(a, b) = 1;  moreover, if such a, b exist,
+// then these values are returned by the routine.
+
+// Unless the calling routine can *a priori* guarantee the existence
+// of a, b satisfying (1), (2), and (3'),
+// then to ensure correctness, the calling routine should check
+// that gcd(m, b) = 1, or equivalently, gcd(a, b) = 1.
+
+// This is implemented using a variant of Lehmer's extended
+// Euclidean algorithm.
+
+// Literature:  see G. Collins and M. Encarnacion, J. Symb. Comp. 20:287-297,
+// 1995; P. Wang, M. Guy, and J. Davenport, SIGSAM Bulletin 16:2-3, 1982.
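+
+// For illustration only; x, m, a_bound, b_bound are assumed to be set
+// up as described above:
+
+   ZZ a, b;
+   if (ReconstructRational(a, b, x, m, a_bound, b_bound)) {
+      if (IsOne(GCD(a, b))) {
+         // a/b is the unique reconstruction with gcd(a, b) = 1
+      }
+   }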
+
+
+/**************************************************************************\
+
+                                Primality Testing
+                           and Prime Number Generation
+
+\**************************************************************************/
+
+void GenPrime(ZZ& n, long l, long err = 80);
+ZZ GenPrime_ZZ(long l, long err = 80);
+long GenPrime_long(long l, long err = 80);
+
+// GenPrime generates a random prime n of length l so that the
+// probability that the resulting n is composite is bounded by 2^(-err).
+// This calls the routine RandomPrime below, and uses results of
+// Damgard, Landrock, Pomerance to "optimize"
+// the number of Miller-Rabin trials at the end.
+
+void GenGermainPrime(ZZ& n, long l, long err = 80);
+ZZ GenGermainPrime_ZZ(long l, long err = 80);
+long GenGermainPrime_long(long l, long err = 80);
+
+// A (Sophie) Germain prime is a prime p such that p' = 2*p+1 is also a prime.
+// Such primes are useful for cryptographic applications...cryptographers
+// sometimes call p' a "strong" or "safe" prime.
+// GenGermainPrime generates a random Germain prime n of length l
+// so that the probability that either n or 2*n+1 is not a prime
+// is bounded by 2^(-err).
+
+
+long ProbPrime(const ZZ& n, long NumTrials = 10);
+long ProbPrime(long n, long NumTrials = 10);
+// performs up to NumTrials Miller-witness tests (after some trial division).
+
+void RandomPrime(ZZ& n, long l, long NumTrials=10);
+ZZ RandomPrime_ZZ(long l, long NumTrials=10);
+long RandomPrime_long(long l, long NumTrials=10);
+// n = random l-bit prime.  Uses ProbPrime with NumTrials.
+
+void NextPrime(ZZ& n, const ZZ& m, long NumTrials=10);
+ZZ NextPrime(const ZZ& m, long NumTrials=10);
+// n = smallest prime >= m.  Uses ProbPrime with NumTrials.
+
+long NextPrime(long m, long NumTrials=10);
+// Single precision version of the above.
+// Result will always be bounded by NTL_ZZ_SP_BOUND, and an
+// error is raised if this cannot be satisfied.
+
+long MillerWitness(const ZZ& n, const ZZ& w);
+// Tests if w is a witness to compositeness a la Miller.  Assumption: n is
+// odd and positive, 0 <= w < n.
+// Return value of 1 implies n is composite.
+// Return value of 0 indicates n might be prime.
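+
+// For illustration only, a sketch tying these routines together:
+
+   ZZ p, w;
+   p = GenPrime_ZZ(512);        // composite with probability <= 2^(-80)
+   long ok = ProbPrime(p);      // almost certainly returns 1
+   RandomBnd(w, p);             // random candidate witness, 0 <= w < p
+   long wit = MillerWitness(p, w);   // 1 would prove p composite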
+
+
+/**************************************************************************\
+
+                               Exponentiation
+
+\**************************************************************************/
+
+
+void power(ZZ& x, const ZZ& a, long e); // x = a^e (e >= 0)
+ZZ power(const ZZ& a, long e);
+
+void power(ZZ& x, long a, long e);
+
+// two functional variants:
+ZZ power_ZZ(long a, long e);
+long power_long(long a, long e);
+
+void power2(ZZ& x, long e); // x = 2^e (e >= 0)
+ZZ power2_ZZ(long e);
+
+
+/**************************************************************************\
+
+                               Square Roots
+
+\**************************************************************************/
+
+
+void SqrRoot(ZZ& x, const ZZ& a); // x = floor(a^{1/2}) (a >= 0)
+ZZ SqrRoot(const ZZ& a);
+
+long SqrRoot(long a);
+
+
+
+
+/**************************************************************************\
+
+                    Jacobi symbol and modular square roots
+
+\**************************************************************************/
+
+
+long Jacobi(const ZZ& a, const ZZ& n);
+//  compute Jacobi symbol of a and n; assumes 0 <= a < n, n odd
+
+void SqrRootMod(ZZ& x, const ZZ& a, const ZZ& n);
+ZZ SqrRootMod(const ZZ& a, const ZZ& n);
+//  computes square root of a mod n; assumes n is an odd prime, and
+//  that a is a square mod n, with 0 <= a < n.
+
+
+
+
+/**************************************************************************\
+
+                             Input/Output
+
+I/O Format:
+
+Numbers are written in base 10, with an optional minus sign.
+
+\**************************************************************************/
+
+istream& operator>>(istream& s, ZZ& x);  
+ostream& operator<<(ostream& s, const ZZ& a);
+
+
+
+/**************************************************************************\
+
+                            Miscellany
+
+\**************************************************************************/
+
+
+// The following macros are defined:
+
+#define NTL_ZZ_NBITS (...)  // number of bits in a zzigit;
+                            // a ZZ is represented as a sequence of zzigits.
+
+#define NTL_SP_NBITS (...)  // max number of bits in a "single-precision" number
+
+#define NTL_WSP_NBITS (...)  // max number of bits in a "wide single-precision"
+                             // number
+
+// The following relations hold:
+//    30 <= NTL_SP_NBITS <= NTL_WSP_NBITS
+//       <= min(NTL_ZZ_NBITS, NTL_BITS_PER_LONG-2)
+
+// Note that NTL_ZZ_NBITS may be less than, equal to, or greater than
+// NTL_BITS_PER_LONG  -- no particular relationship should be assumed to hold.
+// In particular, expressions like (1L << NTL_ZZ_NBITS) might overflow.
+//
+// "single-precision" numbers are meant to be used in conjunction with the
+//  single-precision modular arithmetic routines.
+//
+// "wide single-precision" numbers are meant to be used in conjunction
+//  with the ZZ arithmetic routines for optimal efficiency.
+
+// The following auxiliary macros are also defined
+
+#define NTL_FRADIX (...) // double-precision value of 2^NTL_ZZ_NBITS
+
+#define NTL_SP_BOUND (1L << NTL_SP_NBITS)
+#define NTL_WSP_BOUND (1L << NTL_WSP_NBITS)
+
+
+// Backward compatibility notes:
+//
+// Prior to version 5.0, the macro NTL_NBITS was defined,
+// along with the macro NTL_RADIX defined to be (1L << NTL_NBITS).
+// While these macros are still available when using NTL's traditional
+// long integer package (i.e., when NTL_GMP_LIP is not set),
+// they are not available when using the GMP as the primary long integer
+// package (i.e., when NTL_GMP_LIP is set).
+// Furthermore, when writing portable programs, one should avoid these macros.
+// Note that when using traditional long integer arithmetic, we have
+//    NTL_ZZ_NBITS = NTL_SP_NBITS = NTL_WSP_NBITS = NTL_NBITS.
+//
+// Prior to version 9.0, one could also assume that
+//   NTL_SP_NBITS <= NTL_DOUBLE_PRECISION-3;
+// however, this is no longer the case (unless NTL is built with the NTL_LEGACY_SP_MULMOD
+// flag turned on).
+
+
+// Here are some additional functions.
+
+void clear(ZZ& x); // x = 0
+void set(ZZ& x);   // x = 1
+
+void swap(ZZ& x, ZZ& y);
+// swap x and y (done by "pointer swapping", if possible).
+
+double log(const ZZ& a);
+// returns double precision approximation to log(a)
+
+long NextPowerOfTwo(long m);
+// returns least nonnegative k such that 2^k >= m
+
+long ZZ::size() const;
+// a.size() returns the number of zzigits of |a|; the
+// size of 0 is 0.
+
+void ZZ::SetSize(long k);
+// a.SetSize(k) does not change the value of a, but simply pre-allocates
+// space for k zzigits.
+
+long ZZ::SinglePrecision() const;
+// a.SinglePrecision() is a predicate that tests if abs(a) < NTL_SP_BOUND
+
+long ZZ::WideSinglePrecision() const;
+// a.WideSinglePrecision() is a predicate that tests if abs(a) < NTL_WSP_BOUND
+
+long digit(const ZZ& a, long k);
+// returns k-th zzigit of |a|, position 0 being the low-order
+// zzigit.
+// NOTE: this routine is only available when using NTL's traditional
+// long integer arithmetic, and should not be used in programs
+// that are meant to be portable.
+
+void ZZ::kill();
+// a.kill() sets a to zero and frees the space held by a.
+
+void ZZ::swap(ZZ& x);
+// swap method (done by "pointer swapping" if possible)
+
+ZZ::ZZ(INIT_SIZE_TYPE, long k);
+// ZZ(INIT_SIZE, k) initializes to 0, but space is pre-allocated so
+// that numbers x with x.size() <= k can be stored without
+// re-allocation.
+
+static const ZZ& ZZ::zero();
+// ZZ::zero() yields a read-only reference to zero, if you need it.
+
+
+
+
+/**************************************************************************\
+
+                    Small Prime Generation
+
+Primes are generated in sequence, starting at 2, and up to a maximum
+that is no more than min(NTL_SP_BOUND, 2^30).
+
+Example: print the primes up to 1000
+
+#include <NTL/ZZ.h>
+
+using namespace std;
+using namespace NTL;
+
+int main()
+{
+   PrimeSeq s;
+   long p;
+
+   p = s.next();
+   while (p <= 1000) {
+      cout << p << "\n";
+      p = s.next();
+   }
+}
+
+\**************************************************************************/
+
+
+
+class PrimeSeq {
+public:
+   PrimeSeq();
+   ~PrimeSeq();
+
+   long next();
+   // returns next prime in the sequence.  returns 0 if list of small
+   // primes is exhausted.
+
+   void reset(long b);
+   // resets generator so that the next prime in the sequence is the
+   // smallest prime >= b.
+
+private:
+   PrimeSeq(const PrimeSeq&);        // disabled
+   void operator=(const PrimeSeq&);  // disabled
+
+};
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/ZZ.txt b/thirdparty/linux/ntl/doc/ZZ.txt new file mode 100644 index 0000000000..76b74c3709 --- /dev/null +++ b/thirdparty/linux/ntl/doc/ZZ.txt @@ -0,0 +1,1115 @@ + +/**************************************************************************\ + +MODULE: ZZ + +SUMMARY: + +The class ZZ is used to represent signed, arbitrary length integers. + +Routines are provided for all of the basic arithmetic operations, as +well as for some more advanced operations such as primality testing. +Space is automatically managed by the constructors and destructors. + +This module also provides routines for generating small primes, and +fast routines for performing modular arithmetic on single-precision +numbers. + + +\**************************************************************************/ + +#include + + +class ZZ { +public: + + + ZZ(); // initial value is 0 + + ZZ(const ZZ& a); // copy constructor + explicit ZZ(long a); // promotion constructor + + ~ZZ(); // destructor + + ZZ& operator=(const ZZ& a); // assignment operator + ZZ& operator=(long a); + + // typedefs to aid in generic programming + typedef ZZ_p residue_type; + typedef ZZX poly_type; + + + // ... + +}; + + +// NOTE: A ZZ is represented as a sequence of "zzigits", +// where each zzigit is between 0 and 2^{NTL_ZZ_NBITS-1}. + +// NTL_ZZ_NBITS is macros defined in . + +// SIZE INVARIANT: the number of bits in a ZZ is always less than +// 2^(NTL_BITS_PER_LONG-4). + + + +/**************************************************************************\ + + Comparison + +\**************************************************************************/ + + + +// The usual comparison operators: + +long operator==(const ZZ& a, const ZZ& b); +long operator!=(const ZZ& a, const ZZ& b); +long operator<(const ZZ& a, const ZZ& b); +long operator>(const ZZ& a, const ZZ& b); +long operator<=(const ZZ& a, const ZZ& b); +long operator>=(const ZZ& a, const ZZ& b); + +// other stuff: + +long sign(const ZZ& a); // returns sign of a (-1, 0, +1) +long IsZero(const ZZ& a); // test for 0 +long IsOne(const ZZ& a); // test for 1 + +long compare(const ZZ& a, const ZZ& b); // returns sign of a-b (-1, 0, or 1). + +// PROMOTIONS: the comparison operators and the function compare +// support promotion from long to ZZ on (a, b). + + +/**************************************************************************\ + + Addition + +\**************************************************************************/ + + +// operator notation: + +ZZ operator+(const ZZ& a, const ZZ& b); +ZZ operator-(const ZZ& a, const ZZ& b); +ZZ operator-(const ZZ& a); // unary - + +ZZ& operator+=(ZZ& x, const ZZ& a); +ZZ& operator+=(ZZ& x, long a); + +ZZ& operator-=(ZZ& x, const ZZ& a); +ZZ& operator-=(ZZ& x, long a); + +ZZ& operator++(ZZ& x); // prefix +void operator++(ZZ& x, int); // postfix + +ZZ& operator--(ZZ& x); // prefix +void operator--(ZZ& x, int); // postfix + + + +// procedural versions: + +void add(ZZ& x, const ZZ& a, const ZZ& b); // x = a + b +void sub(ZZ& x, const ZZ& a, const ZZ& b); // x = a - b +void SubPos(ZZ& x, const ZZ& a, const ZZ& b); // x = a-b; assumes a >= b >= 0. +void negate(ZZ& x, const ZZ& a); // x = -a + +void abs(ZZ& x, const ZZ& a); // x = |a| +ZZ abs(const ZZ& a); + +// PROMOTIONS: binary +, -, as well as the procedural versions add, sub +// support promotions from long to ZZ on (a, b). 
+ + +/**************************************************************************\ + + Multiplication + +\**************************************************************************/ + +// operator notation: + +ZZ operator*(const ZZ& a, const ZZ& b); + +ZZ& operator*=(ZZ& x, const ZZ& a); +ZZ& operator*=(ZZ& x, long a); + +// procedural versions: + +void mul(ZZ& x, const ZZ& a, const ZZ& b); // x = a * b + +void sqr(ZZ& x, const ZZ& a); // x = a*a +ZZ sqr(const ZZ& a); + +// PROMOTIONS: operator * and procedure mul support promotion +// from long to ZZ on (a, b). + +/**************************************************************************\ + + Combined Multiply and Add + +\**************************************************************************/ + + +void MulAddTo(ZZ& x, const ZZ& a, const ZZ& b); // x += a*b +void MulAddTo(ZZ& x, const ZZ& a, long b); // x += a*b + + +void MulSubFrom(ZZ& x, const ZZ& a, const ZZ& b); // x -= a*b +void MulSubFrom(ZZ& x, const ZZ& a, long b); // x -= a*b + +// NOTE: these are provided for both convenience and efficiency. +// The single-precision versions may be significantly +// faster than the code sequence +// mul(tmp, a, b); add(x, x, tmp); + + + +/**************************************************************************\ + + Division + +\**************************************************************************/ + + +// operator notation: + +ZZ operator/(const ZZ& a, const ZZ& b); +ZZ operator/(const ZZ& a, long b); + +ZZ operator%(const ZZ& a, const ZZ& b); +long operator%(const ZZ& a, long b); + +ZZ& operator/=(ZZ& x, const ZZ& b); +ZZ& operator/=(ZZ& x, long b); + +ZZ& operator%=(ZZ& x, const ZZ& b); + + +// procedural versions: + +void DivRem(ZZ& q, ZZ& r, const ZZ& a, const ZZ& b); +// q = floor(a/b), r = a - b*q. +// This implies that: +// |r| < |b|, and if r != 0, sign(r) = sign(b) + +void div(ZZ& q, const ZZ& a, const ZZ& b); +// q = floor(a/b) + +void rem(ZZ& r, const ZZ& a, const ZZ& b); +// q = floor(a/b), r = a - b*q + + +// single-precision variants: + +long DivRem(ZZ& q, const ZZ& a, long b); +// q = floor(a/b), r = a - b*q, return value is r. + +long rem(const ZZ& a, long b); +// q = floor(a/b), r = a - b*q, return value is r. + + +// divisibility testing: + +long divide(ZZ& q, const ZZ& a, const ZZ& b); +long divide(ZZ& q, const ZZ& a, long b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0. + +long divide(const ZZ& a, const ZZ& b); +long divide(const ZZ& a, long b); +// if b | a, returns 1; otherwise returns 0. + + +/**************************************************************************\ + + GCD's + +\**************************************************************************/ + + +void GCD(ZZ& d, const ZZ& a, const ZZ& b); +ZZ GCD(const ZZ& a, const ZZ& b); + +// d = gcd(a, b) (which is always non-negative). Uses a binary GCD +// algorithm. + + + +void XGCD(ZZ& d, ZZ& s, ZZ& t, const ZZ& a, const ZZ& b); + +// d = gcd(a, b) = a*s + b*t. + +// The coefficients s and t are defined according to the standard +// Euclidean algorithm applied to |a| and |b|, with the signs then +// adjusted according to the signs of a and b. + +// The implementation may or may not Euclid's algorithm, +// but the coefficients a and t are always computed as if +// it did. + + +// special-purpose single-precision variants: + +long GCD(long a, long b); +// return value is gcd(a, b) (which is always non-negative) + +void XGCD(long& d, long& s, long& t, long a, long b); +// d = gcd(a, b) = a*s + b*t. 
+ +// The coefficients s and t are defined according to the standard +// Euclidean algorithm applied to |a| and |b|, with the signs then +// adjusted according to the signs of a and b. + + + +/**************************************************************************\ + + Modular Arithmetic + +The following routines perform arithmetic mod n, where n > 1. + +All arguments (other than exponents) are assumed to be in the range +0..n-1. Some routines may check this and raise an error if this +does not hold. Others may not, and the behaviour is unpredictable +in this case. + +\**************************************************************************/ + + + +void AddMod(ZZ& x, const ZZ& a, const ZZ& b, const ZZ& n); // x = (a+b)%n +ZZ AddMod(const ZZ& a, const ZZ& b, const ZZ& n); + +void SubMod(ZZ& x, const ZZ& a, const ZZ& b, const ZZ& n); // x = (a-b)%n +ZZ SubMod(const ZZ& a, const ZZ& b, const ZZ& n); + +void NegateMod(ZZ& x, const ZZ& a, const ZZ& n); // x = -a % n +ZZ NegateMod(const ZZ& a, const ZZ& n); + +void MulMod(ZZ& x, const ZZ& a, const ZZ& b, const ZZ& n); // x = (a*b)%n +ZZ MulMod(const ZZ& a, const ZZ& b, const ZZ& n); + +void SqrMod(ZZ& x, const ZZ& a, const ZZ& n); // x = a^2 % n +ZZ SqrMod(const ZZ& a, const ZZ& n); + + + + +void InvMod(ZZ& x, const ZZ& a, const ZZ& n); +ZZ InvMod(const ZZ& a, const ZZ& n); +// x = a^{-1} mod n (0 <= x < n); error is raised occurs if inverse +// not defined + +// If exceptions are enabled, an object of the following class +// is throw by the InvMod routine if the inverse of a mod n is +// not defined. The methods get_a() and get_n() give read-only +// access to the offending values of a and n. +// This also happens for any indirect call to InvMod, via PowerMod, +// of via inverse computations in ZZ_p. + +class InvModErrorObject : public ArithmeticErrorObject { +public: + InvModErrorObject(const char *s, const ZZ& a, const ZZ& n); + const ZZ& get_a() const; + const ZZ& get_n() const; +}; + +long InvModStatus(ZZ& x, const ZZ& a, const ZZ& n); +// if gcd(a,n) = 1, then return-value = 0, x = a^{-1} mod n; +// otherwise, return-value = 1, x = gcd(a, n) + +void PowerMod(ZZ& x, const ZZ& a, const ZZ& e, const ZZ& n); +ZZ PowerMod(const ZZ& a, const ZZ& e, const ZZ& n); + +void PowerMod(ZZ& x, const ZZ& a, long e, const ZZ& n); +ZZ PowerMod(const ZZ& a, long e, const ZZ& n); + +// x = a^e % n (e may be negative) + + +// PROMOTIONS: AddMod, SubMod, and MulMod (both procedural and functional +// forms) support promotions from long to ZZ on (a, b). + + + + +// @anchor{modarith} + +/**************************************************************************\ + + Single-precision modular arithmetic + +These routines implement single-precision modular arithmetic. If n is +the modulus, all inputs should be in the range 0..n-1. The number n +itself should be in the range 2..NTL_SP_BOUND-1. + +Most of these routines are, of course, implemented as fast inline +functions. No checking is done that inputs are in range. + + +\**************************************************************************/ + + + + +long AddMod(long a, long b, long n); // return (a+b)%n + +long SubMod(long a, long b, long n); // return (a-b)%n + +long NegateMod(long a, long n); // return (-a)%n + +long MulMod(long a, long b, long n); // return (a*b)%n + +long MulMod(long a, long b, long n, mulmod_t ninv); +// return (a*b)%n. +// +// Usually faster than plain MulMod when n is fixed for many +// invocations. 
The value ninv should be precomputed as +// mulmod_t ninv = PrepMulMod(n); + +mulmod_t PrepMulMod(long n); +// Prepare auxilliary data for MulMod. + +long MulModPrecon(long a, long b, long n, mulmod_precon_t bninv); +// return (a*b)%n. +// +// Usually much faster than MulMod when both b and n are fixed for +// many invocations. The value bninv should be precomputed as +// mulmod_precon_t bninv = PrepMulModPrecon(b, n); +// or as +// mulmod_precon_t bninv = PrepMulModPrecon(b, n, ninv); +// where ninv = PrepMulMod(n). + +mulmod_precon_t PrepMulModPrecon(long b, long n); +mulmod_precon_t PrepMulModPrecon(long b, long n, mulmod_t ninv); +// Prepare auxilliary data for MulModPrecon. +// In the second version, ninv = PrepMulMod(n). + + + +long InvMod(long a, long n); +// computes a^{-1} mod n. Error is raised if undefined. + +long InvModStatus(long& x, long a, long n); +// if gcd(a,n) = 1, then return-value = 0, x = a^{-1} mod n; +// otherwise, return-value = 1, x = gcd(a, n) + +long PowerMod(long a, long e, long n); +// computes a^e mod n (e may be negative) + +// The following are vector versions of the MulMod routines +// They each compute x[i] = (a[i] * b)% n i = 0..k-1 + +void VectorMulMod(long k, long *x, const long *a, long b, long n); + +void VectorMulMod(long k, long *x, const long *a, long b, long n, + mulmod_t ninv); +// ninv = PrepMulMod(n) + +void VectorMulModPrecon(long k, long *x, const long *a, long b, long n, + mulmod_precon_t bninv); +// bninv = MulModPrecon(b, n) + + +// The following is provided for lagacy support, but is not generally +// recommended: + +long MulDivRem(long& q, long a, long b, long n, muldivrem_t bninv); +// return (a*b)%n, set q = (a*b)/n. +// The value bninv should be precomputed as +// muldivrem_t bninv = PrepMulDivRem(b, n); +// or as +// muldivrem_t bninv = PrepMulDivRem(b, n, ninv); +// where ninv = PrepMod(n). + + muldivrem_t PrepMulDivRem(long b, long n); + muldivrem_t PrepMulDivRem(long b, long n, mulmod_t ninv); +// Prepare auxilliary data for MulDivRem. +// In the second version, ninv = PrepMulMod(n). + +// NOTE: despite the similarity in the interface to MulModPrecon, +// this routine is typically implemented in a very different way, +// and usually much less efficient. +// It was initially designed for specialized, internal use +// within NTL, but has been a part of the documented NTL +// interface for some time, and remains so even after the +// v9.0 upgrade. + + + +// +// Compatibility notes: +// +// The types mulmod_t and muldivrem_t were introduced in NTL v9.0, as were the +// functions PrepMulMod and PrepMulDivRem. Prior to this, the built-in type +// "double" played the role of these types, and the user was expected to +// compute PrepMulMod(n) as 1/double(n) and PrepMulDivRem(b, n) as +// double(b)/double(n). +// +// By abstracting these types, NTL is able to exploit a wider variety of +// implementation strategies. Some old client code may break, but the compiler +// will easily find the code that needs to be updated, and the updates are +// quite mechanical (unless the old code implicitly made use of the assumption +// that NTL_SP_NBITS <= NTL_DOUBLE_PRECISION-3). +// +// It is highly recommended that old client codes be updated. However, one may +// build NTL with the configuration option NTL_LEGACY_SP_MULMOD=on, which will +// cause the interfaces and implementations to revert to their pre-v9.0 +// definitions. 
+
+
+
+//
+// Compatibility notes:
+//
+// The types mulmod_t and muldivrem_t were introduced in NTL v9.0, as were the
+// functions PrepMulMod and PrepMulDivRem.  Prior to this, the built-in type
+// "double" played the role of these types, and the user was expected to
+// compute PrepMulMod(n) as 1/double(n) and PrepMulDivRem(b, n) as
+// double(b)/double(n).
+//
+// By abstracting these types, NTL is able to exploit a wider variety of
+// implementation strategies.  Some old client code may break, but the compiler
+// will easily find the code that needs to be updated, and the updates are
+// quite mechanical (unless the old code implicitly made use of the assumption
+// that NTL_SP_NBITS <= NTL_DOUBLE_PRECISION-3).
+//
+// It is highly recommended that old client codes be updated.  However, one may
+// build NTL with the configuration option NTL_LEGACY_SP_MULMOD=on, which will
+// cause the interfaces and implementations to revert to their pre-v9.0
+// definitions.  This option will also make the following (obsolete) function
+// visible:
+
+long MulMod2(long a, long b, long n, double bninv);
+// return (a*b)%n.  bninv = ((double) b)/((double) n).  This is faster
+// if both n and b are fixed for many multiplications.
+// Note: This is OBSOLETE -- use MulModPrecon.
+
+
+// As of v9.2 of NTL, this new interface allows for 60-bit moduli on most
+// 64-bit machines.  The requirement is that a working 128-bit integer type is
+// available.  For current versions of gcc, clang, and icc, this is available
+// via the types __int128_t and __uint128_t.  If this requirement is met (which
+// is verified during NTL installation), then a "long long" implementation for
+// MulMod is used.  In versions 9.0 and 9.1 of NTL, a "long double"
+// implementation was introduced, which utilized the 80-bit extended double
+// precision hardware on x86 machines.  This also allows for 60-bit moduli on
+// 64-bit machines.
+
+// If 128-bit integer types are not available, or if you build NTL with the
+// NTL_DISABLE_LONGLONG=on flag, NTL will attempt to use the extended double
+// precision hardware to still allow 60-bit moduli.  If that is not possible,
+// or if you build NTL with the NTL_DISABLE_LONGDOUBLE=on flag, then NTL will
+// fall back to its "classical" implementation (pre-9.0) that relies on
+// double-precision arithmetic and imposes a 50-bit limit on moduli.
+
+// Note that on 64-bit machines, either the "long long" or "long double"
+// implementations could support 62-bit moduli, rather than 60-bit moduli.
+// However, the restriction to 60 bits speeds up a few things, and so seems
+// like a good trade-off.  This is subject to change in the future.
+
+// Also note that all of these enhancements introduced since v9.0 are only
+// available to builds of NTL that use GMP.  Builds that don't use GMP will
+// still be restricted to 50-bit moduli on 64-bit machines.
+
+// On machines with 32-bit longs, moduli will be restricted to 30 bits,
+// regardless of the implementation, which will be based on "long long"
+// arithmetic (if a 64-bit integer type is available), or on double-precision
+// floating point (otherwise).
+
+// One can detect the new (v9) interface by testing if the macro
+// NTL_HAVE_MULMOD_T is defined.  The following code can be used to make
+// new-style NTL clients work with either older (pre-9.0) versions of NTL or
+// newer versions (post-9.0):
+
+
+#ifndef NTL_HAVE_MULMOD_T
+namespace NTL {
+   typedef double mulmod_t;
+   typedef double muldivrem_t;
+
+   static inline double PrepMulMod(long n)
+   { return double(1L)/double(n); }
+
+   static inline double PrepMulDivRem(long b, long n, double ninv)
+   { return double(b)*ninv; }
+
+   static inline double PrepMulDivRem(long b, long n)
+   { return double(b)/double(n); }
+
+   static inline double PrepMulModPrecon(long b, long n)
+   { return PrepMulModPrecon(b, n, PrepMulMod(n)); }
+}
+#endif
+
+
+
+
+
+/**************************************************************************\
+
+                              Shift Operations
+
+LeftShift by n means multiplication by 2^n
+RightShift by n means division by 2^n, with truncation toward zero
+  (so the sign is preserved).
+
+A negative shift amount reverses the direction of the shift.
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ operator<<(const ZZ& a, long n);
+ZZ operator>>(const ZZ& a, long n);
+
+ZZ& operator<<=(ZZ& x, long n);
+ZZ& operator>>=(ZZ& x, long n);
+
+// procedural versions:
+
+void LeftShift(ZZ& x, const ZZ& a, long n);
+ZZ LeftShift(const ZZ& a, long n);
+
+void RightShift(ZZ& x, const ZZ& a, long n);
+ZZ RightShift(const ZZ& a, long n);
+
+
+
+/**************************************************************************\
+
+                              Bits and Bytes
+
+\**************************************************************************/
+
+
+
+long MakeOdd(ZZ& x);
+// removes factors of 2 from x, returns the number of 2's removed;
+// returns 0 if x == 0
+
+long NumTwos(const ZZ& x);
+// returns max e such that 2^e divides x if x != 0, and returns 0 if x == 0.
+
+long IsOdd(const ZZ& a); // test if a is odd
+
+long NumBits(const ZZ& a);
+long NumBits(long a);
+// returns the number of bits in the binary representation of |a|;
+// NumBits(0) = 0
+
+
+long bit(const ZZ& a, long k);
+long bit(long a, long k);
+// returns bit k of |a|, position 0 being the low-order bit.
+// If k < 0 or k >= NumBits(a), returns 0.
+
+
+void trunc(ZZ& x, const ZZ& a, long k);
+// x = low order k bits of |a|.
+// If k <= 0, x = 0.
+
+// two functional variants:
+ZZ trunc_ZZ(const ZZ& a, long k);
+long trunc_long(const ZZ& a, long k);
+
+long SetBit(ZZ& x, long p);
+// returns original value of p-th bit of |x|, and replaces the p-th bit of
+// x by 1 if it was zero; low order bit is bit 0; error if p < 0;
+// the sign of x is maintained
+
+long SwitchBit(ZZ& x, long p);
+// returns original value of p-th bit of |x|, and switches the value
+// of the p-th bit of x; low order bit is bit 0; error if p < 0;
+// the sign of x is maintained
+
+long weight(const ZZ& a); // returns Hamming weight of |a|
+long weight(long a);
+
+// bit-wise Boolean operations, procedural form:
+
+void bit_and(ZZ& x, const ZZ& a, const ZZ& b); // x = |a| AND |b|
+void bit_or(ZZ& x, const ZZ& a, const ZZ& b);  // x = |a| OR |b|
+void bit_xor(ZZ& x, const ZZ& a, const ZZ& b); // x = |a| XOR |b|
+
+// bit-wise Boolean operations, operator notation:
+
+ZZ operator&(const ZZ& a, const ZZ& b);
+ZZ operator|(const ZZ& a, const ZZ& b);
+ZZ operator^(const ZZ& a, const ZZ& b);
+
+// PROMOTIONS: the above bit-wise operations (both procedural
+// and operator forms) provide promotions from long to ZZ on (a, b).
+
+ZZ& operator&=(ZZ& x, const ZZ& b);
+ZZ& operator&=(ZZ& x, long b);
+
+ZZ& operator|=(ZZ& x, const ZZ& b);
+ZZ& operator|=(ZZ& x, long b);
+
+ZZ& operator^=(ZZ& x, const ZZ& b);
+ZZ& operator^=(ZZ& x, long b);
+
+
+
+// conversions between byte sequences and ZZ's
+
+void ZZFromBytes(ZZ& x, const unsigned char *p, long n);
+ZZ ZZFromBytes(const unsigned char *p, long n);
+// x = sum(p[i]*256^i, i=0..n-1).
+// NOTE: in the unusual event that a char is more than 8 bits,
+// only the low order 8 bits of p[i] are used
+
+void BytesFromZZ(unsigned char *p, const ZZ& a, long n);
+// Computes p[0..n-1] such that abs(a) == sum(p[i]*256^i, i=0..n-1) mod 256^n.
+
+long NumBytes(const ZZ& a);
+long NumBytes(long a);
+// returns # of base 256 digits needed to represent abs(a).
+// NumBytes(0) == 0.
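+
+// For illustration, a small round-trip sketch using the byte-conversion
+// routines above; the function name and buffer contents are arbitrary:
+
+#include <NTL/ZZ.h>
+using namespace NTL;
+
+void BytesExample()
+{
+   unsigned char buf[4] = { 0x78, 0x56, 0x34, 0x12 };
+
+   ZZ x = ZZFromBytes(buf, 4);   // x = 0x12345678 = 305419896
+
+   unsigned char out[4];
+   BytesFromZZ(out, x, 4);       // now out[i] == buf[i] for i = 0..3
+
+   long nb = NumBytes(x);        // nb == 4
+}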
+
+
+// @anchor{prg}
+
+/**************************************************************************\
+
+                           Pseudo-Random Numbers
+
+\**************************************************************************/
+
+
+// Routines for generating pseudo-random numbers.
+
+// These routines generate high quality, cryptographically strong
+// pseudo-random numbers.  They are implemented so that their behaviour
+// is completely independent of the underlying hardware and long
+// integer implementation.  Note, however, that other routines
+// throughout NTL use pseudo-random numbers, and because of this,
+// the word size of the machine can impact the sequence of numbers
+// seen by a client program.
+
+
+void SetSeed(const ZZ& s);
+void SetSeed(const unsigned char *data, long dlen);
+void SetSeed(const RandomStream& s);
+// Initializes generator with a "seed".
+
+// The first version hashes the binary representation of s to obtain a key for
+// a low-level RandomStream object (see below).
+
+// The second version does the same, hashing the first dlen bytes pointed to by
+// data to obtain a key for the RandomStream object.
+
+// The third version initializes the PRG state directly with the given
+// RandomStream object.
+
+// EXCEPTIONS: strong ES
+
+
+void RandomBnd(ZZ& x, const ZZ& n);
+ZZ RandomBnd(const ZZ& n);
+void RandomBnd(long& x, long n);
+long RandomBnd(long n);
+// x = pseudo-random number in the range 0..n-1, or 0 if n <= 0
+// EXCEPTIONS: strong ES
+
+void VectorRandomBnd(long k, long *x, long n);
+// equivalent to x[i] = RandomBnd(n) for i in [0..k), but faster
+// EXCEPTIONS: strong ES
+
+
+void RandomBits(ZZ& x, long l);
+ZZ RandomBits_ZZ(long l);
+void RandomBits(long& x, long l);
+long RandomBits_long(long l);
+// x = pseudo-random number in the range 0..2^l-1.
+// EXCEPTIONS: strong ES
+
+void RandomLen(ZZ& x, long l);
+ZZ RandomLen_ZZ(long l);
+void RandomLen(long& x, long l);
+long RandomLen_long(long l);
+// x = pseudo-random number with precisely l bits,
+// or 0 if l <= 0.
+// EXCEPTIONS: strong ES
+
+unsigned long RandomBits_ulong(long l);
+// returns a pseudo-random number in the range 0..2^l-1
+// EXCEPTIONS: strong ES
+
+unsigned long RandomWord();
+// returns a word filled with pseudo-random bits.
+// Equivalent to RandomBits_ulong(NTL_BITS_PER_LONG).
+// EXCEPTIONS: strong ES
+
+
+
+class RandomStream {
+// The low-level pseudo-random generator (PRG).
+// After initializing it with a key, one can effectively read an unbounded
+// stream of pseudorandom bytes
+
+public:
+
+   explicit RandomStream(const unsigned char *key);
+   // key should point to an array of NTL_PRG_KEYLEN bytes
+   // EXCEPTIONS: nothrow
+
+   void get(unsigned char *res, long n);
+   // read the next n bytes from the stream and store them to the
+   // location pointed to by res
+   // EXCEPTIONS: throws a LogicError exception if n is negative
+
+   RandomStream(const RandomStream&);            // default
+   RandomStream& operator=(const RandomStream&); // default
+   // EXCEPTIONS: nothrow
+};
+
+
+RandomStream& GetCurrentRandomStream();
+// get reference to the current PRG state.  If SetSeed has not been called, it
+// is called with a default value (which should be unique to each
+// process/thread).  NOTE: this is a reference to a thread-local object, so
+// different threads will use different PRG's, and by default, each will be
+// initialized with a unique seed.
+// NOTE: using this reference, you can copy the current PRG state or assign a
+// different value to it; however, see the helper class RandomStreamPush below,
+// which may be more convenient.
+// EXCEPTIONS: strong ES
+
+
+
+class RandomStreamPush {
+// RAII for saving/restoring the current PRG state
+public:
+   RandomStreamPush();  // save a copy of the current PRG state
+   // EXCEPTIONS: strong ES
+
+   ~RandomStreamPush(); // restore the saved copy of the PRG state
+
+private:
+   RandomStreamPush(const RandomStreamPush&); // disable
+   void operator=(const RandomStreamPush&);   // disable
+};
+
+
+void DeriveKey(unsigned char *key, long klen,
+               const unsigned char *data, long dlen);
+// utility routine to derive from the byte string (data, dlen) a byte string
+// (key, klen).  Heuristically, if (data, dlen) has high entropy, then (key,
+// klen) should be pseudorandom.  This routine is also used internally to
+// derive PRG keys.
+// EXCEPTIONS: throws a LogicError exception if klen < 0 or dlen < 0
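+
+// For illustration, a minimal sketch of seeding and the RAII helper
+// above; the function name and seed values are arbitrary:
+
+#include <NTL/ZZ.h>
+using namespace NTL;
+
+void PrgExample()
+{
+   SetSeed(ZZ(42));            // deterministic seeding of the PRG
+
+   ZZ r = RandomBnd(power_ZZ(10, 20)); // r in the range 0..10^20-1
+   long b = RandomBits_long(30);       // 30 pseudo-random bits
+
+   {
+      RandomStreamPush push;   // save the current PRG state
+      SetSeed(ZZ(1));          // temporary, reproducible stream
+      ZZ t = RandomBits_ZZ(128);
+   }                           // destructor restores the saved state
+}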
+
+
+
+/**************************************************************************\
+
+                    Incremental Chinese Remaindering
+
+\**************************************************************************/
+
+long CRT(ZZ& a, ZZ& p, const ZZ& A, const ZZ& P);
+long CRT(ZZ& a, ZZ& p, long A, long P);
+
+// 0 <= A < P, (p, P) = 1; computes a' such that a' = a mod p,
+// a' = A mod P, and -p*P/2 < a' <= p*P/2; sets a := a', p := p*P, and
+// returns 1 if a's value has changed, otherwise 0
+
+
+/**************************************************************************\
+
+                         Rational Reconstruction
+
+\**************************************************************************/
+
+long ReconstructRational(ZZ& a, ZZ& b, const ZZ& x, const ZZ& m,
+                         const ZZ& a_bound, const ZZ& b_bound);
+
+// 0 <= x < m, m > 2 * a_bound * b_bound,
+// a_bound >= 0, b_bound > 0
+
+// This routine either returns 0, leaving a and b unchanged,
+// or returns 1 and sets a and b so that
+//   (1) a = b x (mod m),
+//   (2) |a| <= a_bound, 0 < b <= b_bound, and
+//   (3) gcd(m, b) = gcd(a, b).
+
+// If there exist a, b satisfying (1), (2), and
+//   (3') gcd(m, b) = 1,
+// then a, b are uniquely determined if we impose the additional
+// condition that gcd(a, b) = 1; moreover, if such a, b exist,
+// then these values are returned by the routine.
+
+// Unless the calling routine can *a priori* guarantee the existence
+// of a, b satisfying (1), (2), and (3'),
+// then to ensure correctness, the calling routine should check
+// that gcd(m, b) = 1, or equivalently, gcd(a, b) = 1.
+
+// This is implemented using a variant of Lehmer's extended
+// Euclidean algorithm.
+
+// Literature: see G. Collins and M. Encarnacion, J. Symb. Comp. 20:287-297,
+// 1995; P. Wang, M. Guy, and J. Davenport, SIGSAM Bulletin 16:2-3, 1982.
+
+
+/**************************************************************************\
+
+                           Primality Testing
+                      and Prime Number Generation
+
+\**************************************************************************/
+
+void GenPrime(ZZ& n, long l, long err = 80);
+ZZ GenPrime_ZZ(long l, long err = 80);
+long GenPrime_long(long l, long err = 80);
+
+// GenPrime generates a random prime n of length l so that the
+// probability that the resulting n is composite is bounded by 2^(-err).
+// This calls the routine RandomPrime below, and uses results of
+// Damgard, Landrock, Pomerance to "optimize"
+// the number of Miller-Rabin trials at the end.
+
+void GenGermainPrime(ZZ& n, long l, long err = 80);
+ZZ GenGermainPrime_ZZ(long l, long err = 80);
+long GenGermainPrime_long(long l, long err = 80);
+
+// A (Sophie) Germain prime is a prime p such that p' = 2*p+1 is also a prime.
+// Such primes are useful for cryptographic applications...cryptographers
+// sometimes call p' a "strong" or "safe" prime.
+// GenGermainPrime generates a random Germain prime n of length l
+// so that the probability that either n or 2*n+1 is not a prime
+// is bounded by 2^(-err).
+
+
+long ProbPrime(const ZZ& n, long NumTrials = 10);
+long ProbPrime(long n, long NumTrials = 10);
+// performs up to NumTrials Miller-witness tests (after some trial division).
+
+void RandomPrime(ZZ& n, long l, long NumTrials=10);
+ZZ RandomPrime_ZZ(long l, long NumTrials=10);
+long RandomPrime_long(long l, long NumTrials=10);
+// n = random l-bit prime.  Uses ProbPrime with NumTrials.
+
+void NextPrime(ZZ& n, const ZZ& m, long NumTrials=10);
+ZZ NextPrime(const ZZ& m, long NumTrials=10);
+// n = smallest prime >= m.  Uses ProbPrime with NumTrials.
+
+long NextPrime(long m, long NumTrials=10);
+// Single-precision version of the above.
+// Result will always be bounded by NTL_ZZ_SP_BOUND, and an
+// error is raised if this cannot be satisfied.
+
+long MillerWitness(const ZZ& n, const ZZ& w);
+// Tests if w is a witness to compositeness a la Miller.  Assumption: n is
+// odd and positive, 0 <= w < n.
+// Return value of 1 implies n is composite.
+// Return value of 0 indicates n might be prime.
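+
+// For illustration, a small sketch using the routines above; the
+// function name and bit-lengths are arbitrary:
+
+#include <NTL/ZZ.h>
+using namespace NTL;
+
+void PrimeExample()
+{
+   ZZ p = GenPrime_ZZ(256);    // 256-bit prime; composite with
+                               // probability at most 2^(-80)
+
+   if (ProbPrime(p)) {
+      // p passed further Miller-witness testing
+   }
+
+   ZZ q = NextPrime(power_ZZ(10, 30)); // smallest prime >= 10^30
+}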
+
+
+/**************************************************************************\
+
+                              Exponentiation
+
+\**************************************************************************/
+
+
+void power(ZZ& x, const ZZ& a, long e); // x = a^e (e >= 0)
+ZZ power(const ZZ& a, long e);
+
+void power(ZZ& x, long a, long e);
+
+// two functional variants:
+ZZ power_ZZ(long a, long e);
+long power_long(long a, long e);
+
+void power2(ZZ& x, long e); // x = 2^e (e >= 0)
+ZZ power2_ZZ(long e);
+
+
+/**************************************************************************\
+
+                               Square Roots
+
+\**************************************************************************/
+
+
+void SqrRoot(ZZ& x, const ZZ& a); // x = floor(a^{1/2}) (a >= 0)
+ZZ SqrRoot(const ZZ& a);
+
+long SqrRoot(long a);
+
+
+
+
+/**************************************************************************\
+
+                 Jacobi symbol and modular square roots
+
+\**************************************************************************/
+
+
+long Jacobi(const ZZ& a, const ZZ& n);
+// compute Jacobi symbol of a and n; assumes 0 <= a < n, n odd
+
+void SqrRootMod(ZZ& x, const ZZ& a, const ZZ& n);
+ZZ SqrRootMod(const ZZ& a, const ZZ& n);
+// computes square root of a mod n; assumes n is an odd prime, and
+// that a is a square mod n, with 0 <= a < n.
+
+
+
+
+/**************************************************************************\
+
+                               Input/Output
+
+I/O Format:
+
+Numbers are written in base 10, with an optional minus sign.
+
+\**************************************************************************/
+
+istream& operator>>(istream& s, ZZ& x);
+ostream& operator<<(ostream& s, const ZZ& a);
+
+
+
+/**************************************************************************\
+
+                                Miscellany
+
+\**************************************************************************/
+
+
+// The following macros are defined:
+
+#define NTL_ZZ_NBITS (...)  // number of bits in a zzigit;
+                            // a ZZ is represented as a sequence of zzigits.
+
+#define NTL_SP_NBITS (...)  // max number of bits in a "single-precision" number
+
+#define NTL_WSP_NBITS (...) // max number of bits in a "wide single-precision"
+                            // number
+
+// The following relations hold:
+//    30 <= NTL_SP_NBITS <= NTL_WSP_NBITS
+//       <= min(NTL_ZZ_NBITS, NTL_BITS_PER_LONG-2)
+
+// Note that NTL_ZZ_NBITS may be less than, equal to, or greater than
+// NTL_BITS_PER_LONG -- no particular relationship should be assumed to hold.
+// In particular, expressions like (1L << NTL_ZZ_NBITS) might overflow.
+//
+// "single-precision" numbers are meant to be used in conjunction with the
+// single-precision modular arithmetic routines.
+//
+// "wide single-precision" numbers are meant to be used in conjunction
+// with the ZZ arithmetic routines for optimal efficiency.
+
+// The following auxiliary macros are also defined
+
+#define NTL_FRADIX (...)    // double-precision value of 2^NTL_ZZ_NBITS
+
+#define NTL_SP_BOUND  (1L << NTL_SP_NBITS)
+#define NTL_WSP_BOUND (1L << NTL_WSP_NBITS)
+
+
+// Backward compatibility notes:
+//
+// Prior to version 5.0, the macro NTL_NBITS was defined,
+// along with the macro NTL_RADIX defined to be (1L << NTL_NBITS).
+// While these macros are still available when using NTL's traditional
+// long integer package (i.e., when NTL_GMP_LIP is not set),
+// they are not available when using GMP as the primary long integer
+// package (i.e., when NTL_GMP_LIP is set).
+// Furthermore, when writing portable programs, one should avoid these macros.
+// Note that when using traditional long integer arithmetic, we have
+//    NTL_ZZ_NBITS = NTL_SP_NBITS = NTL_WSP_NBITS = NTL_NBITS.
+//
+// Prior to version 9.0, one could also assume that
+//    NTL_SP_NBITS <= NTL_DOUBLE_PRECISION-3;
+// however, this is no longer the case (unless NTL is built with the
+// NTL_LEGACY_SP_MULMOD flag turned on).
+
+
+// Here are some additional functions.
+
+void clear(ZZ& x); // x = 0
+void set(ZZ& x);   // x = 1
+
+void swap(ZZ& x, ZZ& y);
+// swap x and y (done by "pointer swapping", if possible).
+
+double log(const ZZ& a);
+// returns double-precision approximation to log(a)
+
+long NextPowerOfTwo(long m);
+// returns least nonnegative k such that 2^k >= m
+
+long ZZ::size() const;
+// a.size() returns the number of zzigits of |a|; the
+// size of 0 is 0.
+
+void ZZ::SetSize(long k);
+// a.SetSize(k) does not change the value of a, but simply pre-allocates
+// space for k zzigits.
+
+long ZZ::SinglePrecision() const;
+// a.SinglePrecision() is a predicate that tests if abs(a) < NTL_SP_BOUND
+
+long ZZ::WideSinglePrecision() const;
+// a.WideSinglePrecision() is a predicate that tests if abs(a) < NTL_WSP_BOUND
+
+long digit(const ZZ& a, long k);
+// returns k-th zzigit of |a|, position 0 being the low-order
+// zzigit.
+// NOTE: this routine is only available when using NTL's traditional
+// long integer arithmetic, and should not be used in programs
+// that are meant to be portable.
+
+void ZZ::kill();
+// a.kill() sets a to zero and frees the space held by a.
+
+void ZZ::swap(ZZ& x);
+// swap method (done by "pointer swapping" if possible)
+
+ZZ::ZZ(INIT_SIZE_TYPE, long k);
+// ZZ(INIT_SIZE, k) initializes to 0, but space is pre-allocated so
+// that numbers x with x.size() <= k can be stored without
+// re-allocation.
+
+static const ZZ& ZZ::zero();
+// ZZ::zero() yields a read-only reference to zero, if you need it.
+
+
+
+
+/**************************************************************************\
+
+                          Small Prime Generation
+
+Primes are generated in sequence, starting at 2, and up to a maximum
+that is no more than min(NTL_SP_BOUND, 2^30).
+
+Example: print the primes up to 1000
+
+#include <NTL/ZZ.h>
+
+int main()
+{
+   PrimeSeq s;
+   long p;
+
+   p = s.next();
+   while (p <= 1000) {
+      cout << p << "\n";
+      p = s.next();
+   }
+}
+
+\**************************************************************************/
+
+
+
+class PrimeSeq {
+public:
+   PrimeSeq();
+   ~PrimeSeq();
+
+   long next();
+   // returns next prime in the sequence.  returns 0 if list of small
+   // primes is exhausted.
+
+   void reset(long b);
+   // resets generator so that the next prime in the sequence is the
+   // smallest prime >= b.
+
+private:
+   PrimeSeq(const PrimeSeq&);       // disabled
+   void operator=(const PrimeSeq&); // disabled
+
+};
+
+
diff --git a/thirdparty/linux/ntl/doc/ZZVec.cpp.html b/thirdparty/linux/ntl/doc/ZZVec.cpp.html
new file mode 100644
index 0000000000..286ad9b137
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/ZZVec.cpp.html
@@ -0,0 +1,78 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/ZZVec.cpp.html
+
+
+
+
+/**************************************************************************\
+
+MODULE: ZZVec
+
+SUMMARY:
+
+The class ZZVec implements vectors of fixed-length ZZ's.  You can
+allocate a vector of ZZ's of a specified length, where the maximum
+size of each ZZ is also specified.  The size is measured in terms
+of the number of zzigits.
+
+These parameters can be specified either with a constructor
+or with SetSize.  It is an error to try to re-size a vector of non-zero length,
+or store a ZZ that doesn't fit.  The space can be released with "kill",
+and then you are free to call SetSize again.  
+
+If you want more flexible---but less efficient---vectors, use vec_ZZ.
+
+\**************************************************************************/
+
+#include <NTL/ZZ.h>
+
+
+class ZZVec {
+public:
+   ZZVec();
+
+   ZZVec& operator=(const ZZVec&);
+   // first kill()'s destination (unless source and destination are
+   // identical)
+
+   ZZVec(const ZZVec&);
+
+   ~ZZVec();
+
+   ZZVec(long n, long d);
+   // sets length to n and max size of each element to d
+
+   void SetSize(long n, long d);
+   // sets length to n and max size of each element to d
+
+   long length() const;
+   // length of vector
+
+   long BaseSize() const;
+   // max size of each element
+
+   void kill();
+   // release space
+
+
+   ZZ* elts();
+   const ZZ* elts() const;
+   // pointer to first element
+
+   ZZ& operator[](long i);
+   const ZZ& operator[](long i) const;
+   // indexing operator; starts at 0; no range checking
+
+   void swap(ZZVec& x);
+   // swap with x by swapping pointers
+};
+
+
+void swap(ZZVec& x, ZZVec& y);
+// swaps x and y by swapping pointers
+
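+
+// For illustration, a small usage sketch of the class above; the
+// function name, header choice, and sizes are arbitrary:
+
+#include <NTL/ZZVec.h>
+using namespace NTL;
+
+void ZZVecExample()
+{
+   ZZVec v(10, 4);      // 10 elements, each with room for 4 zzigits
+
+   for (long i = 0; i < v.length(); i++)
+      v[i] = i;         // small values easily fit
+
+   v.kill();            // release space...
+   v.SetSize(5, 2);     // ...then re-allocate with new parameters
+}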
+
+
diff --git a/thirdparty/linux/ntl/doc/ZZVec.txt b/thirdparty/linux/ntl/doc/ZZVec.txt
new file mode 100644
index 0000000000..963e4ace73
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/ZZVec.txt
@@ -0,0 +1,68 @@
+
+/**************************************************************************\
+
+MODULE: ZZVec
+
+SUMMARY:
+
+The class ZZVec implements vectors of fixed-length ZZ's.  You can
+allocate a vector of ZZ's of a specified length, where the maximum
+size of each ZZ is also specified.  The size is measured in terms
+of the number of zzigits.
+
+These parameters can be specified either with a constructor
+or with SetSize.  It is an error to try to re-size a vector of non-zero length,
+or store a ZZ that doesn't fit.  The space can be released with "kill",
+and then you are free to call SetSize again.
+
+If you want more flexible---but less efficient---vectors, use vec_ZZ.
+
+\**************************************************************************/
+
+#include <NTL/ZZ.h>
+
+
+class ZZVec {
+public:
+   ZZVec();
+
+   ZZVec& operator=(const ZZVec&);
+   // first kill()'s destination (unless source and destination are
+   // identical)
+
+   ZZVec(const ZZVec&);
+
+   ~ZZVec();
+
+   ZZVec(long n, long d);
+   // sets length to n and max size of each element to d
+
+   void SetSize(long n, long d);
+   // sets length to n and max size of each element to d
+
+   long length() const;
+   // length of vector
+
+   long BaseSize() const;
+   // max size of each element
+
+   void kill();
+   // release space
+
+
+   ZZ* elts();
+   const ZZ* elts() const;
+   // pointer to first element
+
+   ZZ& operator[](long i);
+   const ZZ& operator[](long i) const;
+   // indexing operator; starts at 0; no range checking
+
+   void swap(ZZVec& x);
+   // swap with x by swapping pointers
+};
+
+
+void swap(ZZVec& x, ZZVec& y);
+// swaps x and y by swapping pointers
+
diff --git a/thirdparty/linux/ntl/doc/ZZX.cpp.html b/thirdparty/linux/ntl/doc/ZZX.cpp.html
new file mode 100644
index 0000000000..3d31d1a9cd
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/ZZX.cpp.html
@@ -0,0 +1,608 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/ZZX.cpp.html
+
+
+
+
+
+/**************************************************************************\
+
+MODULE: ZZX
+
+SUMMARY:
+
+The class ZZX implements polynomials in ZZ[X], i.e., univariate
+polynomials with integer coefficients.
+
+Polynomial multiplication is implemented using one of 4 different
+algorithms:
+
+1) classical
+
+2) Karatsuba
+
+3) Schoenhage & Strassen --- performs an FFT by working
+     modulo a "Fermat number" of appropriate size...
+     good for polynomials with huge coefficients
+     and moderate degree
+
+4) CRT/FFT --- performs an FFT by working modulo several
+     small primes...good for polynomials with moderate coefficients
+     and huge degree.
+
+The choice of algorithm is somewhat heuristic, and may not always be
+perfect.
+
+Many thanks to Juergen Gerhard <jngerhar@plato.uni-paderborn.de> for
+pointing out the deficiency in the NTL-1.0 ZZX arithmetic, and for
+contributing the Schoenhage/Strassen code.
+
+Extensive use is made of modular algorithms to enhance performance
+(e.g., the GCD algorithm and many others).
+
+\**************************************************************************/
+
+#include <NTL/vec_ZZ.h>
+#include "zz_pX.h"
+#include <NTL/ZZ_pX.h>
+
+
+class ZZX {
+public:
+
+
+   ZZX(); // initial value 0
+
+   ZZX(const ZZX& a); // copy
+   explicit ZZX(const ZZ& a); // promotion
+   explicit ZZX(long a); // promotion
+
+   ~ZZX();
+
+   ZZX(INIT_MONO_TYPE, long i, const ZZ& c);
+   ZZX(INIT_MONO_TYPE, long i, long c);
+   // initial value c*X^i, invoke as ZZX(INIT_MONO, i, c)
+
+   ZZX(INIT_MONO_TYPE, long i);
+   // initial value X^i, invoke as ZZX(INIT_MONO, i)
+
+   ZZX& operator=(const ZZX& a); // assignment
+   ZZX& operator=(const ZZ& a);
+   ZZX& operator=(long a);
+
+   typedef ZZ coeff_type;
+
+   // ...
+
+};
+
+
+
+
+/**************************************************************************\
+
+                              Accessing coefficients
+
+The degree of a polynomial f is obtained as deg(f),
+where the zero polynomial, by definition, has degree -1.
+
+A polynomial f is represented as a coefficient vector.
+Coefficients may be accessed in one of two ways.
+
+The safe, high-level method is to call the function
+coeff(f, i) to get the coefficient of X^i in the polynomial f,
+and to call the function SetCoeff(f, i, a) to set the coefficient
+of X^i in f to the scalar a.
+
+One can also access the coefficients more directly via a lower level
+interface.  The coefficient of X^i in f may be accessed using
+subscript notation f[i].  In addition, one may write f.SetLength(n)
+to set the length of the underlying coefficient vector to n,
+and f.SetMaxLength(n) to allocate space for n coefficients,
+without changing the coefficient vector itself.
+
+After setting coefficients using this low-level interface,
+one must ensure that leading zeros in the coefficient vector
+are stripped afterwards by calling the function f.normalize().
+
+NOTE: the coefficient vector of f may also be accessed directly
+as f.rep; however, this is not recommended. Also, for a properly
+normalized polynomial f, we have f.rep.length() == deg(f)+1,
+and deg(f) >= 0  =>  f.rep[deg(f)] != 0.
+
+\**************************************************************************/
+
+
+
+long deg(const ZZX& a);  // return deg(a); deg(0) == -1.
+
+const ZZ& coeff(const ZZX& a, long i);
+// returns the coefficient of X^i, or zero if i not in range
+
+const ZZ& LeadCoeff(const ZZX& a);
+// returns leading term of a, or zero if a == 0
+
+const ZZ& ConstTerm(const ZZX& a);
+// returns constant term of a, or zero if a == 0
+
+void SetCoeff(ZZX& x, long i, const ZZ& a);
+void SetCoeff(ZZX& x, long i, long a);
+// makes coefficient of X^i equal to a; error is raised if i < 0
+
+void SetCoeff(ZZX& x, long i);
+// makes coefficient of X^i equal to 1;  error is raised if i < 0
+
+void SetX(ZZX& x); // x is set to the monomial X
+
+long IsX(const ZZX& a); // test if a = X
+
+
+
+
+ZZ& ZZX::operator[](long i);
+const ZZ& ZZX::operator[](long i) const;
+// indexing operators: f[i] is the coefficient of X^i ---
+// i should satisfy i >= 0 and i <= deg(f).
+// No range checking (unless NTL_RANGE_CHECK is defined).
+
+void ZZX::SetLength(long n);
+// f.SetLength(n) sets the length of the underlying coefficient
+// vector to n --- after this call, indexing f[i] for i = 0..n-1
+// is valid.
+
+void ZZX::normalize();  
+// f.normalize() strips leading zeros from coefficient vector of f
+
+void ZZX::SetMaxLength(long n);
+// f.SetMaxLength(n) pre-allocates space for n coefficients.  The
+// polynomial that f represents is unchanged.
+
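+
+// For illustration, a small sketch of the low-level interface described
+// above; the function name is arbitrary:
+
+#include <NTL/ZZX.h>
+using namespace NTL;
+
+void CoeffExample()
+{
+   ZZX f;
+   f.SetLength(4);      // room for coefficients of X^0..X^3
+   f[0] = 1;
+   f[1] = 2;
+   f[2] = 3;            // f[3] is left zero
+   f.normalize();       // strips the leading zero; now deg(f) == 2
+
+   SetCoeff(f, 5, 7);   // high-level: coefficient of X^5 = 7; deg(f) == 5
+}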
+
+
+
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+long operator==(const ZZX& a, const ZZX& b);
+long operator!=(const ZZX& a, const ZZX& b);
+
+long IsZero(const ZZX& a);  // test for 0
+long IsOne(const ZZX& a);  // test for 1
+
+// PROMOTIONS: operators ==, != promote {long, ZZ} to ZZX on (a, b).
+
+
+/**************************************************************************\
+
+                                   Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZX operator+(const ZZX& a, const ZZX& b);
+ZZX operator-(const ZZX& a, const ZZX& b);
+ZZX operator-(const ZZX& a); // unary -
+
+ZZX& operator+=(ZZX& x, const ZZX& a);
+ZZX& operator-=(ZZX& x, const ZZX& a);
+
+ZZX& operator++(ZZX& x);  // prefix
+void operator++(ZZX& x, int);  // postfix
+
+ZZX& operator--(ZZX& x);  // prefix
+void operator--(ZZX& x, int);  // postfix
+
+
+// procedural versions:
+
+void add(ZZX& x, const ZZX& a, const ZZX& b); // x = a + b
+void sub(ZZX& x, const ZZX& a, const ZZX& b); // x = a - b
+void negate(ZZX& x, const ZZX& a); // x = -a
+
+// PROMOTIONS: binary +, - and procedures add, sub promote {long, ZZ}
+// to ZZX on (a, b).
+
+
+/**************************************************************************\
+
+                               Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZX operator*(const ZZX& a, const ZZX& b);
+
+ZZX& operator*=(ZZX& x, const ZZX& a);
+
+
+// procedural versions:
+
+void mul(ZZX& x, const ZZX& a, const ZZX& b); // x = a * b
+
+void sqr(ZZX& x, const ZZX& a); // x = a^2
+ZZX sqr(const ZZX& a);
+
+// PROMOTIONS: operator * and procedure mul promote {long, ZZ} to ZZX
+// on (a, b).
+
+
+/**************************************************************************\
+
+                               Shift Operations
+
+LeftShift by n means multiplication by X^n
+RightShift by n means division by X^n
+
+A negative shift amount reverses the direction of the shift.
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZX operator<<(const ZZX& a, long n);
+ZZX operator>>(const ZZX& a, long n);
+
+ZZX& operator<<=(ZZX& x, long n);
+ZZX& operator>>=(ZZX& x, long n);
+
+// procedural versions:
+
+void LeftShift(ZZX& x, const ZZX& a, long n);
+ZZX LeftShift(const ZZX& a, long n);
+
+void RightShift(ZZX& x, const ZZX& a, long n);
+ZZX RightShift(const ZZX& a, long n);
+
+
+
+/**************************************************************************\
+
+                                  Division
+
+\**************************************************************************/
+
+
+// Given polynomials a, b in ZZ[X], there exist polynomials
+// q, r in QQ[X] such that a = b*q + r, deg(r) < deg(b).
+// These routines return q and/or r if q and/or r lie(s) in ZZ[X],
+// and otherwise raise an error.  
+
+// Note that if the leading coefficient of b is 1 or -1,
+// then q and r always lie in ZZ[X], and no error can occur.
+
+// For example, you can write f/2 for a ZZX f.  If all coefficients
+// of f are even, the result is f with a factor of two removed;
+// otherwise, an error is raised.  More generally, f/g will
+// evaluate to q in ZZ[X] such that f = q*g if such a q exists,
+// and will otherwise raise an error.
+
+// See also below the routines for pseudo-division and division
+// predicates for routines that are perhaps more useful in
+// some situations.
+
+
+// operator notation:
+
+ZZX operator/(const ZZX& a, const ZZX& b);
+ZZX operator/(const ZZX& a, const ZZ& b);
+ZZX operator/(const ZZX& a, long b);
+
+ZZX operator%(const ZZX& a, const ZZX& b);
+
+ZZX& operator/=(ZZX& x, const ZZX& b);
+ZZX& operator/=(ZZX& x, const ZZ& b);
+ZZX& operator/=(ZZX& x, long b);
+
+ZZX& operator%=(ZZX& x, const ZZX& b);
+
+
+// procedural versions:
+
+void DivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b);
+// computes q, r such that a = b q + r and deg(r) < deg(b).
+// requires LeadCoeff(b) is a unit (+1, -1); otherwise,
+// an error is raised.
+
+void div(ZZX& q, const ZZX& a, const ZZX& b);
+void div(ZZX& q, const ZZX& a, const ZZ& b);
+void div(ZZX& q, const ZZX& a, long b);
+// same as DivRem, but only computes q
+
+void rem(ZZX& r, const ZZX& a, const ZZX& b);
+// same as DivRem, but only computes r
+
+
+
+// divide predicates:
+
+long divide(ZZX& q, const ZZX& a, const ZZX& b);
+long divide(ZZX& q, const ZZX& a, const ZZ& b);
+long divide(ZZX& q, const ZZX& a, long b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0
+
+
+long divide(const ZZX& a, const ZZX& b);
+long divide(const ZZX& a, const ZZ& b);
+long divide(const ZZX& a, long b);
+// if b | a, returns 1; otherwise returns 0
+
+// These algorithms employ a modular approach, performing the division
+// modulo small primes (reconstructing q via the CRT).  It is
+// usually much faster than the general division routines above
+// (especially when b does not divide a).
+
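+
+// For illustration, a small sketch contrasting exact division with the
+// divide predicate; the function name is arbitrary:
+
+#include <NTL/ZZX.h>
+using namespace NTL;
+
+void DivideExample()
+{
+   ZZX f, g;
+   SetCoeff(f, 2);  SetCoeff(f, 0, -1);   // f = X^2 - 1
+   SetCoeff(g, 1);  SetCoeff(g, 0, -1);   // g = X - 1
+
+   ZZX q = f / g;       // q = X + 1; exact since LeadCoeff(g) == 1
+
+   ZZX q2;
+   if (divide(q2, f, g)) {
+      // q2 == q; returns 1 since g | f
+   }
+
+   // f / 2 would raise an error here, since not all coefficients
+   // of f are even
+}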
+
+void content(ZZ& d, const ZZX& f);
+ZZ content(const ZZX& f);
+// d = content of f, sign(d) == sign(LeadCoeff(f)); content(0) == 0
+
+void PrimitivePart(ZZX& pp, const ZZX& f);
+ZZX PrimitivePart(const ZZX& f);
+// pp = primitive part of f, LeadCoeff(pp) >= 0; PrimitivePart(0) == 0
+
+
+
+// pseudo-division:
+
+void PseudoDivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b);
+// performs pseudo-division: computes q and r with deg(r) < deg(b),
+// and LeadCoeff(b)^(deg(a)-deg(b)+1) a = b q + r.  Only the classical
+// algorithm is used.
+
+void PseudoDiv(ZZX& q, const ZZX& a, const ZZX& b);
+ZZX PseudoDiv(const ZZX& a, const ZZX& b);
+// same as PseudoDivRem, but only computes q
+
+void PseudoRem(ZZX& r, const ZZX& a, const ZZX& b);
+ZZX PseudoRem(const ZZX& a, const ZZX& b);
+// same as PseudoDivRem, but only computes r
+
+
+/**************************************************************************\
+
+                                  GCD's
+
+\**************************************************************************/
+
+
+void GCD(ZZX& d, const ZZX& a, const ZZX& b);
+ZZX GCD(const ZZX& a, const ZZX& b);
+// d = gcd(a, b), LeadCoeff(d) >= 0.  Uses a modular algorithm.
+
+
+void XGCD(ZZ& r, ZZX& s, ZZX& t, const ZZX& a, const ZZX& b,
+          long deterministic=0);
+// r = resultant of a and b; if r != 0, then computes s and t such
+// that: a*s + b*t = r; otherwise s and t not affected.  if
+// !deterministic, then resultant computation may use a randomized
+// strategy that errs with probability no more than 2^{-80}.
+
+
+
+/**************************************************************************\
+
+                               Input/Output
+
+I/O format:
+
+   [a_0 a_1 ... a_n],
+
+represents the polynomial a_0 + a_1*X + ... + a_n*X^n.
+
+
+\**************************************************************************/
+
+
+istream& operator>>(istream& s, ZZX& x);
+ostream& operator<<(ostream& s, const ZZX& a);
+
+
+/**************************************************************************\
+
+                             Some utility routines
+
+\**************************************************************************/
+
+
+void diff(ZZX& x, const ZZX& a); // x = derivative of a
+ZZX diff(const ZZX& a);
+
+long MaxBits(const ZZX& f);
+// returns max NumBits of coefficients of f
+
+void reverse(ZZX& x, const ZZX& a, long hi);
+ZZX reverse(const ZZX& a, long hi);
+
+void reverse(ZZX& x, const ZZX& a);
+ZZX reverse(const ZZX& a);
+
+// x = reverse of a[0]..a[hi] (hi >= -1);
+// hi defaults to deg(a) in second version
+
+
+void VectorCopy(vec_ZZ& x, const ZZX& a, long n);
+vec_ZZ VectorCopy(const ZZX& a, long n);
+// x = copy of coefficient vector of a of length exactly n.
+// input is truncated or padded with zeroes as appropriate.
+
+
+
+/**************************************************************************\
+
+                       Arithmetic mod X^n
+
+All routines require n >= 0, otherwise an error is raised.
+
+\**************************************************************************/
+
+
+void trunc(ZZX& x, const ZZX& a, long m); // x = a % X^m
+ZZX trunc(const ZZX& a, long m);
+
+void MulTrunc(ZZX& x, const ZZX& a, const ZZX& b, long n);
+ZZX MulTrunc(const ZZX& a, const ZZX& b, long n);
+// x = a * b % X^n
+
+void SqrTrunc(ZZX& x, const ZZX& a, long n);
+ZZX SqrTrunc(const ZZX& a, long n);
+// x = a^2 % X^n
+
+void InvTrunc(ZZX& x, const ZZX& a, long n);
+ZZX InvTrunc(const ZZX& a, long n);
+// computes x = a^{-1} % X^n.  Must have ConstTerm(a) invertible.
+
+
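+
+// For illustration, a small power-series sketch of the routines above;
+// the function name is arbitrary:
+
+#include <NTL/ZZX.h>
+using namespace NTL;
+
+void TruncExample()
+{
+   ZZX f;
+   SetCoeff(f, 0, 1);  SetCoeff(f, 1, 1);  // f = 1 + X
+
+   ZZX g = InvTrunc(f, 4);     // g = 1 - X + X^2 - X^3
+
+   ZZX h = MulTrunc(f, g, 4);  // h == 1, i.e., f*g = 1 mod X^4
+}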
+
+
+/**************************************************************************\
+
+                               Modular Arithmetic
+
+The modulus f must be monic with deg(f) > 0,
+and other arguments must have smaller degree.
+
+\**************************************************************************/
+
+void MulMod(ZZX& x, const ZZX& a, const ZZX& b, const ZZX& f);
+ZZX MulMod(const ZZX& a, const ZZX& b, const ZZX& f);
+// x = a * b mod f
+
+void SqrMod(ZZX& x, const ZZX& a, const ZZX& f);
+ZZX SqrMod(const ZZX& a, const ZZX& f);
+// x = a^2 mod f
+
+void MulByXMod(ZZX& x, const ZZX& a, const ZZX& f);
+ZZX MulByXMod(const ZZX& a, const ZZX& f);
+// x = a*X mod f
+
+
+/**************************************************************************\
+
+                  traces, norms, resultants, discriminants,
+                   minimal and characteristic polynomials
+
+\**************************************************************************/
+
+
+void TraceMod(ZZ& res, const ZZX& a, const ZZX& f);
+ZZ TraceMod(const ZZX& a, const ZZX& f);
+// res = trace of (a mod f).  f must be monic, 0 < deg(f), deg(a) <
+// deg(f)
+
+void TraceVec(vec_ZZ& S, const ZZX& f);
+vec_ZZ TraceVec(const ZZX& f);
+// S[i] = Trace(X^i mod f), for i = 0..deg(f)-1.
+// f must be a monic polynomial.
+
+
+// The following routines use a modular approach.
+
+void resultant(ZZ& res, const ZZX& a, const ZZX& b, long deterministic=0);
+ZZ resultant(const ZZX& a, const ZZX& b, long deterministic=0);
+// res = resultant of a and b. If !deterministic, then it may use a
+// randomized strategy that errs with probability no more than
+// 2^{-80}.
+
+
+
+void NormMod(ZZ& res, const ZZX& a, const ZZX& f, long deterministic=0);
+ZZ NormMod(const ZZX& a, const ZZX& f, long deterministic=0);
+// res = norm of (a mod f).  f must be monic, 0 < deg(f), deg(a) <
+// deg(f). If !deterministic, then it may use a randomized strategy
+// that errs with probability no more than 2^{-80}.
+
+
+
+void discriminant(ZZ& d, const ZZX& a, long deterministic=0);
+ZZ discriminant(const ZZX& a, long deterministic=0);
+// d = discriminant of a = (-1)^{m(m-1)/2} resultant(a, a')/lc(a),
+// where m = deg(a). If !deterministic, then it may use a randomized
+// strategy that errs with probability no more than 2^{-80}.
+
+
+void CharPolyMod(ZZX& g, const ZZX& a, const ZZX& f, long deterministic=0);
+ZZX CharPolyMod(const ZZX& a, const ZZX& f, long deterministic=0);
+// g = char poly of (a mod f).  f must be monic.  If !deterministic,
+// then it may use a randomized strategy that errs with probability no
+// more than 2^{-80}.
+
+
+void MinPolyMod(ZZX& g, const ZZX& a, const ZZX& f);
+ZZX MinPolyMod(const ZZX& a, const ZZX& f);
+// g = min poly of (a mod f).  f must be monic, 0 < deg(f), deg(a) <
+// deg(f).  May use a probabilistic strategy that errs with
+// probability no more than 2^{-80}.
+
+
+
+
+/**************************************************************************\
+
+                  Incremental Chinese Remaindering
+
+\**************************************************************************/
+
+long CRT(ZZX& a, ZZ& prod, const zz_pX& A);
+long CRT(ZZX& a, ZZ& prod, const ZZ_pX& A);
+// Incremental Chinese Remaindering: if p is the current zz_p/ZZ_p modulus,
+// with (p, prod) = 1, this computes a' such that a' = a mod prod and
+// a' = A mod p, with coefficients in the interval (-p*prod/2, p*prod/2];
+// it sets a := a', prod := p*prod, and returns 1 if a's value changed.
+
+
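+
+// For illustration, a sketch of the incremental CRT pattern; the
+// function name and the primes are arbitrary:
+
+#include <NTL/ZZX.h>
+#include <NTL/lzz_pX.h>
+using namespace NTL;
+
+void CrtExample()
+{
+   ZZX a;          // a = 0
+   ZZ prod;
+   set(prod);      // prod = 1
+
+   long primes[2] = { 1000003, 1000033 };
+   for (long i = 0; i < 2; i++) {
+      zz_p::init(primes[i]);
+      zz_pX A;
+      SetCoeff(A, 1, 2);  SetCoeff(A, 0, 5);  // image of 2*X + 5 mod p
+      CRT(a, prod, A);
+   }
+   // now a == 2*X + 5 and prod == 1000003*1000033
+}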
+
+
+
+/**************************************************************************\
+
+                                vectors of ZZX's
+
+\**************************************************************************/
+
+
+typedef Vec<ZZX> vec_ZZX; // backward compatibility
+
+
+/**************************************************************************\
+
+                                Miscellany
+
+
+\**************************************************************************/
+
+
+void clear(ZZX& x); // x = 0
+void set(ZZX& x); // x = 1
+
+void ZZX::kill();
+// f.kill() sets f to 0 and frees all memory held by f.  Equivalent to
+// f.rep.kill().
+
+ZZX::ZZX(INIT_SIZE_TYPE, long n);
+// ZZX(INIT_SIZE, n) initializes to zero, but space is pre-allocated
+// for n coefficients
+
+static const ZZX& zero();
+// ZZX::zero() is a read-only reference to 0
+
+void ZZX::swap(ZZX& x);
+void swap(ZZX& x, ZZX& y);
+// swap (by swapping pointers)
+
+
+ZZX::ZZX(long i, const ZZ& c);
+ZZX::ZZX(long i, long c);
+// initial value c*X^i, provided for backward compatibility
+
+
diff --git a/thirdparty/linux/ntl/doc/ZZX.txt b/thirdparty/linux/ntl/doc/ZZX.txt
new file mode 100644
index 0000000000..064390d082
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/ZZX.txt
@@ -0,0 +1,598 @@
+
+
+/**************************************************************************\
+
+MODULE: ZZX
+
+SUMMARY:
+
+The class ZZX implements polynomials in ZZ[X], i.e., univariate
+polynomials with integer coefficients.
+
+Polynomial multiplication is implemented using one of 4 different
+algorithms:
+
+1) classical
+
+2) Karatsuba
+
+3) Schoenhage & Strassen --- performs an FFT by working
+     modulo a "Fermat number" of appropriate size...
+     good for polynomials with huge coefficients
+     and moderate degree
+
+4) CRT/FFT --- performs an FFT by working modulo several
+     small primes...good for polynomials with moderate coefficients
+     and huge degree.
+
+The choice of algorithm is somewhat heuristic, and may not always be
+perfect.
+
+Many thanks to Juergen Gerhard <jngerhar@plato.uni-paderborn.de> for
+pointing out the deficiency in the NTL-1.0 ZZX arithmetic, and for
+contributing the Schoenhage/Strassen code.
+
+Extensive use is made of modular algorithms to enhance performance
+(e.g., the GCD algorithm and many others).
+
+\**************************************************************************/
+
+#include <NTL/vec_ZZ.h>
+#include "zz_pX.h"
+#include <NTL/ZZ_pX.h>
+
+
+class ZZX {
+public:
+
+
+   ZZX(); // initial value 0
+
+   ZZX(const ZZX& a); // copy
+   explicit ZZX(const ZZ& a); // promotion
+   explicit ZZX(long a); // promotion
+
+   ~ZZX();
+
+   ZZX(INIT_MONO_TYPE, long i, const ZZ& c);
+   ZZX(INIT_MONO_TYPE, long i, long c);
+   // initial value c*X^i, invoke as ZZX(INIT_MONO, i, c)
+
+   ZZX(INIT_MONO_TYPE, long i);
+   // initial value X^i, invoke as ZZX(INIT_MONO, i)
+
+   ZZX& operator=(const ZZX& a); // assignment
+   ZZX& operator=(const ZZ& a);
+   ZZX& operator=(long a);
+
+   typedef ZZ coeff_type;
+
+   // ...
+
+};
+
+
+
+
+/**************************************************************************\
+
+                              Accessing coefficients
+
+The degree of a polynomial f is obtained as deg(f),
+where the zero polynomial, by definition, has degree -1.
+
+A polynomial f is represented as a coefficient vector.
+Coefficients may be accessed in one of two ways.
+
+The safe, high-level method is to call the function
+coeff(f, i) to get the coefficient of X^i in the polynomial f,
+and to call the function SetCoeff(f, i, a) to set the coefficient
+of X^i in f to the scalar a.
+
+One can also access the coefficients more directly via a lower level
+interface.  The coefficient of X^i in f may be accessed using
+subscript notation f[i].  In addition, one may write f.SetLength(n)
+to set the length of the underlying coefficient vector to n,
+and f.SetMaxLength(n) to allocate space for n coefficients,
+without changing the coefficient vector itself.
+
+After setting coefficients using this low-level interface,
+one must ensure that leading zeros in the coefficient vector
+are stripped afterwards by calling the function f.normalize().
+
+NOTE: the coefficient vector of f may also be accessed directly
+as f.rep; however, this is not recommended. Also, for a properly
+normalized polynomial f, we have f.rep.length() == deg(f)+1,
+and deg(f) >= 0  =>  f.rep[deg(f)] != 0.
+
+\**************************************************************************/
+
+
+
+long deg(const ZZX& a);  // return deg(a); deg(0) == -1.
+
+const ZZ& coeff(const ZZX& a, long i);
+// returns the coefficient of X^i, or zero if i not in range
+
+const ZZ& LeadCoeff(const ZZX& a);
+// returns leading term of a, or zero if a == 0
+
+const ZZ& ConstTerm(const ZZX& a);
+// returns constant term of a, or zero if a == 0
+
+void SetCoeff(ZZX& x, long i, const ZZ& a);
+void SetCoeff(ZZX& x, long i, long a);
+// makes coefficient of X^i equal to a; error is raised if i < 0
+
+void SetCoeff(ZZX& x, long i);
+// makes coefficient of X^i equal to 1;  error is raised if i < 0
+
+void SetX(ZZX& x); // x is set to the monomial X
+
+long IsX(const ZZX& a); // test if a = X
+
+
+
+
+ZZ& ZZX::operator[](long i);
+const ZZ& ZZX::operator[](long i) const;
+// indexing operators: f[i] is the coefficient of X^i ---
+// i should satisfy i >= 0 and i <= deg(f).
+// No range checking (unless NTL_RANGE_CHECK is defined).
+
+void ZZX::SetLength(long n);
+// f.SetLength(n) sets the length of the underlying coefficient
+// vector to n --- after this call, indexing f[i] for i = 0..n-1
+// is valid.
+
+void ZZX::normalize();
+// f.normalize() strips leading zeros from coefficient vector of f
+
+void ZZX::SetMaxLength(long n);
+// f.SetMaxLength(n) pre-allocates space for n coefficients.  The
+// polynomial that f represents is unchanged.
+
+
+
+
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+long operator==(const ZZX& a, const ZZX& b);
+long operator!=(const ZZX& a, const ZZX& b);
+
+long IsZero(const ZZX& a);  // test for 0
+long IsOne(const ZZX& a);   // test for 1
+
+// PROMOTIONS: operators ==, != promote {long, ZZ} to ZZX on (a, b).
+
+
+/**************************************************************************\
+
+                                   Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZX operator+(const ZZX& a, const ZZX& b);
+ZZX operator-(const ZZX& a, const ZZX& b);
+ZZX operator-(const ZZX& a); // unary -
+
+ZZX& operator+=(ZZX& x, const ZZX& a);
+ZZX& operator-=(ZZX& x, const ZZX& a);
+
+ZZX& operator++(ZZX& x);  // prefix
+void operator++(ZZX& x, int);  // postfix
+
+ZZX& operator--(ZZX& x);  // prefix
+void operator--(ZZX& x, int);  // postfix
+
+
+// procedural versions:
+
+void add(ZZX& x, const ZZX& a, const ZZX& b); // x = a + b
+void sub(ZZX& x, const ZZX& a, const ZZX& b); // x = a - b
+void negate(ZZX& x, const ZZX& a); // x = -a
+
+// PROMOTIONS: binary +, - and procedures add, sub promote {long, ZZ}
+// to ZZX on (a, b).
+
+
+/**************************************************************************\
+
+                               Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZX operator*(const ZZX& a, const ZZX& b);
+
+ZZX& operator*=(ZZX& x, const ZZX& a);
+
+
+// procedural versions:
+
+void mul(ZZX& x, const ZZX& a, const ZZX& b); // x = a * b
+
+void sqr(ZZX& x, const ZZX& a); // x = a^2
+ZZX sqr(const ZZX& a);
+
+// PROMOTIONS: operator * and procedure mul promote {long, ZZ} to ZZX
+// on (a, b).
+
+
+/**************************************************************************\
+
+                               Shift Operations
+
+LeftShift by n means multiplication by X^n
+RightShift by n means division by X^n
+
+A negative shift amount reverses the direction of the shift.
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZX operator<<(const ZZX& a, long n);
+ZZX operator>>(const ZZX& a, long n);
+
+ZZX& operator<<=(ZZX& x, long n);
+ZZX& operator>>=(ZZX& x, long n);
+
+// procedural versions:
+
+void LeftShift(ZZX& x, const ZZX& a, long n);
+ZZX LeftShift(const ZZX& a, long n);
+
+void RightShift(ZZX& x, const ZZX& a, long n);
+ZZX RightShift(const ZZX& a, long n);
+
+
+
+/**************************************************************************\
+
+                                  Division
+
+\**************************************************************************/
+
+
+// Given polynomials a, b in ZZ[X], there exist polynomials
+// q, r in QQ[X] such that a = b*q + r, deg(r) < deg(b).
+// These routines return q and/or r if q and/or r lie(s) in ZZ[X],
+// and otherwise raise an error.
+
+// Note that if the leading coefficient of b is 1 or -1,
+// then q and r always lie in ZZ[X], and no error can occur.
+
+// For example, you can write f/2 for a ZZX f.  If all coefficients
+// of f are even, the result is f with a factor of two removed;
+// otherwise, an error is raised.  More generally, f/g will
+// evaluate to q in ZZ[X] such that f = q*g if such a q exists,
+// and will otherwise raise an error.
+
+// See also below the routines for pseudo-division and division
+// predicates for routines that are perhaps more useful in
+// some situations.
+
+
+// operator notation:
+
+ZZX operator/(const ZZX& a, const ZZX& b);
+ZZX operator/(const ZZX& a, const ZZ& b);
+ZZX operator/(const ZZX& a, long b);
+
+ZZX operator%(const ZZX& a, const ZZX& b);
+
+ZZX& operator/=(ZZX& x, const ZZX& b);
+ZZX& operator/=(ZZX& x, const ZZ& b);
+ZZX& operator/=(ZZX& x, long b);
+
+ZZX& operator%=(ZZX& x, const ZZX& b);
+
+
+// procedural versions:
+
+void DivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b);
+// computes q, r such that a = b q + r and deg(r) < deg(b).
+// requires LeadCoeff(b) is a unit (+1, -1); otherwise,
+// an error is raised.
+
+void div(ZZX& q, const ZZX& a, const ZZX& b);
+void div(ZZX& q, const ZZX& a, const ZZ& b);
+void div(ZZX& q, const ZZX& a, long b);
+// same as DivRem, but only computes q
+
+void rem(ZZX& r, const ZZX& a, const ZZX& b);
+// same as DivRem, but only computes r
+
+
+
+// divide predicates:
+
+long divide(ZZX& q, const ZZX& a, const ZZX& b);
+long divide(ZZX& q, const ZZX& a, const ZZ& b);
+long divide(ZZX& q, const ZZX& a, long b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0
+
+
+long divide(const ZZX& a, const ZZX& b);
+long divide(const ZZX& a, const ZZ& b);
+long divide(const ZZX& a, long b);
+// if b | a, returns 1; otherwise returns 0
+
+// These algorithms employ a modular approach, performing the division
+// modulo small primes (reconstructing q via the CRT).  It is
+// usually much faster than the general division routines above
+// (especially when b does not divide a).
+
+
+void content(ZZ& d, const ZZX& f);
+ZZ content(const ZZX& f);
+// d = content of f, sign(d) == sign(LeadCoeff(f)); content(0) == 0
+
+void PrimitivePart(ZZX& pp, const ZZX& f);
+ZZX PrimitivePart(const ZZX& f);
+// pp = primitive part of f, LeadCoeff(pp) >= 0; PrimitivePart(0) == 0
+
+
+
+// pseudo-division:
+
+void PseudoDivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b);
+// performs pseudo-division: computes q and r with deg(r) < deg(b),
+// and LeadCoeff(b)^(deg(a)-deg(b)+1) a = b q + r.  Only the classical
+// algorithm is used.
+
+void PseudoDiv(ZZX& q, const ZZX& a, const ZZX& b);
+ZZX PseudoDiv(const ZZX& a, const ZZX& b);
+// same as PseudoDivRem, but only computes q
+
+void PseudoRem(ZZX& r, const ZZX& a, const ZZX& b);
+ZZX PseudoRem(const ZZX& a, const ZZX& b);
+// same as PseudoDivRem, but only computes r
+
+
+/**************************************************************************\
+
+                                  GCD's
+
+\**************************************************************************/
+
+
+void GCD(ZZX& d, const ZZX& a, const ZZX& b);
+ZZX GCD(const ZZX& a, const ZZX& b);
+// d = gcd(a, b), LeadCoeff(d) >= 0.  Uses a modular algorithm.
+
+
+void XGCD(ZZ& r, ZZX& s, ZZX& t, const ZZX& a, const ZZX& b,
+          long deterministic=0);
+// r = resultant of a and b; if r != 0, then computes s and t such
+// that: a*s + b*t = r; otherwise s and t not affected.  if
+// !deterministic, then resultant computation may use a randomized
+// strategy that errs with probability no more than 2^{-80}.
+
+
+
+/**************************************************************************\
+
+                               Input/Output
+
+I/O format:
+
+   [a_0 a_1 ... a_n],
+
+represents the polynomial a_0 + a_1*X + ... + a_n*X^n.
+
+
+\**************************************************************************/
+
+
+istream& operator>>(istream& s, ZZX& x);
+ostream& operator<<(ostream& s, const ZZX& a);
+
+
+/**************************************************************************\
+
+                             Some utility routines
+
+\**************************************************************************/
+
+
+void diff(ZZX& x, const ZZX& a); // x = derivative of a
+ZZX diff(const ZZX& a);
+
+long MaxBits(const ZZX& f);
+// returns max NumBits of coefficients of f
+
+void reverse(ZZX& x, const ZZX& a, long hi);
+ZZX reverse(const ZZX& a, long hi);
+
+void reverse(ZZX& x, const ZZX& a);
+ZZX reverse(const ZZX& a);
+
+// x = reverse of a[0]..a[hi] (hi >= -1);
+// hi defaults to deg(a) in second version
+
+
+void VectorCopy(vec_ZZ& x, const ZZX& a, long n);
+vec_ZZ VectorCopy(const ZZX& a, long n);
+// x = copy of coefficient vector of a of length exactly n.
+// input is truncated or padded with zeroes as appropriate.
+
+
+
+/**************************************************************************\
+
+                       Arithmetic mod X^n
+
+All routines require n >= 0, otherwise an error is raised.
+
+\**************************************************************************/
+
+
+void trunc(ZZX& x, const ZZX& a, long m); // x = a % X^m
+ZZX trunc(const ZZX& a, long m);
+
+void MulTrunc(ZZX& x, const ZZX& a, const ZZX& b, long n);
+ZZX MulTrunc(const ZZX& a, const ZZX& b, long n);
+// x = a * b % X^n
+
+void SqrTrunc(ZZX& x, const ZZX& a, long n);
+ZZX SqrTrunc(const ZZX& a, long n);
+// x = a^2 % X^n
+
+void InvTrunc(ZZX& x, const ZZX& a, long n);
+ZZX InvTrunc(const ZZX& a, long n);
+// computes x = a^{-1} % X^n.  Must have ConstTerm(a) invertible.
+
+
+
+
+/**************************************************************************\
+
+                               Modular Arithmetic
+
+The modulus f must be monic with deg(f) > 0,
+and other arguments must have smaller degree.
+
+\**************************************************************************/
+
+void MulMod(ZZX& x, const ZZX& a, const ZZX& b, const ZZX& f);
+ZZX MulMod(const ZZX& a, const ZZX& b, const ZZX& f);
+// x = a * b mod f
+
+void SqrMod(ZZX& x, const ZZX& a, const ZZX& f);
+ZZX SqrMod(const ZZX& a, const ZZX& f);
+// x = a^2 mod f
+
+void MulByXMod(ZZX& x, const ZZX& a, const ZZX& f);
+ZZX MulByXMod(const ZZX& a, const ZZX& f);
+// x = a*X mod f
+
+
+/**************************************************************************\
+
+                  traces, norms, resultants, discriminants,
+                   minimal and characteristic polynomials
+
+\**************************************************************************/
+
+
+void TraceMod(ZZ& res, const ZZX& a, const ZZX& f);
+ZZ TraceMod(const ZZX& a, const ZZX& f);
+// res = trace of (a mod f).  f must be monic, 0 < deg(f), deg(a) <
+// deg(f)
+
+void TraceVec(vec_ZZ& S, const ZZX& f);
+vec_ZZ TraceVec(const ZZX& f);
+// S[i] = Trace(X^i mod f), for i = 0..deg(f)-1.
+// f must be a monic polynomial.
+
+
+// The following routines use a modular approach.
+
+void resultant(ZZ& res, const ZZX& a, const ZZX& b, long deterministic=0);
+ZZ resultant(const ZZX& a, const ZZX& b, long deterministic=0);
+// res = resultant of a and b.  If !deterministic, then it may use a
+// randomized strategy that errs with probability no more than
+// 2^{-80}.
+
+
+
+void NormMod(ZZ& res, const ZZX& a, const ZZX& f, long deterministic=0);
+ZZ NormMod(const ZZX& a, const ZZX& f, long deterministic=0);
+// res = norm of (a mod f).  f must be monic, 0 < deg(f), deg(a) <
+// deg(f).  If !deterministic, then it may use a randomized strategy
+// that errs with probability no more than 2^{-80}.
+
+
+
+void discriminant(ZZ& d, const ZZX& a, long deterministic=0);
+ZZ discriminant(const ZZX& a, long deterministic=0);
+// d = discriminant of a = (-1)^{m(m-1)/2} resultant(a, a')/lc(a),
+// where m = deg(a).  If !deterministic, then it may use a randomized
+// strategy that errs with probability no more than 2^{-80}.
+
+
+void CharPolyMod(ZZX& g, const ZZX& a, const ZZX& f, long deterministic=0);
+ZZX CharPolyMod(const ZZX& a, const ZZX& f, long deterministic=0);
+// g = char poly of (a mod f).  f must be monic.  If !deterministic,
+// then it may use a randomized strategy that errs with probability no
+// more than 2^{-80}.
+
+
+void MinPolyMod(ZZX& g, const ZZX& a, const ZZX& f);
+ZZX MinPolyMod(const ZZX& a, const ZZX& f);
+// g = min poly of (a mod f).  f must be monic, 0 < deg(f), deg(a) <
+// deg(f).  May use a probabilistic strategy that errs with
+// probability no more than 2^{-80}.
+
+
+
+
+/**************************************************************************\
+
+                  Incremental Chinese Remaindering
+
+\**************************************************************************/
+
+long CRT(ZZX& a, ZZ& prod, const zz_pX& A);
+long CRT(ZZX& a, ZZ& prod, const ZZ_pX& A);
+// Incremental Chinese Remaindering: if p is the current zz_p/ZZ_p modulus,
+// with (p, prod) = 1, this computes a' such that a' = a mod prod and
+// a' = A mod p, with coefficients in the interval (-p*prod/2, p*prod/2];
+// it sets a := a', prod := p*prod, and returns 1 if a's value changed.
+
+
+
+
+
+/**************************************************************************\
+
+                           vectors of ZZX's
+
+\**************************************************************************/
+
+
+typedef Vec<ZZX> vec_ZZX; // backward compatibility
+
+
+/**************************************************************************\
+
+                               Miscellany
+
+
+\**************************************************************************/
+
+
+void clear(ZZX& x); // x = 0
+void set(ZZX& x); // x = 1
+
+void ZZX::kill();
+// f.kill() sets f to 0 and frees all memory held by f.  Equivalent to
+// f.rep.kill().
+
+ZZX::ZZX(INIT_SIZE_TYPE, long n);
+// ZZX(INIT_SIZE, n) initializes to zero, but space is pre-allocated
+// for n coefficients
+
+static const ZZX& zero();
+// ZZX::zero() is a read-only reference to 0
+
+void ZZX::swap(ZZX& x);
+void swap(ZZX& x, ZZX& y);
+// swap (by swapping pointers)
+
+
+ZZX::ZZX(long i, const ZZ& c);
+ZZX::ZZX(long i, long c);
+// initial value c*X^i, provided for backward compatibility
diff --git a/thirdparty/linux/ntl/doc/ZZXFactoring.cpp.html b/thirdparty/linux/ntl/doc/ZZXFactoring.cpp.html
new file mode 100644
index 0000000000..abd68d83ee
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/ZZXFactoring.cpp.html
@@ -0,0 +1,181 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/ZZXFactoring.cpp.html
+
+
+
+
+/*****************************************************************************\
+
+MODULE: ZZXFactoring
+
+SUMMARY:
+
+Routines are provided for factoring in ZZX.
+
+See IMPLEMENTATION DETAILS below for a discussion of the algorithms used,
+and of the flags available for selecting among these algorithms.
+
+\*****************************************************************************/
+
+#include <NTL/ZZX.h>
+#include <NTL/pair_ZZX_long.h>
+
+void SquareFreeDecomp(vec_pair_ZZX_long& u, const ZZX& f);
+vec_pair_ZZX_long SquareFreeDecomp(const ZZX& f);
+
+// input is primitive, with positive leading coefficient.  Performs
+// square-free decomposition.  If f = prod_i g_i^i, then u is set to a
+// list of pairs (g_i, i).  The list is in increasing order of i, with
+// trivial terms (i.e., g_i = 1) deleted.
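+
+/****** Example (an illustrative sketch, not from the original text;
+        the input polynomial is arbitrary): ******
+
+   ZZX g, h, f;
+   SetCoeff(g, 1, 1); SetCoeff(g, 0, 1);    // g = X + 1
+   SetCoeff(h, 1, 1); SetCoeff(h, 0, -1);   // h = X - 1
+   mul(f, g, g); mul(f, f, h);              // f = g^2 * h, primitive, lc > 0
+
+   vec_pair_ZZX_long u;
+   SquareFreeDecomp(u, f);
+   // u now holds (h, 1) and (g, 2); u[i].a is g_i and u[i].b is i
+
+***********************************************************************/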
+
+
+void MultiLift(vec_ZZX& A, const vec_zz_pX& a, const ZZX& f, long e,
+               long verbose=0);
+
+// Using current value p of zz_p::modulus(), this lifts the
+// square-free factorization a mod p of f to a factorization A mod p^e
+// of f.  It is required that f and all the polynomials in a are
+// monic.
+
+
+
+void SFFactor(vec_ZZX& factors, const ZZX& f, long verbose=0, long bnd=0);
+vec_ZZX SFFactor(const ZZX& f, long verbose=0, long bnd=0);
+
+// input f is primitive and square-free, with positive leading
+// coefficient.  bnd, if not zero, indicates that f divides a
+// polynomial h whose Euclidean norm is bounded by 2^{bnd} in absolute
+// value.  This uses the routine SFCanZass in zz_pXFactoring and then
+// performs a MultiLift, followed by a brute-force search for the
+// factors.  
+
+// A number of heuristics are used to speed up the factor-search step.
+// See "implementation details" below.
+
+
+void factor(ZZ& c,
+            vec_pair_ZZX_long& factors,
+            const ZZX& f,
+            long verbose=0,
+            long bnd=0);
+
+// input f is an arbitrary polynomial.  c is the content of f, and
+// factors is the factorization of its primitive part.  bnd is as in
+// SFFactor.  The routine calls SquareFreeDecomp and SFFactor.
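+
+/****** Example (an illustrative sketch, not from the original text): ******
+
+   ZZX f;
+   SetCoeff(f, 2, 2); SetCoeff(f, 0, -2);   // f = 2*X^2 - 2
+
+   ZZ c;
+   vec_pair_ZZX_long factors;
+   factor(c, factors, f);
+   // c == 2, and factors holds (X - 1, 1) and (X + 1, 1)
+
+**************************************************************************/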
+
+void mul(ZZX& x, const vec_pair_ZZX_long& a);
+ZZX mul(const vec_pair_ZZX_long& a);
+// multiplies polynomials, with multiplicities.
+
+
+
+
+/*****************************************************************************\
+
+IMPLEMENTATION DETAILS
+
+To factor a polynomial, first its content is extracted, and it is
+made squarefree.  This is typically very fast.
+
+Second, a simple hack is performed: if the polynomial is of the
+form g(x^l), then an attempt is made to factor g(x^m),
+for divisors m of l, which can in some cases greatly simplify
+the factorization task.
+You can turn this "power hack" on/off by setting the following variable
+to 1/0:
+
+   extern long ZZXFac_PowerHack;  // initial value = 1
+
+
+Third, the polynomial is factored modulo several
+small primes, and one small prime p is selected as the "best".
+You can choose the number of small primes that you want to use
+by setting the following variable:
+
+   extern long ZZXFac_InitNumPrimes;  // initial value = 7
+
+Fourth, the factorization mod p is "lifted" to a factorization mod p^k
+for a sufficiently large k.  This is done via quadratic Hensel
+lifting.  Despite "folk wisdom" to the contrary, this is much
+more efficient than linear Hensel lifting, especially since NTL
+has very fast polynomial arithmetic.
+
+After the "lifting phase" comes the "factor recombination phase".
+The factorization mod p^k may be "finer" than the true factorization
+over the integers, hence we have to "combine" subsets of modular factors
+and test if these are factors over the integers.
+
+There are two basic strategies:  the "Zassenhaus" method
+and the "van Hoeij" method.
+
+The van Hoeij method:
+
+The van Hoeij method is fairly new, but it is so much better than
+the older, Zassenhaus method, that it is now the default.
+For a description of the method, go to Mark van Hoeij's home page:
+
+   http://www.openmath.org/~hoeij/
+
+The van Hoeij method is not really a specific algorithm, but a general
+algorithmic approach: many parameters and strategies have to be selected
+to obtain a specific algorithm, and it is a challenge to
+make all of these choices so that the resulting algorithm works
+fairly well on all input polynomials.
+
+Set the following variable to 1 to enable the van Hoeij method,
+and to 0 to revert to the Zassenhaus method:
+
+   extern long ZZXFac_van_Hoeij; // initial value = 1
+
+Note that the "power hack" is still on by default when using van Hoeij's
+method, but we have arranged things so that the "power hack" strategy
+is abandoned if it appears to be too much of a waste of time.
+Unlike with the Zassenhaus method, using the "power hack" method with
+van Hoeij can sometimes be a huge waste of time if one is not careful.
+
+
+
+The Zassenhaus method:
+
+The Zassenhaus method is essentially a brute-force search, but with
+a lot of fancy "pruning" techniques, as described in the paper
+[J. Abbott, V. Shoup, P. Zimmermann, "Factoring in Z[x]: the searching phase",
+ISSAC 2000].
+
+These heuristics are fairly effective, and allow one to easily deal
+with up to around 30-40 modular factors, which is *much* more
+than other Zassenhaus-based factorizers can deal with; however, after this,
+the exponential behavior of the algorithm really starts to dominate.
+
+The behaviour of these heuristics can be fine tuned by
+setting the following global variables:
+
+   extern long ZZXFac_MaxNumPrimes;  // initial value = 50
+   // During the factor recombination phase, if not much progress
+   // is being made, occasionally more "local" information is
+   // collected by factoring f modulo another prime.
+   // This "local" information is used to rule out degrees
+   // of potential factors during recombination.
+   // This value bounds the total number of primes modulo which f
+   // is factored.
+
+   extern long ZZXFac_MaxPrune;  // initial value = 10
+   // A kind of "meet in the middle" strategy is used
+   // to prune the search space during recombination.
+   // For many (but not all) polynomials, this can greatly
+   // reduce the running time.
+   // When it does work, there is a time-space tradeoff:
+   // If t = ZZXFac_MaxPrune, the running time will be reduced by a factor near
+   // 2^t, but the table will take (at most) t*2^(t-1) bytes of storage.
+   // Note that ZZXFac_MaxPrune is treated as an upper bound on t---the
+   // factoring algorithm may decide to use a smaller value of t for
+   // a number of reasons.
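+
+For instance (an illustrative sketch; the values are arbitrary, and c,
+factors, f are as in the factor() call above), a caller who wants the
+plain Zassenhaus method with a larger prime budget could set these
+variables before factoring:
+
+   ZZXFac_van_Hoeij = 0;       // force the Zassenhaus method
+   ZZXFac_MaxNumPrimes = 100;  // allow more "local" primes
+   factor(c, factors, f);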
+
+
+
+\*****************************************************************************/
+
+
diff --git a/thirdparty/linux/ntl/doc/ZZXFactoring.txt b/thirdparty/linux/ntl/doc/ZZXFactoring.txt
new file mode 100644
index 0000000000..bff6ed03e6
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/ZZXFactoring.txt
@@ -0,0 +1,171 @@
+
+/*****************************************************************************\
+
+MODULE: ZZXFactoring
+
+SUMMARY:
+
+Routines are provided for factoring in ZZX.
+
+See IMPLEMENTATION DETAILS below for a discussion of the algorithms used,
+and of the flags available for selecting among these algorithms.
+
+\*****************************************************************************/
+
+#include <NTL/ZZX.h>
+#include <NTL/pair_ZZX_long.h>
+
+void SquareFreeDecomp(vec_pair_ZZX_long& u, const ZZX& f);
+vec_pair_ZZX_long SquareFreeDecomp(const ZZX& f);
+
+// input is primitive, with positive leading coefficient.  Performs
+// square-free decomposition.  If f = prod_i g_i^i, then u is set to a
+// list of pairs (g_i, i).  The list is in increasing order of i, with
+// trivial terms (i.e., g_i = 1) deleted.
+
+
+void MultiLift(vec_ZZX& A, const vec_zz_pX& a, const ZZX& f, long e,
+               long verbose=0);
+
+// Using current value p of zz_p::modulus(), this lifts the
+// square-free factorization a mod p of f to a factorization A mod p^e
+// of f.  It is required that f and all the polynomials in a are
+// monic.
+
+
+
+void SFFactor(vec_ZZX& factors, const ZZX& f, long verbose=0, long bnd=0);
+vec_ZZX SFFactor(const ZZX& f, long verbose=0, long bnd=0);
+
+// input f is primitive and square-free, with positive leading
+// coefficient.  bnd, if not zero, indicates that f divides a
+// polynomial h whose Euclidean norm is bounded by 2^{bnd} in absolute
+// value.  This uses the routine SFCanZass in zz_pXFactoring and then
+// performs a MultiLift, followed by a brute-force search for the
+// factors.
+
+// A number of heuristics are used to speed up the factor-search step.
+// See "implementation details" below.
+
+
+void factor(ZZ& c,
+            vec_pair_ZZX_long& factors,
+            const ZZX& f,
+            long verbose=0,
+            long bnd=0);
+
+// input f is an arbitrary polynomial.  c is the content of f, and
+// factors is the factorization of its primitive part.  bnd is as in
+// SFFactor.  The routine calls SquareFreeDecomp and SFFactor.
+
+void mul(ZZX& x, const vec_pair_ZZX_long& a);
+ZZX mul(const vec_pair_ZZX_long& a);
+// multiplies polynomials, with multiplicities.
+
+
+
+
+/*****************************************************************************\
+
+IMPLEMENTATION DETAILS
+
+To factor a polynomial, first its content is extracted, and it is
+made squarefree.  This is typically very fast.
+
+Second, a simple hack is performed: if the polynomial is of the
+form g(x^l), then an attempt is made to factor g(x^m),
+for divisors m of l, which can in some cases greatly simplify
+the factorization task.
+You can turn this "power hack" on/off by setting the following variable
+to 1/0:
+
+   extern long ZZXFac_PowerHack;  // initial value = 1
+
+
+Third, the polynomial is factored modulo several
+small primes, and one small prime p is selected as the "best".
+You can choose the number of small primes that you want to use
+by setting the following variable:
+
+   extern long ZZXFac_InitNumPrimes;  // initial value = 7
+
+Fourth, the factorization mod p is "lifted" to a factorization mod p^k
+for a sufficiently large k.  This is done via quadratic Hensel
+lifting.  Despite "folk wisdom" to the contrary, this is much
+more efficient than linear Hensel lifting, especially since NTL
+has very fast polynomial arithmetic.
+ +After the "lifting phase" comes the "factor recombination phase". +The factorization mod p^k may be "finer" than the true factorization +over the integers, hence we have to "combine" subsets of modular factors +and test if these are factors over the integers. + +There are two basic strategies: the "Zassenhaus" method +and the "van Hoeij" method. + +The van Hoeij method: + +The van Hoeij method is fairly new, but it is so much better than +the older, Zassenhaus method, that it is now the default. +For a description of the method, go to Mark van Hoeij's home page: + + http://www.openmath.org/~hoeij/ + +The van Hoeij method is not really a specific algorithm, but a general +algorithmic approach: many parameters and strategies have to be selected +to obtain a specific algorithm, and it is a challenge to +make all of these choices so that the resulting algorithm works +fairly well on all input polynomials. + +Set the following variable to 1 to enable the van Hoeij method, +and to 0 to revert to the Zassenhaus method: + + extern long ZZXFac_van_Hoeij; // initial value = 1 + +Note that the "power hack" is still on by default when using van Hoeij's +method, but we have arranged things so that the "power hack" strategy +is abandoned if it appears to be too much a waste of time. +Unlike with the Zassenhaus method, using the "power hack" method with +van Hoeij can sometimes be a huge waste of time if one is not careful. + + + +The Zassenhaus method: + +The Zassenhaus method is essentially a brute-force search, but with +a lot of fancy "pruning" techniques, as described in the paper +[J. Abbott, V. Shoup, P. Zimmermann, "Factoring in Z[x]: the searching phase", +ISSAC 2000]. + +These heuristics are fairly effective, and allow one to easily deal +with up to around 30-40 modular factors, which is *much* more +than other Zassenhaus-based factorizers can deal with; however, after this, +the exponential behavior of the algorithm really starts to dominate. + +The behaviour of these heuristics can be fine tuned by +setting the following global variables: + + extern long ZZXFac_MaxNumPrimes; // initial value = 50 + // During the factor recombination phase, if not much progress + // is being made, occasionally more "local" information is + // collected by factoring f modulo another prime. + // This "local" information is used to rule out degrees + // of potential factors during recombination. + // This value bounds the total number of primes modulo which f + // is factored. + + extern long ZZXFac_MaxPrune; // initial value = 10 + // A kind of "meet in the middle" strategy is used + // to prune the search space during recombination. + // For many (but not all) polynomials, this can greatly + // reduce the running time. + // When it does work, there is a time-space tradeoff: + // If t = ZZXFac_MaxPrune, the running time will be reduced by a factor near + // 2^t, but the table will take (at most) t*2^(t-1) bytes of storage. + // Note that ZZXFac_MaxPrune is treated as an upper bound on t---the + // factoring algorithm may decide to use a smaller value of t for + // a number of reasons. + + + +\*****************************************************************************/ diff --git a/thirdparty/linux/ntl/doc/ZZ_p.cpp.html b/thirdparty/linux/ntl/doc/ZZ_p.cpp.html new file mode 100644 index 0000000000..082f2a9d7e --- /dev/null +++ b/thirdparty/linux/ntl/doc/ZZ_p.cpp.html @@ -0,0 +1,421 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/ZZ_p.cpp.html + + + + +
+
+/**************************************************************************\
+
+MODULE: ZZ_p
+
+SUMMARY:
+
+The class ZZ_p is used to represent integers mod p.  The modulus p may
+be any positive integer, not necessarily prime.  
+
+Objects of the class ZZ_p are represented as a ZZ in the range 0..p-1.
+
+An executing program maintains a "current modulus", which is set to p using
+ZZ_p::init(p).  The current modulus *must* be initialized before any operations
+on ZZ_p's are performed.  The modulus may be changed, and a mechanism is provided
+for saving and restoring a modulus (see classes ZZ_pPush and ZZ_pContext below).
+
+
+\**************************************************************************/
+
+#include <NTL/ZZ.h>
+#include <NTL/ZZVec.h>
+#include <NTL/SmartPtr.h>
+
+class ZZ_p {
+public:
+  
+   ZZ_p(); // initialize to 0
+
+   ZZ_p(const ZZ_p& a); // copy constructor
+   explicit ZZ_p(long a);  // promotion constructor
+
+   ~ZZ_p(); // destructor
+
+   ZZ_p& operator=(const ZZ_p& a); // assignment
+   ZZ_p& operator=(long a); // assignment
+
+   static void init(const ZZ& p);
+   // ZZ_p::init(p) sets the modulus to p (p > 1)
+  
+   static const ZZ& modulus();
+   // ZZ_p::modulus() yields read-only reference to the current
+   // modulus
+
+   // typedefs to aid in generic programming
+   typedef ZZ rep_type;
+   typedef ZZ_pContext context_type;
+   typedef ZZ_pBak bak_type;
+   typedef ZZ_pPush push_type;
+   typedef ZZ_pX poly_type;
+};
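+
+/****** Example (an illustrative sketch; the modulus is arbitrary): ******
+
+   ZZ p;
+   p = 17;
+   ZZ_p::init(p);      // all subsequent ZZ_p arithmetic is mod 17
+
+   ZZ_p a, b, c;
+   a = 5;
+   b = 20;             // stored reduced: b == 3
+   c = a * b + 1;      // c == 16
+
+*************************************************************************/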
+
+
+/**************************************************************************\
+
+                      Access to representation
+
+\**************************************************************************/
+
+
+const ZZ& rep(const ZZ_p& a);
+// read-only access to representation of a
+
+/****** Example: ********  
+
+   ZZ x;
+   ZZ_p a;
+
+   x = rep(a);
+
+*************************/
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+long operator==(const ZZ_p& a, const ZZ_p& b);
+long operator!=(const ZZ_p& a, const ZZ_p& b);
+
+// PROMOTIONS: the comparison operators provide promotions
+// from long to ZZ_p on (a, b)
+
+long IsZero(const ZZ_p& a);  // test for 0
+long IsOne(const ZZ_p& a);  // test for 1
+
+
+/**************************************************************************\
+
+                                    Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ_p operator+(const ZZ_p& a, const ZZ_p& b);
+ZZ_p operator-(const ZZ_p& a, const ZZ_p& b);
+ZZ_p operator-(const ZZ_p& a); // unary -
+
+ZZ_p& operator+=(ZZ_p& x, const ZZ_p& b);
+ZZ_p& operator+=(ZZ_p& x, long b);
+
+ZZ_p& operator-=(ZZ_p& x, const ZZ_p& b);
+ZZ_p& operator-=(ZZ_p& x, long b);
+
+ZZ_p& operator++(ZZ_p& x);  // prefix
+void operator++(ZZ_p& x, int);  // postfix
+
+ZZ_p& operator--(ZZ_p& x);  // prefix
+void operator--(ZZ_p& x, int);  // postfix
+
+// procedural versions:
+
+
+void add(ZZ_p& x, const ZZ_p& a, const ZZ_p& b); // x = a + b
+void sub(ZZ_p& x, const ZZ_p& a, const ZZ_p& b); // x = a - b
+void negate(ZZ_p& x, const ZZ_p& a); // x = -a
+
+// PROMOTIONS: binary +, - and procedures add, sub provide promotions
+// from long to ZZ_p on (a, b)
+
+
+/**************************************************************************\
+
+                                  Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ_p operator*(const ZZ_p& a, const ZZ_p& b);
+
+ZZ_p& operator*=(ZZ_p& x, const ZZ_p& b);
+ZZ_p& operator*=(ZZ_p& x, long b);
+
+// procedural versions:
+
+
+void mul(ZZ_p& x, const ZZ_p& a, const ZZ_p& b); // x = a * b
+
+void sqr(ZZ_p& x, const ZZ_p& a); // x = a^2
+ZZ_p sqr(const ZZ_p& a); // x = a^2
+
+// PROMOTIONS: operator * and procedure mul provide promotions
+// from long to ZZ_p on (a, b)
+
+
+
+/**************************************************************************\
+
+                              Division
+
+\**************************************************************************/
+
+
+// operator notation:
+
+ZZ_p operator/(const ZZ_p& a, const ZZ_p& b);
+
+ZZ_p& operator/=(ZZ_p& x, const ZZ_p& b);
+ZZ_p& operator/=(ZZ_p& x, long b);
+
+
+// procedural versions:
+
+
+void div(ZZ_p& x, const ZZ_p& a, const ZZ_p& b);
+// x = a/b.
+
+// By default, if b is not invertible, an error is raised.  
+// If exceptions are enabled, an InvModErrorObject is thrown
+// (see documentation in the ZZ module); otherwise, the program
+// aborts with an error message.
+// For backward compatibility, one can define an error handler
+// void H(const ZZ_p& b) and set ZZ_p::DivHandler = H.  Then if b
+// != 0 and b is not invertible, the function H is invoked with b as
+// its argument.  If the error handler function returns to its caller,
+// error handling proceeds as described above.
+
+void inv(ZZ_p& x, const ZZ_p& a); // x = 1/a
+ZZ_p inv(const ZZ_p& a);
+
+// Error handling is the same as above.
+
+// PROMOTIONS: operator / and procedure div provide promotions
+// from long to ZZ_p on (a, b)
+
+
+
+/**************************************************************************\
+
+                            Exponentiation
+
+\**************************************************************************/
+
+
+
+void power(ZZ_p& x, const ZZ_p& a, const ZZ& e); // x = a^e (e may be negative)
+ZZ_p power(const ZZ_p& a, const ZZ& e); // functional variants
+
+void power(ZZ_p& x, const ZZ_p& a, long e);
+ZZ_p power(const ZZ_p& a, long e);
+
+
+
+/**************************************************************************\
+
+                               Random Elements
+
+\**************************************************************************/
+
+
+void random(ZZ_p& x);
+ZZ_p random_ZZ_p();
+// x = random element in ZZ_p.  
+
+
+/**************************************************************************\
+
+                                Input/Output
+
+\**************************************************************************/
+
+
+ostream& operator<<(ostream& s, const ZZ_p& a);
+
+istream& operator>>(istream& s, ZZ_p& x);
+// a ZZ is read and reduced mod p
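+
+/****** Example (an illustrative sketch; assumes the standard <sstream>
+        header for stringstream): ******
+
+   ZZ_p::init(conv<ZZ>(7));
+   ZZ_p x;
+   stringstream s("12");
+   s >> x;             // the ZZ 12 is read and reduced: x == 5
+
+*************************************************************************/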
+
+/**************************************************************************\
+
+                       Modulus Switching
+
+
+A class ZZ_pPush is provided for "backing up" the current modulus
+and installing a new one.
+
+Here is what you do to save the current modulus, temporarily
+set it to p, and automatically restore it:
+
+   {
+      ZZ_pPush push(p);
+
+      ...
+
+   }
+
+The constructor for push will save the current modulus, and install p as the
+current modulus.  The destructor for push will restore the old modulus when the
+scope enclosing it exits.  This is the so-called RAII (resource acquisition is
+initialization) paradigm.
+
+You could also do the following:
+
+   {
+      ZZ_pPush push; // just backup current modulus
+
+        ...
+
+      ZZ_p::init(p1); // install p1
+
+        ...
+
+      ZZ_p::init(p2); // install p2
+
+      // reinstall original modulus at close of scope
+   }
+
+      
+The ZZ_pPush interface is good for implementing simple stack-like
+modulus "context switching".  For more general context switching,
+see ZZ_pContext below.  There is also an older ZZ_pBak class
+that may also be useful.
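+
+For instance (an illustrative sketch), a routine can work modulo an
+auxiliary modulus q without disturbing its caller's modulus:
+
+   void WorkModQ(const ZZ& q)
+   {
+      ZZ_pPush push(q);   // save caller's modulus, install q
+      ZZ_p t;
+      t = 2;
+      t *= t;             // arithmetic is now done mod q
+   }  // caller's modulus is restored here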
+
+..........................................................................
+
+It is critical that ZZ_p objects created under one ZZ_p modulus are not used in
+any non-trivial way "out of context", i.e., under a different (or undefined)
+ZZ_p modulus.  However, for ease-of-use, some operations may be safely
+performed out of context.  These safe operations include: the default and copy
+constructor, the destructor, and the assignment operator.  In addition it is
+generally safe to read any ZZ_p object out of context (i.e., printing it out, or
+fetching its underlying representative using the rep() function).
+
+Any unsafe uses out of context are not in general checked, and may
+lead to unpredictable behavior.
+
+
+NOTE: the implementation of Vec<ZZ_p> is specialized to manage memory more
+efficiently than in the default implementation of Vec<T>.  Specifically,
+contiguous elements in a Vec<ZZ_p> are allocated in a contiguous region of
+memory.  This reduces the number of calls to the memory allocator, and --- more
+significantly --- leads to greater locality of reference.  A consequence of
+this implementation is that any calls to SetLength on a Vec<ZZ_p> object will
+need to use information about the current modulus, and so such calls should
+only be done "in context".  That said, it is still safe to construct a
+Vec<ZZ_p> using the default or copy constructor, and to assign or append one
+Vec<ZZ_p> to another "out of context".
+
+\**************************************************************************/
+
+
+
+// A convenient interface for common cases:
+
+class ZZ_pPush {
+
+public:
+ZZ_pPush();  // backup current modulus
+explicit ZZ_pPush(const ZZ& p);
+explicit ZZ_pPush(const ZZ_pContext& context);
+  // backup current modulus and install the given one
+
+private:
+ZZ_pPush(const ZZ_pPush&); // disabled
+void operator=(const ZZ_pPush&); // disabled
+
+};
+
+
+
+// more general context switching:
+// A ZZ_pContext object has a modulus q (possibly "null")
+
+class ZZ_pContext {
+
+
+public:
+
+ZZ_pContext(); // q = "null"
+
+explicit ZZ_pContext(const ZZ& p);  // q = p
+
+void save(); // q = CurrentModulus
+void restore() const; // CurrentModulus = q
+
+ZZ_pContext(const ZZ_pContext&);  // copy
+ZZ_pContext& operator=(const ZZ_pContext&); // assignment
+~ZZ_pContext(); // destructor
+
+
+};
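+
+/****** Example (an illustrative sketch; the modulus is arbitrary): ******
+
+   ZZ_pContext context;
+   context.save();             // remember the current modulus
+
+   ZZ_p::init(conv<ZZ>(11));   // switch to another modulus
+   // ... work mod 11 ...
+
+   context.restore();          // back to the saved modulus
+
+*************************************************************************/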
+
+
+// An older interface:
+// To describe this logic, think of a ZZ_pBak object
+// as having two components: a modulus q (possibly "null") and
+// an "auto-restore bit" b.
+
+class ZZ_pBak {
+public:
+
+
+   ZZ_pBak();  // q = "null", b = 0
+
+   ~ZZ_pBak();  // if (b) CurrentModulus = q
+
+   void save();  // q = CurrentModulus, b = 1
+   void restore();  // CurrentModulus = q, b = 0
+
+
+private:
+   ZZ_pBak(const ZZ_pBak&);  // copy disabled
+   void operator=(const ZZ_pBak&);  // assignment disabled
+};
+
+
+
+
+
+
+/**************************************************************************\
+
+                               Miscellany
+
+\**************************************************************************/
+
+void clear(ZZ_p& x); // x = 0
+void set(ZZ_p& x); // x = 1
+
+static long ZZ_p::ModulusSize();
+//  ZZ_p::ModulusSize() returns ZZ_p::modulus().size()
+
+static const ZZ_p& ZZ_p::zero();
+// ZZ_p::zero() yields a read-only reference to zero
+
+void swap(ZZ_p& x, ZZ_p& y);
+// swap x and y (done by "pointer swapping", if possible).
+
+void ZZ_p::swap(ZZ_p& x);
+// swap member function
+
+
+ZZ_p::ZZ_p(INIT_NO_ALLOC_TYPE);
+// special constructor: invoke as ZZ_p x(INIT_NO_ALLOC);
+// initializes x to 0, but allocates no space (this is now the default)
+
+ZZ_p::ZZ_p(INIT_ALLOC_TYPE);
+// special constructor: invoke as ZZ_p x(INIT_ALLOC);
+// initializes x to 0, but allocates space
+
+
+ZZ_p::allocate();
+// useful in conjunction with the INIT_NO_ALLOC constructor:
+// x.allocate() will pre-allocate space for x, using the
+// current modulus
+
+
+ diff --git a/thirdparty/linux/ntl/doc/ZZ_p.txt b/thirdparty/linux/ntl/doc/ZZ_p.txt new file mode 100644 index 0000000000..91a9bca2cc --- /dev/null +++ b/thirdparty/linux/ntl/doc/ZZ_p.txt @@ -0,0 +1,411 @@ + + +/**************************************************************************\ + +MODULE: ZZ_p + +SUMMARY: + +The class ZZ_p is used to represent integers mod p. The modulus p may +be any positive integer, not necessarily prime. + +Objects of the class ZZ_p are represented as a ZZ in the range 0..p-1. + +An executing program maintains a "current modulus", which is set to p using +ZZ_p::init(p). The current modulus *must* be initialized before any operations +on ZZ_p's are performed. The modulus may be changed, and a mechanism is provided +for saving and restoring a modulus (see classes ZZ_pPush and ZZ_pContext below). + + +\**************************************************************************/ + +#include +#include +#include + +class ZZ_p { +public: + + ZZ_p(); // initialize to 0 + + ZZ_p(const ZZ_p& a); // copy constructor + explicit ZZ_p(long a); // promotion constructor + + ~ZZ_p(); // destructor + + ZZ_p& operator=(const ZZ_p& a); // assignment + ZZ_p& operator=(long a); // assignment + + static void init(const ZZ& p); + // ZZ_p::init(p) sets the modulus to p (p > 1) + + static const ZZ& modulus(); + // ZZ_p::modulus() yields read-only reference to the current + // modulus + + // typedefs to aid in generic programming + typedef ZZ rep_type; + typedef ZZ_pContext context_type; + typedef ZZ_pBak bak_type; + typedef ZZ_pPush push_type; + typedef ZZ_pX poly_type; +}; + + +/**************************************************************************\ + + Access to representation + +\**************************************************************************/ + + +const ZZ& rep(const ZZ_p& a); +// read-only access to representation of a + +/****** Example: ******** + + ZZ x; + ZZ_p a; + + x = rep(a); + +*************************/ + + +/**************************************************************************\ + + Comparison + +\**************************************************************************/ + + +long operator==(const ZZ_p& a, const ZZ_p& b); +long operator!=(const ZZ_p& a, const ZZ_p& b); + +// PROMOTIONS: the comparison operators provide promotions +// from long to ZZ_p on (a, b) + +long IsZero(const ZZ_p& a); // test for 0 +long IsOne(const ZZ_p& a); // test for 1 + + +/**************************************************************************\ + + Addition + +\**************************************************************************/ + +// operator notation: + +ZZ_p operator+(const ZZ_p& a, const ZZ_p& b); +ZZ_p operator-(const ZZ_p& a, const ZZ_p& b); +ZZ_p operator-(const ZZ_p& a); // unary - + +ZZ_p& operator+=(ZZ_p& x, const ZZ_p& b); +ZZ_p& operator+=(ZZ_p& x, long b); + +ZZ_p& operator-=(ZZ_p& x, const ZZ_p& b); +ZZ_p& operator-=(ZZ_p& x, long b); + +ZZ_p& operator++(ZZ_p& x); // prefix +void operator++(ZZ_p& x, int); // postfix + +ZZ_p& operator--(ZZ_p& x); // prefix +void operator--(ZZ_p& x, int); // postfix + +// procedural versions: + + +void add(ZZ_p& x, const ZZ_p& a, const ZZ_p& b); // x = a + b +void sub(ZZ_p& x, const ZZ_p& a, const ZZ_p& b); // x = a - b +void negate(ZZ_p& x, const ZZ_p& a); // x = -a + +// PROMOTIONS: binary +, - and procedures add, sub provide promotions +// from long to ZZ_p on (a, b) + + +/**************************************************************************\ + + Multiplication + 
+\**************************************************************************/ + +// operator notation: + +ZZ_p operator*(const ZZ_p& a, const ZZ_p& b); + +ZZ_p& operator*=(ZZ_p& x, const ZZ_p& b); +ZZ_p& operator*=(ZZ_p& x, long b); + +// procedural versions: + + +void mul(ZZ_p& x, const ZZ_p& a, const ZZ_p& b); // x = a * b + +void sqr(ZZ_p& x, const ZZ_p& a); // x = a^2 +ZZ_p sqr(const ZZ_p& a); // x = a^2 + +// PROMOTIONS: operator * and procedure mul provide promotions +// from long to ZZ_p on (a, b) + + + +/**************************************************************************\ + + Division + +\**************************************************************************/ + + +// operator notation: + +ZZ_p operator/(const ZZ_p& a, const ZZ_p& b); + +ZZ_p& operator/=(ZZ_p& x, const ZZ_p& b); +ZZ_p& operator/=(ZZ_p& x, long b); + + +// procedural versions: + + +void div(ZZ_p& x, const ZZ_p& a, const ZZ_p& b); +// x = a/b. + +// By default, if b is not invertible, an error is raised. +// If exceptions are enabled, an InvModErrorObject is thrown +// (see documentation in the ZZ module); otherwise, the program +// aborts with an error message. +// For backward compatibility, one can define an error handler +// void H(const ZZ_p& b), and setting ZZ_p::DivHandler = H. Then if b +// != 0 and b is not invertible, the function H is invoked with b as +// its argument. If the error handler function returns to its caller, +// error handling proceeds as described above. + +void inv(ZZ_p& x, const ZZ_p& a); // x = 1/a +ZZ_p inv(const ZZ_p& a); + +// Error handling is the same as above. + +// PROMOTIONS: operator / and procedure div provide promotions +// from long to ZZ_p on (a, b) + + + +/**************************************************************************\ + + Exponentiation + +\**************************************************************************/ + + + +void power(ZZ_p& x, const ZZ_p& a, const ZZ& e); // x = a^e (e may be negative) +ZZ_p power(const ZZ_p& a, const ZZ& e); // functional variants + +void power(ZZ_p& x, const ZZ_p& a, long e); +ZZ_p power(ZZ_p& x, const ZZ_p& a, long e); + + + +/**************************************************************************\ + + Random Elements + +\**************************************************************************/ + + +void random(ZZ_p& x); +ZZ_p random_ZZ_p(); +// x = random element in ZZ_p. + + +/**************************************************************************\ + + Input/Output + +\**************************************************************************/ + + +ostream& operator<<(ostream& s, const ZZ_p& a); + +istream& operator>>(istream& s, ZZ_p& x); +// a ZZ is read and reduced mod p + +/**************************************************************************\ + + Modulus Switching + + +A class ZZ_pPush is provided for "backing up" the current modulus +and installing a new one. + +Here is what you do to save the current modulus, temporarily +set it to p, and automatically restore it: + + { + ZZ_pPush push(p); + + ... + + } + +The constructor for push will save the current modulus, and install p as the +current modulus. The destructor for push will restore the old modulus when the +scope enclosing it exits. This is the so-called RAII (resource acquisition is +initialization) paradigm. + +You could also do the following: + + { + ZZ_pPush push; // just backup current modulus + + ... + + ZZ_p::init(p1); // install p1 + + ... 
+ + ZZ_p::init(p2); // install p2 + + // reinstall original modulus at close of scope + } + + +The ZZ_pPush interface is good for implementing simple stack-like +modulus "context switching". For more general context switching, +see ZZ_pContext below. There is also an older ZZ_pBak class +that may also be useful. + +.......................................................................... + +It is critical that ZZ_p objects created under one ZZ_p modulus are not used in +any non-trivial way "out of context", i.e., under a different (or undefined) +ZZ_p modulus. However, for ease-of-use, some operations may be safely +performed out of context. These safe operations include: the default and copy +constructor, the destructor, and the assignment operator. In addition is is +generally safe to read any ZZ_p object out of context (i.e., printing it out, or +fetching its underlying representive using the rep() function). + +Any unsafe uses out of context are not in general checked, and may +lead to unpredictable behavior. + + +NOTE: the implementation of Vec is specialized to manage memory more +efficiently than in the default implementation of Vec. Specifically, +contiguous elements in a Vec are allocated in a contiguous region of +memory. This reduces the number of calls to the memory allocator, and --- more +significantly --- leads to greater locality of reference. A consequence of +this implementation is that any calls to SetLength on a Vec object will +need to use information about the current modulus, and so such calls should +only be done "in context". That said, it is still safe to construct a +Vec using the default or copy contructor, and to assign or append one +Vec to another "out of context". + +\**************************************************************************/ + + + +// A convenient interface for common cases: + +class ZZ_pPush { + +public: +ZZ_pPush(); // backup current modulus +explicit ZZ_pPush(const ZZ& p); +explicit ZZ_pPush(const ZZ_pContext& context); + // backup current modulus and install the given one + +private: +ZZ_pPush(const ZZ_pPush&); // disabled +void operator=(const ZZ_pPush&); // disabled + +}; + + + +// more general context switching: +// A ZZ_pContext object has a modulus q (possibly "null") + +class ZZ_pContext { + + +public: + +ZZ_pContext(); // q = "null" + +explicit ZZ_pContext(const ZZ& p); // q = p + +void save(); // q = CurrentModulus +void restore() const; // CurrentModulus = q + +ZZ_pContext(const ZZ_pContext&); // copy +ZZ_pContext& operator=(const ZZ_pContext&); // assignment +~ZZ_pContext(); // destructor + + +}; + + +// An older interface: +// To describe this logic, think of a ZZ_pBak object +// of having two components: a modulus q (possibly "null") and +// an "auto-restore bit" b. 
+ +class ZZ_pBak { +public: + + + ZZ_pBak(); // q = "null", b = 0 + + ~ZZ_pBak(); // if (b) CurrentModulus = q + + void save(); // q = CurrentModulus, b = 1 + void restore(); // CurrentModulus = q, b = 0 + + +private: + ZZ_pBak(const ZZ_pBak&); // copy disabled + void operator=(const ZZ_pBak&); // assignment disabled +}; + + + + + + +/**************************************************************************\ + + Miscellany + +\**************************************************************************/ + +void clear(ZZ_p& x); // x = 0 +void set(ZZ_p& x); // x = 1 + +static long ZZ_p::ModulusSize(); +// ZZ_p::ModulusSize() returns ZZ_p::modulus().size() + +static const ZZ_p& ZZ_p::zero(); +// ZZ_p::zero() yields a read-only reference to zero + +void swap(ZZ_p& x, ZZ_p& y); +// swap x and y (done by "pointer swapping", if possible). + +void ZZ_p::swap(ZZ& x); +// swap member function + + +ZZ_p::ZZ_p(INIT_NO_ALLOC_TYPE); +// special constructor: invoke as ZZ_p x(INIT_NO_ALLOC); +// initializes x to 0, but allocates no space (this is now the default) + +ZZ_p::ZZ_p(INIT_ALLOC_TYPE); +// special constructor: invoke as ZZ_p x(INIT_ALLOC); +// initializes x to 0, but allocates space + + +ZZ_p::allocate(); +// useful in conjunction with the INIT_NO_ALLLOC constructor: +// x.allocate() will pre-allocate space for x, using the +// current modulus + diff --git a/thirdparty/linux/ntl/doc/ZZ_pE.cpp.html b/thirdparty/linux/ntl/doc/ZZ_pE.cpp.html new file mode 100644 index 0000000000..542d3efe6e --- /dev/null +++ b/thirdparty/linux/ntl/doc/ZZ_pE.cpp.html @@ -0,0 +1,402 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/ZZ_pE.cpp.html + + + + +
+
+/**************************************************************************\
+
+MODULE: ZZ_pE
+
+SUMMARY:
+
+The class ZZ_pE is used to represent polynomials in Z_p[X] modulo a
+polynomial P.  The modulus P may be any polynomial with deg(P) > 0,
+not necessarily irreducible.  The modulus p defining Z_p need
+not be prime either.
+
+Objects of the class ZZ_pE are represented as a ZZ_pX of degree < deg(P).
+
+An executing program maintains a "current modulus", which is set to P
+using ZZ_pE::init(P).  The current modulus for ZZ_pE (as well as for ZZ_p)
+*must* be initialized before any operations on ZZ_pE's are performed.
+
+The modulus may be changed, and a mechanism is provided for saving and
+restoring a modulus (see classes ZZ_pEPush and ZZ_pEContext below).
+
+
+\**************************************************************************/
+
+#include <NTL/ZZ_pX.h>
+
+class ZZ_pE {
+public:
+  
+   ZZ_pE(); // initial value 0
+
+   ZZ_pE(const ZZ_pE& a); // copy constructor
+   explicit ZZ_pE(const ZZ_p& a); // promotion
+   explicit ZZ_pE(long a); // promotion
+  
+   ZZ_pE& operator=(const ZZ_pE& a); // assignment
+   ZZ_pE& operator=(const ZZ_p& a); // assignment
+   ZZ_pE& operator=(long a); // assignment
+  
+   ~ZZ_pE(); // destructor
+
+   static void init(const ZZ_pX& P);
+   // ZZ_pE::init(P) initializes the current modulus to P;
+   // required: deg(P) >= 1.
+  
+   static const ZZ_pXModulus& modulus();
+   // ZZ_pE::modulus() yields read-only reference to the current modulus
+
+   static long degree();
+   // ZZ_pE::degree() returns deg(P)
+
+
+   // typedefs to aid generic programming
+   typedef ZZ_pX rep_type;
+   typedef ZZ_pEContext context_type;
+   typedef ZZ_pEBak bak_type;
+   typedef ZZ_pEPush push_type;
+   typedef ZZ_pEX poly_type;
+
+};
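+
+/****** Example (an illustrative sketch; for arithmetic in the field
+        GF(p^n), p should be prime and P irreducible): ******
+
+   ZZ_p::init(conv<ZZ>(7));                // base ring Z_7
+
+   ZZ_pX P;
+   SetCoeff(P, 2, 1); SetCoeff(P, 0, 1);   // P = X^2 + 1, irreducible mod 7
+
+   ZZ_pE::init(P);                         // ZZ_pE now represents GF(7^2)
+
+   ZZ_pE a, b;
+   a = 3;
+   random(b);
+   ZZ_pE c = a * b + sqr(b);               // arithmetic in GF(49)
+
+*************************************************************************/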
+
+
+const ZZ_pX& rep(const ZZ_pE& a); // read-only access to representation of a
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+long operator==(const ZZ_pE& a, const ZZ_pE& b);
+long operator!=(const ZZ_pE& a, const ZZ_pE& b);
+
+long IsZero(const ZZ_pE& a);  // test for 0
+long IsOne(const ZZ_pE& a);  // test for 1
+
+// PROMOTIONS: ==, != promote {long, ZZ_p} to ZZ_pE on (a, b).
+
+
+/**************************************************************************\
+
+                                    Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ_pE operator+(const ZZ_pE& a, const ZZ_pE& b);
+
+ZZ_pE operator-(const ZZ_pE& a, const ZZ_pE& b);
+ZZ_pE operator-(const ZZ_pE& a);
+
+ZZ_pE& operator+=(ZZ_pE& x, const ZZ_pE& a);
+ZZ_pE& operator+=(ZZ_pE& x, const ZZ_p& a);
+ZZ_pE& operator+=(ZZ_pE& x, long a);
+
+ZZ_pE& operator++(ZZ_pE& x); // prefix
+void operator++(ZZ_pE& x, int); // postfix
+
+ZZ_pE& operator-=(ZZ_pE& x, const ZZ_pE& a);
+ZZ_pE& operator-=(ZZ_pE& x, const ZZ_p& a);
+ZZ_pE& operator-=(ZZ_pE& x, long a);
+
+ZZ_pE& operator--(ZZ_pE& x); // prefix
+void operator--(ZZ_pE& x, int); // postfix
+
+// procedural versions:
+
+void add(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b); // x = a + b
+void sub(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b); // x = a - b
+void negate(ZZ_pE& x, const ZZ_pE& a); // x = - a
+
+// PROMOTIONS: +, -, add, sub promote {long, ZZ_p} to ZZ_pE on (a, b).
+
+
+/**************************************************************************\
+
+                                  Multiplication
+
+\**************************************************************************/
+
+
+// operator notation:
+
+ZZ_pE operator*(const ZZ_pE& a, const ZZ_pE& b);
+
+ZZ_pE& operator*=(ZZ_pE& x, const ZZ_pE& a);
+ZZ_pE& operator*=(ZZ_pE& x, const ZZ_p& a);
+ZZ_pE& operator*=(ZZ_pE& x, long a);
+
+// procedural versions:
+
+
+void mul(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b); // x = a * b
+
+void sqr(ZZ_pE& x, const ZZ_pE& a); // x = a^2
+ZZ_pE sqr(const ZZ_pE& a);
+
+// PROMOTIONS: *, mul promote {long, ZZ_p} to ZZ_pE on (a, b).
+
+
+
+/**************************************************************************\
+
+                                     Division
+
+\**************************************************************************/
+
+
+// operator notation:
+
+ZZ_pE operator/(const ZZ_pE& a, const ZZ_pE& b);
+
+ZZ_pE& operator/=(ZZ_pE& x, const ZZ_pE& a);
+ZZ_pE& operator/=(ZZ_pE& x, const ZZ_p& a);
+ZZ_pE& operator/=(ZZ_pE& x, long a);
+
+
+// procedural versions:
+
+void div(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b);
+// x = a/b.  If b is not invertible, an error is raised.
+
+void inv(ZZ_pE& x, const ZZ_pE& a);
+ZZ_pE inv(const ZZ_pE& a);
+// x = 1/a
+
+// PROMOTIONS: /, div promote {long, ZZ_p} to ZZ_pE on (a, b).
+
+
+/**************************************************************************\
+
+                                  Exponentiation
+
+\**************************************************************************/
+
+
+
+void power(ZZ_pE& x, const ZZ_pE& a, const ZZ& e);
+ZZ_pE power(const ZZ_pE& a, const ZZ& e);
+
+void power(ZZ_pE& x, const ZZ_pE& a, long e);
+ZZ_pE power(const ZZ_pE& a, long e);
+
+// x = a^e (e may be negative)
+
+
+
+/**************************************************************************\
+
+                               Random Elements
+
+\**************************************************************************/
+
+
+void random(ZZ_pE& x);
+ZZ_pE random_ZZ_pE();
+// x = random element in ZZ_pE.
+
+/**************************************************************************\
+
+                               Norms and Traces
+
+\**************************************************************************/
+
+
+
+void trace(ZZ_p& x, const ZZ_pE& a);  // x = trace of a
+ZZ_p trace(const ZZ_pE& a);
+
+void norm(ZZ_p& x, const ZZ_pE& a);   // x = norm of a
+ZZ_p norm(const ZZ_pE& a);
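+
+/****** Example (an illustrative sketch; assumes the current moduli make
+        ZZ_pE the field GF(7^2), as in the sketch above): ******
+
+   ZZ_pE a;
+   random(a);
+   ZZ_p t = trace(a);   // t == a + a^7 in GF(7^2)
+   ZZ_p n = norm(a);    // n == a * a^7 in GF(7^2)
+
+*************************************************************************/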
+
+
+
+/**************************************************************************\
+
+                                Input/Output
+
+\**************************************************************************/
+
+
+ostream& operator<<(ostream& s, const ZZ_pE& a);
+
+istream& operator>>(istream& s, ZZ_pE& x);
+// a ZZ_pX is read and reduced mod p
+
+
+/**************************************************************************\
+
+                       Modulus Switching
+
+A class ZZ_pEPush is provided for "backing up" the current modulus
+and installing a new one.
+
+Here is what you do to save the current modulus, temporarily
+set it to P, and automatically restore it:
+
+   {
+      ZZ_pEPush push(P);
+
+      ...
+
+   }
+
+The constructor for push will save the current modulus, and install P as the
+current modulus.  The destructor for push will restore the old modulus when the
+scope enclosing it exits.  This is the so-called RAII (resource acquisition is
+initialization) paradigm.
+
+You could also do the following:
+
+   {
+      ZZ_pEPush push; // just backup current modulus
+
+        ...
+
+      ZZ_pE::init(P1); // install P1
+
+        ...
+
+      ZZ_pE::init(P2); // install P2
+
+      // reinstall original modulus at close of scope
+   }
+
+      
+The ZZ_pEPush interface is good for implementing simple stack-like
+modulus "context switching".  For more general context switching,
+see ZZ_pEContext below.  There is also an older ZZ_pEBak class
+that may also be useful.
+
+..........................................................................
+
+It is critical that ZZ_pE objects created under one ZZ_pE modulus are not used in
+any non-trivial way "out of context", i.e., under a different (or undefined)
+ZZ_pE modulus.  However, for ease-of-use, some operations may be safely
+performed out of context.  These safe operations include: the default and copy
+constructor, the destructor, and the assignment operator.  In addition it is
+generally safe to read any ZZ_pE object out of context (i.e., printing it out, or
+fetching its underlying representative using the rep() function).
+
+Any unsafe uses out of context are not in general checked, and may
+lead to unpredictable behavior.
+
+
+\**************************************************************************/
+
+
+// A convenient interface for common cases
+
+class ZZ_pEPush {
+
+public:
+ZZ_pEPush();  // backup current modulus
+explicit ZZ_pEPush(const ZZ_pX& P);
+explicit ZZ_pEPush(const ZZ_pEContext& context);
+  // backup current modulus and install the given one
+
+private:
+ZZ_pEPush(const ZZ_pEPush&); // disabled
+void operator=(const ZZ_pEPush&); // disabled
+
+};
+
+
+
+// more general context switching:
+// A ZZ_pEContext object has a modulus Q (possibly "null"),
+
+class ZZ_pEContext {
+
+
+public:
+
+ZZ_pEContext(); // Q = "null"
+explicit ZZ_pEContext(const ZZ_pX& P); // Q = P
+
+void save(); // Q = CurrentModulus
+void restore() const; // CurrentModulus = Q
+
+ZZ_pEContext(const ZZ_pEContext&);  // copy
+ZZ_pEContext& operator=(const ZZ_pEContext&); // assignment
+~ZZ_pEContext(); // destructor
+
+
+};
+
+
+// An older interface:
+// To describe this logic, think of a ZZ_pEBak object
+// as having two components: a modulus Q (possibly "null") and
+// an "auto-restore bit" b.
+
+
+class ZZ_pEBak {
+public:
+
+
+   ZZ_pEBak();  // Q = "null", b = 0
+
+   ~ZZ_pEBak();  // if (b) CurrentModulus = Q
+
+   void save();  // Q = CurrentModulus, b = 1
+   void restore();  // CurrentModulus = Q, b = 0
+
+
+private:
+   ZZ_pEBak(const ZZ_pEBak&);  // copy disabled
+   void operator=(const ZZ_pEBak&);  // assignment disabled
+};
+
+
+
+
+
+
+/**************************************************************************\
+
+                               Miscellany
+
+\**************************************************************************/
+
+void clear(ZZ_pE& x); // x = 0
+void set(ZZ_pE& x); // x = 1
+
+static const ZZ_pE& ZZ_pE::zero();
+// ZZ_pE::zero() yields a read-only reference to zero
+
+void ZZ_pE::swap(ZZ_pE& x);
+void swap(ZZ_pE& x, ZZ_pE& y);
+// swap (done by "pointer swapping", if possible).
+
+static ZZ& ZZ_pE::cardinality();
+// yields the cardinality, i.e., p^{ZZ_pE::degree()}
+
+ZZ_pE::ZZ_pE(INIT_NO_ALLOC_TYPE);
+// special constructor: invoke as ZZ_pE x(INIT_NO_ALLOC);
+// initializes x to 0, but allocates no space (this is now the default)
+
+ZZ_pE::ZZ_pE(INIT_ALLOC_TYPE);
+// special constructor: invoke as ZZ_pE x(INIT_ALLOC);
+// initializes x to 0, but allocates space
+
+ZZ_pE::allocate();
+// useful in conjunction with the INIT_NO_ALLOC constructor:
+// x.allocate() will pre-allocate space for x, using the
+// current modulus
+
+
+ diff --git a/thirdparty/linux/ntl/doc/ZZ_pE.txt b/thirdparty/linux/ntl/doc/ZZ_pE.txt new file mode 100644 index 0000000000..f4387d57a1 --- /dev/null +++ b/thirdparty/linux/ntl/doc/ZZ_pE.txt @@ -0,0 +1,392 @@ + + +/**************************************************************************\ + +MODULE: ZZ_pE + +SUMMARY: + +The class ZZ_pE is used to represent polynomials in Z_p[X] modulo a +polynomial P. The modulus P may be any polynomial with deg(P) > 0, +not necessarily irreducible. The modulus p defining Z_p need +not be prime either. + +Objects of the class ZZ_pE are represented as a ZZ_pX of degree < deg(P). + +An executing program maintains a "current modulus", which is set to P +using ZZ_pE::init(P). The current modulus for ZZ_pE (as well as for ZZ_p) +*must* be initialized before an operations on ZZ_pE's are performed. + +The modulus may be changed, and a mechanism is provided for saving and +restoring a modulus (see classes ZZ_pEPush and ZZ_pEContext below). + + +\**************************************************************************/ + +#include + +class ZZ_pE { +public: + + ZZ_pE(); // initial value 0 + + ZZ_pE(const ZZ_pE& a); // copy constructor + explicit ZZ_pE(const ZZ_p& a); // promotion + explicit ZZ_pE(long a); // promotion + + ZZ_pE& operator=(const ZZ_pE& a); // assignment + ZZ_pE& operator=(const ZZ_p& a); // assignment + ZZ_pE& operator=(long a); // assignment + + ~ZZ_pE(); // destructor + + void init(const ZZ_pX& P); + // ZZ_pE::init(P) initializes the current modulus to P; + // required: deg(P) >= 1. + + static const ZZ_pXModulus& modulus(); + // ZZ_pE::modulus() yields read-only reference to the current modulus + + static long degree(); + // ZZ_pE::degree() returns deg(P) + + + // typedefs to aid generic programming + typedef ZZ_pX rep_type; + typedef ZZ_pEContext context_type; + typedef ZZ_pEBak bak_type; + typedef ZZ_pEPush push_type; + typedef ZZ_pEX poly_type; + +}; + + +const ZZ_pX& rep(const ZZ_pE& a); // read-only access to representation of a + + + +/**************************************************************************\ + + Comparison + +\**************************************************************************/ + +long operator==(const ZZ_pE& a, const ZZ_pE& b); +long operator!=(const ZZ_pE& a, const ZZ_pE& b); + +long IsZero(const ZZ_pE& a); // test for 0 +long IsOne(const ZZ_pE& a); // test for 1 + +// PROMOTIONS: ==, != promote {long, ZZ_p} to ZZ_pE on (a, b). + + +/**************************************************************************\ + + Addition + +\**************************************************************************/ + +// operator notation: + +ZZ_pE operator+(const ZZ_pE& a, const ZZ_pE& b); + +ZZ_pE operator-(const ZZ_pE& a, const ZZ_pE& b); +ZZ_pE operator-(const ZZ_pE& a); + +ZZ_pE& operator+=(ZZ_pE& x, const ZZ_pE& a); +ZZ_pE& operator+=(ZZ_pE& x, const ZZ_p& a); +ZZ_pE& operator+=(ZZ_pE& x, long a); + +ZZ_pE& operator++(ZZ_pE& x); // prefix +void operator++(ZZ_pE& x, int); // postfix + +ZZ_pE& operator-=(ZZ_pE& x, const ZZ_pE& a); +ZZ_pE& operator-=(ZZ_pE& x, const ZZ_p& a); +ZZ_pE& operator-=(ZZ_pE& x, long a); + +ZZ_pE& operator--(ZZ_pE& x); // prefix +void operator--(ZZ_pE& x, int); // postfix + +// procedural versions: + +void add(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b); // x = a + b +void sub(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b); // x = a - b +void negate(ZZ_pE& x, const ZZ_pE& a); // x = - a + +// PROMOTIONS: +, -, add, sub promote {long, ZZ_p} to ZZ_pE on (a, b). 
+ + +/**************************************************************************\ + + Multiplication + +\**************************************************************************/ + + +// operator notation: + +ZZ_pE operator*(const ZZ_pE& a, const ZZ_pE& b); + +ZZ_pE& operator*=(ZZ_pE& x, const ZZ_pE& a); +ZZ_pE& operator*=(ZZ_pE& x, const ZZ_p& a); +ZZ_pE& operator*=(ZZ_pE& x, long a); + +// procedural versions: + + +void mul(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b); // x = a * b + +void sqr(ZZ_pE& x, const ZZ_pE& a); // x = a^2 +ZZ_pE sqr(const ZZ_pE& a); + +// PROMOTIONS: *, mul promote {long, ZZ_p} to ZZ_pE on (a, b). + + + +/**************************************************************************\ + + Division + +\**************************************************************************/ + + +// operator notation: + +ZZ_pE operator/(const ZZ_pE& a, const ZZ_pE& b); + +ZZ_pE& operator/=(ZZ_pE& x, const ZZ_pE& a); +ZZ_pE& operator/=(ZZ_pE& x, const ZZ_p& a); +ZZ_pE& operator/=(ZZ_pE& x, long a); + + +// procedural versions: + +void div(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b); +// x = a/b. If b is not invertible, an error is raised. + +void inv(ZZ_pE& x, const ZZ_pE& a); +ZZ_pE inv(const ZZ_pE& a); +// x = 1/a + +PROMOTIONS: /, div promote {long, ZZ_p} to ZZ_pE on (a, b). + + +/**************************************************************************\ + + Exponentiation + +\**************************************************************************/ + + + +void power(ZZ_pE& x, const ZZ_pE& a, const ZZ& e); +ZZ_pE power(const ZZ_pE& a, const ZZ& e); + +void power(ZZ_pE& x, const ZZ_pE& a, long e); +ZZ_pE power(const ZZ_pE& a, long e); + +// x = a^e (e may be negative) + + + +/**************************************************************************\ + + Random Elements + +\**************************************************************************/ + + +void random(ZZ_pE& x); +ZZ_pE random_ZZ_pE(); +// x = random element in ZZ_pE. + +/**************************************************************************\ + + Norms and Traces + +\**************************************************************************/ + + + +void trace(ZZ_p& x, const ZZ_pE& a); // x = trace of a +ZZ_p trace(const ZZ_pE& a); + +void norm(ZZ_p& x, const ZZ_pE& a); // x = norm of a +ZZ_p norm(const ZZ_pE& a); + + + +/**************************************************************************\ + + Input/Output + +\**************************************************************************/ + + +ostream& operator<<(ostream& s, const ZZ_pE& a); + +istream& operator>>(istream& s, ZZ_pE& x); +// a ZZ_pX is read and reduced mod p + + +/**************************************************************************\ + + Modulus Switching + +A class ZZ_pEPush is provided for "backing up" the current modulus +and installing a new one. + +Here is what you do to save the current modulus, temporarily +set it to P, and automatically restore it: + + { + ZZ_pEPush push(P); + + ... + + } + +The constructor for push will save the current modulus, and install P as the +current modulus. The destructor for push will restore the old modulus when the +scope enclosing it exits. This is the so-called RAII (resource acquisition is +initialization) paradigm. + +You could also do the following: + + { + ZZ_pEPush push; // just backup current modulus + + ... + + ZZ_pE::init(P1); // install P1 + + ... 
+ + ZZ_pE::init(P2); // install P2 + + // reinstall original modulus as close of scope + } + + +The ZZ_pEPush interface is good for implementing simple stack-like +modulus "context switching". For more general context switching, +see ZZ_pEContext below. There is also an older ZZ_pEBak class +that may also be useful. + +.......................................................................... + +It is critical that ZZ_pE objects created under one ZZ_pE modulus are not used in +any non-trivial way "out of context", i.e., under a different (or undefined) +ZZ_pE modulus. However, for ease-of-use, some operations may be safely +performed out of context. These safe operations include: the default and copy +constructor, the destructor, and the assignment operator. In addition is is +generally safe to read any ZZ_pE object out of context (i.e., printing it out, or +fetching its underlying representive using the rep() function). + +Any unsafe uses out of context are not in general checked, and may +lead to unpredictable behavior. + + +\**************************************************************************/ + + +// A convenient interface for common cases + +class ZZ_pEPush { + +public: +ZZ_pEPush(); // backup current modulus +explicit ZZ_pEPush(const ZZ_pX& P); +explicit ZZ_pEPush(const ZZ_pEContext& context); + // backup current modulus and install the given one + +private: +ZZ_pEPush(const ZZ_pEPush&); // disabled +void operator=(const ZZ_pEPush&); // disabled + +}; + + + +// more general context switching: +// A ZZ_pEContext object has a modulus Q (possibly "null"), + +class ZZ_pEContext { + + +public: + +ZZ_pEContext(); // Q = "null" +explicit ZZ_pEContext(const ZZ_pX& P); // Q = P + +void save(); // Q = CurrentModulus +void restore() const; // CurrentModulus = Q + +ZZ_pEContext(const ZZ_pEContext&); // copy +ZZ_pEContext& operator=(const ZZ_pEContext&); // assignment +~ZZ_pEContext(); // destructor + + +}; + + +// An older interface: +// To describe this logic, think of a ZZ_pEBak object +// of having two components: a modulus Q (possibly "null") and +// an "auto-restore bit" b. + + +class ZZ_pEBak { +public: + + + ZZ_pEBak(); // Q = "null", b = 0 + + ~ZZ_pEBak(); // if (b) CurrentModulus = Q + + void save(); // Q = CurrentModulus, b = 1 + void restore(); // CurrentModulus = Q, b = 0 + + +private: + ZZ_pEBak(const ZZ_pEBak&); // copy disabled + void operator=(const ZZ_pEBak&); // assignment disabled +}; + + + + + + +/**************************************************************************\ + + Miscellany + +\**************************************************************************/ + +void clear(ZZ_pE& x); // x = 0 +void set(ZZ_pE& x); // x = 1 + +static const ZZ_pE& ZZ_pE::zero(); +// ZZ_pE::zero() yields a read-only reference to zero + +void ZZ_pE::swap(ZZ_pE& x); +void swap(ZZ_pE& x, ZZ_pE& y); +// swap (done by "pointer swapping", if possible). 
+ +static ZZ& ZZ_pE::cardinality(); +// yields the cardinality, i.e., p^{ZZ_pE::degree()} + +ZZ_pE::ZZ_pE(INIT_NO_ALLOC_TYPE); +// special constructor: invoke as ZZ_pE x(INIT_NO_ALLOC); +// initializes x to 0, but allocates no space (this is now the default) + +ZZ_pE::ZZ_pE(INIT_ALLOC_TYPE); +// special constructor: invoke as ZZ_pE x(INIT_ALLOC); +// initializes x to 0, but allocates space + +ZZ_pE::allocate(); +// useful in conjunction with the INIT_NO_ALLLOC constructor: +// x.allocate() will pre-allocate space for x, using the +// current modulus + diff --git a/thirdparty/linux/ntl/doc/ZZ_pEX.cpp.html b/thirdparty/linux/ntl/doc/ZZ_pEX.cpp.html new file mode 100644 index 0000000000..7d0eddc7b1 --- /dev/null +++ b/thirdparty/linux/ntl/doc/ZZ_pEX.cpp.html @@ -0,0 +1,900 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/ZZ_pEX.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: ZZ_pEX
+
+SUMMARY:
+
+The class ZZ_pEX represents polynomials over ZZ_pE,
+and so can be used, for example, for arithmetic in GF(p^n)[X].
+However, except where mathematically necessary (e.g., GCD computations),
+ZZ_pE need not be a field.
+
+\**************************************************************************/
+
+#include <NTL/ZZ_pE.h>
+#include <NTL/vec_ZZ_pE.h>
+
+class ZZ_pEX {
+public:
+
+   ZZ_pEX(); // initial value 0
+
+   ZZ_pEX(const ZZ_pEX& a); // copy
+
+   explicit ZZ_pEX(const ZZ_pE& a); // promotion
+   explicit ZZ_pEX(const ZZ_p& a);
+   explicit ZZ_pEX(long a);
+
+   ZZ_pEX& operator=(const ZZ_pEX& a); // assignment
+   ZZ_pEX& operator=(const ZZ_pE& a);
+   ZZ_pEX& operator=(const ZZ_p& a);
+   ZZ_pEX& operator=(long a);
+
+   ~ZZ_pEX(); // destructor
+
+   ZZ_pEX(INIT_MONO_TYPE, long i, const ZZ_pE& c);
+   ZZ_pEX(INIT_MONO_TYPE, long i, const ZZ_p& c);
+   ZZ_pEX(INIT_MONO_TYPE, long i, long c);
+   // initialize to c*X^i, invoke as ZZ_pEX(INIT_MONO, i, c)
+
+   ZZ_pEX(INIT_MONO_TYPE, long i);
+   // initialize to X^i, invoke as ZZ_pEX(INIT_MONO, i)
+
+   // typedefs to aid in generic programming
+   typedef ZZ_pE coeff_type;
+   typedef ZZ_pEXModulus modulus_type;
+
+   // ...
+
+};
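+
+// For illustration only: a minimal sketch (not part of the interface
+// above) of the initialization steps required before any ZZ_pEX
+// arithmetic can be performed.  The prime 17 and the extension degree 3
+// are arbitrary choices for this example.
+
+#include <NTL/ZZ_pXFactoring.h>   // for BuildIrred
+
+void example_setup()
+{
+   ZZ_p::init(conv<ZZ>(17));      // base field ZZ_p = GF(17)
+   ZZ_pX P;
+   BuildIrred(P, 3);              // monic irreducible of degree 3 over ZZ_p
+   ZZ_pE::init(P);                // now ZZ_pE = GF(17^3)
+
+   ZZ_pEX f;
+   SetCoeff(f, 2);                // f = X^2
+   SetCoeff(f, 0, random_ZZ_pE()); // f = X^2 + (random constant term)
+}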
+
+
+
+
+/**************************************************************************\
+
+                              Accessing coefficients
+
+The degree of a polynomial f is obtained as deg(f),
+where the zero polynomial, by definition, has degree -1.
+
+A polynomial f is represented as a coefficient vector.
+Coefficients may be accessed in one of two ways.
+
+The safe, high-level method is to call the function
+coeff(f, i) to get the coefficient of X^i in the polynomial f,
+and to call the function SetCoeff(f, i, a) to set the coefficient
+of X^i in f to the scalar a.
+
+One can also access the coefficients more directly via a lower level
+interface.  The coefficient of X^i in f may be accessed using
+subscript notation f[i].  In addition, one may write f.SetLength(n)
+to set the length of the underlying coefficient vector to n,
+and f.SetMaxLength(n) to allocate space for n coefficients,
+without changing the coefficient vector itself.
+
+After setting coefficients using this low-level interface,
+one must ensure that leading zeros in the coefficient vector
+are stripped afterwards by calling the function f.normalize().
+
+NOTE: the coefficient vector of f may also be accessed directly
+as f.rep; however, this is not recommended. Also, for a properly
+normalized polynomial f, we have f.rep.length() == deg(f)+1,
+and deg(f) >= 0  =>  f.rep[deg(f)] != 0.
+
+\**************************************************************************/
+
+
+
+long deg(const ZZ_pEX& a);  // return deg(a); deg(0) == -1.
+
+const ZZ_pE& coeff(const ZZ_pEX& a, long i);
+// returns the coefficient of X^i, or zero if i not in range
+
+const ZZ_pE& LeadCoeff(const ZZ_pEX& a);
+// returns leading term of a, or zero if a == 0
+
+const ZZ_pE& ConstTerm(const ZZ_pEX& a);
+// returns constant term of a, or zero if a == 0
+
+void SetCoeff(ZZ_pEX& x, long i, const ZZ_pE& a);
+void SetCoeff(ZZ_pEX& x, long i, const ZZ_p& a);
+void SetCoeff(ZZ_pEX& x, long i, long a);
+// makes coefficient of X^i equal to a; error is raised if i < 0
+
+void SetCoeff(ZZ_pEX& x, long i);
+// makes coefficient of X^i equal to 1;  error is raised if i < 0
+
+void SetX(ZZ_pEX& x); // x is set to the monomial X
+
+long IsX(const ZZ_pEX& a); // test if a = X
+
+
+
+
+ZZ_pE& ZZ_pEX::operator[](long i);
+const ZZ_pE& ZZ_pEX::operator[](long i) const;
+// indexing operators: f[i] is the coefficient of X^i ---
+// i should satisfy i >= 0 and i <= deg(f).
+// No range checking (unless NTL_RANGE_CHECK is defined).
+
+void ZZ_pEX::SetLength(long n);
+// f.SetLength(n) sets the length of the underlying coefficient
+// vector to n --- after this call, indexing f[i] for i = 0..n-1
+// is valid.
+
+void ZZ_pEX::normalize();  
+// f.normalize() strips leading zeros from coefficient vector of f
+
+void ZZ_pEX::SetMaxLength(long n);
+// f.SetMaxLength(n) pre-allocates space for n coefficients.  The
+// polynomial that f represents is unchanged.
+
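+
+// A small usage sketch: filling the coefficient vector through the
+// low-level interface and then calling normalize(), as required above.
+
+void BuildPoly(ZZ_pEX& f, const vec_ZZ_pE& c)
+{
+   f.SetLength(c.length());
+   for (long i = 0; i < c.length(); i++)
+      f[i] = c[i];                // direct low-level coefficient access
+   f.normalize();                 // strip leading zeros, as required
+}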
+
+
+
+
+
+
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+long operator==(const ZZ_pEX& a, const ZZ_pEX& b);
+long operator!=(const ZZ_pEX& a, const ZZ_pEX& b);
+
+long IsZero(const ZZ_pEX& a); // test for 0
+long IsOne(const ZZ_pEX& a); // test for 1
+
+// PROMOTIONS: ==, != promote {long,ZZ_p,ZZ_pE} to ZZ_pEX on (a, b).
+
+/**************************************************************************\
+
+                                   Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ_pEX operator+(const ZZ_pEX& a, const ZZ_pEX& b);
+ZZ_pEX operator-(const ZZ_pEX& a, const ZZ_pEX& b);
+ZZ_pEX operator-(const ZZ_pEX& a);
+
+ZZ_pEX& operator+=(ZZ_pEX& x, const ZZ_pEX& a);
+ZZ_pEX& operator+=(ZZ_pEX& x, const ZZ_pE& a);
+ZZ_pEX& operator+=(ZZ_pEX& x, const ZZ_p& a);
+ZZ_pEX& operator+=(ZZ_pEX& x, long a);
+
+
+ZZ_pEX& operator++(ZZ_pEX& x);  // prefix
+void operator++(ZZ_pEX& x, int);  // postfix
+
+ZZ_pEX& operator-=(ZZ_pEX& x, const ZZ_pEX& a);
+ZZ_pEX& operator-=(ZZ_pEX& x, const ZZ_pE& a);
+ZZ_pEX& operator-=(ZZ_pEX& x, const ZZ_p& a);
+ZZ_pEX& operator-=(ZZ_pEX& x, long a);
+
+ZZ_pEX& operator--(ZZ_pEX& x);  // prefix
+void operator--(ZZ_pEX& x, int);  // postfix
+
+// procedural versions:
+
+void add(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b); // x = a + b
+void sub(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b); // x = a - b
+void negate(ZZ_pEX& x, const ZZ_pEX& a); // x = - a
+
+// PROMOTIONS: +, -, add, sub promote {long,ZZ_p,ZZ_pE} to ZZ_pEX on (a, b).
+
+
+
+/**************************************************************************\
+
+                               Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ_pEX operator*(const ZZ_pEX& a, const ZZ_pEX& b);
+
+ZZ_pEX& operator*=(ZZ_pEX& x, const ZZ_pEX& a);
+ZZ_pEX& operator*=(ZZ_pEX& x, const ZZ_pE& a);
+ZZ_pEX& operator*=(ZZ_pEX& x, const ZZ_p& a);
+ZZ_pEX& operator*=(ZZ_pEX& x, long a);
+
+
+// procedural versions:
+
+
+void mul(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b); // x = a * b
+
+void sqr(ZZ_pEX& x, const ZZ_pEX& a); // x = a^2
+ZZ_pEX sqr(const ZZ_pEX& a);
+
+// PROMOTIONS: *, mul promote {long,ZZ_p,ZZ_pE} to ZZ_pEX on (a, b).
+
+void power(ZZ_pEX& x, const ZZ_pEX& a, long e);  // x = a^e (e >= 0)
+ZZ_pEX power(const ZZ_pEX& a, long e);
+
+
+/**************************************************************************\
+
+                               Shift Operations
+
+LeftShift by n means multiplication by X^n
+RightShift by n means division by X^n
+
+A negative shift amount reverses the direction of the shift.
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ_pEX operator<<(const ZZ_pEX& a, long n);
+ZZ_pEX operator>>(const ZZ_pEX& a, long n);
+
+ZZ_pEX& operator<<=(ZZ_pEX& x, long n);
+ZZ_pEX& operator>>=(ZZ_pEX& x, long n);
+
+// procedural versions:
+
+void LeftShift(ZZ_pEX& x, const ZZ_pEX& a, long n);
+ZZ_pEX LeftShift(const ZZ_pEX& a, long n);
+
+void RightShift(ZZ_pEX& x, const ZZ_pEX& a, long n);
+ZZ_pEX RightShift(const ZZ_pEX& a, long n);
+
+
+
+/**************************************************************************\
+
+                                  Division
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_pEX& b);
+ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_pE& b);
+ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_p& b);
+ZZ_pEX operator/(const ZZ_pEX& a, long b);
+
+ZZ_pEX operator%(const ZZ_pEX& a, const ZZ_pEX& b);
+
+ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_pEX& a);
+ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_pE& a);
+ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_p& a);
+ZZ_pEX& operator/=(ZZ_pEX& x, long a);
+
+ZZ_pEX& operator%=(ZZ_pEX& x, const ZZ_pEX& a);
+
+// procedural versions:
+
+
+void DivRem(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b);
+// q = a/b, r = a%b
+
+void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEX& b);
+void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pE& b);
+void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_p& b);
+void div(ZZ_pEX& q, const ZZ_pEX& a, long b);
+// q = a/b
+
+void rem(ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b);
+// r = a%b
+
+long divide(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEX& b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0
+
+long divide(const ZZ_pEX& a, const ZZ_pEX& b);
+// if b | a, returns 1; otherwise returns 0
+
+
+/**************************************************************************\
+
+                                   GCD's
+
+These routines are intended for use when ZZ_pE is a field.
+
+\**************************************************************************/
+
+
+void GCD(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b);
+ZZ_pEX GCD(const ZZ_pEX& a, const ZZ_pEX& b);
+// x = GCD(a, b),  x is always monic (or zero if a==b==0).
+
+
+void XGCD(ZZ_pEX& d, ZZ_pEX& s, ZZ_pEX& t, const ZZ_pEX& a, const ZZ_pEX& b);
+// d = gcd(a,b), a s + b t = d
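+
+// For example (a sketch): when GCD(a, f) == 1, the Bezout coefficient s
+// is an inverse of a modulo f, which is what InvMod below computes
+// directly.
+
+void InvViaXGCD(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f)
+{
+   ZZ_pEX d, s, t;
+   XGCD(d, s, t, a, f);           // a*s + f*t == d
+   if (!IsOne(d)) Error("a not invertible mod f");
+   rem(x, s, f);                  // x = s % f, so a*x == 1 (mod f)
+}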
+
+
+/**************************************************************************\
+
+                                  Input/Output
+
+I/O format:
+
+   [a_0 a_1 ... a_n],
+
+represents the polynomial a_0 + a_1*X + ... + a_n*X^n.
+
+On output, all coefficients will be polynomials of degree < ZZ_pE::degree() and
+a_n not zero (the zero polynomial is [ ]).  On input, the coefficients
+are arbitrary polynomials which are reduced modulo ZZ_pE::modulus(),
+and leading zeros stripped.
+
+\**************************************************************************/
+
+istream& operator>>(istream& s, ZZ_pEX& x);
+ostream& operator<<(ostream& s, const ZZ_pEX& a);
+
+
+/**************************************************************************\
+
+                              Some utility routines
+
+\**************************************************************************/
+
+
+void diff(ZZ_pEX& x, const ZZ_pEX& a); // x = derivative of a
+ZZ_pEX diff(const ZZ_pEX& a);
+
+void MakeMonic(ZZ_pEX& x);
+// if x != 0 makes x into its monic associate; LeadCoeff(x) must be
+// invertible in this case
+
+void reverse(ZZ_pEX& x, const ZZ_pEX& a, long hi);
+ZZ_pEX reverse(const ZZ_pEX& a, long hi);
+
+void reverse(ZZ_pEX& x, const ZZ_pEX& a);
+ZZ_pEX reverse(const ZZ_pEX& a);
+
+// x = reverse of a[0]..a[hi] (hi >= -1);
+// hi defaults to deg(a) in second version
+
+void VectorCopy(vec_ZZ_pE& x, const ZZ_pEX& a, long n);
+vec_ZZ_pE VectorCopy(const ZZ_pEX& a, long n);
+// x = copy of coefficient vector of a of length exactly n.
+// input is truncated or padded with zeroes as appropriate.
+
+
+
+
+/**************************************************************************\
+
+                             Random Polynomials
+
+\**************************************************************************/
+
+void random(ZZ_pEX& x, long n);
+ZZ_pEX random_ZZ_pEX(long n);
+// x = random polynomial of degree < n
+
+
+/**************************************************************************\
+
+                    Polynomial Evaluation and related problems
+
+\**************************************************************************/
+
+
+void BuildFromRoots(ZZ_pEX& x, const vec_ZZ_pE& a);
+ZZ_pEX BuildFromRoots(const vec_ZZ_pE& a);
+// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n = a.length()
+
+void eval(ZZ_pE& b, const ZZ_pEX& f, const ZZ_pE& a);
+ZZ_pE eval(const ZZ_pEX& f, const ZZ_pE& a);
+// b = f(a)
+
+void eval(ZZ_pE& b, const ZZ_pX& f, const ZZ_pE& a);
+ZZ_pE eval(const ZZ_pX& f, const ZZ_pE& a);
+// b = f(a); uses ModComp algorithm for ZZ_pX
+
+void eval(vec_ZZ_pE& b, const ZZ_pEX& f, const vec_ZZ_pE& a);
+vec_ZZ_pE eval(const ZZ_pEX& f, const vec_ZZ_pE& a);
+//  b.SetLength(a.length()); b[i] = f(a[i]) for 0 <= i < a.length()
+
+void interpolate(ZZ_pEX& f, const vec_ZZ_pE& a, const vec_ZZ_pE& b);
+ZZ_pEX interpolate(const vec_ZZ_pE& a, const vec_ZZ_pE& b);
+// interpolates the polynomial f satisfying f(a[i]) = b[i].  
+
+/**************************************************************************\
+
+                       Arithmetic mod X^n
+
+Required: n >= 0; otherwise, an error is raised.
+
+\**************************************************************************/
+
+void trunc(ZZ_pEX& x, const ZZ_pEX& a, long n); // x = a % X^n
+ZZ_pEX trunc(const ZZ_pEX& a, long n);
+
+void MulTrunc(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b, long n);
+ZZ_pEX MulTrunc(const ZZ_pEX& a, const ZZ_pEX& b, long n);
+// x = a * b % X^n
+
+void SqrTrunc(ZZ_pEX& x, const ZZ_pEX& a, long n);
+ZZ_pEX SqrTrunc(const ZZ_pEX& a, long n);
+// x = a^2 % X^n
+
+void InvTrunc(ZZ_pEX& x, const ZZ_pEX& a, long n);
+ZZ_pEX InvTrunc(const ZZ_pEX& a, long n);
+// computes x = a^{-1} % X^n.  Must have ConstTerm(a) invertible.
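+
+// Sanity-check sketch: the truncated inverse satisfies
+// a * InvTrunc(a, n) == 1 (mod X^n).
+
+void CheckInvTrunc(const ZZ_pEX& a, long n)
+{
+   ZZ_pEX b = InvTrunc(a, n);     // requires ConstTerm(a) invertible
+   if (!IsOne(MulTrunc(a, b, n))) Error("InvTrunc check failed");
+}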
+
+/**************************************************************************\
+
+                Modular Arithmetic (without pre-conditioning)
+
+Arithmetic mod f.
+
+All inputs and outputs are polynomials of degree less than deg(f), and
+deg(f) > 0.
+
+
+NOTE: if you want to do many computations with a fixed f, use the
+ZZ_pEXModulus data structure and associated routines below for better
+performance.
+
+\**************************************************************************/
+
+void MulMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b, const ZZ_pEX& f);
+ZZ_pEX MulMod(const ZZ_pEX& a, const ZZ_pEX& b, const ZZ_pEX& f);
+// x = (a * b) % f
+
+void SqrMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f);
+ZZ_pEX SqrMod(const ZZ_pEX& a, const ZZ_pEX& f);
+// x = a^2 % f
+
+void MulByXMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f);
+ZZ_pEX MulByXMod(const ZZ_pEX& a, const ZZ_pEX& f);
+// x = (a * X) mod f
+
+void InvMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f);
+ZZ_pEX InvMod(const ZZ_pEX& a, const ZZ_pEX& f);
+// x = a^{-1} % f; error if a is not invertible
+
+long InvModStatus(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f);
+// if (a, f) = 1, returns 0 and sets x = a^{-1} % f; otherwise,
+// returns 1 and sets x = (a, f)
+
+
+/**************************************************************************\
+
+                     Modular Arithmetic with Pre-Conditioning
+
+If you need to do a lot of arithmetic modulo a fixed f, build
+ZZ_pEXModulus F for f.  This pre-computes information about f that
+speeds up subsequent computations.
+
+As an example, the following routine computes the product modulo f of a vector
+of polynomials.
+
+#include <NTL/ZZ_pEX.h>
+
+void product(ZZ_pEX& x, const vec_ZZ_pEX& v, const ZZ_pEX& f)
+{
+   ZZ_pEXModulus F(f);
+   ZZ_pEX res;
+   res = 1;
+   long i;
+   for (i = 0; i < v.length(); i++)
+      MulMod(res, res, v[i], F);
+   x = res;
+}
+
+NOTE: A ZZ_pEX may be used wherever a ZZ_pEXModulus is required,
+and a ZZ_pEXModulus may be used wherever a ZZ_pEX is required.
+
+
+\**************************************************************************/
+
+class ZZ_pEXModulus {
+public:
+   ZZ_pEXModulus(); // initially in an unusable state
+
+   ZZ_pEXModulus(const ZZ_pEX& f); // initialize with f, deg(f) > 0
+
+   ZZ_pEXModulus(const ZZ_pEXModulus&); // copy
+
+   ZZ_pEXModulus& operator=(const ZZ_pEXModulus&); // assignment
+
+   ~ZZ_pEXModulus(); // destructor
+
+   operator const ZZ_pEX& () const; // implicit read-only access to f
+
+   const ZZ_pEX& val() const; // explicit read-only access to f
+};
+
+void build(ZZ_pEXModulus& F, const ZZ_pEX& f);
+// pre-computes information about f and stores it in F.  Must have
+// deg(f) > 0.  Note that the declaration ZZ_pEXModulus F(f) is
+// equivalent to ZZ_pEXModulus F; build(F, f).
+
+// In the following, f refers to the polynomial f supplied to the
+// build routine, and n = deg(f).
+
+
+long deg(const ZZ_pEXModulus& F);  // return n=deg(f)
+
+void MulMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b,
+            const ZZ_pEXModulus& F);
+ZZ_pEX MulMod(const ZZ_pEX& a, const ZZ_pEX& b, const ZZ_pEXModulus& F);
+// x = (a * b) % f; deg(a), deg(b) < n
+
+void SqrMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEXModulus& F);
+ZZ_pEX SqrMod(const ZZ_pEX& a, const ZZ_pEXModulus& F);
+// x = a^2 % f; deg(a) < n
+
+void PowerMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ& e, const ZZ_pEXModulus& F);
+ZZ_pEX PowerMod(const ZZ_pEX& a, const ZZ& e, const ZZ_pEXModulus& F);
+
+void PowerMod(ZZ_pEX& x, const ZZ_pEX& a, long e, const ZZ_pEXModulus& F);
+ZZ_pEX PowerMod(const ZZ_pEX& a, long e, const ZZ_pEXModulus& F);
+
+// x = a^e % f; deg(a) < n.  Uses a sliding window algorithm.
+// (e may be negative, in which case a must be invertible mod f)
+
+void PowerXMod(ZZ_pEX& x, const ZZ& e, const ZZ_pEXModulus& F);
+ZZ_pEX PowerXMod(const ZZ& e, const ZZ_pEXModulus& F);
+
+void PowerXMod(ZZ_pEX& x, long e, const ZZ_pEXModulus& F);
+ZZ_pEX PowerXMod(long e, const ZZ_pEXModulus& F);
+
+// x = X^e % f (e may be negative)
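+
+// Typical use (sketch): the Frobenius map X^q mod f, with
+// q = ZZ_pE::cardinality().  This is exactly the polynomial h expected
+// by several routines in ZZ_pEXFactoring.
+
+void Frobenius(ZZ_pEX& h, const ZZ_pEXModulus& F)
+{
+   PowerXMod(h, ZZ_pE::cardinality(), F);   // h = X^q mod f
+}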
+
+void rem(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEXModulus& F);
+// x = a % f
+
+void DivRem(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEXModulus& F);
+// q = a/f, r = a%f
+
+void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEXModulus& F);
+// q = a/f
+
+// operator notation:
+
+ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_pEXModulus& F);
+ZZ_pEX operator%(const ZZ_pEX& a, const ZZ_pEXModulus& F);
+
+ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_pEXModulus& F);
+ZZ_pEX& operator%=(ZZ_pEX& x, const ZZ_pEXModulus& F);
+
+
+
+/**************************************************************************\
+
+                             vectors of ZZ_pEX's
+
+\**************************************************************************/
+
+
+typedef Vec<ZZ_pEX> vec_ZZ_pEX; // backward compatibility
+
+
+
+/**************************************************************************\
+
+                              Modular Composition
+
+Modular composition is the problem of computing g(h) mod f for
+polynomials f, g, and h.
+
+The algorithm employed is that of Brent & Kung (Fast algorithms for
+manipulating formal power series, JACM 25:581-595, 1978), which uses
+O(n^{1/2}) modular polynomial multiplications, and O(n^2) scalar
+operations.
+
+
+\**************************************************************************/
+
+void CompMod(ZZ_pEX& x, const ZZ_pEX& g, const ZZ_pEX& h,
+             const ZZ_pEXModulus& F);
+ZZ_pEX CompMod(const ZZ_pEX& g, const ZZ_pEX& h,
+                    const ZZ_pEXModulus& F);
+
+// x = g(h) mod f; deg(h) < n
+
+void Comp2Mod(ZZ_pEX& x1, ZZ_pEX& x2, const ZZ_pEX& g1, const ZZ_pEX& g2,
+              const ZZ_pEX& h, const ZZ_pEXModulus& F);
+// xi = gi(h) mod f (i=1,2); deg(h) < n.
+
+
+void Comp3Mod(ZZ_pEX& x1, ZZ_pEX& x2, ZZ_pEX& x3,
+              const ZZ_pEX& g1, const ZZ_pEX& g2, const ZZ_pEX& g3,
+              const ZZ_pEX& h, const ZZ_pEXModulus& F);
+// xi = gi(h) mod f (i=1..3); deg(h) < n.
+
+
+
+/**************************************************************************\
+
+                     Composition with Pre-Conditioning
+
+If a single h is going to be used with many g's then you should build
+a ZZ_pEXArgument for h, and then use the compose routine below.  The
+routine build computes and stores h, h^2, ..., h^m mod f.  After this
+pre-computation, composing a polynomial of degree roughly n with h
+takes n/m multiplies mod f, plus n^2 scalar multiplies.  Thus,
+increasing m increases the space requirement and the pre-computation
+time, but reduces the composition time.
+
+\**************************************************************************/
+
+
+struct ZZ_pEXArgument {
+   vec_ZZ_pEX H;
+};
+
+void build(ZZ_pEXArgument& H, const ZZ_pEX& h, const ZZ_pEXModulus& F, long m);
+// Pre-computes information about h.  m > 0, deg(h) < n.
+
+void CompMod(ZZ_pEX& x, const ZZ_pEX& g, const ZZ_pEXArgument& H,
+             const ZZ_pEXModulus& F);
+
+ZZ_pEX CompMod(const ZZ_pEX& g, const ZZ_pEXArgument& H,
+                    const ZZ_pEXModulus& F);
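+
+// Sketch of the intended usage pattern: one build() for h, then many
+// compositions against the same pre-computed argument.  The table size
+// m = 10 is an arbitrary choice here; see ZZ_pEXArgBound below.
+
+void ComposeMany(vec_ZZ_pEX& x, const vec_ZZ_pEX& g,
+                 const ZZ_pEX& h, const ZZ_pEXModulus& F)
+{
+   ZZ_pEXArgument H;
+   build(H, h, F, 10);            // stores h, h^2, ..., h^10 mod f
+   x.SetLength(g.length());
+   for (long i = 0; i < g.length(); i++)
+      CompMod(x[i], g[i], H, F);  // x[i] = g[i](h) mod f
+}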
+
+extern long ZZ_pEXArgBound;
+
+// Initially 0.  If this is set to a value greater than zero, then
+// composition routines will allocate a table of no more than about
+// ZZ_pEXArgBound KB.  Setting this value affects all compose routines
+// and the power projection and minimal polynomial routines below,
+// and indirectly affects many routines in ZZ_pEXFactoring.
+
+/**************************************************************************\
+
+                     power projection routines
+
+\**************************************************************************/
+
+void project(ZZ_pE& x, const vec_ZZ_pE& a, const ZZ_pEX& b);
+ZZ_pE project(const vec_ZZ_pE& a, const ZZ_pEX& b);
+// x = inner product of a with coefficient vector of b
+
+
+void ProjectPowers(vec_ZZ_pE& x, const vec_ZZ_pE& a, long k,
+                   const ZZ_pEX& h, const ZZ_pEXModulus& F);
+
+vec_ZZ_pE ProjectPowers(const vec_ZZ_pE& a, long k,
+                   const ZZ_pEX& h, const ZZ_pEXModulus& F);
+
+// Computes the vector
+
+//    project(a, 1), project(a, h), ..., project(a, h^{k-1} % f).  
+
+// This operation is the "transpose" of the modular composition operation.
+
+void ProjectPowers(vec_ZZ_pE& x, const vec_ZZ_pE& a, long k,
+                   const ZZ_pEXArgument& H, const ZZ_pEXModulus& F);
+
+vec_ZZ_pE ProjectPowers(const vec_ZZ_pE& a, long k,
+                   const ZZ_pEXArgument& H, const ZZ_pEXModulus& F);
+
+// same as above, but uses a pre-computed ZZ_pEXArgument
+
+
+class ZZ_pEXTransMultiplier { /* ... */ };
+
+void build(ZZ_pEXTransMultiplier& B, const ZZ_pEX& b, const ZZ_pEXModulus& F);
+
+void UpdateMap(vec_ZZ_pE& x, const vec_ZZ_pE& a,
+               const ZZ_pEXTransMultiplier& B, const ZZ_pEXModulus& F);
+
+vec_ZZ_pE UpdateMap(const vec_ZZ_pE& a,
+               const ZZ_pEXTransMultiplier& B, const ZZ_pEXModulus& F);
+
+// Computes the vector
+
+//    project(a, b), project(a, (b*X)%f), ..., project(a, (b*X^{n-1})%f)
+
+// Required: a.length() <= deg(F), deg(b) < deg(F).
+// This is "transposed" MulMod by B.
+// Input may have "high order" zeroes stripped.
+// Output always has high order zeroes stripped.
+
+
+/**************************************************************************\
+
+                              Minimum Polynomials
+
+These routines should be used only when ZZ_pE is a field.
+
+All of these routines implement the algorithm from [Shoup, J. Symbolic
+Comp. 17:371-391, 1994] and [Shoup, J. Symbolic Comp. 20:363-397,
+1995], based on transposed modular composition and the
+Berlekamp/Massey algorithm.
+
+\**************************************************************************/
+
+
+void MinPolySeq(ZZ_pEX& h, const vec_ZZ_pE& a, long m);
+ZZ_pEX MinPolySeq(const vec_ZZ_pE& a, long m);
+// computes the minimum polynomial of a linearly generated sequence; m
+// is a bound on the degree of the polynomial; required: a.length() >=
+// 2*m
+
+
+void ProbMinPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m);
+ZZ_pEX ProbMinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m);
+
+void ProbMinPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F);
+ZZ_pEX ProbMinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F);
+
+// computes the monic minimal polynomial of (g mod f).  m = a bound on
+// the degree of the minimal polynomial; in the second version, this
+// argument defaults to n.  The algorithm is probabilistic, always
+// returns a divisor of the minimal polynomial, and returns a proper
+// divisor with probability at most m/2^{ZZ_pE::degree()}.
+
+void MinPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m);
+ZZ_pEX MinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m);
+
+void MinPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F);
+ZZ_pEX MinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F);
+
+// same as above, but guarantees that result is correct
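+
+// Usage sketch: compute the minimal polynomial of (g mod f) and verify
+// the defining property h(g) == 0 (mod f) via modular composition.
+
+void MinPolyCheck(const ZZ_pEX& g, const ZZ_pEXModulus& F)
+{
+   ZZ_pEX h, z;
+   MinPolyMod(h, g, F);           // h = monic minimal polynomial of (g mod f)
+   CompMod(z, h, g, F);           // z = h(g) mod f
+   if (!IsZero(z)) Error("MinPolyMod check failed");
+}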
+
+void IrredPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m);
+ZZ_pEX IrredPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m);
+
+void IrredPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F);
+ZZ_pEX IrredPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F);
+
+// same as above, but assumes that f is irreducible, or at least that
+// the minimal poly of g is itself irreducible.  The algorithm is
+// deterministic (and is always correct).
+
+/**************************************************************************\
+
+           Composition and Minimal Polynomials in towers
+
+These are implementations of algorithms that will be described
+and analyzed in a forthcoming paper.
+
+The routines require that p is prime, but ZZ_pE need not be a field.
+
+\**************************************************************************/
+
+
+void CompTower(ZZ_pEX& x, const ZZ_pX& g, const ZZ_pEXArgument& h,
+             const ZZ_pEXModulus& F);
+
+ZZ_pEX CompTower(const ZZ_pX& g, const ZZ_pEXArgument& h,
+             const ZZ_pEXModulus& F);
+
+void CompTower(ZZ_pEX& x, const ZZ_pX& g, const ZZ_pEX& h,
+             const ZZ_pEXModulus& F);
+
+ZZ_pEX CompTower(const ZZ_pX& g, const ZZ_pEX& h,
+             const ZZ_pEXModulus& F);
+
+
+// x = g(h) mod f
+
+
+void ProbMinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F,
+                      long m);
+
+ZZ_pX ProbMinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m);
+
+void ProbMinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F);
+
+ZZ_pX ProbMinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F);
+
+// Uses a probabilistic algorithm to compute the minimal
+// polynomial of (g mod f) over ZZ_p.
+// The parameter m is a bound on the degree of the minimal polynomial
+// (default = deg(f)*ZZ_pE::degree()).
+// In general, the result will be a divisor of the true minimal
+// polynomial.  For correct results, use the MinPoly routines below.
+
+
+
+void MinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m);
+
+ZZ_pX MinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m);
+
+void MinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F);
+
+ZZ_pX MinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F);
+
+// Same as above, but result is always correct.
+
+
+void IrredPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m);
+
+ZZ_pX IrredPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m);
+
+void IrredPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F);
+
+ZZ_pX IrredPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F);
+
+// Same as above, but assumes the minimal polynomial is
+// irreducible, and uses a slightly faster, deterministic algorithm.
+
+
+/**************************************************************************\
+
+                   Traces, norms, resultants
+
+\**************************************************************************/
+
+
+void TraceMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEXModulus& F);
+ZZ_pE TraceMod(const ZZ_pEX& a, const ZZ_pEXModulus& F);
+
+void TraceMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEX& f);
+ZZ_pE TraceMod(const ZZ_pEX& a, const ZZ_pEX& f);
+// x = Trace(a mod f); deg(a) < deg(f)
+
+
+void TraceVec(vec_ZZ_pE& S, const ZZ_pEX& f);
+vec_ZZ_pE TraceVec(const ZZ_pEX& f);
+// S[i] = Trace(X^i mod f), i = 0..deg(f)-1; 0 < deg(f)
+
+// The above trace routines implement the asymptotically fast trace
+// algorithm from [von zur Gathen and Shoup, Computational Complexity,
+// 1992].
+
+void NormMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEX& f);
+ZZ_pE NormMod(const ZZ_pEX& a, const ZZ_pEX& f);
+// x = Norm(a mod f); 0 < deg(f), deg(a) < deg(f)
+
+void resultant(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEX& b);
+ZZ_pE resultant(const ZZ_pEX& a, const ZZ_pEX& b);
+// x = resultant(a, b)
+
+// NormMod and resultant require that ZZ_pE is a field.
+
+
+
+
+/**************************************************************************\
+
+                           Miscellany
+
+
+\**************************************************************************/
+
+
+void clear(ZZ_pEX& x); // x = 0
+void set(ZZ_pEX& x); // x = 1
+
+void ZZ_pEX::kill();
+// f.kill() sets f to 0 and frees all memory held by f.  Equivalent to
+// f.rep.kill().
+
+ZZ_pEX::ZZ_pEX(INIT_SIZE_TYPE, long n);
+// ZZ_pEX(INIT_SIZE, n) initializes to zero, but space is pre-allocated
+// for n coefficients
+
+static const ZZ_pEX& zero();
+// ZZ_pEX::zero() is a read-only reference to 0
+
+void ZZ_pEX::swap(ZZ_pEX& x);
+void swap(ZZ_pEX& x, ZZ_pEX& y);
+// swap (via "pointer swapping")
+
+
+ZZ_pEX::ZZ_pEX(long i, const ZZ_pE& c);
+ZZ_pEX::ZZ_pEX(long i, const ZZ_p& c);
+ZZ_pEX::ZZ_pEX(long i, long c);
+// initialize to c*X^i, provided for backward compatibility
+
+ diff --git a/thirdparty/linux/ntl/doc/ZZ_pEX.txt b/thirdparty/linux/ntl/doc/ZZ_pEX.txt new file mode 100644 index 0000000000..ae62f5a6e6 --- /dev/null +++ b/thirdparty/linux/ntl/doc/ZZ_pEX.txt @@ -0,0 +1,890 @@ + +/**************************************************************************\ + +MODULE: ZZ_pEX + +SUMMARY: + +The class ZZ_pEX represents polynomials over ZZ_pE, +and so can be used, for example, for arithmentic in GF(p^n)[X]. +However, except where mathematically necessary (e.g., GCD computations), +ZZ_pE need not be a field. + +\**************************************************************************/ + +#include +#include + +class ZZ_pEX { +public: + + ZZ_pEX(); // initial value 0 + + ZZ_pEX(const ZZ_pEX& a); // copy + + explicit ZZ_pEX(const ZZ_pE& a); // promotion + explicit ZZ_pEX(const ZZ_p& a); + explicit ZZ_pEX(long a); + + ZZ_pEX& operator=(const ZZ_pEX& a); // assignment + ZZ_pEX& operator=(const ZZ_pE& a); + ZZ_pEX& operator=(const ZZ_p& a); + ZZ_pEX& operator=(long a); + + ~ZZ_pEX(); // destructor + + ZZ_pEX(INIT_MONO_TYPE, long i, const ZZ_pE& c); + ZZ_pEX(INIT_MONO_TYPE, long i, const ZZ_p& c); + ZZ_pEX(INIT_MONO_TYPE, long i, long c); + // initialize to c*X^i, invoke as ZZ_pEX(INIT_MONO, i, c) + + ZZ_pEX(INIT_MONO_TYPE, long i); + // initialize to X^i, invoke as ZZ_pEX(INIT_MONO, i) + + // typedefs to aid in generic programming + typedef ZZ_pE coeff_type; + typedef ZZ_pEXModulus modulus_type; + + // ... + +}; + + + + +/**************************************************************************\ + + Accessing coefficients + +The degree of a polynomial f is obtained as deg(f), +where the zero polynomial, by definition, has degree -1. + +A polynomial f is represented as a coefficient vector. +Coefficients may be accesses in one of two ways. + +The safe, high-level method is to call the function +coeff(f, i) to get the coefficient of X^i in the polynomial f, +and to call the function SetCoeff(f, i, a) to set the coefficient +of X^i in f to the scalar a. + +One can also access the coefficients more directly via a lower level +interface. The coefficient of X^i in f may be accessed using +subscript notation f[i]. In addition, one may write f.SetLength(n) +to set the length of the underlying coefficient vector to n, +and f.SetMaxLength(n) to allocate space for n coefficients, +without changing the coefficient vector itself. + +After setting coefficients using this low-level interface, +one must ensure that leading zeros in the coefficient vector +are stripped afterwards by calling the function f.normalize(). + +NOTE: the coefficient vector of f may also be accessed directly +as f.rep; however, this is not recommended. Also, for a properly +normalized polynomial f, we have f.rep.length() == deg(f)+1, +and deg(f) >= 0 => f.rep[deg(f)] != 0. + +\**************************************************************************/ + + + +long deg(const ZZ_pEX& a); // return deg(a); deg(0) == -1. 
+ +const ZZ_pE& coeff(const ZZ_pEX& a, long i); +// returns the coefficient of X^i, or zero if i not in range + +const ZZ_pE& LeadCoeff(const ZZ_pEX& a); +// returns leading term of a, or zero if a == 0 + +const ZZ_pE& ConstTerm(const ZZ_pEX& a); +// returns constant term of a, or zero if a == 0 + +void SetCoeff(ZZ_pEX& x, long i, const ZZ_pE& a); +void SetCoeff(ZZ_pEX& x, long i, const ZZ_p& a); +void SetCoeff(ZZ_pEX& x, long i, long a); +// makes coefficient of X^i equal to a; error is raised if i < 0 + +void SetCoeff(ZZ_pEX& x, long i); +// makes coefficient of X^i equal to 1; error is raised if i < 0 + +void SetX(ZZ_pEX& x); // x is set to the monomial X + +long IsX(const ZZ_pEX& a); // test if x = X + + + + +ZZ_pE& ZZ_pEX::operator[](long i); +const ZZ_pE& ZZ_pEX::operator[](long i) const; +// indexing operators: f[i] is the coefficient of X^i --- +// i should satsify i >= 0 and i <= deg(f). +// No range checking (unless NTL_RANGE_CHECK is defined). + +void ZZ_pEX::SetLength(long n); +// f.SetLength(n) sets the length of the inderlying coefficient +// vector to n --- after this call, indexing f[i] for i = 0..n-1 +// is valid. + +void ZZ_pEX::normalize(); +// f.normalize() strips leading zeros from coefficient vector of f + +void ZZ_pEX::SetMaxLength(long n); +// f.SetMaxLength(n) pre-allocate spaces for n coefficients. The +// polynomial that f represents is unchanged. + + + + + + + + + + +/**************************************************************************\ + + Comparison + +\**************************************************************************/ + + +long operator==(const ZZ_pEX& a, const ZZ_pEX& b); +long operator!=(const ZZ_pEX& a, const ZZ_pEX& b); + +long IsZero(const ZZ_pEX& a); // test for 0 +long IsOne(const ZZ_pEX& a); // test for 1 + +// PROMOTIONS: ==, != promote {long,ZZ_p,ZZ_pE} to ZZ_pEX on (a, b). + +/**************************************************************************\ + + Addition + +\**************************************************************************/ + +// operator notation: + +ZZ_pEX operator+(const ZZ_pEX& a, const ZZ_pEX& b); +ZZ_pEX operator-(const ZZ_pEX& a, const ZZ_pEX& b); +ZZ_pEX operator-(const ZZ_pEX& a); + +ZZ_pEX& operator+=(ZZ_pEX& x, const ZZ_pEX& a); +ZZ_pEX& operator+=(ZZ_pEX& x, const ZZ_pE& a); +ZZ_pEX& operator+=(ZZ_pEX& x, const ZZ_p& a); +ZZ_pEX& operator+=(ZZ_pEX& x, long a); + + +ZZ_pEX& operator++(ZZ_pEX& x); // prefix +void operator++(ZZ_pEX& x, int); // postfix + +ZZ_pEX& operator-=(ZZ_pEX& x, const ZZ_pEX& a); +ZZ_pEX& operator-=(ZZ_pEX& x, const ZZ_pE& a); +ZZ_pEX& operator-=(ZZ_pEX& x, const ZZ_p& a); +ZZ_pEX& operator-=(ZZ_pEX& x, long a); + +ZZ_pEX& operator--(ZZ_pEX& x); // prefix +void operator--(ZZ_pEX& x, int); // postfix + +// procedural versions: + +void add(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b); // x = a + b +void sub(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b); // x = a - b +void negate(ZZ_pEX& x, const ZZ_pEX& a); // x = - a + +// PROMOTIONS: +, -, add, sub promote {long,ZZ_p,ZZ_pE} to ZZ_pEX on (a, b). 
+ + + +/**************************************************************************\ + + Multiplication + +\**************************************************************************/ + +// operator notation: + +ZZ_pEX operator*(const ZZ_pEX& a, const ZZ_pEX& b); + +ZZ_pEX& operator*=(ZZ_pEX& x, const ZZ_pEX& a); +ZZ_pEX& operator*=(ZZ_pEX& x, const ZZ_pE& a); +ZZ_pEX& operator*=(ZZ_pEX& x, const ZZ_p& a); +ZZ_pEX& operator*=(ZZ_pEX& x, long a); + + +// procedural versions: + + +void mul(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b); // x = a * b + +void sqr(ZZ_pEX& x, const ZZ_pEX& a); // x = a^2 +ZZ_pEX sqr(const ZZ_pEX& a); + +// PROMOTIONS: *, mul promote {long,ZZ_p,ZZ_pE} to ZZ_pEX on (a, b). + +void power(ZZ_pEX& x, const ZZ_pEX& a, long e); // x = a^e (e >= 0) +ZZ_pEX power(const ZZ_pEX& a, long e); + + +/**************************************************************************\ + + Shift Operations + +LeftShift by n means multiplication by X^n +RightShift by n means division by X^n + +A negative shift amount reverses the direction of the shift. + +\**************************************************************************/ + +// operator notation: + +ZZ_pEX operator<<(const ZZ_pEX& a, long n); +ZZ_pEX operator>>(const ZZ_pEX& a, long n); + +ZZ_pEX& operator<<=(ZZ_pEX& x, long n); +ZZ_pEX& operator>>=(ZZ_pEX& x, long n); + +// procedural versions: + +void LeftShift(ZZ_pEX& x, const ZZ_pEX& a, long n); +ZZ_pEX LeftShift(const ZZ_pEX& a, long n); + +void RightShift(ZZ_pEX& x, const ZZ_pEX& a, long n); +ZZ_pEX RightShift(const ZZ_pEX& a, long n); + + + +/**************************************************************************\ + + Division + +\**************************************************************************/ + +// operator notation: + +ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_pEX& b); +ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_pE& b); +ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_p& b); +ZZ_pEX operator/(const ZZ_pEX& a, long b); + +ZZ_pEX operator%(const ZZ_pEX& a, const ZZ_pEX& b); + +ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_pEX& a); +ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_pE& a); +ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_p& a); +ZZ_pEX& operator/=(ZZ_pEX& x, long a); + +ZZ_pEX& operator%=(ZZ_pEX& x, const ZZ_pEX& a); + +// procedural versions: + + +void DivRem(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b); +// q = a/b, r = a%b + +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEX& b); +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pE& b); +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_p& b); +void div(ZZ_pEX& q, const ZZ_pEX& a, long b); +// q = a/b + +void rem(ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b); +// r = a%b + +long divide(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEX& b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0 + +long divide(const ZZ_pEX& a, const ZZ_pEX& b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0 + + +/**************************************************************************\ + + GCD's + +These routines are intended for use when ZZ_pE is a field. + +\**************************************************************************/ + + +void GCD(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b); +ZZ_pEX GCD(const ZZ_pEX& a, const ZZ_pEX& b); +// x = GCD(a, b), x is always monic (or zero if a==b==0). 
+ + +void XGCD(ZZ_pEX& d, ZZ_pEX& s, ZZ_pEX& t, const ZZ_pEX& a, const ZZ_pEX& b); +// d = gcd(a,b), a s + b t = d + + +/**************************************************************************\ + + Input/Output + +I/O format: + + [a_0 a_1 ... a_n], + +represents the polynomial a_0 + a_1*X + ... + a_n*X^n. + +On output, all coefficients will be polynomials of degree < ZZ_pE::degree() and +a_n not zero (the zero polynomial is [ ]). On input, the coefficients +are arbitrary polynomials which are reduced modulo ZZ_pE::modulus(), +and leading zeros stripped. + +\**************************************************************************/ + +istream& operator>>(istream& s, ZZ_pEX& x); +ostream& operator<<(ostream& s, const ZZ_pEX& a); + + +/**************************************************************************\ + + Some utility routines + +\**************************************************************************/ + + +void diff(ZZ_pEX& x, const ZZ_pEX& a); // x = derivative of a +ZZ_pEX diff(const ZZ_pEX& a); + +void MakeMonic(ZZ_pEX& x); +// if x != 0 makes x into its monic associate; LeadCoeff(x) must be +// invertible in this case + +void reverse(ZZ_pEX& x, const ZZ_pEX& a, long hi); +ZZ_pEX reverse(const ZZ_pEX& a, long hi); + +void reverse(ZZ_pEX& x, const ZZ_pEX& a); +ZZ_pEX reverse(const ZZ_pEX& a); + +// x = reverse of a[0]..a[hi] (hi >= -1); +// hi defaults to deg(a) in second version + +void VectorCopy(vec_ZZ_pE& x, const ZZ_pEX& a, long n); +vec_ZZ_pE VectorCopy(const ZZ_pEX& a, long n); +// x = copy of coefficient vector of a of length exactly n. +// input is truncated or padded with zeroes as appropriate. + + + + +/**************************************************************************\ + + Random Polynomials + +\**************************************************************************/ + +void random(ZZ_pEX& x, long n); +ZZ_pEX random_ZZ_pEX(long n); +// x = random polynomial of degree < n + + +/**************************************************************************\ + + Polynomial Evaluation and related problems + +\**************************************************************************/ + + +void BuildFromRoots(ZZ_pEX& x, const vec_ZZ_pE& a); +ZZ_pEX BuildFromRoots(const vec_ZZ_pE& a); +// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n = a.length() + +void eval(ZZ_pE& b, const ZZ_pEX& f, const ZZ_pE& a); +ZZ_pE eval(const ZZ_pEX& f, const ZZ_pE& a); +// b = f(a) + +void eval(ZZ_pE& b, const ZZ_pX& f, const ZZ_pE& a); +ZZ_pE eval(const ZZ_pEX& f, const ZZ_pE& a); +// b = f(a); uses ModComp algorithm for ZZ_pX + +void eval(vec_ZZ_pE& b, const ZZ_pEX& f, const vec_ZZ_pE& a); +vec_ZZ_pE eval(const ZZ_pEX& f, const vec_ZZ_pE& a); +// b.SetLength(a.length()); b[i] = f(a[i]) for 0 <= i < a.length() + +void interpolate(ZZ_pEX& f, const vec_ZZ_pE& a, const vec_ZZ_pE& b); +ZZ_pEX interpolate(const vec_ZZ_pE& a, const vec_ZZ_pE& b); +// interpolates the polynomial f satisfying f(a[i]) = b[i]. + +/**************************************************************************\ + + Arithmetic mod X^n + +Required: n >= 0; otherwise, an error is raised. 
+ +\**************************************************************************/ + +void trunc(ZZ_pEX& x, const ZZ_pEX& a, long n); // x = a % X^n +ZZ_pEX trunc(const ZZ_pEX& a, long n); + +void MulTrunc(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b, long n); +ZZ_pEX MulTrunc(const ZZ_pEX& a, const ZZ_pEX& b, long n); +// x = a * b % X^n + +void SqrTrunc(ZZ_pEX& x, const ZZ_pEX& a, long n); +ZZ_pEX SqrTrunc(const ZZ_pEX& a, long n); +// x = a^2 % X^n + +void InvTrunc(ZZ_pEX& x, const ZZ_pEX& a, long n); +ZZ_pEX InvTrunc(ZZ_pEX& x, const ZZ_pEX& a, long n); +// computes x = a^{-1} % X^m. Must have ConstTerm(a) invertible. + +/**************************************************************************\ + + Modular Arithmetic (without pre-conditioning) + +Arithmetic mod f. + +All inputs and outputs are polynomials of degree less than deg(f), and +deg(f) > 0. + + +NOTE: if you want to do many computations with a fixed f, use the +ZZ_pEXModulus data structure and associated routines below for better +performance. + +\**************************************************************************/ + +void MulMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b, const ZZ_pEX& f); +ZZ_pEX MulMod(const ZZ_pEX& a, const ZZ_pEX& b, const ZZ_pEX& f); +// x = (a * b) % f + +void SqrMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f); +ZZ_pEX SqrMod(const ZZ_pEX& a, const ZZ_pEX& f); +// x = a^2 % f + +void MulByXMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f); +ZZ_pEX MulByXMod(const ZZ_pEX& a, const ZZ_pEX& f); +// x = (a * X) mod f + +void InvMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f); +ZZ_pEX InvMod(const ZZ_pEX& a, const ZZ_pEX& f); +// x = a^{-1} % f, error is a is not invertible + +long InvModStatus(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f); +// if (a, f) = 1, returns 0 and sets x = a^{-1} % f; otherwise, +// returns 1 and sets x = (a, f) + + +/**************************************************************************\ + + Modular Arithmetic with Pre-Conditioning + +If you need to do a lot of arithmetic modulo a fixed f, build +ZZ_pEXModulus F for f. This pre-computes information about f that +speeds up subsequent computations. + +As an example, the following routine the product modulo f of a vector +of polynomials. + +#include + +void product(ZZ_pEX& x, const vec_ZZ_pEX& v, const ZZ_pEX& f) +{ + ZZ_pEXModulus F(f); + ZZ_pEX res; + res = 1; + long i; + for (i = 0; i < v.length(); i++) + MulMod(res, res, v[i], F); + x = res; +} + +NOTE: A ZZ_pEX may be used wherever a ZZ_pEXModulus is required, +and a ZZ_pEXModulus may be used wherever a ZZ_pEX is required. + + +\**************************************************************************/ + +class ZZ_pEXModulus { +public: + ZZ_pEXModulus(); // initially in an unusable state + + ZZ_pEXModulus(const ZZ_pEX& f); // initialize with f, deg(f) > 0 + + ZZ_pEXModulus(const ZZ_pEXModulus&); // copy + + ZZ_pEXModulus& operator=(const ZZ_pEXModulus&); // assignment + + ~ZZ_pEXModulus(); // destructor + + operator const ZZ_pEX& () const; // implicit read-only access to f + + const ZZ_pEX& val() const; // explicit read-only access to f +}; + +void build(ZZ_pEXModulus& F, const ZZ_pEX& f); +// pre-computes information about f and stores it in F. Must have +// deg(f) > 0. Note that the declaration ZZ_pEXModulus F(f) is +// equivalent to ZZ_pEXModulus F; build(F, f). + +// In the following, f refers to the polynomial f supplied to the +// build routine, and n = deg(f). 
+ + +long deg(const ZZ_pEXModulus& F); // return n=deg(f) + +void MulMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b, + const ZZ_pEXModulus& F); +ZZ_pEX MulMod(const ZZ_pEX& a, const ZZ_pEX& b, const ZZ_pEXModulus& F); +// x = (a * b) % f; deg(a), deg(b) < n + +void SqrMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEXModulus& F); +ZZ_pEX SqrMod(const ZZ_pEX& a, const ZZ_pEXModulus& F); +// x = a^2 % f; deg(a) < n + +void PowerMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ& e, const ZZ_pEXModulus& F); +ZZ_pEX PowerMod(const ZZ_pEX& a, const ZZ& e, const ZZ_pEXModulus& F); + +void PowerMod(ZZ_pEX& x, const ZZ_pEX& a, long e, const ZZ_pEXModulus& F); +ZZ_pEX PowerMod(const ZZ_pEX& a, long e, const ZZ_pEXModulus& F); + +// x = a^e % f; e >= 0, deg(a) < n. Uses a sliding window algorithm. +// (e may be negative) + +void PowerXMod(ZZ_pEX& x, const ZZ& e, const ZZ_pEXModulus& F); +ZZ_pEX PowerXMod(const ZZ& e, const ZZ_pEXModulus& F); + +void PowerXMod(ZZ_pEX& x, long e, const ZZ_pEXModulus& F); +ZZ_pEX PowerXMod(long e, const ZZ_pEXModulus& F); + +// x = X^e % f (e may be negative) + +void rem(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEXModulus& F); +// x = a % f + +void DivRem(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEXModulus& F); +// q = a/f, r = a%f + +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEXModulus& F); +// q = a/f + +// operator notation: + +ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_pEXModulus& F); +ZZ_pEX operator%(const ZZ_pEX& a, const ZZ_pEXModulus& F); + +ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_pEXModulus& F); +ZZ_pEX& operator%=(ZZ_pEX& x, const ZZ_pEXModulus& F); + + + +/**************************************************************************\ + + vectors of ZZ_pEX's + +\**************************************************************************/ + + +typedef Vec vec_ZZ_pEX; // backward compatibility + + + +/**************************************************************************\ + + Modular Composition + +Modular composition is the problem of computing g(h) mod f for +polynomials f, g, and h. + +The algorithm employed is that of Brent & Kung (Fast algorithms for +manipulating formal power series, JACM 25:581-595, 1978), which uses +O(n^{1/2}) modular polynomial multiplications, and O(n^2) scalar +operations. + + +\**************************************************************************/ + +void CompMod(ZZ_pEX& x, const ZZ_pEX& g, const ZZ_pEX& h, + const ZZ_pEXModulus& F); +ZZ_pEX CompMod(const ZZ_pEX& g, const ZZ_pEX& h, + const ZZ_pEXModulus& F); + +// x = g(h) mod f; deg(h) < n + +void Comp2Mod(ZZ_pEX& x1, ZZ_pEX& x2, const ZZ_pEX& g1, const ZZ_pEX& g2, + const ZZ_pEX& h, const ZZ_pEXModulus& F); +// xi = gi(h) mod f (i=1,2); deg(h) < n. + + +void Comp3Mod(ZZ_pEX& x1, ZZ_pEX& x2, ZZ_pEX& x3, + const ZZ_pEX& g1, const ZZ_pEX& g2, const ZZ_pEX& g3, + const ZZ_pEX& h, const ZZ_pEXModulus& F); +// xi = gi(h) mod f (i=1..3); deg(h) < n. + + + +/**************************************************************************\ + + Composition with Pre-Conditioning + +If a single h is going to be used with many g's then you should build +a ZZ_pEXArgument for h, and then use the compose routine below. The +routine build computes and stores h, h^2, ..., h^m mod f. After this +pre-computation, composing a polynomial of degree roughly n with h +takes n/m multiplies mod f, plus n^2 scalar multiplies. Thus, +increasing m increases the space requirement and the pre-computation +time, but reduces the composition time. 
+ +\**************************************************************************/ + + +struct ZZ_pEXArgument { + vec_ZZ_pEX H; +}; + +void build(ZZ_pEXArgument& H, const ZZ_pEX& h, const ZZ_pEXModulus& F, long m); +// Pre-Computes information about h. m > 0, deg(h) < n. + +void CompMod(ZZ_pEX& x, const ZZ_pEX& g, const ZZ_pEXArgument& H, + const ZZ_pEXModulus& F); + +ZZ_pEX CompMod(const ZZ_pEX& g, const ZZ_pEXArgument& H, + const ZZ_pEXModulus& F); + +extern long ZZ_pEXArgBound; + +// Initially 0. If this is set to a value greater than zero, then +// composition routines will allocate a table of no than about +// ZZ_pEXArgBound KB. Setting this value affects all compose routines +// and the power projection and minimal polynomial routines below, +// and indirectly affects many routines in ZZ_pEXFactoring. + +/**************************************************************************\ + + power projection routines + +\**************************************************************************/ + +void project(ZZ_pE& x, const ZZ_pEVector& a, const ZZ_pEX& b); +ZZ_pE project(const ZZ_pEVector& a, const ZZ_pEX& b); +// x = inner product of a with coefficient vector of b + + +void ProjectPowers(vec_ZZ_pE& x, const vec_ZZ_pE& a, long k, + const ZZ_pEX& h, const ZZ_pEXModulus& F); + +vec_ZZ_pE ProjectPowers(const vec_ZZ_pE& a, long k, + const ZZ_pEX& h, const ZZ_pEXModulus& F); + +// Computes the vector + +// project(a, 1), project(a, h), ..., project(a, h^{k-1} % f). + +// This operation is the "transpose" of the modular composition operation. + +void ProjectPowers(vec_ZZ_pE& x, const vec_ZZ_pE& a, long k, + const ZZ_pEXArgument& H, const ZZ_pEXModulus& F); + +vec_ZZ_pE ProjectPowers(const vec_ZZ_pE& a, long k, + const ZZ_pEXArgument& H, const ZZ_pEXModulus& F); + +// same as above, but uses a pre-computed ZZ_pEXArgument + + +class ZZ_pEXTransMultiplier { /* ... */ }; + +void build(ZZ_pEXTransMultiplier& B, const ZZ_pEX& b, const ZZ_pEXModulus& F); + +void UpdateMap(vec_ZZ_pE& x, const vec_ZZ_pE& a, + const ZZ_pEXMultiplier& B, const ZZ_pEXModulus& F); + +vec_ZZ_pE UpdateMap(const vec_ZZ_pE& a, + const ZZ_pEXMultiplier& B, const ZZ_pEXModulus& F); + +// Computes the vector + +// project(a, b), project(a, (b*X)%f), ..., project(a, (b*X^{n-1})%f) + +// Required: a.length() <= deg(F), deg(b) < deg(F). +// This is "transposed" MulMod by B. +// Input may have "high order" zeroes stripped. +// Output always has high order zeroes stripped. + + +/**************************************************************************\ + + Minimum Polynomials + +These routines should be used only when ZZ_pE is a field. + +All of these routines implement the algorithm from [Shoup, J. Symbolic +Comp. 17:371-391, 1994] and [Shoup, J. Symbolic Comp. 20:363-397, +1995], based on transposed modular composition and the +Berlekamp/Massey algorithm. 
+ +\**************************************************************************/ + + +void MinPolySeq(ZZ_pEX& h, const vec_ZZ_pE& a, long m); +ZZ_pEX MinPolySeq(const vec_ZZ_pE& a, long m); +// computes the minimum polynomial of a linealy generated sequence; m +// is a bound on the degree of the polynomial; required: a.length() >= +// 2*m + + +void ProbMinPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); +ZZ_pEX ProbMinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); + +void ProbMinPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F); +ZZ_pEX ProbMinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F); + +// computes the monic minimal polynomial if (g mod f). m = a bound on +// the degree of the minimal polynomial; in the second version, this +// argument defaults to n. The algorithm is probabilistic, always +// returns a divisor of the minimal polynomial, and returns a proper +// divisor with probability at most m/2^{ZZ_pE::degree()}. + +void MinPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); +ZZ_pEX MinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); + +void MinPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F); +ZZ_pEX MinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F); + +// same as above, but guarantees that result is correct + +void IrredPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); +ZZ_pEX IrredPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); + +void IrredPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F); +ZZ_pEX IrredPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F); + +// same as above, but assumes that f is irreducible, or at least that +// the minimal poly of g is itself irreducible. The algorithm is +// deterministic (and is always correct). + +/**************************************************************************\ + + Composition and Minimal Polynomials in towers + +These are implementations of algorithms that will be described +and analyzed in a forthcoming paper. + +The routines require that p is prime, but ZZ_pE need not be a field. + +\**************************************************************************/ + + +void CompTower(ZZ_pEX& x, const ZZ_pX& g, const ZZ_pEXArgument& h, + const ZZ_pEXModulus& F); + +ZZ_pEX CompTower(const ZZ_pX& g, const ZZ_pEXArgument& h, + const ZZ_pEXModulus& F); + +void CompTower(ZZ_pEX& x, const ZZ_pX& g, const ZZ_pEX& h, + const ZZ_pEXModulus& F); + +ZZ_pEX CompTower(const ZZ_pX& g, const ZZ_pEX& h, + const ZZ_pEXModulus& F); + + +// x = g(h) mod f + + +void ProbMinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, + long m); + +ZZ_pX ProbMinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); + +void ProbMinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F); + +ZZ_pX ProbMinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F); + +// Uses a probabilistic algorithm to compute the minimal +// polynomial of (g mod f) over ZZ_p. +// The parameter m is a bound on the degree of the minimal polynomial +// (default = deg(f)*ZZ_pE::degree()). +// In general, the result will be a divisor of the true minimimal +// polynomial. For correct results, use the MinPoly routines below. 
+ + + +void MinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); + +ZZ_pX MinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); + +void MinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F); + +ZZ_pX MinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F); + +// Same as above, but result is always correct. + + +void IrredPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); + +ZZ_pX IrredPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); + +void IrredPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F); + +ZZ_pX IrredPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F); + +// Same as above, but assumes the minimal polynomial is +// irreducible, and uses a slightly faster, deterministic algorithm. + + +/**************************************************************************\ + + Traces, norms, resultants + +\**************************************************************************/ + + +void TraceMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEXModulus& F); +ZZ_pE TraceMod(const ZZ_pEX& a, const ZZ_pEXModulus& F); + +void TraceMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEX& f); +ZZ_pE TraceMod(const ZZ_pEX& a, const ZZ_pEXModulus& f); +// x = Trace(a mod f); deg(a) < deg(f) + + +void TraceVec(vec_ZZ_pE& S, const ZZ_pEX& f); +vec_ZZ_pE TraceVec(const ZZ_pEX& f); +// S[i] = Trace(X^i mod f), i = 0..deg(f)-1; 0 < deg(f) + +// The above trace routines implement the asymptotically fast trace +// algorithm from [von zur Gathen and Shoup, Computational Complexity, +// 1992]. + +void NormMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEX& f); +ZZ_pE NormMod(const ZZ_pEX& a, const ZZ_pEX& f); +// x = Norm(a mod f); 0 < deg(f), deg(a) < deg(f) + +void resultant(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEX& b); +ZZ_pE resultant(const ZZ_pEX& a, const ZZ_pEX& b); +// x = resultant(a, b) + +// NormMod and resultant require that ZZ_pE is a field. + + + + +/**************************************************************************\ + + Miscellany + + +\**************************************************************************/ + + +void clear(ZZ_pEX& x) // x = 0 +void set(ZZ_pEX& x); // x = 1 + +void ZZ_pEX::kill(); +// f.kill() sets f to 0 and frees all memory held by f. Equivalent to +// f.rep.kill(). + +ZZ_pEX::ZZ_pEX(INIT_SIZE_TYPE, long n); +// ZZ_pEX(INIT_SIZE, n) initializes to zero, but space is pre-allocated +// for n coefficients + +static const ZZ_pEX& zero(); +// ZZ_pEX::zero() is a read-only reference to 0 + +void ZZ_pEX::swap(ZZ_pEX& x); +void swap(ZZ_pEX& x, ZZ_pEX& y); +// swap (via "pointer swapping") + + +ZZ_pEX::ZZ_pEX(long i, const ZZ_pE& c); +ZZ_pEX::ZZ_pEX(long i, const ZZ_p& c); +ZZ_pEX::ZZ_pEX(long i, long c); +// initialize to c*X^i, provided for backward compatibility diff --git a/thirdparty/linux/ntl/doc/ZZ_pEXFactoring.cpp.html b/thirdparty/linux/ntl/doc/ZZ_pEXFactoring.cpp.html new file mode 100644 index 0000000000..78fee01e14 --- /dev/null +++ b/thirdparty/linux/ntl/doc/ZZ_pEXFactoring.cpp.html @@ -0,0 +1,199 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/ZZ_pEXFactoring.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: ZZ_pEXFactoring
+
+SUMMARY:
+
+Routines are provided for factorization of polynomials over ZZ_pE, as
+well as routines for related problems such as testing irreducibility
+and constructing irreducible polynomials of given degree.
+
+\**************************************************************************/
+
+#include <NTL/ZZ_pEX.h>
+#include <NTL/pair_ZZ_pEX_long.h>
+
+void SquareFreeDecomp(vec_pair_ZZ_pEX_long& u, const ZZ_pEX& f);
+vec_pair_ZZ_pEX_long SquareFreeDecomp(const ZZ_pEX& f);
+
+// Performs square-free decomposition.  f must be monic.  If f =
+// prod_i g_i^i, then u is set to a list of pairs (g_i, i).  The list
+// is in increasing order of i, with trivial terms (i.e., g_i = 1)
+// deleted.
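+
+// As a small usage sketch (illustrative, not part of the NTL sources;
+// it assumes ZZ_p and ZZ_pE have already been initialized), the
+// decomposition can be multiplied back together to recover f:
+
+#include <NTL/ZZ_pEXFactoring.h>
+
+void SqrFreeCheck(const ZZ_pEX& f)   // f monic
+{
+   vec_pair_ZZ_pEX_long u;
+   SquareFreeDecomp(u, f);
+
+   ZZ_pEX g;
+   set(g);                          // g = 1
+   for (long i = 0; i < u.length(); i++)
+      g *= power(u[i].a, u[i].b);   // multiply g_i^i back in
+
+   // at this point g == f
+}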
+
+
+void FindRoots(vec_ZZ_pE& x, const ZZ_pEX& f);
+vec_ZZ_pE FindRoots(const ZZ_pEX& f);
+
+// f is monic, and has deg(f) distinct roots.  returns the list of
+// roots
+
+void FindRoot(ZZ_pE& root, const ZZ_pEX& f);
+ZZ_pE FindRoot(const ZZ_pEX& f);
+
+// finds a single root of f.  assumes that f is monic and splits into
+// distinct linear factors
+
+
+void NewDDF(vec_pair_ZZ_pEX_long& factors, const ZZ_pEX& f,
+            const ZZ_pEX& h, long verbose=0);
+
+vec_pair_ZZ_pEX_long NewDDF(const ZZ_pEX& f, const ZZ_pEX& h,
+         long verbose=0);
+
+
+// This computes a distinct-degree factorization.  The input must be
+// monic and square-free.  factors is set to a list of pairs (g, d),
+// where g is the product of all irreducible factors of f of degree d.
+// Only nontrivial pairs (i.e., g != 1) are included.  The polynomial
+// h is assumed to be equal to X^{ZZ_pE::cardinality()} mod f.  
+
+// This routine implements the baby step/giant step algorithm
+// of [Kaltofen and Shoup, STOC 1995], and is
+// further described in [Shoup, J. Symbolic Comp. 20:363-397, 1995].
+
+// NOTE: When factoring "large" polynomials,
+// this routine uses external files to store some intermediate
+// results, which are removed if the routine terminates normally.
+// These files are stored in the current directory under names of the
+// form tmp-*.
+// The definition of "large" is controlled by the variable
+
+      extern double ZZ_pEXFileThresh;
+
+// which can be set by the user.  If the size of the tables
+// exceeds ZZ_pEXFileThresh KB, external files are used.
+// Initial value is NTL_FILE_THRESH (defined in tools.h).
+
+
+
+
+void EDF(vec_ZZ_pEX& factors, const ZZ_pEX& f, const ZZ_pEX& h,
+         long d, long verbose=0);
+
+vec_ZZ_pEX EDF(const ZZ_pEX& f, const ZZ_pEX& h,
+         long d, long verbose=0);
+
+// Performs equal-degree factorization.  f is monic, square-free, and
+// all irreducible factors have the same degree.  h = X^{ZZ_pE::cardinality()} mod
+// f.  d = degree of irreducible factors of f.  This routine
+// implements the algorithm of [von zur Gathen and Shoup,
+// Computational Complexity 2:187-224, 1992]
+
+void RootEDF(vec_ZZ_pEX& factors, const ZZ_pEX& f, long verbose=0);
+vec_ZZ_pEX RootEDF(const ZZ_pEX& f, long verbose=0);
+
+// EDF for d==1
+
+
+void SFCanZass(vec_ZZ_pEX& factors, const ZZ_pEX& f, long verbose=0);
+vec_ZZ_pEX SFCanZass(const ZZ_pEX& f, long verbose=0);
+
+// Assumes f is monic and square-free.  Returns the list of factors of f.
+// Uses "Cantor/Zassenhaus" approach, using the routines NewDDF and
+// EDF above.
+
+
+void CanZass(vec_pair_ZZ_pEX_long& factors, const ZZ_pEX& f,
+             long verbose=0);
+
+vec_pair_ZZ_pEX_long CanZass(const ZZ_pEX& f, long verbose=0);
+
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SquareFreeDecomp and SFCanZass.
+
+// NOTE: these routines use modular composition.  The space
+// used for the required tables can be controlled by the variable
+// ZZ_pEXArgBound (see ZZ_pEX.txt).
+
+
+
+void mul(ZZ_pEX& f, const vec_pair_ZZ_pEX_long& v);
+ZZ_pEX mul(const vec_pair_ZZ_pEX_long& v);
+
+// multiplies polynomials, with multiplicities
+
+
+/**************************************************************************\
+
+                            Irreducible Polynomials
+
+\**************************************************************************/
+
+long ProbIrredTest(const ZZ_pEX& f, long iter=1);
+
+// performs a fast, probabilistic irreducibility test.  The test can
+// err only if f is reducible, and the error probability is bounded by
+// ZZ_pE::cardinality()^{-iter}.  This implements an algorithm from [Shoup,
+// J. Symbolic Comp. 17:371-391, 1994].
+
+long DetIrredTest(const ZZ_pEX& f);
+
+// performs a recursive deterministic irreducibility test.  Fast in
+// the worst-case (when input is irreducible).  This implements an
+// algorithm from [Shoup, J. Symbolic Comp. 17:371-391, 1994].
+
+long IterIrredTest(const ZZ_pEX& f);
+
+// performs an iterative deterministic irreducibility test, based on
+// DDF.  Fast on average (when f has a small factor).
+
+void BuildIrred(ZZ_pEX& f, long n);
+ZZ_pEX BuildIrred_ZZ_pEX(long n);
+
+// Build a monic irreducible poly of degree n.
+
+void BuildRandomIrred(ZZ_pEX& f, const ZZ_pEX& g);
+ZZ_pEX BuildRandomIrred(const ZZ_pEX& g);
+
+// g is a monic irreducible polynomial.  Constructs a random monic
+// irreducible polynomial f of the same degree.
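+
+// For example (a sketch, assuming ZZ_p and ZZ_pE are initialized):
+
+void IrredDemo(long n)
+{
+   ZZ_pEX f, g;
+   BuildIrred(f, n);         // a fixed monic irreducible of degree n
+   BuildRandomIrred(g, f);   // a random one of the same degree
+   // DetIrredTest(g) == 1 and ProbIrredTest(g) == 1 here
+}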
+
+
+long IterComputeDegree(const ZZ_pEX& h, const ZZ_pEXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial, and h =
+// X^{ZZ_pE::cardinality()} mod f.  The common degree of the irreducible
+// factors of f is computed.  Uses a "baby step/giant step" algorithm, similar
+// to NewDDF.  Although asymptotically slower than RecComputeDegree
+// (below), it is faster for reasonably sized inputs.
+
+long RecComputeDegree(const ZZ_pEX& h, const ZZ_pEXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial,
+// h = X^{ZZ_pE::cardinality()} mod f.  
+// The common degree of the irreducible factors of f is
+// computed.  Uses a recursive algorithm similar to DetIrredTest.
+
+void TraceMap(ZZ_pEX& w, const ZZ_pEX& a, long d, const ZZ_pEXModulus& F,
+              const ZZ_pEX& h);
+
+ZZ_pEX TraceMap(const ZZ_pEX& a, long d, const ZZ_pEXModulus& F,
+              const ZZ_pEX& h);
+
+// Computes w = a + a^q + ... + a^{q^{d-1}} mod f; it is assumed that d >= 0,
+// and h = X^q mod f, q a power of ZZ_pE::cardinality().  This routine
+// implements an algorithm from [von zur Gathen and Shoup,
+// Computational Complexity 2:187-224, 1992]
+
+void PowerCompose(ZZ_pEX& w, const ZZ_pEX& h, long d, const ZZ_pEXModulus& F);
+
+ZZ_pEX PowerCompose(const ZZ_pEX& h, long d, const ZZ_pEXModulus& F);
+
+// Computes w = X^{q^d} mod f; it is assumed that d >= 0, and h = X^q
+// mod f, q a power of ZZ_pE::cardinality().  This routine implements an
+// algorithm from [von zur Gathen and Shoup, Computational Complexity
+// 2:187-224, 1992]
+
+
+ diff --git a/thirdparty/linux/ntl/doc/ZZ_pEXFactoring.txt b/thirdparty/linux/ntl/doc/ZZ_pEXFactoring.txt new file mode 100644 index 0000000000..3859a8c9ec --- /dev/null +++ b/thirdparty/linux/ntl/doc/ZZ_pEXFactoring.txt @@ -0,0 +1,189 @@ + +/**************************************************************************\ + +MODULE: ZZ_pEXFactoring + +SUMMARY: + +Routines are provided for factorization of polynomials over ZZ_pE, as +well as routines for related problems such as testing irreducibility +and constructing irreducible polynomials of given degree. + +\**************************************************************************/ + +#include +#include + +void SquareFreeDecomp(vec_pair_ZZ_pEX_long& u, const ZZ_pEX& f); +vec_pair_ZZ_pEX_long SquareFreeDecomp(const ZZ_pEX& f); + +// Performs square-free decomposition. f must be monic. If f = +// prod_i g_i^i, then u is set to a list of pairs (g_i, i). The list +// is is increasing order of i, with trivial terms (i.e., g_i = 1) +// deleted. + + +void FindRoots(vec_ZZ_pE& x, const ZZ_pEX& f); +vec_ZZ_pE FindRoots(const ZZ_pEX& f); + +// f is monic, and has deg(f) distinct roots. returns the list of +// roots + +void FindRoot(ZZ_pE& root, const ZZ_pEX& f); +ZZ_pE FindRoot(const ZZ_pEX& f); + +// finds a single root of f. assumes that f is monic and splits into +// distinct linear factors + + +void NewDDF(vec_pair_ZZ_pEX_long& factors, const ZZ_pEX& f, + const ZZ_pEX& h, long verbose=0); + +vec_pair_ZZ_pEX_long NewDDF(const ZZ_pEX& f, const ZZ_pEX& h, + long verbose=0); + + +// This computes a distinct-degree factorization. The input must be +// monic and square-free. factors is set to a list of pairs (g, d), +// where g is the product of all irreducible factors of f of degree d. +// Only nontrivial pairs (i.e., g != 1) are included. The polynomial +// h is assumed to be equal to X^{ZZ_pE::cardinality()} mod f. + +// This routine implements the baby step/giant step algorithm +// of [Kaltofen and Shoup, STOC 1995]. +// further described in [Shoup, J. Symbolic Comp. 20:363-397, 1995]. + +// NOTE: When factoring "large" polynomials, +// this routine uses external files to store some intermediate +// results, which are removed if the routine terminates normally. +// These files are stored in the current directory under names of the +// form tmp-*. +// The definition of "large" is controlled by the variable + + extern double ZZ_pEXFileThresh + +// which can be set by the user. If the sizes of the tables +// exceeds ZZ_pEXFileThresh KB, external files are used. +// Initial value is NTL_FILE_THRESH (defined in tools.h). + + + + +void EDF(vec_ZZ_pEX& factors, const ZZ_pEX& f, const ZZ_pEX& h, + long d, long verbose=0); + +vec_ZZ_pEX EDF(const ZZ_pEX& f, const ZZ_pEX& h, + long d, long verbose=0); + +// Performs equal-degree factorization. f is monic, square-free, and +// all irreducible factors have same degree. h = X^{ZZ_pE::cardinality()} mod +// f. d = degree of irreducible factors of f. This routine +// implements the algorithm of [von zur Gathen and Shoup, +// Computational Complexity 2:187-224, 1992] + +void RootEDF(vec_ZZ_pEX& factors, const ZZ_pEX& f, long verbose=0); +vec_ZZ_pEX RootEDF(const ZZ_pEX& f, long verbose=0); + +// EDF for d==1 + + +void SFCanZass(vec_ZZ_pEX& factors, const ZZ_pEX& f, long verbose=0); +vec_ZZ_pEX SFCanZass(const ZZ_pEX& f, long verbose=0); + +// Assumes f is monic and square-free. returns list of factors of f. +// Uses "Cantor/Zassenhaus" approach, using the routines NewDDF and +// EDF above. 
+ + +void CanZass(vec_pair_ZZ_pEX_long& factors, const ZZ_pEX& f, + long verbose=0); + +vec_pair_ZZ_pEX_long CanZass(const ZZ_pEX& f, long verbose=0); + + +// returns a list of factors, with multiplicities. f must be monic. +// Calls SquareFreeDecomp and SFCanZass. + +// NOTE: these routines use modular composition. The space +// used for the required tables can be controlled by the variable +// ZZ_pEXArgBound (see ZZ_pEX.txt). + + + +void mul(ZZ_pEX& f, const vec_pair_ZZ_pEX_long& v); +ZZ_pEX mul(const vec_pair_ZZ_pEX_long& v); + +// multiplies polynomials, with multiplicities + + +/**************************************************************************\ + + Irreducible Polynomials + +\**************************************************************************/ + +long ProbIrredTest(const ZZ_pEX& f, long iter=1); + +// performs a fast, probabilistic irreduciblity test. The test can +// err only if f is reducible, and the error probability is bounded by +// ZZ_pE::cardinality()^{-iter}. This implements an algorithm from [Shoup, +// J. Symbolic Comp. 17:371-391, 1994]. + +long DetIrredTest(const ZZ_pEX& f); + +// performs a recursive deterministic irreducibility test. Fast in +// the worst-case (when input is irreducible). This implements an +// algorithm from [Shoup, J. Symbolic Comp. 17:371-391, 1994]. + +long IterIrredTest(const ZZ_pEX& f); + +// performs an iterative deterministic irreducibility test, based on +// DDF. Fast on average (when f has a small factor). + +void BuildIrred(ZZ_pEX& f, long n); +ZZ_pEX BuildIrred_ZZ_pEX(long n); + +// Build a monic irreducible poly of degree n. + +void BuildRandomIrred(ZZ_pEX& f, const ZZ_pEX& g); +ZZ_pEX BuildRandomIrred(const ZZ_pEX& g); + +// g is a monic irreducible polynomial. Constructs a random monic +// irreducible polynomial f of the same degree. + + +long IterComputeDegree(const ZZ_pEX& h, const ZZ_pEXModulus& F); + +// f is assumed to be an "equal degree" polynomial, and h = +// X^{ZZ_pE::cardinality()} mod f. The common degree of the irreducible +// factors of f is computed. Uses a "baby step/giant step" algorithm, similar +// to NewDDF. Although asymptotocally slower than RecComputeDegree +// (below), it is faster for reasonably sized inputs. + +long RecComputeDegree(const ZZ_pEX& h, const ZZ_pEXModulus& F); + +// f is assumed to be an "equal degree" polynomial, +// h = X^{ZZ_pE::cardinality()} mod f. +// The common degree of the irreducible factors of f is +// computed Uses a recursive algorithm similar to DetIrredTest. + +void TraceMap(ZZ_pEX& w, const ZZ_pEX& a, long d, const ZZ_pEXModulus& F, + const ZZ_pEX& h); + +ZZ_pEX TraceMap(const ZZ_pEX& a, long d, const ZZ_pEXModulus& F, + const ZZ_pEX& h); + +// Computes w = a+a^q+...+^{q^{d-1}} mod f; it is assumed that d >= 0, +// and h = X^q mod f, q a power of ZZ_pE::cardinality(). This routine +// implements an algorithm from [von zur Gathen and Shoup, +// Computational Complexity 2:187-224, 1992] + +void PowerCompose(ZZ_pEX& w, const ZZ_pEX& h, long d, const ZZ_pEXModulus& F); + +ZZ_pEX PowerCompose(const ZZ_pEX& h, long d, const ZZ_pEXModulus& F); + +// Computes w = X^{q^d} mod f; it is assumed that d >= 0, and h = X^q +// mod f, q a power of ZZ_pE::cardinality(). 
This routine implements an +// algorithm from [von zur Gathen and Shoup, Computational Complexity +// 2:187-224, 1992] + diff --git a/thirdparty/linux/ntl/doc/ZZ_pX.cpp.html b/thirdparty/linux/ntl/doc/ZZ_pX.cpp.html new file mode 100644 index 0000000000..5e83b4049c --- /dev/null +++ b/thirdparty/linux/ntl/doc/ZZ_pX.cpp.html @@ -0,0 +1,907 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/ZZ_pX.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: ZZ_pX
+
+SUMMARY:
+
+The class ZZ_pX implements polynomial arithmetic modulo p.
+
+Polynomial arithmetic is implemented using the FFT, combined with the
+Chinese Remainder Theorem.  A more detailed description of the
+techniques used here can be found in [Shoup, J. Symbolic
+Comp. 20:363-397, 1995].
+
+Small degree polynomials are multiplied either with classical
+or Karatsuba algorithms.
+
+\**************************************************************************/
+
+#include <NTL/ZZ_p.h>
+#include <NTL/vec_ZZ_p.h>
+
+class ZZ_pX {
+public:
+
+   ZZ_pX(); // initialize to 0
+
+   ZZ_pX(const ZZ_pX& a); // copy constructor
+   explicit ZZ_pX(const ZZ_p& a); // promotion
+   explicit ZZ_pX(long a); // promotion
+
+   ZZ_pX& operator=(const ZZ_pX& a); // assignment
+   ZZ_pX& operator=(const ZZ_p& a); // assignment
+   ZZ_pX& operator=(long a); // assignment
+
+   ~ZZ_pX(); // destructor
+
+   ZZ_pX(INIT_MONO_TYPE, long i, const ZZ_p& c);  
+   ZZ_pX(INIT_MONO_TYPE, long i, long c);
+   // initialize to c*X^i, invoke as ZZ_pX(INIT_MONO, i, c)
+
+   ZZ_pX(INIT_MONO_TYPE, long i);
+   // initialize to X^i, invoke as ZZ_pX(INIT_MONO, i)
+
+
+   // typedefs to aid in generic programming
+   typedef ZZ_p coeff_type;
+   typedef ZZ_pE residue_type;
+   typedef ZZ_pXModulus modulus_type;
+   typedef ZZ_pXMultiplier multiplier_type;
+   typedef FFTRep fft_type;
+
+
+   // ...
+
+  
+};
+
+
+
+
+
+/**************************************************************************\
+
+                              Accessing coefficients
+
+The degree of a polynomial f is obtained as deg(f),
+where the zero polynomial, by definition, has degree -1.
+
+A polynomial f is represented as a coefficient vector.
+Coefficients may be accessed in one of two ways.
+
+The safe, high-level method is to call the function
+coeff(f, i) to get the coefficient of X^i in the polynomial f,
+and to call the function SetCoeff(f, i, a) to set the coefficient
+of X^i in f to the scalar a.
+
+One can also access the coefficients more directly via a lower level
+interface.  The coefficient of X^i in f may be accessed using
+subscript notation f[i].  In addition, one may write f.SetLength(n)
+to set the length of the underlying coefficient vector to n,
+and f.SetMaxLength(n) to allocate space for n coefficients,
+without changing the coefficient vector itself.
+
+After setting coefficients using this low-level interface,
+one must ensure that leading zeros in the coefficient vector
+are stripped afterwards by calling the function f.normalize().
+
+NOTE: the coefficient vector of f may also be accessed directly
+as f.rep; however, this is not recommended. Also, for a properly
+normalized polynomial f, we have f.rep.length() == deg(f)+1,
+and deg(f) >= 0  =>  f.rep[deg(f)] != 0.
+
+\**************************************************************************/
+
+
+
+long deg(const ZZ_pX& a);  // return deg(a); deg(0) == -1.
+
+const ZZ_p& coeff(const ZZ_pX& a, long i);
+// returns the coefficient of X^i, or zero if i not in range
+
+const ZZ_p& LeadCoeff(const ZZ_pX& a);
+// returns leading term of a, or zero if a == 0
+
+const ZZ_p& ConstTerm(const ZZ_pX& a);
+// returns constant term of a, or zero if a == 0
+
+void SetCoeff(ZZ_pX& x, long i, const ZZ_p& a);
+void SetCoeff(ZZ_pX& x, long i, long a);
+// makes coefficient of X^i equal to a; error is raised if i < 0
+
+void SetCoeff(ZZ_pX& x, long i);
+// makes coefficient of X^i equal to 1;  error is raised if i < 0
+
+void SetX(ZZ_pX& x); // x is set to the monomial X
+
+long IsX(const ZZ_pX& a); // test if a = X
+
+
+
+
+ZZ_p& ZZ_pX::operator[](long i);
+const ZZ_p& ZZ_pX::operator[](long i) const;
+// indexing operators: f[i] is the coefficient of X^i ---
+// i should satisfy i >= 0 and i <= deg(f).
+// No range checking (unless NTL_RANGE_CHECK is defined).
+
+
+void ZZ_pX::SetLength(long n);
+// f.SetLength(n) sets the length of the underlying coefficient
+// vector to n --- after this call, indexing f[i] for i = 0..n-1
+// is valid.
+
+void ZZ_pX::normalize();  
+// f.normalize() strips leading zeros from coefficient vector of f
+
+void ZZ_pX::SetMaxLength(long n);
+// f.SetMaxLength(n) pre-allocates space for n coefficients.  The
+// polynomial that f represents is unchanged.
+
+
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+long operator==(const ZZ_pX& a, const ZZ_pX& b);
+long operator!=(const ZZ_pX& a, const ZZ_pX& b);
+
+// PROMOTIONS: operators ==, != promote {long, ZZ_p} to ZZ_pX on (a, b).
+
+long IsZero(const ZZ_pX& a); // test for 0
+long IsOne(const ZZ_pX& a); // test for 1
+
+
+/**************************************************************************\
+
+                                   Addition
+
+\**************************************************************************/
+
+
+// operator notation:
+
+ZZ_pX operator+(const ZZ_pX& a, const ZZ_pX& b);
+ZZ_pX operator-(const ZZ_pX& a, const ZZ_pX& b);
+
+ZZ_pX operator-(const ZZ_pX& a); // unary -
+
+ZZ_pX& operator+=(ZZ_pX& x, const ZZ_pX& a);
+ZZ_pX& operator+=(ZZ_pX& x, const ZZ_p& a);
+ZZ_pX& operator+=(ZZ_pX& x, long a);
+
+ZZ_pX& operator-=(ZZ_pX& x, const ZZ_pX& a);
+ZZ_pX& operator-=(ZZ_pX& x, const ZZ_p& a);
+ZZ_pX& operator-=(ZZ_pX& x, long a);
+
+ZZ_pX& operator++(ZZ_pX& x);  // prefix
+void operator++(ZZ_pX& x, int);  // postfix
+
+ZZ_pX& operator--(ZZ_pX& x);  // prefix
+void operator--(ZZ_pX& x, int);  // postfix
+
+// procedural versions:
+
+
+void add(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); // x = a + b
+void sub(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); // x = a - b
+void negate(ZZ_pX& x, const ZZ_pX& a); // x = -a
+
+
+// PROMOTIONS: binary +, - and procedures add, sub promote
+// {long, ZZ_p} to ZZ_pX on (a, b).
+
+
+/**************************************************************************\
+
+                               Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ_pX operator*(const ZZ_pX& a, const ZZ_pX& b);
+
+ZZ_pX& operator*=(ZZ_pX& x, const ZZ_pX& a);
+ZZ_pX& operator*=(ZZ_pX& x, const ZZ_p& a);
+ZZ_pX& operator*=(ZZ_pX& x, long a);
+
+// procedural versions:
+
+void mul(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); // x = a * b
+
+void sqr(ZZ_pX& x, const ZZ_pX& a); // x = a^2
+ZZ_pX sqr(const ZZ_pX& a);
+
+// PROMOTIONS: operator * and procedure mul promote {long, ZZ_p} to ZZ_pX
+// on (a, b).
+
+void power(ZZ_pX& x, const ZZ_pX& a, long e);  // x = a^e (e >= 0)
+ZZ_pX power(const ZZ_pX& a, long e);
+
+
+/**************************************************************************\
+
+                               Shift Operations
+
+LeftShift by n means multiplication by X^n
+RightShift by n means division by X^n
+
+A negative shift amount reverses the direction of the shift.
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ_pX operator<<(const ZZ_pX& a, long n);
+ZZ_pX operator>>(const ZZ_pX& a, long n);
+
+ZZ_pX& operator<<=(ZZ_pX& x, long n);
+ZZ_pX& operator>>=(ZZ_pX& x, long n);
+
+// procedural versions:
+
+void LeftShift(ZZ_pX& x, const ZZ_pX& a, long n);
+ZZ_pX LeftShift(const ZZ_pX& a, long n);
+
+void RightShift(ZZ_pX& x, const ZZ_pX& a, long n);
+ZZ_pX RightShift(const ZZ_pX& a, long n);
+
+
+
+/**************************************************************************\
+
+                                  Division
+
+\**************************************************************************/
+
+// operator notation:
+
+ZZ_pX operator/(const ZZ_pX& a, const ZZ_pX& b);
+ZZ_pX operator/(const ZZ_pX& a, const ZZ_p& b);
+ZZ_pX operator/(const ZZ_pX& a, long b);
+
+
+ZZ_pX& operator/=(ZZ_pX& x, const ZZ_pX& b);
+ZZ_pX& operator/=(ZZ_pX& x, const ZZ_p& b);
+ZZ_pX& operator/=(ZZ_pX& x, long b);
+
+ZZ_pX operator%(const ZZ_pX& a, const ZZ_pX& b);
+
+ZZ_pX& operator%=(ZZ_pX& x, const ZZ_pX& b);
+
+
+// procedural versions:
+
+
+void DivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b);
+// q = a/b, r = a%b
+
+void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b);
+void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_p& b);
+void div(ZZ_pX& q, const ZZ_pX& a, long b);
+// q = a/b
+
+void rem(ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b);
+// r = a%b
+
+long divide(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0
+
+long divide(const ZZ_pX& a, const ZZ_pX& b);
+// if b | a, returns 1; otherwise returns 0
+
+
+/**************************************************************************\
+
+                                   GCD's
+
+These routines are intended for use when p is prime.
+
+\**************************************************************************/
+
+
+void GCD(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b);
+ZZ_pX GCD(const ZZ_pX& a, const ZZ_pX& b);
+// x = GCD(a, b),  x is always monic (or zero if a==b==0).
+
+
+void XGCD(ZZ_pX& d, ZZ_pX& s, ZZ_pX& t, const ZZ_pX& a, const ZZ_pX& b);
+// d = gcd(a,b), a s + b t = d
+
+
+// NOTE: A classical algorithm is used, switching over to a
+// "half-GCD" algorithm for large degree
+
+
+/**************************************************************************\
+
+                                  Input/Output
+
+I/O format:
+
+   [a_0 a_1 ... a_n],
+
+represents the polynomial a_0 + a_1*X + ... + a_n*X^n.
+
+On output, all coefficients will be integers between 0 and p-1, and
+a_n not zero (the zero polynomial is [ ]).  On input, the coefficients
+are arbitrary integers which are reduced modulo p, and leading zeros
+stripped.
+
+\**************************************************************************/
+
+istream& operator>>(istream& s, ZZ_pX& x);
+ostream& operator<<(ostream& s, const ZZ_pX& a);
+
+
+/**************************************************************************\
+
+                              Some utility routines
+
+\**************************************************************************/
+
+void diff(ZZ_pX& x, const ZZ_pX& a); // x = derivative of a
+ZZ_pX diff(const ZZ_pX& a);
+
+void MakeMonic(ZZ_pX& x);
+// if x != 0 makes x into its monic associate; LeadCoeff(x) must be
+// invertible in this case.
+
+void reverse(ZZ_pX& x, const ZZ_pX& a, long hi);
+ZZ_pX reverse(const ZZ_pX& a, long hi);
+
+void reverse(ZZ_pX& x, const ZZ_pX& a);
+ZZ_pX reverse(const ZZ_pX& a);
+
+// x = reverse of a[0]..a[hi] (hi >= -1);
+// hi defaults to deg(a) in second version
+
+void VectorCopy(vec_ZZ_p& x, const ZZ_pX& a, long n);
+vec_ZZ_p VectorCopy(const ZZ_pX& a, long n);
+// x = copy of coefficient vector of a of length exactly n.
+// input is truncated or padded with zeroes as appropriate.
+
+
+
+
+/**************************************************************************\
+
+                             Random Polynomials
+
+\**************************************************************************/
+
+void random(ZZ_pX& x, long n);
+ZZ_pX random_ZZ_pX(long n);
+// generate a random polynomial of degree < n
+
+
+
+/**************************************************************************\
+
+                    Polynomial Evaluation and related problems
+
+\**************************************************************************/
+
+
+void BuildFromRoots(ZZ_pX& x, const vec_ZZ_p& a);
+ZZ_pX BuildFromRoots(const vec_ZZ_p& a);
+// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n = a.length()
+
+void eval(ZZ_p& b, const ZZ_pX& f, const ZZ_p& a);
+ZZ_p eval(const ZZ_pX& f, const ZZ_p& a);
+// b = f(a)
+
+void eval(vec_ZZ_p& b, const ZZ_pX& f, const vec_ZZ_p& a);
+vec_ZZ_p eval(const ZZ_pX& f, const vec_ZZ_p& a);
+//  b.SetLength(a.length()).  b[i] = f(a[i]) for 0 <= i < a.length()
+
+void interpolate(ZZ_pX& f, const vec_ZZ_p& a, const vec_ZZ_p& b);
+ZZ_pX interpolate(const vec_ZZ_p& a, const vec_ZZ_p& b);
+// interpolates the polynomial f satisfying f(a[i]) = b[i].  p should
+// be prime.
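+
+// Sketch of an evaluate/interpolate round trip (p prime, the a[i]
+// distinct, and a.length() > deg(f)):
+
+void RoundTrip(const ZZ_pX& f, const vec_ZZ_p& a)
+{
+   vec_ZZ_p b = eval(f, a);        // b[i] = f(a[i])
+   ZZ_pX g = interpolate(a, b);
+   // at this point g == f
+}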
+
+/**************************************************************************\
+
+                       Arithmetic mod X^n
+
+All routines require n >= 0, otherwise an error is raised.
+
+\**************************************************************************/
+
+void trunc(ZZ_pX& x, const ZZ_pX& a, long n); // x = a % X^n
+ZZ_pX trunc(const ZZ_pX& a, long n);
+
+void MulTrunc(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, long n);
+ZZ_pX MulTrunc(const ZZ_pX& a, const ZZ_pX& b, long n);
+// x = a * b % X^n
+
+void SqrTrunc(ZZ_pX& x, const ZZ_pX& a, long n);
+ZZ_pX SqrTrunc(const ZZ_pX& a, long n);
+// x = a^2 % X^n
+
+void InvTrunc(ZZ_pX& x, const ZZ_pX& a, long n);
+ZZ_pX InvTrunc(const ZZ_pX& a, long n);
+// computes x = a^{-1} % X^n.  Must have ConstTerm(a) invertible.
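+
+// Illustrative check of the power-series inverse (a sketch):
+
+void InvCheck(const ZZ_pX& a, long n)   // ConstTerm(a) invertible, n > 0
+{
+   ZZ_pX x = InvTrunc(a, n);
+   ZZ_pX e = MulTrunc(a, x, n);   // a * a^{-1} mod X^n
+   // at this point e == 1
+}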
+
+/**************************************************************************\
+
+                Modular Arithmetic (without pre-conditioning)
+
+Arithmetic mod f.
+
+All inputs and outputs are polynomials of degree less than deg(f), and
+deg(f) > 0.
+
+NOTE: if you want to do many computations with a fixed f, use the
+ZZ_pXModulus data structure and associated routines below for better
+performance.
+
+\**************************************************************************/
+
+void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, const ZZ_pX& f);
+ZZ_pX MulMod(const ZZ_pX& a, const ZZ_pX& b, const ZZ_pX& f);
+// x = (a * b) % f
+
+void SqrMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f);
+ZZ_pX SqrMod(const ZZ_pX& a, const ZZ_pX& f);
+// x = a^2 % f
+
+void MulByXMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f);
+ZZ_pX MulByXMod(const ZZ_pX& a, const ZZ_pX& f);
+// x = (a * X) mod f
+// NOTE: thread boosting enabled only if x does not alias a
+
+void InvMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f);
+ZZ_pX InvMod(const ZZ_pX& a, const ZZ_pX& f);
+// x = a^{-1} % f, error if a is not invertible
+
+long InvModStatus(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f);
+// if (a, f) = 1, returns 0 and sets x = a^{-1} % f; otherwise,
+// returns 1 and sets x = (a, f)
+
+
+// for modular exponentiation, see below
+
+
+
+/**************************************************************************\
+
+                     Modular Arithmetic with Pre-Conditioning
+
+If you need to do a lot of arithmetic modulo a fixed f, build a
+ZZ_pXModulus F for f.  This pre-computes information about f that
+speeds up subsequent computations.
+
+It is required that deg(f) > 0 and that LeadCoeff(f) is invertible.
+
+As an example, the following routine computes the product modulo f of a vector
+of polynomials.
+
+#include <NTL/ZZ_pX.h>
+
+void product(ZZ_pX& x, const vec_ZZ_pX& v, const ZZ_pX& f)
+{
+   ZZ_pXModulus F(f);
+   ZZ_pX res;
+   res = 1;
+   long i;
+   for (i = 0; i < v.length(); i++)
+      MulMod(res, res, v[i], F);
+   x = res;
+}
+
+Note that automatic conversions are provided so that a ZZ_pX can
+be used wherever a ZZ_pXModulus is required, and a ZZ_pXModulus
+can be used wherever a ZZ_pX is required.
+
+\**************************************************************************/
+
+
+class ZZ_pXModulus {
+public:
+   ZZ_pXModulus(); // initially in an unusable state
+
+   ZZ_pXModulus(const ZZ_pXModulus&);  // copy
+
+   ZZ_pXModulus& operator=(const ZZ_pXModulus&); // assignment
+
+   ~ZZ_pXModulus();
+
+   ZZ_pXModulus(const ZZ_pX& f); // initialize with f, deg(f) > 0
+
+   operator const ZZ_pX& () const;
+   // read-only access to f, implicit conversion operator
+
+   const ZZ_pX& val() const;
+   // read-only access to f, explicit notation
+
+};
+
+void build(ZZ_pXModulus& F, const ZZ_pX& f);
+// pre-computes information about f and stores it in F.
+// Note that the declaration ZZ_pXModulus F(f) is equivalent to
+// ZZ_pXModulus F; build(F, f).
+
+// In the following, f refers to the polynomial f supplied to the
+// build routine, and n = deg(f).
+
+long deg(const ZZ_pXModulus& F);  // return n=deg(f)
+
+void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, const ZZ_pXModulus& F);
+ZZ_pX MulMod(const ZZ_pX& a, const ZZ_pX& b, const ZZ_pXModulus& F);
+// x = (a * b) % f; deg(a), deg(b) < n
+
+void SqrMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F);
+ZZ_pX SqrMod(const ZZ_pX& a, const ZZ_pXModulus& F);
+// x = a^2 % f; deg(a) < n
+
+void PowerMod(ZZ_pX& x, const ZZ_pX& a, const ZZ& e, const ZZ_pXModulus& F);
+ZZ_pX PowerMod(const ZZ_pX& a, const ZZ& e, const ZZ_pXModulus& F);
+
+void PowerMod(ZZ_pX& x, const ZZ_pX& a, long e, const ZZ_pXModulus& F);
+ZZ_pX PowerMod(const ZZ_pX& a, long e, const ZZ_pXModulus& F);
+
+// x = a^e % f; deg(a) < n (e may be negative)
+
+void PowerXMod(ZZ_pX& x, const ZZ& e, const ZZ_pXModulus& F);
+ZZ_pX PowerXMod(const ZZ& e, const ZZ_pXModulus& F);
+
+void PowerXMod(ZZ_pX& x, long e, const ZZ_pXModulus& F);
+ZZ_pX PowerXMod(long e, const ZZ_pXModulus& F);
+
+// x = X^e % f (e may be negative)
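+
+// For instance (a sketch), the Frobenius image X^p mod f, which the
+// factoring routines build on, is computed as:
+
+void Frobenius(ZZ_pX& h, const ZZ_pXModulus& F)
+{
+   PowerXMod(h, ZZ_p::modulus(), F);   // h = X^p mod f
+}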
+
+void PowerXPlusAMod(ZZ_pX& x, const ZZ_p& a, const ZZ& e,
+                    const ZZ_pXModulus& F);
+
+ZZ_pX PowerXPlusAMod(const ZZ_p& a, const ZZ& e,
+                           const ZZ_pXModulus& F);
+
+void PowerXPlusAMod(ZZ_pX& x, const ZZ_p& a, long e,
+                    const ZZ_pXModulus& F);
+
+ZZ_pX PowerXPlusAMod(const ZZ_p& a, long e,
+                           const ZZ_pXModulus& F);
+
+// x = (X + a)^e % f (e may be negative)
+
+
+void rem(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F);
+// x = a % f
+
+void DivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pXModulus& F);
+// q = a/f, r = a%f
+
+void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_pXModulus& F);
+// q = a/f
+
+// operator notation:
+
+ZZ_pX operator/(const ZZ_pX& a, const ZZ_pXModulus& F);
+ZZ_pX operator%(const ZZ_pX& a, const ZZ_pXModulus& F);
+
+ZZ_pX& operator/=(ZZ_pX& x, const ZZ_pXModulus& F);
+ZZ_pX& operator%=(ZZ_pX& x, const ZZ_pXModulus& F);
+
+
+
+/**************************************************************************\
+
+
+                                More Pre-Conditioning
+
+If you need to compute a * b % f for a fixed b, but for many a's, it
+is much more efficient to first build a ZZ_pXMultiplier B for b, and
+then use the MulMod routine below.
+
+Here is an example that multiplies each element of a vector by a fixed
+polynomial modulo f.
+
+#include <NTL/ZZ_pX.h>
+
+void mul(vec_ZZ_pX& v, const ZZ_pX& b, const ZZ_pX& f)
+{
+   ZZ_pXModulus F(f);
+   ZZ_pXMultiplier B(b, F);
+   long i;
+   for (i = 0; i < v.length(); i++)
+      MulMod(v[i], v[i], B, F);
+}
+
+\**************************************************************************/
+
+
+class ZZ_pXMultiplier {
+public:
+   ZZ_pXMultiplier(); // initially zero
+
+   ZZ_pXMultiplier(const ZZ_pX& b, const ZZ_pXModulus& F);
+      // initializes with b mod F, where deg(b) < deg(F)
+
+   ZZ_pXMultiplier(const ZZ_pXMultiplier&);  // copy
+
+   ZZ_pXMultiplier& operator=(const ZZ_pXMultiplier&);  // assignment
+
+   ~ZZ_pXMultiplier();
+
+   const ZZ_pX& val() const; // read-only access to b
+
+};
+
+
+void build(ZZ_pXMultiplier& B, const ZZ_pX& b, const ZZ_pXModulus& F);
+// pre-computes information about b and stores it in B; deg(b) <
+// deg(F)
+
+void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXMultiplier& B,
+                                      const ZZ_pXModulus& F);
+
+// x = (a * b) % F; deg(a) < deg(F)
+
+/**************************************************************************\
+
+                             vectors of ZZ_pX's
+
+\**************************************************************************/
+
+typedef Vec<ZZ_pX> vec_ZZ_pX; // backward compatibility
+
+
+/**************************************************************************\
+
+                              Modular Composition
+
+Modular composition is the problem of computing g(h) mod f for
+polynomials f, g, and h.
+
+The algorithm employed is that of Brent & Kung (Fast algorithms for
+manipulating formal power series, JACM 25:581-595, 1978), which uses
+O(n^{1/2}) modular polynomial multiplications, and O(n^2) scalar
+operations.
+
+
+\**************************************************************************/
+
+void CompMod(ZZ_pX& x, const ZZ_pX& g, const ZZ_pX& h, const ZZ_pXModulus& F);
+ZZ_pX CompMod(const ZZ_pX& g, const ZZ_pX& h,
+                    const ZZ_pXModulus& F);
+
+// x = g(h) mod f; deg(h) < n
+
+void Comp2Mod(ZZ_pX& x1, ZZ_pX& x2, const ZZ_pX& g1, const ZZ_pX& g2,
+              const ZZ_pX& h, const ZZ_pXModulus& F);
+// xi = gi(h) mod f (i=1,2); deg(h) < n.
+
+void Comp3Mod(ZZ_pX& x1, ZZ_pX& x2, ZZ_pX& x3,
+              const ZZ_pX& g1, const ZZ_pX& g2, const ZZ_pX& g3,
+              const ZZ_pX& h, const ZZ_pXModulus& F);
+// xi = gi(h) mod f (i=1..3); deg(h) < n.
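+
+// A small sketch of plain modular composition:
+
+void ComposeDemo(ZZ_pX& x, const ZZ_pX& g, const ZZ_pX& h, const ZZ_pX& f)
+{
+   ZZ_pXModulus F(f);
+   CompMod(x, g, h, F);   // x = g(h) mod f
+}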
+
+
+/**************************************************************************\
+
+                     Composition with Pre-Conditioning
+
+If a single h is going to be used with many g's then you should build
+a ZZ_pXArgument for h, and then use the compose routine below.  The
+routine build computes and stores h, h^2, ..., h^m mod f.  After this
+pre-computation, composing a polynomial of degree roughly n with h
+takes n/m multiplies mod f, plus n^2 scalar multiplies.  Thus,
+increasing m increases the space requirement and the pre-computation
+time, but reduces the composition time.
+
+\**************************************************************************/
+
+
+struct ZZ_pXArgument {
+   vec_ZZ_pX H;
+};
+
+void build(ZZ_pXArgument& H, const ZZ_pX& h, const ZZ_pXModulus& F, long m);
+// Pre-computes information about h.  m > 0, deg(h) < n.
+
+void CompMod(ZZ_pX& x, const ZZ_pX& g, const ZZ_pXArgument& H,
+             const ZZ_pXModulus& F);
+
+ZZ_pX CompMod(const ZZ_pX& g, const ZZ_pXArgument& H,
+                    const ZZ_pXModulus& F);
+
+extern long ZZ_pXArgBound;
+
+// Initially 0.  If this is set to a value greater than zero, then
+// composition routines will allocate a table of no more than about
+// ZZ_pXArgBound KB.  Setting this value affects all compose routines
+// and the power projection and minimal polynomial routines below,
+// and indirectly affects many routines in ZZ_pXFactoring.
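+
+// Sketch of the pre-conditioned pattern (illustrative; the table size
+// m = 32 is an arbitrary choice): a single h composed with many g's.
+
+void ComposeMany(vec_ZZ_pX& g, const ZZ_pX& h, const ZZ_pX& f)
+{
+   ZZ_pXModulus F(f);
+   ZZ_pXArgument H;
+   build(H, h, F, 32);          // stores h, h^2, ..., h^32 mod f
+
+   ZZ_pX t;
+   for (long i = 0; i < g.length(); i++) {
+      CompMod(t, g[i], H, F);   // t = g[i](h) mod f
+      g[i] = t;
+   }
+}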
+
+/**************************************************************************\
+
+                     power projection routines
+
+\**************************************************************************/
+
+void project(ZZ_p& x, const ZZ_pVector& a, const ZZ_pX& b);
+ZZ_p project(const ZZ_pVector& a, const ZZ_pX& b);
+// x = inner product of a with coefficient vector of b
+
+
+void ProjectPowers(vec_ZZ_p& x, const vec_ZZ_p& a, long k,
+                   const ZZ_pX& h, const ZZ_pXModulus& F);
+
+vec_ZZ_p ProjectPowers(const vec_ZZ_p& a, long k,
+                   const ZZ_pX& h, const ZZ_pXModulus& F);
+
+// Computes the vector
+
+//    project(a, 1), project(a, h), ..., project(a, h^{k-1} % f).  
+
+// This operation is the "transpose" of the modular composition operation.
+
+void ProjectPowers(vec_ZZ_p& x, const vec_ZZ_p& a, long k,
+                   const ZZ_pXArgument& H, const ZZ_pXModulus& F);
+
+vec_ZZ_p ProjectPowers(const vec_ZZ_p& a, long k,
+                   const ZZ_pXArgument& H, const ZZ_pXModulus& F);
+
+// same as above, but uses a pre-computed ZZ_pXArgument
+
+
+void UpdateMap(vec_ZZ_p& x, const vec_ZZ_p& a,
+               const ZZ_pXMultiplier& B, const ZZ_pXModulus& F);
+
+vec_ZZ_p UpdateMap(const vec_ZZ_p& a,
+               const ZZ_pXMultiplier& B, const ZZ_pXModulus& F);
+
+// Computes the vector
+
+//    project(a, b), project(a, (b*X)%f), ..., project(a, (b*X^{n-1})%f)
+
+// Restriction: must have a.length() <= deg(F).
+// This is "transposed" MulMod by B.
+// Input may have "high order" zeroes stripped.
+// Output will always have high order zeroes stripped.
+
+
+/**************************************************************************\
+
+                              Minimum Polynomials
+
+These routines should be used with prime p.
+
+All of these routines implement the algorithm from [Shoup, J. Symbolic
+Comp. 17:371-391, 1994] and [Shoup, J. Symbolic Comp. 20:363-397,
+1995], based on transposed modular composition and the
+Berlekamp/Massey algorithm.
+
+\**************************************************************************/
+
+
+void MinPolySeq(ZZ_pX& h, const vec_ZZ_p& a, long m);
+ZZ_pX MinPolySeq(const vec_ZZ_p& a, long m);
+// computes the minimum polynomial of a linearly generated sequence; m
+// is a bound on the degree of the polynomial; required: a.length() >=
+// 2*m
+
+void ProbMinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+ZZ_pX ProbMinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+
+void ProbMinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F);
+ZZ_pX ProbMinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F);
+
+// computes the monic minimal polynomial of (g mod f).  m = a bound on
+// the degree of the minimal polynomial; in the second version, this
+// argument defaults to n.  The algorithm is probabilistic, always
+// returns a divisor of the minimal polynomial, and returns a proper
+// divisor with probability at most m/p.
+
+void MinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+ZZ_pX MinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+
+void MinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F);
+ZZ_pX MinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F);
+
+// same as above, but guarantees that result is correct
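+
+// As a sanity check (sketch): the minimal polynomial of g = X mod f
+// is f itself whenever f is monic:
+
+void MinPolyCheck(const ZZ_pX& f)   // f monic, deg(f) > 0, p prime
+{
+   ZZ_pXModulus F(f);
+   ZZ_pX g;
+   SetX(g);                      // g = X
+   ZZ_pX h = MinPolyMod(g, F);
+   // at this point h == f
+}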
+
+void IrredPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+ZZ_pX IrredPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+
+void IrredPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F);
+ZZ_pX IrredPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F);
+
+// same as above, but assumes that f is irreducible, or at least that
+// the minimal poly of g is itself irreducible.  The algorithm is
+// deterministic (and is always correct).
+
+
+/**************************************************************************\
+
+                   Traces, norms, resultants
+
+These routines should be used with prime p.
+
+\**************************************************************************/
+
+
+void TraceMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pXModulus& F);
+ZZ_p TraceMod(const ZZ_pX& a, const ZZ_pXModulus& F);
+
+void TraceMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pX& f);
+ZZ_p TraceMod(const ZZ_pX& a, const ZZ_pX& f);
+// x = Trace(a mod f); deg(a) < deg(f)
+
+
+void TraceVec(vec_ZZ_p& S, const ZZ_pX& f);
+vec_ZZ_p TraceVec(const ZZ_pX& f);
+// S[i] = Trace(X^i mod f), i = 0..deg(f)-1; 0 < deg(f)
+
+// The above trace routines implement the asymptotically fast trace
+// algorithm from [von zur Gathen and Shoup, Computational Complexity,
+// 1992].
+
+void NormMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pX& f);
+ZZ_p NormMod(const ZZ_pX& a, const ZZ_pX& f);
+// x = Norm(a mod f); 0 < deg(f), deg(a) < deg(f)
+
+void resultant(ZZ_p& x, const ZZ_pX& a, const ZZ_pX& b);
+ZZ_p resultant(const ZZ_pX& a, const ZZ_pX& b);
+// x = resultant(a, b)
+
+void CharPolyMod(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& f);
+ZZ_pX CharPolyMod(const ZZ_pX& a, const ZZ_pX& f);
+// g = characteristic polynomial of (a mod f); 0 < deg(f), deg(g) <
+// deg(f);  this routine works for arbitrary f;  if f is irreducible,
+// it is faster to use the IrredPolyMod routine, and then exponentiate
+// if necessary (since in this case the CharPoly is just a power of
+// the IrredPoly).
+
+
+/**************************************************************************\
+
+                           Miscellany
+
+
+\**************************************************************************/
+
+
+void clear(ZZ_pX& x); // x = 0
+void set(ZZ_pX& x); // x = 1
+
+void ZZ_pX::kill();
+// f.kill() sets f to 0 and frees all memory held by f.  Equivalent to
+// f.rep.kill().
+
+ZZ_pX::ZZ_pX(INIT_SIZE_TYPE, long n);
+// ZZ_pX(INIT_SIZE, n) initializes to zero, but space is pre-allocated
+// for n coefficients
+
+static const ZZ_pX& zero();
+// ZZ_pX::zero() is a read-only reference to 0
+
+void swap(ZZ_pX& x, ZZ_pX& y);
+// swap x and y (via "pointer swapping")
+
+void ZZ_pX::swap(ZZ_pX& x);
+// swap member function
+
+
+ZZ_pX::ZZ_pX(long i, const ZZ_p& c);  
+ZZ_pX::ZZ_pX(long i, long c);
+// initialize to c*X^i, provided for backward compatibility
+
+ diff --git a/thirdparty/linux/ntl/doc/ZZ_pX.txt b/thirdparty/linux/ntl/doc/ZZ_pX.txt new file mode 100644 index 0000000000..b6614478f5 --- /dev/null +++ b/thirdparty/linux/ntl/doc/ZZ_pX.txt @@ -0,0 +1,897 @@ + +/**************************************************************************\ + +MODULE: ZZ_pX + +SUMMARY: + +The class ZZ_pX implements polynomial arithmetic modulo p. + +Polynomial arithmetic is implemented using the FFT, combined with the +Chinese Remainder Theorem. A more detailed description of the +techniques used here can be found in [Shoup, J. Symbolic +Comp. 20:363-397, 1995]. + +Small degree polynomials are multiplied either with classical +or Karatsuba algorithms. + +\**************************************************************************/ + +#include +#include + +class ZZ_pX { +public: + + ZZ_pX(); // initialize to 0 + + ZZ_pX(const ZZ_pX& a); // copy constructor + explicit ZZ_pX(const ZZ_p& a); // promotion + explicit ZZ_pX(long a); // promotion + + ZZ_pX& operator=(const ZZ_pX& a); // assignment + ZZ_pX& operator=(const ZZ_p& a); // assignment + ZZ_pX& operator=(const long a); // assignment + + ~ZZ_pX(); // destructor + + ZZ_pX(INIT_MONO_TYPE, long i, const ZZ_p& c); + ZZ_pX(INIT_MONO_TYPE, long i, long c); + // initialize to c*X^i, invoke as ZZ_pX(INIT_MONO, i, c) + + ZZ_pX(INIT_MONO_TYPE, long i, long c); + // initialize to X^i, invoke as ZZ_pX(INIT_MONO, i) + + + // typedefs to aid in generic programming + typedef zz_p coeff_type; + typedef zz_pE residue_type; + typedef zz_pXModulus modulus_type; + typedef zz_pXMultiplier multiplier_type; + typedef fftRep fft_type; + + + // ... + + +}; + + + + + +/**************************************************************************\ + + Accessing coefficients + +The degree of a polynomial f is obtained as deg(f), +where the zero polynomial, by definition, has degree -1. + +A polynomial f is represented as a coefficient vector. +Coefficients may be accesses in one of two ways. + +The safe, high-level method is to call the function +coeff(f, i) to get the coefficient of X^i in the polynomial f, +and to call the function SetCoeff(f, i, a) to set the coefficient +of X^i in f to the scalar a. + +One can also access the coefficients more directly via a lower level +interface. The coefficient of X^i in f may be accessed using +subscript notation f[i]. In addition, one may write f.SetLength(n) +to set the length of the underlying coefficient vector to n, +and f.SetMaxLength(n) to allocate space for n coefficients, +without changing the coefficient vector itself. + +After setting coefficients using this low-level interface, +one must ensure that leading zeros in the coefficient vector +are stripped afterwards by calling the function f.normalize(). + +NOTE: the coefficient vector of f may also be accessed directly +as f.rep; however, this is not recommended. Also, for a properly +normalized polynomial f, we have f.rep.length() == deg(f)+1, +and deg(f) >= 0 => f.rep[deg(f)] != 0. + +\**************************************************************************/ + + + +long deg(const ZZ_pX& a); // return deg(a); deg(0) == -1. 
+ +const ZZ_p& coeff(const ZZ_pX& a, long i); +// returns the coefficient of X^i, or zero if i not in range + +const ZZ_p& LeadCoeff(const ZZ_pX& a); +// returns leading term of a, or zero if a == 0 + +const ZZ_p& ConstTerm(const ZZ_pX& a); +// returns constant term of a, or zero if a == 0 + +void SetCoeff(ZZ_pX& x, long i, const ZZ_p& a); +void SetCoeff(ZZ_pX& x, long i, long a); +// makes coefficient of X^i equal to a; error is raised if i < 0 + +void SetCoeff(ZZ_pX& x, long i); +// makes coefficient of X^i equal to 1; error is raised if i < 0 + +void SetX(ZZ_pX& x); // x is set to the monomial X + +long IsX(const ZZ_pX& a); // test if x = X + + + + +ZZ_p& ZZ_pX::operator[](long i); +const ZZ_p& ZZ_pX::operator[](long i) const; +// indexing operators: f[i] is the coefficient of X^i --- +// i should satsify i >= 0 and i <= deg(f). +// No range checking (unless NTL_RANGE_CHECK is defined). + + +void ZZ_pX::SetLength(long n); +// f.SetLength(n) sets the length of the inderlying coefficient +// vector to n --- after this call, indexing f[i] for i = 0..n-1 +// is valid. + +void ZZ_pX::normalize(); +// f.normalize() strips leading zeros from coefficient vector of f + +void ZZ_pX::SetMaxLength(long n); +// f.SetMaxLength(n) pre-allocate spaces for n coefficients. The +// polynomial that f represents is unchanged. + + + + + +/**************************************************************************\ + + Comparison + +\**************************************************************************/ + + +long operator==(const ZZ_pX& a, const ZZ_pX& b); +long operator!=(const ZZ_pX& a, const ZZ_pX& b); + +// PROMOTIONS: operators ==, != promote {long, ZZ_p} to ZZ_pX on (a, b). + +long IsZero(const ZZ_pX& a); // test for 0 +long IsOne(const ZZ_pX& a); // test for 1 + + +/**************************************************************************\ + + Addition + +\**************************************************************************/ + + +// operator notation: + +ZZ_pX operator+(const ZZ_pX& a, const ZZ_pX& b); +ZZ_pX operator-(const ZZ_pX& a, const ZZ_pX& b); + +ZZ_pX operator-(const ZZ_pX& a); // unary - + +ZZ_pX& operator+=(ZZ_pX& x, const ZZ_pX& a); +ZZ_pX& operator+=(ZZ_pX& x, const ZZ_p& a); +ZZ_pX& operator+=(ZZ_pX& x, long a); + +ZZ_pX& operator-=(ZZ_pX& x, const ZZ_pX& a); +ZZ_pX& operator-=(ZZ_pX& x, const ZZ_p& a); +ZZ_pX& operator-=(ZZ_pX& x, long a); + +ZZ_pX& operator++(ZZ_pX& x); // prefix +void operator++(ZZ_pX& x, int); // postfix + +ZZ_pX& operator--(ZZ_pX& x); // prefix +void operator--(ZZ_pX& x, int); // postfix + +// procedural versions: + + +void add(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); // x = a + b +void sub(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); // x = a - b +void negate(ZZ_pX& x, const ZZ_pX& a); // x = -a + + +// PROMOTIONS: binary +, - and procedures add, sub promote +// {long, ZZ_p} to ZZ_pX on (a, b). + + +/**************************************************************************\ + + Multiplication + +\**************************************************************************/ + +// operator notation: + +ZZ_pX operator*(const ZZ_pX& a, const ZZ_pX& b); + +ZZ_pX& operator*=(ZZ_pX& x, const ZZ_pX& a); +ZZ_pX& operator*=(ZZ_pX& x, const ZZ_p& a); +ZZ_pX& operator*=(ZZ_pX& x, long a); + +// procedural versions: + +void mul(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); // x = a * b + +void sqr(ZZ_pX& x, const ZZ_pX& a); // x = a^2 +ZZ_pX sqr(const ZZ_pX& a); + +// PROMOTIONS: operator * and procedure mul promote {long, ZZ_p} to ZZ_pX +// on (a, b). 
+ +void power(ZZ_pX& x, const ZZ_pX& a, long e); // x = a^e (e >= 0) +ZZ_pX power(const ZZ_pX& a, long e); + + +/**************************************************************************\ + + Shift Operations + +LeftShift by n means multiplication by X^n +RightShift by n means division by X^n + +A negative shift amount reverses the direction of the shift. + +\**************************************************************************/ + +// operator notation: + +ZZ_pX operator<<(const ZZ_pX& a, long n); +ZZ_pX operator>>(const ZZ_pX& a, long n); + +ZZ_pX& operator<<=(ZZ_pX& x, long n); +ZZ_pX& operator>>=(ZZ_pX& x, long n); + +// procedural versions: + +void LeftShift(ZZ_pX& x, const ZZ_pX& a, long n); +ZZ_pX LeftShift(const ZZ_pX& a, long n); + +void RightShift(ZZ_pX& x, const ZZ_pX& a, long n); +ZZ_pX RightShift(const ZZ_pX& a, long n); + + + +/**************************************************************************\ + + Division + +\**************************************************************************/ + +// operator notation: + +ZZ_pX operator/(const ZZ_pX& a, const ZZ_pX& b); +ZZ_pX operator/(const ZZ_pX& a, const ZZ_p& b); +ZZ_pX operator/(const ZZ_pX& a, long b); + + +ZZ_pX& operator/=(ZZ_pX& x, const ZZ_pX& b); +ZZ_pX& operator/=(ZZ_pX& x, const ZZ_p& b); +ZZ_pX& operator/=(ZZ_pX& x, long b); + +ZZ_pX operator%(const ZZ_pX& a, const ZZ_pX& b); + +ZZ_pX& operator%=(ZZ_pX& x, const ZZ_pX& b); + + +// procedural versions: + + +void DivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b); +// q = a/b, r = a%b + +void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b); +void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_p& b); +void div(ZZ_pX& q, const ZZ_pX& a, long b); +// q = a/b + +void rem(ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b); +// r = a%b + +long divide(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0 + +long divide(const ZZ_pX& a, const ZZ_pX& b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0 + + +/**************************************************************************\ + + GCD's + +These routines are intended for use when p is prime. + +\**************************************************************************/ + + +void GCD(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); +ZZ_pX GCD(const ZZ_pX& a, const ZZ_pX& b); +// x = GCD(a, b), x is always monic (or zero if a==b==0). + + +void XGCD(ZZ_pX& d, ZZ_pX& s, ZZ_pX& t, const ZZ_pX& a, const ZZ_pX& b); +// d = gcd(a,b), a s + b t = d + + +// NOTE: A classical algorithm is used, switching over to a +// "half-GCD" algorithm for large degree + + +/**************************************************************************\ + + Input/Output + +I/O format: + + [a_0 a_1 ... a_n], + +represents the polynomial a_0 + a_1*X + ... + a_n*X^n. + +On output, all coefficients will be integers between 0 and p-1, and +a_n not zero (the zero polynomial is [ ]). On input, the coefficients +are arbitrary integers which are reduced modulo p, and leading zeros +stripped. 
+ +\**************************************************************************/ + +istream& operator>>(istream& s, ZZ_pX& x); +ostream& operator<<(ostream& s, const ZZ_pX& a); + + +/**************************************************************************\ + + Some utility routines + +\**************************************************************************/ + +void diff(ZZ_pX& x, const ZZ_pX& a); // x = derivative of a +ZZ_pX diff(const ZZ_pX& a); + +void MakeMonic(ZZ_pX& x); +// if x != 0 makes x into its monic associate; LeadCoeff(x) must be +// invertible in this case. + +void reverse(ZZ_pX& x, const ZZ_pX& a, long hi); +ZZ_pX reverse(const ZZ_pX& a, long hi); + +void reverse(ZZ_pX& x, const ZZ_pX& a); +ZZ_pX reverse(const ZZ_pX& a); + +// x = reverse of a[0]..a[hi] (hi >= -1); +// hi defaults to deg(a) in second version + +void VectorCopy(vec_ZZ_p& x, const ZZ_pX& a, long n); +vec_ZZ_p VectorCopy(const ZZ_pX& a, long n); +// x = copy of coefficient vector of a of length exactly n. +// input is truncated or padded with zeroes as appropriate. + + + + +/**************************************************************************\ + + Random Polynomials + +\**************************************************************************/ + +void random(ZZ_pX& x, long n); +ZZ_pX random_ZZ_pX(long n); +// generate a random polynomial of degree < n + + + +/**************************************************************************\ + + Polynomial Evaluation and related problems + +\**************************************************************************/ + + +void BuildFromRoots(ZZ_pX& x, const vec_ZZ_p& a); +ZZ_pX BuildFromRoots(const vec_ZZ_p& a); +// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n = a.length() + +void eval(ZZ_p& b, const ZZ_pX& f, const ZZ_p& a); +ZZ_p eval(const ZZ_pX& f, const ZZ_p& a); +// b = f(a) + +void eval(vec_ZZ_p& b, const ZZ_pX& f, const vec_ZZ_p& a); +vec_ZZ_p eval(const ZZ_pX& f, const vec_ZZ_p& a); +// b.SetLength(a.length()). b[i] = f(a[i]) for 0 <= i < a.length() + +void interpolate(ZZ_pX& f, const vec_ZZ_p& a, const vec_ZZ_p& b); +ZZ_pX interpolate(const vec_ZZ_p& a, const vec_ZZ_p& b); +// interpolates the polynomial f satisfying f(a[i]) = b[i]. p should +// be prime. + +/**************************************************************************\ + + Arithmetic mod X^n + +All routines require n >= 0, otherwise an error is raised. + +\**************************************************************************/ + +void trunc(ZZ_pX& x, const ZZ_pX& a, long n); // x = a % X^n +ZZ_pX trunc(const ZZ_pX& a, long n); + +void MulTrunc(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, long n); +ZZ_pX MulTrunc(const ZZ_pX& a, const ZZ_pX& b, long n); +// x = a * b % X^n + +void SqrTrunc(ZZ_pX& x, const ZZ_pX& a, long n); +ZZ_pX SqrTrunc(const ZZ_pX& a, long n); +// x = a^2 % X^n + +void InvTrunc(ZZ_pX& x, const ZZ_pX& a, long n); +ZZ_pX InvTrunc(const ZZ_pX& a, long n); +// computes x = a^{-1} % X^m. Must have ConstTerm(a) invertible. + +/**************************************************************************\ + + Modular Arithmetic (without pre-conditioning) + +Arithmetic mod f. + +All inputs and outputs are polynomials of degree less than deg(f), and +deg(f) > 0. + +NOTE: if you want to do many computations with a fixed f, use the +ZZ_pXModulus data structure and associated routines below for better +performance. 
+ +\**************************************************************************/ + +void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, const ZZ_pX& f); +ZZ_pX MulMod(const ZZ_pX& a, const ZZ_pX& b, const ZZ_pX& f); +// x = (a * b) % f + +void SqrMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f); +ZZ_pX SqrMod(const ZZ_pX& a, const ZZ_pX& f); +// x = a^2 % f + +void MulByXMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f); +ZZ_pX MulByXMod(const ZZ_pX& a, const ZZ_pX& f); +// x = (a * X) mod f +// NOTE: thread boosting enabled only if x does not alias a + +void InvMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f); +ZZ_pX InvMod(const ZZ_pX& a, const ZZ_pX& f); +// x = a^{-1} % f, error is a is not invertible + +long InvModStatus(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f); +// if (a, f) = 1, returns 0 and sets x = a^{-1} % f; otherwise, +// returns 1 and sets x = (a, f) + + +// for modular exponentiation, see below + + + +/**************************************************************************\ + + Modular Arithmetic with Pre-Conditioning + +If you need to do a lot of arithmetic modulo a fixed f, build a +ZZ_pXModulus F for f. This pre-computes information about f that +speeds up subsequent computations. + +It is required that deg(f) > 0 and that LeadCoeff(f) is invertible. + +As an example, the following routine computes the product modulo f of a vector +of polynomials. + +#include + +void product(ZZ_pX& x, const vec_ZZ_pX& v, const ZZ_pX& f) +{ + ZZ_pXModulus F(f); + ZZ_pX res; + res = 1; + long i; + for (i = 0; i < v.length(); i++) + MulMod(res, res, v[i], F); + x = res; +} + +Note that automatic conversions are provided so that a ZZ_pX can +be used wherever a ZZ_pXModulus is required, and a ZZ_pXModulus +can be used wherever a ZZ_pX is required. + +\**************************************************************************/ + + +class ZZ_pXModulus { +public: + ZZ_pXModulus(); // initially in an unusable state + + ZZ_pXModulus(const ZZ_pXModulus&); // copy + + ZZ_pXModulus& operator=(const ZZ_pXModulus&); // assignment + + ~ZZ_pXModulus(); + + ZZ_pXModulus(const ZZ_pX& f); // initialize with f, deg(f) > 0 + + operator const ZZ_pX& () const; + // read-only access to f, implicit conversion operator + + const ZZ_pX& val() const; + // read-only access to f, explicit notation + +}; + +void build(ZZ_pXModulus& F, const ZZ_pX& f); +// pre-computes information about f and stores it in F. +// Note that the declaration ZZ_pXModulus F(f) is equivalent to +// ZZ_pXModulus F; build(F, f). + +// In the following, f refers to the polynomial f supplied to the +// build routine, and n = deg(f). 
+ +long deg(const ZZ_pXModulus& F); // return n=deg(f) + +void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, const ZZ_pXModulus& F); +ZZ_pX MulMod(const ZZ_pX& a, const ZZ_pX& b, const ZZ_pXModulus& F); +// x = (a * b) % f; deg(a), deg(b) < n + +void SqrMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F); +ZZ_pX SqrMod(const ZZ_pX& a, const ZZ_pXModulus& F); +// x = a^2 % f; deg(a) < n + +void PowerMod(ZZ_pX& x, const ZZ_pX& a, const ZZ& e, const ZZ_pXModulus& F); +ZZ_pX PowerMod(const ZZ_pX& a, const ZZ& e, const ZZ_pXModulus& F); + +void PowerMod(ZZ_pX& x, const ZZ_pX& a, long e, const ZZ_pXModulus& F); +ZZ_pX PowerMod(const ZZ_pX& a, long e, const ZZ_pXModulus& F); + +// x = a^e % f; deg(a) < n (e may be negative) + +void PowerXMod(ZZ_pX& x, const ZZ& e, const ZZ_pXModulus& F); +ZZ_pX PowerXMod(const ZZ& e, const ZZ_pXModulus& F); + +void PowerXMod(ZZ_pX& x, long e, const ZZ_pXModulus& F); +ZZ_pX PowerXMod(long e, const ZZ_pXModulus& F); + +// x = X^e % f (e may be negative) + +void PowerXPlusAMod(ZZ_pX& x, const ZZ_p& a, const ZZ& e, + const ZZ_pXModulus& F); + +ZZ_pX PowerXPlusAMod(const ZZ_p& a, const ZZ& e, + const ZZ_pXModulus& F); + +void PowerXPlusAMod(ZZ_pX& x, const ZZ_p& a, long e, + const ZZ_pXModulus& F); + +ZZ_pX PowerXPlusAMod(const ZZ_p& a, long e, + const ZZ_pXModulus& F); + +// x = (X + a)^e % f (e may be negative) + + +void rem(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F); +// x = a % f + +void DivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pXModulus& F); +// q = a/f, r = a%f + +void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_pXModulus& F); +// q = a/f + +// operator notation: + +ZZ_pX operator/(const ZZ_pX& a, const ZZ_pXModulus& F); +ZZ_pX operator%(const ZZ_pX& a, const ZZ_pXModulus& F); + +ZZ_pX& operator/=(ZZ_pX& x, const ZZ_pXModulus& F); +ZZ_pX& operator%=(ZZ_pX& x, const ZZ_pXModulus& F); + + + +/**************************************************************************\ + + + More Pre-Conditioning + +If you need to compute a * b % f for a fixed b, but for many a's, it +is much more efficient to first build a ZZ_pXMultiplier B for b, and +then use the MulMod routine below. + +Here is an example that multiplies each element of a vector by a fixed +polynomial modulo f. 
+
+#include <NTL/ZZ_pX.h>
+
+void mul(vec_ZZ_pX& v, const ZZ_pX& b, const ZZ_pX& f)
+{
+   ZZ_pXModulus F(f);
+   ZZ_pXMultiplier B(b, F);
+   long i;
+   for (i = 0; i < v.length(); i++)
+      MulMod(v[i], v[i], B, F);
+}
+
+\**************************************************************************/
+
+
+class ZZ_pXMultiplier {
+public:
+   ZZ_pXMultiplier(); // initially zero
+
+   ZZ_pXMultiplier(const ZZ_pX& b, const ZZ_pXModulus& F);
+   // initializes with b mod F, where deg(b) < deg(F)
+
+   ZZ_pXMultiplier(const ZZ_pXMultiplier&);  // copy
+
+   ZZ_pXMultiplier& operator=(const ZZ_pXMultiplier&);  // assignment
+
+   ~ZZ_pXMultiplier();
+
+   const ZZ_pX& val() const; // read-only access to b
+
+};
+
+
+void build(ZZ_pXMultiplier& B, const ZZ_pX& b, const ZZ_pXModulus& F);
+// pre-computes information about b and stores it in B; deg(b) <
+// deg(F)
+
+void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXMultiplier& B,
+            const ZZ_pXModulus& F);
+
+// x = (a * b) % F; deg(a) < deg(F)
+
+/**************************************************************************\
+
+                            vectors of ZZ_pX's
+
+\**************************************************************************/
+
+typedef Vec<ZZ_pX> vec_ZZ_pX; // backward compatibility
+
+
+/**************************************************************************\
+
+                            Modular Composition
+
+Modular composition is the problem of computing g(h) mod f for
+polynomials f, g, and h.
+
+The algorithm employed is that of Brent & Kung (Fast algorithms for
+manipulating formal power series, JACM 25:581-595, 1978), which uses
+O(n^{1/2}) modular polynomial multiplications, and O(n^2) scalar
+operations.
+
+
+\**************************************************************************/
+
+void CompMod(ZZ_pX& x, const ZZ_pX& g, const ZZ_pX& h, const ZZ_pXModulus& F);
+ZZ_pX CompMod(const ZZ_pX& g, const ZZ_pX& h,
+              const ZZ_pXModulus& F);
+
+// x = g(h) mod f; deg(h) < n
+
+void Comp2Mod(ZZ_pX& x1, ZZ_pX& x2, const ZZ_pX& g1, const ZZ_pX& g2,
+              const ZZ_pX& h, const ZZ_pXModulus& F);
+// xi = gi(h) mod f (i=1,2); deg(h) < n.
+
+void Comp3Mod(ZZ_pX& x1, ZZ_pX& x2, ZZ_pX& x3,
+              const ZZ_pX& g1, const ZZ_pX& g2, const ZZ_pX& g3,
+              const ZZ_pX& h, const ZZ_pXModulus& F);
+// xi = gi(h) mod f (i=1..3); deg(h) < n.
+
+
+/**************************************************************************\
+
+                     Composition with Pre-Conditioning
+
+If a single h is going to be used with many g's then you should build
+a ZZ_pXArgument for h, and then use the compose routine below.  The
+routine build computes and stores h, h^2, ..., h^m mod f.  After this
+pre-computation, composing a polynomial of degree roughly n with h
+takes n/m multiplies mod f, plus n^2 scalar multiplies.  Thus,
+increasing m increases the space requirement and the pre-computation
+time, but reduces the composition time.
+
+\**************************************************************************/
+
+
+struct ZZ_pXArgument {
+   vec_ZZ_pX H;
+};
+
+void build(ZZ_pXArgument& H, const ZZ_pX& h, const ZZ_pXModulus& F, long m);
+// Pre-Computes information about h.  m > 0, deg(h) < n.
+
+void CompMod(ZZ_pX& x, const ZZ_pX& g, const ZZ_pXArgument& H,
+             const ZZ_pXModulus& F);
+
+ZZ_pX CompMod(const ZZ_pX& g, const ZZ_pXArgument& H,
+              const ZZ_pXModulus& F);
+
+extern long ZZ_pXArgBound;
+
+// Initially 0.  If this is set to a value greater than zero, then
+// composition routines will allocate a table of no more than about
+// ZZ_pXArgBound KB.  Setting this value affects all compose routines
+// and the power projection and minimal polynomial routines below,
+// and indirectly affects many routines in ZZ_pXFactoring.
+
+/**************************************************************************\
+
+                         power projection routines
+
+\**************************************************************************/
+
+void project(ZZ_p& x, const ZZ_pVector& a, const ZZ_pX& b);
+ZZ_p project(const ZZ_pVector& a, const ZZ_pX& b);
+// x = inner product of a with coefficient vector of b
+
+
+void ProjectPowers(vec_ZZ_p& x, const vec_ZZ_p& a, long k,
+                   const ZZ_pX& h, const ZZ_pXModulus& F);
+
+vec_ZZ_p ProjectPowers(const vec_ZZ_p& a, long k,
+                       const ZZ_pX& h, const ZZ_pXModulus& F);
+
+// Computes the vector
+
+//    project(a, 1), project(a, h), ..., project(a, h^{k-1} % f).
+
+// This operation is the "transpose" of the modular composition operation.
+
+void ProjectPowers(vec_ZZ_p& x, const vec_ZZ_p& a, long k,
+                   const ZZ_pXArgument& H, const ZZ_pXModulus& F);
+
+vec_ZZ_p ProjectPowers(const vec_ZZ_p& a, long k,
+                       const ZZ_pXArgument& H, const ZZ_pXModulus& F);
+
+// same as above, but uses a pre-computed ZZ_pXArgument
+
+
+void UpdateMap(vec_ZZ_p& x, const vec_ZZ_p& a,
+               const ZZ_pXMultiplier& B, const ZZ_pXModulus& F);
+
+vec_ZZ_p UpdateMap(const vec_ZZ_p& a,
+                   const ZZ_pXMultiplier& B, const ZZ_pXModulus& F);
+
+// Computes the vector
+
+//    project(a, b), project(a, (b*X)%f), ..., project(a, (b*X^{n-1})%f)
+
+// Restriction: must have a.length() <= deg(F).
+// This is "transposed" MulMod by B.
+// Input may have "high order" zeroes stripped.
+// Output will always have high order zeroes stripped.
+
+
+/**************************************************************************\
+
+                          Minimum Polynomials
+
+These routines should be used with prime p.
+
+All of these routines implement the algorithm from [Shoup, J. Symbolic
+Comp. 17:371-391, 1994] and [Shoup, J. Symbolic Comp. 20:363-397,
+1995], based on transposed modular composition and the
+Berlekamp/Massey algorithm.
+
+\**************************************************************************/
+
+
+void MinPolySeq(ZZ_pX& h, const vec_ZZ_p& a, long m);
+ZZ_pX MinPolySeq(const vec_ZZ_p& a, long m);
+// computes the minimum polynomial of a linearly generated sequence; m
+// is a bound on the degree of the polynomial; required: a.length() >=
+// 2*m
+
+void ProbMinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+ZZ_pX ProbMinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+
+void ProbMinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F);
+ZZ_pX ProbMinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F);
+
+// computes the monic minimal polynomial of (g mod f).  m = a bound on
+// the degree of the minimal polynomial; in the second version, this
+// argument defaults to n.  The algorithm is probabilistic, always
+// returns a divisor of the minimal polynomial, and returns a proper
+// divisor with probability at most m/p.
+
+void MinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+ZZ_pX MinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+
+void MinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F);
+ZZ_pX MinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F);
+
+// same as above, but guarantees that result is correct
+
+void IrredPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+ZZ_pX IrredPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F, long m);
+
+void IrredPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F);
+ZZ_pX IrredPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F);
+
+// same as above, but assumes that f is irreducible, or at least that
+// the minimal poly of g is itself irreducible.  The algorithm is
+// deterministic (and is always correct).
+
+
+/**************************************************************************\
+
+                       Traces, norms, resultants
+
+These routines should be used with prime p.
+
+\**************************************************************************/
+
+
+void TraceMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pXModulus& F);
+ZZ_p TraceMod(const ZZ_pX& a, const ZZ_pXModulus& F);
+
+void TraceMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pX& f);
+ZZ_p TraceMod(const ZZ_pX& a, const ZZ_pX& f);
+// x = Trace(a mod f); deg(a) < deg(f)
+
+
+void TraceVec(vec_ZZ_p& S, const ZZ_pX& f);
+vec_ZZ_p TraceVec(const ZZ_pX& f);
+// S[i] = Trace(X^i mod f), i = 0..deg(f)-1; 0 < deg(f)
+
+// The above trace routines implement the asymptotically fast trace
+// algorithm from [von zur Gathen and Shoup, Computational Complexity,
+// 1992].
+
+void NormMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pX& f);
+ZZ_p NormMod(const ZZ_pX& a, const ZZ_pX& f);
+// x = Norm(a mod f); 0 < deg(f), deg(a) < deg(f)
+
+void resultant(ZZ_p& x, const ZZ_pX& a, const ZZ_pX& b);
+ZZ_p resultant(const ZZ_pX& a, const ZZ_pX& b);
+// x = resultant(a, b)
+
+void CharPolyMod(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& f);
+ZZ_pX CharPolyMod(const ZZ_pX& a, const ZZ_pX& f);
+// g = characteristic polynomial of (a mod f); 0 < deg(f), deg(g) <
+// deg(f); this routine works for arbitrary f; if f is irreducible,
+// it is faster to use the IrredPolyMod routine, and then exponentiate
+// if necessary (since in this case the CharPoly is just a power of
+// the IrredPoly).
+
+
+/**************************************************************************\
+
+                               Miscellany
+
+
+\**************************************************************************/
+
+
+void clear(ZZ_pX& x); // x = 0
+void set(ZZ_pX& x); // x = 1
+
+void ZZ_pX::kill();
+// f.kill() sets f to 0 and frees all memory held by f; Equivalent to
+// f.rep.kill().
+
+ZZ_pX::ZZ_pX(INIT_SIZE_TYPE, long n);
+// ZZ_pX(INIT_SIZE, n) initializes to zero, but space is pre-allocated
+// for n coefficients
+
+static const ZZ_pX& ZZ_pX::zero();
+// ZZ_pX::zero() is a read-only reference to 0
+
+void swap(ZZ_pX& x, ZZ_pX& y);
+// swap x and y (via "pointer swapping")
+
+void ZZ_pX::swap(ZZ_pX& x);
+// swap member function
+
+
+ZZ_pX::ZZ_pX(long i, const ZZ_p& c);
+ZZ_pX::ZZ_pX(long i, long c);
+// initialize to c*X^i, provided for backward compatibility
diff --git a/thirdparty/linux/ntl/doc/ZZ_pXFactoring.cpp.html b/thirdparty/linux/ntl/doc/ZZ_pXFactoring.cpp.html
new file mode 100644
index 0000000000..009139a4ad
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/ZZ_pXFactoring.cpp.html
@@ -0,0 +1,208 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/ZZ_pXFactoring.cpp.html
+
+
+
+
+/**************************************************************************\
+
+MODULE: ZZ_pXFactoring
+
+SUMMARY:
+
+Routines are provided for factorization of polynomials over ZZ_p, as
+well as routines for related problems such as testing irreducibility
+and constructing irreducible polynomials of given degree.
+
+\**************************************************************************/
+
+#include <NTL/ZZ_pX.h>
+#include <NTL/pair_ZZ_pX_long.h>
+
+void SquareFreeDecomp(vec_pair_ZZ_pX_long& u, const ZZ_pX& f);
+vec_pair_ZZ_pX_long SquareFreeDecomp(const ZZ_pX& f);
+
+// Performs square-free decomposition.  f must be monic.  If f =
+// prod_i g_i^i, then u is set to a list of pairs (g_i, i).  The list
+// is in increasing order of i, with trivial terms (i.e., g_i = 1)
+// deleted.
+
+
+void FindRoots(vec_ZZ_p& x, const ZZ_pX& f);
+vec_ZZ_p FindRoots(const ZZ_pX& f);
+
+// f is monic, and has deg(f) distinct roots.  returns the list of
+// roots
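+
+// For example (a sketch; assumes ZZ_p::init(p) was called with a
+// prime p > 3), FindRoots inverts BuildFromRoots (see ZZ_pX.txt):
+//
+//    vec_ZZ_p a;
+//    a.SetLength(3);
+//    a[0] = 1; a[1] = 2; a[2] = 3;
+//    ZZ_pX f = BuildFromRoots(a);   // f = (X-1)(X-2)(X-3), monic
+//    vec_ZZ_p r = FindRoots(f);     // 1, 2, 3, in some order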
+
+void FindRoot(ZZ_p& root, const ZZ_pX& f);
+ZZ_p FindRoot(const ZZ_pX& f);
+
+// finds a single root of f.  assumes that f is monic and splits into
+// distinct linear factors
+
+
+void SFBerlekamp(vec_ZZ_pX& factors, const ZZ_pX& f, long verbose=0);
+vec_ZZ_pX  SFBerlekamp(const ZZ_pX& f, long verbose=0);
+
+// Assumes f is square-free and monic.  returns list of factors of f.
+// Uses "Berlekamp" approach, as described in detail in [Shoup,
+// J. Symbolic Comp. 20:363-397, 1995].
+
+
+void berlekamp(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f,
+               long verbose=0);
+
+vec_pair_ZZ_pX_long berlekamp(const ZZ_pX& f, long verbose=0);
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SFBerlekamp.
+
+
+
+void NewDDF(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f, const ZZ_pX& h,
+         long verbose=0);
+
+vec_pair_ZZ_pX_long NewDDF(const ZZ_pX& f, const ZZ_pX& h,
+         long verbose=0);
+
+// This computes a distinct-degree factorization.  The input must be
+// monic and square-free.  factors is set to a list of pairs (g, d),
+// where g is the product of all irreducible factors of f of degree d.
+// Only nontrivial pairs (i.e., g != 1) are included.  The polynomial
+// h is assumed to be equal to X^p mod f.  
+
+// This routine implements the baby step/giant step algorithm
+// of [Kaltofen and Shoup, STOC 1995], further described in
+// [Shoup, J. Symbolic Comp. 20:363-397, 1995].
+
+// NOTE: When factoring "large" polynomials,
+// this routine uses external files to store some intermediate
+// results, which are removed if the routine terminates normally.
+// These files are stored in the current directory under names of the
+// form tmp-*.
+// The definition of "large" is controlled by the variable
+
+      extern double ZZ_pXFileThresh;
+
+// which can be set by the user.  If the sizes of the tables
+// exceed ZZ_pXFileThresh KB, external files are used.
+// Initial value is NTL_FILE_THRESH (defined in tools.h).
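+
+// For example (a sketch): for a monic, square-free f, the required h
+// can be obtained with PowerXMod (see ZZ_pX.txt):
+//
+//    ZZ_pXModulus F(f);
+//    ZZ_pX h = PowerXMod(ZZ_p::modulus(), F);   // h = X^p mod f
+//    vec_pair_ZZ_pX_long u = NewDDF(f, h);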
+
+
+
+
+void EDF(vec_ZZ_pX& factors, const ZZ_pX& f, const ZZ_pX& h,
+         long d, long verbose=0);
+
+vec_ZZ_pX EDF(const ZZ_pX& f, const ZZ_pX& h,
+         long d, long verbose=0);
+
+// Performs equal-degree factorization.  f is monic, square-free, and
+// all irreducible factors have same degree.  h = X^p mod f.  d =
+// degree of irreducible factors of f.  This routine implements the
+// algorithm of [von zur Gathen and Shoup, Computational Complexity
+// 2:187-224, 1992].
+
+void RootEDF(vec_ZZ_pX& factors, const ZZ_pX& f, long verbose=0);
+vec_ZZ_pX RootEDF(const ZZ_pX& f, long verbose=0);
+
+// EDF for d==1
+
+void SFCanZass(vec_ZZ_pX& factors, const ZZ_pX& f, long verbose=0);
+vec_ZZ_pX SFCanZass(const ZZ_pX& f, long verbose=0);
+
+// Assumes f is monic and square-free.  returns list of factors of f.
+// Uses "Cantor/Zassenhaus" approach, using the routines NewDDF and
+// EDF above.
+
+
+void CanZass(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f,
+             long verbose=0);
+
+vec_pair_ZZ_pX_long CanZass(const ZZ_pX& f, long verbose=0);
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SquareFreeDecomp and SFCanZass.
+
+// NOTE: these routines use modular composition.  The space
+// used for the required tables can be controlled by the variable
+// ZZ_pXArgBound (see ZZ_pX.txt).
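+
+// For example (a sketch; assumes ZZ_p::init(p) was called with a
+// prime p): factor a random monic polynomial, then multiply the
+// factorization back out with the mul routine below:
+//
+//    ZZ_pX f = random_ZZ_pX(10);
+//    SetCoeff(f, 10);                 // f is now monic of degree 10
+//    vec_pair_ZZ_pX_long factors;
+//    CanZass(factors, f);
+//    ZZ_pX g = mul(factors);          // g == f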
+
+
+void mul(ZZ_pX& f, const vec_pair_ZZ_pX_long& v);
+ZZ_pX mul(const vec_pair_ZZ_pX_long& v);
+
+// multiplies polynomials, with multiplicities
+
+
+/**************************************************************************\
+
+                            Irreducible Polynomials
+
+\**************************************************************************/
+
+long ProbIrredTest(const ZZ_pX& f, long iter=1);
+
+// performs a fast, probabilistic irreducibility test.  The test can
+// err only if f is reducible, and the error probability is bounded by
+// p^{-iter}.  This implements an algorithm from [Shoup, J. Symbolic
+// Comp. 17:371-391, 1994].
+
+long DetIrredTest(const ZZ_pX& f);
+
+// performs a recursive deterministic irreducibility test.  Fast in
+// the worst-case (when input is irreducible).  This implements an
+// algorithm from [Shoup, J. Symbolic Comp. 17:371-391, 1994].
+
+long IterIrredTest(const ZZ_pX& f);
+
+// performs an iterative deterministic irreducibility test, based on
+// DDF.  Fast on average (when f has a small factor).
+
+void BuildIrred(ZZ_pX& f, long n);
+ZZ_pX BuildIrred_ZZ_pX(long n);
+
+// Build a monic irreducible poly of degree n.
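+
+// For example (a sketch), this construction can be cross-checked
+// against the irreducibility tests above:
+//
+//    ZZ_pX f;
+//    BuildIrred(f, 10);            // monic irreducible, degree 10
+//    long t1 = DetIrredTest(f);    // t1 == 1
+//    long t2 = IterIrredTest(f);   // t2 == 1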
+
+void BuildRandomIrred(ZZ_pX& f, const ZZ_pX& g);
+ZZ_pX BuildRandomIrred(const ZZ_pX& g);
+
+// g is a monic irreducible polynomial.  Constructs a random monic
+// irreducible polynomial f of the same degree.
+
+long ComputeDegree(const ZZ_pX& h, const ZZ_pXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial; h = X^p mod f.
+// The common degree of the irreducible factors of f is computed.  This
+// routine is useful in counting points on elliptic curves.
+
+long ProbComputeDegree(const ZZ_pX& h, const ZZ_pXModulus& F);
+
+// Same as above, but uses a slightly faster probabilistic algorithm.
+// The return value may be 0 or may be too big, but for large p
+// (relative to n), this happens with very low probability.
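+
+// For example (a sketch), for an "equal degree" f as produced by
+// NewDDF above:
+//
+//    ZZ_pXModulus F(f);
+//    ZZ_pX h = PowerXMod(ZZ_p::modulus(), F);   // h = X^p mod f
+//    long d = ComputeDegree(h, F);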
+
+void TraceMap(ZZ_pX& w, const ZZ_pX& a, long d, const ZZ_pXModulus& F,
+              const ZZ_pX& h);
+
+ZZ_pX TraceMap(const ZZ_pX& a, long d, const ZZ_pXModulus& F,
+              const ZZ_pX& h);
+
+// w = a+a^q+...+a^{q^{d-1}} mod f; it is assumed that d >= 0, and h =
+// X^q mod f, q a power of p.  This routine implements an algorithm
+// from [von zur Gathen and Shoup, Computational Complexity 2:187-224,
+// 1992].
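+
+// For example (a sketch), with q = p and a of degree < deg(f):
+//
+//    ZZ_pXModulus F(f);
+//    ZZ_pX h = PowerXMod(ZZ_p::modulus(), F);   // h = X^p mod f
+//    ZZ_pX w = TraceMap(a, deg(f), F, h);       // here d = deg(f)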
+
+void PowerCompose(ZZ_pX& w, const ZZ_pX& h, long d, const ZZ_pXModulus& F);
+
+ZZ_pX PowerCompose(const ZZ_pX& h, long d, const ZZ_pXModulus& F);
+
+// w = X^{q^d} mod f; it is assumed that d >= 0, and h = X^q mod f, q
+// a power of p.  This routine implements an algorithm from [von zur
+// Gathen and Shoup, Computational Complexity 2:187-224, 1992]
+
+
+
diff --git a/thirdparty/linux/ntl/doc/ZZ_pXFactoring.txt b/thirdparty/linux/ntl/doc/ZZ_pXFactoring.txt
new file mode 100644
index 0000000000..d23d19b551
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/ZZ_pXFactoring.txt
@@ -0,0 +1,198 @@
+
+/**************************************************************************\
+
+MODULE: ZZ_pXFactoring
+
+SUMMARY:
+
+Routines are provided for factorization of polynomials over ZZ_p, as
+well as routines for related problems such as testing irreducibility
+and constructing irreducible polynomials of given degree.
+
+\**************************************************************************/
+
+#include <NTL/ZZ_pX.h>
+#include <NTL/pair_ZZ_pX_long.h>
+
+void SquareFreeDecomp(vec_pair_ZZ_pX_long& u, const ZZ_pX& f);
+vec_pair_ZZ_pX_long SquareFreeDecomp(const ZZ_pX& f);
+
+// Performs square-free decomposition.  f must be monic.  If f =
+// prod_i g_i^i, then u is set to a list of pairs (g_i, i).  The list
+// is in increasing order of i, with trivial terms (i.e., g_i = 1)
+// deleted.
+
+
+void FindRoots(vec_ZZ_p& x, const ZZ_pX& f);
+vec_ZZ_p FindRoots(const ZZ_pX& f);
+
+// f is monic, and has deg(f) distinct roots.  returns the list of
+// roots
+
+void FindRoot(ZZ_p& root, const ZZ_pX& f);
+ZZ_p FindRoot(const ZZ_pX& f);
+
+// finds a single root of f.  assumes that f is monic and splits into
+// distinct linear factors
+
+
+void SFBerlekamp(vec_ZZ_pX& factors, const ZZ_pX& f, long verbose=0);
+vec_ZZ_pX SFBerlekamp(const ZZ_pX& f, long verbose=0);
+
+// Assumes f is square-free and monic.  returns list of factors of f.
+// Uses "Berlekamp" approach, as described in detail in [Shoup,
+// J. Symbolic Comp. 20:363-397, 1995].
+
+
+void berlekamp(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f,
+               long verbose=0);
+
+vec_pair_ZZ_pX_long berlekamp(const ZZ_pX& f, long verbose=0);
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SFBerlekamp.
+
+
+
+void NewDDF(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f, const ZZ_pX& h,
+         long verbose=0);
+
+vec_pair_ZZ_pX_long NewDDF(const ZZ_pX& f, const ZZ_pX& h,
+         long verbose=0);
+
+// This computes a distinct-degree factorization.  The input must be
+// monic and square-free.  factors is set to a list of pairs (g, d),
+// where g is the product of all irreducible factors of f of degree d.
+// Only nontrivial pairs (i.e., g != 1) are included.  The polynomial
+// h is assumed to be equal to X^p mod f.
+
+// This routine implements the baby step/giant step algorithm
+// of [Kaltofen and Shoup, STOC 1995], further described in
+// [Shoup, J. Symbolic Comp. 20:363-397, 1995].
+
+// NOTE: When factoring "large" polynomials,
+// this routine uses external files to store some intermediate
+// results, which are removed if the routine terminates normally.
+// These files are stored in the current directory under names of the
+// form tmp-*.
+// The definition of "large" is controlled by the variable
+
+      extern double ZZ_pXFileThresh;
+
+// which can be set by the user.  If the sizes of the tables
+// exceed ZZ_pXFileThresh KB, external files are used.
+// Initial value is NTL_FILE_THRESH (defined in tools.h).
+
+
+
+
+void EDF(vec_ZZ_pX& factors, const ZZ_pX& f, const ZZ_pX& h,
+         long d, long verbose=0);
+
+vec_ZZ_pX EDF(const ZZ_pX& f, const ZZ_pX& h,
+         long d, long verbose=0);
+
+// Performs equal-degree factorization.  f is monic, square-free, and
+// all irreducible factors have same degree.  h = X^p mod f.  d =
+// degree of irreducible factors of f.  This routine implements the
+// algorithm of [von zur Gathen and Shoup, Computational Complexity
+// 2:187-224, 1992].
+
+void RootEDF(vec_ZZ_pX& factors, const ZZ_pX& f, long verbose=0);
+vec_ZZ_pX RootEDF(const ZZ_pX& f, long verbose=0);
+
+// EDF for d==1
+
+void SFCanZass(vec_ZZ_pX& factors, const ZZ_pX& f, long verbose=0);
+vec_ZZ_pX SFCanZass(const ZZ_pX& f, long verbose=0);
+
+// Assumes f is monic and square-free.  returns list of factors of f.
+// Uses "Cantor/Zassenhaus" approach, using the routines NewDDF and
+// EDF above.
+
+
+void CanZass(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f,
+             long verbose=0);
+
+vec_pair_ZZ_pX_long CanZass(const ZZ_pX& f, long verbose=0);
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SquareFreeDecomp and SFCanZass.
+
+// NOTE: these routines use modular composition.  The space
+// used for the required tables can be controlled by the variable
+// ZZ_pXArgBound (see ZZ_pX.txt).
+
+
+void mul(ZZ_pX& f, const vec_pair_ZZ_pX_long& v);
+ZZ_pX mul(const vec_pair_ZZ_pX_long& v);
+
+// multiplies polynomials, with multiplicities
+
+
+/**************************************************************************\
+
+                            Irreducible Polynomials
+
+\**************************************************************************/
+
+long ProbIrredTest(const ZZ_pX& f, long iter=1);
+
+// performs a fast, probabilistic irreducibility test.  The test can
+// err only if f is reducible, and the error probability is bounded by
+// p^{-iter}.  This implements an algorithm from [Shoup, J. Symbolic
+// Comp. 17:371-391, 1994].
+
+long DetIrredTest(const ZZ_pX& f);
+
+// performs a recursive deterministic irreducibility test.  Fast in
+// the worst-case (when input is irreducible).  This implements an
+// algorithm from [Shoup, J. Symbolic Comp. 17:371-391, 1994].
+
+long IterIrredTest(const ZZ_pX& f);
+
+// performs an iterative deterministic irreducibility test, based on
+// DDF.  Fast on average (when f has a small factor).
+
+void BuildIrred(ZZ_pX& f, long n);
+ZZ_pX BuildIrred_ZZ_pX(long n);
+
+// Build a monic irreducible poly of degree n.
+
+void BuildRandomIrred(ZZ_pX& f, const ZZ_pX& g);
+ZZ_pX BuildRandomIrred(const ZZ_pX& g);
+
+// g is a monic irreducible polynomial.  Constructs a random monic
+// irreducible polynomial f of the same degree.
+
+long ComputeDegree(const ZZ_pX& h, const ZZ_pXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial; h = X^p mod f.
+// The common degree of the irreducible factors of f is computed.
+// This routine is useful in counting points on elliptic curves.
+
+long ProbComputeDegree(const ZZ_pX& h, const ZZ_pXModulus& F);
+
+// Same as above, but uses a slightly faster probabilistic algorithm.
+// The return value may be 0 or may be too big, but for large p
+// (relative to n), this happens with very low probability.
+
+void TraceMap(ZZ_pX& w, const ZZ_pX& a, long d, const ZZ_pXModulus& F,
+              const ZZ_pX& h);
+
+ZZ_pX TraceMap(const ZZ_pX& a, long d, const ZZ_pXModulus& F,
+              const ZZ_pX& h);
+
+// w = a+a^q+...+a^{q^{d-1}} mod f; it is assumed that d >= 0, and h =
+// X^q mod f, q a power of p.  This routine implements an algorithm
+// from [von zur Gathen and Shoup, Computational Complexity 2:187-224,
+// 1992].
+
+void PowerCompose(ZZ_pX& w, const ZZ_pX& h, long d, const ZZ_pXModulus& F);
+
+ZZ_pX PowerCompose(const ZZ_pX& h, long d, const ZZ_pXModulus& F);
+
+// w = X^{q^d} mod f; it is assumed that d >= 0, and h = X^q mod f, q
+// a power of p.
This routine implements an algorithm from [von zur +// Gathen and Shoup, Computational Complexity 2:187-224, 1992] + diff --git a/thirdparty/linux/ntl/doc/arrow1.gif b/thirdparty/linux/ntl/doc/arrow1.gif new file mode 100644 index 0000000000000000000000000000000000000000..c5a1321268b55873dcad3d1c4ded1f6d460179d6 GIT binary patch literal 967 zcmZ?wbhEHbG-5Df_*THEWx7q242U`v9D6b}{+yHlUDv?NUZL0Af^ND6-wTer6&QOzBI#Is#>w!+8wr`G z5;D&w=Uh+Dy_u4CFR$#9SNLP!sHeVBF9YLV1;xJ&O?(iX@YW&We`wOj@RZL{X`dp~ zz7|zKNX+|_U;V$j>1uhy!@9P|Rqg+4I{!B`9BAyk);IOm)LFM?FTB?^@!_PI&l?*4 zPn!RK&Z39&mp*J*@PFFk|BF^UT(;)@>Wy!A?R&X!_5U@SJ}lkzfB)f++mHM|b@tui z;~$Qm{B-)<$1~@@960d*+{KUQZvDS>_1o>cUk^O^|Mc0n7q5Q4dH3i2hhOhM{{8g% z_ovT)zkU1r|Nnmm5`p4R7EX4CdIlYkK2V&nvHxqxZ)$F7ZENr7?CS36?dzW~anj@| zQ>RUzF>}`JIdkW==Eg5tyeM;iOIA#TuC5~M`VFjHa=Hfo@yllA#u&)6>_4!7;|4Bn zK@Ec)(=z=P51v18c+)X)H8q2XofESB<(Mx#IJEJo;MEhpiP`-z^33nkD3 zwin7a)HvI)#N%#d#UkGedU^?mi#|#Sr07kKD9n3%Dx=lA;}L7dzJoKu8rarNy0fp9 z&H2K`EzCU|YL}GfZ`hX~wM6y81GcFLd%2|lxqdx6c~_6&f(n_Fvk#{i9ljD9v7j;S zT%V0QtC-t_S+f_jPPx|OdUt`oLDB1TvX0NUbNI2w_W9oW(tE3EpF+gBIhLn-gE@A7 zbg+5+=JD=T+IKxaY397W-k!bv^izRp?;l8;NaSxfU1acLtFXlSg?p6B-rTI$*wqpJ z=a1*zqiibi5+CIo${w1Y6!+I?Tfm^}!#GnV#^>T8_N%QIn8d!79&(FV(;|@+ad&Rg zqfU)wD;{;}Y;qb-~eae5C+Bn+yS1Bev$5;ZiadW?4G%a>8Xs24AuY^sm~1n literal 0 HcmV?d00001 diff --git a/thirdparty/linux/ntl/doc/arrow2.gif b/thirdparty/linux/ntl/doc/arrow2.gif new file mode 100644 index 0000000000000000000000000000000000000000..ea48fd9545562b942d8c144df7d56c211828e8cf GIT binary patch literal 942 zcmZ?wbhEHbG-5Df_?FG6ZN5v}e2iT^tWWCOoYuEFYh-uO(C(Ch?KuOx zi-vYr%p4C{xgM}|J!bE9(8TeivBNntr*l?rCv7}V*?FBccDQKjbkWl7lAYIOTd%7& zUUwY4k2(7t@eDrd-OVBgVut(kzPuxRZdWOC7j(Fu0`64voaahvRu;ll#nXeOa z-X-OING<%5RrWKp^nXsp@4Tvig?0by+OAc#{jcx--_d`od(zF`$@lxG-Jd%9?u>c& z<}AEFW%i>dbVZfn|+5qojUvO^tq2`&VM~{ z;QzUcpRV5ccKhy^0}uW`d;as~t6y*4{(S%8&!^A7KYjlD?c3j~%(7_{r=(77%BnPP*$w+PZr*C_7!ortT8)Wm|GtBgCP!iuGiQhM zo;r1A!)9S)>yX&x88fdjGTyj#fMx52kdW{l)1!EwJ$rHMp!7Y*z`u{Dq$yn4T%PaH z9KS^^XTiqg<2|ZVr}3zC_%d|q?wz%E_JqX4Z9Kx;e3XAU6Ppri>_PMFFh393tXq46Iyg7a+pOTEq`YO`B`5tprMZGHy)HB8uaGoqcFH;* z(m%_@mGPUv%pToT$!eF)%Nn##uHakvaMNY&#;tuvW{DIuFgEM8@AzRm!B1C3+w}J@ z)hv$>4@#JeY;Of^mV6v)d%3h@$}_FT(=9uAR;=i4T_Wzwsq^sAE>m&s#h(8+DS3Kd zW~?iIEmyGakx}y`#ifplBHQ2HHLJDn-LmOZZb74%cjls^ z`+mPC%@kKXem!Lo=h;bLUY&Jhn7fku;}4CdVx#1zb(h&RG8RtN%F4LN!ll8&%GkvC z&t-vPO2)$b%_3nL4_hVDRy=HzDO;h)p!iQiBU#hX($Ye~CDk{-C_OPx!80$Vw78@w tGqqU3IlriYF*G5ovBPO&hcl*5XYIVsTe@Ad^}J;5an;uAs+IcJ zzxna%4X-zCf4_3;|DF3j?mhTw-=UBDPW?Z1_TA}oAJ3fsdf>qSgXjLAzx3(Y<^NZ2 ze7kt>|GfuaA3gr|^x5~9uYSIH`}6&WU!Okz{`l$Nw{L%c{`~j<|9=LGf#OdVPIiWR z1|5(|pg3V;|JRV&)ZEhA*51+C)!ozE*FRz6q{&mJPMbbs=B(Ls=FXcxEj@g3fY~y$ zfW_g-3o^sqjEq=Wwy>~n-@$5W<`$7YD}KY)!&`T5=jIlX+!H@7-dTY8$mMOvPD)x@ zx_c!|N_J6Test-|k!zCqk6U%vJ_+Y=zb`1#4z*=0QQdrKq5 z93=%7BpzrtdNg(Sa-Q3I8l5ZtWWV^Z&_*Mv=g4C7H}_0)BzPzOJg`yKp`SUZO;B@# zifd{Tk7(h`qK2*rhvZmAmbMcflfD)3YPzH;pKoJbVWB1EC$qIWkt4A3mXX$y^iXv% z56ukq@D)MIjjmBSFBKH8r7xURTf*km#*~wJQ^7=Hyess=T3b1{iT33UzM@WzoPdG4RvM~$1I4kI?kUWba3;(N!5q>{|L2ezMZ?f z=Un#*fyg*Jy%wc+_XP9*mo)Tzn7HOd!L5JI6E2=}5OglNR4c$NdbdERO_b@_gM<<- zix&$-!!}&, then set GMP_PREFIX=. +# This works if the directory /include contains gmp.h +# and /lib contains libgmp.a. +# For finer-grained control, set the variables GMP_INCDIR and GMP_LIBDIR +# instead (see below). + + +########## GF2X variables: + +NTL_GF2X_LIB=off + +# Set to 'on' if you want to use the gf2x library for faster +# arithmetic over GF(2)[X] (the NTL class GF2X). + +# If you set this flag, please note the following. 
+
+# If you have installed gf2x in a standard "system" location, this is
+# all you have to do.  Otherwise, if gf2x is built, but not installed
+# in a standard place, you have to set the variable GF2X_PREFIX.
+
+
+GF2X_PREFIX=$(DEF_PREFIX)
+
+# If gf2x was installed in a standard system directory, e.g., /usr/local,
+# then do not set this variable.
+# Otherwise, if you want to use gf2x and gf2x was installed in
+# a directory <gf2x_prefix>, then set GF2X_PREFIX=<gf2x_prefix>.
+# This works if the directory <gf2x_prefix>/include contains gf2x.h
+# and <gf2x_prefix>/lib contains libgf2x.a.
+# For finer-grained control, set the variables GF2X_INCDIR and GF2X_LIBDIR
+# instead (see below).
+
+
+
+########### Examples:
+
+# If you are happy with all the default values:
+
+   ./configure
+
+# Actually, the initially installed makefile and config.h files
+# already reflect the default values.
+
+
+
+
+# If your C++ compiler is called CC:
+
+   ./configure CXX=CC
+
+
+
+
+# If GMP was installed in a non-standard directory, say, $HOME/sw:
+
+   ./configure GMP_PREFIX=$HOME/sw
+
+
+# If you want to use the options -g and -O for compiling C++,
+# just execute
+
+   ./configure "CXXFLAGS=-g -O"
+
+# Note the use of quotes to keep the argument in one piece.
+
+
+# If you want to use the gf2x library:
+
+   ./configure NTL_GF2X_LIB=on
+
+
+
+###########
+########### A little magic
+###########
+
+CXXAUTOFLAGS=
+
+# This is a variable that is automagically set by the configuration script.
+# These are C++ compiler flags that are selected depending on
+# the choice of other configuration options, and is geared towards gcc.
+# The configuration script always prints out the value it chooses.
+# If you explicitly set a value when invoking the configuration script,
+# then it will not change that value.
+
+
+
+########### Here is a complete list of the remaining variables,
+########### with their default values.  These variables are pretty
+########### esoteric, and you will probably never change their
+########### default values.
+
+AR=ar
+ARFLAGS=ruv
+RANLIB=ranlib
+LDFLAGS=
+LDLIBS=-lm
+CPPFLAGS=
+LIBTOOL=libtool
+
+LIBDIR=$(PREFIX)/lib
+INCLUDEDIR=$(PREFIX)/include
+DOCDIR=$(PREFIX)/share/doc
+
+NTL_DISABLE_TLS_HACK=off
+NTL_ENABLE_TLS_HACK=off
+
+NTL_LEGACY_NO_NAMESPACE=off
+NTL_LEGACY_INPUT_ERROR=off
+
+NTL_LEGACY_SP_MULMOD=off
+
+NTL_LONG_LONG_TYPE=undefined
+NTL_UNSIGNED_LONG_LONG_TYPE=undefined
+NTL_CLEAN_INT=off
+NTL_CLEAN_PTR=off
+NTL_RANGE_CHECK=off
+NTL_X86_FIX=off
+NTL_NO_X86_FIX=off
+NTL_NO_INIT_TRANS=off
+NTL_DISABLE_LONGDOUBLE=off
+NTL_DISABLE_LONGLONG=off
+NTL_DISABLE_LL_ASM=off
+NTL_MAXIMIZE_SP_NBITS=off
+
+WIZARD=on
+NTL_LONG_LONG=off
+NTL_AVOID_FLOAT=off
+NTL_SPMM_ULL=off
+NTL_SPMM_ASM=off
+NTL_FFT_BIGTAB=off
+NTL_FFT_LAZYMUL=off
+NTL_TBL_REM=off
+NTL_TBL_REM_LL=off
+NTL_AVOID_BRANCHING=off
+NTL_GF2X_NOINLINE=off
+NTL_GF2X_ALTCODE=off
+NTL_GF2X_ALTCODE1=off
+NTL_PCLMUL=off
+
+GMP_INCDIR=$(GMP_PREFIX)/include
+GMP_LIBDIR=$(GMP_PREFIX)/lib
+
+GF2X_INCDIR=$(GF2X_PREFIX)/include
+GF2X_LIBDIR=$(GF2X_PREFIX)/lib
+
+
+
+
+########### Here is a more detailed description of these variables.
+ +########### Further compilation variables: + + +AR=ar + +# command to make a library + + + +ARFLAGS=ruv + +# arguments for AR + + + +RANLIB=ranlib + +# set to echo if you want to disable it completely + + + +LDFLAGS= + +# arguments for linker for C++ programs + + + + +LDLIBS=-lm + +# libraries for linking C++ programs + + + +CPPFLAGS= + +# arguments for the C preprocessor + +LIBTOOL=libtool + +# the libtool command -- only needed if SHARED=on + + + + +########### Details of the compilation process (when SHARED=off) + + +# When a C++ file foo.c is compiled: + $(CXX) -I../include $(CPPFLAGS) $(CXXFLAGS) -c foo.c + +# When a C++ file foo.c is compiled and linked: + $(CXX) -I../include $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) \ + -o foo foo.c $(LDLIBS) + +# When the library ntl.a is built + $(AR) $(ARFLAGS) ntl.a [ object files ]... + $(RANLIB) ntl.a +# If the ranlib command does not exist, everything will still function OK. + + + +########### Further installation variables: + +LIBDIR=$(PREFIX)/lib +INCLUDEDIR=$(PREFIX)/include +DOCDIR=$(PREFIX)/share/doc +# Where to install NTL. +# Execution of 'make install' copies header files into $(INCLUDEDIR)/NTL, +# copies the library itself to $(LIBDIR)/libntl.a, and copies the +# documentation files into $(DOCDIR)/NTL. + +########## Disable/enable TLS hack + +NTL_DISABLE_TLS_HACK=off +NTL_ENABLE_TLS_HACK=off + +# when building NTL with NTL_THREADS=on, if the compiler is gcc-compatible, a +# "TLS hack" may be used to workaround the fact that many compilers do not +# (correctly) implement C++11's thread_local feature. The workaround is to use +# gcc's more limited __thread feature, and to emulate thread_local semantics +# using pthread routines. +# +# "gcc-compatible" means that the "__GNUC__" macro is defined, which means the +# TLS hack may be used for gcc, clang, and icc compilers. The current version +# of NTL will enable this hack by default, but you can disable it by specifying +# NTL_DISABLE_TLS_HACK=on. At some point in the future, this default behavior +# may change, in which case you will still be able to force the TLS hack by +# specifying NTL_ENABLE_TLS_HACK=on. + + + + + + +########## Legacy switches + +NTL_LEGACY_NO_NAMESPACE=off + +# put NTL components in the global namespace + + + +NTL_LEGACY_INPUT_ERROR=off + +# abort on input errors, instead of just setting the +# "fail bit" of the istream object + +NTL_LEGACY_SP_MULMOD=off + +# use pre-9.0 interfaces for single-precision MulMod routines. +# See discussion in ZZ.txt for details. + + + +########### Basic Configuration Options: + +NTL_LONG_LONG_TYPE=undefined + +# Name of double-word signed integer type. +# This is a non-standard type, and is called 'long long' by many +# compilers. MS C++ calls it '__int64'. +# +# Note that the new C99 standard defines the type 'long long' +# to be at least 64-bits wide. On 32-bit machines, this is just right. +# Although not officially part of the C++ standard (which predates C99), +# it is widely supported by C++ compilers, and is likely to be added +# to the C++ standard. +# +# Unfortunately, 64-bit machines usually define 'long long' +# to also be 64-bits wide, which is kind of useless. +# However, GCC provides a type __int128_t which does the job. +# +# If left undefined, NTL will use some "ifdef magic" to find +# the type most suitable for your machine (based on compiler and +# word size). + +NTL_UNSIGNED_LONG_LONG_TYPE=undefined + +# Name of double-word unsigned integer type. 
+
+#
+# If left undefined, NTL will use some "ifdef magic" to find
+# the type most suitable for your machine (based on compiler and
+# word size).
+
+
+
+
+NTL_CLEAN_INT=off
+
+# Setting this to 'on' disables the use of some non-standard
+# integer arithmetic which would yield slightly better performance.
+
+
+NTL_CLEAN_PTR=off
+
+# Setting this to 'on' disables the use of some non-standard
+# pointer arithmetic which would yield slightly better performance.
+
+
+NTL_RANGE_CHECK=off
+
+# Setting this to 'on' will generate vector subscript range-check code.
+# Useful for debugging, but it slows things down of course.
+
+
+
+NTL_X86_FIX=off
+
+# Set to 'on' to force the "x86 floating point fix",
+# overriding the default behavior.
+# By default, NTL will apply the "fix" if it looks like it is
+# necessary, and if it knows how to fix it.
+# The problem addressed here is that x86 processors sometimes
+# run in a mode where FP registers have more precision than doubles.
+# This will cause code in quad_float.c some trouble.
+# NTL can normally automatically detect the problem, and fix it,
+# so you shouldn't need to worry about this or the next flag.
+
+
+
+NTL_NO_X86_FIX=off
+
+# Set to 'on' to force no "x86 floating point fix",
+# overriding the default behavior.
+
+
+
+NTL_NO_INIT_TRANS=off
+
+# When 'off', NTL uses a special code sequence to avoid
+# copying large objects in return statements.  However, if your
+# compiler optimizes away the return of a *named* local object,
+# this is not necessary, and setting this flag to 'on' will result
+# in *slightly* more compact and efficient code.  The C++
+# standard explicitly allows compilers to perform this optimization,
+# and with time, more compilers actually do this.
+# Traditionally, however, most will only avoid copying *temporary*
+# objects in return statements, and NTL's default code sequence
+# exploits this fact.
+
+
+
+NTL_DISABLE_LONGDOUBLE=off
+
+# Explicitly disables use of long double arithmetic
+
+NTL_DISABLE_LONGLONG=off
+
+# Explicitly disables use of long long arithmetic
+
+NTL_DISABLE_LL_ASM=off
+
+# Explicitly disables use of inline asm as replacement for
+# long long arithmetic
+
+NTL_MAXIMIZE_SP_NBITS=off
+
+# Allows for 62-bit single-precision moduli on 64-bit platforms.
+# By default, such moduli are restricted to 60 bits, which
+# usually gives *slightly* better performance across a range
+# of parameters.
+
+
+
+
+
+########## Performance Options:
+
+WIZARD=on
+
+# Set to 'off' if you want to bypass the wizard; otherwise, set to 'on'.
+# The wizard is a script that runs when NTL is built that sets the following
+# flags to 'optimize' performance on the current platform.
+
+
+
+NTL_LONG_LONG=off
+
+# For platforms that support it, this flag can be set to cause
+# the long-integer multiplication code to use the type "long long",
+# which on some platforms yields a significant performance gain,
+# but on others, it can yield no improvement and can even
+# slow things down.
+# The variable NTL_LONG_LONG_TYPE can be defined to use a type name
+# other than "long long".
+# If you set NTL_LONG_LONG, you might also want to set
+# the flag NTL_TBL_REM.
+
+
+
+NTL_AVOID_FLOAT=off
+
+# On machines with slow floating point or---more commonly---slow int/float
+# conversions, this flag can lead to faster long-integer multiplication code.
+# If you set NTL_AVOID_FLOAT, you should probably also
+# set NTL_TBL_REM.
+# Note that at most one of NTL_LONG_LONG and NTL_AVOID_FLOAT may be set.
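+
+# For example (a sketch), on a machine with slow int/float conversions
+# one might bypass the wizard and set these flags by hand:
+#
+#   ./configure WIZARD=off NTL_AVOID_FLOAT=on NTL_TBL_REM=on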
+
+
+NTL_SPMM_ULL=off
+
+# Implement the MulModPrecon code using "unsigned long long"
+# (or specify NTL_UNSIGNED_LONG_LONG_TYPE to override the default).
+
+
+NTL_SPMM_ASM=off
+
+# Similar to NTL_SPMM_ULL, but relies on double-word unsigned multiplication
+# using assembly code.  Only supported on select machines
+# and only under GCC.
+
+NTL_FFT_BIGTAB=off
+
+# Precomputed tables are used to store all the roots of unity
+# used in FFT computations.
+
+
+NTL_FFT_LAZYMUL=off
+
+# This flag only has an effect when combined with
+# either the NTL_SPMM_ULL or NTL_SPMM_ASM flags.
+# When set, a "lazy multiplication" strategy due to David Harvey is used:
+# see his paper "FASTER ARITHMETIC FOR NUMBER-THEORETIC TRANSFORMS".
+
+
+
+
+NTL_TBL_REM=off
+
+# With this flag, some divisions are avoided in the
+# ZZ_pX multiplication routines.  If you use the NTL_AVOID_FLOAT
+# or NTL_LONG_LONG flags, then you should probably use this one too.
+
+NTL_TBL_REM_LL=off
+
+# Forces the LONG_LONG implementation of NTL_TBL_REM.
+
+
+
+
+NTL_AVOID_BRANCHING=off
+
+# With this option, branches are replaced at several
+# key points with equivalent code using shifts and masks.
+# Recommended for use with RISC architectures, especially
+# ones with deep pipelines and high branch penalties.
+# This flag is becoming less helpful as newer machines
+# have much smaller branch penalties, but still may be worth a try.
+
+
+
+
+NTL_GF2X_NOINLINE=off
+
+# By default, the low-level GF2X multiplication routine is inlined.
+# This can potentially lead to some trouble on some platforms,
+# and you can override the default by setting this flag.
+
+
+
+
+NTL_GF2X_ALTCODE=off
+
+# With this option, the default strategy for implementing low-level
+# GF2X multiplication is replaced with an alternative strategy.
+# This alternative strategy seems to work better on RISC machines
+# with deep pipelines and high branch penalties (like a powerpc),
+# but does no better (or even worse) on x86s.
+
+
+NTL_GF2X_ALTCODE1=off
+
+# Yet another alternative implementation for GF2X multiplication.
+
+NTL_PCLMUL=off
+
+# switch to enable the PCLMUL instruction on x86 machines for faster arithmetic
+# over GF(2)[X] (without relying on the gf2x package)
+
+
+
+########## More GMP Options:
+
+
+GMP_INCDIR=$(GMP_PREFIX)/include
+# directory containing gmp.h
+
+GMP_LIBDIR=$(GMP_PREFIX)/lib
+# directory containing libgmp.a
+
+
+####### More gf2x options:
+
+GF2X_INCDIR=$(GF2X_PREFIX)/include
+# directory containing gf2x.h
+
+GF2X_LIBDIR=$(GF2X_PREFIX)/lib
+# directory containing libgf2x.a
+
diff --git a/thirdparty/linux/ntl/doc/conversions.txt b/thirdparty/linux/ntl/doc/conversions.txt
new file mode 100644
index 0000000000..d47f78a48a
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/conversions.txt
@@ -0,0 +1,183 @@
+CONVERSIONS
+
+notation:
+
+   typedef unsigned int uint;
+   typedef unsigned long ulong;
+   typedef const char * cstr;
+
+
+destination: source
+
+
+int:        int, long, uint, ulong, ZZ, float, double, xdouble, quad_float, RR
+            GF2, zz_p, ZZ_p
+
+long:       int, long, uint, ulong, ZZ, float, double, xdouble, quad_float, RR
+            GF2, zz_p, ZZ_p
+
+uint:       int, long, uint, ulong, ZZ, float, double, xdouble, quad_float, RR
+            GF2, zz_p, ZZ_p
+
+ulong:      int, long, uint, ulong, ZZ, float, double, xdouble, quad_float, RR
+            GF2, zz_p, ZZ_p
+
+ZZ:         int, long, uint, ulong, ZZ, float, double, xdouble, quad_float, RR, cstr
+            GF2, zz_p, ZZ_p
+
+float:      int, long, uint, ulong, ZZ, float, double, xdouble, quad_float, RR
+
+double:     int, long, uint, ulong, ZZ, float, double, xdouble, quad_float, RR
+
+xdouble:    int, long, uint, ulong, ZZ, float, double, xdouble, RR, cstr
+
+quad_float: int, long, uint, ulong, ZZ, float, double, quad_float, RR, cstr
+
+RR:         int, long, uint, ulong, ZZ, float, double, xdouble, quad_float,
+            RR, cstr
+
+
+ZZ_p:       long, ZZ, ZZ_p
+
+ZZ_pX:      long, ZZ, ZZ_p; ZZX, ZZ_pX; ZZ_pE; vec_ZZ_p
+
+zz_p:       long, ZZ, zz_p
+
+zz_pX:      long, ZZ, zz_p; ZZX, zz_pX; zz_pE; vec_zz_p
+
+ZZX:        long, ZZ; ZZX, GF2X, zz_pX, ZZ_pX; vec_ZZ
+
+GF2:        long, ZZ, GF2
+
+GF2X:       long, ZZ, GF2; ZZX, GF2X; GF2E; vec_GF2
+
+GF2E:       long, ZZ, GF2, GF2E; GF2X
+
+GF2EX:      long, ZZ, GF2, GF2E; ZZX, GF2X, GF2EX; vec_GF2E
+
+ZZ_pE:      long, ZZ, ZZ_p, ZZ_pE; ZZ_pX
+
+ZZ_pEX:     long, ZZ, ZZ_p, ZZ_pE; ZZX, ZZ_pX, ZZ_pEX; vec_ZZ_pE
+
+zz_pE:      long, ZZ, zz_p, zz_pE; zz_pX
+
+zz_pEX:     long, ZZ, zz_p, zz_pE; ZZX, zz_pX, zz_pEX; vec_zz_pE
+
+vec_ZZ:     ZZX
+vec_ZZ_p:   ZZ_pX
+vec_zz_p:   zz_pX
+vec_GF2:    GF2X
+vec_ZZ_pE:  ZZ_pEX
+vec_zz_pE:  zz_pEX
+vec_GF2E:   GF2EX
+
+
+********** NOTES ***********
+
+nomenclature:
+
+   - integral types: int, long, uint, ulong, ZZ
+   - bounded integral types: int, long, uint, ulong
+   - floating point types: float, double, xdouble, quad_float, RR
+
+
+  [1] All conversion operators come in procedural or functional
+      form.  To convert a of type S to x of type T, you can write
+         conv(x, a);
+      or
+         x = conv<T>(a);
+
+      E.g., conv<ZZ>(a), conv<ZZ_p>(a), conv< Vec<ZZ> >, etc.
+
+      The notation conv<T>(a) was introduced in NTL v6.  Prior to
+      this, the notation to_T(a) was used.  For backward compatibility,
+      the various "to_T" functions have been retained; however, their
+      use is discouraged.  Also note that new conversions have been
+      added in v6 for which there is no corresponding "to_T" function:
+      for these, one must use the new "conv" notation.
+
+      Note that conv<T> is implemented as a template function:
+
+         template<class T, class S> T conv(const S& a)
+            { T x; conv(x, a); return x; }
+
+      Thus, the call conv<T>(a) always resolves to the procedure call
+      conv(x, a).  Modern C++ compilers do a pretty good job implementing
+      the "named return value optimization", so this should not create too
+      many unnecessary temporary objects.
+
+  [2] In addition to the conversions listed, for generic vector types,
+      a template conversion operator is provided:
+
+         template<class T, class S>
+         void conv(Vec<T>& x, const Vec<S>& a) {
+            long n = a.length();
+            x.SetLength(n);
+            for (long i = 0; i < n; i++)
+               conv(x[i], a[i]);
+         }
+
+      This provides component-wise conversion.  Thus, if there is a conversion
+      provided from S to T, then there is automatically a conversion provided
+      from Vec<S> to Vec<T>.
+
+      Note that because of the simple implementation, the input a is not allowed
+      to alias the output x.
+
+      Similarly, for generic matrix types Mat<T>, a template conversion
+      operator provides component-wise conversion.  Again, the input may not
+      alias the output.
+
+  [3] All conversions from an integral type to a bounded integral type
+      compute the result modulo 2^n, where n is the number of bits of the
+      destination type: no overflow occurs.
+
+  [4] All floating point to signed integral conversions compute the floor
+      function *exactly*, unless the destination type is int or long
+      and overflow occurs, in which case the result is undefined.
+      An exception: converting an RR x to int or long will always
+      yield floor(x) modulo 2^n, where n is the number of bits
+      in the destination type.
+
+  [5] Conversions from floating point to unsigned int and unsigned long
+      are done via conversions to signed long: if the conversion to long
+      overflows, the result is undefined; otherwise, the result
+      is computed modulo 2^n, where n is the number of bits in
+      the destination type.
+
+  [6] The ZZ to double conversion routine is very precise:
+      the result is the nearest double, breaking ties using the
+      "round to even" rule.  Overflow results in +/- Infinity.
+      All this assumes the underlying floating point adheres to
+      the IEEE standard.
+
+  [7] All conversions to RR round to the current working precision:
+      even converting an RR to an RR.
+
+
+  [8] All conversions from long or ZZ to one of the "mod p" types
+         ZZ_p, ZZ_pX, ZZ_pE, ZZ_pEX,
+         zz_p, zz_pX, zz_pE, zz_pEX,
+         GF2, GF2X, GF2E, GF2EX
+      yield the residue class modulo p (or 2).
+
+  [9] All polynomial-to-polynomial conversions apply coefficient-wise
+      conversion.  Note that as a rule, if a conversion S to T
+      is provided, then there is a corresponding conversion from
+      the polynomial ring S[X] to the polynomial ring T[X].
+
+ [10] All polynomial/vector conversions simply copy from/to the coefficient
+      vector of the polynomial.
+
+ [11] The GF2X/ZZ_pX/zz_pX to GF2E/ZZ_pE/zz_pE conversions reduce
+      the given polynomial modulo the current modulus; the reverse
+      conversions yield the standard representative (smallest degree polynomial).
+
+ [12] Conversions from GF2, zz_p or ZZ_p to any integral type yield
+      the standard representative (least non-negative) of the given residue class.
+
+ [13] All conversions from the type cstr apply the same algorithm
+      as is used for reading from an I/O stream, so
+         ZZ x = conv<ZZ>("999999999999999999");
+      initializes the ZZ x to the integer 999999999999999999.
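+
+      For example (a sketch), the component-wise rule of note [2] in
+      action:
+
+         Vec<ZZ> v;
+         v.SetLength(2);
+         v[0] = 42;
+         v[1] = -7;
+         Vec<RR> w = conv< Vec<RR> >(v);  // converts each entry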
+
diff --git a/thirdparty/linux/ntl/doc/copying.txt b/thirdparty/linux/ntl/doc/copying.txt
new file mode 100644
index 0000000000..93ff97c647
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/copying.txt
@@ -0,0 +1,361 @@
+
+COPYRIGHT NOTICE
+
+NTL -- A Library for Doing Number Theory
+Copyright (C) 1996-2016  Victor Shoup
+
+The most recent version of NTL is available at http://www.shoup.net
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+This entire copyright notice should be placed in an appropriately
+conspicuous place accompanying all distributions of software that
+make use of NTL.
+
+The above terms apply to all of the software modules distributed with NTL,
+i.e., all source files in either the ntl-xxx.tar.gz or WinNTL-xxx.zip
+distributions.  In general, the individual files do not contain
+copyright notices.
+
+Note that the file ZZ.c contains an implementation of SHA256
+which is derived from work by Brad Conte, which is in the public domain.
+See file ZZ.c for a more detailed notice.
+
+Note that the file mat_lzz_p.c contains an implementation of Strassen's
+matrix multiplication algorithm which is derived from the implementation
+in FLINT v2.5.2.  The latter is copyrighted by Martin Albrecht, William Hart,
+and Fredrik Johansson, and also licensed under the GPL.
+See file mat_lzz_p.c for a more detailed notice.
+
+Note that the quad_float package is derived from the doubledouble package,
+originally developed by Keith Briggs, and also licensed under the GNU GPL.
+The files quad_float.c and quad_float.h contain more detailed copyright
+notices.
+
+Note that the traditional long integer package used by NTL, lip.c, is derived
+from---and represents an extensive modification of---
+a package originally developed and copyrighted by Arjen Lenstra,
+who has agreed to renounce any copyright claims on the particular
+version of the long integer package appearing in NTL, so that
+this package now is covered by the GNU GPL as well.
+
+Note that the alternative long integer package used by NTL is GMP,
+which is written by Torbjorn Granlund.
+GMP is licensed under the terms of the GNU Lesser General Public License.
+
+
+Note that prior to version 4.0, NTL was distributed under the following terms:
+   NTL is freely available for research and educational purposes.
+   I don't want to attach any legalistic licensing restrictions on
+   users of NTL.
+   However, NTL should not be linked in a commercial program
+   (although using data in a commercial
+   product produced by a program that used NTL is fine).
+
+The hope is that the GNU GPL is actually less restrictive than these
+older terms; however, in any circumstances such that GNU GPL is more
+restrictive, then the following rule is in force:
+versions prior to 4.0 may continue to be used under the old terms,
+but users of versions 4.0 or later should adhere to the terms of the GNU GPL.
+ +END COPYRIGHT NOTICE + + +Following is the complete text of the GNU General Public License. +Note that the copyright notice below applies to the text of the license +itself, and not to NTL. + + + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. 
Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
diff --git a/thirdparty/linux/ntl/doc/flags.txt b/thirdparty/linux/ntl/doc/flags.txt
new file mode 100644
index 0000000000..9927ecbfac
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/flags.txt
@@ -0,0 +1,40 @@
+
+
+Use the compiler flag
+
+   -DNTL_TRANSITION
+
+to help with the transition to NTL 3.0 from earlier versions.
+
+This has the effect of undeclaring certain functions
+whose semantics in 3.0 differ from those in versions < 3.0.
+Thus, one can use the compiler to find the trouble spots.
+
+THE LIBRARY CAN NOT BE COMPILED WITH THIS FLAG!
+ONLY USE TO FIND TRANSITION PROBLEMS IN CLIENT CODE.
+
+Undeclares all shift operators for NTL arithmetic types;
+in versions < 3.0, << was a conversion operator; now it
+is a shift operator.
+
+Undeclares division functions in ZZX; in versions < 3.0,
+these were defined in terms of pseudo-division; now they are
+defined as ordinary division with an error being raised if the
+result is not integral.  Explicit pseudo-division functions
+are now provided for the old semantics.
+
+Undeclares the UpdateMap function for ZZ_pX and zz_pX;
+in versions < 3.0, the output always had length n;
+now high-order zeroes are stripped.
+
+Undeclares the conversion functions from GF2X to GF2EX;
+in versions < 3.0, this conversion was defined as creating a constant
+polynomial by reduction modulo GF2E::modulus();
+now, it is defined as a coefficient-wise "lift".
+GF2X and GF2EX were called BB and BB_pX in
+versions < 3.0.
+
+Declares assignment and copy for RR to be private.  The
+semantics of these have changed from "copy and round to
+current precision" to "exact copy".
+ diff --git a/thirdparty/linux/ntl/doc/lzz_p.cpp.html b/thirdparty/linux/ntl/doc/lzz_p.cpp.html new file mode 100644 index 0000000000..397ce9bd44 --- /dev/null +++ b/thirdparty/linux/ntl/doc/lzz_p.cpp.html @@ -0,0 +1,432 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/lzz_p.cpp.html + + + + +
+
+/**************************************************************************\
+
+MODULE: zz_p
+
+SUMMARY:
+
+The class zz_p is used to represent integers mod p, where 1 <= p <
+NTL_SP_BOUND.  Note that NTL_SP_BOUND is usually 2^30 on 32-bit machines and
+2^50 on 64-bit machines.
+
+The modulus p may be any positive integer, not necessarily prime.
+
+Objects of the class zz_p are represented as a long in the range 0..p-1.
+
+An executing program maintains a "current modulus", which is set to p using
+zz_p::init(p).  The current modulus *must* be initialized before any operations
+on zz_p's are performed.  The modulus may be changed, and a mechanism is provided
+for saving and restoring a modulus (see classes zz_pPush and zz_pContext below).
+
+\**************************************************************************/
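+
+/*
+   For illustration, a minimal usage sketch (assuming the user-level
+   header <NTL/lzz_p.h>; the modulus and values are arbitrary):
+
+      #include <NTL/lzz_p.h>
+      using namespace NTL;
+
+      int main()
+      {
+         zz_p::init(17);       // install 17 as the current modulus
+         zz_p a(3), b(5);      // explicit promotion from long
+         zz_p c = a*b + 1;     // arithmetic mod 17; rep(c) == 16
+         return 0;
+      }
+*/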
+
+#include <NTL/ZZ.h>
+#include <NTL/FFT.h>
+#include <NTL/SmartPtr.h>
+
+
+class zz_p {
+public:
+  
+   zz_p(); // initial value 0
+
+   zz_p(const zz_p& a); // copy constructor
+   explicit zz_p(long a); // promotion constructor
+
+   zz_p& operator=(const zz_p& a); // assignment
+   zz_p& operator=(long a); // assignment
+
+   static void init(long p);
+   // set the modulus to p, where p > 1.  This must be called before any
+   // zz_p constructors are invoked.
+   // The number p must have at most NTL_SP_NBITS bits.
+
+   static long modulus();
+   // zz_p::modulus() yields read-only reference to the current
+   // modulus
+
+
+   // typedefs to aid in generic programming
+   typedef long rep_type;
+   typedef zz_pContext context_type;
+   typedef zz_pBak bak_type;
+   typedef zz_pPush push_type;
+   typedef zz_pX poly_type;
+
+};
+
+
+long rep(zz_p a); // read-only access to representation of a
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+long operator==(zz_p a, zz_p b);
+long operator!=(zz_p a, zz_p b);
+
+long IsZero(zz_p a);  // test for 0
+long IsOne(zz_p a);  // test for 1
+
+// PROMOTIONS: operators ==, != promote long to zz_p on (a, b).
+
+
+/**************************************************************************\
+
+                                    Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_p operator+(zz_p a, zz_p b);
+zz_p operator-(zz_p a, zz_p b);
+
+zz_p operator-(zz_p a); // unary -
+
+zz_p& operator+=(zz_p& x, zz_p a);
+zz_p& operator+=(zz_p& x, long a);
+
+zz_p& operator-=(zz_p& x, zz_p a);
+zz_p& operator-=(zz_p& x, long a);
+
+zz_p& operator++(zz_p& x);  // prefix
+void operator++(zz_p& x, int);  // postfix
+
+zz_p& operator--(zz_p& x);  // prefix
+void operator--(zz_p& x, int);  // postfix
+
+// procedural versions:
+
+
+void add(zz_p& x, zz_p a, zz_p b); // x = a + b
+void sub(zz_p& x, zz_p a, zz_p b); // x = a - b
+void negate(zz_p& x, zz_p a); // x = -a
+
+// PROMOTIONS: binary +, -, and procedures add, sub promote
+// from long to zz_p on (a, b).
+
+
+/**************************************************************************\
+
+                                  Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_p operator*(zz_p a, zz_p b);
+
+zz_p& operator*=(zz_p& x, zz_p a);
+zz_p& operator*=(zz_p& x, long a);
+
+// procedural versions:
+
+void mul(zz_p& x, zz_p a, zz_p b); // x = a * b
+
+void sqr(zz_p& x, zz_p a); // x = a^2
+zz_p sqr(zz_p a);
+
+// PROMOTIONS: operator * and procedure mul promote from long to zz_p
+// on (a, b).
+
+
+/**************************************************************************\
+
+                                  Division
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_p operator/(zz_p a, zz_p b);
+
+zz_p& operator/=(zz_p& x, zz_p a);
+zz_p& operator/=(zz_p& x, long a);
+
+// procedural versions:
+
+void div(zz_p& x, zz_p a, zz_p b);
+// x = a/b
+
+void inv(zz_p& x, zz_p a);
+zz_p inv(zz_p a);
+// x = 1/a
+
+// PROMOTIONS: operator / and procedure div promote from long to zz_p
+// on (a, b).
+
+
+/**************************************************************************\
+
+                                  Exponentiation
+
+\**************************************************************************/
+
+
+void power(zz_p& x, zz_p a, long e); // x = a^e (e may be negative)
+zz_p power(zz_p a, long e);
+
+
+/**************************************************************************\
+
+                               Random Elements
+
+\**************************************************************************/
+
+
+void random(zz_p& x);
+zz_p random_zz_p();
+// x = random element in zz_p.  Uses RandomBnd from ZZ.
+
+void VectorRandom(long k, zz_p *x);
+// equivalent to random(x[i]) for i in [0..k), but faster
+
+
+/**************************************************************************\
+
+                                Input/Output
+
+\**************************************************************************/
+
+
+ostream& operator<<(ostream& s, zz_p a);
+
+istream& operator>>(istream& s, zz_p& x);
+// a ZZ is read and reduced mod p
+
+/**************************************************************************\
+
+                       Modulus Switching
+
+A class zz_pPush is provided for "backing up" the current modulus
+and installing a new one.
+
+Here is what you do to save the current modulus, temporarily
+set it to p, and automatically restore it:
+
+   {
+      zz_pPush push(p);
+
+      ...
+
+   }
+
+The constructor for push will save the current modulus, and install p as the
+current modulus.  The destructor for push will restore the old modulus when the
+scope enclosing it exits.  This is the so-called RAII (resource acquisition is
+initialization) paradigm.
+
+You could also do the following:
+
+   {
+      zz_pPush push; // just backup current modulus
+
+        ...
+
+      zz_p::init(p1); // install p1
+
+        ...
+
+      zz_p::init(p2); // install p2
+
+      // reinstall original modulus at close of scope
+   }
+
+      
+The zz_pPush interface is good for implementing simple stack-like
+modulus "context switching".  For more general context switching,
+see zz_pContext below.  There is also an older zz_pBak class
+that may also be useful.
+
+..........................................................................
+
+It is critical that zz_p objects created under one zz_p modulus are not used in
+any non-trivial way "out of context", i.e., under a different (or undefined)
+zz_p modulus.  However, for ease-of-use, some operations may be safely
+performed out of context.  These safe operations include: the default and copy
+constructor, the destructor, and the assignment operator.  In addition it is
+generally safe to read any zz_p object out of context (i.e., printing it out, or
+fetching its underlying representation using the rep() function).
+
+Any unsafe uses out of context are not in general checked, and may
+lead to unpredictable behavior.
+
+\**************************************************************************/
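+
+/*
+   A concrete sketch of the RAII pattern just described (the moduli
+   are arbitrary examples):
+
+      zz_p::init(17);
+      zz_p a(2);                 // a lives under modulus 17
+      {
+         zz_pPush push(65537);   // back up 17, install 65537
+         zz_p b(3);              // b lives under modulus 65537
+         // ... compute mod 65537 ...
+      }                          // modulus 17 automatically restored
+      a += 1;                    // safe: a is again used under its own modulus
+*/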
+
+
+// A convenient interface for common cases:
+
+class zz_pPush {
+public:
+
+zz_pPush();  // just backup current modulus
+
+explicit zz_pPush(long p, long maxroot=NTL_FFTMaxRoot);
+zz_pPush(INIT_FFT_TYPE, long index);
+zz_pPush(INIT_USER_FFT_TYPE, long p);
+explicit zz_pPush(const zz_pContext& context);
+  // backup current modulus and install the given one
+  // see documentation for zz_p::init for more details
+
+private:
+zz_pPush(const zz_pPush&); // disabled
+void operator=(const zz_pPush&); // disabled
+
+};
+
+
+
+// more general context switching:
+// A zz_pContext object has a modulus q (possibly "null")
+
+class zz_pContext {
+
+
+public:
+
+zz_pContext();  // q = "null"
+
+explicit zz_pContext(long p);
+zz_pContext(INIT_FFT_TYPE, long index);
+zz_pContext(INIT_USER_FFT_TYPE, long p);
+  // q = the given modulus
+  // see documentation for zz_p::init for more details
+  
+
+void save(); // q = CurrentModulus
+void restore() const; // CurrentModulus = q
+
+zz_pContext(const zz_pContext&);  // copy
+zz_pContext& operator=(const zz_pContext&); // assignment
+~zz_pContext(); // destructor
+
+
+};
+
+
+// An older interface:
+// To describe this logic, think of a zz_pBak object
+// as having two components: a modulus q (possibly "null") and
+// an "auto-restore bit" b.
+
+class zz_pBak {
+public:
+
+
+   zz_pBak();  // q = "null", b = 0
+
+   ~zz_pBak();  // if (b) CurrentModulus = q
+
+   void save();  // q = CurrentModulus, b = 1
+   void restore();  // CurrentModulus = q, b = 0
+
+
+private:
+   zz_pBak(const zz_pBak&);  // copy disabled
+   void operator=(const zz_pBak&);  // assignment disabled
+};
+
+
+
+
+
+
+
+
+/**************************************************************************\
+
+                               Miscellany
+
+\**************************************************************************/
+
+
+void clear(zz_p& x); // x = 0
+void set(zz_p& x); // x = 1
+
+static mulmod_t zz_p::ModulusInverse();
+// zz_p::ModulusInverse() returns PrepMulMod(zz_p::modulus())
+
+static zz_p zz_p::zero();
+// zz_p::zero() yields a read-only reference to zero
+
+void swap(zz_p& x, zz_p& y);
+// swap x and y
+
+static void zz_p::init(long p, long maxroot);
+// Same as ordinary zz_p::init(p), but somewhat more efficient.  If you are
+// going to perform arithmetic modulo a degree n polynomial, set maxroot
+// to NextPowerOfTwo(n)+1.  This is useful, for example, if you are going
+// to factor a polynomial of degree n modulo p, and you know n in advance.
+// If maxroot is set too low, the program will abort with an
+// appropriate error message.
+
+static void zz_p::FFTInit(long i);
+// sets modulus to the i-th FFT prime (counting from 0).  FFT primes
+// are NTL_SP_NBITS-bit primes p, where p-1 is divisible by a high power
+// of two.  Thus, polynomial arithmetic mod p can be implemented
+// particularly efficiently using the FFT.  As i increases, the power
+// of 2 that divides p-1 gets smaller, thus placing a more severe
+// restriction on the degrees of the polynomials to be multiplied.
+
+static void zz_p::UserFFTInit(long p);
+// set the modulus to a user-provided FFT prime p. To be useful,
+// p-1 should be divisible by a high power of 2.
+// The utility routine CalcMaxRoot (see below) may be used to
+// calculate this power.
+// If you are going to perform arithmetic modulo a degree n polynomial,
+// you will want CalcMaxRoot(p) >= NextPowerOfTwo(n)+1.
+
+zz_pContext::zz_pContext(long p, long maxroot);
+// constructor for a zz_pContext with same semantics
+// as zz_p::init(p, maxroot) above.
+
+zz_pContext::zz_pContext(INIT_FFT_TYPE, long i);
+// constructor for a zz_pContext with same semantics
+// as zz_p::FFTInit(i) above; invoke as zz_pContext(INIT_FFT, i).
+
+zz_pContext::zz_pContext(INIT_USER_FFT_TYPE, long p);
+// constructor for a zz_pContext with same semantics
+// as zz_p::UserFFTInit(p) above; invoke as zz_pContext(INIT_USER_FFT, p).
+
+zz_p::zz_p(INIT_NO_ALLOC_TYPE);
+// provided for consistency with other classes; initializes to zero
+
+zz_p::zz_p(INIT_ALLOC_TYPE);
+// provided for consistency with other classes; initializes to zero
+
+zz_p::allocate();
+// provided for consistency with other classes, no action
+
+long CalcMaxRoot(long p);
+// p is assumed to be an odd prime.
+// Returns the largest k such that 2^k divides p-1
+// and such that k does not exceed an implementation defined
+// constant.  This represents the max power of two for which
+// an FFT mod p is supported.
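+
+// For intuition, the quantity involved is the 2-adic valuation of p-1;
+// a hypothetical helper computing it (ignoring the implementation-defined
+// cap mentioned above) might look like:
+//
+//    long TwoAdicVal(long p)            // p an odd prime
+//    {
+//       long k = 0;
+//       for (long m = p-1; m % 2 == 0; m /= 2)
+//          k++;
+//       return k;                       // largest k with 2^k | p-1
+//    }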
+
+void VectorConv(long k, zz_p *x, const ZZ *a);
+void VectorConv(long k, zz_p *x, const long *a);
+// equivalent to conv(x[i], a[i]) for i in [0..k), but faster
+
+
+
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/lzz_p.txt b/thirdparty/linux/ntl/doc/lzz_p.txt new file mode 100644 index 0000000000..eab6e08094 --- /dev/null +++ b/thirdparty/linux/ntl/doc/lzz_p.txt @@ -0,0 +1,422 @@ + + +/**************************************************************************\ + +MODULE: zz_p + +SUMMARY: + +The class zz_p is used to represent integers mod p, where 1 <= p < +NTL_SP_BOUND. Note that NTL_SP_BOUND is usually 2^30 on 32-bit machines and +2^50 on 64-bit machines. + +The modulus p may be any positive integer, not necessarily prime. + +Objects of the class zz_p are represented as a long in the range 0..p-1. + +An executing program maintains a "current modulus", which is set to p using +zz_p::init(p). The current modulus *must* be initialized before any operations +on zz_p's are performed. The modulus may be changed, and a mechanism is provided +for saving and restoring a modulus (see classes zz_pPush and zz_pContext below). + +\**************************************************************************/ + +#include +#include +#include + + +class zz_p { +public: + + zz_p(); // initial value 0 + + zz_p(const zz_p& a); // copy constructor + explicit zz_p(long a); // promotion constructor + + zz_p& operator=(const zz_p& a); // assignment + zz_p& operator=(long a); // assignment + + static void init(long p); + // set the modulus to p, where p > 1. This must be called before any + // zz_p constructors are invoked. + // The number p must have at most NTL_SP_NBITS bits. + + static long modulus(); + // zz_p::modulus() yields read-only reference to the current + // modulus + + + // typedefs to aid in generic programming + typedef long rep_type; + typedef zz_pContext context_type; + typedef zz_pBak bak_type; + typedef zz_pPush push_type; + typedef zz_pX poly_type; + +}; + + +long rep(zz_p a); // read-only access to representation of a + + + +/**************************************************************************\ + + Comparison + +\**************************************************************************/ + + +long operator==(zz_p a, zz_p b); +long operator!=(zz_p a, zz_p b); + +long IsZero(zz_p a); // test for 0 +long IsOne(zz_p a); // test for 1 + +// PROMOTIONS: operators ==, != promote long to zz_p on (a, b). + + +/**************************************************************************\ + + Addition + +\**************************************************************************/ + +// operator notation: + +zz_p operator+(zz_p a, zz_p b); +zz_p operator-(zz_p a, zz_p b); + +zz_p operator-(zz_p a); // unary - + +zz_p& operator+=(zz_p& x, zz_p a); +zz_p& operator+=(zz_p& x, long a); + +zz_p& operator-=(zz_p& x, zz_p a); +zz_p& operator-=(zz_p& x, long a); + +zz_p& operator++(zz_p& x); // prefix +void operator++(zz_p& x, int); // postfix + +zz_p& operator--(zz_p& x); // prefix +void operator--(zz_p& x, int); // postfix + +// procedural versions: + + +void add(zz_p& x, zz_p a, zz_p b); // x = a + b +void sub(zz_p& x, zz_p a, zz_p b); // x = a - b +void negate(zz_p& x, zz_p a); // x = -a + +// PROMOTIONS: binary +, -, and procedures add, sub promote +// from long to zz_p on (a, b). 
+ + +/**************************************************************************\ + + Multiplication + +\**************************************************************************/ + +// operator notation: + +zz_p operator*(zz_p a, zz_p b); + +zz_p& operator*=(zz_p& x, zz_p a); +zz_p& operator*=(zz_p& x, long a); + +// procedural versions: + +void mul(zz_p& x, zz_p a, zz_p b); // x = a * b + +void sqr(zz_p& x, zz_p a); // x = a^2 +zz_p sqr(zz_p a); + +// PROMOTIONS: operator * and procedure mul promote from long to zz_p +// on (a, b). + + +/**************************************************************************\ + + Division + +\**************************************************************************/ + +operator notation: + +zz_p operator/(z_p a, zz_p b); + +zz_p& operator/=(zz_p& x, zz_p a); +zz_p& operator/=(zz_p& x, long a); + +procedural versions: + +void div(zz_p& x, zz_p a, zz_p b); +// x = a/b + +void inv(zz_p& x, zz_p a); +zz_p inv(zz_p a); +// x = 1/a + +// PROMOTIONS: operator / and procedure div promote from long to zz_p +// on (a, b). + + +/**************************************************************************\ + + Exponentiation + +\**************************************************************************/ + + +void power(zz_p& x, zz_p a, long e); // x = a^e (e may be negative) +zz_p power(zz_p a, long e); + + +/**************************************************************************\ + + Random Elements + +\**************************************************************************/ + + +void random(zz_p& x); +zz_p random_zz_p(); +// x = random element in zz_p. Uses RandomBnd from ZZ. + +void VectorRandom(long k, zz_p *x); +// equivalent to random(x[i]) for i in [0..k), but fatser + + +/**************************************************************************\ + + Input/Output + +\**************************************************************************/ + + +ostream& operator<<(ostream& s, zz_p a); + +istream& operator>>(istream& s, zz_p& x); +// a ZZ is read and reduced mod p + +/**************************************************************************\ + + Modulus Switching + +A class zz_pPush is provided for "backing up" the current modulus +and installing a new one. + +Here is what you do to save the current modulus, temporarily +set it to p, and automatically restore it: + + { + zz_pPush push(p); + + ... + + } + +The constructor for push will save the current modulus, and install p as the +current modulus. The destructor for push will restore the old modulus when the +scope enclosing it exits. This is the so-called RAII (resource acquisition is +initialization) paradigm. + +You could also do the following: + + { + zz_pPush push; // just backup current modulus + + ... + + zz_p::init(p1); // install p1 + + ... + + zz_p::init(p2); // install p2 + + // reinstall original modulus as close of scope + } + + +The zz_pPush interface is good for implementing simple stack-like +modulus "context switching". For more general context switching, +see zz_pContext below. There is also an older zz_pBak class +that may also be useful. + +.......................................................................... + +It is critical that zz_p objects created under one zz_p modulus are not used in +any non-trivial way "out of context", i.e., under a different (or undefined) +zz_p modulus. However, for ease-of-use, some operations may be safely +performed out of context. These safe operations include: the default and copy +constructor, the destructor, and the assignment operator. 
In addition is is +generally safe to read any zz_p object out of context (i.e., printing it out, or +fetching its underlying representive using the rep() function). + +Any unsafe uses out of context are not in general checked, and may +lead to unpredictable behavior. + +\**************************************************************************/ + + +// A convenient interface for common cases: + +class zz_pPush { +public: + +zz_pPush(); // just backup current modulus + +explicit zz_pPush(long p, long maxroot=NTL_FFTMaxRoot); +zz_pPush(INIT_FFT_TYPE, long index); +zz_pPush(INIT_USER_FFT_TYPE, long p); +explicit zz_pPush(const zz_pContext& context); + // backup current modulus and install the given one + // see documentation for zz_p::init for more details + +private: +zz_pPush(const zz_pPush&); // disabled +void operator=(const zz_pPush&); // disabled + +}; + + + +// more general context switching: +// A zz_pContext object has a modulus q (possibly "null") + +class zz_pContext { + + +public: + +zz_pContext(); // q = "null" + +explicit zz_pContext(long p); +zz_pContext(INIT_FFT_TYPE, long index); +zz_pContext(INIT_USER_FFT_TYPE, long p); + // q = the given modulus + // see documentation for zz_p::init for more details + + +void save(); // q = CurrentModulus +void restore() const; // CurrentModulus = q + +zz_pContext(const zz_pContext&); // copy +zz_pContext& operator=(const zz_pContext&); // assignment +~zz_pContext(); // destructor + + +}; + + +/ An older interface: +// To describe this logic, think of a zz_pBak object +// of having two components: a modulus q (possibly "null") and +// an "auto-restore bit" b. + +class zz_pBak { +public: + + + zz_pBak(); // q = "null", b = 0 + + ~zz_pBak(); // if (b) CurrentModulus = q + + void save(); // q = CurrentModulus, b = 1 + void restore(); // CurrentModulus = q, b = 0 + + +private: + zz_pBak(const zz_pBak&); // copy disabled + void operator=(const zz_pBak&); // assignment disabled +}; + + + + + + + + +/**************************************************************************\ + + Miscellany + +\**************************************************************************/ + + +void clear(zz_p& x); // x = 0 +void set(zz_p& x); // x = 1 + +static mulmod_t zz_p::ModulusInverse(); +// zz_p::ModulusInverse() returns PrepMulMod(zz_p::modulus()) + +static zz_p zz_p::zero(); +// zz_p::zero() yields a read-only reference to zero + +void swap(zz_p& x, zz_p& y); +// swap x and y + +static void zz_p::init(long p, long maxroot); +// Same as ordinary zz_p::init(p), but somewhat more efficient. If you are +// going to perform arithmetic modulo a degree n polynomial, in which +// case set maxroot to NextPowerOfTwo(n)+1. This is useful, for +// example, if you are going to factor a polynomial of degree n modulo +// p, and you know n in advance. +// If maxroot is set too low, the program will abort with an +// appropriate error message. + +static void zz_p::FFTInit(long i); +// sets modulus to the i-th FFT prime (counting from 0). FFT primes +// are NTL_SP_NBITS-bit primes p, where p-1 is divisible by a high power +// of two. Thus, polynomial arithmetic mod p can be implemented +// particularly efficiently using the FFT. As i increases, the power +// of 2 that divides p-1 gets smaller, thus placing a more severe +// restriction on the degrees of the polynomials to be multiplied. + +static void zz_p::UserFFTInit(long p); +// set the modulus to a user-provided FFT prime p. To be useful, +// p-1 should be divisibly by a high power of 2. 
+
+// The utility routine CalcMaxRoot (see below) may be used to
+// calculate this power.
+// If you are going to perform arithmetic modulo a degree n polynomial,
+// you will want CalcMaxRoot(p) >= NextPowerOfTwo(n)+1.
+
+zz_pContext::zz_pContext(long p, long maxroot);
+// constructor for a zz_pContext with same semantics
+// as zz_p::init(p, maxroot) above.
+
+zz_pContext::zz_pContext(INIT_FFT_TYPE, long i);
+// constructor for a zz_pContext with same semantics
+// as zz_p::FFTInit(i) above; invoke as zz_pContext(INIT_FFT, i).
+
+zz_pContext::zz_pContext(INIT_USER_FFT_TYPE, long p);
+// constructor for a zz_pContext with same semantics
+// as zz_p::UserFFTInit(p) above; invoke as zz_pContext(INIT_USER_FFT, p).
+
+zz_p::zz_p(INIT_NO_ALLOC_TYPE);
+// provided for consistency with other classes; initializes to zero
+
+zz_p::zz_p(INIT_ALLOC_TYPE);
+// provided for consistency with other classes; initializes to zero
+
+zz_p::allocate();
+// provided for consistency with other classes; no action
+
+long CalcMaxRoot(long p);
+// p is assumed to be an odd prime.
+// Returns the largest k such that 2^k divides p-1
+// and such that k does not exceed an implementation defined
+// constant.  This represents the max power of two for which
+// an FFT mod p is supported.
+
+void VectorConv(long k, zz_p *x, const ZZ *a);
+void VectorConv(long k, zz_p *x, const long *a);
+// equivalent to conv(x[i], a[i]) for i in [0..k), but faster
+
+
+
+
diff --git a/thirdparty/linux/ntl/doc/lzz_pE.cpp.html b/thirdparty/linux/ntl/doc/lzz_pE.cpp.html
new file mode 100644
index 0000000000..6fa2a46dcd
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/lzz_pE.cpp.html
@@ -0,0 +1,400 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/lzz_pE.cpp.html
+
+
+
+
+/**************************************************************************\
+
+MODULE: zz_pE
+
+SUMMARY:
+
+The class zz_pE is used to represent polynomials in Z_p[X] modulo a
+polynomial P.  The modulus P may be any polynomial with deg(P) > 0,
+not necessarily irreducible.  The modulus p defining Z_p need
+not be prime either.
+
+Objects of the class zz_pE are represented as a zz_pX of degree < deg(P).
+
+An executing program maintains a "current modulus", which is set to P
+using zz_pE::init(P).  The current modulus for zz_pE (as well as for zz_p)
+*must* be initialized before any operations on zz_pE's are performed.
+
+The modulus may be changed, and a mechanism is provided for saving and
+restoring a modulus (see classes zz_pEPush and zz_pEContext below).
+
+\**************************************************************************/
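+
+/*
+   A minimal usage sketch: to work in GF(p^n), install a degree-n
+   irreducible P over zz_p (this sketch assumes BuildIrred from
+   <NTL/lzz_pXFactoring.h>; p and n are arbitrary):
+
+      zz_p::init(p);          // p prime, fitting in NTL_SP_NBITS bits
+      zz_pX P;
+      BuildIrred(P, n);       // monic irreducible polynomial of degree n
+      zz_pE::init(P);         // zz_pE now represents GF(p^n)
+      zz_pE a = random_zz_pE();
+      zz_pE b = inv(a);       // 1/a, assuming a != 0
+*/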
+
+#include <NTL/lzz_pX.h>
+
+class zz_pE {
+public:
+  
+   zz_pE(); // initial value 0
+
+   zz_pE(const zz_pE& a); // copy constructor
+   explicit zz_pE(const zz_p& a); // promotion
+   explicit zz_pE(long a); // promotion
+  
+   zz_pE& operator=(const zz_pE& a); // assignment
+   zz_pE& operator=(const zz_p& a); // assignment
+   zz_pE& operator=(long a); // assignment
+  
+   ~zz_pE(); // destructor
+
+   void init(const zz_pX& P);
+   // zz_pE::init(P) initializes the current modulus to P;
+   // required: deg(P) >= 1.
+  
+   static const zz_pXModulus& modulus();
+   // zz_pE::modulus() yields read-only reference to the current modulus
+
+   static long degree();
+   // zz_pE::degree() returns deg(P)
+
+   // typedefs to aid generic programming
+   typedef zz_pX rep_type;
+   typedef zz_pEContext context_type;
+   typedef zz_pEBak bak_type;
+   typedef zz_pEPush push_type;
+   typedef zz_pEX poly_type;
+
+};
+
+
+const zz_pX& rep(const zz_pE& a); // read-only access to representation of a
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+long operator==(const zz_pE& a, const zz_pE& b);
+long operator!=(const zz_pE& a, const zz_pE& b);
+
+long IsZero(const zz_pE& a);  // test for 0
+long IsOne(const zz_pE& a);  // test for 1
+
+// PROMOTIONS: ==, != promote {long, zz_p} to zz_pE on (a, b).
+
+
+/**************************************************************************\
+
+                                    Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pE operator+(const zz_pE& a, const zz_pE& b);
+
+zz_pE operator-(const zz_pE& a, const zz_pE& b);
+zz_pE operator-(const zz_pE& a);
+
+zz_pE& operator+=(zz_pE& x, const zz_pE& a);
+zz_pE& operator+=(zz_pE& x, const zz_p& a);
+zz_pE& operator+=(zz_pE& x, long a);
+
+zz_pE& operator++(zz_pE& x); // prefix
+void operator++(zz_pE& x, int); // postfix
+
+zz_pE& operator-=(zz_pE& x, const zz_pE& a);
+zz_pE& operator-=(zz_pE& x, const zz_p& a);
+zz_pE& operator-=(zz_pE& x, long a);
+
+zz_pE& operator--(zz_pE& x); // prefix
+void operator--(zz_pE& x, int); // postfix
+
+// procedural versions:
+
+void add(zz_pE& x, const zz_pE& a, const zz_pE& b); // x = a + b
+void sub(zz_pE& x, const zz_pE& a, const zz_pE& b); // x = a - b
+void negate(zz_pE& x, const zz_pE& a); // x = - a
+
+// PROMOTIONS: +, -, add, sub promote {long, zz_p} to zz_pE on (a, b).
+
+
+/**************************************************************************\
+
+                                  Multiplication
+
+\**************************************************************************/
+
+
+// operator notation:
+
+zz_pE operator*(const zz_pE& a, const zz_pE& b);
+
+zz_pE& operator*=(zz_pE& x, const zz_pE& a);
+zz_pE& operator*=(zz_pE& x, const zz_p& a);
+zz_pE& operator*=(zz_pE& x, long a);
+
+// procedural versions:
+
+
+void mul(zz_pE& x, const zz_pE& a, const zz_pE& b); // x = a * b
+
+void sqr(zz_pE& x, const zz_pE& a); // x = a^2
+zz_pE sqr(const zz_pE& a);
+
+// PROMOTIONS: *, mul promote {long, zz_p} to zz_pE on (a, b).
+
+
+
+/**************************************************************************\
+
+                                     Division
+
+\**************************************************************************/
+
+
+// operator notation:
+
+zz_pE operator/(const zz_pE& a, const zz_pE& b);
+
+zz_pE& operator/=(zz_pE& x, const zz_pE& a);
+zz_pE& operator/=(zz_pE& x, const zz_p& a);
+zz_pE& operator/=(zz_pE& x, long a);
+
+
+// procedural versions:
+
+void div(zz_pE& x, const zz_pE& a, const zz_pE& b);
+// x = a/b.  If b is not invertible, an error is raised.
+
+void inv(zz_pE& x, const zz_pE& a);
+zz_pE inv(const zz_pE& a);
+// x = 1/a
+
+// PROMOTIONS: /, div promote {long, zz_p} to zz_pE on (a, b).
+
+
+/**************************************************************************\
+
+                                  Exponentiation
+
+\**************************************************************************/
+
+
+
+void power(zz_pE& x, const zz_pE& a, const ZZ& e);
+zz_pE power(const zz_pE& a, const ZZ& e);
+
+void power(zz_pE& x, const zz_pE& a, long e);
+zz_pE power(const zz_pE& a, long e);
+
+// x = a^e (e may be negative)
+
+
+
+/**************************************************************************\
+
+                               Random Elements
+
+\**************************************************************************/
+
+
+void random(zz_pE& x);
+zz_pE random_zz_pE();
+// x = random element in zz_pE.
+
+/**************************************************************************\
+
+                               Norms and Traces
+
+\**************************************************************************/
+
+
+
+void trace(zz_p& x, const zz_pE& a);  // x = trace of a
+zz_p trace(const zz_pE& a);
+
+void norm(zz_p& x, const zz_pE& a);   // x = norm of a
+zz_p norm(const zz_pE& a);
+
+
+
+/**************************************************************************\
+
+                                Input/Output
+
+\**************************************************************************/
+
+
+ostream& operator<<(ostream& s, const zz_pE& a);
+
+istream& operator>>(istream& s, zz_pE& x);
+// a zz_pX is read and reduced mod p
+
+
+/**************************************************************************\
+
+                       Modulus Switching
+
+A class zz_pEPush is provided for "backing up" the current modulus
+and installing a new one.
+
+Here is what you do to save the current modulus, temporarily
+set it to P, and automatically restore it:
+
+   {
+      zz_pEPush push(P);
+
+      ...
+
+   }
+
+The constructor for push will save the current modulus, and install P as the
+current modulus.  The destructor for push will restore the old modulus when the
+scope enclosing it exits.  This is the so-called RAII (resource acquisition is
+initialization) paradigm.
+
+You could also do the following:
+
+   {
+      zz_pEPush push; // just backup current modulus
+
+        ...
+
+      zz_pE::init(P1); // install P1
+
+        ...
+
+      zz_pE::init(P2); // install P2
+
+      // reinstall original modulus at close of scope
+   }
+
+      
+The zz_pEPush interface is good for implementing simple stack-like
+modulus "context switching".  For more general context switching,
+see zz_pEContext below.  There is also an older zz_pEBak class
+that may also be useful.
+
+..........................................................................
+
+It is critical that zz_pE objects created under one zz_pE modulus are not used in
+any non-trivial way "out of context", i.e., under a different (or undefined)
+zz_pE modulus.  However, for ease-of-use, some operations may be safely
+performed out of context.  These safe operations include: the default and copy
+constructor, the destructor, and the assignment operator.  In addition it is
+generally safe to read any zz_pE object out of context (i.e., printing it out, or
+fetching its underlying representation using the rep() function).
+
+Any unsafe uses out of context are not in general checked, and may
+lead to unpredictable behavior.
+
+
+\**************************************************************************/
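+
+/*
+   A short sketch of zz_pEContext save/restore (the usage mirrors the
+   zz_pPush example above):
+
+      zz_pEContext context;
+      context.save();         // capture the current zz_pE modulus
+      // ... install and work under other moduli ...
+      context.restore();      // reinstall the captured modulus
+*/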
+
+
+// A convenient interface for common cases
+
+class zz_pEPush {
+
+public:
+zz_pEPush();  // backup current modulus
+explicit zz_pEPush(const zz_pX& p);
+explicit zz_pEPush(const zz_pEContext& context);
+  // backup current modulus and install the given one
+
+private:
+zz_pEPush(const zz_pEPush&); // disabled
+void operator=(const zz_pEPush&); // disabled
+
+};
+
+
+
+// more general context switching:
+// A zz_pEContext object has a modulus Q (possibly "null"),
+
+class zz_pEContext {
+
+
+public:
+
+zz_pEContext(); // Q = "null"
+explicit zz_pEContext(const zz_pX& P); // Q = P
+
+void save(); // Q = CurrentModulus
+void restore() const; // CurrentModulus = Q
+
+zz_pEContext(const zz_pEContext&);  // copy
+zz_pEContext& operator=(const zz_pEContext&); // assignment
+~zz_pEContext(); // destructor
+
+
+};
+
+
+// An older interface:
+// To describe this logic, think of a zz_pEBak object
+// as having two components: a modulus Q (possibly "null") and
+// an "auto-restore bit" b.
+
+
+class zz_pEBak {
+public:
+
+
+   zz_pEBak();  // Q = "null", b = 0
+
+   ~zz_pEBak();  // if (b) CurrentModulus = Q
+
+   void save();  // Q = CurrentModulus, b = 1
+   void restore();  // CurrentModulus = Q, b = 0
+
+
+private:
+   zz_pEBak(const zz_pEBak&);  // copy disabled
+   void operator=(const zz_pEBak&);  // assignment disabled
+};
+
+
+
+
+
+
+/**************************************************************************\
+
+                               Miscellany
+
+\**************************************************************************/
+
+void clear(zz_pE& x); // x = 0
+void set(zz_pE& x); // x = 1
+
+static const zz_pE& zz_pE::zero();
+// zz_pE::zero() yields a read-only reference to zero
+
+void zz_pE::swap(zz_pE& x);
+void swap(zz_pE& x, zz_pE& y);
+// swap (done by "pointer swapping", if possible).
+
+static ZZ& zz_pE::cardinality();
+// yields the cardinality, i.e., p^{zz_pE::degree()}
+
+zz_pE::zz_pE(INIT_NO_ALLOC_TYPE);
+// special constructor: invoke as zz_pE x(INIT_NO_ALLOC);
+// initializes x to 0, but allocates no space (this is now the default)
+
+zz_pE::zz_pE(INIT_ALLOC_TYPE);
+// special constructor: invoke as zz_pE x(INIT_ALLOC);
+// initializes x to 0, but allocates space
+
+zz_pE::allocate();
+// useful in conjunction with the INIT_NO_ALLOC constructor:
+// x.allocate() will pre-allocate space for x, using the
+// current modulus
+
+
+ diff --git a/thirdparty/linux/ntl/doc/lzz_pE.txt b/thirdparty/linux/ntl/doc/lzz_pE.txt new file mode 100644 index 0000000000..d232557116 --- /dev/null +++ b/thirdparty/linux/ntl/doc/lzz_pE.txt @@ -0,0 +1,390 @@ + + +/**************************************************************************\ + +MODULE: zz_pE + +SUMMARY: + +The class zz_pE is used to represent polynomials in Z_p[X] modulo a +polynomial P. The modulus P may be any polynomial with deg(P) > 0, +not necessarily irreducible. The modulus p defining Z_p need +not be prime either. + +Objects of the class zz_pE are represented as a zz_pX of degree < deg(P). + +An executing program maintains a "current modulus", which is set to P +using zz_pE::init(P). The current modulus for zz_pE (as well as for zz_p) +*must* be initialized before an operations on zz_pE's are performed. + +The modulus may be changed, and a mechanism is provided for saving and +restoring a modulus (see classes zz_pEPush and zz_pEContext below). + +\**************************************************************************/ + +#include + +class zz_pE { +public: + + zz_pE(); // initial value 0 + + zz_pE(const zz_pE& a); // copy constructor + explicit zz_pE(const zz_p& a); // promotion + explicit zz_pE(long a); // promotion + + zz_pE& operator=(const zz_pE& a); // assignment + zz_pE& operator=(const zz_p& a); // assignment + zz_pE& operator=(long a); // assignment + + ~zz_pE(); // destructor + + void init(const zz_pX& P); + // zz_pE::init(P) initializes the current modulus to P; + // required: deg(P) >= 1. + + static const zz_pXModulus& modulus(); + // zz_pE::modulus() yields read-only reference to the current modulus + + static long degree(); + // zz_pE::degree() returns deg(P) + + // typedefs to aid generic programming + typedef zz_pX rep_type; + typedef zz_pEContext context_type; + typedef zz_pEBak bak_type; + typedef zz_pEPush push_type; + typedef zz_pEX poly_type; + +}; + + +const zz_pX& rep(const zz_pE& a); // read-only access to representation of a + + + +/**************************************************************************\ + + Comparison + +\**************************************************************************/ + +long operator==(const zz_pE& a, const zz_pE& b); +long operator!=(const zz_pE& a, const zz_pE& b); + +long IsZero(const zz_pE& a); // test for 0 +long IsOne(const zz_pE& a); // test for 1 + +// PROMOTIONS: ==, != promote {long, zz_p} to zz_pE on (a, b). + + +/**************************************************************************\ + + Addition + +\**************************************************************************/ + +// operator notation: + +zz_pE operator+(const zz_pE& a, const zz_pE& b); + +zz_pE operator-(const zz_pE& a, const zz_pE& b); +zz_pE operator-(const zz_pE& a); + +zz_pE& operator+=(zz_pE& x, const zz_pE& a); +zz_pE& operator+=(zz_pE& x, const zz_p& a); +zz_pE& operator+=(zz_pE& x, long a); + +zz_pE& operator++(zz_pE& x); // prefix +void operator++(zz_pE& x, int); // postfix + +zz_pE& operator-=(zz_pE& x, const zz_pE& a); +zz_pE& operator-=(zz_pE& x, const zz_p& a); +zz_pE& operator-=(zz_pE& x, long a); + +zz_pE& operator--(zz_pE& x); // prefix +void operator--(zz_pE& x, int); // postfix + +// procedural versions: + +void add(zz_pE& x, const zz_pE& a, const zz_pE& b); // x = a + b +void sub(zz_pE& x, const zz_pE& a, const zz_pE& b); // x = a - b +void negate(zz_pE& x, const zz_pE& a); // x = - a + +// PROMOTIONS: +, -, add, sub promote {long, zz_p} to zz_pE on (a, b). 
+ + +/**************************************************************************\ + + Multiplication + +\**************************************************************************/ + + +// operator notation: + +zz_pE operator*(const zz_pE& a, const zz_pE& b); + +zz_pE& operator*=(zz_pE& x, const zz_pE& a); +zz_pE& operator*=(zz_pE& x, const zz_p& a); +zz_pE& operator*=(zz_pE& x, long a); + +// procedural versions: + + +void mul(zz_pE& x, const zz_pE& a, const zz_pE& b); // x = a * b + +void sqr(zz_pE& x, const zz_pE& a); // x = a^2 +zz_pE sqr(const zz_pE& a); + +// PROMOTIONS: *, mul promote {long, zz_p} to zz_pE on (a, b). + + + +/**************************************************************************\ + + Division + +\**************************************************************************/ + + +// operator notation: + +zz_pE operator/(const zz_pE& a, const zz_pE& b); + +zz_pE& operator/=(zz_pE& x, const zz_pE& a); +zz_pE& operator/=(zz_pE& x, const zz_p& a); +zz_pE& operator/=(zz_pE& x, long a); + + +// procedural versions: + +void div(zz_pE& x, const zz_pE& a, const zz_pE& b); +// x = a/b. If b is not invertible, an error is raised. + +void inv(zz_pE& x, const zz_pE& a); +zz_pE inv(const zz_pE& a); +// x = 1/a + +PROMOTIONS: /, div promote {long, zz_p} to zz_pE on (a, b). + + +/**************************************************************************\ + + Exponentiation + +\**************************************************************************/ + + + +void power(zz_pE& x, const zz_pE& a, const ZZ& e); +zz_pE power(const zz_pE& a, const ZZ& e); + +void power(zz_pE& x, const zz_pE& a, long e); +zz_pE power(const zz_pE& a, long e); + +// x = a^e (e may be negative) + + + +/**************************************************************************\ + + Random Elements + +\**************************************************************************/ + + +void random(zz_pE& x); +zz_pE random_zz_pE(); +// x = random element in zz_pE. + +/**************************************************************************\ + + Norms and Traces + +\**************************************************************************/ + + + +void trace(zz_p& x, const zz_pE& a); // x = trace of a +zz_p trace(const zz_pE& a); + +void norm(zz_p& x, const zz_pE& a); // x = norm of a +zz_p norm(const zz_pE& a); + + + +/**************************************************************************\ + + Input/Output + +\**************************************************************************/ + + +ostream& operator<<(ostream& s, const zz_pE& a); + +istream& operator>>(istream& s, zz_pE& x); +// a zz_pX is read and reduced mod p + + +/**************************************************************************\ + + Modulus Switching + +A class zz_pEPush is provided for "backing up" the current modulus +and installing a new one. + +Here is what you do to save the current modulus, temporarily +set it to P, and automatically restore it: + + { + zz_pEPush push(P); + + ... + + } + +The constructor for push will save the current modulus, and install P as the +current modulus. The destructor for push will restore the old modulus when the +scope enclosing it exits. This is the so-called RAII (resource acquisition is +initialization) paradigm. + +You could also do the following: + + { + zz_pEPush push; // just backup current modulus + + ... + + zz_pE::init(P1); // install P1 + + ... 
+ + zz_pE::init(P2); // install P2 + + // reinstall original modulus as close of scope + } + + +The zz_pEPush interface is good for implementing simple stack-like +modulus "context switching". For more general context switching, +see zz_pEContext below. There is also an older zz_pEBak class +that may also be useful. + +.......................................................................... + +It is critical that zz_pE objects created under one zz_pE modulus are not used in +any non-trivial way "out of context", i.e., under a different (or undefined) +zz_pE modulus. However, for ease-of-use, some operations may be safely +performed out of context. These safe operations include: the default and copy +constructor, the destructor, and the assignment operator. In addition is is +generally safe to read any zz_pE object out of context (i.e., printing it out, or +fetching its underlying representive using the rep() function). + +Any unsafe uses out of context are not in general checked, and may +lead to unpredictable behavior. + + +\**************************************************************************/ + + +// A convenient interface for common cases + +class zz_pEPush { + +public: +zz_pEPush(); // backup current modulus +explicit zz_pEPush(const zz_pX& p); +explicit zz_pEPush(const zz_pEContext& context); + // backup current modulus and install the given one + +private: +zz_pEPush(const zz_pEPush&); // disabled +void operator=(const zz_pEPush&); // disabled + +}; + + + +// more general context switching: +// A zz_pEContext object has a modulus Q (possibly "null"), + +class zz_pEContext { + + +public: + +zz_pEContext(); // Q = "null" +explicit zz_pEContext(const zz_pX& P); // Q = P + +void save(); // Q = CurrentModulus +void restore() const; // CurrentModulus = Q + +zz_pEContext(const zz_pEContext&); // copy +zz_pEContext& operator=(const zz_pEContext&); // assignment +~zz_pEContext(); // destructor + + +}; + + +// An older interface: +// To describe this logic, think of a zz_pEBak object +// of having two components: a modulus Q (possibly "null") and +// an "auto-restore bit" b. + + +class zz_pEBak { +public: + + + zz_pEBak(); // Q = "null", b = 0 + + ~zz_pEBak(); // if (b) CurrentModulus = Q + + void save(); // Q = CurrentModulus, b = 1 + void restore(); // CurrentModulus = Q, b = 0 + + +private: + zz_pEBak(const zz_pEBak&); // copy disabled + void operator=(const zz_pEBak&); // assignment disabled +}; + + + + + + +/**************************************************************************\ + + Miscellany + +\**************************************************************************/ + +void clear(zz_pE& x); // x = 0 +void set(zz_pE& x); // x = 1 + +static const zz_pE& zz_pE::zero(); +// zz_pE::zero() yields a read-only reference to zero + +void zz_pE::swap(zz_pE& x); +void swap(zz_pE& x, zz_pE& y); +// swap (done by "pointer swapping", if possible). 
+
+static ZZ& zz_pE::cardinality();
+// yields the cardinality, i.e., p^{zz_pE::degree()}
+
+zz_pE::zz_pE(INIT_NO_ALLOC_TYPE);
+// special constructor: invoke as zz_pE x(INIT_NO_ALLOC);
+// initializes x to 0, but allocates no space (this is now the default)
+
+zz_pE::zz_pE(INIT_ALLOC_TYPE);
+// special constructor: invoke as zz_pE x(INIT_ALLOC);
+// initializes x to 0, but allocates space
+
+zz_pE::allocate();
+// useful in conjunction with the INIT_NO_ALLOC constructor:
+// x.allocate() will pre-allocate space for x, using the
+// current modulus
+
diff --git a/thirdparty/linux/ntl/doc/lzz_pEX.cpp.html b/thirdparty/linux/ntl/doc/lzz_pEX.cpp.html
new file mode 100644
index 0000000000..49ed94f2a0
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/lzz_pEX.cpp.html
@@ -0,0 +1,894 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/lzz_pEX.cpp.html
+
+
+
+/**************************************************************************\
+
+MODULE: zz_pEX
+
+SUMMARY:
+
+The class zz_pEX represents polynomials over zz_pE,
+and so can be used, for example, for arithmetic in GF(p^n)[X].
+However, except where mathematically necessary (e.g., GCD computations),
+zz_pE need not be a field.
+
+\**************************************************************************/
+
+#include <NTL/lzz_pE.h>
+#include <NTL/vec_lzz_pE.h>
+
+class zz_pEX {
+public:
+
+   zz_pEX(); // initial value 0
+
+   zz_pEX(const zz_pEX& a); // copy
+   zz_pEX(const zz_pE& a); // promotion
+   zz_pEX(const zz_p& a);
+   zz_pEX(long a);
+
+   zz_pEX& operator=(const zz_pEX& a); // assignment
+   zz_pEX& operator=(const zz_pE& a);
+   zz_pEX& operator=(const zz_p& a);
+   zz_pEX& operator=(long a);
+
+   ~zz_pEX(); // destructor
+
+   zz_pEX(INIT_MONO_TYPE, long i, const zz_pE& c);
+   zz_pEX(INIT_MONO_TYPE, long i, const zz_p& c);
+   zz_pEX(INIT_MONO_TYPE, long i, long c);
+   // initialize to c*X^i; invoke as zz_pEX(INIT_MONO, i, c)
+
+   zz_pEX(INIT_MONO_TYPE, long i);
+   // initialize to X^i; invoke as zz_pEX(INIT_MONO, i)
+
+   // typedefs to aid in generic programming
+   typedef zz_pE coeff_type;
+   typedef zz_pEXModulus modulus_type;
+
+   // ...
+
+  
+};
+
+
+
+/**************************************************************************\
+
+                              Accessing coefficients
+
+The degree of a polynomial f is obtained as deg(f),
+where the zero polynomial, by definition, has degree -1.
+
+A polynomial f is represented as a coefficient vector.
+Coefficients may be accessed in one of two ways.
+
+The safe, high-level method is to call the function
+coeff(f, i) to get the coefficient of X^i in the polynomial f,
+and to call the function SetCoeff(f, i, a) to set the coefficient
+of X^i in f to the scalar a.
+
+One can also access the coefficients more directly via a lower level
+interface.  The coefficient of X^i in f may be accessed using
+subscript notation f[i].  In addition, one may write f.SetLength(n)
+to set the length of the underlying coefficient vector to n,
+and f.SetMaxLength(n) to allocate space for n coefficients,
+without changing the coefficient vector itself.
+
+After setting coefficients using this low-level interface,
+one must ensure that leading zeros in the coefficient vector
+are stripped afterwards by calling the function f.normalize().
+
+NOTE: the coefficient vector of f may also be accessed directly
+as f.rep; however, this is not recommended. Also, for a properly
+normalized polynomial f, we have f.rep.length() == deg(f)+1,
+and deg(f) >= 0  =>  f.rep[deg(f)] != 0.
+
+\**************************************************************************/
+
+
+
+long deg(const zz_pEX& a);  // return deg(a); deg(0) == -1.
+
+const zz_pE& coeff(const zz_pEX& a, long i);
+// returns the coefficient of X^i, or zero if i not in range
+
+const zz_pE& LeadCoeff(const zz_pEX& a);
+// returns leading term of a, or zero if a == 0
+
+const zz_pE& ConstTerm(const zz_pEX& a);
+// returns constant term of a, or zero if a == 0
+
+void SetCoeff(zz_pEX& x, long i, const zz_pE& a);
+void SetCoeff(zz_pEX& x, long i, const zz_p& a);
+void SetCoeff(zz_pEX& x, long i, long a);
+// makes coefficient of X^i equal to a; error is raised if i < 0
+
+void SetCoeff(zz_pEX& x, long i);
+// makes coefficient of X^i equal to 1;  error is raised if i < 0
+
+void SetX(zz_pEX& x); // x is set to the monomial X
+
+long IsX(const zz_pEX& a); // test if a = X
+
+
+
+
+zz_pE& zz_pEX::operator[](long i);
+const zz_pE& zz_pEX::operator[](long i) const;
+// indexing operators: f[i] is the coefficient of X^i ---
+// i should satisfy i >= 0 and i <= deg(f).
+// No range checking (unless NTL_RANGE_CHECK is defined).
+
+void zz_pEX::SetLength(long n);
+// f.SetLength(n) sets the length of the underlying coefficient
+// vector to n --- after this call, indexing f[i] for i = 0..n-1
+// is valid.
+
+void zz_pEX::normalize();  
+// f.normalize() strips leading zeros from coefficient vector of f
+
+void zz_pEX::SetMaxLength(long n);
+// f.SetMaxLength(n) pre-allocates space for n coefficients.  The
+// polynomial that f represents is unchanged.
+
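+// For illustration only: a sketch of the low-level interface, building
+// X^n - 1 directly in the coefficient vector and then normalizing
+// (assumes zz_pE::init has been called and n >= 1; the name is not
+// part of NTL):
+
+void BuildXnMinus1(zz_pEX& f, long n)
+{
+   f.SetLength(n+1);
+   for (long i = 0; i <= n; i++)
+      clear(f[i]);     // zero out all coefficients
+   set(f[n]);          // leading coefficient = 1
+   f[0] = -f[n];       // constant term = -1
+   f.normalize();      // strip leading zeros (none here, but good practice)
+}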
+
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+long operator==(const zz_pEX& a, const zz_pEX& b);
+long operator!=(const zz_pEX& a, const zz_pEX& b);
+
+long IsZero(const zz_pEX& a); // test for 0
+long IsOne(const zz_pEX& a); // test for 1
+
+// PROMOTIONS: ==, != promote {long,zz_p,zz_pE} to zz_pEX on (a, b).
+
+/**************************************************************************\
+
+                                   Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pEX operator+(const zz_pEX& a, const zz_pEX& b);
+zz_pEX operator-(const zz_pEX& a, const zz_pEX& b);
+zz_pEX operator-(const zz_pEX& a);
+
+zz_pEX& operator+=(zz_pEX& x, const zz_pEX& a);
+zz_pEX& operator+=(zz_pEX& x, const zz_pE& a);
+zz_pEX& operator+=(zz_pEX& x, const zz_p& a);
+zz_pEX& operator+=(zz_pEX& x, long a);
+
+
+zz_pEX& operator++(zz_pEX& x);  // prefix
+void operator++(zz_pEX& x, int);  // postfix
+
+zz_pEX& operator-=(zz_pEX& x, const zz_pEX& a);
+zz_pEX& operator-=(zz_pEX& x, const zz_pE& a);
+zz_pEX& operator-=(zz_pEX& x, const zz_p& a);
+zz_pEX& operator-=(zz_pEX& x, long a);
+
+zz_pEX& operator--(zz_pEX& x);  // prefix
+void operator--(zz_pEX& x, int);  // postfix
+
+// procedural versions:
+
+void add(zz_pEX& x, const zz_pEX& a, const zz_pEX& b); // x = a + b
+void sub(zz_pEX& x, const zz_pEX& a, const zz_pEX& b); // x = a - b
+void negate(zz_pEX& x, const zz_pEX& a); // x = - a
+
+// PROMOTIONS: +, -, add, sub promote {long,zz_p,zz_pE} to zz_pEX on (a, b).
+
+
+
+/**************************************************************************\
+
+                               Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pEX operator*(const zz_pEX& a, const zz_pEX& b);
+
+zz_pEX& operator*=(zz_pEX& x, const zz_pEX& a);
+zz_pEX& operator*=(zz_pEX& x, const zz_pE& a);
+zz_pEX& operator*=(zz_pEX& x, const zz_p& a);
+zz_pEX& operator*=(zz_pEX& x, long a);
+
+
+// procedural versions:
+
+
+void mul(zz_pEX& x, const zz_pEX& a, const zz_pEX& b); // x = a * b
+
+void sqr(zz_pEX& x, const zz_pEX& a); // x = a^2
+zz_pEX sqr(const zz_pEX& a);
+
+// PROMOTIONS: *, mul promote {long,zz_p,zz_pE} to zz_pEX on (a, b).
+
+void power(zz_pEX& x, const zz_pEX& a, long e);  // x = a^e (e >= 0)
+zz_pEX power(const zz_pEX& a, long e);
+
+
+/**************************************************************************\
+
+                               Shift Operations
+
+LeftShift by n means multiplication by X^n
+RightShift by n means division by X^n
+
+A negative shift amount reverses the direction of the shift.
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pEX operator<<(const zz_pEX& a, long n);
+zz_pEX operator>>(const zz_pEX& a, long n);
+
+zz_pEX& operator<<=(zz_pEX& x, long n);
+zz_pEX& operator>>=(zz_pEX& x, long n);
+
+// procedural versions:
+
+void LeftShift(zz_pEX& x, const zz_pEX& a, long n);
+zz_pEX LeftShift(const zz_pEX& a, long n);
+
+void RightShift(zz_pEX& x, const zz_pEX& a, long n);
+zz_pEX RightShift(const zz_pEX& a, long n);
+
+
+
+/**************************************************************************\
+
+                                  Division
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pEX operator/(const zz_pEX& a, const zz_pEX& b);
+zz_pEX operator/(const zz_pEX& a, const zz_pE& b);
+zz_pEX operator/(const zz_pEX& a, const zz_p& b);
+zz_pEX operator/(const zz_pEX& a, long b);
+
+zz_pEX operator%(const zz_pEX& a, const zz_pEX& b);
+
+zz_pEX& operator/=(zz_pEX& x, const zz_pEX& a);
+zz_pEX& operator/=(zz_pEX& x, const zz_pE& a);
+zz_pEX& operator/=(zz_pEX& x, const zz_p& a);
+zz_pEX& operator/=(zz_pEX& x, long a);
+
+zz_pEX& operator%=(zz_pEX& x, const zz_pEX& a);
+
+// procedural versions:
+
+
+void DivRem(zz_pEX& q, zz_pEX& r, const zz_pEX& a, const zz_pEX& b);
+// q = a/b, r = a%b
+
+void div(zz_pEX& q, const zz_pEX& a, const zz_pEX& b);
+void div(zz_pEX& q, const zz_pEX& a, const zz_pE& b);
+void div(zz_pEX& q, const zz_pEX& a, const zz_p& b);
+void div(zz_pEX& q, const zz_pEX& a, long b);
+// q = a/b
+
+void rem(zz_pEX& r, const zz_pEX& a, const zz_pEX& b);
+// r = a%b
+
+long divide(zz_pEX& q, const zz_pEX& a, const zz_pEX& b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0
+
+long divide(const zz_pEX& a, const zz_pEX& b);
+// if b | a, returns 1; otherwise returns 0
+
+
+/**************************************************************************\
+
+                                   GCD's
+
+These routines are intended for use when zz_pE is a field.
+
+\**************************************************************************/
+
+
+void GCD(zz_pEX& x, const zz_pEX& a, const zz_pEX& b);
+zz_pEX GCD(const zz_pEX& a, const zz_pEX& b);
+// x = GCD(a, b),  x is always monic (or zero if a==b==0).
+
+
+void XGCD(zz_pEX& d, zz_pEX& s, zz_pEX& t, const zz_pEX& a, const zz_pEX& b);
+// d = gcd(a,b), a s + b t = d
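+
+// For illustration only: a sketch computing a modular inverse from the
+// XGCD identity (zz_pE must be a field; the name is not part of NTL):
+
+long InverseViaXGCD(zz_pEX& x, const zz_pEX& a, const zz_pEX& f)
+{
+   zz_pEX d, s, t;
+   XGCD(d, s, t, a, f);       // d = gcd(a, f) = a*s + f*t
+   if (!IsOne(d)) return 0;   // a is not invertible mod f
+   rem(x, s, f);              // x = s % f = a^{-1} mod f
+   return 1;
+}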
+
+
+/**************************************************************************\
+
+                                  Input/Output
+
+I/O format:
+
+   [a_0 a_1 ... a_n],
+
+represents the polynomial a_0 + a_1*X + ... + a_n*X^n.
+
+On output, all coefficients will be polynomials of degree < zz_pE::degree() and
+a_n not zero (the zero polynomial is [ ]).  On input, the coefficients
+are arbitrary polynomials which are reduced modulo zz_pE::modulus(),
+and leading zeros stripped.
+
+\**************************************************************************/
+
+istream& operator>>(istream& s, zz_pEX& x);
+ostream& operator<<(ostream& s, const zz_pEX& a);
+
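+// For illustration only: each zz_pE coefficient itself prints in the
+// [c0 c1 ...] format of zz_pX, so zz_pEX output is nested.  A sketch
+// (assumes zz_pE::init has been called and the usual NTL_CLIENT setup):
+
+void PrintSketch()
+{
+   zz_pEX f;
+   SetCoeff(f, 2);      // coefficient of X^2 = 1
+   SetCoeff(f, 0, 1);   // constant term = 1
+   cout << f << "\n";   // prints something like [[1] [] [1]]
+}
+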
+
+/**************************************************************************\
+
+                              Some utility routines
+
+\**************************************************************************/
+
+
+void diff(zz_pEX& x, const zz_pEX& a); // x = derivative of a
+zz_pEX diff(const zz_pEX& a);
+
+void MakeMonic(zz_pEX& x);
+// if x != 0 makes x into its monic associate; LeadCoeff(x) must be
+// invertible in this case
+
+void reverse(zz_pEX& x, const zz_pEX& a, long hi);
+zz_pEX reverse(const zz_pEX& a, long hi);
+
+void reverse(zz_pEX& x, const zz_pEX& a);
+zz_pEX reverse(const zz_pEX& a);
+
+// x = reverse of a[0]..a[hi] (hi >= -1);
+// hi defaults to deg(a) in second version
+
+void VectorCopy(vec_zz_pE& x, const zz_pEX& a, long n);
+vec_zz_pE VectorCopy(const zz_pEX& a, long n);
+// x = copy of coefficient vector of a of length exactly n.
+// input is truncated or padded with zeroes as appropriate.
+
+
+
+
+/**************************************************************************\
+
+                             Random Polynomials
+
+\**************************************************************************/
+
+void random(zz_pEX& x, long n);
+zz_pEX random_zz_pEX(long n);
+// x = random polynomial of degree < n
+
+
+/**************************************************************************\
+
+                    Polynomial Evaluation and related problems
+
+\**************************************************************************/
+
+
+void BuildFromRoots(zz_pEX& x, const vec_zz_pE& a);
+zz_pEX BuildFromRoots(const vec_zz_pE& a);
+// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n = a.length()
+
+void eval(zz_pE& b, const zz_pEX& f, const zz_pE& a);
+zz_pE eval(const zz_pEX& f, const zz_pE& a);
+// b = f(a)
+
+void eval(zz_pE& b, const zz_pX& f, const zz_pE& a);
+zz_pE eval(const zz_pX& f, const zz_pE& a);
+// b = f(a); uses ModComp algorithm for zz_pX
+
+void eval(vec_zz_pE& b, const zz_pEX& f, const vec_zz_pE& a);
+vec_zz_pE eval(const zz_pEX& f, const vec_zz_pE& a);
+//  b.SetLength(a.length()); b[i] = f(a[i]) for 0 <= i < a.length()
+
+void interpolate(zz_pEX& f, const vec_zz_pE& a, const vec_zz_pE& b);
+zz_pEX interpolate(const vec_zz_pE& a, const vec_zz_pE& b);
+// interpolates the polynomial f satisfying f(a[i]) = b[i].  
+
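+// For illustration only: a sketch of an interpolate/eval round trip
+// (zz_pE should be a field and the points a[i] distinct; the name is
+// not part of NTL):
+
+long CheckInterpolation(const vec_zz_pE& a, const vec_zz_pE& b)
+{
+   zz_pEX f = interpolate(a, b);            // deg(f) < a.length()
+   for (long i = 0; i < a.length(); i++)
+      if (eval(f, a[i]) != b[i]) return 0;  // should never happen
+   return 1;
+}
+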
+/**************************************************************************\
+
+                       Arithmetic mod X^n
+
+Required: n >= 0; otherwise, an error is raised.
+
+\**************************************************************************/
+
+void trunc(zz_pEX& x, const zz_pEX& a, long n); // x = a % X^n
+zz_pEX trunc(const zz_pEX& a, long n);
+
+void MulTrunc(zz_pEX& x, const zz_pEX& a, const zz_pEX& b, long n);
+zz_pEX MulTrunc(const zz_pEX& a, const zz_pEX& b, long n);
+// x = a * b % X^n
+
+void SqrTrunc(zz_pEX& x, const zz_pEX& a, long n);
+zz_pEX SqrTrunc(const zz_pEX& a, long n);
+// x = a^2 % X^n
+
+void InvTrunc(zz_pEX& x, const zz_pEX& a, long n);
+zz_pEX InvTrunc(const zz_pEX& a, long n);
+// computes x = a^{-1} % X^n.  Must have ConstTerm(a) invertible.
+
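+// For illustration only: power series inversion, e.g.
+// 1/(1 - X) = 1 + X + ... + X^{n-1} mod X^n (the name is not part of NTL):
+
+void GeometricSeries(zz_pEX& g, long n)
+{
+   zz_pEX a;
+   SetCoeff(a, 0, 1);
+   SetCoeff(a, 1, -1);   // a = 1 - X; ConstTerm(a) = 1 is invertible
+   InvTrunc(g, a, n);    // g = 1 + X + X^2 + ... + X^{n-1}
+}
+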
+/**************************************************************************\
+
+                Modular Arithmetic (without pre-conditioning)
+
+Arithmetic mod f.
+
+All inputs and outputs are polynomials of degree less than deg(f), and
+deg(f) > 0.
+
+
+NOTE: if you want to do many computations with a fixed f, use the
+zz_pEXModulus data structure and associated routines below for better
+performance.
+
+\**************************************************************************/
+
+void MulMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& b, const zz_pEX& f);
+zz_pEX MulMod(const zz_pEX& a, const zz_pEX& b, const zz_pEX& f);
+// x = (a * b) % f
+
+void SqrMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& f);
+zz_pEX SqrMod(const zz_pEX& a, const zz_pEX& f);
+// x = a^2 % f
+
+void MulByXMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& f);
+zz_pEX MulByXMod(const zz_pEX& a, const zz_pEX& f);
+// x = (a * X) mod f
+
+void InvMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& f);
+zz_pEX InvMod(const zz_pEX& a, const zz_pEX& f);
+// x = a^{-1} % f; error if a is not invertible
+
+long InvModStatus(zz_pEX& x, const zz_pEX& a, const zz_pEX& f);
+// if (a, f) = 1, returns 0 and sets x = a^{-1} % f; otherwise,
+// returns 1 and sets x = (a, f)
+
+
+/**************************************************************************\
+
+                     Modular Arithmetic with Pre-Conditioning
+
+If you need to do a lot of arithmetic modulo a fixed f, build
+zz_pEXModulus F for f.  This pre-computes information about f that
+speeds up subsequent computations.
+
+As an example, the following routine computes the product modulo f of a vector
+of polynomials.
+
+#include <NTL/lzz_pEX.h>
+
+void product(zz_pEX& x, const vec_zz_pEX& v, const zz_pEX& f)
+{
+   zz_pEXModulus F(f);
+   zz_pEX res;
+   res = 1;
+   long i;
+   for (i = 0; i < v.length(); i++)
+      MulMod(res, res, v[i], F);
+   x = res;
+}
+
+NOTE: A zz_pEX may be used wherever a zz_pEXModulus is required,
+and a zz_pEXModulus may be used wherever a zz_pEX is required.
+
+
+\**************************************************************************/
+
+class zz_pEXModulus {
+public:
+   zz_pEXModulus(); // initially in an unusable state
+
+   zz_pEXModulus(const zz_pEX& f); // initialize with f, deg(f) > 0
+
+   zz_pEXModulus(const zz_pEXModulus&); // copy
+
+   zz_pEXModulus& operator=(const zz_pEXModulus&); // assignment
+
+   ~zz_pEXModulus(); // destructor
+
+   operator const zz_pEX& () const; // implicit read-only access to f
+
+   const zz_pEX& val() const; // explicit read-only access to f
+};
+
+void build(zz_pEXModulus& F, const zz_pEX& f);
+// pre-computes information about f and stores it in F.  Must have
+// deg(f) > 0.  Note that the declaration zz_pEXModulus F(f) is
+// equivalent to zz_pEXModulus F; build(F, f).
+
+// In the following, f refers to the polynomial f supplied to the
+// build routine, and n = deg(f).
+
+
+long deg(const zz_pEXModulus& F);  // return n=deg(f)
+
+void MulMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& b,
+            const zz_pEXModulus& F);
+zz_pEX MulMod(const zz_pEX& a, const zz_pEX& b, const zz_pEXModulus& F);
+// x = (a * b) % f; deg(a), deg(b) < n
+
+void SqrMod(zz_pEX& x, const zz_pEX& a, const zz_pEXModulus& F);
+zz_pEX SqrMod(const zz_pEX& a, const zz_pEXModulus& F);
+// x = a^2 % f; deg(a) < n
+
+void PowerMod(zz_pEX& x, const zz_pEX& a, const ZZ& e, const zz_pEXModulus& F);
+zz_pEX PowerMod(const zz_pEX& a, const ZZ& e, const zz_pEXModulus& F);
+
+void PowerMod(zz_pEX& x, const zz_pEX& a, long e, const zz_pEXModulus& F);
+zz_pEX PowerMod(const zz_pEX& a, long e, const zz_pEXModulus& F);
+
+// x = a^e % f; deg(a) < n.  Uses a sliding window algorithm.
+// (e may be negative, in which case a must be invertible mod f)
+
+void PowerXMod(zz_pEX& x, const ZZ& e, const zz_pEXModulus& F);
+zz_pEX PowerXMod(const ZZ& e, const zz_pEXModulus& F);
+
+void PowerXMod(zz_pEX& x, long e, const zz_pEXModulus& F);
+zz_pEX PowerXMod(long e, const zz_pEXModulus& F);
+
+// x = X^e % f (e may be negative)
+
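+// For illustration only: computing the Frobenius image of X, i.e.,
+// X^q mod f with q = zz_pE::cardinality(), without ever forming X^q
+// explicitly (the name is not part of NTL):
+
+void FrobeniusOfX(zz_pEX& h, const zz_pEXModulus& F)
+{
+   PowerXMod(h, zz_pE::cardinality(), F);   // h = X^q mod f
+}
+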
+void rem(zz_pEX& x, const zz_pEX& a, const zz_pEXModulus& F);
+// x = a % f
+
+void DivRem(zz_pEX& q, zz_pEX& r, const zz_pEX& a, const zz_pEXModulus& F);
+// q = a/f, r = a%f
+
+void div(zz_pEX& q, const zz_pEX& a, const zz_pEXModulus& F);
+// q = a/f
+
+// operator notation:
+
+zz_pEX operator/(const zz_pEX& a, const zz_pEXModulus& F);
+zz_pEX operator%(const zz_pEX& a, const zz_pEXModulus& F);
+
+zz_pEX& operator/=(zz_pEX& x, const zz_pEXModulus& F);
+zz_pEX& operator%=(zz_pEX& x, const zz_pEXModulus& F);
+
+
+
+/**************************************************************************\
+
+                             vectors of zz_pEX's
+
+\**************************************************************************/
+
+
+typedef Vec<zz_pEX> vec_zz_pEX; // backward compatibility
+
+
+
+/**************************************************************************\
+
+                              Modular Composition
+
+Modular composition is the problem of computing g(h) mod f for
+polynomials f, g, and h.
+
+The algorithm employed is that of Brent & Kung (Fast algorithms for
+manipulating formal power series, JACM 25:581-595, 1978), which uses
+O(n^{1/2}) modular polynomial multiplications, and O(n^2) scalar
+operations.
+
+
+\**************************************************************************/
+
+void CompMod(zz_pEX& x, const zz_pEX& g, const zz_pEX& h,
+             const zz_pEXModulus& F);
+zz_pEX CompMod(const zz_pEX& g, const zz_pEX& h,
+                    const zz_pEXModulus& F);
+
+// x = g(h) mod f; deg(h) < n
+
+void Comp2Mod(zz_pEX& x1, zz_pEX& x2, const zz_pEX& g1, const zz_pEX& g2,
+              const zz_pEX& h, const zz_pEXModulus& F);
+// xi = gi(h) mod f (i=1,2); deg(h) < n.
+
+
+void Comp3Mod(zz_pEX& x1, zz_pEX& x2, zz_pEX& x3,
+              const zz_pEX& g1, const zz_pEX& g2, const zz_pEX& g3,
+              const zz_pEX& h, const zz_pEXModulus& F);
+// xi = gi(h) mod f (i=1..3); deg(h) < n.
+
+
+
+/**************************************************************************\
+
+                     Composition with Pre-Conditioning
+
+If a single h is going to be used with many g's then you should build
+a zz_pEXArgument for h, and then use the compose routine below.  The
+routine build computes and stores h, h^2, ..., h^m mod f.  After this
+pre-computation, composing a polynomial of degree roughly n with h
+takes n/m multiplies mod f, plus n^2 scalar multiplies.  Thus,
+increasing m increases the space requirement and the pre-computation
+time, but reduces the composition time.
+
+\**************************************************************************/
+
+
+struct zz_pEXArgument {
+   vec_zz_pEX H;
+};
+
+void build(zz_pEXArgument& H, const zz_pEX& h, const zz_pEXModulus& F, long m);
+// Pre-computes information about h.  m > 0, deg(h) < n.
+
+void CompMod(zz_pEX& x, const zz_pEX& g, const zz_pEXArgument& H,
+             const zz_pEXModulus& F);
+
+zz_pEX CompMod(const zz_pEX& g, const zz_pEXArgument& H,
+                    const zz_pEXModulus& F);
+
+extern long zz_pEXArgBound;
+
+// Initially 0.  If this is set to a value greater than zero, then
+// composition routines will allocate a table of no more than about
+// zz_pEXArgBound KB.  Setting this value affects all compose routines
+// and the power projection and minimal polynomial routines below,
+// and indirectly affects many routines in zz_pEXFactoring.
+
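+// For illustration only: composing many g's with a single h via a shared
+// zz_pEXArgument.  The table size m = 10 here is an arbitrary choice,
+// and the name is not part of NTL:
+
+void ComposeMany(vec_zz_pEX& x, const vec_zz_pEX& g,
+                 const zz_pEX& h, const zz_pEXModulus& F)
+{
+   zz_pEXArgument H;
+   build(H, h, F, 10);            // store h, h^2, ..., h^10 mod f
+   x.SetLength(g.length());
+   for (long i = 0; i < g.length(); i++)
+      CompMod(x[i], g[i], H, F);  // x[i] = g[i](h) mod f
+}
+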
+/**************************************************************************\
+
+                     power projection routines
+
+\**************************************************************************/
+
+void project(zz_pE& x, const zz_pEVector& a, const zz_pEX& b);
+zz_pE project(const zz_pEVector& a, const zz_pEX& b);
+// x = inner product of a with coefficient vector of b
+
+
+void ProjectPowers(vec_zz_pE& x, const vec_zz_pE& a, long k,
+                   const zz_pEX& h, const zz_pEXModulus& F);
+
+vec_zz_pE ProjectPowers(const vec_zz_pE& a, long k,
+                   const zz_pEX& h, const zz_pEXModulus& F);
+
+// Computes the vector
+
+//    project(a, 1), project(a, h), ..., project(a, h^{k-1} % f).  
+
+// This operation is the "transpose" of the modular composition operation.
+
+void ProjectPowers(vec_zz_pE& x, const vec_zz_pE& a, long k,
+                   const zz_pEXArgument& H, const zz_pEXModulus& F);
+
+vec_zz_pE ProjectPowers(const vec_zz_pE& a, long k,
+                   const zz_pEXArgument& H, const zz_pEXModulus& F);
+
+// same as above, but uses a pre-computed zz_pEXArgument
+
+
+class zz_pEXTransMultiplier { /* ... */ };
+
+void build(zz_pEXTransMultiplier& B, const zz_pEX& b, const zz_pEXModulus& F);
+
+void UpdateMap(vec_zz_pE& x, const vec_zz_pE& a,
+               const zz_pEXTransMultiplier& B, const zz_pEXModulus& F);
+
+vec_zz_pE UpdateMap(const vec_zz_pE& a,
+               const zz_pEXTransMultiplier& B, const zz_pEXModulus& F);
+
+// Computes the vector
+
+//    project(a, b), project(a, (b*X)%f), ..., project(a, (b*X^{n-1})%f)
+
+// Required: a.length() <= deg(F), deg(b) < deg(F).
+// This is "transposed" MulMod by B.
+// Input may have "high order" zeroes stripped.
+// Output always has high order zeroes stripped.
+
+
+/**************************************************************************\
+
+                              Minimum Polynomials
+
+These routines should be used only when zz_pE is a field.
+
+All of these routines implement the algorithm from [Shoup, J. Symbolic
+Comp. 17:371-391, 1994] and [Shoup, J. Symbolic Comp. 20:363-397,
+1995], based on transposed modular composition and the
+Berlekamp/Massey algorithm.
+
+\**************************************************************************/
+
+
+void MinPolySeq(zz_pEX& h, const vec_zz_pE& a, long m);
+zz_pEX MinPolySeq(const vec_zz_pE& a, long m);
+// computes the minimum polynomial of a linearly generated sequence; m
+// is a bound on the degree of the polynomial; required: a.length() >=
+// 2*m
+
+
+void ProbMinPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F, long m);
+zz_pEX ProbMinPolyMod(const zz_pEX& g, const zz_pEXModulus& F, long m);
+
+void ProbMinPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F);
+zz_pEX ProbMinPolyMod(const zz_pEX& g, const zz_pEXModulus& F);
+
+// computes the monic minimal polynomial of (g mod f).  m = a bound on
+// the degree of the minimal polynomial; in the second version, this
+// argument defaults to n.  The algorithm is probabilistic, always
+// returns a divisor of the minimal polynomial, and returns a proper
+// divisor with probability at most m/2^{zz_pE::degree()}.
+
+void MinPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F, long m);
+zz_pEX MinPolyMod(const zz_pEX& g, const zz_pEXModulus& F, long m);
+
+void MinPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F);
+zz_pEX MinPolyMod(const zz_pEX& g, const zz_pEXModulus& F);
+
+// same as above, but guarantees that result is correct
+
+void IrredPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F, long m);
+zz_pEX IrredPolyMod(const zz_pEX& g, const zz_pEXModulus& F, long m);
+
+void IrredPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F);
+zz_pEX IrredPolyMod(const zz_pEX& g, const zz_pEXModulus& F);
+
+// same as above, but assumes that f is irreducible, or at least that
+// the minimal poly of g is itself irreducible.  The algorithm is
+// deterministic (and is always correct).
+
+/**************************************************************************\
+
+           Composition and Minimal Polynomials in towers
+
+These are implementations of algorithms that will be described
+and analyzed in a forthcoming paper.
+
+The routines require that p is prime, but zz_pE need not be a field.
+
+\**************************************************************************/
+
+
+void CompTower(zz_pEX& x, const zz_pX& g, const zz_pEXArgument& h,
+             const zz_pEXModulus& F);
+
+zz_pEX CompTower(const zz_pX& g, const zz_pEXArgument& h,
+             const zz_pEXModulus& F);
+
+void CompTower(zz_pEX& x, const zz_pX& g, const zz_pEX& h,
+             const zz_pEXModulus& F);
+
+zz_pEX CompTower(const zz_pX& g, const zz_pEX& h,
+             const zz_pEXModulus& F);
+
+
+// x = g(h) mod f
+
+
+void ProbMinPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F,
+                      long m);
+
+zz_pX ProbMinPolyTower(const zz_pEX& g, const zz_pEXModulus& F, long m);
+
+void ProbMinPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F);
+
+zz_pX ProbMinPolyTower(const zz_pEX& g, const zz_pEXModulus& F);
+
+// Uses a probabilistic algorithm to compute the minimal
+// polynomial of (g mod f) over zz_p.
+// The parameter m is a bound on the degree of the minimal polynomial
+// (default = deg(f)*zz_pE::degree()).
+// In general, the result will be a divisor of the true minimal
+// polynomial.  For correct results, use the MinPoly routines below.
+
+
+
+void MinPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F, long m);
+
+zz_pX MinPolyTower(const zz_pEX& g, const zz_pEXModulus& F, long m);
+
+void MinPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F);
+
+zz_pX MinPolyTower(const zz_pEX& g, const zz_pEXModulus& F);
+
+// Same as above, but result is always correct.
+
+
+void IrredPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F, long m);
+
+zz_pX IrredPolyTower(const zz_pEX& g, const zz_pEXModulus& F, long m);
+
+void IrredPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F);
+
+zz_pX IrredPolyTower(const zz_pEX& g, const zz_pEXModulus& F);
+
+// Same as above, but assumes the minimal polynomial is
+// irreducible, and uses a slightly faster, deterministic algorithm.
+
+
+/**************************************************************************\
+
+                   Traces, norms, resultants
+
+\**************************************************************************/
+
+
+void TraceMod(zz_pE& x, const zz_pEX& a, const zz_pEXModulus& F);
+zz_pE TraceMod(const zz_pEX& a, const zz_pEXModulus& F);
+
+void TraceMod(zz_pE& x, const zz_pEX& a, const zz_pEX& f);
+zz_pE TraceMod(const zz_pEX& a, const zz_pEX& f);
+// x = Trace(a mod f); deg(a) < deg(f)
+
+
+void TraceVec(vec_zz_pE& S, const zz_pEX& f);
+vec_zz_pE TraceVec(const zz_pEX& f);
+// S[i] = Trace(X^i mod f), i = 0..deg(f)-1; 0 < deg(f)
+
+// The above trace routines implement the asymptotically fast trace
+// algorithm from [von zur Gathen and Shoup, Computational Complexity,
+// 1992].
+
+void NormMod(zz_pE& x, const zz_pEX& a, const zz_pEX& f);
+zz_pE NormMod(const zz_pEX& a, const zz_pEX& f);
+// x = Norm(a mod f); 0 < deg(f), deg(a) < deg(f)
+
+void resultant(zz_pE& x, const zz_pEX& a, const zz_pEX& b);
+zz_pE resultant(const zz_pEX& a, const zz_pEX& b);
+// x = resultant(a, b)
+
+// NormMod and resultant require that zz_pE is a field.
+
+
+
+
+/**************************************************************************\
+
+                           Miscellany
+
+
+\**************************************************************************/
+
+
+void clear(zz_pEX& x); // x = 0
+void set(zz_pEX& x); // x = 1
+
+void zz_pEX::kill();
+// f.kill() sets f to 0 and frees all memory held by f.  Equivalent to
+// f.rep.kill().
+
+zz_pEX::zz_pEX(INIT_SIZE_TYPE, long n);
+// zz_pEX(INIT_SIZE, n) initializes to zero, but space is pre-allocated
+// for n coefficients
+
+static const zz_pEX& zero();
+// zz_pEX::zero() is a read-only reference to 0
+
+void zz_pEX::swap(zz_pEX& x);
+void swap(zz_pEX& x, zz_pEX& y);
+// swap (via "pointer swapping")
+
+
+zz_pEX::zz_pEX(long i, const zz_pE& c);
+zz_pEX::zz_pEX(long i, const zz_p& c);
+zz_pEX::zz_pEX(long i, long c);
+// initialize to c*X^i; provided for backward compatibility
+
+
diff --git a/thirdparty/linux/ntl/doc/lzz_pEXFactoring.cpp.html b/thirdparty/linux/ntl/doc/lzz_pEXFactoring.cpp.html
new file mode 100644
index 0000000000..7b79c5535b
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/lzz_pEXFactoring.cpp.html
@@ -0,0 +1,198 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/lzz_pEXFactoring.cpp.html
+
+
+
+
+/**************************************************************************\
+
+MODULE: zz_pEXFactoring
+
+SUMMARY:
+
+Routines are provided for factorization of polynomials over zz_pE, as
+well as routines for related problems such as testing irreducibility
+and constructing irreducible polynomials of given degree.
+
+\**************************************************************************/
+
+#include <NTL/lzz_pEX.h>
+#include <NTL/pair_lzz_pEX_long.h>
+
+void SquareFreeDecomp(vec_pair_zz_pEX_long& u, const zz_pEX& f);
+vec_pair_zz_pEX_long SquareFreeDecomp(const zz_pEX& f);
+
+// Performs square-free decomposition.  f must be monic.  If f =
+// prod_i g_i^i, then u is set to a list of pairs (g_i, i).  The list
+// is in increasing order of i, with trivial terms (i.e., g_i = 1)
+// deleted.
+
+
+void FindRoots(vec_zz_pE& x, const zz_pEX& f);
+vec_zz_pE FindRoots(const zz_pEX& f);
+
+// f is monic, and has deg(f) distinct roots.  returns the list of
+// roots
+
+void FindRoot(zz_pE& root, const zz_pEX& f);
+zz_pE FindRoot(const zz_pEX& f);
+
+// finds a single root of f.  assumes that f is monic and splits into
+// distinct linear factors
+
+
+void NewDDF(vec_pair_zz_pEX_long& factors, const zz_pEX& f,
+            const zz_pEX& h, long verbose=0);
+
+vec_pair_zz_pEX_long NewDDF(const zz_pEX& f, const zz_pEX& h,
+         long verbose=0);
+
+
+// This computes a distinct-degree factorization.  The input must be
+// monic and square-free.  factors is set to a list of pairs (g, d),
+// where g is the product of all irreducible factors of f of degree d.
+// Only nontrivial pairs (i.e., g != 1) are included.  The polynomial
+// h is assumed to be equal to X^{zz_pE::cardinality()} mod f.
+
+// This routine implements the baby step/giant step algorithm
+// of [Kaltofen and Shoup, STOC 1995], further described in
+// [Shoup, J. Symbolic Comp. 20:363-397, 1995].
+
+// NOTE: When factoring "large" polynomials,
+// this routine uses external files to store some intermediate
+// results, which are removed if the routine terminates normally.
+// These files are stored in the current directory under names of the
+// form tmp-*.
+// The definition of "large" is controlled by the variable
+
+      extern double zz_pEXFileThresh
+
+// which can be set by the user.  If the size of the tables
+// exceeds zz_pEXFileThresh KB, external files are used.
+// Initial value is NTL_FILE_THRESH (defined in tools.h).
+
+
+
+void EDF(vec_zz_pEX& factors, const zz_pEX& f, const zz_pEX& h,
+         long d, long verbose=0);
+
+vec_zz_pEX EDF(const zz_pEX& f, const zz_pEX& h,
+         long d, long verbose=0);
+
+// Performs equal-degree factorization.  f is monic, square-free, and
+// all irreducible factors have same degree.  h = X^{zz_pE::cardinality()} mod
+// f.  d = degree of irreducible factors of f.  This routine
+// implements the algorithm of [von zur Gathen and Shoup,
+// Computational Complexity 2:187-224, 1992]
+
+void RootEDF(vec_zz_pEX& factors, const zz_pEX& f, long verbose=0);
+vec_zz_pEX RootEDF(const zz_pEX& f, long verbose=0);
+
+// EDF for d==1
+
+
+void SFCanZass(vec_zz_pEX& factors, const zz_pEX& f, long verbose=0);
+vec_zz_pEX SFCanZass(const zz_pEX& f, long verbose=0);
+
+// Assumes f is monic and square-free.  returns list of factors of f.
+// Uses "Cantor/Zassenhaus" approach, using the routines NewDDF and
+// EDF above.
+
+
+void CanZass(vec_pair_zz_pEX_long& factors, const zz_pEX& f,
+             long verbose=0);
+
+vec_pair_zz_pEX_long CanZass(const zz_pEX& f, long verbose=0);
+
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SquareFreeDecomp and SFCanZass.
+
+// NOTE: these routines use modular composition.  The space
+// used for the required tables can be controlled by the variable
+// zz_pEXArgBound (see zz_pEX.txt).
+
+
+
+void mul(zz_pEX& f, const vec_pair_zz_pEX_long& v);
+zz_pEX mul(const vec_pair_zz_pEX_long& v);
+
+// multiplies polynomials, with multiplicities
+
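+// For example, an illustrative sketch (assuming zz_p and zz_pE have
+// already been initialized) that factors a monic f with CanZass and
+// re-multiplies the result as a consistency check:
+//
+//    vec_pair_zz_pEX_long factors;
+//    CanZass(factors, f);     // complete factorization of f
+//    zz_pEX g;
+//    mul(g, factors);         // now g == f should hold
+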
+
+/**************************************************************************\
+
+                            Irreducible Polynomials
+
+\**************************************************************************/
+
+long ProbIrredTest(const zz_pEX& f, long iter=1);
+
+// performs a fast, probabilistic irreducibility test.  The test can
+// err only if f is reducible, and the error probability is bounded by
+// zz_pE::cardinality()^{-iter}.  This implements an algorithm from [Shoup,
+// J. Symbolic Comp. 17:371-391, 1994].
+
+long DetIrredTest(const zz_pEX& f);
+
+// performs a recursive deterministic irreducibility test.  Fast in
+// the worst-case (when input is irreducible).  This implements an
+// algorithm from [Shoup, J. Symbolic Comp. 17:371-391, 1994].
+
+long IterIrredTest(const zz_pEX& f);
+
+// performs an iterative deterministic irreducibility test, based on
+// DDF.  Fast on average (when f has a small factor).
+
+void BuildIrred(zz_pEX& f, long n);
+zz_pEX BuildIrred_zz_pEX(long n);
+
+// Build a monic irreducible poly of degree n.
+
+void BuildRandomIrred(zz_pEX& f, const zz_pEX& g);
+zz_pEX BuildRandomIrred(const zz_pEX& g);
+
+// g is a monic irreducible polynomial.  Constructs a random monic
+// irreducible polynomial f of the same degree.
+
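+// For example, an illustrative sketch:
+//
+//    zz_pEX f;
+//    BuildIrred(f, 5);            // monic irreducible of degree 5
+//    long ok = IterIrredTest(f);  // ok == 1
+//
+//    zz_pEX g;
+//    BuildRandomIrred(g, f);      // random monic irreducible of degree 5
+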
+
+long IterComputeDegree(const zz_pEX& h, const zz_pEXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial, and h =
+// X^{zz_pE::cardinality()} mod f.  The common degree of the irreducible
+// factors of f is computed.  Uses a "baby step/giant step" algorithm, similar
+// to NewDDF.  Although asymptotically slower than RecComputeDegree
+// (below), it is faster for reasonably sized inputs.
+
+long RecComputeDegree(const zz_pEX& h, const zz_pEXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial,
+// h = X^{zz_pE::cardinality()} mod f.  
+// The common degree of the irreducible factors of f is
+// computed.  Uses a recursive algorithm similar to DetIrredTest.
+
+void TraceMap(zz_pEX& w, const zz_pEX& a, long d, const zz_pEXModulus& F,
+              const zz_pEX& h);
+
+zz_pEX TraceMap(const zz_pEX& a, long d, const zz_pEXModulus& F,
+              const zz_pEX& h);
+
+// Computes w = a+a^q+...+a^{q^{d-1}} mod f; it is assumed that d >= 0,
+// and h = X^q mod f, q a power of zz_pE::cardinality().  This routine
+// implements an algorithm from [von zur Gathen and Shoup,
+// Computational Complexity 2:187-224, 1992]
+
+void PowerCompose(zz_pEX& w, const zz_pEX& h, long d, const zz_pEXModulus& F);
+
+zz_pEX PowerCompose(const zz_pEX& h, long d, const zz_pEXModulus& F);
+
+// Computes w = X^{q^d} mod f; it is assumed that d >= 0, and h = X^q
+// mod f, q a power of zz_pE::cardinality().  This routine implements an
+// algorithm from [von zur Gathen and Shoup, Computational Complexity
+// 2:187-224, 1992]
+
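+// As an illustrative usage sketch (PowerXMod is documented in
+// lzz_pEX.txt): for a monic f of degree n and a zz_pEX a with deg(a) < n,
+//
+//    zz_pEXModulus F(f);
+//    zz_pEX h, w;
+//    PowerXMod(h, zz_pE::cardinality(), F);  // h = X^q mod f
+//    TraceMap(w, a, deg(f), F, h);  // w = a + a^q + ... + a^{q^{n-1}} mod f
+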
+
+
diff --git a/thirdparty/linux/ntl/doc/lzz_pEXFactoring.txt b/thirdparty/linux/ntl/doc/lzz_pEXFactoring.txt
new file mode 100644
index 0000000000..40677ba7d3
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/lzz_pEXFactoring.txt
@@ -0,0 +1,188 @@
+
+/**************************************************************************\
+
+MODULE: zz_pEXFactoring
+
+SUMMARY:
+
+Routines are provided for factorization of polynomials over zz_pE, as
+well as routines for related problems such as testing irreducibility
+and constructing irreducible polynomials of given degree.
+
+\**************************************************************************/
+
+#include <NTL/lzz_pEX.h>
+#include <NTL/pair_lzz_pEX_long.h>
+
+void SquareFreeDecomp(vec_pair_zz_pEX_long& u, const zz_pEX& f);
+vec_pair_zz_pEX_long SquareFreeDecomp(const zz_pEX& f);
+
+// Performs square-free decomposition.  f must be monic.  If f =
+// prod_i g_i^i, then u is set to a list of pairs (g_i, i).  The list
+// is in increasing order of i, with trivial terms (i.e., g_i = 1)
+// deleted.
+
+
+void FindRoots(vec_zz_pE& x, const zz_pEX& f);
+vec_zz_pE FindRoots(const zz_pEX& f);
+
+// f is monic, and has deg(f) distinct roots.  returns the list of
+// roots
+
+void FindRoot(zz_pE& root, const zz_pEX& f);
+zz_pE FindRoot(const zz_pEX& f);
+
+// finds a single root of f.  assumes that f is monic and splits into
+// distinct linear factors
+
+
+void NewDDF(vec_pair_zz_pEX_long& factors, const zz_pEX& f,
+            const zz_pEX& h, long verbose=0);
+
+vec_pair_zz_pEX_long NewDDF(const zz_pEX& f, const zz_pEX& h,
+         long verbose=0);
+
+
+// This computes a distinct-degree factorization.  The input must be
+// monic and square-free.  factors is set to a list of pairs (g, d),
+// where g is the product of all irreducible factors of f of degree d.
+// Only nontrivial pairs (i.e., g != 1) are included.  The polynomial
+// h is assumed to be equal to X^{zz_pE::cardinality()} mod f.
+
+// This routine implements the baby step/giant step algorithm
+// of [Kaltofen and Shoup, STOC 1995], further described in
+// [Shoup, J. Symbolic Comp. 20:363-397, 1995].
+
+// NOTE: When factoring "large" polynomials,
+// this routine uses external files to store some intermediate
+// results, which are removed if the routine terminates normally.
+// These files are stored in the current directory under names of the
+// form tmp-*.
+// The definition of "large" is controlled by the variable
+
+      extern double zz_pEXFileThresh
+
+// which can be set by the user.  If the size of the tables
+// exceeds zz_pEXFileThresh KB, external files are used.
+// Initial value is NTL_FILE_THRESH (defined in tools.h).
+
+
+
+void EDF(vec_zz_pEX& factors, const zz_pEX& f, const zz_pEX& h,
+         long d, long verbose=0);
+
+vec_zz_pEX EDF(const zz_pEX& f, const zz_pEX& h,
+         long d, long verbose=0);
+
+// Performs equal-degree factorization.  f is monic, square-free, and
+// all irreducible factors have same degree.  h = X^{zz_pE::cardinality()} mod
+// f.  d = degree of irreducible factors of f.  This routine
+// implements the algorithm of [von zur Gathen and Shoup,
+// Computational Complexity 2:187-224, 1992]
+
+void RootEDF(vec_zz_pEX& factors, const zz_pEX& f, long verbose=0);
+vec_zz_pEX RootEDF(const zz_pEX& f, long verbose=0);
+
+// EDF for d==1
+
+
+void SFCanZass(vec_zz_pEX& factors, const zz_pEX& f, long verbose=0);
+vec_zz_pEX SFCanZass(const zz_pEX& f, long verbose=0);
+
+// Assumes f is monic and square-free.  returns list of factors of f.
+// Uses "Cantor/Zassenhaus" approach, using the routines NewDDF and
+// EDF above.
+
+
+void CanZass(vec_pair_zz_pEX_long& factors, const zz_pEX& f,
+             long verbose=0);
+
+vec_pair_zz_pEX_long CanZass(const zz_pEX& f, long verbose=0);
+
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SquareFreeDecomp and SFCanZass.
+
+// NOTE: these routines use modular composition.  The space
+// used for the required tables can be controlled by the variable
+// zz_pEXArgBound (see zz_pEX.txt).
+
+
+
+void mul(zz_pEX& f, const vec_pair_zz_pEX_long& v);
+zz_pEX mul(const vec_pair_zz_pEX_long& v);
+
+// multiplies polynomials, with multiplicities
+
+
+/**************************************************************************\
+
+                            Irreducible Polynomials
+
+\**************************************************************************/
+
+long ProbIrredTest(const zz_pEX& f, long iter=1);
+
+// performs a fast, probabilistic irreducibility test.  The test can
+// err only if f is reducible, and the error probability is bounded by
+// zz_pE::cardinality()^{-iter}.  This implements an algorithm from [Shoup,
+// J. Symbolic Comp. 17:371-391, 1994].
+
+long DetIrredTest(const zz_pEX& f);
+
+// performs a recursive deterministic irreducibility test.  Fast in
+// the worst-case (when input is irreducible).  This implements an
+// algorithm from [Shoup, J. Symbolic Comp. 17:371-391, 1994].
+
+long IterIrredTest(const zz_pEX& f);
+
+// performs an iterative deterministic irreducibility test, based on
+// DDF.  Fast on average (when f has a small factor).
+
+void BuildIrred(zz_pEX& f, long n);
+zz_pEX BuildIrred_zz_pEX(long n);
+
+// Build a monic irreducible poly of degree n.
+
+void BuildRandomIrred(zz_pEX& f, const zz_pEX& g);
+zz_pEX BuildRandomIrred(const zz_pEX& g);
+
+// g is a monic irreducible polynomial.  Constructs a random monic
+// irreducible polynomial f of the same degree.
+
+
+long IterComputeDegree(const zz_pEX& h, const zz_pEXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial, and h =
+// X^{zz_pE::cardinality()} mod f.  The common degree of the irreducible
+// factors of f is computed.  Uses a "baby step/giant step" algorithm, similar
+// to NewDDF.  Although asymptotically slower than RecComputeDegree
+// (below), it is faster for reasonably sized inputs.
+
+long RecComputeDegree(const zz_pEX& h, const zz_pEXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial,
+// h = X^{zz_pE::cardinality()} mod f.
+// The common degree of the irreducible factors of f is
+// computed.  Uses a recursive algorithm similar to DetIrredTest.
+
+void TraceMap(zz_pEX& w, const zz_pEX& a, long d, const zz_pEXModulus& F,
+              const zz_pEX& h);
+
+zz_pEX TraceMap(const zz_pEX& a, long d, const zz_pEXModulus& F,
+              const zz_pEX& h);
+
+// Computes w = a+a^q+...+a^{q^{d-1}} mod f; it is assumed that d >= 0,
+// and h = X^q mod f, q a power of zz_pE::cardinality().  This routine
+// implements an algorithm from [von zur Gathen and Shoup,
+// Computational Complexity 2:187-224, 1992]
+
+void PowerCompose(zz_pEX& w, const zz_pEX& h, long d, const zz_pEXModulus& F);
+
+zz_pEX PowerCompose(const zz_pEX& h, long d, const zz_pEXModulus& F);
+
+// Computes w = X^{q^d} mod f; it is assumed that d >= 0, and h = X^q
+// mod f, q a power of zz_pE::cardinality().  This routine implements an
+// algorithm from [von zur Gathen and Shoup, Computational Complexity
+// 2:187-224, 1992]
+
diff --git a/thirdparty/linux/ntl/doc/lzz_pX.cpp.html b/thirdparty/linux/ntl/doc/lzz_pX.cpp.html
new file mode 100644
index 0000000000..e51eba5487
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/lzz_pX.cpp.html
@@ -0,0 +1,940 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/lzz_pX.cpp.html
+
+
+
+
+/**************************************************************************\
+
+MODULE: zz_pX
+
+SUMMARY:
+
+The class zz_pX implements polynomial arithmetic modulo p.
+
+Polynomial arithmetic is implemented using a combination of classical
+routines, Karatsuba, and FFT.
+
+\**************************************************************************/
+
+#include "zz_p.h"
+#include "vec_zz_p.h"
+
+class zz_pX {
+public:
+
+   zz_pX(); // initial value 0
+
+   zz_pX(const zz_pX& a); // copy
+   explicit zz_pX(zz_p a); // promotion
+   explicit zz_pX(long a); // promotion
+
+   zz_pX& operator=(const zz_pX& a); // assignment
+   zz_pX& operator=(zz_p a);
+   zz_pX& operator=(long a);
+
+   ~zz_pX(); // destructor
+
+   zz_pX(INIT_MONO_TYPE, long i, zz_p c);
+   zz_pX(INIT_MONO_TYPE, long i, long c);
+   // initialize to c*X^i, invoke as zz_pX(INIT_MONO, i, c)
+
+   zz_pX(INIT_MONO_TYPE, long i);
+   // initialize to X^i, invoke as zz_pX(INIT_MONO, i)
+
+   typedef zz_p coeff_type;
+
+   // ...
+
+  
+};
+
+
+
+
+
+/**************************************************************************\
+
+                              Accessing coefficients
+
+The degree of a polynomial f is obtained as deg(f),
+where the zero polynomial, by definition, has degree -1.
+
+A polynomial f is represented as a coefficient vector.
+Coefficients may be accessed in one of two ways.
+
+The safe, high-level method is to call the function
+coeff(f, i) to get the coefficient of X^i in the polynomial f,
+and to call the function SetCoeff(f, i, a) to set the coefficient
+of X^i in f to the scalar a.
+
+One can also access the coefficients more directly via a lower level
+interface.  The coefficient of X^i in f may be accessed using
+subscript notation f[i].  In addition, one may write f.SetLength(n)
+to set the length of the underlying coefficient vector to n,
+and f.SetMaxLength(n) to allocate space for n coefficients,
+without changing the coefficient vector itself.
+
+After setting coefficients using this low-level interface,
+one must ensure that leading zeros in the coefficient vector
+are stripped afterwards by calling the function f.normalize().
+
+
+NOTE: the coefficient vector of f may also be accessed directly
+as f.rep; however, this is not recommended. Also, for a properly
+normalized polynomial f, we have f.rep.length() == deg(f)+1,
+and deg(f) >= 0  =>  f.rep[deg(f)] != 0.
+
+\**************************************************************************/
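+
+// For example, an illustrative sketch of the two interfaces:
+//
+//    zz_pX f;
+//    SetCoeff(f, 2, 1);     // high-level: f = X^2
+//
+//    f.SetLength(4);        // low-level: coefficients 0..3 addressable
+//    f[0] = 1;              // f = X^2 + 1; f[1] and f[3] are zero
+//    f.normalize();         // strips the leading zero at index 3
+//
+//    zz_p c = coeff(f, 2);  // c = 1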
+
+
+
+long deg(const zz_pX& a);  // return deg(a); deg(0) == -1.
+
+const zz_p coeff(const zz_pX& a, long i);
+// returns the coefficient of X^i, or zero if i not in range
+
+const zz_p LeadCoeff(const zz_pX& a);
+// returns leading term of a, or zero if a == 0
+
+const zz_p ConstTerm(const zz_pX& a);
+// returns constant term of a, or zero if a == 0
+
+void SetCoeff(zz_pX& x, long i, zz_p a);
+void SetCoeff(zz_pX& x, long i, long a);
+// makes coefficient of X^i equal to a; error is raised if i < 0
+
+void SetCoeff(zz_pX& x, long i);
+// makes coefficient of X^i equal to 1;  error is raised if i < 0
+
+void SetX(zz_pX& x); // x is set to the monomial X
+
+long IsX(const zz_pX& a); // test if a = X
+
+
+
+
+zz_p& zz_pX::operator[](long i);
+const zz_p& zz_pX::operator[](long i) const;
+// indexing operators: f[i] is the coefficient of X^i ---
+// i should satisfy i >= 0 and i <= deg(f).
+// No range checking (unless NTL_RANGE_CHECK is defined).
+
+void zz_pX::SetLength(long n);
+// f.SetLength(n) sets the length of the underlying coefficient
+// vector to n --- after this call, indexing f[i] for i = 0..n-1
+// is valid.
+
+void zz_pX::normalize();  
+// f.normalize() strips leading zeros from coefficient vector of f
+
+void zz_pX::SetMaxLength(long n);
+// f.SetMaxLength(n) pre-allocates space for n coefficients.  The
+// polynomial that f represents is unchanged.
+
+
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+long operator==(const zz_pX& a, const zz_pX& b);
+long operator!=(const zz_pX& a, const zz_pX& b);
+
+long IsZero(const zz_pX& a); // test for 0
+long IsOne(const zz_pX& a); // test for 1
+
+// PROMOTIONS: operators ==, != promote {long, zz_p} to zz_pX on (a, b)
+
+
+/**************************************************************************\
+
+                                   Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pX operator+(const zz_pX& a, const zz_pX& b);
+zz_pX operator-(const zz_pX& a, const zz_pX& b);
+
+zz_pX operator-(const zz_pX& a); // unary -
+
+zz_pX& operator+=(zz_pX& x, const zz_pX& a);
+zz_pX& operator+=(zz_pX& x, zz_p a);
+zz_pX& operator+=(zz_pX& x, long a);
+
+zz_pX& operator-=(zz_pX& x, const zz_pX& a);
+zz_pX& operator-=(zz_pX& x, zz_p a);
+zz_pX& operator-=(zz_pX& x, long a);
+
+zz_pX& operator++(zz_pX& x);  // prefix
+void operator++(zz_pX& x, int);  // postfix
+
+zz_pX& operator--(zz_pX& x);  // prefix
+void operator--(zz_pX& x, int);  // postfix
+
+// procedural versions:
+
+
+void add(zz_pX& x, const zz_pX& a, const zz_pX& b); // x = a + b
+void sub(zz_pX& x, const zz_pX& a, const zz_pX& b); // x = a - b
+void negate(zz_pX& x, const zz_pX& a); // x = -a
+
+// PROMOTIONS: binary +, - and procedures add, sub promote {long, zz_p}
+// to zz_pX on (a, b).
+
+
+/**************************************************************************\
+
+                               Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pX operator*(const zz_pX& a, const zz_pX& b);
+
+zz_pX& operator*=(zz_pX& x, const zz_pX& a);
+zz_pX& operator*=(zz_pX& x, zz_p a);
+zz_pX& operator*=(zz_pX& x, long a);
+
+// procedural versions:
+
+
+void mul(zz_pX& x, const zz_pX& a, const zz_pX& b); // x = a * b
+
+void sqr(zz_pX& x, const zz_pX& a); // x = a^2
+zz_pX sqr(const zz_pX& a);
+
+// PROMOTIONS: operator * and procedure mul promote {long, zz_p} to zz_pX
+// on (a, b).
+
+void power(zz_pX& x, const zz_pX& a, long e);  // x = a^e (e >= 0)
+zz_pX power(const zz_pX& a, long e);
+
+
+/**************************************************************************\
+
+                               Shift Operations
+
+LeftShift by n means multiplication by X^n
+RightShift by n means division by X^n
+
+A negative shift amount reverses the direction of the shift.
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pX operator<<(const zz_pX& a, long n);
+zz_pX operator>>(const zz_pX& a, long n);
+
+zz_pX& operator<<=(zz_pX& x, long n);
+zz_pX& operator>>=(zz_pX& x, long n);
+
+// procedural versions:
+
+void LeftShift(zz_pX& x, const zz_pX& a, long n);
+zz_pX LeftShift(const zz_pX& a, long n);
+
+void RightShift(zz_pX& x, const zz_pX& a, long n);
+zz_pX RightShift(const zz_pX& a, long n);
+
+
+
+/**************************************************************************\
+
+                                  Division
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pX operator/(const zz_pX& a, const zz_pX& b);
+zz_pX operator%(const zz_pX& a, const zz_pX& b);
+
+zz_pX& operator/=(zz_pX& x, const zz_pX& a);
+zz_pX& operator/=(zz_pX& x, zz_p a);
+zz_pX& operator/=(zz_pX& x, long a);
+
+zz_pX& operator%=(zz_pX& x, const zz_pX& b);
+
+
+// procedural versions:
+
+
+void DivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pX& b);
+// q = a/b, r = a%b
+
+void div(zz_pX& q, const zz_pX& a, const zz_pX& b);
+// q = a/b
+
+void rem(zz_pX& r, const zz_pX& a, const zz_pX& b);
+// r = a%b
+
+long divide(zz_pX& q, const zz_pX& a, const zz_pX& b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0
+
+long divide(const zz_pX& a, const zz_pX& b);
+// if b | a, returns 1; otherwise returns 0
+
+// PROMOTIONS: operator / and procedure div promote {long, zz_p} to zz_pX
+// on (a, b).
+
+
+/**************************************************************************\
+
+                                   GCD's
+
+These routines are intended for use when p is prime.
+
+\**************************************************************************/
+
+
+void GCD(zz_pX& x, const zz_pX& a, const zz_pX& b);
+zz_pX GCD(const zz_pX& a, const zz_pX& b);
+// x = GCD(a, b),  x is always monic (or zero if a==b==0).
+
+
+void XGCD(zz_pX& d, zz_pX& s, zz_pX& t, const zz_pX& a, const zz_pX& b);
+// d = gcd(a,b), a s + b t = d
+
+
+// NOTE: A classical algorithm is used, switching over to a
+// "half-GCD" algorithm for large degree
+
+
+/**************************************************************************\
+
+                                  Input/Output
+
+I/O format:
+
+   [a_0 a_1 ... a_n],
+
+represents the polynomial a_0 + a_1*X + ... + a_n*X^n.
+
+On output, all coefficients will be integers between 0 and p-1, and
+a_n not zero (the zero polynomial is [ ]).  On input, the coefficients
+are arbitrary integers which are reduced modulo p, and leading zeros
+stripped.
+
+\**************************************************************************/
+
+istream& operator>>(istream& s, zz_pX& x);
+ostream& operator<<(ostream& s, const zz_pX& a);
+
+
+/**************************************************************************\
+
+                              Some utility routines
+
+\**************************************************************************/
+
+
+void diff(zz_pX& x, const zz_pX& a);
+zz_pX diff(const zz_pX& a);
+// x = derivative of a
+
+
+void MakeMonic(zz_pX& x);
+// if x != 0 makes x into its monic associate; LeadCoeff(x) must be
+// invertible in this case.
+
+void reverse(zz_pX& x, const zz_pX& a, long hi);
+zz_pX reverse(const zz_pX& a, long hi);
+
+void reverse(zz_pX& x, const zz_pX& a);
+zz_pX reverse(const zz_pX& a);
+
+// x = reverse of a[0]..a[hi] (hi >= -1);
+// hi defaults to deg(a) in second version
+
+void VectorCopy(vec_zz_p& x, const zz_pX& a, long n);
+vec_zz_p VectorCopy(const zz_pX& a, long n);
+// x = copy of coefficient vector of a of length exactly n.
+// input is truncated or padded with zeroes as appropriate.
+
+
+
+
+
+/**************************************************************************\
+
+                             Random Polynomials
+
+\**************************************************************************/
+
+void random(zz_pX& x, long n);
+zz_pX random_zz_pX(long n);
+// x = random polynomial of degree < n
+
+
+/**************************************************************************\
+
+                    Polynomial Evaluation and related problems
+
+\**************************************************************************/
+
+
+void BuildFromRoots(zz_pX& x, const vec_zz_p& a);
+zz_pX BuildFromRoots(const vec_zz_p& a);
+// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n =
+// a.length()
+
+void eval(zz_p& b, const zz_pX& f, zz_p a);
+zz_p eval(const zz_pX& f, zz_p a);
+// b = f(a)
+
+void eval(vec_zz_p& b, const zz_pX& f, const vec_zz_p& a);
+vec_zz_p eval(const zz_pX& f, const vec_zz_p& a);
+//  b.SetLength(a.length());  b[i] = f(a[i]) for 0 <= i < a.length()
+
+void interpolate(zz_pX& f, const vec_zz_p& a, const vec_zz_p& b);
+zz_pX interpolate(const vec_zz_p& a, const vec_zz_p& b);
+// interpolates the polynomial f satisfying f(a[i]) = b[i].  p should
+// be prime.
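+
+// For example, an illustrative sketch (p prime, the a[i] distinct):
+//
+//    vec_zz_p a, b;
+//    // ... fill a with distinct points and b with values ...
+//    zz_pX f = interpolate(a, b);   // deg(f) < a.length()
+//    // now eval(f, a[i]) == b[i] for all i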
+
+/**************************************************************************\
+
+                       Arithmetic mod X^n
+
+It is required that n >= 0, otherwise an error is raised.
+
+\**************************************************************************/
+
+void trunc(zz_pX& x, const zz_pX& a, long n); // x = a % X^n
+zz_pX trunc(const zz_pX& a, long n);
+
+void MulTrunc(zz_pX& x, const zz_pX& a, const zz_pX& b, long n);
+zz_pX MulTrunc(const zz_pX& a, const zz_pX& b, long n);
+// x = a * b % X^n
+
+void SqrTrunc(zz_pX& x, const zz_pX& a, long n);
+zz_pX SqrTrunc(const zz_pX& a, long n);
+// x = a^2 % X^n
+
+void InvTrunc(zz_pX& x, const zz_pX& a, long n);
+zz_pX InvTrunc(const zz_pX& a, long n);
+// computes x = a^{-1} % X^n.  Must have ConstTerm(a) invertible.
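+
+// For example, an illustrative sketch (ConstTerm(a) invertible, n >= 1):
+//
+//    zz_pX b = InvTrunc(a, n);       // b = a^{-1} % X^n
+//    zz_pX e = MulTrunc(a, b, n);    // IsOne(e) holds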
+
+/**************************************************************************\
+
+                Modular Arithmetic (without pre-conditioning)
+
+Arithmetic mod f.
+
+All inputs and outputs are polynomials of degree less than deg(f), and
+deg(f) > 0.
+
+NOTE: if you want to do many computations with a fixed f, use the
+zz_pXModulus data structure and associated routines below for better
+performance.
+
+\**************************************************************************/
+
+void MulMod(zz_pX& x, const zz_pX& a, const zz_pX& b, const zz_pX& f);
+zz_pX MulMod(const zz_pX& a, const zz_pX& b, const zz_pX& f);
+// x = (a * b) % f
+
+void SqrMod(zz_pX& x, const zz_pX& a, const zz_pX& f);
+zz_pX SqrMod(const zz_pX& a, const zz_pX& f);
+// x = a^2 % f
+
+void MulByXMod(zz_pX& x, const zz_pX& a, const zz_pX& f);
+zz_pX MulByXMod(const zz_pX& a, const zz_pX& f);
+// x = (a * X) mod f
+
+void InvMod(zz_pX& x, const zz_pX& a, const zz_pX& f);
+zz_pX InvMod(const zz_pX& a, const zz_pX& f);
+// x = a^{-1} % f; error if a is not invertible
+
+long InvModStatus(zz_pX& x, const zz_pX& a, const zz_pX& f);
+// if (a, f) = 1, returns 0 and sets x = a^{-1} % f; otherwise,
+// returns 1 and sets x = (a, f)
+
+
+// for modular exponentiation, see below
+
+
+
+/**************************************************************************\
+
+                     Modular Arithmetic with Pre-Conditioning
+
+If you need to do a lot of arithmetic modulo a fixed f, build
+zz_pXModulus F for f.  This pre-computes information about f that
+speeds up subsequent computations. Required: deg(f) > 0 and LeadCoeff(f)
+invertible.
+
+As an example, the following routine computes the product modulo f of a vector
+of polynomials.
+
+#include "zz_pX.h"
+
+void product(zz_pX& x, const vec_zz_pX& v, const zz_pX& f)
+{
+   zz_pXModulus F(f);
+   zz_pX res;
+   res = 1;
+   long i;
+   for (i = 0; i < v.length(); i++)
+      MulMod(res, res, v[i], F);
+   x = res;
+}
+
+
+Note that automatic conversions are provided so that a zz_pX can
+be used wherever a zz_pXModulus is required, and a zz_pXModulus
+can be used wherever a zz_pX is required.
+
+
+
+\**************************************************************************/
+
+class zz_pXModulus {
+public:
+   zz_pXModulus(); // initially in an unusable state
+   ~zz_pXModulus();
+
+   zz_pXModulus(const zz_pXModulus&);  // copy
+
+   zz_pXModulus& operator=(const zz_pXModulus&);  // assignment
+
+   zz_pXModulus(const zz_pX& f); // initialize with f, deg(f) > 0
+
+   operator const zz_pX& () const;
+   // read-only access to f, implicit conversion operator
+
+   const zz_pX& val() const;
+   // read-only access to f, explicit notation
+
+};
+
+void build(zz_pXModulus& F, const zz_pX& f);
+// pre-computes information about f and stores it in F.
+// Note that the declaration zz_pXModulus F(f) is equivalent to
+// zz_pXModulus F; build(F, f).
+
+// In the following, f refers to the polynomial f supplied to the
+// build routine, and n = deg(f).
+
+long deg(const zz_pXModulus& F);  // return deg(f)
+
+void MulMod(zz_pX& x, const zz_pX& a, const zz_pX& b, const zz_pXModulus& F);
+zz_pX MulMod(const zz_pX& a, const zz_pX& b, const zz_pXModulus& F);
+// x = (a * b) % f; deg(a), deg(b) < n
+
+void SqrMod(zz_pX& x, const zz_pX& a, const zz_pXModulus& F);
+zz_pX SqrMod(const zz_pX& a, const zz_pXModulus& F);
+// x = a^2 % f; deg(a) < n
+
+void PowerMod(zz_pX& x, const zz_pX& a, const ZZ& e, const zz_pXModulus& F);
+zz_pX PowerMod(const zz_pX& a, const ZZ& e, const zz_pXModulus& F);
+
+void PowerMod(zz_pX& x, const zz_pX& a, long e, const zz_pXModulus& F);
+zz_pX PowerMod(const zz_pX& a, long e, const zz_pXModulus& F);
+
+// x = a^e % f; deg(a) < n (e may be negative)
+
+void PowerXMod(zz_pX& x, const ZZ& e, const zz_pXModulus& F);
+zz_pX PowerXMod(const ZZ& e, const zz_pXModulus& F);
+
+void PowerXMod(zz_pX& x, long e, const zz_pXModulus& F);
+zz_pX PowerXMod(long e, const zz_pXModulus& F);
+
+// x = X^e % f (e may be negative)
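+
+// For example, an illustrative sketch: the image of X under the
+// Frobenius map x -> x^p on zz_p[X]/(f) is
+//
+//    zz_pX h = PowerXMod(zz_p::modulus(), F);  // h = X^p mod f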
+
+void PowerXPlusAMod(zz_pX& x, const zz_p& a, const ZZ& e,
+                    const zz_pXModulus& F);
+
+zz_pX PowerXPlusAMod(const zz_p& a, const ZZ& e,
+                           const zz_pXModulus& F);
+
+void PowerXPlusAMod(zz_pX& x, const zz_p& a, long e,
+                    const zz_pXModulus& F);
+
+zz_pX PowerXPlusAMod(const zz_p& a, long e,
+                           const zz_pXModulus& F);
+
+// x = (X + a)^e % f (e may be negative)
+
+
+void rem(zz_pX& x, const zz_pX& a, const zz_pXModulus& F);
+// x = a % f
+
+void DivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pXModulus& F);
+// q = a/f, r = a%f
+
+void div(zz_pX& q, const zz_pX& a, const zz_pXModulus& F);
+// q = a/f
+
+// operator notation:
+
+zz_pX operator/(const zz_pX& a, const zz_pXModulus& F);
+zz_pX operator%(const zz_pX& a, const zz_pXModulus& F);
+
+zz_pX& operator/=(zz_pX& x, const zz_pXModulus& F);
+zz_pX& operator%=(zz_pX& x, const zz_pXModulus& F);
+
+
+
+
+/**************************************************************************\
+
+
+                        More Pre-Conditioning
+
+If you need to compute a * b % f for a fixed b, but for many a's, it
+is much more efficient to first build a zz_pXMultiplier B for b, and
+then use the MulMod routine below.
+
+Here is an example that multiplies each element of a vector by a fixed
+polynomial modulo f.
+
+#include "zz_pX.h"
+
+void mul(vec_zz_pX& v, const zz_pX& b, const zz_pX& f)
+{
+   zz_pXModulus F(f);
+   zz_pXMultiplier B(b, F);
+   long i;
+   for (i = 0; i < v.length(); i++)
+      MulMod(v[i], v[i], B, F);
+}
+
+Note that a (trivial) conversion operator from zz_pXMultiplier to zz_pX
+is provided, so that a zz_pXMultiplier can be used in a context
+where a zz_pX is required.
+
+
+\**************************************************************************/
+
+
+class zz_pXMultiplier {
+public:
+   zz_pXMultiplier(); // initially zero
+
+   zz_pXMultiplier(const zz_pX& b, const zz_pXModulus& F);
+      // initializes with b mod F, where deg(b) < deg(F)
+
+   zz_pXMultiplier(const zz_pXMultiplier&);
+   zz_pXMultiplier& operator=(const zz_pXMultiplier&);
+
+   ~zz_pXMultiplier();
+
+   const zz_pX& val() const; // read-only access to b
+
+};
+
+void build(zz_pXMultiplier& B, const zz_pX& b, const zz_pXModulus& F);
+// pre-computes information about b and stores it in B; deg(b) <
+// deg(F)
+
+void MulMod(zz_pX& x, const zz_pX& a, const zz_pXMultiplier& B,
+                                      const zz_pXModulus& F);
+
+zz_pX MulMod(const zz_pX& a, const zz_pXMultiplier& B,
+             const zz_pXModulus& F);
+
+// x = (a * b) % F; deg(a) < deg(F)
+
+/**************************************************************************\
+
+                             vectors of zz_pX's
+
+\**************************************************************************/
+
+
+typedef Vec<zz_pX> vec_zz_pX; // backward compatibility
+
+
+
+/**************************************************************************\
+
+                              Modular Composition
+
+Modular composition is the problem of computing g(h) mod f for
+polynomials f, g, and h.
+
+The algorithm employed is that of Brent & Kung (Fast algorithms for
+manipulating formal power series, JACM 25:581-595, 1978), which uses
+O(n^{1/2}) modular polynomial multiplications, and O(n^2) scalar
+operations.
+
+
+
+\**************************************************************************/
+
+void CompMod(zz_pX& x, const zz_pX& g, const zz_pX& h, const zz_pXModulus& F);
+zz_pX CompMod(const zz_pX& g, const zz_pX& h, const zz_pXModulus& F);
+// x = g(h) mod f; deg(h) < n
+
+void Comp2Mod(zz_pX& x1, zz_pX& x2, const zz_pX& g1, const zz_pX& g2,
+              const zz_pX& h, const zz_pXModulus& F);
+// xi = gi(h) mod f (i=1,2), deg(h) < n.
+
+void CompMod3(zz_pX& x1, zz_pX& x2, zz_pX& x3,
+              const zz_pX& g1, const zz_pX& g2, const zz_pX& g3,
+              const zz_pX& h, const zz_pXModulus& F);
+// xi = gi(h) mod f (i=1..3), deg(h) < n
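+
+// For example, an illustrative sketch (deg(h) < deg(f) is required,
+// so reduce h first if necessary):
+//
+//    zz_pXModulus F(f);
+//    zz_pX x = CompMod(g, h % f, F);   // x = g(h) mod f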
+
+
+/**************************************************************************\
+
+                     Composition with Pre-Conditioning
+
+If a single h is going to be used with many g's then you should build
+a zz_pXArgument for h, and then use the compose routine below.  The
+routine build computes and stores h, h^2, ..., h^m mod f.  After this
+pre-computation, composing a polynomial of degree roughly n with h
+takes n/m multiplies mod f, plus n^2 scalar multiplies.  Thus,
+increasing m increases the space requirement and the pre-computation
+time, but reduces the composition time.
+
+\**************************************************************************/
+
+
+struct zz_pXArgument {
+   vec_zz_pX H;
+};
+
+void build(zz_pXArgument& H, const zz_pX& h, const zz_pXModulus& F, long m);
+// Pre-computes information about h.  m > 0, deg(h) < n
+
+void CompMod(zz_pX& x, const zz_pX& g, const zz_pXArgument& H,
+             const zz_pXModulus& F);
+
+zz_pX CompMod(const zz_pX& g, const zz_pXArgument& H,
+             const zz_pXModulus& F);
+
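+// For example, an illustrative sketch (m, k and the arrays g[] and
+// x[] are hypothetical) of one h composed with many g's:
+//
+//    zz_pXArgument H;
+//    build(H, h, F, m);              // stores h, h^2, ..., h^m mod f
+//    for (long i = 0; i < k; i++)
+//       CompMod(x[i], g[i], H, F);   // reuses the precomputed powers
+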
+
+extern long zz_pXArgBound;
+
+// Initially 0.  If this is set to a value greater than zero, then
+// composition routines will allocate a table of no more than about
+// zz_pXArgBound KB.  Setting this value affects all compose routines
+// and the power projection and minimal polynomial routines below,
+// and indirectly affects many routines in zz_pXFactoring.
+
+
+
+
+/**************************************************************************\
+
+                     Faster Composition with Pre-Conditioning
+
+A new, experimental version of composition with preconditioning.
+This interface was introduced in NTL v9.6.3, and it should be
+considered a preliminary interface and subject to change (although
+it is likely to not change very much).
+
+Usage:
+    zz_pX x, g, h;
+    zz_pXModulus F;
+    zz_pXArgument H;
+    build(H, h, F);
+    zz_pXAltArgument H1;
+    build(H1, H, F);  // this keeps a pointer to H, so H must remain alive
+    CompMod(x, g, H1, F);  // x = g(h) mod f
+
+The idea is that H1 stores the data in H in an alternative format
+that allows for a more cache-friendly and more efficient execution
+of CompMod.  Depending on a variety of factors, this can be up to
+about 3x faster than the regular CompMod.
+
+
+\**************************************************************************/
+
+class zz_pXAltArgument {
+   // ...
+};
+
+void build(zz_pXAltArgument& altH, const zz_pXArgument& H, const zz_pXModulus& F);
+void CompMod(zz_pX& x, const zz_pX& g, const zz_pXAltArgument& A,
+             const zz_pXModulus& F);
+
+
+
+/**************************************************************************\
+
+                     power projection routines
+
+\**************************************************************************/
+
+void project(zz_p& x, const zz_pVector& a, const zz_pX& b);
+zz_p project(const zz_pVector& a, const zz_pX& b);
+// x = inner product of a with coefficient vector of b
+
+
+void ProjectPowers(vec_zz_p& x, const vec_zz_p& a, long k,
+                   const zz_pX& h, const zz_pXModulus& F);
+
+vec_zz_p ProjectPowers(const vec_zz_p& a, long k,
+                   const zz_pX& h, const zz_pXModulus& F);
+
+// Computes the vector
+
+//    project(a, 1), project(a, h), ..., project(a, h^{k-1} % f).  
+
+// This operation is the "transpose" of the modular composition operation.
+// Input and output may have "high order" zeroes stripped.
+
+void ProjectPowers(vec_zz_p& x, const vec_zz_p& a, long k,
+                   const zz_pXArgument& H, const zz_pXModulus& F);
+
+vec_zz_p ProjectPowers(const vec_zz_p& a, long k,
+                   const zz_pXArgument& H, const zz_pXModulus& F);
+
+// same as above, but uses a pre-computed zz_pXArgument
+
+
+void UpdateMap(vec_zz_p& x, const vec_zz_p& a,
+               const zz_pXMultiplier& B, const zz_pXModulus& F);
+
+vec_zz_p UpdateMap(const vec_zz_p& a,
+               const zz_pXMultiplier& B, const zz_pXModulus& F);
+
+// Computes the vector
+
+//    project(a, b), project(a, (b*X)%f), ..., project(a, (b*X^{n-1})%f)
+
+// Restriction: a.length() <= deg(F).
+// This is "transposed" MulMod by B.
+// Input vector may have "high order" zeroes stripped.
+// The output will always have high order zeroes stripped.
+
+
+/**************************************************************************\
+
+                              Minimum Polynomials
+
+These routines should be used with prime p.
+
+All of these routines implement the algorithm from [Shoup, J. Symbolic
+Comp. 17:371-391, 1994] and [Shoup, J. Symbolic Comp. 20:363-397,
+1995], based on transposed modular composition and the
+Berlekamp/Massey algorithm.
+
+\**************************************************************************/
+
+
+void MinPolySeq(zz_pX& h, const vec_zz_p& a, long m);
+// computes the minimum polynomial of a linearly generated sequence; m
+// is a bound on the degree of the polynomial; required: a.length() >=
+// 2*m
+
+void ProbMinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m);
+zz_pX ProbMinPolyMod(const zz_pX& g, const zz_pXModulus& F, long m);
+
+void ProbMinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F);
+zz_pX ProbMinPolyMod(const zz_pX& g, const zz_pXModulus& F);
+
+// computes the monic minimal polynomial of (g mod f).  m = a bound on
+// the degree of the minimal polynomial; in the second version, this
+// argument defaults to n.  The algorithm is probabilistic, always
+// returns a divisor of the minimal polynomial, and returns a proper
+// divisor with probability at most m/p.
+
+void MinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m);
+zz_pX MinPolyMod(const zz_pX& g, const zz_pXModulus& F, long m);
+
+void MinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F);
+zz_pX MinPolyMod(const zz_pX& g, const zz_pXModulus& F);
+// same as above, but guarantees that result is correct
+
+void IrredPoly(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m);
+zz_pX IrredPoly(const zz_pX& g, const zz_pXModulus& F, long m);
+
+void IrredPoly(zz_pX& h, const zz_pX& g, const zz_pXModulus& F);
+zz_pX IrredPoly(const zz_pX& g, const zz_pXModulus& F);
+
+// same as above, but assumes that f is irreducible, or at least that
+// the minimal poly of g is itself irreducible.  The algorithm is
+// deterministic (and is always correct).
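+
+// For example, an illustrative sketch (f monic, p prime):
+//
+//    zz_pXModulus F(f);
+//    zz_pX h = MinPolyMod(g % f, F);  // monic minimal polynomial of (g mod f)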
+
+
+/**************************************************************************\
+
+                   Traces, norms, resultants
+
+These routines should be used with prime p.
+
+\**************************************************************************/
+
+
+void TraceMod(zz_p& x, const zz_pX& a, const zz_pXModulus& F);
+zz_p TraceMod(const zz_pX& a, const zz_pXModulus& F);
+
+void TraceMod(zz_p& x, const zz_pX& a, const zz_pX& f);
+zz_p TraceMod(const zz_pX& a, const zz_pX& f);
+// x = Trace(a mod f); deg(a) < deg(f)
+
+
+void TraceVec(vec_zz_p& S, const zz_pX& f);
+vec_zz_p TraceVec(const zz_pX& f);
+// S[i] = Trace(X^i mod f), i = 0..deg(f)-1; 0 < deg(f)
+
+// The above routines implement the asymptotically fast trace
+// algorithm from [von zur Gathen and Shoup, Computational Complexity,
+// 1992].
+
+void NormMod(zz_p& x, const zz_pX& a, const zz_pX& f);
+zz_p NormMod(const zz_pX& a, const zz_pX& f);
+// x = Norm(a mod f); 0 < deg(f), deg(a) < deg(f)
+
+
+void resultant(zz_p& x, const zz_pX& a, const zz_pX& b);
+zz_p resultant(const zz_pX& a, const zz_pX& b);
+// x = resultant(a, b)
+
+
+void CharPolyMod(zz_pX& g, const zz_pX& a, const zz_pX& f);
+zz_pX CharPolyMod(const zz_pX& a, const zz_pX& f);
+// g = characteristic polynomial of (a mod f); 0 < deg(f), deg(g) <
+// deg(f).  This routine works for arbitrary f.  For irreducible f,
+// it is faster to use IrredPolyMod, and then exponentiate as
+// necessary, since in this case the characteristic polynomial
+// is a power of the minimal polynomial.
+
+
+/**************************************************************************\
+
+                           Miscellany
+
+
+\**************************************************************************/
+
+
+void clear(zz_pX& x); // x = 0
+void set(zz_pX& x); // x = 1
+
+void zz_pX::kill();
+// f.kill() sets f to 0 and frees all memory held by f.  Equivalent to
+// f.rep.kill().
+
+zz_pX::zz_pX(INIT_SIZE_TYPE, long n);
+// zz_pX(INIT_SIZE, n) initializes to zero, but space is pre-allocated
+// for n coefficients
+
+static const zz_pX& zero();
+// zz_pX::zero() is a read-only reference to 0
+
+void swap(zz_pX& x, zz_pX& y);
+// swap x and y (via "pointer swapping")
+
+
+zz_pX::zz_pX(long i, zz_p c);
+zz_pX::zz_pX(long i, long c);
+// initialize to c*X^i, provided for backward compatibility
+
+
diff --git a/thirdparty/linux/ntl/doc/lzz_pX.txt b/thirdparty/linux/ntl/doc/lzz_pX.txt
new file mode 100644
index 0000000000..4dc89ddbf5
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/lzz_pX.txt
@@ -0,0 +1,930 @@
+
+/**************************************************************************\
+
+MODULE: zz_pX
+
+SUMMARY:
+
+The class zz_pX implements polynomial arithmetic modulo p.
+
+Polynomial arithmetic is implemented using a combination of classical
+routines, Karatsuba, and FFT.
+
+\**************************************************************************/
+
+#include "zz_p.h"
+#include "vec_zz_p.h"
+
+class zz_pX {
+public:
+
+   zz_pX(); // initial value 0
+
+   zz_pX(const zz_pX& a); // copy
+   explicit zz_pX(zz_p a); // promotion
+   explicit zz_pX(long a); // promotion
+
+   zz_pX& operator=(const zz_pX& a); // assignment
+   zz_pX& operator=(zz_p a);
+   zz_pX& operator=(long a);
+
+   ~zz_pX(); // destructor
+
+   zz_pX(INIT_MONO_TYPE, long i, zz_p c);
+   zz_pX(INIT_MONO_TYPE, long i, long c);
+   // initialize to c*X^i, invoke as zz_pX(INIT_MONO, i, c)
+
+   zz_pX(INIT_MONO_TYPE, long i);
+   // initialize to X^i, invoke as zz_pX(INIT_MONO, i)
+
+   typedef zz_p coeff_type;
+
+   // ...
+
+
+};
+
+
+
+
+
+/**************************************************************************\
+
+                              Accessing coefficients
+
+The degree of a polynomial f is obtained as deg(f),
+where the zero polynomial, by definition, has degree -1.
+
+A polynomial f is represented as a coefficient vector.
+Coefficients may be accessed in one of two ways.
+
+The safe, high-level method is to call the function
+coeff(f, i) to get the coefficient of X^i in the polynomial f,
+and to call the function SetCoeff(f, i, a) to set the coefficient
+of X^i in f to the scalar a.
+
+One can also access the coefficients more directly via a lower level
+interface.  The coefficient of X^i in f may be accessed using
+subscript notation f[i].  In addition, one may write f.SetLength(n)
+to set the length of the underlying coefficient vector to n,
+and f.SetMaxLength(n) to allocate space for n coefficients,
+without changing the coefficient vector itself.
+
+After setting coefficients using this low-level interface,
+one must ensure that leading zeros in the coefficient vector
+are stripped afterwards by calling the function f.normalize().
+
+
+NOTE: the coefficient vector of f may also be accessed directly
+as f.rep; however, this is not recommended. Also, for a properly
+normalized polynomial f, we have f.rep.length() == deg(f)+1,
+and deg(f) >= 0  =>  f.rep[deg(f)] != 0.
+
+\**************************************************************************/
+
+
+
+long deg(const zz_pX& a);  // return deg(a); deg(0) == -1.
+
+const zz_p coeff(const zz_pX& a, long i);
+// returns the coefficient of X^i, or zero if i not in range
+
+const zz_p LeadCoeff(const zz_pX& a);
+// returns leading term of a, or zero if a == 0
+
+const zz_p ConstTerm(const zz_pX& a);
+// returns constant term of a, or zero if a == 0
+
+void SetCoeff(zz_pX& x, long i, zz_p a);
+void SetCoeff(zz_pX& x, long i, long a);
+// makes coefficient of X^i equal to a; error is raised if i < 0
+
+void SetCoeff(zz_pX& x, long i);
+// makes coefficient of X^i equal to 1;  error is raised if i < 0
+
+void SetX(zz_pX& x); // x is set to the monomial X
+
+long IsX(const zz_pX& a); // test if a = X
+
+
+
+
+zz_p& zz_pX::operator[](long i);
+const zz_p& zz_pX::operator[](long i) const;
+// indexing operators: f[i] is the coefficient of X^i ---
+// i should satisfy i >= 0 and i <= deg(f).
+// No range checking (unless NTL_RANGE_CHECK is defined).
+
+void zz_pX::SetLength(long n);
+// f.SetLength(n) sets the length of the underlying coefficient
+// vector to n --- after this call, indexing f[i] for i = 0..n-1
+// is valid.
+
+void zz_pX::normalize();
+// f.normalize() strips leading zeros from coefficient vector of f
+
+void zz_pX::SetMaxLength(long n);
+// f.SetMaxLength(n) pre-allocates space for n coefficients.  The
+// polynomial that f represents is unchanged.
+
+
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+long operator==(const zz_pX& a, const zz_pX& b);
+long operator!=(const zz_pX& a, const zz_pX& b);
+
+long IsZero(const zz_pX& a); // test for 0
+long IsOne(const zz_pX& a); // test for 1
+
+// PROMOTIONS: operators ==, != promote {long, zz_p} to zz_pX on (a, b)
+
+
+/**************************************************************************\
+
+                                   Addition
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pX operator+(const zz_pX& a, const zz_pX& b);
+zz_pX operator-(const zz_pX& a, const zz_pX& b);
+
+zz_pX operator-(const zz_pX& a); // unary -
+
+zz_pX& operator+=(zz_pX& x, const zz_pX& a);
+zz_pX& operator+=(zz_pX& x, zz_p a);
+zz_pX& operator+=(zz_pX& x, long a);
+
+zz_pX& operator-=(zz_pX& x, const zz_pX& a);
+zz_pX& operator-=(zz_pX& x, zz_p a);
+zz_pX& operator-=(zz_pX& x, long a);
+
+zz_pX& operator++(zz_pX& x);  // prefix
+void operator++(zz_pX& x, int);  // postfix
+
+zz_pX& operator--(zz_pX& x);  // prefix
+void operator--(zz_pX& x, int);  // postfix
+
+// procedural versions:
+
+
+void add(zz_pX& x, const zz_pX& a, const zz_pX& b); // x = a + b
+void sub(zz_pX& x, const zz_pX& a, const zz_pX& b); // x = a - b
+void negate(zz_pX& x, const zz_pX& a); // x = -a
+
+// PROMOTIONS: binary +, - and procedures add, sub promote {long, zz_p}
+// to zz_pX on (a, b).
+
+
+/**************************************************************************\
+
+                               Multiplication
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pX operator*(const zz_pX& a, const zz_pX& b);
+
+zz_pX& operator*=(zz_pX& x, const zz_pX& a);
+zz_pX& operator*=(zz_pX& x, zz_p a);
+zz_pX& operator*=(zz_pX& x, long a);
+
+// procedural versions:
+
+
+void mul(zz_pX& x, const zz_pX& a, const zz_pX& b); // x = a * b
+
+void sqr(zz_pX& x, const zz_pX& a); // x = a^2
+zz_pX sqr(const zz_pX& a);
+
+// PROMOTIONS: operator * and procedure mul promote {long, zz_p} to zz_pX
+// on (a, b).
+
+void power(zz_pX& x, const zz_pX& a, long e);  // x = a^e (e >= 0)
+zz_pX power(const zz_pX& a, long e);
+
+
+/**************************************************************************\
+
+                               Shift Operations
+
+LeftShift by n means multiplication by X^n
+RightShift by n means division by X^n
+
+A negative shift amount reverses the direction of the shift.
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pX operator<<(const zz_pX& a, long n);
+zz_pX operator>>(const zz_pX& a, long n);
+
+zz_pX& operator<<=(zz_pX& x, long n);
+zz_pX& operator>>=(zz_pX& x, long n);
+
+// procedural versions:
+
+void LeftShift(zz_pX& x, const zz_pX& a, long n);
+zz_pX LeftShift(const zz_pX& a, long n);
+
+void RightShift(zz_pX& x, const zz_pX& a, long n);
+zz_pX RightShift(const zz_pX& a, long n);
+
+
+
+/**************************************************************************\
+
+                                  Division
+
+\**************************************************************************/
+
+// operator notation:
+
+zz_pX operator/(const zz_pX& a, const zz_pX& b);
+zz_pX operator%(const zz_pX& a, const zz_pX& b);
+
+zz_pX& operator/=(zz_pX& x, const zz_pX& a);
+zz_pX& operator/=(zz_pX& x, zz_p a);
+zz_pX& operator/=(zz_pX& x, long a);
+
+zz_pX& operator%=(zz_pX& x, const zz_pX& b);
+
+
+// procedural versions:
+
+
+void DivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pX& b);
+// q = a/b, r = a%b
+
+void div(zz_pX& q, const zz_pX& a, const zz_pX& b);
+// q = a/b
+
+void rem(zz_pX& r, const zz_pX& a, const zz_pX& b);
+// r = a%b
+
+long divide(zz_pX& q, const zz_pX& a, const zz_pX& b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0
+
+long divide(const zz_pX& a, const zz_pX& b);
+// if b | a, returns 1; otherwise returns 0
+
+// PROMOTIONS: operator / and procedure div promote {long, zz_p} to zz_pX
+// on (a, b).
+
+
+/**************************************************************************\
+
+                                   GCD's
+
+These routines are intended for use when p is prime.
+
+\**************************************************************************/
+
+
+void GCD(zz_pX& x, const zz_pX& a, const zz_pX& b);
+zz_pX GCD(const zz_pX& a, const zz_pX& b);
+// x = GCD(a, b),  x is always monic (or zero if a==b==0).
+
+
+void XGCD(zz_pX& d, zz_pX& s, zz_pX& t, const zz_pX& a, const zz_pX& b);
+// d = gcd(a,b), a s + b t = d
+
+
+// NOTE: A classical algorithm is used, switching over to a
+// "half-GCD" algorithm for large degree
+
+
+/**************************************************************************\
+
+                                  Input/Output
+
+I/O format:
+
+   [a_0 a_1 ... a_n],
+
+represents the polynomial a_0 + a_1*X + ... + a_n*X^n.
+
+On output, all coefficients will be integers between 0 and p-1, and
+a_n not zero (the zero polynomial is [ ]).  On input, the coefficients
+are arbitrary integers which are reduced modulo p, and leading zeros
+stripped.
+
+\**************************************************************************/
+
+istream& operator>>(istream& s, zz_pX& x);
+ostream& operator<<(ostream& s, const zz_pX& a);
+
+
+/**************************************************************************\
+
+                              Some utility routines
+
+\**************************************************************************/
+
+
+void diff(zz_pX& x, const zz_pX& a);
+zz_pX diff(const zz_pX& a);
+// x = derivative of a
+
+
+void MakeMonic(zz_pX& x);
+// if x != 0 makes x into its monic associate; LeadCoeff(x) must be
+// invertible in this case.
+
+void reverse(zz_pX& x, const zz_pX& a, long hi);
+zz_pX reverse(const zz_pX& a, long hi);
+
+void reverse(zz_pX& x, const zz_pX& a);
+zz_pX reverse(const zz_pX& a);
+
+// x = reverse of a[0]..a[hi] (hi >= -1);
+// hi defaults to deg(a) in second version
+
+void VectorCopy(vec_zz_p& x, const zz_pX& a, long n);
+vec_zz_p VectorCopy(const zz_pX& a, long n);
+// x = copy of coefficient vector of a of length exactly n.
+// input is truncated or padded with zeroes as appropriate.
+
+
+
+
+
+/**************************************************************************\
+
+                             Random Polynomials
+
+\**************************************************************************/
+
+void random(zz_pX& x, long n);
+zz_pX random_zz_pX(long n);
+// x = random polynomial of degree < n
+
+
+/**************************************************************************\
+
+                    Polynomial Evaluation and related problems
+
+\**************************************************************************/
+
+
+void BuildFromRoots(zz_pX& x, const vec_zz_p& a);
+zz_pX BuildFromRoots(const vec_zz_p& a);
+// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n =
+// a.length()
+
+void eval(zz_p& b, const zz_pX& f, zz_p a);
+zz_p eval(const zz_pX& f, zz_p a);
+// b = f(a)
+
+void eval(vec_zz_p& b, const zz_pX& f, const vec_zz_p& a);
+vec_zz_p eval(const zz_pX& f, const vec_zz_p& a);
+//  b.SetLength(a.length());  b[i] = f(a[i]) for 0 <= i < a.length()
+
+void interpolate(zz_pX& f, const vec_zz_p& a, const vec_zz_p& b);
+zz_pX interpolate(const vec_zz_p& a, const vec_zz_p& b);
+// interpolates the polynomial f satisfying f(a[i]) = b[i].  p should
+// be prime.
+
+/**************************************************************************\
+
+                       Arithmetic mod X^n
+
+It is required that n >= 0, otherwise an error is raised.
+
+\**************************************************************************/
+
+void trunc(zz_pX& x, const zz_pX& a, long n); // x = a % X^n
+zz_pX trunc(const zz_pX& a, long n);
+
+void MulTrunc(zz_pX& x, const zz_pX& a, const zz_pX& b, long n);
+zz_pX MulTrunc(const zz_pX& a, const zz_pX& b, long n);
+// x = a * b % X^n
+
+void SqrTrunc(zz_pX& x, const zz_pX& a, long n);
+zz_pX SqrTrunc(const zz_pX& a, long n);
+// x = a^2 % X^n
+
+void InvTrunc(zz_pX& x, const zz_pX& a, long n);
+zz_pX InvTrunc(const zz_pX& a, long n);
+// computes x = a^{-1} % X^n.  Must have ConstTerm(a) invertible.
+
+/**************************************************************************\
+
+                Modular Arithmetic (without pre-conditioning)
+
+Arithmetic mod f.
+
+All inputs and outputs are polynomials of degree less than deg(f), and
+deg(f) > 0.
+
+NOTE: if you want to do many computations with a fixed f, use the
+zz_pXModulus data structure and associated routines below for better
+performance.
+
+\**************************************************************************/
+
+void MulMod(zz_pX& x, const zz_pX& a, const zz_pX& b, const zz_pX& f);
+zz_pX MulMod(const zz_pX& a, const zz_pX& b, const zz_pX& f);
+// x = (a * b) % f
+
+void SqrMod(zz_pX& x, const zz_pX& a, const zz_pX& f);
+zz_pX SqrMod(const zz_pX& a, const zz_pX& f);
+// x = a^2 % f
+
+void MulByXMod(zz_pX& x, const zz_pX& a, const zz_pX& f);
+zz_pX MulByXMod(const zz_pX& a, const zz_pX& f);
+// x = (a * X) mod f
+
+void InvMod(zz_pX& x, const zz_pX& a, const zz_pX& f);
+zz_pX InvMod(const zz_pX& a, const zz_pX& f);
+// x = a^{-1} % f; error if a is not invertible
+
+long InvModStatus(zz_pX& x, const zz_pX& a, const zz_pX& f);
+// if (a, f) = 1, returns 0 and sets x = a^{-1} % f; otherwise,
+// returns 1 and sets x = (a, f)
+
+
+// for modular exponentiation, see below
+
+
+
+/**************************************************************************\
+
+                     Modular Arithmetic with Pre-Conditioning
+
+If you need to do a lot of arithmetic modulo a fixed f, build
+zz_pXModulus F for f.  This pre-computes information about f that
+speeds up subsequent computations. Required: deg(f) > 0 and LeadCoeff(f)
+invertible.
+
+As an example, the following routine computes the product modulo f of a vector
+of polynomials.
+
+#include "zz_pX.h"
+
+void product(zz_pX& x, const vec_zz_pX& v, const zz_pX& f)
+{
+   zz_pXModulus F(f);
+   zz_pX res;
+   res = 1;
+   long i;
+   for (i = 0; i < v.length(); i++)
+      MulMod(res, res, v[i], F);
+   x = res;
+}
+
+
+Note that automatic conversions are provided so that a zz_pX can
+be used wherever a zz_pXModulus is required, and a zz_pXModulus
+can be used wherever a zz_pX is required.
+
+
+
+\**************************************************************************/
+
+class zz_pXModulus {
+public:
+   zz_pXModulus(); // initially in an unusable state
+   ~zz_pXModulus();
+
+   zz_pXModulus(const zz_pXModulus&);  // copy
+
+   zz_pXModulus& operator=(const zz_pXModulus&);  // assignment
+
+   zz_pXModulus(const zz_pX& f); // initialize with f, deg(f) > 0
+
+   operator const zz_pX& () const;
+   // read-only access to f, implicit conversion operator
+
+   const zz_pX& val() const;
+   // read-only access to f, explicit notation
+
+};
+
+void build(zz_pXModulus& F, const zz_pX& f);
+// pre-computes information about f and stores it in F.
+// Note that the declaration zz_pXModulus F(f) is equivalent to
+// zz_pXModulus F; build(F, f).
+
+// In the following, f refers to the polynomial f supplied to the
+// build routine, and n = deg(f).
+ +long deg(const zz_pXModulus& F); // return deg(f) + +void MulMod(zz_pX& x, const zz_pX& a, const zz_pX& b, const zz_pXModulus& F); +zz_pX MulMod(const zz_pX& a, const zz_pX& b, const zz_pXModulus& F); +// x = (a * b) % f; deg(a), deg(b) < n + +void SqrMod(zz_pX& x, const zz_pX& a, const zz_pXModulus& F); +zz_pX SqrMod(const zz_pX& a, const zz_pXModulus& F); +// x = a^2 % f; deg(a) < n + +void PowerMod(zz_pX& x, const zz_pX& a, const ZZ& e, const zz_pXModulus& F); +zz_pX PowerMod(const zz_pX& a, const ZZ& e, const zz_pXModulus& F); + +void PowerMod(zz_pX& x, const zz_pX& a, long e, const zz_pXModulus& F); +zz_pX PowerMod(const zz_pX& a, long e, const zz_pXModulus& F); + +// x = a^e % f; deg(a) < n (e may be negative) + +void PowerXMod(zz_pX& x, const ZZ& e, const zz_pXModulus& F); +zz_pX PowerXMod(const ZZ& e, const zz_pXModulus& F); + +void PowerXMod(zz_pX& x, long e, const zz_pXModulus& F); +zz_pX PowerXMod(long e, const zz_pXModulus& F); + +// x = X^e % f (e may be negative) + +void PowerXPlusAMod(zz_pX& x, const zz_p& a, const ZZ& e, + const zz_pXModulus& F); + +zz_pX PowerXPlusAMod(const zz_p& a, const ZZ& e, + const zz_pXModulus& F); + +void PowerXPlusAMod(zz_pX& x, const zz_p& a, long e, + const zz_pXModulus& F); + +zz_pX PowerXPlusAMod(const zz_p& a, long e, + const zz_pXModulus& F); + +// x = (X + a)^e % f (e may be negative) + + +void rem(zz_pX& x, const zz_pX& a, const zz_pXModulus& F); +// x = a % f + +void DivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pXModulus& F); +// q = a/f, r = a%f + +void div(zz_pX& q, const zz_pX& a, const zz_pXModulus& F); +// q = a/f + +// operator notation: + +zz_pX operator/(const zz_pX& a, const zz_pXModulus& F); +zz_pX operator%(const zz_pX& a, const zz_pXModulus& F); + +zz_pX& operator/=(zz_pX& x, const zz_pXModulus& F); +zz_pX& operator%=(zz_pX& x, const zz_pXModulus& F); + + + + +/**************************************************************************\ + + + More Pre-Conditioning + +If you need to compute a * b % f for a fixed b, but for many a's, it +is much more efficient to first build a zz_pXMultiplier B for b, and +then use the MulMod routine below. + +Here is an example that multiplies each element of a vector by a fixed +polynomial modulo f. + +#include "zz_pX.h" + +void mul(vec_zz_pX& v, const zz_pX& b, const zz_pX& f) +{ + zz_pXModulus F(f); + zz_pXMultiplier B(b, F); + long i; + for (i = 0; i < v.length(); i++) + MulMod(v[i], v[i], B, F); +} + +Note that a (trivial) conversion operator from zz_pXMultiplier to zz_pX +is provided, so that a zz_pXMultiplier can be used in a context +where a zz_pX is required. 
+
+
+\**************************************************************************/
+
+
+class zz_pXMultiplier {
+public:
+   zz_pXMultiplier(); // initially zero
+
+   zz_pXMultiplier(const zz_pX& b, const zz_pXModulus& F);
+   // initializes with b mod F, where deg(b) < deg(F)
+
+   zz_pXMultiplier(const zz_pXMultiplier&);
+   zz_pXMultiplier& operator=(const zz_pXMultiplier&);
+
+   ~zz_pXMultiplier();
+
+   const zz_pX& val() const; // read-only access to b
+
+};
+
+void build(zz_pXMultiplier& B, const zz_pX& b, const zz_pXModulus& F);
+// pre-computes information about b and stores it in B; deg(b) <
+// deg(F)
+
+void MulMod(zz_pX& x, const zz_pX& a, const zz_pXMultiplier& B,
+            const zz_pXModulus& F);
+
+zz_pX MulMod(const zz_pX& a, const zz_pXMultiplier& B,
+             const zz_pXModulus& F);
+
+// x = (a * b) % F; deg(a) < deg(F)
+
+/**************************************************************************\
+
+                          vectors of zz_pX's
+
+\**************************************************************************/
+
+
+typedef Vec<zz_pX> vec_zz_pX; // backward compatibility
+
+
+
+/**************************************************************************\
+
+                          Modular Composition
+
+Modular composition is the problem of computing g(h) mod f for
+polynomials f, g, and h.
+
+The algorithm employed is that of Brent & Kung (Fast algorithms for
+manipulating formal power series, JACM 25:581-595, 1978), which uses
+O(n^{1/2}) modular polynomial multiplications, and O(n^2) scalar
+operations.
+
+
+
+\**************************************************************************/
+
+void CompMod(zz_pX& x, const zz_pX& g, const zz_pX& h, const zz_pXModulus& F);
+zz_pX CompMod(const zz_pX& g, const zz_pX& h, const zz_pXModulus& F);
+// x = g(h) mod f; deg(h) < n
+
+void Comp2Mod(zz_pX& x1, zz_pX& x2, const zz_pX& g1, const zz_pX& g2,
+              const zz_pX& h, const zz_pXModulus& F);
+// xi = gi(h) mod f (i=1,2), deg(h) < n.
+
+void CompMod3(zz_pX& x1, zz_pX& x2, zz_pX& x3,
+              const zz_pX& g1, const zz_pX& g2, const zz_pX& g3,
+              const zz_pX& h, const zz_pXModulus& F);
+// xi = gi(h) mod f (i=1..3), deg(h) < n
+
+
+/**************************************************************************\
+
+                  Composition with Pre-Conditioning
+
+If a single h is going to be used with many g's then you should build
+a zz_pXArgument for h, and then use the compose routine below.  The
+routine build computes and stores h, h^2, ..., h^m mod f.  After this
+pre-computation, composing a polynomial of degree roughly n with h
+takes n/m multiplies mod f, plus n^2 scalar multiplies.  Thus,
+increasing m increases the space requirement and the pre-computation
+time, but reduces the composition time.
+
+\**************************************************************************/
+
+
+struct zz_pXArgument {
+   vec_zz_pX H;
+};
+
+void build(zz_pXArgument& H, const zz_pX& h, const zz_pXModulus& F, long m);
+// Pre-Computes information about h.  m > 0, deg(h) < n
+
+void CompMod(zz_pX& x, const zz_pX& g, const zz_pXArgument& H,
+             const zz_pXModulus& F);
+
+zz_pX CompMod(const zz_pX& g, const zz_pXArgument& H,
+              const zz_pXModulus& F);
+
+
+extern long zz_pXArgBound;
+
+// Initially 0.  If this is set to a value greater than zero, then
+// composition routines will allocate a table of no more than about
+// zz_pXArgBound KB.  Setting this value affects all compose routines
+// and the power projection and minimal polynomial routines below,
+// and indirectly affects many routines in zz_pXFactoring.
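+
+// A usage sketch for the routines above (f, g, h are hypothetical
+// inputs with deg(g), deg(h) < deg(f); the choice m = 32 is arbitrary):
+//
+//    zz_pXModulus F(f);
+//    zz_pXArgument H;
+//    build(H, h, F, 32);           // stores h, h^2, ..., h^32 mod f
+//    zz_pX x = CompMod(g, H, F);   // x = g(h) mod f; H is reusable for other g's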
+
+
+// @anchor{compmod}
+
+/**************************************************************************\
+
+              Faster Composition with Pre-Conditioning
+
+A new, experimental version of composition with preconditioning.
+This interface was introduced in NTL v9.6.3, and it should be
+considered a preliminary interface and subject to change (although
+it is likely to not change very much).
+
+Usage:
+   zz_pX x, g, h;
+   zz_pXModulus F;
+   zz_pXArgument H;
+   build(H, h, F);
+   zz_pXAltArgument H1;
+   build(H1, H, F); // this keeps a pointer to H, so H must remain alive
+   CompMod(x, g, H1, F); // x = g(h) mod f
+
+The idea is that H1 stores the data in H in an alternative format
+that allows for a more cache-friendly and more efficient execution
+of CompMod.  Depending on a variety of factors, this can be up to
+about 3x faster than the regular CompMod.
+
+
+\**************************************************************************/
+
+class zz_pXAltArgument {
+   // ...
+};
+
+void build(zz_pXAltArgument& altH, const zz_pXArgument& H, const zz_pXModulus& F);
+void CompMod(zz_pX& x, const zz_pX& g, const zz_pXAltArgument& A,
+             const zz_pXModulus& F);
+
+
+
+/**************************************************************************\
+
+                     power projection routines
+
+\**************************************************************************/
+
+void project(zz_p& x, const vec_zz_p& a, const zz_pX& b);
+zz_p project(const vec_zz_p& a, const zz_pX& b);
+// x = inner product of a with coefficient vector of b
+
+
+void ProjectPowers(vec_zz_p& x, const vec_zz_p& a, long k,
+                   const zz_pX& h, const zz_pXModulus& F);
+
+vec_zz_p ProjectPowers(const vec_zz_p& a, long k,
+                       const zz_pX& h, const zz_pXModulus& F);
+
+// Computes the vector
+
+//    project(a, 1), project(a, h), ..., project(a, h^{k-1} % f).
+
+// This operation is the "transpose" of the modular composition operation.
+// Input and output may have "high order" zeroes stripped.
+
+void ProjectPowers(vec_zz_p& x, const vec_zz_p& a, long k,
+                   const zz_pXArgument& H, const zz_pXModulus& F);
+
+vec_zz_p ProjectPowers(const vec_zz_p& a, long k,
+                       const zz_pXArgument& H, const zz_pXModulus& F);
+
+// same as above, but uses a pre-computed zz_pXArgument
+
+
+void UpdateMap(vec_zz_p& x, const vec_zz_p& a,
+               const zz_pXMultiplier& B, const zz_pXModulus& F);
+
+vec_zz_p UpdateMap(const vec_zz_p& a,
+                   const zz_pXMultiplier& B, const zz_pXModulus& F);
+
+// Computes the vector
+
+//    project(a, b), project(a, (b*X)%f), ..., project(a, (b*X^{n-1})%f)
+
+// Restriction: a.length() <= deg(F).
+// This is "transposed" MulMod by B.
+// Input vector may have "high order" zeroes stripped.
+// The output will always have high order zeroes stripped.
+
+
+/**************************************************************************\
+
+                        Minimum Polynomials
+
+These routines should be used with prime p.
+
+All of these routines implement the algorithm from [Shoup, J. Symbolic
+Comp. 17:371-391, 1994] and [Shoup, J. Symbolic Comp. 20:363-397,
+1995], based on transposed modular composition and the
+Berlekamp/Massey algorithm.
+
+\**************************************************************************/
+
+
+void MinPolySeq(zz_pX& h, const vec_zz_p& a, long m);
+// computes the minimum polynomial of a linearly generated sequence; m
+// is a bound on the degree of the polynomial; required: a.length() >=
+// 2*m
+
+void ProbMinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m);
+zz_pX ProbMinPolyMod(const zz_pX& g, const zz_pXModulus& F, long m);
+
+void ProbMinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F);
+zz_pX ProbMinPolyMod(const zz_pX& g, const zz_pXModulus& F);
+
+// computes the monic minimal polynomial of (g mod f).  m = a bound on
+// the degree of the minimal polynomial; in the second version, this
+// argument defaults to n.  The algorithm is probabilistic, always
+// returns a divisor of the minimal polynomial, and returns a proper
+// divisor with probability at most m/p.
+
+void MinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m);
+zz_pX MinPolyMod(const zz_pX& g, const zz_pXModulus& F, long m);
+
+void MinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F);
+zz_pX MinPolyMod(const zz_pX& g, const zz_pXModulus& F);
+// same as above, but guarantees that result is correct
+
+void IrredPoly(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m);
+zz_pX IrredPoly(const zz_pX& g, const zz_pXModulus& F, long m);
+
+void IrredPoly(zz_pX& h, const zz_pX& g, const zz_pXModulus& F);
+zz_pX IrredPoly(const zz_pX& g, const zz_pXModulus& F);
+
+// same as above, but assumes that f is irreducible, or at least that
+// the minimal poly of g is itself irreducible.  The algorithm is
+// deterministic (and is always correct).
+
+
+/**************************************************************************\
+
+                   Traces, norms, resultants
+
+These routines should be used with prime p.
+
+\**************************************************************************/
+
+
+void TraceMod(zz_p& x, const zz_pX& a, const zz_pXModulus& F);
+zz_p TraceMod(const zz_pX& a, const zz_pXModulus& F);
+
+void TraceMod(zz_p& x, const zz_pX& a, const zz_pX& f);
+zz_p TraceMod(const zz_pX& a, const zz_pX& f);
+// x = Trace(a mod f); deg(a) < deg(f)
+
+
+void TraceVec(vec_zz_p& S, const zz_pX& f);
+vec_zz_p TraceVec(const zz_pX& f);
+// S[i] = Trace(X^i mod f), i = 0..deg(f)-1; 0 < deg(f)
+
+// The above routines implement the asymptotically fast trace
+// algorithm from [von zur Gathen and Shoup, Computational Complexity,
+// 1992].
+
+void NormMod(zz_p& x, const zz_pX& a, const zz_pX& f);
+zz_p NormMod(const zz_pX& a, const zz_pX& f);
+// x = Norm(a mod f); 0 < deg(f), deg(a) < deg(f)
+
+
+void resultant(zz_p& x, const zz_pX& a, const zz_pX& b);
+zz_p resultant(const zz_pX& a, const zz_pX& b);
+// x = resultant(a, b)
+
+
+void CharPolyMod(zz_pX& g, const zz_pX& a, const zz_pX& f);
+zz_pX CharPolyMod(const zz_pX& a, const zz_pX& f);
+// g = characteristic polynomial of (a mod f); 0 < deg(f), deg(g) <
+// deg(f).  This routine works for arbitrary f.  For irreducible f,
+// it is faster to use IrredPoly, and then exponentiate as
+// necessary, since in this case the characteristic polynomial
+// is a power of the minimal polynomial.
+
+
+/**************************************************************************\
+
+                           Miscellany
+
+
+\**************************************************************************/
+
+
+void clear(zz_pX& x); // x = 0
+void set(zz_pX& x); // x = 1
+
+void zz_pX::kill();
+// f.kill() sets f to 0 and frees all memory held by f.  Equivalent to
+// f.rep.kill().
+
+zz_pX::zz_pX(INIT_SIZE_TYPE, long n);
+// zz_pX(INIT_SIZE, n) initializes to zero, but space is pre-allocated
+// for n coefficients
+
+static const zz_pX& zero();
+// zz_pX::zero() is a read-only reference to 0
+
+void swap(zz_pX& x, zz_pX& y);
+// swap x and y (via "pointer swapping")
+
+
+zz_pX::zz_pX(long i, zz_p c);
+zz_pX::zz_pX(long i, long c);
+  // initialize to c*X^i, provided for backward compatibility
diff --git a/thirdparty/linux/ntl/doc/lzz_pXFactoring.cpp.html b/thirdparty/linux/ntl/doc/lzz_pXFactoring.cpp.html
new file mode 100644
index 0000000000..25b18091e1
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/lzz_pXFactoring.cpp.html
@@ -0,0 +1,187 @@
+
+/**************************************************************************\
+
+MODULE: zz_pXFactoring
+
+SUMMARY:
+
+Routines are provided for factorization of polynomials over zz_p, as
+well as routines for related problems such as testing irreducibility
+and constructing irreducible polynomials of given degree.
+
+\**************************************************************************/
+
+#include "zz_pX.h"
+#include "pair_zz_pX_long.h"
+
+
+void SquareFreeDecomp(vec_pair_zz_pX_long& u, const zz_pX& f);
+vec_pair_zz_pX_long SquareFreeDecomp(const zz_pX& f);
+
+// Performs square-free decomposition.  f must be monic.  If f =
+// prod_i g_i^i, then u is set to a list of pairs (g_i, i).  The list
+// is in increasing order of i, with trivial terms (i.e., g_i = 1)
+// deleted.
+
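+// A small sketch (f a hypothetical monic input): the decomposition can
+// be checked by re-assembling it with the mul routine further below.
+//
+//    vec_pair_zz_pX_long u;
+//    SquareFreeDecomp(u, f);   // pairs (u[i].a, u[i].b), f = prod u[i].a ^ u[i].b
+//    zz_pX g = mul(u);         // g == f
+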
+
+void FindRoots(vec_zz_p& x, const zz_pX& f);
+vec_zz_p FindRoots(const zz_pX& f);
+
+// f is monic, and has deg(f) distinct roots.  returns the list of
+// roots
+
+void FindRoot(zz_p& root, const zz_pX& f);
+zz_p FindRoot(const zz_pX& f);
+
+// finds a single root of f.  assumes that f is monic and splits into
+// distinct linear factors
+
+
+void SFBerlekamp(vec_zz_pX& factors, const zz_pX& f, long verbose=0);
+vec_zz_pX  SFBerlekamp(const zz_pX& f, long verbose=0);
+
+// Assumes f is square-free and monic.  returns list of factors of f.
+// Uses "Berlekamp" approach, as described in detail in [Shoup,
+// J. Symbolic Comp. 20:363-397, 1995].
+
+void berlekamp(vec_pair_zz_pX_long& factors, const zz_pX& f,
+               long verbose=0);
+vec_pair_zz_pX_long berlekamp(const zz_pX& f, long verbose=0);
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SFBerlekamp.
+
+
+
+void NewDDF(vec_pair_zz_pX_long& factors, const zz_pX& f, const zz_pX& h,
+         long verbose=0);
+
+vec_pair_zz_pX_long NewDDF(const zz_pX& f, const zz_pX& h,
+         long verbose=0);
+
+// This computes a distinct-degree factorization.  The input must be
+// monic and square-free.  factors is set to a list of pairs (g, d),
+// where g is the product of all irreducible factors of f of degree d.
+// Only nontrivial pairs (i.e., g != 1) are included.  The polynomial
+// h is assumed to be equal to X^p mod f.  This routine implements the
+// baby step/giant step algorithm of [Kaltofen and Shoup, STOC 1995],
+// further described in [Shoup, J. Symbolic Comp. 20:363-397, 1995].
+
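+// Sketch of the h convention (f a hypothetical monic square-free
+// input): h is typically obtained by modular exponentiation of X.
+//
+//    zz_pXModulus F(f);
+//    zz_pX h = PowerXMod(zz_p::modulus(), F);   // h = X^p mod f
+//    vec_pair_zz_pX_long factors = NewDDF(f, h);
+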
+void EDF(vec_zz_pX& factors, const zz_pX& f, const zz_pX& h,
+         long d, long verbose=0);
+
+vec_zz_pX EDF(const zz_pX& f, const zz_pX& h,
+         long d, long verbose=0);
+
+// Performs equal-degree factorization.  f is monic, square-free, and
+// all irreducible factors have same degree.  h = X^p mod f.  d =
+// degree of irreducible factors of f.  This routine implements the
+// algorithm of [von zur Gathen and Shoup, Computational Complexity
+// 2:187-224, 1992]
+
+
+void RootEDF(vec_zz_pX& factors, const zz_pX& f, long verbose=0);
+vec_zz_pX RootEDF(const zz_pX& f, long verbose=0);
+
+// EDF for d==1
+
+void SFCanZass(vec_zz_pX& factors, const zz_pX& f, long verbose=0);
+vec_zz_pX SFCanZass(const zz_pX& f, long verbose=0);
+
+// Assumes f is monic and square-free.  returns list of factors of f.
+// Uses "Cantor/Zassenhaus" approach, using the routines NewDDF and
+// EDF above.
+
+
+void CanZass(vec_pair_zz_pX_long& factors, const zz_pX& f,
+             long verbose=0);
+vec_pair_zz_pX_long CanZass(const zz_pX& f, long verbose=0);
+
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SquareFreeDecomp and SFCanZass.
+
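+// End-to-end sketch (f a hypothetical monic input): factor, then
+// verify by multiplying the factors back together (mul is below).
+//
+//    vec_pair_zz_pX_long factors = CanZass(f);
+//    zz_pX g = mul(factors);   // g == f
+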
+
+void mul(zz_pX& f, const vec_pair_zz_pX_long& v);
+zz_pX mul(const vec_pair_zz_pX_long& v);
+
+
+// multiplies polynomials, with multiplicities
+
+/**************************************************************************\
+
+                            Irreducible Polynomials
+
+\**************************************************************************/
+
+long ProbIrredTest(const zz_pX& f, long iter=1);
+
+// performs a fast, probabilistic irreducibility test.  The test can
+// err only if f is reducible, and the error probability is bounded by
+// p^{-iter}.  This implements an algorithm from [Shoup, J. Symbolic
+// Comp. 17:371-391, 1994].
+
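+// For example (f a hypothetical input), ProbIrredTest(f, 10) errs with
+// probability at most p^{-10}, so for p = 2 a reducible f is accepted
+// with probability at most 2^{-10}.
+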
+
+long DetIrredTest(const zz_pX& f);
+
+// performs a recursive deterministic irreducibility test.  Fast in
+// the worst-case (when input is irreducible).  This implements an
+// algorithm from [Shoup, J. Symbolic Comp. 17:371-391, 1994].
+
+long IterIrredTest(const zz_pX& f);
+
+// performs an iterative deterministic irreducibility test, based on
+// DDF.  Fast on average (when f has a small factor).
+
+void BuildIrred(zz_pX& f, long n);
+zz_pX BuildIrred_zz_pX(long n);
+
+// Build a monic irreducible poly of degree n.
+
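+// A common follow-up (a sketch; the use of zz_pE from lzz_pE is an
+// assumption, not part of this module):
+//
+//    zz_pX f;
+//    BuildIrred(f, 128);   // monic irreducible of degree 128 over zz_p
+//    zz_pE::init(f);       // arithmetic in the field GF(p^128)
+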
+void BuildRandomIrred(zz_pX& f, const zz_pX& g);
+zz_pX BuildRandomIrred(const zz_pX& g);
+
+// g is a monic irreducible polynomial.  Constructs a random monic
+// irreducible polynomial f of the same degree.
+
+long ComputeDegree(const zz_pX& h, const zz_pXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial.  h = X^p mod f.
+// The common degree of the irreducible factors of f is computed.  This
+// routine is useful in counting points on elliptic curves.
+
+long ProbComputeDegree(const zz_pX& h, const zz_pXModulus& F);
+
+// same as above, but uses a slightly faster probabilistic algorithm.
+// The return value may be 0 or may be too big, but for large p
+// (relative to n), this happens with very low probability.
+
+void TraceMap(zz_pX& w, const zz_pX& a, long d, const zz_pXModulus& F,
+              const zz_pX& h);
+
+zz_pX TraceMap(const zz_pX& a, long d, const zz_pXModulus& F,
+              const zz_pX& h);
+
+// w = a+a^q+...+a^{q^{d-1}} mod f; it is assumed that d >= 0, and h =
+// X^q mod f, q a power of p.  This routine implements an algorithm
+// from [von zur Gathen and Shoup, Computational Complexity 2:187-224,
+// 1992]
+
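+// Sketch (f a hypothetical monic irreducible input, a with deg(a) <
+// deg(f)): with q = p and d = deg(f), TraceMap yields the absolute
+// trace as a constant polynomial, agreeing with TraceMod from lzz_pX.
+//
+//    zz_pXModulus F(f);
+//    zz_pX h = PowerXMod(zz_p::modulus(), F);   // h = X^p mod f
+//    zz_pX w = TraceMap(a, deg(f), F, h);       // ConstTerm(w) == TraceMod(a, F)
+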
+void PowerCompose(zz_pX& w, const zz_pX& h, long d, const zz_pXModulus& F);
+zz_pX PowerCompose(const zz_pX& h, long d, const zz_pXModulus& F);
+
+
+// w = X^{q^d} mod f; it is assumed that d >= 0, and h = X^q mod f, q
+// a power of p.  This routine implements an algorithm from [von zur
+// Gathen and Shoup, Computational Complexity 2:187-224, 1992]
+
+
+
diff --git a/thirdparty/linux/ntl/doc/lzz_pXFactoring.txt b/thirdparty/linux/ntl/doc/lzz_pXFactoring.txt
new file mode 100644
index 0000000000..2b2df5c77c
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/lzz_pXFactoring.txt
@@ -0,0 +1,177 @@
+
+/**************************************************************************\
+
+MODULE: zz_pXFactoring
+
+SUMMARY:
+
+Routines are provided for factorization of polynomials over zz_p, as
+well as routines for related problems such as testing irreducibility
+and constructing irreducible polynomials of given degree.
+
+\**************************************************************************/
+
+#include "zz_pX.h"
+#include "pair_zz_pX_long.h"
+
+
+void SquareFreeDecomp(vec_pair_zz_pX_long& u, const zz_pX& f);
+vec_pair_zz_pX_long SquareFreeDecomp(const zz_pX& f);
+
+// Performs square-free decomposition.  f must be monic.  If f =
+// prod_i g_i^i, then u is set to a list of pairs (g_i, i).  The list
+// is in increasing order of i, with trivial terms (i.e., g_i = 1)
+// deleted.
+
+
+void FindRoots(vec_zz_p& x, const zz_pX& f);
+vec_zz_p FindRoots(const zz_pX& f);
+
+// f is monic, and has deg(f) distinct roots.  returns the list of
+// roots
+
+void FindRoot(zz_p& root, const zz_pX& f);
+zz_p FindRoot(const zz_pX& f);
+
+// finds a single root of f.  assumes that f is monic and splits into
+// distinct linear factors
+
+
+void SFBerlekamp(vec_zz_pX& factors, const zz_pX& f, long verbose=0);
+vec_zz_pX  SFBerlekamp(const zz_pX& f, long verbose=0);
+
+// Assumes f is square-free and monic.  returns list of factors of f.
+// Uses "Berlekamp" approach, as described in detail in [Shoup,
+// J. Symbolic Comp. 20:363-397, 1995].
+
+void berlekamp(vec_pair_zz_pX_long& factors, const zz_pX& f,
+               long verbose=0);
+vec_pair_zz_pX_long berlekamp(const zz_pX& f, long verbose=0);
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SFBerlekamp.
+
+
+
+void NewDDF(vec_pair_zz_pX_long& factors, const zz_pX& f, const zz_pX& h,
+         long verbose=0);
+
+vec_pair_zz_pX_long NewDDF(const zz_pX& f, const zz_pX& h,
+         long verbose=0);
+
+// This computes a distinct-degree factorization.  The input must be
+// monic and square-free.  factors is set to a list of pairs (g, d),
+// where g is the product of all irreducible factors of f of degree d.
+// Only nontrivial pairs (i.e., g != 1) are included.  The polynomial
+// h is assumed to be equal to X^p mod f.  This routine implements the
+// baby step/giant step algorithm of [Kaltofen and Shoup, STOC 1995],
+// further described in [Shoup, J. Symbolic Comp. 20:363-397, 1995].
+
+void EDF(vec_zz_pX& factors, const zz_pX& f, const zz_pX& h,
+         long d, long verbose=0);
+
+vec_zz_pX EDF(const zz_pX& f, const zz_pX& h,
+         long d, long verbose=0);
+
+// Performs equal-degree factorization.  f is monic, square-free, and
+// all irreducible factors have same degree.  h = X^p mod f.  d =
+// degree of irreducible factors of f.  This routine implements the
+// algorithm of [von zur Gathen and Shoup, Computational Complexity
+// 2:187-224, 1992]
+
+
+void RootEDF(vec_zz_pX& factors, const zz_pX& f, long verbose=0);
+vec_zz_pX RootEDF(const zz_pX& f, long verbose=0);
+
+// EDF for d==1
+
+void SFCanZass(vec_zz_pX& factors, const zz_pX& f, long verbose=0);
+vec_zz_pX SFCanZass(const zz_pX& f, long verbose=0);
+
+// Assumes f is monic and square-free.  returns list of factors of f.
+// Uses "Cantor/Zassenhaus" approach, using the routines NewDDF and
+// EDF above.
+
+
+void CanZass(vec_pair_zz_pX_long& factors, const zz_pX& f,
+             long verbose=0);
+vec_pair_zz_pX_long CanZass(const zz_pX& f, long verbose=0);
+
+
+// returns a list of factors, with multiplicities.  f must be monic.
+// Calls SquareFreeDecomp and SFCanZass.
+
+
+void mul(zz_pX& f, const vec_pair_zz_pX_long& v);
+zz_pX mul(const vec_pair_zz_pX_long& v);
+
+
+// multiplies polynomials, with multiplicities
+
+/**************************************************************************\
+
+                            Irreducible Polynomials
+
+\**************************************************************************/
+
+long ProbIrredTest(const zz_pX& f, long iter=1);
+
+// performs a fast, probabilistic irreducibility test.  The test can
+// err only if f is reducible, and the error probability is bounded by
+// p^{-iter}.  This implements an algorithm from [Shoup, J. Symbolic
+// Comp. 17:371-391, 1994].
+
+
+long DetIrredTest(const zz_pX& f);
+
+// performs a recursive deterministic irreducibility test.  Fast in
+// the worst-case (when input is irreducible).  This implements an
+// algorithm from [Shoup, J. Symbolic Comp. 17:371-391, 1994].
+
+long IterIrredTest(const zz_pX& f);
+
+// performs an iterative deterministic irreducibility test, based on
+// DDF.  Fast on average (when f has a small factor).
+
+void BuildIrred(zz_pX& f, long n);
+zz_pX BuildIrred_zz_pX(long n);
+
+// Build a monic irreducible poly of degree n.
+
+void BuildRandomIrred(zz_pX& f, const zz_pX& g);
+zz_pX BuildRandomIrred(const zz_pX& g);
+
+// g is a monic irreducible polynomial.  Constructs a random monic
+// irreducible polynomial f of the same degree.
+
+long ComputeDegree(const zz_pX& h, const zz_pXModulus& F);
+
+// f is assumed to be an "equal degree" polynomial.  h = X^p mod f.
+// The common degree of the irreducible factors of f is computed.  This
+// routine is useful in counting points on elliptic curves.
+
+long ProbComputeDegree(const zz_pX& h, const zz_pXModulus& F);
+
+// same as above, but uses a slightly faster probabilistic algorithm.
+// The return value may be 0 or may be too big, but for large p
+// (relative to n), this happens with very low probability.
+
+void TraceMap(zz_pX& w, const zz_pX& a, long d, const zz_pXModulus& F,
+              const zz_pX& h);
+
+zz_pX TraceMap(const zz_pX& a, long d, const zz_pXModulus& F,
+              const zz_pX& h);
+
+// w = a+a^q+...+a^{q^{d-1}} mod f; it is assumed that d >= 0, and h =
+// X^q mod f, q a power of p.  This routine implements an algorithm
+// from [von zur Gathen and Shoup, Computational Complexity 2:187-224,
+// 1992]
+
+void PowerCompose(zz_pX& w, const zz_pX& h, long d, const zz_pXModulus& F);
+zz_pX PowerCompose(const zz_pX& h, long d, const zz_pXModulus& F);
+
+
+// w = X^{q^d} mod f; it is assumed that d >= 0, and h = X^q mod f, q
+// a power of p.  This routine implements an algorithm from [von zur
+// Gathen and Shoup, Computational Complexity 2:187-224, 1992]
+
diff --git a/thirdparty/linux/ntl/doc/mat_GF2.cpp.html b/thirdparty/linux/ntl/doc/mat_GF2.cpp.html
new file mode 100644
index 0000000000..85cc573511
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_GF2.cpp.html
@@ -0,0 +1,177 @@
+
+/**************************************************************************\
+
+MODULE: mat_GF2
+
+SUMMARY:
+
+Defines the class mat_GF2.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_GF2.h>
+
+
+typedef Mat<GF2> mat_GF2; // backward compatibility
+
+
+void conv(mat_GF2& X, const vec_vec_GF2& A);  
+mat_GF2 to_mat_GF2(const vec_vec_GF2& A);  
+// convert a vector of vec_GF2's to a matrix
+
+
+// procedural arithmetic routines:
+
+void add(mat_GF2& X, const mat_GF2& A, const mat_GF2& B);
+// X = A + B
+
+void sub(mat_GF2& X, const mat_GF2& A, const mat_GF2& B);
+// X = A - B = A + B
+
+void negate(mat_GF2& X, const mat_GF2& A);
+// X = -A = A
+
+void mul(mat_GF2& X, const mat_GF2& A, const mat_GF2& B);
+// X = A * B
+
+void mul(vec_GF2& x, const mat_GF2& A, const vec_GF2& b);
+// x = A * b
+
+void mul(vec_GF2& x, const vec_GF2& a, const mat_GF2& B);
+// x = a * B
+
+
+void mul(mat_GF2& X, const mat_GF2& A, GF2 b);
+void mul(mat_GF2& X, const mat_GF2& A, long b);
+// X = A * b
+
+void mul(mat_GF2& X, GF2 a, const mat_GF2& B);
+void mul(mat_GF2& X, long a, const mat_GF2& B);
+// X = a * B
+
+void determinant(GF2& d, const mat_GF2& A);
+GF2 determinant(const mat_GF2& A);
+// d =  determinant of A
+
+void transpose(mat_GF2& X, const mat_GF2& A);
+mat_GF2 transpose(const mat_GF2& A);
+// X = transpose of A
+
+void solve(GF2& d, vec_GF2& x, const mat_GF2& A, const vec_GF2& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves x*A = b.
+
+void solve(GF2& d, const mat_GF2& A, vec_GF2& x, const vec_GF2& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves A*x = b (so x and b are treated as column vectors).
+
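+// Usage sketch (A, b hypothetical, A square with A.NumRows() == b.length()):
+//
+//    GF2 d;
+//    vec_GF2 x;
+//    solve(d, x, A, b);        // d = determinant(A)
+//    if (!IsZero(d)) { ... }   // over GF(2), d == 1 means x*A == b
+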
+void inv(GF2& d, mat_GF2& X, const mat_GF2& A);
+// A is an n x n matrix.  Computes d = det(A).  If d != 0,
+// computes X = A^{-1}.
+
+void sqr(mat_GF2& X, const mat_GF2& A);
+mat_GF2 sqr(const mat_GF2& A);
+// X = A*A  
+
+void inv(mat_GF2& X, const mat_GF2& A);
+mat_GF2 inv(const mat_GF2& A);
+// X = A^{-1}; error is raised if A is  singular
+
+void power(mat_GF2& X, const mat_GF2& A, const ZZ& e);
+mat_GF2 power(const mat_GF2& A, const ZZ& e);
+
+void power(mat_GF2& X, const mat_GF2& A, long e);
+mat_GF2 power(const mat_GF2& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+
+void ident(mat_GF2& X, long n);
+mat_GF2 ident_mat_GF2(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_GF2& A, long n);
+// test if A is n x n identity matrix
+
+
+void diag(mat_GF2& X, long n, GF2 d);
+mat_GF2 diag(long n, GF2 d);
+// X = n x n diagonal matrix with diagonal element d
+
+long IsDiag(const mat_GF2& A, long n, long d);
+// test if A is an n x n diagonal matrix with diagonal element (d mod 2)
+
+
+long gauss(mat_GF2& M);
+long gauss(mat_GF2& M, long w);
+// Performs unitary row operations so as to bring M into row echelon
+// form.  If the optional argument w is supplied, stops when first w
+// columns are in echelon form.  The return value is the rank (or the
+// rank of the first w columns).
+
+void image(mat_GF2& X, const mat_GF2& A);
+// The rows of X are computed as a basis of A's row space.  X is in row
+// echelon form.
+
+
+void kernel(mat_GF2& X, const mat_GF2& A);
+// Computes a basis for the kernel of the map x -> x*A, where x is a
+// row vector.
+
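+// Sketch: image and kernel together express rank-nullity for a
+// hypothetical matrix A; K*A is the zero matrix.
+//
+//    mat_GF2 K, I;
+//    kernel(K, A);
+//    image(I, A);
+//    // K.NumRows() + I.NumRows() == A.NumRows(), and IsZero(K * A)
+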
+// miscellaneous:
+
+
+void clear(mat_GF2& X);
+// X = 0 (dimension unchanged)
+
+long IsZero(const mat_GF2& A);
+// test if A is the zero matrix (any dimension)
+
+
+// arithmetic operator notation:
+
+mat_GF2 operator+(const mat_GF2& a, const mat_GF2& b);
+mat_GF2 operator-(const mat_GF2& a, const mat_GF2& b);
+mat_GF2 operator*(const mat_GF2& a, const mat_GF2& b);
+
+mat_GF2 operator-(const mat_GF2& a);
+
+
+// matrix/scalar multiplication:
+
+mat_GF2 operator*(const mat_GF2& a, GF2 b);
+mat_GF2 operator*(const mat_GF2& a, long b);
+
+mat_GF2 operator*(GF2 a, const mat_GF2& b);
+mat_GF2 operator*(long a, const mat_GF2& b);
+
+// matrix/vector multiplication:
+
+vec_GF2 operator*(const mat_GF2& a, const vec_GF2& b);
+
+vec_GF2 operator*(const vec_GF2& a, const mat_GF2& b);
+
+
+// assignment operator notation:
+
+mat_GF2& operator+=(mat_GF2& x, const mat_GF2& a);
+mat_GF2& operator-=(mat_GF2& x, const mat_GF2& a);
+mat_GF2& operator*=(mat_GF2& x, const mat_GF2& a);
+
+mat_GF2& operator*=(mat_GF2& x, GF2 a);
+mat_GF2& operator*=(mat_GF2& x, long a);
+
+vec_GF2& operator*=(vec_GF2& x, const mat_GF2& a);
+
+
+
+
diff --git a/thirdparty/linux/ntl/doc/mat_GF2.txt b/thirdparty/linux/ntl/doc/mat_GF2.txt
new file mode 100644
index 0000000000..dd3791d738
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_GF2.txt
@@ -0,0 +1,167 @@
+
+/**************************************************************************\
+
+MODULE: mat_GF2
+
+SUMMARY:
+
+Defines the class mat_GF2.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_GF2.h>
+
+
+typedef Mat<GF2> mat_GF2; // backward compatibility
+
+
+void conv(mat_GF2& X, const vec_vec_GF2& A);
+mat_GF2 to_mat_GF2(const vec_vec_GF2& A);
+// convert a vector of vec_GF2's to a matrix
+
+
+// procedural arithmetic routines:
+
+void add(mat_GF2& X, const mat_GF2& A, const mat_GF2& B);
+// X = A + B
+
+void sub(mat_GF2& X, const mat_GF2& A, const mat_GF2& B);
+// X = A - B = A + B
+
+void negate(mat_GF2& X, const mat_GF2& A);
+// X = -A = A
+
+void mul(mat_GF2& X, const mat_GF2& A, const mat_GF2& B);
+// X = A * B
+
+void mul(vec_GF2& x, const mat_GF2& A, const vec_GF2& b);
+// x = A * b
+
+void mul(vec_GF2& x, const vec_GF2& a, const mat_GF2& B);
+// x = a * B
+
+
+void mul(mat_GF2& X, const mat_GF2& A, GF2 b);
+void mul(mat_GF2& X, const mat_GF2& A, long b);
+// X = A * b
+
+void mul(mat_GF2& X, GF2 a, const mat_GF2& B);
+void mul(mat_GF2& X, long a, const mat_GF2& B);
+// X = a * B
+
+void determinant(GF2& d, const mat_GF2& A);
+GF2 determinant(const mat_GF2& A);
+// d = determinant of A
+
+void transpose(mat_GF2& X, const mat_GF2& A);
+mat_GF2 transpose(const mat_GF2& A);
+// X = transpose of A
+
+void solve(GF2& d, vec_GF2& x, const mat_GF2& A, const vec_GF2& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves x*A = b.
+
+void solve(GF2& d, const mat_GF2& A, vec_GF2& x, const vec_GF2& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves A*x = b (so x and b are treated as column vectors).
+
+void inv(GF2& d, mat_GF2& X, const mat_GF2& A);
+// A is an n x n matrix.  Computes d = det(A).  If d != 0,
+// computes X = A^{-1}.
+
+void sqr(mat_GF2& X, const mat_GF2& A);
+mat_GF2 sqr(const mat_GF2& A);
+// X = A*A
+
+void inv(mat_GF2& X, const mat_GF2& A);
+mat_GF2 inv(const mat_GF2& A);
+// X = A^{-1}; error is raised if A is singular
+
+void power(mat_GF2& X, const mat_GF2& A, const ZZ& e);
+mat_GF2 power(const mat_GF2& A, const ZZ& e);
+
+void power(mat_GF2& X, const mat_GF2& A, long e);
+mat_GF2 power(const mat_GF2& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+
+void ident(mat_GF2& X, long n);
+mat_GF2 ident_mat_GF2(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_GF2& A, long n);
+// test if A is n x n identity matrix
+
+
+void diag(mat_GF2& X, long n, GF2 d);
+mat_GF2 diag(long n, GF2 d);
+// X = n x n diagonal matrix with diagonal element d
+
+long IsDiag(const mat_GF2& A, long n, long d);
+// test if A is an n x n diagonal matrix with diagonal element (d mod 2)
+
+
+long gauss(mat_GF2& M);
+long gauss(mat_GF2& M, long w);
+// Performs unitary row operations so as to bring M into row echelon
+// form.  If the optional argument w is supplied, stops when first w
+// columns are in echelon form.  The return value is the rank (or the
+// rank of the first w columns).
+
+void image(mat_GF2& X, const mat_GF2& A);
+// The rows of X are computed as a basis of A's row space.  X is in row
+// echelon form.
+
+
+void kernel(mat_GF2& X, const mat_GF2& A);
+// Computes a basis for the kernel of the map x -> x*A, where x is a
+// row vector.
+
+// miscellaneous:
+
+
+void clear(mat_GF2& X);
+// X = 0 (dimension unchanged)
+
+long IsZero(const mat_GF2& A);
+// test if A is the zero matrix (any dimension)
+
+
+// arithmetic operator notation:
+
+mat_GF2 operator+(const mat_GF2& a, const mat_GF2& b);
+mat_GF2 operator-(const mat_GF2& a, const mat_GF2& b);
+mat_GF2 operator*(const mat_GF2& a, const mat_GF2& b);
+
+mat_GF2 operator-(const mat_GF2& a);
+
+
+// matrix/scalar multiplication:
+
+mat_GF2 operator*(const mat_GF2& a, GF2 b);
+mat_GF2 operator*(const mat_GF2& a, long b);
+
+mat_GF2 operator*(GF2 a, const mat_GF2& b);
+mat_GF2 operator*(long a, const mat_GF2& b);
+
+// matrix/vector multiplication:
+
+vec_GF2 operator*(const mat_GF2& a, const vec_GF2& b);
+
+vec_GF2 operator*(const vec_GF2& a, const mat_GF2& b);
+
+
+// assignment operator notation:
+
+mat_GF2& operator+=(mat_GF2& x, const mat_GF2& a);
+mat_GF2& operator-=(mat_GF2& x, const mat_GF2& a);
+mat_GF2& operator*=(mat_GF2& x, const mat_GF2& a);
+
+mat_GF2& operator*=(mat_GF2& x, GF2 a);
+mat_GF2& operator*=(mat_GF2& x, long a);
+
+vec_GF2& operator*=(vec_GF2& x, const mat_GF2& a);
+
diff --git a/thirdparty/linux/ntl/doc/mat_GF2E.cpp.html b/thirdparty/linux/ntl/doc/mat_GF2E.cpp.html
new file mode 100644
index 0000000000..9791a23c50
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_GF2E.cpp.html
@@ -0,0 +1,179 @@
+
+/**************************************************************************\
+
+MODULE: mat_GF2E
+
+SUMMARY:
+
+Defines the class mat_GF2E.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_GF2E.h>
+
+
+typedef Mat<GF2E> mat_GF2E; // backward compatibility
+
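+// Usage note (an assumption about the GF2E class, not part of this
+// header): the GF2E modulus must be installed before any mat_GF2E
+// arithmetic, e.g.
+//
+//    GF2X P = BuildIrred_GF2X(64);   // from GF2XFactoring
+//    GF2E::init(P);                  // subsequent mat_GF2E work is over GF(2^64)
+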
+
+void add(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B);
+// X = A + B
+
+void sub(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B);
+// X = A - B = A + B
+
+void negate(mat_GF2E& X, const mat_GF2E& A);
+// X = - A  = A
+
+void mul(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B);
+// X = A * B
+
+void mul(vec_GF2E& x, const mat_GF2E& A, const vec_GF2E& b);
+// x = A * b
+
+void mul(vec_GF2E& x, const vec_GF2E& a, const mat_GF2E& B);
+// x = a * B
+
+void mul(mat_GF2E& X, const mat_GF2E& A, const GF2E& b);
+void mul(mat_GF2E& X, const mat_GF2E& A, GF2 b);
+void mul(mat_GF2E& X, const mat_GF2E& A, long b);
+// X = A * b
+
+void mul(mat_GF2E& X, const GF2E& a, const mat_GF2E& B);
+void mul(mat_GF2E& X, GF2 a, const mat_GF2E& B);
+void mul(mat_GF2E& X, long a, const mat_GF2E& B);
+// X = a * B
+
+
+
+void determinant(GF2E& d, const mat_GF2E& A);
+GF2E determinant(const mat_GF2E& a);
+// d = determinant(A)
+
+
+void transpose(mat_GF2E& X, const mat_GF2E& A);
+mat_GF2E transpose(const mat_GF2E& A);
+// X = transpose of A
+
+void solve(GF2E& d, vec_GF2E& x, const mat_GF2E& A, const vec_GF2E& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves x*A = b.
+
+void solve(GF2E& d, const mat_GF2E& A, vec_GF2E& x, const vec_GF2E& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves A*x = b (so x and b are treated as column vectors).
+
+void inv(GF2E& d, mat_GF2E& X, const mat_GF2E& A);
+// A is an n x n matrix.  Computes d = determinant(A).  If d != 0,
+// computes X = A^{-1}.
+
+void sqr(mat_GF2E& X, const mat_GF2E& A);
+mat_GF2E sqr(const mat_GF2E& A);
+// X = A*A  
+
+void inv(mat_GF2E& X, const mat_GF2E& A);
+mat_GF2E inv(const mat_GF2E& A);
+// X = A^{-1}; error is raised if A is  singular
+
+void power(mat_GF2E& X, const mat_GF2E& A, const ZZ& e);
+mat_GF2E power(const mat_GF2E& A, const ZZ& e);
+
+void power(mat_GF2E& X, const mat_GF2E& A, long e);
+mat_GF2E power(const mat_GF2E& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+
+void ident(mat_GF2E& X, long n);
+mat_GF2E ident_mat_GF2E(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_GF2E& A, long n);
+// test if A is the n x n identity matrix
+
+void diag(mat_GF2E& X, long n, const GF2E& d);
+mat_GF2E diag(long n, const GF2E& d);
+// X = n x n diagonal matrix with d on diagonal
+
+long IsDiag(const mat_GF2E& A, long n, const GF2E& d);
+// test if A is an  n x n diagonal matrix with d on diagonal
+
+
+
+
+long gauss(mat_GF2E& M);
+long gauss(mat_GF2E& M, long w);
+// Performs unitary row operations so as to bring M into row echelon
+// form.  If the optional argument w is supplied, stops when first w
+// columns are in echelon form.  The return value is the rank (or the
+// rank of the first w columns).
+
+void image(mat_GF2E& X, const mat_GF2E& A);
+// The rows of X are computed as a basis of A's row space.  X is in row
+// echelon form.
+
+void kernel(mat_GF2E& X, const mat_GF2E& A);
+// Computes a basis for the kernel of the map x -> x*A, where x is a
+// row vector.
+
+
+
+
+// miscellaneous:
+
+void clear(mat_GF2E& a);
+// x = 0 (dimension unchanged)
+
+long IsZero(const mat_GF2E& a);
+// test if a is the zero matrix (any dimension)
+
+
+// operator notation:
+
+mat_GF2E operator+(const mat_GF2E& a, const mat_GF2E& b);
+mat_GF2E operator-(const mat_GF2E& a, const mat_GF2E& b);
+mat_GF2E operator*(const mat_GF2E& a, const mat_GF2E& b);
+
+mat_GF2E operator-(const mat_GF2E& a);
+
+
+// matrix/scalar multiplication:
+
+mat_GF2E operator*(const mat_GF2E& a, const GF2E& b);
+mat_GF2E operator*(const mat_GF2E& a, GF2 b);
+mat_GF2E operator*(const mat_GF2E& a, long b);
+
+mat_GF2E operator*(const GF2E& a, const mat_GF2E& b);
+mat_GF2E operator*(GF2 a, const mat_GF2E& b);
+mat_GF2E operator*(long a, const mat_GF2E& b);
+
+// matrix/vector multiplication:
+
+vec_GF2E operator*(const mat_GF2E& a, const vec_GF2E& b);
+
+vec_GF2E operator*(const vec_GF2E& a, const mat_GF2E& b);
+
+
+// assignment operator notation:
+
+mat_GF2E& operator+=(mat_GF2E& x, const mat_GF2E& a);
+mat_GF2E& operator-=(mat_GF2E& x, const mat_GF2E& a);
+mat_GF2E& operator*=(mat_GF2E& x, const mat_GF2E& a);
+
+mat_GF2E& operator*=(mat_GF2E& x, const GF2E& a);
+mat_GF2E& operator*=(mat_GF2E& x, GF2 a);
+mat_GF2E& operator*=(mat_GF2E& x, long a);
+
+vec_GF2E& operator*=(vec_GF2E& x, const mat_GF2E& a);
+
+
+
+
diff --git a/thirdparty/linux/ntl/doc/mat_GF2E.txt b/thirdparty/linux/ntl/doc/mat_GF2E.txt
new file mode 100644
index 0000000000..e3742e9d41
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_GF2E.txt
@@ -0,0 +1,169 @@
+
+/**************************************************************************\
+
+MODULE: mat_GF2E
+
+SUMMARY:
+
+Defines the class mat_GF2E.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_GF2E.h>
+
+
+typedef Mat<GF2E> mat_GF2E; // backward compatibility
+
+
+void add(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B);
+// X = A + B
+
+void sub(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B);
+// X = A - B = A + B
+
+void negate(mat_GF2E& X, const mat_GF2E& A);
+// X = - A = A
+
+void mul(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B);
+// X = A * B
+
+void mul(vec_GF2E& x, const mat_GF2E& A, const vec_GF2E& b);
+// x = A * b
+
+void mul(vec_GF2E& x, const vec_GF2E& a, const mat_GF2E& B);
+// x = a * B
+
+void mul(mat_GF2E& X, const mat_GF2E& A, const GF2E& b);
+void mul(mat_GF2E& X, const mat_GF2E& A, GF2 b);
+void mul(mat_GF2E& X, const mat_GF2E& A, long b);
+// X = A * b
+
+void mul(mat_GF2E& X, const GF2E& a, const mat_GF2E& B);
+void mul(mat_GF2E& X, GF2 a, const mat_GF2E& B);
+void mul(mat_GF2E& X, long a, const mat_GF2E& B);
+// X = a * B
+
+
+
+void determinant(GF2E& d, const mat_GF2E& A);
+GF2E determinant(const mat_GF2E& a);
+// d = determinant(A)
+
+
+void transpose(mat_GF2E& X, const mat_GF2E& A);
+mat_GF2E transpose(const mat_GF2E& A);
+// X = transpose of A
+
+void solve(GF2E& d, vec_GF2E& x, const mat_GF2E& A, const vec_GF2E& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves x*A = b.
+
+void solve(GF2E& d, const mat_GF2E& A, vec_GF2E& x, const vec_GF2E& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves A*x = b (so x and b are treated as column vectors).
+
+void inv(GF2E& d, mat_GF2E& X, const mat_GF2E& A);
+// A is an n x n matrix.  Computes d = determinant(A).  If d != 0,
+// computes X = A^{-1}.
+
+void sqr(mat_GF2E& X, const mat_GF2E& A);
+mat_GF2E sqr(const mat_GF2E& A);
+// X = A*A
+
+void inv(mat_GF2E& X, const mat_GF2E& A);
+mat_GF2E inv(const mat_GF2E& A);
+// X = A^{-1}; error is raised if A is singular
+
+void power(mat_GF2E& X, const mat_GF2E& A, const ZZ& e);
+mat_GF2E power(const mat_GF2E& A, const ZZ& e);
+
+void power(mat_GF2E& X, const mat_GF2E& A, long e);
+mat_GF2E power(const mat_GF2E& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+
+void ident(mat_GF2E& X, long n);
+mat_GF2E ident_mat_GF2E(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_GF2E& A, long n);
+// test if A is the n x n identity matrix
+
+void diag(mat_GF2E& X, long n, const GF2E& d);
+mat_GF2E diag(long n, const GF2E& d);
+// X = n x n diagonal matrix with d on diagonal
+
+long IsDiag(const mat_GF2E& A, long n, const GF2E& d);
+// test if A is an n x n diagonal matrix with d on diagonal
+
+
+
+
+long gauss(mat_GF2E& M);
+long gauss(mat_GF2E& M, long w);
+// Performs unitary row operations so as to bring M into row echelon
+// form.  If the optional argument w is supplied, stops when first w
+// columns are in echelon form.  The return value is the rank (or the
+// rank of the first w columns).
+
+void image(mat_GF2E& X, const mat_GF2E& A);
+// The rows of X are computed as a basis of A's row space.  X is in row
+// echelon form.
+
+void kernel(mat_GF2E& X, const mat_GF2E& A);
+// Computes a basis for the kernel of the map x -> x*A, where x is a
+// row vector.
+
+
+
+
+// miscellaneous:
+
+void clear(mat_GF2E& a);
+// x = 0 (dimension unchanged)
+
+long IsZero(const mat_GF2E& a);
+// test if a is the zero matrix (any dimension)
+
+
+// operator notation:
+
+mat_GF2E operator+(const mat_GF2E& a, const mat_GF2E& b);
+mat_GF2E operator-(const mat_GF2E& a, const mat_GF2E& b);
+mat_GF2E operator*(const mat_GF2E& a, const mat_GF2E& b);
+
+mat_GF2E operator-(const mat_GF2E& a);
+
+
+// matrix/scalar multiplication:
+
+mat_GF2E operator*(const mat_GF2E& a, const GF2E& b);
+mat_GF2E operator*(const mat_GF2E& a, GF2 b);
+mat_GF2E operator*(const mat_GF2E& a, long b);
+
+mat_GF2E operator*(const GF2E& a, const mat_GF2E& b);
+mat_GF2E operator*(GF2 a, const mat_GF2E& b);
+mat_GF2E operator*(long a, const mat_GF2E& b);
+
+// matrix/vector multiplication:
+
+vec_GF2E operator*(const mat_GF2E& a, const vec_GF2E& b);
+
+vec_GF2E operator*(const vec_GF2E& a, const mat_GF2E& b);
+
+
+// assignment operator notation:
+
+mat_GF2E& operator+=(mat_GF2E& x, const mat_GF2E& a);
+mat_GF2E& operator-=(mat_GF2E& x, const mat_GF2E& a);
+mat_GF2E& operator*=(mat_GF2E& x, const mat_GF2E& a);
+
+mat_GF2E& operator*=(mat_GF2E& x, const GF2E& a);
+mat_GF2E& operator*=(mat_GF2E& x, GF2 a);
+mat_GF2E& operator*=(mat_GF2E& x, long a);
+
+vec_GF2E& operator*=(vec_GF2E& x, const mat_GF2E& a);
+
diff --git a/thirdparty/linux/ntl/doc/mat_RR.cpp.html b/thirdparty/linux/ntl/doc/mat_RR.cpp.html
new file mode 100644
index 0000000000..7ac1c5d20d
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_RR.cpp.html
@@ -0,0 +1,152 @@
+
+/**************************************************************************\
+
+MODULE: mat_RR
+
+SUMMARY:
+
+Defines the class mat_RR.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_RR.h>
+
+typedef Mat<RR> mat_RR; // backward compatibility
+
+void add(mat_RR& X, const mat_RR& A, const mat_RR& B);
+// X = A + B
+
+void sub(mat_RR& X, const mat_RR& A, const mat_RR& B);
+// X = A - B
+
+void negate(mat_RR& X, const mat_RR& A);
+// X = - A
+
+void mul(mat_RR& X, const mat_RR& A, const mat_RR& B);
+// X = A * B
+
+void mul(vec_RR& x, const mat_RR& A, const vec_RR& b);
+// x = A * b
+
+void mul(vec_RR& x, const vec_RR& a, const mat_RR& B);
+// x = a * B
+
+void mul(mat_RR& X, const mat_RR& A, const RR& b);
+void mul(mat_RR& X, const mat_RR& A, double b);
+// X = A * b
+
+void mul(mat_RR& X, const RR& a, const mat_RR& B);
+void mul(mat_RR& X, double a, const mat_RR& B);
+// X = a * B
+
+
+void determinant(RR& d, const mat_RR& A);
+RR determinant(const mat_RR& A);
+// d = determinant(A)
+
+
+void transpose(mat_RR& X, const mat_RR& A);
+mat_RR transpose(const mat_RR& A);
+// X = transpose of A
+
+void solve(RR& d, vec_RR& X,
+           const mat_RR& A, const vec_RR& b);
+// A is an n x n matrix, b is a length n vector.  Computes d =
+// determinant(A).  If d != 0, solves x*A = b.
+
+void inv(RR& d, mat_RR& X, const mat_RR& A);
+// A is an n x n matrix.  Computes d = determinant(A).  If d != 0,
+// computes X = A^{-1}.
+
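+// Sketch: these routines compute with the current RR precision, which
+// can be raised before inverting an ill-conditioned (hypothetical)
+// matrix A:
+//
+//    RR::SetPrecision(200);   // bits for subsequent RR arithmetic
+//    mat_RR X;
+//    inv(X, A);               // X = A^{-1} at 200-bit precision
+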
+void sqr(mat_RR& X, const mat_RR& A);
+mat_RR sqr(const mat_RR& A);
+// X = A*A
+
+void inv(mat_RR& X, const mat_RR& A);
+mat_RR inv(const mat_RR& A);
+// X = A^{-1}; error is raised if A is  singular
+
+void power(mat_RR& X, const mat_RR& A, const ZZ& e);
+mat_RR power(const mat_RR& A, const ZZ& e);
+
+void power(mat_RR& X, const mat_RR& A, long e);
+mat_RR power(const mat_RR& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+void ident(mat_RR& X, long n);
+mat_RR ident_mat_RR(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_RR& A, long n);
+// test if A is the n x n identity matrix
+
+void diag(mat_RR& X, long n, const RR& d);
+mat_RR diag(long n, const RR& d);
+// X = n x n diagonal matrix with d on diagonal
+
+long IsDiag(const mat_RR& A, long n, const RR& d);
+// test if A is an  n x n diagonal matrix with d on diagonal
+
+
+
+
+
+// miscellaneous:
+
+void clear(mat_RR& a);
+// x = 0 (dimension unchanged)
+
+long IsZero(const mat_RR& a);
+// test if a is the zero matrix (any dimension)
+
+
+// operator notation:
+
+mat_RR operator+(const mat_RR& a, const mat_RR& b);
+mat_RR operator-(const mat_RR& a, const mat_RR& b);
+mat_RR operator*(const mat_RR& a, const mat_RR& b);
+
+mat_RR operator-(const mat_RR& a);
+
+
+// matrix/scalar multiplication:
+
+mat_RR operator*(const mat_RR& a, const RR& b);
+mat_RR operator*(const mat_RR& a, double b);
+
+mat_RR operator*(const RR& a, const mat_RR& b);
+mat_RR operator*(double a, const mat_RR& b);
+
+
+// matrix/vector multiplication:
+
+vec_RR operator*(const mat_RR& a, const vec_RR& b);
+
+vec_RR operator*(const vec_RR& a, const mat_RR& b);
+
+
+// assignment operator notation:
+
+mat_RR& operator+=(mat_RR& x, const mat_RR& a);
+mat_RR& operator-=(mat_RR& x, const mat_RR& a);
+mat_RR& operator*=(mat_RR& x, const mat_RR& a);
+
+mat_RR& operator*=(mat_RR& x, const RR& a);
+mat_RR& operator*=(mat_RR& x, double a);
+
+vec_RR& operator*=(vec_RR& x, const mat_RR& a);
+
+
+
+
+
diff --git a/thirdparty/linux/ntl/doc/mat_RR.txt b/thirdparty/linux/ntl/doc/mat_RR.txt
new file mode 100644
index 0000000000..efd260a1dd
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_RR.txt
@@ -0,0 +1,142 @@
+
+/**************************************************************************\
+
+MODULE: mat_RR
+
+SUMMARY:
+
+Defines the class mat_RR.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_RR.h>
+
+typedef Mat<RR> mat_RR; // backward compatibility
+
+void add(mat_RR& X, const mat_RR& A, const mat_RR& B);
+// X = A + B
+
+void sub(mat_RR& X, const mat_RR& A, const mat_RR& B);
+// X = A - B
+
+void negate(mat_RR& X, const mat_RR& A);
+// X = - A
+
+void mul(mat_RR& X, const mat_RR& A, const mat_RR& B);
+// X = A * B
+
+void mul(vec_RR& x, const mat_RR& A, const vec_RR& b);
+// x = A * b
+
+void mul(vec_RR& x, const vec_RR& a, const mat_RR& B);
+// x = a * B
+
+void mul(mat_RR& X, const mat_RR& A, const RR& b);
+void mul(mat_RR& X, const mat_RR& A, double b);
+// X = A * b
+
+void mul(mat_RR& X, const RR& a, const mat_RR& B);
+void mul(mat_RR& X, double a, const mat_RR& B);
+// X = a * B
+
+
+void determinant(RR& d, const mat_RR& A);
+RR determinant(const mat_RR& A);
+// d = determinant(A)
+
+
+void transpose(mat_RR& X, const mat_RR& A);
+mat_RR transpose(const mat_RR& A);
+// X = transpose of A
+
+void solve(RR& d, vec_RR& X,
+           const mat_RR& A, const vec_RR& b);
+// A is an n x n matrix, b is a length n vector.  Computes d =
+// determinant(A).  If d != 0, solves x*A = b.
+
+void inv(RR& d, mat_RR& X, const mat_RR& A);
+// A is an n x n matrix.  Computes d = determinant(A).  If d != 0,
+// computes X = A^{-1}.
+
+void sqr(mat_RR& X, const mat_RR& A);
+mat_RR sqr(const mat_RR& A);
+// X = A*A
+
+void inv(mat_RR& X, const mat_RR& A);
+mat_RR inv(const mat_RR& A);
+// X = A^{-1}; error is raised if A is singular
+
+void power(mat_RR& X, const mat_RR& A, const ZZ& e);
+mat_RR power(const mat_RR& A, const ZZ& e);
+
+void power(mat_RR& X, const mat_RR& A, long e);
+mat_RR power(const mat_RR& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+void ident(mat_RR& X, long n);
+mat_RR ident_mat_RR(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_RR& A, long n);
+// test if A is the n x n identity matrix
+
+void diag(mat_RR& X, long n, const RR& d);
+mat_RR diag(long n, const RR& d);
+// X = n x n diagonal matrix with d on diagonal
+
+long IsDiag(const mat_RR& A, long n, const RR& d);
+// test if A is an n x n diagonal matrix with d on diagonal
+
+
+
+
+
+// miscellaneous:
+
+void clear(mat_RR& a);
+// x = 0 (dimension unchanged)
+
+long IsZero(const mat_RR& a);
+// test if a is the zero matrix (any dimension)
+
+
+// operator notation:
+
+mat_RR operator+(const mat_RR& a, const mat_RR& b);
+mat_RR operator-(const mat_RR& a, const mat_RR& b);
+mat_RR operator*(const mat_RR& a, const mat_RR& b);
+
+mat_RR operator-(const mat_RR& a);
+
+
+// matrix/scalar multiplication:
+
+mat_RR operator*(const mat_RR& a, const RR& b);
+mat_RR operator*(const mat_RR& a, double b);
+
+mat_RR operator*(const RR& a, const mat_RR& b);
+mat_RR operator*(double a, const mat_RR& b);
+
+
+// matrix/vector multiplication:
+
+vec_RR operator*(const mat_RR& a, const vec_RR& b);
+
+vec_RR operator*(const vec_RR& a, const mat_RR& b);
+
+
+// assignment operator notation:
+
+mat_RR& operator+=(mat_RR& x, const mat_RR& a);
+mat_RR& operator-=(mat_RR& x, const mat_RR& a);
+mat_RR& operator*=(mat_RR& x, const mat_RR& a);
+
+mat_RR& operator*=(mat_RR& x, const RR& a);
+mat_RR& operator*=(mat_RR& x, double a);
+
+vec_RR& operator*=(vec_RR& x, const mat_RR& a);
+
diff --git a/thirdparty/linux/ntl/doc/mat_ZZ.cpp.html b/thirdparty/linux/ntl/doc/mat_ZZ.cpp.html
new file mode 100644
index 0000000000..ab40a51602
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_ZZ.cpp.html
@@ -0,0 +1,187 @@
+
+/**************************************************************************\
+
+MODULE: mat_ZZ
+
+SUMMARY:
+
+Defines the class mat_ZZ.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_ZZ.h>
+
+typedef Mat<ZZ> mat_ZZ; // backward compatibility
+
+void add(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B);
+// X = A + B
+
+void sub(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B);
+// X = A - B
+
+void negate(mat_ZZ& X, const mat_ZZ& A);
+// X = - A
+
+void mul(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B);
+// X = A * B
+
+void mul(vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& b);
+// x = A * b
+
+void mul(vec_ZZ& x, const vec_ZZ& a, const mat_ZZ& B);
+// x = a * B
+
+void mul(mat_ZZ& X, const mat_ZZ& A, const ZZ& b);
+void mul(mat_ZZ& X, const mat_ZZ& A, long b);
+// X = A * b
+
+void mul(mat_ZZ& X, const ZZ& a, const mat_ZZ& B);
+void mul(mat_ZZ& X, long a, const mat_ZZ& B);
+// X = a * B
+
+
+
+void determinant(ZZ& d, const mat_ZZ& A, long deterministic=0);
+ZZ determinant(const mat_ZZ& a, long deterministic=0);
+// d = determinant(A).  If !deterministic, a randomized strategy may
+// be used that errs with probability at most 2^{-80}.
+
+
+
+void solve(ZZ& d, vec_ZZ& x,
+           const mat_ZZ& A, const vec_ZZ& b,
+           long deterministic=0);
+// computes d = determinant(A) and solves x*A = b*d if d != 0; A must
+// be a square matrix and have compatible dimensions with b.  If
+// !deterministic, the computation of d may use a randomized strategy
+// that errs with probability 2^{-80}.
+
+
+
+void solve1(ZZ& d, vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& b);
+// A must be a square matrix.
+// If A is singular, this routine sets d = 0 and returns.
+// Otherwise, it computes d, x such that x*A == b*d,
+// such that d > 0 and minimal.
+// Note that d is a positive divisor of the determinant,
+// and is not in general equal to the determinant.
+// The routine is deterministic, and uses a Hensel lifting strategy.
+
+// For backward compatibility, there is also a routine called
+// HenselSolve1 that simply calls solve1.
+
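+// Sketch (A, b hypothetical, A nonsingular): solve1 returns the
+// solution in fraction-free form, so the true solution is x/d.
+//
+//    ZZ d;
+//    vec_ZZ x;
+//    solve1(d, x, A, b);
+//    // if d != 0, the rational vector (x[0]/d, ..., x[n-1]/d) solves the system
+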
+
+void inv(ZZ& d, mat_ZZ& X, const mat_ZZ& A, long deterministic=0);
+// computes d = determinant(A) and solves X*A = I*d if d != 0; A must
+// be a square matrix.  If !deterministic, the computation of d may
+// use a randomized strategy that errs with probability 2^{-80}.
+
+
+// NOTE:  See LLL.txt for routines that compute the kernel and
+// image of an integer matrix.
+
+// NOTE: See HNF.txt for a routine that computes Hermite Normal Forms.
+
+void sqr(mat_ZZ& X, const mat_ZZ& A);
+mat_ZZ sqr(const mat_ZZ& A);
+// X = A*A  
+
+void inv(mat_ZZ& X, const mat_ZZ& A);
+mat_ZZ inv(const mat_ZZ& A);
+// X = A^{-1}; error is raised if |det(A)| != 1.
+
+void power(mat_ZZ& X, const mat_ZZ& A, const ZZ& e);
+mat_ZZ power(const mat_ZZ& A, const ZZ& e);
+
+void power(mat_ZZ& X, const mat_ZZ& A, long e);
+mat_ZZ power(const mat_ZZ& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+
+
+void ident(mat_ZZ& X, long n);
+mat_ZZ ident_mat_ZZ(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_ZZ& A, long n);
+// test if A is the n x n identity matrix
+
+void diag(mat_ZZ& X, long n, const ZZ& d);
+mat_ZZ diag(long n, const ZZ& d);
+// X = n x n diagonal matrix with d on diagonal
+
+long IsDiag(const mat_ZZ& A, long n, const ZZ& d);
+// test if A is an n x n diagonal matrix with d on diagonal
+
+
+void transpose(mat_ZZ& X, const mat_ZZ& A);
+mat_ZZ transpose(const mat_ZZ& A);
+// X = transpose of A
+
+
+long CRT(mat_ZZ& a, ZZ& prod, const mat_zz_p& A);
+// Incremental Chinese Remaindering: if p is the current zz_p modulus and
+// gcd(p, prod) = 1, computes a' such that a' = a mod prod and a' = A mod p,
+// with coefficients in the interval (-p*prod/2, p*prod/2];
+// sets a := a', prod := p*prod, and returns 1 if a's value changed.
+
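+// Example (sketch of the intended multi-modular pattern).  Here n, B and
+// ComputeModP are placeholders: ComputeModP stands for user code that
+// computes the desired answer modulo the current zz_p modulus, and B
+// bounds the absolute value of the entries of the true answer.
+//
+//    mat_ZZ a;  ZZ prod;
+//    a.SetDims(n, n);  clear(a);  prod = 1;
+//    while (prod <= 2*B) {
+//       long p = GenPrime_long(NTL_SP_NBITS);  // fresh small prime,
+//                                              // coprime to prod w.h.p.
+//       zz_p::init(p);
+//       mat_zz_p A;
+//       ComputeModP(A);     // hypothetical: answer mod p
+//       CRT(a, prod, A);    // fold the mod-p image into a
+//    }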
+
+
+// miscellaneous:
+
+void clear(mat_ZZ& a);
+// a = 0 (dimension unchanged)
+
+long IsZero(const mat_ZZ& a);
+// test if a is the zero matrix (any dimension)
+
+
+// operator notation:
+
+mat_ZZ operator+(const mat_ZZ& a, const mat_ZZ& b);
+mat_ZZ operator-(const mat_ZZ& a, const mat_ZZ& b);
+mat_ZZ operator*(const mat_ZZ& a, const mat_ZZ& b);
+
+mat_ZZ operator-(const mat_ZZ& a);
+
+
+// matrix/scalar multiplication:
+
+mat_ZZ operator*(const mat_ZZ& a, const ZZ& b);
+mat_ZZ operator*(const mat_ZZ& a, long b);
+
+mat_ZZ operator*(const ZZ& a, const mat_ZZ& b);
+mat_ZZ operator*(long a, const mat_ZZ& b);
+
+// matrix/vector multiplication:
+
+vec_ZZ operator*(const mat_ZZ& a, const vec_ZZ& b);
+
+vec_ZZ operator*(const vec_ZZ& a, const mat_ZZ& b);
+
+
+
+// assignment operator notation:
+
+mat_ZZ& operator+=(mat_ZZ& x, const mat_ZZ& a);
+mat_ZZ& operator-=(mat_ZZ& x, const mat_ZZ& a);
+mat_ZZ& operator*=(mat_ZZ& x, const mat_ZZ& a);
+
+mat_ZZ& operator*=(mat_ZZ& x, const ZZ& a);
+mat_ZZ& operator*=(mat_ZZ& x, long a);
+
+vec_ZZ& operator*=(vec_ZZ& x, const mat_ZZ& a);
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/mat_ZZ.txt b/thirdparty/linux/ntl/doc/mat_ZZ.txt new file mode 100644 index 0000000000..20dcea8f45 --- /dev/null +++ b/thirdparty/linux/ntl/doc/mat_ZZ.txt @@ -0,0 +1,177 @@ + +/**************************************************************************\ + +MODULE: mat_ZZ + +SUMMARY: + +Defines the class mat_ZZ. + +\**************************************************************************/ + + +#include +#include + +typedef Mat mat_ZZ; // backward compatibility + +void add(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B); +// X = A + B + +void sub(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B); +// X = A - B + +void negate(mat_ZZ& X, const mat_ZZ& A); +// X = - A + +void mul(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B); +// X = A * B + +void mul(vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& b); +// x = A * b + +void mul(vec_ZZ& x, const vec_ZZ& a, const mat_ZZ& B); +// x = a * B + +void mul(mat_ZZ& X, const mat_ZZ& A, const ZZ& b); +void mul(mat_ZZ& X, const mat_ZZ& A, long b); +// X = A * b + +void mul(mat_ZZ& X, const ZZ& a, const mat_ZZ& B); +void mul(mat_ZZ& X, long a, const mat_ZZ& B); +// X = a * B + + + +void determinant(ZZ& d, const mat_ZZ& A, long deterministic=0); +ZZ determinant(const mat_ZZ& a, long deterministic=0); +// d = determinant(A). If !deterministic, a randomized strategy may +// be used that errs with probability at most 2^{-80}. + + + +void solve(ZZ& d, vec_ZZ& x, + const mat_ZZ& A, const vec_ZZ& b, + long deterministic=0) +// computes d = determinant(A) and solves x*A = b*d if d != 0; A must +// be a square matrix and have compatible dimensions with b. If +// !deterministic, the computation of d may use a randomized strategy +// that errs with probability 2^{-80}. + + + +void solve1(ZZ& d, vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& b); +// A must be a square matrix. +// If A is singular, this routine sets d = 0 and returns. +// Otherwise, it computes d, x such that x*A == b*d, +// such that d > 0 and minimal. +// Note that d is a positive divisor of the determinant, +// and is not in general equal to the determinant. +// The routine is deterministic, and uses a Hensel lifting strategy. + +// For backward compatability, there is also a routine called +// HenselSolve1 that simply calls solve1. + + +void inv(ZZ& d, mat_ZZ& X, const mat_ZZ& A, long deterministic=0); +// computes d = determinant(A) and solves X*A = I*d if d != 0; A must +// be a square matrix. If !deterministic, the computation of d may +// use a randomized strategy that errs with probability 2^{-80}. + + +// NOTE: See LLL.txt for routines that compute the kernel and +// image of an integer matrix. + +// NOTE: See HNF.txt for a routine that computes Hermite Normal Forms. + +void sqr(mat_ZZ& X, const mat_ZZ& A); +mat_ZZ sqr(const mat_ZZ& A); +// X = A*A + +void inv(mat_ZZ& X, const mat_ZZ& A); +mat_ZZ inv(const mat_ZZ& A); +// X = A^{-1}; error is raised if |det(A)| != 1. + +void power(mat_ZZ& X, const mat_ZZ& A, const ZZ& e); +mat_ZZ power(const mat_ZZ& A, const ZZ& e); + +void power(mat_ZZ& X, const mat_ZZ& A, long e); +mat_ZZ power(const mat_ZZ& A, long e); +// X = A^e; e may be negative (in which case A must be nonsingular). 
+ + + +void ident(mat_ZZ& X, long n); +mat_ZZ ident_mat_ZZ(long n); +// X = n x n identity matrix + +long IsIdent(const mat_ZZ& A, long n); +// test if A is the n x n identity matrix + +void diag(mat_ZZ& X, long n, const ZZ& d); +mat_ZZ diag(long n, const ZZ& d); +// X = n x n diagonal matrix with d on diagonal + +long IsDiag(const mat_ZZ& A, long n, const ZZ& d); +// test if X is an n x n diagonal matrix with d on diagonal + + +void transpose(mat_ZZ& X, const mat_ZZ& A); +mat_ZZ transpose(const mat_ZZ& A); +// X = transpose of A + + +long CRT(mat_ZZ& a, ZZ& prod, const mat_zz_p& A); +// Incremental Chinese Remaindering: If p is the current zz_p modulus with +// (p, prod) = 1; Computes a' such that a' = a mod prod and a' = A mod p, +// with coefficients in the interval (-p*prod/2, p*prod/2]; +// Sets a := a', prod := p*prod, and returns 1 if a's value changed. + + + +// miscellaneous: + +void clear(mat_ZZ& a); +// x = 0 (dimension unchanged) + +long IsZero(const mat_ZZ& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_ZZ operator+(const mat_ZZ& a, const mat_ZZ& b); +mat_ZZ operator-(const mat_ZZ& a, const mat_ZZ& b); +mat_ZZ operator*(const mat_ZZ& a, const mat_ZZ& b); + +mat_ZZ operator-(const mat_ZZ& a); + + +// matrix/scalar multiplication: + +mat_ZZ operator*(const mat_ZZ& a, const ZZ& b); +mat_ZZ operator*(const mat_ZZ& a, long b); + +mat_ZZ operator*(const ZZ& a, const mat_ZZ& b); +mat_ZZ operator*(long a, const mat_ZZ& b); + +// matrix/vector multiplication: + +vec_ZZ operator*(const mat_ZZ& a, const vec_ZZ& b); + +vec_ZZ operator*(const vec_ZZ& a, const mat_ZZ& b); + + + +// assignment operator notation: + +mat_ZZ& operator+=(mat_ZZ& x, const mat_ZZ& a); +mat_ZZ& operator-=(mat_ZZ& x, const mat_ZZ& a); +mat_ZZ& operator*=(mat_ZZ& x, const mat_ZZ& a); + +mat_ZZ& operator*=(mat_ZZ& x, const ZZ& a); +mat_ZZ& operator*=(mat_ZZ& x, long a); + +vec_ZZ& operator*=(vec_ZZ& x, const mat_ZZ& a); + + diff --git a/thirdparty/linux/ntl/doc/mat_ZZ_p.cpp.html b/thirdparty/linux/ntl/doc/mat_ZZ_p.cpp.html new file mode 100644 index 0000000000..35c3dd3120 --- /dev/null +++ b/thirdparty/linux/ntl/doc/mat_ZZ_p.cpp.html @@ -0,0 +1,171 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/mat_ZZ_p.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: mat_ZZ_p
+
+SUMMARY:
+
+Defines the class mat_ZZ_p.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_ZZ_p.h>
+
+
+typedef Mat<ZZ_p> mat_ZZ_p; // backward compatibility
+
+void add(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B);
+// X = A + B
+
+void sub(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B);
+// X = A - B
+
+void negate(mat_ZZ_p& X, const mat_ZZ_p& A);
+// X = - A
+
+void mul(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B);
+// X = A * B
+
+void mul(vec_ZZ_p& x, const mat_ZZ_p& A, const vec_ZZ_p& b);
+// x = A * b
+
+void mul(vec_ZZ_p& x, const vec_ZZ_p& a, const mat_ZZ_p& B);
+// x = a * B
+
+void mul(mat_ZZ_p& X, const mat_ZZ_p& A, const ZZ_p& b);
+void mul(mat_ZZ_p& X, const mat_ZZ_p& A, long b);
+// X = A * b
+
+void mul(mat_ZZ_p& X, const ZZ_p& a, const mat_ZZ_p& B);
+void mul(mat_ZZ_p& X, long a, const mat_ZZ_p& B);
+// X = a * B
+
+
+void determinant(ZZ_p& d, const mat_ZZ_p& A);
+ZZ_p determinant(const mat_ZZ_p& a);
+// d = determinant(A)
+
+
+void transpose(mat_ZZ_p& X, const mat_ZZ_p& A);
+mat_ZZ_p transpose(const mat_ZZ_p& A);
+// X = transpose of A
+
+void solve(ZZ_p& d, vec_ZZ_p& x, const mat_ZZ_p& A, const vec_ZZ_p& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves x*A = b.
+
+void solve(ZZ_p& d, const mat_ZZ_p& A, vec_ZZ_p& x, const vec_ZZ_p& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves A*x = b (so x and b are treated as column vectors).
+
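+// Example (sketch): solving a system mod a small prime; the modulus and
+// the contents of A and b are placeholders.
+//
+//    ZZ_p::init(conv<ZZ>(17));   // arithmetic mod 17 from here on
+//    mat_ZZ_p A;  vec_ZZ_p x, b;  ZZ_p d;
+//    ...                         // fill in A and b
+//    solve(d, x, A, b);          // d = det(A); if d != 0, x*A == b
+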
+void inv(ZZ_p& d, mat_ZZ_p& X, const mat_ZZ_p& A);
+// A is an n x n matrix.  Computes d = determinant(A).  If d != 0,
+// computes X = A^{-1}.
+
+void sqr(mat_ZZ_p& X, const mat_ZZ_p& A);
+mat_ZZ_p sqr(const mat_ZZ_p& A);
+// X = A*A  
+
+void inv(mat_ZZ_p& X, const mat_ZZ_p& A);
+mat_ZZ_p inv(const mat_ZZ_p& A);
+// X = A^{-1}; error is raised if A is singular
+
+void power(mat_ZZ_p& X, const mat_ZZ_p& A, const ZZ& e);
+mat_ZZ_p power(const mat_ZZ_p& A, const ZZ& e);
+
+void power(mat_ZZ_p& X, const mat_ZZ_p& A, long e);
+mat_ZZ_p power(const mat_ZZ_p& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+void ident(mat_ZZ_p& X, long n);
+mat_ZZ_p ident_mat_ZZ_p(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_ZZ_p& A, long n);
+// test if A is the n x n identity matrix
+
+void diag(mat_ZZ_p& X, long n, const ZZ_p& d);
+mat_ZZ_p diag(long n, const ZZ_p& d);
+// X = n x n diagonal matrix with d on diagonal
+
+long IsDiag(const mat_ZZ_p& A, long n, const ZZ_p& d);
+// test if A is an n x n diagonal matrix with d on diagonal
+
+
+
+
+long gauss(mat_ZZ_p& M);
+long gauss(mat_ZZ_p& M, long w);
+// Performs unitary row operations so as to bring M into row echelon
+// form.  If the optional argument w is supplied, stops when first w
+// columns are in echelon form.  The return value is the rank (or the
+// rank of the first w columns).
+
+void image(mat_ZZ_p& X, const mat_ZZ_p& A);
+// The rows of X are computed as a basis of A's row space.  X is in row
+// echelon form.
+
+void kernel(mat_ZZ_p& X, const mat_ZZ_p& A);
+// Computes a basis for the kernel of the map x -> x*A, where x is a
+// row vector.
+
+
+
+// miscellaneous:
+
+void clear(mat_ZZ_p& a);
+// a = 0 (dimension unchanged)
+
+long IsZero(const mat_ZZ_p& a);
+// test if a is the zero matrix (any dimension)
+
+
+// operator notation:
+
+mat_ZZ_p operator+(const mat_ZZ_p& a, const mat_ZZ_p& b);
+mat_ZZ_p operator-(const mat_ZZ_p& a, const mat_ZZ_p& b);
+mat_ZZ_p operator*(const mat_ZZ_p& a, const mat_ZZ_p& b);
+
+mat_ZZ_p operator-(const mat_ZZ_p& a);
+
+
+// matrix/scalar multiplication:
+
+mat_ZZ_p operator*(const mat_ZZ_p& a, const ZZ_p& b);
+mat_ZZ_p operator*(const mat_ZZ_p& a, long b);
+
+mat_ZZ_p operator*(const ZZ_p& a, const mat_ZZ_p& b);
+mat_ZZ_p operator*(long a, const mat_ZZ_p& b);
+
+// matrix/vector multiplication:
+
+vec_ZZ_p operator*(const mat_ZZ_p& a, const vec_ZZ_p& b);
+
+vec_ZZ_p operator*(const vec_ZZ_p& a, const mat_ZZ_p& b);
+
+
+// assignment operator notation:
+
+mat_ZZ_p& operator+=(mat_ZZ_p& x, const mat_ZZ_p& a);
+mat_ZZ_p& operator-=(mat_ZZ_p& x, const mat_ZZ_p& a);
+mat_ZZ_p& operator*=(mat_ZZ_p& x, const mat_ZZ_p& a);
+
+mat_ZZ_p& operator*=(mat_ZZ_p& x, const ZZ_p& a);
+mat_ZZ_p& operator*=(mat_ZZ_p& x, long a);
+
+vec_ZZ_p& operator*=(vec_ZZ_p& x, const mat_ZZ_p& a);
+
+
+
+
+
diff --git a/thirdparty/linux/ntl/doc/mat_ZZ_p.txt b/thirdparty/linux/ntl/doc/mat_ZZ_p.txt
new file mode 100644
index 0000000000..c1d102f70d
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_ZZ_p.txt
@@ -0,0 +1,161 @@
+
+/**************************************************************************\
+
+MODULE: mat_ZZ_p
+
+SUMMARY:
+
+Defines the class mat_ZZ_p.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_ZZ_p.h>
+
+
+typedef Mat<ZZ_p> mat_ZZ_p; // backward compatibility
+
+void add(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B);
+// X = A + B
+
+void sub(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B);
+// X = A - B
+
+void negate(mat_ZZ_p& X, const mat_ZZ_p& A);
+// X = - A
+
+void mul(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B);
+// X = A * B
+
+void mul(vec_ZZ_p& x, const mat_ZZ_p& A, const vec_ZZ_p& b);
+// x = A * b
+
+void mul(vec_ZZ_p& x, const vec_ZZ_p& a, const mat_ZZ_p& B);
+// x = a * B
+
+void mul(mat_ZZ_p& X, const mat_ZZ_p& A, const ZZ_p& b);
+void mul(mat_ZZ_p& X, const mat_ZZ_p& A, long b);
+// X = A * b
+
+void mul(mat_ZZ_p& X, const ZZ_p& a, const mat_ZZ_p& B);
+void mul(mat_ZZ_p& X, long a, const mat_ZZ_p& B);
+// X = a * B
+
+
+void determinant(ZZ_p& d, const mat_ZZ_p& A);
+ZZ_p determinant(const mat_ZZ_p& a);
+// d = determinant(A)
+
+
+void transpose(mat_ZZ_p& X, const mat_ZZ_p& A);
+mat_ZZ_p transpose(const mat_ZZ_p& A);
+// X = transpose of A
+
+void solve(ZZ_p& d, vec_ZZ_p& x, const mat_ZZ_p& A, const vec_ZZ_p& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves x*A = b.
+
+void solve(ZZ_p& d, const mat_ZZ_p& A, vec_ZZ_p& x, const vec_ZZ_p& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves A*x = b (so x and b are treated as column vectors).
+
+void inv(ZZ_p& d, mat_ZZ_p& X, const mat_ZZ_p& A);
+// A is an n x n matrix.  Computes d = determinant(A).  If d != 0,
+// computes X = A^{-1}.
+
+void sqr(mat_ZZ_p& X, const mat_ZZ_p& A);
+mat_ZZ_p sqr(const mat_ZZ_p& A);
+// X = A*A
+
+void inv(mat_ZZ_p& X, const mat_ZZ_p& A);
+mat_ZZ_p inv(const mat_ZZ_p& A);
+// X = A^{-1}; error is raised if A is singular
+
+void power(mat_ZZ_p& X, const mat_ZZ_p& A, const ZZ& e);
+mat_ZZ_p power(const mat_ZZ_p& A, const ZZ& e);
+
+void power(mat_ZZ_p& X, const mat_ZZ_p& A, long e);
+mat_ZZ_p power(const mat_ZZ_p& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+void ident(mat_ZZ_p& X, long n);
+mat_ZZ_p ident_mat_ZZ_p(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_ZZ_p& A, long n);
+// test if A is the n x n identity matrix
+
+void diag(mat_ZZ_p& X, long n, const ZZ_p& d);
+mat_ZZ_p diag(long n, const ZZ_p& d);
+// X = n x n diagonal matrix with d on diagonal
+
+long IsDiag(const mat_ZZ_p& A, long n, const ZZ_p& d);
+// test if A is an n x n diagonal matrix with d on diagonal
+
+
+
+
+long gauss(mat_ZZ_p& M);
+long gauss(mat_ZZ_p& M, long w);
+// Performs unitary row operations so as to bring M into row echelon
+// form.  If the optional argument w is supplied, stops when first w
+// columns are in echelon form.  The return value is the rank (or the
+// rank of the first w columns).
+
+void image(mat_ZZ_p& X, const mat_ZZ_p& A);
+// The rows of X are computed as a basis of A's row space.  X is in row
+// echelon form.
+
+void kernel(mat_ZZ_p& X, const mat_ZZ_p& A);
+// Computes a basis for the kernel of the map x -> x*A, where x is a
+// row vector.
+
+
+
+// miscellaneous:
+
+void clear(mat_ZZ_p& a);
+// a = 0 (dimension unchanged)
+
+long IsZero(const mat_ZZ_p& a);
+// test if a is the zero matrix (any dimension)
+
+
+// operator notation:
+
+mat_ZZ_p operator+(const mat_ZZ_p& a, const mat_ZZ_p& b);
+mat_ZZ_p operator-(const mat_ZZ_p& a, const mat_ZZ_p& b);
+mat_ZZ_p operator*(const mat_ZZ_p& a, const mat_ZZ_p& b);
+
+mat_ZZ_p operator-(const mat_ZZ_p& a);
+
+
+// matrix/scalar multiplication:
+
+mat_ZZ_p operator*(const mat_ZZ_p& a, const ZZ_p& b);
+mat_ZZ_p operator*(const mat_ZZ_p& a, long b);
+
+mat_ZZ_p operator*(const ZZ_p& a, const mat_ZZ_p& b);
+mat_ZZ_p operator*(long a, const mat_ZZ_p& b);
+
+// matrix/vector multiplication:
+
+vec_ZZ_p operator*(const mat_ZZ_p& a, const vec_ZZ_p& b);
+
+vec_ZZ_p operator*(const vec_ZZ_p& a, const mat_ZZ_p& b);
+
+
+// assignment operator notation:
+
+mat_ZZ_p& operator+=(mat_ZZ_p& x, const mat_ZZ_p& a);
+mat_ZZ_p& operator-=(mat_ZZ_p& x, const mat_ZZ_p& a);
+mat_ZZ_p& operator*=(mat_ZZ_p& x, const mat_ZZ_p& a);
+
+mat_ZZ_p& operator*=(mat_ZZ_p& x, const ZZ_p& a);
+mat_ZZ_p& operator*=(mat_ZZ_p& x, long a);
+
+vec_ZZ_p& operator*=(vec_ZZ_p& x, const mat_ZZ_p& a);
+
+
diff --git a/thirdparty/linux/ntl/doc/mat_ZZ_pE.cpp.html b/thirdparty/linux/ntl/doc/mat_ZZ_pE.cpp.html
new file mode 100644
index 0000000000..3d76a73239
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_ZZ_pE.cpp.html
@@ -0,0 +1,176 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/mat_ZZ_pE.cpp.html
+
+
+
+
+
+/**************************************************************************\
+
+MODULE: mat_ZZ_pE
+
+SUMMARY:
+
+Defines the class mat_ZZ_pE.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_ZZ_pE.h>
+
+
+typedef Mat<ZZ_pE> mat_ZZ_pE; // backward compatibility
+
+void add(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B);
+// X = A + B
+
+void sub(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B);
+// X = A - B
+
+void negate(mat_ZZ_pE& X, const mat_ZZ_pE& A);
+// X = - A
+
+void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B);
+// X = A * B
+
+void mul(vec_ZZ_pE& x, const mat_ZZ_pE& A, const vec_ZZ_pE& b);
+// x = A * b
+
+void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const mat_ZZ_pE& B);
+// x = a * B
+
+void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ_pE& b);
+void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ_p& b);
+void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, long b);
+// X = A * b
+
+void mul(mat_ZZ_pE& X, const ZZ_pE& a, const mat_ZZ_pE& B);
+void mul(mat_ZZ_pE& X, const ZZ_p& a, const mat_ZZ_pE& B);
+void mul(mat_ZZ_pE& X, long a, const mat_ZZ_pE& B);
+// X = a * B
+
+
+void determinant(ZZ_pE& d, const mat_ZZ_pE& A);
+ZZ_pE determinant(const mat_ZZ_pE& a);
+// d = determinant(A)
+
+
+void transpose(mat_ZZ_pE& X, const mat_ZZ_pE& A);
+mat_ZZ_pE transpose(const mat_ZZ_pE& A);
+// X = transpose of A
+
+void solve(ZZ_pE& d, vec_ZZ_pE& x, const mat_ZZ_pE& A, const vec_ZZ_pE& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves x*A = b.
+
+void solve(ZZ_pE& d, const mat_ZZ_pE& A, vec_ZZ_pE& x, const vec_ZZ_pE& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves A*x = b (so x and b are treated as column vectors).
+
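+// Example (sketch): the ZZ_pE context must be initialized before any of
+// these routines are used.  The prime and degree here are placeholders;
+// BuildIrred (declared in ZZ_pXFactoring) builds a monic irreducible
+// polynomial of the given degree.
+//
+//    ZZ_p::init(conv<ZZ>(7));    // base field Z/7
+//    ZZ_pX P;
+//    BuildIrred(P, 4);           // P irreducible of degree 4
+//    ZZ_pE::init(P);             // ZZ_pE now represents GF(7^4)
+//    mat_ZZ_pE A;  vec_ZZ_pE x, b;  ZZ_pE d;
+//    ...
+//    solve(d, x, A, b);
+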
+void inv(ZZ_pE& d, mat_ZZ_pE& X, const mat_ZZ_pE& A);
+// A is an n x n matrix.  Computes d = determinant(A).  If d != 0,
+// computes X = A^{-1}.
+
+void sqr(mat_ZZ_pE& X, const mat_ZZ_pE& A);
+mat_ZZ_pE sqr(const mat_ZZ_pE& A);
+// X = A*A  
+
+void inv(mat_ZZ_pE& X, const mat_ZZ_pE& A);
+mat_ZZ_pE inv(const mat_ZZ_pE& A);
+// X = A^{-1}; error is raised if A is singular
+
+void power(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ& e);
+mat_ZZ_pE power(const mat_ZZ_pE& A, const ZZ& e);
+
+void power(mat_ZZ_pE& X, const mat_ZZ_pE& A, long e);
+mat_ZZ_pE power(const mat_ZZ_pE& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+void ident(mat_ZZ_pE& X, long n);
+mat_ZZ_pE ident_mat_ZZ_pE(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_ZZ_pE& A, long n);
+// test if A is the n x n identity matrix
+
+void diag(mat_ZZ_pE& X, long n, const ZZ_pE& d);
+mat_ZZ_pE diag(long n, const ZZ_pE& d);
+// X = n x n diagonal matrix with d on diagonal
+
+long IsDiag(const mat_ZZ_pE& A, long n, const ZZ_pE& d);
+// test if A is an n x n diagonal matrix with d on diagonal
+
+
+
+
+long gauss(mat_ZZ_pE& M);
+long gauss(mat_ZZ_pE& M, long w);
+// Performs unitary row operations so as to bring M into row echelon
+// form.  If the optional argument w is supplied, stops when first w
+// columns are in echelon form.  The return value is the rank (or the
+// rank of the first w columns).
+
+void image(mat_ZZ_pE& X, const mat_ZZ_pE& A);
+// The rows of X are computed as a basis of A's row space.  X is in row
+// echelon form.
+
+void kernel(mat_ZZ_pE& X, const mat_ZZ_pE& A);
+// Computes a basis for the kernel of the map x -> x*A, where x is a
+// row vector.
+
+
+
+// miscellaneous:
+
+void clear(mat_ZZ_pE& a);
+// a = 0 (dimension unchanged)
+
+long IsZero(const mat_ZZ_pE& a);
+// test if a is the zero matrix (any dimension)
+
+
+// operator notation:
+
+mat_ZZ_pE operator+(const mat_ZZ_pE& a, const mat_ZZ_pE& b);
+mat_ZZ_pE operator-(const mat_ZZ_pE& a, const mat_ZZ_pE& b);
+mat_ZZ_pE operator*(const mat_ZZ_pE& a, const mat_ZZ_pE& b);
+
+mat_ZZ_pE operator-(const mat_ZZ_pE& a);
+
+
+// matrix/scalar multiplication:
+
+mat_ZZ_pE operator*(const mat_ZZ_pE& a, const ZZ_pE& b);
+mat_ZZ_pE operator*(const mat_ZZ_pE& a, const ZZ_p& b);
+mat_ZZ_pE operator*(const mat_ZZ_pE& a, long b);
+
+mat_ZZ_pE operator*(const ZZ_pE& a, const mat_ZZ_pE& b);
+mat_ZZ_pE operator*(const ZZ_p& a, const mat_ZZ_pE& b);
+mat_ZZ_pE operator*(long a, const mat_ZZ_pE& b);
+
+// matrix/vector multiplication:
+
+vec_ZZ_pE operator*(const mat_ZZ_pE& a, const vec_ZZ_pE& b);
+
+vec_ZZ_pE operator*(const vec_ZZ_pE& a, const mat_ZZ_pE& b);
+
+
+// assignment operator notation:
+
+mat_ZZ_pE& operator+=(mat_ZZ_pE& x, const mat_ZZ_pE& a);
+mat_ZZ_pE& operator-=(mat_ZZ_pE& x, const mat_ZZ_pE& a);
+mat_ZZ_pE& operator*=(mat_ZZ_pE& x, const mat_ZZ_pE& a);
+
+mat_ZZ_pE& operator*=(mat_ZZ_pE& x, const ZZ_pE& a);
+mat_ZZ_pE& operator*=(mat_ZZ_pE& x, const ZZ_p& a);
+mat_ZZ_pE& operator*=(mat_ZZ_pE& x, long a);
+
+vec_ZZ_pE& operator*=(vec_ZZ_pE& x, const mat_ZZ_pE& a);
+
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/mat_ZZ_pE.txt b/thirdparty/linux/ntl/doc/mat_ZZ_pE.txt new file mode 100644 index 0000000000..897bc33242 --- /dev/null +++ b/thirdparty/linux/ntl/doc/mat_ZZ_pE.txt @@ -0,0 +1,166 @@ + +/**************************************************************************\ + +MODULE: mat_ZZ_pE + +SUMMARY: + +Defines the class mat_ZZ_pE. + +\**************************************************************************/ + + +#include +#include + + +typedef Mat mat_ZZ_pE; // backward compatibility + +void add(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B); +// X = A + B + +void sub(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B); +// X = A - B + +void negate(mat_ZZ_pE& X, const mat_ZZ_pE& A); +// X = - A + +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B); +// X = A * B + +void mul(vec_ZZ_pE& x, const mat_ZZ_pE& A, const vec_ZZ_pE& b); +// x = A * b + +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const mat_ZZ_pE& B); +// x = a * B + +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ_pE& b); +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ_p& b); +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, long b); +// X = A * b + +void mul(mat_ZZ_pE& X, const ZZ_pE& a, const mat_ZZ_pE& B); +void mul(mat_ZZ_pE& X, const ZZ_p& a, const mat_ZZ_pE& B); +void mul(mat_ZZ_pE& X, long a, const mat_ZZ_pE& B); +// X = a * B + + +void determinant(ZZ_pE& d, const mat_ZZ_pE& A); +ZZ_pE determinant(const mat_ZZ_pE& a); +// d = determinant(A) + + +void transpose(mat_ZZ_pE& X, const mat_ZZ_pE& A); +mat_ZZ_pE transpose(const mat_ZZ_pE& A); +// X = transpose of A + +void solve(ZZ_pE& d, vec_ZZ_pE& x, const mat_ZZ_pE& A, const vec_ZZ_pE& b); +// A is an n x n matrix, b is a length n vector. Computes d = determinant(A). +// If d != 0, solves x*A = b. + +void solve(ZZ_pE& d, const mat_ZZ_pE& A, vec_ZZ_pE& x, const vec_ZZ_pE& b); +// A is an n x n matrix, b is a length n vector. Computes d = determinant(A). +// If d != 0, solves A*x = b (so x and b are treated as a column vectors). + +void inv(ZZ_pE& d, mat_ZZ_pE& X, const mat_ZZ_pE& A); +// A is an n x n matrix. Computes d = determinant(A). If d != 0, +// computes X = A^{-1}. + +void sqr(mat_ZZ_pE& X, const mat_ZZ_pE& A); +mat_ZZ_pE sqr(const mat_ZZ_pE& A); +// X = A*A + +void inv(mat_ZZ_pE& X, const mat_ZZ_pE& A); +mat_ZZ_pE inv(const mat_ZZ_pE& A); +// X = A^{-1}; error is raised if A is singular + +void power(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ& e); +mat_ZZ_pE power(const mat_ZZ_pE& A, const ZZ& e); + +void power(mat_ZZ_pE& X, const mat_ZZ_pE& A, long e); +mat_ZZ_pE power(const mat_ZZ_pE& A, long e); +// X = A^e; e may be negative (in which case A must be nonsingular). + +void ident(mat_ZZ_pE& X, long n); +mat_ZZ_pE ident_mat_ZZ_pE(long n); +// X = n x n identity matrix + +long IsIdent(const mat_ZZ_pE& A, long n); +// test if A is the n x n identity matrix + +void diag(mat_ZZ_pE& X, long n, const ZZ_pE& d); +mat_ZZ_pE diag(long n, const ZZ_pE& d); +// X = n x n diagonal matrix with d on diagonal + +long IsDiag(const mat_ZZ_pE& A, long n, const ZZ_pE& d); +// test if X is an n x n diagonal matrix with d on diagonal + + + + +long gauss(mat_ZZ_pE& M); +long gauss(mat_ZZ_pE& M, long w); +// Performs unitary row operations so as to bring M into row echelon +// form. If the optional argument w is supplied, stops when first w +// columns are in echelon form. The return value is the rank (or the +// rank of the first w columns). 
+ +void image(mat_ZZ_pE& X, const mat_ZZ_pE& A); +// The rows of X are computed as basis of A's row space. X is is row +// echelon form + +void kernel(mat_ZZ_pE& X, const mat_ZZ_pE& A); +// Computes a basis for the kernel of the map x -> x*A. where x is a +// row vector. + + + +// miscellaneous: + +void clear(mat_ZZ_pE& a); +// x = 0 (dimension unchanged) + +long IsZero(const mat_ZZ_pE& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_ZZ_pE operator+(const mat_ZZ_pE& a, const mat_ZZ_pE& b); +mat_ZZ_pE operator-(const mat_ZZ_pE& a, const mat_ZZ_pE& b); +mat_ZZ_pE operator*(const mat_ZZ_pE& a, const mat_ZZ_pE& b); + +mat_ZZ_pE operator-(const mat_ZZ_pE& a); + + +// matrix/scalar multiplication: + +mat_ZZ_pE operator*(const mat_ZZ_pE& a, const ZZ_pE& b); +mat_ZZ_pE operator*(const mat_ZZ_pE& a, const ZZ_p& b); +mat_ZZ_pE operator*(const mat_ZZ_pE& a, long b); + +mat_ZZ_pE operator*(const ZZ_pE& a, const mat_ZZ_pE& b); +mat_ZZ_pE operator*(const ZZ_p& a, const mat_ZZ_pE& b); +mat_ZZ_pE operator*(long a, const mat_ZZ_pE& b); + +// matrix/vector multiplication: + +vec_ZZ_pE operator*(const mat_ZZ_pE& a, const vec_ZZ_pE& b); + +vec_ZZ_pE operator*(const vec_ZZ_pE& a, const mat_ZZ_pE& b); + + +// assignment operator notation: + +mat_ZZ_pE& operator+=(mat_ZZ_pE& x, const mat_ZZ_pE& a); +mat_ZZ_pE& operator-=(mat_ZZ_pE& x, const mat_ZZ_pE& a); +mat_ZZ_pE& operator*=(mat_ZZ_pE& x, const mat_ZZ_pE& a); + +mat_ZZ_pE& operator*=(mat_ZZ_pE& x, const ZZ_pE& a); +mat_ZZ_pE& operator*=(mat_ZZ_pE& x, const ZZ_p& a); +mat_ZZ_pE& operator*=(mat_ZZ_pE& x, long a); + +vec_ZZ_pE& operator*=(vec_ZZ_pE& x, const mat_ZZ_pE& a); + + + diff --git a/thirdparty/linux/ntl/doc/mat_lzz_p.cpp.html b/thirdparty/linux/ntl/doc/mat_lzz_p.cpp.html new file mode 100644 index 0000000000..e5e05f4885 --- /dev/null +++ b/thirdparty/linux/ntl/doc/mat_lzz_p.cpp.html @@ -0,0 +1,215 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/mat_lzz_p.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: mat_zz_p
+
+SUMMARY:
+
+Defines the class mat_zz_p.
+Note that the modulus p need not be a prime, except as indicated below.
+
+IMPLEMENTATION NOTES:
+
+Starting with NTL version 9.7.0 (and 9.7.1), many of the routines here have
+been optimized to take better advantage of specific hardware features available
+on 64-bit Intel CPUs.  Currently, the mul, inv, determinant, solve, gauss,
+kernel, and image routines are fastest for p up to 23-bits long (assuming the
+CPU supports AVX instructions).  After that, performance degrades in three
+stages: stage 1: up to 28-bits; stage 2: up to 31-bits; stage 3: 32-bits and
+up.
+
+For primes up to 23-bits, AVX floating point instructions are used.  After
+that, ordinary integer arithmetic is used.  In a future version, I may exploit
+AVX2 integer instructions to get better stage 2 performance.  And in the more
+distant future, AVX512 instructions will be used, when they become available.
+
+On older Intel machines, or non-Intel machines that have "long long" support,
+one still gets optimizations corresponding to the three stages above.  On
+32-bit machines, one still gets three stages, just with smaller crossover
+points.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include "vec_vec_zz_p.h"
+
+
+typedef Mat<zz_p> mat_zz_p; // backward compatibility
+
+void add(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B);
+// X = A + B
+
+void sub(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B);
+// X = A - B
+
+void mul(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B);
+// X = A * B
+
+void mul(vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b);
+// x = A * b
+
+void mul(vec_zz_p& x, const vec_zz_p& a, const mat_zz_p& B);
+// x = a * B
+
+void mul(mat_zz_p& X, const mat_zz_p& A, zz_p b);
+void mul(mat_zz_p& X, const mat_zz_p& A, long b);
+// X = A * b
+
+void mul(mat_zz_p& X, zz_p a, const mat_zz_p& B);
+void mul(mat_zz_p& X, long a, const mat_zz_p& B);
+// X = a * B
+
+
+void transpose(mat_zz_p& X, const mat_zz_p& A);
+mat_zz_p transpose(const mat_zz_p& A);
+// X = transpose of A
+
+
+void determinant(zz_p& d, const mat_zz_p& A);
+zz_p determinant(const mat_zz_p& a);
+// d = determinant(A)
+
+void solve(zz_p& d, vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves x*A = b (so x and b are treated as row vectors).
+
+void solve(zz_p& d, const mat_zz_p& A, vec_zz_p& x, const vec_zz_p& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves A*x = b (so x and b are treated as column vectors).
+
+void inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A);
+// A is an n x n matrix.  Computes d = determinant(A).  If d != 0,
+// computes X = A^{-1}.
+
+
+void inv(mat_zz_p& X, const mat_zz_p& A);
+mat_zz_p inv(const mat_zz_p& A);
+// X = A^{-1}; error is raised if A is singular
+
+void power(mat_zz_p& X, const mat_zz_p& A, const ZZ& e);
+mat_zz_p power(const mat_zz_p& A, const ZZ& e);
+void power(mat_zz_p& X, const mat_zz_p& A, long e);
+mat_zz_p power(const mat_zz_p& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+// NOTE: the routines determinant, solve, inv, and power (with negative
+// exponent) all require that the modulus p is prime: during elimination, if a
+// non-zero pivot element does not have an inverse, an error is raised.  The
+// following "relaxed" versions of these routines will also work with prime
+// powers, if the optional parameter relax is true (which is the default).
+// However, note that in these relaxed routines, if a computed determinant
+// value is zero, this may not be the true determinant: all that you can assume
+// is that the true determinant is not invertible mod p.  If the parameter
+// relax==false, then these routines behave identically to their "unrelaxed"
+// counterparts.
+
+void relaxed_determinant(zz_p& d, const mat_zz_p& A, bool relax=true);
+zz_p relaxed_determinant(const mat_zz_p& a, bool relax=true);
+void relaxed_solve(zz_p& d, vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b, bool relax=true);
+void relaxed_solve(zz_p& d, const mat_zz_p& A, vec_zz_p& x, const vec_zz_p& b, bool relax=true);
+void relaxed_inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A, bool relax=true);
+void relaxed_inv(mat_zz_p& X, const mat_zz_p& A, bool relax=true);
+mat_zz_p relaxed_inv(const mat_zz_p& A, bool relax=true);
+void relaxed_power(mat_zz_p& X, const mat_zz_p& A, const ZZ& e, bool relax=true);
+mat_zz_p relaxed_power(const mat_zz_p& A, const ZZ& e, bool relax=true);
+void relaxed_power(mat_zz_p& X, const mat_zz_p& A, long e, bool relax=true);
+mat_zz_p relaxed_power(const mat_zz_p& A, long e, bool relax=true);
+
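+// Example (sketch): with a prime-power modulus the relaxed variants may
+// be used; the modulus below is a placeholder.
+//
+//    zz_p::init(243);                // 243 = 3^5, not prime
+//    mat_zz_p A;  zz_p d;
+//    ...
+//    relaxed_determinant(d, A);      // relax=true by default
+//    // if d == 0 here, one may only conclude that det(A) is not
+//    // invertible mod 243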
+
+void sqr(mat_zz_p& X, const mat_zz_p& A);
+mat_zz_p sqr(const mat_zz_p& A);
+// X = A*A  
+
+void ident(mat_zz_p& X, long n);
+mat_zz_p ident_mat_zz_p(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_zz_p& A, long n);
+// test if A is the n x n identity matrix
+
+void diag(mat_zz_p& X, long n, zz_p d);
+mat_zz_p diag(long n, zz_p d);
+// X = n x n diagonal matrix with d on diagonal
+
+long IsDiag(const mat_zz_p& A, long n, zz_p d);
+// test if A is an n x n diagonal matrix with d on diagonal
+
+
+
+long gauss(mat_zz_p& M);
+long gauss(mat_zz_p& M, long w);
+// Performs unitary row operations so as to bring M into row echelon
+// form.  If the optional argument w is supplied, stops when first w
+// columns are in echelon form.  The return value is the rank (or the
+// rank of the first w columns).
+
+void image(mat_zz_p& X, const mat_zz_p& A);
+// The rows of X are computed as a basis of A's row space.  X is in row
+// echelon form.
+
+void kernel(mat_zz_p& X, const mat_zz_p& A);
+// Computes a basis for the kernel of the map x -> x*A, where x is a
+// row vector.
+
+// NOTE: the gauss, image, and kernel routines all require that
+// the modulus p is prime.
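+
+// Example (sketch): rank and kernel mod a prime; the modulus is a
+// placeholder and A is assumed to be filled in by the caller.
+//
+//    zz_p::init(101);
+//    mat_zz_p A, M, K;
+//    ...
+//    M = A;                  // gauss works in place, so work on a copy
+//    long r = gauss(M);      // r = rank of A
+//    kernel(K, A);           // rows of K span { x : x*A == 0 }
+//    // K.NumRows() == A.NumRows() - r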
+
+
+
+// miscellaneous:
+
+void clear(mat_zz_p& a);
+// a = 0 (dimension unchanged)
+
+long IsZero(const mat_zz_p& a);
+// test if a is the zero matrix (any dimension)
+
+
+// operator notation:
+
+mat_zz_p operator+(const mat_zz_p& a, const mat_zz_p& b);
+mat_zz_p operator-(const mat_zz_p& a, const mat_zz_p& b);
+mat_zz_p operator*(const mat_zz_p& a, const mat_zz_p& b);
+
+mat_zz_p operator-(const mat_zz_p& a);
+
+
+// matrix/scalar multiplication:
+
+mat_zz_p operator*(const mat_zz_p& a, zz_p b);
+mat_zz_p operator*(const mat_zz_p& a, long b);
+
+mat_zz_p operator*(zz_p a, const mat_zz_p& b);
+mat_zz_p operator*(long a, const mat_zz_p& b);
+
+
+// matrix/vector multiplication:
+
+vec_zz_p operator*(const mat_zz_p& a, const vec_zz_p& b);
+
+vec_zz_p operator*(const vec_zz_p& a, const mat_zz_p& b);
+
+
+// assignment operator notation:
+
+mat_zz_p& operator+=(mat_zz_p& x, const mat_zz_p& a);
+mat_zz_p& operator-=(mat_zz_p& x, const mat_zz_p& a);
+mat_zz_p& operator*=(mat_zz_p& x, const mat_zz_p& a);
+
+mat_zz_p& operator*=(mat_zz_p& x, zz_p a);
+mat_zz_p& operator*=(mat_zz_p& x, long a);
+
+vec_zz_p& operator*=(vec_zz_p& x, const mat_zz_p& a);
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/mat_lzz_p.txt b/thirdparty/linux/ntl/doc/mat_lzz_p.txt new file mode 100644 index 0000000000..dc82734107 --- /dev/null +++ b/thirdparty/linux/ntl/doc/mat_lzz_p.txt @@ -0,0 +1,205 @@ + +/**************************************************************************\ + +MODULE: mat_zz_p + +SUMMARY: + +Defines the class mat_zz_p. +Note that the modulus p need not be a prime, except as indicated below. + +IMPLEMENTATION NOTES: + +Starting with NTL version 9.7.0 (and 9.7.1), many of the routines here have +been optimized to take better advantage of specific hardware features available +on 64-bit Intel CPU's. Currently, the mul, inv, determinant, solve, gauss, +kernel, and image routines are fastest for p up to 23-bits long (assuming the +CPU supports AVX instructions). After that, performance degrades in three +stages: stage 1: up to 28-bits; stage 2: up to 31-bits; stage 3: 32-bits and +up. + +For primes up to 23-bits, AVX floating point instructions are used. After +that, ordinary integer arithmetic is used. In a future version, I may exploit +AVX2 integer instructions to get better stage 2 performance. And in the more +distant future, AVX512 instructions will be used, when they become available. + +On older Intel machines, or non-Intel machines that have "long long" support, +one still gets optimizations corresponding to the three stages above. On +32-bit machines, one still gets three stages, just with smaller crossover +points. + +\**************************************************************************/ + + +#include +#include "vec_vec_zz_p.h" + + +typedef Mat mat_zz_p; // backward compatibility + +void add(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B); +// X = A + B + +void sub(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B); +// X = A - B + +void mul(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B); +// X = A * B + +void mul(vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b); +// x = A * b + +void mul(vec_zz_p& x, const vec_zz_p& a, const mat_zz_p& B); +// x = a * B + +void mul(mat_zz_p& X, const mat_zz_p& A, zz_p b); +void mul(mat_zz_p& X, const mat_zz_p& A, long b); +// X = A * b + +void mul(mat_zz_p& X, zz_p a, const mat_zz_p& B); +void mul(mat_zz_p& X, long a, const mat_zz_p& B); +// X = a * B + + +void transpose(mat_zz_p& X, const mat_zz_p& A); +mat_zz_p transpose(const mat_zz_p& A); +// X = transpose of A + + +void determinant(zz_p& d, const mat_zz_p& A); +zz_p determinant(const mat_zz_p& a); +// d = determinant(A) + +void solve(zz_p& d, vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b); +// A is an n x n matrix, b is a length n vector. Computes d = determinant(A). +// If d != 0, solves x*A = b (so x and b are treated as a row vectors). + +void solve(zz_p& d, const mat_zz_p& A, vec_zz_p& x, const vec_zz_p& b); +// A is an n x n matrix, b is a length n vector. Computes d = determinant(A). +// If d != 0, solves A*x = b (so x and b are treated as a column vectors). + +void inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A); +// A is an n x n matrix. Computes d = determinant(A). If d != 0, +// computes X = A^{-1}. + + +void inv(mat_zz_p& X, const mat_zz_p& A); +mat_zz_p inv(const mat_zz_p& A); +// X = A^{-1}; error is raised if A is singular + +void power(mat_zz_p& X, const mat_zz_p& A, const ZZ& e); +mat_zz_p power(const mat_zz_p& A, const ZZ& e); +void power(mat_zz_p& X, const mat_zz_p& A, long e); +mat_zz_p power(const mat_zz_p& A, long e); +// X = A^e; e may be negative (in which case A must be nonsingular). 
+ +// NOTE: the routines determinant, solve, inv, and power (with negative +// exponent) all require that the modulus p is prime: during elimination, if a +// non-zero pivot element does not have an inverse, and error is raised. The +// following "relaxed" versions of these routines will also work with prime +// powers, if the optional parameter relax is true (which is the default). +// However, note that in these relaxed routines, if a computed determinant +// value is zero, this may not be the true determinant: all that you can assume +// is that the true determinant is is not invertible mod p. If the parameter +// relax==false, then these routines behave identically to their "unrelaxed" +// counterparts. + +void relaxed_determinant(zz_p& d, const mat_zz_p& A, bool relax=true); +zz_p relaxed_determinant(const mat_zz_p& a, bool relax=true); +void relaxed_solve(zz_p& d, vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b, bool relax=true); +void relaxed_solve(zz_p& d, const mat_zz_p& A, vec_zz_p& x, const vec_zz_p& b, bool relax=true); +void relaxed_inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A, bool relax=true); +void relaxed_inv(mat_zz_p& X, const mat_zz_p& A, bool relax=true); +mat_zz_p relaxed_inv(const mat_zz_p& A, bool relax=true); +void relaxed_power(mat_zz_p& X, const mat_zz_p& A, const ZZ& e, bool relax=true); +mat_zz_p relaxed_power(const mat_zz_p& A, const ZZ& e, bool relax=true); +void relaxed_power(mat_zz_p& X, const mat_zz_p& A, long e, bool relax=true); +mat_zz_p relaxed_power(const mat_zz_p& A, long e, bool relax=true); + + +void sqr(mat_zz_p& X, const mat_zz_p& A); +mat_zz_p sqr(const mat_zz_p& A); +// X = A*A + +void ident(mat_zz_p& X, long n); +mat_zz_p ident_mat_zz_p(long n); +// X = n x n identity matrix + +long IsIdent(const mat_zz_p& A, long n); +// test if A is the n x n identity matrix + +void diag(mat_zz_p& X, long n, zz_p d); +mat_zz_p diag(long n, zz_p d); +// X = n x n diagonal matrix with d on diagonal + +long IsDiag(const mat_zz_p& A, long n, zz_p d); +// test if X is an n x n diagonal matrix with d on diagonal + + + +long gauss(mat_zz_p& M); +long gauss(mat_zz_p& M, long w); +// Performs unitary row operations so as to bring M into row echelon +// form. If the optional argument w is supplied, stops when first w +// columns are in echelon form. The return value is the rank (or the +// rank of the first w columns). + +void image(mat_zz_p& X, const mat_zz_p& A); +// The rows of X are computed as basis of A's row space. X is is row +// echelon form + +void kernel(mat_zz_p& X, const mat_zz_p& A); +// Computes a basis for the kernel of the map x -> x*A. where x is a +// row vector. + +// NOTE: the gauss, image, and kernel routines all require that +// the modulus p is prime. 
+ + + +// miscellaneous: + +void clear(mat_zz_p& a); +// x = 0 (dimension unchanged) + +long IsZero(const mat_zz_p& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_zz_p operator+(const mat_zz_p& a, const mat_zz_p& b); +mat_zz_p operator-(const mat_zz_p& a, const mat_zz_p& b); +mat_zz_p operator*(const mat_zz_p& a, const mat_zz_p& b); + +mat_zz_p operator-(const mat_zz_p& a); + + +// matrix/scalar multiplication: + +mat_zz_p operator*(const mat_zz_p& a, zz_p b); +mat_zz_p operator*(const mat_zz_p& a, long b); + +mat_zz_p operator*(zz_p a, const mat_zz_p& b); +mat_zz_p operator*(long a, const mat_zz_p& b); + + +// matrix/vector multiplication: + +vec_zz_p operator*(const mat_zz_p& a, const vec_zz_p& b); + +vec_zz_p operator*(const vec_zz_p& a, const mat_zz_p& b); + + +// assignment operator notation: + +mat_zz_p& operator+=(mat_zz_p& x, const mat_zz_p& a); +mat_zz_p& operator-=(mat_zz_p& x, const mat_zz_p& a); +mat_zz_p& operator*=(mat_zz_p& x, const mat_zz_p& a); + +mat_zz_p& operator*=(mat_zz_p& x, zz_p a); +mat_zz_p& operator*=(mat_zz_p& x, long a); + +vec_zz_p& operator*=(vec_zz_p& x, const mat_zz_p& a); + + diff --git a/thirdparty/linux/ntl/doc/mat_lzz_pE.cpp.html b/thirdparty/linux/ntl/doc/mat_lzz_pE.cpp.html new file mode 100644 index 0000000000..abdc634592 --- /dev/null +++ b/thirdparty/linux/ntl/doc/mat_lzz_pE.cpp.html @@ -0,0 +1,176 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/mat_lzz_pE.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: mat_zz_pE
+
+SUMMARY:
+
+Defines the class mat_zz_pE.
+
+\**************************************************************************/
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_lzz_pE.h>
+
+
+typedef Mat<zz_pE> mat_zz_pE; // backward compatibility
+
+void add(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B);
+// X = A + B
+
+void sub(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B);
+// X = A - B
+
+void negate(mat_zz_pE& X, const mat_zz_pE& A);
+// X = - A
+
+void mul(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B);
+// X = A * B
+
+void mul(vec_zz_pE& x, const mat_zz_pE& A, const vec_zz_pE& b);
+// x = A * b
+
+void mul(vec_zz_pE& x, const vec_zz_pE& a, const mat_zz_pE& B);
+// x = a * B
+
+void mul(mat_zz_pE& X, const mat_zz_pE& A, const zz_pE& b);
+void mul(mat_zz_pE& X, const mat_zz_pE& A, const zz_p& b);
+void mul(mat_zz_pE& X, const mat_zz_pE& A, long b);
+// X = A * b
+
+void mul(mat_zz_pE& X, const zz_pE& a, const mat_zz_pE& B);
+void mul(mat_zz_pE& X, const zz_p& a, const mat_zz_pE& B);
+void mul(mat_zz_pE& X, long a, const mat_zz_pE& B);
+// X = a * B
+
+
+void determinant(zz_pE& d, const mat_zz_pE& A);
+zz_pE determinant(const mat_zz_pE& a);
+// d = determinant(A)
+
+
+void transpose(mat_zz_pE& X, const mat_zz_pE& A);
+mat_zz_pE transpose(const mat_zz_pE& A);
+// X = transpose of A
+
+void solve(zz_pE& d, vec_zz_pE& x, const mat_zz_pE& A, const vec_zz_pE& b);
+// A is an n x n matrix, b is a length n vector.  Computes d =
+// determinant(A).  If d != 0, solves x*A = b.
+
+void solve(zz_pE& d, const mat_zz_pE& A, vec_zz_pE& x, const vec_zz_pE& b);
+// A is an n x n matrix, b is a length n vector.  Computes d = determinant(A).
+// If d != 0, solves A*x = b (so x and b are treated as column vectors).
+
+void inv(zz_pE& d, mat_zz_pE& X, const mat_zz_pE& A);
+// A is an n x n matrix.  Computes d = determinant(A).  If d != 0,
+// computes X = A^{-1}.
+
+void sqr(mat_zz_pE& X, const mat_zz_pE& A);
+mat_zz_pE sqr(const mat_zz_pE& A);
+// X = A*A  
+
+void inv(mat_zz_pE& X, const mat_zz_pE& A);
+mat_zz_pE inv(const mat_zz_pE& A);
+// X = A^{-1}; error is raised if A is singular
+
+void power(mat_zz_pE& X, const mat_zz_pE& A, const ZZ& e);
+mat_zz_pE power(const mat_zz_pE& A, const ZZ& e);
+
+void power(mat_zz_pE& X, const mat_zz_pE& A, long e);
+mat_zz_pE power(const mat_zz_pE& A, long e);
+// X = A^e; e may be negative (in which case A must be nonsingular).
+
+void ident(mat_zz_pE& X, long n);
+mat_zz_pE ident_mat_zz_pE(long n);
+// X = n x n identity matrix
+
+long IsIdent(const mat_zz_pE& A, long n);
+// test if A is the n x n identity matrix
+
+void diag(mat_zz_pE& X, long n, const zz_pE& d);
+mat_zz_pE diag(long n, const zz_pE& d);
+// X = n x n diagonal matrix with d on diagonal
+
+long IsDiag(const mat_zz_pE& A, long n, const zz_pE& d);
+// test if A is an n x n diagonal matrix with d on diagonal
+
+
+
+
+long gauss(mat_zz_pE& M);
+long gauss(mat_zz_pE& M, long w);
+// Performs unitary row operations so as to bring M into row echelon
+// form.  If the optional argument w is supplied, stops when first w
+// columns are in echelon form.  The return value is the rank (or the
+// rank of the first w columns).
+
+void image(mat_zz_pE& X, const mat_zz_pE& A);
+// The rows of X are computed as a basis of A's row space.  X is in row
+// echelon form.
+
+void kernel(mat_zz_pE& X, const mat_zz_pE& A);
+// Computes a basis for the kernel of the map x -> x*A, where x is a
+// row vector.
+
+
+
+// miscellaneous:
+
+void clear(mat_zz_pE& a);
+// a = 0 (dimension unchanged)
+
+long IsZero(const mat_zz_pE& a);
+// test if a is the zero matrix (any dimension)
+
+
+// operator notation:
+
+mat_zz_pE operator+(const mat_zz_pE& a, const mat_zz_pE& b);
+mat_zz_pE operator-(const mat_zz_pE& a, const mat_zz_pE& b);
+mat_zz_pE operator*(const mat_zz_pE& a, const mat_zz_pE& b);
+
+mat_zz_pE operator-(const mat_zz_pE& a);
+
+
+// matrix/scalar multiplication:
+
+mat_zz_pE operator*(const mat_zz_pE& a, const zz_pE& b);
+mat_zz_pE operator*(const mat_zz_pE& a, const zz_p& b);
+mat_zz_pE operator*(const mat_zz_pE& a, long b);
+
+mat_zz_pE operator*(const zz_pE& a, const mat_zz_pE& b);
+mat_zz_pE operator*(const zz_p& a, const mat_zz_pE& b);
+mat_zz_pE operator*(long a, const mat_zz_pE& b);
+
+// matrix/vector multiplication:
+
+vec_zz_pE operator*(const mat_zz_pE& a, const vec_zz_pE& b);
+
+vec_zz_pE operator*(const vec_zz_pE& a, const mat_zz_pE& b);
+
+
+// assignment operator notation:
+
+mat_zz_pE& operator+=(mat_zz_pE& x, const mat_zz_pE& a);
+mat_zz_pE& operator-=(mat_zz_pE& x, const mat_zz_pE& a);
+mat_zz_pE& operator*=(mat_zz_pE& x, const mat_zz_pE& a);
+
+mat_zz_pE& operator*=(mat_zz_pE& x, const zz_pE& a);
+mat_zz_pE& operator*=(mat_zz_pE& x, const zz_p& a);
+mat_zz_pE& operator*=(mat_zz_pE& x, long a);
+
+vec_zz_pE& operator*=(vec_zz_pE& x, const mat_zz_pE& a);
+
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/mat_lzz_pE.txt b/thirdparty/linux/ntl/doc/mat_lzz_pE.txt new file mode 100644 index 0000000000..0648bfa10a --- /dev/null +++ b/thirdparty/linux/ntl/doc/mat_lzz_pE.txt @@ -0,0 +1,166 @@ + +/**************************************************************************\ + +MODULE: mat_zz_pE + +SUMMARY: + +Defines the class mat_zz_pE. + +\**************************************************************************/ + + +#include +#include + + +typedef Mat mat_zz_pE; // backward compatibility + +void add(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B); +// X = A + B + +void sub(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B); +// X = A - B + +void negate(mat_zz_pE& X, const mat_zz_pE& A); +// X = - A + +void mul(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B); +// X = A * B + +void mul(vec_zz_pE& x, const mat_zz_pE& A, const vec_zz_pE& b); +// x = A * b + +void mul(vec_zz_pE& x, const vec_zz_pE& a, const mat_zz_pE& B); +// x = a * B + +void mul(mat_zz_pE& X, const mat_zz_pE& A, const zz_pE& b); +void mul(mat_zz_pE& X, const mat_zz_pE& A, const zz_p& b); +void mul(mat_zz_pE& X, const mat_zz_pE& A, long b); +// X = A * b + +void mul(mat_zz_pE& X, const zz_pE& a, const mat_zz_pE& B); +void mul(mat_zz_pE& X, const zz_p& a, const mat_zz_pE& B); +void mul(mat_zz_pE& X, long a, const mat_zz_pE& B); +// X = a * B + + +void determinant(zz_pE& d, const mat_zz_pE& A); +zz_pE determinant(const mat_zz_pE& a); +// d = determinant(A) + + +void transpose(mat_zz_pE& X, const mat_zz_pE& A); +mat_zz_pE transpose(const mat_zz_pE& A); +// X = transpose of A + +void solve(zz_pE& d, vec_zz_pE& x, const mat_zz_pE& A, const vec_zz_pE& b); +// A is an n x n matrix, b is a length n vector. Computes d = +// determinant(A). If d != 0, solves x*A = b. + +void solve(zz_pE& d, const mat_zz_pE& A, vec_zz_pE& x, const vec_zz_pE& b); +// A is an n x n matrix, b is a length n vector. Computes d = determinant(A). +// If d != 0, solves A*x = b (so x and b are treated as a column vectors). + +void inv(zz_pE& d, mat_zz_pE& X, const mat_zz_pE& A); +// A is an n x n matrix. Computes d = determinant(A). If d != 0, +// computes X = A^{-1}. + +void sqr(mat_zz_pE& X, const mat_zz_pE& A); +mat_zz_pE sqr(const mat_zz_pE& A); +// X = A*A + +void inv(mat_zz_pE& X, const mat_zz_pE& A); +mat_zz_pE inv(const mat_zz_pE& A); +// X = A^{-1}; error is raised if A is singular + +void power(mat_zz_pE& X, const mat_zz_pE& A, const ZZ& e); +mat_zz_pE power(const mat_zz_pE& A, const ZZ& e); + +void power(mat_zz_pE& X, const mat_zz_pE& A, long e); +mat_zz_pE power(const mat_zz_pE& A, long e); +// X = A^e; e may be negative (in which case A must be nonsingular). + +void ident(mat_zz_pE& X, long n); +mat_zz_pE ident_mat_zz_pE(long n); +// X = n x n identity matrix + +long IsIdent(const mat_zz_pE& A, long n); +// test if A is the n x n identity matrix + +void diag(mat_zz_pE& X, long n, const zz_pE& d); +mat_zz_pE diag(long n, const zz_pE& d); +// X = n x n diagonal matrix with d on diagonal + +long IsDiag(const mat_zz_pE& A, long n, const zz_pE& d); +// test if X is an n x n diagonal matrix with d on diagonal + + + + +long gauss(mat_zz_pE& M); +long gauss(mat_zz_pE& M, long w); +// Performs unitary row operations so as to bring M into row echelon +// form. If the optional argument w is supplied, stops when first w +// columns are in echelon form. The return value is the rank (or the +// rank of the first w columns). 
+ +void image(mat_zz_pE& X, const mat_zz_pE& A); +// The rows of X are computed as basis of A's row space. X is is row +// echelon form + +void kernel(mat_zz_pE& X, const mat_zz_pE& A); +// Computes a basis for the kernel of the map x -> x*A. where x is a +// row vector. + + + +// miscellaneous: + +void clear(mat_zz_pE& a); +// x = 0 (dimension unchanged) + +long IsZero(const mat_zz_pE& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_zz_pE operator+(const mat_zz_pE& a, const mat_zz_pE& b); +mat_zz_pE operator-(const mat_zz_pE& a, const mat_zz_pE& b); +mat_zz_pE operator*(const mat_zz_pE& a, const mat_zz_pE& b); + +mat_zz_pE operator-(const mat_zz_pE& a); + + +// matrix/scalar multiplication: + +mat_zz_pE operator*(const mat_zz_pE& a, const zz_pE& b); +mat_zz_pE operator*(const mat_zz_pE& a, const zz_p& b); +mat_zz_pE operator*(const mat_zz_pE& a, long b); + +mat_zz_pE operator*(const zz_pE& a, const mat_zz_pE& b); +mat_zz_pE operator*(const zz_p& a, const mat_zz_pE& b); +mat_zz_pE operator*(long a, const mat_zz_pE& b); + +// matrix/vector multiplication: + +vec_zz_pE operator*(const mat_zz_pE& a, const vec_zz_pE& b); + +vec_zz_pE operator*(const vec_zz_pE& a, const mat_zz_pE& b); + + +// assignment operator notation: + +mat_zz_pE& operator+=(mat_zz_pE& x, const mat_zz_pE& a); +mat_zz_pE& operator-=(mat_zz_pE& x, const mat_zz_pE& a); +mat_zz_pE& operator*=(mat_zz_pE& x, const mat_zz_pE& a); + +mat_zz_pE& operator*=(mat_zz_pE& x, const zz_pE& a); +mat_zz_pE& operator*=(mat_zz_pE& x, const zz_p& a); +mat_zz_pE& operator*=(mat_zz_pE& x, long a); + +vec_zz_pE& operator*=(vec_zz_pE& x, const mat_zz_pE& a); + + + diff --git a/thirdparty/linux/ntl/doc/mat_poly_ZZ.cpp.html b/thirdparty/linux/ntl/doc/mat_poly_ZZ.cpp.html new file mode 100644 index 0000000000..f0ea657971 --- /dev/null +++ b/thirdparty/linux/ntl/doc/mat_poly_ZZ.cpp.html @@ -0,0 +1,31 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/mat_poly_ZZ.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: mat_poly_ZZ
+
+SUMMARY:
+
+Routine for computing the characteristic polynomial of a matrix over ZZ.
+
+
+
+\**************************************************************************/
+
+
+#include <NTL/mat_ZZ.h>
+#include <NTL/ZZX.h>
+
+void CharPoly(ZZX& f, const mat_ZZ& M);
+// f = characteristic polynomial of M
+
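+// Example (sketch): the characteristic polynomial of a 2 x 2 matrix.
+//
+//    mat_ZZ M;
+//    M.SetDims(2, 2);
+//    M[0][0] = 1;  M[0][1] = 2;
+//    M[1][0] = 3;  M[1][1] = 4;
+//    ZZX f;
+//    CharPoly(f, M);   // f = X^2 - 5*X - 2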
+
+
+
diff --git a/thirdparty/linux/ntl/doc/mat_poly_ZZ.txt b/thirdparty/linux/ntl/doc/mat_poly_ZZ.txt
new file mode 100644
index 0000000000..062edc7636
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_poly_ZZ.txt
@@ -0,0 +1,21 @@
+
+/**************************************************************************\
+
+MODULE: mat_poly_ZZ
+
+SUMMARY:
+
+Routine for computing the characteristic polynomial of a matrix over ZZ.
+
+
+
+\**************************************************************************/
+
+
+#include <NTL/mat_ZZ.h>
+#include <NTL/ZZX.h>
+
+void CharPoly(ZZX& f, const mat_ZZ& M);
+// f = characteristic polynomial of M
+
+
diff --git a/thirdparty/linux/ntl/doc/mat_poly_ZZ_p.cpp.html b/thirdparty/linux/ntl/doc/mat_poly_ZZ_p.cpp.html
new file mode 100644
index 0000000000..92cf0bb352
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_poly_ZZ_p.cpp.html
@@ -0,0 +1,30 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/mat_poly_ZZ_p.cpp.html
+
+
+
+
+
+/*****************************************************************************\
+
+MODULE: mat_poly_ZZ_p
+
+SUMMARY:
+
+Routine for computing the characteristic polynomial of a matrix over ZZ_p.
+
+
+
+\*****************************************************************************/
+
+
+#include <NTL/mat_ZZ_p.h>
+#include <NTL/ZZ_pX.h>
+
+void CharPoly(ZZ_pX& f, const mat_ZZ_p& M);
+// f = characteristic polynomial of M
+
+
+
diff --git a/thirdparty/linux/ntl/doc/mat_poly_ZZ_p.txt b/thirdparty/linux/ntl/doc/mat_poly_ZZ_p.txt
new file mode 100644
index 0000000000..f3b79564a9
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_poly_ZZ_p.txt
@@ -0,0 +1,20 @@
+
+/*****************************************************************************\
+
+MODULE: mat_poly_ZZ_p
+
+SUMMARY:
+
+Routine for computing the characteristic polynomial of a matrix over ZZ_p.
+
+
+
+\*****************************************************************************/
+
+
+#include <NTL/mat_ZZ_p.h>
+#include <NTL/ZZ_pX.h>
+
+void CharPoly(ZZ_pX& f, const mat_ZZ_p& M);
+// f = characteristic polynomial of M
+
diff --git a/thirdparty/linux/ntl/doc/mat_poly_lzz_p.cpp.html b/thirdparty/linux/ntl/doc/mat_poly_lzz_p.cpp.html
new file mode 100644
index 0000000000..5702aafa69
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_poly_lzz_p.cpp.html
@@ -0,0 +1,31 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/mat_poly_lzz_p.cpp.html
+
+
+
+
+
+/**************************************************************************\
+
+MODULE: mat_poly_zz_p
+
+SUMMARY:
+
+Routine for computing the characteristic polynomial of a matrix over zz_p.
+
+
+
+\**************************************************************************/
+
+
+#include "mat_zz_p.h"
+#include "zz_pX.h"
+
+void CharPoly(zz_pX& f, const mat_zz_p& M);
+// f = characteristic polynomial of M
+
+
+
+
diff --git a/thirdparty/linux/ntl/doc/mat_poly_lzz_p.txt b/thirdparty/linux/ntl/doc/mat_poly_lzz_p.txt
new file mode 100644
index 0000000000..e1323255aa
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/mat_poly_lzz_p.txt
@@ -0,0 +1,21 @@
+
+/**************************************************************************\
+
+MODULE: mat_poly_zz_p
+
+SUMMARY:
+
+Routine for computing the characteristic polynomial of a matrix over zz_p.
+
+
+
+\**************************************************************************/
+
+
+#include <NTL/mat_lzz_p.h>
+#include <NTL/lzz_pX.h>
+
+void CharPoly(zz_pX& f, const mat_zz_p& M);
+// f = characteristic polynomial of M
+
+
diff --git a/thirdparty/linux/ntl/doc/matrix.cpp.html b/thirdparty/linux/ntl/doc/matrix.cpp.html
new file mode 100644
index 0000000000..45c0b9ea0f
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/matrix.cpp.html
@@ -0,0 +1,195 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/matrix.cpp.html
+
+
+
+
+
+/**************************************************************************\
+
+MODULE: matrix
+
+SUMMARY:
+
+Matrix templates.
+
+The declaration
+
+   Mat<T> M;
+
+creates a 0 x 0 matrix.  
+
+We can make it have 10 rows and 20 columns like this:
+
+   M.SetDims(10, 20);
+
+A row can be accessed as M[i], indexing from 0, or as M(i), indexing from 1.
+A matrix entry can be accessed as M[i][j], indexing from 0, or as
+M(i, j), indexing from 1.
+
+A matrix is represented as a Vec< Vec<T> >: a vector of rows, where
+each row is a Vec<T>.  Any attempt to resize one of the rows so
+as to create a non-rectangular matrix will result in a run-time
+error.
+
+The dimensions of an existing matrix may be changed.  If the number of
+columns does not change, then the matrix is just "resized" like a vector,
+and no information is lost.  Otherwise, if the number of columns changes,
+the matrix is completely destroyed, and a new matrix is created.
+
+
+\**************************************************************************/
+
+
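+// Example (an illustrative sketch; assumes <NTL/matrix.h> and <NTL/ZZ.h>
+// are included and namespace NTL is in scope):
+
+   Mat<ZZ> M;
+   M.SetDims(2, 3);    // 2 x 3, entries default-initialized (to 0 for ZZ)
+   M[0][0] = 5;        // 0-based access
+   M(1, 2) = 7;        // 1-based access: same entry as M[0][1]
+   M.SetDims(4, 3);    // column count unchanged: old entries retained,
+                       //   new rows default-initialized
+   M.SetDims(4, 5);    // column count changed: contents destroyed
+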
+// EXCEPTIONS: all functions below do not throw any exceptions,
+//   except as noted
+
+template<class T>
+class Mat {
+
+   typedef typename Vec<T>::value_type value_type;
+   typedef typename Vec<T>::reference reference;
+   typedef typename Vec<T>::const_reference const_reference;
+
+
+   Mat(); // initially 0 x 0
+
+   Mat(const Mat<T>& a);
+   // copy constructor
+
+   // EXCEPTIONS: may throw
+
+
+   Mat& operator=(const Mat<T>& a);
+   // assignment
+
+   // EXCEPTIONS: may throw, weak ES (but dimensions of LHS
+   //   will be either that of old LHS or RHS)
+
+   ~Mat();
+   // destructor
+
+   Mat(INIT_SIZE_TYPE, long n, long m);
+   // Mat(INIT_SIZE, n, m) initializes an n x m matrix, invoking
+   // the default constructor for T to initialize entries.
+
+   // EXCEPTIONS: may throw
+
+   void SetDims(long n, long m);
+   // M.SetDims(n, m) makes M have dimension n x m.  If the number of
+   // columns (m) changes, previous storage is freed, and space for M
+   // is reallocated and initialized; otherwise, more rows are
+   // allocated as necessary (when number of rows increases),
+   // excess rows are retained (when number of rows decreases),
+   // and--importantly--the contents do not change.
+
+   // EXCEPTIONS: strong ES (although underlying vector representation
+   //    may be reallocated)
+
+   void kill();  // free storage and make 0 x 0
+
+   long NumRows() const;
+   // M.NumRows() returns the number of rows of M
+
+   long NumCols() const;
+   // M.NumCols() returns the number of columns of M
+
+   Vec<T>& operator[](long i);
+   const Vec<T>& operator[](long i) const;
+   // access row i, initial index 0.  
+   // Even if one has read/write access to a row, any attempt
+   // to change its length will raise an error.
+
+   // EXCEPTIONS: may throw if range checking is turned on
+
+   Vec<T>& operator()(long i);
+   const Vec<T>& operator()(long i) const;
+   // access row i, initial index 1.
+   // Even if one has read/write access to a row, any attempt
+   // to change its length will raise an error.
+
+   // EXCEPTIONS: may throw if range checking is turned on
+
+   reference operator()(long i, long j);
+   const_reference operator()(long i, long j) const;
+   // access element (i, j), both indices starting at 1
+
+   // EXCEPTIONS: may throw if range checking is turned on
+
+   const_reference get(long i, long j) const;
+   // access element (i, j), both indices starting at 0
+
+   // EXCEPTIONS: may throw if range checking is turned on
+
+   void put(long i, long j, const T& a);
+   // same as M[i].put(j, a)
+
+   template <class U>
+   void put(long i, long j, const U& a);
+   // same as M[i].put(j, a)
+
+   long position(const Vec<T>& a) const;
+   // returns index of a in matrix, or -1 if not present;
+   // equivalent to rep(*this).position(a).
+
+   long position1(const Vec<T>& a) const;
+   // returns index of a in matrix, or -1 if not present;
+   // equivalent to rep(*this).position1(a).
+
+   void swap(Mat<T>& other);
+   // quick swap *this and other
+
+};
+
+template<class T>
+const Vec< Vec<T> >& rep(const Mat<T>& a);
+// read-only access to underlying representation
+
+template<class T>
+void swap(Mat<T>& X, Mat<T>& Y);
+// quick swap of X and Y
+
+template<class T>
+void MakeMatrix(Mat<T>& x, const vec_vec_T& a);
+// copies a to x, checking that it is "rectangular"
+
+// EXCEPTIONS: may throw, weak ES (but dimensions of x either
+//    remain unchanged or are set to the new dimensions implied by a)
+
+/**************************************************************************\
+
+                            Input/Output
+
+\**************************************************************************/
+
+
+template<class T>
+istream& operator>>(istream&, Mat<T>&);
+
+// EXCEPTIONS: may throw, weak ES
+
+template<class T>
+ostream& operator<<(ostream&, const Mat<T>&);
+
+// EXCEPTIONS: may throw, weak ES
+
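+// Example (an illustrative sketch): since a Mat<T> is represented as a
+// Vec< Vec<T> >, its textual form is the nested vector format, e.g.
+
+   Mat<ZZ> M;
+   cin >> M;           // input such as "[[1 2] [3 4]]" yields a 2 x 2 matrix
+   cout << M << "\n";  // written back in the same nested format
+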
+
+/**************************************************************************\
+
+                              Equality Testing
+
+
+\**************************************************************************/
+
+
+template<class T>
+long operator==(const Mat<T>& a, const Mat<T>& b);
+
+template<class T>
+long operator!=(const Mat<T>& a, const Mat<T>& b);
+
+
+ diff --git a/thirdparty/linux/ntl/doc/matrix.txt b/thirdparty/linux/ntl/doc/matrix.txt new file mode 100644 index 0000000000..7ff111c902 --- /dev/null +++ b/thirdparty/linux/ntl/doc/matrix.txt @@ -0,0 +1,185 @@ + +/**************************************************************************\ + +MODULE: matrix + +SUMMARY: + +Matrix templates. + +The declaration + + Mat M; + +creates a 0 x 0 matrix. + +We can make it have 10 rows and 20 columns like this: + + M.SetDims(10, 20); + +A row can be accessed as M[i], indexing from 0, or as M(i), indexing from 1. +A matrix entry can be accessed as M[i][j], indexing from 0, or as +M(i, j), indexing from 1. + +A matrix is represented as a Vec< Vec >: a vector of rows, where +each row is a Vec. Any attempt to resize one of the rows so +as to create a non-rectangular matrix will result in a run-time +error. + +The dimensions of an existing matrix may be changed. If the number of +columns does not change, then the matrix is just "resized" like a vector, +and no information is lost. Otherwise, if the number of columns changes, +the matrix is completely destroyed, and a new matrix is created + + +\**************************************************************************/ + + +// EXCEPTIONS: all functions below do not throw any exceptions, +// except as noted + +template +class Mat { + + typedef typename Vec::value_type value_type; + typedef typename Vec::reference reference; + typedef typename Vec::const_reference const_reference; + + + Mat(); // initially 0 x 0 + + Mat(const Mat& a); + // copy constructor + + // EXCEPTIONS: may throw + + + Mat& operator=(const Mat& a); + // assignment + + // EXCEPTIONS: may throw, weak ES (but dimensions of LHS + // will be either that of old LHS or RHS) + + ~Mat(); + // destructor + + Mat(INIT_SIZE_TYPE, long n, long m); + // Mat(INIT_SIZE, n, m) initializes an n x m matrix, invoking + // the default constructor for T to initialize entries. + + // EXCEPTIONS: may throw + + void SetDims(long n, long m); + // M.SetDims(n, m) makes M have dimension n x m. If the number of + // columns (m) changes, previous storage is freed, and space for M + // is reallocated and initialized; otherwise, more rows are + // allocated as necessary (when number of rows increases), + // excess rows are retained (when number of rows decreases), + // and--importantly--the contents do not change. + + // EXCEPTIONS: strong ES (although underlying vector representation + // may be reallocated) + + void kill(); free storage and make 0 x 0 + + long NumRows() const; + // M.NumRows() returns the number of rows of M + + long NumCols() const; + // M.NumCols() returns the number of columns of M + + Vec& operator[](long i); + const Vec& operator[](long i) const; + // access row i, initial index 0. + // Even if one has read/write access to a row, any attempt + // to change its length will raise an error. + + // EXCEPTIONS: may throw if range checking is turned on + + Vec& operator()(long i); + const Vec& operator()(long i) const; + // access row i, initial index 1. + // Even if one has read/write access to a row, any attempt + // to change its length will raise an error. + // of this row will raise an error. 
+ + // EXCEPTIONS: may throw if range checking is turned on + + reference operator()(long i, long j); + const_reference operator()(long i, long j) const; + // access element (i, j), both indices starting at 1 + + // EXCEPTIONS: may throw if range checking is turned on + + const_reference get(long i, long j) const; + // access element (i, j), both indices starting at 0 + + // EXCEPTIONS: may throw if range checking is turned on + + void put(long i, long j, const T& a); + // same as M[i].put(j, a) + + template + void put(long i, long j, const U& a); + // same as M[i].put(j, a) + + long position(const Vec& a) const; + // returns index of a in matrix, or -1 if not present; + // equivalent to rep(*this).position(a). + + long position1(const Vec& a) const; + // returns index of a in matrix, or -1 if not present; + // equivalent to rep(*this).position1(a). + + void swap(Mat& other); + // quick swap *this and other + +}; + +template +const Vec< Vec >& rep(const Mat& a); +// read-only access to underlying representation + +template +void swap(Mat& X, Mat& Y); +// quick swap of X and Y + +template +void MakeMatrix(Mat& x, const vec_vec_T& a); +// copies a to x, checking that it is "rectangular" + +// EXCEPTIONS: may thow, weak ES (but dimensions of x either +// remain unchanged or are set to the new dimensions implied by a) + +/**************************************************************************\ + + Input/Output + +\**************************************************************************/ + + +template +istream& operator>>(istream&, Mat&); + +// EXCEPTIONS: may throw, weak ES + +template +ostream& operator<<(ostream&, const Mat&); + +// EXCEPTIONS: may throw, weak ES + + +/**************************************************************************\ + + Equality Testing + + +\**************************************************************************/ + + +template +long operator==(const Mat& a, const Mat& b); + +template +long operator!=(const Mat& a, const Mat& b); + diff --git a/thirdparty/linux/ntl/doc/names.txt b/thirdparty/linux/ntl/doc/names.txt new file mode 100644 index 0000000000..f4984674ef --- /dev/null +++ b/thirdparty/linux/ntl/doc/names.txt @@ -0,0 +1,106 @@ + +Here is a list of the macro names that have changed. +As you can see, most of these are anyway undocumented, +and you probably never knew they existed. +Also changed, but not listed here, are the macros used +to prevent double inclusion of ".h" files. + +Also, the identifiers like INIT_VAL, INIT_SIZE, INIT_FFT +are no longer macros, but are defined to be constant objects +of particular classes. Their names do not change. 
+ +ZZ_ARITH_RIGHT_SHIFT -> NTL_ARITH_RIGHT_SHIFT +ZZ_BITS_PER_INT -> NTL_BITS_PER_INT +ZZ_BITS_PER_LONG -> NTL_BITS_PER_LONG +ZZ_DOUBLES_LOW_HIGH -> NTL_DOUBLES_LOW_HIGH +ZZ_DOUBLE_PRECISION -> NTL_DOUBLE_PRECISION +ZZ_EXT_DOUBLE -> NTL_EXT_DOUBLE +ZZ_FDOUBLE_PRECISION -> NTL_FDOUBLE_PRECISION +ZZ_FRADIX -> NTL_FRADIX +ZZ_FRADIX_INV -> NTL_FRADIX_INV +ZZ_FetchHiLo -> NTL_FetchHiLo +ZZ_FetchLo -> NTL_FetchLo +ZZ_HI_WD -> NTL_HI_WD +ZZ_LO_WD -> NTL_LO_WD +ZZ_MAX_INT -> NTL_MAX_INT +ZZ_MAX_LONG -> NTL_MAX_LONG +ZZ_MIN_INT -> NTL_MIN_INT +ZZ_MIN_LONG -> NTL_MIN_LONG +ZZ_NBITS -> NTL_NBITS +ZZ_NBITSH -> NTL_NBITSH +ZZ_NBITS_MAX -> NTL_NBITS_MAX +ZZ_NTL_SINGLE_MUL_OK -> NTL_SINGLE_MUL_OK +ZZ_PRIME_BND -> NTL_PRIME_BND +ZZ_RADIX -> NTL_RADIX +ZZ_RADIXM -> NTL_RADIXM +ZZ_RADIXROOT -> NTL_RADIXROOT +ZZ_RADIXROOTM -> NTL_RADIXROOTM + +ntl_eq_matrix_decl -> NTL_eq_matrix_decl +ntl_eq_matrix_impl -> NTL_eq_matrix_impl +ntl_eq_vector_decl -> NTL_eq_vector_decl +ntl_eq_vector_impl -> NTL_eq_vector_impl + +ntl_io_matrix_decl -> NTL_io_matrix_decl +ntl_io_matrix_impl -> NTL_io_matrix_impl +ntl_io_vector_decl -> NTL_io_vector_decl +ntl_io_vector_impl -> NTL_io_vector_impl + +ntl_matrix_decl -> NTL_matrix_decl +ntl_matrix_impl -> NTL_matrix_impl + +ntl_pair_decl -> NTL_pair_decl +ntl_pair_eq_decl -> NTL_pair_eq_decl +ntl_pair_eq_impl -> NTL_pair_eq_impl +ntl_pair_impl -> NTL_pair_impl +ntl_pair_io_decl -> NTL_pair_io_decl +ntl_pair_io_impl -> NTL_pair_io_impl + +ntl_vector_decl -> NTL_vector_decl +ntl_vector_default -> NTL_vector_default +ntl_vector_impl -> NTL_vector_impl +ntl_vector_impl_plain -> NTL_vector_impl_plain + +BB_HALF_MUL_CODE -> NTL_BB_HALF_MUL_CODE +BB_MUL_CODE -> NTL_BB_MUL_CODE +BB_REV_CODE -> NTL_BB_REV_CODE +BB_SQR_CODE -> NTL_BB_SQR_CODE + +FFTFudge -> NTL_FFTFudge +FFTMaxRoot -> NTL_FFTMaxRoot +FFTMaxRootBnd -> NTL_FFTMaxRootBnd + +QUAD_FLOAT_SPLIT -> NTL_QUAD_FLOAT_SPLIT + +WV_NTL_RANGE_CHECK_CODE -> NTL_WV_RANGE_CHECK_CODE + +WordVectorExpansionRatio -> NTL_WordVectorExpansionRatio +WordVectorInputBlock -> NTL_WordVectorInputBlock +WordVectorMinAlloc -> NTL_WordVectorMinAlloc + +XD_BOUND -> NTL_XD_BOUND +XD_BOUND_INV -> NTL_XD_BOUND_INV +XD_HBOUND -> NTL_XD_HBOUND +XD_HBOUND_INV -> NTL_XD_HBOUND_INV + +ZZ_pRegister -> NTL_ZZ_pRegister + +ZZ_pX_BERMASS_CROSSOVER -> NTL_ZZ_pX_BERMASS_CROSSOVER +ZZ_pX_DIV_CROSSOVER -> NTL_ZZ_pX_DIV_CROSSOVER +ZZ_pX_FFT_CROSSOVER -> NTL_ZZ_pX_FFT_CROSSOVER +ZZ_pX_GCD_CROSSOVER -> NTL_ZZ_pX_GCD_CROSSOVER +ZZ_pX_HalfGCD_CROSSOVER -> NTL_ZZ_pX_HalfGCD_CROSSOVER +ZZ_pX_NEWTON_CROSSOVER -> NTL_ZZ_pX_NEWTON_CROSSOVER +ZZ_pX_TRACE_CROSSOVER -> NTL_ZZ_pX_TRACE_CROSSOVER + +zz_pRegister -> NTL_zz_pRegister + +zz_pX_BERMASS_CROSSOVER -> NTL_zz_pX_BERMASS_CROSSOVER +zz_pX_DIV_CROSSOVER -> NTL_zz_pX_DIV_CROSSOVER +zz_pX_GCD_CROSSOVER -> NTL_zz_pX_GCD_CROSSOVER +zz_pX_HalfGCD_CROSSOVER -> NTL_zz_pX_HalfGCD_CROSSOVER +zz_pX_MOD_CROSSOVER -> NTL_zz_pX_MOD_CROSSOVER +zz_pX_MUL_CROSSOVER -> NTL_zz_pX_MUL_CROSSOVER +zz_pX_NEWTON_CROSSOVER -> NTL_zz_pX_NEWTON_CROSSOVER +zz_pX_TRACE_CROSSOVER -> NTL_zz_pX_TRACE_CROSSOVER + diff --git a/thirdparty/linux/ntl/doc/pair.cpp.html b/thirdparty/linux/ntl/doc/pair.cpp.html new file mode 100644 index 0000000000..8e8f04a069 --- /dev/null +++ b/thirdparty/linux/ntl/doc/pair.cpp.html @@ -0,0 +1,89 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/pair.cpp.html + + + + +/**************************************************************************\
+
+MODULE: pair
+
+SUMMARY:
+
+Pair templates.
+
+The declaration
+
+   Pair<S,T> p;
+
+creates a pair object using the default constructors for S and T.  The
+member p.a is the first component (of type S) and the member p.b is
+the second component (of type T).
+
+
+\**************************************************************************/
+
+
+
+#include <NTL/tools.h>
+
+template<class S, class T>
+class Pair {  
+public:  
+   S a;  
+   T b;  
+  
+   Pair();
+   // default constructor...invokes default constructors for S and T
+
+   Pair(const Pair<S,T>& x); // copy
+
+   Pair& operator=(const Pair<S,T>& x); // assignment
+
+   Pair(const S& x, const T& y);  // initialize with (x, y)
+
+   ~Pair();
+   // destructor...invokes destructors for S and T
+};  
+  
+template<class S, class T>
+Pair<S,T> cons(const S& x, const T& y);
+// returns Pair<S,T>(x, y)
+
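+// Example (an illustrative sketch; assumes <NTL/ZZ.h> is also included
+// and namespace NTL is in scope):
+
+   Pair<ZZ,long> p = cons(conv<ZZ>(12), 34L);
+   p.a += 1;              // first component, of type ZZ
+   cout << p << "\n";     // prints [13 34] -- see the I/O format below
+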
+
+/**************************************************************************\
+
+                             Input/Output
+
+The I/O format for a Pair is
+
+   [a b]
+
+\**************************************************************************/
+
+
+template<class S, class T>
+istream& operator>>(istream&, Pair<S,T>&);  
+
+template<class S, class T>
+ostream& operator<<(ostream&, const Pair<S,T>&);  
+
+
+/**************************************************************************\
+
+                              Equality Testing
+
+\**************************************************************************/
+
+
+template<class S, class T>
+long operator==(const Pair<S,T>& x, const Pair<S,T>& y);
+
+template<class S, class T>
+long operator!=(const Pair<S,T>& x, const Pair<S,T>& y);
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/pair.txt b/thirdparty/linux/ntl/doc/pair.txt new file mode 100644 index 0000000000..c5b53b43b2 --- /dev/null +++ b/thirdparty/linux/ntl/doc/pair.txt @@ -0,0 +1,79 @@ +/**************************************************************************\ + +MODULE: pair + +SUMMARY: + +Pair templates. + +The decalaration + + Pair p; + +creates a pair object using the default constructors for S and T. The +member p.a is the first component (of type S) and the member p.b is +the second component (of type T). + + +\**************************************************************************/ + + + +#include + +template +class Pair { +public: + S a; + T b; + + Pair(); + // default constructor...invokes default constructors for S and T + + Pair(const Pair& x); // copy + + Pair& operator=(const Pair& x); // assignment + + Pair(const S& x, const T& y); // initialize with (x, y) + + ~Pair(); + // destructor...invokes destructors for S and T +}; + +template +Pair cons(const S& x, const T& y); +// returns Pair(x, y) + + +/**************************************************************************\ + + Input/Output + +The I/O format for a Pair is + + [a b] + +\**************************************************************************/ + + +template +istream& operator>>(istream&, Pair&); + +template +ostream& operator<<(ostream&, const Pair&); + + +/**************************************************************************\ + + Equality Testing + +\**************************************************************************/ + + +template +long operator==(const Pair& x, const Pair& y); + +template +long operator!=(const Pair& x, const Pair& y); + + diff --git a/thirdparty/linux/ntl/doc/quad_float.cpp.html b/thirdparty/linux/ntl/doc/quad_float.cpp.html new file mode 100644 index 0000000000..97245609ab --- /dev/null +++ b/thirdparty/linux/ntl/doc/quad_float.cpp.html @@ -0,0 +1,391 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/quad_float.cpp.html + + + + +
+
+/**************************************************************************\
+
+MODULE: quad_float
+
+SUMMARY:
+
+The class quad_float is used to represent quadruple precision numbers.
+Thus, with standard IEEE floating point, you should get the equivalent
+of about 106 bits of precision (but actually just a bit less).
+
+The interface allows you to treat quad_floats more or less as if they were
+"ordinary" floating point types.
+
+See below for more implementation details.
+
+
+\**************************************************************************/
+
+#include <NTL/ZZ.h>
+
+
+class quad_float {
+public:
+
+quad_float(); // = 0
+
+quad_float(const quad_float& a);  // copy constructor
+
+explicit quad_float(double a);  // promotion constructor
+
+
+quad_float& operator=(const quad_float& a);  // assignment operator
+quad_float& operator=(double a);
+
+~quad_float();
+
+
+static void SetOutputPrecision(long p);
+// This sets the number of decimal digits to be output.  Default is
+// 10.
+
+
+static long OutputPrecision();
+// returns current output precision.
+
+
+};
+
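+// Example (an illustrative sketch; assumes namespace NTL is in scope):
+
+   quad_float x = to_quad_float(1.0);
+   x /= 3.0;                             // x ~ 1/3 to about 106 bits
+   quad_float::SetOutputPrecision(30);
+   cout << x << "\n";                    // ~30 significant decimal digits
+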
+
+/**************************************************************************\
+
+                             Arithmetic Operations
+
+\**************************************************************************/
+
+
+
+
+quad_float operator +(const quad_float& x, const quad_float& y);
+quad_float operator -(const quad_float& x, const quad_float& y);
+quad_float operator *(const quad_float& x, const quad_float& y);
+quad_float operator /(const quad_float& x, const quad_float& y);
+
+
+// PROMOTIONS: operators +, -, *, / promote double to quad_float
+// on (x, y).
+
+quad_float operator -(const quad_float& x);
+
+quad_float& operator += (quad_float& x, const quad_float& y);
+quad_float& operator += (quad_float& x, double y);
+
+quad_float& operator -= (quad_float& x, const quad_float& y);
+quad_float& operator -= (quad_float& x, double y);
+
+quad_float& operator *= (quad_float& x, const quad_float& y);
+quad_float& operator *= (quad_float& x, double y);
+
+quad_float& operator /= (quad_float& x, const quad_float& y);
+quad_float& operator /= (quad_float& x, double y);
+
+quad_float& operator++(quad_float& a); // prefix
+void operator++(quad_float& a, int); // postfix
+
+quad_float& operator--(quad_float& a); // prefix
+void operator--(quad_float& a, int); // postfix
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+
+long operator> (const quad_float& x, const quad_float& y);
+long operator>=(const quad_float& x, const quad_float& y);
+long operator< (const quad_float& x, const quad_float& y);
+long operator<=(const quad_float& x, const quad_float& y);
+long operator==(const quad_float& x, const quad_float& y);
+long operator!=(const quad_float& x, const quad_float& y);
+
+long sign(const quad_float& x);  // sign of x, -1, 0, +1
+long compare(const quad_float& x, const quad_float& y); // sign of x - y
+
+// PROMOTIONS: operators >, ..., != and function compare
+// promote double to quad_float on (x, y).
+
+
+/**************************************************************************\
+
+                               Input/Output
+Input Syntax:
+
+<number>: [ "-" ] <unsigned-number>
+<unsigned-number>: <dotted-number> [ <e-part> ] | <e-part>
+<dotted-number>: <digits> | <digits> "." <digits> | "." <digits> | <digits> "."
+<digits>: <digit> <digits> | <digit>
+<digit>: "0" | ... | "9"
+<e-part>: ( "E" | "e" ) [ "+" | "-" ] <digits>
+
+Examples of valid input:
+
+17 1.5 0.5 .5  5.  -.5 e10 e-10 e+10 1.5e10 .5e10 .5E10
+
+Note that the number of decimal digits of precision that are used
+for output can be set to any number p >= 1 by calling
+the routine quad_float::SetOutputPrecision(p).  
+The default value of p is 10.
+The current value of p is returned by a call to quad_float::OutputPrecision().
+
+
+
+\**************************************************************************/
+
+
+istream& operator >> (istream& s, quad_float& x);
+ostream& operator << (ostream& s, const quad_float& x);
+
+
+/**************************************************************************\
+
+                                  Miscellaneous
+
+\**************************************************************************/
+
+
+
+quad_float sqrt(const quad_float& x);
+quad_float floor(const quad_float& x);
+quad_float ceil(const quad_float& x);
+quad_float trunc(const quad_float& x);
+quad_float fabs(const quad_float& x);
+quad_float exp(const quad_float& x);
+quad_float log(const quad_float& x);
+
+
+void power(quad_float& x, const quad_float& a, long e); // x = a^e
+quad_float power(const quad_float& a, long e);
+
+void power2(quad_float& x, long e); // x = 2^e
+quad_float power2_quad_float(long e);
+
+quad_float ldexp(const quad_float& x, long e);  // return x*2^e
+
+long IsFinite(quad_float *x); // checks if x is "finite"  
+                              // pointer is used for compatibility with
+                              // IsFinite(double*)
+
+
+void random(quad_float& x);
+quad_float random_quad_float();
+// generate a random quad_float x with 0 <= x <= 1
+
+
+
+
+
+/***********************************************************************\
+
+IMPLEMENTATION DETAILS
+
+A quad_float x is represented as a pair of doubles, x.hi and x.lo,
+such that the number represented by x is x.hi + x.lo, where
+
+   |x.lo| <= 0.5*ulp(x.hi),  (*)
+
+and ulp(y) means "unit in the last place of y".  
+
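+To make (*) concrete, here is the classic error-free addition step
+(Knuth's two-sum), the kind of building block on which such
+representations rest.  This is an illustrative sketch, not NTL's
+actual internal code:
+
+   void TwoSum(double a, double b, double& s, double& err)
+   {
+      s = a + b;                        // rounded sum
+      double bb = s - a;
+      err = (a - (s - bb)) + (b - bb);  // exact roundoff: a + b == s + err
+   }
+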
+For the software to work correctly, IEEE Standard Arithmetic is sufficient.  
+That includes just about every modern computer; the only exception I'm
+aware of is Intel x86 platforms running Linux (but you can still
+use this platform--see below).
+
+Also sufficient is any platform that implements arithmetic with correct
+rounding, i.e., given double floating point numbers a and b, a op b
+is computed exactly and then rounded to the nearest double.  
+The tie-breaking rule is not important.
+
+This is a rather weird representation;  although it gives one
+essentially twice the precision of an ordinary double, it is
+not really the equivalent of quadruple precision (despite the name).
+For example, the number 1 + 2^{-200} can be represented exactly as
+a quad_float.  Also, there is no real notion of "machine precision".
+
+Note that overflow/underflow for quad_floats does not follow any particularly
+useful rules, even if the underlying floating point arithmetic is IEEE
+compliant.  Generally, when an overflow/underflow occurs, the resulting value
+is unpredictable, although typically when overflow occurs in computing a value
+x, the result is non-finite (i.e., IsFinite(&x) == 0).  Note, however, that
+some care is taken to ensure that the ZZ to quad_float conversion routine
+produces a non-finite value upon overflow.
+
+THE INTEL x86 PROBLEM
+
+Although just about every modern processor implements the IEEE
+floating point standard, there still can be problems
+on processors that support IEEE extended double precision.
+The only processor I know of that supports this is the x86/Pentium.
+
+While extended double precision may sound like a nice thing,
+it is not.  Normal double precision has 53 bits of precision.
+Extended has 64.  On x86s, the FP registers have 53 or 64 bits
+of precision---this can be set at run-time by modifying
+the cpu "control word" (something that can be done
+only in assembly code).
+However, doubles stored in memory always have only 53 bits.
+Compilers may move values between memory and registers
+whenever they want, which can effectively change the value
+of a floating point number even though at the C/C++ level,
+nothing has happened that should have changed the value.
+Is that sick, or what?
+Actually, the new C99 standard seems to outlaw such "spontaneous"
+value changes; however, this behavior is not necessarily
+universally implemented.
+
+This is a real headache, and if one is not just a bit careful,
+the quad_float code will break.  This breaking is not at all subtle,
+and the program QuadTest will catch the problem if it exists.
+
+You should not need to worry about any of this, because NTL automatically
+detects and works around these problems as best it can, as described below.
+It shouldn't make a mistake, but if it does, you will
+catch it in the QuadTest program.
+If things don't work quite right, you might try
+setting NTL_FIX_X86 or NTL_NO_FIX_X86 flags in ntl_config.h,
+but this should not be necessary.
+
+Here are the details about how NTL fixes the problem.
+
+The first and best way is to have the default setting of the control word
+be 53 bits.  However, you are at the mercy of your platform
+(compiler, OS, run-time libraries).  Windows does this,
+and so the problem simply does not arise here, and NTL neither
+detects nor fixes the problem.  Linux, however, does not do this,
+which really sucks.  Can we talk these Linux people into changing this?
+
+The second way to fix the problem is by having NTL
+fiddle with control word itself.  If you compile NTL using a GNU compiler
+on an x86, this should happen automatically.
+On the one hand, this is not a general, portable solution,
+since it will only work if you use a GNU compiler, or at least one that
+supports GNU 'asm' syntax.  
+On the other hand, almost everybody who compiles C++ on x86/Linux
+platforms uses GNU compilers (although there are some commercial
+compilers out there that I don't know too much about).
+
+The third way to fix the problem is to 'force' all intermediate
+floating point results into memory.  This is not an 'ideal' fix,
+since it is not fully equivalent to 53-bit precision (because of
+double rounding), but it works (although to be honest, I've never seen
+a full proof of correctness in this case).
+NTL's quad_float code does this by storing intermediate results
+in local variables declared to be 'volatile'.
+This is the solution to the problem that NTL uses if it detects
+the problem and can't fix it using the GNU 'asm' hack mentioned above.
+This solution should work on any platform that faithfully
+implements 'volatile' according to the ANSI C standard.
+
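+As an illustrative sketch (not NTL's actual code), the 'volatile'
+technique amounts to spilling values to memory like this:
+
+   double ForceToMemory(double x)
+   {
+      volatile double t = x;   // store to memory, discarding extended bits
+      return t;
+   }
+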
+
+
+BACKGROUND INFO
+
+The NTL code uses algorithms designed by Knuth, Kahan, Dekker, and
+Linnainmaa.  The original transcription to C++ was done by Douglas
+Priest.  Enhancements and bug fixes were done by Keith Briggs
+(http://epidem13.plantsci.cam.ac.uk/~kbriggs).  The NTL version is a
+stripped down version of Briggs' code, with a couple of bug fixes and
+portability improvements.  Briggs has continued to develop his
+library;  see his web page above for the latest version and more information.
+
+Here is a brief annotated bibliography (compiled by Priest) of papers
+dealing with DP and similar techniques, arranged chronologically.
+
+
+Kahan, W., Further Remarks on Reducing Truncation Errors,
+  {\it Comm.\ ACM\/} {\bf 8} (1965), 40.
+
+M{\o}ller, O., Quasi Double Precision in Floating-Point Addition,
+  {\it BIT\/} {\bf 5} (1965), 37--50.
+
+  The two papers that first presented the idea of recovering the
+  roundoff of a sum.
+
+Dekker, T., A Floating-Point Technique for Extending the Available
+  Precision, {\it Numer.\ Math.} {\bf 18} (1971), 224--242.
+
+  The classic reference for DP algorithms for sum, product, quotient,
+  and square root.
+
+Pichat, M., Correction d'une Somme en Arithmetique \`a Virgule
+  Flottante, {\it Numer.\ Math.} {\bf 19} (1972), 400--406.
+
+  An iterative algorithm for computing a protracted sum to working
+  precision by repeatedly applying the sum-and-roundoff method.
+
+Linnainmaa, S., Analysis of Some Known Methods of Improving the Accuracy
+  of Floating-Point Sums, {\it BIT\/} {\bf 14} (1974), 167--202.
+
+  Comparison of Kahan and M{\o}ller algorithms with variations given
+  by Knuth.
+
+Bohlender, G., Floating-Point Computation of Functions with Maximum
+  Accuracy, {\it IEEE Trans.\ Comput.} {\bf C-26} (1977), 621--632.
+
+  Extended the analysis of Pichat's algorithm to compute a multi-word
+  representation of the exact sum of n working precision numbers.
+  This is the algorithm Kahan has called "distillation".
+
+Linnainmaa, S., Software for Doubled-Precision Floating-Point Computations,
+  {\it ACM Trans.\ Math.\ Soft.} {\bf 7} (1981), 272--283.
+
+  Generalized the hypotheses of Dekker and showed how to take advantage
+  of extended precision where available.
+
+Leuprecht, H., and W.~Oberaigner, Parallel Algorithms for the Rounding-Exact
+  Summation of Floating-Point Numbers, {\it Computing} {\bf 28} (1982), 89--104.
+
+  Variations of distillation appropriate for parallel and vector
+  architectures.
+
+Kahan, W., Paradoxes in Concepts of Accuracy, lecture notes from Joint
+  Seminar on Issues and Directions in Scientific Computation, Berkeley, 1989.
+
+  Gives the more accurate DP sum I've shown above, discusses some
+  examples.
+
+Priest, D., Algorithms for Arbitrary Precision Floating Point Arithmetic,
+  in P.~Kornerup and D.~Matula, Eds., {\it Proc.\ 10th Symposium on Com-
+  puter Arithmetic}, IEEE Computer Society Press, Los Alamitos, Calif., 1991.
+
+  Extends from DP to arbitrary precision; gives portable algorithms and
+  general proofs.
+
+Sorensen, D., and P.~Tang, On the Orthogonality of Eigenvectors Computed
+  by Divide-and-Conquer Techniques, {\it SIAM J.\ Num.\ Anal.} {\bf 28}
+  (1991), 1752--1775.
+
+  Uses some DP arithmetic to retain orthogonality of eigenvectors
+  computed by a parallel divide-and-conquer scheme.
+
+Priest, D., On Properties of Floating Point Arithmetics: Numerical Stability
+  and the Cost of Accurate Computations, Ph.D. dissertation, University
+  of California at Berkeley, 1992.
+
+  More examples, organizes proofs in terms of common properties of fp
+  addition/subtraction, gives other summation algorithms.
+
+Another relevant paper:
+
+X. S. Li, et al.
+Design, implementation, and testing of extended and mixed
+precision BLAS.  ACM Trans. Math. Soft., 28:152-205, 2002.
+
+
+
+\***********************************************************************/
+
+
+ diff --git a/thirdparty/linux/ntl/doc/quad_float.txt b/thirdparty/linux/ntl/doc/quad_float.txt new file mode 100644 index 0000000000..ec418008f8 --- /dev/null +++ b/thirdparty/linux/ntl/doc/quad_float.txt @@ -0,0 +1,381 @@ + + +/**************************************************************************\ + +MODULE: quad_float + +SUMMARY: + +The class quad_float is used to represent quadruple precision numbers. +Thus, with standard IEEE floating point, you should get the equivalent +of about 106 bits of precision (but actually just a bit less). + +The interface allows you to treat quad_floats more or less as if they were +"ordinary" floating point types. + +See below for more implementation details. + + +\**************************************************************************/ + +#include + + +class quad_float { +public: + +quad_float(); // = 0 + +quad_float(const quad_float& a); // copy constructor + +explicit quad_float(double a); // promotion constructor + + +quad_float& operator=(const quad_float& a); // assignment operator +quad_float& operator=(double a); + +~quad_float(); + + +static void SetOutputPrecision(long p); +// This sets the number of decimal digits to be output. Default is +// 10. + + +static long OutputPrecision(); +// returns current output precision. + + +}; + + +/**************************************************************************\ + + Arithmetic Operations + +\**************************************************************************/ + + + + +quad_float operator +(const quad_float& x, const quad_float& y); +quad_float operator -(const quad_float& x, const quad_float& y); +quad_float operator *(const quad_float& x, const quad_float& y); +quad_float operator /(const quad_float& x, const quad_float& y); + + +// PROMOTIONS: operators +, -, *, / promote double to quad_float +// on (x, y). + +quad_float operator -(const quad_float& x); + +quad_float& operator += (quad_float& x, const quad_float& y); +quad_float& operator += (quad_float& x, double y); + +quad_float& operator -= (quad_float& x, const quad_float& y); +quad_float& operator -= (quad_float& x, double y); + +quad_float& operator *= (quad_float& x, const quad_float& y); +quad_float& operator *= (quad_float& x, double y); + +quad_float& operator /= (quad_float& x, const quad_float& y); +quad_float& operator /= (quad_float& x, double y); + +quad_float& operator++(quad_float& a); // prefix +void operator++(quad_float& a, int); // postfix + +quad_float& operator--(quad_float& a); // prefix +void operator--(quad_float& a, int); // postfix + + + +/**************************************************************************\ + + Comparison + +\**************************************************************************/ + + +long operator> (const quad_float& x, const quad_float& y); +long operator>=(const quad_float& x, const quad_float& y); +long operator< (const quad_float& x, const quad_float& y); +long operator<=(const quad_float& x, const quad_float& y); +long operator==(const quad_float& x, const quad_float& y); +long operator!=(const quad_float& x, const quad_float& y); + +long sign(const quad_float& x); // sign of x, -1, 0, +1 +long compare(const quad_float& x, const quad_float& y); // sign of x - y + +// PROMOTIONS: operators >, ..., != and function compare +// promote double to quad_float on (x, y). + + +/**************************************************************************\ + + Input/Output +Input Syntax: + +: [ "-" ] +: [ ] | +: | "." | "." | "." +: | +: "0" | ... 
| "9" +: ( "E" | "e" ) [ "+" | "-" ] + +Examples of valid input: + +17 1.5 0.5 .5 5. -.5 e10 e-10 e+10 1.5e10 .5e10 .5E10 + +Note that the number of decimal digits of precision that are used +for output can be set to any number p >= 1 by calling +the routine quad_float::SetOutputPrecision(p). +The default value of p is 10. +The current value of p is returned by a call to quad_float::OutputPrecision(). + + + +\**************************************************************************/ + + +istream& operator >> (istream& s, quad_float& x); +ostream& operator << (ostream& s, const quad_float& x); + + +/**************************************************************************\ + + Miscellaneous + +\**************************************************************************/ + + + +quad_float sqrt(const quad_float& x); +quad_float floor(const quad_float& x); +quad_float ceil(const quad_float& x); +quad_float trunc(const quad_float& x); +quad_float fabs(const quad_float& x); +quad_float exp(const quad_float& x); +quad_float log(const quad_float& x); + + +void power(quad_float& x, const quad_float& a, long e); // x = a^e +quad_float power(const quad_float& a, long e); + +void power2(quad_float& x, long e); // x = 2^e +quad_float power2_quad_float(long e); + +quad_float ldexp(const quad_float& x, long e); // return x*2^e + +long IsFinite(quad_float *x); // checks if x is "finite" + // pointer is used for compatability with + // IsFinite(double*) + + +void random(quad_float& x); +quad_float random_quad_float(); +// generate a random quad_float x with 0 <= x <= 1 + + + + + +/***********************************************************************\ + +IMPLEMENTATION DETAILS + +A quad_float x is represented as a pair of doubles, x.hi and x.lo, +such that the number represented by x is x.hi + x.lo, where + + |x.lo| <= 0.5*ulp(x.hi), (*) + +and ulp(y) means "unit in the last place of y". + +For the software to work correctly, IEEE Standard Arithmetic is sufficient. +That includes just about every modern computer; the only exception I'm +aware of is Intel x86 platforms running Linux (but you can still +use this platform--see below). + +Also sufficient is any platform that implements arithmetic with correct +rounding, i.e., given double floating point numbers a and b, a op b +is computed exactly and then rounded to the nearest double. +The tie-breaking rule is not important. + +This is a rather wierd representation; although it gives one +essentially twice the precision of an ordinary double, it is +not really the equivalent of quadratic precision (despite the name). +For example, the number 1 + 2^{-200} can be represented exactly as +a quad_float. Also, there is no real notion of "machine precision". + +Note that overflow/underflow for quad_floats does not follow any particularly +useful rules, even if the underlying floating point arithmetic is IEEE +compliant. Generally, when an overflow/underflow occurs, the resulting value +is unpredicatble, although typically when overflow occurs in computing a value +x, the result is non-finite (i.e., IsFinite(&x) == 0). Note, however, that +some care is taken to ensure that the ZZ to quad_float conversion routine +produces a non-finite value upon overflow. + +THE INTEL x86 PROBLEM + +Although just about every modern processor implements the IEEE +floating point standard, there still can be problems +on processors that support IEEE extended double precision. +The only processor I know of that supports this is the x86/Pentium. 
+ +While extended double precision may sound like a nice thing, +it is not. Normal double precision has 53 bits of precision. +Extended has 64. On x86s, the FP registers have 53 or 64 bits +of precision---this can be set at run-time by modifying +the cpu "control word" (something that can be done +only in assembly code). +However, doubles stored in memory always have only 53 bits. +Compilers may move values between memory and registers +whenever they want, which can effectively change the value +of a floating point number even though at the C/C++ level, +nothing has happened that should have changed the value. +Is that sick, or what? +Actually, the new C99 standard seems to outlaw such "spontaneous" +value changes; however, this behavior is not necessarily +universally implemented. + +This is a real headache, and if one is not just a bit careful, +the quad_float code will break. This breaking is not at all subtle, +and the program QuadTest will catch the problem if it exists. + +You should not need to worry about any of this, because NTL automatically +detects and works around these problems as best it can, as described below. +It shouldn't make a mistake, but if it does, you will +catch it in the QuadTest program. +If things don't work quite right, you might try +setting NTL_FIX_X86 or NTL_NO_FIX_X86 flags in ntl_config.h, +but this should not be necessary. + +Here are the details about how NTL fixes the problem. + +The first and best way is to have the default setting of the control word +be 53 bits. However, you are at the mercy of your platform +(compiler, OS, run-time libraries). Windows does this, +and so the problem simply does not arise here, and NTL neither +detects nor fixes the problem. Linux, however, does not do this, +which really sucks. Can we talk these Linux people into changing this? + +The second way to fix the problem is by having NTL +fiddle with control word itself. If you compile NTL using a GNU compiler +on an x86, this should happen automatically. +On the one hand, this is not a general, portable solution, +since it will only work if you use a GNU compiler, or at least one that +supports GNU 'asm' syntax. +On the other hand, almost everybody who compiles C++ on x86/Linux +platforms uses GNU compilers (although there are some commercial +compilers out there that I don't know too much about). + +The third way to fix the problem is to 'force' all intermediate +floating point results into memory. This is not an 'ideal' fix, +since it is not fully equivalent to 53-bit precision (because of +double rounding), but it works (although to be honest, I've never seen +a full proof of correctness in this case). +NTL's quad_float code does this by storing intermediate results +in local variables declared to be 'volatile'. +This is the solution to the problem that NTL uses if it detects +the problem and can't fix it using the GNU 'asm' hack mentioned above. +This solution should work on any platform that faithfully +implements 'volatile' according to the ANSI C standard. + + + +BACKGROUND INFO + +The code NTL uses algorithms designed by Knuth, Kahan, Dekker, and +Linnainmaa. The original transcription to C++ was done by Douglas +Priest. Enhancements and bug fixes were done by Keith Briggs +(http://epidem13.plantsci.cam.ac.uk/~kbriggs). The NTL version is a +stripped down version of Briggs' code, with a couple of bug fixes and +portability improvements. Briggs has continued to develop his +library; see his web page above for the latest version and more information. 
+ +Here is a brief annotated bibliography (compiled by Priest) of papers +dealing with DP and similar techniques, arranged chronologically. + + +Kahan, W., Further Remarks on Reducing Truncation Errors, + {\it Comm.\ ACM\/} {\bf 8} (1965), 40. + +M{\o}ller, O., Quasi Double Precision in Floating-Point Addition, + {\it BIT\/} {\bf 5} (1965), 37--50. + + The two papers that first presented the idea of recovering the + roundoff of a sum. + +Dekker, T., A Floating-Point Technique for Extending the Available + Precision, {\it Numer.\ Math.} {\bf 18} (1971), 224--242. + + The classic reference for DP algorithms for sum, product, quotient, + and square root. + +Pichat, M., Correction d'une Somme en Arithmetique \`a Virgule + Flottante, {\it Numer.\ Math.} {\bf 19} (1972), 400--406. + + An iterative algorithm for computing a protracted sum to working + precision by repeatedly applying the sum-and-roundoff method. + +Linnainmaa, S., Analysis of Some Known Methods of Improving the Accuracy + of Floating-Point Sums, {\it BIT\/} {\bf 14} (1974), 167--202. + + Comparison of Kahan and M{\o}ller algorithms with variations given + by Knuth. + +Bohlender, G., Floating-Point Computation of Functions with Maximum + Accuracy, {\it IEEE Trans.\ Comput.} {\bf C-26} (1977), 621--632. + + Extended the analysis of Pichat's algorithm to compute a multi-word + representation of the exact sum of n working precision numbers. + This is the algorithm Kahan has called "distillation". + +Linnainmaa, S., Software for Doubled-Precision Floating-Point Computations, + {\it ACM Trans.\ Math.\ Soft.} {\bf 7} (1981), 272--283. + + Generalized the hypotheses of Dekker and showed how to take advantage + of extended precision where available. + +Leuprecht, H., and W.~Oberaigner, Parallel Algorithms for the Rounding-Exact + Summation of Floating-Point Numbers, {\it Computing} {\bf 28} (1982), 89--104. + + Variations of distillation appropriate for parallel and vector + architectures. + +Kahan, W., Paradoxes in Concepts of Accuracy, lecture notes from Joint + Seminar on Issues and Directions in Scientific Computation, Berkeley, 1989. + + Gives the more accurate DP sum I've shown above, discusses some + examples. + +Priest, D., Algorithms for Arbitrary Precision Floating Point Arithmetic, + in P.~Kornerup and D.~Matula, Eds., {\it Proc.\ 10th Symposium on Com- + puter Arithmetic}, IEEE Computer Society Press, Los Alamitos, Calif., 1991. + + Extends from DP to arbitrary precision; gives portable algorithms and + general proofs. + +Sorensen, D., and P.~Tang, On the Orthogonality of Eigenvectors Computed + by Divide-and-Conquer Techniques, {\it SIAM J.\ Num.\ Anal.} {\bf 28} + (1991), 1752--1775. + + Uses some DP arithmetic to retain orthogonality of eigenvectors + computed by a parallel divide-and-conquer scheme. + +Priest, D., On Properties of Floating Point Arithmetics: Numerical Stability + and the Cost of Accurate Computations, Ph.D. dissertation, University + of California at Berkeley, 1992. + + More examples, organizes proofs in terms of common properties of fp + addition/subtraction, gives other summation algorithms. + +Another relevant paper: + +X. S. Li, et al. +Design, implementation, and testing of extended and mixed +precision BLAS. ACM Trans. Math. Soft., 28:152-205, 2002. 
+ + + +\***********************************************************************/ + diff --git a/thirdparty/linux/ntl/doc/sedscript.txt b/thirdparty/linux/ntl/doc/sedscript.txt new file mode 100644 index 0000000000..717c5c08ce --- /dev/null +++ b/thirdparty/linux/ntl/doc/sedscript.txt @@ -0,0 +1,82 @@ + +# This is a sed script to make most of the common syntactic +# changes necessary to move from NTL 2.0 to 3.0. +# If this file is in sedscript.txt (as it originally is) +# the command +# sed -f sedscript.txt < old.c > new.c +# will convert old.c to new.c with the necesary changes. +# +# Please note that this script is niether "sound" or "complete", +# but should still be useful. + +# rename some classes +s/BB/GF2X/g +s/BB_p/GF2E/g +s/GF2Vector/vec_GF2/g +s/GF2Matrix/mat_GF2/g + +# rename some functions +s/ZZ_pInit(/ZZ_p::init(/g +s/zz_pInit(/zz_p::init(/g +s/zz_pFFTInit(/zz_p::FFTInit(/ +s/GF2EInit(/GF2E::init(/g +s/LowBits/trunc/g +s/Long(/to_long(/g +s/XDouble(/to_xdouble(/g +s/Quad_float(/to_quad_float(/g +s/trace(/TraceMod(/g +s/norm(/NormMod(/g +s/MinPoly(/MinPolyMod(/g +s/IrredPoly(/IrredPolyMod(/g +s/CharPoly(/CharPolyMod(/g + +# rename generic vector, pair, matrix macro instantations +# these assume no embedded blanks +s/vector_decl(\(.*\))/ntl_vector_decl(\1,vec_\1)/g +s/vector_io_decl(\(.*\))/ntl_io_vector_decl(\1,vec_\1)/g +s/vector_eq_decl(\(.*\))/ntl_eq_vector_decl(\1,vec_\1)/g +# +s/vector_impl(\(.*\))/ntl_vector_impl(\1,vec_\1)/g +s/vector_impl_plain(\(.*\))/ntl_vector_impl_plain(\1,vec_\1)/g +s/vector_io_impl(\(.*\))/ntl_io_vector_impl(\1,vec_\1)/g +s/vector_eq_impl(\(.*\))/ntl_eq_vector_impl(\1,vec_\1)/g +# +s/matrix_decl(\(.*\))/ntl_matrix_decl(\1,vec_\1,vec_vec_\1,mat_\1)/g +s/matrix_io_decl(\(.*\))/ntl_io_matrix_decl(\1,vec_\1,vec_vec_\1,mat_\1)/g +s/matrix_eq_decl(\(.*\))/ntl_eq_matrix_decl(\1,vec_\1,vec_vec_\1,mat_\1)/g +# +s/matrix_impl(\(.*\))/ntl_matrix_impl(\1,vec_\1,vec_vec_\1,mat_\1)/g +s/matrix_io_impl(\(.*\))/ntl_io_matrix_impl(\1,vec_\1,vec_vec_\1,mat_\1)/g +s/matrix_eq_impl(\(.*\))/ntl_eq_matrix_impl(\1,vec_\1,vec_vec_\1,mat_\1)/g +# +s/pair_decl(\(.*\),\(.*\))/ntl_pair_decl(\1,\2,pair_\1_\2)/g +s/pair_io_decl(\(.*\),\(.*\))/ntl_pair_io_decl(\1,\2,pair_\1_\2)/g +s/pair_eq_decl(\(.*\),\(.*\))/ntl_pair_eq_decl(\1,\2,pair_\1_\2)/g +# +s/pair_impl(\(.*\),\(.*\))/ntl_pair_impl(\1,\2,pair_\1_\2)/g +s/pair_io_impl(\(.*\),\(.*\))/ntl_pair_io_impl(\1,\2,pair_\1_\2)/g +s/pair_eq_impl(\(.*\),\(.*\))/ntl_pair_eq_impl(\1,\2,pair_\1_\2)/g + +# rename type names for the generic types +# these allow embedded blanks +s/pair *( *\([^,() ]*\) *, *\([^() ]*\) *)/pair_\1_\2/g +s/vector *( *\([^() ]*\) *)/vec_\1/g +s/matrix *( *\([^() ]*\) *)/mat_\1/g +# +# repeat to handle one nesting level +# +s/pair *( *\([^,() ]*\) *, *\([^() ]*\) *)/pair_\1_\2/g +s/vector *( *\([^() ]*\) *)/vec_\1/g +s/matrix *( *\([^() ]*\) *)/mat_\1/g +# +# repeat to handle two nesting levels +# +s/pair *( *\([^,() ]*\) *, *\([^() ]*\) *)/pair_\1_\2/g +s/vector *( *\([^() ]*\) *)/vec_\1/g +s/matrix *( *\([^() ]*\) *)/mat_\1/g + +# rename header files for generic types +s/vector\.h/ntl_vector\.h/ +s/matrix\.h/ntl_matrix\.h/ +s/pair\.h/ntl_pair\.h/ + diff --git a/thirdparty/linux/ntl/doc/tools.cpp.html b/thirdparty/linux/ntl/doc/tools.cpp.html new file mode 100644 index 0000000000..189b4f2550 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tools.cpp.html @@ -0,0 +1,208 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/tools.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: tools
+
+SUMMARY:
+
+Some useful tools that are used throughout NTL.
+
+\**************************************************************************/
+
+#include <cstdlib>
+#include <cmath>
+#include <iostream>
+
+#include <NTL/config.h>
+#include <NTL/mach_desc.h>
+
+
+
+
+double GetTime();
+// returns the number of seconds of CPU time used by this process
+
+void PrintTime(ostream& s, double t);
+// prints the time t (in seconds) to s in the format
+//     ss  or  mm:ss  or  hh:mm:ss,
+// where the value t is first rounded to the nearest integer.
+
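+// Example (an illustrative sketch):
+
+   double t0 = GetTime();
+   // ... some computation ...
+   PrintTime(cerr, GetTime() - t0);   // e.g. prints 1:05 for 65 seconds
+   cerr << "\n";
+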
+
+long IsWhiteSpace(long c);
+// returns 1 if c is "white space" (as defined by isspace in the
+// standard library...usually blanks, tabs, newlines), and 0 otherwise.
+
+long SkipWhiteSpace(istream& s);
+// skips white space (as defined by IsWhiteSpace).
+// Return value is 0 if end-of-file is reached; otherwise,
+// return value is 1.
+
+
+long IsEOFChar(long c);
+// test if c == EOF
+
+
+long CharToIntVal(long c);
+// returns the hexadecimal value of c if c is '0'..'9', 'A'..'F', or 'a'..'f';
+// otherwise, the return value is -1.
+
+char IntValToChar(long x);
+// returns the hexadecimal digit '0'..'9', 'a'..'f' representing x;
+// an error is raised if x < 0 or x > 15.
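+
+// Examples: CharToIntVal('b') == 11, IntValToChar(11) == 'b',
+// and CharToIntVal('x') == -1.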
+
+long IsFinite(double *p);
+// Returns 1 if *p is a "finite" floating point number.
+// A pointer is used to ensure that the number is in memory,
+// which on some architectures (notably x86/Pentium) can make a difference.
+
+// some min/max and swap routines:
+
+int min(int a, int b);
+int max(int a, int b);
+
+long min(long a, long b);
+long max(long a, long b);
+
+long min(int a, long b);
+long max(int a, long b);
+
+long min(long a, int b);
+long max(long a, int b);
+
+unsigned int min(unsigned int a, unsigned int b);
+unsigned int max(unsigned int a, unsigned int b);
+
+unsigned long min(unsigned long a, unsigned long b);
+unsigned long max(unsigned long a, unsigned long b);
+
+unsigned long min(unsigned int a, unsigned long b);
+unsigned long max(unsigned int a, unsigned long b);
+
+unsigned long min(unsigned long a, unsigned int b);
+unsigned long max(unsigned long a, unsigned int b);
+
+
+void swap(long& a, long& b);
+void swap(int& a, int& b);
+
+
+// defined here are all the conversion routines among the types
+// int, long, float, double.  See conversions.txt for complete details.
+
+
+
+// The following platform-dependent macros are defined:
+
+#define NTL_BITS_PER_LONG      (...)  /* bits in a long */
+#define NTL_MAX_LONG           (...)  /* max value of a long */
+#define NTL_MIN_LONG           (...)  /* min value of a long */
+
+#define NTL_BITS_PER_INT       (...)  /* bits in an int */
+#define NTL_MAX_INT            (...)  /* max value of an int */
+#define NTL_MIN_INT            (...)  /* min value of an int */
+
+#define NTL_DOUBLE_PRECISION   (...)  /* # of bits of precision in a double */
+#define NTL_FDOUBLE_PRECISION  (...)  /* the double value
+                                        2^{NTL_DOUBLE_PRECISION-1} */
+
+#define NTL_ARITH_RIGHT_SHIFT  (...)  /* 1 if signed right-shift is
+                                        arithmetic; 0 otherwise */
+
+#define NTL_EXT_DOUBLE         (...)  /* 1 if platform has "extended" doubles;
+                                        0 otherwise */
+
+
+// ERROR HANDLING
+
+void TerminalError(const char *s);
+// print an error message and call abort
+
+extern void (*ErrorMsgCallback)(const char *);
+extern void (*ErrorCallback)();
+// Pointers (initially NULL) to callback functions.
+// Upon encountering an unrecoverable error with an error
+// message s, the following happens:
+//
+//    if (ErrorMsgCallback)
+//       (*ErrorMsgCallback)(s);
+//    else
+//       cerr << s << "\n";
+//
+//    if (ErrorCallback) (*ErrorCallback)();
+//    abort();
+//
+// NOTE: if threads are enabled, these are actually thread_local variables.
+
+
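+// Example (an illustrative sketch; MyMsgHandler is a hypothetical name):
+
+   void MyMsgHandler(const char *s)
+   {
+      cerr << "[myapp] NTL error: " << s << "\n";
+   }
+
+   ...
+
+   ErrorMsgCallback = MyMsgHandler;
+   // fatal error messages now go through MyMsgHandler before abort()
+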
+
+// The following classes are defined even if exceptions are not
+// enabled with NTL_EXCEPTIONS
+
+class ErrorObject : public runtime_error {
+public:
+   ErrorObject(const char *msg);
+};
+
+class LogicErrorObject : public ErrorObject {
+public:
+   LogicErrorObject(const char *msg);
+};
+
+class ArithmeticErrorObject : public ErrorObject {
+public:
+   ArithmeticErrorObject(const char *msg);
+};
+
+class ResourceErrorObject : public ErrorObject {
+public:
+   ResourceErrorObject(const char *msg);
+};
+
+class FileErrorObject : public ErrorObject {
+public:
+   FileErrorObject(const char *msg);
+};
+
+class InputErrorObject : public ErrorObject {
+public:
+   InputErrorObject(const char *msg);
+};
+
+
+// The following functions throw the indicated exception if
+// exceptions are enabled with NTL_EXCEPTIONS; otherwise,
+// they simply invoke TerminalError.
+
+void MemoryError();
+// throws bad_alloc
+
+void Error(const char *msg);
+// throws ErrorObject
+
+void LogicError(const char *msg);
+// throws LogicErrorObject
+
+void ArithmeticError(const char *msg);
+// throws ArithmeticErrorObject
+
+void ResourceError(const char *msg);
+// throws ResourceErrorObject
+
+void FileError(const char *msg);
+// throws FileErrorObject
+
+void InputError(const char *msg);
+// throws InputErrorObject
+
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/tools.txt b/thirdparty/linux/ntl/doc/tools.txt new file mode 100644 index 0000000000..06402bae52 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tools.txt @@ -0,0 +1,198 @@ + +/**************************************************************************\ + +MODULE: tools + +SUMMARY: + +Some useful tools that are used throughout NTL. + +\**************************************************************************/ + +#include +#include +#include + +#include +#include + + + + +double GetTime(); +// returns number of seconds of CPU time used by this process; + +void PrintTime(ostream& s, double t); +// prints the time t (in seconds) to s in the format +// ss or mm:ss or hh:mm:ss, +// where the value t is first rounded to the nearest integer. + + +long IsWhiteSpace(long c); +// returns 1 if c is "white space" (as defined by isspace is the +// standard library...usually blanks, tabs, newlines), and 0 otherwise. + +long SkipWhiteSpace(istream& s); +// skips white space (as defined by IsWhiteSpace). +// Return value is 0 if end-of-file is reached; otherwise, +// return value is 1. + + +long IsEOFChar(long c); +// test if c == EOF + + +long CharToIntVal(long c); +// returns the hexidecimal value of c if c is '0'..'9', 'A'..'F', or 'a'..'f'; +// otherwise, the return value is -1. + +char IntValToChar(long x); +// returns the hexadecimal digit '0'..'9', 'a'..'f' representing x; +// an error is raised if x < 0 or x > 15. + +long IsFinite(double *p); +// Returns 1 if *p is a "finite" floating point number. +// A pointer is used to ensure that the number is in memory, +// which on some architectures (notably x86/Pentium) can make a difference. + +// some min/max and swap routines: + +int min(int a, int b); +int max(int a, int b); + +long min(long a, long b); +long max(long a, long b); + +long min(int a, long b); +long max(int a, long b); + +long min(long a, int b); +long max(long a, int b); + +unsigned int min(unsigned int a, unsigned int b); +unsigned int max(unsigned int a, unsigned int b); + +unsigned long min(unsigned long a, unsigned long b); +unsigned long max(unsigned long a, unsigned long b); + +unsigned long min(unsigned int a, unsigned long b); +unsigned long max(unsigned int a, unsigned long b); + +unsigned long min(unsigned long a, unsigned int b); +unsigned long max(unsigned long a, unsigned int b); + + +void swap(long& a, long& b); +void swap(int& a, int& b); + + +// defined here are all the conversion routines among the types +// int, long, float, double. See conversions.txt for complete details. + + + +// The following platform-dependent macros are defined: + +#define NTL_BITS_PER_LONG (...) /* bits in a long */ +#define NTL_MAX_LONG (...) /* max value of a long */ +#define NTL_MIN_LONG (...) /* min value of a long */ + +#define NTL_BITS_PER_INT (...) /* bits in a int */ +#define NTL_MAX_INT (...) /* max value of a int */ +#define NTL_MIN_INT (...) /* min value of a int */ + +#define NTL_DOUBLE_PRECISION (...) /* # of bits of precision in a double */ +#define NTL_FDOUBLE_PRECISION (...) /* the double value + 2^{NTL_DOUBLE_PRECISION-1} */ + +#define NTL_ARITH_RIGHT_SHIFT (...) /* 1 if signed right-shift is + arithmetic; 0 otherwise */ + +#define NTL_EXT_DOUBLE (...) 
/* 1 if platform has "extended" doubles; + 0 otherwise */ + + +// ERROR HANDLING + +void TerminalError(const char *s); +// print an error message and call abort + +extern void (*ErrorMsgCallback)(const char *); +extern void (*ErrorCallback)(); +// Pointers (initially NULL) to callback functions. +// Upon encountering an unrecoverable error with an error +// message s, the following happens: +// +// if (ErrorMsgCallback) +// (*ErrorMsgCallback)(s); +// else +// cerr << s << "\n"; +// +// if (ErrorCallback) (*ErrorCallback)(); +// abort(); +// +// NOTE: if threads are enabled, these are actually thread_local variables. + + + +// The following classes are defined even if exceptions are not +// enabled with NTL_EXCEPTIONS + +class ErrorObject : public runtime_error { +public: + ErrorObject(const char *msg); +}; + +class LogicErrorObject : public ErrorObject { +public: + LogicErrorObject(const char *msg); +}; + +class ArithmeticErrorObject : public ErrorObject { +public: + ArithmeticErrorObject(const char *msg); +}; + +class ResourceErrorObject : public ErrorObject { +public: + ResourceErrorObject(const char *msg); +}; + +class FileErrorObject : public ErrorObject { +public: + FileErrorObject(const char *msg); +}; + +class InputErrorObject : public ErrorObject { +public: + InputErrorObject(const char *msg); +}; + + +// The following functions throw the indicated exception if +// exceptions are enabled with NTL_EXCEPTIONS; otherwise, +// they simply invoke TerminalError. + +void MemoryError(); +// throws bad_alloc + +void Error(const char *msg); +// throws ErrorObject + +void LogicError(const char *msg); +// throws LogicErrorObject + +void ArithmeticError(const char *msg); +// throws ArithmeticErrorObject + +void ResourceError(const char *msg); +// throws ResourceErrorObject + +void FileError(const char *msg); +// throws FileErrorObject + +void InputError(const char *msg); +// throws InputErrorObject + + + diff --git a/thirdparty/linux/ntl/doc/tour-ack.html b/thirdparty/linux/ntl/doc/tour-ack.html new file mode 100644 index 0000000000..9f9b978074 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-ack.html @@ -0,0 +1,79 @@ + + + +A Tour of NTL: Acknowledgements + + + +

+[Previous] + [Up] + [Next] +
+ + + +

+

+A Tour of NTL: Acknowledgements +

+

+ +


+ +

    + +
  • +Thanks to Arjen Lenstra +and +Keith Briggs +for letting me use their software. +Arjen Lenstra wrote LIP, a long integer package, which formed +the basis of NTL. +Keith Briggs developed a quadratic precision package. +NTL has incorporated parts of these two packages, although +what is in NTL has been extensively re-written. +Thanks also to Keith for many helpful comments and suggestions. + +
  • +Thanks to +Juergen Gerhard +for pointing out the deficiency in the NTL-1.0 ZZX arithmetic, +for contributing the Schoenhage/Strassen code to NTL 1.5, +and for helping to track down some bugs. + +
  • +Thanks to +Phong Nguyen for +putting the new LLL code (NTL 1.7) through a torture test of +lattices arising from new lattice-based cryptosystems; this +led to a number of significant improvements in the LLL code. + +
  • +Thanks to Dan Boneh +for encouraging me to improve NTL's +programming interface. + +
  • +Thanks to John Abbott, +Mark van Hoeij, +and +Paul Zimmermann +for sharing many of their ideas about +polynomial factoring over ZZ with me, which +led to a number of improvements in NTL's factorizer. +Thanks also to Paul for numerous other suggestions and improvements. + +
  • +Thanks to +Joachim von zur Gathen +and +Erich Kaltofen +for their collaboration and support over the years. + + +
+ + + + diff --git a/thirdparty/linux/ntl/doc/tour-changes.html b/thirdparty/linux/ntl/doc/tour-changes.html new file mode 100644 index 0000000000..8f4fad5824 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-changes.html @@ -0,0 +1,2932 @@ + + + +A Tour of NTL: Summary of Changes + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Summary of Changes +

+

+ + +


+

+2016.08.22: Changes between NTL 9.10.0 and 9.11.0 +

+ +
    +
  • +Improved the effectiveness of the new, faster +ZZ to zz_p conversion + +
  • +Added new routines VectorConv +for faster bulk conversion from ZZ and long +to zz_p +(see lzz_p.txt) +
      +
    • +There are some hidden interfaces which could be more generally useful, +and I may add these to the documented interface at some point. +
    + +
  • +Added new routines VectorRandomBnd +(see ZZ.txt) +and VectorRandom +(see lzz_p.txt) +for faster bulk random number generation +
      +
    • +Again, there are some hidden interfaces which could be more generally useful, +and I may add these to the documented interface at some point. +
    + + + + + +
+ +


+

+2016.06.21: Changes between NTL 9.9.1 and 9.10.0 +

+ +
    +
  • +Fixed a problem in the aligned array logic +that prevented compilation on MinGW on Windows. + +
  • +Conversions from ZZ to zz_p +are now faster, thanks to preconditioning. +Among other things, +the CRT-based ZZX multiplication code is also +a bit faster as a result. + +
  • +The BasicThreadPool class + now guarantees that + exec_range assigns the current thread + first=0, and exec_index assigns + the current thread index=0. +This makes it easy for a thread to tell whether or not +it is the current thread, which can be convenient for +some applications. + 
  • +Fine tuned the interface for SmartPtr +and UniquePtr a bit, including the ability +to attach an explicit deleter policy, which +(among other things) makes it easier to implement the +PIMPL pattern using these classes. +Unfortunately, some of these changes introduced some +minor backward incompatibilities (but I doubt anyone +will even notice). + +
  • +Introduced a new class CopiedPtr, +which has a similar interface to UniquePtr, +but which allows copy and assignment. +This class is meant to replace the OptionalVal +class, whose use is now discouraged. + +
+ + +


+

+2016.06.02: Changes between NTL 9.9.0 and 9.9.1 +

+ +
    +
  • +Fixed a bug in NTL_EXEC_INDEX (actually, in +BasicThreadPool::relaxed_exec_index) +that would cause an error to be incorrectly raised in some +situations +
  • +Fine tuned some crossover points +
+ +


+

+2016.05.30: Changes between NTL 9.8.1 and 9.9.0 +

+ +
    +
  • +Added examples and documentation on how to use NTL's thread pools +and parallel for loops: +see here 
  • +The build procedure now puts files config_log.h +and wizard_log.h +in NTL's include directory. +These files contain comments that document what choices were +made during the build process, +including the CXXAUTOFLAGS value. +
  • +Added elts() method to UniqueArray and AlignedArray + (for compatibility with Vec class) + +
  • +Added get() and release() methods to OptionalVal + +
  • +Made constructors for PartitionInfo and BasicThreadPool +explicit + +
  • +Cleaned up some pointer issues in mat_lzz_p.c (mainly academic) + +
  • +Definition of NTL_TLS_LOCAL_INIT ensures that var names + a local reference, regardless of the implementation + +
  • +Allow p.move(q), where p is a UniquePtr<T>, +q is a UniquePtr<Y>, + and Y* converts to T*. + +
  • +Introduced PreconditionedRemainder class +for faster reduction of a ZZ modulo a fixed long. +This is intended to make Chinese Remaindering type computations faster +
      +
    • +for the time being, +this is an undocumented feature which may be modified or removed +in a future release +
    + +
  • +Introduced ll_type and related routines which perform +a restricted set of operations on a long-long-like type. +It can be implemented via inline asm; this provides a cleaner +interface and is sometimes faster. +On x86-64/gcc platforms, the assembly code version is +used and gives a modest speed boost. +For all other platforms (including x86-64 with clang or icc), +the assembly code is not used. +I should really dynamically enable the assembly via the performance +tuning wizard, but I don't do this yet. +To explicitly disable the assembly code, +configure with NTL_DISABLE_LL_ASM=on.
      +
    • +for the time being, +this is an undocumented feature which may be modified or removed +in a future release +
    + +
+ + +


+

+2016.04.29: Changes between NTL 9.8.0 and 9.8.1 +

+ +
    +
  • +Fixed an annoying issue that could cause unnecessary +ambiguities in client code when compiling with NTL_EXCEPTIONS=on
+ + +


+

+2016.04.26: Changes between NTL 9.7.1 and 9.8.0 +

+ +
    +

  • +Thread safety for the masses! + +
      +
    • +Previous versions of NTL required full C++11 +compliance to achieve thread safety +
    • +Unfortunately, many platforms (notably, Mac OSX) +do not provide the necessary +features - in particular, they do not provide full, correct support +for "thread local storage" (TLS) +
    • +This new release (by default) will apply a "TLS hack" +that works around this limitation (at least for +gcc and gcc-compatible compilers such as clang and icc) +
        +
      • +With this "hack", it is only required that gcc's +more widely available __thread +storage specifier be implemented, rather than the less widely available +thread_local specifier (and it also makes direct use +of the pthread library) +
      • +You can explicitly disable the hack by configuring NTL +with NTL_DISABLE_TLS_HACK=on +
      +
    • +This "hack" has been successfully +tested on Linux with gcc 4.8.5 +and on Mac OSX 10.10 and 10.11 with clang +
        +
      • +It should work with any gcc 4.8.x or higher +
      • +Many thanks to Justin Walker for pushing this issue and +helping with the Mac OSX testing +
      +
    + +
  • +Fixed a "pseudo" bug in the test script: BitMatTest +in make check was reporting "failure", but this was +a bug in BitMatTest, not in NTL itself. + +

  • +Fixed a real bug in the ReleaseThreadPool +function (although NTL internally does not use this function, +so only client code that called it directly would be affected). + + +
+ +


+

+2016.04.20: Changes between NTL 9.7.0 and 9.7.1 +

+ +
    + +
  • +Extended the performance improvements in +mat_lzz_p +to include the gauss, kernel, +and image routines + +
  • +Generally improved +performance for all of the mat_lzz_p routines, +including an implementation of Strassen for matrix multiplication. + 
  • +Added the matrix/column vector solve routines +to all other matrix classes (for consistency). + +

    +

  • +Fixed a compile-time bug that occurred on certain platforms +(mainly Windows). + 
  • +Made some of the steps in configure and make +a bit more quiet (look at .log files for outputs). + +
+ + +


+

+2016.03.12: Changes between NTL 9.6.4 and 9.7.0 +

+ +
    + + + +

    +

  • +Changes to mat_lzz_p module: +
      +
    • +Improved performance of mul, inv, solve, +and determinant routines: +
        +
      • +more cache friendly +
      • +thread boosted +
      • +for small p (up to 23 bits), exploits +AVX and FMA instructions (when available) +
      • +depending on many things, +the new code can be anywhere from +1.5x to 70x (!) faster than the old code +(part of that speedup can be attributed to just how +awful some of the old code was, rather than +how brilliant the new code is) +
      • +on the SandyBridge and Haswell machines I was able to test, +the new code is comparable in speed +to +FFLAS/FFPACK +
      +
    • +Added "relaxed" versions of inv, solve, and +determinant, +which also now work for prime powers, not just primes +
    • +Added a new variant of solve routine to solve A*x = b +for column vectors +
    + +

    +

  • Changes to BasicThreadPool +module: +
      +
    • +Added NTL_EXEC_RANGE and other functionality which makes writing +"parallel for loops" simple (very similar to OpenMP), +and the same source code will work regardless of whether +threads or thread boosting is enabled. +A small sketch appears after this list. + 
    • +Backward incompatibilities: +
        +
      • +NTLThreadPool is no longer directly accessible: +new access functions are provided +
      • +Got rid of method SplitProblems, and made a more general/abstract +class PartitionInfo +
      +
    + + +

    +

  • +Miscellaneous: +
      +
    • +Improved crossover points for GF2X division + +
    • +Made access to thread local variables used in NTL faster +by using GCC's __thread in place of thread_local, +wherever possible + +
    • +Improved performance of vec_long to vec_zz_p conversion + +
    • +Made AVX and FMA detection more robust, requiring LP64 + +
    • +Added InvModStatus for long's + +
    • +Bumped FILE_THRESH to 1e12 +
    +
+ +
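+
+A small sketch (not from the original changelog; the loop body is
+illustrative) of a "parallel for loop" written with NTL_EXEC_RANGE, which
+requires <NTL/BasicThreadPool.h>. Each thread in the pool executes the
+body, with [first..last) ranging over a partition of [0..n):
+
+   long n = 1000;
+   Vec<long> a;
+   a.SetLength(n);
+
+   NTL_EXEC_RANGE(n, first, last)
+      for (long i = first; i < last; i++)
+         a[i] = 2*i;      // this thread handles indices [first..last)
+   NTL_EXEC_RANGE_END
+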


+

+2016.01.30: Changes between NTL 9.6.3 and 9.6.4 +

+ +
    +
  • +Streamlined some of the installation scripts, +so now the "heuristic selection of compiler flags" +and the "nonstandard feature testing" procedures are more structured +so as to be easier to extend in the future -- it is beginning to +act more like a sort of "autoconf".
  • +Fixed a couple of "buglets" in the header files. +
+ + +


+

+2016.01.26: Changes between NTL 9.6.2 and 9.6.3 +

+ +
    +
  • +Some changes to the installation procedure: +
      +
    • +For the Unix distribution, NTL_GMP_LIP is now +on by default, which means that by default, NTL will use +GMP. +
    • +By default, the configuration script will attempt a +"native" build by passing -march=native +as a compiler flag. +Most modern compilers support this, but the configuration script will +check to make sure.
    • +The NTL_PCLMUL flag (which enables the use of +Intel's PCLMUL instruction) is now automagically set by the +Wizard script. +
    • +The build script automatically checks for availability of Intel +AVX intrinsics, which may be used to better +optimize certain code. +
    +
  • +A new modular composition implementation for zz_pX. +This makes modular composition up to 3x faster, depending +on several factors. +See here for details. + 
  • +Improved performance for polynomial factoring over zz_pX +using CanZass, +using the improved modular composition routine (above) +and better choice of baby step / giant step parameters. +This leads to a 1.1x to 1.8x speedup, depending on several factors. + +
  • +Improved robustness of quad_float implementation: +it should now work correctly on platforms that are too +liberal in their use of FMA instructions. + + +
+ +


+

+2015.11.13: Changes between NTL 9.6.1 and 9.6.2 +

+ +
    +
  • +More small tweaks and a new configuration variable: +
    +NTL_MAXIMIZE_SP_NBITS=off
    +
    +# Allows for 62-bit single-precision moduli on 64-bit platforms.
    +# By default, such moduli are restricted to 60 bits, which
    +# usually gives *slightly* better performance across a range of
    +# of parameters.
    +
    + +
+ +


+

+2015.11.13: Changes between NTL 9.6.0 and 9.6.1 +

+ +
    +
  • +Streamlined some awkward code in g_lip_impl.h.
  • +Made QuickTest a bit quicker. +
  • +Fixed some documentation/packaging problems. +
+ +


+

+2015.11.10: Changes between NTL 9.5.0 and 9.6.0 +

+ +
    +
  • +More performance tuning for ZZ_pX arithmetic. + +
  • +Added configuration variable CXXAUTOFLAGS, +which is dynamically (and heuristically) set by the configuration +script. +This way, CXXFLAGS is not modified by the script. +
+ + +


+

+2015.10.20: Changes between NTL 9.4.0 and 9.5.0 +

+ +
    +
  • +Added a new thread boosting feature. +With this feature, certain code within NTL will use available +threads to speed up certain computations on a multicore +machine. +This feature is enabled by setting NTL_THREAD_BOOST=on +during configuration. +See BasicThreadPool.txt +for more information. + +

    +This feature is a work in progress. +Currently, basic ZZ_pX arithmetic has been thread boosted. +More code will be boosted later. + +

  • +A bit more performance tuning for ZZ_pX arithmetic, +and better crossovers for ZZX multiplication. + 
+ +


+

+2015.9.22: Changes between NTL 9.3.0 and 9.4.0 +

+ +
    +
  • +Performance tuning: ZZ_pX and zz_pX keep +getting faster + +
  • +Upgrade to pseudo-random number generation: +I replaced the underlying PRG with ChaCha20 (replacing RC4) +and the underlying key-derivation function with a function +based on HMAC-SHA256 (replacing an MD5-based function). +The new routines are faster and more secure.

    +I also expanded the PRG interface a bit: +see here for details. + +

  • +Bug fixes: fixed a (mostly dormant) bug in the ZZFromBytes +routine (triggered only when n==0). + +
  • +Added documentation for classes RRPush and +RROutputPush: +see here for details. + +
+ + + + +


+

+2015.7.9: Changes between NTL 9.2.0 and 9.3.0 +

+ +
    +
  • +Fixed a compilation error that arose with NTL_LEGACY_SP_MULMOD=on. + +
  • +Added a new callback routine ErrorMsgCallback. +See tools.txt. +This is mainly to help with NTL integration within SAGE. + 
+ + +


+

+2015.5.23: Changes between NTL 9.1.1 and 9.2.0 +

+ +
    +
  • +Completed the transition away from floating-point arithmetic +for the implementation of single-precision modular arithmetic. +The current implementation should allow 60-bit moduli on +64-bit platforms that support a 128-bit extended integer +type (this is the case for current gcc, clang, and icc +compilers). + +

    +One can still revert to the "classical" (pre-9.0) implementation +using double-precision arithmetic (which imposes a 50-bit limit), +or to the "long double" implementation introduced in v9.0 (60-bit limit). + +

    +Note that one must compile NTL with GMP to get any of these improvements. +It would have perhaps been better to use GMP's longlong.h +facility instead of relying on compiler support for extended +integer types. +However, at the moment, it is a bit inconvenient to use longlong.h +as a freestanding header file. +This might change in the future. + +

    +For details, see here, +including the comments entitled "Compatibility notes". + +

    +Programming notes: MulMod(a, b, n) is equivalent to +mulmod_t ninv = PrepMulMod(n); MulMod(a, b, n, ninv). +Compared to the older, floating-point implementation, the +relative cost of computing ninv is higher in the new regime. +In a loop where n is invariant, the compiler should +"hoist" the computation of ninv, so it is only done once. +However, it is usually better to precompute and store ninv, +and use the second form of MulMod, with ninv passed +as a parameter (NTL does this internally quite consistently). +MulMod(a, b, n, ninv) is somewhat faster +in the new implementation. +Where possible, one should use MulModPrecon, which is faster still +(useful in situations where both n and b are invariant). +A short sketch appears after this list. + + 

    +

  • +A number of general performance improvements. + + + +
+ +
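+
+A minimal sketch (not from the original changelog; the modulus and values
+are illustrative) of the precomputation idiom described in the programming
+notes above:
+
+   long n = 1073741789;                 // an invariant single-precision modulus
+   mulmod_t ninv = PrepMulMod(n);       // precompute data for n once
+   long x = 1;
+   for (long i = 1; i <= 10; i++)
+      x = MulMod(x, i, n, ninv);        // faster than MulMod(x, i, n) here
+
+   // if b is also invariant, MulModPrecon is faster still:
+   long b = 12345;
+   mulmod_precon_t bninv = PrepMulModPrecon(b, n, ninv);
+   x = MulModPrecon(x, b, n, bninv);    // x = (x * b) % n
+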


+

+2015.5.16: +Changes between NTL 9.1.0 and 9.1.1 +

+ +
    +
  • +Fixed a bug introduced in 9.1.0 that prevented conversions +between Vec<GF2> and Vec<T>. +
+ +


+

+2015.5.2: +Changes between NTL 9.0.2 and 9.1.0 +

+ +
    +
  • +Added a new configuration switch to enable +the PCLMUL instruction on x86 machines. +This can speed up GF2X arithmetic significantly +(by a factor of 4). +This is enabled by configuring with NTL_PCLMUL=on +(and the configuration +script automatically checks if it actually works on your platform). +

    +Note that this is an alternative to building NTL against the gf2x +library (the latter is currently not thread or exception safe). +

    + + +

  • +Performance improvements to zz_pX and Vec<zz_p>. +

    + +

  • +Performance improvements to ZZX: +implemented asymptotically fast CRT code +for HomMul and more cache-friendly logic. +This routine is used for polynomials whose degree is +significantly larger than the bit-length of their coefficients. +This should make NTL's ZZX multiplication faster +across a broader range of parameters, +and at least be within a (hopefully not-too-large) constant factor +of optimal.

    + +

  • +Some internal cleanup of the small-prime FFT code. +I've made David Harvey's lazy butterfly routine without +precomputed tables more competitive with the large-table +variant, +so now large tables +are used for a smaller range of parameters (this should reduce +the overall memory footprint).

    + +

  • +Laid the groundwork for some future changes; +namely, to allow 60-bit +modular arithmetic without relying on the esoteric x87 fmul +instruction. +This should be coming soon (probably v9.2). +

    + + +

+ + + +


+

+2015.3.29: +Changes between NTL 9.0.1 and 9.0.2 +

+ +
    +
  • +Made a small change to single-precision MulMod +that enables slightly better compiler optimizations +(the compiler can "hoist" the computation of 1/n +out of a loop, so the variant with the extra mulmod_t +arg becomes somewhat less essential).
+ + + + +


+

+2015.3.27: +Changes between NTL 9.0.0 and 9.0.1 +

+ +
    +
  • +Fixed a small bug that prevented compilation on certain platforms.
+ + +


+

+2015.3.27: +Changes between NTL 8.1.2 and 9.0.0 +

+ +
    +
  • +With much trepidation, I have introduced a (hopefully minor) +backward incompatibility into NTL. + +The interface to the single-precision modular arithmetic +routines has been modified slightly. +This interface change allows for more flexible and more +efficient implementation of these routines, +which play a crucial role at many levels in NTL. +

    +Basically, these changes to the interface abstract away +some implementation details that arguably should never have been there +in the first place. +By coding to the new interface, NTL clients will be able to +benefit from +the current and future improvements.

    +In particular, on 64-bit x86/GCC platforms, single precision +moduli can now be up to 60 bits, rather than 50 bits. +While some operations may in fact be a little slower, the most important +ones (like MulModPrecon) should not be. +Using larger moduli speeds up a number of things, like ZZ_pX +arithmetic, as fewer primes need to be used in Chinese Remaindering steps. +Other applications benefit from larger moduli as well. + +

    +It is expected that most NTL clients will not be affected at all. +Moreover, any code that needs to be updated will be detected +by the compiler, and the updates should be simple and mechanical. +There is also a configuration flag that will enable the legacy +interface (although this is not recommended practice). + +

    +For details, see here, +including the comments entitled "Compatibility notes". + +

    +

  • +Other changes: +
      +
    • +Previous versions of NTL relied (at least by default) on some +undefined behavior regarding integer arithmetic +(namely, that in a few key code sequences, signed integer +overflow just "wraps around"). +All of this undefined behavior has been replaced by +(much more desirable) implementation-defined behavior +(namely, that conversion from unsigned to signed works as expected). +As in the past, the NTL_CLEAN_INT flag can be used to +avoid all of these issues (but with the new code, this should +truly be academic). +For details, look here.
    • +By request, added a function xdouble exp(const xdouble& x), +which is equivalent to xexp(to_double(x)). +For details, look here. +
    +
+ + + + + + + +


+

+2015.1.31: +Changes between NTL 8.1.1 and 8.1.2 +

+ +
    +
  • +Corrected a bug that could affect the log +function in a multi-threaded execution. +
+ + + +


+

+2015.1.30: +Changes between NTL 8.1 and 8.1.1 +

+ +
    +
  • +Corrected a syntax error in SmartPtr.h, +which most compilers don't seem to complain about, but some +do. +

    +

  • +Added --tag=CXX to the some lines +in the makefile to keep libtool happy. +
+ + +


+

+2015.1.9: +Changes between NTL 8.0 and 8.1 +

+ +
    +
  • +Corrected an oversight in the matrix template class. +With this new version, one may safely copy and assign +objects of type Mat<ZZ_p> +and Mat<GF2E> out of context (i.e., +under a different or undefined modulus). +More generally, the copy constructor for Mat<T> +now relies only on the copy constructor for Vec<T> +and the assignment operator for Mat<T> +relies only on the assignment operator and copy constructor +for Vec<T>. +

    +The goal since v6.2 has been to allow all modular types (ZZ_p, etc.) +and all types derived from them (vectors, polynomials, matrices, etc.) +to be safely copy constructed and assigned out of context. +Hopefully, this goal has now been reached. + +

+ + +


+

+2014.12.24: +Changes between NTL 7.0.2 and 8.0 +

+ +
    + +

  • Exceptions! +

    This is another major milestone for NTL, and hence the big +version number bump (this will be the last of these big bumps +for a while). + +

    +Prior to this version, error handling consisted of "abort with an error message". +To enable exceptions in NTL, configure with NTL_EXCEPTIONS=on. +You will also need a C++11 compiler for this to work properly +(and if you don't enable exceptions, any old C++98 compiler will +work, as always). + +

    +With exceptions enabled, errors are reported by throwing an appropriate +exception. +Of course, this was the easy part. +The hard part was making NTL's code exception safe, +which (among other things) means that no resources (i.e., memory) +are leaked when an exception is thrown. +This required a very painful top-to-bottom scrub of the whole library. + +

    +Despite major changes to the code base and many internal +interfaces, the external (i.e., documented) interfaces remain +completely unchanged. + +

    +More details are available here. + +

  • +Improved performance of ZZ_pX arithmetic for both classic +and GMP-based long integer packages. + +

  • +Made copy constructor and assignment operators +for fftRep and FFTRep safe "out of context", +which extends to the classes zz_pXModulus and ZZ_pXModulus. + +

  • +Made mechanism for establishing "unique ID's" (used for temporary +file name generation and default pseudo-random number seeds) +more robust. + + +
+ +


+

+2014.12.15: +Changes between NTL 7.0.1 and 7.0.2 +

+ +
    +

  • +Fixed bug introduced in v7.0 affecting RR and quad_float input routines, +which would leave the RR precision variable in an incorrect state. +

  • +Fixed a bug introduced in v6.2 that affected the append routines +for ZZ_p and GF2E, which would lead to incorrect memory allocation +(which, if triggered, should just have led to an error message and abort, rather than +incorrect results). +This bug also affected the new Vec constructor introduced in v7.0 +(and again, only for ZZ_p and GF2E). + 
+ + +


+

+2014.11.14: +Changes between NTL 7.0.0 and 7.0.1 +

+ +
    +
  • +Fixed critical bug in new bit-reverse-copy routine. +Large degree polynomial multiplication code was buggy +in v7.0. +Now it's fixed and properly tested. +
+ +


+

+2014.11.8: +Changes between NTL 6.2.1 and 7.0 +

+ +
    + +

  • Thread safety! +

    + +This is a major milestone for NTL (and hence a bump in the +major version number). +However, to actually use it, you will need a "bleeding edge" +C++ compiler that supports C++11 concurrency features. +Most importantly, the C++11 storage class thread_local +needs to be fully and correctly implemented. +Some compilers claim to support it, but are very buggy to the point of +being useless. +All I can say is, as of right now, I have been able to +successfully build and test a multi-threaded NTL program +using GCC 4.9.2 on a Red Hat Linux distribution. +I don't think any pre-4.9.x version of GCC will work. +And unfortunately, I don't think any compiler (GCC or CLANG) +on any current Mac will work, but I haven't been able to directly +test this.

    +As time goes on, I expect C++ compilers will provide the +necessary support. +In the meantime, you can try it out and see if it works for you. +Configure with the NTL_THREADS flag turned on and see +what happens. +The test program ThreadTest that runs as the last step +of make check will let you know if it works. +If not, you can try building GCC 4.9.2 yourself. +It is actually not that hard! + +

    +See the portability and implementation section +for more information. +In any case, if threads don't work for you, just don't use them. +Everything still works as before using almost any compiler. + +

  • +I changed the stream input behavior to conform to wider +C++ practice (and with an eye towards an exception-safe future). +Previously, if an attempt to read an NTL object failed, the +good old Error function was called, printing an error message, +and aborting your program. +Now, NTL just quietly sets the "fail bit" of the stream. +The second example here illustrates this. +Hopefully, this change will not cause too many problems, +but if it does, configure NTL with the NTL_LEGACY_INPUT_ERROR +flag on to get the old behavior back. + 

  • +To further simplify future development, I've dropped support +for legacy C++ standard header files. +That is, NTL always uses <iostream> +rather than <iostream.h>. +This shouldn't be a problem for anyone by now, as these +legacy header files have been gone from standard C++ +since 1998. +Also, by default, NTL components are still wrapped in the NTL +namespace, but for backward compatibility, you can still put +them in the global namespace by configuring NTL +with the NTL_LEGACY_NO_NAMESPACE flag. + 

  • +Implemented a cache-friendly "bit reverse copy" routine for doing +FFT's. This is the COBRA algorithm from Carter and Gatlin, "Towards an +optimal bit-reversal permutation program", FOCS 1998. +This does seem to help a bit. +Getting rid of "bit reverse copy" would be even better, +but this would take more work and break a number of interfaces. + 

  • +Made some minor improvements to ZZX multiplication routines +to get better locality of reference. +Improvement is nominal. + +

  • +Fixed a small issue in the left-shift ZZ routine: +it was allocating one word more than necessary in some cases. + + +

  • +Added new Vec constructor, so + + +

    + +   T a;
    +   Vec<T> v(INIT_SIZE, n, a);
    +
    +

    + +is equivalent to + + +

    + +   T a;
    +   Vec<T> v;
    +   v.SetLength(n, a);
    +
    +

    + +In both cases, the copy constructor for T +is used. + +

  • +I've added some more documentation about what I plan on +doing with NTL in the future, as well as a "wish list" +of what I hope others might contribute. +See the roadmap section for +more details. +
+ + +


+

+2014.8.26: Changes between NTL 6.2 and 6.2.1 +

+ +
    +
  • +Fixed syntax problem in NTL/vector.h +
+ +


+

+2014.8.21: Changes between NTL 6.1 and 6.2 +

+ +
    + +

    +

  • +I added explicit constructors corresponding to promotions. +For example: + + +

    + +   ZZ w = ZZ(1); // legal
    +   ZZ w(1);      // legal
    +   ZZ w{1};      // legal in C++11
    +   ZZ w = 1;     // not legal
    +
    +

    + + +

    +Also added new names for the "monomial constructors", e.g., +ZZX(INIT_MONO, i, c) is now preferred to ZZX(i, c), +although the old constructors are still there. +There are also new constructors like ZZX(INIT_MONO, i) +for making monic monomials. + +

    +

  • +A subtle but important change is that now objects from +classes that represent residue class rings with a +dynamically installed modulus, i.e., 
    +   ZZ_p, zz_p, ZZ_pE, lzz_pE, GF2E,
    +
    +may now be used a bit more flexibly. + +

    +It is critical that such objects created under one modulus are not used in +any non-trivial way "out of context", i.e., under a different (or undefined) +modulus. However, for ease-of-use, some operations may be safely +performed out of context. These safe operations now include: the default and copy +constructor, the destructor, and the assignment operator. In addition, it is +generally safe to read any object out of context (i.e., printing it out, or +fetching its underlying representation using the rep() function). +(In the past, it was generally unsafe to use the default and copy constructors +out of context, which also prevented vectors and polynomials +of such objects from being copied out of context.) + 

    +The implementations of Vec<ZZ_p> and Vec<GF2E> +are still specialized to manage memory more +efficiently than in the default implementation of Vec<T>. +Contiguous elements in such an array are allocated in a contiguous region of +memory. This reduces the number of calls to the memory allocator, and +leads to greater locality of reference. +A consequence of +this implementation is that any calls to SetLength on such a vector will +need to use information about the current modulus, and so such calls should +only be done "in context". That said, it is still safe to construct +such a vector using the default or copy constructor, and to assign or append one +to another "out of context". + 

    +

  • +For the classes ZZ_p, ZZ_pE, zz_pE, +and GF2E, added explicit "allocation" and "no allocation" constructors +(invoked with INIT_ALLOC and INIT_NO_ALLOC) and special member function +allocate(). This allows one to explicitly determine exactly when +space for such objects is allocated. +By default, no space is allocated (this is different from prior versions of NTL), +except for ZZ_p's that are a part of a Vec<ZZ_p> +and GF2E's that are a part of a Vec<GF2E> + 

    +

  • +Added new classes ZZ_pPush, ZZ_pEPush, +zz_pPush, zz_pEPush, GF2EPush. +These allow one to conveniently back up and optionally install +a new modulus in one step (a fuller sketch appears after this change list): + 

    + +   { ZZ_pPush push(p); ... }
    +
    +

    + + +will save the current modulus and install p as the +new modulus; when the destructor for push is invoked, +the old modulus will be re-installed. + + +

    +

  • +Made the one-arg constructors for all the various "context" classes +(e.g., ZZ_pContext) explicit. + + +

    +

  • +As a general aid to generic programming, I've added a +bunch of typedef's using consistent naming conventions +to all of the main arithmetic classes. +E.g., ZZ_p::poly_type is a typedef for ZZ_pX. +There are a whole bunch of these. +See the documentation for the individual classes for details. + +

    +

  • +Got rid of a few esoteric compilation modes: +
      +
    • +All files are now C++ files, and should be compiled +using a C++ compiler. In older versions, some files +could be compiled either as C or C++. + +
    • +The flag NTL_GMP_HACK is no longer supported. +GMP may still be used using the NTL_GMP_LIP flag, +which is still highly recommended for high-performance applications. + 
    • +The flags NTL_SINGLE_MUL and NTL_FAST_INT_MUL +are no longer recognized. +These were really outdated and esoteric. +
    + +

    +

  • +I have started working towards making NTL thread safe. +It is not as difficult as I thought it would be, but it is still +a work in progress. +So far I have identified all global variables, and either got +rid of them, or tagged them as "thread local". +So, although there are still some global variables, they will +all eventually be "thread local". +In particular, things like the current ZZ_p modulus +will be a thread-local global variable. + + +

    +There are a few remaining trouble spots I've tagged: +these mostly involve lazy initialization of tables; +I have a plan for making this code thread safe using +nearly lock-free coding techniques. + +

    +I will hopefully get this done within the next 6-12 months. +One thing that is slowing me down is the lack of availability +of C++11 features that I need to do some of this, +but it will come.

    +The main reason for getting rid of the esoteric compilation modes +mentioned above is to make it easier to do this thread-safety work. + + +

+ +
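+
+A slightly fuller sketch (not from the original changelog; the moduli and
+values are illustrative) of the backup/restore idiom provided by the new
+Push classes:
+
+   ZZ_p::init(ZZ(17));          // current modulus is 17
+   {
+      ZZ_pPush push(ZZ(5));     // save modulus 17, install 5
+      ZZ_p a(3), b(4);
+      ZZ_p c = a*b;             // computed mod 5
+   }                            // destructor restores modulus 17
+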


+

+2014.03.13: Changes between NTL 6.0 and 6.1 +

+ +
    +
  • +Added support for "user defined" FFT primes for zz_p. +See the functions + + +

    + +   static void zz_p::UserFFTInit(long p);
    +   zz_pContext::zz_pContext(INIT_USER_FFT_TYPE, long p);
    +
    +

    + + +in the lzz_p module. +A usage sketch follows this list. + 

+ +
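+
+A one-line usage sketch (not from the original changelog; the prime is
+illustrative, and p-1 must be divisible by a suitably large power of 2):
+
+   zz_p::UserFFTInit(786433);   // 786433 = 3*2^18 + 1 is an FFT prime
+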


+

+2013.02.15: Changes between NTL 5.5.2 and 6.0 +

+ +
    +
  • +Replaced the old template-like macros for vectors, matrices, +and pairs with true template classes: Vec<T>, +Mat<T>, and Pair<S,T>. + +

    +For backwards compatibility, all the names that were used +in previous versions (e.g., vec_ZZ_p, mat_ZZ_p) +have been replaced with appropriate typedefs. + 

    +For many years, I resisted the temptation of using templates, +because compiler support was very inconsistent. +But that no longer seems to be the case. + +

    +This change, while rather sweeping, should create very few, +if any, incompatibilities with existing software. +The biggest issue would be for software that uses the +old template-like macros: such macro invocations can simply be +replaced with appropriate typedefs. + +

    +

  • +Made the conversion interface more complete and uniform. +Also, using template notation, one can and should now write +conv<ZZ>(a) instead of to_ZZ(a) +(for backward compatibility, all the old names to_XXX +are still there, but many new conversions are not available +under these old names). + + +

    +There are many new conversions provided. +Moreover, whenever there is a conversion from a ring R +to a ring S, there is a corresponding, coefficient-wise +conversion from the polynomial ring R[X] to the +polynomial ring S[X]. + 

    +In addition, using the template mechanism, there are +generic conversions for vectors and matrices. +For example, if there is a conversion from S to T, +then there is automatically a corresponding component-wise +conversion from Vec<S> to Vec<T>. +A short sketch appears after this change list. + + 

    +

  • +Introduced a more general mechanism for accessing GF2's +in packed structures via indexing (see the class +ref_GF2 in the GF2 module). + +

    +

  • +Employed ideas from David Harvey to make the single-precision +FFT faster (about twice as fast in many cases). +This speeds up many higher-level operations. +See: Faster arithmetic for number-theoretic transforms. +J. Symb. Comp. 60 (2014) 113-119. + + +

    +

  • +Fixed all known bugs. + + +
+ + + +
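+
+A minimal sketch (not from the original changelog; the values are
+illustrative) of the conversion interface described above:
+
+   ZZ a = conv<ZZ>(42);               // preferred over to_ZZ(42)
+
+   Vec<long> v;
+   v.SetLength(2);
+   v[0] = 1; v[1] = 2;
+   Vec<ZZ> w = conv< Vec<ZZ> >(v);    // component-wise conversion
+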


+

+2009.08.14: Changes between NTL 5.5.1 and 5.5.2 +

+ +
    +
  • +New routines MulAddTo and MulSubFrom +for computing x += a*b and x -= a*b, +where x and a are ZZ's and +b is a ZZ or a long. +In the case where b is a long, +this may be much faster than writing +mul(t, a, b); add(x, x, t). +See ZZ.txt for details. + +These new routines are used in a number of places in +NTL to get faster algorithms (for example, the LLL routine). +A short sketch appears after this list. + 
  • +Fixed a relatively benign indexing bug in GF2EX +discovered by Berend-Benjamin Tams using the valgrind tool. + + + + +
+ +
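+
+A minimal sketch (not from the original changelog; the values are
+illustrative) of the new routines:
+
+   ZZ x(10), a(3);
+   long b = 7;
+   MulAddTo(x, a, b);     // x += a*b; x is now 31
+   MulSubFrom(x, a, b);   // x -= a*b; x is 10 again
+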


+

+2009.05.05: Changes between NTL 5.5 and 5.5.1 +

+ +
    +
  • If using GMP (via either NTL_GMP_LIP +or NTL_GMP_HACK), then the new version (4.3.0) of +GMP implements the XGCD functionality differently, +so that the coefficients do not always agree with those returned by +the classical extended Euclidean algorithm. +This version of NTL corrects the coefficients, so that the +"classical" coefficients are always produced, regardless +of GMP's implementation. +This version of NTL also works +around a bug in GMP 4.3.0's XGCD code +(although that bug should be fixed in GMP 4.3.1). + +
  • +The configure script has been slightly modified: +there is a new configuration variable DEF_PREFIX, +whose value can be used to set PREFIX, GMP_PREFIX, +and GF2X_PREFIX in one stroke. +Also, the (somewhat esoteric) configure variables +GMP_LIBDIR, GMP_INCDIR, +GF2X_LIBDIR, and GF2X_INCDIR +have slightly different meanings now. + +
+ + +


+

+2009.04.08: Changes between NTL 5.4.2 and 5.5 +

+ +
    +
  • +Added the ability to generate a shared library +(with help from Tim Abbott). Details. + +
  • +Fixed some standardization issues +(with help from Tim Abbott): +default location of installed documentation files now conforms +to standards; use of EOF now conforms to standards. + 
  • +Added a callback mechanism to NTL's error reporting function. +See ErrorCallback in tools.txt. + +
  • +Added support for the gf2x library for speeding up +arithmetic in GF2X (with help from Emmanuel Thomé). +Details. + +
  • +In conjunction with the above, I also changed the +GF2X module so that it works better with very large polynomials: +large blocks of memory are released, recursive HalfGCD algorithms +are used for large polynomials. + 
  • +Fixed a bug in void TraceMod(zz_p& x, const zz_pX& a, const zz_pXModulus& F) (reported by Luca De Feo). + +
  • +Fixed a performance issue in various versions of SetCoeff +(reported by Luca De Feo). + +
  • +Fixed the declaration of mat_zz_p transpose(const mat_zz_p& a) +(reported by Benoit Lacelle). +
+ + +


+

+2008.03.05: Changes between NTL 5.4.1 and 5.4.2 +

+ +
    +
  • +Fixed a bug in the sub(ZZ_pEX, ZZ_pE, ZZ_pEX) +and sub(zz_pEX, zz_pE, zz_pEX) routines (reported by Charanjit Jutla). +Under certain circumstances, these could output wrong answers. + 
+ +


+

+2007.05.09: Changes between NTL 5.4 and 5.4.1 +

+ +
    +
  • +Fixed rounding bug in expm1 (reported by Paul Zimmermann). + +
  • +Fixed memory leak in several LLL routines (reported by Friedrich Bahr). + +
  • +Fixed infinite loop in several LLL routines +(this only occurred on machines, like x86, with double rounding). + +
  • +Improved GF2X timing tests (suggested by Paul Zimmermann). + +
+ +


+

+2005.03.24: Changes between NTL 5.3.2 and 5.4 +

+ +
    +
  • +By default, NTL now compiles in ISO mode (using namespaces, etc.). +You can always revert to traditional mode by unsetting +the flag NTL_STD_CXX +(either pass NTL_STD_CXX=off to the configure script, +or manually edit the config.h file). +

    + +

  • +Some bug fixes: + +
      +
    • +The sqrt and log1p routines +for the RR class would produce incorrectly rounded +results in certain circumstances (although this only affected the relative +error of the result very marginally). +
    • +The SqrRootPrec routine for the RR class +could not be called, because it was defined incorrectly. +
    + +

    + +Thanks to Paul Zimmermann for finding (and fixing) these bugs! +Paul has also validated NTL's RR class by cross-checking it with the +MPFR library. + +

    +

  • +Some performance enhancements: + +
      +
    • +Added a new MulModPrecon inline function for +computing (a * b) % n for single precision numbers, +when b and n are fixed for several computations. +On some platforms this can be twice as fast as the +old MulMod2 routine, or more. +This indirectly affects a lot of computations that are done via +homomorphic imaging (polynomial multiplication +over zz_p, ZZ_p, and ZZ, +matrix computations over zz_p and ZZ).
    • +Rewrote the small prime FFT to take advantage of the new +MulModPrecon, and to be more cache friendly. + +
    • +Improved the performance of the GF2X multiplication routine. +On some platforms, it can be twice as fast as the old one. +Thanks (again) to Paul Zimmermann for suggesting some of these +improvements and supplying some of the code. + +
    + +

    +

  • +Miscellany: + +
      +
    • +Rewrote several of the installation scripts in Perl (the old shell +scripts were getting too messy to maintain). +However, the syntax for all of the command-line interfaces +remains identical. + +
    + + + + + +
+ +


+

+2004.05.21: Changes between NTL 5.3.1 and 5.3.2 +

+ +
    +
  • +Some bug fixes. + +

    +

  • +Re-wrote SqrRootMod to make it run faster. + +
+ + + +


+

+2002.12.17: Changes between NTL 5.3 and 5.3.1 +

+ +
    +
  • +Fixed a bug affecting the BuildIrred routines +for ZZ_pEX and zz_pEX. +
+ +


+

+2002.07.05: Changes between NTL 5.2 and 5.3 +

+ +
    +
  • +Minimized and isolated constructs that do not adhere to C/C++ +standards, +and added flags NTL_CLEAN_INT and NTL_CLEAN_PTR +which force stricter compliance with these standards +[more details]. + +

    +

  • +Added functions IsWhiteSpace, CharToIntVal, +and IntValToChar to the tools module +[more details]. + +

    +

  • +Added methods allocated, position1 to generic vector classes +[more details]. + +

    +

  • +Added method allocated to the class vec_GF2 +[more details]. + +

    +

  • +Added conversion routines from unsigned int/long to int, long, float, and double +[more details]. + +

    +

  • +Added routines AddPrec, SubPrec, etc., to the RR +module, and declared the practice of directly assigning to the variable +RR::prec obsolete +[more details]. + +

    +

  • +Fixed a number of minor bugs. + +
+ + + +


+

+2001.07.19: Changes between NTL 5.1a and 5.2 +

+ +

+ +

    +
  • +Implemented Mark van Hoeij's new algorithm for factoring polynomials +with rational coefficients. +This new algorithm is much more efficient than the previous algorithm +used by NTL, and is the default (one can switch back to the old algorithm +with a run-time switch).

    +[documentation] +

    +[performance measurements] +

    + +

  • +Added routines LLL_plus that are just like the all-integer +LLL routines, except that they return the exact values of the +squared lengths of the Gramm-Schmidt basis vectors. +This is useful in implementing van Hoeij's algorithm. +[more details]. +

    + +

  • +Made a small change to quad_float.c to make it compile +under gcc version 3.0 +without errors. +This is the one place in NTL where I resort to just a little +assembly code (but only on x86/Linux platforms), and wouldn't you know it, +this is the one place where gcc 3.0 had problems.

    + +

  • +Made a small change to the procedure for generating a distribution, +so that now all files in the "tar" file comprising the distribution +come without any annoyingly excessive access control restrictions. +

    + +

  • +Changed the version numbering scheme so that it is now closer to +"standard practice". +This is version "5.2". +Any small bug fixes to this version will be named "5.2.1", "5.2.2", etc. +Also, macros are now defined so that the numerical components +of the version number are available to the programmer. +[more details]. + 
+ + +


+

+2001.06.08: Changes between NTL 5.0c and 5.1a +

+ +

+Some minor fixes and additions. +

+Completely backward compatible. +

+ +

    + +
  • +Added a routine LatticeSolve() for finding integer +solutions to linear systems of integer equations. +[more details] + +

    +

  • +Modified the strategy used by the LLL() and image() +routines in the LLL package to deal +with linear dependencies. +The new strategy guarantees better worst-case bounds on the +sizes of intermediate values. +I'm not sure if it will have any serious practical impact, though. + 

    +

  • +Added some "partial ISO modes" so that one can use +some of the features of Standard C++, +even if ones compiler does not yet support all of the features. + +

    +

  • +Bug fix: routine determinant() in mat_GF2.h +was not visible to the linker because of a typo in mat_GF2.c. + 

    +

  • +Made a "smarter" script for selecting the GetTime() +function. +This fixes an installation problem on Cygwin/Windows 95 platforms. +I hope it doesn't create more problems than it solves, though. + +

    +

  • +Added some extra documentation for installation under +Windows/MS Visual C++. +[more details] + +

    +

  • +Changed some names like c_lip.c to c_lip_impl.h. +This should avoid some potential installation problems. + +

    +

  • +Threw away the first 256 bytes of arc4 streams to improve the quality of +the pseudo-random number generator. +This may change the precise behavior of some programs. + 

    +

  • +Other minor, internal modifications. + +
+ + + + +


+

+2001.02.19: Changes between NTL 5.0b and 5.0c +

+ +

+Fixed a naming problem in the Windows distribution. +The Unix distribution is unaffected. + + +


+

+2001.02.19: Changes between NTL 5.0a and 5.0b +

+ +

+Fixed a typo in vec_ulong.c that causes a compile error +on some platforms. + + +


+

+2001.02.19: Changes between NTL 4.3a and 5.0a +

+ +

+

    +
  • +I've now re-structured NTL so that one can use +either 'traditional' LIP or GMP as the primary long integer package. +Doing this introduced some (minor) backward incompatibilities in +the programming interface, so there is also a 'third way' -- you +can use GMP as a supplemental long integer package (as in NTL 4.3), +getting +many (but not all) of the performance benefits of GMP, while +maintaining complete backward compatibility with the traditional +long integer package. +This 'third way' is not highly recommended -- it is only intended +as a backward compatibility hack. + 

    +Even if you do not use GMP, you should +read about using NTL with GMP so +that you can write code that works with either the traditional or GMP +long integer packages. +

    +

  • +Added a ZZ to unsigned long conversion routine. +[more details] +
  • +Added new vector classes vec_ulong (vectors +of unsigned longs) and vec_vec_ulong. +[more details] +
  • +Some minor bug fixes: under some unusual circumstances, a memory +allocation error could be erroneously raised; I also added a patch +that works around a bug in v3.0.1 of GMP. +
  • +Some internal cleansing, minimizing the use of non-standard constructs. +
+ + +


+

+Changes between NTL 4.2a and 4.3a +

+ +This is backward compatible with previous versions. + +

+

    +
  • +Improved the performance of ZZ_pX arithmetic when using +GMP. +The GMP version is also more space efficient +(the pre-computed tables are much smaller). +These improvements are most marked for very large p (several +thousand bits). + +

    +The only thing unsatisfactory about this state of affairs is that +vis-a-vis the GMP version, the pure +LIP code is asymptotically slower by more than a constant factor, +and is also less space efficient. +Perhaps I'll get around to rectifying this imbalance someday. +To do this, I need a sub-quadratic division with remainder routine for LIP. +At any rate, the differences only become seriously noticeable when +p has more than a few thousand bits.

    + +

  • +Some other small adjustments here and there. + +
+ +


+

+Changes between NTL 4.1a and 4.2a +

+ +This is backward compatible with previous versions. + +

+

    +
  • +Hacked the big integer code so that NTL uses GMP +(the GNU Multi-Precision library). +This is done in such a way as to get most of the benefits of GMP +with a reasonable amount of effort, and while maintaining complete backward +compatibility and minimizing the risk of introducing bugs. +Some arithmetic operations +on some platforms may execute two to three times +faster if using GMP. [more details]
  • +Simplified the installation procedure on Unix systems by +providing a simple configuration script so that setting +various configuration variables can be done without +editing the makefile and config.h file. +[more details] +
  • +Added function GenGermainPrime +to efficiently generate random Germain primes, i.e., primes p +such that 2p+1 is also prime. [more details] +
  • +Added a function random to generate random quad_floats. +[more details] +
  • +Added an ifdef in tools.h that allows +one to suppress the declaration of min and max +functions in NTL client programs; +these were causing problems when writing 'Windows applications'. +
  • +Implemented a faster algorithm for initializing the +ZZ_p auxiliary data structures.
  • +Polished up a few other minor things in the code and documentation. +
+ +


+ +

+

+Changes between NTL 4.0a and 4.1a +

+

+ +This is backward compatible with previous versions. + +

+

    +
  • +Made some changes that should make NTL compile smoothly +using any variation of the C++ language between traditional and +ISO Standard. +These changes do not affect the documented NTL interface or the +behaviour of NTL. + +
  • +Added a flag NTL_STD_CXX in the config.h file. +Setting this flag causes all of NTL to be "wrapped" in namespace NTL, +and that part of the standard library used by NTL is "wrapped" +in namespace std. +This should greatly help with the namespace pollution problem. + +
+ + + +


+ +

+

+Changes between NTL 3.9b and 4.0a +

+

+ +This is backward compatible with previous version. + +

+

    +
  • +Attached the GNU General Public License to NTL. + +
  • +Fixed two bugs: +
      +
    • +one in ReconstructRational which resulted in a crash on some inputs; +
    • +one in exp(RR) (and by implication in pow(RR,RR)), +which led to wrong answers on 64-bit machines when computing exp(x) +for x > 2^53. +
    + +
  • +Increased some inconvenient limiting bounds, including a restriction on the +FFT. + +
+ + +


+ +

+

+Changes between NTL 3.9a and 3.9b +

+

+ +This is a minor revision of 3.9a. + +

    +
  • +Improved time and space efficiency of the HNF routine +(see HNF.txt). +The old version was based on the description in Henri Cohen's book, +which was not really properly optimized. +
+ + + +


+ +

+

+Changes between NTL 3.8b and 3.9a +

+

+ +This is backward compatible with previous versions. + +

    +
  • +Modified the installation script somewhat, adding +a configuration wizard that sets the flags in +config.h "automagically". +This works for the Unix version only. + +
  • +Improved the xdouble input/output and ASCII to xdouble +conversion. +The old version could be a bit flaky when reading/writing +very large numbers. +The new I/O routines also attain better accuracy. + 
  • +Improved conversion routines between xdouble +and ZZ/RR. + +
  • +Improved the RR output routine. +The new version should be more accurate and also +completely platform independent. + +
  • +Added the following routines to the RR package: +
    +   {Trunc,Floor,Ceil,Round}ToZZ, round
    +   RoundToPrecision, MakeRR
    +   random
    +
    +See RR.txt for details. + +
  • +Improved the accuracy of quad_float input/output, +and the accuracy of conversion between quad_float and RR. + +
  • +Made the timing function somewhat more robust. + +
  • +Hacked the Unix installation script so that it works +more smoothly with Cygnus tools under Windows. + +
  • +Fixed a few other, small problems. +
+ +


+ +

+

+Changes between NTL 3.8a and 3.8b +

+

+ +This is a minor revision of 3.8a. + +

    +
  • +Fixed a bug, a memory leak in routine gauss for mat_ZZ_pE +and mat_zz_pE. +
  • +Fixed a minor problem in config.h. +
  • +Tightened up some size checks, so that now some nice "size invariants" +are guaranteed, e.g., for a ZZ n, + + +

    + +   NumBits(NumBits(n)) <= NTL_BITS_PER_LONG-4
    +
    +

    + + +Similarly for the type GF2X. +Of course, on most platforms, one will run out of memory before +these bounds are exceeded, but they are nevertheless convenient. +

+ + +


+ +

+

+Changes between NTL 3.7a and 3.8a +

+

+ +This is backward compatible with previous versions. + + +

    +
  • +Added conversion routines from unsigned int +and unsigned long to +ZZ, RR, xdouble, and quad_float. + +
  • +Added routines GF2XFromBytes and BytesFromGF2X +for conversion between byte vectors and polynomials over GF(2), +along with routines NumBits and NumBytes +for such polynomials. +See GF2X.txt for details. +A short sketch appears after this list. + 
  • +Added a hack in the ZZX factorizer +to exploit polynomials of the form g(x^k). +This can be disabled by setting the variable ZZXFac_PowerHack +to zero. +See ZZXFactoring.txt +for details. + +
  • +Improved the Hensel system solver solve1. +See mat_ZZ.txt for details. + 
  • +Changed documentation for RationalReconstruction +to reflect the Wang, Guy, Davenport bounds. +See ZZ.txt for details. + +
  • +Improved the routine GenPrime a bit. + +
  • +Some other small tweaks here and there. +No real bug fixes. + +
  • +Polished the documentation a bit, adding more examples. + +
+ +
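+
+A minimal sketch (not from the original changelog; the data is
+illustrative) of the byte-conversion routines:
+
+   unsigned char bytes[2] = { 0x03, 0x01 };
+   GF2X f = GF2XFromBytes(bytes, 2);   // f = 1 + X + X^8
+   long nb = NumBytes(f);              // nb == 2
+   unsigned char buf[2];
+   BytesFromGF2X(buf, f, 2);           // recovers the original two bytes
+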


+ +

+

+Changes between NTL 3.6b and 3.7a +

+

+ +This is backward compatible with previous versions. + +

    +
  • +Added a "rational reconstruction" routine. +See the routine ReconstructRational in ZZ.txt. +
  • +Added another routine for solving linear systems over ZZ +that is based on Hensel lifting, rather than Chinese Remaindering. +It can be significantly faster in some cases. +See the routine solve1 in mat_ZZ.txt.
  • +Some performance tuning, especially CRT and polynomial interpolation code.
  • +Various documentation corrections. +
  • +Added more "overflow checks" here and there to ensure programs crash gracefully +when certain things get too big. +
  • +Fixed a "benign" bug (i.e., it would never get triggered on any of today's +machines). +
  • +Removed references to <malloc.h>, which were unnecessary, +non-standard, and caused problems on some platforms. +
+ +

+


+ +

+

+Changes between NTL 3.6a and 3.6b +

+

+ +Bug fixes. + +

+


+ +

+

+Changes between NTL 3.5a and 3.6a +

+

+ +This version is backward compatible with 3.5a. + +

+ +

    + +
  • +A few small bug fixes and performance enhancements. + +
  • +Changes to the ZZX factoring routines that in some +cases yield dramatic performance improvements +(more details). + 
+ +

+


+ + +

+

+Changes between NTL 3.1b and 3.5a +

+

+ +Please note. This version is NOT completely backward compatible. + +

+ +Summary of changes: + +

    + +
  • +Improved performance of the "all integer" LLL routine. + +
  • +Put in a better pseudo-random number generator, +and added ZZ/byte array conversions. + +
  • +Improved performance of primality test, and added a +more convenient routine GenPrime. + +
  • +Overloaded NTL's vector placement "new" operator in a different +way to avoid conflicts with standard C++ library. + +
  • +Renamed many macros. + +
  • +Renamed header files. + +
  • +Made some changes to the packaging and +the installation procedure. + +
+ +

+Renamed Macros. +I renamed many macros defined in NTL header files. + +

+The reason is that I want to minimize namespace pollution. +Someday, NTL will be wrapped in a namespace, and when that happens +the only remaining namespace pollution problems will be caused by macros. +Eliminating all macros from NTL is not feasible. +Instead, all NTL-defined macros now begin with the prefix "NTL_", +which reduces the namespace pollution to an acceptable level. +You will probably not be affected by this, unless you +do some low-level hacking using a macro like ZZ_NBITS +(now called NTL_NBITS), or unless you create your +own NTL vectors using a macro like ntl_vector_decl +(now called NTL_vector_decl). + +

+For a complete list of affected names, see names.txt. + +

+Adapting to this name change should be painless, as there is a +program to translate source files from the old naming convention to the new. +The file "newnames.c" +can be compiled as either a C or C++ +program. +The program is a "filter" that copies its input to its output, +replacing all the old macro names by the new macro names. +

+In the WinNTL distribution, "newnames.c" is called +"newnames.cpp" and is located in the directory +"newnames". + + +

+Renamed header files. +The names of header files themselves pollute another (extra-linguistic) namespace. +To alleviate this problem, the header files have been renamed. +Instead of + + +

+ +   #include "foo.h"
+
+

+ + +one now should write + + +

+ +   #include <NTL/foo.h>
+
+

+ + +The only exceptions are the old header files "ntl_vector.h", +"ntl_matrix.h", and "ntl_pair.h", which are now called +<NTL/vector.h>, <NTL/matrix.h>, and +<NTL/pair.h>. + +

+Installation procedure. +All +NTL flags like NTL_LONG_LONG, NTL_AVOID_FLOAT, etc., can now be set +by editing the special file "include/NTL/config.h". +See details in that file. +The reason for this change is that it allows all of these settings +to be made when NTL is configured and built. +Clients of NTL will then automatically use consistent settings. +One should not set these flags on the compiler command line as previously. + + +

+Pentium/Linux people should no longer have to worry +about the NTL_X86_FIX flag. NTL now psychically deduces +the "right thing to do", although if its psychic abilities fail, +you can override it with flags in "include/NTL/config.h". + +

+The "packaging" in the Unix distribution is slightly +different, but hopefully nicer. +Among other things, the tar file now unpacks into a sub-directory of the current directory. +See the unix installation section +for more details. +The Windows zip file now also +unpacks into a sub-directory. + + +

+My apologies. +Although these changes are minor, they will cause some NTL +users some inconvenience. +I apologize for this. +I really, really hope there are no more changes like this +(see my roadmap of NTL's future). + +

+


+ + +

+

+Changes between NTL 3.1a and 3.1b +

+

+ +Defined functions div(GF2X,GF2X,GF2) and div(GF2X,GF2X,long), +which had not been defined in earlier versions. +Affected file: GF2X.c. +Most programs never use this, and most linkers do not complain +if these are missing (but some do). + +

+


+ +

+

+Changes between NTL 3.0f and 3.1a +

+

+ +This version is backward compatible with previous versions. + +

+ +

    +
  • +Added floating point LLL routines based on Givens rotations, +instead of classical Gram-Schmidt orthogonalization. +This is a more stable, but somewhat slower, method. +See LLL.txt for details. + +
  • +Added support for irreducible trinomials and pentanomials +over GF(2). The GF2XModulus routines, +and by extension, the GF2E routines, +now exploit moduli of this special form. +The new routine BuildSparseIrred in GF2XFactoring +builds irreducibles of this form. + +
  • +Also implemented a faster modular inversion routine +for GF2X, and improved the performance of ZZ_pX +multiplication for small degree polynomials. +
+ +

+


+ +

+

+Changes between NTL 3.0e and 3.0f +

+

+ +

    +
  • +Fixed a bug (another one) affecting routines +
    +   RandomBits, RandomBits_ZZ
    +
    +in module ZZ. +Affected source file: lip.c. + +
  • +Bug fix and performance tweak in ZZX factorizer. +Affected source file: ZZXFactoring.c. + +
+ +

+


+ +

+

+Changes between NTL 3.0 and 3.0e +

+

+ +

    +
  • +Fixed a bug affecting routines +
    +   RandomBits, RandomBits_ZZ, RandomBits_long
    +
    +in module ZZ. +The only source files that are affected and require re-compilation are +
    +   ZZ.c, lip.c
    +
    + +
  • +Note about names: +3.0a-c were "pre-releases", which makes the "first release" 3.0d, +and hence this bug fix 3.0e. + +
+ +

+


+ + +

+ +

+Changes between NTL 2.0 and 3.0 +

+

+ + +

    + +
  • +Added functionality: +

    + +

      + +
    • +Added classes vec_GF2 and mat_GF2 for fast linear algebra over GF(2). + +
    • +Added classes ZZ_pE, ZZ_pEX, zz_pE, zz_pEX, supporting polynomial +arithmetic over extension rings/fields over prime fields. + +
    • +Added John Abbott's pruning heuristic to the ZZX factoring routine. + +
    • +Speeded up multiplication in zz_pX for small p (this also helps +the ZZX factoring routine). + +
    • +Added some transcendental functions (e.g., exp, log, pi) to RR. + +
    • +Added verbose mode and pruning to the XD and RR variants of LLL. + +
    +

    + +

  • +Improved programming interface: +with this version, I've taken the opportunity to +give the programming interface a "professional facelift". +In previous releases, I've tried to maintain backward compatibility +as much as possible, but the badly needed improvements +to the interface made in this release made that +impossible. +

    +NTL 3.0 is not backward compatible with NTL 2.0. +

    +I apologize to NTL users for this, but it is a bit of painful +medicine that should only be necessary this one time +(but then as a C++ programmer, you must already +be used to suffering ;-). +Just about all of the incompatibilities are detectable by the compiler. +See below for a detailed list of the changes and +some tips on making the transition. +

    +The new interface is much more enjoyable to work with, +and I don't foresee any changes to the interface in the future. +Here is a broad overview of the changes: +

    + +

      +
    • +Added functional/operator notation consistently throughout NTL, +making it possible to write much more concise and readable code. +
    • +Got rid of automatic type conversions: these cause just too +many problems. But I've overloaded all of the basic arithmetic +operators and procedures so as to emulate a natural kind +of "type promotion" logic. With these promotions, along with +a full complement of conversion functions, one hardly misses +the automatic conversions. +
    • +Got rid of the macros +
      +   vector(T), matrix(T), pair(T),
      +
      +which were causing too many name space problems. + +
    • +Made assignment operators have the "correct" return type. +
    • +Introduced a more powerful and flexible mechanism for modulus changing. +
    • +Cleaned up numerous other minor problems. +
    + +
+ +

+

+Compatibility +

+

+ +Here is a detailed list of the changes to the programming +interface. +

+ + +

    + +
  • +The names of the classes +
    +   BB, BB_p, BB_pX
    +
    +have been changed to +
    +   GF2X, GF2E, GF2EX
    +
    + +
  • +There is also a class GF2 to represent GF(2). +Many of the functions relating to BB, BB_p, BB_pX +had argument and return-value types of type long +that are now of the more appropriate type GF2. +This change was needed so that the interface would be consistent +with that of the new classes +
    +   ZZ_pE, ZZ_pEX, zz_pE, zz_pEX.
    +
    + +
  • +The explicit conversion operator from GF2X +(the new BB) to GF2EX (the new BB_pX) +has different semantics: it now performs a coefficient lift, +instead of creating a constant polynomial. + +
  • +The conversion operator "<<" has been retired. +Now instead of + + +

    + +   x << a; 
    +
    +

    + + +one writes + + +

    + +   conv(x, a);
    +
    +

    + + +

    +Operator "<<" is now used for shift operations. +

  • +Every conversion routine now has a corresponding functional version +which has the name to_T, where T is the result type. +These new names replace old names that were less consistent. +So instead of + + +

    + +   x = Long(a);
    +
    +

    + + +one writes + + +

    + +   x = to_long(a);
    +
    +

    + + + + +

  • +The names of the routines +
    +   ZZ_pInit, zz_pInit, zz_pFFTInit, GF2EInit
    +
    +have been changed to +
    +   zz_p::init, zz_p::init, zz_p::FFTInit, GF2E::init
    +
    + +
  • +The names of the routines +
    +   and, or, xor 
    +
    +for class ZZ have +changed to +
    +   bit_and, bit_or, bit_xor, 
    +
    +because the new C++ +standard defines these as reserved words. + +
  • +The function LowBits for ZZ is now called trunc. + +
  • +Polynomial inversion mod X^n has changed from inv +to InvTrunc. + +
  • +Modular trace, norm, minimum polynomial and characteristic +polynomial have changed from +
    +   trace, norm, MinPoly, IrredPoly, CharPoly
    +
    +to +
    +   TraceMod, NormMod, MinPolyMod, IrredPolyMod, CharPolyMod
    +
    + + +
  • +For the class ZZX, the functions +
    +   DivRem, div, rem, /, %, /=, %=
    +
    +have new semantics when dividing by non-monic polynomials. +The old semantics are provided by new routines +
    +   PseudoDivRem, PseudoDiv, PseudoRem.
    +
    + +
  • +The UpdateMap routines have slightly different semantics: +in versions < 3.0, the output always had length n; +now high-order zeroes are stripped. + +
  • +The classes ZZ_pBak, zz_pBak, etc., +have just slightly different semantics; I can't imagine +any reasonable program detecting a difference. + +
  • +The assignment operator and copy constructor for the class RR +have different semantics: they now produce exact copies, instead +of rounding to current precision. + +
  • +All of the NTL compiler flags now start with NTL_ +to avoid name space problems. + +
  • +All of the files "zz_p.h", vec_zz_p.h", etc., have been eliminated. +Use instead the names "lzz_p.h", "vec_lzz_p.h", etc. + +
+ +

+

+Tips on making the transition +

+

+ +

    + +
  • +Apply this sed script to make +most of the necessary syntactic changes. + +
  • +Re-compile old NTL programs with the flag +
    +   -DNTL_TRANSITION
    +
    +See flags.txt for details on how +this will help your compiler detect remaining incompatibilities. +In particular, any uses of operator << +in its old role as a conversion operator will cause the compiler +to raise an error. +You can then convert all of these to the new notation. + +
+ + +

+


+ + +

+

+Changes between NTL 1.7 and 2.0 +

+

+ +

    +
  • +Implementation of classes BB (polynomials over GF(2)) +and BB_pX (polynomials over GF(2^n)). + +
  • +A more consistent and natural interface, including arithmetic operators +and a disciplined use of automatic conversion. +So now one can write + + +

    + +   x = a * b + c;
    +
    +

    + + +instead of + + +

    + +   mul(x, a, b);
    +   add(x, x, c);
    +
    +

    + + +as one must in older versions of NTL. +The operator notation leads to somewhat less efficient code, +and one can always use the old notation in situations +where efficiency is critical. +Despite the new programming interface, +care has been taken to ensure backward compatibility; +pre-existing programs that use NTL should still work. + +

  • +Windows port. + +
  • +Added compile-time flag that allows one to exploit +"long long" data type if it exists (this especially helps on Pentium/Linux +platforms). + +
  • +Added compile-time flag to get better quad_float code on +Pentium/Linux platforms. + +
  • +A few bug fixes and performance tuning. +
+ +

+


+ + +

+

+Changes between NTL 1.5 and NTL 1.7 +

+

+ +

    +
  • +Incorporation of Keith Briggs' quadratic precision package. + +
  • +Much faster and more robust lattice basis reduction, +including Schnorr-Horner "volume heuristic" for Block +Korkin-Zolotarev reductions, and a new quadratic precision LLL variant +that is much more robust. + +
  • +A few bug fixes. + +
+ + +

+


+ +

+

+Changes between NTL 1.0 and NTL 1.5 +

+

+ + +

    +
  • +Implementation of Schnorr-Euchner algorithms for +lattice basis reduction, including deep insertions and +block Korkin-Zolotarev reduction. +These are significantly faster than the LLL algorithm +in NTL 1.0. + +
  • +Implementation of arbitrary-precision floating point. + +
  • +Implementation of double precision with extended exponent range, +which is useful for lattice basis reduction when the coefficients +are large. + +
  • +Faster polynomial multiplication over the integers, +incorporating the Schönhage-Strassen method. + +
  • +Compilation flags that increase performance on machines +with poor floating-point performance. + +
  • +Sundry performance tuning and a few bug fixes. + +
+ +
+[Previous] + [Up] + [Next] +
+ + + diff --git a/thirdparty/linux/ntl/doc/tour-ex1.html b/thirdparty/linux/ntl/doc/tour-ex1.html new file mode 100644 index 0000000000..b17d427fd2 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-ex1.html @@ -0,0 +1,725 @@ + + + +A Tour of NTL: Examples: Big Integers + + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Examples: Big Integers +

+

+ +


+ +The first example makes use of the class +ZZ, +which +represents "big integers": signed, arbitrary length integers. +This program reads two big integers a and b, +and prints (a+1)*(b+1). + + + +

+#include <NTL/ZZ.h>
+
+using namespace std;
+using namespace NTL;
+
+int main()
+{
+   ZZ a, b, c;
+
+   cin >> a;
+   cin >> b;
+   c = (a+1)*(b+1);
+   cout << c << "\n";
+}
+

+ + + + +This program declares three variables a, b, +and c of type ZZ. +The values a and b are read from standard input. +The value c is then computed as (a+1)*(b+1). +Finally, the value of c is printed to the standard output. + +

+Note that one can compute with ZZs much as with ordinary +ints, in that most of the standard arithmetic and +assignment operators can be used in a direct and natural way. +The C++ compiler and the NTL library routines +automatically take care +of all the bookkeeping involved +with memory management and temporary objects. + +

+Note that by default, all of NTL's components +are in the namespace NTL; +with the "using directive" +

+   using namespace NTL;
+
+in the above example, one can access +these components directly. + +


+ +Here's a program that reads a list of integers from standard +input and prints the sum of their squares. + + + +

+#include <NTL/ZZ.h>
+
+
+using namespace std;
+using namespace NTL;
+
+
+int main()
+{
+   ZZ acc, val;
+
+   acc = 0;
+   while (cin >> val)
+      acc += val*val;
+
+   cout << acc << "\n";
+}
+

+ + + + +Following the normal conventions for input operators, +NTL's input operators will set the "fail bit" of the +input stream if the input is missing or ill-formed, +and the condition in the while loop will detect this. + + + + +

+


+

+ +Here's a simple modular exponentiation routine for computing +a^e mod n. +NTL already provides a more sophisticated one, though. + + + +

+ +ZZ PowerMod(const ZZ& a, const ZZ& e, const ZZ& n)
+{
+   if (e == 0) return ZZ(1);
+
+   long k = NumBits(e);
+
+   ZZ res;
+   res = 1;
+
+   for (long i = k-1; i >= 0; i--) {
+      res = (res*res) % n;
+      if (bit(e, i) == 1) res = (res*a) % n;
+   }
+
+   if (e < 0)
+      return InvMod(res, n);
+   else
+      return res;
+}
+
+

+ + + +Note that as an alternative, we could implement the inner loop +as follows: + + + +

+ +   res = SqrMod(res, n);
+   if (bit(e, i) == 1) res = MulMod(res, a, n);
+
+

+ + + +We could also write this as: + + + +

+ +   SqrMod(res, res, n);
+   if (bit(e, i) == 1) MulMod(res, res, a, n);
+
+

+ + +This illustrates an important point about NTL's programming interface. +For every function in NTL, there is a procedural version that +stores its result in its first argument. +The reason for using the procedural variant is efficiency: +on every iteration through the above loop, the functional form +of SqrMod will cause a temporary ZZ object to +be created and destroyed, whereas the procedural version +will not create any temporaries. +Where performance is critical, the procedural version +is to be preferred. +Although it is usually silly to get too worked up about performance, +it may be reasonable to argue that modular exponentiation +is an important enough routine that it should be as fast as possible. +

+ +Note that when the functional version of a function +can be naturally named with an operator, this is done. +So for example, NTL provides a 3-argument mul routine +for ZZ multiplication, and a functional version +whose name is operator *, and not mul. + +

+ +While we are talking about temporaries, consider the first version +of the inner loop. +Execution of the statement + +

+ +   res = (res*res) % n;
+
+

+ + +will result in the creation of two temporary objects, +one for the product, and one for the result of the mod operation, +whose value is copied into res. +Of course, the compiler automatically generates the code for +cleaning up temporaries and other local objects at the right time. +The programmer does not have to worry about this. + + +


+ +This example is a bit more interesting. +The following program prompts the user for an input, +and applies a simple probabilistic primality test. +Note that NTL already provides a slightly more sophisticated +primality test. + + + +

+ +#include <NTL/ZZ.h>
+
+using namespace std;
+using namespace NTL;
+
+long witness(const ZZ& n, const ZZ& x)
+{
+   ZZ m, y, z;
+   long j, k;
+
+   if (x == 0) return 0;
+
+   // compute m, k such that n-1 = 2^k * m, m odd:
+
+   k = 1;
+   m = n/2;
+   while (m % 2 == 0) {
+      k++;
+      m /= 2;
+   }
+
+   z = PowerMod(x, m, n); // z = x^m % n
+   if (z == 1) return 0;
+
+   j = 0;
+   do {
+      y = z;
+      z = (y*y) % n; 
+      j++;
+   } while (j < k && z != 1);
+
+   return z != 1 || y != n-1;
+}
+
+
+long PrimeTest(const ZZ& n, long t)
+{
+   if (n <= 1) return 0;
+
+   // first, perform trial division by primes up to 2000
+
+   PrimeSeq s;  // a class for quickly generating primes in sequence
+   long p;
+
+   p = s.next();  // first prime is always 2
+   while (p && p < 2000) {
+      if ((n % p) == 0) return (n == p);
+      p = s.next();  
+   }
+
+   // second, perform t Miller-Rabin tests
+
+   ZZ x;
+   long i;
+
+   for (i = 0; i < t; i++) {
+      x = RandomBnd(n); // random number between 0 and n-1
+
+      if (witness(n, x)) 
+         return 0;
+   }
+
+   return 1;
+}
+
+int main()
+{
+   ZZ n;
+
+   cout << "n: ";
+   cin >> n;
+
+   if (PrimeTest(n, 10))
+      cout << n << " is probably prime\n";
+   else
+      cout << n << " is composite\n";
+}
+
+

+ + + +Note that in NTL, there are typically a number of ways to +compute the same thing. +For example, consider the computation of m and k +in function witness. +We could have written it thusly: + + + +

+ +   k = 1;
+   m = n >> 1;
+   while (!IsOdd(m)) {
+      k++;
+      m >>= 1;
+   }
+
+

+ + + +It turns out that this is actually not significantly more +efficient than the original version, because the implementation +optimizes multiplication and division by 2. + +

+ +The following is more efficient: + + + +

+ +   k = 1;
+   while (bit(n, k) == 0) k++;
+   m = n >> k;
+
+

+ + + +As it happens, there is a built-in NTL routine that does just what we want: + + + +

+ +   m = n-1;
+   k = MakeOdd(m);
+
+

+ + + + + +


+ +Having seen a number of examples involving ZZs, +let's look at the ZZ interface in a bit more detail. + +

+ + +Constructors, assignment, and conversions + + +

+ +When you declare an object of type ZZ, +the default constructor initializes to the value 0. +As we have already seen, there is an assignment operator that +allows one to copy the value of one ZZ to another. +Note that these copies (like almost all copies in NTL) are "deep", +i.e., the actual data is copied, and not just a pointer. +Of course, if the amount of space allocated by the destination +of the assignment is insufficient to hold the value of the source, +space is automatically re-allocated. + +

+One can also assign a value of type long to a ZZ: + + +

+ +   ZZ x;
+   x = 1;
+
+

+ + + +

+Note that one cannot write + + +

+ +   ZZ x = 1;  // error
+
+

+ + +to initialize a ZZ. +Instead, one could write + + +

+ +   ZZ x = ZZ(1);
+   ZZ y(1);
+   ZZ z{1}; // C++11 only
+
+

+ + +using the constructor that allows one to explicitly +construct a ZZ from a long. + +

+Alternatively, one could write this as: + + +

+ +   ZZ x = conv<ZZ>(1);
+
+

+ + +This is an example of one of NTL's conversion routines. +For very large constants, one can write: + + +

+ +   ZZ x = conv<ZZ>("99999999999999999999");
+
+

+ + +These examples illustrate conversion routines in their +functional forms. +One can also use their procedural forms: + +

+ +   ZZ x;
+   conv(x, 1);
+   conv(x, "99999999999999999999");
+
+

+ + + +

+ +Functionality + +

+ +All of the basic arithmetic operators are supported, +including comparison, arithmetic, shift, and bit-wise logical operations. +One can mix ZZs and longs in any expression in +a natural way. +NTL does not support implicit type conversion; +rather, for basic operations, it simply overloads the operators +or functions so as to achieve a kind of "promotion logic": +if one input is a ZZ and the other is a long +(or something that implicitly converts to a long, like +an int), the long input is effectively converted +to a ZZ. +Moreover, wherever possible, the implementation does this +as efficiently as possible, and usually avoids the creation +of a temporary ZZ. + +
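+For example, the following snippet (a minimal sketch, not part of the original tour) mixes ZZ and long operands freely:
+
+   ZZ a, x;
+   a = 5;
+   x = a + 1;       // the long value 1 is promoted to ZZ
+   x = 2*x - a;     // likewise for 2
+   if (x > 3)       // comparisons also mix ZZ and long
+      x = x % a;
+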

+There are also procedural versions for all the basic arithmetic +operations: +

+   add, sub, negate, mul, sqr, div, rem, DivRem, 
+   LeftShift, RightShift,
+   bit_and, bit_or, bit_xor
+
+ +

+There are many other routines. +Here is a brief summary; a short usage sketch follows the list: +

    +
  • +GCD -- computes greatest common divisor of two integers +
  • +XGCD -- extended Euclidean algorithm +
  • +AddMod, SubMod, NegateMod, +MulMod, SqrMod, InvMod, +PowerMod -- routines for modular arithmetic, +including inversion and exponentiation +
  • +NumBits -- length of binary representation +
  • +bit -- extract a bit +
  • +ZZFromBytes, BytesFromZZ -- +convert between octet strings and ZZs +
  • +RandomBnd, RandomBits, RandomLen -- +routines for generating pseudo-random numbers +
  • +GenPrime, ProbPrime -- routines for generating primes +and testing primality +
  • +power -- (non-modular) exponentiation +
  • +SqrRoot -- integer part of square root +
  • +Jacobi, SqrRootMod -- Jacobi symbol and modular +square root +
+ +
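+As a quick illustration, here is a minimal sketch (not part of the original tour) exercising a few of the routines listed above:
+
+   ZZ a, b, d, x;
+   a = conv<ZZ>("12345678901234567890");
+   b = RandomBnd(a);            // pseudo-random number in [0, a)
+   d = GCD(a, b);               // greatest common divisor
+   x = PowerMod(b, 3, a);       // b^3 mod a
+   cout << d << " " << x << " " << NumBits(a) << "\n";
+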

+Most of these functions also have pure long versions, +and as usual, there are both functional and procedural +variants. + +

+There are other functions as well. +See ZZ.txt for complete details. +Also see tools.txt for some basic +services provided by NTL. + + +

+

+[Previous] + [Up] + [Next] +
+ + + diff --git a/thirdparty/linux/ntl/doc/tour-ex2.html b/thirdparty/linux/ntl/doc/tour-ex2.html new file mode 100644 index 0000000000..f7c9036356 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-ex2.html @@ -0,0 +1,385 @@ + + + +A Tour of NTL: Examples: Vectors and Matrices + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Examples: Vectors and Matrices +

+

+ + +


+ + +

+The following routine sums up the +numbers in a vector of ZZ's. + + + +

+#include <NTL/ZZ.h>
+#include <NTL/vector.h>
+
+using namespace std;
+using namespace NTL;
+
+ZZ sum(const Vec<ZZ>& v)
+{
+   ZZ acc;
+
+   acc = 0;
+
+   for (long i = 0; i < v.length(); i++)
+      acc += v[i];
+
+   return acc;
+}
+

+ + + +

+The class Vec<ZZ> is a dynamic-length array of ZZs; +more generally, NTL provides a template class Vec<T> +for +dynamic-length +vectors over any type T. + +Some history is in order here. +NTL predates the STL and the vector template +found in modern C++. +Older versions of NTL (prior to v6) did not use templates, but instead +defined generic vectors using macros. +By convention, NTL named these vec_T. +For backward compatibility, NTL now provides typedefs for +all these "legacy" vector types. + +

+Vectors in NTL are indexed from 0, but in many situations +it is convenient or more natural to index from 1. +The generic vector class allows for this; +the above example could be written as follows. + + + +

+#include <NTL/ZZ.h>
+#include <NTL/vector.h>
+
+using namespace std;
+using namespace NTL;
+
+ZZ sum(const Vec<ZZ>& v)
+{
+   ZZ acc;
+
+   acc = 0;
+
+   for (long i = 1; i <= v.length(); i++)
+      acc += v(i);
+
+   return acc;
+}
+

+ + + +

+Note that by default, NTL does not perform range checks on +vector indices. +However, there is a compile-time flag that activates range checking. +Therefore, it is good practice to always assume that range checking +may be activated, and to not access elements that are out of range. + +


+ +The following example illustrates vector I/O, +as well as changing the length of a vector. +This program reads a Vec<ZZ>, +and then creates and prints a "palindrome". + + + +

+#include <NTL/ZZ.h>
+#include <NTL/vector.h>
+
+using namespace std;
+using namespace NTL;
+
+int main()
+{
+   Vec<ZZ> v;
+   cin >> v;
+
+   long n = v.length();
+   v.SetLength(2*n);
+
+   long i;
+   for (i = 0 ; i < n; i++)
+      v[n+i] = v[n-1-i];
+
+   cout << v << "\n";
+}
+

+ + + +

+ +Notice that changing the length of a vector does not change +its contents. + +

+ +When we compile and run this program, +if we type in +

+   [1 -2 3]
+
+as input, the output is +
+   [1 -2 3 3 -2 1]
+
+ +

+ +See vector.txt for +complete details of NTL's generic vector mechanism. + +Also note that for several fundamental vector types, such as +Vec<ZZ>, there is a corresponding header file +<NTL/vec_ZZ.h> that defines +a number of basic arithmetic operations, +as well as provides the typedef +vec_ZZ for backward compatibility. +See vec_ZZ.txt for +complete details on the arithmetic operations for Vec<ZZ>'s +provided by NTL. + + +


+ +There is also basic support for matrices +in NTL. +In general, the class Mat<T> is a special +kind of Vec< Vec< T > >, where each row is +a vector of the same length. +Row i of matrix M +can be accessed as M[i] (indexing from 0) +or as M(i) (indexing from 1). +Column j of row i can be accessed +as M[i][j] or M(i)(j); +for notational convenience, the latter is equivalent to M(i,j). + +

+Here is a matrix multiplication routine, +which in fact is already provided by NTL. + + + +

+#include <NTL/ZZ.h>
+#include <NTL/matrix.h>
+
+using namespace std;
+using namespace NTL;
+
+void mul(Mat<ZZ>& X, const Mat<ZZ>& A, const Mat<ZZ>& B)
+{
+   long n = A.NumRows();
+   long l = A.NumCols();
+   long m = B.NumCols();
+
+   if (l != B.NumRows())
+      Error("matrix mul: dimension mismatch");
+
+   X.SetDims(n, m); // make X have n rows and m columns
+
+   long i, j, k;
+   ZZ acc, tmp;
+
+   for (i = 1; i <= n; i++) {
+      for (j = 1; j <= m; j++) {
+         acc = 0;
+         for(k = 1; k <= l; k++) {
+            mul(tmp, A(i,k), B(k,j));
+            add(acc, acc, tmp);
+         }
+         X(i,j) = acc;
+      }
+   }
+}
+

+ + + +

+In case of a dimension mismatch, the routine calls the +Error function, which is a part of NTL and which simply +prints the message and aborts. +That is generally how NTL deals with errors. + +

+This routine will not work properly if X aliases +A or B. +The actual matrix multiplication routine in NTL takes care of this. +In fact, all of NTL's routines allow outputs to alias inputs. + +

+To call NTL's built-in multiplication routine +(declared in <NTL/mat_ZZ.h>), one can write +

+   mul(X, A, B);
+
+or one can also use the operator notation +
+   X = A * B;
+
+ +

+NTL provides several matrix types. +See matrix.txt +for complete details on NTL's generic matrix mechanism. +Also see mat_ZZ.txt for +complete details on the arithmetic operations for Mat<ZZ>'s +provided by NTL (including basic linear algebra). +Also see LLL.txt +for details on routines for lattice basis reduction +(as well as routines for finding the kernel and image of a matrix). + +

+One thing you may have noticed by now is that +NTL code generally avoids the type +int, preferring instead to use long. +This seems to go against what most "style" books preach, +but nevertheless seems to make the most sense in today's world. +Although int was originally meant to represent the +"natural" word size, this seems to no longer be the case. +On 32-bit machines, int and long +are the same, +but on 64-bit machines, they are often different, with +int's having 32 bits and long's having 64 bits. +Indeed, there is a standard, called "LP64", which is being adopted +by all Unix-like systems, and which specifies that on 64-bit machines, +int's have 32 bits, and long's and pointers have 64 bits. +Moreover, on such 64-bit machines, +the "natural" word size is usually 64-bits; +indeed, it is often more expensive to manipulate 32-bit integers. +Thus, for simplicity, efficiency, and safety, NTL uses long +for all integer values. +If you are used to writing int all the time, +it takes a little while to get used to this. + +

+ +

+[Previous] + [Up] + [Next] +
+ + + + diff --git a/thirdparty/linux/ntl/doc/tour-ex3.html b/thirdparty/linux/ntl/doc/tour-ex3.html new file mode 100644 index 0000000000..1455fc8e89 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-ex3.html @@ -0,0 +1,361 @@ + + + +A Tour of NTL: Examples: Polynomials + + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Examples: Polynomials +

+

+ +


+ +NTL provides extensive support for very fast polynomial arithmetic. +In fact, this was the main motivation for creating NTL in the first place, +because existing computer algebra systems and software +libraries had very slow polynomial arithmetic. +The class ZZX represents univariate polynomials +with integer coefficients. + +The following program reads a polynomial, +factors it, and prints the factorization. + + + +

+#include <NTL/ZZXFactoring.h>
+
+using namespace std;
+using namespace NTL;
+
+int main()
+{
+   ZZX f;
+
+   cin >> f;
+
+   Vec< Pair< ZZX, long > > factors;
+   ZZ c;
+
+   factor(c, factors, f);
+
+   cout << c << "\n";
+   cout << factors << "\n";
+}
+

+ + +When this program is compiled and run on input + +

+   [2 10 14 6]
+
+ +which represents the polynomial 2 + 10*X + 14*X^2 + 6*X^3, +the output is + +
+   2
+   [[[1 3] 1] [[1 1] 2]]
+
+ +The first line of output is the content of the polynomial, which +is 2 in this case as each coefficient of the input polynomial +is divisible by 2. +The second line is a vector of pairs: the first member of each +pair is an irreducible factor of the input, and the second +is the exponent to which it appears in the factorization. +Thus, all of the above simply means that + +
+2 + 10*X + 14*X^2 + 6*X^3 = 2 * (1 + 3*X) * (1 + X)^2 
+
+ +

+Admittedly, I/O in NTL is not exactly user friendly, +but then NTL has no pretensions about being an interactive +computer algebra system: it is a library for programmers. + +

+In this example, the type Vec< Pair< ZZX, long > > +is an NTL vector whose base type is Pair< ZZX, long >. +The type Pair< ZZX, long > is the instantiation +of a template "pair" type defined by NTL. +See pair.txt for more details. + + + +


+ +Here is another example. +The following program prints out the first 100 cyclotomic polynomials. + + + +

+
+#include <NTL/ZZX.h>
+
+using namespace std;
+using namespace NTL;
+
+int main()
+{
+   Vec<ZZX> phi(INIT_SIZE, 100);  
+
+   for (long i = 1; i <= 100; i++) {
+      ZZX t;
+      t = 1;
+
+      for (long j = 1; j <= i-1; j++)
+         if (i % j == 0)
+            t *= phi(j);
+
+      phi(i) = (ZZX(INIT_MONO, i) - 1)/t;  
+
+      cout << phi(i) << "\n";
+   }
+}
+

+ + + +

+To illustrate more of the NTL interface, let's look at alternative ways +this routine could have been written. + +

+First, instead of + + +

+ +   Vec<ZZX> phi(INIT_SIZE, 100);  
+
+

+ + +one can write + + +

+ +   Vec<ZZX> phi;
+   phi.SetLength(100);
+
+

+ + + +

+Second, +instead of + + +

+ +            t *= phi(j);
+
+

+ + +one can write this as + + +

+ +            mul(t, t, phi(j));
+
+

+ + +or + + +

+ +            t = t * phi(j);
+
+

+ + +Also, one can write phi[j-1] in place of phi(j). + +

+Third, instead of + + +

+ +      phi(i) = (ZZX(INIT_MONO, i) - 1)/t;  
+
+

+ + +one can write + + +

+ +      ZZX t1;
+      SetCoeff(t1, i);
+      SetCoeff(t1, 0, -1);
+      div(phi(i), t1, t);
+
+

+ + +Alternatively, one could directly access the coefficient vector as follows: + + +

+ +      ZZX t1;
+      t1.SetLength(i+1); // all vector elements are initialized to zero
+      t1[i] = 1;
+      t1[0] = -1;
+      t1.normalize();  // not necessary here, but good practice in general
+      div(phi(i), t1, t);
+
+

+ + +Generally, you can freely access the coefficient vector +of a polynomial, as above. +However, after fiddling with this vector, you should "normalize" +the polynomial, so that the leading coefficient is non-zero: +this is an invariant which all routines that work with polynomials +expect to hold. +Of course, if you can avoid directly accessing the +coefficient vector, you should do so. +You can always use the SetCoeff routine above to set or +change coefficients, and you can always read the value of a coefficient +using the routine coeff, e.g., + + +

+ +   ... f[i] == 1 ...
+
+

+ + +is equivalent to + + +

+ +   ... coeff(f, i) == 1 ...
+
+

+ + +except that in the latter case, a read-only reference to zero is returned +if the index i is out of range. +There are also special-purpose read-only access routines LeadCoeff(f) +and ConstTerm(f). + + +

+NTL provides a full complement of arithmetic operations for polynomials +over the integers, in both operator and procedural form. +All of the basic operations support a "promotion logic" similar +to that for ZZ, except that inputs of both types +long and ZZ are promoted to ZZX. +See ZZX.txt for details, +and see ZZXFactoring.txt for details +on the polynomial factoring routines. +

+ +

+[Previous] + [Up] + [Next] +
+ + + diff --git a/thirdparty/linux/ntl/doc/tour-ex4.html b/thirdparty/linux/ntl/doc/tour-ex4.html new file mode 100644 index 0000000000..3efc3cd32d --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-ex4.html @@ -0,0 +1,725 @@ + + + +A Tour of NTL: Examples: Modular Arithmetic + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Examples: Modular Arithmetic +

+

+ +


+ + +NTL also supports modular integer arithmetic. +The class ZZ_p +represents the integers mod p. +Despite the notation, p need not in general be prime, +except in situations where this is mathematically required. +The classes Vec<ZZ_p> (a.k.a., vec_ZZ_p), +Mat<ZZ_p> (a.k.a., mat_ZZ_p), +and ZZ_pX represent vectors, matrices, and polynomials +mod p, and work much the same way as the corresponding +classes for ZZ. + +

+Here is a program that reads a prime number p, +and a polynomial f modulo p, and factors it. + + + +

+#include <NTL/ZZ_pXFactoring.h>
+
+using namespace std;
+using namespace NTL;
+
+int main()
+{
+   ZZ p;
+   cin >> p;
+   ZZ_p::init(p);
+
+   ZZ_pX f;
+   cin >> f;
+
+   Vec< Pair< ZZ_pX, long > > factors;
+
+   CanZass(factors, f);  // calls "Cantor/Zassenhaus" algorithm
+
+   cout << factors << "\n";
+    
+}
+

+ + + +

+As a program is running, NTL keeps track of a "current modulus" +for the class ZZ_p, which can be initialized or changed +using ZZ_p::init. +This must be done before any variables are declared or +computations are done that depend on this modulus. + +

+Please note that for efficiency reasons, +NTL does not make any attempt to ensure that +variables declared under one modulus are not used +under a different one. +If that happens, the behavior of a program +is completely unpredictable. + + +


+ +Here are two more examples that illustrate the ZZ_p-related +classes. +The first is a vector addition routine (already supplied by NTL): + + + +

+#include <NTL/ZZ_p.h>
+
+using namespace std;
+using namespace NTL;
+
+void add(Vec<ZZ_p>& x, const Vec<ZZ_p>& a, const Vec<ZZ_p>& b)
+{
+   long n = a.length();
+   if (b.length() != n) Error("vector add: dimension mismatch");
+
+   x.SetLength(n);
+   long i;
+   for (i = 0; i < n; i++)
+      add(x[i], a[i], b[i]);
+}
+

+ + + +

+ +The second example is an inner product routine (also supplied by NTL): + + + +

+#include <NTL/ZZ_p.h>
+
+using namespace std;
+using namespace NTL;
+
+void InnerProduct(ZZ_p& x, const Vec<ZZ_p>& a, const Vec<ZZ_p>& b)
+{
+   long n = min(a.length(), b.length());
+   long i;
+   ZZ accum, t;
+
+   accum = 0;
+   for (i = 0; i < n; i++) {
+      mul(t, rep(a[i]), rep(b[i]));
+      add(accum, accum, t);
+   }
+
+   conv(x, accum);
+}
+

+ + +This second example illustrates two things. +First, it illustrates the use of the function rep, which +returns a read-only reference to the representation of a ZZ_p +as a ZZ between 0 and p-1. +Second, it illustrates a useful algorithmic technique, +whereby one computes over ZZ, reducing mod p +only when necessary. +This significantly reduces the number of divisions that need to be performed, +leading to much faster execution. + +

+The class ZZ_p supports all the basic arithmetic +operations in both operator and procedural form. +All of the basic operations support a "promotion logic", +promoting long to ZZ_p. + +

+Note that the class ZZ_p is mainly useful only +when you want to work with vectors, matrices, or polynomials +mod p. +If you just want to do some simple modular arithmetic, +it is probably easier to just work with ZZ's directly, +as in the sketch below. +This is especially true if you want to work with many different +moduli: modulus switching is supported, but it is a bit awkward. + +
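+For instance, a one-off modular computation can be done directly with ZZs, with no ZZ_p modulus ever installed (a minimal sketch):
+
+   ZZ a, b, n, x;
+   // ... assume 0 <= a, b < n ...
+   x = MulMod(a, b, n);   // x = (a*b) % n
+   x = AddMod(x, 1, n);   // x = (x+1) % n
+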

+The class ZZ_pX supports all the basic arithmetic +operations in both operator and procedural form. +All of the basic operations support a "promotion logic", +promoting both long and ZZ_p to ZZ_pX. + +

+See ZZ_p.txt for details on ZZ_p; +see ZZ_pX.txt for details on ZZ_pX; +see ZZ_pXFactoring.txt for details on +the routines for factoring polynomials over ZZ_p; +see vec_ZZ_p.txt for details +on mathematical operations on Vec<ZZ_p>'s; +see mat_ZZ_p.txt for details on +mathematical operations on Mat<ZZ_p>'s. + +


+ +There is a mechanism for saving and restoring a modulus, +which the following example illustrates. +This routine takes as input an integer polynomial +and a prime, and tests if the polynomial is irreducible modulo +the prime. + + + +

+#include <NTL/ZZX.h>
+#include <NTL/ZZ_pXFactoring.h>
+
+using namespace std;
+using namespace NTL;
+
+long IrredTestMod(const ZZX& f, const ZZ& p)
+{
+   ZZ_pPush push(p); // save current modulus and install p
+                     // as current modulus
+
+   return DetIrredTest(conv<ZZ_pX>(f));
+
+   // old modulus is restored automatically when push is destroyed
+   // upon return
+}
+

+ + + +The modulus switching mechanism is actually quite a bit +more general and flexible than this example illustrates. + +

+Note the use of the conversion function +conv<ZZ_pX>. +We could also have used the equivalent procedural form: + + +

+ +   ZZ_pX f1;
+   conv(f1, f);
+   return DetIrredTest(f1);
+
+

+ + + + +


+ +Suppose in the above example that p is known in advance +to be a small, single-precision prime. +In this case, NTL provides a class zz_p, that +acts just like ZZ_p, +along with corresponding classes Vec<zz_p>, +Mat<zz_p>, and zz_pX. +The interfaces to all of the routines are generally identical +to those for ZZ_p. +However, the routines are much more efficient, in both time and space. + +

+For small primes, the routine in the previous example could be coded +as follows. + + + + +

+#include <NTL/ZZX.h>
+#include <NTL/lzz_pXFactoring.h>
+
+using namespace std;
+using namespace NTL;
+
+long IrredTestMod(const ZZX& f, long p)
+{
+   zz_pPush push(p);
+   return DetIrredTest(conv<zz_pX>(f));
+}
+

+ + + +


+ +The following is a routine (essentially the same as implemented in NTL) +for computing the GCD of polynomials with integer coefficients. +It uses a "modular" approach: the GCDs are computed modulo small +primes, and the results are combined using the Chinese Remainder Theorem (CRT). +The small primes are specially chosen "FFT primes", which are of +a special form that allows for particularly fast polynomial arithmetic. + +

+#include <NTL/ZZX.h>
+
+using namespace std;
+using namespace NTL;
+
+void GCD(ZZX& d, const ZZX& a, const ZZX& b)
+{
+   if (a == 0) {
+      d = b;
+      if (LeadCoeff(d) < 0) negate(d, d);
+      return;
+   }
+
+   if (b == 0) {
+      d = a;
+      if (LeadCoeff(d) < 0) negate(d, d);
+      return;
+   }
+
+   ZZ c1, c2, c;
+   ZZX f1, f2;
+
+   content(c1, a);
+   divide(f1, a, c1);
+
+   content(c2, b);
+   divide(f2, b, c2);
+
+   GCD(c, c1, c2);
+
+   ZZ ld;
+   GCD(ld, LeadCoeff(f1), LeadCoeff(f2));
+
+   ZZX g, res;
+
+   ZZ prod;
+
+   zz_pPush push; // save current modulus, restore upon return
+
+   long FirstTime = 1;
+
+   long i;
+   for (i = 0; ;i++) {
+      zz_p::FFTInit(i);
+      long p = zz_p::modulus();
+
+      if (divide(LeadCoeff(f1), p) || divide(LeadCoeff(f2), p)) continue;
+
+      zz_pX G, F1, F2;
+      zz_p  LD;
+
+      conv(F1, f1);
+      conv(F2, f2);
+      conv(LD, ld);
+
+      GCD(G, F1, F2);
+      mul(G, G, LD);
+
+
+      if (deg(G) == 0) {
+         res = 1;
+         break;
+      }
+
+      if (FirstTime || deg(G) < deg(g)) {
+         prod = 1;
+         g = 0;
+         FirstTime = 0;
+      }
+      else if (deg(G) > deg(g)) {
+         continue;
+      }
+
+      if (!CRT(g, prod, G)) {
+         PrimitivePart(res, g);
+         if (divide(f1, res) && divide(f2, res))
+            break;
+      }
+
+   }
+
+   mul(d, res, c);
+   if (LeadCoeff(d) < 0) negate(d, d);
+}
+

+ + + + +

+See lzz_p.txt for details on zz_p; +see lzz_pX.txt for details on zz_pX; +see lzz_pXFactoring.txt for details on +the routines for factoring polynomials over zz_p; +see vec_lzz_p.txt for details on vec_zz_p; +see mat_lzz_p.txt for details on mat_zz_p. + + +


+ +NTL provides a number of "residue class" types with a dynamic modulus +stored as a global variable: the types ZZ_p and zz_p, +discussed above, as well as the types ZZ_pE, zz_pE, +and GF2E, discussed later. + +

+Some caution must be used so that a variable constructed under +one modulus is not used "out of context", when a different modulus, or perhaps +no modulus, is installed as the current modulus. +While arithmetic operations should certainly be avoided, +NTL does take care to allow for certain operations to be safely +performed "out of context". +These operations include default and copy constructors, as well as assignment. + + +


+ +Arithmetic mod 2 is such an important special case that NTL +provides a class GF2, that +acts just like ZZ_p when p == 2, +along with corresponding classes Vec<GF2>, +Mat<GF2>, and GF2X. +The interfaces to all of the routines are generally identical +to those for ZZ_p. +However, the routines are much more efficient, in both time and space. + +Note that Vec<GF2> is an explicit specialization +of the template class Vec<T>, with a special +implementation that packs the coefficients into the bits +of a machine word. +You need to include the header file <NTL/vec_GF2.h> +to use the class Vec<GF2>. + +

+ +This example illustrates the GF2X and Mat<GF2> +classes with a simple routine to test if a polynomial over GF(2) +is irreducible using linear algebra. +NTL's built-in irreducibility test is to be preferred, however. + + + +

+#include <NTL/GF2X.h>
+#include <NTL/mat_GF2.h>
+
+using namespace std;
+using namespace NTL;
+
+long MatIrredTest(const GF2X& f)
+{
+   long n = deg(f);
+
+   if (n <= 0) return 0;
+   if (n == 1) return 1;
+
+   if (GCD(f, diff(f)) != 1) return 0;
+
+   Mat<GF2> M;
+
+   M.SetDims(n, n);
+
+   GF2X x_squared = GF2X(INIT_MONO, 2);
+
+   GF2X g;
+   g = 1;
+
+   for (long i = 0; i < n; i++) {
+      VectorCopy(M[i], g, n);
+      M[i][i] += 1;
+      g = (g * x_squared) % f;
+   }
+
+   long rank = gauss(M);
+
+   if (rank == n-1)
+      return 1;
+   else
+      return 0;
+}
+

+ + + +

+Note that the statement + + +

+ +   g = (g * x_squared) % f;
+
+

+ + +could be replaced by the more efficient code sequence + + +

+ +   MulByXMod(g, g, f);
+   MulByXMod(g, g, f);
+
+

+ + +but this would not significantly impact the overall +running time, since the Gaussian elimination dominates. + +

+See GF2.txt for details on GF2; +see GF2X.txt for details on GF2X; +see GF2XFactoring.txt for details on +the routines for factoring polynomials over GF2; +see vec_GF2.txt for details on vec_GF2; +see mat_GF2.txt for details on mat_GF2. + +

+ +

+[Previous] + [Up] + [Next] +
+ + + diff --git a/thirdparty/linux/ntl/doc/tour-ex5.html b/thirdparty/linux/ntl/doc/tour-ex5.html new file mode 100644 index 0000000000..c369ec6bab --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-ex5.html @@ -0,0 +1,136 @@ + + + +A Tour of NTL: Examples: Extension Rings and Fields + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Examples: Extension Rings and Fields +

+

+ +


+ +NTL also supports extension rings and fields over finite fields, +and polynomial arithmetic over such extensions. +Here is a little program that illustrates this. + + + +

+#include <NTL/ZZ_pXFactoring.h>
+#include <NTL/ZZ_pEX.h>
+
+using namespace std;
+using namespace NTL;
+
+int main()
+{
+   ZZ_p::init(ZZ(17)); // define GF(17)
+
+   ZZ_pX P;
+   BuildIrred(P, 10); // generate an irreducible polynomial P
+                      // of degree 10 over GF(17)
+
+   ZZ_pE::init(P); // define GF(17^10)
+
+   ZZ_pEX f, g, h;  // declare polynomials over GF(17^10)
+
+   random(f, 20);  // f is a random, monic polynomial of degree 20
+   SetCoeff(f, 20);
+
+   random(h, 20); // h is a random polynomial of degree less than 20
+
+   g = MinPolyMod(h, f); // compute the minimum polynomial of h modulo f
+
+   if (g == 0) Error("oops (1)"); // check that g != 0
+
+   if (CompMod(g, h, f) != 0) // check that g(h) = 0 mod f
+      Error("oops (2)");
+}
+

+ + + +

+This example illustrates building extension rings over ZZ_p. +One can also use zz_p and GF2 as base classes; +the syntax is exactly the same. + +

+See ZZ_pE.txt for the basics of the extension +ring ZZ_pE over ZZ_p. +Also see ZZ_pEX.txt for polynomial +arithmetic over ZZ_pE, and +ZZ_pEXFactoring.txt for factoring +routines over ZZ_pE. +See vec_ZZ_pE.txt for vectors over ZZ_pE, +and mat_ZZ_pE.txt for matrices over ZZ_pE. + +

+See lzz_pE.txt for the basics of the extension +ring zz_pE over zz_p. +Also see lzz_pEX.txt for polynomial +arithmetic over zz_pE, and +lzz_pEXFactoring.txt for factoring +routines over zz_pE. +See vec_lzz_pE.txt for vectors over zz_pE, +and mat_lzz_pE.txt for matrices over zz_pE. + +

+See GF2E.txt for the basics of the extension +ring GF2E over GF2. +Also see GF2EX.txt for polynomial +arithmetic over GF2E, and +GF2EXFactoring.txt for factoring +routines over GF2E. +See vec_GF2E.txt for vectors over GF2E, +and mat_GF2E.txt for matrices over GF2E. + + +

+[Previous] + [Up] + [Next] +
+ + + diff --git a/thirdparty/linux/ntl/doc/tour-ex6.html b/thirdparty/linux/ntl/doc/tour-ex6.html new file mode 100644 index 0000000000..72618cd4f8 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-ex6.html @@ -0,0 +1,137 @@ + + + +A Tour of NTL: Examples: Floating Point Classes + + +
+[Previous] + [Up] +[Next] +
+ +

+

+A Tour of NTL: Examples: Floating Point Classes +

+

+ +


+ +NTL also supports arbitrary precision floating point with +the class RR. +Additionally, it supports two specialized classes: quad_float, +which gives a form of quadruple precision, but without an extended +exponent range, +and xdouble, +which gives double precision, but with an extended exponent range. +The advantage of the latter two classes is efficiency. + +

+ +Here again is a program that reads a list of numbers from the input, +and outputs the sum of their squares, using the class RR. + + + +

+#include <NTL/RR.h>
+
+using namespace std;
+using namespace NTL;
+
+int main()
+{
+   RR acc, val;
+
+   acc = 0;
+   while (cin >> val)
+      acc += val*val;
+
+   cout << acc << "\n";
+}
+

+ + + +

+ +The precision used for the computation can be set by executing + + +

+ +   RR::SetPrecision(p);
+
+

+ + +which sets the effective precision to p bits. +By default, p=150. +All of the basic arithmetic operations compute their results +by rounding to the nearest p-bit floating point number. +The semantics of this are exactly the same as in the IEEE floating +point standard (except that there are no special values, like +"infinity" and "not a number"). + +

+ +The number of decimal digits of precision that are used when +printing an RR can be set by executing + +

+ +   RR::SetOutputPrecision(d);
+
+

+ + +which sets the output precision to d. +By default, d=10. + +

+See RR.txt for details. + +

+ +By replacing the occurrences of RR by either quad_float +or xdouble, one gets an equivalent program using one of the +other floating point classes. +The output precision for these two classes can be controlled just +as with RR. +See quad_float.txt and +xdouble.txt +for details. + +

+ + +

+[Previous] + [Up] +[Next] +
+ + + diff --git a/thirdparty/linux/ntl/doc/tour-ex7.html b/thirdparty/linux/ntl/doc/tour-ex7.html new file mode 100644 index 0000000000..22baae0cb1 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-ex7.html @@ -0,0 +1,200 @@ + + + +A Tour of NTL: Examples: Thread Pools + + +
+[Previous] + [Up] +[Next] +
+ +

+

+A Tour of NTL: Examples: Thread Pools +

+

+ +


+ +If you have built NTL with NTL_THREAD_BOOST=on, +then not only is NTL thread safe, but certain parts +of NTL are designed to use multiple threads to speed things +up. +To implement this, NTL makes use of a thread pool, +which is a collection of threads that are created once +and then used over and over again, to avoid the significant +overhead of thread creation and destruction. +You can also use this same thread pool to speed up +NTL client code. +

+To use this feature, you have to include the header file +NTL/BasicThreadPool.h. +In your main program, you should also indicate how many threads +you want in the pool. +If you want, say, 8 threads, you do this by calling the function +SetNumThreads(8), as in the sketch below. +
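+For example, the top of a program that wants an 8-thread pool might look like this (a minimal sketch):
+
+   #include <NTL/BasicThreadPool.h>
+
+   using namespace std;
+   using namespace NTL;
+
+   int main()
+   {
+      SetNumThreads(8);  // create the thread pool once, up front
+
+      // ... NTL computations (and your own parallel loops) go here ...
+   }
+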

+If you do this, then certain parts of NTL will use these +threads when possible (this is a work in progress). +To use these threads in your own code, the easiest way +is with a parallel for loop, +illustrated in the following example. + +See BasicThreadPool.txt +for more details. + +Consider the following routine: + + + +

+   void mul(ZZ *x, const ZZ *a, const ZZ *b, long n)
+   {
+      for (long i = 0; i < n; i++)
+         mul(x[i], a[i], b[i]);
+   }
+

+ + + + + + + +

+We can parallelize it as follows: + + + +

+   void mul(ZZ *x, const ZZ *a, const ZZ *b, long n)
+   {
+      NTL_EXEC_RANGE(n, first, last)
+
+         for (long i = first; i < last; i++)
+            mul(x[i], a[i], b[i]);
+
+      NTL_EXEC_RANGE_END
+   }
+

+ + + + + +

+NTL_EXEC_RANGE and +NTL_EXEC_RANGE_END are macros that just do the right +thing. If there are nt threads available, the interval +[0..n) will be +partitioned into (up to) nt subintervals, and a different thread will be +used to process each subinterval. You still have to write the for loop +yourself: the macro just declares and initializes variables first and +last (or whatever you want to call them) of type long +that represent the +subinterval [first..last) to be processed by one thread. + + + +

+Note that the current thread participates as one of the nt available +threads, and that the current thread will wait for all participating threads +to finish their task before proceeding. + +

+Within the "body" of this construct, you can freely reference any variables +that are visible at this point. This is implemented using the C++ lambda +feature (capturing all variables by reference). + +

+This construct will still work even if threads are disabled, in which case +it runs single-threaded with first=0 and last=n. + +

+Note that the code within the EXEC_RANGE + body could call other routines that +themselves attempt to execute an EXEC_RANGE: +if this happens, the latter +EXEC_RANGE will detect this and run single-threaded. + +

+You may wish to do other things within the EXEC_RANGE + body than just execute +a loop. One thing you may want to do is to declare variables. Another +thing you may want to do is to set up a local context +for a ZZ_p modulus (or +other type of modulus). +Here is an example of doing this: + + + +

+   void mul(ZZ_p *x, const ZZ_p *a, const ZZ_p *b, long n)
+   {
+      ZZ_pContext context;
+      context.save();
+
+      NTL_EXEC_RANGE(n, first, last)
+      
+         context.restore();
+
+         for (long i = first; i < last; i++)
+            mul(x[i], a[i], b[i]);
+
+      NTL_EXEC_RANGE_END
+   }
+

+ + + + + +

+A lower-level set of tools is available, which allow for +more fine-grained control. +See BasicThreadPool.txt +for more details. + +

+[Previous] + [Up] + [Next] +
+ + + diff --git a/thirdparty/linux/ntl/doc/tour-examples.html b/thirdparty/linux/ntl/doc/tour-examples.html new file mode 100644 index 0000000000..5f1ce65bf2 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-examples.html @@ -0,0 +1,44 @@ + + + +A Tour of NTL: Examples + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Examples +

+

+ +


+ +

+Perhaps the best way to introduce the basics of NTL +is by way of example. +Browse the following for a number of examples that illustrate +some aspects of the functionality and programming interface of NTL. + +

+ +

    + +
  1. Big Integers +
  2. Vectors and Matrices +
  3. Polynomials +
  4. Modular Arithmetic +
  5. Extension Rings and Fields +
  6. Floating Point Classes +
  7. Thread Pools + + +
+ + + + diff --git a/thirdparty/linux/ntl/doc/tour-gf2x.html b/thirdparty/linux/ntl/doc/tour-gf2x.html new file mode 100644 index 0000000000..39c4180f74 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-gf2x.html @@ -0,0 +1,173 @@ + + + +A Tour of NTL: Using NTL with the gf2x library + + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Using NTL with the GF2X library +

+

+ +


+gf2x is a library for fast multiplication +of polynomials over GF(2). +The gf2x library was developed by Emmanuel Thomé, +Paul Zimmermann, Pierrick Gaudry, and Richard Brent. +You can get more information about it, as well as the latest version +from here. + +

+ +Unlike NTL, which only implements a version of Karatsuba multiplication, +gf2x implements other algorithms that are faster +for very large degree polynomials. +If you use NTL with the gf2x library, +then multiplication, division, GCD, and minimum polynomial +calculations for the GF2X class will +be faster for large degree polynomials. + +

+Warning: +the current version of gf2x (v1.1) is neither thread safe +nor exception safe; +do not use it if you need these features in NTL. + + + +

+

+Downloading and building gf2x +

+

+ +Download gf2x from here. +You will get a file gf2x-XXX.tar.gz. +

+Now do the following: +

+   % gunzip gf2x-XXX.tar.gz
+   % tar xf gf2x-XXX.tar
+   % cd gf2x-XXX
+   % ./configure --prefix=$HOME/sw
+   % make
+   % make check
+   % make install
+
+This will build, test, and install gf2x in $HOME/sw. +Of course, change $HOME/sw to whatever you want (the default is +/usr/local). +You will find the gf2x header files in $HOME/sw/include +and the compiled binaries in $HOME/sw/lib. + +

+You can also supply the option +--disable-shared to the configure script, +if you only want static libraries. +However, if you ultimately want to build NTL as a shared +library, then you must also build gf2x as a shared library. + +

+

+You must ensure that NTL and gf2x have the same +ABI. +gf2x's configuration script might need some +help to select the right one. +For example, you may have to pass +

+   ABI=64 CFLAGS="-m64 -O2"
+
+to gf2x's configure script to force a 64-bit ABI. + + + + + +

+

+Building and using NTL with gf2x +

+

+ +When building NTL with gf2x, you have to tell NTL that you want to +use it. +The easiest way to do this is by passing the argument +NTL_GF2X_LIB=on to the NTL configuration script +when you are installing NTL. +Assuming you installed gf2x in $HOME/sw as above, +and you also want to install NTL in $HOME/sw, +you execute: +

+   % ./configure PREFIX=$HOME/sw NTL_GF2X_LIB=on  GF2X_PREFIX=$HOME/sw
+
+You can write this more simply as +
+   % ./configure DEF_PREFIX=$HOME/sw NTL_GF2X_LIB=on 
+
+Here, DEF_PREFIX is a variable that is used +to specify the location of all software, +and it defaults to /usr/local. + + +

+If you installed gf2x in /usr/local (or some other +standard system directory where your compiler will look by default) +then simply +

+   % ./configure PREFIX=$HOME/sw NTL_GF2X_LIB=on
+
+does the job. +Moreover, if NTL is also to be installed in /usr/local, +then +
+   % ./configure NTL_GF2X_LIB=on
+
+does the job. + +

+Instead of passing arguments to the configure script, +you can also just edit the config.h and makefile by hand. +The documentation in these files should be self-explanatory. + + +

+When compiling programs that use NTL with gf2x, +you need to link with the gf2x library. +If gf2x is installed as above in +$HOME/sw, rather than in a standard system directory, + this just means adding +-L$HOME/sw/lib -lgf2x to the compilation command. +If you installed gf2x in a standard system directory, +then just -lgf2x does the job. +Note that -lgf2x must come after -lntl +on the command line. +Finally, if NTL and gf2x are installed as +shared libraries, then you don't even need -lgf2x. + + + + +


+ + +

+ +

+[Previous] + [Up] + [Next] +
+ + + + + diff --git a/thirdparty/linux/ntl/doc/tour-gmp.html b/thirdparty/linux/ntl/doc/tour-gmp.html new file mode 100644 index 0000000000..03290d6aa0 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-gmp.html @@ -0,0 +1,301 @@ + + + +A Tour of NTL: Using NTL with GMP + + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Using NTL with GMP +

+

+ +


+ +GMP is the GNU Multi-Precision library. +You can get more information about it, as well as the latest version +from here. + +

+
+Briefly, GMP is a library for long integer arithmetic.
+It has hand-crafted assembly routines for a wide variety
+of architectures.
+For basic operations, like integer multiplication, it can be
+two to three (and sometimes a bit more) times faster than NTL's
+traditional long integer package.
+The speedup is most dramatic on x86 machines.

+As of version 9.6.3, NTL uses GMP by default. +You can disable GMP by passing NTL_GMP_LIP=off +as an option to NTL's configure script. +If you disable the use of GMP, +NTL uses a long integer package derived +from Arjen Lenstra's LIP package. +This is not recommended: GMP is much faster. + +

+Even if you do not use GMP,
+you should still read the section below
+on backward compatibility
+so that you can write portable code and avoid deprecated constructs.

+Note: GMP is thread safe, so you should feel free to use it
+in a thread-safe build of NTL.
+However, the current version of GMP (v6.1)
+is not entirely exception friendly (it may
+abort a running program, but only in some very extreme and
+unusual circumstances).

+

+Downloading and building GMP +

+

+ +Many unix distributions now include GMP by default. +But if not, it is pretty easy to install it directly from source, +as follows. +

+ +Download GMP from here. +You will get a file gmp-XXX.tar.gz. +

+Now do the following: +

+   % gunzip gmp-XXX.tar.gz
+   % tar xf gmp-XXX.tar
+   % cd gmp-XXX
+   % ./configure --prefix=$HOME/sw
+   % make
+   % make check
+   % make install
+
+This will build, test, and install GMP in $HOME/sw. +Of course, change $HOME/sw to whatever you want (the default is +/usr/local). +You will find the GMP header files in $HOME/sw/include +and the compiled binaries in $HOME/sw/lib. + +

+You can also supply the option
+--disable-shared to the configure script,
+if you only want static libraries.
+However, if you ultimately want to build NTL as a shared
+library, then you must also build GMP as a shared library.

+You must ensure that NTL and GMP have the same +ABI. +Usually, GMP's configure script will automatically +choose a 64-bit ABI if available. + + +

+

+Building and using NTL with GMP +

+

+ +When you are installing NTL, +if you installed GMP in $HOME/sw as above, +and you also want to install NTL in $HOME/sw, +you execute: +

+   % ./configure PREFIX=$HOME/sw GMP_PREFIX=$HOME/sw
+
+You can write this more simply as +
+   % ./configure DEF_PREFIX=$HOME/sw 
+
+Here, DEF_PREFIX is a variable that is used +to specify the location of all software, +and it defaults to /usr/local. + + +

+If you installed GMP in /usr/local (or some other +standard system directory where your compiler will look by default) +then simply +

+   % ./configure PREFIX=$HOME/sw 
+
+does the job. +Moreover, if NTL is also to be installed in /usr/local, +then +
+   % ./configure 
+
+does the job. + +

+Instead of passing arguments to the configure script, +you can also just edit the config.h and makefile by hand. +The documentation in these files should be self-explanatory. + + +

+When compiling programs that use NTL with GMP, +you need to link with the GMP library. +If GMP is installed as above in +$HOME/sw, rather than in a standard system directory, + this just means adding +-L$HOME/sw/lib -lgmp to the compilation command. +If you installed GMP in a standard system directory, +then just -lgmp does the job. +Note that -lgmp must come after -lntl +on the command line. +Finally, if NTL and GMP are installed +as shared libraries, then you don't even need -lgmp. + + +

+NTL has been tested and works correctly with GMP versions 3.1, 3.1.1, +4.1.4, 5.1, 6.0, and 6.1 (among others). +It is not possible to use versions of GMP prior to 3.1 with NTL. + +

+When using NTL with GMP,
+as a user of NTL, you do not need to
+know or understand anything about the GMP library.
+So while there is detailed documentation available about how
+to use GMP, you do not have to read it.
+The programming interface to the long integer package
+completely hides implementation details.
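+
+For example, a fragment like the following (the values are arbitrary)
+compiles and behaves the same whether or not NTL was built with GMP;
+only the speed differs:
+
+   ZZ a, b, c;
+   a = conv<ZZ>("123456789012345678901234567890");
+   b = conv<ZZ>("987654321098765432109876543210");
+   c = a * b;   // GMP does the work internally when NTL_GMP_LIP is on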

+

+
+Backward compatibility

+

+ +With version 5.0 of NTL, some aspects of the programming interface +are 'deprecated' so as to allow the use of another long integer package, +such as GMP, as the long integer package. + +

+Prior to version 5.0, the macro NTL_NBITS was defined, +along with the macro NTL_RADIX defined to be +(1L << NTL_NBITS). +While these macros are still available when using NTL's traditional +long integer package (i.e., when NTL_GMP_LIP is not set), +they are not available when using the GMP as the long integer +package (i.e., when NTL_GMP_LIP is set). +Furthermore, when writing portable programs, one should avoid these macros. + +

+ + +Also, the static function long ZZ::digit(const ZZ &, long); +is defined when using traditional long integer arithmetic, +but is not available when using GMP as the long integer package, +and in any case, its use should be avoided when writing portable programs. + + +

+Instead of the above macros, one should use the following macros:

+   NTL_ZZ_NBITS -- number of bits in a zzigit;
+                   a ZZ is represented as a sequence of zzigits.
+
+   NTL_SP_NBITS -- max number of bits in a "single-precision" number
+
+   NTL_WSP_NBITS -- max number of bits in a "wide single-precision" number
+
+

+The following relations hold: +

+   NTL_SP_NBITS <= NTL_WSP_NBITS <= NTL_ZZ_NBITS
+   26 <= NTL_SP_NBITS <= min(NTL_BITS_PER_LONG-2, NTL_DOUBLE_PRECISION-3)
+   NTL_WSP_NBITS <= NTL_BITS_PER_LONG-2
+
+ +

+
+Note that NTL_ZZ_NBITS may be less than, equal to, or greater than
+NTL_BITS_PER_LONG -- no particular relationship
+should be assumed to hold.
+In particular, expressions like (1L << NTL_ZZ_NBITS)
+might overflow.

+"single-precision" numbers are meant to be used in conjunction with the +single-precision modular arithmetic routines. + +

+"wide single-precision" numbers are meant to be used in conjunction +with the ZZ arithmetic routines for optimal efficiency. + +

+Note that when using traditional long integer arithmetic, we have +

+    NTL_ZZ_NBITS = NTL_SP_NBITS = NTL_WSP_NBITS = NTL_NBITS.
+
+ +

+The following auxiliary macros are also defined:

+NTL_FRADIX -- double-precision value of 2^NTL_ZZ_NBITS
+NTL_SP_BOUND -- (1L << NTL_SP_NBITS)
+NTL_WSP_BOUND -- (1L << NTL_WSP_NBITS)
+
+ +

+
+Note that for a ZZ n,
+n.size() returns the number of "zzigits" of n.
+This is supported with either traditional or GMP integer arithmetic.
+Note, however, that some older code might write n.size() <= 1
+as a way to test if NumBits(n) <= NTL_NBITS.
+This is no longer the right thing to do, if one wants portable code
+that works with either traditional or GMP long integer arithmetic.
+First, one has to decide whether one wants to test if
+NumBits(n) is bounded by NTL_ZZ_NBITS,
+NTL_SP_NBITS, or NTL_WSP_NBITS.
+In the first case, n.size() <= 1 is still
+the right way to test this.
+In the second case, write this as n.SinglePrecision().
+In the third case, write this as n.WideSinglePrecision().
+The routines SinglePrecision and WideSinglePrecision
+are new to NTL version 5.0.
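+
+In code, the three portable tests sketched above look like this:
+
+   ZZ n;
+   // ...
+   if (n.size() <= 1)           { /* NumBits(n) <= NTL_ZZ_NBITS  */ }
+   if (n.SinglePrecision())     { /* NumBits(n) <= NTL_SP_NBITS  */ }
+   if (n.WideSinglePrecision()) { /* NumBits(n) <= NTL_WSP_NBITS */ }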

+ +Most "high level" applications that use NTL should not be affected +by these changes to NTL's programming interface, and if they are, +changing the programs should be quite easy. + + +


+ + +

+ +

+[Previous] + [Up] + [Next] +
+ + + + + diff --git a/thirdparty/linux/ntl/doc/tour-impl.html b/thirdparty/linux/ntl/doc/tour-impl.html new file mode 100644 index 0000000000..599faeb3e2 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-impl.html @@ -0,0 +1,476 @@ + + + +A Tour of NTL: NTL Implementation and Portability + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: NTL Implementation and Portability +

+

+ +


+ +NTL is designed to be portable, fast, +and relatively easy to use and extend. + +

+To make NTL portable, no assembly code is used (well, almost none, see below). +This is highly desirable, as architectures are constantly +changing and evolving, and maintaining assembly +code is quite costly. +By avoiding assembly code, NTL should remain usable, +with virtually no maintenance, for many years. + +

+ +

Minimal platform requirements

+
+When the configuration flags NTL_CLEAN_INT
+and NTL_CLEAN_PTR are both on (this is not the default,
+see below),
+NTL makes two requirements
+of its platform,
+neither of which is guaranteed by the C++ language
+definition, but both of which are essentially universal:
    +
  1. +int and long quantities, respectively, +are represented using a 2's complement +representation whose width is equal to the width of unsigned int +and unsigned long, respectively. +
  2. +Double precision floating point +conforms to the IEEE floating point standard. +
+ +

+NTL makes very conservative requirements of the C++ compiler: +

    +
  • +it is assumed that the C++ compiler conforms to the 1998 standard,
+including the basics of templates;
  • +it does not assume any features not in the 1998 standard, +unless compiled with the NTL_THREADS or NTL_EXCEPTIONS flags. +
+ + +

+ +

The NTL_CLEAN_INT flag

+ +

+ +The configuration flag NTL_CLEAN_INT +is currently off by default. + +

+When this flag is off, NTL makes another requirement of its platform;
+namely, that conversions from unsigned long to long
+convert the bit pattern without change to the corresponding 2's complement
+signed integer.
+Note that the C++ standard defines the behavior of
+converting unsigned
+to signed values as implementation defined when the value
+cannot be represented in the range of nonnegative signed values.
+Nevertheless, this behavior is essentially universal, and more importantly,
+it is not undefined behavior: implementation-defined behavior must be
+documented and respected by the compiler, while undefined behavior can
+be exploited by the compiler in some surprising ways.
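+
+In code, the assumption amounts to the following sketch (again, this
+is implementation-defined behavior, not guaranteed by the standard):
+
+   unsigned long u = ~0UL;   // all bits set
+   long s = long(u);         // assumed to preserve the bit pattern,
+                             // i.e., s == -1 on a 2's complement machine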

+Actually, with NTL_CLEAN_INT off, it is also assumed +that right shifts of signed integers are consistent, +in the sense that if it is sometimes an arithmetic shift, +then it is always an arithmetic shift (the installation +scripts check if right shift appears to be arithmetic, and if so, +this assumption is made elsewhere). +Arithmetic right shift is also implementation defined behavior +that is essentially universal. + + +

+It seems fairly unlikely that one would ever have to turn the +NTL_CLEAN_INT flag on, but it seems a good idea +to make this possible, and at the very least +to identify and isolate the code that +relies on this assumption. +The only code affected by this flag +is the traditional LIP long integer package (which, if you use +GMP as the long integer package, is not involved), +and the single-precision modular multiplication routines. + +

+Note that prior to NTL 9.0, the default compilation mode required
+that in a few critical places, signed integer arithmetic quietly wraps around
+on overflow; however, signed integer overflow is undefined behavior,
+and it seems that in recent years compilers have been getting
+more aggressive in exploiting such undefined behavior in their optimizations.
+Moreover, recent versions of GCC now come with a "sanitizer" that checks
+for undefined behavior.
+So, both to avoid potentially dangerous optimizations and to allow
+NTL to pass such sanitizer checks, it seemed safer to move to this
+more conservative approach.
+There should, in fact, be zero performance penalty in doing so.
+Also note that I was never aware of any compiler that generated incorrect
+code in the pre-9.0 code: this new approach is just to be on the safe side
+in the future.

The NTL_CLEAN_PTR flag

+ +

+ +The configuration flag NTL_CLEAN_PTR +is currently off by default. + +

+When this flag is off, NTL makes another requirement of its platform;
+namely, that the address space is "flat", and in particular,
+that one can test if an object pointed to by a pointer p
+is located in an array of objects v[0..n-1] by testing
+if p >= v and p < v + n.
+The C++ standard does not guarantee that such a test will
+work; the only way to perform this test in a standard-conforming way
+is to iteratively test if p == v, p == v+1, etc.

+This assumption of a "flat" address space is essentially universally
+valid, and making this assumption leads to more efficient code.
+For this reason, the NTL_CLEAN_PTR flag is off by default,
+but one can always turn it on, and in fact, the overall performance
+penalty should be negligible for most applications.

Some floating point issues

+ + +

+NTL uses floating point arithmetic in a number of places, +including a number of exact computations, where one might +not expect to see floating point. +Relying on floating point may seem prone to errors, +but with the guarantees provided by the IEEE standard, +one can prove the correctness of the NTL code that uses floating point. + +

+Briefly, the IEEE floating point standard says that basic arithmetic operations +on doubles should work as if the operation were performed with infinite +precision, and then rounded to p bits, +where p is the precision (typically, p = 53). + + +

+Throughout most of NTL, correctness follows from weaker assumptions, +namely +

+

    +
  • +basic arithmetic operations and conversion from integral types +produce results with a relative error of +2^{-p + 1} (assuming no overflow), +
  • +multiplication by powers of 2 produce exact results (assuming no overflow), +
  • +basic arithmetic operations on integers represented as doubles and conversions from integral types +to doubles produce exact results, provided the inputs and outputs +are less than 2^p in absolute value, +
  • +assuming no overflow, x - long(x) produces an exact result for nonnegative x. +
+ +

+It is also generally assumed that the compiler does not +do too much "regrouping" of arithmetic expressions involving +floating point. +Most compilers respect the implied grouping of floating point +computations, and NTL goes out of its way to make its +intentions clear: instead of x = (a + b) + c, +if the grouping is truly important, this is written +as t = a + b; x = t + c. +Current standards do not allow, and most implementations will not +perform, any regrouping of this, e.g., x = a + (b + c), +since in floating point, addition and subtraction are not +associative. + +

+Unfortunately, some compilers do not do this correctly, +unless you tell them. +With Intel's C compiler icc, for example, +you should compile NTL with the flag -fp-model strict +to enforce strict adherence to floating point standards. +That said, some effort has been made to ensure that NTL +works correctly even if the compiler does perform such +regrouping, including replacement of x/y +by x*(1/y). + +

+Also, you should be wary of compiling using an optimization +level higher than the default -O2 -- +this may break some floating point assumptions (and maybe +some other assumptions as well). + +

+In any case, programs that compile against NTL header files +should compile correctly, even under very aggressive optimizations. + +

+One big problem with the IEEE standard is that it allows intermediate +quantities to be computed in a higher precision than the standard +double precision. +Most platforms today implement the "strict" IEEE standard, with no +excess precision. +Up until recently, the Intel x86 machine with the GCC compiler +was a notable exception to this: on older x86 machines, floating point +was performed using the x87 FPU instructions, which operate on 80-bit, +extended precision numbers; nowadays, most compilers use the SSE instructions, +which operate on the standard, 64-bit numbers. + +

+Historically, +NTL went out of its way to ensure that its code is correct with +both "strict" and "loose" IEEE floating point. +This is achieved in a portable fashion throughout NTL, except +for the quad_float module, where some desperate hacks, +including assembly code, may be used +to try to work around problems created by "loose" IEEE floating point +[more details]. +But note that even if the quad_float package does not work correctly +because of these problems, the only other routines that are affected +are the LLL_QP routines in the LLL module -- the +rest of NTL should work fine. +Hopefully, because of the newer SSE instructions, this whole strict/loose +issue is a thing of the past. + +

+Another problem is that some hardware (especially newer Intel chips)
+supports fused multiply-add (FMA) instructions.
+Again, this is only a problem for quad_float, and some
+care is taken to detect the problem and to work around it.
+The rest of NTL will work fine regardless.

+Mostly, NTL does not +require that the IEEE floating point +special quantities "infinity" +and "not a number" are implemented correctly. +This is certainly the case for core code where +floating point arithmetic is used for exact (but fast) +computations, as the numbers involved never get too big (or small). +However, the behavior of +certain explicit floating point computations +(e.g., the xdouble and quad_float classes, +and the floating point versions of LLL) will be +much more predictable and reliable if "infinity" +and "not a number" are implemented correctly. + + + + +

+

Algorithms

+

+NTL makes fairly consistent use of asymptotically fast algorithms. + +

+Long integer multiplication is implemented using the classical +algorithm, crossing over to Karatsuba for very big numbers. +Long integer division is currently only implemented using +the classical algorithm -- unless you use NTL with GMP (version 3 or later), +which +employs an algorithm that is about twice as slow as multiplication +for very large numbers. +

+Polynomial multiplication and division are carried out
+using a combination of the classical algorithm, Karatsuba,
+the FFT using small primes, and the FFT using the Schoenhage-Strassen
+approach.
+The choice of algorithm depends on the coefficient domain.

+Many algorithms employed throughout NTL are inventions +of the author (Victor Shoup) +and his colleagues +Joachim von zur Gathen +and +Erich Kaltofen, +as well as John Abbott +and +Paul Zimmermann. + +

+

+Thread safety +

+

+As of v7.0, NTL is thread safe. +That said, there are several things to be aware of: +

    + +
  • +To use this feature, you have to enable NTL_THREADS +in the configuration script. +Also, you will need a compiler and runtime library that +implements several key C++11 features, +including thread_local storage. +
      +
    • +NOTE: as of v9.8, the requirements have been relaxed, so that +for gcc and gcc-compatible compilers +(such as clang and icc) only support of the gcc __thread +storage specifier is required. +
    • +With these relaxed requirements, it is possible to build +a thread safe version of NTL on Linux using gcc 4.8 and above, +or on Mac OSX 10.10 and above. + +
    + +

  • +You must build NTL using GMP (i.e., configure with NTL_GMP_LIP=on). +The classic LIP integer arithmetic is not thread safe: it could +be made so, but it is not a priority at this time. + +

    +

  • +The current version (v1.1) of the external gf2x +library is not thread safe. +Therefore, you should NOT build NTL using gf2x if you need a thread-safe +build. +
+ +To obtain thread safety, I used the following strategies: +
    +

    +

  • +In places where NTL's interface demands global variables,
+such as the "current modulus" for the ZZ_p
+class, these global variables have been made thread local.
+So, you can pass around various ZZ_pContext objects
+among threads, and individual threads can install these locally
+(a small sketch appears after this list).
+Thus, different threads can concurrently use the same or different
+moduli, and it all just works, with no changes to NTL's interface.

    +

  • +In places where NTL used static variables to hold on to space
+for scratch variables, I make these variables thread local,
+and I also make sure the storage used by these variables
+gets released when the thread terminates.
+In all NTL builds (thread safe or not),
+I try to make sure that fairly large chunks of memory get released immediately.

    +

  • +In places where NTL uses a lazy strategy to build various tables
+(such as FFT primes), I use a "double checked locking" strategy
+to grow these tables in a way that (a) the tables can be
+shared among different threads, and (b) taking a lock
+on a mutex is very rare.
+The new C++11 concurrent memory model is essential here.

    +

  • +Smart pointers (for things like ZZ_pContext's) are
+designed to do the necessary reference counting in a thread-safe
+manner.

    +

  • +For pseudo-random number generation,
+the internal state of the PRG
+is thread local,
+and the default initial seed is guaranteed to be unique
+among all threads in a given process (and an attempt is made to
+make the seed globally unique among all processes and threads,
+but this is hard to do in a completely portable way).
+ +The overall structure of the code has been modified so that +the code base is nearly identical for regular and thread-safe builds: +there are just a few ifdef's on the NTL_THREADS +flag. + + +
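+
+Here is the sketch promised above: a minimal example (assuming
+NTL_THREADS=on, a C++11 <thread>, and an arbitrary modulus) of passing a
+ZZ_pContext to a worker thread:
+
+   void worker(ZZ_pContext context)
+   {
+      context.restore();   // install the modulus in this thread
+      ZZ_p a, b, c;
+      random(a); random(b);
+      mul(c, a, b);        // uses this thread's modulus
+   }
+
+   // in the main thread:
+   ZZ_p::init(ZZ(17));
+   ZZ_pContext context;
+   context.save();
+   std::thread t(worker, context);   // contexts copy cheaply ("shallow")
+   t.join();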

+

+Thread Boosting +

+

+ +As of v9.5.0, NTL provides a thread boosting feature. +With this feature, certain code within NTL will use available +threads to speed up computations on a multicore +machine. +This feature is enabled by setting NTL_THREAD_BOOST=on +during configuration. +See BasicThreadPool.txt +for more information. + +

+This feature is a work in progress. +Currently, basic ZZ_pX arithmetic has been thread boosted. +More code will be boosted later. + + +

+

+Error Handling and Exceptions +

+

+
+As of v8.0, NTL provides error handling through exceptions.
+To enable exceptions, you have to configure
+NTL with the NTL_EXCEPTIONS flag turned on.
+By default, exceptions are not enabled, and NTL
+reverts to its old error handling method:
+abort with an error message.

+If exceptions are enabled, then instead of aborting your
+program, an appropriate exception is thrown.
+More details on the programming interface
+of this feature are available here.
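+
+For example, the following sketch (assuming NTL_EXCEPTIONS=on, and
+assuming NTL's error objects derive from std::exception; it also needs
+<iostream>) catches the error raised when no modular inverse exists:
+
+   try {
+      ZZ a(4), n(8), x;
+      InvMod(x, a, n);   // no inverse of 4 mod 8: an error is raised
+   }
+   catch (const std::exception& e) {
+      std::cerr << "caught: " << e.what() << "\n";
+   }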

+If you enable exceptions, you must use a C++11 compiler. +Specifically, your compiler will need support for lambdas +(which are used to conveniently implement the "scope guard" idiom), +and your compiler should implement the new default exception +specification semantics (namely, that destructors are "noexcept" by +default). + +

+Implementation of this required a top-to-bottom scrub of NTL's code, +replacing a lot of old-fashioned code with more modern, RAII-oriented +code (RAII = "resource acquisition is initialization"). + + + + + +

+ +

+[Previous] + [Up] + [Next] +
+ + + + diff --git a/thirdparty/linux/ntl/doc/tour-intro.html b/thirdparty/linux/ntl/doc/tour-intro.html new file mode 100644 index 0000000000..8d4f14abe5 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-intro.html @@ -0,0 +1,130 @@ + + + +A Tour of NTL: Introduction + + + +
+[Previous] + [Up] + [Next] +
+ + +

+

+A Tour of NTL: Introduction +

+

+ +


+ + +

+NTL is a high-performance, portable C++ library providing +data structures and algorithms +for arbitrary length integers; +for vectors, matrices, and polynomials over the integers and over +finite fields; and for arbitrary precision floating point arithmetic. + +

+NTL provides high quality implementations of state-of-the-art +algorithms for: + +

    + +
  • +arbitrary length integer arithmetic and arbitrary precision +floating point arithmetic; + +
  • +polynomial arithmetic over the integers and finite fields +including basic arithmetic, polynomial factorization, +irreducibility testing, +computation +of minimal polynomials, traces, norms, and more; + +
  • +lattice basis reduction, including very robust and fast implementations +of Schnorr-Euchner, block Korkin-Zolotarev reduction, +and the new Schnorr-Horner pruning heuristic for +block Korkin-Zolotarev; + +
  • +basic linear algebra over the integers, +finite fields, and arbitrary precision +floating point numbers. + +
+ +

+NTL's polynomial arithmetic is one of the fastest available anywhere, +and has been used to set "world records" for polynomial factorization +and determining orders of elliptic curves. + + +

+NTL's lattice reduction code is also one of the best available anywhere, +in terms of both speed and robustness, and one of the few +implementations of +block Korkin-Zolotarev reduction with the Schnorr-Horner pruning heuristic. +It has been used to "crack" several cryptosystems. + + +

+NTL can be easily installed in a matter of minutes +on just about any platform, +including virtually any 32- or 64-bit machine running +any flavor of Unix, +as well as PCs running Windows 95, 98, or NT, and Macintoshes. +NTL achieves this portability by avoiding esoteric C++ features, +and by avoiding assembly code; it should therefore remain usable +for years to come with little or no maintenance, even as +processors and operating systems continue to change and evolve. +However, NTL can be used in conjunction with +GMP (the GNU Multi-Precision library) +for enhanced performance. +NTL can also be used in conjunction with the +gf2x library for faster +arithmetic of large degree polynomials over GF(2). + + +

+NTL provides a clean and consistent interface to +a large variety of classes representing mathematical objects. +It provides a good environment for easily +and quickly implementing new +number-theoretic algorithms, without sacrificing performance. + +

+NTL is written and maintained by +Victor Shoup +with some contributions made by others +(see Acknowledgements). + +

+ +

+Legalistic Nonsense +

+

+

+NTL is free software, and may be used according to +the terms of the GNU General Public License. +

+[the precise licensing information of NTL] +

+[more information +about the GNU General Public License] + +

+ +

+[Previous] + [Up] + [Next] +
+ + + diff --git a/thirdparty/linux/ntl/doc/tour-modules.html b/thirdparty/linux/ntl/doc/tour-modules.html new file mode 100644 index 0000000000..cde500ebd5 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-modules.html @@ -0,0 +1,739 @@ + + + +A Tour of NTL: Summary of NTL's Main Modules + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Summary of NTL's Main Modules +

+

+ +


+ +

+ +NTL consists of a number of software modules. +Generally speaking, for each module foo, there is +

    +
  • +a header +file <NTL/foo.h>, found in subdirectory include, +
  • +a documentation file foo.txt, +found in subdirectory doc, +and +
  • +a source file foo.c, found in subdirectory src. +
+ +

+Note that all of the header files for NTL modules include the header file +<NTL/tools.h>, and by default, this header file includes +the standard headers +

    +
  • +<cstdlib>, +
  • +<cmath>, and +
  • +<iostream>. +
+Moreover, +the standard library is wrapped in namespace std +and NTL is wrapped in namespace NTL. + + + +

+ +The documentation file takes the form of a header file, +but stripped of implementation +details and declarations of some of the more esoteric +routines and data structures, and it contains more complete +and usually clearer documentation than in the header file. +Also, note that the documentation files do not explicitly +mention anything about namespaces std and NTL. + + +

+ +There is a plethora of conversion routines. +These are not documented in any of the individual +documentation files, but rather, +they are all briefly summarized in +conversions.txt. + +

+
+The following is a summary of the main NTL modules.
+The corresponding documentation file can be obtained by clicking on
+the module name. Note that the links below will take you to
+a "pretty printed" version of the corresponding .txt file.

+ + + + + + +

+BasicThreadPool + + + +class BasicThreadPool: a simple thread pool; +plus additional thread boosting features + + + + +

+GF2 + + + +class GF2: integers mod 2 + + + +

+GF2X + + + +class GF2X: polynomials over GF(2) (much more efficient +than using zz_pX with p=2); +includes routines for GCDs and minimal polynomials + + + +

+GF2XFactoring + + +routines for factoring polynomials over GF(2); +also includes routines for testing for and constructing +irreducible polynomials + + + +

+GF2XVec + + +class GF2XVec: fixed-length vectors of fixed-length GF2Xs; +less flexible, but more efficient than vec_GF2X + + + + + +

+GF2E + + +class GF2E: polynomial extension field/ring over GF(2), +implemented as GF(2)[X]/(P). + + + +

+GF2EX
+
+class GF2EX: polynomials over GF2E;
+includes routines for modular polynomial arithmetic,
+modular composition, minimal and characteristic
+polynomials, and interpolation.

+GF2EXFactoring + + +routines for factoring polynomials over GF2E; +also includes routines for testing for and constructing +irreducible polynomials + + + +

+HNF + + +routines for computing the Hermite Normal Form +of a lattice + + +

+Lazy + + +Support for thread-safe lazy initialization of objects + + +

+LazyTable + + +Support for thread-safe lazy initialization of tables + + +

+LLL
+
+routines for performing lattice basis reduction,
+including very fast and robust implementations of the Schnorr-Euchner LLL
+and block Korkin-Zolotarev reduction algorithms,
+as well as an integer-only reduction algorithm.
+Also, there are routines here for computing the kernel
+and image of an integer matrix, as well as finding integer
+solutions to linear systems of equations over the integers.

+RR + + +class RR: arbitrary-precision floating point numbers. + + + +

+SmartPtr + + +template classes SmartPtr, UniquePtr, +and a few other useful classes for managing pointers. + + + + +

+ZZ + + +class ZZ: arbitrary length integers; +includes routines for GCDs, Jacobi symbols, +modular arithmetic, and primality testing; +also includes small prime generation routines +and in-line routines for single-precision +modular arithmetic + + + + +

+ZZVec + + +class ZZVec: fixed-length vectors of fixed-length ZZs; +less flexible, but more efficient than vec_ZZ + + + + +

+ZZX + + +class ZZX: polynomials over ZZ; +includes routines for GCDs, minimal and characteristic +polynomials, norms and traces + + + + +

+ZZXFactoring + + +routines for factoring univariate polynomials over ZZ + + + + +

+ZZ_p + + +class ZZ_p: integers mod p + + +

+ZZ_pE + + +class ZZ_pE: ring/field extension of ZZ_p + + + + +

+ZZ_pEX
+
+class ZZ_pEX: polynomials over ZZ_pE;
+includes routines for modular polynomial arithmetic,
+modular composition, minimal and characteristic
+polynomials, and interpolation.

+ZZ_pEXFactoring + + +routines for factoring polynomials over ZZ_pE; +also includes routines for testing for and constructing +irreducible polynomials + + + + +

+ZZ_pX
+
+class ZZ_pX: polynomials over ZZ_p;
+includes routines for modular polynomial arithmetic,
+modular composition, minimal and characteristic
+polynomials, and interpolation.

+ZZ_pXFactoring + + +routines for factoring polynomials over ZZ_p; +also includes routines for testing for and constructing +irreducible polynomials + + + + +

+lzz_p + + +class zz_p: +integers mod p, where p is single-precision + + + +

+lzz_pE + + +class zz_pE: +ring/field extension of zz_p + + + +

+lzz_pEX + + +class zz_pEX: polynomials over zz_pE; +provides the same functionality as class ZZ_pEX, +but for single-precision p + + +

+lzz_pEXFactoring
+
+routines for factoring polynomials over zz_pE;
+provides the same functionality as ZZ_pEXFactoring,
+but for single-precision p

+lzz_pX + + +class zz_pX: polynomials over zz_p; +provides the same functionality as class ZZ_pX, +but for single-precision p + + + +

+lzz_pXFactoring
+
+routines for factoring polynomials over zz_p;
+provides the same functionality as ZZ_pXFactoring,
+but for single-precision p

+matrix + + +template class for +dynamic-size 2-dimensional arrays + + +

+mat_GF2 + + +class mat_GF2: matrices over GF(2); +includes basic matrix arithmetic operations, +including determinant calculation, matrix inversion, +solving nonsingular systems of linear equations, +and Gaussian elimination + + + +

+mat_GF2E + + +class mat_GF2E: matrices over GF2E; +includes basic matrix arithmetic operations, +including determinant calculation, matrix inversion, +solving nonsingular systems of linear equations, +and Gaussian elimination + + + + +

+mat_RR + + +class mat_RR: matrices over RR; +includes basic matrix arithmetic operations, +including determinant calculation, matrix inversion, +and solving nonsingular systems of linear equations. + + + + +

+mat_ZZ + + +class mat_ZZ: matrices over ZZ; +includes basic matrix arithmetic operations, +including determinant calculation, matrix inversion, +and solving nonsingular systems of linear equations. +See also the LLL module for additional routines. + + + + +

+mat_ZZ_p + + +class mat_ZZ_p: matrices over ZZ_p; +includes basic matrix arithmetic operations, +including determinant calculation, matrix inversion, +solving nonsingular systems of linear equations, +and Gaussian elimination + + + +

+mat_ZZ_pE + + +class mat_ZZ_pE: matrices over ZZ_pE; +includes basic matrix arithmetic operations, +including determinant calculation, matrix inversion, +solving nonsingular systems of linear equations, +and Gaussian elimination + + + + + +

+mat_lzz_p + + +class mat_zz_p: matrices over zz_p; +includes basic matrix arithmetic operations, +including determinant calculation, matrix inversion, +solving nonsingular systems of linear equations, +and Gaussian elimination + + +

+mat_lzz_pE + + +class mat_zz_pE: matrices over zz_pE; +includes basic matrix arithmetic operations, +including determinant calculation, matrix inversion, +solving nonsingular systems of linear equations, +and Gaussian elimination + + + + +

+mat_poly_ZZ + + +routine for computing the characteristic +polynomial of a mat_ZZ + + + + +

+mat_poly_ZZ_p + + +routine for computing the characteristic +polynomial of a mat_ZZ_p + + + + +

+mat_poly_lzz_p + + +routine for computing the characteristic +polynomial of a mat_zz_p + + + + + + +

+pair + + +template class for +pairs + + + + +

+quad_float + + +class quad_float: quadruple-precision floating point numbers. + + + + +

+tools + + +some basic types and utility routines, including the +timing function GetTime(), and several overloaded +versions of min() and max() + + + +

+vector + + +template class for +dynamic-size vectors + + +

+vec_GF2 + + +class vec_GF2: vectors over GF(2), with arithmetic + + +

+vec_GF2E + + +class vec_GF2E: vectors over GF2E, with arithmetic + + + +

+vec_RR + + +class vec_RR: vectors over RR, with arithmetic + + + + +

+vec_ZZ + + +class vec_ZZ: vectors over ZZ, with arithmetic + + + + +

+vec_ZZ_p + + +class vec_ZZ_p: vectors over ZZ_p, with arithmetic + + +

+vec_ZZ_pE + + +class vec_ZZ_pE: vectors over ZZ_pE, with arithmetic + + + + +

+vec_lzz_p + + +class vec_zz_p: vectors over zz_p, with arithmetic + + + +

+vec_lzz_pE + + +class vec_zz_pE: vectors over zz_pE, with arithmetic + + + +

+version + + +macros defining the NTL version number + + + + +

+xdouble + + +class xdouble: double-precision floating point numbers with +extended exponent range. + +
+ +

+ +

+Some other types +

+ + +

+In addition to the above, other generic vectors are declared, +not explicitly documented elsewhere: +

+ +

    +
  • vec_GF2XVec +
  • vec_ZZVec +
  • vec_double +
  • vec_long +
  • vec_quad_float +
  • vec_ulong +
  • vec_vec_GF2 +
  • vec_vec_GF2E +
  • vec_vec_RR +
  • vec_vec_ZZ +
  • vec_vec_ZZ_p +
  • vec_vec_ZZ_pE +
  • vec_vec_long +
  • vec_vec_lzz_p +
  • vec_vec_lzz_pE +
  • vec_vec_ulong +
  • vec_xdouble +
+ +

+
+These declarations are found in ".h" files with
+corresponding names.
+These header files simply provide typedefs for the
+corresponding template types, mainly for
+backward compatibility,
+e.g.,
+vec_double is a typedef for
+Vec<double>,
+and vec_vec_RR is a typedef for
+Vec< Vec<RR> >.
+No additional functionality is provided.

+All of the header files for polynomial classes ZZ_pX, +ZZX, etc., declare typedefs for the corresponding +vectors of polynomials vec_ZZ_pX, +vec_ZZX, etc. + +

+There are also a number of generic pair classes defined, +not explicitly documented elsewhere: + +

+ +

    +
  • pair_GF2EX_long +
  • pair_GF2X_long +
  • pair_ZZX_long +
  • pair_ZZ_pEX_long +
  • pair_ZZ_pX_long +
  • pair_lzz_pEX_long +
  • pair_lzz_pX_long +
+ +

+
+These declarations are found in ".h" files with
+corresponding names.
+Again, these files mainly exist for backward compatibility,
+and provide typedefs for the corresponding template types,
+e.g., pair_GF2EX_long is a typedef for
+Pair<GF2EX,long>.
+These files also give typedefs for the corresponding vector types,
+e.g.,
+vec_pair_GF2EX_long is a typedef for
+Vec< Pair<GF2EX,long> >.
+No additional functionality is provided.

+ +

+[Previous] + [Up] + [Next] +
+ + diff --git a/thirdparty/linux/ntl/doc/tour-roadmap.html b/thirdparty/linux/ntl/doc/tour-roadmap.html new file mode 100644 index 0000000000..d4e854eec1 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-roadmap.html @@ -0,0 +1,185 @@ + + + +A Tour of NTL: NTL past, present, and future + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: NTL past, present, and future +

+

+ +


+ +

+Some History +

+ +

+ +Work on NTL started around 1990, when I wanted to implement some new +algorithms for factoring polynomials over finite fields. +I found that none of the available software was adequate for +this task, mainly because the code for polynomial arithmetic in +the available software was too slow. +So I wrote my own. +My starting point was Arjen Lenstra's LIP package for long integer +arithmetic, which was written in C. +It soon became clear that using C++ instead of C +would be much more productive and less prone to errors, +mainly because of C++'s constructors and destructors +which allow memory management to be automated. +Using C++ has other benefits as well, like function +and operator overloading, which makes for more readable code. + +

+One of the basic design principles of LIP was portability.
+I adopted this principle for NTL as well, for a number of reasons,
+not the least of which was that my computing environment
+kept changing whenever I changed jobs.
+Achieving portability is getting easier as standards,
+like IEEE floating point, get widely adopted, and as the definition
+and implementations of the
+C++ language stabilize.

+Since 1990, NTL has evolved in many ways, +and it now provides a fairly polished and well-rounded programming interface. + +

+When I started working on NTL, there really were not that many +good, portable long integer packages around. +Besides LIP, there was the BSD Unix MP library. +The first version of GMP was released in the early 1990's. +At that point in time, LIP seemed like the best starting point. +LIP remains a reasonable long integer package, but in recent years, +GMP has really become quite good: it seems well supported on +many platforms, and is typically much faster than LIP. + +

+I've now re-structured NTL so that one can use +either 'traditional' LIP or GMP as the long integer package. + +

+ +

+The Future of NTL +

+ +

+ + +As you can well imagine, there is potentially no end to algorithms one +could implement. +That is why I have to stop somewhere. +I think NTL has reached a point where it provides a reasonably +well-rounded suite of algorithms for basic problems. +I plan to continue supporting NTL, fixing bugs and improving performance. + +

+While I don't have time to add significant new functionality to NTL, +there seems to be an ever-growing number of NTL users +out there, and I encourage them to make their code available to +others. +These might be in the form of NTL "add ons", but there is the +possibility of integrating +new functionality or algorithmic improvements into NTL itself. + + +

Wish list

+These are a few things I wish others could perhaps contribute to +NTL. +I'd be happy to discuss and assist with any design and integration issues, +or any other ideas for improvement. +I'd also be happy to discuss ideas for making NTL more +open to make it easier for others to contribute. + +
    +

  • +Support for
+bivariate polynomial arithmetic, including GCDs, resultants,
+and factoring, for the integers and for all the various finite field
+coefficient rings.

  • +Code for elliptic curves, +including an elliptic curve point counting algorithm. + +

  • +Integer factorization algorithms. + +

  • +Implementations of some of the newer lattice basis reduction algorithms. + +

  • +Improvements to the
+polynomial multiplication algorithms over ZZ.
+One specific improvement: the Schoenhage-Strassen algorithm
+currently does not incorporate the so-called "square root of two trick".

  • +Improvements to zz_pX arithmetic.
+For small p, it is likely faster to use
+Kronecker-substitution to reduce zz_pX
+multiplication to ZZ multiplication.
+This is especially true if GMP is used for ZZ arithmetic.
+Implementing this should not be too hard, but then one would have
+to go through all of the zz_pX code to make all
+other operations directly reduce to multiplication in zz_pX.
+This will be a bit tedious, but it shouldn't be too difficult,
+since one can copy and paste corresponding code from zz_pEX,
+where this has already been done.

  • +Improvements to some of the RR algorithms. +In particular, the trig, exp, and log functions are currently woefully +inefficient. + +
+ +

Some things I plan to work on

+ +Here are a few things I plan to work on in the near future. + +
    +

  • +Now that NTL is thread safe, it is possible to use multiple cores
+within NTL to improve performance.
+One possibility is to utilize multiple cores in the modular
+FFT implementation of polynomial multiplication.
+Both the FFT (over different small primes) and reduce/CRT
+(over different coefficients) steps are trivially parallelizable.

  • +Introduce some C++11 features, like "move constructors" +and "move assignment". +This would have to be done with compile-time flags to support +older compilers. + +

  • +If nobody else will do it, I will eventually do the zz_pX +improvements outlined above. + + +
+ + +

+ +

+[Previous] + [Up] + [Next] +
+ + + diff --git a/thirdparty/linux/ntl/doc/tour-struct.html b/thirdparty/linux/ntl/doc/tour-struct.html new file mode 100644 index 0000000000..cc06afedb6 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-struct.html @@ -0,0 +1,1354 @@ + + + +A Tour of NTL: Programming Interface + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Programming Interface +

+

+ +


+
+In this section, we give a general overview of
+NTL's programming interface.
+The following section has links to detailed documentation on
+each and every class and function.

+Note that only those classes and functions documented +in these pages are a part of the "official API": +all other interfaces are subject to change without notice. + + +

+

+

+Basic Ring Classes +

+

+ +The basic ring classes are: +

    +
  • +ZZ: big integers +
  • +ZZ_p: big integers modulo p +
  • +zz_p: integers mod "single precision" p +
  • +GF2: integers mod 2 +
  • +ZZX: univariate polynomials over ZZ +
  • +ZZ_pX: univariate polynomials over ZZ_p +
  • +zz_pX: univariate polynomials over zz_p +
  • +GF2X: polynomials over GF2 +
  • +ZZ_pE: ring/field extension over ZZ_p +
  • +zz_pE: ring/field extension over zz_p +
  • +GF2E: ring/field extension over GF2 +
  • +ZZ_pEX: univariate polynomials over ZZ_pE +
  • +zz_pEX: univariate polynomials over zz_pE +
  • +GF2EX: univariate polynomials over GF2E +
+ +

+All these classes support basic
+arithmetic operators

+   +, -, (unary) -, +=, -=, ++, --, 
+   *, *=, /, /=, %, %=.
+
+ +

+However, the operations +

+   %, %=
+
+only exist for integer and polynomial classes, and +do not exist +for classes +
+  ZZ_p, zz_p, GF2, ZZ_pE, zz_pE, GF2E.
+
+ +

+The standard equality operators (== and !=) +are provided for each class. +In addition, the class ZZ +supports the usual inequality +operators. + +

+The integer and polynomial classes also support "shift operators" +for left and right shifting. +For polynomial classes, this means multiplication or division +by a power of X. + +
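+
+For example, a small sketch:
+
+   ZZ a(5);
+   ZZ b = a << 3;    // integer shift: b == 40
+
+   ZZX f;
+   SetCoeff(f, 1);   // f == X
+   f = f << 2;       // polynomial shift: f == X^3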

+

+

+Floating Point Classes +

+

+ +In addition to the above ring classes, NTL also provides three +different floating point classes: +

    +
  • +xdouble: "double precision" floating point with +extended exponent range (for very large numbers); +
  • +quad_float: "quasi" quadruple-precision floating point; +
  • +RR: arbitrary precision floating point.
+ + +

+

+

+Vectors and Matrices +

+

+ +There are also vectors and matrices over +

+   ZZ ZZ_p zz_p GF2 ZZ_pE zz_pE GF2E RR
+
+which support the usual arithmetic operations. + +

+

+

+Functional and Procedural forms +

+

+ +Generally, for any function defined by NTL, there is +a functional form, and a procedural form. +For example: + + + +

+ +   ZZ x, a, n;
+   x = InvMod(a, n);  // functional form
+   InvMod(x, a, n);   // procedural form
+
+

+ + + +

+
+This example illustrates the normal way these two forms differ
+syntactically.
+However, there are exceptions.
+
+First, if there is an operator that can play the role of the
+functional form, that is the notation used:

+ +   ZZ x, a, b;
+   x = a + b;    // functional form
+   add(x, a, b); // procedural form
+
+

+ + + +Second, if the functional form's name would be ambiguous, +the return type is simply appended to its name: + + + +

+ +   ZZ_p x;
+   x = random_ZZ_p();  // functional form
+   random(x);          // procedural form
+
+

+ + + +Third, there are a number of conversion functions (see below), whose name +in procedural form is conv, but whose name in +functional form is conv<T>, where T is the return type: + + + +

+ +   ZZ x;  
+   double a;
+
+   x = conv<ZZ>(a);  // functional form
+   conv(x, a);       // procedural form
+
+

+ + + + + +

+The use of the procedural form may be more efficient, +since it will generally avoid the creation of a temporary object +to store its result. +However, it is generally silly to get too worked up about +such efficiencies, and the functional form is usually preferable +because the resulting code is usually easier to understand. + +

+The above rules governing procedural and functional forms apply +to essentially all of the arithmetic classes supported by NTL, +with the exception of +xdouble and quad_float. +These two classes only support the functional/operator notation +for arithmetic operations (but do support both forms for conversion). + + + + +

+

+

+Conversions and Promotions +

+

+ +As mentioned above, there are numerous explicit conversion routines, +which come in both functional and procedural forms. +A complete list of these can be found in +conversions.txt. +This is the only place these are documented; they do not appear +in the other ".txt" files. + +

+It is worth mentioning here, however, that generic conversion operators +are provided for vectors and matrices, which act component-wise. +For example, since there is a conversion from ZZ to RR, +there is automatically a conversion from +Vec<ZZ> to Vec<RR>. + + + + + +

+ +Even though there are no implicit conversions, users +of NTL can still have most of their benefits. +This is because all of the basic arithmetic operations +(in both their functional and procedural forms), +comparison operators, and assignment are overloaded +to get the effect of automatic "promotions". +For example: + + + +

+ +   ZZ x, a;
+
+   x = a + 1;
+   if (x < 0)
+      mul(x, 2, a);
+   else
+      x = -1;
+
+

+ + + +

+ +These promotions are documented in the ".txt" files, +usually using a kind of "short hand" notation. +For example: + + + +

+ +ZZ operator+(const ZZ& a, const ZZ& b);
+
+// PROMOTIONS: operator + promotes long to ZZ on (a, b).
+
+

+ + + +This means that in addition to the declared function, there +are two other functions that are logically equivalent to the following: + + +

+ +ZZ operator+(long a, const ZZ& b) { return ZZ(a) + b; }
+ZZ operator+(const ZZ& a, long b) { return a + ZZ(b); }
+
+

+ + + +

+Note that this is not how NTL actually implements these functions.
+It is generally more efficient to write

+ +   x = y + 2;
+
+

+ + +than it is to write + + +

+ +   x = y + ZZ(2);
+
+

+ + +The former notation avoids the creation and destruction +of a temporary ZZ +object to hold the value 2. + +

+Also, don't have any inhibitions about writing tests like + + +

+ +   if (x == 0) ...
+
+

+ + +and assignments like + + +

+   x = 1;
+
+

+
+
+These are all optimized, and do not execute significantly slower
+than the "lower level" (and much less natural)

+ +   if (IsZero(x)) ...
+
+

+ + +and + + +

+ +   set(x);
+
+

+ + + +

+Some types have even more promotions. +For example, the type ZZ_pX has promotions +from long and ZZ_p. +Thus, the add function for ZZ_pX takes the following +argument types: +

+   (ZZ_pX, ZZ_pX), (ZZ_pX, ZZ_p), (ZZ_pX, long), (ZZ_p, ZZ_pX), (long, ZZ_pX)
+
+Each of these functions effectively converts the argument to be promoted +to a ZZ_pX. + +

+Note that when promoting a pair of arguments, at least one +of the arguments must be of the target type. + +

+I have tried to be very consistent with these promotions so +that one usually won't need to hunt through the documentation. +For a given type, there is a natural, fixed set of types +that promote to it. +Here is the complete list: + + +

+ +   destination  source
+   
+   xdouble      double
+   quad_float   double
+   RR           double
+   ZZ           long
+   ZZ_p         long
+   ZZ_pX        long, ZZ_p
+   zz_p         long
+   zz_pX        long, zz_p
+   ZZX          long, ZZ
+   GF2          long
+   GF2X         long, GF2
+   GF2E         long, GF2
+   GF2EX        long, GF2, GF2E
+   ZZ_pE        long, ZZ_p
+   ZZ_pEX       long, ZZ_p, ZZ_pE
+   zz_pE        long, zz_p
+   zz_pEX       long, zz_p, zz_pE
+
+

+ + + +

+All the promotions are documented, but here +are a few general rules describing the available promotions: + +

    + +
  • +All classes provide explicit constructors for promoted types. +For example, + + +

    + +   ZZ w = ZZ(1);
    +   ZZ x(1);  // allowed
    +   ZZ y{1};  // allowed in C++11
+   ZZ z = 1;  // not allowed
    +
    +

    + + + +

  • +Promotions apply uniformly to both procedural and functional +forms, as well as to the corresponding assignment operator forms. +E.g., + + +

    + +   x = x + 2;
    +   add(x, x, 2);
    +   x += 2;
    +
    +

    + + + +

  • +The addition, subtraction, multiplication, equality and comparison +routines always promote both arguments. E.g., + + +

    + +   x = 2 + y;
    +   add(x, 2, y);
    +   if (3 > x || y == 5) ...
    +
    +

    + + + +

  • +The assignment operator always promotes the right-hand side. +E.g., + + +

    + +   x = 2;
    +
    +

    + + + +

  • +For non-integer, non-polynomial types, the division routine +promotes both arguments. +E.g., + + +

    + +   RR x, y, z;
    +      ...
    +   x = 1.0/y;
    +   z = y/2.0;
    +
    +

    + + + +For integer or polynomial types, the division routine +promotes the denominator only. E.g., +

    +   ZZ x, y;
    +      ...
    +   y = x/2;
    +
    + + +
  • +Matrix by scalar and vector by scalar multiplication promote the scalar. +E.g., + + +

    + +   Vec<ZZ> v, w;
    +      ...
    +   v = w*2;
    +   v = 2*w;
    +   v *= 2;
    +
    +

    + + + + +

  • +The monomial constructors for polynomials +and the corresponding SetCoeff routines +promote the coefficient argument. +E.g., + + +

    + +   ZZX f;
+   f = ZZX(INIT_MONO, 3, 5);  // f == 5*X^3
+   SetCoeff(f, 0, 2);  // f == 5*X^3 + 2
    +
    +

    + + + +

  • +In module ZZ, the modular arithmetic routines, as well as +the bit-wise and, or, and xor routines promote their arguments. +There are also several other routines in module ZZ +that have both ZZ and long versions, e.g., +NumBits, bit, weight. +Check the documentation in ZZ.txt +for complete details. + +
+ +

+ + +

+

+

+Some Conversion and Promotion Technicalities +

+

+ +

+Usually, conversions and promotions are semantically equivalent. +There are three exceptions, however. + +

+One exception +is conversion of floating point double to +ZZ. +The safest way to do this is to apply an explicit conversion operator, +and not to rely on promotions. +For example, consider + + +

+ +   ZZ a; double x;
+
+   a = a + x;
+
+

+
+
+This is equivalent to

+ +   a = a + long(x);
+
+

+ + +and to + + +

+ +   a = a + ZZ(x);
+
+

+ + +One could also use an explicit conversion function: + + +

+ +   a = a + conv<ZZ>(x);
+
+

+ + +This last version guarantees that there is no loss of precision, +and also guarantees that the floor of x is computed. +With the first version, one may lose precision when x +is converted to a long, and also the direction of truncation +for negative numbers is implementation dependent +(usually truncating towards zero, instead of computing the floor). +

+The second exception is conversion of unsigned int +or unsigned long to ZZ. +Again, the safest way to do this is with an explicit conversion operator. +As above, if one relies on promotions, the unsigned integer +will be first converted to a signed long, which is most +likely not what was intended. +

+The third exception can occur +on 64-bit machines when +converting a signed or unsigned long to one of NTL's +extended precision floating-point types (RR or quad_float). +These types only provide promotions from double, +and converting a long to a double on a 64-bit machine +can lead to a loss of precision. +Again, if one uses the appropriate NTL conversion routine, +no loss of precision will occur. + +

+
+Another pitfall to avoid is initializing ZZ's
+with integer constants that are too big.
+Consider the following:

+ +   ZZ x;
+   x = 1234567890123456789012;
+
+

+ + +This integer constant is too big, and this overflow +condition may or may not cause your compiler to give +you a warning or an error. +The easiest way to introduce such large constants into your +program is as follows: + + +

+ +   ZZ x;
+   x = conv<ZZ>("1234567890123456789012");
+
+

+ + +Conversion functions are provided for converting C character strings +to the types ZZ, RR, quad_float, +and xdouble. + +

+One should also be careful when converting to RR. +All of these conversions round to the current working precision, which is +usually, but not always, what one wants. + +

+

+

+Input and Output +

+

+NTL provides input and output operators for all +types, using the usual conventions for input and output streams. +If an input error occurs, the "fail bit" of the input stream +is set, and the input variable remains unchanged. +

+Although conversions from C-style character strings +to the types ZZ, xdouble, quad_float, +and RR are provided, one can always read and write +to C++ character streams using the stringstream +class from the standard library, in conjunction with the input +and output operators provided by NTL. + +
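+For example, a sketch using stringstream:
+
+   stringstream ss("12345678901234567890");
+   ZZ x;
+   ss >> x;           // read a ZZ from the stream
+   if (ss.fail()) {
+      ...             // input error: x is unchanged
+   }
+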

+

+

+Aliasing +

+

+ +An important feature of NTL is that aliasing of input and output +parameters is generally allowed. For example, if you +write mul(x, a, b), then a or b +may alias (have the same address as) x +(or any object that x contains, e.g., scalar/vector +or scalar/polynomial multiplication). + +
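+For example, a sketch:
+
+   ZZ x;
+      ...
+   mul(x, x, x);   // x = x^2: both inputs alias the output
+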

+One exception to this rule: +the generic conversions provided for vectors and +matrices assume that their inputs do not alias their outputs. + + +

+

+

+Constructors, Destructors, and Memory Management +

+

+ +NTL generally takes care of managing the space occupied by large, +dynamically sized objects, like objects of class ZZ or any of +NTL's dynamic vectors. +However, it is helpful to understand a little of what is happening behind the scenes. + +

+Almost all classes are implemented as a pointer, and the default constructor +just sets this pointer to 0. +Space is allocated for the object as needed, and when the object's +destructor is called, the space is freed. + +

+Copies are "deep" rather than "shallow". +This means the data itself is copied, and not just a pointer to the data. +If the destination object does not have enough space to hold the source data, +then the space held by the destination object is "grown". +This is done using the C routine realloc(). +Note, however, that if the source object is smaller than the destination +object, the space held by the destination object is retained. +This strategy usually yields reasonable behaviour; +however, one can take explicit control of the situation if necessary, since +almost all NTL classes have a method kill() +which frees all space held by the object, and sets its state to +the default initial state (a value 0 or a zero-length vector). + +
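+For example, a sketch:
+
+   Vec<ZZ> v;
+   v.SetLength(1000);
+      ...
+   v.SetLength(10);   // space for 1000 elements is retained
+   v.kill();          // frees all space; v is a zero-length vector again
+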

+The only exception to the above is the class +ZZ_pContext, and the analogous classes for zz_p, +ZZ_pE, zz_pE, and GF2E. +These objects are implemented as reference-counted pointers, +and copies are "shallow". + +

+While we are discussing initialization, there is one technical point +worth mentioning. +It is safe to declare global objects of any NTL type +as long as one uses only the default constructor. +For example, the global declarations + + +

+ +   ZZ global_integer;
+   Vec<ZZ_p> global_vector;
+
+

+ + +should always work, since their initialization only involves +setting a pointer to 0. +However, +one should avoid initializing global objects with +non-default constructors, and should avoid doing anything that would lead to +non-trivial computations with NTL objects +prior to the beginning of the execution of routine main(). +The reasons for this are quite esoteric and can only be appreciated +by a true +C++ aficionado. +Actually, most such initializations and computations probably will work, +but it is somewhat platform dependent. + +

+Normal people usually do none of these things, so all of this +should not matter too much. +There is, however, one possible exception to this. +A programmer might want to have a global constant initialized like this: + + +

+ +   const quad_float Pi = conv<quad_float>("3.1415926535897932384626433832795029");
+
+

+ + +While this probably will work fine on most platforms, +it may not be an entirely portable construction, +since it will involve a non-trivial computation before +execution of main() begins. +A more portable strategy +is to define a function returning a read-only +reference: + + +

+ +   const quad_float& Pi()
+   {
+      static quad_float pi = 
+         conv<quad_float>("3.1415926535897932384626433832795029");
+      return pi;
+   }
+
+

+ + +and then call the function Pi() to get a read-only reference +to this constant value: + + +

+ +   area = Pi()*r*r;
+
+

+ + +The initialization will then take place the first time Pi() +is called, which is presumably after main() starts, +and so everything should work fine. +This is a very simple and general strategy that most C++ +experts recommend using whenever the initialization of a global +object requires non-trivial computation. + + + +

+

+

+Residue class rings and modulus switching +

+

+ +NTL provides a number of classes to represent residue class rings: +

+   ZZ_p, zz_p, GF2, ZZ_pE, lzz_pE, GF2E.
+
+For each such class, except GF2, there is a global, current modulus. + +

+We focus on the class ZZ_p, but similar comments apply to the other +residue class types. +For example, for ZZ_p, you can set the current modulus to p +as follows: + + +

+ +   ZZ_p::init(p);
+
+

+ + +The current modulus must be initialized before any operations +on ZZ_p's are performed. The modulus may be changed, and a mechanism is provided +for saving and restoring a modulus. + +

+Here is what you do to save the current modulus, temporarily +set it to p, and automatically restore it: + + + +

+ +   { 
+      ZZ_pPush push(p); 
+
+      ...
+
+   }
+
+

+ + + +The constructor for push will save the current modulus, and install p as the +current modulus. The destructor for push will restore the old modulus when the +scope enclosing it exits. This is the so-called RAII (resource acquisition is +initialization) paradigm. + +

+You could also do the following: + + + +

+   {
+      ZZ_pPush push; // just backup current modulus
+
+        ...
+
+      ZZ_p::init(p1); // install p1
+
+        ...
+
+      ZZ_p::init(p2); // install p2
+
+      // reinstall original modulus at close of scope
+   }
+

+ + + +

+Warning: C++ syntax can be rather unfriendly sometimes. +When using RAII objects like ZZ_pPush, watch out for +the following errors: + + +

+   ZZ_pPush push();  // ERROR: local declaration of a function!!
+   ZZ_pPush(p);      // ERROR: temporary RAII-object created and
+                     //        immediately destroyed!!
+

+ + +Unfortunately, most compilers do not issue any warnings +in these situations. +I have fallen into both traps myself. + +

+The ZZ_pPush interface is good for implementing simple stack-like +"context switching". For more general context switching, +use the class ZZ_pContext: + + +

+   ZZ_p::init(p);     // set current modulus to p
+
+      ...
+
+   ZZ_pContext context;
+   context.save();    // save the current modulus p
+
+      ...
+
+   ZZ_p::init(q);     // set current modulus to q
+
+      ...
+  
+   context.restore(); // restore p as the current modulus
+

+ + +Note that ZZ_pContext's are essentially "smart pointers", +and they may be copied. +Generally speaking, saving, restoring, and copying ZZ_pContext's +are very cheap operations. +Likewise, saving and restoring contexts using ZZ_pPush +objects are very cheap operations. + + +

+It is critical that ZZ_p objects created under one ZZ_p modulus are not used in +any non-trivial way "out of context", i.e., under a different (or undefined) +ZZ_p modulus. However, for ease-of-use, some operations may be safely +performed out of context. These safe operations include: the default and copy +constructor, the destructor, and the assignment operator. In addition, it is +generally safe to read any ZZ_p object out of context (i.e., printing it out, or +fetching its underlying representative using the rep() function). + +

+Any unsafe uses out of context are not in general checked, and may +lead to unpredictable behavior. + + + +

+The implementations of Vec<ZZ_p>, Vec<GF2E>, and Vec<GF2> +are specialized to manage memory more +efficiently than in the default implementation of Vec<T>: +

    +

  • +Contiguous elements in a Vec<ZZ_p> are allocated in a contiguous region of +memory. This reduces the number of calls to the memory allocator, and +leads to greater locality of reference. A consequence of +this implementation is that any calls to SetLength on a Vec<ZZ_p> object will +need to use information about the current modulus, and so such calls should +only be done "in context" (see the sketch after this list). That said, it is still safe to construct a +Vec<ZZ_p> using the default or copy constructor, and to assign or append one +Vec<ZZ_p> to another "out of context". + +

  • +The same strategy is used for Vec<GF2E>'s. + +

  • +In any case, the above restrictions adhere to the general rules +for safely using residue class ring objects "out of context". + +

  • +Vec<GF2>'s are implemented by packing coefficients (which are just bits) +into words. A mechanism is provided to make indexing these vectors +behave like normal vectors, via a class that mimics ordinary references +to GF2's. +
+ + +
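+For example, a sketch of the "in context" rule for SetLength:
+
+   ZZ p;
+      ...
+   ZZ_p::init(p);      // establish the modulus first...
+   Vec<ZZ_p> v;
+   v.SetLength(100);   // ...since this allocates modulus-dependent space
+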

+

+

+Error Handling and Exceptions +

+

+Prior to version 8.0 of NTL, errors were dealt with in a simple way: +print an error message and abort. + +As of version 8.0, NTL provides error handling with exceptions. +To use this feature, you will need to configure NTL with the +NTL_EXCEPTIONS flag turned on. +You will also need a C++11 compiler. + +

+The exceptions thrown by NTL are either a std::bad_alloc +exception (in case of memory allocation error), +or a class (defined in namespace NTL) +derived from std::runtime_error: +

    +
  • ErrorObject (derived from std::runtime_error) +
    • base class
    +
  • LogicErrorObject (derived from ErrorObject) +
    • used to indicate a logic error, such as incorrect + function parameters, index out of range, etc.
    +
  • ArithmeticErrorObject (derived from ErrorObject) +
    • used to indicate an arithmetic error, such as divide by zero
    +
  • ResourceErrorObject (derived from ErrorObject) +
    • used to indicate an overflow error (e.g., when a number cannot be stored as a long)
    +
  • FileErrorObject (derived from ErrorObject) +
    • used to indicate a problem opening or closing a file
    +
  • InputErrorObject (derived from ErrorObject) +
    • used to indicate a problem reading from a stream
    +
+ +

+All of these error objects override the what() +method of std::exception with an appropriate +error message. + +

+There is also a special exception class InvModErrorObject, +which is derived from ArithmeticErrorObject, +and is thrown when a modular inverse computation over ZZ fails +(either directly, or indirectly through PowerMod +computation, or via an inverse computation in ZZ_p). +The InvModErrorObject provides two methods, +get_a() and get_n(), which provide read-only +references to the offending objects a and n +(so GCD(a, n) != 1). + +
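+For example, a sketch of catching this exception (assuming NTL was +configured with NTL_EXCEPTIONS=on):
+
+   try {
+      ZZ x = InvMod(a, n);   // throws if GCD(a, n) != 1
+   }
+   catch (const InvModErrorObject& e) {
+      cerr << e.what() << "\n";
+      ZZ g = GCD(e.get_a(), e.get_n());   // inspect the offending inputs
+   }
+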

+The generic class ErrorObject is not thrown directly +by any NTL routines, except for the legacy function Error, +which is no longer called by any NTL routines. +New functions +

+   MemoryError, LogicError, ArithmeticError, ResourceError, FileError, InputError
+
+are used to throw exceptions derived from ErrorObject. + +

+Issues with GMP: +As of this writing (December 2014), +GMP itself provides only the very crude print-message-then-abort +error handling. +Note that NTL only uses low-level GMP routines (the mpn-level +routines), +and these routines should only abort if they cannot allocate space +for temporary big integers within GMP itself. +So this should only be an issue if you are working with some +very large integers. +The GMP developers are working on improving their error handling. +When that happens, NTL will inherit these improvements. +If you really need proper error handling, and are willing to pay +a certain performance penalty, then you should configure +and build NTL without GMP. + +

+Issues with gf2x: +Similar comments apply to NTL builds that use the gf2x +library. + + +

+

+Exception safety: +I have tried to carefully document exception safety characteristics +for just a few, critical, low-level classes: +vectors and matrices +(vector.txt and +matrix.txt), +smart pointer classes (SmartPtr.txt), +thread-safe lazy initialization classes +(Lazy.txt and +LazyTable.txt). +Otherwise, it is only safe to assume that NTL functions +provide a weak exception-safety guarantee: +if an exception is thrown, the stack unwinding process will +not leak any resources and will leave all modified objects +in a reasonable state: at least, such objects may be safely +destroyed, and may also be assigned to or reset; +however, they may not necessarily +be safely used as inputs to other functions. +When stronger exception safety is required, you can always +compute results into dynamically allocated objects +pointed to by "smart pointers", +and then move or swap these pointers into place after all computations +have succeeded. +
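+For example, a sketch of this compute-then-swap idiom (DoComputation is +a hypothetical function):
+
+   Vec<ZZ> result, tmp;
+      ...
+   DoComputation(tmp);   // may throw: result is untouched
+   swap(result, tmp);    // no-throw for vectors of non-fixed length
+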

+As NTL provides swap functions for all its major classes, +and as swap functions have evolved to play a critical role +in writing exception-safe code, they deserve a special mention here: +

    +

  • +For all classes except ZZ, ZZ_p, GF2X, +GF2E, and Vec<T>, the swap function is guaranteed to not throw +any exceptions. +

  • +For ZZ objects that are not elements of a ZZVec, +ZZ_p objects that are not elements of a Vec<ZZ_p>, +GF2X objects that are not elements of a GF2XVec, +and +GF2E objects that are not elements of a Vec<GF2E>, +the swap function is guaranteed to not throw any exceptions. +

  • +For Vec<T> objects whose length has not been fixed, +the swap function is guaranteed to not throw any exceptions. +

  • +For the remaining cases, the swap function provides a strong exception-safety +guarantee (the operation either succeeds, or throws and leaves data unchanged). +
+These rules are unfortunately a bit complicated, due to NTL's historical +legacy, and to its special memory management of +ZZVec, +Vec<ZZ_p>, +GF2XVec, +and +Vec<GF2E> +types. + + + + + + + +

+ +

+[Previous] + [Up] + [Next] +
+ + + + diff --git a/thirdparty/linux/ntl/doc/tour-time.html b/thirdparty/linux/ntl/doc/tour-time.html new file mode 100644 index 0000000000..24b921b384 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-time.html @@ -0,0 +1,117 @@ + + + +A Tour of NTL: Some Performance Data + + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Some Performance Data +

+

+ +


+ +Here are some timing figures from using NTL. +They were obtained using NTL 9.9.0 compiled with g++ 4.8.5 +and with GMP 6.1 on a 2.3GHz Intel Haswell processor +(E5-2698 v3) running Linux. + +

+All times are in seconds. +The times were obtained using the program Timing +included in the distribution. +The data was generated using NTL's random number generator, +but running this on a different machine should (in theory) +generate the same data. + +

+NOTE: the PRG changed in v9.4.0, so there may be +some inconsistencies. + +

+

+
+multiply 1000-bit ints: 1.76284e-07
+remainder 2000/1000-bit ints: 3.60535e-07
+gcd 1000-bit ints: 2.87045e-06
+multiply degree-1000 poly mod 1000-bit prime: 0.00432981
+remainder degree-2000/1000 poly mod 1000-bit prime: 0.0125583
+preconditioned remainder degree-2000/1000 poly mod 1000-bit prime: 0.00443356
+gcd degree-1000 poly mod 1000-bit prime: 0.122722
+multiply degree-1000 int poly with 1000-bit coeffs: 0.00812543
+
+factoring degree-1000 poly mod 1000-bit prime...
+square-free decomposition...0.122685
+factoring multiplicity 1, deg = 1000
+computing X^p...7.23809
+computing DDF...generating baby steps...+++++++++++++++++++++6.21623
+generating giant steps...++++++++++++++++++++++6.49462
+giant refine...++++split 1 1
+split 2 26
+*++++*++++*++++*++++*++*split 0 973
+giant refine time: 4.8139
+baby refine...split 1 1
+split 26 26
+split 973 973
+baby refine time: 3.3e-05
+DDF time: 17.5262
+...total time = 24.8965
+
+multiply 500-bit GF2Xs: 5.54208e-08
+remainder 1000/500-bit GF2Xs: 8.40658e-07
+gcd 500-bit GF2Xs: 3.60963e-06
+
+factoring degree-500 GF2X: 0.00015574
+gcd 500-bit GF2X: 3.61365e-06
+multiply degree-500 poly mod 500-bit GF2X: 0.00251375
+remainder degree-1000/500 poly mod 500-bit GF2X: 0.00905957
+preconditioned remainder degree-1000/500 poly mod 500-bit GF2X: 0.00505149
+gcd degree-500 poly mod 500-bit GF2X: 0.0478557
+
+factoring degree-500 poly mod 500-bit GF2X...
+square-free decomposition...0.004635
+factoring multiplicity 1, deg = 250
+computing X^p...0.488941
+computing DDF...generating baby steps...++++++++++0.332162
+generating giant steps...+++++++++++0.357681
+giant refine...++++split 1 9
+split 2 13
+split 4 44
+*++++split 7 73
+*split 0 111
+giant refine time: 0.233787
+baby refine...split 9 9
+split 13 13
+split 44 44
+split 73 73
+split 111 111
+baby refine time: 0.001275
+DDF time: 0.924938
+
+...total time = 1.41792
+
+
+
+
+ + +

+ + +

+[Previous] + [Up] + [Next] +
+ + + + diff --git a/thirdparty/linux/ntl/doc/tour-tips.html b/thirdparty/linux/ntl/doc/tour-tips.html new file mode 100644 index 0000000000..1fa37a683f --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-tips.html @@ -0,0 +1,132 @@ + + + +A Tour of NTL: Tips for Getting the Best Performance out of NTL + + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Tips for Getting the Best Performance out of NTL +

+

+ +


+ +

    + +

    +

  1. +Make sure you run the configuration wizard when you install NTL. +This is the default behaviour in the makefile +in the Unix distribution, so don't change this; +in the Windows distribution, there is unfortunately no +easy way to run the wizard. + +

    +

  2. +In time-critical code, avoid creating unnecessary temporary +objects. +For example, instead of + + +

    + +ZZ InnerProduct(const ZZ *a, const ZZ *b, long n)
    +{
    +   long i;
    +   ZZ res;
    +   for (i = 0; i < n; i++)
    +      res += a[i] * b[i];
    +   return res;
    +}
    +
    +

    + + +write this as + + +

    + +ZZ InnerProduct(const ZZ *a, const ZZ *b, long n)
    +{
    +   long i;
    +   ZZ res, t;
    +   for (i = 0; i < n; i++) {
    +      mul(t, a[i], b[i]);
    +      add(res, res, t);
    +   }
    +   return res;
    +}
    +
    +

    + + +The first version of InnerProduct +creates and destroys a temporary object, holding the value +a[i]*b[i], in every loop iteration. +The second does not. + +

+NOTE: actually, for the class ZZ, there is a +special function MulAddTo, with which one can write +the loop body simply as +

    +   MulAddTo(res, a[i], b[i]);
    +
    + + + + +

    +

  3. +If you use the class ZZ_p, try to avoid switching the modulus +too often, as this can be a rather expensive operation. +If you must switch the modulus often, +use the class ZZ_pContext to save the information +associated with the modulus (see ZZ_p.txt and the sketch after this list). +The same holds for analogous classes, such as zz_p +and GF2E. + + +
+ +
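+A minimal sketch of this tip, following the ZZ_pContext interface from ZZ_p.txt:
+
+   ZZ_pContext ctx;
+   ctx.save();          // save the current modulus once
+      ...
+   ZZ_p::init(q);       // switch moduli as needed
+      ...
+   ctx.restore();       // cheap: no recomputation of modulus data
+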

+ +

+[Previous] + [Up] + [Next] +
+ + + + diff --git a/thirdparty/linux/ntl/doc/tour-unix.html b/thirdparty/linux/ntl/doc/tour-unix.html new file mode 100644 index 0000000000..15566c03f7 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-unix.html @@ -0,0 +1,546 @@ + + + +A Tour of NTL: Obtaining and Installing NTL for UNIX + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Obtaining and Installing NTL for UNIX +

+

+ +


+ +This procedure should work on most Unix or Unix-like platforms +(including Mac OSX, and Windows with MinGW or Cygwin tools). + +

+ + +To obtain the source code and documentation for NTL, + +download ntl-xxx.tar.gz, +placing it in a directory, and then, working in this directory, +do the following. +Here, "xxx" denotes the current version number. + + + +

+

+   % gunzip ntl-xxx.tar.gz
+   % tar xf ntl-xxx.tar
+   % cd ntl-xxx/src
+   % ./configure 
+   % make
+   % make check
+   % make install
+
+ +This will build, test, and install NTL in +/usr/local. +For this to work, GMP must already be installed +(most Unix distributions already come with GMP installed, +but see this page for more +details). +If you really do not want to use GMP, +you can pass the option +NTL_GMP_LIP=off +to configure. + +

+After installation, +you will find the NTL header files in /usr/local/include/NTL + and the compiled binary +in /usr/local/lib/libntl.a +(this is a static library -- if you want a shared +library, see below). +Documentation is in /usr/local/share/doc, +with the main entry-point at /usr/local/share/doc/tour.html. + + + +

+If you want very high-performance for polynomial arithmetic +over GF(2), you may want to consider using the gf2x library. +To do this, gf2x must already be installed. +In addition, you should invoke configure +with the option NTL_GF2X_LIB=on. +This page provides more details. + + + +

+If you want to install NTL somewhere besides /usr/local, +pass the option PREFIX=/path/to/install/ntl to +configure. +If GMP is installed somewhere besides /usr/local, +pass the option +GMP_PREFIX=/path/to/gmp +to configure. +You can also pass +GF2X_PREFIX=/path/to/gf2x +to configure, +if gf2x is installed somewhere besides /usr/local. +As a shorthand, you can pass the option +DEF_PREFIX=/path/to/all/software, which will +override the default for PREFIX, +GMP_PREFIX, and GF2X_PREFIX. + + + +

+Now suppose you want to compile a program that uses NTL. +Suppose you are working in some arbitrary directory and foo.c +is your program. +Assume that you have installed NTL in /usr/local as above. +The following should work: +

+   % g++ -g -O2 foo.c -o foo -lntl -lgmp -lm
+
+If you have installed NTL and/or GMP in a non-standard location, +say /path/to/sw, +then: +
+   % g++ -g -O2 -I/path/to/sw/include foo.c -o foo  -L/path/to/sw/lib -lntl -lgmp -lm
+
+If you build NTL with gf2x, just add the option +-lgf2x to the above, right after -lgmp. + +

+If you are working in the NTL src directory itself, +you can just run: +

+   % make foo
+
+to compile a program foo.c, as above. + +

+

+More Details +

+

+What follows is a more detailed description of the installation process. + + + + +

+Step 1. +Extract the source files by executing: +

+   % gunzip ntl-xxx.tar.gz
+   % tar xvf ntl-xxx.tar
+
+ +On most systems, the following shortcut works: +
+   % tar xzvf ntl-xxx.tar.gz
+
+ +

+Note that this will unpack everything into a sub-directory ntl-xxx, +creating this directory if necessary. +Next: +

+   % cd ntl-xxx
+   % ls
+
+You should see a file "README", and directories +"include", "doc", and "src". +The directory "doc" contains all the documentation. +The file "doc/tour.html" contains a copy of the on-line documentation. +The directory "include" +contains all the header files within a subdirectory +"include/NTL". +The directory "src" contains everything else. +Go there now: +
+   % cd src
+
+ +

+Step 2. +Run the configuration script. + +

+Execute the command +

+   % ./configure [ variable=value ]...
+
+ +This configure script generates the file "makefile" and the file +"../include/NTL/config.h", based upon the values assigned to the +variables on the command line. + + +

+ +Here are the most important variables, and their default values. + +

+

+
+CXX=g++              # The C++ compiler
+
+CXXFLAGS=-g -O2      # C++ compilation flags
+
+NATIVE=on            # Compiles code targeted to the current hardware
+
+DEF_PREFIX=/usr/local# Default software directory
+
+PREFIX=$(DEF_PREFIX) # Directory in which to install NTL library components
+SHARED=off           # Generate a shared library (as well as static)
+
+NTL_THREADS=off      # compile in thread-safe mode
+NTL_THREAD_BOOST=off # compile with thread boosting enabled
+NTL_EXCEPTIONS=off   # compile with exceptions enabled
+
+NTL_GMP_LIP=on       # Switch to enable the use of GMP as primary 
+                     #   long integer package
+
+GMP_PREFIX=$(DEF_PREFIX) # Directory in which GMP components are installed
+
+NTL_GF2X_LIB=off     # Switch to enable the use of the gf2x package
+                     #   for faster arithmetic GF(2)[X]
+
+GF2X_PREFIX=$(DEF_PREFIX) # Directory in which gf2x components are installed
+
+
+
+ +

+Examples. +

+ +

    +
  • +If you are happy with all the default values, run: +
    +   % ./configure
    +
    +Actually, the initially installed makefile and config.h files +already reflect the default values, and you do not have to even run +the configure script. + +

    +

  • +If your C++ compiler is called CC, run: +
    +   % ./configure CXX=CC
    +
    + +

    +

  • +If you want to use, say, the options -g and -O3 when +compiling C++, run: +
    +   % ./configure "CXXFLAGS=-g -O3"
    +
    + + +

    +If GMP was installed in + $HOME/sw, +run: +

    +   % ./configure GMP_PREFIX=$HOME/sw
    +
    +Go here for complete details. + + +

    +

  • +If gf2x +is installed in a standard system directory, and you want to use it +to obtain better performance for polynomial arithmetic over GF(2), +run: +
    +   % ./configure NTL_GF2X_LIB=on
    +
    +If gf2x was installed in + $HOME/sw, +run: +
    +   % ./configure NTL_GF2X_LIB=on GF2X_PREFIX=$HOME/sw
    +
    +Go here for complete details. + +

    +

  • +If you want to install NTL in the directory $HOME/sw, +run: +
    +   % ./configure PREFIX=$HOME/sw
    +
    +
+ +

+There are a number of more esoteric configuration variables that can be set. +See config.txt for a complete +description. + +

+Note that many of these configuration options can also be set +by editing the two files makefile +and ../include/NTL/config.h by hand. +These files are fairly simple and well documented, and so this is not +too hard to do. + +

+Note that the file "../include/NTL/def_config.h" +contains a backup copy of the original config.h file, +and that the file "def_makefile" +contains a backup copy of the original makefile file. + + + + +

+Step 3. +Execute make. + +

+Just type: +

+   % make
+
+ +

+The build process after this point is fully automatic. +But here is a description of what happens. + +

+ +

    +
  1. +The makefile +builds the file "../include/NTL/mach_desc.h", which defines some machine characteristics +such as word size and machine precision. +This is done by compiling and running a C program +called MakeDesc +that figures out these characteristics on its +own, and prints some diagnostics to the terminal. + +

    +

  2. +Several scripts are run to obtain more information +about your system (e.g., +to find a timing function, a "getpid" function, +and to detect if things like Intel AVX intrinsics work). + + +

    +

  3. +The file "../include/NTL/gmp_aux.h" +is generated for use with GMP. +If not using GMP, this file is still created, but it is empty. + + +

    +

  4. +The configuration wizard script is run. +This script works in a sub-directory, +compiling several programs, +and performing a number of timing experiments, +in order to determine the optimal setting for a number of flags +in the file ../include/NTL/config.h. +When the script finishes (it may take several minutes), +you will be told what the wizard thinks are the best settings, +and your config.h file will be automatically updated. +Note that any flags you set in Step 2 +will be in +effect while the wizard runs, and will be retained in the updated +config.h file, with the exception of the flags +
    +   NTL_LONG_LONG NTL_AVOID_FLOAT NTL_TBL_REM NTL_TBL_REM_LL NTL_AVOID_BRANCHING 
    +   NTL_SPMM_ULL NTL_SPMM_ASM NTL_GF2X_NOINLINE NTL_GF2X_ALTCODE
    +   NTL_GF2X_ALTCODE1 NTL_FFT_LAZYMUL NTL_FFT_BIGTAB NTL_PCLMUL
    +
    +which are set by the wizard. +Also note that if you do not want the wizard to run, +you should pass WIZARD=off to the configure script; +however, this is not recommended. + +

    +

  5. +The makefile will compile all the source files, +and then create the library "ntl.a" in the current directory. +
+ +

+Note that for finer control you can optionally break up this process +into the five +component steps: +

+   % make setup1
+   % make setup2
+   % make setup3
+   % make setup4
+   % make ntl.a
+
+ + +

+ +

+After NTL is built. + +

+Executing make check runs a series of timing and test programs. +It is a good idea to run this to see if everything really +went well. + +

+Executing make install +copies a number of files to a directory <prefix> that you +specify by passing PREFIX=<prefix> +as an argument to configure at configuration time, +or as an argument to make install at installation time +(e.g., make install PREFIX=/path/to/sw). +The default is /usr/local, so either you need root +permissions, or you choose a <prefix> for which +you have write permission. +The files ../include/NTL/* are copied into +<prefix>/include/NTL. +The file ntl.a is copied to <prefix>/lib/libntl.a. +The files ../doc/* are copied into +<prefix>/share/doc/NTL. + +

+You can also "fine tune" the installation procedure further. +See the configure documentation for details. + +

+Executing make uninstall undoes make install. + + +

+Executing make clobber essentially +undoes make. +Make sure you do this if you re-build NTL for a different architecture! + +

+Executing make clean will remove object files, but not +ntl.a. +To rebuild after executing make clean, execute make ntl.a. + + +

+

+ +Building a Shared Library + +

+

+ + +By default, the above installation procedure builds +a static library only. +Static libraries are nice because the procedures for building +and using them are nearly identical across various flavors +of Unix. +However, static libraries have their drawbacks, and sometimes +it is desirable to build a shared library. +This can be done (in theory) by simply passing SHARED=on to +NTL's configure. + +

+If you set SHARED=on, then behind the scenes, +the procedure used by the makefile changes a bit. +In particular, the magical GNU program libtool is used +to deal with all idiosyncrasies of shared libraries. +You may need to set the +configuration variable LIBTOOL, to point to another version of +libtool. +For example, on Mac OSX, the built-in command called libtool +is not +actually the GNU libtool program; +in this case, you will want to set +LIBTOOL=glibtool. +On other systems, it may be necessary to download and +install a fresh copy of the libtool program (which can be obtained from +here). +Note that if SHARED=on, then +in addition to using the libtool program, the makefile +relies on +features specific to GNU make. +

+Note that if you want to build NTL as a shared library, +then if you use them, GMP and gf2x must also +be built and installed as shared libraries. +Also note that to use a shared library version of NTL, +you may have to do something special, like set a special +shell variable: the output generated by the libtool +program during make install should give specific +instructions. +In addition, if NTL is built as a shared library, then +you typically do not have to include -lgmp (if using GMP), +or -lgf2x (if using gf2x), or corresponding +-L flags, or -lm +on +the command line when compiling programs that use NTL. + + +

+

+ +32-bit and 64-bit ABIs + +

+

+ + +An ABI (Application Binary Interface) defines the sizes of various +C data types. +Typically, with a 32-bit ABI, int's and long's are 32 bits, +while on a 64-bit ABI, int's are 32 bits and long's are 64 bits. +Some platforms +support both 64-bit and 32-bit ABI's; +typically in such settings, the 64-bit ABI will yield much better +performance, while the 32-bit ABI is available for backward compatibility. +In addition, the 64-bit ABI may not be the default: +if you are using gcc, you need to pass the -m64 +flag to the compiler to get the 64-bit ABI. + +

+When compiling NTL, you may want to try running configure +with CXXFLAGS="-O2 -m64" to force a 64-bit ABI -- this +may yield a very marked performance improvement. + +

+If you are using NTL with either the GMP or gf2x libraries, +then these must be built with the same ABI as NTL. +The installation script for GMP will typically select the 64-bit ABI +automatically if it is available. +The installation script for gf2x may need some hints. + +

+When compiling programs that use NTL, you must also ensure that +the program is compiled with the same ABI as NTL. +Again, if you want a 64-bit ABI, then just pass the flag -m64 +to the compiler. + + + + + +

+

+ +

+[Previous] + [Up] + [Next] +
+ + + diff --git a/thirdparty/linux/ntl/doc/tour-win.html b/thirdparty/linux/ntl/doc/tour-win.html new file mode 100644 index 0000000000..e857a3c18b --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour-win.html @@ -0,0 +1,287 @@ + + + +A Tour of NTL: Obtaining and Installing NTL for Windows +and other Platforms + + +
+[Previous] + [Up] + [Next] +
+ +

+

+A Tour of NTL: Obtaining and Installing NTL for Windows +and other Platforms +

+

+ +


+ +The WinNTL distribution of +NTL can be used on any Windows platform +(Windows 95 or later). +Actually, there is nothing Windows-specific about WinNTL. +The source code is identical to the UNIX NTL distribution; +only the packaging is slightly different, and no assumptions +are made about the program development environment. +Thus, it should be possible to install WinNTL on +other operating systems +with little difficulty. + +

+MAC OSX Users: +since MAC OSX is essentially just a (rather funny) flavor of Unix, +you will be much better served using the +Unix distribution. + +

+Windows Users: +you should consider using a Unix emulation environment like +Cygwin +or +MinGW, +instead of +Microsoft development tools. +

+Why? +

    +
  • +These environments use gcc, which generally adheres more closely to language +standards and produces more efficient code than Microsoft's +compiler. +

  • +With these environments, you can use NTL's +Unix distribution, +and the installation is almost entirely automatic: +no pointing and clicking -- not much more +than ./configure and make. +You can also easily install GMP +and run NTL's performance-tuning Wizard. +These factors combined can make a huge difference in performance, +easily giving you a 10x or more performance improvement. +

  • +On 64-bit machines, you should definitely consider Cygwin: +the 64-bit version of Cygwin gives you an +LP64 data model, +which for many reasons is preferable to the Windows data model. +In particular, you will get the most performance out of NTL +in this environment. +
+ + + + +

+The remaining instructions on this page only apply +if you do not use a Unix emulation environment +like Cygwin or MinGW. + +

+If you really want to get the most out of NTL, please stop, +and seriously consider using a Unix emulation environment +and +NTL's +Unix distribution. +Your code will be much snappier, and your quality of life +will be much better. + +

+You have been warned. + +

+ +Obtaining and unpacking NTL. + + +

+ +To obtain the source code and documentation for NTL, + +download WinNTL-xxx.zip. +Here, "xxx" is the current version number. +Then unzip this file into a directory (folder). +This will unpack everything into a directory called "WinNTL-xxx". +Inside this directory, you will find several directories. + +

+ +

    +
  • + +The directory "doc" contains all of NTL's documentation, +including this "tour" ("tour.html") +and the ".txt" files explaining each module, +which can be accessed directly, or through the "tour". + +

    +

  • + +The directory "src" +contains all of the source files for the library, +all with ".cpp" extensions. + +

    +

  • + +The directory "include" contains a single directory called +"NTL", +and the latter contains all of the ".h" files. + +
+ +

+ + +Platform dependent macros. + +

+ +In directory "include/NTL" there is a file called +"mach_desc.h", +which contains all of the platform-dependent macro definitions. +The default settings should be correct for any +system running Windows; however, the correct definitions +can depend on the compiler and run-time environment. +Therefore, to be on the safe side, +you might consider compiling and running the program MakeDesc, +whose source files are in directory "MakeDesc". +This program will +dynamically build +a correct "mach_desc.h" for your platform (processor, compiler, +run-time environment). +To get accurate results, +you must compile this program using the level of optimization +(or higher) that you will use for NTL. +The program will print some diagnostics to the screen, +and create the file "mach_desc.h" (in the current +directory, and not in the "include/NTL" directory, +where it needs to go). + +

+ +Configuration flags. + +

+ +Also in directory "include/NTL" is a file called "config.h". +You can edit this file to override some of NTL's default options +for basic configuration and performance. +Note that the file "def_config.h" +contains a backup copy of the original config.h file. + + +

+ +Test programs. + +

+ +The directory "tests" contains several test programs. +For each program FooTest, there is a source file +"FooTest.cpp", and optionally two files +"FooTestIn" and "FooTestOut". +If the latter exist, then the program should be run with +the "FooTestIn" as standard input; +correct output (printed to standard output) should match +the contents of "FooTestOut" exactly; +note that these programs also print diagnostic output on the screen +(through standard error output). + +

+ +Timing functions. + +

+ +The directory "GetTime" contains several alternative +definitions of the GetTime() function. +The file "GetTime.cpp" in the "src" directory should be OK, +but your compiler might like one of the definitions in +the directory "GetTime" better. + +

+ +Other tools. + +

+ +The directory "misc" contains a program newnames.cpp +to help make the +transition to NTL version 3.5 from earlier versions of NTL. +See the changes section for more details. +It also contains the programs gen_lip_gmp_aux.cpp +and gen_gmp_aux.cpp that automatically generate +the auxilliary files needed when using NTL with GMP. +You will have to look at the makefile in the Unix distribution +to see how to use these. + + + +

+ +Compiling NTL. + +

+Since there are a number of incompatible compilers and program development +environments available for Windows, no attempt has been made to +provide automatic tools for building and testing, +as is done for the Unix distribution. +Nevertheless, +it should be straightforward to install NTL (even if it involves a bit of +pointing and clicking). +First, compile all of the files in "src", and create a static library. +Make sure the compiler knows where to find NTL's include files +(directory "include" and not "include/NTL"). +Then, to compile a program using the library, +make sure the compiler knows about the library and the directory +of NTL's include files. +In any case, if you want to do any serious computations, +you will certainly want to compile everything with your +compiler's code optimizer on. +

+Here is a link to a video showing how NTL can be built using a Microsoft compiler. +To me, it looks very complicated: I recommend MinGW or Cygwin. +

+Further remarks. + +

+ +

+TIP: When writing programs using NTL, +you should include files using the syntax +

+   #include <NTL/ZZ.h>
+
+and not using a backslash ("\") as a delimiter. + +

+TIP: When writing windows applications using NTL +(as opposed to console applications) you might want to compile +your program with the NTL_NO_MIN_MAX macro defined. +This suppresses the declaration of several min +and max functions in file tools.h that conflict +with macro names in the MFC header files. +Do not attempt to build the library with this macro defined -- only +programs that use the library. +Another solution is to define the macro NOMINMAX, which will +tell the Microsoft compiler to not define min/max macros. + + + +

+ +

+[Previous] + [Up] + [Next] +
+ + + + diff --git a/thirdparty/linux/ntl/doc/tour.html b/thirdparty/linux/ntl/doc/tour.html new file mode 100644 index 0000000000..a44ff27ec6 --- /dev/null +++ b/thirdparty/linux/ntl/doc/tour.html @@ -0,0 +1,113 @@ + + + +A Tour of NTL + + +

+

+A Tour of NTL +

+

+ +

+ +

+


+

+Table of Contents +

+ +

+

    + +
  1. + +Introduction + + +
  2. + +Examples + + +
  3. + +Programming Interface + +(Read this to get an overall view on NTL's interfaces +and conventions) + +
  4. + +Summary of NTL's Main Modules + +(This contains links to detailed documentation of all +classes and functions) + +
  5. + +Obtaining and Installing NTL for UNIX + + +
  6. + +Obtaining and Installing NTL for Windows and other Platforms + + +
  7. + +Tips for Getting the Best Performance out of NTL + + +
  8. + +NTL Implementation and Portability + + +
  9. + +Using NTL with GMP + + +
  10. + +Using NTL with the gf2x library + + +
  11. + +Some Performance Data + + +
  12. + +NTL past, present, and future + + +
  13. + +Summary of Changes + + +
  14. + +Acknowledgements + + +
+ +

+


+ + +Back to NTL page + +

+ +Back to Victor Shoup's home page + + + + + diff --git a/thirdparty/linux/ntl/doc/vec_GF2.cpp.html b/thirdparty/linux/ntl/doc/vec_GF2.cpp.html new file mode 100644 index 0000000000..5d2e73c175 --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_GF2.cpp.html @@ -0,0 +1,242 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/vec_GF2.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: vec_GF2
+
+SUMMARY:
+
+
+The class Vec<GF2> is explicitly specialized.
+It behaves much like a generic Vec<T> (see vector.txt),
+but there are some differences.
+
+For efficiency, elements of a Vec<GF2> are "packed" into a word.
+You can still use subscript notation v[i] or v(i).
+For const vectors, these evaluate to values of type const GF2.
+For non-const vectors, these evaluate to values of the
+special type ref_GF2, which is defined in the GF2 header file.
+
+There are implicit conversions from ref_GF2 to const GF2
+and from GF2& to ref_GF2.  Therefore, if you want to declare
+a function that takes a non-const reference to a GF2, you
+should declare the parameter of type ref_GF2: this will
+allow you to pass variables of type GF2 as well as
+elements of vec_GF2's obtained through indexing.
+
+As an alternative, one can use the get and put methods below to access
+vector elements.
+
+There is one subtle but important difference in the semantics
+of Vec<GF2> and that of generic NTL vectors.  With a Vec<GF2>, whenever its
+length is increased (via SetLength), the "new" bits are always 0.
+For example, if v.length() == 20, then
+
+   v.SetLength(10); v.SetLength(20);
+
+will effectively clear bits 10..19 of v.
+This is quite different from the semantics of generic NTL vectors, where
+the above sequence would not change the value of v at all.
+One has to be aware of this difference, but it will not matter
+in most ordinary circumstances.
+
+
+\**************************************************************************/
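+
+// Example usage (a sketch): subscripting via ref_GF2, and the
+// equivalent get/put methods described above.
+//
+//    Vec<GF2> v;
+//    v.SetLength(10);
+//    v[3] = 1;                // assignment through a ref_GF2
+//    v.put(4, 1);             // same effect via the put method
+//    const GF2 b = v.get(3);  // fetch a value
+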
+
+
+
+template<>
+class Vec<GF2> {
+
+public:
+
+   Vec(); // 0 length vector
+   Vec(INIT_SIZE_TYPE, long n); // initialize to length n
+                                // usage: Vec(INIT_SIZE, n)
+
+   Vec(const Vec<GF2>& a); // copy constructor
+   Vec& operator=(const Vec<GF2>& a); // assignment
+   ~Vec(); // destructor
+
+   void SetLength(long n); // set length to n bits
+   void SetLength(long n, GF2 a);
+      // set length to n, if length increases, initialize new bits to a
+
+   void SetMaxLength(long n); // allocate space for n bits
+
+   long length() const; // current length, in bits
+
+   long MaxLength() const; // maximum length, i.e., the maximum
+                           // value passed to either SetLength or SetMaxLength
+                           // since creation or last kill
+
+   long allocated() const; // number of bits for which space is allocated;
+                           // if n <= v.allocated(), then v.SetLength(n)
+                           // will not result in any memory re-allocation.
+
+   // INVARIANT:
+   //    length() <= MaxLength() <= allocated() < 2^(NTL_BITS_PER_LONG-4)
+
+
+   void FixLength(long n); // fix length to n bits
+   // can only be applied after default initialization or kill
+
+
+   void FixAtCurrentLength();
+   // fixes the length at the current length and prohibits
+   // all future length changes.  
+
+   // It is required that length() == MaxLength() when called.
+
+   // EXCEPTIONS: if length() != MaxLength(), an error is raised;
+   // Strong ES.
+
+
+   long fixed() const; // test if length has been fixed
+
+   void kill(); // free space and make length 0
+
+   const GF2 get(long i) const; // fetch value at index i (indexing from 0)
+
+   void put(long i, GF2 a); // write value a to index i (indexing from 0)
+   void put(long i, long a);
+
+// Here are the subscripting operators, defined using the
+// "helper" class ref_GF2
+
+   ref_GF2 operator[](long i);
+   ref_GF2 operator()(long i);
+
+   const GF2 operator[](long i) const;
+   const GF2 operator()(long i) const;
+
+
+   void swap(Vec<GF2>& y);
+   // swap with y (fast: just swaps pointers)
+
+   void append(GF2 a);
+   // append a to end of vector
+
+   void append(const Vec<GF2>& w);
+   // append w to end of vector
+
+
+// Some partial STL compatibility...also used
+// to interface with the Matrix template class
+
+   typedef GF2 value_type;
+   typedef ref_GF2 reference;
+   typedef const GF2 const_reference;
+
+
+
+};
+
+
+
+void swap(Vec<GF2>& x, Vec<GF2>& y);
+// swap x and y (fast pointer swap)
+
+void append(Vec<GF2>& v, GF2 a);
+// append a to v
+
+void append(Vec<GF2>& v, const Vec<GF2>& a);
+// append a to v
+
+// equality operators:
+
+long operator==(const Vec<GF2>& a, const Vec<GF2>& b);
+long operator!=(const Vec<GF2>& a, const Vec<GF2>& b);
+
+
+// I/O operators:
+
+ostream& operator<<(ostream& s, const Vec<GF2>& a);
+istream& operator>>(istream& s, Vec<GF2>& a);
+
+// The I/O format is [a_0 a_1 ... a_{n-1}], where each a_i is "0" or "1".
+// On input, the a_i may be arbitrary integers, which are reduced mod 2.
+
+
+
+typedef Vec<GF2> vec_GF2;  // backward compatibility
+
+// utility routines:
+
+void clear(vec_GF2& x); // clear all bits--length unchanged
+long IsZero(const vec_GF2& a); // test if all bits are zero
+
+void shift(vec_GF2& x, const vec_GF2& a, long n);
+vec_GF2 shift(const vec_GF2& a, long n);
+// x = a shifted n places, where n may be positive or negative.
+// Generally, x[i] = a[i-n], so positive n shifts to a higher index.
+// The length of x is set to the length of a, and bits
+// are zero-filled or discarded as necessary.
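+// For example (a sketch): if a == [1 0 1 1], then shift(a, 1) == [0 1 0 1]
+// and shift(a, -1) == [0 1 1 0].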
+
+void reverse(vec_GF2& x, const vec_GF2& a); // c = a reversed
+vec_GF2 reverse(const vec_GF2& a);
+
+long weight(const vec_GF2& a); // return number of 1 bits in a
+
+void random(vec_GF2& x, long n);  // x = random vector of length n
+vec_GF2 random_vec_GF2(long n);
+
+
+// arithmetic operations over GF(2):
+
+void add(vec_GF2& x, const vec_GF2& a, const vec_GF2& b);
+void sub(vec_GF2& x, const vec_GF2& a, const vec_GF2& b);
+void negate(vec_GF2& x, const vec_GF2& a);
+
+void mul(vec_GF2& x, const vec_GF2& a, GF2 b);
+void mul(vec_GF2& x, const vec_GF2& a, long b);
+
+void mul(vec_GF2& x, GF2 a, const vec_GF2& b);
+void mul(vec_GF2& x, long a, const vec_GF2& b);
+// x = a * b
+
+void InnerProduct(ref_GF2 x, const vec_GF2& a, const vec_GF2& b);
+// vectors may differ in length
+
+void VectorCopy(vec_GF2& x, const vec_GF2& a, long n);
+vec_GF2 VectorCopy(const vec_GF2& a, long n);
+// x = a copy of a of length exactly n.
+// The input is truncated or padded with zeroes, as necessary.
+
+
+
+// arithmetic operator notation:
+
+vec_GF2 operator+(const vec_GF2& a, const vec_GF2& b);
+vec_GF2 operator-(const vec_GF2& a, const vec_GF2& b);
+vec_GF2 operator-(const vec_GF2& a);
+
+// scalar mul:
+
+vec_GF2 operator*(const vec_GF2& a, GF2 b);
+vec_GF2 operator*(const vec_GF2& a, long b);
+
+vec_GF2 operator*(GF2 a, const vec_GF2& b);
+vec_GF2 operator*(long a, const vec_GF2& b);
+
+// inner product:
+
+inline GF2 operator*(const vec_GF2& a, const vec_GF2& b);
+
+// assignment operator notation:
+
+vec_GF2& operator+=(vec_GF2& x, const vec_GF2& a);
+vec_GF2& operator-=(vec_GF2& x, const vec_GF2& a);
+
+vec_GF2& operator*=(vec_GF2& x, GF2 a);
+vec_GF2& operator*=(vec_GF2& x, long a);
+
+
+ diff --git a/thirdparty/linux/ntl/doc/vec_GF2.txt b/thirdparty/linux/ntl/doc/vec_GF2.txt new file mode 100644 index 0000000000..84c63e984a --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_GF2.txt @@ -0,0 +1,232 @@ + +/**************************************************************************\ + +MODULE: vec_GF2 + +SUMMARY: + + +The class Vec is explicitly specialized. +It behaves much like a generic Vec (see vector.txt), +but there are some differences. + +For efficiency, elements of a Vec are "packed" into a word. +You can still use subscript notation v[i] or v(i). +For const vectors, these evaluate to values of type const GF2. +For non-const vectors, these evaluate to values of the +special type ref_GF2, which is defined in the GF2 header file. + +There are implicit conversions from ref_GF2 to const GF2 +and from GF2& to ref_GF2. Therefore, if you want to declare +a function that takes a non-const reference to a GF2, you +should declare the parameter of type ref_GF2: this will +allow you to pass variables of type GF2 as well as +elements of vec_GF2's obtained through indexing. + +As an alternative, one can use the get and put methods below to access +vector elements. + +There is one subtle but important difference in the semantics +of Vec and that of generic NTL vectors. With a Vec, whenever its +length is increased (via SetLength), the "new" bits are always 0. +For example, if v.length() == 20, then + + v.SetLength(10); v.setLength(20); + +will effectively clear bits 10..19 of v. +This is quite different from the semantics of generic NTL vectors, where +the above sequence would not change the value of v at all. +One has to be aware of this difference, but it will not matter +in most ordinary circumstances. + + +\**************************************************************************/ + + + +template<> +class Vec { + +public: + + Vec(); // 0 length vector + Vec(INIT_SIZE_TYPE, long n); // initialize to length n + // usage: Vec(INIT_SIZE, n) + + Vec(const Vec& a); // copy constructor + Vec& operator=(const Vec& a); // assignment + ~Vec(); // destructor + + void SetLength(long n); // set length to n bits + void SetLength(long n, GF2 a); + // set length to n, if length increases, initialize new bits to a + + void SetMaxLength(long n); // allocate space for n bits + + long length() const; // current length, in bits + + long MaxLength() const; // maximum length, i.e., the maximum + // value passed to either SetLength or SetMaxLength + // since creation or last kill + + long allocated() const; // number of bits for which space is allocated; + // if n <= v.allocated(), then v.SetLength(n) + // will not result in any memory re-allocation. + + // INVARIANT: + // length() <= MaxLength() <= allocated() < 2^(NTL_BITS_PER_LONG-4) + + + void FixLength(long n); // fix length to n bits + // can only be applied after default initialization or kill + + + void FixAtCurrentLength(); + // fixes the length at the cuurent length and prohibits + // all future length changes. + + // It is required that length() == MaxLength() when called. + + // EXCEPTIONS: if length() != MaxLength() and error is raised; + // Strong ES. 
+ + + long fixed() const; // test if length has been fixed + + void kill(); // free space and make length 0 + + const GF2 get(long i) const; // fetch value at index i (indexing from 0) + + void put(long i, GF2 a); // write value a to index i (indexing from 0) + void put(long i, long a); + +// Here are the subscripting operators, defined using the +// "helper" class ref_GF2 + + ref_GF2 operator[](long i); + ref_GF2 operator()(long i); + + const GF2 operator[](long i) const; + const GF2 operator()(long i) const; + + + void swap(Vec& y); + // swap with y (fast: just swaps pointers) + + void append(GF2 a); + // append a to end of vector + + void append(const Vec& w); + // append w to end of vector + + +// Some partial STL compatibility...also used +// to interface with the Matrix template class + + typedef GF2 value_type; + typedef ref_GF2 reference; + typedef const GF2 const_reference; + + + +}; + + + +void swap(Vec& x, Vec& y); +// swap x and y (fast pointer swap) + +void append(Vec& v, GF2 a); +// append a to v + +void append(Vec& v, const Vec& a); +// append a to v + +// equality operators: + +long operator==(const Vec& a, const Vec& b); +long operator!=(const Vec& a, const Vec& b); + + +// I/O operators: + +ostream& operator<<(ostream& s, const Vec& a); +istream& operator>>(istream& s, Vec& a); + +// The I/O format is [a_0 a_1 ... a_{n-1}], where each a_i is "0" or "1". +// On input, the a_i may be arbitrary integers, which are reduced mod 2. + + + +typedef Vec vec_GF2; // backward compatibility + +// utility routines: + +void clear(vec_GF2& x); // clear all bits--length unchanged +long IsZero(const vec_GF2& a); // test if all bits are zero + +void shift(vec_GF2& x, const vec_GF2& a, long n); +vec_GF2 shift(const vec_GF2& a, long n); +// x = a shifted n places, where n may be positive or negative. +// Generally, x[i] = a[i-n], so positive n shifts to a higher index. +// The length of x is set to the length of a, and bits +// are zero-filled or discarded as necessary. + +void reverse(vec_GF2& x, const vec_GF2& a); // c = a reversed +vec_GF2 reverse(const vec_GF2& a); + +long weight(const vec_GF2& a); // return number of 1 bits in a + +void random(vec_GF2& x, long n); // x = random vector of length n +vec_GF2 random_vec_GF2(long n); + + +// arithmetic operations over GF(2): + +void add(vec_GF2& x, const vec_GF2& a, const vec_GF2& b); +void sub(vec_GF2& x, const vec_GF2& a, const vec_GF2& b); +void negate(vec_GF2& x, const vec_GF2& a); + +void mul(vec_GF2& x, const vec_GF2& a, GF2 b); +void mul(vec_GF2& x, const vec_GF2& a, long b); + +void mul(vec_GF2& x, GF2 a, const vec_GF2& b); +void mul(vec_GF2& x, long a, const vec_GF2& b); +// x = a * b + +void InnerProduct(ref_GF2 x, const vec_GF2& a, const vec_GF2& b); +// vectors may differ in length + +void VectorCopy(vec_GF2& x, const vec_GF2& a, long n); +vec_GF2 VectorCopy(const vec_GF2& a, long n); +// x = a copy of a of length exactly n. +// The input is truncated or padded with zeroes, as necessary. 
+ + + +// arithmetic operator notation: + +vec_GF2 operator+(const vec_GF2& a, const vec_GF2& b); +vec_GF2 operator-(const vec_GF2& a, const vec_GF2& b); +vec_GF2 operator-(const vec_GF2& a); + +// scalar mul: + +vec_GF2 operator*(const vec_GF2& a, GF2 b); +vec_GF2 operator*(const vec_GF2& a, long b); + +vec_GF2 operator*(GF2 a, const vec_GF2& b); +vec_GF2 operator*(long a, const vec_GF2& b); + +// inner product: + +inline GF2 operator*(const vec_GF2& a, const vec_GF2& b); + +// assignment operator notation: + +vec_GF2& operator+=(vec_GF2& x, const vec_GF2& a); +vec_GF2& operator-=(vec_GF2& x, const vec_GF2& a); + +vec_GF2& operator*=(vec_GF2& x, GF2 a); +vec_GF2& operator*=(vec_GF2& x, long a); + diff --git a/thirdparty/linux/ntl/doc/vec_GF2E.cpp.html b/thirdparty/linux/ntl/doc/vec_GF2E.cpp.html new file mode 100644 index 0000000000..0faa03cdaa --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_GF2E.cpp.html @@ -0,0 +1,112 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/vec_GF2E.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: vec_GF2E
+
+SUMMARY:
+
+Provides vectors over GF2E, along with some related operations.
+
+\**************************************************************************/
+
+#include <NTL/GF2E.h>
+#include <NTL/vector.h>
+
+
+typedef Vec<GF2E> vec_GF2E; // backward compatibility
+
+void mul(vec_GF2E& x, const vec_GF2E& a, const GF2E& b);
+void mul(vec_GF2E& x, const vec_GF2E& a, GF2 b);
+void mul(vec_GF2E& x, const vec_GF2E& a, long b);
+
+void mul(vec_GF2E& x, const GF2E& a, const vec_GF2E& b);
+void mul(vec_GF2E& x, GF2 a, const vec_GF2E& b);
+void mul(vec_GF2E& x, long a, const vec_GF2E& b);
+// x = a * b
+
+void add(vec_GF2E& x, const vec_GF2E& a, const vec_GF2E& b);
+// x = a + b
+
+void sub(vec_GF2E& x, const vec_GF2E& a, const vec_GF2E& b);
+// x = a - b = a + b (equivalent in characteristic 2)
+
+void negate(vec_GF2E& x, const vec_GF2E& a);
+// x = - a = a
+
+void clear(vec_GF2E& x);
+// x = 0 (length unchanged)
+
+long IsZero(const vec_GF2E& a);
+// test if a is the zero vector
+
+
+
+void InnerProduct(GF2E& x, const vec_GF2E& a, const vec_GF2E& b);
+// x = sum_{i=0}^{n-1} a[i]*b[i], where n = min(a.length(), b.length())
+
+void InnerProduct(GF2E& x, const vec_GF2E& a, const vec_GF2E& b,
+                  long offset);
+// x = sum_{i=offset}^{n-1} a[i]*b[i-offset], where n = min(a.length(),
+// b.length()+offset)
+
+void VectorCopy(vec_GF2E& x, const vec_GF2E& a, long n);
+vec_GF2E VectorCopy(const vec_GF2E& a, long n);
+// x = a copy of a of length exactly n.
+// The input is truncated or padded with zeroes, as necessary.
+
+
+
+// operator notation:
+
+vec_GF2E
+operator+(const vec_GF2E& a, const vec_GF2E& b);
+
+vec_GF2E
+operator-(const vec_GF2E& a, const vec_GF2E& b);
+
+vec_GF2E operator-(const vec_GF2E& a);
+
+
+// vector/scalar multiplication:
+
+vec_GF2E operator*(const vec_GF2E& a, const GF2E& b);
+vec_GF2E operator*(const vec_GF2E& a, GF2 b);
+vec_GF2E operator*(const vec_GF2E& a, long b);
+
+vec_GF2E operator*(const GF2E& a, const vec_GF2E& b);
+vec_GF2E operator*(GF2 a, const vec_GF2E& b);
+vec_GF2E operator*(long a, const vec_GF2E& b);
+
+// inner product:
+
+GF2E operator*(const vec_GF2E& a, const vec_GF2E& b);
+
+
+// assignment operator notation:
+
+vec_GF2E& operator+=(vec_GF2E& x, const vec_GF2E& a);
+vec_GF2E& operator-=(vec_GF2E& x, const vec_GF2E& a);
+
+vec_GF2E& operator*=(vec_GF2E& x, const GF2E& a);
+vec_GF2E& operator*=(vec_GF2E& x, GF2 a);
+vec_GF2E& operator*=(vec_GF2E& x, long a);
+
+
+
+// Implementation note: the BlockConstruct routine has been customized
+// for GF2E so that when a vec_GF2E is grown, space for the needed
+// elements is allocated in one contiguous chunk.  This saves on calls to
+// malloc and free, and should also yield better locality of reference.
+// One consequence of this is that swapping an element of a vec_GF2E
+// with another GF2E can not be implemented by pointer swap, and will in
+// this case be done by copy.
+
+ diff --git a/thirdparty/linux/ntl/doc/vec_GF2E.txt b/thirdparty/linux/ntl/doc/vec_GF2E.txt new file mode 100644 index 0000000000..7b17120e06 --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_GF2E.txt @@ -0,0 +1,102 @@ + +/**************************************************************************\ + +MODULE: vec_GF2E + +SUMMARY: + +Provides vectors over GF2E, along with some related operations. + +\**************************************************************************/ + +#include +#include + + +typedef Vec vec_GF2E; // backward compatibility + +void mul(vec_GF2E& x, const vec_GF2E& a, const GF2E& b); +void mul(vec_GF2E& x, const vec_GF2E& a, GF2 b); +void mul(vec_GF2E& x, const vec_GF2E& a, long b); + +void mul(vec_GF2E& x, const GF2E& a, const vec_GF2E& b); +void mul(vec_GF2E& x, GF2 a, const vec_GF2E& b); +void mul(vec_GF2E& x, long a, const vec_GF2E& b); +// x = a * b + +void add(vec_GF2E& x, const vec_GF2E& a, const vec_GF2E& b); +// x = a + b + +void sub(vec_GF2E& x, const vec_GF2E& a, const vec_GF2E& b); +// x = a - b = x + a + +void negate(vec_GF2E& x, const vec_GF2E& a); +// x = - a = a + +void clear(vec_GF2E& x); +// x = 0 (length unchanged) + +long IsZero(const vec_GF2E& a); +// test if a is the zero vector + + + +void InnerProduct(GF2E& x, const vec_GF2E& a, const vec_GF2E& b); +// x = sum_{i=0}^{n-1} a[i]*b[i], where n = min(a.length(), b.length()) + +void InnerProduct(GF2E& x, const vec_GF2E& a, const vec_GF2E& b, + long offset); +// x = sum_{i=offset}^{n-1} a[i]*b[i-offset], where n = min(a.length(), +// b.length()+offset) + +void VectorCopy(vec_GF2E& x, const vec_GF2E& a, long n); +vec_GF2E VectorCopy(const vec_GF2E& a, long n); +// x = a copy of a of length exactly n. +// The input is truncated or padded with zeroes, as necessary. + + + +// operator notation: + +vec_GF2E +operator+(const vec_GF2E& a, const vec_GF2E& b); + +vec_GF2E +operator-(const vec_GF2E& a, const vec_GF2E& b); + +vec_GF2E operator-(const vec_GF2E& a); + + +// vector/scalar multiplication: + +vec_GF2E operator*(const vec_GF2E& a, const GF2E& b); +vec_GF2E operator*(const vec_GF2E& a, GF2 b); +vec_GF2E operator*(const vec_GF2E& a, long b); + +vec_GF2E operator*(const GF2E& a, const vec_GF2E& b); +vec_GF2E operator*(GF2 a, const vec_GF2E& b); +vec_GF2E operator*(long a, const vec_GF2E& b); + +// inner product: + +GF2E operator*(const vec_GF2E& a, const vec_GF2E& b); + + +// assignment operator notation: + +vec_GF2E& operator+=(vec_GF2E& x, const vec_GF2E& a); +vec_GF2E& operator-=(vec_GF2E& x, const vec_GF2E& a); + +vec_GF2E& operator*=(vec_GF2E& x, const GF2E& a); +vec_GF2E& operator*=(vec_GF2E& x, GF2 a); +vec_GF2E& operator*=(vec_GF2E& x, long a); + + + +// Implementation note: the BlockConstruct routine has been customized +// for GF2E so that when a vec_GF2E is grown, space for the needed +// elements is allocated in one contiguous chunk. This saves on calls to +// malloc and free, and should also yield better locality of reference. +// One consequence of this is that swapping an element of a vec_GF2E +// with another GF2E can not be implemented by pointer swap, and will in +// this case be done by copy. diff --git a/thirdparty/linux/ntl/doc/vec_RR.cpp.html b/thirdparty/linux/ntl/doc/vec_RR.cpp.html new file mode 100644 index 0000000000..0562cabc60 --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_RR.cpp.html @@ -0,0 +1,88 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/vec_RR.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: vec_RR
+
+SUMMARY:
+
+Defines the class vec_RR.
+
+\**************************************************************************/
+
+
+typedef Vec<RR> vec_RR; // backward compatibility
+
+void mul(vec_RR& x, const vec_RR& a, const RR& b);
+void mul(vec_RR& x, const vec_RR& a, double b);
+
+void mul(vec_RR& x, const RR& a, const vec_RR& b);
+void mul(vec_RR& x, double a, const vec_RR& b);
+// x = a * b
+
+
+void add(vec_RR& x, const vec_RR& a, const vec_RR& b);
+// x = a + b
+
+void sub(vec_RR& x, const vec_RR& a, const vec_RR& b);
+// x = a - b
+
+void clear(vec_RR& x);
+// x = 0 (length unchanged)
+
+void negate(vec_RR& x, const vec_RR& a);
+// x = -a
+
+long IsZero(const vec_RR& a);
+// test if a is the zero vector
+
+
+void InnerProduct(RR& x, const vec_RR& a, const vec_RR& b);
+// x = inner product of a and b; the shorter vector is padded with
+// zeros to make the lengths equal.
+
+void VectorCopy(vec_RR& x, const vec_RR& a, long n);
+vec_RR VectorCopy(const vec_RR& a, long n);
+// x = a copy of a of length exactly n.
+// The input is truncated or padded with zeroes, as necessary.
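+
+// Example (a sketch; RR arithmetic is carried out to the current
+// RR precision, which is exact for these small integer values):
+//
+//    vec_RR a, b;
+//    a.SetLength(3);  b.SetLength(3);
+//    for (long i = 0; i < 3; i++) { a[i] = to_RR(i+1); b[i] = to_RR(2*i); }
+//    RR ip = a * b;                 // 1*0 + 2*2 + 3*4 = 16
+//    vec_RR c = VectorCopy(a, 5);   // [1 2 3 0 0]: padded with zeros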
+
+
+// operator notation:
+
+vec_RR operator+(const vec_RR& a, const vec_RR& b);
+vec_RR operator-(const vec_RR& a, const vec_RR& b);
+
+vec_RR operator-(const vec_RR& a);
+
+
+// vector/scalar multiplication:
+
+vec_RR operator*(const vec_RR& a, const RR& b);
+vec_RR operator*(const vec_RR& a, double b);
+
+vec_RR operator*(const RR& a, const vec_RR& b);
+vec_RR operator*(double a, const vec_RR& b);
+
+// inner product:
+
+RR operator*(const vec_RR& a, const vec_RR& b);
+
+
+// assignment operator notation:
+
+vec_RR& operator+=(vec_RR& x, const vec_RR& a);
+vec_RR& operator-=(vec_RR& x, const vec_RR& a);
+
+vec_RR& operator*=(vec_RR& x, const RR& a);
+vec_RR& operator*=(vec_RR& x, double a);
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/vec_RR.txt b/thirdparty/linux/ntl/doc/vec_RR.txt new file mode 100644 index 0000000000..9cf273573e --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_RR.txt @@ -0,0 +1,78 @@ + +/**************************************************************************\ + +MODULE: vec_RR + +SUMMARY: + +Defines the class vec_RR. + +\**************************************************************************/ + + +typedef Vec vec_RR; // backward compatibility + +void mul(vec_RR& x, const vec_RR& a, const RR& b); +void mul(vec_RR& x, const vec_RR& a, double b); + +void mul(vec_RR& x, const RR& a, const vec_RR& b); +void mul(vec_RR& x, double a, const vec_RR& b); +// x = a * b + + +void add(vec_RR& x, const vec_RR& a, const vec_RR& b); +// x = a + b + +void sub(vec_RR& x, const vec_RR& a, const vec_RR& b); +// x = a - b + +void clear(vec_RR& x); +// x = 0 (length unchanged) + +void negate(vec_RR& x, const vec_RR& a); +// x = -a + +long IsZero(const vec_RR& a); +// test if a is the zero vector + + +void InnerProduct(RR& x, const vec_RR& a, const vec_RR& b); +// x = inner product of a and b, padded with zeros to make the lengths +// even. + +void VectorCopy(vec_RR& x, const vec_RR& a, long n); +vec_RR VectorCopy(const vec_RR& a, long n); +// x = a copy of a of length exactly n. +// The input is truncated or padded with zeroes, as necessary. + + +// operator notation: + +vec_RR operator+(const vec_RR& a, const vec_RR& b); +vec_RR operator-(const vec_RR& a, const vec_RR& b); + +vec_RR operator-(const vec_RR& a); + + +// vector/scalar multiplication: + +vec_RR operator*(const vec_RR& a, const RR& b); +vec_RR operator*(const vec_RR& a, double b); + +vec_RR operator*(const RR& a, const vec_RR& b); +vec_RR operator*(double a, const vec_RR& b); + +// inner product: + +RR operator*(const vec_RR& a, const vec_RR& b); + + +// assignment operator notation: + +vec_RR& operator+=(vec_RR& x, const vec_RR& a); +vec_RR& operator-=(vec_RR& x, const vec_RR& a); + +vec_RR& operator*=(vec_RR& x, const RR& a); +vec_RR& operator*=(vec_RR& x, double a); + + diff --git a/thirdparty/linux/ntl/doc/vec_ZZ.cpp.html b/thirdparty/linux/ntl/doc/vec_ZZ.cpp.html new file mode 100644 index 0000000000..31eb2a67f6 --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_ZZ.cpp.html @@ -0,0 +1,87 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/vec_ZZ.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: vec_ZZ
+
+SUMMARY:
+
+Defines the class vec_ZZ.
+
+\**************************************************************************/
+
+
+typedef Vec<ZZ> vec_ZZ; // backward compatibility
+
+void mul(vec_ZZ& x, const vec_ZZ& a, const ZZ& b);
+void mul(vec_ZZ& x, const vec_ZZ& a, long b);
+
+void mul(vec_ZZ& x, const ZZ& a, const vec_ZZ& b);
+void mul(vec_ZZ& x, long a, const vec_ZZ& b);
+// x = a * b
+
+void add(vec_ZZ& x, const vec_ZZ& a, const vec_ZZ& b);
+// x = a + b
+
+void sub(vec_ZZ& x, const vec_ZZ& a, const vec_ZZ& b);
+// x = a - b
+
+void clear(vec_ZZ& x);
+// x = 0 (length unchanged)
+
+void negate(vec_ZZ& x, const vec_ZZ& a);
+// x = -a
+
+long IsZero(const vec_ZZ& a);
+// test if a is the zero vector
+
+void InnerProduct(ZZ& x, const vec_ZZ& a, const vec_ZZ& b);
+// x = inner product of a and b; the shorter vector is padded with
+// zeros to make the lengths equal.
+
+void VectorCopy(vec_ZZ& x, const vec_ZZ& a, long n);
+vec_ZZ VectorCopy(const vec_ZZ& a, long n);
+// x = a copy of a of length exactly n.
+// The input is truncated or padded with zeroes, as necessary.
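+
+// Example (a sketch of the padding behavior of InnerProduct):
+//
+//    vec_ZZ a, b;
+//    a.SetLength(3);  b.SetLength(2);
+//    a[0] = 1; a[1] = 2; a[2] = 3;
+//    b[0] = 4; b[1] = 5;
+//    ZZ ip;
+//    InnerProduct(ip, a, b);   // b is treated as zero-padded: ip = 1*4 + 2*5 = 14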
+
+
+// operator notation:
+
+vec_ZZ operator+(const vec_ZZ& a, const vec_ZZ& b);
+vec_ZZ operator-(const vec_ZZ& a, const vec_ZZ& b);
+
+vec_ZZ operator-(const vec_ZZ& a);
+
+
+// vector/scalar multiplication:
+
+vec_ZZ operator*(const vec_ZZ& a, const ZZ& b);
+vec_ZZ operator*(const vec_ZZ& a, long b);
+
+vec_ZZ operator*(const ZZ& a, const vec_ZZ& b);
+vec_ZZ operator*(long a, const vec_ZZ& b);
+
+// inner product:
+
+ZZ operator*(const vec_ZZ& a, const vec_ZZ& b);
+
+
+
+// assignment operator notation:
+
+vec_ZZ& operator+=(vec_ZZ& x, const vec_ZZ& a);
+vec_ZZ& operator-=(vec_ZZ& x, const vec_ZZ& a);
+
+vec_ZZ& operator*=(vec_ZZ& x, const ZZ& a);
+vec_ZZ& operator*=(vec_ZZ& x, long a);
+
+
+
+ diff --git a/thirdparty/linux/ntl/doc/vec_ZZ.txt b/thirdparty/linux/ntl/doc/vec_ZZ.txt new file mode 100644 index 0000000000..19b427f321 --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_ZZ.txt @@ -0,0 +1,77 @@ + +/**************************************************************************\ + +MODULE: vec_ZZ + +SUMMARY: + +Defines the class vec_ZZ. + +\**************************************************************************/ + + +typedef Vec vec_ZZ; // backward compatibility + +void mul(vec_ZZ& x, const vec_ZZ& a, const ZZ& b); +void mul(vec_ZZ& x, const vec_ZZ& a, long b); + +void mul(vec_ZZ& x, const ZZ& a, const vec_ZZ& b); +void mul(vec_ZZ& x, long a, const vec_ZZ& b); +// x = a * b + +void add(vec_ZZ& x, const vec_ZZ& a, const vec_ZZ& b); +// x = a + b + +void sub(vec_ZZ& x, const vec_ZZ& a, const vec_ZZ& b); +// x = a - b + +void clear(vec_ZZ& x); +// x = 0 (length unchanged) + +void negate(vec_ZZ& x, const vec_ZZ& a); +// x = -a + +long IsZero(const vec_ZZ& a); +// test if a is the zero vector + +void InnerProduct(ZZ& x, const vec_ZZ& a, const vec_ZZ& b); +// x = inner product of a and b, padded with zeros to make the lengths +// even. + +void VectorCopy(vec_ZZ& x, const vec_ZZ& a, long n); +vec_ZZ VectorCopy(const vec_ZZ& a, long n); +// x = a copy of a of length exactly n. +// The input is truncated or padded with zeroes, as necessary. + + +// operator notation: + +vec_ZZ operator+(const vec_ZZ& a, const vec_ZZ& b); +vec_ZZ operator-(const vec_ZZ& a, const vec_ZZ& b); + +vec_ZZ operator-(const vec_ZZ& a); + + +// vector/scalar multiplication: + +vec_ZZ operator*(const vec_ZZ& a, const ZZ& b); +vec_ZZ operator*(const vec_ZZ& a, long b); + +vec_ZZ operator*(const ZZ& a, const vec_ZZ& b); +vec_ZZ operator*(long a, const vec_ZZ& b); + +// inner product: + +ZZ operator*(const vec_ZZ& a, const vec_ZZ& b); + + + +// assignment operator notation: + +vec_ZZ& operator+=(vec_ZZ& x, const vec_ZZ& a); +vec_ZZ& operator-=(vec_ZZ& x, const vec_ZZ& a); + +vec_ZZ& operator*=(vec_ZZ& x, const ZZ& a); +vec_ZZ& operator*=(vec_ZZ& x, long a); + + diff --git a/thirdparty/linux/ntl/doc/vec_ZZ_p.cpp.html b/thirdparty/linux/ntl/doc/vec_ZZ_p.cpp.html new file mode 100644 index 0000000000..b5fe1b2965 --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_ZZ_p.cpp.html @@ -0,0 +1,107 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/vec_ZZ_p.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: vec_ZZ_p
+
+SUMMARY:
+
+Provides vectors over ZZ_p, along with some related operations.
+
+\**************************************************************************/
+
+#include <NTL/ZZ_p.h>
+#include <NTL/vec_ZZ.h>
+#include <NTL/vector.h>
+
+
+typedef Vec<ZZ_p> vec_ZZ_p; // backward compatibility
+
+void mul(vec_ZZ_p& x, const vec_ZZ_p& a, const ZZ_p& b);
+void mul(vec_ZZ_p& x, const vec_ZZ_p& a, long b);
+
+void mul(vec_ZZ_p& x, const ZZ_p& a, const vec_ZZ_p& b);
+void mul(vec_ZZ_p& x, long a, const vec_ZZ_p& b);
+// x = a * b
+
+void add(vec_ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b);
+// x = a + b
+
+void sub(vec_ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b);
+// x = a - b
+
+void clear(vec_ZZ_p& x);
+// x = 0 (length unchanged)
+
+void negate(vec_ZZ_p& x, const vec_ZZ_p& a);
+// x = -a
+
+long IsZero(const vec_ZZ_p& a);
+// test if a is the zero vector
+
+
+void InnerProduct(ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b);
+// x = sum_{i=0}^{n-1} a[i]*b[i], where n = min(a.length(),
+// b.length())
+
+void InnerProduct(ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b,
+                  long offset);
+// x = sum_{i=offset}^{n-1} a[i]*b[i-offset], where n = min(a.length(),
+// b.length()+offset)
+
+void VectorCopy(vec_ZZ_p& x, const vec_ZZ_p& a, long n);
+vec_ZZ_p VectorCopy(const vec_ZZ_p& a, long n);
+// x = a copy of a of length exactly n.
+// The input is truncated or padded with zeroes, as necessary.
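+
+// Example (a sketch, working modulo the illustrative prime 17):
+//
+//    ZZ_p::init(to_ZZ(17));
+//    vec_ZZ_p a, b;
+//    a.SetLength(3);  b.SetLength(3);
+//    a[0] = 2; a[1] = 5; a[2] = 9;
+//    b[0] = 3; b[1] = 7; b[2] = 1;
+//    ZZ_p ip = a * b;   // (2*3 + 5*7 + 9*1) mod 17 = 50 mod 17 = 16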
+
+
+
+
+
+// operator notation:
+
+vec_ZZ_p operator+(const vec_ZZ_p& a, const vec_ZZ_p& b);
+vec_ZZ_p operator-(const vec_ZZ_p& a, const vec_ZZ_p& b);
+
+vec_ZZ_p operator-(const vec_ZZ_p& a);
+
+
+// vector/scalar multiplication:
+
+vec_ZZ_p operator*(const vec_ZZ_p& a, const ZZ_p& b);
+vec_ZZ_p operator*(const vec_ZZ_p& a, long b);
+
+vec_ZZ_p operator*(const ZZ_p& a, const vec_ZZ_p& b);
+vec_ZZ_p operator*(long a, const vec_ZZ_p& b);
+
+// inner product:
+
+ZZ_p operator*(const vec_ZZ_p& a, const vec_ZZ_p& b);
+
+
+// assignment operator notation:
+
+vec_ZZ_p& operator+=(vec_ZZ_p& x, const vec_ZZ_p& a);
+vec_ZZ_p& operator-=(vec_ZZ_p& x, const vec_ZZ_p& a);
+
+vec_ZZ_p& operator*=(vec_ZZ_p& x, const ZZ_p& a);
+vec_ZZ_p& operator*=(vec_ZZ_p& x, long a);
+
+
+
+// Implementation note: the BlockConstruct routine has been customized
+// for ZZ_p so that when a vec_ZZ_p is grown, space for the needed
+// elements is allocated in one contiguous chunk.  This saves on calls to
+// malloc and free, and should also yield better locality of reference.
+// One consequence of this is that swapping an element of a vec_ZZ_p
+// with another ZZ_p can not be implemented by pointer swap, and will in
+// this case be done by copy.
+
+ diff --git a/thirdparty/linux/ntl/doc/vec_ZZ_p.txt b/thirdparty/linux/ntl/doc/vec_ZZ_p.txt new file mode 100644 index 0000000000..f0830a9e5e --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_ZZ_p.txt @@ -0,0 +1,97 @@ + +/**************************************************************************\ + +MODULE: vec_ZZ_p + +SUMMARY: + +Provides vectors over ZZ_p, along with some related operations. + +\**************************************************************************/ + +#include +#include +#include + + +typedef Vec vec_ZZ_p; // backward compatibility + +void mul(vec_ZZ_p& x, const vec_ZZ_p& a, const ZZ_p& b); +void mul(vec_ZZ_p& x, const vec_ZZ_p& a, long b); + +void mul(vec_ZZ_p& x, const ZZ_p& a, const vec_ZZ_p& b); +void mul(vec_ZZ_p& x, long a, const vec_ZZ_p& b); +// x = a * b + +void add(vec_ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b); +// x = a + b + +void sub(vec_ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b); +// x = a - b + +void clear(vec_ZZ_p& x); +// x = 0 (length unchanged) + +void negate(vec_ZZ_p& x, const vec_ZZ_p& a); +// x = -a + +long IsZero(const vec_ZZ_p& a); +// test if a is the zero vector + + +void InnerProduct(ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b); +// x = sum_{i=0}^{n-1} a[i]*b[i], where n = min(a.length(), +// b.length()) + +void InnerProduct(ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b, + long offset); +// x = sum_{i=offset}^{n-1} a[i]*b[i-offset], where n = min(a.length(), +// b.length()+offset) + +void VectorCopy(vec_ZZ_p& x, const vec_ZZ_p& a, long n); +vec_ZZ_p VectorCopy(const vec_ZZ_p& a, long n); +// x = a copy of a of length exactly n. +// The input is truncated or padded with zeroes, as necessary. + + + + + +// operator notation: + +vec_ZZ_p operator+(const vec_ZZ_p& a, const vec_ZZ_p& b); +vec_ZZ_p operator-(const vec_ZZ_p& a, const vec_ZZ_p& b); + +vec_ZZ_p operator-(const vec_ZZ_p& a); + + +// vector/scalar multiplication: + +vec_ZZ_p operator*(const vec_ZZ_p& a, const ZZ_p& b); +vec_ZZ_p operator*(const vec_ZZ_p& a, long b); + +vec_ZZ_p operator*(const ZZ_p& a, const vec_ZZ_p& b); +vec_ZZ_p operator*(long a, const vec_ZZ_p& b); + +// inner product: + +ZZ_p operator*(const vec_ZZ_p& a, const vec_ZZ_p& b); + + +// assignment operator notation: + +vec_ZZ_p& operator+=(vec_ZZ_p& x, const vec_ZZ_p& a); +vec_ZZ_p& operator-=(vec_ZZ_p& x, const vec_ZZ_p& a); + +vec_ZZ_p& operator*=(vec_ZZ_p& x, const ZZ_p& a); +vec_ZZ_p& operator*=(vec_ZZ_p& x, long a); + + + +// Implementation note: the BlockConstruct routine has been customized +// for ZZ_p so that when a vec_ZZ_p is grown, space for the needed +// elements is allocated in one contiguous chunk. This saves on calls to +// malloc and free, and should also yield better locality of reference. +// One connsequence of this is that swapping an element of a vec_ZZ_p +// with another ZZ_p can not be implemented by pointer swap, and will in +// this case be done by copy. diff --git a/thirdparty/linux/ntl/doc/vec_ZZ_pE.cpp.html b/thirdparty/linux/ntl/doc/vec_ZZ_pE.cpp.html new file mode 100644 index 0000000000..7185b27213 --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_ZZ_pE.cpp.html @@ -0,0 +1,102 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/vec_ZZ_pE.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: vec_ZZ_pE
+
+SUMMARY:
+
+Provides vectors over ZZ_pE, along with some related operations.
+
+\**************************************************************************/
+
+#include <NTL/ZZ_pE.h>
+#include <NTL/vec_ZZ.h>
+#include <NTL/vector.h>
+
+typedef Vec<ZZ_pE> vec_ZZ_pE; // backward compatibility
+
+void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const ZZ_pE& b);
+void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const ZZ_p& b);
+void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, long b);
+
+void mul(vec_ZZ_pE& x, const ZZ_pE& a, const vec_ZZ_pE& b);
+void mul(vec_ZZ_pE& x, const ZZ_p& a, const vec_ZZ_pE& b);
+void mul(vec_ZZ_pE& x, long a, const vec_ZZ_pE& b);
+// x = a * b
+
+void add(vec_ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b);
+// x = a + b
+
+void sub(vec_ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b);
+// x = a - b
+
+void clear(vec_ZZ_pE& x);
+// x = 0 (length unchanged)
+
+void negate(vec_ZZ_pE& x, const vec_ZZ_pE& a);
+// x = -a
+
+long IsZero(const vec_ZZ_pE& a);
+// test if a is the zero vector
+
+
+void InnerProduct(ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b);
+// x = sum_{i=0}^{n-1} a[i]*b[i], where n = min(a.length(),
+// b.length())
+
+void InnerProduct(ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b,
+                  long offset);
+// x = sum_{i=offset}^{n-1} a[i]*b[i-offset], where n = min(a.length(),
+// b.length()+offset)
+
+void VectorCopy(vec_ZZ_pE& x, const vec_ZZ_pE& a, long n);
+vec_ZZ_pE VectorCopy(const vec_ZZ_pE& a, long n);
+// x = a copy of a of length exactly n.
+// The input is truncated or padded with zeroes, as necessary.
+
+
+
+
+
+// operator notation:
+
+vec_ZZ_pE operator+(const vec_ZZ_pE& a, const vec_ZZ_pE& b);
+vec_ZZ_pE operator-(const vec_ZZ_pE& a, const vec_ZZ_pE& b);
+
+vec_ZZ_pE operator-(const vec_ZZ_pE& a);
+
+
+// vector/scalar multiplication:
+
+vec_ZZ_pE operator*(const vec_ZZ_pE& a, const ZZ_pE& b);
+vec_ZZ_pE operator*(const vec_ZZ_pE& a, const ZZ_p& b);
+vec_ZZ_pE operator*(const vec_ZZ_pE& a, long b);
+
+vec_ZZ_pE operator*(const ZZ_pE& a, const vec_ZZ_pE& b);
+vec_ZZ_pE operator*(const ZZ_p& a, const vec_ZZ_pE& b);
+vec_ZZ_pE operator*(long a, const vec_ZZ_pE& b);
+
+// inner product:
+
+ZZ_pE operator*(const vec_ZZ_pE& a, const vec_ZZ_pE& b);
+
+
+// assignment operator notation:
+
+vec_ZZ_pE& operator+=(vec_ZZ_pE& x, const vec_ZZ_pE& a);
+vec_ZZ_pE& operator-=(vec_ZZ_pE& x, const vec_ZZ_pE& a);
+
+vec_ZZ_pE& operator*=(vec_ZZ_pE& x, const ZZ_pE& a);
+vec_ZZ_pE& operator*=(vec_ZZ_pE& x, const ZZ_p& a);
+vec_ZZ_pE& operator*=(vec_ZZ_pE& x, long a);
+
+
+ diff --git a/thirdparty/linux/ntl/doc/vec_ZZ_pE.txt b/thirdparty/linux/ntl/doc/vec_ZZ_pE.txt new file mode 100644 index 0000000000..205be53c2c --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_ZZ_pE.txt @@ -0,0 +1,92 @@ + +/**************************************************************************\ + +MODULE: vec_ZZ_pE + +SUMMARY: + +Provides vectors over ZZ_pE, along with some related operations. + +\**************************************************************************/ + +#include +#include +#include + +typedef Vec vec_ZZ_pE; // backward compatibility + +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const ZZ_pE& b); +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const ZZ_p& b); +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, long b); + +void mul(vec_ZZ_pE& x, const ZZ_pE& a, const vec_ZZ_pE& b); +void mul(vec_ZZ_pE& x, const ZZ_p& a, const vec_ZZ_pE& b); +void mul(vec_ZZ_pE& x, long a, const vec_ZZ_pE& b); +// x = a * b + +void add(vec_ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b); +// x = a + b + +void sub(vec_ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b); +// x = a - b + +void clear(vec_ZZ_pE& x); +// x = 0 (length unchanged) + +void negate(vec_ZZ_pE& x, const vec_ZZ_pE& a); +// x = -a + +long IsZero(const vec_ZZ_pE& a); +// test if a is the zero vector + + +void InnerProduct(ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b); +// x = sum_{i=0}^{n-1} a[i]*b[i], where n = min(a.length(), +// b.length()) + +void InnerProduct(ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b, + long offset); +// x = sum_{i=offset}^{n-1} a[i]*b[i-offset], where n = min(a.length(), +// b.length()+offset) + +void VectorCopy(vec_ZZ_pE& x, const vec_ZZ_pE& a, long n); +vec_ZZ_pE VectorCopy(const vec_ZZ_pE& a, long n); +// x = a copy of a of length exactly n. +// The input is truncated or padded with zeroes, as necessary. + + + + + +// operator notation: + +vec_ZZ_pE operator+(const vec_ZZ_pE& a, const vec_ZZ_pE& b); +vec_ZZ_pE operator-(const vec_ZZ_pE& a, const vec_ZZ_pE& b); + +vec_ZZ_pE operator-(const vec_ZZ_pE& a); + + +// vector/scalar multiplication: + +vec_ZZ_pE operator*(const vec_ZZ_pE& a, const ZZ_pE& b); +vec_ZZ_pE operator*(const vec_ZZ_pE& a, const ZZ_p& b); +vec_ZZ_pE operator*(const vec_ZZ_pE& a, long b); + +vec_ZZ_pE operator*(const ZZ_pE& a, const vec_ZZ_pE& b); +vec_ZZ_pE operator*(const ZZ_p& a, const vec_ZZ_pE& b); +vec_ZZ_pE operator*(long a, const vec_ZZ_pE& b); + +// inner product: + +ZZ_pE operator*(const vec_ZZ_pE& a, const vec_ZZ_pE& b); + + +// assignment operator notation: + +vec_ZZ_pE& operator+=(vec_ZZ_pE& x, const vec_ZZ_pE& a); +vec_ZZ_pE& operator-=(vec_ZZ_pE& x, const vec_ZZ_pE& a); + +vec_ZZ_pE& operator*=(vec_ZZ_pE& x, const ZZ_pE& a); +vec_ZZ_pE& operator*=(vec_ZZ_pE& x, const ZZ_p& a); +vec_ZZ_pE& operator*=(vec_ZZ_pE& x, long a); + diff --git a/thirdparty/linux/ntl/doc/vec_lzz_p.cpp.html b/thirdparty/linux/ntl/doc/vec_lzz_p.cpp.html new file mode 100644 index 0000000000..0961cfb44b --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_lzz_p.cpp.html @@ -0,0 +1,103 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/vec_lzz_p.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: vec_zz_p
+
+SUMMARY:
+
+Provides vectors over zz_p, along with some related operations.
+
+\**************************************************************************/
+
+#include "zz_p.h"
+#include "vec_zz.h"
+#include <NTL/vector.h>
+
+typedef Vec<zz_p> vec_zz_p; // backward compatibility
+
+void mul(vec_zz_p& x, const vec_zz_p& a, zz_p b);
+void mul(vec_zz_p& x, const vec_zz_p& a, long b);
+
+void mul(vec_zz_p& x, zz_p a, const vec_zz_p& b);
+void mul(vec_zz_p& x, long a, const vec_zz_p& b);
+// x = a * b
+
+void add(vec_zz_p& x, const vec_zz_p& a, const vec_zz_p& b);
+// x = a + b
+
+void sub(vec_zz_p& x, const vec_zz_p& a, const vec_zz_p& b);
+// x = a - b
+
+void clear(vec_zz_p& x);
+// x = 0 (length unchanged)
+
+void negate(vec_zz_p& x, const vec_zz_p& a);
+// x = -a
+
+long IsZero(const vec_zz_p& a);
+// test if a is the zero vector
+
+void VectorCopy(vec_zz_p& x, const vec_zz_p& a, long n);
+vec_zz_p VectorCopy(const vec_zz_p& a, long n);
+// x = a copy of a of length exactly n.
+// The input is truncated or padded with zeroes, as necessary.
+
+
+
+void InnerProduct(zz_p& x, const vec_zz_p& a, const vec_zz_p& b);
+// x = sum_{i=0}^{n-1} a[i]*b[i], where n = min(a.length(),
+// b.length())
+
+void InnerProduct(zz_p& x, const vec_zz_p& a, const vec_zz_p& b,
+                  long offset);
+// x = sum_{i=offset}^{n-1} a[i]*b[i-offset], where n = min(a.length(),
+// b.length()+offset)
+
+long CRT(vec_ZZ& a, ZZ& prod, const vec_zz_p& A);
+// Incremental Chinese Remaindering: if p is the current zz_p modulus and
+// gcd(p, prod) = 1, this computes a' such that a' = a mod prod and
+// a' = A mod p, with coefficients in the interval (-p*prod/2, p*prod/2].
+// It sets a := a', prod := p*prod, and returns 1 if a's value changed.
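+
+// Example (a sketch of the intended calling pattern; the primes, the
+// length n, and the way the residue vectors are obtained are illustrative):
+//
+//    vec_ZZ a;  ZZ prod;
+//    a.SetLength(n);  clear(a);  set(prod);   // start with a = 0, prod = 1
+//    long primes[] = { 1009, 1013 };
+//    for (long j = 0; j < 2; j++) {
+//       zz_p::init(primes[j]);
+//       vec_zz_p A;
+//       // ... fill A with the residues of the target vector mod primes[j]
+//       CRT(a, prod, A);
+//    }
+//    // a now holds the balanced residues mod prod = 1009*1013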
+
+
+// operator notation:
+
+vec_zz_p operator+(const vec_zz_p& a, const vec_zz_p& b);
+vec_zz_p operator-(const vec_zz_p& a, const vec_zz_p& b);
+
+vec_zz_p operator-(const vec_zz_p& a);
+
+
+// vector/scalar multiplication:
+
+vec_zz_p operator*(const vec_zz_p& a, zz_p b);
+vec_zz_p operator*(const vec_zz_p& a, long b);
+
+vec_zz_p operator*(zz_p a, const vec_zz_p& b);
+vec_zz_p operator*(long a, const vec_zz_p& b);
+
+
+// inner product:
+
+zz_p operator*(const vec_zz_p& a, const vec_zz_p& b);
+
+
+
+// assignment operator notation:
+
+vec_zz_p& operator+=(vec_zz_p& x, const vec_zz_p& a);
+vec_zz_p& operator-=(vec_zz_p& x, const vec_zz_p& a);
+
+vec_zz_p& operator*=(vec_zz_p& x, zz_p a);
+vec_zz_p& operator*=(vec_zz_p& x, long a);
+
+
+ diff --git a/thirdparty/linux/ntl/doc/vec_lzz_p.txt b/thirdparty/linux/ntl/doc/vec_lzz_p.txt new file mode 100644 index 0000000000..dd5701967a --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_lzz_p.txt @@ -0,0 +1,93 @@ + +/**************************************************************************\ + +MODULE: vec_zz_p + +SUMMARY: + +Provides vectors over zz_p, along with some related operations. + +\**************************************************************************/ + +#include "zz_p.h" +#include "vec_zz.h" +#include + +typedef Vec vec_zz_p; // backward compatibility + +void mul(vec_zz_p& x, const vec_zz_p& a, zz_p b); +void mul(vec_zz_p& x, const vec_zz_p& a, long b); + +void mul(vec_zz_p& x, zz_p a, const vec_zz_p& b); +void mul(vec_zz_p& x, long a, const vec_zz_p& b); +// x = a * b + +void add(vec_zz_p& x, const vec_zz_p& a, const vec_zz_p& b); +// x = a + b + +void sub(vec_zz_p& x, const vec_zz_p& a, const vec_zz_p& b); +// x = a - b + +void clear(vec_zz_p& x); +// x = 0 (length unchanged) + +void negate(vec_zz_p& x, const vec_zz_p& a); +// x = -a + +long IsZero(const vec_zz_p& a); +// test if a is the zero vector + +void VectorCopy(vec_zz_p& x, const vec_zz_p& a, long n); +vec_zz_p VectorCopy(const vec_zz_p& a, long n); +// x = a copy of a of length exactly n. +// The input is truncated or padded with zeroes, as necessary. + + + +void InnerProduct(zz_p& x, const vec_zz_p& a, const vec_zz_p& b); +// x = sum_{i=0}^{n-1} a[i]*b[i], where n = min(a.length(), +// b.length()) + +void InnerProduct(zz_p& x, const vec_zz_p& a, const vec_zz_p& b, + long offset); +// x = sum_{i=offset}^{n-1} a[i]*b[i-offset], where n = min(a.length(), +// b.length()+offset) + +long CRT(vec_ZZ& a, ZZ& prod, const vec_zz_p& A); +// Incremental Chinese Remaindering: If p is the current zz_p modulus with +// (p, prod) = 1; Computes a' such that a' = a mod prod and a' = A mod p, +// with coefficients in the interval (-p*prod/2, p*prod/2]; +// Sets a := a', prod := p*prod, and returns 1 if a's value changed. + + +// operator notation: + +vec_zz_p operator+(const vec_zz_p& a, const vec_zz_p& b); +vec_zz_p operator-(const vec_zz_p& a, const vec_zz_p& b); + +vec_zz_p operator-(const vec_zz_p& a); + + +// vector/scalar multiplication: + +vec_zz_p operator*(const vec_zz_p& a, zz_p b); +vec_zz_p operator*(const vec_zz_p& a, long b); + +vec_zz_p operator*(zz_p a, const vec_zz_p& b); +vec_zz_p operator*(long a, const vec_zz_p& b); + + +// inner product: + +zz_p operator*(const vec_zz_p& a, const vec_zz_p& b); + + + +// assignment operator notation: + +vec_zz_p& operator+=(vec_zz_p& x, const vec_zz_p& a); +vec_zz_p& operator-=(vec_zz_p& x, const vec_zz_p& a); + +vec_zz_p& operator*=(vec_zz_p& x, zz_p a); +vec_zz_p& operator*=(vec_zz_p& x, long a); + diff --git a/thirdparty/linux/ntl/doc/vec_lzz_pE.cpp.html b/thirdparty/linux/ntl/doc/vec_lzz_pE.cpp.html new file mode 100644 index 0000000000..e92257f3e3 --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_lzz_pE.cpp.html @@ -0,0 +1,102 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/vec_lzz_pE.cpp.html + + + + +
+/**************************************************************************\
+
+MODULE: vec_zz_pE
+
+SUMMARY:
+
+Provides vectors over zz_pE, along with some related operations.
+
+\**************************************************************************/
+
+#include <NTL/lzz_pE.h>
+#include <NTL/vec_ZZ.h>
+#include <NTL/vector.h>
+
+typedef Vec<zz_pE> vec_zz_pE; // backward compatibility
+
+void mul(vec_zz_pE& x, const vec_zz_pE& a, const zz_pE& b);
+void mul(vec_zz_pE& x, const vec_zz_pE& a, const zz_p& b);
+void mul(vec_zz_pE& x, const vec_zz_pE& a, long b);
+
+void mul(vec_zz_pE& x, const zz_pE& a, const vec_zz_pE& b);
+void mul(vec_zz_pE& x, const zz_p& a, const vec_zz_pE& b);
+void mul(vec_zz_pE& x, long a, const vec_zz_pE& b);
+// x = a * b
+
+void add(vec_zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b);
+// x = a + b
+
+void sub(vec_zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b);
+// x = a - b
+
+void clear(vec_zz_pE& x);
+// x = 0 (length unchanged)
+
+void negate(vec_zz_pE& x, const vec_zz_pE& a);
+// x = -a
+
+long IsZero(const vec_zz_pE& a);
+// test if a is the zero vector
+
+
+void InnerProduct(zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b);
+// x = sum_{i=0}^{n-1} a[i]*b[i], where n = min(a.length(),
+// b.length())
+
+void InnerProduct(zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b,
+                  long offset);
+// x = sum_{i=offset}^{n-1} a[i]*b[i-offset], where n = min(a.length(),
+// b.length()+offset)
+
+void VectorCopy(vec_zz_pE& x, const vec_zz_pE& a, long n);
+vec_zz_pE VectorCopy(const vec_zz_pE& a, long n);
+// x = a copy of a of length exactly n.
+// The input is truncated or padded with zeroes, as necessary.
+
+
+
+
+
+// operator notation:
+
+vec_zz_pE operator+(const vec_zz_pE& a, const vec_zz_pE& b);
+vec_zz_pE operator-(const vec_zz_pE& a, const vec_zz_pE& b);
+
+vec_zz_pE operator-(const vec_zz_pE& a);
+
+
+// vector/scalar multiplication:
+
+vec_zz_pE operator*(const vec_zz_pE& a, const zz_pE& b);
+vec_zz_pE operator*(const vec_zz_pE& a, const zz_p& b);
+vec_zz_pE operator*(const vec_zz_pE& a, long b);
+
+vec_zz_pE operator*(const zz_pE& a, const vec_zz_pE& b);
+vec_zz_pE operator*(const zz_p& a, const vec_zz_pE& b);
+vec_zz_pE operator*(long a, const vec_zz_pE& b);
+
+// inner product:
+
+zz_pE operator*(const vec_zz_pE& a, const vec_zz_pE& b);
+
+
+// assignment operator notation:
+
+vec_zz_pE& operator+=(vec_zz_pE& x, const vec_zz_pE& a);
+vec_zz_pE& operator-=(vec_zz_pE& x, const vec_zz_pE& a);
+
+vec_zz_pE& operator*=(vec_zz_pE& x, const zz_pE& a);
+vec_zz_pE& operator*=(vec_zz_pE& x, const zz_p& a);
+vec_zz_pE& operator*=(vec_zz_pE& x, long a);
+
+
+ diff --git a/thirdparty/linux/ntl/doc/vec_lzz_pE.txt b/thirdparty/linux/ntl/doc/vec_lzz_pE.txt new file mode 100644 index 0000000000..c0b6a403ae --- /dev/null +++ b/thirdparty/linux/ntl/doc/vec_lzz_pE.txt @@ -0,0 +1,92 @@ + +/**************************************************************************\ + +MODULE: vec_zz_pE + +SUMMARY: + +Provides vectors over zz_pE, along with some related operations. + +\**************************************************************************/ + +#include +#include +#include + +typedef Vec vec_zz_pE; // backward compatibility + +void mul(vec_zz_pE& x, const vec_zz_pE& a, const zz_pE& b); +void mul(vec_zz_pE& x, const vec_zz_pE& a, const zz_p& b); +void mul(vec_zz_pE& x, const vec_zz_pE& a, long b); + +void mul(vec_zz_pE& x, const zz_pE& a, const vec_zz_pE& b); +void mul(vec_zz_pE& x, const zz_p& a, const vec_zz_pE& b); +void mul(vec_zz_pE& x, long a, const vec_zz_pE& b); +// x = a * b + +void add(vec_zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b); +// x = a + b + +void sub(vec_zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b); +// x = a - b + +void clear(vec_zz_pE& x); +// x = 0 (length unchanged) + +void negate(vec_zz_pE& x, const vec_zz_pE& a); +// x = -a + +long IsZero(const vec_zz_pE& a); +// test if a is the zero vector + + +void InnerProduct(zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b); +// x = sum_{i=0}^{n-1} a[i]*b[i], where n = min(a.length(), +// b.length()) + +void InnerProduct(zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b, + long offset); +// x = sum_{i=offset}^{n-1} a[i]*b[i-offset], where n = min(a.length(), +// b.length()+offset) + +void VectorCopy(vec_zz_pE& x, const vec_zz_pE& a, long n); +vec_zz_pE VectorCopy(const vec_zz_pE& a, long n); +// x = a copy of a of length exactly n. +// The input is truncated or padded with zeroes, as necessary. + + + + + +// operator notation: + +vec_zz_pE operator+(const vec_zz_pE& a, const vec_zz_pE& b); +vec_zz_pE operator-(const vec_zz_pE& a, const vec_zz_pE& b); + +vec_zz_pE operator-(const vec_zz_pE& a); + + +// vector/scalar multiplication: + +vec_zz_pE operator*(const vec_zz_pE& a, const zz_pE& b); +vec_zz_pE operator*(const vec_zz_pE& a, const zz_p& b); +vec_zz_pE operator*(const vec_zz_pE& a, long b); + +vec_zz_pE operator*(const zz_pE& a, const vec_zz_pE& b); +vec_zz_pE operator*(const zz_p& a, const vec_zz_pE& b); +vec_zz_pE operator*(long a, const vec_zz_pE& b); + +// inner product: + +zz_pE operator*(const vec_zz_pE& a, const vec_zz_pE& b); + + +// assignment operator notation: + +vec_zz_pE& operator+=(vec_zz_pE& x, const vec_zz_pE& a); +vec_zz_pE& operator-=(vec_zz_pE& x, const vec_zz_pE& a); + +vec_zz_pE& operator*=(vec_zz_pE& x, const zz_pE& a); +vec_zz_pE& operator*=(vec_zz_pE& x, const zz_p& a); +vec_zz_pE& operator*=(vec_zz_pE& x, long a); + diff --git a/thirdparty/linux/ntl/doc/vector.cpp.html b/thirdparty/linux/ntl/doc/vector.cpp.html new file mode 100644 index 0000000000..a35627b98c --- /dev/null +++ b/thirdparty/linux/ntl/doc/vector.cpp.html @@ -0,0 +1,464 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/vector.cpp.html + + + + +
+
+/**************************************************************************\
+
+MODULE: vector
+
+SUMMARY:
+
+Template class for dynamic-sized vectors.
+
+The declaration
+
+   Vec<T> v;
+
+creates a zero-length vector.  To grow this vector to length n,
+execute
+
+   v.SetLength(n)
+
+This causes space to be allocated for (at least) n elements, and also
+causes the default constructor for T to be called to initialize these
+elements.
+
+The current length of a vector is available as v.length().
+
+The i-th vector element (counting from 0) is accessed as v[i].  If the
+macro NTL_RANGE_CHECK is defined, code is emitted to test if 0 <= i <
+v.length().  This check is not performed by default.
+
+For old-time FORTRAN programmers, the i-th vector element (counting
+from 1) is accessed as v(i).
+
+Let n = v.length().  Calling v.SetLength(m) with m <= n sets the
+current length of v to m (but does not call any destructors or free
+any space).  Calling v.SetLength(m) with m > n will allocate space and
+initialize as necessary, but will leave the values of the already
+allocated elements unchanged (although their addresses may change).
+If T has a user-defined default constructor, that is invoked.
+Otherwise, the new memory locations are "default initialized".
+In particular, this means that POD types may be uninitialized.
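+
+For example (a sketch of these semantics):
+
+   Vec<ZZ> v;
+   v.SetLength(3);   // three ZZ's, each default-constructed to 0
+   v[0] = 5;
+   v.SetLength(1);   // logical length 1; no destructors are called
+   v.SetLength(3);   // v[0] is still 5, and v[1], v[2] keep their old values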
+
+v.MaxLength() is the largest value of n for which v.SetLength(n) was invoked,
+and is equal to the number of entries that have been initialized.
+v.SetMaxLength(n) will allocate space for and initialize up to n elements,
+without changing v.length().
+
+When v's destructor is called, all constructed elements will be
+destructed, and all space will be relinquished.
+
+Space is managed using malloc, realloc, and free.  When a vector is
+grown, a bit more space may be allocated than was requested for
+efficiency reasons.
+
+Note that when a vector is grown, the space is reallocated using
+realloc, and thus the addresses of vector elements may change,
+possibly creating dangling references to vector elements.  One has to
+be especially careful of this when using vectors passed as reference
+parameters that may alias one another.
+
+Because realloc is used to grow a vector, the objects stored
+in a vector should be "relocatable"---that is, they shouldn't care
+what their actual address is, which may change over time.
+Most reasonable objects satisfy this constraint.
+
+v.allocated() is the number of elements which have been allocated,
+which may be more than the number of elements initialized.
+Note that if n <= v.allocated(), then v.SetLength(n) is guaranteed
+not to cause any memory allocation, or movement of objects.
+
+IMPLEMENTATION DETAILS:
+
+A Vec<T> object is just a pointer to the first element of the array.
+There is a control block immediately before the first element that
+keeps track of several parameters:
+   len    -- the logical length of the array (returned by length())
+   init   -- the number of elements constructed (returned by MaxLength())
+   alloc  -- the number of elements for which space has been allocated
+             (returned by allocated())
+   fixed  -- flag that indicates that the length is fixed
+             (returned by fixed())
+
+Note that 0 <= len <= init <= alloc.
+
+COMPARISON TO STL VECTORS:
+
+When the length of an NTL vector is reduced, no objects are destroyed.
+In contrast, when the length of an STL vector is reduced, objects are
+destroyed (effectively, maintaining the invariant len == init).
+
+When the length of an NTL vector is increased, and the new value of len
+exceeds the current value of alloc, the underlying array of objects is
+resized using realloc.  This implies that existing objects are moved using
+a bit-wise copy.  As mentioned above, this means that objects should
+be "relocatable", in the sense that they do not care what their actual
+address is.  Most reasonable objects satisfy this constraint.  An example
+of an object that does not is one that stores in one data member a pointer
+to another data member within the same object.
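+
+For instance, a (purely illustrative) class like the following is not
+relocatable, since a bit-wise move would leave "self" pointing into the
+object's old location:
+
+   struct NotRelocatable {
+      char buf[16];
+      char *self;                       // points into this very object
+      NotRelocatable() : self(buf) { }
+   };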
+
+In contrast, when the length of an STL vector is increased, a new array
+is allocated, and objects from the old array are copied to the new array,
+and then destroyed in the old array.  This obviously is much more expensive
+than NTL's strategy.  However, the new "move semantics", introduced in C++11,
+mitigate this issue somewhat.
+
+Because of NTL's relocatability requirement, it is not recommended to use NTL
+vectors over classes coming from the standard library, which may not satisfy
+the requirement.  In those cases, you could either use an STL vector, or use an
+NTL vector and wrap the suspect classes in an NTL smart pointer of some kind
+(e.g., SmartPtr or CopiedPtr).
+
+Note also that Facebook's open source "folly" library provides a vector
+class that uses realloc in a manner very similar to NTL's vector class.
+See https://github.com/facebook/folly/blob/master/folly/docs/FBVector.md
+
+
+
+\**************************************************************************/
+
+
+// EXCEPTIONS: unless noted otherwise, the functions below do not
+//   throw any exceptions
+
+template<class T>
+class Vec {  
+public:  
+
+   Vec();  // initially length 0
+
+   Vec(const Vec<T>& a);
+   // copy constructor;  uses the assignment operator of T
+   // for copying into locations that have already been initialized,
+   // and uses the copy constructor for T for initializing new locations.
+  
+   // EXCEPTIONS: may throw
+
+   Vec& operator=(const Vec<T>& a);  
+   // assignment;  uses the assignment operator of T
+   // for copying into locations that have already been initialized,
+   // and uses the copy constructor for T for initializing new locations.
+
+   // EXCEPTIONS: weak ES (but if it throws, neither length nor MaxLength
+   //    will change, although some previously initialized elements
+   //    may have been assigned new values).
+
+   ~Vec();  
+   // destructor: calls T's destructor for all initialized
+   // elements in the vector, and then frees the vector itself
+  
+   void SetLength(long n);  
+   // set current length to n, growing vector if necessary
+   // new objects are initialized using the default constructor for T
+
+   // EXCEPTIONS: strong ES (but the vector may have been
+   //    reallocated)
+  
+   void SetLength(long n, const T& a);  
+   // set current length to n, growing vector if necessary
+   // new objects are initialized using the copy constructor for T
+
+   // EXCEPTIONS: strong ES (but the vector may have been
+   //    reallocated)
+
+   template<class F>
+   void SetLengthAndApply(long n, F f);
+   // set current length to n, growing vector if necessary
+   // any new objects are initialized using the default constructor
+   // for T, and after that, f is applied to each new object x
+   // as f(x).
+
+   // EXCEPTIONS: strong ES (but the vector may have been
+   //    reallocated)
+
+   long length() const;
+   // current length
+  
+   T& operator[](long i);
+   const T& operator[](long i) const;
+   // indexing operation, starting from 0.
+   // The first version is applied to non-const Vec<T>,
+   // and returns a non-const reference to a T, while the second version
+   // is applied to a const Vec<T> and returns a const reference to a T.
+
+   // EXCEPTIONS: may throw if range checking turned on, strong ES
+  
+   T& operator()(long i);
+   const T& operator()(long i) const;
+   // indexing operation, starting from 1
+   // The first version is applied to non-const Vec<T>,
+   // and returns a non-const reference to a T, while the second version
+   // is applied to a const Vec<T> and returns a const reference to a T.
+
+   // EXCEPTIONS: may throw if range checking turned on, strong ES
+  
+   T* elts();
+   const T* elts() const;
+   // returns address of first vector element (or 0 if no space has been
+   // allocated for this vector).  If a vector potentially has length 0, it is
+   // safer to write v.elts() instead of &v[0]: the latter is not well defined
+   // by the C++ standard (although this is likely an academic concern).
+   //
+   // The first version is applied to non-const Vec<T>, and returns a non-const
+   // pointer to a T, while the second version is applied to a const Vec<T> and
+   // returns a const pointer to a T.
+
+  
+   void swap(Vec<T>& y);
+   // swap with y (fast: just swaps pointers)
+
+   // EXCEPTIONS: throws if vectors are fixed and lengths do not match, strong ES
+
+   void append(const T& a);
+   // append a to end of vector; uses the assignment operator of T
+   // for copying into locations that have already been initialized,
+   // and uses the copy constructor for T for initializing new locations.
+
+   // EXCEPTIONS: strong ES if initializing a new element (and in any
+   //    case, if an exception throws, length and MaxLength remain
+   //    unchanged).
+
+   void append(const Vec<T>& w);
+   // append w to end of vector; uses the assignment operator of T
+   // for copying into locations that have already been initialized,
+   // and uses the copy constructor for T for initializing new locations.
+
+   // EXCEPTIONS: strong ES if initializing new elements (and in any
+   //    case, if an exception throws, length and MaxLength remain
+   //    unchanged).
+
+
+// Alternative access interface
+
+   const T& get(long i) const;
+   // v.get(i) returns v[i]

+   void put(long i, const T& a);
+   // v.put(i, a) is equivalent to v[i] = a
+
+
+
+// Some STL compatibility
+
+   typedef T value_type;
+   typedef value_type& reference;
+   typedef const value_type& const_reference;
+   typedef value_type *iterator;
+   typedef const value_type *const_iterator;
+
+   T* data();
+   const T* data() const;
+   // v.data() same as v.elts()
+
+   T* begin();
+   const T* begin() const;
+   // v.begin() same as v.elts()
+
+   T* end();
+   const T* end() const;
+   // pointer to one past the last element (or NULL)
+
+   T& at(long i);
+   const T& at(long i) const;
+   // indexing with range checking
+
+
+// the remaining member functions are a bit esoteric (skip on first
+// reading)
+
+   Vec(INIT_SIZE_TYPE, long n);
+   // Vec(INIT_SIZE, n) initializes the vector with an initial length of n.
+   // new objects are initialized using the default constructor for T
+
+   // EXCEPTIONS: may throw
+
+   Vec(INIT_SIZE_TYPE, long n, const T& a);
+   // Vec(INIT_SIZE, n, a) initializes the vector with an initial length of n.
+   // new objects are initialized using the copy constructor for T
+
+   // EXCEPTIONS: may throw
+
+   void kill();
+   // release space and set to length 0
+
+   void SetMaxLength(long n);
+   // allocates space and initializes up to n elements. Does not change
+   // current length
+
+   // EXCEPTIONS: may throw, strong ES
+
+   void FixLength(long n);
+   // sets length to n and prohibits all future length changes.
+   // FixLength may only be invoked immediately after the default
+   // construction or kill.
+
+   // The kill operation is also subsequently prohibited, and swap is
+   // allowed on fixed length vectors of the same length.
+
+   // FixLength is provided mainly to implement Mat<T>, to enforce
+   // the restriction that all rows have the same length.
+
+   // EXCEPTIONS: may throw, strong ES
+
+   void FixAtCurrentLength();
+   // fixes the length at the current length and prohibits
+   // all future length changes.  
+
+   // It is required that length() == MaxLength() when called.
+
+   // EXCEPTIONS: if length() != MaxLength(), an error is raised;
+   // if length() == 0, a memory allocation error may be raised.
+   // Strong ES.
+
+   long fixed() const;
+   // test if length has been fixed by FixLength() or FixAtCurrentLength()
+
+   long MaxLength() const;
+   // maximum length, i.e., number of allocated and initialized elements
+
+   long allocated() const;
+   // the number of objects for which space has been allocated, but not
+   // necessarily initialized;  this may be larger than MaxLength().
+
+   T& RawGet(long i);
+   const T& RawGet(long i) const;
+   // indexing with no range checking
+
+   long position(const T& a) const;
+   // returns position of a in the vector, or -1 if it is not there.
+   // The search is conducted from position 0 to allocated()-1 of the vector,
+   // and an error is raised if the object is found at position MaxLength()
+   // or higher (in which case a references an uninitialized object).
+   // Note that if NTL_CLEAN_PTR flag is set, this routine takes
+   // linear time, and otherwise, it takes constant time.
+
+   // EXCEPTIONS: may throw (as indicated above)
+
+   long position1(const T& a) const;
+   // returns position of a in the vector, or -1 if it is not there.
+   // The search is conducted from position 0 to length()-1 of the vector.
+   // Note that if NTL_CLEAN_PTR flag is set, this routine takes
+   // linear time, and otherwise, it takes constant time.
+        
+};  
+
+
+/**************************************************************************\
+
+                       Some utility routines
+
+\**************************************************************************/
+
+  
+template<class T>
+void swap(Vec<T>& x, Vec<T>& y);
+// swaps x & y; same as x.swap(y)
+
+// EXCEPTIONS: same as for swap member function
+
+template<class T>
+void append(Vec<T>& v, const T& a);
+// appends a to the end of v; same as v.append(a)
+
+// EXCEPTIONS: same as for append member function
+
+template<class T>
+void append(Vec<T>& v, const Vec<T>& w);
+// appends w to the end of v; same as v.append(w)
+
+// EXCEPTIONS: same as for append member function
+
+
+
+/**************************************************************************\
+
+                             Input/Output
+
+
+The I/O format for a vector v with n elements is:
+
+   [v[0] v[1] ... v[n-1]]
+
+Uses corresponding I/O operators for T
+
+\**************************************************************************/
+
+template<class T>
+istream& operator>>(istream&, Vec<T>&);  
+
+// EXCEPTIONS: may throw, weak ES
+
+template<class T>
+ostream& operator<<(ostream&, const Vec<T>&);  
+
+// EXCEPTIONS: may throw, weak ES
+
+
+
+/**************************************************************************\
+
+                              Equality Testing
+
+\**************************************************************************/
+
+
+template<class T>
+long operator==(const Vec<T>& a, const Vec<T>& b);  
+
+template<class T>
+long operator!=(const Vec<T>& a, const Vec<T>& b);
+
+
+/**************************************************************************\
+
+                  Customized Constructors and Destructors
+ 
+Esoteric: skip on first reading...also these interfaces are subject to change
+
+When new elements in a vector need to be constructed, one of the
+following routines is called:
+
+   void BlockConstruct(T* p, long n);
+   // invokes T() to initialize p[i] for i = 0..n-1
+
+   void BlockConstructFromVec(T* p, long n, const T* q);
+   // invokes T(q[i]) to initialize p[i] for i = 0..n-1;
+   // q points to elements from a Vec<T>
+
+   void BlockConstructFromObj(T* p, long n, const T& q);
+   // invokes T(q) to initialize p[i] for i = 0..n-1
+
+
+When a vector is destroyed, the following routine is called:
+
+   void BlockDestroy(T* p, long n);
+   // invokes ~T() on p[i] for i = 0..n-1
+
+The default behavior of these routines may be modified by
+overloading these functions with a custom implementation.
+
+EXCEPTIONS:
+In order to provide exception safe code, the Construct routines
+should provide strong ES; in particular, if any constructor
+throws, all newly constructed objects should be destroyed.
+Moreover, the BlockDestroy routine should not throw at all.
+
+
+In NTL, these routines are overridden for the ZZ_p and GF2E classes,
+so that many vector entries will be packed into contiguous storage
+locations.  This reduces the number of invocations of malloc, and
+increases locality of reference.
+
+
+
+\**************************************************************************/
+
+
+ diff --git a/thirdparty/linux/ntl/doc/vector.txt b/thirdparty/linux/ntl/doc/vector.txt new file mode 100644 index 0000000000..7df6ca9144 --- /dev/null +++ b/thirdparty/linux/ntl/doc/vector.txt @@ -0,0 +1,454 @@ + + +/**************************************************************************\ + +MODULE: vector + +SUMMARY: + +Template class for dynamic-sized vectors. + +The declaration + + Vec v; + +creates a zero-length vector. To grow this vector to length n, +execute + + v.SetLength(n) + +This causes space to be allocated for (at least) n elements, and also +causes the delault constructor for T to be called to initialize these +elements. + +The current length of a vector is available as v.length(). + +The i-th vector element (counting from 0) is accessed as v[i]. If the +macro NTL_RANGE_CHECK is defined, code is emitted to test if 0 <= i < +v.length(). This check is not performed by default. + +For old-time FORTRAN programmers, the i-th vector element (counting +from 1) is accessed as v(i). + +Let n = v.length(). Calling v.SetLength(m) with m <= n sets the +current length of v to m (but does not call any destructors or free +any space). Calling v.SetLength(m) with m > n will allocate space and +initialize as necessary, but will leave the values of the already +allocated elements unchanged (although their addresses may change). +If T has a user-defined default constructor, that is invoked. +Otherwise, the new memory locations are "default initialized". +In particular, this means that POD types may be uninitialized. + +v.MaxLength() is the largest value of n for which v.SetLength(n) was invoked, +and is equal to the number of entries that have been initialized. +v.SetMaxLength(n) will allocate space for and initialize up to n elements, +without changing v.length(). + +When v's destructor is called, all constructed elements will be +destructed, and all space will be relinquished. + +Space is managed using malloc, realloc, and free. When a vector is +grown, a bit more space may be allocated than was requested for +efficiency reasons. + +Note that when a vector is grown, the space is reallocated using +realloc, and thus the addresses of vector elements may change, +possibly creating dangling references to vector elements. One has to +be especially careful of this when using vectors passed as reference +parameters that may alias one another. + +Because realloc is used to grow a vector, the objects stored +in a vector should be "relocatable"---that is, they shouldn't care +what their actual address is, which may change over time. +Most reasonable objects satisfy this constraint. + +v.allocated() is the number of elements which have been allocated, +which may be more than the number elements initialized. +Note that if n <= v.allocated(), then v.SetLength(n) is guaranteed +not to cause any memory allocation, or movement of objects. + +IMPLEMENTATION DETAILS: + +A Vec object is just a pointer to the first element of the array. +There is a control block immediately before the first element that +keeps track of several parameters: + len -- the logical length of the array (returned by length()) + init -- the number of elements constructed (returned ny MaxLength()) + alloc -- the number of elements for which space has been allocated + (returned by allocated()) + fixed -- flag that indicates that the length is fixed + (returned by fixed()) + +Note that 0 <= len <= init <- alloc + +COMPARISON TO STL VECTORS: + +When the length of an NTL vector is reduced, no objects are destroyed. 
+In contrast, when the length of an STL vector is reduced, objects are +destroyed (effectively, maintaining the invariant len == init). + +When the length of an NTL vector is increased, and the new value of len +exceeds the current value of alloc, the underying array of objects is +resized using malloc. This implies that existing objects are moved using +a bit-wise copy. As mentioned above, this means that objects should +be "relocatable", in the sense that they do not care what their actual +address is. Most reasonable objects satisfy this constraint. An example +of an object that does not is one that stores in one data member a pointer +to another data member within the same object. + +In contrast, when the length of an STL vector is increased, an new array +is allocated, and objects from the old array are copied to the new array, +and then destroyed in the old array. This obviously is much more expensive +that NTL's strategy. However, the new "move semantics", introduced in C++11, +mitigate this issue somewhat. + +Because of NTL's relocatability requirement, it is not recommended to use NTL +vectors over classes coming from the standard library, which may not satisfy +the requirement. In those cases, you could either use an STL vector, or use an +NTL vector and wrap the suspect classes in an NTL smart pointer of some kind +(e.g., SmartPtr or CopiedPtr). + +Note also that Facebook's open source "folly" library also provides +a vector class that uses realloc in a manner very similar to NTL's vector class. +See https://github.com/facebook/folly/blob/master/folly/docs/FBVector.md + + + +\**************************************************************************/ + + +// EXCEPTIONS: all functions below do not throw any exceptions, +// except as noted + +template +class Vec { +public: + + Vec(); // initially length 0 + + Vec(const Vec& a); + // copy constructor; uses the assignment operator of T + // for copying into locations that have already been initialized, + // and uses the copy constructor for T for initializing new locations. + + // EXCEPTIONS: may throw + + Vec& operator=(const Vec& a); + // assignment; uses the assignment operator of T + // for copying into locations that have already been initialized, + // and uses the copy constructor for T for initializing new locations. + + // EXCEPTIONS: weak ES (but if it throws, neither length nor MaxLength + // will change, although some previously initialized elements + // may have been assigned new values). + + ~Vec(); + // destructor: calls T's destructor for all initialized + // elements in the vector, and then frees the vector itself + + void SetLength(long n); + // set current length to n, growing vector if necessary + // new objects are initialized using the default contructor for T + + // EXCEPTIONS: strong ES (but the vector may have been + // reallocated) + + void SetLength(long n, const T& a); + // set current length to n, growing vector if necessary + // new objects are initialized using the copy contructor for T + + // EXCEPTIONS: strong ES (but the vector may have been + // reallocated) + + template + void SetLengthAndApply(long n, F f); + // set current length to n, growing vector if necessary + // any new objects are initialized using defauly constructor + // for T, and after that, f is applied to each new object x + // as f(x). 
+
+   // EXCEPTIONS: strong ES (but the vector may have been
+   // reallocated)
+
+   long length() const;
+   // current length
+
+   T& operator[](long i);
+   const T& operator[](long i) const;
+   // indexing operation, starting from 0.
+   // The first version is applied to non-const Vec,
+   // and returns a non-const reference to a T, while the second version
+   // is applied to a const Vec and returns a const reference to a T.
+
+   // EXCEPTIONS: may throw if range checking turned on, strong ES
+
+   T& operator()(long i);
+   const T& operator()(long i) const;
+   // indexing operation, starting from 1
+   // The first version is applied to non-const Vec,
+   // and returns a non-const reference to a T, while the second version
+   // is applied to a const Vec and returns a const reference to a T.
+
+   // EXCEPTIONS: may throw if range checking turned on, strong ES
+
+   T* elts();
+   const T* elts() const;
+   // returns address of first vector element (or 0 if no space has been
+   // allocated for this vector).  If a vector potentially has length 0, it is
+   // safer to write v.elts() instead of &v[0]: the latter is not well defined
+   // by the C++ standard (although this is likely an academic concern).
+   //
+   // The first version is applied to non-const Vec, and returns a non-const
+   // pointer to a T, while the second version is applied to a const Vec and
+   // returns a const pointer to a T.
+
+
+   void swap(Vec& y);
+   // swap with y (fast: just swaps pointers)
+
+   // EXCEPTIONS: throws if vectors are fixed and lengths do not match, strong ES
+
+   void append(const T& a);
+   // append a to end of vector; uses the assignment operator of T
+   // for copying into locations that have already been initialized,
+   // and uses the copy constructor for T for initializing new locations.
+
+   // EXCEPTIONS: strong ES if initializing a new element (and in any
+   // case, if an exception throws, length and MaxLength remain
+   // unchanged).
+
+   void append(const Vec& w);
+   // append w to end of vector; uses the assignment operator of T
+   // for copying into locations that have already been initialized,
+   // and uses the copy constructor for T for initializing new locations.
+
+   // EXCEPTIONS: strong ES if initializing new elements (and in any
+   // case, if an exception throws, length and MaxLength remain
+   // unchanged).
+
+
+// Alternative access interface
+
+   const T& get(long i) const;
+   // v.get(i) returns v[i]
+
+   void put(long i, const T& a);
+   // v.put(i, a) equivalent to v[i] = a
+
+
+
+// Some STL compatibility
+
+   typedef T value_type;
+   typedef value_type& reference;
+   typedef const value_type& const_reference;
+   typedef value_type *iterator;
+   typedef const value_type *const_iterator;
+
+   T* data();
+   const T* data() const;
+   // v.data() same as v.elts()
+
+   T* begin();
+   const T* begin() const;
+   // v.begin() same as v.elts()
+
+   T* end();
+   const T* end() const;
+   // pointer to one past the last element (or NULL)
+
+   T& at(long i);
+   const T& at(long i) const;
+   // indexing with range checking
+
+
+// the remaining member functions are a bit esoteric (skip on first
+// reading)
+
+   Vec(INIT_SIZE_TYPE, long n);
+   // Vec(INIT_SIZE, n) initializes vector with an initial length of n.
+   // new objects are initialized using the default constructor for T
+
+   // EXCEPTIONS: may throw
+
+   Vec(INIT_SIZE_TYPE, long n, const T& a);
+   // Vec(INIT_SIZE, n, a) initializes vector with an initial length of n.
+   // new objects are initialized using the copy constructor for T
+
+   // EXCEPTIONS: may throw
+
+   void kill();
+   // release space and set to length 0
+
+   void SetMaxLength(long n);
+   // allocates space and initializes up to n elements.  Does not change
+   // current length
+
+   // EXCEPTIONS: may throw, strong ES
+
+   void FixLength(long n);
+   // sets length to n and prohibits all future length changes.
+   // FixLength may only be invoked immediately after the default
+   // construction or kill.
+
+   // The kill operation is also subsequently prohibited, and swap is
+   // allowed on fixed length vectors of the same length.
+
+   // FixLength is provided mainly to implement Mat<T>, to enforce
+   // the restriction that all rows have the same length.
+
+   // EXCEPTIONS: may throw, strong ES
+
+   void FixAtCurrentLength();
+   // fixes the length at the current length and prohibits
+   // all future length changes.
+
+   // It is required that length() == MaxLength() when called.
+
+   // EXCEPTIONS: if length() != MaxLength(), an error is raised;
+   // if length() == 0, a memory allocation error may be raised.
+   // Strong ES.
+
+   long fixed() const;
+   // test if length has been fixed by FixLength() or FixAtCurrentLength()
+
+   long MaxLength() const;
+   // maximum length, i.e., number of allocated and initialized elements
+
+   long allocated() const;
+   // the number of objects for which space has been allocated, but not
+   // necessarily initialized; this may be larger than MaxLength().
+
+   T& RawGet(long i);
+   const T& RawGet(long i) const;
+   // indexing with no range checking
+
+   long position(const T& a) const;
+   // returns position of a in the vector, or -1 if it is not there.
+   // The search is conducted from position 0 to allocated()-1 of the vector,
+   // and an error is raised if the object is found at position MaxLength()
+   // or higher (in which case a references an uninitialized object).
+   // Note that if NTL_CLEAN_PTR flag is set, this routine takes
+   // linear time, and otherwise, it takes constant time.
+
+   // EXCEPTIONS: may throw (as indicated above)
+
+   long position1(const T& a) const;
+   // returns position of a in the vector, or -1 if it is not there.
+   // The search is conducted from position 0 to length()-1 of the vector.
+   // Note that if NTL_CLEAN_PTR flag is set, this routine takes
+   // linear time, and otherwise, it takes constant time.
+
+};
+
+
+/**************************************************************************\
+
+                       Some utility routines
+
+\**************************************************************************/
+
+
+template<class T>
+void swap(Vec<T>& x, Vec<T>& y);
+// swaps x & y; same as x.swap(y)
+
+// EXCEPTIONS: same as for swap member function
+
+template<class T>
+void append(Vec<T>& v, const T& a);
+// appends a to the end of v; same as v.append(a)
+
+// EXCEPTIONS: same as for append member function
+
+template<class T>
+void append(Vec<T>& v, const Vec<T>& w);
+// appends w to the end of v; same as v.append(w)
+
+// EXCEPTIONS: same as for append member function
+
+
+
+/**************************************************************************\
+
+                             Input/Output
+
+
+The I/O format for a vector v with n elements is:
+
+   [v[0] v[1] ...
+    v[n-1]]
+
+Uses corresponding I/O operators for T
+
+\**************************************************************************/
+
+template<class T>
+istream& operator>>(istream&, Vec<T>&);
+
+// EXCEPTIONS: may throw, weak ES
+
+template<class T>
+ostream& operator<<(ostream&, const Vec<T>&);
+
+// EXCEPTIONS: may throw, weak ES
+
+
+
+/**************************************************************************\
+
+                            Equality Testing
+
+\**************************************************************************/
+
+
+template<class T>
+long operator==(const Vec<T>& a, const Vec<T>& b);
+
+template<class T>
+long operator!=(const Vec<T>& a, const Vec<T>& b);
+
+
+/**************************************************************************\
+
+              Customized Constructors and Destructors
+
+Esoteric: skip on first reading...also these interfaces are subject to change
+
+When new elements in a vector need to be constructed, one of the
+following routines is called:
+
+   void BlockConstruct(T* p, long n);
+   // invokes T() to initialize p[i] for i = 0..n-1
+
+   void BlockConstructFromVec(T* p, long n, const T* q);
+   // invokes T(q[i]) to initialize p[i] for i = 0..n-1;
+   // q points to elements from a Vec<T>
+
+   void BlockConstructFromObj(T* p, long n, const T& q);
+   // invokes T(q) to initialize p[i] for i = 0..n-1
+
+
+When a vector is destroyed, the following routine is called:
+
+   void BlockDestroy(T* p, long n);
+   // invokes ~T() on p[i] for i = 0..n-1
+
+The default behavior of these routines may be modified by
+overloading these functions with a custom implementation.
+
+EXCEPTIONS:
+In order to provide exception safe code, the Construct routines
+should provide strong ES; in particular, if any constructor
+throws, all newly constructed objects should be destroyed.
+Moreover, the BlockDestroy routine should not throw at all.
+
+
+In NTL, these routines are overridden for the ZZ_p and GF2E classes,
+so that many vector entries will be packed into contiguous storage
+locations.  This reduces the number of invocations of malloc, and
+increases locality of reference.
+
+
+
+\**************************************************************************/
+
diff --git a/thirdparty/linux/ntl/doc/version.cpp.html b/thirdparty/linux/ntl/doc/version.cpp.html
new file mode 100644
index 0000000000..97002de8f1
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/version.cpp.html
@@ -0,0 +1,37 @@
+
+
+
+/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/version.cpp.html
+
+
+
+
+/**************************************************************************\
+
+MODULE: version
+
+SUMMARY:
+
+Macros defining the NTL version number.
+
+\**************************************************************************/
+
+
+
+#define NTL_VERSION        ... // version number as a string, e.g., "5.2"
+
+#define NTL_MAJOR_VERSION  ... // e.g., 5 in the above example
+#define NTL_MINOR_VERSION  ... // e.g., 2        "
+#define NTL_REVISION       ... // e.g., 0        "
+
+// The choice as to whether a new version warrants a higher
+// Major version number or Minor version number is fairly subjective,
+// with no particular rule.
+
+// Revision numbers are only used for small bug fixes that generally
+// do not affect the programming interface at all.
+
+
+
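Since the macros expand to numeric literals (except NTL_VERSION, which is a
string), they can be used to gate code at preprocessing time. A minimal
sketch, assuming the macros are made visible by including <NTL/version.h>:

   #include <NTL/version.h>

   #if (NTL_MAJOR_VERSION > 9) || \
       (NTL_MAJOR_VERSION == 9 && NTL_MINOR_VERSION >= 11)
      // code that relies on a 9.11-or-later interface
   #else
      // fallback for older releases
   #endif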
+ diff --git a/thirdparty/linux/ntl/doc/version.txt b/thirdparty/linux/ntl/doc/version.txt new file mode 100644 index 0000000000..041ee24eb6 --- /dev/null +++ b/thirdparty/linux/ntl/doc/version.txt @@ -0,0 +1,27 @@ + +/**************************************************************************\ + +MODULE: version + +SUMMARY: + +Macros defining the NTL version number. + +\**************************************************************************/ + + + +#define NTL_VERSION ... // version number as a string, e.g., "5.2" + +#define NTL_MAJOR_VERSION ... // e.g., 5 in the above example +#define NTL_MINOR_VERSION ... // e.g., 2 " +#define NTL_REVISION ... // e.g., 0 " + +// The choice as to whether a new version warrants a higher +// Major version number or Minor version number is fairly subjective, +// with no particular rule. + +// Revision numbers are only used for small bug fixes that generally +// do not affect the programming interface at all. + + diff --git a/thirdparty/linux/ntl/doc/xdouble.cpp.html b/thirdparty/linux/ntl/doc/xdouble.cpp.html new file mode 100644 index 0000000000..6752546628 --- /dev/null +++ b/thirdparty/linux/ntl/doc/xdouble.cpp.html @@ -0,0 +1,215 @@ + + + +/Volumes/unix-files/u/ntl-new/ntl-9.11.0dev/doc/xdouble.cpp.html + + + + +
+
+/**************************************************************************\
+
+MODULE: xdouble
+
+SUMMARY:
+
+The class xdouble is used to represent floating point numbers with the
+same precision as a 'double', but with extended exponent range
+(offering a few more bits than that of a 'long' for the exponent).
+
+The programming interface for xdoubles is almost identical to that of
+ordinary doubles.
+
+
+\**************************************************************************/
+
+#include <NTL/ZZ.h>
+
+
+class xdouble {
+
+public:
+
+xdouble(); // = 0
+
+xdouble(const xdouble& a);  // copy constructor
+
+explicit xdouble(double a);  // promotion constructor
+
+xdouble& operator=(const xdouble& a);  // assignment operator
+xdouble& operator=(double a);
+
+~xdouble();
+
+
+double mantissa() const;  // read-only access to mantissa
+long exponent() const;  // read-only access to exponent
+
+
+
+static void SetOutputPrecision(long p);
+// This sets the number of decimal digits to be output.  Default is
+// 10.
+
+static long OutputPrecision();
+// returns current output precision.
+
+};
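A short sketch of the precision controls just declared (to_xdouble is the
usual NTL conversion routine; the precision value is illustrative):

   xdouble x = to_xdouble(1)/3;
   long old_p = xdouble::OutputPrecision();  // save current setting (default 10)
   xdouble::SetOutputPrecision(25);
   cout << x << "\n";                        // printed with ~25 decimal digits
   xdouble::SetOutputPrecision(old_p);       // restore previous setting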
+
+
+
+/**************************************************************************\
+
+                             Arithmetic Operations
+
+The following are the standard arithmetic operators, whose meaning should
+be clear.
+
+\**************************************************************************/
+
+
+xdouble operator+(const xdouble& a, const xdouble& b);
+xdouble operator-(const xdouble& a, const xdouble& b);
+xdouble operator*(const xdouble& a, const xdouble& b);
+xdouble operator/(const xdouble& a, const xdouble& b);
+
+// PROMOTIONS: +, -, *, / promote double to xdouble on (a, b).
+
+xdouble operator-(const xdouble& a);
+
+xdouble& operator+=(xdouble& a, const xdouble& b);
+xdouble& operator+=(xdouble& a, double b);
+
+xdouble& operator-=(xdouble& a, const xdouble& b);
+xdouble& operator-=(xdouble& a, double b);
+
+xdouble& operator*=(xdouble& a, const xdouble& b);
+xdouble& operator*=(xdouble& a, double b);
+
+xdouble& operator/=(xdouble& a, const xdouble& b);
+xdouble& operator/=(xdouble& a, double b);
+
+xdouble& operator++(xdouble& a); // prefix
+void operator++(xdouble& a, int); // postfix
+
+xdouble& operator--(xdouble& a); // prefix
+void operator--(xdouble& a, int); // postfix
+
+
+
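Because double promotes to xdouble in mixed expressions, ordinary
floating-point code ports over essentially unchanged; a small sketch
(again assuming the to_xdouble conversion):

   xdouble a = to_xdouble(2.0);
   xdouble b = a*a + 1.5;   // 1.5 (a double) is promoted to xdouble
   b /= 2.0;                // mixed compound assignment, as declared above
   b++;                     // postfix increment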
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+long sign(const xdouble& a);
+// returns sign (+1, -1, 0) of a
+
+long compare(const xdouble& a, const xdouble& b);
+// returns sign of a - b
+
+long operator==(const xdouble& a, const xdouble& b);
+long operator!=(const xdouble& a, const xdouble& b);
+long operator<=(const xdouble& a, const xdouble& b);
+long operator>=(const xdouble& a, const xdouble& b);
+long operator <(const xdouble& a, const xdouble& b);
+long operator >(const xdouble& a, const xdouble& b);
+
+// PROMOTIONS: compare and operators ==, ..., > promote double to xdouble
+// on (a, b).
+
+
+
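A sketch of the comparison interface; note that compare returns the sign of
a - b, so it can be used to order values directly:

   xdouble a = to_xdouble(3.0), b = to_xdouble(4.0);
   if (a < b) { /* taken here; a < 4.0 also works, via promotion */ }
   long s = compare(a, b);   // -1 here, since a - b < 0
   long z = sign(a - b);     // same value as s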
+/**************************************************************************\
+
+                               Input/Output
+Input Syntax:
+
+<number>: [ "-" ] <unsigned-number>
+<unsigned-number>: <dotted-number> [ <e-part> ] | <e-part>
+<dotted-number>: <digits> | <digits> "." <digits> | "." <digits> | <digits> "."
+<digits>: <digit> <digits> | <digit>
+<digit>: "0" | ... | "9"
+<e-part>: ( "E" | "e" ) [ "+" | "-" ] <digits>
+
+Examples of valid input:
+
+17 1.5 0.5 .5  5.  -.5 e10 e-10 e+10 1.5e10 .5e10 .5E10
+
+Note that the number of decimal digits of precision that are used
+for output can be set to any number p >= 1 by calling
+the routine xdouble::SetOutputPrecision(p).  
+The default value of p is 10.
+The current value of p is returned by a call to xdouble::OutputPrecision().
+
+\**************************************************************************/
+
+
+
+ostream& operator<<(ostream& s, const xdouble& a);
+
+istream& operator>>(istream& s, xdouble& x);
+
+
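Given the grammar below, reading and writing xdoubles works just like
built-in floating point; a sketch:

   xdouble x;
   cin >> x;           // accepts, e.g., "1.5e10", ".5E10", or "e-10"
   cout << x << "\n";  // printed with OutputPrecision() decimal digits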
+/**************************************************************************\
+
+                                  Miscellaneous
+
+\**************************************************************************/
+
+
+
+xdouble trunc(const xdouble& a);  // returns integer obtained by truncating
+xdouble floor(const xdouble& a);  // returns greatest integer <= a
+xdouble ceil(const xdouble& a);   // returns smallest integer >= a
+xdouble fabs(const xdouble& a);   // returns |a|
+xdouble sqrt(const xdouble& a);   // returns a^{1/2}; error is raised if a < 0
+
+double log(const xdouble& a);  // returns log(a) (note return val is double!)
+xdouble xexp(double a);        // returns exp(a) (note argument is double!)
+xdouble exp(const xdouble& a); // equivalent to xexp(to_double(a))
+
+
+void power(xdouble& z, const xdouble& a, const ZZ& e);
+xdouble power(const xdouble& a, const ZZ& e);
+
+void power(xdouble& z, const xdouble& a, long e);
+xdouble power(const xdouble& a, long e);
+// z = a^e, e may be negative
+
+void power2(xdouble& z, long e);
+xdouble power2_xdouble(long e);
+// z = 2^e, e may be negative
+
+void MulAdd(xdouble& z, const xdouble& a, const xdouble& b, const xdouble& c);
+xdouble MulAdd(const xdouble& a, const xdouble& b, const xdouble& c);
+// z = a + b*c, but faster
+
+void MulSub(xdouble& z, const xdouble& a, const xdouble& b, const xdouble& c);
+xdouble MulSub(const xdouble& a, const xdouble& b, const xdouble& c);
+// z = a - b*c, but faster
+
+
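The fused forms above may save a normalization pass compared with writing
a + b*c as two separate operations; a sketch (performance, of course, varies):

   xdouble z, w;
   xdouble a = to_xdouble(1.0), b = to_xdouble(2.0), c = to_xdouble(3.0);
   MulAdd(z, a, b, c);           // z = a + b*c == 7
   w = MulSub(a, b, c);          // w = a - b*c == -5
   power2(z, -100000);           // z = 2^{-100000}, far outside double range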
+/**************************************************************************\
+
+Implementation details:
+
+An xdouble is represented as a mantissa/exponent pair (x, e), where x
+is a double and e is a long.  The real number represented by (x, e) is
+x * NTL_XD_BOUND^e, where
+
+  NTL_XD_BOUND = NTL_XD_HBOUND^2, and
+  NTL_XD_HBOUND = 2^{(max(NTL_DOUBLE_PRECISION,NTL_BITS_PER_LONG)+4)}.
+
+Also, the mantissa x satisfies 1/NTL_XD_HBOUND <= |x| <= NTL_XD_HBOUND, except
+that the number 0 is always represented as (0, 0).  
+Both NTL_XD_BOUND and NTL_XD_HBOUND are macros defined in <NTL/xdouble.h>.
+
+SIZE INVARIANT: |e| < 2^(NTL_BITS_PER_LONG-4).
+
+\**************************************************************************/
+
+
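A sketch of the main point of xdouble, exponent ranges far beyond a double's
(the base B printed below is NTL_XD_BOUND, per the implementation notes above):

   xdouble p = power2_xdouble(1000000);      // 2^1000000; a double would overflow
   cout << p.mantissa() << " * B^" << p.exponent() << "\n";
   double lg2 = log(p)/log(to_xdouble(2.0)); // ~1000000, computed as a double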
+
diff --git a/thirdparty/linux/ntl/doc/xdouble.txt b/thirdparty/linux/ntl/doc/xdouble.txt
new file mode 100644
index 0000000000..e41d868452
--- /dev/null
+++ b/thirdparty/linux/ntl/doc/xdouble.txt
@@ -0,0 +1,205 @@
+
+
+/**************************************************************************\
+
+MODULE: xdouble
+
+SUMMARY:
+
+The class xdouble is used to represent floating point numbers with the
+same precision as a 'double', but with extended exponent range
+(offering a few more bits than that of a 'long' for the exponent).
+
+The programming interface for xdoubles is almost identical to that of
+ordinary doubles.
+
+
+\**************************************************************************/
+
+#include <NTL/ZZ.h>
+
+
+class xdouble {
+
+public:
+
+xdouble(); // = 0
+
+xdouble(const xdouble& a);  // copy constructor
+
+explicit xdouble(double a);  // promotion constructor
+
+xdouble& operator=(const xdouble& a);  // assignment operator
+xdouble& operator=(double a);
+
+~xdouble();
+
+
+double mantissa() const;  // read-only access to mantissa
+long exponent() const;  // read-only access to exponent
+
+
+
+static void SetOutputPrecision(long p);
+// This sets the number of decimal digits to be output.  Default is
+// 10.
+
+static long OutputPrecision();
+// returns current output precision.
+
+};
+
+
+
+/**************************************************************************\
+
+                             Arithmetic Operations
+
+The following are the standard arithmetic operators, whose meaning should
+be clear.
+
+\**************************************************************************/
+
+
+xdouble operator+(const xdouble& a, const xdouble& b);
+xdouble operator-(const xdouble& a, const xdouble& b);
+xdouble operator*(const xdouble& a, const xdouble& b);
+xdouble operator/(const xdouble& a, const xdouble& b);
+
+// PROMOTIONS: +, -, *, / promote double to xdouble on (a, b).
+
+xdouble operator-(const xdouble& a);
+
+xdouble& operator+=(xdouble& a, const xdouble& b);
+xdouble& operator+=(xdouble& a, double b);
+
+xdouble& operator-=(xdouble& a, const xdouble& b);
+xdouble& operator-=(xdouble& a, double b);
+
+xdouble& operator*=(xdouble& a, const xdouble& b);
+xdouble& operator*=(xdouble& a, double b);
+
+xdouble& operator/=(xdouble& a, const xdouble& b);
+xdouble& operator/=(xdouble& a, double b);
+
+xdouble& operator++(xdouble& a); // prefix
+void operator++(xdouble& a, int); // postfix
+
+xdouble& operator--(xdouble& a); // prefix
+void operator--(xdouble& a, int); // postfix
+
+
+
+/**************************************************************************\
+
+                                  Comparison
+
+\**************************************************************************/
+
+long sign(const xdouble& a);
+// returns sign (+1, -1, 0) of a
+
+long compare(const xdouble& a, const xdouble& b);
+// returns sign of a - b
+
+long operator==(const xdouble& a, const xdouble& b);
+long operator!=(const xdouble& a, const xdouble& b);
+long operator<=(const xdouble& a, const xdouble& b);
+long operator>=(const xdouble& a, const xdouble& b);
+long operator <(const xdouble& a, const xdouble& b);
+long operator >(const xdouble& a, const xdouble& b);
+
+// PROMOTIONS: compare and operators ==, ..., > promote double to xdouble
+// on (a, b).
+
+
+
+/**************************************************************************\
+
+                               Input/Output
+Input Syntax:
+
+<number>: [ "-" ] <unsigned-number>
+<unsigned-number>: <dotted-number> [ <e-part> ] | <e-part>
+<dotted-number>: <digits> | <digits> "." <digits> | "." <digits> | <digits> "."
+<digits>: <digit> <digits> | <digit>
+<digit>: "0" | ... | "9"
+<e-part>: ( "E" | "e" ) [ "+" | "-" ] <digits>
+
+Examples of valid input:
+
+17 1.5 0.5 .5  5.
+  -.5 e10 e-10 e+10 1.5e10 .5e10 .5E10
+
+Note that the number of decimal digits of precision that are used
+for output can be set to any number p >= 1 by calling
+the routine xdouble::SetOutputPrecision(p).
+The default value of p is 10.
+The current value of p is returned by a call to xdouble::OutputPrecision().
+
+\**************************************************************************/
+
+
+
+ostream& operator<<(ostream& s, const xdouble& a);
+
+istream& operator>>(istream& s, xdouble& x);
+
+
+/**************************************************************************\
+
+                                  Miscellaneous
+
+\**************************************************************************/
+
+
+
+xdouble trunc(const xdouble& a);  // returns integer obtained by truncating
+xdouble floor(const xdouble& a);  // returns greatest integer <= a
+xdouble ceil(const xdouble& a);   // returns smallest integer >= a
+xdouble fabs(const xdouble& a);   // returns |a|
+xdouble sqrt(const xdouble& a);   // returns a^{1/2}; error is raised if a < 0
+
+double log(const xdouble& a);  // returns log(a) (note return val is double!)
+xdouble xexp(double a);        // returns exp(a) (note argument is double!)
+xdouble exp(const xdouble& a); // equivalent to xexp(to_double(a))
+
+
+void power(xdouble& z, const xdouble& a, const ZZ& e);
+xdouble power(const xdouble& a, const ZZ& e);
+
+void power(xdouble& z, const xdouble& a, long e);
+xdouble power(const xdouble& a, long e);
+// z = a^e, e may be negative
+
+void power2(xdouble& z, long e);
+xdouble power2_xdouble(long e);
+// z = 2^e, e may be negative
+
+void MulAdd(xdouble& z, const xdouble& a, const xdouble& b, const xdouble& c);
+xdouble MulAdd(const xdouble& a, const xdouble& b, const xdouble& c);
+// z = a + b*c, but faster
+
+void MulSub(xdouble& z, const xdouble& a, const xdouble& b, const xdouble& c);
+xdouble MulSub(const xdouble& a, const xdouble& b, const xdouble& c);
+// z = a - b*c, but faster
+
+
+/**************************************************************************\
+
+Implementation details:
+
+An xdouble is represented as a mantissa/exponent pair (x, e), where x
+is a double and e is a long.  The real number represented by (x, e) is
+x * NTL_XD_BOUND^e, where
+
+  NTL_XD_BOUND = NTL_XD_HBOUND^2, and
+  NTL_XD_HBOUND = 2^{(max(NTL_DOUBLE_PRECISION,NTL_BITS_PER_LONG)+4)}.
+
+Also, the mantissa x satisfies 1/NTL_XD_HBOUND <= |x| <= NTL_XD_HBOUND, except
+that the number 0 is always represented as (0, 0).
+Both NTL_XD_BOUND and NTL_XD_HBOUND are macros defined in <NTL/xdouble.h>.
+
+SIZE INVARIANT: |e| < 2^(NTL_BITS_PER_LONG-4).
+
+\**************************************************************************/
+
diff --git a/thirdparty/linux/ntl/include/NTL/BasicThreadPool.h b/thirdparty/linux/ntl/include/NTL/BasicThreadPool.h
new file mode 100644
index 0000000000..475e5bdd0f
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/BasicThreadPool.h
@@ -0,0 +1,725 @@
+
+#ifndef NTL_BasicThreadPool__H
+#define NTL_BasicThreadPool__H
+
+#include
+#include
+#include
+#include
+
+
+NTL_OPEN_NNS
+
+
+inline long AvailableThreads();
+
+struct PartitionInfo {
+   long nintervals;  // number of intervals
+   long intervalsz;  // interval size
+   long nsintervals; // number of small intervals
+
+   explicit
+   PartitionInfo(long sz, long nt = AvailableThreads())
+   // partitions [0..sz) into nintervals intervals,
+   // so that there are nsintervals of size intervalsz-1
+   // and nintervals-nsintervals of size intervalsz
+   {
+      if (sz <= 0) {
+         nintervals = intervalsz = nsintervals = 0;
+         return;
+      }
+
+      if (nt <= 0) LogicError("PartitionInfo: bad args");
+
+      // NOTE: this overflow check probably unnecessary
+      if (NTL_OVERFLOW(sz, 1, 0) || NTL_OVERFLOW(nt, 1, 0))
+         ResourceError("PartitionInfo: arg too big");
+
+      if (sz < nt) {
+         nintervals = sz;
+         intervalsz = 1;
+         nsintervals = 0;
+         return;
+      }
+
+      nintervals = nt;
+
+      long q, r;
+      q = sz/nt;
+      r = sz - nt*q;
+
+      if (r == 0) {
+         intervalsz = q;
+         nsintervals = 0;
+      }
+      else {
+         intervalsz = q+1;
+         nsintervals = nt - r;
+      }
+   }
+
+   long NumIntervals() const { return nintervals; }
+
+   void interval(long& first, long& last, long i) const
+   // [first..last) is the ith interval -- no range checking is done
+   {
+
+#if 0
+      // this is the logic, naturally expressed
+      if (i < nsintervals) {
+         first = i*(intervalsz-1);
+         last = first + (intervalsz-1);
+      }
+      else {
+         first = nsintervals*(intervalsz-1) + (i-nsintervals)*intervalsz;
+         last = first + intervalsz;
+      }
+#else
+      // this is the same logic, but branch-free (and portable)
+      // ...probably unnecessary optimization
+
+      long mask = -long(cast_unsigned(i-nsintervals) >> (NTL_BITS_PER_LONG-1));
+      // mask == -1 if i < nsintervals, 0 o/w
+
+      long lfirst = i*(intervalsz-1);
+      lfirst += long((~cast_unsigned(mask)) & cast_unsigned(i-nsintervals));
+      // lfirst += max(0, i-nsintervals)
+
+      long llast = lfirst + intervalsz + mask;
+
+      first = lfirst;
+      last = llast;
+#endif
+   }
+
+};
+
+
+
+NTL_CLOSE_NNS
+
+
+
+#ifdef NTL_THREADS
+
+
+#include
+#include
+#include
+
+
+NTL_OPEN_NNS
+
+/*************************************************************
+
+Some simple thread pooling.
+
+You create a thread pool by constructing a BasicThreadPool object.
+For example:
+
+   long nthreads = 4;
+   BasicThreadPool pool(nthreads);
+
+creates a thread pool of 4 threads.  These threads will exist
+until the destructor for pool is called.
+
+The simplest way to use a thread pool is as follows.
+Suppose you have a task that consists of N subtasks,
+indexed 0..N-1.  Then you can write:
+
+
+   pool.exec_range(N,
+      [&](long first, long last) {
+         for (long i = first; i < last; i++) {
+            ... code to process subtask i ...
+         }
+      }
+   );
+
+The second argument to exec_range is a C++11 "lambda".
+The "[&]" indicates that all local variables in the calling
+context are captured by reference, so the lambda body can
+reference all visible local variables directly.
+
+A lower-level interface is also provided.
+One can write:
+
+   pool.exec_index(n,
+      [&](long index) {
+         ... code to process index i ...
+      }
+   );
+
+This will activate n threads with indices 0..n-1, and execute
+the given code on each index.  The parameter n must be
+in the range 1..nthreads, otherwise an error is raised.
+
+This lower-level interface is useful in some cases,
+especially when memory is managed in some special way.
+For convenience, a method is provided to break
+subtasks up into smaller, almost-equal-sized groups
+of subtasks:
+
+   Vec<long> pvec;
+   long n = pool.SplitProblems(N, pvec);
+
+can be used for this.  N is the number of subtasks, indexed 0..N-1.
+This method will compute n as needed by exec_index, and
+the range of subtasks to be processed by a given index in the range
+0..n-1 is pvec[index]..pvec[index+1]-1
+Thus, the logic of the above exec_range example can be written
+using the lower-level exec_index interface as follows:
+
+
+   Vec<long> pvec;
+   long n = pool.SplitProblems(N, pvec);
+   pool.exec_index(n,
+      [&](long index) {
+         long first = pvec[index];
+         long last = pvec[index+1];
+         for (long i = first; i < last; i++) {
+            ... code to process subtask i ...
+         }
+      }
+   );
+
+However, with this approach, memory or other resources can be
+assigned to each index = 0..n-1, and managed externally.
+
+
+
+
+*************************************************************/
+
+
+class BasicThreadPool {
+private:
+
+// lots of nested stuff
+
+   template<class T>
+   class SimpleSignal {
+   private:
+      T val;
+      std::mutex m;
+      std::condition_variable cv;
+
+      SimpleSignal(const SimpleSignal&); // disabled
+      void operator=(const SimpleSignal&); // disabled
+
+   public:
+      SimpleSignal() : val(0) { }
+
+      T wait()
+      {
+         std::unique_lock<std::mutex> lock(m);
+         cv.wait(lock, [&]() { return val; } );
+         T old_val = val;
+         val = 0;
+         return old_val;
+      }
+
+      void send(T new_val)
+      {
+         std::lock_guard<std::mutex> lock(m);
+         val = new_val;
+         cv.notify_one();
+      }
+   };
+
+
+   template<class T, class T1>
+   class CompositeSignal {
+   private:
+      T val;
+      T1 val1;
+      std::mutex m;
+      std::condition_variable cv;
+
+      CompositeSignal(const CompositeSignal&); // disabled
+      void operator=(const CompositeSignal&); // disabled
+
+   public:
+      CompositeSignal() : val(0) { }
+
+      T wait(T1& _val1)
+      {
+         std::unique_lock<std::mutex> lock(m);
+         cv.wait(lock, [&]() { return val; } );
+         T _val = val;
+         _val1 = val1;
+         val = 0;
+         return _val;
+      }
+
+      void send(T _val, T1 _val1)
+      {
+         std::lock_guard<std::mutex> lock(m);
+         val = _val;
+         val1 = _val1;
+         cv.notify_one();
+      }
+   };
+
+
+
+   class ConcurrentTask {
+      BasicThreadPool *pool;
+   public:
+      ConcurrentTask(BasicThreadPool *_pool) : pool(_pool) { }
+      BasicThreadPool *getBasicThreadPool() const { return pool; }
+
+      virtual void run(long index) = 0;
+   };
+
+
+
+   // dummy class, used for signalling termination
+   class ConcurrentTaskTerminate : public ConcurrentTask {
+   public:
+      ConcurrentTaskTerminate() : ConcurrentTask(0) { }
+      void run(long index) { }
+   };
+
+
+
+   template<class Fct>
+   class ConcurrentTaskFct : public ConcurrentTask {
+   public:
+      const Fct& fct;
+
+      ConcurrentTaskFct(BasicThreadPool *_pool, const Fct& _fct) :
+         ConcurrentTask(_pool), fct(_fct) { }
+
+      void run(long index) { fct(index); }
+   };
+
+   template<class Fct>
+   class ConcurrentTaskFct1 : public ConcurrentTask {
+   public:
+      const Fct& fct;
+      const PartitionInfo& pinfo;
+
+      ConcurrentTaskFct1(BasicThreadPool *_pool, const Fct& _fct,
+                         const PartitionInfo& _pinfo) :
+         ConcurrentTask(_pool), fct(_fct), pinfo(_pinfo) { }
+
+      void run(long index)
+      {
+         long first, last;
+         pinfo.interval(first, last, index);
+         fct(first, last);
+      }
+   };
+
+
+
+   struct AutomaticThread {
+      CompositeSignal< ConcurrentTask *, long > localSignal;
+      ConcurrentTaskTerminate term;
+      std::thread t;
+
+
+      AutomaticThread() : t(worker, &localSignal)
+      {
+         // cerr << "starting thread " << t.get_id() << "\n";
+      }
+
+      ~AutomaticThread()
+      {
+         // cerr << "stopping thread " << t.get_id() << "...";
+         localSignal.send(&term, -1);
+         t.join();
+         // cerr << "\n";
+      }
+   };
+
+
+
+// BasicThreadPool data members
+
+   long nthreads;
+
+   bool active_flag;
+
+   std::atomic<long> counter;
+   SimpleSignal<bool> globalSignal;
+
+   Vec< UniquePtr<AutomaticThread> > threadVec;
+
+   std::exception_ptr eptr;
+   std::mutex eptr_guard;
+
+// BasicThreadPool private member functions
+
+   BasicThreadPool(const BasicThreadPool&); // disabled
+   void operator=(const BasicThreadPool&); // disabled
+
+   void launch(ConcurrentTask *task, long index)
+   {
+      threadVec[index-1]->localSignal.send(task, index);
+      // we use threadVec[index-1] to allow for the fact
+      // that we want the current thread to have index 0
+   }
+
+   void begin(long cnt)
+   {
+
+      active_flag = true;
+      counter = cnt;
+   }
+
+   void end()
+   {
+      globalSignal.wait();
+
+      active_flag = false;
+
+      if (eptr) {
+         std::exception_ptr eptr1 = eptr;
+         eptr = nullptr;
+         std::rethrow_exception(eptr1);
+      }
+   }
+
+   static void runOneTask(ConcurrentTask *task, long index)
+   {
+      BasicThreadPool *pool = task->getBasicThreadPool();
+
+      try {
+         task->run(index);
+      }
+      catch (...) {
+         std::lock_guard<std::mutex> lock(pool->eptr_guard);
+         if (!pool->eptr) pool->eptr = std::current_exception();
+      }
+
+      if (--(pool->counter) == 0) pool->globalSignal.send(true);
+   }
+
+   static void worker(CompositeSignal< ConcurrentTask *, long > *localSignal)
+   {
+      for (;;) {
+         long index = -1;
+         ConcurrentTask *task = localSignal->wait(index);
+         if (index == -1) return;
+
+         runOneTask(task, index);
+      }
+   }
+
+
+public:
+
+
+   long NumThreads() const { return nthreads; }
+   bool active() const { return active_flag; }
+
+   explicit
+   BasicThreadPool(long _nthreads) :
+      nthreads(_nthreads), active_flag(false), counter(0)
+   {
+      if (nthreads <= 0) LogicError("BasicThreadPool::BasicThreadPool: bad args");
+
+      if (NTL_OVERFLOW(nthreads, 1, 0))
+         ResourceError("BasicThreadPool::BasicThreadPool: arg too big");
+
+      threadVec.SetLength(nthreads-1);
+
+      for (long i = 0; i < nthreads-1; i++) {
+         threadVec[i].make();
+      }
+   }
+
+   ~BasicThreadPool()
+   {
+      if (active()) TerminalError("BasicThreadPool: destructor called while active");
+   }
+
+
+   // adding, deleting, moving threads
+
+   void add(long n = 1)
+   {
+      if (active()) LogicError("BasicThreadPool: illegal operation while active");
+      if (n <= 0) LogicError("BasicThreadPool::add: bad args");
+      if (NTL_OVERFLOW(n, 1, 0))
+         ResourceError("BasicThreadPool::add: arg too big");
+
+      Vec< UniquePtr<AutomaticThread> > newThreads;
+
+      newThreads.SetLength(n);
+      for (long i = 0; i < n; i++)
+         newThreads[i].make();
+
+      threadVec.SetLength(n + nthreads - 1);
+      for (long i = 0; i < n; i++)
+         threadVec[nthreads-1+i].move(newThreads[i]);
+
+      nthreads += n;
+   }
+
+
+   void remove(long n = 1)
+   {
+      if (active()) LogicError("BasicThreadPool: illegal operation while active");
+      if (n <= 0 || n >= nthreads) LogicError("BasicThreadPool::remove: bad args");
+
+      for (long i = nthreads-1-n; i < nthreads-1; i++)
+         threadVec[i] = 0;
+
+      threadVec.SetLength(nthreads-1-n);
+      nthreads -= n;
+   }
+
+
+   void move(BasicThreadPool& other, long n = 1)
+   {
+      if (active() || other.active())
+         LogicError("BasicThreadPool: illegal operation while active");
+      if (n <= 0 || n >= other.nthreads) LogicError("BasicThreadPool::move: bad args");
+
+      if (this == &other) return;
+
+      threadVec.SetLength(n + nthreads - 1);
+      for (long i = 0; i < n; i++)
+
threadVec[nthreads-1+i].move(other.threadVec[other.nthreads-1-n+i]); + + other.threadVec.SetLength(other.nthreads-1-n); + other.nthreads -= n; + + nthreads += n; + } + + + + // High level interfaces, intended to be used with lambdas + + // In this version, fct takes one argument, which is + // an index in [0..cnt) + + template + void exec_index(long cnt, const Fct& fct) + { + if (active()) LogicError("BasicThreadPool: illegal operation while active"); + if (cnt <= 0) return; + if (cnt > nthreads) LogicError("BasicThreadPool::exec_index: bad args"); + + ConcurrentTaskFct task(this, fct); + + begin(cnt); + for (long t = 1; t < cnt; t++) launch(&task, t); + runOneTask(&task, 0); + end(); + } + + template + static void relaxed_exec_index(BasicThreadPool *pool, long cnt, const Fct& fct) + { + if (cnt <= 0) return; + if (!pool || pool->active()) { + if (cnt > 1) LogicError("relaxed_exec_index: not enough threads"); + fct(0); + } + else { + pool->exec_index(cnt, fct); + } + } + + // even higher level version: sz is the number of subproblems, + // and fct takes two args, first and last, so that subproblems + // [first..last) are processed. + + template + void exec_range(long sz, const Fct& fct) + { + if (active()) LogicError("BasicThreadPool: illegal operation while active"); + if (sz <= 0) return; + + PartitionInfo pinfo(sz, nthreads); + + long cnt = pinfo.NumIntervals(); + ConcurrentTaskFct1 task(this, fct, pinfo); + + begin(cnt); + for (long t = 1; t < cnt; t++) launch(&task, t); + runOneTask(&task, 0); + end(); + } + + template + static void relaxed_exec_range(BasicThreadPool *pool, long sz, const Fct& fct) + { + if (sz <= 0) return; + if (!pool || pool->active() || sz == 1) { + fct(0, sz); + } + else { + pool->exec_range(sz, fct); + } + } + +}; + + + + +NTL_CLOSE_NNS + + +#endif + + + +#ifdef NTL_THREAD_BOOST + +#ifndef NTL_THREADS +#error "NTL_THREAD_BOOST requires NTL_THREADS" +#endif + +NTL_OPEN_NNS + +extern +NTL_CHEAP_THREAD_LOCAL BasicThreadPool *NTLThreadPool_ptr; + +inline +BasicThreadPool *GetThreadPool() +{ + return NTLThreadPool_ptr; +} + +void ResetThreadPool(BasicThreadPool *pool = 0); +BasicThreadPool *ReleaseThreadPool(); + +inline void SetNumThreads(long n) +{ + ResetThreadPool(MakeRaw(n)); +} + +inline long AvailableThreads() +{ + BasicThreadPool *pool = GetThreadPool(); + if (!pool || pool->active()) + return 1; + else + return pool->NumThreads(); +} + + +NTL_CLOSE_NNS + + +#define NTL_EXEC_RANGE(n, first, last) \ +{ \ + NTL_NNS BasicThreadPool::relaxed_exec_range(NTL_NNS GetThreadPool(), (n), \ + [&](long first, long last) { \ + + +#define NTL_EXEC_RANGE_END \ + } ); \ +} \ + + +#define NTL_GEXEC_RANGE(seq, n, first, last) \ +{ \ + NTL_NNS BasicThreadPool::relaxed_exec_range((seq) ? 
0 : NTL_NNS GetThreadPool(), (n), \ + [&](long first, long last) { \ + + +#define NTL_GEXEC_RANGE_END \ + } ); \ +} \ + + +#define NTL_EXEC_INDEX(n, index) \ +{ \ + NTL_NNS BasicThreadPool::relaxed_exec_index(NTL_NNS GetThreadPool(), (n), \ + [&](long index) { \ + + +#define NTL_EXEC_INDEX_END \ + } ); \ +} \ + + + +// NOTE: at least with gcc >= 4.9.2, the GEXEC versions will evaluate seq, and +// if it is true, jump directly (more or less) to the body + + +#define NTL_TBDECL(x) static void basic_ ## x +#define NTL_TBDECL_static(x) static void basic_ ## x + + +#else + +NTL_OPEN_NNS + + +inline void SetNumThreads(long n) { } + +inline long AvailableThreads() { return 1; } + + +NTL_CLOSE_NNS + +#define NTL_EXEC_RANGE(n, first, last) \ +{ \ + long _ntl_par_exec_n = (n); \ + if (_ntl_par_exec_n > 0) { \ + long first = 0; \ + long last = _ntl_par_exec_n; \ + { \ + + +#define NTL_EXEC_RANGE_END }}} + +#define NTL_GEXEC_RANGE(seq, n, first, last) \ +{ \ + long _ntl_par_exec_n = (n); \ + if (_ntl_par_exec_n > 0) { \ + long first = 0; \ + long last = _ntl_par_exec_n; \ + { \ + + +#define NTL_GEXEC_RANGE_END }}} + + + + +#define NTL_EXEC_INDEX(n, index) \ +{ \ + long _ntl_par_exec_n = (n); \ + if (_ntl_par_exec_n > 0) { \ + if (_ntl_par_exec_n > 1) NTL_NNS LogicError("NTL_EXEC_INDEX: not enough threads"); \ + long index = 0; \ + { \ + + +#define NTL_EXEC_INDEX_END }}} + + + +#define NTL_TBDECL(x) void x +#define NTL_TBDECL_static(x) static void x + +#endif + + + +#ifdef NTL_THREADS + +#define NTL_IMPORT(x) auto _ntl_hidden_variable_IMPORT__ ## x = x; auto x = _ntl_hidden_variable_IMPORT__ ##x; + +#else + +#define NTL_IMPORT(x) + + +#endif + + + +#endif + diff --git a/thirdparty/linux/ntl/include/NTL/FFT.h b/thirdparty/linux/ntl/include/NTL/FFT.h new file mode 100644 index 0000000000..c4b2abb832 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/FFT.h @@ -0,0 +1,210 @@ + +#ifndef NTL_FFT__H +#define NTL_FFT__H + +#include +#include +#include +#include +#include + +NTL_OPEN_NNS + +#define NTL_FFTFudge (4) +// This constant is used in selecting the correct +// number of FFT primes for polynomial multiplication +// in ZZ_pX and zz_pX. Set at 4, this allows for +// two FFT reps to be added or subtracted once, +// before performing CRT, and leaves a reasonable margin for error. +// Don't change this! + +#define NTL_FFTMaxRootBnd (NTL_SP_NBITS-2) +// Absolute maximum root bound for FFT primes. +// Don't change this! + +#if (25 <= NTL_FFTMaxRootBnd) +#define NTL_FFTMaxRoot (25) +#else +#define NTL_FFTMaxRoot NTL_FFTMaxRootBnd +#endif +// Root bound for FFT primes. Held to a maximum +// of 25 to avoid large tables and excess precomputation, +// and to keep the number of FFT primes needed small. +// This means we can multiply polynomials of degree less than 2^24. +// This can be increased, with a slight performance penalty. + + + + +class FFTVectorPair { +public: + Vec wtab_precomp; + Vec wqinvtab_precomp; +}; + +typedef LazyTable FFTMultipliers; + + +class FFTMulTabs { +public: + + FFTMultipliers MulTab[2]; + +}; + +class zz_pInfoT; // forward reference, defined in lzz_p.h + + +struct FFTPrimeInfo { + long q; // the prime itself + mulmod_t qinv; // 1/((wide_double) q) -- but subject to change!! 
+   double qrecip; // 1/double(q)
+
+   SmartPtr<zz_pInfoT> zz_p_context;
+   // pointer to corresponding zz_p context, which points back to this
+   // object in the case of a non-user FFT prime
+
+   Vec<long> RootTable[2];
+   //    RootTable[0][j] = w^{2^{MaxRoot-j}},
+   //                      where w is a primitive 2^MaxRoot root of unity
+   //                      for q
+   //    RootTable[1][j] = 1/RootTable[0][j] mod q
+
+
+   Vec<long> TwoInvTable;
+   // TwoInvTable[j] = 1/2^j mod q
+
+   Vec<mulmod_precon_t> TwoInvPreconTable;
+   // mulmod preconditioning data
+
+   UniquePtr< FFTMulTabs > bigtab;
+
+};
+
+void InitFFTPrimeInfo(FFTPrimeInfo& info, long q, long w, bool bigtab);
+
+
+#define NTL_MAX_FFTPRIMES (20000)
+// for a thread-safe implementation, it is most convenient to
+// impose a reasonable upper bound on the number of FFT primes.
+// Without this restriction, a growing table would have to be
+// relocated in one thread, leaving dangling pointers in
+// another thread.  Each entry in the table is just a pointer,
+// so this does not incur too much space overhead.
+// One could also implement a 2D-table, which would allocate
+// rows on demand, thus reducing wasted space at the price
+// of extra arithmetic to actually index into the table.
+// This may be an option to consider at some point.
+
+// At the current setting of 20000, on 64-bit machines with 50-bit
+// FFT primes, this allows for polynomials with 20000*50/2 = 500K-bit
+// coefficients, while the table itself takes 160KB.
+
+
+typedef LazyTable<FFTPrimeInfo, NTL_MAX_FFTPRIMES> FFTTablesType;
+
+extern FFTTablesType FFTTables;
+// a truly GLOBAL variable, shared among all threads
+
+
+static inline
+long GetFFTPrime(long i)
+{
+   return FFTTables[i]->q;
+}
+
+static inline
+mulmod_t GetFFTPrimeInv(long i)
+{
+   return FFTTables[i]->qinv;
+}
+
+static inline
+double GetFFTPrimeRecip(long i)
+{
+   return FFTTables[i]->qrecip;
+}
+
+
+
+long CalcMaxRoot(long p);
+// calculates max power of two supported by this FFT prime.
+
+void UseFFTPrime(long index);
+// allocates and initializes information for FFT prime
+
+
+void FFT(long* A, const long* a, long k, const FFTPrimeInfo& info, long dir);
+// the low-level FFT routine.
+// computes a 2^k point FFT modulo q = info.q +// dir == 0 => forward direction (using roots) +// dir == 1 => backwards direction (using inverse roots) + + + + + +static inline +void FFTFwd(long* A, const long *a, long k, const FFTPrimeInfo& info) +// Slightly higher level interface...using the ith FFT prime +{ + FFT(A, a, k, info, 0); +} + + +static inline +void FFTFwd(long* A, const long *a, long k, long i) +{ + FFTFwd(A, a, k, *FFTTables[i]); +} + +static inline +void FFTRev(long* A, const long *a, long k, const FFTPrimeInfo& info) +// Slightly higher level interface...using the ith FFT prime +{ + FFT(A, a, k, info, 1); +} + +static inline +void FFTRev(long* A, const long *a, long k, long i) +{ + FFTRev(A, a, k, *FFTTables[i]); +} + +static inline +void FFTMulTwoInv(long* A, const long *a, long k, const FFTPrimeInfo& info) +{ + VectorMulModPrecon(1L << k, A, a, info.TwoInvTable[k], info.q, + info.TwoInvPreconTable[k]); +} + +static inline +void FFTMulTwoInv(long* A, const long *a, long k, long i) +{ + FFTMulTwoInv(A, a, k, *FFTTables[i]); +} + +static inline +void FFTRev1(long* A, const long *a, long k, const FFTPrimeInfo& info) +// FFTRev + FFTMulTwoInv +{ + FFTRev(A, a, k, info); + FFTMulTwoInv(A, A, k, info); +} + +static inline +void FFTRev1(long* A, const long *a, long k, long i) +{ + FFTRev1(A, a, k, *FFTTables[i]); +} + + +long IsFFTPrime(long n, long& w); +// tests if n is an "FFT prime" and returns corresponding root + + + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/FacVec.h b/thirdparty/linux/ntl/include/NTL/FacVec.h new file mode 100644 index 0000000000..378595ce20 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/FacVec.h @@ -0,0 +1,27 @@ + +#ifndef NTL_FacVec__H +#define NTL_FacVec__H + +#include + +NTL_OPEN_NNS + +struct IntFactor { + IntFactor() { } + ~IntFactor() { } + + long q; + long a; + long val; + long link; +}; + + +typedef Vec vec_IntFactor; +typedef vec_IntFactor FacVec; + +void FactorInt(FacVec& fvec, long n); + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/GF2.h b/thirdparty/linux/ntl/include/NTL/GF2.h new file mode 100644 index 0000000000..03b47359cd --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/GF2.h @@ -0,0 +1,506 @@ + +#ifndef NTL_GF2__H +#define NTL_GF2__H + +#include +#include + +NTL_OPEN_NNS + + + + +// Context, Bak, and Push types, just for consistency. 
+// They don't do anything + +class GF2Context { +public: +GF2Context() {} +explicit GF2Context(long p) { if (p != 2) LogicError("GF2Context with p != 2"); } +void save() {} +void restore() const {} +}; + +class GF2Bak { +public: +void save(); +void restore(); + + +private: +GF2Bak(const GF2Bak&); // disabled +void operator=(const GF2Bak&); // disabled + + +}; + +class GF2Push { + +GF2Push(const GF2Push&); // disabled +void operator=(const GF2Push&); // disabled + +public: +GF2Push() { } +explicit GF2Push(const GF2Context& context) { } +explicit GF2Push(long p) { if (p != 2) LogicError("GF2Push with p != 2"); } + + +}; + +class GF2X; // forward declaration + + +class GF2 { +public: +typedef long rep_type; +typedef GF2Context context_type; +typedef GF2Bak bak_type; +typedef GF2Push push_type; +typedef GF2X poly_type; + + +unsigned long _GF2__rep; + + +GF2() : _GF2__rep(0) { } +GF2(const GF2& a) : _GF2__rep(a._GF2__rep) { } + +explicit GF2(long a) : _GF2__rep(0) { *this = a; } + +GF2(INIT_VAL_TYPE, long a) : _GF2__rep(a & 1) { } +GF2(INIT_LOOP_HOLE_TYPE, unsigned long a) : _GF2__rep(a) { } + + + + +~GF2() { } + +GF2& operator=(const GF2& a) { _GF2__rep = a._GF2__rep; return *this; } +GF2& operator=(long a) { _GF2__rep = a & 1; return *this; } + +static long modulus() { return 2; } +static GF2 zero() { return GF2(); } + +// for consistency +GF2(INIT_NO_ALLOC_TYPE) : _GF2__rep(0) { } +GF2(INIT_ALLOC_TYPE) : _GF2__rep(0) { } +void allocate() { } + +void swap(GF2& x) { GF2 t; t = *this; *this = x; x = t; } + + +}; + + + +class ref_GF2 { +public: + +unsigned long *_ref_GF2__ptr; +long _ref_GF2__pos; + +ref_GF2() : _ref_GF2__ptr(0), _ref_GF2__pos(0) { } +ref_GF2(const ref_GF2& a) : + _ref_GF2__ptr(a._ref_GF2__ptr), _ref_GF2__pos(a._ref_GF2__pos) { } +ref_GF2(GF2& a) : + _ref_GF2__ptr(&a._GF2__rep), _ref_GF2__pos(0) { } +ref_GF2(INIT_LOOP_HOLE_TYPE, unsigned long *ptr, long pos) : + _ref_GF2__ptr(ptr), _ref_GF2__pos(pos) { } + +operator const GF2() const +{ + return GF2(INIT_LOOP_HOLE, (*_ref_GF2__ptr >> _ref_GF2__pos) & 1); +} + +~ref_GF2() { } + +ref_GF2 operator=(const ref_GF2& a) +{ + unsigned long rval = (*a._ref_GF2__ptr >> a._ref_GF2__pos) & 1; + unsigned long lval = *_ref_GF2__ptr; + lval = (lval & ~(1UL << _ref_GF2__pos)) | (rval << _ref_GF2__pos); + *_ref_GF2__ptr = lval; + return *this; +} + +ref_GF2 operator=(const GF2& a) +{ + unsigned long rval = (a._GF2__rep) & 1; + unsigned long lval = *_ref_GF2__ptr; + lval = (lval & ~(1UL << _ref_GF2__pos)) | (rval << _ref_GF2__pos); + *_ref_GF2__ptr = lval; + return *this; +} + + +ref_GF2 operator=(long a) +{ + unsigned long rval = a & 1; + unsigned long lval = *_ref_GF2__ptr; + lval = (lval & ~(1UL << _ref_GF2__pos)) | (rval << _ref_GF2__pos); + *_ref_GF2__ptr = lval; + return *this; +} + +void swap(ref_GF2 x) { GF2 t; t = *this; *this = x; x = t; } + + +}; + + + + +// functions + + +inline long rep(GF2 a) { return a._GF2__rep; } + + + +inline long IsZero(GF2 a) + { return a._GF2__rep == 0; } + +inline long IsOne(GF2 a) + { return a._GF2__rep == 1; } + + + + +inline GF2 to_GF2(long a) + { return GF2(INIT_VAL, a); } + +inline GF2 to_GF2(const ZZ& a) + { return GF2(INIT_LOOP_HOLE, IsOdd(a)); } + + + + +inline GF2 operator+(GF2 a, GF2 b) + { return GF2(INIT_LOOP_HOLE, a._GF2__rep ^ b._GF2__rep); } + +inline GF2 operator+(GF2 a, long b) + { return a + to_GF2(b); } + +inline GF2 operator+(long a, GF2 b) + { return to_GF2(a) + b; } + +inline GF2 operator-(GF2 a, GF2 b) + { return a + b; } + +inline GF2 operator-(GF2 a, long b) + { return a + b; } + 
+inline GF2 operator-(long a, GF2 b) + { return a + b; } + +inline GF2 operator-(GF2 a) + { return a; } + + +inline GF2 sqr(GF2 a) + { return a; } + +inline GF2 operator*(GF2 a, GF2 b) + { return GF2(INIT_LOOP_HOLE, a._GF2__rep & b._GF2__rep); } + +inline GF2 operator*(GF2 a, long b) + { return a * to_GF2(b); } + +inline GF2 operator*(long a, GF2 b) + { return to_GF2(a) * b; } + + + + + + +inline GF2 operator/(GF2 a, GF2 b) +{ + if (IsZero(b)) ArithmeticError("GF2: division by zero"); + return a; +} + +inline GF2 operator/(GF2 a, long b) + { return a / to_GF2(b); } + +inline GF2 operator/(long a, GF2 b) + { return to_GF2(a) / b; } + + + +inline GF2 inv(GF2 a) + { return 1 / a; } + + + + + + + +inline long operator==(GF2 a, GF2 b) + { return a._GF2__rep == b._GF2__rep; } + + +inline long operator==(GF2 a, long b) + { return a == to_GF2(b); } + +inline long operator==(long a, GF2 b) + { return to_GF2(a) == b; } + +inline long operator!=(GF2 a, GF2 b) { return !(a == b); } +inline long operator!=(GF2 a, long b) { return !(a == b); } +inline long operator!=(long a, GF2 b) { return !(a == b); } + + + +GF2 power(GF2 a, long e); + + + +inline GF2 random_GF2() + { return GF2(INIT_LOOP_HOLE, RandomBnd(2)); } + + + +// procedural versions + +inline GF2& operator+=(GF2& x, GF2 b) + { return x = x + b; } + +inline GF2& operator+=(GF2& x, long b) + { return x = x + b; } + +inline GF2& operator-=(GF2& x, GF2 b) + { return x = x - b; } + +inline GF2& operator-=(GF2& x, long b) + { return x = x - b; } + +inline GF2& operator++(GF2& x) { return x = x + 1; } +inline void operator++(GF2& x, int) { x = x + 1; } +inline GF2& operator--(GF2& x) { return x = x - 1; } +inline void operator--(GF2& x, int) { x = x - 1; } + +inline GF2& operator*=(GF2& x, GF2 b) + { return x = x * b; } + +inline GF2& operator*=(GF2& x, long b) + { return x = x * b; } + +inline GF2& operator/=(GF2& x, GF2 b) + { return x = x / b; } + +inline GF2& operator/=(GF2& x, long b) + { return x = x / b; } + + + + +inline void conv(GF2& x, long a) { x = to_GF2(a); } + +inline void conv(GF2& x, const ZZ& a) { x = to_GF2(a); } + + +inline void clear(GF2& x) { x = 0; } + +inline void set(GF2& x) { x = 1; } + +inline void swap(GF2& x, GF2& y) { x.swap(y); } + +inline void add(GF2& x, GF2 a, GF2 b) + { x = a + b; } + +inline void sub(GF2& x, GF2 a, GF2 b) + { x = a - b; } + +inline void negate(GF2& x, GF2 a) + { x = -a; } + +inline void add(GF2& x, GF2 a, long b) + { x = a + b; } + +inline void add(GF2& x, long a, GF2 b) + { x = a + b; } + +inline void sub(GF2& x, GF2 a, long b) + { x = a - b; } + +inline void sub(GF2& x, long a, GF2 b) + { x = a - b; } + + +inline void mul(GF2& x, GF2 a, GF2 b) + { x = a * b; } + +inline void mul(GF2& x, GF2 a, long b) + { x = a * b; } + +inline void mul(GF2& x, long a, GF2 b) + { x = a * b; } + +inline void sqr(GF2& x, GF2 a) + { x = sqr(a); } + + +inline void div(GF2& x, GF2 a, GF2 b) + { x = a / b; } + +inline void div(GF2& x, long a, GF2 b) + { x = a / b; } + +inline void div(GF2& x, GF2 a, long b) + { x = a / b; } + +inline void inv(GF2& x, GF2 a) + { x = inv(a); } + + +inline void power(GF2& x, GF2 a, long e) + { x = power(a, e); } + + + +inline void random(GF2& x) + { x = random_GF2(); } + +// ref_GF2 variants...theoretically, these would +// have sufficed, because of the implicit conversion +// from GF2& to ref_GF2, but it may be a bit more efficient +// to explicitly overload everything. Moreover, +// the return types of the += type operators would +// not be right. 
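A short sketch of GF2 arithmetic and of the ref_GF2 proxy whose overloaded
operators follow below (all names used here are declared in this header):

   GF2 a = to_GF2(1);
   GF2 b = to_GF2(3);   // 3 mod 2 == 1
   GF2 c = a + b;       // addition is XOR: c == 0
   GF2 d = a * b;       // multiplication is AND: d == 1
   ref_GF2 r(a);        // proxy referring to the bit stored inside a
   r += 1;              // updates a through the proxy: a becomes 0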
+
+
+inline ref_GF2 operator+=(ref_GF2 x, GF2 b)
+   { return x = x + b; }
+
+inline ref_GF2 operator+=(ref_GF2 x, long b)
+   { return x = x + b; }
+
+inline ref_GF2 operator-=(ref_GF2 x, GF2 b)
+   { return x = x - b; }
+
+inline ref_GF2 operator-=(ref_GF2 x, long b)
+   { return x = x - b; }
+
+inline ref_GF2 operator++(ref_GF2 x) { return x = x + 1; }
+inline void operator++(ref_GF2 x, int) { x = x + 1; }
+inline ref_GF2 operator--(ref_GF2 x) { return x = x - 1; }
+inline void operator--(ref_GF2 x, int) { x = x - 1; }
+
+inline ref_GF2 operator*=(ref_GF2 x, GF2 b)
+   { return x = x * b; }
+
+inline ref_GF2 operator*=(ref_GF2 x, long b)
+   { return x = x * b; }
+
+inline ref_GF2 operator/=(ref_GF2 x, GF2 b)
+   { return x = x / b; }
+
+inline ref_GF2 operator/=(ref_GF2 x, long b)
+   { return x = x / b; }
+
+
+
+
+inline void conv(ref_GF2 x, long a) { x = to_GF2(a); }
+
+inline void conv(ref_GF2 x, const ZZ& a) { x = to_GF2(a); }
+
+
+inline void clear(ref_GF2 x) { x = 0; }
+
+inline void set(ref_GF2 x) { x = 1; }
+
+inline void swap(ref_GF2 x, ref_GF2 y) { x.swap(y); }
+
+inline void add(ref_GF2 x, GF2 a, GF2 b)
+   { x = a + b; }
+
+inline void sub(ref_GF2 x, GF2 a, GF2 b)
+   { x = a - b; }
+
+inline void negate(ref_GF2 x, GF2 a)
+   { x = -a; }
+
+inline void add(ref_GF2 x, GF2 a, long b)
+   { x = a + b; }
+
+inline void add(ref_GF2 x, long a, GF2 b)
+   { x = a + b; }
+
+inline void sub(ref_GF2 x, GF2 a, long b)
+   { x = a - b; }
+
+inline void sub(ref_GF2 x, long a, GF2 b)
+   { x = a - b; }
+
+
+inline void mul(ref_GF2 x, GF2 a, GF2 b)
+   { x = a * b; }
+
+inline void mul(ref_GF2 x, GF2 a, long b)
+   { x = a * b; }
+
+inline void mul(ref_GF2 x, long a, GF2 b)
+   { x = a * b; }
+
+inline void sqr(ref_GF2 x, GF2 a)
+   { x = sqr(a); }
+
+
+inline void div(ref_GF2 x, GF2 a, GF2 b)
+   { x = a / b; }
+
+inline void div(ref_GF2 x, long a, GF2 b)
+   { x = a / b; }
+
+inline void div(ref_GF2 x, GF2 a, long b)
+   { x = a / b; }
+
+inline void inv(ref_GF2 x, GF2 a)
+   { x = inv(a); }
+
+
+inline void power(ref_GF2 x, GF2 a, long e)
+   { x = power(a, e); }
+
+
+
+inline void random(ref_GF2 x)
+   { x = random_GF2(); }
+
+
+
+
+// I/O...for input, we only provide the ref_GF2 variant
+
+NTL_SNS ostream& operator<<(NTL_SNS ostream& s, GF2 a);
+
+NTL_SNS istream& operator>>(NTL_SNS istream& s, ref_GF2 x);
+
+/* additional legacy conversions for v6 conversion regime */
+
+inline void conv(int& x, GF2 a) { conv(x, rep(a)); }
+inline void conv(unsigned int& x, GF2 a) { conv(x, rep(a)); }
+inline void conv(long& x, GF2 a) { conv(x, rep(a)); }
+inline void conv(unsigned long& x, GF2 a) { conv(x, rep(a)); }
+inline void conv(ZZ& x, GF2 a) { conv(x, rep(a)); }
+
+
+inline void conv(GF2& x, GF2 a) { x = a; }
+inline void conv(ref_GF2 x, GF2 a) { x = a; }
+
+/* ------------------------------------- */
+
+
+
+
+// Finally, we declare a specialization Vec<GF2>:
+
+template<> class Vec<GF2>;
+
+NTL_CLOSE_NNS
+
+#endif
+
diff --git a/thirdparty/linux/ntl/include/NTL/GF2E.h b/thirdparty/linux/ntl/include/NTL/GF2E.h
new file mode 100644
index 0000000000..db3c6d28d7
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/GF2E.h
@@ -0,0 +1,494 @@
+
+
+#ifndef NTL_GF2E__H
+#define NTL_GF2E__H
+
+#include
+#include
+#include
+
+NTL_OPEN_NNS
+
+
+
+class GF2EInfoT {
+private:
+
+   GF2EInfoT(); // disabled
+   GF2EInfoT(const GF2EInfoT&); // disabled
+   void operator=(const GF2EInfoT&); // disabled
+public:
+
+   GF2EInfoT(const GF2X& NewP);
+   ~GF2EInfoT() { }
+
+   GF2XModulus p;
+
+   long KarCross;
+   long ModCross;
+   long DivCross;
+
+   long _card_exp;
+   Lazy<ZZ> _card;
+};
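The per-modulus data above is installed with GF2E::init and managed with the
context classes declared next; a minimal sketch (the modulus polynomial is
arbitrary, and Q denotes a hypothetical second modulus built the same way):

   GF2X P;
   SetCoeff(P, 8); SetCoeff(P, 4); SetCoeff(P, 3);
   SetCoeff(P, 1); SetCoeff(P, 0);   // P = X^8 + X^4 + X^3 + X + 1
   GF2E::init(P);                    // all GF2E arithmetic is now mod P
   {
      GF2EPush push;                 // saves the current modulus
      GF2E::init(Q);                 // hypothetical alternative modulus Q
   }                                 // P is restored when push goes out of scope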
+extern +NTL_CHEAP_THREAD_LOCAL +GF2EInfoT *GF2EInfo; +// info for current modulus, initially null +// fast TLS access + + + +class GF2EContext { +private: +SmartPtr ptr; + +public: + +GF2EContext() { } +explicit GF2EContext(const GF2X& p) : ptr(MakeSmart(p)) { } + +// copy constructor, assignment, destructor: default + +void save(); +void restore() const; + +}; + + +class GF2EBak { +private: +GF2EContext c; +bool MustRestore; + +GF2EBak(const GF2EBak&); // disabled +void operator=(const GF2EBak&); // disabled + +public: +void save(); +void restore(); + +GF2EBak() : MustRestore(false) { } + +~GF2EBak(); + + +}; + + + +class GF2EPush { +private: +GF2EBak bak; + +GF2EPush(const GF2EPush&); // disabled +void operator=(const GF2EPush&); // disabled + +public: +GF2EPush() { bak.save(); } +explicit GF2EPush(const GF2EContext& context) { bak.save(); context.restore(); } +explicit GF2EPush(const GF2X& p) { bak.save(); GF2EContext c(p); c.restore(); } + + +}; + + + + +class GF2EX; // forward declaration + +class GF2E { +public: +typedef GF2X rep_type; +typedef GF2EContext context_type; +typedef GF2EBak bak_type; +typedef GF2EPush push_type; +typedef GF2EX poly_type; + + +GF2X _GF2E__rep; + + +// ****** constructors and assignment + +GF2E() { } // NO_ALLOC +GF2E(const GF2E& a) { _GF2E__rep = a._GF2E__rep; } // NO_ALLOC + +explicit GF2E(long a) { *this = a; } // NO_ALLOC +explicit GF2E(GF2 a) { *this = a; } // NO_ALLOC + +GF2E(GF2E& x, INIT_TRANS_TYPE) : _GF2E__rep(x._GF2E__rep, INIT_TRANS) { } + +GF2E(INIT_NO_ALLOC_TYPE) { } // allocates no space +GF2E(INIT_ALLOC_TYPE) { _GF2E__rep.xrep.SetMaxLength(GF2E::WordLength()); } // allocates space +void allocate() { _GF2E__rep.xrep.SetMaxLength(GF2E::WordLength()); } + +~GF2E() { } + +GF2E& operator=(const GF2E& a) { _GF2E__rep = a._GF2E__rep; return *this; } + +inline GF2E& operator=(long a); +inline GF2E& operator=(GF2 a); + +// You can always access the _GF2E__representation directly...if you dare. 
+GF2X& LoopHole() { return _GF2E__rep; } + + +void swap(GF2E& y) { _GF2E__rep.swap(y._GF2E__rep); } + +static long WordLength() { return GF2EInfo->p.WordLength(); } + +static long storage() { return WV_storage(GF2E::WordLength()); } + +static const GF2XModulus& modulus() { return GF2EInfo->p; } + +static long KarCross() { return GF2EInfo->KarCross; } +static long ModCross() { return GF2EInfo->ModCross; } +static long DivCross() { return GF2EInfo->DivCross; } + +static long degree() { return GF2EInfo->p.n; } + +static const GF2E& zero(); + +static const ZZ& cardinality(); + +static void init(const GF2X& NewP); + + +}; + + + +// read-only access to GF2E representation +inline const GF2X& rep(const GF2E& a) { return a._GF2E__rep; } + +inline void clear(GF2E& x) +// x = 0 + { clear(x._GF2E__rep); } + +inline void set(GF2E& x) +// x = 1 + { set(x._GF2E__rep); } + +inline void swap(GF2E& x, GF2E& y) +// swap x and y + + { x.swap(y); } + +// ****** addition + +inline void add(GF2E& x, const GF2E& a, const GF2E& b) + { add(x._GF2E__rep, a._GF2E__rep, b._GF2E__rep); } + +inline void add(GF2E& x, const GF2E& a, GF2 b) + { add(x._GF2E__rep, a._GF2E__rep, b); } + +inline void add(GF2E& x, const GF2E& a, long b) + { add(x._GF2E__rep, a._GF2E__rep, b); } + +inline void add(GF2E& x, GF2 a, const GF2E& b) { add(x, b, a); } +inline void add(GF2E& x, long a, const GF2E& b) { add(x, b, a); } + +inline void sub(GF2E& x, const GF2E& a, const GF2E& b) { add(x, a, b); } +inline void sub(GF2E& x, const GF2E& a, GF2 b) { add(x, a, b); } +inline void sub(GF2E& x, const GF2E& a, long b) { add(x, a, b); } +inline void sub(GF2E& x, GF2 a, const GF2E& b) { add(x, a, b); } +inline void sub(GF2E& x, long a, const GF2E& b) { add(x, a, b); } + +inline void negate(GF2E& x, const GF2E& a) { x = a; } + + +inline GF2E operator+(const GF2E& a, const GF2E& b) + { GF2E x; add(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator+(const GF2E& a, GF2 b) + { GF2E x; add(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator+(const GF2E& a, long b) + { GF2E x; add(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator+(GF2 a, const GF2E& b) + { GF2E x; add(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator+(long a, const GF2E& b) + { GF2E x; add(x, a, b); NTL_OPT_RETURN(GF2E, x); } + + +inline GF2E operator-(const GF2E& a, const GF2E& b) + { GF2E x; sub(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator-(const GF2E& a, GF2 b) + { GF2E x; sub(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator-(const GF2E& a, long b) + { GF2E x; sub(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator-(GF2 a, const GF2E& b) + { GF2E x; sub(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator-(long a, const GF2E& b) + { GF2E x; sub(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator-(const GF2E& a) + { GF2E x; negate(x, a); NTL_OPT_RETURN(GF2E, x); } + + +inline GF2E& operator+=(GF2E& x, const GF2E& b) + { add(x, x, b); return x; } + +inline GF2E& operator+=(GF2E& x, GF2 b) + { add(x, x, b); return x; } + +inline GF2E& operator+=(GF2E& x, long b) + { add(x, x, b); return x; } + + +inline GF2E& operator-=(GF2E& x, const GF2E& b) + { sub(x, x, b); return x; } + +inline GF2E& operator-=(GF2E& x, GF2 b) + { sub(x, x, b); return x; } + +inline GF2E& operator-=(GF2E& x, long b) + { sub(x, x, b); return x; } + + +inline GF2E& operator++(GF2E& x) { add(x, x, 1); return x; } + +inline void operator++(GF2E& x, int) { add(x, x, 1); } + +inline GF2E& operator--(GF2E& x) { sub(x, x, 1); return 
x; } + +inline void operator--(GF2E& x, int) { sub(x, x, 1); } + + + +// ****** multiplication + +inline void mul(GF2E& x, const GF2E& a, const GF2E& b) +// x = a*b + + { MulMod(x._GF2E__rep, a._GF2E__rep, b._GF2E__rep, GF2E::modulus()); } + + +inline void sqr(GF2E& x, const GF2E& a) +// x = a^2 + + { SqrMod(x._GF2E__rep, a._GF2E__rep, GF2E::modulus()); } + +inline GF2E sqr(const GF2E& a) + { GF2E x; sqr(x, a); NTL_OPT_RETURN(GF2E, x); } + +inline void mul(GF2E& x, const GF2E& a, GF2 b) + { mul(x._GF2E__rep, a._GF2E__rep, b); } + +inline void mul(GF2E& x, const GF2E& a, long b) + { mul(x._GF2E__rep, a._GF2E__rep, b); } + +inline void mul(GF2E& x, GF2 a, const GF2E& b) { mul(x, b, a); } +inline void mul(GF2E& x, long a, const GF2E& b) { mul(x, b, a); } + + + +inline GF2E operator*(const GF2E& a, const GF2E& b) + { GF2E x; mul(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator*(const GF2E& a, GF2 b) + { GF2E x; mul(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator*(const GF2E& a, long b) + { GF2E x; mul(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator*(GF2 a, const GF2E& b) + { GF2E x; mul(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator*(long a, const GF2E& b) + { GF2E x; mul(x, a, b); NTL_OPT_RETURN(GF2E, x); } + + +inline GF2E& operator*=(GF2E& x, const GF2E& b) + { mul(x, x, b); return x; } + +inline GF2E& operator*=(GF2E& x, GF2 b) + { mul(x, x, b); return x; } + +inline GF2E& operator*=(GF2E& x, long b) + { mul(x, x, b); return x; } + + + +// ****** division + + + +void div(GF2E& x, const GF2E& a, const GF2E& b); + +void inv(GF2E& x, const GF2E& a); + +inline GF2E inv(const GF2E& a) + { GF2E x; inv(x, a); NTL_OPT_RETURN(GF2E, x); } + +inline void div(GF2E& x, const GF2E& a, GF2 b) + { div(x._GF2E__rep, a._GF2E__rep, b); } + +inline void div(GF2E& x, const GF2E& a, long b) + { div(x._GF2E__rep, a._GF2E__rep, b); } + +void div(GF2E& x, GF2 a, const GF2E& b); +void div(GF2E& x, long a, const GF2E& b); + + +inline GF2E operator/(const GF2E& a, const GF2E& b) + { GF2E x; div(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator/(const GF2E& a, GF2 b) + { GF2E x; div(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator/(const GF2E& a, long b) + { GF2E x; div(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator/(GF2 a, const GF2E& b) + { GF2E x; div(x, a, b); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E operator/(long a, const GF2E& b) + { GF2E x; div(x, a, b); NTL_OPT_RETURN(GF2E, x); } + + +inline GF2E& operator/=(GF2E& x, const GF2E& b) + { div(x, x, b); return x; } + +inline GF2E& operator/=(GF2E& x, GF2 b) + { div(x, x, b); return x; } + +inline GF2E& operator/=(GF2E& x, long b) + { div(x, x, b); return x; } + + +// ****** exponentiation + +inline void power(GF2E& x, const GF2E& a, const ZZ& e) + { PowerMod(x._GF2E__rep, a._GF2E__rep, e, GF2E::modulus()); } + +inline GF2E power(const GF2E& a, const ZZ& e) + { GF2E x; power(x, a, e); NTL_OPT_RETURN(GF2E, x); } + +inline void power(GF2E& x, const GF2E& a, long e) + { PowerMod(x._GF2E__rep, a._GF2E__rep, e, GF2E::modulus()); } + +inline GF2E power(const GF2E& a, long e) + { GF2E x; power(x, a, e); NTL_OPT_RETURN(GF2E, x); } + + +// ****** conversion + +inline void conv(GF2E& x, const GF2X& a) +// x = (a mod p) + + { rem(x._GF2E__rep, a, GF2E::modulus()); } + +inline void conv(GF2E& x, long a) + { conv(x._GF2E__rep, a); } + +inline void conv(GF2E& x, GF2 a) + { conv(x._GF2E__rep, a); } + +inline void conv(GF2E& x, const ZZ& a) + { conv(x._GF2E__rep, a); } + +inline GF2E 
to_GF2E(const GF2X& a) + { GF2E x; conv(x, a); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E to_GF2E(long a) + { GF2E x; conv(x, a); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E to_GF2E(GF2 a) + { GF2E x; conv(x, a); NTL_OPT_RETURN(GF2E, x); } + +inline GF2E to_GF2E(const ZZ& a) + { GF2E x; conv(x, a); NTL_OPT_RETURN(GF2E, x); } + + +// ****** comparison + +inline long IsZero(const GF2E& a) + { return IsZero(a._GF2E__rep); } + +inline long IsOne(const GF2E& a) + { return IsOne(a._GF2E__rep); } + +inline long operator==(const GF2E& a, const GF2E& b) + { return a._GF2E__rep == b._GF2E__rep; } + +inline long operator==(const GF2E& a, GF2 b) + { return a._GF2E__rep == b; } + +inline long operator==(const GF2E& a, long b) + { return a._GF2E__rep == b; } + +inline long operator==(const GF2 a, const GF2E& b) + { return a == b._GF2E__rep; } + +inline long operator==(const long a, const GF2E& b) + { return a == b._GF2E__rep; } + + +inline long operator!=(const GF2E& a, const GF2E& b) { return !(a == b); } +inline long operator!=(const GF2E& a, GF2 b) { return !(a == b); } +inline long operator!=(const GF2E& a, long b) { return !(a == b); } +inline long operator!=(GF2 a, const GF2E& b) { return !(a == b); } +inline long operator!=(long a, const GF2E& b) { return !(a == b); } + +// ****** trace + +inline void trace(ref_GF2 x, const GF2E& a) + { TraceMod(x, a._GF2E__rep, GF2E::modulus()); } +inline GF2 trace(const GF2E& a) + { return TraceMod(a._GF2E__rep, GF2E::modulus()); } + + + +// ****** random numbers + +inline void random(GF2E& x) +// x = random element in GF2E + + { random(x._GF2E__rep, GF2EInfo->p.n); } + +inline GF2E random_GF2E() + { GF2E x; random(x); NTL_OPT_RETURN(GF2E, x); } + + +// ****** input/output + +inline NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const GF2E& a) + { return s << a._GF2E__rep; } + +NTL_SNS istream& operator>>(NTL_SNS istream& s, GF2E& x); + + +inline GF2E& GF2E::operator=(long a) { conv(*this, a); return *this; } +inline GF2E& GF2E::operator=(GF2 a) { conv(*this, a); return *this; } + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(GF2X& x, const GF2E& a) { x = rep(a); } +inline void conv(GF2E& x, const GF2E& a) { x = a; } + + +/* ------------------------------------- */ + + +// overload these functions for Vec. 
+// They are defined in vec_GF2E.c +void BlockConstruct(GF2E* p, long n); +void BlockConstructFromVec(GF2E* p, long n, const GF2E* q); +void BlockConstructFromObj(GF2E* p, long n, const GF2E& q); +void BlockDestroy(GF2E* p, long n); + + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/GF2EX.h b/thirdparty/linux/ntl/include/NTL/GF2EX.h new file mode 100644 index 0000000000..8836af5d6e --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/GF2EX.h @@ -0,0 +1,1052 @@ + + +#ifndef NTL_GF2EX__H +#define NTL_GF2EX__H + +#include +#include +#include +#include +#include +#include + + +NTL_OPEN_NNS + +class GF2EXModulus; // forward declaration + +class GF2EX { +public: +typedef GF2E coeff_type; +typedef GF2EXModulus modulus_type; + + +vec_GF2E rep; + + +/*************************************************************** + + Constructors, Destructors, and Assignment + +****************************************************************/ + + +GF2EX() { } + +explicit GF2EX(long a) { *this = a; } +explicit GF2EX(GF2 a) { *this = a; } +explicit GF2EX(const GF2& a) { *this = a; } + + +GF2EX(INIT_SIZE_TYPE, long n) { rep.SetMaxLength(n); } + +GF2EX(const GF2EX& a) : rep(a.rep) { } + + +GF2EX& operator=(const GF2EX& a) + { rep = a.rep; return *this; } + +~GF2EX() { } + +void normalize(); +// strip leading zeros + +void SetMaxLength(long n) +// pre-allocate space for n coefficients. +// Value is unchanged + + { rep.SetMaxLength(n); } + + +void kill() +// free space held by this polynomial. Value becomes 0. + + { rep.kill(); } + + + +void SetLength(long n) { rep.SetLength(n); } +GF2E& operator[](long i) { return rep[i]; } +const GF2E& operator[](long i) const { return rep[i]; } + + + + +static const GF2EX& zero(); + + + +inline GF2EX& operator=(long a); +inline GF2EX& operator=(GF2 a); +inline GF2EX& operator=(const GF2E& a); + +inline GF2EX(long i, long a); +inline GF2EX(long i, GF2 a); +inline GF2EX(long i, const GF2E& a); + + +inline GF2EX(INIT_MONO_TYPE, long i, long a); +inline GF2EX(INIT_MONO_TYPE, long i, GF2 a); +inline GF2EX(INIT_MONO_TYPE, long i, const GF2E& a); +inline GF2EX(INIT_MONO_TYPE, long i); + + +GF2EX(GF2EX& x, INIT_TRANS_TYPE) : rep(x.rep, INIT_TRANS) { } + +void swap(GF2EX& x) { rep.swap(x.rep); } + + +}; + + + + +/******************************************************************** + + input and output + +*********************************************************************/ + + +NTL_SNS istream& operator>>(NTL_SNS istream& s, GF2EX& x); +NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const GF2EX& a); + + + + +/********************************************************** + + Some utility routines + +***********************************************************/ + + +inline long deg(const GF2EX& a) { return a.rep.length() - 1; } + +const GF2E& coeff(const GF2EX& a, long i); +// zero if i not in range + +void GetCoeff(GF2E& x, const GF2EX& a, long i); +// x = a[i], or zero if i not in range + +const GF2E& LeadCoeff(const GF2EX& a); +// zero if a == 0 + +const GF2E& ConstTerm(const GF2EX& a); +// zero if a == 0 + +void SetCoeff(GF2EX& x, long i, const GF2E& a); +void SetCoeff(GF2EX& x, long i, GF2 a); +void SetCoeff(GF2EX& x, long i, long a); +// x[i] = a, error is raised if i < 0 + +void SetCoeff(GF2EX& x, long i); +// x[i] = 1, error is raised if i < 0 + +inline GF2EX::GF2EX(long i, const GF2E& a) { SetCoeff(*this, i, a); } +inline GF2EX::GF2EX(long i, GF2 a) { SetCoeff(*this, i, a); } +inline GF2EX::GF2EX(long i, long a) { SetCoeff(*this, i, a); } + + +inline 
GF2EX::GF2EX(INIT_MONO_TYPE, long i, const GF2E& a) { SetCoeff(*this, i, a); } +inline GF2EX::GF2EX(INIT_MONO_TYPE, long i, GF2 a) { SetCoeff(*this, i, a); } +inline GF2EX::GF2EX(INIT_MONO_TYPE, long i, long a) { SetCoeff(*this, i, a); } +inline GF2EX::GF2EX(INIT_MONO_TYPE, long i) { SetCoeff(*this, i); } + + +void SetX(GF2EX& x); +// x is set to the monomial X + +long IsX(const GF2EX& a); +// test if x = X + +inline void clear(GF2EX& x) +// x = 0 + + { x.rep.SetLength(0); } + +inline void set(GF2EX& x) +// x = 1 + + { x.rep.SetLength(1); set(x.rep[0]); } + +inline void swap(GF2EX& x, GF2EX& y) +// swap x & y (only pointers are swapped) + + { x.swap(y); } + +void random(GF2EX& x, long n); +inline GF2EX random_GF2EX(long n) + { GF2EX x; random(x, n); NTL_OPT_RETURN(GF2EX, x); } +// generate a random polynomial of degree < n + +void trunc(GF2EX& x, const GF2EX& a, long m); +inline GF2EX trunc(const GF2EX& a, long m) + { GF2EX x; trunc(x, a, m); NTL_OPT_RETURN(GF2EX, x); } +// x = a % X^m + +void RightShift(GF2EX& x, const GF2EX& a, long n); +inline GF2EX RightShift(const GF2EX& a, long n) + { GF2EX x; RightShift(x, a, n); NTL_OPT_RETURN(GF2EX, x); } +// x = a/X^n + +void LeftShift(GF2EX& x, const GF2EX& a, long n); +inline GF2EX LeftShift(const GF2EX& a, long n) + { GF2EX x; LeftShift(x, a, n); NTL_OPT_RETURN(GF2EX, x); } +// x = a*X^n + +#ifndef NTL_TRANSITION + +inline GF2EX operator>>(const GF2EX& a, long n) + { GF2EX x; RightShift(x, a, n); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator<<(const GF2EX& a, long n) + { GF2EX x; LeftShift(x, a, n); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX& operator<<=(GF2EX& x, long n) + { LeftShift(x, x, n); return x; } + +inline GF2EX& operator>>=(GF2EX& x, long n) + { RightShift(x, x, n); return x; } + +#endif + + + +void diff(GF2EX& x, const GF2EX& a); +inline GF2EX diff(const GF2EX& a) + { GF2EX x; diff(x, a); NTL_OPT_RETURN(GF2EX, x); } +// x = derivative of a + + + +void MakeMonic(GF2EX& x); + +void reverse(GF2EX& c, const GF2EX& a, long hi); + +inline GF2EX reverse(const GF2EX& a, long hi) + { GF2EX x; reverse(x, a, hi); NTL_OPT_RETURN(GF2EX, x); } + +inline void reverse(GF2EX& c, const GF2EX& a) +{ reverse(c, a, deg(a)); } + +inline GF2EX reverse(const GF2EX& a) + { GF2EX x; reverse(x, a); NTL_OPT_RETURN(GF2EX, x); } + +inline void VectorCopy(vec_GF2E& x, const GF2EX& a, long n) + { VectorCopy(x, a.rep, n); } + +inline vec_GF2E VectorCopy(const GF2EX& a, long n) + { return VectorCopy(a.rep, n); } + + + + +/******************************************************************* + + conversion routines + +********************************************************************/ + + + +void conv(GF2EX& x, long a); +void conv(GF2EX& x, GF2 a); +void conv(GF2EX& x, const GF2E& a); +void conv(GF2EX& x, const ZZ& a); + +#ifndef NTL_TRANSITION +void conv(GF2EX& x, const GF2X& a); +#endif + +void conv(GF2EX& x, const vec_GF2E& a); + +inline GF2EX to_GF2EX(long a) + { GF2EX x; conv(x, a); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX to_GF2EX(GF2 a) + { GF2EX x; conv(x, a); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX to_GF2EX(const GF2E& a) + { GF2EX x; conv(x, a); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX to_GF2EX(const ZZ& a) + { GF2EX x; conv(x, a); NTL_OPT_RETURN(GF2EX, x); } + +#ifndef NTL_TRANSITION +inline GF2EX to_GF2EX(const GF2X& a) + { GF2EX x; conv(x, a); NTL_OPT_RETURN(GF2EX, x); } +#endif + +inline GF2EX to_GF2EX(const vec_GF2E& a) + { GF2EX x; conv(x, a); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX& GF2EX::operator=(const GF2E& a) { 
conv(*this, a); return *this; } +inline GF2EX& GF2EX::operator=(GF2 a) { conv(*this, a); return *this; } +inline GF2EX& GF2EX::operator=(long a) { conv(*this, a); return *this; } + + + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(GF2EX& x, const GF2EX& a) + { x = a; } + +inline void conv(vec_GF2E& x, const GF2EX& a) + { x = a.rep; } + +class ZZX; +void conv(GF2EX& x, const ZZX& a); + + +/* ------------------------------------- */ + + + + + +/************************************************************* + + Comparison + +**************************************************************/ + +long IsZero(const GF2EX& a); + +long IsOne(const GF2EX& a); + +inline long operator==(const GF2EX& a, const GF2EX& b) + { return a.rep == b.rep; } + +long operator==(const GF2EX& a, const GF2E& b); +long operator==(const GF2EX& a, GF2 b); +long operator==(const GF2EX& a, long b); + +inline long operator==(const GF2E& a, const GF2EX& b) { return b == a; } +inline long operator==(GF2 a, const GF2EX& b) { return b == a; } +inline long operator==(long a, const GF2EX& b) { return b == a; } + +inline long operator!=(const GF2EX& a, const GF2EX& b) { return !(a == b); } + +inline long operator!=(const GF2EX& a, const GF2E& b) { return !(a == b); } +inline long operator!=(const GF2EX& a, GF2 b) { return !(a == b); } +inline long operator!=(const GF2EX& a, long b) { return !(a == b); } + +inline long operator!=(const GF2E& a, const GF2EX& b) { return !(a == b); } +inline long operator!=(GF2 a, const GF2EX& b) { return !(a == b); } +inline long operator!=(long a, const GF2EX& b) { return !(a == b); } + + +/*************************************************************** + + Addition + +****************************************************************/ + +void add(GF2EX& x, const GF2EX& a, const GF2EX& b); +// x = a + b + +void add(GF2EX& x, const GF2EX& a, const GF2E& b); +void add(GF2EX& x, const GF2EX& a, GF2 b); +void add(GF2EX& x, const GF2EX& a, long); + +inline void add(GF2EX& x, const GF2E& a, const GF2EX& b) { add(x, b, a); } +inline void add(GF2EX& x, GF2 a, const GF2EX& b) { add(x, b, a); } +inline void add(GF2EX& x, long a, const GF2EX& b) { add(x, b, a); } + +inline void sub(GF2EX& x, const GF2EX& a, const GF2EX& b) { add(x, a, b); } + +inline void sub(GF2EX& x, const GF2EX& a, const GF2E& b) { add(x, a, b); } +inline void sub(GF2EX& x, const GF2EX& a, GF2 b) { add(x, a, b); } +inline void sub(GF2EX& x, const GF2EX& a, long b) { add(x, a, b); } + +inline void sub(GF2EX& x, const GF2E& a, const GF2EX& b) { add(x, a, b); } +inline void sub(GF2EX& x, GF2 a, const GF2EX& b) { add(x, a, b); } +inline void sub(GF2EX& x, long a, const GF2EX& b) { add(x, a, b); } + +inline void negate(GF2EX& x, const GF2EX& a) { x = a; } + + + + +inline GF2EX operator+(const GF2EX& a, const GF2EX& b) + { GF2EX x; add(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator+(const GF2EX& a, const GF2E& b) + { GF2EX x; add(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator+(const GF2EX& a, GF2 b) + { GF2EX x; add(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator+(const GF2EX& a, long b) + { GF2EX x; add(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator+(const GF2E& a, const GF2EX& b) + { GF2EX x; add(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator+(GF2 a, const GF2EX& b) + { GF2EX x; add(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator+(long a, const GF2EX& b) + { GF2EX x; add(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + + 
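
The additive operators above make the characteristic-2 structure explicit: sub is defined as add, and negate is a copy. A small self-contained check (illustrative only; the field degree 8 and polynomial length 5 are arbitrary):

#include <NTL/GF2EX.h>
#include <NTL/GF2XFactoring.h>

NTL_CLIENT

int main()
{
   GF2X P;
   BuildIrred(P, 8);
   GF2E::init(P);               // coefficients in GF(2^8)

   GF2EX a = random_GF2EX(5);   // random polynomial of degree < 5
   GF2EX b = random_GF2EX(5);

   // characteristic 2: subtraction, addition, and negation coincide
   cout << ((a - b) == (a + b)) << "\n";   // 1
   cout << IsZero(a + a) << "\n";          // 1
   cout << ((-a) == a) << "\n";            // 1
   return 0;
}
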
+inline GF2EX operator-(const GF2EX& a, const GF2EX& b) + { GF2EX x; sub(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator-(const GF2EX& a, const GF2E& b) + { GF2EX x; sub(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator-(const GF2EX& a, GF2 b) + { GF2EX x; sub(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator-(const GF2EX& a, long b) + { GF2EX x; sub(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator-(const GF2E& a, const GF2EX& b) + { GF2EX x; sub(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator-(GF2 a, const GF2EX& b) + { GF2EX x; sub(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator-(long a, const GF2EX& b) + { GF2EX x; sub(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + + +inline GF2EX& operator+=(GF2EX& x, const GF2EX& b) + { add(x, x, b); return x; } + +inline GF2EX& operator+=(GF2EX& x, const GF2E& b) + { add(x, x, b); return x; } + +inline GF2EX& operator+=(GF2EX& x, GF2 b) + { add(x, x, b); return x; } + +inline GF2EX& operator+=(GF2EX& x, long b) + { add(x, x, b); return x; } + +inline GF2EX& operator-=(GF2EX& x, const GF2EX& b) + { sub(x, x, b); return x; } + +inline GF2EX& operator-=(GF2EX& x, const GF2E& b) + { sub(x, x, b); return x; } + +inline GF2EX& operator-=(GF2EX& x, GF2 b) + { sub(x, x, b); return x; } + +inline GF2EX& operator-=(GF2EX& x, long b) + { sub(x, x, b); return x; } + + +inline GF2EX operator-(const GF2EX& a) + { GF2EX x; negate(x, a); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX& operator++(GF2EX& x) { add(x, x, 1); return x; } +inline void operator++(GF2EX& x, int) { add(x, x, 1); } +inline GF2EX& operator--(GF2EX& x) { sub(x, x, 1); return x; } +inline void operator--(GF2EX& x, int) { sub(x, x, 1); } + + +/***************************************************************** + + Multiplication + +******************************************************************/ + + +void mul(GF2EX& x, const GF2EX& a, const GF2EX& b); +// x = a * b + +void sqr(GF2EX& x, const GF2EX& a); +inline GF2EX sqr(const GF2EX& a) + { GF2EX x; sqr(x, a); NTL_OPT_RETURN(GF2EX, x); } +// x = a^2 + +void mul(GF2EX & x, const GF2EX& a, const GF2E& b); +void mul(GF2EX & x, const GF2EX& a, GF2 b); +void mul(GF2EX & x, const GF2EX& a, long b); + +inline void mul(GF2EX& x, const GF2E& a, const GF2EX& b) { mul(x, b, a); } +inline void mul(GF2EX& x, GF2 a, const GF2EX& b) { mul(x, b, a); } +inline void mul(GF2EX& x, long a, const GF2EX& b) { mul(x, b, a); } + +void MulTrunc(GF2EX& x, const GF2EX& a, const GF2EX& b, long n); +inline GF2EX MulTrunc(const GF2EX& a, const GF2EX& b, long n) + { GF2EX x; MulTrunc(x, a, b, n); NTL_OPT_RETURN(GF2EX, x); } +// x = a * b % X^n + +void SqrTrunc(GF2EX& x, const GF2EX& a, long n); +inline GF2EX SqrTrunc(const GF2EX& a, long n) + { GF2EX x; SqrTrunc(x, a, n); NTL_OPT_RETURN(GF2EX, x); } +// x = a*a % X^n + + +inline GF2EX operator*(const GF2EX& a, const GF2EX& b) + { GF2EX x; mul(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator*(const GF2EX& a, const GF2E& b) + { GF2EX x; mul(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator*(const GF2EX& a, GF2 b) + { GF2EX x; mul(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator*(const GF2EX& a, long b) + { GF2EX x; mul(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator*(const GF2E& a, const GF2EX& b) + { GF2EX x; mul(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator*(GF2 a, const GF2EX& b) + { GF2EX x; mul(x, a, b); NTL_OPT_RETURN(GF2EX, x); } + +inline GF2EX operator*(long a, const GF2EX& 
 b)
+   { GF2EX x; mul(x, a, b); NTL_OPT_RETURN(GF2EX, x); }
+
+inline GF2EX& operator*=(GF2EX& x, const GF2EX& b)
+   { mul(x, x, b); return x; }
+
+inline GF2EX& operator*=(GF2EX& x, const GF2E& b)
+   { mul(x, x, b); return x; }
+
+inline GF2EX& operator*=(GF2EX& x, GF2 b)
+   { mul(x, x, b); return x; }
+
+inline GF2EX& operator*=(GF2EX& x, long b)
+   { mul(x, x, b); return x; }
+
+
+void power(GF2EX& x, const GF2EX& a, long e);
+inline GF2EX power(const GF2EX& a, long e)
+   { GF2EX x; power(x, a, e); NTL_OPT_RETURN(GF2EX, x); }
+
+
+
+
+/*************************************************************
+
+                          Division
+
+**************************************************************/
+
+void DivRem(GF2EX& q, GF2EX& r, const GF2EX& a, const GF2EX& b);
+// q = a/b, r = a%b
+
+void div(GF2EX& q, const GF2EX& a, const GF2EX& b);
+void div(GF2EX& q, const GF2EX& a, const GF2E& b);
+void div(GF2EX& q, const GF2EX& a, GF2 b);
+void div(GF2EX& q, const GF2EX& a, long b);
+// q = a/b
+
+void rem(GF2EX& r, const GF2EX& a, const GF2EX& b);
+// r = a%b
+
+long divide(GF2EX& q, const GF2EX& a, const GF2EX& b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0
+
+long divide(const GF2EX& a, const GF2EX& b);
+// if b | a, returns 1; otherwise returns 0
+
+void InvTrunc(GF2EX& x, const GF2EX& a, long m);
+inline GF2EX InvTrunc(const GF2EX& a, long m)
+   { GF2EX x; InvTrunc(x, a, m); NTL_OPT_RETURN(GF2EX, x); }
+
+// computes x = a^{-1} % X^m
+// constant term must be non-zero
+
+
+
+inline GF2EX operator/(const GF2EX& a, const GF2EX& b)
+   { GF2EX x; div(x, a, b); NTL_OPT_RETURN(GF2EX, x); }
+
+inline GF2EX operator/(const GF2EX& a, const GF2E& b)
+   { GF2EX x; div(x, a, b); NTL_OPT_RETURN(GF2EX, x); }
+
+inline GF2EX operator/(const GF2EX& a, GF2 b)
+   { GF2EX x; div(x, a, b); NTL_OPT_RETURN(GF2EX, x); }
+
+inline GF2EX operator/(const GF2EX& a, long b)
+   { GF2EX x; div(x, a, b); NTL_OPT_RETURN(GF2EX, x); }
+
+inline GF2EX& operator/=(GF2EX& x, const GF2EX& b)
+   { div(x, x, b); return x; }
+
+inline GF2EX& operator/=(GF2EX& x, const GF2E& b)
+   { div(x, x, b); return x; }
+
+inline GF2EX& operator/=(GF2EX& x, GF2 b)
+   { div(x, x, b); return x; }
+
+inline GF2EX& operator/=(GF2EX& x, long b)
+   { div(x, x, b); return x; }
+
+
+inline GF2EX operator%(const GF2EX& a, const GF2EX& b)
+   { GF2EX x; rem(x, a, b); NTL_OPT_RETURN(GF2EX, x); }
+
+inline GF2EX& operator%=(GF2EX& x, const GF2EX& b)
+   { rem(x, x, b); return x; }
+
+
+
+
+/***********************************************************
+
+                          GCD's
+
+************************************************************/
+
+
+void GCD(GF2EX& x, const GF2EX& a, const GF2EX& b);
+inline GF2EX GCD(const GF2EX& a, const GF2EX& b)
+   { GF2EX x; GCD(x, a, b); NTL_OPT_RETURN(GF2EX, x); }
+
+// x = GCD(a, b), x is always monic (or zero if a==b==0).
+
+void XGCD(GF2EX& d, GF2EX& s, GF2EX& t, const GF2EX& a, const GF2EX& b);
+// d = gcd(a,b), a s + b t = d
+
+
+/*************************************************************
+
+         Modular Arithmetic without pre-conditioning
+
+**************************************************************/
+
+// arithmetic mod f.
+// all inputs and outputs are polynomials of degree less than deg(f).
+// ASSUMPTION: f is assumed monic, and deg(f) > 0.
+// NOTE: if you want to do many computations with a fixed f,
+// use the GF2EXModulus data structure and associated routines below.
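
For example, a one-shot product mod f looks like this (an illustrative sketch, not part of the header; the modulus is made monic of degree 6 by force-setting its leading coefficient, and all degrees are arbitrary):

#include <NTL/GF2EX.h>
#include <NTL/GF2XFactoring.h>

NTL_CLIENT

int main()
{
   GF2X P;
   BuildIrred(P, 8);
   GF2E::init(P);

   GF2EX f = random_GF2EX(6);
   SetCoeff(f, 6);              // leading coefficient 1: monic, deg(f) = 6

   GF2EX a = random_GF2EX(6);   // deg(a) < deg(f), as required
   GF2EX b = random_GF2EX(6);

   GF2EX c = MulMod(a, b, f);   // (a * b) % f
   GF2EX d = SqrMod(a, f);      // a^2 % f
   cout << deg(c) << " " << deg(d) << "\n";
   return 0;
}
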
+
+
+
+void MulMod(GF2EX& x, const GF2EX& a, const GF2EX& b, const GF2EX& f);
+inline GF2EX MulMod(const GF2EX& a, const GF2EX& b, const GF2EX& f)
+   { GF2EX x; MulMod(x, a, b, f); NTL_OPT_RETURN(GF2EX, x); }
+// x = (a * b) % f
+
+void SqrMod(GF2EX& x, const GF2EX& a, const GF2EX& f);
+inline GF2EX SqrMod(const GF2EX& a, const GF2EX& f)
+   { GF2EX x; SqrMod(x, a, f); NTL_OPT_RETURN(GF2EX, x); }
+// x = a^2 % f
+
+void MulByXMod(GF2EX& x, const GF2EX& a, const GF2EX& f);
+inline GF2EX MulByXMod(const GF2EX& a, const GF2EX& f)
+   { GF2EX x; MulByXMod(x, a, f); NTL_OPT_RETURN(GF2EX, x); }
+// x = (a * X) mod f
+
+void InvMod(GF2EX& x, const GF2EX& a, const GF2EX& f);
+inline GF2EX InvMod(const GF2EX& a, const GF2EX& f)
+   { GF2EX x; InvMod(x, a, f); NTL_OPT_RETURN(GF2EX, x); }
+// x = a^{-1} % f, error if a is not invertible
+
+long InvModStatus(GF2EX& x, const GF2EX& a, const GF2EX& f);
+// if (a, f) = 1, returns 0 and sets x = a^{-1} % f
+// otherwise, returns 1 and sets x = (a, f)
+
+
+
+
+
+/******************************************************************
+
+            Modular Arithmetic with Pre-conditioning
+
+*******************************************************************/
+
+
+// If you need to do a lot of arithmetic modulo a fixed f,
+// build GF2EXModulus F for f.  This pre-computes information about f
+// that speeds up the computation a great deal.
+
+class GF2EXModulus {
+public:
+   GF2EXModulus();
+   ~GF2EXModulus() { }
+
+   GF2EXModulus(const GF2EX& ff);
+
+   GF2EX f;   // the modulus
+
+   operator const GF2EX& () const { return f; }
+   const GF2EX& val() const { return f; }
+
+   long n; // deg(f)
+
+   long method; // GF2EX_MOD_PLAIN or GF2EX_MOD_MUL
+
+   GF2EX h0;
+   GF2E hlc;
+   GF2EX f0;
+
+   OptionalVal< Lazy<vec_GF2E> > tracevec;
+   // extra level of indirection to ensure class is relocatable
+
+};
+
+
+inline long deg(const GF2EXModulus& F) { return F.n; }
+
+void build(GF2EXModulus& F, const GF2EX& f);
+
+
+
+void rem(GF2EX& r, const GF2EX& a, const GF2EXModulus& F);
+
+void DivRem(GF2EX& q, GF2EX& r, const GF2EX& a, const GF2EXModulus& F);
+
+void div(GF2EX& q, const GF2EX& a, const GF2EXModulus& F);
+
+void MulMod(GF2EX& c, const GF2EX& a, const GF2EX& b, const GF2EXModulus& F);
+inline GF2EX MulMod(const GF2EX& a, const GF2EX& b, const GF2EXModulus& F)
+   { GF2EX x; MulMod(x, a, b, F); NTL_OPT_RETURN(GF2EX, x); }
+
+void SqrMod(GF2EX& c, const GF2EX& a, const GF2EXModulus& F);
+inline GF2EX SqrMod(const GF2EX& a, const GF2EXModulus& F)
+   { GF2EX x; SqrMod(x, a, F); NTL_OPT_RETURN(GF2EX, x); }
+
+
+void PowerMod(GF2EX& h, const GF2EX& g, const ZZ& e, const GF2EXModulus& F);
+
+inline void PowerMod(GF2EX& h, const GF2EX& g, long e, const GF2EXModulus& F)
+   { PowerMod(h, g, ZZ_expo(e), F); }
+
+inline GF2EX PowerMod(const GF2EX& g, const ZZ& e, const GF2EXModulus& F)
+   { GF2EX x; PowerMod(x, g, e, F); NTL_OPT_RETURN(GF2EX, x); }
+
+inline GF2EX PowerMod(const GF2EX& g, long e, const GF2EXModulus& F)
+   { GF2EX x; PowerMod(x, g, e, F); NTL_OPT_RETURN(GF2EX, x); }
+
+void PowerXMod(GF2EX& hh, const ZZ& e, const GF2EXModulus& F);
+
+inline void PowerXMod(GF2EX& h, long e, const GF2EXModulus& F)
+   { PowerXMod(h, ZZ_expo(e), F); }
+
+
+inline GF2EX PowerXMod(const ZZ& e, const GF2EXModulus& F)
+   { GF2EX x; PowerXMod(x, e, F); NTL_OPT_RETURN(GF2EX, x); }
+
+inline GF2EX PowerXMod(long e, const GF2EXModulus& F)
+   { GF2EX x; PowerXMod(x, e, F); NTL_OPT_RETURN(GF2EX, x); }
+
+inline GF2EX operator%(const GF2EX& a, const GF2EXModulus& F)
+   { GF2EX x; rem(x, a, F); NTL_OPT_RETURN(GF2EX, x); }
+
+inline GF2EX& operator%=(GF2EX& x, const GF2EXModulus& F)
+   { rem(x, x, F); return x; }
+
+inline GF2EX operator/(const GF2EX& a, const GF2EXModulus& F)
+   { GF2EX x; div(x, a, F); NTL_OPT_RETURN(GF2EX, x); }
+
+inline GF2EX& operator/=(GF2EX& x, const GF2EXModulus& F)
+   { div(x, x, F); return x; }
+
+
+
+/*****************************************************************
+
+                      vectors of GF2EX's
+
+*****************************************************************/
+
+
+
+typedef Vec<GF2EX> vec_GF2EX;
+
+
+
+/*******************************************************
+
+            Evaluation and related problems
+
+********************************************************/
+
+
+void BuildFromRoots(GF2EX& x, const vec_GF2E& a);
+inline GF2EX BuildFromRoots(const vec_GF2E& a)
+   { GF2EX x; BuildFromRoots(x, a); NTL_OPT_RETURN(GF2EX, x); }
+// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n = a.length()
+
+
+void eval(GF2E& b, const GF2EX& f, const GF2E& a);
+inline GF2E eval(const GF2EX& f, const GF2E& a)
+   { GF2E x; eval(x, f, a); NTL_OPT_RETURN(GF2E, x); }
+// b = f(a)
+
+void eval(vec_GF2E& b, const GF2EX& f, const vec_GF2E& a);
+inline vec_GF2E eval(const GF2EX& f, const vec_GF2E& a)
+   { vec_GF2E x; eval(x, f, a); NTL_OPT_RETURN(vec_GF2E, x); }
+// b[i] = f(a[i])
+
+inline void eval(GF2E& b, const GF2X& f, const GF2E& a)
+   { conv(b, CompMod(f, rep(a), GF2E::modulus())); }
+
+inline GF2E eval(const GF2X& f, const GF2E& a)
+   { GF2E x; eval(x, f, a); NTL_OPT_RETURN(GF2E, x); }
+// b = f(a)
+
+
+void interpolate(GF2EX& f, const vec_GF2E& a, const vec_GF2E& b);
+inline GF2EX interpolate(const vec_GF2E& a, const vec_GF2E& b)
+   { GF2EX x; interpolate(x, a, b); NTL_OPT_RETURN(GF2EX, x); }
+// computes f such that f(a[i]) = b[i]
+
+
+
+
+/**********************************************************
+
+      Modular Composition and Minimal Polynomials
+
+***********************************************************/
+
+
+// algorithms for computing g(h) mod f
+
+
+
+
+void CompMod(GF2EX& x, const GF2EX& g, const GF2EX& h, const GF2EXModulus& F);
+inline GF2EX
+CompMod(const GF2EX& g, const GF2EX& h, const GF2EXModulus& F)
+   { GF2EX x; CompMod(x, g, h, F); NTL_OPT_RETURN(GF2EX, x); }
+// x = g(h) mod f
+
+void Comp2Mod(GF2EX& x1, GF2EX& x2, const GF2EX& g1, const GF2EX& g2,
+              const GF2EX& h, const GF2EXModulus& F);
+// xi = gi(h) mod f (i=1,2)
+
+void Comp3Mod(GF2EX& x1, GF2EX& x2, GF2EX& x3,
+              const GF2EX& g1, const GF2EX& g2, const GF2EX& g3,
+              const GF2EX& h, const GF2EXModulus& F);
+// xi = gi(h) mod f (i=1..3)
+
+
+
+// The routine build (see below), which is implicitly called
+// by the various compose and UpdateMap routines, builds a table
+// of polynomials.
+// If GF2EXArgBound > 0, then the table is limited in
+// size to approximately that many KB.
+// If GF2EXArgBound <= 0, then it is ignored, and space is allocated
+// so as to maximize speed.
+// Initially, GF2EXArgBound = 0.
+
+
+// If a single h is going to be used with many g's
+// then you should build a GF2EXArgument for h,
+// and then use the compose routine below.
+// build computes and stores h, h^2, ..., h^m mod f.
+// After this pre-computation, composing a polynomial of degree
+// roughly n with h takes n/m multiplies mod f, plus n^2
+// scalar multiplies.
+// Thus, increasing m increases the space requirement and the pre-computation
+// time, but reduces the composition time.
+// If GF2EXArgBound > 0, a table of size less than m may be built.
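
Concretely, the pre-computation pattern described above looks like this (an illustrative sketch using GF2EXArgument, build, and CompMod as declared immediately below; the degrees and the table size m = 10 are arbitrary):

#include <NTL/GF2EX.h>
#include <NTL/GF2XFactoring.h>

NTL_CLIENT

int main()
{
   GF2X P;
   BuildIrred(P, 8);
   GF2E::init(P);

   GF2EX f = random_GF2EX(100);
   SetCoeff(f, 100);                // monic, deg(f) = 100
   GF2EXModulus F(f);

   GF2EX h = random_GF2EX(100);     // one fixed inner polynomial h
   GF2EXArgument H;
   build(H, h, F, 10);              // pre-compute h, h^2, ..., h^10 mod f

   GF2EX g1 = random_GF2EX(100);
   GF2EX g2 = random_GF2EX(100);
   GF2EX x1 = CompMod(g1, H, F);    // g1(h) mod f, reusing the table
   GF2EX x2 = CompMod(g2, H, F);    // g2(h) mod f
   cout << deg(x1) << " " << deg(x2) << "\n";
   return 0;
}
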
+ +struct GF2EXArgument { + vec_GF2EX H; +}; + +extern +NTL_CHEAP_THREAD_LOCAL +long GF2EXArgBound; + + +void build(GF2EXArgument& H, const GF2EX& h, const GF2EXModulus& F, long m); + +// m must be > 0, otherwise an error is raised + +void CompMod(GF2EX& x, const GF2EX& g, const GF2EXArgument& H, + const GF2EXModulus& F); + +inline GF2EX +CompMod(const GF2EX& g, const GF2EXArgument& H, const GF2EXModulus& F) + { GF2EX x; CompMod(x, g, H, F); NTL_OPT_RETURN(GF2EX, x); } + + + + +void MinPolySeq(GF2EX& h, const vec_GF2E& a, long m); +inline GF2EX MinPolySeq(const vec_GF2E& a, long m) + { GF2EX x; MinPolySeq(x, a, m); NTL_OPT_RETURN(GF2EX, x); } + + +void MinPolyMod(GF2EX& hh, const GF2EX& g, const GF2EXModulus& F); +inline GF2EX MinPolyMod(const GF2EX& g, const GF2EXModulus& F) + { GF2EX x; MinPolyMod(x, g, F); NTL_OPT_RETURN(GF2EX, x); } + + +void MinPolyMod(GF2EX& hh, const GF2EX& g, const GF2EXModulus& F, long m); +inline GF2EX MinPolyMod(const GF2EX& g, const GF2EXModulus& F, long m) + { GF2EX x; MinPolyMod(x, g, F, m); NTL_OPT_RETURN(GF2EX, x); } + +void ProbMinPolyMod(GF2EX& hh, const GF2EX& g, const GF2EXModulus& F); +inline GF2EX ProbMinPolyMod(const GF2EX& g, const GF2EXModulus& F) + { GF2EX x; ProbMinPolyMod(x, g, F); NTL_OPT_RETURN(GF2EX, x); } + +void ProbMinPolyMod(GF2EX& hh, const GF2EX& g, const GF2EXModulus& F, long m); +inline GF2EX ProbMinPolyMod(const GF2EX& g, const GF2EXModulus& F, long m) + { GF2EX x; ProbMinPolyMod(x, g, F, m); NTL_OPT_RETURN(GF2EX, x); } + +void IrredPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F); +inline GF2EX IrredPolyMod(const GF2EX& g, const GF2EXModulus& F) + { GF2EX x; IrredPolyMod(x, g, F); NTL_OPT_RETURN(GF2EX, x); } + +void IrredPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F, long m); +inline GF2EX IrredPolyMod(const GF2EX& g, const GF2EXModulus& F, long m) + { GF2EX x; IrredPolyMod(x, g, F, m); NTL_OPT_RETURN(GF2EX, x); } + + +struct GF2EXTransMultiplier { + GF2EX f0, fbi, b; + long shamt, shamt_fbi, shamt_b; +}; + +void build(GF2EXTransMultiplier& B, const GF2EX& b, const GF2EXModulus& F); + +void TransMulMod(GF2EX& x, const GF2EX& a, const GF2EXTransMultiplier& B, + const GF2EXModulus& F); + +void UpdateMap(vec_GF2E& x, const vec_GF2E& a, + const GF2EXTransMultiplier& B, const GF2EXModulus& F); + +inline vec_GF2E UpdateMap(const vec_GF2E& a, + const GF2EXTransMultiplier& B, const GF2EXModulus& F) + { vec_GF2E x; UpdateMap(x, a, B, F); NTL_OPT_RETURN(vec_GF2E, x); } + +void ProjectPowers(vec_GF2E& x, const vec_GF2E& a, long k, + const GF2EXArgument& H, const GF2EXModulus& F); +inline vec_GF2E ProjectPowers(const vec_GF2E& a, long k, + const GF2EXArgument& H, const GF2EXModulus& F) + { vec_GF2E x; ProjectPowers(x, a, k, H, F); NTL_OPT_RETURN(vec_GF2E, x); } + +void ProjectPowers(vec_GF2E& x, const vec_GF2E& a, long k, const GF2EX& h, + const GF2EXModulus& F); +inline vec_GF2E ProjectPowers(const vec_GF2E& a, long k, + const GF2EX& H, const GF2EXModulus& F) + { vec_GF2E x; ProjectPowers(x, a, k, H, F); NTL_OPT_RETURN(vec_GF2E, x); } + +inline void project(GF2E& x, const vec_GF2E& a, const GF2EX& b) + { InnerProduct(x, a, b.rep); } + +inline GF2E project(const vec_GF2E& a, const GF2EX& b) + { GF2E x; InnerProduct(x, a, b.rep); NTL_OPT_RETURN(GF2E, x); } + +/********************************************************** + + Modular Composition and Minimal Polynomials + in towers + +***********************************************************/ + +// composition + +void CompTower(GF2EX& x, const GF2X& g, const GF2EXArgument& A, + 
const GF2EXModulus& F); + +inline GF2EX CompTower(const GF2X& g, const GF2EXArgument& A, + const GF2EXModulus& F) + { GF2EX x; CompTower(x, g, A, F); NTL_OPT_RETURN(GF2EX, x); } + +void CompTower(GF2EX& x, const GF2X& g, const GF2EX& h, + const GF2EXModulus& F); + +inline GF2EX CompTower(const GF2X& g, const GF2EX& h, + const GF2EXModulus& F) + { GF2EX x; CompTower(x, g, h, F); NTL_OPT_RETURN(GF2EX, x); } + +// prob min poly + +void ProbMinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F, + long m); + +inline GF2X ProbMinPolyTower(const GF2EX& g, const GF2EXModulus& F, + long m) + { GF2X x; ProbMinPolyTower(x, g, F, m); NTL_OPT_RETURN(GF2X, x); } + +inline void ProbMinPolyTower(GF2X& h, const GF2EX& g, + const GF2EXModulus& F) + { ProbMinPolyTower(h, g, F, deg(F)*GF2E::degree()); } + +inline GF2X ProbMinPolyTower(const GF2EX& g, const GF2EXModulus& F) + { GF2X x; ProbMinPolyTower(x, g, F); NTL_OPT_RETURN(GF2X, x); } + + +// min poly + + +void MinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F, + long m); + +inline GF2X MinPolyTower(const GF2EX& g, const GF2EXModulus& F, + long m) + { GF2X x; MinPolyTower(x, g, F, m); NTL_OPT_RETURN(GF2X, x); } + +inline void MinPolyTower(GF2X& h, const GF2EX& g, + const GF2EXModulus& F) + { MinPolyTower(h, g, F, deg(F)*GF2E::degree()); } + +inline GF2X MinPolyTower(const GF2EX& g, const GF2EXModulus& F) + { GF2X x; MinPolyTower(x, g, F); NTL_OPT_RETURN(GF2X, x); } + +// irred poly + + +void IrredPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F, + long m); + +inline GF2X IrredPolyTower(const GF2EX& g, const GF2EXModulus& F, + long m) + { GF2X x; IrredPolyTower(x, g, F, m); NTL_OPT_RETURN(GF2X, x); } + +inline void IrredPolyTower(GF2X& h, const GF2EX& g, + const GF2EXModulus& F) + { IrredPolyTower(h, g, F, deg(F)*GF2E::degree()); } + +inline GF2X IrredPolyTower(const GF2EX& g, const GF2EXModulus& F) + { GF2X x; IrredPolyTower(x, g, F); NTL_OPT_RETURN(GF2X, x); } + + + +/***************************************************************** + + Traces, norms, resultants + +******************************************************************/ + +void TraceVec(vec_GF2E& S, const GF2EX& f); + +inline vec_GF2E TraceVec(const GF2EX& f) + { vec_GF2E x; TraceVec(x, f); NTL_OPT_RETURN(vec_GF2E, x); } + + +void TraceMod(GF2E& x, const GF2EX& a, const GF2EXModulus& F); + +inline GF2E TraceMod(const GF2EX& a, const GF2EXModulus& F) + { GF2E x; TraceMod(x, a, F); NTL_OPT_RETURN(GF2E, x); } + +void TraceMod(GF2E& x, const GF2EX& a, const GF2EX& f); + +inline GF2E TraceMod(const GF2EX& a, const GF2EX& f) + { GF2E x; TraceMod(x, a, f); NTL_OPT_RETURN(GF2E, x); } + + + + + +void NormMod(GF2E& x, const GF2EX& a, const GF2EX& f); + +inline GF2E NormMod(const GF2EX& a, const GF2EX& f) + { GF2E x; NormMod(x, a, f); NTL_OPT_RETURN(GF2E, x); } + +void resultant(GF2E& rres, const GF2EX& a, const GF2EX& b); + +inline GF2E resultant(const GF2EX& a, const GF2EX& b) + { GF2E x; resultant(x, a, b); NTL_OPT_RETURN(GF2E, x); } + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/GF2EXFactoring.h b/thirdparty/linux/ntl/include/NTL/GF2EXFactoring.h new file mode 100644 index 0000000000..4b282573c0 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/GF2EXFactoring.h @@ -0,0 +1,245 @@ + + +#ifndef NTL_GF2EXFactoring__H +#define NTL_GF2EXFactoring__H + +#include +#include + +NTL_OPEN_NNS + + +/************************************************************ + + factorization routines + +************************************************************/ + + + + + 
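
As a roadmap for the declarations that follow, a complete factorization run can look like this (an illustrative sketch, not part of the header; it multiplies two known irreducibles obtained from BuildIrred_GF2EX, declared later in this file, and recovers them with CanZass):

#include <NTL/GF2EX.h>
#include <NTL/GF2EXFactoring.h>
#include <NTL/GF2XFactoring.h>

NTL_CLIENT

int main()
{
   GF2X P;
   BuildIrred(P, 8);
   GF2E::init(P);

   // a monic product of two known irreducibles, of degrees 3 and 4
   GF2EX f = BuildIrred_GF2EX(3) * BuildIrred_GF2EX(4);

   vec_pair_GF2EX_long factors;
   CanZass(factors, f);             // full factorization, with multiplicities

   for (long i = 0; i < factors.length(); i++)
      cout << "degree " << deg(factors[i].a)
           << ", multiplicity " << factors[i].b << "\n";
   return 0;
}
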
+void SquareFreeDecomp(vec_pair_GF2EX_long& u, const GF2EX& f);
+inline vec_pair_GF2EX_long SquareFreeDecomp(const GF2EX& f)
+   { vec_pair_GF2EX_long x; SquareFreeDecomp(x, f); return x; }
+
+
+// Performs square-free decomposition.
+// f must be monic.
+// If f = prod_i g_i^i, then u is set to a list of pairs (g_i, i).
+// The list is in increasing order of i, with trivial terms
+// (i.e., g_i = 1) deleted.
+
+
+void FindRoots(vec_GF2E& x, const GF2EX& f);
+inline vec_GF2E FindRoots(const GF2EX& f)
+   { vec_GF2E x; FindRoots(x, f); return x; }
+
+
+// f is monic, and has deg(f) distinct roots.
+// returns the list of roots
+
+void FindRoot(GF2E& root, const GF2EX& f);
+inline GF2E FindRoot(const GF2EX& f)
+   { GF2E x; FindRoot(x, f); return x; }
+
+
+// finds a single root of f.
+// assumes that f is monic and splits into distinct linear factors
+
+
+void SFBerlekamp(vec_GF2EX& factors, const GF2EX& f, long verbose=0);
+inline vec_GF2EX SFBerlekamp(const GF2EX& f, long verbose=0)
+   { vec_GF2EX x; SFBerlekamp(x, f, verbose); return x; }
+
+
+// Assumes f is square-free and monic.
+// returns list of factors of f.
+// Uses "Berlekamp" approach.
+
+
+void berlekamp(vec_pair_GF2EX_long& factors, const GF2EX& f, long verbose=0);
+inline vec_pair_GF2EX_long
+berlekamp(const GF2EX& f, long verbose=0)
+   { vec_pair_GF2EX_long x; berlekamp(x, f, verbose); return x; }
+
+
+// returns a list of factors, with multiplicities.
+// f must be monic.
+// Uses "Berlekamp" approach.
+
+
+extern
+NTL_CHEAP_THREAD_LOCAL
+long GF2EX_BlockingFactor;
+// Controls GCD blocking for DDF.
+
+void DDF(vec_pair_GF2EX_long& factors, const GF2EX& f, const GF2EX& h,
+         long verbose=0);
+
+inline vec_pair_GF2EX_long DDF(const GF2EX& f, const GF2EX& h,
+         long verbose=0)
+   { vec_pair_GF2EX_long x; DDF(x, f, h, verbose); return x; }
+
+
+// Performs distinct-degree factorization.
+// Assumes f is monic and square-free, and h = X^p mod f
+// Obsolete: see NewDDF, below.
+
+extern
+NTL_CHEAP_THREAD_LOCAL
+long GF2EX_GCDTableSize; /* = 4 */
+// Controls GCD blocking for NewDDF
+
+
+extern
+NTL_CHEAP_THREAD_LOCAL
+double GF2EXFileThresh;
+// external files are used for baby/giant steps if size
+// of these tables exceeds GF2EXFileThresh KB.
+
+
+void NewDDF(vec_pair_GF2EX_long& factors, const GF2EX& f, const GF2EX& h,
+         long verbose=0);
+inline vec_pair_GF2EX_long NewDDF(const GF2EX& f, const GF2EX& h,
+         long verbose=0)
+   { vec_pair_GF2EX_long x; NewDDF(x, f, h, verbose); return x; }
+
+
+// same as above, but uses baby-step/giant-step method
+
+
+void EDF(vec_GF2EX& factors, const GF2EX& f, const GF2EX& b,
+         long d, long verbose=0);
+inline vec_GF2EX EDF(const GF2EX& f, const GF2EX& b,
+         long d, long verbose=0)
+   { vec_GF2EX x; EDF(x, f, b, d, verbose); return x; }
+
+
+// Performs equal-degree factorization.
+// f is monic, square-free, and all irreducible factors have same degree.
+// b = X^p mod f.
+// d = degree of irreducible factors of f
+// Space for the trace-map computation can be controlled via ComposeBound.
+
+
+
+void RootEDF(vec_GF2EX& factors, const GF2EX& f, long verbose=0);
+inline vec_GF2EX RootEDF(const GF2EX& f, long verbose=0)
+   { vec_GF2EX x; RootEDF(x, f, verbose); return x; }
+
+
+// EDF for d==1
+
+void SFCanZass(vec_GF2EX& factors, const GF2EX& f, long verbose=0);
+inline vec_GF2EX SFCanZass(const GF2EX& f, long verbose=0)
+   { vec_GF2EX x; SFCanZass(x, f, verbose); return x; }
+
+
+// Assumes f is monic and square-free.
+// returns list of factors of f.
+// Uses "Cantor/Zassenhaus" approach.
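
The square-free routines above compose in the usual pipeline: decompose first, then factor each square-free part. A sketch under the same assumptions as the earlier examples (arbitrary degrees; SquareFreeDecomp and SFCanZass as declared above):

#include <NTL/GF2EX.h>
#include <NTL/GF2EXFactoring.h>
#include <NTL/GF2XFactoring.h>

NTL_CLIENT

int main()
{
   GF2X P;
   BuildIrred(P, 8);
   GF2E::init(P);

   GF2EX g = BuildIrred_GF2EX(2);
   GF2EX f = g * g * BuildIrred_GF2EX(3);   // monic, not square-free

   // f = prod_i u[i].a ^ u[i].b, with each u[i].a square-free and monic
   vec_pair_GF2EX_long u = SquareFreeDecomp(f);

   for (long i = 0; i < u.length(); i++) {
      vec_GF2EX parts = SFCanZass(u[i].a);  // factor each square-free part
      cout << parts.length() << " factor(s) of multiplicity "
           << u[i].b << "\n";
   }
   return 0;
}
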
+
+
+
+void CanZass(vec_pair_GF2EX_long& factors, const GF2EX& f, long verbose=0);
+inline vec_pair_GF2EX_long CanZass(const GF2EX& f, long verbose=0)
+   { vec_pair_GF2EX_long x; CanZass(x, f, verbose); return x; }
+
+
+// returns a list of factors, with multiplicities.
+// f must be monic.
+// Uses "Cantor/Zassenhaus" approach.
+
+
+void mul(GF2EX& f, const vec_pair_GF2EX_long& v);
+inline GF2EX mul(const vec_pair_GF2EX_long& v)
+   { GF2EX x; mul(x, v); return x; }
+
+
+// multiplies polynomials, with multiplicities
+
+
+/*************************************************************
+
+         irreducible poly's:  tests and constructions
+
+**************************************************************/
+
+long ProbIrredTest(const GF2EX& f, long iter=1);
+
+// performs a fast, probabilistic irreducibility test
+// the test can err only if f is reducible, and the
+// error probability is bounded by p^{-iter}.
+
+long DetIrredTest(const GF2EX& f);
+
+// performs a recursive deterministic irreducibility test
+// fast in the worst-case (when input is irreducible).
+
+long IterIrredTest(const GF2EX& f);
+
+// performs an iterative deterministic irreducibility test,
+// based on DDF.  Fast on average (when f has a small factor).
+
+void BuildIrred(GF2EX& f, long n);
+inline GF2EX BuildIrred_GF2EX(long n)
+   { GF2EX x; BuildIrred(x, n); NTL_OPT_RETURN(GF2EX, x); }
+
+
+
+// Build a monic irreducible poly of degree n.
+
+void BuildRandomIrred(GF2EX& f, const GF2EX& g);
+inline GF2EX BuildRandomIrred(const GF2EX& g)
+   { GF2EX x; BuildRandomIrred(x, g); NTL_OPT_RETURN(GF2EX, x); }
+
+
+// g is a monic irreducible polynomial.
+// constructs a random monic irreducible polynomial f of the same degree.
+
+
+long RecComputeDegree(const GF2EX& h, const GF2EXModulus& F);
+
+// f = F.f is assumed to be an "equal degree" polynomial
+// h = X^p mod f
+// the common degree of the irreducible factors of f is computed
+// This routine is useful in counting points on elliptic curves
+
+
+long IterComputeDegree(const GF2EX& h, const GF2EXModulus& F);
+
+
+void TraceMap(GF2EX& w, const GF2EX& a, long d, const GF2EXModulus& F,
+              const GF2EX& b);
+inline GF2EX TraceMap(const GF2EX& a, long d, const GF2EXModulus& F,
+              const GF2EX& b)
+   { GF2EX x; TraceMap(x, a, d, F, b); return x; }
+
+
+// w = a+a^q+...+a^{q^{d-1}} mod f;
+// it is assumed that d >= 0, and b = X^q mod f, q a power of p
+// Space allocation can be controlled via ComposeBound (see <NTL/GF2EX.h>)
+
+
+
+void PowerCompose(GF2EX& w, const GF2EX& a, long d, const GF2EXModulus& F);
+inline GF2EX PowerCompose(const GF2EX& a, long d, const GF2EXModulus& F)
+   { GF2EX x; PowerCompose(x, a, d, F); return x; }
+
+
+// w = X^{q^d} mod f;
+// it is assumed that d >= 0, and b = X^q mod f, q a power of p
+// Space allocation can be controlled via ComposeBound (see <NTL/GF2EX.h>)
+
+void PlainFrobeniusMap(GF2EX& h, const GF2EXModulus& F);
+void ComposeFrobeniusMap(GF2EX& y, const GF2EXModulus& F);
+void FrobeniusMap(GF2EX& h, const GF2EXModulus& F);
+inline GF2EX FrobeniusMap(const GF2EXModulus& F)
+   { GF2EX x; FrobeniusMap(x, F); return x; }
+long UseComposeFrobenius(long d, long n);
+
+
+NTL_CLOSE_NNS
+
+#endif
diff --git a/thirdparty/linux/ntl/include/NTL/GF2X.h b/thirdparty/linux/ntl/include/NTL/GF2X.h
new file mode 100644
index 0000000000..df48395adc
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/GF2X.h
@@ -0,0 +1,762 @@
+
+#ifndef NTL_GF2X__H
+#define NTL_GF2X__H
+
+#include
+#include
+#include
+#include
+#include
+
+NTL_OPEN_NNS
+
+class GF2E; // forward declaration
+class GF2XModulus;
+
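
For orientation before the class definition: GF2X is a polynomial over GF(2), stored bit-packed in a WordVector (xrep). A minimal usage sketch, relying only on routines declared below in this header (the specific polynomial X^3 + X + 1 is an arbitrary choice):

#include <NTL/GF2X.h>

NTL_CLIENT

int main()
{
   GF2X f;
   SetCoeff(f, 3);              // build f = X^3 + X + 1 coefficient-wise
   SetCoeff(f, 1);
   SetCoeff(f, 0);

   GF2X g = random_GF2X(3);     // random polynomial of degree < 3

   cout << deg(f) << "\n";                // 3
   cout << IsZero(f*g + f*g) << "\n";     // 1: characteristic 2
   cout << deg(GCD(f, f*g)) << "\n";      // 3: f divides f*g
   return 0;
}
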
+class GF2X {
+public:
+typedef GF2 coeff_type;
+typedef GF2E residue_type;
+typedef GF2XModulus modulus_type;
+
+
+WordVector xrep;
+
+typedef vec_GF2 VectorBaseType;
+
+
+GF2X() { }
+
+explicit GF2X(long a) { *this = a; }
+explicit GF2X(GF2 a) { *this = a; }
+
+~GF2X() { }
+
+GF2X(INIT_SIZE_TYPE, long n);
+
+GF2X& operator=(const GF2X& a) { xrep = a.xrep; return *this; }
+
+inline GF2X& operator=(GF2 a);
+inline GF2X& operator=(long a);
+
+void normalize();
+
+static const GF2X& zero();
+
+void kill() { xrep.kill(); }
+
+void SetMaxLength(long n);
+
+
+
+void SetLength(long n);
+ref_GF2 operator[](long i);
+const GF2 operator[](long i) const;
+
+
+
+
+static NTL_CHEAP_THREAD_LOCAL long HexOutput;
+
+inline GF2X(long i, GF2 c);
+inline GF2X(long i, long c);
+
+inline GF2X(INIT_MONO_TYPE, long i, GF2 c);
+inline GF2X(INIT_MONO_TYPE, long i, long c);
+inline GF2X(INIT_MONO_TYPE, long i);
+
+GF2X(GF2X& x, INIT_TRANS_TYPE) : xrep(x.xrep, INIT_TRANS) { }
+// This should only be used for simple, local variables
+// that are not subject to special memory management.
+
+
+void swap(GF2X& x) { xrep.swap(x.xrep); }
+
+
+
+
+
+// mainly for internal consumption by GF2XWatcher
+
+void KillBig() { xrep.KillBig(); }
+
+};
+
+
+long IsZero(const GF2X& a);
+
+long IsOne(const GF2X& a);
+
+long IsX(const GF2X& a);
+
+const GF2 coeff(const GF2X& a, long i);
+
+const GF2 LeadCoeff(const GF2X& a);
+
+const GF2 ConstTerm(const GF2X& a);
+
+
+inline void clear(GF2X& x)
+{ x.xrep.ZeroLength(); }
+
+void set(GF2X& x);
+
+void SetX(GF2X& x);
+
+void SetCoeff(GF2X& x, long i);
+
+void SetCoeff(GF2X& x, long i, GF2 a);
+void SetCoeff(GF2X& x, long i, long a);
+
+inline GF2X::GF2X(long i, GF2 a) { SetCoeff(*this, i, a); }
+inline GF2X::GF2X(long i, long a) { SetCoeff(*this, i, a); }
+
+inline GF2X::GF2X(INIT_MONO_TYPE, long i, GF2 a) { SetCoeff(*this, i, a); }
+inline GF2X::GF2X(INIT_MONO_TYPE, long i, long a) { SetCoeff(*this, i, a); }
+inline GF2X::GF2X(INIT_MONO_TYPE, long i) { SetCoeff(*this, i); }
+
+
+inline void swap(GF2X& a, GF2X& b) { a.swap(b); }
+
+long deg(const GF2X& aa);
+
+long weight(const GF2X& a);
+
+long operator==(const GF2X& a, const GF2X& b);
+
+inline long operator!=(const GF2X& a, const GF2X& b)
+   { return !(a == b); }
+
+long operator==(const GF2X& a, GF2 b);
+long operator==(const GF2X& a, long b);
+
+inline long operator==(GF2 a, const GF2X& b) { return b == a; }
+inline long operator==(long a, const GF2X& b) { return b == a; }
+
+inline long operator!=(const GF2X& a, GF2 b) { return !(a == b); }
+inline long operator!=(const GF2X& a, long b) { return !(a == b); }
+inline long operator!=(GF2 a, const GF2X& b) { return !(a == b); }
+inline long operator!=(long a, const GF2X& b) { return !(a == b); }
+
+
+NTL_SNS istream & operator>>(NTL_SNS istream& s, GF2X& a);
+
+NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const GF2X& a);
+
+
+
+
+void random(GF2X& x, long n);
+inline GF2X random_GF2X(long n)
+   { GF2X x; random(x, n); NTL_OPT_RETURN(GF2X, x); }
+
+
+
+
+void add(GF2X& x, const GF2X& a, const GF2X& b);
+void add(GF2X& x, const GF2X& a, GF2 b);
+void add(GF2X& x, const GF2X& a, long b);
+
+inline void add(GF2X& x, GF2 a, const GF2X& b) { add(x, b, a); }
+inline void add(GF2X& x, long a, const GF2X& b) { add(x, b, a); }
+
+inline void sub(GF2X& x, const GF2X& a, const GF2X& b) { add(x, a, b); }
+inline void sub(GF2X& x, const GF2X& a, GF2 b) { add(x, a, b); }
+inline void sub(GF2X& x, const GF2X& a, long b) { add(x, a, b); }
+inline void sub(GF2X& x, GF2 a, const GF2X& b) { add(x, a,
b); } +inline void sub(GF2X& x, long a, const GF2X& b) { add(x, a, b); } + +inline void negate(GF2X& x, const GF2X& a) { x = a; } + +inline GF2X operator+(const GF2X& a, const GF2X& b) + { GF2X x; add(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator+(const GF2X& a, GF2 b) + { GF2X x; add(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator+(const GF2X& a, long b) + { GF2X x; add(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator+(GF2 a, const GF2X& b) + { GF2X x; add(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator+(long a, const GF2X& b) + { GF2X x; add(x, a, b); NTL_OPT_RETURN(GF2X, x); } + + +inline GF2X operator-(const GF2X& a, const GF2X& b) + { GF2X x; sub(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator-(const GF2X& a, GF2 b) + { GF2X x; sub(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator-(const GF2X& a, long b) + { GF2X x; sub(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator-(GF2 a, const GF2X& b) + { GF2X x; sub(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator-(long a, const GF2X& b) + { GF2X x; sub(x, a, b); NTL_OPT_RETURN(GF2X, x); } + + +inline GF2X& operator+=(GF2X& x, const GF2X& b) + { add(x, x, b); return x; } + +inline GF2X& operator+=(GF2X& x, GF2 b) + { add(x, x, b); return x; } + +inline GF2X& operator+=(GF2X& x, long b) + { add(x, x, b); return x; } + +inline GF2X& operator-=(GF2X& x, const GF2X& b) + { sub(x, x, b); return x; } + +inline GF2X& operator-=(GF2X& x, GF2 b) + { sub(x, x, b); return x; } + +inline GF2X& operator-=(GF2X& x, long b) + { sub(x, x, b); return x; } + + +inline GF2X operator-(const GF2X& a) + { GF2X x; negate(x, a); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X& operator++(GF2X& x) { add(x, x, 1); return x; } +inline void operator++(GF2X& x, int) { add(x, x, 1); } +inline GF2X& operator--(GF2X& x) { sub(x, x, 1); return x; } +inline void operator--(GF2X& x, int) { sub(x, x, 1); } + + +void mul(GF2X& c, const GF2X& a, const GF2X& b); +void OldMul(GF2X& c, const GF2X& a, const GF2X& b); + +void mul(GF2X& x, const GF2X& a, GF2 b); +void mul(GF2X& x, const GF2X& a, long b); + +inline void mul(GF2X& x, GF2 a, const GF2X& b) { mul(x, b, a); } +inline void mul(GF2X& x, long a, const GF2X& b) { mul(x, b, a); } + +void MulByX(GF2X& x, const GF2X& a); +inline GF2X MulByX(const GF2X& a) + { GF2X x; MulByX(x, a); NTL_OPT_RETURN(GF2X, x); } + + +void sqr(GF2X& c, const GF2X& a); + +inline GF2X sqr(const GF2X& a) + { GF2X x; sqr(x, a); NTL_OPT_RETURN(GF2X, x); } + +void trunc(GF2X& x, const GF2X& a, long m); +inline GF2X trunc(const GF2X& a, long m) + { GF2X x; trunc(x, a, m); NTL_OPT_RETURN(GF2X, x); } + + +inline GF2X operator*(const GF2X& a, const GF2X& b) + { GF2X x; mul(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator*(const GF2X& a, GF2 b) + { GF2X x; mul(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator*(const GF2X& a, long b) + { GF2X x; mul(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator*(GF2 a, const GF2X& b) + { GF2X x; mul(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator*(long a, const GF2X& b) + { GF2X x; mul(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X& operator*=(GF2X& x, const GF2X& b) + { mul(x, x, b); return x; } + +inline GF2X& operator*=(GF2X& x, GF2 b) + { mul(x, x, b); return x; } + +inline GF2X& operator*=(GF2X& x, long b) + { mul(x, x, b); return x; } + +void power(GF2X& x, const GF2X& a, long e); // x = a^e (e >= 0) +inline GF2X power(const GF2X& a, long e) + { GF2X x; power(x, a, e); 
 NTL_OPT_RETURN(GF2X, x); }
+
+
+
+typedef Vec<GF2X> vec_GF2X;
+
+void LeftShift(GF2X& c, const GF2X& a, long n);
+inline GF2X LeftShift(const GF2X& a, long n)
+   { GF2X x; LeftShift(x, a, n); NTL_OPT_RETURN(GF2X, x); }
+
+void ShiftAdd(GF2X& c, const GF2X& a, long n);
+
+
+void RightShift(GF2X& c, const GF2X& a, long n);
+inline GF2X RightShift(const GF2X& a, long n)
+   { GF2X x; RightShift(x, a, n); NTL_OPT_RETURN(GF2X, x); }
+
+
+#ifndef NTL_TRANSITION
+
+inline GF2X operator>>(const GF2X& a, long n)
+   { GF2X x; RightShift(x, a, n); NTL_OPT_RETURN(GF2X, x); }
+
+inline GF2X operator<<(const GF2X& a, long n)
+   { GF2X x; LeftShift(x, a, n); NTL_OPT_RETURN(GF2X, x); }
+
+inline GF2X& operator<<=(GF2X& x, long n)
+   { LeftShift(x, x, n); return x; }
+
+inline GF2X& operator>>=(GF2X& x, long n)
+   { RightShift(x, x, n); return x; }
+
+#endif
+
+
+
+void CopyReverse(GF2X& c, const GF2X& a, long hi);
+// c[0..hi] = reverse(a[0..hi]), with zero fill as necessary
+
+inline void reverse(GF2X& c, const GF2X& a, long hi)
+{  CopyReverse(c, a, hi); }
+
+inline GF2X reverse(const GF2X& a, long hi)
+   { GF2X x; reverse(x, a, hi); NTL_OPT_RETURN(GF2X, x); }
+
+inline void reverse(GF2X& c, const GF2X& a)
+{  CopyReverse(c, a, deg(a)); }
+
+
+inline GF2X reverse(const GF2X& a)
+   { GF2X x; reverse(x, a); NTL_OPT_RETURN(GF2X, x); }
+
+void InvTrunc(GF2X& c, const GF2X& a, long e);
+
+
+inline GF2X InvTrunc(const GF2X& a, long e)
+   { GF2X x; InvTrunc(x, a, e); NTL_OPT_RETURN(GF2X, x); }
+
+
+class GF2XModulus {
+
+public:
+   GF2XModulus();
+
+   GF2XModulus(const GF2XModulus&);
+   GF2XModulus& operator=(const GF2XModulus&);
+
+   GF2XModulus(const GF2X& ff);
+
+   GF2X f;   // the modulus
+
+   operator const GF2X& () const { return f; }
+   const GF2X& val() const { return f; }
+
+   long n; //  deg(f)
+   long sn; //  f.xrep.length()
+   long posn; //  n - NTL_BITS_PER_LONG*(sn-1);
+
+   long k3; // used for trinomials and pentanomials
+   long k2;
+   long k1;
+
+   long size; // word length of residues
+
+   long WordLength() const { return size; }
+
+   _ntl_ulong msk; // mask of high bits of residues
+
+   long method;
+
+   vec_GF2X stab;
+
+   UniqueArray<_ntl_ulong*> stab_ptr;
+   UniqueArray<long> stab_cnt;
+   UniqueArray<_ntl_ulong> stab1;
+
+
+   GF2X h0, f0;
+
+   OptionalVal< Lazy<vec_GF2> > tracevec;
+
+};
+
+
+inline long deg(const GF2XModulus& F) { return F.n; }
+
+
+void build(GF2XModulus& F, const GF2X& f);
+
+void rem(GF2X& r, const GF2X& a, const GF2XModulus& F);
+
+void DivRem(GF2X& q, GF2X& r, const GF2X& a, const GF2XModulus& F);
+
+void div(GF2X& q, const GF2X& a, const GF2XModulus& F);
+
+void PlainDivRem(GF2X& q, GF2X& r, const GF2X& a, const GF2X& b);
+void PlainDiv(GF2X& q, const GF2X& a, const GF2X& b);
+void PlainRem(GF2X& r, const GF2X& a, const GF2X& b);
+
+
+void MulMod(GF2X& c, const GF2X& a, const GF2X& b, const GF2XModulus& F);
+inline GF2X MulMod(const GF2X& a, const GF2X& b, const GF2XModulus& F)
+   { GF2X x; MulMod(x, a, b, F); NTL_OPT_RETURN(GF2X, x); }
+
+void SqrMod(GF2X& c, const GF2X& a, const GF2XModulus& F);
+inline GF2X SqrMod(const GF2X& a, const GF2XModulus& F)
+   { GF2X x; SqrMod(x, a, F); NTL_OPT_RETURN(GF2X, x); }
+
+void MulByXMod(GF2X& c, const GF2X& a, const GF2XModulus& F);
+inline GF2X MulByXMod(const GF2X& a, const GF2XModulus& F)
+   { GF2X x; MulByXMod(x, a, F); NTL_OPT_RETURN(GF2X, x); }
+
+
+
+void MulMod(GF2X& c, const GF2X& a, const GF2X& b, const GF2X& f);
+inline GF2X MulMod(const GF2X& a, const GF2X& b, const GF2X& f)
+   { GF2X x; MulMod(x, a, b, f); NTL_OPT_RETURN(GF2X, x); }
+
+void SqrMod(GF2X& c, const GF2X& a, const GF2X& f);
+inline GF2X
SqrMod(const GF2X& a, const GF2X& f) + { GF2X x; SqrMod(x, a, f); NTL_OPT_RETURN(GF2X, x); } + +void MulByXMod(GF2X& c, const GF2X& a, const GF2X& f); +inline GF2X MulByXMod(const GF2X& a, const GF2X& f) + { GF2X x; MulByXMod(x, a, f); NTL_OPT_RETURN(GF2X, x); } + + +void InvMod(GF2X& c, const GF2X& a, const GF2X& f); +inline GF2X InvMod(const GF2X& a, const GF2X& f) + { GF2X x; InvMod(x, a, f); NTL_OPT_RETURN(GF2X, x); } + +long InvModStatus(GF2X& c, const GF2X& a, const GF2X& f); + +inline long InvModStatus(GF2X& c, const GF2X& a, const GF2XModulus& F) + { return InvModStatus(c, a, F.f); } + + +void PowerMod(GF2X& h, const GF2X& g, const ZZ& e, const GF2XModulus& F); +inline void PowerMod(GF2X& x, const GF2X& g, long e, const GF2XModulus& F) + { PowerMod(x, g, ZZ_expo(e), F); } + +void PowerXMod(GF2X& hh, const ZZ& e, const GF2XModulus& F); +inline void PowerXMod(GF2X& x, long e, const GF2XModulus& F) + { PowerXMod(x, ZZ_expo(e), F); } + +inline GF2X PowerMod(const GF2X& g, const ZZ& e, const GF2XModulus& F) + { GF2X x; PowerMod(x, g, e, F); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X PowerMod(const GF2X& g, long e, const GF2XModulus& F) + { GF2X x; PowerMod(x, g, e, F); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X PowerXMod(const ZZ& e, const GF2XModulus& F) + { GF2X x; PowerXMod(x, e, F); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X PowerXMod(long e, const GF2XModulus& F) + { GF2X x; PowerXMod(x, e, F); NTL_OPT_RETURN(GF2X, x); } + + + +inline GF2X operator%(const GF2X& a, const GF2XModulus& F) + { GF2X x; rem(x, a, F); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X& operator%=(GF2X& x, const GF2XModulus& F) + { rem(x, x, F); return x; } + + +inline GF2X operator/(const GF2X& a, const GF2XModulus& F) + { GF2X x; div(x, a, F); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X& operator/=(GF2X& x, const GF2XModulus& F) + { div(x, x, F); return x; } + + + +void DivRem(GF2X& q, GF2X& r, const GF2X& a, const GF2X& b); + +void div(GF2X& q, const GF2X& a, const GF2X& b); + +void div(GF2X& q, const GF2X& a, GF2 b); +void div(GF2X& q, const GF2X& a, long b); + +void rem(GF2X& r, const GF2X& a, const GF2X& b); + + + + +inline GF2X operator/(const GF2X& a, const GF2X& b) + { GF2X x; div(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator/(const GF2X& a, GF2 b) + { GF2X x; div(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X operator/(const GF2X& a, long b) + { GF2X x; div(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X& operator/=(GF2X& x, GF2 b) + { div(x, x, b); return x; } + +inline GF2X& operator/=(GF2X& x, long b) + { div(x, x, b); return x; } + +inline GF2X& operator/=(GF2X& x, const GF2X& b) + { div(x, x, b); return x; } + + +inline GF2X operator%(const GF2X& a, const GF2X& b) + { GF2X x; rem(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X& operator%=(GF2X& x, const GF2X& b) + { rem(x, x, b); return x; } + + +void GCD(GF2X& d, const GF2X& a, const GF2X& b); +inline GF2X GCD(const GF2X& a, const GF2X& b) + { GF2X x; GCD(x, a, b); NTL_OPT_RETURN(GF2X, x); } + +void OldGCD(GF2X& d, const GF2X& a, const GF2X& b); + + +void XGCD(GF2X& d, GF2X& s, GF2X& t, const GF2X& a, const GF2X& b); + +void OldXGCD(GF2X& d, GF2X& s, GF2X& t, const GF2X& a, const GF2X& b); + + +void diff(GF2X& c, const GF2X& a); +inline GF2X diff(const GF2X& a) + { GF2X x; diff(x, a); NTL_OPT_RETURN(GF2X, x); } + +void conv(GF2X& c, long a); +void conv(GF2X& c, GF2 a); +void conv(GF2X& x, const vec_GF2& a); +inline void conv(GF2X& x, const ZZ& a) + { conv(x, to_GF2(a)); } + +void conv(vec_GF2& x, const GF2X& a); + +inline GF2X 
to_GF2X(long a) + { GF2X x; conv(x, a); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X to_GF2X(GF2 a) + { GF2X x; conv(x, a); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X to_GF2X(const vec_GF2& a) + { GF2X x; conv(x, a); NTL_OPT_RETURN(GF2X, x); } + +inline GF2X to_GF2X(const ZZ& a) + { GF2X x; conv(x, a); NTL_OPT_RETURN(GF2X, x); } + +inline vec_GF2 to_vec_GF2(const GF2X& a) + { vec_GF2 x; conv(x, a); NTL_OPT_RETURN(vec_GF2, x); } + + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(GF2X& x, const GF2X& a) + { x = a; } + +class ZZX; +void conv(GF2X& x, const ZZX& a); +void conv(ZZX& x, const GF2X& a); + + +/* ------------------------------------- */ + + + + + +inline GF2X& GF2X::operator=(long a) + { conv(*this, a); return *this; } + +inline GF2X& GF2X::operator=(GF2 a) + { conv(*this, a); return *this; } + +void VectorCopy(vec_GF2& x, const GF2X& a, long n); + +inline vec_GF2 VectorCopy(const GF2X& a, long n) + { vec_GF2 x; VectorCopy(x, a, n); NTL_OPT_RETURN(vec_GF2, x); } + + +void MulTrunc(GF2X& c, const GF2X& a, const GF2X& b, long n); +inline GF2X MulTrunc(const GF2X& a, const GF2X& b, long n) + { GF2X x; MulTrunc(x, a, b, n); NTL_OPT_RETURN(GF2X, x); } + +void SqrTrunc(GF2X& c, const GF2X& a, long n); +inline GF2X SqrTrunc(const GF2X& a, long n) + { GF2X x; SqrTrunc(x, a, n); NTL_OPT_RETURN(GF2X, x); } + +long divide(GF2X& q, const GF2X& a, const GF2X& b); + +long divide(const GF2X& a, const GF2X& b); + + +/*** modular composition routines and data structures ***/ + +struct GF2XArgument { + vec_GF2X H; +}; + + +void CompMod(GF2X& x, const GF2X& g, + const GF2XArgument& A, const GF2XModulus& F); + +inline GF2X CompMod(const GF2X& g, + const GF2XArgument& A, const GF2XModulus& F) + { GF2X x; CompMod(x, g, A, F); NTL_OPT_RETURN(GF2X, x); } + +void build(GF2XArgument& A, const GF2X& h, const GF2XModulus& F, long m); + +void CompMod(GF2X& x, const GF2X& g, const GF2X& h, const GF2XModulus& F); +inline GF2X CompMod(const GF2X& g, const GF2X& h, const GF2XModulus& F) + { GF2X x; CompMod(x, g, h, F); NTL_OPT_RETURN(GF2X, x); } + +void Comp2Mod(GF2X& x1, GF2X& x2, const GF2X& g1, const GF2X& g2, + const GF2X& h, const GF2XModulus& F); + +void Comp3Mod(GF2X& x1, GF2X& x2, GF2X& x3, + const GF2X& g1, const GF2X& g2, const GF2X& g3, + const GF2X& h, const GF2XModulus& F); + + +void MinPolySeq(GF2X& h, const vec_GF2& a, long m); +inline GF2X MinPolySeq(const vec_GF2& a, long m) + { GF2X x; MinPolySeq(x, a, m); NTL_OPT_RETURN(GF2X, x); } + +void ProbMinPolyMod(GF2X& hh, const GF2X& g, const GF2XModulus& F); +inline GF2X ProbMinPolyMod(const GF2X& g, const GF2XModulus& F) + { GF2X x; ProbMinPolyMod(x, g, F); NTL_OPT_RETURN(GF2X, x); } + +void ProbMinPolyMod(GF2X& hh, const GF2X& g, const GF2XModulus& F, long m); +inline GF2X ProbMinPolyMod(const GF2X& g, const GF2XModulus& F, long m) + { GF2X x; ProbMinPolyMod(x, g, F, m); NTL_OPT_RETURN(GF2X, x); } + +void MinPolyMod(GF2X& hh, const GF2X& g, const GF2XModulus& F); +inline GF2X MinPolyMod(const GF2X& g, const GF2XModulus& F) + { GF2X x; MinPolyMod(x, g, F); NTL_OPT_RETURN(GF2X, x); } + +void MinPolyMod(GF2X& hh, const GF2X& g, const GF2XModulus& F, long m); +inline GF2X MinPolyMod(const GF2X& g, const GF2XModulus& F, long m) + { GF2X x; MinPolyMod(x, g, F, m); NTL_OPT_RETURN(GF2X, x); } + +void IrredPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F); +inline GF2X IrredPolyMod(const GF2X& g, const GF2XModulus& F) + { GF2X x; IrredPolyMod(x, g, F); NTL_OPT_RETURN(GF2X, x); } + +void IrredPolyMod(GF2X& h, const GF2X& g, 
const GF2XModulus& F, long m); +inline GF2X IrredPolyMod(const GF2X& g, const GF2XModulus& F, long m) + { GF2X x; IrredPolyMod(x, g, F, m); NTL_OPT_RETURN(GF2X, x); } + + +// undocumented stuff: + +void MinPolyInternal(GF2X& h, const GF2X& x, long m); + +void OldMinPolyInternal(GF2X& h, const GF2X& x, long m); + + + +struct GF2XTransMultiplier { + GF2X f0, fbi, b; + long shamt, shamt_fbi, shamt_b; +}; + +void build(GF2XTransMultiplier& B, const GF2X& b, const GF2XModulus& F); + +void UpdateMap(vec_GF2& x, const vec_GF2& a, const GF2XTransMultiplier& B, + const GF2XModulus& F); + +inline vec_GF2 UpdateMap(const vec_GF2& a, + const GF2XTransMultiplier& B, const GF2XModulus& F) + { vec_GF2 x; UpdateMap(x, a, B, F); NTL_OPT_RETURN(vec_GF2, x); } + +inline void project(ref_GF2 x, const vec_GF2& a, const GF2X& b) + { x = to_GF2(InnerProduct(a.rep, b.xrep)); } + +inline GF2 project(const vec_GF2& a, const GF2X& b) + { return to_GF2(InnerProduct(a.rep, b.xrep)); } + + +void ProjectPowers(vec_GF2& x, const vec_GF2& a, long k, + const GF2XArgument& H, const GF2XModulus& F); + +inline vec_GF2 ProjectPowers(const vec_GF2& a, long k, + const GF2XArgument& H, const GF2XModulus& F) + { vec_GF2 x; ProjectPowers(x, a, k, H, F); + NTL_OPT_RETURN(vec_GF2, x); } + +void ProjectPowers(vec_GF2& x, const vec_GF2& a, long k, const GF2X& h, + const GF2XModulus& F); + +inline vec_GF2 ProjectPowers(const vec_GF2& a, long k, + const GF2X& H, const GF2XModulus& F) + { vec_GF2 x; ProjectPowers(x, a, k, H, F); + NTL_OPT_RETURN(vec_GF2, x); } + +void TraceVec(vec_GF2& S, const GF2X& f); + +inline vec_GF2 TraceVec(const GF2X& f) + { vec_GF2 x; TraceVec(x, f); NTL_OPT_RETURN(vec_GF2, x); } + + +void TraceMod(ref_GF2 x, const GF2X& a, const GF2XModulus& F); + +inline GF2 TraceMod(const GF2X& a, const GF2XModulus& F) + { GF2 x; TraceMod(x, a, F); return x; } + +void TraceMod(ref_GF2 x, const GF2X& a, const GF2X& f); + +inline GF2 TraceMod(const GF2X& a, const GF2X& f) + { GF2 x; TraceMod(x, a, f); return x; } + + + +void GF2XFromBytes(GF2X& x, const unsigned char *p, long n); +inline GF2X GF2XFromBytes(const unsigned char *p, long n) + { GF2X x; GF2XFromBytes(x, p, n); NTL_OPT_RETURN(GF2X, x); } + +void BytesFromGF2X(unsigned char *p, const GF2X& a, long n); + +inline long NumBits(const GF2X& a) + { return deg(a) + 1; } + +inline long NumBytes(const GF2X& a) + { return (NumBits(a) + 7)/8; } + + + +// GF2X scratch variables + + +class GF2XWatcher { +public: + GF2X& watched; + explicit + GF2XWatcher(GF2X& _watched) : watched(_watched) {} + + ~GF2XWatcher() { watched.KillBig(); } +}; + +#define NTL_GF2XRegister(x) NTL_TLS_LOCAL(GF2X, x); GF2XWatcher _WATCHER__ ## x(x) + + + +// RAII for HexOutput + +class GF2XHexOutputPush { +private: + long OldHexOutput; + + GF2XHexOutputPush(const GF2XHexOutputPush&); // disable + void operator=(const GF2XHexOutputPush&); // disable + +public: + GF2XHexOutputPush() : OldHexOutput(GF2X::HexOutput) { } + ~GF2XHexOutputPush() { GF2X::HexOutput = OldHexOutput; } +}; + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/GF2XFactoring.h b/thirdparty/linux/ntl/include/NTL/GF2XFactoring.h new file mode 100644 index 0000000000..210f7e3f86 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/GF2XFactoring.h @@ -0,0 +1,60 @@ +#ifndef NTL_GF2XFactoring__H +#define NTL_GF2XFactoring__H + +#include <NTL/GF2X.h> +#include <NTL/pair_GF2X_long.h> + +NTL_OPEN_NNS + +long IterIrredTest(const GF2X& f); + +void SquareFreeDecomp(vec_pair_GF2X_long& u, const GF2X& ff); +inline vec_pair_GF2X_long SquareFreeDecomp(const GF2X& f) + { 
vec_pair_GF2X_long x; SquareFreeDecomp(x, f); return x; } + + +void DDF(vec_pair_GF2X_long& factors, const GF2X& ff, long verbose=0); +inline vec_pair_GF2X_long DDF(const GF2X& f, + long verbose=0) + { vec_pair_GF2X_long x; DDF(x, f, verbose); return x; } + + +void EDF(vec_GF2X& factors, const GF2X& ff, long d, long verbose=0); +inline vec_GF2X EDF(const GF2X& f, + long d, long verbose=0) + { vec_GF2X x; EDF(x, f, d, verbose); return x; } + + +void SFCanZass(vec_GF2X& factors, const GF2X& ff, long verbose=0); +inline vec_GF2X SFCanZass(const GF2X& f, long verbose=0) + { vec_GF2X x; SFCanZass(x, f, verbose); return x; } + + +void CanZass(vec_pair_GF2X_long& factors, const GF2X& f, long verbose=0); +inline vec_pair_GF2X_long CanZass(const GF2X& f, long verbose=0) + { vec_pair_GF2X_long x; CanZass(x, f, verbose); return x; } + + +void mul(GF2X& f, const vec_pair_GF2X_long& v); +inline GF2X mul(const vec_pair_GF2X_long& v) + { GF2X x; mul(x, v); return x; } + + +void BuildIrred(GF2X& f, long n); +inline GF2X BuildIrred_GF2X(long n) + { GF2X x; BuildIrred(x, n); NTL_OPT_RETURN(GF2X, x); } + + +void BuildRandomIrred(GF2X& f, const GF2X& g); +inline GF2X BuildRandomIrred(const GF2X& g) + { GF2X x; BuildRandomIrred(x, g); NTL_OPT_RETURN(GF2X, x); } + + +void BuildSparseIrred(GF2X& f, long n); +inline GF2X BuildSparseIrred_GF2X(long n) + { GF2X x; BuildSparseIrred(x, n); NTL_OPT_RETURN(GF2X, x); } + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/GF2XVec.h b/thirdparty/linux/ntl/include/NTL/GF2XVec.h new file mode 100644 index 0000000000..cd9d25c981 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/GF2XVec.h @@ -0,0 +1,62 @@ +#ifndef NTL_GF2XVec__H +#define NTL_GF2XVec__H + +#include <NTL/GF2X.h> + +NTL_OPEN_NNS + + + +/***************************************************************** + +The class GF2XVec implements vectors of fixed-length GF2X's. +You can allocate a vector of GF2X's of a specified length, where +the maximum size of each GF2X is also specified. +These parameters can be specified once, either with a constructor, +or with SetSize. +It is an error to try to re-size a vector, or store a GF2X that +doesn't fit. +The space can be released with "kill", and then you are free to +call SetSize again. +If you want more flexible---but less efficient---vectors, +use vec_GF2X. 
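+ +For example, the following sketch (the sizes 32 and 4 are arbitrary, +chosen only for illustration) allocates, uses, and releases such a vector: + + GF2XVec v(32, 4); // 32 GF2X's, each of maximum size 4 + v[0] = ...; // elements are accessed via operator[] + v.kill(); // release space; SetSize may now be called again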
+ +*****************************************************************/ + + + +class GF2XVec { + +private: + GF2X* v; + long len; + long bsize; + + +public: + GF2XVec& operator=(const GF2XVec&); + GF2XVec(const GF2XVec&); + + long length() const { return len; } + long BaseSize() const { return bsize; } + void SetSize(long n, long d); + void kill(); + + GF2XVec() : v(0), len(0), bsize(0) { } + GF2XVec(long n, long d) : v(0), len(0), bsize(0) { SetSize(n, d); } + ~GF2XVec() { kill(); }; + + GF2X* elts() { return v; } + const GF2X* elts() const { return v; } + + GF2X& operator[](long i) { return v[i]; } + const GF2X& operator[](long i) const { return v[i]; } + + void swap(GF2XVec& x); +}; + +inline void swap(GF2XVec& x, GF2XVec& y) { x.swap(y); } + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/HAVE_AVX.h b/thirdparty/linux/ntl/include/NTL/HAVE_AVX.h new file mode 100644 index 0000000000..35f55e47e5 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/HAVE_AVX.h @@ -0,0 +1,3 @@ +#ifndef NTL_HAVE_AVX +#define NTL_HAVE_AVX +#endif diff --git a/thirdparty/linux/ntl/include/NTL/HAVE_BUILTIN_CLZL.h b/thirdparty/linux/ntl/include/NTL/HAVE_BUILTIN_CLZL.h new file mode 100644 index 0000000000..cb0f5795e6 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/HAVE_BUILTIN_CLZL.h @@ -0,0 +1,3 @@ +#ifndef NTL_HAVE_BUILTIN_CLZL +#define NTL_HAVE_BUILTIN_CLZL +#endif diff --git a/thirdparty/linux/ntl/include/NTL/HAVE_FMA.h b/thirdparty/linux/ntl/include/NTL/HAVE_FMA.h new file mode 100644 index 0000000000..fd3b735c1b --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/HAVE_FMA.h @@ -0,0 +1,3 @@ +#ifndef NTL_HAVE_FMA +#define NTL_HAVE_FMA +#endif diff --git a/thirdparty/linux/ntl/include/NTL/HAVE_LL_TYPE.h b/thirdparty/linux/ntl/include/NTL/HAVE_LL_TYPE.h new file mode 100644 index 0000000000..610d3d77e7 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/HAVE_LL_TYPE.h @@ -0,0 +1,3 @@ +#ifndef NTL_HAVE_LL_TYPE +#define NTL_HAVE_LL_TYPE +#endif diff --git a/thirdparty/linux/ntl/include/NTL/HNF.h b/thirdparty/linux/ntl/include/NTL/HNF.h new file mode 100644 index 0000000000..90d19bf9f2 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/HNF.h @@ -0,0 +1,25 @@ + +#ifndef NTL_HNF__H +#define NTL_HNF__H + +#include <NTL/mat_ZZ.h> + +NTL_OPEN_NNS + +void HNF(mat_ZZ& W, const mat_ZZ& A, const ZZ& D); +// The input matrix A is an n x m matrix of rank m (so n >= m), and +// D is a multiple of the determinant of the lattice L spanned by +// the rows of A. +// W is computed as the Hermite Normal Form of A; +// that is, W is the unique m x m matrix whose rows span L, such that +// - W is lower triangular, +// - the diagonal entries are positive, +// - any entry below the diagonal is a non-negative number +// strictly less than the diagonal entry in its column. + +// Currently, this is implemented using the algorithm of +// [P. Domich, R. Kannan and L. Trotter, Math. Oper. Research 12:50-59, 1987]. 
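+ +// Illustrative usage sketch (not part of this header). Here A is taken +// to be square, so that determinant(D, A) from mat_ZZ.h yields a valid D; +// in general, any known multiple of the lattice determinant works: +// +// mat_ZZ A, W; +// ... fill A with a basis of the lattice ... +// ZZ D; +// determinant(D, A); +// HNF(W, A, D);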
+ +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/LLL.h b/thirdparty/linux/ntl/include/NTL/LLL.h new file mode 100644 index 0000000000..c5a5eb1fc0 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/LLL.h @@ -0,0 +1,149 @@ +#ifndef NTL_LLL__H +#define NTL_LLL__H + +#include <NTL/mat_ZZ.h> +#include <NTL/mat_RR.h> + +NTL_OPEN_NNS + +long LLL(ZZ& det, mat_ZZ& B, long verbose = 0); +long LLL(ZZ& det, mat_ZZ& B, mat_ZZ& U, long verbose = 0); + +long LLL(ZZ& det, mat_ZZ& B, long a, long b, long verbose = 0); +long LLL(ZZ& det, mat_ZZ& B, mat_ZZ& U, long a, long b, long verbose = 0); + +long LLL_plus(vec_ZZ& D, mat_ZZ& B, mat_ZZ& U, long verbose=0); +long LLL_plus(vec_ZZ& D, mat_ZZ& B, long verbose=0); +long LLL_plus(vec_ZZ& D, mat_ZZ& B, mat_ZZ& U, long a, long b, long verbose=0); +long LLL_plus(vec_ZZ& D, mat_ZZ& B, long a, long b, long verbose=0); + +long image(ZZ& det, mat_ZZ& B, long verbose = 0); +long image(ZZ& det, mat_ZZ& B, mat_ZZ& U, long verbose = 0); + +long LatticeSolve(vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& y, long reduce=0); + + + +typedef long (*LLLCheckFct)(const vec_ZZ&); + +extern NTL_CHEAP_THREAD_LOCAL double LLLStatusInterval; +extern NTL_CHEAP_THREAD_LOCAL char *LLLDumpFile; + + +// classical Gram-Schmidt versions + +long LLL_FP(mat_ZZ& B, double delta = 0.99, + long deep = 0, LLLCheckFct check = 0, long verbose = 0); + +long LLL_FP(mat_ZZ& B, mat_ZZ& U, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); + + +long BKZ_FP(mat_ZZ& BB, double delta=0.99, long BlockSize=10, long prune=0, + LLLCheckFct check = 0, long verbose = 0) ; +long BKZ_FP(mat_ZZ& BB, mat_ZZ& U, double delta=0.99, + long BlockSize=10, long prune=0, + LLLCheckFct check = 0, long verbose = 0); + +long LLL_XD(mat_ZZ& B, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); +long LLL_XD(mat_ZZ& B, mat_ZZ& U, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); + + +long BKZ_XD(mat_ZZ& BB, double delta=0.99, long BlockSize=10, long prune=0, + LLLCheckFct check = 0, long verbose = 0); +long BKZ_XD(mat_ZZ& BB, mat_ZZ& U, double delta=0.99, + long BlockSize=10, long prune=0, LLLCheckFct check = 0, long verbose = 0); + +long LLL_QP(mat_ZZ& B, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); +long LLL_QP(mat_ZZ& B, mat_ZZ& U, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); + + +long BKZ_QP(mat_ZZ& BB, double delta=0.99, long BlockSize=10, long prune=0, + LLLCheckFct check = 0, long verbose = 0); +long BKZ_QP(mat_ZZ& BB, mat_ZZ& U, double delta=0.99, + long BlockSize=10, long prune=0, LLLCheckFct check = 0, long verbose = 0); + +long BKZ_QP1(mat_ZZ& BB, double delta=0.99, long BlockSize=10, long prune=0, + LLLCheckFct check = 0, long verbose = 0); +long BKZ_QP1(mat_ZZ& BB, mat_ZZ& U, double delta=0.99, + long BlockSize=10, long prune=0, LLLCheckFct check = 0, long verbose = 0); + +long LLL_RR(mat_ZZ& B, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); +long LLL_RR(mat_ZZ& B, mat_ZZ& U, double delta = 0.99, + long deep = 0, LLLCheckFct check = 0, long verbose = 0); + + +long BKZ_RR(mat_ZZ& BB, double delta=0.99, long BlockSize=10, + long prune=0, LLLCheckFct check = 0, long verbose = 0); + +long BKZ_RR(mat_ZZ& BB, mat_ZZ& U, double delta=0.99, + long BlockSize=10, long prune=0, LLLCheckFct check = 0, long verbose = 0); + + +// Givens rotations versions + +long G_LLL_FP(mat_ZZ& B, double delta = 0.99, + long deep = 0, LLLCheckFct check = 0, long 
verbose = 0); + +long G_LLL_FP(mat_ZZ& B, mat_ZZ& U, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); + + +long G_BKZ_FP(mat_ZZ& BB, double delta=0.99, long BlockSize=10, long prune=0, + LLLCheckFct check = 0, long verbose = 0) ; +long G_BKZ_FP(mat_ZZ& BB, mat_ZZ& U, double delta=0.99, + long BlockSize=10, long prune=0, + LLLCheckFct check = 0, long verbose = 0); + +long G_LLL_XD(mat_ZZ& B, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); +long G_LLL_XD(mat_ZZ& B, mat_ZZ& U, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); + + +long G_BKZ_XD(mat_ZZ& BB, double delta=0.99, long BlockSize=10, long prune=0, + LLLCheckFct check = 0, long verbose = 0); +long G_BKZ_XD(mat_ZZ& BB, mat_ZZ& U, double delta=0.99, + long BlockSize=10, long prune=0, LLLCheckFct check = 0, long verbose = 0); + +long G_LLL_QP(mat_ZZ& B, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); +long G_LLL_QP(mat_ZZ& B, mat_ZZ& U, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); + + +long G_BKZ_QP(mat_ZZ& BB, double delta=0.99, long BlockSize=10, long prune=0, + LLLCheckFct check = 0, long verbose = 0); +long G_BKZ_QP(mat_ZZ& BB, mat_ZZ& U, double delta=0.99, + long BlockSize=10, long prune=0, LLLCheckFct check = 0, long verbose = 0); + +long G_BKZ_QP1(mat_ZZ& BB, double delta=0.99, long BlockSize=10, long prune=0, + LLLCheckFct check = 0, long verbose = 0); +long G_BKZ_QP1(mat_ZZ& BB, mat_ZZ& U, double delta=0.99, + long BlockSize=10, long prune=0, LLLCheckFct check = 0, long verbose = 0); + +long G_LLL_RR(mat_ZZ& B, double delta = 0.99, long deep = 0, + LLLCheckFct check = 0, long verbose = 0); +long G_LLL_RR(mat_ZZ& B, mat_ZZ& U, double delta = 0.99, + long deep = 0, LLLCheckFct check = 0, long verbose = 0); + + +long G_BKZ_RR(mat_ZZ& BB, double delta=0.99, long BlockSize=10, + long prune=0, LLLCheckFct check = 0, long verbose = 0); + +long G_BKZ_RR(mat_ZZ& BB, mat_ZZ& U, double delta=0.99, + long BlockSize=10, long prune=0, LLLCheckFct check = 0, long verbose = 0); + +void ComputeGS(const mat_ZZ& B, mat_RR& mu, vec_RR& c); + + +void NearVector(vec_ZZ& ww, const mat_ZZ& BB, const vec_ZZ& a); + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/Lazy.h b/thirdparty/linux/ntl/include/NTL/Lazy.h new file mode 100644 index 0000000000..97ce157ab7 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/Lazy.h @@ -0,0 +1,206 @@ + +/*************************************************************************** + + +Lazy<T>: template class for lazy initialization of objects whose +values do not change after initialization. +In a multi-threaded environment, this makes use of "double checked locking" +for an efficient, thread-safe solution. + +Usage: + + Lazy<T> obj; // declaration of the lazy object + + ... + + do { + Lazy<T>::Builder builder(obj); + if (!builder()) break; // if we are not building, then break out + + UniquePtr<T> p; // create a pointer + + ... + + builder.move(p); // move p into the object to complete the initialization + // This completes the initialization process. + } while(0); // When this scope closes, the object is fully initialized. + // subsequent attempts to build the object will yield + // !builder.built() + + + T objCopy = *obj; // *obj returns a read-only reference + // one can also use -> operator + +It is important to follow this recipe carefully. 
In particular, +the builder must be enclosed in a scope, as its destructor +plays a crucial role in finalizing the initialization. + +NOTE: if p is null in builder.move(p), the object is still considered +built. + + +template<class T> +class Lazy { +public: + Lazy(); + + Lazy(const Lazy&); // "deep" copies + Lazy& operator=(const Lazy&); + + const T& operator*() const; // pointer access + const T* operator->() const; + const T* get() const; + operator fake_null_type() const; // test for null pointer + + ~Lazy(); + + void kill(); // destroy and reset + + bool built() const; // test if already built + + + + + class Builder { + Builder(const Lazy&); + ~Builder(); + + bool operator()() const; // test if we are building + void move(UniquePtr<T>&); + + }; + + + +****************************************************************************/ + +#ifndef NTL_Lazy__H +#define NTL_Lazy__H + + +#include <NTL/tools.h> +#include <NTL/SmartPtr.h> +#include <NTL/thread.h> + + +NTL_OPEN_NNS + + + +// NOTE: For more on double-checked locking, see +// http://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/ + +// NOTE: when compiled with the NTL_THREADS option, the Lazy +// class may contain data members from the standard library +// that may not satisfy the requirements of the Vec class +// (i.e., relocatability). One can wrap it in a pointer +// class (e.g., OptionalVal) to deal with this. + +template<class T> +class Lazy { +private: + /* we make data members mutable so that Lazy members of + other classes don't have to be. */ + + mutable AtomicBool initialized; + mutable MutexProxy mtx; + + mutable UniquePtr<T> data; + + + class Dummy { }; + typedef void (Lazy::*fake_null_type)(Dummy) const; + void fake_null_function(Dummy) const {} + + +public: + Lazy() : initialized(false) { } + + // EXCEPTIONS: This always succeeds in killing the object + void kill() + { + UniquePtr<T> tmp; + tmp.swap(data); + initialized = false; + } + + // This is provided for convenience for some legacy code. + // It is up to the client code to ensure there are no race conditions. + + // EXCEPTIONS: strong ES + Lazy& operator=(const Lazy& other) + { + if (this == &other) return *this; + + if (other.initialized) { + UniquePtr<T> p; + if (other.data) p.make(*other.data); + p.swap(data); + initialized = true; + } + else + kill(); + + return *this; + } + + Lazy(const Lazy& other) : initialized(false) + { + *this = other; + } + + const T& operator*() const { return *data; } + const T* operator->() const { return data.operator->(); } + const T* get() const { return data.get(); } + + bool built() const { return initialized; } + + operator fake_null_type() const + { + return data ? 
&Lazy::fake_null_function : 0; + } + + + class Builder { + private: + bool building; + bool moved; + const Lazy& ref; + GuardProxy guard; + + Builder(const Builder&); // disabled + void operator=(const Builder&); // disabled + + + + public: + Builder(const Lazy& _ref) : building(false), moved(false), + ref(_ref), guard(_ref.mtx) + { + // Double-checked locking + if (ref.initialized || (guard.lock(), ref.initialized)) + return; + + building = true; // we set this to true after we lock the mutex + // and see that the object is still uninitialized + } + + ~Builder() { if (moved) ref.initialized = true; } + + void move(UniquePtr<T>& p) + { + if (!building || moved) LogicError("Lazy::Builder illegal call to move"); + ref.data.move(p); + moved = true; + } + + bool operator()() const { return building; } + }; +}; + + +NTL_CLOSE_NNS + + +#endif + diff --git a/thirdparty/linux/ntl/include/NTL/LazyTable.h b/thirdparty/linux/ntl/include/NTL/LazyTable.h new file mode 100644 index 0000000000..8985a96b4a --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/LazyTable.h @@ -0,0 +1,155 @@ + +#ifndef NTL_LazyTable__H +#define NTL_LazyTable__H + +#include <NTL/tools.h> +#include <NTL/SmartPtr.h> +#include <NTL/thread.h> + +NTL_OPEN_NNS + + +/*************************************************************************** + + +LazyTable<T,MAX>: template class for lazy initialization of objects whose +values do not change after initialization. +In a multi-threaded environment, this makes use of "double checked locking" +for an efficient, thread-safe solution. + +Usage: + + LazyTable<T,MAX> tab; // declaration of the lazy table, with max size == MAX + + ... + + do { + LazyTable<T,MAX>::Builder builder(tab, n); // request length n + long amt = builder.amt(); + if (!amt) break; + + ... initialize elements i = n-amt..n-1 + using builder.move(p), where p is a UniquePtr<T> + note that each move application appends one element + + } while(0); // When this scope closes, + // the table is fully initialized to length n + + + const T* val = tab[i]; // read-only access to table elements 0..n-1 + + +It is important to follow this recipe carefully. In particular, +the builder must be enclosed in a scope, as its destructor +plays a crucial role in finalizing the initialization. + + +template<class T, long MAX> +class LazyTable { +public: + LazyTable(); + + + const T * operator[] (long i) const; + + ~LazyTable(); + + long length() const; + + class Builder { + Builder(const LazyTable&, long request); + ~Builder(); + + long amt() const; + void move(UniquePtr<T>& p); + }; + +private: + LazyTable(const LazyTable&); // disabled + LazyTable& operator=(const LazyTable&); + +}; + + + +****************************************************************************/ + + +// NOTE: For more on double-checked locking, see +// http://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/ + +// NOTE: when compiled with the NTL_THREADS option, the LazyTable +// class may contain data members from the standard library +// that may not satisfy the requirements of the Vec class +// (i.e., relocatability). One can wrap it in a pointer +// class (e.g., OptionalVal) to deal with this. 
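+ +// Illustrative sketch of the recipe documented above (the element type ZZ +// and the sizes are hypothetical, not part of this header): +// +// LazyTable<ZZ, 100> tab; +// do { +// LazyTable<ZZ, 100>::Builder builder(tab, 10); // request length 10 +// long amt = builder.amt(); +// if (!amt) break; +// for (long i = 10-amt; i < 10; i++) { +// UniquePtr<ZZ> p; +// p.make(i); // construct element i as ZZ(i) +// builder.move(p); // appends one element +// } +// } while(0); +// const ZZ* val = tab[5]; // read-only access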
+ + + +template<class T, long MAX> +class LazyTable { +private: + mutable AtomicLong len; + mutable MutexProxy mtx; + + mutable UniqueArray< UniquePtr<T> > data; + + LazyTable(const LazyTable&); // disabled + void operator=(const LazyTable&); // disabled + +public: + LazyTable() : len(0) { } + + const T * operator[] (long i) const + { + // FIXME: add optional range checking + + return data[i].get(); + } + + long length() const { return len; } + + class Builder { + private: + const LazyTable& ref; + long request; + GuardProxy guard; + + long amount; + long curlen; + + Builder(const Builder&); // disabled + void operator=(const Builder&); // disabled + + public: + Builder(const LazyTable& _ref, long _request) + : ref(_ref), request(_request), guard(_ref.mtx), amount(0), curlen(0) + { + if (request < 0 || request > MAX) + LogicError("request out of range in LazyTable::Builder"); + + + // Double-checked locking + if (request <= ref.len || (guard.lock(), request <= ref.len)) + return; + + curlen = ref.len; + amount = request - curlen; + if (!ref.data) ref.data.SetLength(MAX); + } + + ~Builder() { if (amount) ref.len = curlen; } + + void move(UniquePtr<T>& p) + { + if (!amount || curlen >= request) LogicError("LazyTable::Builder illegal move"); + ref.data[curlen].move(p); + curlen++; + } + + long amt() const { return amount; } + }; +}; + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/RR.h b/thirdparty/linux/ntl/include/NTL/RR.h new file mode 100644 index 0000000000..96ea3de36a --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/RR.h @@ -0,0 +1,543 @@ +#ifndef NTL_RR__H +#define NTL_RR__H + +#include <NTL/ZZ.h> +#include <NTL/xdouble.h> +#include <NTL/quad_float.h> + +NTL_OPEN_NNS + + +class RR { + +public: + +ZZ x; +long e; + +RR() { e = 0; } + +explicit RR(double a) : e(0) { *this = a; } + +inline RR(INIT_VAL_TYPE, const ZZ& a); +inline RR(INIT_VAL_TYPE, int a); +inline RR(INIT_VAL_TYPE, long a); +inline RR(INIT_VAL_TYPE, unsigned int a); +inline RR(INIT_VAL_TYPE, unsigned long a); +inline RR(INIT_VAL_TYPE, float a); +inline RR(INIT_VAL_TYPE, double a); +inline RR(INIT_VAL_TYPE, const xdouble& a); +inline RR(INIT_VAL_TYPE, const quad_float& a); +inline RR(INIT_VAL_TYPE, const char *a); // read from string +inline RR(INIT_VAL_TYPE, const RR& a); + + +inline RR& operator=(double a); + +RR(RR& z, INIT_TRANS_TYPE) : x(z.x, INIT_TRANS), e(z.e) { } + +void swap(RR& z) { x.swap(z.x); _ntl_swap(e, z.e); } + + +~RR() { } + +const ZZ& mantissa() const { return x; } +long exponent() const { return e; } + +static NTL_CHEAP_THREAD_LOCAL long prec; +static void SetPrecision(long p); +static long precision() { return prec; } + +static NTL_CHEAP_THREAD_LOCAL long oprec; +static void SetOutputPrecision(long p); +static long OutputPrecision() { return oprec; } + +#ifdef NTL_TRANSITION +private: +RR& operator=(const RR&); +RR(const RR&); +#endif + +}; + + +inline void swap(RR& a, RR& b) { a.swap(b); } + +// RAII for saving/restoring precision +// FIXME: document. + +class RRPush { +private: + long old_p; + + RRPush(const RRPush&); // disable + void operator=(const RRPush&); // disable + +public: + RRPush() : old_p(RR::prec) { } + ~RRPush() { RR::prec = old_p; } + +}; + +// RAII for saving/restoring output precision +// FIXME: document. 
+ +class RROutputPush { +private: + long old_p; + + RROutputPush(const RROutputPush&); // disable + void operator=(const RROutputPush&); // disable + +public: + RROutputPush() : old_p(RR::oprec) { } + ~RROutputPush() { RR::oprec = old_p; } + +}; + + +long IsZero(const RR& a); +long IsOne(const RR& a); +long sign(const RR& a); +void clear(RR& z); +void set(RR& z); + +void add(RR& z, const RR& a, const RR& b); + +void add(RR& z, const RR& a, double b); +inline void add(RR& z, double a, const RR& b) { add(z, b, a); } + + + +void sub(RR& z, const RR& a, const RR& b); + +void sub(RR& z, const RR& a, double b); +void sub(RR& z, double a, const RR& b); + +void negate(RR& z, const RR& a); + +void abs(RR& z, const RR& a); +inline RR abs(const RR& a) + { RR z; abs(z, a); NTL_OPT_RETURN(RR, z); } +inline RR fabs(const RR& a) + { RR z; abs(z, a); NTL_OPT_RETURN(RR, z); } + +void mul(RR& z, const RR& a, const RR& b); + +void mul(RR& z, const RR& a, double b); +inline void mul(RR& z, double a, const RR& b) { mul(z, b, a); } + +void sqr(RR& z, const RR& a); +inline RR sqr(const RR& a) + { RR z; sqr(z, a); NTL_OPT_RETURN(RR, z); } + +void div(RR& z, const RR& a, const RR& b); + +void div(RR& z, const RR& a, double b); +void div(RR& z, double a, const RR& b); + +void inv(RR& z, const RR& a); +inline RR inv(const RR& a) + { RR z; inv(z, a); NTL_OPT_RETURN(RR, z); } + +// operator notation: + +inline RR operator+(const RR& a, const RR& b) + { RR x; add(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR operator+(const RR& a, double b) + { RR x; add(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR operator+(double a, const RR& b) + { RR x; add(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR& operator+=(RR& x, const RR& b) + { add(x, x, b); return x; } + +inline RR& operator+=(RR& x, double b) + { add(x, x, b); return x; } + + + +inline RR operator-(const RR& a, const RR& b) + { RR x; sub(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR operator-(const RR& a, double b) + { RR x; sub(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR operator-(double a, const RR& b) + { RR x; sub(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR& operator-=(RR& x, const RR& b) + { sub(x, x, b); return x; } + +inline RR& operator-=(RR& x, double b) + { sub(x, x, b); return x; } + + + +inline RR operator*(const RR& a, const RR& b) + { RR x; mul(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR operator*(const RR& a, double b) + { RR x; mul(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR operator*(double a, const RR& b) + { RR x; mul(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR& operator*=(RR& x, const RR& b) + { mul(x, x, b); return x; } + +inline RR& operator*=(RR& x, double b) + { mul(x, x, b); return x; } + + +inline RR operator/(const RR& a, const RR& b) + { RR x; div(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR operator/(const RR& a, double b) + { RR x; div(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR operator/(double a, const RR& b) + { RR x; div(x, a, b); NTL_OPT_RETURN(RR, x); } + +inline RR& operator/=(RR& x, const RR& b) + { div(x, x, b); return x; } + +inline RR& operator/=(RR& x, double b) + { div(x, x, b); return x; } + + +inline RR operator-(const RR& a) + { RR x; negate(x, a); NTL_OPT_RETURN(RR, x); } + + +inline RR& operator++(RR& x) { add(x, x, 1); return x; } +inline void operator++(RR& x, int) { add(x, x, 1); } +inline RR& operator--(RR& x) { sub(x, x, 1); return x; } +inline void operator--(RR& x, int) { sub(x, x, 1); } + + + +long compare(const RR& a, const RR& b); + +long compare(const RR& a, double b); +inline 
long compare(double a, const RR& b) { return -compare(b, a); } + + +long operator==(const RR& a, const RR& b); +inline long operator!=(const RR& a, const RR& b) { return !(a == b); } +inline long operator<=(const RR& a, const RR& b) { return compare(a, b) <= 0; } +inline long operator>=(const RR& a, const RR& b) { return compare(a, b) >= 0; } +inline long operator <(const RR& a, const RR& b) { return compare(a, b) < 0; } +inline long operator >(const RR& a, const RR& b) { return compare(a, b) > 0; } + +long operator==(const RR& a, double b); +inline long operator!=(const RR& a, double b) { return !(a == b); } +inline long operator<=(const RR& a, double b) { return compare(a, b) <= 0; } +inline long operator>=(const RR& a, double b) { return compare(a, b) >= 0; } +inline long operator <(const RR& a, double b) { return compare(a, b) < 0; } +inline long operator >(const RR& a, double b) { return compare(a, b) > 0; } + +inline long operator==(double a, const RR& b) { return (b == a); } +inline long operator!=(double a, const RR& b) { return !(a == b); } +inline long operator<=(double a, const RR& b) { return compare(a, b) <= 0; } +inline long operator>=(double a, const RR& b) { return compare(a, b) >= 0; } +inline long operator <(double a, const RR& b) { return compare(a, b) < 0; } +inline long operator >(double a, const RR& b) { return compare(a, b) > 0; } + +void ceil(RR& z, const RR& a); +inline RR ceil(const RR& a) + { RR z; ceil(z, a); NTL_OPT_RETURN(RR, z); } + +void floor(RR& z, const RR& a); +inline RR floor(const RR& a) + { RR z; floor(z, a); NTL_OPT_RETURN(RR, z); } + +void trunc(RR& z, const RR& a); +inline RR trunc(const RR& a) + { RR z; trunc(z, a); NTL_OPT_RETURN(RR, z); } + +void round(RR& z, const RR& a); +inline RR round(const RR& a) + { RR z; round(z, a); NTL_OPT_RETURN(RR, z); } + +void RoundToPrecision(RR& z, const RR& a, long p); +inline RR RoundToPrecision(const RR& a, long p) + { RR z; RoundToPrecision(z, a, p); NTL_OPT_RETURN(RR, z); } + + +// routines with a precision parameter + +void ConvPrec(RR& z, const RR& a, long p); +inline RR ConvPrec(const RR& a, long p) + { RR z; ConvPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void AddPrec(RR& z, const RR& a, const RR& b, long p); +inline RR AddPrec(const RR& a, const RR& b, long p) + { RR z; AddPrec(z, a, b, p); NTL_OPT_RETURN(RR, z); } + +void SubPrec(RR& z, const RR& a, const RR& b, long p); +inline RR SubPrec(const RR& a, const RR& b, long p) + { RR z; SubPrec(z, a, b, p); NTL_OPT_RETURN(RR, z); } + +void NegatePrec(RR& z, const RR& a, long p); +inline RR NegatePrec(const RR& a, long p) + { RR z; NegatePrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void AbsPrec(RR& z, const RR& a, long p); +inline RR AbsPrec(const RR& a, long p) + { RR z; AbsPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void MulPrec(RR& z, const RR& a, const RR& b, long p); +inline RR MulPrec(const RR& a, const RR& b, long p) + { RR z; MulPrec(z, a, b, p); NTL_OPT_RETURN(RR, z); } + +void SqrPrec(RR& z, const RR& a, long p); +inline RR SqrPrec(const RR& a, long p) + { RR z; SqrPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void DivPrec(RR& z, const RR& a, const RR& b, long p); +inline RR DivPrec(const RR& a, const RR& b, long p) + { RR z; DivPrec(z, a, b, p); NTL_OPT_RETURN(RR, z); } + +void InvPrec(RR& z, const RR& a, long p); +inline RR InvPrec(const RR& a, long p) + { RR z; InvPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void SqrRootPrec(RR& z, const RR& a, long p); +inline RR SqrRootPrec(const RR& a, long p) + { RR z; SqrRootPrec(z, a, p); NTL_OPT_RETURN(RR, z); } 
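+ +// Illustrative example (a sketch, not part of this header): each of the +// *Prec routines above computes its result rounded to the explicitly given +// precision p, independent of the ambient RR::precision(). E.g.: +// +// RR a = to_RR(2); +// RR s = SqrRootPrec(a, 200); // sqrt(2), rounded to 200 bits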
+ +void TruncPrec(RR& z, const RR& a, long p); +inline RR TruncPrec(const RR& a, long p) + { RR z; TruncPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void FloorPrec(RR& z, const RR& a, long p); +inline RR FloorPrec(const RR& a, long p) + { RR z; FloorPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void CeilPrec(RR& z, const RR& a, long p); +inline RR CeilPrec(const RR& a, long p) + { RR z; CeilPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void RoundPrec(RR& z, const RR& a, long p); +inline RR RoundPrec(const RR& a, long p) + { RR z; RoundPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void ConvPrec(RR& z, const ZZ& a, long p); +inline RR ConvPrec(const ZZ& a, long p) + { RR z; ConvPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void ConvPrec(RR& z, long a, long p); +inline RR ConvPrec(long a, long p) + { RR z; ConvPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +inline void ConvPrec(RR& z, int a, long p) { ConvPrec(z, long(a), p); } +inline RR ConvPrec(int a, long p) + { RR z; ConvPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void ConvPrec(RR& z, unsigned long a, long p); +inline RR ConvPrec(unsigned long a, long p) + { RR z; ConvPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +inline void ConvPrec(RR& z, unsigned int a, long p) + { ConvPrec(z, (unsigned long)(a), p); } +inline RR ConvPrec(unsigned int a, long p) + { RR z; ConvPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void ConvPrec(RR& z, double a, long p); +inline RR ConvPrec(double a, long p) + { RR z; ConvPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void ConvPrec(RR& z, const xdouble& a, long p); +inline RR ConvPrec(const xdouble& a, long p) + { RR z; ConvPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void ConvPrec(RR& z, const quad_float& a, long p); +inline RR ConvPrec(const quad_float& a, long p) + { RR z; ConvPrec(z, a, p); NTL_OPT_RETURN(RR, z); } + +void ConvPrec(RR& z, const char *s, long p); +inline RR ConvPrec(const char *s, long p) + { RR z; ConvPrec(z, s, p); NTL_OPT_RETURN(RR, z); } + +NTL_SNS istream& InputPrec(RR& z, NTL_SNS istream& s, long p); +inline RR InputPrec(NTL_SNS istream& s, long p) + { RR z; NTL_INPUT_CHECK_ERR(InputPrec(z, s, p)); NTL_OPT_RETURN(RR, z); } + +void MakeRRPrec(RR& z, const ZZ& a, long e, long p); +inline RR MakeRRPrec(const ZZ& a, long e, long p) + { RR z; MakeRRPrec(z, a, e, p); NTL_OPT_RETURN(RR, z); } + + + + + + +void conv(RR& z, const ZZ& a); +void conv(RR& z, long a); +inline void conv(RR& z, int a) { conv(z, long(a)); } +void conv(RR& z, unsigned long a); +inline void conv(RR& z, unsigned int a) { conv(z, (unsigned long)(a)); } +void conv(RR& z, const char *s); +void conv(RR& z, double a); +inline void conv(RR& z, float a) { conv(z, double(a)); } +void conv(RR& z, const xdouble& a); +void conv(RR& z, const quad_float& a); + +void conv(RR& z, const RR& a); + + + +inline RR::RR(INIT_VAL_TYPE, int a) { e = 0; conv(*this, a); } +inline RR::RR(INIT_VAL_TYPE, long a) { e = 0; conv(*this, a); } +inline RR::RR(INIT_VAL_TYPE, unsigned int a) { e = 0; conv(*this, a); } +inline RR::RR(INIT_VAL_TYPE, unsigned long a) { e = 0; conv(*this, a); } +inline RR::RR(INIT_VAL_TYPE, float a) { e = 0; conv(*this, a); } +inline RR::RR(INIT_VAL_TYPE, double a) { e = 0; conv(*this, a); } + +inline RR::RR(INIT_VAL_TYPE, const RR& a) { e = 0; conv(*this, a); } +inline RR::RR(INIT_VAL_TYPE, const ZZ& a) { e = 0; conv(*this, a); } +inline RR::RR(INIT_VAL_TYPE, const xdouble& a) { e = 0; conv(*this, a); } +inline RR::RR(INIT_VAL_TYPE, const quad_float& a) { e = 0; conv(*this, a); } +inline RR::RR(INIT_VAL_TYPE, const char *a) { e = 0; conv(*this, a); } + + 
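+ +// Illustrative example of the RAII helpers declared earlier (a sketch, not +// part of this header): RRPush saves RR::precision() on construction and +// restores it when the scope closes: +// +// { +// RRPush push; // save current precision +// RR::SetPrecision(300); // work at 300 bits within this scope +// ... computations ... +// } // precision automatically restored here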
+inline RR to_RR(int a) { return RR(INIT_VAL, a); } +inline RR to_RR(long a) { return RR(INIT_VAL, a); } +inline RR to_RR(unsigned int a) { return RR(INIT_VAL, a); } +inline RR to_RR(unsigned long a) { return RR(INIT_VAL, a); } +inline RR to_RR(float a) { return RR(INIT_VAL, a); } +inline RR to_RR(double a) { return RR(INIT_VAL, a); } +inline RR to_RR(const ZZ& a) { return RR(INIT_VAL, a); } +inline RR to_RR(const RR& a) { return RR(INIT_VAL, a); } +inline RR to_RR(const xdouble& a) { return RR(INIT_VAL, a); } +inline RR to_RR(const quad_float& a) { return RR(INIT_VAL, a); } +inline RR to_RR(const char *a) { return RR(INIT_VAL, a); } + +inline RR& RR::operator=(double a) { conv(*this, a); return *this; } + +void conv(ZZ& z, const RR& a); +void conv(long& z, const RR& a); +void conv(double& z, const RR& a); +void conv(xdouble& z, const RR& a); +void conv(quad_float& z, const RR& a); + +inline void conv(int& z, const RR& a) + { long t; conv(t, a); z = int(t); } + +inline void conv(float& z, const RR& a) + { double t; conv(t, a); z = float(t); } + +inline int to_int(const RR& a) { int z; conv(z, a); return z; } +inline long to_long(const RR& a) { long z; conv(z, a); return z; } +inline float to_float(const RR& a) { float z; conv(z, a); return z; } +inline double to_double(const RR& a) { double z; conv(z, a); return z; } + +inline xdouble to_xdouble(const RR& a) + { xdouble z; conv(z, a); return z; } +inline quad_float to_quad_float(const RR& a) + { quad_float z; conv(z, a); return z; } + +inline ZZ to_ZZ(const RR& a) + { ZZ z; conv(z, a); NTL_OPT_RETURN(ZZ, z); } + +void CeilToZZ(ZZ& z, const RR& a); +inline ZZ CeilToZZ(const RR& a) + { ZZ z; CeilToZZ(z, a); NTL_OPT_RETURN(ZZ, z); } + +void TruncToZZ(ZZ& z, const RR& a); +inline ZZ TruncToZZ(const RR& a) + { ZZ z; TruncToZZ(z, a); NTL_OPT_RETURN(ZZ, z); } + +void RoundToZZ(ZZ& z, const RR& a); +inline ZZ RoundToZZ(const RR& a) + { ZZ z; RoundToZZ(z, a); NTL_OPT_RETURN(ZZ, z); } + +inline void FloorToZZ(ZZ& z, const RR& a) { conv(z, a); } +inline ZZ FloorToZZ(const RR& a) + { ZZ z; conv(z, a); NTL_OPT_RETURN(ZZ, z); } + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(unsigned int& x, const RR& a) + { long z; conv(z, a); conv(x, z); } + +inline void conv(unsigned long& x, const RR& a) + { long z; conv(z, a); conv(x, z); } + + +/* ------------------------------------- */ + +void MakeRR(RR& z, const ZZ& a, long e); +inline RR MakeRR(const ZZ& a, long e) + { RR z; MakeRR(z, a, e); NTL_OPT_RETURN(RR, z); } + +void random(RR& z); +inline RR random_RR() + { RR z; random(z); NTL_OPT_RETURN(RR, z); } + + +void power(RR& z, const RR& a, long e); +inline RR power(const RR& a, long e) + { RR z; power(z, a, e); NTL_OPT_RETURN(RR, z); } + +void power2(RR& z, long e); + +inline RR power2_RR(long e) + { RR z; power2(z, e); NTL_OPT_RETURN(RR, z); } + +NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const RR& a); +NTL_SNS istream& operator>>(NTL_SNS istream& s, RR& x); + + +void SqrRoot(RR& x, const RR& a); +inline RR SqrRoot(const RR& a) + { RR z; SqrRoot(z, a); NTL_OPT_RETURN(RR, z); } +inline RR sqrt(const RR& a) + { RR z; SqrRoot(z, a); NTL_OPT_RETURN(RR, z); } + +void exp(RR& res, const RR& x); +inline RR exp(const RR& a) + { RR z; exp(z, a); NTL_OPT_RETURN(RR, z); } + +void log(RR& res, const RR& x); +inline RR log(const RR& a) + { RR z; log(z, a); NTL_OPT_RETURN(RR, z); } + +void log10(RR& res, const RR& x); +inline RR log10(const RR& a) + { RR z; log10(z, a); NTL_OPT_RETURN(RR, z); } + +void expm1(RR& res, const RR& 
x); +inline RR expm1(const RR& a) + { RR z; expm1(z, a); NTL_OPT_RETURN(RR, z); } + +void log1p(RR& res, const RR& x); +inline RR log1p(const RR& a) + { RR z; log1p(z, a); NTL_OPT_RETURN(RR, z); } + +void pow(RR& res, const RR& x, const RR& y); +inline RR pow(const RR& x, const RR& y) + { RR z; pow(z, x, y); NTL_OPT_RETURN(RR, z); } + +void ComputePi(RR& res); +inline RR ComputePi_RR() + { RR z; ComputePi(z); NTL_OPT_RETURN(RR, z); } + +void sin(RR& res, const RR& x); +inline RR sin(const RR& a) + { RR z; sin(z, a); NTL_OPT_RETURN(RR, z); } + +void cos(RR& res, const RR& x); +inline RR cos(const RR& a) + { RR z; cos(z, a); NTL_OPT_RETURN(RR, z); } + + + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/SPMM_ASM.h b/thirdparty/linux/ntl/include/NTL/SPMM_ASM.h new file mode 100644 index 0000000000..f38d274b1e --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/SPMM_ASM.h @@ -0,0 +1,202 @@ + +/************************************************************* + + + Assembly code support for computing the high-order + word of a word * word product (unsigned). + + Note that these typically only make a significant difference + on some 64-bit machines, as on 32-bit machines, the "long long" + solution is usually just as good. + + These code sequences were extracted from a recent version of + the file longlong.h from gmp. Copyright notice follows: + + Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003 + Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this file; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + MA 02111-1307, USA. + + + +*************************************************************/ + + + + + + +#if (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7))) + +// To simplify things, we require gcc v2.7 or higher. + + + +// ------ POWERPC ------ + +#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \ + || defined (__ppc__) || defined(__ppc64__) \ + || (defined (PPC) && ! 
defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ + || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ + && CPU_FAMILY == PPC) + +#if (NTL_BITS_PER_LONG == 32) + +static inline unsigned long MulHiUL(unsigned long a, unsigned long b) +{ + unsigned long hi; + __asm__ ("mulhwu %0,%1,%2" : "=r" (hi) : "%r" (a), "r" (b)); + return hi; +} + +#elif (NTL_BITS_PER_LONG == 64) + + +static inline unsigned long MulHiUL(unsigned long a, unsigned long b) +{ + unsigned long hi; + __asm__ ("mulhdu %0,%1,%2" : "=r" (hi) : "%r" (a), "r" (b)); + return hi; +} + +#endif + +#endif + + + + +// ------ ALPHA ------ + +#if (defined (__alpha) && NTL_BITS_PER_LONG == 64) + +static inline unsigned long MulHiUL(unsigned long a, unsigned long b) +{ + unsigned long hi; + __asm__ ("umulh %r1,%2,%0" : "=r" (hi) : "%rJ" (a), "rI" (b)); + return hi; +} + +#endif + + +// ------ IA64 ------ + +#if (defined (__ia64) && NTL_BITS_PER_LONG == 64) + +static inline unsigned long MulHiUL(unsigned long a, unsigned long b) +{ + unsigned long hi; + __asm__ ("xma.hu %0 = %1, %2, f0" : "=f" (hi) : "f" (a), "f" (b)); + return hi; +} + + +#endif + + +// ------ x86 ------ + +#if ((defined (__i386__) || defined (__i486__)) && NTL_BITS_PER_LONG == 32) + +static inline unsigned long MulHiUL(unsigned long a, unsigned long b) +{ + unsigned long hi, lo; + __asm__ ("mull %3" : "=a" (lo), "=d" (hi) : "%0" (a), "rm" (b)); + + return hi; +} + +#endif + + +// ------ x86-64 ------ + +#if (defined (__x86_64__) && NTL_BITS_PER_LONG == 64) + +static inline unsigned long MulHiUL(unsigned long a, unsigned long b) +{ + unsigned long hi, lo; + __asm__ ("mulq %3" : "=a" (lo), "=d" (hi) : "%0" (a), "rm" (b)); + + //__asm__ ("mulxq %2,%1,%0" : "=r" (hi), "=r" (lo) : "rm" (a), "d" (b)); + // this uses the mulx instruction - no real benefit + + return hi; +} + + +#endif + + +// ------ MIPS ------ + +#if (defined (__mips)) + +#if (NTL_BITS_PER_LONG == 32) + +static inline unsigned long MulHiUL(unsigned long a, unsigned long b) +{ + unsigned long hi, lo; + __asm__ ("multu %2,%3" : "=l" (lo), "=h" (hi) : "d" (a), "d" (b)); + return hi; +} + + + +#elif (NTL_BITS_PER_LONG == 64) + + +static inline unsigned long MulHiUL(unsigned long a, unsigned long b) +{ + unsigned long hi, lo; + __asm__ ("dmultu %2,%3" : "=l" (lo), "=h" (hi) : "d" (a), "d" (b)); + return hi; +} + + +#endif + +#endif + + +// -------- SPARC -------- + + +#if (defined (__sparc__) && NTL_BITS_PER_LONG == 32) + +#if (defined (__sparc_v9__) || defined (__sparcv9) || \ + defined (__sparc_v8__) || defined (__sparcv8) || defined (__sparclite__)) + +static inline unsigned long MulHiUL(unsigned long a, unsigned long b) +{ + unsigned long hi, lo; + __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (hi), "=r" (lo) : "r" (a), "r" (b)); + return hi; +} + + +#endif + + + + + +#endif + + + +#endif // __GNUC__ diff --git a/thirdparty/linux/ntl/include/NTL/SmartPtr.h b/thirdparty/linux/ntl/include/NTL/SmartPtr.h new file mode 100644 index 0000000000..21cd0af093 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/SmartPtr.h @@ -0,0 +1,1677 @@ + + +#ifndef NTL_SmartPtr__H +#define NTL_SmartPtr__H + +#include <NTL/tools.h> +#include <NTL/thread.h> + + + +NTL_OPEN_NNS + + + +/**************************************************************************** + +SmartPtr: a smart pointer class. + +Synopsis: provides a reference counted smart pointer, similar to shared_ptr +in the standard library. 
It is provided here to minimize reliance +on the standard library, especially for older C++ compilers, which may +not provide shared_ptr, or it may be in TR1, which gets messy. + + +Examples: + + + SmartPtr<T> p1; // initialize to null + SmartPtr<T> p1(0); + SmartPtr<T> p1 = 0; + + SmartPtr<T> p2(p1); // copy constructor + + T *rp; + SmartPtr<T> p2(rp); // construct using raw pointer (explicit): better + // to use MakeSmart below + + p1 = MakeSmart<T>(...); // build new T object by invoking constructor + // T(...) with pseudo-variadic templates. + // This is safer and more efficient than + // using the raw-pointer constructor + + p1 = p2; // assignment + p1 = 0; // assign null + + + if (!p1) ... // test for null + if (p1 == 0) ... + + if (p1) ... // test for not null ... + if (p1 != 0) ... + + if (p1 == p2) ... // test for equality + if (p1 != p2) + + *p1 // dereferencing + p1->... + + p1.get(); // return the underlying raw pointer...dangerous! + + p1.swap(p2); // fast swap + swap(p1, p2); + + +Automatic Conversions: + +If S is another class, SmartPtr<S> converts to SmartPtr<T> if S* converts to T* +(for example, if S is a subclass of T). Similarly, SmartPtr<S> and SmartPtr<T> +may be compared if S* and T* may be compared. + +MakeSmart: + +One can write SmartPtr<T> p = MakeSmart<T>(x1, ..., xn), and this will create a +smart pointer to an object constructed as T(x1, ..., xn). Besides notational +convenience, it also reduces the number of memory allocations from 2 to 1, as +the data and control block can be allocated in one chunk of memory. + +This is implemented without reliance on C++11 features, which means that there +are limitations. First, the number n of arguments is limited to 9. And +second, all arguments are passed by const reference. However, you can work around +this by using the helper function Fwd. For example, if T has a 2-argument +constructor where the second must be a non-const reference of some type, and x2 +is a variable of that type, you can write MakeSmart<T>(x1, Fwd(x2)), to forward +that reference through all the template nonsense in a typesafe manner. + +MakeRaw: + +One can also write T *p = MakeRaw<T>(x1, ..., xn) to create a +raw pointer. This is the same as writing T *p = new T(x1, ..., xn), +except that if the construction fails, NTL's error routine will be called +(as opposed to an exception being thrown). The same restrictions and +limitations that apply to MakeSmart apply to MakeRaw. + +MakeRawArray: + +Another utility routine: one can write T *p = MakeRawArray<T>(n) +to make a plain array of n T's. NTL's error routine will be +called if the allocation fails. + +Dynamic casting: + +I've also supplied a dynamic cast operation for smart pointers. + + SmartPtr<Derived> d = MakeSmart<Derived>(); // d points to Derived + SmartPtr<Base> b = d; // implicit upcast: OK + + SmartPtr<Derived> d1 = DynamicCast<Derived>(b); + // downcast to a Derived object -- returns null for a bad cast + + + + +Implementation notes: + +If NTL is compiled with the NTL_THREADS option, then the reference counting +should be thread safe. + +The SmartPtrControl class hierarchy is used to make sure the right destructor +is called when the ref count goes to zero. This can be an issue for forward +declared classes and for subclasses. For example, if T is forward declared in +a context where the ref count goes to zero, or if the object's actual type is a +subclass of T and T's destructor was not declared virtual. 
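+ +For instance (an illustrative sketch, restating the point above): + + struct Base { }; // note: no virtual destructor + struct Derived : Base { }; + + SmartPtr<Base> b = MakeSmart<Derived>(); // upcast on assignment + b = 0; // ~Derived() still runs, because the control + // block was created with the complete type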
+ +The null tests p, !p, p == 0, are all effected via an implicit conversion from +SmartPtr to a funny pointer type (a pointer to a member function, which +avoids other, unwanted implicit conversions: this is the so-called "safe bool +idiom"); + +There is also an assignment from a funny pointer type to a SmartPtr, +which allows assignment of 0 to a SmartPtr. + +In C++11 both of the above effects could perhaps be achieved more directly. +The new "explicit bool" operator can replace the "safe bool idiom", and I would +think that the new null pointer type could be used to get the assignment of 0 +to work. + +NOTES: See http://www.artima.com/cppsource/safebool.html for more +on the "safe bool idiom". + + + + +*****************************************************************************/ + +// Helper class for somewhat finer-grained control of deleter. +// Useful in the PIMPL pattern. + +struct DefaultDeleterPolicy { + + template<class T> + static void deleter(T *p) { delete p; } + +}; + +// A tagging class, for better readability + +template<class P> +struct ChoosePolicy { }; + +// usage: SmartPtr<T> p(r, ChoosePolicy<P>()); + + + +class SmartPtrControl { +public: + AtomicRefCount cnt; + SmartPtrControl() { } + virtual ~SmartPtrControl() { } + + +private: + void operator=(const SmartPtrControl&); // =delete + SmartPtrControl(const SmartPtrControl&); // =delete +}; + + + +template<class T, class P> +class SmartPtrControlDerived : public SmartPtrControl { +public: + T* p; + + SmartPtrControlDerived(T* _p) : p(_p) { } + + ~SmartPtrControlDerived() + { + P::deleter(p); + } + +}; + + +struct SmartPtrLoopHole { }; + + +template<class T> +class SmartPtr { +private: + T *dp; + SmartPtrControl *cp; + + void AddRef() const + { + if (cp) cp->cnt.inc(); + } + + void RemoveRef() const + { + if (cp && cp->cnt.dec()) { delete cp; } + } + + class Dummy { }; + typedef void (SmartPtr::*fake_null_type)(Dummy) const; + void fake_null_function(Dummy) const {} + + class Dummy1 { }; + typedef void (SmartPtr::*fake_null_type1)(Dummy1) const; + + +public: + template<class Y> + explicit SmartPtr(Y* p) : dp(p), cp(0) + { + if (p) { + cp = NTL_NEW_OP SmartPtrControlDerived<Y, DefaultDeleterPolicy>(p); + if (!cp) { + delete p; // this could theoretically throw an exception + MemoryError(); + } + AddRef(); + } + } + + template<class Y, class P> + SmartPtr(Y* p, ChoosePolicy<P>) : dp(p), cp(0) + { + if (p) { + cp = NTL_NEW_OP SmartPtrControlDerived<Y, P>(p); + if (!cp) { + delete p; // this could theoretically throw an exception + MemoryError(); + } + AddRef(); + } + } + + SmartPtr() : dp(0), cp(0) { } + + SmartPtr(fake_null_type1) : dp(0), cp(0) { } + + SmartPtr(SmartPtrLoopHole, T* _dp, SmartPtrControl *_cp) : dp(_dp), cp(_cp) + { + AddRef(); + } + + ~SmartPtr() { + RemoveRef(); + } + + SmartPtr(const SmartPtr& other) : dp(other.dp), cp(other.cp) + { + AddRef(); + } + + + SmartPtr& operator=(const SmartPtr& other) + { + SmartPtr tmp(other); + tmp.swap(*this); + return *this; + } + + template<class Y> friend class SmartPtr; + + template<class Y> + SmartPtr(const SmartPtr<Y>& other) : dp(other.dp), cp(other.cp) + { + AddRef(); + } + + + template<class Y> + SmartPtr& operator=(const SmartPtr<Y>& other) + { + SmartPtr tmp(other); + tmp.swap(*this); + return *this; + } + + + T& operator*() const { return *dp; } + T* operator->() const { return dp; } + + T* get() const { return dp; } + + void swap(SmartPtr& other) + { + _ntl_swap(dp, other.dp); + _ntl_swap(cp, other.cp); + } + + + operator fake_null_type() const + { + return dp ? &SmartPtr::fake_null_function : 0; + } + + + template<class Y> + SmartPtr<Y> DynamicCast() const + { + if (!dp) + return SmartPtr<Y>(); + else { + Y* dp1 = dynamic_cast<Y*>(dp); + if (!dp1) return SmartPtr<Y>(); + return SmartPtr<Y>(SmartPtrLoopHole(), dp1, cp); + } + } + + +}; + + +// free swap function +template<class T> +void swap(SmartPtr<T>& p, SmartPtr<T>& q) { p.swap(q); } + +// free dynamic cast function +template<class X, class Y> +SmartPtr<X> DynamicCast(const SmartPtr<Y>& p) { return p.template DynamicCast<X>(); } + + + +// Equality testing + +template<class X, class Y> +bool operator==(const SmartPtr<X>& a, const SmartPtr<Y>& b) +{ + return a.get() == b.get(); +} + +template<class X, class Y> +bool operator!=(const SmartPtr<X>& a, const SmartPtr<Y>& b) +{ + return a.get() != b.get(); +} + + +/********************************************************************************* + +Experimental: CloneablePtr ...essentially same interface as SmartPtr, but +allows cloning of complete objects. The differences: +* must construct using MakeCloneable +* a clone method is provided +* implicit conversion from CloneablePtr to SmartPtr is allowed + +Example: + + CloneablePtr<Derived> d = MakeCloneable<Derived>(); // d points to Derived + CloneablePtr<Base> b = d; // implicit upcast: OK + + CloneablePtr<Base> b1 = b.clone(); // clone of b, which is really a Derived object + CloneablePtr<Derived> d1 = DynamicCast<Derived>(b1); + // downcast to a Derived object -- returns null for a bad cast + SmartPtr<Base> b2 = d1; + + + +Implementation: + +In the clone method, the object is constructed using the copy constructor for the +type T, where T is the compile-time type with which the first smart pointer to this +object was created, even if the pointer has been subsequently upcasted to a +base type S. Such objects must have been initially created using the +MakeCloneable function. It turns out, this is hard to do in a completely +standards-compliant way, because of the type erasure going on. The only way I +could figure out how to do it in a standards-compliant way was by using +exceptions: the control block throws a T* and the smart pointer doing the clone +catches an S*. However, this turned out to be dreadfully slow, and even this +does not completely solve the problem, because there could be ambiguities +in this type of upcasting that may arise with multiple inheritance. So I settled +on the current method, which does some low-level pointer arithmetic. 
Even with +fancy things like multiple and virtual inheritance, it should work, under the +assumption that if two objects have the same (runtime) type, then their memory +layout is the same. I don't think anything like that is guaranteed by the +standard, but this seems reasonable, and it seems to work. +Like I said, it is experimental, and I would appreciate feedback +from C++ gurus. + +Note that NTL does not use this feature, but I do have applications where this +is convenient. + + +**********************************************************************************/ + +class CloneablePtrControl : public SmartPtrControl { +public: + virtual CloneablePtrControl *clone() const = 0; + virtual void *get() = 0; + +}; + + +template +class CloneablePtrControlDerived : public CloneablePtrControl { +public: + T d; + + CloneablePtrControlDerived(const T& x) : d(x) { } + CloneablePtrControl *clone() const + { + CloneablePtrControl *q = NTL_NEW_OP CloneablePtrControlDerived(d); + if (!q) MemoryError(); + return q; + } + void *get() { return &d; } +}; + + + + +struct CloneablePtrLoopHole { }; + + +template +class CloneablePtr { +private: + T *dp; + CloneablePtrControl *cp; + + void AddRef() const + { + if (cp) cp->cnt.inc(); + } + + void RemoveRef() const + { + if (cp && cp->cnt.dec()) { delete cp; } + } + + class Dummy { }; + typedef void (CloneablePtr::*fake_null_type)(Dummy) const; + void fake_null_function(Dummy) const {} + + class Dummy1 { }; + typedef void (CloneablePtr::*fake_null_type1)(Dummy1) const; + +public: + CloneablePtr() : dp(0), cp(0) { } + + CloneablePtr(fake_null_type1) : dp(0), cp(0) { } + + CloneablePtr(CloneablePtrLoopHole, T* _dp, CloneablePtrControl *_cp) : dp(_dp), cp(_cp) + { + AddRef(); + } + + ~CloneablePtr() { + RemoveRef(); + } + + CloneablePtr(const CloneablePtr& other) : dp(other.dp), cp(other.cp) + { + AddRef(); + } + + + CloneablePtr& operator=(const CloneablePtr& other) + { + CloneablePtr tmp(other); + tmp.swap(*this); + return *this; + } + + template friend class CloneablePtr; + + template + CloneablePtr(const CloneablePtr& other) : dp(other.dp), cp(other.cp) + { + AddRef(); + } + + + template + CloneablePtr& operator=(const CloneablePtr& other) + { + CloneablePtr tmp(other); + tmp.swap(*this); + return *this; + } + + + T& operator*() const { return *dp; } + T* operator->() const { return dp; } + + T* get() const { return dp; } + + void swap(CloneablePtr& other) + { + _ntl_swap(dp, other.dp); + _ntl_swap(cp, other.cp); + } + + operator fake_null_type() const + { + return dp ? 
&CloneablePtr::fake_null_function : 0; + } + + template + CloneablePtr DynamicCast() const + { + if (!dp) + return CloneablePtr(); + else { + Y* dp1 = dynamic_cast(dp); + if (!dp1) return CloneablePtr(); + return CloneablePtr(CloneablePtrLoopHole(), dp1, cp); + } + } + + CloneablePtr clone() const + { + if (!dp) + return CloneablePtr(); + else { + CloneablePtrControl *cp1 = cp->clone(); + char *complete = (char *) cp->get(); + char *complete1 = (char *) cp1->get(); + T *dp1 = (T *) (complete1 + (((char *)dp) - complete)); + return CloneablePtr(CloneablePtrLoopHole(), dp1, cp1); + } + } + + template + operator SmartPtr() { return SmartPtr(SmartPtrLoopHole(), dp, cp); } + +}; + + +// free swap function +template +void swap(CloneablePtr& p, CloneablePtr& q) { p.swap(q); } + +// free dynamic cast function +template +CloneablePtr DynamicCast(const CloneablePtr& p) { return p.template DynamicCast(); } + + + +// Equality testing + +template +bool operator==(const CloneablePtr& a, const CloneablePtr& b) +{ + return a.get() == b.get(); +} + + +template +bool operator!=(const CloneablePtr& a, const CloneablePtr& b) +{ + return a.get() != b.get(); +} + + + + +// ****************************************************** + +// Implementation of MakeSmart and friends + + +// Reference forwarding + +template +class ReferenceWrapper +{ +private: + T& x; +public: + ReferenceWrapper(T& _x) : x(_x) { } + operator T& () const { return x; } +}; + +template +ReferenceWrapper Fwd(T& x) { return ReferenceWrapper(x); } + +template +class ConstReferenceWrapper +{ +private: + T& x; +public: + ConstReferenceWrapper(const T& _x) : x(_x) { } + operator const T& () const { return x; } +}; + +template +ConstReferenceWrapper Fwd(const T& x) { return ConstReferenceWrapper(x); } + +template +T& UnwrapReference(const ReferenceWrapper& x) { return x; } + +template +const T& UnwrapReference(const ConstReferenceWrapper& x) { return x; } + +template +const T& UnwrapReference(const T& x) { return x; } + + + + +// Some useful macros for simulating variadic templates + +#define NTL_REPEATER_0(m) +#define NTL_REPEATER_1(m) m(1) +#define NTL_REPEATER_2(m) m(1),m(2) +#define NTL_REPEATER_3(m) m(1),m(2),m(3) +#define NTL_REPEATER_4(m) m(1),m(2),m(3),m(4) +#define NTL_REPEATER_5(m) m(1),m(2),m(3),m(4),m(5) +#define NTL_REPEATER_6(m) m(1),m(2),m(3),m(4),m(5),m(6) +#define NTL_REPEATER_7(m) m(1),m(2),m(3),m(4),m(5),m(6),m(7) +#define NTL_REPEATER_8(m) m(1),m(2),m(3),m(4),m(5),m(6),m(7),m(8) +#define NTL_REPEATER_9(m) m(1),m(2),m(3),m(4),m(5),m(6),m(7),m(8),m(9) + +#define NTL_SEPARATOR_0 +#define NTL_SEPARATOR_1 , +#define NTL_SEPARATOR_2 , +#define NTL_SEPARATOR_3 , +#define NTL_SEPARATOR_4 , +#define NTL_SEPARATOR_5 , +#define NTL_SEPARATOR_6 , +#define NTL_SEPARATOR_7 , +#define NTL_SEPARATOR_8 , +#define NTL_SEPARATOR_9 , + + +#define NTL_FOREACH_ARG(m) \ + m(0) m(1) m(2) m(3) m(4) m(5) m(6) m(7) m(8) m(9) + +#define NTL_FOREACH_ARG1(m) \ + m(1) m(2) m(3) m(4) m(5) m(6) m(7) m(8) m(9) + +// ******************************** + +#define NTL_ARGTYPE(n) class X##n +#define NTL_ARGTYPES(n) NTL_REPEATER_##n(NTL_ARGTYPE) +#define NTL_MORE_ARGTYPES(n) NTL_SEPARATOR_##n NTL_REPEATER_##n(NTL_ARGTYPE) + +#define NTL_VARARG(n) const X##n & x##n +#define NTL_VARARGS(n) NTL_REPEATER_##n(NTL_VARARG) + +#define NTL_PASSTYPE(n) X ## n +#define NTL_PASSTYPES(n) NTL_REPEATER_##n(NTL_PASSTYPE) +#define NTL_MORE_PASSTYPES(n) NTL_SEPARATOR_##n NTL_REPEATER_##n(NTL_PASSTYPE) + +#define NTL_PASSARG(n) x ## n +#define NTL_PASSARGS(n) NTL_REPEATER_##n(NTL_PASSARG) 
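+
+// As a quick sanity check, here is (hand-expanded) what the helper
+// macros above produce for n = 3:
+//
+//    NTL_ARGTYPES(3)   =>  class X1, class X2, class X3
+//    NTL_VARARGS(3)    =>  const X1 & x1, const X2 & x2, const X3 & x3
+//    NTL_PASSTYPES(3)  =>  X1, X2, X3
+//    NTL_PASSARGS(3)   =>  x1, x2, x3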
+ +#define NTL_UNWRAPARG(n) UnwrapReference(x ## n) +#define NTL_UNWRAPARGS(n) NTL_REPEATER_##n(NTL_UNWRAPARG) + + + +// ******************************** + + + +#define NTL_DEFINE_MAKESMART(n) \ +template \ +class MakeSmartAux##n : public SmartPtrControl {\ +public: T d; \ +MakeSmartAux##n( NTL_VARARGS(n) ) : \ +d( NTL_UNWRAPARGS(n) ) { }\ +};\ +\ +template\ +SmartPtr MakeSmart( NTL_VARARGS(n) ) { \ + MakeSmartAux##n *cp = \ + NTL_NEW_OP MakeSmartAux##n( NTL_PASSARGS(n) ); \ + if (!cp) MemoryError();\ + return SmartPtr(SmartPtrLoopHole(), &cp->d, cp);\ +};\ + +NTL_FOREACH_ARG(NTL_DEFINE_MAKESMART) + + + + +// ******************************** + + +#define NTL_DEFINE_MAKECLONEABLE(n) \ +template \ +class MakeCloneableAux##n : public CloneablePtrControl {\ +public: T d; \ +MakeCloneableAux##n( NTL_VARARGS(n) ) : \ +d( NTL_UNWRAPARGS(n) ) { }\ +CloneablePtrControl *clone() const \ +{\ + CloneablePtrControl *q = NTL_NEW_OP CloneablePtrControlDerived(d);\ + if (!q) MemoryError();\ + return q;\ +}\ +void *get() { return &d; }\ +};\ +\ +template\ +CloneablePtr MakeCloneable( NTL_VARARGS(n) ) { \ + MakeCloneableAux##n *cp = \ + NTL_NEW_OP MakeCloneableAux##n( NTL_PASSARGS(n) ); \ + if (!cp) MemoryError();\ + return CloneablePtr(CloneablePtrLoopHole(), &cp->d, cp);\ +};\ + +NTL_FOREACH_ARG(NTL_DEFINE_MAKECLONEABLE) + + + +// ******************************** + + +#ifdef NTL_TEST_EXCEPTIONS + +#define NTL_DEFINE_MAKERAW(n)\ +template\ +T* MakeRaw(NTL_VARARGS(n)) { \ + T *p = 0; \ + if (--exception_counter != 0) p = NTL_NEW_OP T(NTL_UNWRAPARGS(n)); \ + if (!p) MemoryError();\ + return p;\ +};\ + + +#else + +#define NTL_DEFINE_MAKERAW(n)\ +template\ +T* MakeRaw(NTL_VARARGS(n)) { \ + T *p = NTL_NEW_OP T(NTL_UNWRAPARGS(n)); \ + if (!p) MemoryError();\ + return p;\ +};\ + +#endif + + +NTL_FOREACH_ARG(NTL_DEFINE_MAKERAW) + + +// ******************************** + + +#ifdef NTL_TEST_EXCEPTIONS + +template +T *MakeRawArray(long n) +{ + if (n < 0) LogicError("negative length in MakeRawArray"); + if (n == 0) return 0; + T *p = 0; + if (--exception_counter != 0) p = new T[n]; + if (!p) MemoryError(); + return p; +} + +#else + +template +T *MakeRawArray(long n) +{ + if (n < 0) LogicError("negative length in MakeRawArray"); + if (n == 0) return 0; + T *p = new T[n]; + if (!p) MemoryError(); + return p; +} + +#endif + + + +/********************************************************************** + +UniquePtr -- unique pointer to object with copying disabled. +Useful for pointers inside classes so that we can +automatically destruct them. + +Constructors: + UniquePtr p1; // initialize with null + UniquePtr p1(0); + + T* rp; + UniquePtr p1(rp); // construct using raw pointer (explicit) + + p1 = 0; // destroy's p1's referent and assigns null + + p1.make(...); // destroy's p1's referent and assigns + // a fresh objected constructed via T(...), + // using psuedo variadic templates + + p1.reset(rp); // destroy's p1's referent and assign rp + + if (!p1) ... // test for null + if (p1 == 0) ... + + if (p1) ... // test for nonnull + if (p1 != 0) ... + + if (p1 == p2) ... // test for equality + if (p1 != p2) ... + + *p1 // dereferencing + p1->... 
+ + + rp = p1.get(); // fetch raw pointer + rp = p1.release(); // fetch raw pointer, and set to NULL + p1.move(p2); // equivalent to p1.reset(p2.release()) -- + // if p1 != p2 then: + // makes p1 point to p2's referent, + // setting p2 to NULL and destroying + // p1's referent + + p1.swap(p2); // fast swap + swap(p1, p2); + + +**********************************************************************/ + + + +template +class UniquePtr { +private: + T *dp; + + class Dummy { }; + typedef void (UniquePtr::*fake_null_type)(Dummy) const; + void fake_null_function(Dummy) const {} + + class Dummy1 { }; + typedef void (UniquePtr::*fake_null_type1)(Dummy1) const; + + bool cannot_compare_these_types() const { return false; } + + UniquePtr(const UniquePtr&); // disabled + void operator=(const UniquePtr&); // disabled + +public: + explicit UniquePtr(T *p) : dp(p) { } + + UniquePtr() : dp(0) { } + + + ~UniquePtr() { P::deleter(dp); } + + void reset(T* p = 0) + { + UniquePtr tmp(p); + tmp.swap(*this); + } + + UniquePtr& operator=(fake_null_type1) { reset(); return *this; } + + void make() + { + reset(MakeRaw()); + } + +#define NTL_DEFINE_UNIQUE_MAKE(n) \ + template< NTL_ARGTYPES(n) >\ + void make( NTL_VARARGS(n) )\ + {\ + reset(MakeRaw( NTL_PASSARGS(n) ));\ + }\ + + NTL_FOREACH_ARG1(NTL_DEFINE_UNIQUE_MAKE) + + T& operator*() const { return *dp; } + T* operator->() const { return dp; } + + T* get() const { return dp; } + + T* release() { T *p = dp; dp = 0; return p; } + void move(UniquePtr& other) { reset(other.release()); } + + void swap(UniquePtr& other) + { + _ntl_swap(dp, other.dp); + } + + + operator fake_null_type() const + { + return dp ? &UniquePtr::fake_null_function : 0; + } + +}; + + +// free swap function +template +void swap(UniquePtr& p, UniquePtr& q) { p.swap(q); } + + + +// Equality testing + +template +bool operator==(const UniquePtr& a, const UniquePtr& b) +{ + return a.get() == b.get(); +} + +template +bool operator!=(const UniquePtr& a, const UniquePtr& b) +{ + return a.get() != b.get(); +} + + +// the following definitions of == and != prevent comparisons +// on UniquePtr's to different types...such comparisons +// don't make sense...defining these here ensures the compiler +// emits an error message...and a pretty readable one + + +template +bool operator==(const UniquePtr& a, const UniquePtr& b) +{ + return a.cannot_compare_these_types(); +} + +template +bool operator!=(const UniquePtr& a, const UniquePtr& b) +{ + return a.cannot_compare_these_types(); +} + + + +/********************************************************************** + + + CopiedPtr: identical interface to UniquePtr, but copy constructor + and assignment are defined, and both are implemented using the + underlying type's copy constructor + + This provides very similar functionilty to OptionalVal, but I think + it is simpler to provide the same interface. + + It also allows some fine control of deleting and copying. + This allows for "clone on copy" as well as other things, + like a copying or cloning PIMPL pattern. + + +**********************************************************************/ + +struct DefaultCopierPolicy { + + template + static T* copier(T *p) { return (p ? MakeRaw(*p) : 0); } + +}; + +struct CloningCopier { + + template + static T* copier(T *p) { return (p ? 
p->clone() : 0); } + +}; + +struct DefaultCopiedPtrPolicy : DefaultDeleterPolicy, DefaultCopierPolicy { }; +struct CloningCopiedPtrPolicy : DefaultDeleterPolicy, CloningCopier { }; + +template +class CopiedPtr { +private: + T *dp; + + class Dummy { }; + typedef void (CopiedPtr::*fake_null_type)(Dummy) const; + void fake_null_function(Dummy) const {} + + class Dummy1 { }; + typedef void (CopiedPtr::*fake_null_type1)(Dummy1) const; + + bool cannot_compare_these_types() const { return false; } + +public: + explicit CopiedPtr(T *p) : dp(p) { } + + CopiedPtr() : dp(0) { } + + CopiedPtr(const CopiedPtr& other) : dp(0) + { + reset(P::copier(other.dp)); + } + + CopiedPtr& operator=(const CopiedPtr& other) + { + if (this == &other) return *this; + CopiedPtr tmp(other); + tmp.swap(*this); + return *this; + } + + ~CopiedPtr() { P::deleter(dp); } + + void reset(T* p = 0) + { + CopiedPtr tmp(p); + tmp.swap(*this); + } + + CopiedPtr& operator=(fake_null_type1) { reset(); return *this; } + + void make() + { + reset(MakeRaw()); + } + +#define NTL_DEFINE_COPIED_MAKE(n) \ + template< NTL_ARGTYPES(n) >\ + void make( NTL_VARARGS(n) )\ + {\ + reset(MakeRaw( NTL_PASSARGS(n) ));\ + }\ + + NTL_FOREACH_ARG1(NTL_DEFINE_COPIED_MAKE) + + T& operator*() const { return *dp; } + T* operator->() const { return dp; } + + T* get() const { return dp; } + + T* release() { T *p = dp; dp = 0; return p; } + void move(CopiedPtr& other) { reset(other.release()); } + + void swap(CopiedPtr& other) + { + _ntl_swap(dp, other.dp); + } + + + operator fake_null_type() const + { + return dp ? &CopiedPtr::fake_null_function : 0; + } + + + +}; + + + +// free swap function +template +void swap(CopiedPtr& p, CopiedPtr& q) { p.swap(q); } + + + +// Equality testing + +template +bool operator==(const CopiedPtr& a, const CopiedPtr& b) +{ + return a.get() == b.get(); +} + +template +bool operator!=(const CopiedPtr& a, const CopiedPtr& b) +{ + return a.get() != b.get(); +} + + +// the following definitions of == and != prevent comparisons +// on CopiedPtr's to different types...such comparisons +// don't make sense...defining these here ensures the compiler +// emits an error message...and a pretty readable one + + +template +bool operator==(const CopiedPtr& a, const CopiedPtr& b) +{ + return a.cannot_compare_these_types(); +} + +template +bool operator!=(const CopiedPtr& a, const CopiedPtr& b) +{ + return a.cannot_compare_these_types(); +} + + + + + +/********************************************************************** + +OptionalVal -- unique pointer to object with copying enabled. + +Constructors: + OptionalVal p1; // initialize with null + + T* rp; + OptionalVal p1(rp); // construct using raw pointer (explicit) + + OptionalVal p2(p1); // construct a copy of p1's referrent + + + + p1.make(...); // destroy's p1's referent and assigns + // a fresh objected constructed via T(...), + // using psuedo variadic templates + + p1.reset(rp); // destroy's p1's referent and assign rp + + + if (p1.exists()) ... 
// test for null + + p1.val() // dereference + + rp = p1.get(); // fetch raw pointer + rp = p1.release(); // fetch raw pointer, and set to NULL + p1.move(p2); // if p1 != p2 then: + // makes p1 point to p2's referent, + // setting p2 to NULL and destroying + // p1's referent + + p1 = p2; // if p1 != p2 then + // if p2 == NULL then + // reset p1 + // else + // p1.make(p2.val()) + + p1.swap(p2); // fast swap + swap(p1, p2); + + +**********************************************************************/ + + +template +class OptionalVal { +private: + UniquePtr dp; + +public: + explicit OptionalVal(T *p) : dp(p) { } + OptionalVal() { } + + OptionalVal(const OptionalVal& other) + { + if (other.exists()) + make(other.val()); + } + + OptionalVal& operator=(const OptionalVal& other) + { + if (this == &other) return *this; + OptionalVal tmp(other); + tmp.swap(*this); + return *this; + } + + ~OptionalVal() { } + + + void reset(T* p = 0) { dp.reset(p); } + + void make() { dp.make(); } + +#define NTL_DEFINE_OPTIONAL_VAL_MAKE(n) \ + template< NTL_ARGTYPES(n) >\ + void make( NTL_VARARGS(n) )\ + {\ + dp.make( NTL_PASSARGS(n) );\ + }\ + + NTL_FOREACH_ARG1(NTL_DEFINE_OPTIONAL_VAL_MAKE) + + T& val() const { return *dp; } + + bool exists() const { return dp != 0; } + + T* get() const { return dp.get(); } + + T* release() { return dp.release(); } + + void move(OptionalVal& other) { dp.move(other.dp); } + + void swap(OptionalVal& other) { dp.swap(other.dp); } + +}; + + +// free swap function +template +void swap(OptionalVal& p, OptionalVal& q) { p.swap(q); } + + + + + + +/********************************************************************** + +UniqueArray -- unique pointer to array of objects with copying disabled. +Useful for pointers inside classes so that we can +automatically destruct them. + +Constructors: + UniqueArray p1; // initialize with null + UniqueArray p1(0); + + T* rp; + UniqueArray p1(rp); // construct using raw pointer (explicit) + + p1 = 0; // destroy's p1's referent and assigns null + + p1.SetLength(n); // destroy's p1's referent and assigns + // a fresh objected constructed via new T[n] + + p1.reset(rp); // destroy's p1's referent and assign rp + + if (!p1) ... // test for null + if (p1 == 0) ... + + if (p1) ... // test for nonnull + if (p1 != 0) ... + + if (p1 == p2) ... // test for equality + if (p1 != p2) ... 
+ + p1[i] // array indexing + + rp = p1.get(); // fetch raw pointer + rp = p1.release(); // fetch raw pointer, and set to NULL + p1.move(p2); // equivalent to p1.reset(p2.release()) -- + // if p1 != p2 then: + // makes p1 point to p2's referent, + // setting p2 to NULL and destroying + // p1's referent + + p1.swap(p2); // fast swap + swap(p1, p2); + + +**********************************************************************/ + + +template +class UniqueArray { +private: + T *dp; + + class Dummy { }; + typedef void (UniqueArray::*fake_null_type)(Dummy) const; + void fake_null_function(Dummy) const {} + + class Dummy1 { }; + typedef void (UniqueArray::*fake_null_type1)(Dummy1) const; + + bool cannot_compare_these_types() const { return false; } + + UniqueArray(const UniqueArray&); // disabled + void operator=(const UniqueArray&); // disabled + +public: + explicit UniqueArray(T *p) : dp(p) { } + + UniqueArray() : dp(0) { } + + ~UniqueArray() { delete[] dp; } + + + void reset(T* p = 0) + { + UniqueArray tmp(p); + tmp.swap(*this); + } + + UniqueArray& operator=(fake_null_type1) { reset(); return *this; } + + void SetLength(long n) + { + reset( MakeRawArray(n) ); + } + + T& operator[](long i) const { return dp[i]; } + + T* get() const { return dp; } + T *elts() const { return dp; } + + T* release() { T *p = dp; dp = 0; return p; } + void move(UniqueArray& other) { reset(other.release()); } + + void swap(UniqueArray& other) + { + _ntl_swap(dp, other.dp); + } + + operator fake_null_type() const + { + return dp ? &UniqueArray::fake_null_function : 0; + } + +}; + + + +// free swap function +template +void swap(UniqueArray& p, UniqueArray& q) { p.swap(q); } + + + +// Equality testing + +template +bool operator==(const UniqueArray& a, const UniqueArray& b) +{ + return a.get() == b.get(); +} + +template +bool operator!=(const UniqueArray& a, const UniqueArray& b) +{ + return a.get() != b.get(); +} + + +// the following definitions of == and != prevent comparisons +// on UniqueArray's to different types...such comparisons +// don't make sense...defining these here ensures the compiler +// emits an error message...and a pretty readable one + + +template +bool operator==(const UniqueArray& a, const UniqueArray& b) +{ + return a.cannot_compare_these_types(); +} + +template +bool operator!=(const UniqueArray& a, const UniqueArray& b) +{ + return a.cannot_compare_these_types(); +} + + + + + +/********************************************************************** + +Unique2DArray -- unique pointer to array of arrays. + +This is very similar to UniqueArray< UniqueArray >, except that +we can retrofit old code that excepts objects of type T**. + +Constructors: + Unique2DArray p1; // initialize with null + Unique2DArray p1(0); + + p1 = 0; // destroy's p1's referent and assigns null + p1.reset(); + + p1.SetLength(n); // destroy's p1's referent and assigns + // a fresh array of null pointers + + p1.SetDims(n, m) // creates an n x m array + + if (!p1) ... // test for null + if (p1 == 0) ... + + if (p1) ... // test for nonnull + if (p1 != 0) ... + + if (p1 == p2) ... // test for equality + if (p1 != p2) ... 
+ + p1[i] // array indexing + + T **rp; + rp = p1.get(); // fetch raw pointer + rp = p1.release(); // fetch raw pointer, and set to NULL + p1.move(p2); // if p1 != p2 then: + // makes p1 point to p2's referent, + // setting p2 to NULL and destroying + // p1's referent + + p1.swap(p2); // fast swap + swap(p1, p2); + + +**********************************************************************/ + + +template +class Unique2DArray { +public: + typedef T *T_ptr; + +private: + UniqueArray dp; + long len; + + class Dummy { }; + typedef void (Unique2DArray::*fake_null_type)(Dummy) const; + void fake_null_function(Dummy) const {} + + class Dummy1 { }; + typedef void (Unique2DArray::*fake_null_type1)(Dummy1) const; + + bool cannot_compare_these_types() const { return false; } + + Unique2DArray(const Unique2DArray&); // disabled + void operator=(const Unique2DArray&); // disabled + + +public: + + Unique2DArray() : len(0) { } + + ~Unique2DArray() + { + if (dp) { + long i; + for (i = 0; i < len; i++) delete [] dp[i]; + } + } + + void reset() + { + Unique2DArray tmp; + tmp.swap(*this); + } + + Unique2DArray& operator=(fake_null_type1) { reset(); return *this; } + + void SetLength(long n) + { + UniqueArray tmp; + tmp.SetLength(n); + + long i; + for (i = 0; i < n; i++) tmp[i] = 0; + + reset(); + dp.move(tmp); + len = n; + } + + // EXCEPTIONS: strong ES + void SetDims(long n, long m) + { + Unique2DArray tmp; + tmp.SetLength(n); + + long i; + for (i = 0; i < n; i++) + tmp[i] = MakeRawArray(m); + + this->move(tmp); + } + + // EXCEPTIONS: strong ES + // This is a special-purpose routine to help + // with some legacy code...only rows 1..n-1 are allocated + void SetDimsFrom1(long n, long m) + { + Unique2DArray tmp; + tmp.SetLength(n); + + long i; + for (i = 1; i < n; i++) + tmp[i] = MakeRawArray(m); + + this->move(tmp); + } + + T_ptr& operator[](long i) const { return dp[i]; } + + T_ptr* get() const { return dp.get(); } + + T_ptr* release() { len = 0; return dp.release(); } + + + void move(Unique2DArray& other) + { + Unique2DArray tmp; + tmp.swap(other); + tmp.swap(*this); + } + + void swap(Unique2DArray& other) + { + dp.swap(other.dp); + _ntl_swap(len, other.len); + } + + operator fake_null_type() const + { + return dp ? &Unique2DArray::fake_null_function : 0; + } + +}; + + +// free swap function +template +void swap(Unique2DArray& p, Unique2DArray& q) { p.swap(q); } + + + +// Equality testing + +template +bool operator==(const Unique2DArray& a, const Unique2DArray& b) +{ + return a.get() == b.get(); +} + +template +bool operator!=(const Unique2DArray& a, const Unique2DArray& b) +{ + return a.get() != b.get(); +} + + +// the following definitions of == and != prevent comparisons +// on Unique2DArray's to different types...such comparisons +// don't make sense...defining these here ensures the compiler +// emits an error message...and a pretty readable one + + +template +bool operator==(const Unique2DArray& a, const Unique2DArray& b) +{ + return a.cannot_compare_these_types(); +} + +template +bool operator!=(const Unique2DArray& a, const Unique2DArray& b) +{ + return a.cannot_compare_these_types(); +} + + + + +// AlignedArray: +// +// specialized arrays that have similar interface to UniqueArray, but: +// * they are allocated with a given alignment +// * they (currently) only work on POD types +// +// DIRT: +// The current implementation just uses the _ntl_make_aligned function, +// which is not entirely portable. 
+// However, AlignedArray is currently only used if NTL_HAVE_AVX +// is defined, and under the assumptions imposed with that, +// it should definitely work. +// +// For now, this is not a part of the documented interface. + +// This could all change in the future, if and when there is a more portable +// way of doing this. + + +template +class AlignedArray { +private: + T *dp; + char *sp; + + class Dummy { }; + typedef void (AlignedArray::*fake_null_type)(Dummy) const; + void fake_null_function(Dummy) const {} + + class Dummy1 { }; + typedef void (AlignedArray::*fake_null_type1)(Dummy1) const; + + bool cannot_compare_these_types() const { return false; } + + AlignedArray(const AlignedArray&); // disabled + void operator=(const AlignedArray&); // disabled + + char* release() { char *p = sp; dp = 0; sp = 0; return p; } + + void reset(char* p) + { + AlignedArray tmp; + if (p) { + tmp.dp = (T*) _ntl_make_aligned(p, align); + tmp.sp = p; + } + else { + tmp.dp = 0; + tmp.sp = 0; + } + + tmp.swap(*this); + } + + + +public: + + AlignedArray() : dp(0), sp(0) { } + explicit AlignedArray(fake_null_type1) : dp(0), sp(0) { } + + ~AlignedArray() { NTL_SNS free(sp); } + + void reset() { reset(0); } + + AlignedArray& operator=(fake_null_type1) { reset(); return *this; } + + void SetLength(long n) + { + if (align <= 0 || n < 0) LogicError("AlignedArray::SetLength: bad args"); + if (NTL_OVERFLOW1(n, sizeof(T), align)) ResourceError("AlignedArray::SetLength: overflow"); + + if (n == 0) { + reset(); + } + else { + char *p = (char *) NTL_SNS malloc(n*sizeof(T) + align); + if (!p) MemoryError(); + reset(p); + } + } + + T& operator[](long i) const { return dp[i]; } + + T* get() const { return dp; } + T* elts() const { return dp; } + + + void move(AlignedArray& other) { reset(other.release()); } + + void swap(AlignedArray& other) + { + _ntl_swap(dp, other.dp); + _ntl_swap(sp, other.sp); + } + + operator fake_null_type() const + { + return dp ? &AlignedArray::fake_null_function : 0; + } + +}; + + +// free swap function +template +void swap(AlignedArray& p, AlignedArray& q) { p.swap(q); } + + + + + + + + + + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/WordVector.h b/thirdparty/linux/ntl/include/NTL/WordVector.h new file mode 100644 index 0000000000..ba62392501 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/WordVector.h @@ -0,0 +1,169 @@ + +#ifndef NTL_WordVector__H +#define NTL_WordVector__H + +/************************************************************** + + A WordVector is functionally similar to + a generic NTL vector of _ntl_ulong. + + Be careful! the MaxLength() function does not return + the max length ever set, but rather the max space allocated, + which *may* be more. + + The FixLength() facility is not available. + + The reason for special-casing is efficiency (of course). 
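+
+  For example, a short sketch of these semantics (illustrative, not NTL
+  test code):
+
+     WordVector v;
+     v.SetLength(10);          // may allocate more than 10 words
+     v.SetLength(5);           // shrinks the length, not the allocation
+     long m = v.MaxLength();   // >= 10: reports allocated space, not
+                               // the maximum length ever set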
+ +**************************************************************/ + + + +#include +#include + +NTL_OPEN_NNS + + + +#ifndef NTL_RANGE_CHECK +#define NTL_WV_RANGE_CHECK_CODE +#else +#define NTL_WV_RANGE_CHECK_CODE if (i < 0 || !rep || i >= long(rep[-1])) LogicError("index out of range in WordVector"); +#endif + +// vectors are allocated in chunks of this size + +#ifndef NTL_WordVectorMinAlloc +#define NTL_WordVectorMinAlloc (4) +#endif + +// vectors are always expanded by at least this ratio + +#ifndef NTL_WordVectorExpansionRatio +#define NTL_WordVectorExpansionRatio (1.2) +#endif + +// controls initialization during input + +#ifndef NTL_WordVectorInputBlock +#define NTL_WordVectorInputBlock 50 +#endif + + +class WordVector { +public: + _ntl_ulong *rep; + + WordVector(WordVector& x, INIT_TRANS_TYPE) { rep = x.rep; x.rep = 0; } + + + + WordVector() : rep(0) { } + WordVector(INIT_SIZE_TYPE, long n) : rep(0) { DoSetLength(n); } + WordVector(const WordVector& a) : rep(0) { *this = a; } + + WordVector& operator=(const WordVector& a); + + ~WordVector(); + void kill(); + + void KillBig() { if (MaxLength() > NTL_RELEASE_THRESH) kill(); } + // this conditinally kills the vector, if its size is excessive + + void DoSetLength(long n); + + void SetLength(long n) + { + _ntl_ulong *x = rep; + if (x && long(x[-2] >> 1) >= n && n >= 0) + x[-1] = n; + else + DoSetLength(n); + } + + void ZeroLength() { if (rep) rep[-1] = 0; } + + void SetMaxLength(long n); + void QuickSetLength(long n) { rep[-1] = _ntl_ulong(n); } + + long length() const { return (!rep) ? 0 : long(rep[-1]); } + long MaxLength() const + { return (!rep) ? 0 : long(rep[-2] >> 1); } + + _ntl_ulong& operator[](long i) + { + NTL_WV_RANGE_CHECK_CODE + return rep[i]; + } + + const _ntl_ulong& operator[](long i) const + { + NTL_WV_RANGE_CHECK_CODE + return rep[i]; + } + + _ntl_ulong& operator()(long i) { return (*this)[i-1]; } + const _ntl_ulong& operator()(long i) const { return (*this)[i-1]; } + + + const _ntl_ulong* elts() const { return rep; } + _ntl_ulong* elts() { return rep; } + + void swap(WordVector& y); + void append(_ntl_ulong a); + void append(const WordVector& w); +}; + + + + +class WordVectorWatcher { +public: + WordVector& watched; + explicit + WordVectorWatcher(WordVector& _watched) : watched(_watched) {} + + ~WordVectorWatcher() { watched.KillBig(); } +}; + + +inline void swap(WordVector& x, WordVector& y) + { x.swap(y); } + +inline void append(WordVector& v, _ntl_ulong a) + { v.append(a); } + +inline void append(WordVector& v, const WordVector& w) + { v.append(w); } + + +NTL_SNS istream& operator>>(NTL_SNS istream&, WordVector&); +NTL_SNS ostream& operator<<(NTL_SNS ostream&, const WordVector&); + + +long operator==(const WordVector& a, const WordVector& b); +long operator!=(const WordVector& a, const WordVector& b); + + +long InnerProduct(const WordVector& a, const WordVector& b); + +void ShiftAdd(_ntl_ulong *cp, const _ntl_ulong* ap, long sa, long n); +// cp = cp + (a << n) + + +long WV_BlockConstructAlloc(WordVector& x, long d, long n); + +void WV_BlockConstructSet(WordVector& x, WordVector& y, long i); + +long WV_BlockDestroy(WordVector& x); + +long WV_storage(long d); + + + + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/ZZ.h b/thirdparty/linux/ntl/include/NTL/ZZ.h new file mode 100644 index 0000000000..6b2035e9a5 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/ZZ.h @@ -0,0 +1,1585 @@ + + +#ifndef NTL_ZZ__H +#define NTL_ZZ__H + + + + +/******************************************************** + + 
LIP INTERFACE + + The class ZZ implements signed, arbitrary length integers. + +**********************************************************/ + + +#include +#include +#include +#include +#include + +NTL_OPEN_NNS + + +class ZZ_p; // forward declaration +class ZZX; + +class ZZ { +public: +typedef ZZ_p residue_type; +typedef ZZX poly_type; + + + +class Deleter { +public: + static void apply(NTL_verylong& p) { NTL_zfree(&p); } +}; + +WrappedPtr rep; +// This is currently public for "emergency" situations +// May be private in future versions. + +ZZ() { } + + +explicit ZZ(long a) { *this = a; } + +ZZ(INIT_SIZE_TYPE, long k) +// initial value is 0, but space is pre-allocated so that numbers +// x with x.size() <= k can be stored without re-allocation. +// Call with ZZ(INIT_SIZE, k). +// The purpose for the INIT_SIZE argument is to prevent automatic +// type conversion from long to ZZ, which would be tempting, but wrong. + + +{ + NTL_zsetlength(&rep, k); +} + +ZZ(const ZZ& a) +// initial value is a. + +{ + NTL_zcopy(a.rep, &rep); +} + + +ZZ(INIT_VAL_TYPE, long a) { NTL_zintoz(a, &rep); } +ZZ(INIT_VAL_TYPE, int a) { NTL_zintoz(a, &rep); } + +ZZ(INIT_VAL_TYPE, unsigned long a) { NTL_zuintoz(a, &rep); } +ZZ(INIT_VAL_TYPE, unsigned int a) { NTL_zuintoz((unsigned long) a, &rep); } + +inline ZZ(INIT_VAL_TYPE, const char *); +inline ZZ(INIT_VAL_TYPE, float); +inline ZZ(INIT_VAL_TYPE, double); + + +ZZ& operator=(const ZZ& a) { NTL_zcopy(a.rep, &rep); return *this; } + +ZZ& operator=(long a) { NTL_zintoz(a, &rep); return *this; } + + +void kill() +// force the space held by this ZZ to be released. +// The value then becomes 0. + +{ rep.kill(); } + + +void swap(ZZ& x) +{ NTL_zswap(&rep, &x.rep); } + +void SetSize(long k) +// pre-allocates space for k-digit numbers (base 2^NTL_ZZ_NBITS); +// does not change the value. + +{ NTL_zsetlength(&rep, k); } + +long size() const +// returns the number of (NTL_ZZ_NBIT-bit) digits of |a|; the size of 0 is 0. + { return NTL_zsize(rep); } + +long null() const +// test of rep is null + { return !rep; } + +long MaxAlloc() const +// returns max allocation request, possibly rounded up a bit... + { return NTL_zmaxalloc(rep); } + + +long SinglePrecision() const + { return NTL_zsptest(rep); } + +// tests if less than NTL_SP_BOUND in absolute value + +long WideSinglePrecision() const + { return NTL_zwsptest(rep); } + +// tests if less than NTL_WSP_BOUND in absolute value + +static const ZZ& zero(); + + +ZZ(ZZ& x, INIT_TRANS_TYPE) { rep.swap(x.rep); } +// used to cheaply hand off memory management of return value, +// without copying, assuming compiler implements the +// "return value optimization". This is probably obsolete by +// now, as modern compilers can and should optimize +// the copy constructor in the situations where this is used. +// This should only be used for simple, local variables +// that are not be subject to special memory management. 
+ + +// mainly for internal consumption by ZZWatcher + +void KillBig() { if (MaxAlloc() > NTL_RELEASE_THRESH) kill(); } + +}; + + +class ZZWatcher { +public: + ZZ& watched; + explicit + ZZWatcher(ZZ& _watched) : watched(_watched) {} + + ~ZZWatcher() { watched.KillBig(); } +}; + +#define NTL_ZZRegister(x) NTL_TLS_LOCAL(ZZ, x); ZZWatcher _WATCHER__ ## x(x) + + + + + +const ZZ& ZZ_expo(long e); + + +inline void clear(ZZ& x) +// x = 0 + + { NTL_zzero(&x.rep); } + +inline void set(ZZ& x) +// x = 1 + + { NTL_zone(&x.rep); } + + +inline void swap(ZZ& x, ZZ& y) +// swap the values of x and y (swaps pointers only) + + { x.swap(y); } + + +inline double log(const ZZ& a) + { return NTL_zlog(a.rep); } + + + + +/********************************************************** + + Conversion routines. + +***********************************************************/ + + + +inline void conv(ZZ& x, const ZZ& a) { x = a; } +inline ZZ to_ZZ(const ZZ& a) { return a; } + +inline void conv(ZZ& x, long a) { NTL_zintoz(a, &x.rep); } +inline ZZ to_ZZ(long a) { return ZZ(INIT_VAL, a); } + +inline void conv(ZZ& x, int a) { NTL_zintoz(long(a), &x.rep); } +inline ZZ to_ZZ(int a) { return ZZ(INIT_VAL, a); } + +inline void conv(ZZ& x, unsigned long a) { NTL_zuintoz(a, &x.rep); } +inline ZZ to_ZZ(unsigned long a) { return ZZ(INIT_VAL, a); } + +inline void conv(ZZ& x, unsigned int a) { NTL_zuintoz((unsigned long)(a), &x.rep); } +inline ZZ to_ZZ(unsigned int a) { return ZZ(INIT_VAL, a); } + +void conv(ZZ& x, const char *s); +inline ZZ::ZZ(INIT_VAL_TYPE, const char *s) { conv(*this, s); } +inline ZZ to_ZZ(const char *s) { return ZZ(INIT_VAL, s); } + +inline void conv(ZZ& x, double a) { NTL_zdoubtoz(a, &x.rep); } +inline ZZ::ZZ(INIT_VAL_TYPE, double a) { conv(*this, a); } +inline ZZ to_ZZ(double a) { return ZZ(INIT_VAL, a); } + +inline void conv(ZZ& x, float a) { NTL_zdoubtoz(double(a), &x.rep); } +inline ZZ::ZZ(INIT_VAL_TYPE, float a) { conv(*this, a); } +inline ZZ to_ZZ(float a) { return ZZ(INIT_VAL, a); } + +inline void conv(long& x, const ZZ& a) { x = NTL_ztoint(a.rep); } +inline long to_long(const ZZ& a) { return NTL_ztoint(a.rep); } + +inline void conv(int& x, const ZZ& a) + { unsigned int res = (unsigned int) NTL_ztouint(a.rep); + x = NTL_UINT_TO_INT(res); } + +inline int to_int(const ZZ& a) + { unsigned int res = (unsigned int) NTL_ztouint(a.rep); + return NTL_UINT_TO_INT(res); } + +inline void conv(unsigned long& x, const ZZ& a) { x = NTL_ztouint(a.rep); } +inline unsigned long to_ulong(const ZZ& a) { return NTL_ztouint(a.rep); } + +inline void conv(unsigned int& x, const ZZ& a) + { x = (unsigned int)(NTL_ztouint(a.rep)); } +inline unsigned int to_uint(const ZZ& a) + { return (unsigned int)(NTL_ztouint(a.rep)); } + +inline void conv(double& x, const ZZ& a) { x = NTL_zdoub(a.rep); } +inline double to_double(const ZZ& a) { return NTL_zdoub(a.rep); } + +inline void conv(float& x, const ZZ& a) { x = float(NTL_zdoub(a.rep)); } +inline float to_float(const ZZ& a) { return float(NTL_zdoub(a.rep)); } + +inline void ZZFromBytes(ZZ& x, const unsigned char *p, long n) + { NTL_zfrombytes(&x.rep, p, n); } + +inline ZZ ZZFromBytes(const unsigned char *p, long n) + { ZZ x; ZZFromBytes(x, p, n); NTL_OPT_RETURN(ZZ, x); } + +inline void BytesFromZZ(unsigned char *p, const ZZ& a, long n) + { NTL_zbytesfromz(p, a.rep, n); } + + + + +// ****** comparisons + + +inline long sign(const ZZ& a) +// returns the sign of a (-1, 0, or 1). 
+ + { return NTL_zsign(a.rep); } + + +inline long compare(const ZZ& a, const ZZ& b) +// returns the sign of a-b (-1, 0, or 1). + +{ + return NTL_zcompare(a.rep, b.rep); +} + +inline long IsZero(const ZZ& a) +// zero test + + { return NTL_ziszero(a.rep); } + + +inline long IsOne(const ZZ& a) + { return NTL_zisone(a.rep); } +// test for 1 + + +/* the usual comparison operators */ + +inline long operator==(const ZZ& a, const ZZ& b) + { return NTL_zcompare(a.rep, b.rep) == 0; } +inline long operator!=(const ZZ& a, const ZZ& b) + { return NTL_zcompare(a.rep, b.rep) != 0; } +inline long operator<(const ZZ& a, const ZZ& b) + { return NTL_zcompare(a.rep, b.rep) < 0; } +inline long operator>(const ZZ& a, const ZZ& b) + { return NTL_zcompare(a.rep, b.rep) > 0; } +inline long operator<=(const ZZ& a, const ZZ& b) + { return NTL_zcompare(a.rep, b.rep) <= 0; } +inline long operator>=(const ZZ& a, const ZZ& b) + { return NTL_zcompare(a.rep, b.rep) >= 0; } + +/* single-precision versions of the above */ + +inline long compare(const ZZ& a, long b) { return NTL_zscompare(a.rep, b); } +inline long compare(long a, const ZZ& b) { return -NTL_zscompare(b.rep, a); } + +inline long operator==(const ZZ& a, long b) { return NTL_zscompare(a.rep, b) == 0; } +inline long operator!=(const ZZ& a, long b) { return NTL_zscompare(a.rep, b) != 0; } +inline long operator<(const ZZ& a, long b) { return NTL_zscompare(a.rep, b) < 0; } +inline long operator>(const ZZ& a, long b) { return NTL_zscompare(a.rep, b) > 0; } +inline long operator<=(const ZZ& a, long b) { return NTL_zscompare(a.rep, b) <= 0; } +inline long operator>=(const ZZ& a, long b) { return NTL_zscompare(a.rep, b) >= 0; } + + +inline long operator==(long a, const ZZ& b) { return b == a; } +inline long operator!=(long a, const ZZ& b) { return b != a; } +inline long operator<(long a, const ZZ& b) { return b > a; } +inline long operator>(long a, const ZZ& b) { return b < a; } +inline long operator<=(long a, const ZZ& b) { return b >= a; } +inline long operator>=(long a, const ZZ& b) { return b <= a; } + +/************************************************** + + Addition + +**************************************************/ + + +inline void add(ZZ& x, const ZZ& a, const ZZ& b) +// x = a + b + + { NTL_zadd(a.rep, b.rep, &x.rep); } + +inline void sub(ZZ& x, const ZZ& a, const ZZ& b) +// x = a - b + + { NTL_zsub(a.rep, b.rep, &x.rep); } + +inline void SubPos(ZZ& x, const ZZ& a, const ZZ& b) +// x = a - b; assumes a >= b >= 0. 
+ + { NTL_zsubpos(a.rep, b.rep, &x.rep); } + +inline void negate(ZZ& x, const ZZ& a) +// x = -a + + { NTL_zcopy(a.rep, &x.rep); NTL_znegate(&x.rep); } + +inline void abs(ZZ& x, const ZZ& a) +// x = |a| +{ NTL_zcopy(a.rep, &x.rep); NTL_zabs(&x.rep); } + + +/* single-precision versions of the above */ + +inline void add(ZZ& x, const ZZ& a, long b) + { NTL_zsadd(a.rep, b, &x.rep); } + +inline void add(ZZ& x, long a, const ZZ& b) { add(x, b, a); } + + +void sub(ZZ& x, const ZZ& a, long b); +void sub(ZZ& x, long a, const ZZ& b); + +/* operator/function notation */ + +inline ZZ operator+(const ZZ& a, const ZZ& b) + { ZZ x; add(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator+(const ZZ& a, long b) + { ZZ x; add(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator+(long a, const ZZ& b) + { ZZ x; add(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator-(const ZZ& a, const ZZ& b) + { ZZ x; sub(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator-(const ZZ& a, long b) + { ZZ x; sub(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator-(long a, const ZZ& b) + { ZZ x; sub(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator-(const ZZ& a) + { ZZ x; negate(x, a); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ abs(const ZZ& a) + { ZZ x; abs(x, a); NTL_OPT_RETURN(ZZ, x); } + +/* op= notation */ + +inline ZZ& operator+=(ZZ& x, const ZZ& a) + { add(x, x, a); return x; } + +inline ZZ& operator+=(ZZ& x, long a) + { add(x, x, a); return x; } + +inline ZZ& operator-=(ZZ& x, const ZZ& a) + { sub(x, x, a); return x; } + +inline ZZ& operator-=(ZZ& x, long a) + { sub(x, x, a); return x; } + +/* inc/dec */ + +inline ZZ& operator++(ZZ& x) { add(x, x, 1); return x; } + +inline void operator++(ZZ& x, int) { add(x, x, 1); } + +inline ZZ& operator--(ZZ& x) { add(x, x, -1); return x; } + +inline void operator--(ZZ& x, int) { add(x, x, -1); } + + + +/******************************************************* + + Multiplication. 
+ +********************************************************/ + + +inline void mul(ZZ& x, const ZZ& a, const ZZ& b) +// x = a * b + + { NTL_zmul(a.rep, b.rep, &x.rep); } + + +inline void sqr(ZZ& x, const ZZ& a) +// x = a*a + + { NTL_zsq(a.rep, &x.rep); } + +inline ZZ sqr(const ZZ& a) + { ZZ x; sqr(x, a); NTL_OPT_RETURN(ZZ, x); } + + +/* single-precision versions */ + +inline void mul(ZZ& x, const ZZ& a, long b) + { NTL_zsmul(a.rep, b, &x.rep); } + +inline void mul(ZZ& x, long a, const ZZ& b) + { mul(x, b, a); } + +/* operator notation */ + +inline ZZ operator*(const ZZ& a, const ZZ& b) + { ZZ x; mul(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator*(const ZZ& a, long b) + { ZZ x; mul(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator*(long a, const ZZ& b) + { ZZ x; mul(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +/* op= notation */ + +inline ZZ& operator*=(ZZ& x, const ZZ& a) + { mul(x, x, a); return x; } + +inline ZZ& operator*=(ZZ& x, long a) + { mul(x, x, a); return x; } + +// x += a*b + +inline void +MulAddTo(ZZ& x, const ZZ& a, long b) +{ + NTL_zsaddmul(a.rep, b, &x.rep); +} + +inline void +MulAddTo(ZZ& x, const ZZ& a, const ZZ& b) +{ + NTL_zaddmul(a.rep, b.rep, &x.rep); +} + +// x -= a*b + +inline void +MulSubFrom(ZZ& x, const ZZ& a, long b) +{ + NTL_zssubmul(a.rep, b, &x.rep); +} + +inline void +MulSubFrom(ZZ& x, const ZZ& a, const ZZ& b) +{ + NTL_zsubmul(a.rep, b.rep, &x.rep); +} + + + +// Special routines for implementing CRT in ZZ_pX arithmetic +// These are verbose, but fairly boilerplate + + + +class ZZ_CRTStructAdapter; +class ZZ_RemStructAdapter; + +class ZZ_TmpVecAdapter { +public: + UniquePtr<_ntl_tmp_vec> rep; + + inline void fetch(const ZZ_CRTStructAdapter&); + inline void fetch(ZZ_CRTStructAdapter&); + inline void fetch(const ZZ_RemStructAdapter&); +}; + + +class ZZ_CRTStructAdapter { +public: + UniquePtr<_ntl_crt_struct> rep; + + void init(long n, const ZZ& p, long (*primes)(long)) + { + rep.reset(_ntl_crt_struct_build(n, p.rep, primes)); + } + + void insert(long i, const ZZ& m) + { + rep->insert(i, m.rep); + } + + void eval(ZZ& t, const long *a, ZZ_TmpVecAdapter& tmp_vec) const + { + rep->eval(&t.rep, a, tmp_vec.rep.get()); + } + + bool special() const + { + return rep->special(); + } +}; + + +class ZZ_RemStructAdapter { +public: + UniquePtr<_ntl_rem_struct> rep; + + void init(long n, const ZZ& p, long (*primes)(long)) + { + rep.reset(_ntl_rem_struct_build(n, p.rep, primes)); + } + + void eval(long *x, const ZZ& a, ZZ_TmpVecAdapter& tmp_vec) const + { + rep->eval(x, a.rep, tmp_vec.rep.get()); + } +}; + + +inline void ZZ_TmpVecAdapter::fetch(const ZZ_CRTStructAdapter& crt_struct) +{ + rep.reset(crt_struct.rep->fetch()); +} + +inline void ZZ_TmpVecAdapter::fetch(ZZ_CRTStructAdapter& crt_struct) +{ + rep.reset(crt_struct.rep->extract()); // EXTRACT!! 
+} + + +inline void ZZ_TmpVecAdapter::fetch(const ZZ_RemStructAdapter& rem_struct) +{ + rep.reset(rem_struct.rep->fetch()); +} + + +// montgomery +class ZZ_ReduceStructAdapter { +public: + UniquePtr<_ntl_reduce_struct> rep; + + void init(const ZZ& p, const ZZ& excess) + { + rep.reset(_ntl_reduce_struct_build(p.rep, excess.rep)); + } + + void eval(ZZ& x, ZZ& a) const + { + rep->eval(&x.rep, &a.rep); + } + + void adjust(ZZ& x) const + { + rep->adjust(&x.rep); + } +}; + + + +/******************************************************* + + Division + +*******************************************************/ + + +inline void DivRem(ZZ& q, ZZ& r, const ZZ& a, const ZZ& b) +// q = [a/b], r = a - b*q +// |r| < |b|, and if r != 0, sign(r) = sign(b) + + { NTL_zdiv(a.rep, b.rep, &q.rep, &r.rep); } + + + +inline void div(ZZ& q, const ZZ& a, const ZZ& b) +// q = a/b + + { NTL_zdiv(a.rep, b.rep, &q.rep, 0); } + +inline void rem(ZZ& r, const ZZ& a, const ZZ& b) +// r = a%b + + { NTL_zmod(a.rep, b.rep, &r.rep); } + + +inline void QuickRem(ZZ& r, const ZZ& b) +// r = r%b +// assumes b > 0 and r >=0 +// division is performed in place and may cause r to be re-allocated. + + { NTL_zquickmod(&r.rep, b.rep); } + +long divide(ZZ& q, const ZZ& a, const ZZ& b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0. + +long divide(const ZZ& a, const ZZ& b); +// if b | a, returns 1; otherwise returns 0. + + +/* non-standard single-precision versions */ + +inline long DivRem(ZZ& q, const ZZ& a, long b) + { return NTL_zsdiv(a.rep, b, &q.rep); } + +inline long rem(const ZZ& a, long b) + { return NTL_zsmod(a.rep, b); } + + +/* single precision versions */ + +inline void div(ZZ& q, const ZZ& a, long b) + { (void) NTL_zsdiv(a.rep, b, &q.rep); } + + +long divide(ZZ& q, const ZZ& a, long b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0. + +long divide(const ZZ& a, long b); +// if b | a, returns 1; otherwise returns 0. + + +inline ZZ operator/(const ZZ& a, const ZZ& b) + { ZZ x; div(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator/(const ZZ& a, long b) + { ZZ x; div(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator%(const ZZ& a, const ZZ& b) + { ZZ x; rem(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline long operator%(const ZZ& a, long b) + { return rem(a, b); } + +inline ZZ& operator/=(ZZ& x, const ZZ& b) + { div(x, x, b); return x; } + +inline ZZ& operator/=(ZZ& x, long b) + { div(x, x, b); return x; } + +inline ZZ& operator%=(ZZ& x, const ZZ& b) + { rem(x, x, b); return x; } + + +// preconditioned single-precision variant +// not documented for now... 
+ + + + +struct sp_ZZ_reduce_struct_policy { + + static + void deleter(_ntl_general_rem_one_struct *pinfo) + { + _ntl_general_rem_one_struct_delete(pinfo); + } + +}; + +struct sp_ZZ_reduce_struct { + long p; + UniquePtr<_ntl_general_rem_one_struct,sp_ZZ_reduce_struct_policy> pinfo; + + sp_ZZ_reduce_struct() : p(0) { } + + void build(long _p) + { + pinfo.reset(_ntl_general_rem_one_struct_build(_p)); + p = _p; + } + + long rem(const ZZ& a) const + { + return _ntl_general_rem_one_struct_apply(a.rep, p, pinfo.get()); + } +}; + + + +/********************************************************** + + GCD's + +***********************************************************/ + + +inline void GCD(ZZ& d, const ZZ& a, const ZZ& b) +// d = gcd(a, b) + + { NTL_zgcd(a.rep, b.rep, &d.rep); } + +inline ZZ GCD(const ZZ& a, const ZZ& b) + { ZZ x; GCD(x, a, b); NTL_OPT_RETURN(ZZ, x); } + + +inline void XGCD(ZZ& d, ZZ& s, ZZ& t, const ZZ& a, const ZZ& b) +// d = gcd(a, b) = a*s + b*t; + + { NTL_zexteucl(a.rep, &s.rep, b.rep, &t.rep, &d.rep); } + +// single-precision versions +long GCD(long a, long b); + +void XGCD(long& d, long& s, long& t, long a, long b); + + + + + + + +/************************************************************ + + Bit Operations + +*************************************************************/ + + +inline void LeftShift(ZZ& x, const ZZ& a, long k) +// x = (a << k), k < 0 => RightShift + + { NTL_zlshift(a.rep, k, &x.rep); } + +inline ZZ LeftShift(const ZZ& a, long k) + { ZZ x; LeftShift(x, a, k); NTL_OPT_RETURN(ZZ, x); } + + +inline void RightShift(ZZ& x, const ZZ& a, long k) +// x = (a >> k), k < 0 => LeftShift + + { NTL_zrshift(a.rep, k, &x.rep); } + +inline ZZ RightShift(const ZZ& a, long k) + { ZZ x; RightShift(x, a, k); NTL_OPT_RETURN(ZZ, x); } + +#ifndef NTL_TRANSITION + +inline ZZ operator>>(const ZZ& a, long n) + { ZZ x; RightShift(x, a, n); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator<<(const ZZ& a, long n) + { ZZ x; LeftShift(x, a, n); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ& operator<<=(ZZ& x, long n) + { LeftShift(x, x, n); return x; } + +inline ZZ& operator>>=(ZZ& x, long n) + { RightShift(x, x, n); return x; } + +#endif + + +inline long MakeOdd(ZZ& x) +// removes factors of 2 from x, returns the number of 2's removed +// returns 0 if x == 0 + + { return NTL_zmakeodd(&x.rep); } + +inline long NumTwos(const ZZ& x) +// returns max e such that 2^e divides x if x != 0, and returns 0 if x == 0. + + { return NTL_znumtwos(x.rep); } + + +inline long IsOdd(const ZZ& a) +// returns 1 if a is odd, otherwise 0 + + { return NTL_zodd(a.rep); } + + +inline long NumBits(const ZZ& a) +// returns the number of bits in |a|; NumBits(0) = 0 + { return NTL_z2log(a.rep); } + + + +inline long bit(const ZZ& a, long k) +// returns bit k of a, 0 being the low-order bit + + { return NTL_zbit(a.rep, k); } + +#ifndef NTL_GMP_LIP + +// only defined for the "classic" long integer package, for backward +// compatability. + +inline long digit(const ZZ& a, long k) + { return NTL_zdigit(a.rep, k); } + +#endif + +// returns k-th digit of |a|, 0 being the low-order digit. 
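+
+// As an illustration of the XGCD interface above (a usage sketch; a and n
+// are assumed to be ZZ variables with n > 0 and gcd(a, n) == 1):
+//
+//    ZZ d, s, t;
+//    XGCD(d, s, t, a, n);    // d = gcd(a, n) = a*s + n*t
+//    if (d != 1) LogicError("a not invertible mod n");
+//    ZZ inv = s % n;         // since n > 0, the remainder lies in [0, n)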
+ +inline void trunc(ZZ& x, const ZZ& a, long k) +// puts k low order bits of |a| into x + + { NTL_zlowbits(a.rep, k, &x.rep); } + +inline ZZ trunc_ZZ(const ZZ& a, long k) + { ZZ x; trunc(x, a, k); NTL_OPT_RETURN(ZZ, x); } + +inline long trunc_long(const ZZ& a, long k) +// returns k low order bits of |a| + + { return NTL_zslowbits(a.rep, k); } + +inline long SetBit(ZZ& x, long p) +// returns original value of p-th bit of |a|, and replaces +// p-th bit of a by 1 if it was zero; +// error if p < 0 + + { return NTL_zsetbit(&x.rep, p); } + +inline long SwitchBit(ZZ& x, long p) +// returns original value of p-th bit of |a|, and switches +// the value of p-th bit of a; +// p starts counting at 0; +// error if p < 0 + + { return NTL_zswitchbit(&x.rep, p); } + +inline long weight(long a) +// returns Hamming weight of |a| + + { return NTL_zweights(a); } + +inline long weight(const ZZ& a) +// returns Hamming weight of |a| + + { return NTL_zweight(a.rep); } + +inline void bit_and(ZZ& x, const ZZ& a, const ZZ& b) +// x = |a| AND |b| + + { NTL_zand(a.rep, b.rep, &x.rep); } + +void bit_and(ZZ& x, const ZZ& a, long b); +inline void bit_and(ZZ& x, long a, const ZZ& b) + { bit_and(x, b, a); } + + +inline void bit_or(ZZ& x, const ZZ& a, const ZZ& b) +// x = |a| OR |b| + + { NTL_zor(a.rep, b.rep, &x.rep); } + +void bit_or(ZZ& x, const ZZ& a, long b); +inline void bit_or(ZZ& x, long a, const ZZ& b) + { bit_or(x, b, a); } + +inline void bit_xor(ZZ& x, const ZZ& a, const ZZ& b) +// x = |a| XOR |b| + + { NTL_zxor(a.rep, b.rep, &x.rep); } + +void bit_xor(ZZ& x, const ZZ& a, long b); +inline void bit_xor(ZZ& x, long a, const ZZ& b) + { bit_xor(x, b, a); } + + +inline ZZ operator&(const ZZ& a, const ZZ& b) + { ZZ x; bit_and(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator&(const ZZ& a, long b) + { ZZ x; bit_and(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator&(long a, const ZZ& b) + { ZZ x; bit_and(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator|(const ZZ& a, const ZZ& b) + { ZZ x; bit_or(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator|(const ZZ& a, long b) + { ZZ x; bit_or(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator|(long a, const ZZ& b) + { ZZ x; bit_or(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator^(const ZZ& a, const ZZ& b) + { ZZ x; bit_xor(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator^(const ZZ& a, long b) + { ZZ x; bit_xor(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ operator^(long a, const ZZ& b) + { ZZ x; bit_xor(x, a, b); NTL_OPT_RETURN(ZZ, x); } + +inline ZZ& operator&=(ZZ& x, const ZZ& b) + { bit_and(x, x, b); return x; } + +inline ZZ& operator&=(ZZ& x, long b) + { bit_and(x, x, b); return x; } + +inline ZZ& operator|=(ZZ& x, const ZZ& b) + { bit_or(x, x, b); return x; } + +inline ZZ& operator|=(ZZ& x, long b) + { bit_or(x, x, b); return x; } + +inline ZZ& operator^=(ZZ& x, const ZZ& b) + { bit_xor(x, x, b); return x; } + +inline ZZ& operator^=(ZZ& x, long b) + { bit_xor(x, x, b); return x; } + + + +long NumBits(long a); + +long bit(long a, long k); + +long NextPowerOfTwo(long m); +// returns least nonnegative k such that 2^k >= m + +inline +long NumBytes(const ZZ& a) + { return (NumBits(a)+7)/8; } + +inline +long NumBytes(long a) + { return (NumBits(a)+7)/8; } + + + +/*********************************************************** + + Some specialized routines + +************************************************************/ + + +inline long ZZ_BlockConstructAlloc(ZZ& x, long d, long n) + { return NTL_zblock_construct_alloc(&x.rep, d, 
n); }
+
+inline void ZZ_BlockConstructSet(ZZ& x, ZZ& y, long i)
+ { NTL_zblock_construct_set(x.rep, &y.rep, i); }
+
+inline long ZZ_BlockDestroy(ZZ& x)
+ { return NTL_zblock_destroy(x.rep); }
+
+inline long ZZ_storage(long d)
+ { return NTL_zblock_storage(d); }
+
+inline long ZZ_RoundCorrection(const ZZ& a, long k, long residual)
+ { return NTL_zround_correction(a.rep, k, residual); }
+
+
+/***********************************************************
+
+ Pseudo-random Numbers
+
+************************************************************/
+
+
+// ================ NEW PRG STUFF =================
+
+
+// Low-level key-derivation
+
+
+void DeriveKey(unsigned char *key, long klen,
+ const unsigned char *data, long dlen);
+
+
+
+// Low-level chacha stuff
+
+#define NTL_PRG_KEYLEN (32)
+
+class RandomStream {
+private:
+ _ntl_uint32 state[16];
+ unsigned char buf[64];
+ long pos;
+
+ void do_get(unsigned char *res, long n);
+
+public:
+ explicit
+ RandomStream(const unsigned char *key);
+
+ // No default constructor
+ // default copy and assignment
+
+ void get(unsigned char *res, long n)
+ {
+ // optimize short reads
+ if (n >= 0 && n <= 64-pos) {
+ long i;
+ for (i = 0; i < n; i++) {
+ res[i] = buf[pos+i];
+ }
+ pos += n;
+ }
+ else {
+ do_get(res, n);
+ }
+ }
+
+};
+
+
+
+
+RandomStream& GetCurrentRandomStream();
+// get reference to the current random stream --
+// if SetSeed has not been called, it is called with
+// a default value (which should be unique to each
+// process/thread)
+
+
+void SetSeed(const ZZ& s);
+void SetSeed(const unsigned char *data, long dlen);
+void SetSeed(const RandomStream& s);
+// initialize random number generator
+// in the first two versions, a PRG key is derived from
+// the data using DeriveKey.
+
+
+// RAII for saving/restoring current state of PRG
+
+class RandomStreamPush {
+private:
+ RandomStream saved;
+
+ RandomStreamPush(const RandomStreamPush&); // disable
+ void operator=(const RandomStreamPush&); // disable
+
+public:
+ RandomStreamPush() : saved(GetCurrentRandomStream()) { }
+ ~RandomStreamPush() { SetSeed(saved); }
+
+};
+
+
+
+
+void RandomBnd(ZZ& x, const ZZ& n);
+// x = "random number" in the range 0..n-1, or 0 if n <= 0
+
+inline ZZ RandomBnd(const ZZ& n)
+ { ZZ x; RandomBnd(x, n); NTL_OPT_RETURN(ZZ, x); }
+
+
+void RandomLen(ZZ& x, long NumBits);
+// x = "random number" with precisely NumBits bits.
+
+
+inline ZZ RandomLen_ZZ(long NumBits)
+ { ZZ x; RandomLen(x, NumBits); NTL_OPT_RETURN(ZZ, x); }
+
+
+void RandomBits(ZZ& x, long NumBits);
+// x = "random number", 0 <= x < 2^NumBits
+
+inline ZZ RandomBits_ZZ(long NumBits)
+ { ZZ x; RandomBits(x, NumBits); NTL_OPT_RETURN(ZZ, x); }
+
+
+// single-precision version of the above
+
+long RandomBnd(long n);
+inline void RandomBnd(long& x, long n) { x = RandomBnd(n); }
+
+long RandomLen_long(long l);
+inline void RandomLen(long& x, long l) { x = RandomLen_long(l); }
+
+long RandomBits_long(long l);
+inline void RandomBits(long& x, long l) { x = RandomBits_long(l); }
+
+
+// specialty routines
+
+unsigned long RandomWord();
+unsigned long RandomBits_ulong(long l);
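+
+// Editorial sketch (not part of the original NTL sources): typical use of
+// the PRG interface above. Seeding with a fixed value makes the stream
+// reproducible; RandomStreamPush restores the previous PRG state on scope
+// exit.
+//
+//    SetSeed(ZZ(42));              // derive a PRG key from the seed
+//    ZZ r = RandomBnd(ZZ(1000));   // uniform in [0, 1000)
+//    ZZ m = RandomLen_ZZ(256);     // exactly 256 bits
+//    {
+//       RandomStreamPush push;     // save PRG state ...
+//       SetSeed(ZZ(7));            // ... reseed temporarily
+//    }                             // previous state restored here
+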
+// helper class to make generating small random numbers faster
+// FIXME: add documentation?
+
+struct RandomBndGenerator {
+
+ long p;
+ long nb;
+ unsigned long mask;
+
+ RandomStream *str;
+
+ RandomBndGenerator() : p(0) { }
+
+ explicit
+ RandomBndGenerator(long _p) : p(0) { build(_p); }
+
+ void build(long _p)
+ {
+ if (_p <= 1) LogicError("RandomBndGenerator::init: bad args");
+
+ if (!p) {
+ str = &GetCurrentRandomStream();
+ }
+
+ p = _p;
+ long l = NumBits(p-1);
+ nb = (l+7)/8;
+ mask = (1UL << l)-1UL;
+ }
+
+ long next()
+ {
+ unsigned char buf[NTL_BITS_PER_LONG/8];
+ long tmp;
+
+ do {
+ str->get(buf, nb);
+
+ unsigned long word = 0;
+ for (long i = nb-1; i >= 0; i--) word = (word << 8) | buf[i];
+
+ tmp = long(word & mask);
+ } while (tmp >= p);
+
+ return tmp;
+ }
+};
+
+
+inline void VectorRandomBnd(long k, long* x, long n)
+{
+ if (k <= 0) return;
+ if (n <= 1) {
+ for (long i = 0; i < k; i++) x[i] = 0;
+ }
+ else {
+ RandomBndGenerator gen(n);
+ for (long i = 0; i < k; i++) x[i] = gen.next();
+ }
+}
+
+
+
+/**********************************************************
+
+ Incremental Chinese Remaindering
+
+***********************************************************/
+
+long CRT(ZZ& a, ZZ& p, const ZZ& A, const ZZ& P);
+long CRT(ZZ& a, ZZ& p, long A, long P);
+// 0 <= A < P, (p, P) = 1;
+// computes b such that b = a mod p, b = A mod P,
+// and -p*P/2 < b <= p*P/2;
+// sets a = b, p = p*P, and returns 1 if a's value
+// has changed, otherwise 0
+
+inline long CRTInRange(const ZZ& gg, const ZZ& aa)
+ { return NTL_zcrtinrange(gg.rep, aa.rep); }
+
+// an auxiliary routine used by newer CRT routines to maintain
+// backward compatibility.
+
+// test if a > 0 and -a/2 < g <= a/2
+// this is "hand crafted" so as not to waste too much time
+// in the CRT routines.
+
+
+
+/**********************************************************
+
+ Rational Reconstruction
+
+***********************************************************/
+
+inline
+long ReconstructRational(ZZ& a, ZZ& b, const ZZ& u, const ZZ& m,
+ const ZZ& a_bound, const ZZ& b_bound)
+{
+ return NTL_zxxratrecon(u.rep, m.rep, a_bound.rep, b_bound.rep, &a.rep, &b.rep);
+
+}
+
+
+
+
+/************************************************************
+
+ Primality Testing
+
+*************************************************************/
+
+
+void GenPrime(ZZ& n, long l, long err = 80);
+inline ZZ GenPrime_ZZ(long l, long err = 80)
+{ ZZ x; GenPrime(x, l, err); NTL_OPT_RETURN(ZZ, x); }
+
+long GenPrime_long(long l, long err = 80);
+// This generates a random prime n of length l so that the
+// probability of erroneously returning a composite is bounded by 2^(-err).
+
+void GenGermainPrime(ZZ& n, long l, long err = 80);
+inline ZZ GenGermainPrime_ZZ(long l, long err = 80)
+{ ZZ x; GenGermainPrime(x, l, err); NTL_OPT_RETURN(ZZ, x); }
+
+long GenGermainPrime_long(long l, long err = 80);
+// This generates a random Germain prime n of length l so that the
+// probability that either n or 2*n+1 is composite is bounded by 2^(-err).
+
+
+long ProbPrime(const ZZ& n, long NumTrials = 10);
+// tests if n is prime; performs a little trial division,
+// followed by a single-precision MillerWitness test, followed by
+// up to NumTrials general MillerWitness tests.
+
+long MillerWitness(const ZZ& n, const ZZ& w);
+// Tests if w is a witness to primality a la Miller.
+// Assumption: n is odd and positive, 0 <= w < n.
+
+void RandomPrime(ZZ& n, long l, long NumTrials=10);
+// n = random l-bit prime
+
+inline ZZ RandomPrime_ZZ(long l, long NumTrials=10)
+ { ZZ x; RandomPrime(x, l, NumTrials); NTL_OPT_RETURN(ZZ, x); }
+
+void NextPrime(ZZ& n, const ZZ& m, long NumTrials=10);
+// n = smallest prime >= m.
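+
+// Editorial sketch (not part of the original NTL sources): generating and
+// testing primes with the routines declared above.
+//
+//    ZZ p = GenPrime_ZZ(512);   // 512-bit prime; error prob. <= 2^(-80)
+//    long ok = ProbPrime(p);    // 1, except with negligible probability
+//    ZZ q = NextPrime(p + 1);   // smallest prime >= p + 1
+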
+ +inline ZZ NextPrime(const ZZ& m, long NumTrials=10) + { ZZ x; NextPrime(x, m, NumTrials); NTL_OPT_RETURN(ZZ, x); } + +// single-precision versions + +long ProbPrime(long n, long NumTrials = 10); + + +long RandomPrime_long(long l, long NumTrials=10); + +long NextPrime(long l, long NumTrials=10); + + +/************************************************************ + + Exponentiation + +*************************************************************/ + +inline void power(ZZ& x, const ZZ& a, long e) + { NTL_zexp(a.rep, e, &x.rep); } + +inline ZZ power(const ZZ& a, long e) + { ZZ x; power(x, a, e); NTL_OPT_RETURN(ZZ, x); } + +inline void power(ZZ& x, long a, long e) + { NTL_zexps(a, e, &x.rep); } + +inline ZZ power_ZZ(long a, long e) + { ZZ x; power(x, a, e); NTL_OPT_RETURN(ZZ, x); } + +long power_long(long a, long e); + +void power2(ZZ& x, long e); + +inline ZZ power2_ZZ(long e) + { ZZ x; power2(x, e); NTL_OPT_RETURN(ZZ, x); } + + + + + +/************************************************************* + + Square Roots + +**************************************************************/ + + + + +inline void SqrRoot(ZZ& x, const ZZ& a) +// x = [a^{1/2}], a >= 0 + +{ + NTL_zsqrt(a.rep, &x.rep); +} + +inline ZZ SqrRoot(const ZZ& a) + { ZZ x; SqrRoot(x, a); NTL_OPT_RETURN(ZZ, x); } + + +inline long SqrRoot(long a) { return NTL_zsqrts(a); } +// single-precision version + + + +/*************************************************************** + + Modular Arithmetic + +***************************************************************/ + +// The following routines perform arithmetic mod n, n positive. +// All args (other than exponents) are assumed to be in the range 0..n-1. + + + +inline void AddMod(ZZ& x, const ZZ& a, const ZZ& b, const ZZ& n) +// x = (a+b)%n + { NTL_zaddmod(a.rep, b.rep, n.rep, &x.rep); } + + +inline ZZ AddMod(const ZZ& a, const ZZ& b, const ZZ& n) + { ZZ x; AddMod(x, a, b, n); NTL_OPT_RETURN(ZZ, x); } + +inline void SubMod(ZZ& x, const ZZ& a, const ZZ& b, const ZZ& n) +// x = (a-b)%n + + { NTL_zsubmod(a.rep, b.rep, n.rep, &x.rep); } + +inline ZZ SubMod(const ZZ& a, const ZZ& b, const ZZ& n) + { ZZ x; SubMod(x, a, b, n); NTL_OPT_RETURN(ZZ, x); } + +inline void NegateMod(ZZ& x, const ZZ& a, const ZZ& n) +// x = -a % n + + { NTL_zsubmod(0, a.rep, n.rep, &x.rep); } + +inline ZZ NegateMod(const ZZ& a, const ZZ& n) + { ZZ x; NegateMod(x, a, n); NTL_OPT_RETURN(ZZ, x); } + +void AddMod(ZZ& x, const ZZ& a, long b, const ZZ& n); +inline ZZ AddMod(const ZZ& a, long b, const ZZ& n) + { ZZ x; AddMod(x, a, b, n); NTL_OPT_RETURN(ZZ, x); } + +inline void AddMod(ZZ& x, long a, const ZZ& b, const ZZ& n) + { AddMod(x, b, a, n); } +inline ZZ AddMod(long a, const ZZ& b, const ZZ& n) + { ZZ x; AddMod(x, a, b, n); NTL_OPT_RETURN(ZZ, x); } + +void SubMod(ZZ& x, const ZZ& a, long b, const ZZ& n); +inline ZZ SubMod(const ZZ& a, long b, const ZZ& n) + { ZZ x; SubMod(x, a, b, n); NTL_OPT_RETURN(ZZ, x); } + +void SubMod(ZZ& x, long a, const ZZ& b, const ZZ& n); +inline ZZ SubMod(long a, const ZZ& b, const ZZ& n) + { ZZ x; SubMod(x, a, b, n); NTL_OPT_RETURN(ZZ, x); } + +inline void MulMod(ZZ& x, const ZZ& a, const ZZ& b, const ZZ& n) +// x = (a*b)%n + + { NTL_zmulmod(a.rep, b.rep, n.rep, &x.rep); } + +inline ZZ MulMod(const ZZ& a, const ZZ& b, const ZZ& n) + { ZZ x; MulMod(x, a, b, n); NTL_OPT_RETURN(ZZ, x); } + +inline void MulMod(ZZ& x, const ZZ& a, long b, const ZZ& n) +// x = (a*b)%n + + { NTL_zsmulmod(a.rep, b, n.rep, &x.rep); } + +inline ZZ MulMod(const ZZ& a, long b, const ZZ& n) + { ZZ x; MulMod(x, a, b, n); 
NTL_OPT_RETURN(ZZ, x); }
+
+inline void MulMod(ZZ& x, long a, const ZZ& b, const ZZ& n)
+ { MulMod(x, b, a, n); }
+
+inline ZZ MulMod(long a, const ZZ& b, const ZZ& n)
+ { ZZ x; MulMod(x, a, b, n); NTL_OPT_RETURN(ZZ, x); }
+
+
+inline void SqrMod(ZZ& x, const ZZ& a, const ZZ& n)
+// x = a^2 % n
+
+ { NTL_zsqmod(a.rep, n.rep, &x.rep); }
+
+inline ZZ SqrMod(const ZZ& a, const ZZ& n)
+ { ZZ x; SqrMod(x, a, n); NTL_OPT_RETURN(ZZ, x); }
+
+void InvMod(ZZ& x, const ZZ& a, const ZZ& n);
+// defined in ZZ.c in terms of InvModStatus
+
+inline ZZ InvMod(const ZZ& a, const ZZ& n)
+ { ZZ x; InvMod(x, a, n); NTL_OPT_RETURN(ZZ, x); }
+
+
+inline long InvModStatus(ZZ& x, const ZZ& a, const ZZ& n)
+// if gcd(a,n) = 1, then ReturnValue = 0, x = a^{-1} mod n
+// otherwise, ReturnValue = 1, x = gcd(a, n)
+
+ { return NTL_zinv(a.rep, n.rep, &x.rep); }
+
+
+void PowerMod(ZZ& x, const ZZ& a, const ZZ& e, const ZZ& n);
+// defined in ZZ.c in terms of LowLevelPowerMod
+
+inline void LowLevelPowerMod(ZZ& x, const ZZ& a, const ZZ& e, const ZZ& n)
+ { NTL_zpowermod(a.rep, e.rep, n.rep, &x.rep); }
+
+inline ZZ PowerMod(const ZZ& a, const ZZ& e, const ZZ& n)
+ { ZZ x; PowerMod(x, a, e, n); NTL_OPT_RETURN(ZZ, x); }
+
+inline void PowerMod(ZZ& x, const ZZ& a, long e, const ZZ& n)
+ { PowerMod(x, a, ZZ_expo(e), n); }
+
+inline ZZ PowerMod(const ZZ& a, long e, const ZZ& n)
+ { ZZ x; PowerMod(x, a, e, n); NTL_OPT_RETURN(ZZ, x); }
+
+
+
+
+
+
+/*************************************************************
+
+ Jacobi symbol and modular square roots
+
+**************************************************************/
+
+
+long Jacobi(const ZZ& a, const ZZ& n);
+// compute Jacobi symbol of a and n;
+// assumes 0 <= a < n, n odd
+
+void SqrRootMod(ZZ& x, const ZZ& a, const ZZ& n);
+// computes square root of a mod n;
+// assumes n is an odd prime, and that a is a square mod n
+
+inline ZZ SqrRootMod(const ZZ& a, const ZZ& n)
+ { ZZ x; SqrRootMod(x, a, n); NTL_OPT_RETURN(ZZ, x); }
+
+
+
+
+/*************************************************************
+
+
+ Small Prime Generation
+
+
+*************************************************************/
+
+
+// primes are generated in sequence, starting at 2,
+// and up until (2*NTL_PRIME_BND+1)^2, which is less than NTL_SP_BOUND.
+
+#if (NTL_SP_NBITS > 30)
+#define NTL_PRIME_BND ((1L << 14) - 1)
+#else
+#define NTL_PRIME_BND ((1L << (NTL_SP_NBITS/2-1)) - 1)
+#endif
+
+
+class PrimeSeq {
+
+const char *movesieve;
+Vec<char> movesieve_mem;
+long pindex;
+long pshift;
+long exhausted;
+
+public:
+
+PrimeSeq();
+
+long next();
+// returns next prime in the sequence.
+// returns 0 if list of small primes is exhausted.
+
+void reset(long b);
+// resets generator so that the next prime in the sequence
+// is the smallest prime >= b.
+
+private:
+
+PrimeSeq(const PrimeSeq&); // disabled
+void operator=(const PrimeSeq&); // disabled
+
+// auxiliary routines
+
+void start();
+void shift(long);
+
+};
+
+
+
+
+/**************************************************************
+
+ Input/Output
+
+***************************************************************/
+
+NTL_SNS istream& operator>>(NTL_SNS istream& s, ZZ& x);
+NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const ZZ& a);
+
+
+
+
+// Some additional SP arithmetic routines, not defined in sp_arith.h
+
+
+long InvMod(long a, long n);
+// computes a^{-1} mod n. Error is raised if undefined.
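+
+// Editorial sketch (not part of the original NTL sources): the modular
+// arithmetic routines above in action (all residues lie in [0, n)).
+//
+//    ZZ n(97), a(5);
+//    ZZ b = PowerMod(a, 96, n);   // Fermat: b == 1, since 97 is prime
+//    ZZ c = InvMod(a, n);         // c == 39, since 5*39 == 2*97 + 1
+//    ZZ d = MulMod(a, c, n);      // d == 1
+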
+ +long InvModStatus(long& x, long a, long n); +// if gcd(a,n) = 1, then ReturnValue = 0, x = a^{-1} mod n +// otherwise, ReturnValue = 1, x = gcd(a, n) + +long PowerMod(long a, long e, long n); +// computes a^e mod n, e >= 0 + + +// Error handling + +#ifdef NTL_EXCEPTIONS + +class InvModErrorObject : public ArithmeticErrorObject { +private: + SmartPtr a_ptr; + SmartPtr n_ptr; +public: + InvModErrorObject(const char *s, const ZZ& a, const ZZ& n) + : ArithmeticErrorObject(s) , a_ptr(MakeSmart(a)), + n_ptr(MakeSmart(n)) { } + + const ZZ& get_a() const { return *a_ptr; } + const ZZ& get_n() const { return *n_ptr; } +}; + +#else + +// We need this alt definition to keep pre-C++11 +// compilers happy (NTL_EXCEPTIONS should only be used +// with C++11 compilers). + +class InvModErrorObject : public ArithmeticErrorObject { +public: + InvModErrorObject(const char *s, const ZZ& a, const ZZ& n) + : ArithmeticErrorObject(s) { } + + const ZZ& get_a() const { return ZZ::zero(); } + const ZZ& get_n() const { return ZZ::zero(); } +}; + +#endif + + +void InvModError(const char *s, const ZZ& a, const ZZ& n); + +NTL_CLOSE_NNS + + +#endif + diff --git a/thirdparty/linux/ntl/include/NTL/ZZVec.h b/thirdparty/linux/ntl/include/NTL/ZZVec.h new file mode 100644 index 0000000000..e07b293c11 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/ZZVec.h @@ -0,0 +1,61 @@ +#ifndef NTL_ZZVec__H +#define NTL_ZZVec__H + +#include + +NTL_OPEN_NNS + +/***************************************************************** + +The class ZZVec implements vectors of fixed-length ZZ's. +You can allocate a vector of ZZ's of a specified length, where +the maximum size of each ZZ is also specified. +These parameters can be specified once, either with a constructor, +or with SetSize. +It is an error to try to re-size a vector, or store a ZZ that +doesn't fit. +The space can be released with "kill", and then you are free to +call SetSize again. +If you want more flexible---but less efficient---vectors, +use vec_ZZ. 
+
+*****************************************************************/
+
+
+
+class ZZVec {
+
+private:
+ ZZ* v;
+ long len;
+ long bsize;
+
+
+public:
+ ZZVec& operator=(const ZZVec&);
+ ZZVec(const ZZVec&);
+
+ long length() const { return len; }
+ long BaseSize() const { return bsize; }
+ void SetSize(long n, long d);
+ void kill();
+
+ ZZVec() : v(0), len(0), bsize(0) { }
+ ZZVec(long n, long d) : v(0), len(0), bsize(0) { SetSize(n, d); }
+ ~ZZVec() { kill(); }
+
+ ZZ* elts() { return v; }
+ const ZZ* elts() const { return v; }
+
+ ZZ& operator[](long i) { return v[i]; }
+ const ZZ& operator[](long i) const { return v[i]; }
+
+ void swap(ZZVec& x);
+
+};
+
+inline void swap(ZZVec& x, ZZVec& y) { x.swap(y); }
+
+NTL_CLOSE_NNS
+
+#endif
diff --git a/thirdparty/linux/ntl/include/NTL/ZZX.h b/thirdparty/linux/ntl/include/NTL/ZZX.h
new file mode 100644
index 0000000000..a94d2153f0
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/ZZX.h
@@ -0,0 +1,754 @@
+
+#ifndef NTL_ZZX__H
+#define NTL_ZZX__H
+
+#include <NTL/vec_ZZ.h>
+#include <NTL/lzz_pX.h>
+#include <NTL/ZZ_pX.h>
+
+NTL_OPEN_NNS
+
+
+class ZZX {
+
+public:
+
+vec_ZZ rep;
+
+
+/***************************************************************
+
+ Constructors, Destructors, and Assignment
+
+****************************************************************/
+
+
+ZZX() { }
+// initial value 0
+
+explicit ZZX(long a) { *this = a; }
+explicit ZZX(const ZZ& a) { *this = a; }
+
+ZZX(INIT_SIZE_TYPE, long n)
+// initial value 0, but space is pre-allocated for n coefficients
+
+ { rep.SetMaxLength(n); }
+
+ZZX(const ZZX& a) : rep(a.rep) { }
+// initial value is a
+
+
+ZZX& operator=(const ZZX& a)
+ { rep = a.rep; return *this; }
+
+~ZZX() { }
+
+void normalize();
+// strip leading zeros
+
+void SetMaxLength(long n)
+// pre-allocate space for n coefficients.
+// Value is unchanged
+
+ { rep.SetMaxLength(n); }
+
+
+void kill()
+// free space held by this polynomial. Value becomes 0.
+
+ { rep.kill(); }
+
+
+
+typedef ZZ coeff_type;
+void SetLength(long n) { rep.SetLength(n); }
+ZZ& operator[](long i) { return rep[i]; }
+const ZZ& operator[](long i) const { return rep[i]; }
+
+
+
+
+static const ZZX& zero();
+
+inline ZZX(long i, const ZZ& c);
+inline ZZX(long i, long c);
+
+inline ZZX(INIT_MONO_TYPE, long i, const ZZ& c);
+inline ZZX(INIT_MONO_TYPE, long i, long c);
+inline ZZX(INIT_MONO_TYPE, long i);
+
+
+inline ZZX& operator=(long a);
+inline ZZX& operator=(const ZZ& a);
+
+
+ZZX(ZZX& x, INIT_TRANS_TYPE) : rep(x.rep, INIT_TRANS) { }
+
+void swap(ZZX& x) { rep.swap(x.rep); }
+// swap with x (only pointers are swapped)
+
+};
+
+
+
+
+/********************************************************************
+
+ input and output
+
+I/O format:
+
+ [a_0 a_1 ... a_n],
+
+represents the polynomial a_0 + a_1*X + ... + a_n*X^n.
+
+On output, a_n is not zero (the zero polynomial is [ ]).
+Leading zeros are stripped.
+
+*********************************************************************/
+
+
+NTL_SNS istream& operator>>(NTL_SNS istream& s, ZZX& x);
+NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const ZZX& a);
+
+
+
+
+/**********************************************************
+
+ Some utility routines
+
+***********************************************************/
+
+
+inline long deg(const ZZX& a) { return a.rep.length() - 1; }
+// degree of a polynomial.
+// note that the zero polynomial has degree -1.
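+
+// Editorial sketch (not part of the original NTL sources): building a
+// polynomial coefficient by coefficient with SetCoeff, declared just below.
+//
+//    ZZX f;
+//    SetCoeff(f, 2);      // f = X^2
+//    SetCoeff(f, 0, -1);  // f = X^2 - 1
+//    long d = deg(f);     // d == 2
+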
+ +const ZZ& coeff(const ZZX& a, long i); +// zero if i not in range + +void GetCoeff(ZZ& x, const ZZX& a, long i); +// x = a[i], or zero if i not in range + +const ZZ& LeadCoeff(const ZZX& a); +// zero if a == 0 + +const ZZ& ConstTerm(const ZZX& a); +// zero if a == 0 + +void SetCoeff(ZZX& x, long i, const ZZ& a); +// x[i] = a, error is raised if i < 0 + +void SetCoeff(ZZX& x, long i, long a); +// x[i] = a, error is raised if i < 0 + +void SetCoeff(ZZX& x, long i); +// x[i] = 1, error is raised if i < 0 + +inline ZZX::ZZX(long i, const ZZ& a) { SetCoeff(*this, i, a); } +inline ZZX::ZZX(long i, long a) { SetCoeff(*this, i, a); } + +inline ZZX::ZZX(INIT_MONO_TYPE, long i, const ZZ& a) { SetCoeff(*this, i, a); } +inline ZZX::ZZX(INIT_MONO_TYPE, long i, long a) { SetCoeff(*this, i, a); } +inline ZZX::ZZX(INIT_MONO_TYPE, long i) { SetCoeff(*this, i); } + + +void SetX(ZZX& x); +// x is set to the monomial X + +long IsX(const ZZX& a); +// test if x = X + +inline void clear(ZZX& x) +// x = 0 + + { x.rep.SetLength(0); } + +inline void set(ZZX& x) +// x = 1 + + { x.rep.SetLength(1); set(x.rep[0]); } + +inline void swap(ZZX& x, ZZX& y) +// swap x & y (only pointers are swapped) + + { x.swap(y); } + +void trunc(ZZX& x, const ZZX& a, long m); +// x = a % X^m + +inline ZZX trunc(const ZZX& a, long m) + { ZZX x; trunc(x, a, m); NTL_OPT_RETURN(ZZX, x); } + +void RightShift(ZZX& x, const ZZX& a, long n); +// x = a/X^n + +inline ZZX RightShift(const ZZX& a, long n) + { ZZX x; RightShift(x, a, n); NTL_OPT_RETURN(ZZX, x); } + +void LeftShift(ZZX& x, const ZZX& a, long n); +// x = a*X^n + +inline ZZX LeftShift(const ZZX& a, long n) + { ZZX x; LeftShift(x, a, n); NTL_OPT_RETURN(ZZX, x); } + + +#ifndef NTL_TRANSITION + +inline ZZX operator>>(const ZZX& a, long n) + { ZZX x; RightShift(x, a, n); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator<<(const ZZX& a, long n) + { ZZX x; LeftShift(x, a, n); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX& operator<<=(ZZX& x, long n) + { LeftShift(x, x, n); return x; } + +inline ZZX& operator>>=(ZZX& x, long n) + { RightShift(x, x, n); return x; } + +#endif + + +void diff(ZZX& x, const ZZX& a); +// x = derivative of a + +inline ZZX diff(const ZZX& a) + { ZZX x; diff(x, a); NTL_OPT_RETURN(ZZX, x); } + +void InvTrunc(ZZX& x, const ZZX& a, long m); +// computes x = a^{-1} % X^m +// constant term must be non-zero + +inline ZZX InvTrunc(const ZZX& a, long m) + { ZZX x; InvTrunc(x, a, m); NTL_OPT_RETURN(ZZX, x); } + +void MulTrunc(ZZX& x, const ZZX& a, const ZZX& b, long n); +// x = a * b % X^n + +inline ZZX MulTrunc(const ZZX& a, const ZZX& b, long n) + { ZZX x; MulTrunc(x, a, b, n); NTL_OPT_RETURN(ZZX, x); } + +void SqrTrunc(ZZX& x, const ZZX& a, long n); +// x = a^2 % X^n + +inline ZZX SqrTrunc(const ZZX& a, long n) + { ZZX x; SqrTrunc(x, a, n); NTL_OPT_RETURN(ZZX, x); } + +void reverse(ZZX& c, const ZZX& a, long hi); + +inline ZZX reverse(const ZZX& a, long hi) + { ZZX x; reverse(x, a, hi); NTL_OPT_RETURN(ZZX, x); } + +inline void reverse(ZZX& c, const ZZX& a) +{ reverse(c, a, deg(a)); } + +inline ZZX reverse(const ZZX& a) + { ZZX x; reverse(x, a); NTL_OPT_RETURN(ZZX, x); } + + +inline void VectorCopy(vec_ZZ& x, const ZZX& a, long n) + { VectorCopy(x, a.rep, n); } + +inline vec_ZZ VectorCopy(const ZZX& a, long n) + { return VectorCopy(a.rep, n); } + + + + + + + +/******************************************************************* + + conversion routines + +********************************************************************/ + + +void conv(ZZX& x, long a); +inline ZZX to_ZZX(long a) 
+ { ZZX x; conv(x, a); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX& ZZX::operator=(long a) + { conv(*this, a); return *this; } + +void conv(ZZX& x, const ZZ& a); +inline ZZX to_ZZX(const ZZ& a) + { ZZX x; conv(x, a); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX& ZZX::operator=(const ZZ& a) + { conv(*this, a); return *this; } + +void conv(ZZX& x, const vec_ZZ& a); +inline ZZX to_ZZX(const vec_ZZ& a) + { ZZX x; conv(x, a); NTL_OPT_RETURN(ZZX, x); } + +void conv(zz_pX& x, const ZZX& a); +inline zz_pX to_zz_pX(const ZZX& a) + { zz_pX x; conv(x, a); NTL_OPT_RETURN(zz_pX, x); } + +void conv(ZZ_pX& x, const ZZX& a); +inline ZZ_pX to_ZZ_pX(const ZZX& a) + { ZZ_pX x; conv(x, a); NTL_OPT_RETURN(ZZ_pX, x); } + +void conv(ZZX& x, const ZZ_pX& a); +inline ZZX to_ZZX(const ZZ_pX& a) + { ZZX x; conv(x, a); NTL_OPT_RETURN(ZZX, x); } + +void conv(ZZX& x, const zz_pX& a); +inline ZZX to_ZZX(const zz_pX& a) + { ZZX x; conv(x, a); NTL_OPT_RETURN(ZZX, x); } + + + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(ZZX& x, const ZZX& a) + { x = a; } + +inline void conv(vec_ZZ& x, const ZZX& a) + { x = a.rep; } + + +/* ------------------------------------- */ + + + +/************************************************************* + + Comparison + +**************************************************************/ + +long IsZero(const ZZX& a); + +long IsOne(const ZZX& a); + +long operator==(const ZZX& a, const ZZX& b); + +inline long operator!=(const ZZX& a, const ZZX& b) { return !(a == b); } + +long operator==(const ZZX& a, const ZZ& b); +long operator==(const ZZX& a, long b); + +inline long operator==(const ZZ& a, const ZZX& b) { return b == a; } +inline long operator==(long a, const ZZX& b) { return b == a; } + +inline long operator!=(const ZZX& a, const ZZ& b) { return !(a == b); } +inline long operator!=(const ZZX& a, long b) { return !(a == b); } +inline long operator!=(const ZZ& a, const ZZX& b) { return !(a == b); } +inline long operator!=(long a, const ZZX& b) { return !(a == b); } + + +/*************************************************************** + + Addition + +****************************************************************/ + +void add(ZZX& x, const ZZX& a, const ZZX& b); +// x = a + b + +void sub(ZZX& x, const ZZX& a, const ZZX& b); +// x = a - b + +void negate(ZZX& x, const ZZX& a); +// x = -a + +// scalar versions + + +void add(ZZX & x, const ZZX& a, const ZZ& b); // x = a + b +void add(ZZX& x, const ZZX& a, long b); + +inline void add(ZZX& x, const ZZ& a, const ZZX& b) { add(x, b, a); } +inline void add(ZZX& x, long a, const ZZX& b) { add(x, b, a); } + + +void sub(ZZX & x, const ZZX& a, const ZZ& b); // x = a - b +void sub(ZZX& x, const ZZX& a, long b); + +void sub(ZZX& x, const ZZ& a, const ZZX& b); +void sub(ZZX& x, long a, const ZZX& b); + + +inline ZZX operator+(const ZZX& a, const ZZX& b) + { ZZX x; add(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator+(const ZZX& a, const ZZ& b) + { ZZX x; add(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator+(const ZZX& a, long b) + { ZZX x; add(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator+(const ZZ& a, const ZZX& b) + { ZZX x; add(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator+(long a, const ZZX& b) + { ZZX x; add(x, a, b); NTL_OPT_RETURN(ZZX, x); } + + +inline ZZX operator-(const ZZX& a, const ZZX& b) + { ZZX x; sub(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator-(const ZZX& a, const ZZ& b) + { ZZX x; sub(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator-(const ZZX& a, 
long b) + { ZZX x; sub(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator-(const ZZ& a, const ZZX& b) + { ZZX x; sub(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator-(long a, const ZZX& b) + { ZZX x; sub(x, a, b); NTL_OPT_RETURN(ZZX, x); } + + +inline ZZX& operator+=(ZZX& x, const ZZX& b) + { add(x, x, b); return x; } + +inline ZZX& operator+=(ZZX& x, const ZZ& b) + { add(x, x, b); return x; } + +inline ZZX& operator+=(ZZX& x, long b) + { add(x, x, b); return x; } + +inline ZZX& operator-=(ZZX& x, const ZZX& b) + { sub(x, x, b); return x; } + +inline ZZX& operator-=(ZZX& x, const ZZ& b) + { sub(x, x, b); return x; } + +inline ZZX& operator-=(ZZX& x, long b) + { sub(x, x, b); return x; } + + +inline ZZX operator-(const ZZX& a) + { ZZX x; negate(x, a); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX& operator++(ZZX& x) { add(x, x, 1); return x; } +inline void operator++(ZZX& x, int) { add(x, x, 1); } +inline ZZX& operator--(ZZX& x) { sub(x, x, 1); return x; } +inline void operator--(ZZX& x, int) { sub(x, x, 1); } + + +/***************************************************************** + + Multiplication + +******************************************************************/ + + +void mul(ZZX& x, const ZZX& a, const ZZX& b); +// x = a * b + + +void sqr(ZZX& x, const ZZX& a); +inline ZZX sqr(const ZZX& a) + { ZZX x; sqr(x, a); NTL_OPT_RETURN(ZZX, x); } +// x = a^2 + +void PlainMul(ZZX& x, const ZZX& a, const ZZX& b); +void PlainSqr(ZZX& x, const ZZX& a); + +void KarMul(ZZX& x, const ZZX& a, const ZZX& b); +void KarSqr(ZZX& x, const ZZX& a); + +void HomMul(ZZX& x, const ZZX& a, const ZZX& b); +void HomSqr(ZZX& x, const ZZX& a); + +void SSMul(ZZX& x, const ZZX& a, const ZZX& b); +void SSSqr(ZZX& x, const ZZX& a); + +double SSRatio(long na, long maxa, long nb, long maxb); + + +void mul(ZZX & x, const ZZX& a, const ZZ& b); +void mul(ZZX& x, const ZZX& a, long b); + +inline void mul(ZZX& x, const ZZ& a, const ZZX& b) { mul(x, b, a); } +inline void mul(ZZX& x, long a, const ZZX& b) { mul(x, b, a); } + + +inline ZZX operator*(const ZZX& a, const ZZX& b) + { ZZX x; mul(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator*(const ZZX& a, const ZZ& b) + { ZZX x; mul(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator*(const ZZX& a, long b) + { ZZX x; mul(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator*(const ZZ& a, const ZZX& b) + { ZZX x; mul(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator*(long a, const ZZX& b) + { ZZX x; mul(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX& operator*=(ZZX& x, const ZZX& b) + { mul(x, x, b); return x; } + +inline ZZX& operator*=(ZZX& x, const ZZ& b) + { mul(x, x, b); return x; } + +inline ZZX& operator*=(ZZX& x, long b) + { mul(x, x, b); return x; } + + + + + + +/************************************************************* + + Division + +**************************************************************/ + + + +// "plain" versions +void PlainPseudoDivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b); +void PlainPseudoDiv(ZZX& q, const ZZX& a, const ZZX& b); +void PlainPseudoRem(ZZX& r, const ZZX& a, const ZZX& b); + +// "homomorphic imaging" versions +void HomPseudoDivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b); +void HomPseudoDiv(ZZX& q, const ZZX& a, const ZZX& b); +void HomPseudoRem(ZZX& r, const ZZX& a, const ZZX& b); + +inline void PseudoDivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b) +// performs pseudo-division: computes q and r +// with deg(r) < deg(b), and LeadCoeff(b)^(deg(a)-deg(b)+1) a = b q + r. 
+// current implementation always defaults to "plain" + + { PlainPseudoDivRem(q, r, a, b); } + +inline void PseudoDiv(ZZX& q, const ZZX& a, const ZZX& b) + + { PlainPseudoDiv(q, a, b); } + +inline void PseudoRem(ZZX& r, const ZZX& a, const ZZX& b) + + { PlainPseudoRem(r, a, b); } + +inline ZZX PseudoDiv(const ZZX& a, const ZZX& b) + { ZZX x; PseudoDiv(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX PseudoRem(const ZZX& a, const ZZX& b) + { ZZX x; PseudoRem(x, a, b); NTL_OPT_RETURN(ZZX, x); } + + +#ifndef NTL_TRANSITION + +void DivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b); + +void div(ZZX& q, const ZZX& a, const ZZX& b); +void div(ZZX& q, const ZZX& a, const ZZ& b); +void div(ZZX& q, const ZZX& a, long b); + +void rem(ZZX& r, const ZZX& a, const ZZX& b); + +inline ZZX operator/(const ZZX& a, const ZZX& b) + { ZZX x; div(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator/(const ZZX& a, const ZZ& b) + { ZZX x; div(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX operator/(const ZZX& a, long b) + { ZZX x; div(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX& operator/=(ZZX& x, const ZZ& b) + { div(x, x, b); return x; } + +inline ZZX& operator/=(ZZX& x, long b) + { div(x, x, b); return x; } + +inline ZZX& operator/=(ZZX& x, const ZZX& b) + { div(x, x, b); return x; } + + +inline ZZX operator%(const ZZX& a, const ZZX& b) + { ZZX x; rem(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +inline ZZX& operator%=(ZZX& x, const ZZX& b) + { rem(x, x, b); return x; } + +#endif + + +// Modular arithemtic---f must be monic, and other args +// must have degree less than that of f + +void MulMod(ZZX& x, const ZZX& a, const ZZX& b, const ZZX& f); + +inline ZZX MulMod(const ZZX& a, const ZZX& b, const ZZX& f) + { ZZX x; MulMod(x, a, b, f); NTL_OPT_RETURN(ZZX, x); } + +void SqrMod(ZZX& x, const ZZX& a, const ZZX& f); + +inline ZZX SqrMod(const ZZX& a, const ZZX& f) + { ZZX x; SqrMod(x, a, f); NTL_OPT_RETURN(ZZX, x); } + +void MulByXMod(ZZX& x, const ZZX& a, const ZZX& f); + +inline ZZX MulByXMod(const ZZX& a, const ZZX& f) + { ZZX x; MulByXMod(x, a, f); NTL_OPT_RETURN(ZZX, x); } + + +// these always use "plain" division +long PlainDivide(ZZX& q, const ZZX& a, const ZZX& b); +long PlainDivide(const ZZX& a, const ZZX& b); + +// these always use "homomorphic imaging" +long HomDivide(ZZX& q, const ZZX& a, const ZZX& b); +long HomDivide(const ZZX& a, const ZZX& b); + +long divide(ZZX& q, const ZZX& a, const ZZX& b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0 + +long divide(const ZZX& a, const ZZX& b); + + +long divide(ZZX& q, const ZZX& a, const ZZ& b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0 + +long divide(const ZZX& a, const ZZ& b); +// if b | a, returns 1; otherwise returns 0 + +//single-precision versions +long divide(ZZX& q, const ZZX& a, long b); +long divide(const ZZX& a, long b); + + + +void content(ZZ& d, const ZZX& f); +// d = content of f, sign(d) == sign(LeadCoeff(f)) + +inline ZZ content(const ZZX& f) + { ZZ x; content(x, f); NTL_OPT_RETURN(ZZ, x); } + + + +void PrimitivePart(ZZX& pp, const ZZX& f); +// pp = primitive part of f, LeadCoeff(pp) >= 0 + +inline ZZX PrimitivePart(const ZZX& f) + { ZZX x; PrimitivePart(x, f); NTL_OPT_RETURN(ZZX, x); } + + +void GCD(ZZX& d, const ZZX& a, const ZZX& b); +// d = gcd(a, b), LeadCoeff(d) >= 0 + +inline ZZX GCD(const ZZX& a, const ZZX& b) + { ZZX x; GCD(x, a, b); NTL_OPT_RETURN(ZZX, x); } + +long MaxBits(const ZZX& f); +// returns max NumBits of coefficients of f + +long CharPolyBound(const ZZX& a, const ZZX& f); + + 
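+
+// Editorial sketch (not part of the original NTL sources): exact division
+// and GCDs over ZZ[X] with the routines above.
+//
+//    ZZX f, g, q;
+//    SetCoeff(f, 2); SetCoeff(f, 0, -1);   // f = X^2 - 1
+//    SetCoeff(g, 1); SetCoeff(g, 0, -1);   // g = X - 1
+//    long exact = divide(q, f, g);         // exact == 1, q == X + 1
+//    ZZX d = GCD(f, g);                    // d == X - 1 (LeadCoeff(d) >= 0)
+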
+ +/*************************************************************** + + traces, norms, resultants + +****************************************************************/ + +void TraceVec(vec_ZZ& S, const ZZX& f); +// S[i] = Trace(X^i mod f), for i = 0..deg(f)-1. +// f must be a monic polynomial. + +inline vec_ZZ TraceVec(const ZZX& f) + { vec_ZZ x; TraceVec(x, f); NTL_OPT_RETURN(vec_ZZ, x); } + +void TraceMod(ZZ& res, const ZZX& a, const ZZX& f); +inline ZZ TraceMod(const ZZX& a, const ZZX& f) + { ZZ x; TraceMod(x, a, f); NTL_OPT_RETURN(ZZ, x); } +// res = trace of (a mod f) +// f must be monic + + +void resultant(ZZ& res, const ZZX& a, const ZZX& b, long deterministic=0); +inline ZZ resultant(const ZZX& a, const ZZX& b, long deterministic=0) + { ZZ x; resultant(x, a, b, deterministic); NTL_OPT_RETURN(ZZ, x); } + +// res = resultant of a and b +// if !deterministic, then it may use a randomized strategy +// that errs with probability no more than 2^{-80}. + +void NormMod(ZZ& res, const ZZX& a, const ZZX& f, long deterministic=0); +inline ZZ NormMod(const ZZX& a, const ZZX& f, long deterministic=0) + { ZZ x; NormMod(x, a, f, deterministic); NTL_OPT_RETURN(ZZ, x); } +// res = norm of (a mod f) +// f must be monic +// if !deterministic, then it may use a randomized strategy +// that errs with probability no more than 2^{-80}. + + +void discriminant(ZZ& d, const ZZX& a, long deterministic=0); +inline ZZ discriminant(const ZZX& a, long deterministic=0) + { ZZ x; discriminant(x, a, deterministic); NTL_OPT_RETURN(ZZ, x); } +// d = discriminant of a +// = (-1)^{m(m-1)/2} resultant(a, a')/lc(a), +// where m = deg(a) +// if !deterministic, then it may use a randomized strategy +// that errs with probability no more than 2^{-80}. + + +void CharPolyMod(ZZX& g, const ZZX& a, const ZZX& f, long deterministic=0); +inline ZZX CharPolyMod(const ZZX& a, const ZZX& f, long deterministic=0) + { ZZX x; CharPolyMod(x, a, f, deterministic); NTL_OPT_RETURN(ZZX, x); } +// g = char poly of (a mod f) +// f must be monic +// if !deterministic, then it may use a randomized strategy +// that errs with probability no more than 2^{-80}. + + +void MinPolyMod(ZZX& g, const ZZX& a, const ZZX& f); +inline ZZX MinPolyMod(const ZZX& a, const ZZX& f) + { ZZX x; MinPolyMod(x, a, f); NTL_OPT_RETURN(ZZX, x); } +// g = min poly of (a mod f) +// f must be monic +// may use a probabilistic strategy that errs with +// probability no more than 2^{-80} + + +void XGCD(ZZ& r, ZZX& s, ZZX& t, const ZZX& a, const ZZX& b, + long deterministic=0); +// r = resultant of a and b; +// if r != 0, then computes s and t such that: +// a*s + b*t = r; +// otherwise s and t not affected. +// if !deterministic, then resultant computation may use a randomized strategy +// that errs with probability no more than 2^{-80}. + + + +/****************************************************** + + Incremental Chinese Remaindering + +*******************************************************/ + +long CRT(ZZX& a, ZZ& prod, const zz_pX& A); +long CRT(ZZX& a, ZZ& prod, const ZZ_pX& A); +// If p is the current modulus with (p, prod) = 1; +// Computes b such that b = a mod prod and b = A mod p, +// with coefficients in the interval (-p*prod/2, p*prod/2]; +// Sets a = b, prod = p*prod, and returns 1 if a's value changed. 
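+
+// Editorial sketch (not part of the original NTL sources): the incremental
+// CRT loop this interface supports. "bound" and the mod-p computation are
+// application-specific placeholders; zz_p::init comes from <NTL/lzz_p.h>.
+//
+//    ZZX a; ZZ prod(1);
+//    PrimeSeq s;
+//    while (prod < bound) {
+//       long p = s.next();
+//       zz_p::init(p);
+//       zz_pX A = /* the answer mod p */;
+//       CRT(a, prod, A);   // now a is correct modulo prod
+//    }
+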
+
+
+
+
+typedef Vec<ZZX> vec_ZZX;
+
+
+NTL_CLOSE_NNS
+
+#endif
diff --git a/thirdparty/linux/ntl/include/NTL/ZZXFactoring.h b/thirdparty/linux/ntl/include/NTL/ZZXFactoring.h
new file mode 100644
index 0000000000..0c90649704
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/ZZXFactoring.h
@@ -0,0 +1,65 @@
+#ifndef NTL_ZZXFactoring__H
+#define NTL_ZZXFactoring__H
+
+
+#include <NTL/ZZX.h>
+#include <NTL/pair_ZZX_long.h>
+
+NTL_OPEN_NNS
+
+void mul(ZZX& x, const vec_pair_ZZX_long& a);
+inline ZZX mul(const vec_pair_ZZX_long& v)
+ { ZZX x; mul(x, v); return x; }
+
+void SquareFreeDecomp(vec_pair_ZZX_long& u, const ZZX& f);
+inline vec_pair_ZZX_long SquareFreeDecomp(const ZZX& f)
+ { vec_pair_ZZX_long x; SquareFreeDecomp(x, f); return x; }
+
+// input is primitive, with positive leading coefficient
+
+void MultiLift(vec_ZZX& A, const vec_zz_pX& a, const ZZX& f, long e,
+ long verbose=0);
+
+// Using current value p of zz_p::modulus(), this lifts
+// the square-free factorization a mod p of f to a factorization
+// A mod p^e of f.
+// It is required that f and all the polynomials in a are monic.
+
+
+
+void SFFactor(vec_ZZX& factors, const ZZX& f,
+ long verbose=0,
+ long bnd=0);
+
+inline vec_ZZX SFFactor(const ZZX& f, long verbose=0, long bnd=0)
+ { vec_ZZX x; SFFactor(x, f, verbose, bnd); return x; }
+
+// input f is primitive and square-free, with positive leading
+// coefficient.
+
+// bnd, if not zero, indicates that
+// f divides a polynomial h whose Euclidean norm
+// is bounded by 2^{bnd} in absolute value.
+
+extern NTL_CHEAP_THREAD_LOCAL long ZZXFac_MaxPrune;
+extern NTL_CHEAP_THREAD_LOCAL long ZZXFac_InitNumPrimes;
+extern NTL_CHEAP_THREAD_LOCAL long ZZXFac_MaxNumPrimes;
+extern NTL_CHEAP_THREAD_LOCAL long ZZXFac_PowerHack;
+extern NTL_CHEAP_THREAD_LOCAL long ZZXFac_van_Hoeij;
+
+
+void factor(ZZ& c,
+ vec_pair_ZZX_long& factors,
+ const ZZX& f,
+ long verbose=0,
+ long bnd=0);
+
+// input f is an arbitrary polynomial.
+// c is the content of f, and factors is the factorization
+// of its primitive part.
+
+// bnd is as in SFFactor.
+
+NTL_CLOSE_NNS
+
+#endif
diff --git a/thirdparty/linux/ntl/include/NTL/ZZ_p.h b/thirdparty/linux/ntl/include/NTL/ZZ_p.h
new file mode 100644
index 0000000000..6835da4ece
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/ZZ_p.h
@@ -0,0 +1,552 @@
+
+
+#ifndef NTL_ZZ_p__H
+#define NTL_ZZ_p__H
+
+#include <NTL/ZZ.h>
+#include <NTL/ZZVec.h>
+#include <NTL/SmartPtr.h>
+#include <NTL/sp_arith.h>
+
+NTL_OPEN_NNS
+
+
+// ZZ_p representation: each ZZ_p is represented by a ZZ in the range 0..p-1.
+
+
+class ZZ_pFFTInfoT {
+private:
+ ZZ_pFFTInfoT(const ZZ_pFFTInfoT&); // disabled
+ void operator=(const ZZ_pFFTInfoT&); // disabled
+
+public:
+ ZZ_pFFTInfoT() { }
+
+ long NumPrimes;
+ long MaxRoot;
+ ZZ MinusMModP; // -M mod p, M = product of primes
+ ZZ_CRTStructAdapter crt_struct;
+ ZZ_RemStructAdapter rem_struct;
+
+
+ // the following arrays are indexed 0..NumPrimes-1
+ // q[i] = FFTPrime[i]
+ Vec<long> prime; // prime[i] = q[i]
+ Vec<double> prime_recip; // prime_recip[i] = 1/double(q[i])
+ Vec<long> u; // u[i] = (M/q[i])^{-1} mod q[i]
+ Vec<mulmod_precon_t> uqinv;
+
+ ZZ_ReduceStructAdapter reduce_struct;
+
+};
+
+
+class ZZ_pInfoT {
+private:
+ ZZ_pInfoT(); // disabled
+ ZZ_pInfoT(const ZZ_pInfoT&); // disabled
+ void operator=(const ZZ_pInfoT&); // disabled
+public:
+ ZZ_pInfoT(const ZZ& NewP);
+
+ ZZ p; // the modulus
+ long size; // p.size()
+ long ExtendedModulusSize;
+
+ Lazy<ZZ_pFFTInfoT> FFTInfo;
+
+};
+
+
+
+// auxiliary data structures to store space for temporaries
+// used by the crt and rem routines in the low-level lip module.
+// These used to be stored in data structures managed by the +// lip module, but to achieve thread-safety, they have to be +// externally on a per-thread basis. + +class ZZ_pTmpSpaceT { +public: + ZZ_TmpVecAdapter crt_tmp_vec; + ZZ_TmpVecAdapter rem_tmp_vec; +}; + + +extern +NTL_CHEAP_THREAD_LOCAL +ZZ_pInfoT *ZZ_pInfo; +// info for current modulus, initially null +// plain pointer for faster TLS access + +extern +NTL_CHEAP_THREAD_LOCAL +ZZ_pTmpSpaceT *ZZ_pTmpSpace; +// space for temps associated with current modulus, +// plain pointer for faster TLS access + +extern +NTL_CHEAP_THREAD_LOCAL +bool ZZ_pInstalled; +// flag indicating if current modulus is fully installed + + + + +class ZZ_pContext { +private: +SmartPtr ptr; + +public: + +ZZ_pContext() { } +explicit ZZ_pContext(const ZZ& p) : ptr(MakeSmart(p)) { } + +// copy constructor, assignment, destructor: default + +void save(); +void restore() const; + +}; + + +class ZZ_pBak { +private: +ZZ_pContext c; +bool MustRestore; + +ZZ_pBak(const ZZ_pBak&); // disabled +void operator=(const ZZ_pBak&); // disabled + +public: +void save(); +void restore(); + +ZZ_pBak() : MustRestore(false) { } + +~ZZ_pBak(); + + +}; + +class ZZ_pPush { +private: +ZZ_pBak bak; + +ZZ_pPush(const ZZ_pPush&); // disabled +void operator=(const ZZ_pPush&); // disabled + +public: +ZZ_pPush() { bak.save(); } +explicit ZZ_pPush(const ZZ_pContext& context) { bak.save(); context.restore(); } +explicit ZZ_pPush(const ZZ& p) { bak.save(); ZZ_pContext c(p); c.restore(); } + + +}; + + +class ZZ_pX; // forward declaration + +class ZZ_p { + +public: +typedef ZZ rep_type; +typedef ZZ_pContext context_type; +typedef ZZ_pBak bak_type; +typedef ZZ_pPush push_type; +typedef ZZ_pX poly_type; + + + +ZZ _ZZ_p__rep; + + +static void init(const ZZ&); + + +typedef void (*DivHandlerPtr)(const ZZ_p& a); // error-handler for division + +static +NTL_CHEAP_THREAD_LOCAL +DivHandlerPtr DivHandler; + + +// ****** constructors and assignment + +ZZ_p() { } // NO_ALLOC +explicit ZZ_p(long a) { *this = a; } + +ZZ_p(const ZZ_p& a) { _ZZ_p__rep = a._ZZ_p__rep; } // NO_ALLOC + +ZZ_p(INIT_NO_ALLOC_TYPE) { } // allocates no space +ZZ_p(INIT_ALLOC_TYPE) { _ZZ_p__rep.SetSize(ZZ_pInfo->size); } // allocates space + +~ZZ_p() { } + +ZZ_p& operator=(const ZZ_p& a) { _ZZ_p__rep = a._ZZ_p__rep; return *this; } + +inline ZZ_p& operator=(long a); + +// You can always access the _ZZ_p__representation directly...if you dare. 
+ZZ& LoopHole() { return _ZZ_p__rep; } + +ZZ_p(ZZ_p& x, INIT_TRANS_TYPE) : _ZZ_p__rep(x._ZZ_p__rep, INIT_TRANS) { } + + + +static const ZZ& modulus() { return ZZ_pInfo->p; } +static long ModulusSize() { return ZZ_pInfo->size; } +static long storage() { return ZZ_storage(ZZ_pInfo->size); } +static long ExtendedModulusSize() { return ZZ_pInfo->ExtendedModulusSize; } + +static const ZZ_p& zero(); + +static void DoInstall(); + +static void install() +{ + // we test and set ZZ_pInstalled here, to allow better + // inlining and optimization + if (!ZZ_pInstalled) { DoInstall(); ZZ_pInstalled = true; } +} + +static const ZZ_pFFTInfoT* GetFFTInfo() +{ + install(); + return ZZ_pInfo->FFTInfo.get(); +} + +static ZZ_pTmpSpaceT* GetTmpSpace() +{ + install(); + return ZZ_pTmpSpace; +} + + +ZZ_p(INIT_VAL_TYPE, const ZZ& a); +ZZ_p(INIT_VAL_TYPE, long a); + + +void swap(ZZ_p& x) +{ + _ZZ_p__rep.swap(x._ZZ_p__rep); +} + + + +void allocate() +{ + long sz = ZZ_pInfo->size; + if (_ZZ_p__rep.MaxAlloc() < sz) + _ZZ_p__rep.SetSize(sz); +} + +// mainly for internal consumption by the ZZ_pWatcher class below + +void KillBig() { _ZZ_p__rep.KillBig(); } + + +}; + + + +// read-only access to _ZZ_p__representation +inline const ZZ& rep(const ZZ_p& a) { return a._ZZ_p__rep; } + +// ****** conversion + +inline void conv(ZZ_p& x, const ZZ& a) + { rem(x._ZZ_p__rep, a, ZZ_p::modulus()); } + + +inline ZZ_p to_ZZ_p(const ZZ& a) + { return ZZ_p(INIT_VAL, a); } + + +void conv(ZZ_p& x, long a); + + +inline ZZ_p to_ZZ_p(long a) + { return ZZ_p(INIT_VAL, a); } + + + + +// ****** some basics + + +inline void clear(ZZ_p& x) +// x = 0 + { clear(x._ZZ_p__rep); } + +inline void set(ZZ_p& x) +// x = 1 + { set(x._ZZ_p__rep); } + +inline void swap(ZZ_p& x, ZZ_p& y) +// swap x and y + + { x.swap(y); } + +// ****** addition + +inline void add(ZZ_p& x, const ZZ_p& a, const ZZ_p& b) +// x = a + b + + { AddMod(x._ZZ_p__rep, a._ZZ_p__rep, b._ZZ_p__rep, ZZ_p::modulus()); } + +inline void sub(ZZ_p& x, const ZZ_p& a, const ZZ_p& b) +// x = a - b + + { SubMod(x._ZZ_p__rep, a._ZZ_p__rep, b._ZZ_p__rep, ZZ_p::modulus()); } + + +inline void negate(ZZ_p& x, const ZZ_p& a) +// x = -a + + { NegateMod(x._ZZ_p__rep, a._ZZ_p__rep, ZZ_p::modulus()); } + + +// scalar versions + +void add(ZZ_p& x, const ZZ_p& a, long b); +inline void add(ZZ_p& x, long a, const ZZ_p& b) { add(x, b, a); } + +void sub(ZZ_p& x, const ZZ_p& a, long b); +void sub(ZZ_p& x, long a, const ZZ_p& b); + + +// ****** multiplication + +inline void mul(ZZ_p& x, const ZZ_p& a, const ZZ_p& b) +// x = a*b + + { MulMod(x._ZZ_p__rep, a._ZZ_p__rep, b._ZZ_p__rep, ZZ_p::modulus()); } + + +inline void sqr(ZZ_p& x, const ZZ_p& a) +// x = a^2 + + { SqrMod(x._ZZ_p__rep, a._ZZ_p__rep, ZZ_p::modulus()); } + +inline ZZ_p sqr(const ZZ_p& a) + { ZZ_p x; sqr(x, a); NTL_OPT_RETURN(ZZ_p, x); } + + +// scalar versions + +void mul(ZZ_p& x, const ZZ_p& a, long b); +inline void mul(ZZ_p& x, long a, const ZZ_p& b) { mul(x, b, a); } + +// ****** division + + +void div(ZZ_p& x, const ZZ_p& a, const ZZ_p& b); +// x = a/b +// If b != 0 & b not invertible & DivHandler != 0, +// then DivHandler will be called with the offending b. +// In this case, of course, p is not really prime, and one +// can factor p by taking a gcd with rep(b). +// Otherwise, if b is not invertible, an error occurs. + +void inv(ZZ_p& x, const ZZ_p& a); +// x = 1/a +// Error handling is the same as above. 
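+
+// Editorial sketch (not part of the original NTL sources): installing a
+// modulus and switching it temporarily via the RAII classes above.
+//
+//    ZZ_p::init(ZZ(17));        // all ZZ_p arithmetic is now mod 17
+//    ZZ_p a = to_ZZ_p(20);      // a == 3
+//    {
+//       ZZ_pPush push(ZZ(11));  // temporarily switch to mod 11
+//       ZZ_p b = to_ZZ_p(20);   // b == 9
+//    }                          // modulus 17 restored here
+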
+ +inline ZZ_p inv(const ZZ_p& a) + { ZZ_p x; inv(x, a); NTL_OPT_RETURN(ZZ_p, x); } + +void div(ZZ_p& x, const ZZ_p& a, long b); +void div(ZZ_p& x, long a, const ZZ_p& b); + + +// operator notation: + +inline ZZ_p operator+(const ZZ_p& a, const ZZ_p& b) + { ZZ_p x; add(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p operator+(const ZZ_p& a, long b) + { ZZ_p x; add(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p operator+(long a, const ZZ_p& b) + { ZZ_p x; add(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p& operator+=(ZZ_p& x, const ZZ_p& b) + { add(x, x, b); return x; } + +inline ZZ_p& operator+=(ZZ_p& x, long b) + { add(x, x, b); return x; } + + + +inline ZZ_p operator-(const ZZ_p& a, const ZZ_p& b) + { ZZ_p x; sub(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p operator-(const ZZ_p& a, long b) + { ZZ_p x; sub(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p operator-(long a, const ZZ_p& b) + { ZZ_p x; sub(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p& operator-=(ZZ_p& x, const ZZ_p& b) + { sub(x, x, b); return x; } + +inline ZZ_p& operator-=(ZZ_p& x, long b) + { sub(x, x, b); return x; } + + + +inline ZZ_p operator*(const ZZ_p& a, const ZZ_p& b) + { ZZ_p x; mul(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p operator*(const ZZ_p& a, long b) + { ZZ_p x; mul(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p operator*(long a, const ZZ_p& b) + { ZZ_p x; mul(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p& operator*=(ZZ_p& x, const ZZ_p& b) + { mul(x, x, b); return x; } + +inline ZZ_p& operator*=(ZZ_p& x, long b) + { mul(x, x, b); return x; } + + +inline ZZ_p operator/(const ZZ_p& a, const ZZ_p& b) + { ZZ_p x; div(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p operator/(const ZZ_p& a, long b) + { ZZ_p x; div(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p operator/(long a, const ZZ_p& b) + { ZZ_p x; div(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +inline ZZ_p& operator/=(ZZ_p& x, const ZZ_p& b) + { div(x, x, b); return x; } + +inline ZZ_p& operator/=(ZZ_p& x, long b) + { div(x, x, b); return x; } + + +inline ZZ_p operator-(const ZZ_p& a) + { ZZ_p x; negate(x, a); NTL_OPT_RETURN(ZZ_p, x); } + + +inline ZZ_p& operator++(ZZ_p& x) { add(x, x, 1); return x; } +inline void operator++(ZZ_p& x, int) { add(x, x, 1); } +inline ZZ_p& operator--(ZZ_p& x) { sub(x, x, 1); return x; } +inline void operator--(ZZ_p& x, int) { sub(x, x, 1); } + + +// ****** exponentiation + +inline void power(ZZ_p& x, const ZZ_p& a, const ZZ& e) + { PowerMod(x._ZZ_p__rep, a._ZZ_p__rep, e, ZZ_p::modulus()); } + +inline ZZ_p power(const ZZ_p& a, const ZZ& e) + { ZZ_p x; power(x, a, e); NTL_OPT_RETURN(ZZ_p, x); } + +inline void power(ZZ_p& x, const ZZ_p& a, long e) + { PowerMod(x._ZZ_p__rep, a._ZZ_p__rep, e, ZZ_p::modulus()); } + +inline ZZ_p power(const ZZ_p& a, long e) + { ZZ_p x; power(x, a, e); NTL_OPT_RETURN(ZZ_p, x); } + + +// ****** comparison + +inline long IsZero(const ZZ_p& a) + { return IsZero(a._ZZ_p__rep); } + + +inline long IsOne(const ZZ_p& a) + { return IsOne(a._ZZ_p__rep); } + +inline long operator==(const ZZ_p& a, const ZZ_p& b) + { return a._ZZ_p__rep == b._ZZ_p__rep; } + +inline long operator!=(const ZZ_p& a, const ZZ_p& b) + { return !(a == b); } + +long operator==(const ZZ_p& a, long b); +inline long operator==(long a, const ZZ_p& b) { return b == a; } + +inline long operator!=(const ZZ_p& a, long b) { return !(a == b); } +inline long operator!=(long a, const ZZ_p& b) { return !(a == b); } + + +// ****** random numbers + +inline void random(ZZ_p& x) +// x = random 
element in ZZ_p + + { RandomBnd(x._ZZ_p__rep, ZZ_p::modulus()); } + +inline ZZ_p random_ZZ_p() + { ZZ_p x; random(x); NTL_OPT_RETURN(ZZ_p, x); } + + +// ****** input/output + +inline NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const ZZ_p& a) + { return s << a._ZZ_p__rep; } + +NTL_SNS istream& operator>>(NTL_SNS istream& s, ZZ_p& x); + + +inline ZZ_p& ZZ_p::operator=(long a) { conv(*this, a); return *this; } + + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(int& x, const ZZ_p& a) { conv(x, rep(a)); } +inline void conv(unsigned int& x, const ZZ_p& a) { conv(x, rep(a)); } +inline void conv(long& x, const ZZ_p& a) { conv(x, rep(a)); } +inline void conv(unsigned long& x, const ZZ_p& a) { conv(x, rep(a)); } +inline void conv(ZZ& x, const ZZ_p& a) { conv(x, rep(a)); } + +inline void conv(ZZ_p& x, const ZZ_p& a) { x = a; } + +/* ------------------------------------- */ + + + + +// overload these functions for Vec. +// They are defined in vec_ZZ_p.c +void BlockConstruct(ZZ_p* p, long n); +void BlockConstructFromVec(ZZ_p* p, long n, const ZZ_p* q); +void BlockConstructFromObj(ZZ_p* p, long n, const ZZ_p& q); +void BlockDestroy(ZZ_p* p, long n); + + +// ZZ_p scratch variables + + + +class ZZ_pWatcher { +public: + ZZ_p& watched; + explicit + ZZ_pWatcher(ZZ_p& _watched) : watched(_watched) { } + + ~ZZ_pWatcher() { watched.KillBig(); } +}; + +#define NTL_ZZ_pRegister(x) NTL_TLS_LOCAL(ZZ_p, x); ZZ_pWatcher _WATCHER__ ## x(x); x.allocate() + +// FIXME: register variables that are allocated with respect to one modulus +// and then reused with another modulus may have initial values that are +// not in the correct range. This should not cause any problems, though, +// as these register values should always be written to before being read. +// Note also that the underlying integer reps may have space +// allocated that is smaller or *bigger* than the current modulus. +// This may impact future interface design changes --- especially +// one that tries to make "out of context" copy constructors +// safe by reading the allocated space of the source. 
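+
+// Editorial sketch (not part of the original NTL sources): round-tripping
+// between ZZ_p and ZZ with the conversions above.
+//
+//    ZZ_p a = random_ZZ_p();   // uniform over Z/pZ
+//    ZZ r;
+//    conv(r, a);               // r = rep(a), an integer in [0, p)
+//    ZZ_p b = inv(a);          // a*b == 1, provided a is invertible
+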
+ + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/ZZ_pE.h b/thirdparty/linux/ntl/include/NTL/ZZ_pE.h new file mode 100644 index 0000000000..ccb271ec7d --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/ZZ_pE.h @@ -0,0 +1,532 @@ + +#ifndef NTL_ZZ_pE__H +#define NTL_ZZ_pE__H + +#include +#include +#include +#include + +NTL_OPEN_NNS + + +class ZZ_pEInfoT { +private: + + ZZ_pEInfoT(); // disabled + ZZ_pEInfoT(const ZZ_pEInfoT&); // disabled + void operator=(const ZZ_pEInfoT&); // disabled +public: + + ZZ_pEInfoT(const ZZ_pX&); + ~ZZ_pEInfoT() { } + + ZZ_pXModulus p; + + ZZ _card_base; + long _card_exp; + + Lazy _card; + + +}; + +extern +NTL_CHEAP_THREAD_LOCAL +ZZ_pEInfoT *ZZ_pEInfo; +// info for current modulus, initially null +// raw pointer for faster TLS access + + + + + +class ZZ_pEContext { +private: +SmartPtr ptr; + +public: + +ZZ_pEContext() { } +explicit ZZ_pEContext(const ZZ_pX& p) : ptr(MakeSmart(p)) { } + +// copy constructor, assignment, destructor: default + +void save(); +void restore() const; + +}; + + +class ZZ_pEBak { +private: +ZZ_pEContext c; +bool MustRestore; + +ZZ_pEBak(const ZZ_pEBak&); // disabled +void operator=(const ZZ_pEBak&); // disabled + +public: +void save(); +void restore(); + +ZZ_pEBak() : MustRestore(false) { } + +~ZZ_pEBak(); + + +}; + + + + + +class ZZ_pEPush { +private: +ZZ_pEBak bak; + +ZZ_pEPush(const ZZ_pEPush&); // disabled +void operator=(const ZZ_pEPush&); // disabled + +public: +ZZ_pEPush() { bak.save(); } +explicit ZZ_pEPush(const ZZ_pEContext& context) { bak.save(); context.restore(); } +explicit ZZ_pEPush(const ZZ_pX& p) { bak.save(); ZZ_pEContext c(p); c.restore(); } + + +}; + + + + + +class ZZ_pEX; // forward declaration + + +class ZZ_pE { +public: +typedef ZZ_pX rep_type; +typedef ZZ_pEContext context_type; +typedef ZZ_pEBak bak_type; +typedef ZZ_pEPush push_type; +typedef ZZ_pEX poly_type; + + +ZZ_pX _ZZ_pE__rep; + +// static data + + +static long DivCross() { return 16; } +static long ModCross() { return 8; } + + +// ****** constructors and assignment + +ZZ_pE() { } // NO_ALLOC + +explicit ZZ_pE(long a) { *this = a; } // NO_ALLOC +explicit ZZ_pE(const ZZ_p& a) { *this = a; } // NO_ALLOC + + +ZZ_pE(const ZZ_pE& a) { _ZZ_pE__rep = a._ZZ_pE__rep; } // NO_ALLOC + +ZZ_pE(INIT_NO_ALLOC_TYPE) { } // allocates no space +ZZ_pE(INIT_ALLOC_TYPE) {_ZZ_pE__rep.rep.SetMaxLength(ZZ_pE::degree()); } // allocates space +void allocate() { _ZZ_pE__rep.rep.SetMaxLength(ZZ_pE::degree()); } + +~ZZ_pE() { } + +ZZ_pE& operator=(const ZZ_pE& a) { _ZZ_pE__rep = a._ZZ_pE__rep; return *this; } + +inline ZZ_pE& operator=(long a); +inline ZZ_pE& operator=(const ZZ_p& a); + +ZZ_pE(ZZ_pE& x, INIT_TRANS_TYPE) : _ZZ_pE__rep(x._ZZ_pE__rep, INIT_TRANS) { } + +void swap(ZZ_pE& x) { _ZZ_pE__rep.swap(x._ZZ_pE__rep); } + + +// You can always access the _ZZ_pE__representation directly...if you dare. 
+ZZ_pX& LoopHole() { return _ZZ_pE__rep; } + +static const ZZ_pXModulus& modulus() { return ZZ_pEInfo->p; } + +static long degree() { return deg(ZZ_pEInfo->p); } + +static const ZZ& cardinality(); + +static const ZZ_pE& zero(); + +static long initialized() { return (ZZ_pEInfo != 0); } + +static void init(const ZZ_pX&); + + + +}; + + + + +// read-only access to _ZZ_pE__representation +inline const ZZ_pX& rep(const ZZ_pE& a) { return a._ZZ_pE__rep; } + +inline void clear(ZZ_pE& x) +// x = 0 + { clear(x._ZZ_pE__rep); } + +inline void set(ZZ_pE& x) +// x = 1 + { set(x._ZZ_pE__rep); } + +inline void swap(ZZ_pE& x, ZZ_pE& y) +// swap x and y + + { x.swap(y); } + +// ****** addition + +inline void add(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b) +// x = a + b + + { add(x._ZZ_pE__rep, a._ZZ_pE__rep, b._ZZ_pE__rep); } + +inline void sub(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b) +// x = a - b + + { sub(x._ZZ_pE__rep, a._ZZ_pE__rep, b._ZZ_pE__rep); } + + +inline void negate(ZZ_pE& x, const ZZ_pE& a) + + { negate(x._ZZ_pE__rep, a._ZZ_pE__rep); } + + +inline void add(ZZ_pE& x, const ZZ_pE& a, long b) + { add(x._ZZ_pE__rep, a._ZZ_pE__rep, b); } + +inline void add(ZZ_pE& x, const ZZ_pE& a, const ZZ_p& b) + { add(x._ZZ_pE__rep, a._ZZ_pE__rep, b); } + +inline void add(ZZ_pE& x, long a, const ZZ_pE& b) + { add(x._ZZ_pE__rep, a, b._ZZ_pE__rep); } + +inline void add(ZZ_pE& x, const ZZ_p& a, const ZZ_pE& b) + { add(x._ZZ_pE__rep, a, b._ZZ_pE__rep); } + + + + + +inline void sub(ZZ_pE& x, const ZZ_pE& a, long b) + { sub(x._ZZ_pE__rep, a._ZZ_pE__rep, b); } + +inline void sub(ZZ_pE& x, const ZZ_pE& a, const ZZ_p& b) + { sub(x._ZZ_pE__rep, a._ZZ_pE__rep, b); } + +inline void sub(ZZ_pE& x, long a, const ZZ_pE& b) + { sub(x._ZZ_pE__rep, a, b._ZZ_pE__rep); } + +inline void sub(ZZ_pE& x, const ZZ_p& a, const ZZ_pE& b) + { sub(x._ZZ_pE__rep, a, b._ZZ_pE__rep); } + + + + + +// ****** multiplication + +inline void mul(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b) +// x = a*b + + { MulMod(x._ZZ_pE__rep, a._ZZ_pE__rep, b._ZZ_pE__rep, ZZ_pE::modulus()); } + + +inline void sqr(ZZ_pE& x, const ZZ_pE& a) +// x = a^2 + + { SqrMod(x._ZZ_pE__rep, a._ZZ_pE__rep, ZZ_pE::modulus()); } + +inline ZZ_pE sqr(const ZZ_pE& a) + { ZZ_pE x; sqr(x, a); NTL_OPT_RETURN(ZZ_pE, x); } + + +inline void mul(ZZ_pE& x, const ZZ_pE& a, long b) + { mul(x._ZZ_pE__rep, a._ZZ_pE__rep, b); } + +inline void mul(ZZ_pE& x, const ZZ_pE& a, const ZZ_p& b) + { mul(x._ZZ_pE__rep, a._ZZ_pE__rep, b); } + +inline void mul(ZZ_pE& x, long a, const ZZ_pE& b) + { mul(x._ZZ_pE__rep, a, b._ZZ_pE__rep); } + +inline void mul(ZZ_pE& x, const ZZ_p& a, const ZZ_pE& b) + { mul(x._ZZ_pE__rep, a, b._ZZ_pE__rep); } + + +// ****** division + + + +void div(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b); +void div(ZZ_pE& x, const ZZ_pE& a, long b); +void div(ZZ_pE& x, const ZZ_pE& a, const ZZ_p& b); +void div(ZZ_pE& x, long a, const ZZ_pE& b); +void div(ZZ_pE& x, const ZZ_p& a, const ZZ_pE& b); + +void inv(ZZ_pE& x, const ZZ_pE& a); + +inline ZZ_pE inv(const ZZ_pE& a) + { ZZ_pE x; inv(x, a); NTL_OPT_RETURN(ZZ_pE, x); } + + + +// ****** exponentiation + +inline void power(ZZ_pE& x, const ZZ_pE& a, const ZZ& e) +// x = a^e + + { PowerMod(x._ZZ_pE__rep, a._ZZ_pE__rep, e, ZZ_pE::modulus()); } + +inline ZZ_pE power(const ZZ_pE& a, const ZZ& e) + { ZZ_pE x; power(x, a, e); NTL_OPT_RETURN(ZZ_pE, x); } + +inline void power(ZZ_pE& x, const ZZ_pE& a, long e) + { power(x, a, ZZ_expo(e)); } + +inline ZZ_pE power(const ZZ_pE& a, long e) + { ZZ_pE x; power(x, a, e); NTL_OPT_RETURN(ZZ_pE, x); } + + + + +// 
****** conversion + +inline void conv(ZZ_pE& x, const ZZ_pX& a) + { rem(x._ZZ_pE__rep, a, ZZ_pE::modulus()); } + +inline void conv(ZZ_pE& x, long a) + { conv(x._ZZ_pE__rep, a); } + +inline void conv(ZZ_pE& x, const ZZ_p& a) + { conv(x._ZZ_pE__rep, a); } + +inline void conv(ZZ_pE& x, const ZZ& a) + { conv(x._ZZ_pE__rep, a); } + +inline ZZ_pE to_ZZ_pE(const ZZ_pX& a) + { ZZ_pE x; conv(x, a); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE to_ZZ_pE(long a) + { ZZ_pE x; conv(x, a); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE to_ZZ_pE(const ZZ_p& a) + { ZZ_pE x; conv(x, a); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE to_ZZ_pE(const ZZ& a) + { ZZ_pE x; conv(x, a); NTL_OPT_RETURN(ZZ_pE, x); } + + + +// ****** comparison + +inline long IsZero(const ZZ_pE& a) + { return IsZero(a._ZZ_pE__rep); } + +inline long IsOne(const ZZ_pE& a) + { return IsOne(a._ZZ_pE__rep); } + +inline long operator==(const ZZ_pE& a, const ZZ_pE& b) + { return a._ZZ_pE__rep == b._ZZ_pE__rep; } +inline long operator==(const ZZ_pE& a, long b) + { return a._ZZ_pE__rep == b; } +inline long operator==(const ZZ_pE& a, const ZZ_p& b) + { return a._ZZ_pE__rep == b; } +inline long operator==(long a, const ZZ_pE& b) + { return a == b._ZZ_pE__rep; } +inline long operator==(const ZZ_p& a, const ZZ_pE& b) + { return a == b._ZZ_pE__rep; } + +inline long operator!=(const ZZ_pE& a, const ZZ_pE& b) + { return !(a == b); } +inline long operator!=(const ZZ_pE& a, long b) + { return !(a == b); } +inline long operator!=(const ZZ_pE& a, const ZZ_p& b) + { return !(a == b); } +inline long operator!=(long a, const ZZ_pE& b) + { return !(a == b); } +inline long operator!=(const ZZ_p& a, const ZZ_pE& b) + { return !(a == b); } + + +// ****** norm and trace + +inline void trace(ZZ_p& x, const ZZ_pE& a) + { TraceMod(x, a._ZZ_pE__rep, ZZ_pE::modulus()); } +inline ZZ_p trace(const ZZ_pE& a) + { return TraceMod(a._ZZ_pE__rep, ZZ_pE::modulus()); } + +inline void norm(ZZ_p& x, const ZZ_pE& a) + { NormMod(x, a._ZZ_pE__rep, ZZ_pE::modulus()); } +inline ZZ_p norm(const ZZ_pE& a) + { return NormMod(a._ZZ_pE__rep, ZZ_pE::modulus()); } + + +// ****** random numbers + +inline void random(ZZ_pE& x) +// x = random element in ZZ_pE + + { random(x._ZZ_pE__rep, ZZ_pE::degree()); } + +inline ZZ_pE random_ZZ_pE() + { ZZ_pE x; random(x); NTL_OPT_RETURN(ZZ_pE, x); } + + +// ****** input/output + +inline NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const ZZ_pE& a) + { return s << a._ZZ_pE__rep; } + +NTL_SNS istream& operator>>(NTL_SNS istream& s, ZZ_pE& x); + + +inline ZZ_pE& ZZ_pE::operator=(long a) { conv(*this, a); return *this; } +inline ZZ_pE& ZZ_pE::operator=(const ZZ_p& a) { conv(*this, a); return *this; } + + + +inline ZZ_pE operator+(const ZZ_pE& a, const ZZ_pE& b) + { ZZ_pE x; add(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator+(const ZZ_pE& a, const ZZ_p& b) + { ZZ_pE x; add(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator+(const ZZ_pE& a, long b) + { ZZ_pE x; add(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator+(const ZZ_p& a, const ZZ_pE& b) + { ZZ_pE x; add(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator+(long a, const ZZ_pE& b) + { ZZ_pE x; add(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + + +inline ZZ_pE operator-(const ZZ_pE& a, const ZZ_pE& b) + { ZZ_pE x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator-(const ZZ_pE& a, const ZZ_p& b) + { ZZ_pE x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator-(const ZZ_pE& a, long b) + { ZZ_pE x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pE, 
x); } + +inline ZZ_pE operator-(const ZZ_p& a, const ZZ_pE& b) + { ZZ_pE x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator-(long a, const ZZ_pE& b) + { ZZ_pE x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator-(const ZZ_pE& a) + { ZZ_pE x; negate(x, a); NTL_OPT_RETURN(ZZ_pE, x); } + + +inline ZZ_pE& operator+=(ZZ_pE& x, const ZZ_pE& b) + { add(x, x, b); return x; } + +inline ZZ_pE& operator+=(ZZ_pE& x, const ZZ_p& b) + { add(x, x, b); return x; } + +inline ZZ_pE& operator+=(ZZ_pE& x, long b) + { add(x, x, b); return x; } + + +inline ZZ_pE& operator-=(ZZ_pE& x, const ZZ_pE& b) + { sub(x, x, b); return x; } + +inline ZZ_pE& operator-=(ZZ_pE& x, const ZZ_p& b) + { sub(x, x, b); return x; } + +inline ZZ_pE& operator-=(ZZ_pE& x, long b) + { sub(x, x, b); return x; } + + +inline ZZ_pE& operator++(ZZ_pE& x) { add(x, x, 1); return x; } + +inline void operator++(ZZ_pE& x, int) { add(x, x, 1); } + +inline ZZ_pE& operator--(ZZ_pE& x) { sub(x, x, 1); return x; } + +inline void operator--(ZZ_pE& x, int) { sub(x, x, 1); } + + + +inline ZZ_pE operator*(const ZZ_pE& a, const ZZ_pE& b) + { ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator*(const ZZ_pE& a, const ZZ_p& b) + { ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator*(const ZZ_pE& a, long b) + { ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator*(const ZZ_p& a, const ZZ_pE& b) + { ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator*(long a, const ZZ_pE& b) + { ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + + +inline ZZ_pE& operator*=(ZZ_pE& x, const ZZ_pE& b) + { mul(x, x, b); return x; } + +inline ZZ_pE& operator*=(ZZ_pE& x, const ZZ_p& b) + { mul(x, x, b); return x; } + +inline ZZ_pE& operator*=(ZZ_pE& x, long b) + { mul(x, x, b); return x; } + + + + +inline ZZ_pE operator/(const ZZ_pE& a, const ZZ_pE& b) + { ZZ_pE x; div(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator/(const ZZ_pE& a, const ZZ_p& b) + { ZZ_pE x; div(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator/(const ZZ_pE& a, long b) + { ZZ_pE x; div(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator/(const ZZ_p& a, const ZZ_pE& b) + { ZZ_pE x; div(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + +inline ZZ_pE operator/(long a, const ZZ_pE& b) + { ZZ_pE x; div(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + + +inline ZZ_pE& operator/=(ZZ_pE& x, const ZZ_pE& b) + { div(x, x, b); return x; } + +inline ZZ_pE& operator/=(ZZ_pE& x, const ZZ_p& b) + { div(x, x, b); return x; } + +inline ZZ_pE& operator/=(ZZ_pE& x, long b) + { div(x, x, b); return x; } + + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(ZZ_pX& x, const ZZ_pE& a) { x = rep(a); } +inline void conv(ZZ_pE& x, const ZZ_pE& a) { x = a; } + + +/* ------------------------------------- */ + + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/ZZ_pEX.h b/thirdparty/linux/ntl/include/NTL/ZZ_pEX.h new file mode 100644 index 0000000000..670608034e --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/ZZ_pEX.h @@ -0,0 +1,1050 @@ + +#ifndef NTL_ZZ_pEX__H +#define NTL_ZZ_pEX__H + +#include <NTL/vec_ZZ_pE.h> +#include <NTL/ZZ_pX.h> + +NTL_OPEN_NNS + +class ZZ_pEXModulus; // forward declaration + +class ZZ_pEX { public: +typedef ZZ_pE coeff_type; +typedef ZZ_pEXModulus modulus_type; + + +vec_ZZ_pE rep; + + +/*************************************************************** + + Constructors, Destructors, and Assignment + 
+****************************************************************/ + + +ZZ_pEX() { } +// initial value 0 + +explicit ZZ_pEX(long a) { *this = a; } +explicit ZZ_pEX(const ZZ_p& a) { *this = a; } +explicit ZZ_pEX(const ZZ_pE& a) { *this = a; } + + +ZZ_pEX(INIT_SIZE_TYPE, long n) { rep.SetMaxLength(n); } + +~ZZ_pEX() { } + +void normalize(); +// strip leading zeros + +void SetMaxLength(long n) +// pre-allocate space for n coefficients. +// Value is unchanged + + { rep.SetMaxLength(n); } + + +void kill() +// free space held by this polynomial. Value becomes 0. + + { rep.kill(); } + + + +void SetLength(long n) { rep.SetLength(n); } +ZZ_pE& operator[](long i) { return rep[i]; } +const ZZ_pE& operator[](long i) const { return rep[i]; } + + + + + +static const ZZ_pEX& zero(); + +inline ZZ_pEX(long i, const ZZ_pE& c); +inline ZZ_pEX(long i, const ZZ_p& c); +inline ZZ_pEX(long i, long c); + +inline ZZ_pEX(INIT_MONO_TYPE, long i, const ZZ_pE& c); +inline ZZ_pEX(INIT_MONO_TYPE, long i, const ZZ_p& c); +inline ZZ_pEX(INIT_MONO_TYPE, long i, long c); +inline ZZ_pEX(INIT_MONO_TYPE, long i); + + +inline ZZ_pEX& operator=(long a); +inline ZZ_pEX& operator=(const ZZ_p& a); +inline ZZ_pEX& operator=(const ZZ_pE& a); + +ZZ_pEX(ZZ_pEX& x, INIT_TRANS_TYPE) : rep(x.rep, INIT_TRANS) { } + + +void swap(ZZ_pEX& x) { rep.swap(x.rep); } + + +}; + + +NTL_SNS istream& operator>>(NTL_SNS istream& s, ZZ_pEX& x); +NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const ZZ_pEX& a); + + + + +/********************************************************** + + Some utility routines + +***********************************************************/ + + +inline long deg(const ZZ_pEX& a) { return a.rep.length() - 1; } +// degree of a polynomial. +// note that the zero polynomial has degree -1. + +const ZZ_pE& coeff(const ZZ_pEX& a, long i); +// zero if i not in range + +const ZZ_pE& LeadCoeff(const ZZ_pEX& a); +// zero if a == 0 + +const ZZ_pE& ConstTerm(const ZZ_pEX& a); +// zero if a == 0 + +void SetCoeff(ZZ_pEX& x, long i, const ZZ_pE& a); +void SetCoeff(ZZ_pEX& x, long i, const ZZ_p& a); +void SetCoeff(ZZ_pEX& x, long i, long a); +// x[i] = a, error is raised if i < 0 + +void SetCoeff(ZZ_pEX& x, long i); +// x[i] = 1, error is raised if i < 0 + +inline ZZ_pEX::ZZ_pEX(long i, const ZZ_pE& a) { SetCoeff(*this, i, a); } +inline ZZ_pEX::ZZ_pEX(long i, const ZZ_p& a) { SetCoeff(*this, i, a); } +inline ZZ_pEX::ZZ_pEX(long i, long a) { SetCoeff(*this, i, a); } + +inline ZZ_pEX::ZZ_pEX(INIT_MONO_TYPE, long i, const ZZ_pE& a) { SetCoeff(*this, i, a); } +inline ZZ_pEX::ZZ_pEX(INIT_MONO_TYPE, long i, const ZZ_p& a) { SetCoeff(*this, i, a); } +inline ZZ_pEX::ZZ_pEX(INIT_MONO_TYPE, long i, long a) { SetCoeff(*this, i, a); } +inline ZZ_pEX::ZZ_pEX(INIT_MONO_TYPE, long i) { SetCoeff(*this, i); } + +void SetX(ZZ_pEX& x); +// x is set to the monomial X + +long IsX(const ZZ_pEX& a); +// test if x = X + +inline void clear(ZZ_pEX& x) +// x = 0 + + { x.rep.SetLength(0); } + +inline void set(ZZ_pEX& x) +// x = 1 + + { x.rep.SetLength(1); set(x.rep[0]); } + +inline void swap(ZZ_pEX& x, ZZ_pEX& y) +// swap x & y (only pointers are swapped) + + { x.swap(y); } + +void random(ZZ_pEX& x, long n); +inline ZZ_pEX random_ZZ_pEX(long n) + { ZZ_pEX x; random(x, n); NTL_OPT_RETURN(ZZ_pEX, x); } +// generate a random polynomial of degree < n + +void trunc(ZZ_pEX& x, const ZZ_pEX& a, long m); +inline ZZ_pEX trunc(const ZZ_pEX& a, long m) + { ZZ_pEX x; trunc(x, a, m); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = a % X^m + +void RightShift(ZZ_pEX& x, const ZZ_pEX& a, long n); 
+inline ZZ_pEX RightShift(const ZZ_pEX& a, long n) + { ZZ_pEX x; RightShift(x, a, n); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = a/X^n + +void LeftShift(ZZ_pEX& x, const ZZ_pEX& a, long n); +inline ZZ_pEX LeftShift(const ZZ_pEX& a, long n) + { ZZ_pEX x; LeftShift(x, a, n); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = a*X^n + +#ifndef NTL_TRANSITION + +inline ZZ_pEX operator>>(const ZZ_pEX& a, long n) + { ZZ_pEX x; RightShift(x, a, n); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator<<(const ZZ_pEX& a, long n) + { ZZ_pEX x; LeftShift(x, a, n); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX& operator<<=(ZZ_pEX& x, long n) + { LeftShift(x, x, n); return x; } + +inline ZZ_pEX& operator>>=(ZZ_pEX& x, long n) + { RightShift(x, x, n); return x; } + +#endif + + + +void diff(ZZ_pEX& x, const ZZ_pEX& a); +inline ZZ_pEX diff(const ZZ_pEX& a) + { ZZ_pEX x; diff(x, a); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = derivative of a + + + +void MakeMonic(ZZ_pEX& x); + +void reverse(ZZ_pEX& c, const ZZ_pEX& a, long hi); + +inline ZZ_pEX reverse(const ZZ_pEX& a, long hi) + { ZZ_pEX x; reverse(x, a, hi); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline void reverse(ZZ_pEX& c, const ZZ_pEX& a) +{ reverse(c, a, deg(a)); } + +inline ZZ_pEX reverse(const ZZ_pEX& a) + { ZZ_pEX x; reverse(x, a); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline void VectorCopy(vec_ZZ_pE& x, const ZZ_pEX& a, long n) + { VectorCopy(x, a.rep, n); } + +inline vec_ZZ_pE VectorCopy(const ZZ_pEX& a, long n) + { return VectorCopy(a.rep, n); } + + + + + + +/******************************************************************* + + conversion routines + +********************************************************************/ + + + +void conv(ZZ_pEX& x, long a); + +void conv(ZZ_pEX& x, const ZZ& a); + +void conv(ZZ_pEX& x, const ZZ_p& a); +void conv(ZZ_pEX& x, const ZZ_pX& a); +void conv(ZZ_pEX& x, const ZZ_pE& a); + + +void conv(ZZ_pEX& x, const vec_ZZ_pE& a); + +inline ZZ_pEX to_ZZ_pEX(long a) + { ZZ_pEX x; conv(x, a); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX to_ZZ_pEX(const ZZ& a) + { ZZ_pEX x; conv(x, a); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX to_ZZ_pEX(const ZZ_p& a) + { ZZ_pEX x; conv(x, a); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX to_ZZ_pEX(const ZZ_pX& a) + { ZZ_pEX x; conv(x, a); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX to_ZZ_pEX(const ZZ_pE& a) + { ZZ_pEX x; conv(x, a); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX to_ZZ_pEX(const vec_ZZ_pE& a) + { ZZ_pEX x; conv(x, a); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX& ZZ_pEX::operator=(long a) + { conv(*this, a); return *this; } + +inline ZZ_pEX& ZZ_pEX::operator=(const ZZ_p& a) + { conv(*this, a); return *this; } + +inline ZZ_pEX& ZZ_pEX::operator=(const ZZ_pE& a) + { conv(*this, a); return *this; } + + + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(ZZ_pEX& x, const ZZ_pEX& a) + { x = a; } + +inline void conv(vec_ZZ_pE& x, const ZZ_pEX& a) + { x = a.rep; } + +class ZZX; +void conv(ZZ_pEX& x, const ZZX& a); + + +/* ------------------------------------- */ + + + + +/************************************************************* + + Comparison + +**************************************************************/ + +long IsZero(const ZZ_pEX& a); + +long IsOne(const ZZ_pEX& a); + +inline long operator==(const ZZ_pEX& a, const ZZ_pEX& b) +{ return a.rep == b.rep; } + +long operator==(const ZZ_pEX& a, long b); +long operator==(const ZZ_pEX& a, const ZZ_p& b); +long operator==(const ZZ_pEX& a, const ZZ_pE& b); + +inline long operator==(long a, const ZZ_pEX& b) + { return (b == 
a); } +inline long operator==(const ZZ_p& a, const ZZ_pEX& b) + { return (b == a); } +inline long operator==(const ZZ_pE& a, const ZZ_pEX& b) + { return (b == a); } + +inline long operator!=(const ZZ_pEX& a, const ZZ_pEX& b) + { return !(a == b); } +inline long operator!=(const ZZ_pEX& a, long b) + { return !(a == b); } +inline long operator!=(const ZZ_pEX& a, const ZZ_p& b) + { return !(a == b); } +inline long operator!=(const ZZ_pEX& a, const ZZ_pE& b) + { return !(a == b); } +inline long operator!=(const long a, const ZZ_pEX& b) + { return !(a == b); } +inline long operator!=(const ZZ_p& a, const ZZ_pEX& b) + { return !(a == b); } +inline long operator!=(const ZZ_pE& a, const ZZ_pEX& b) + { return !(a == b); } + + +/*************************************************************** + + Addition + +****************************************************************/ + +void add(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b); + +void sub(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b); + +void negate(ZZ_pEX& x, const ZZ_pEX& a); + +// scalar versions + +void add(ZZ_pEX & x, const ZZ_pEX& a, long b); +void add(ZZ_pEX & x, const ZZ_pEX& a, const ZZ_p& b); +void add(ZZ_pEX & x, const ZZ_pEX& a, const ZZ_pE& b); + +inline void add(ZZ_pEX& x, const ZZ_pE& a, const ZZ_pEX& b) + { add(x, b, a); } +inline void add(ZZ_pEX& x, const ZZ_p& a, const ZZ_pEX& b) + { add(x, b, a); } +inline void add(ZZ_pEX& x, long a, const ZZ_pEX& b) + { add(x, b, a); } + +void sub(ZZ_pEX & x, const ZZ_pEX& a, long b); +void sub(ZZ_pEX & x, const ZZ_pEX& a, const ZZ_p& b); +void sub(ZZ_pEX & x, const ZZ_pEX& a, const ZZ_pE& b); + +void sub(ZZ_pEX& x, const ZZ_pE& a, const ZZ_pEX& b); +void sub(ZZ_pEX& x, const ZZ_p& a, const ZZ_pEX& b); +void sub(ZZ_pEX& x, long a, const ZZ_pEX& b); + + + +inline ZZ_pEX operator+(const ZZ_pEX& a, const ZZ_pEX& b) + { ZZ_pEX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator+(const ZZ_pEX& a, const ZZ_pE& b) + { ZZ_pEX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator+(const ZZ_pEX& a, const ZZ_p& b) + { ZZ_pEX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator+(const ZZ_pEX& a, long b) + { ZZ_pEX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator+(const ZZ_pE& a, const ZZ_pEX& b) + { ZZ_pEX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator+(const ZZ_p& a, const ZZ_pEX& b) + { ZZ_pEX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator+(long a, const ZZ_pEX& b) + { ZZ_pEX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + + +inline ZZ_pEX operator-(const ZZ_pEX& a, const ZZ_pEX& b) + { ZZ_pEX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator-(const ZZ_pEX& a, const ZZ_pE& b) + { ZZ_pEX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator-(const ZZ_pEX& a, const ZZ_p& b) + { ZZ_pEX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator-(const ZZ_pEX& a, long b) + { ZZ_pEX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator-(const ZZ_pE& a, const ZZ_pEX& b) + { ZZ_pEX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator-(const ZZ_p& a, const ZZ_pEX& b) + { ZZ_pEX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator-(long a, const ZZ_pEX& b) + { ZZ_pEX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + + +inline ZZ_pEX& operator+=(ZZ_pEX& x, const ZZ_pEX& b) + { add(x, x, b); return x; } + +inline ZZ_pEX& operator+=(ZZ_pEX& x, const ZZ_pE& b) + { add(x, x, b); return 
x; } + +inline ZZ_pEX& operator+=(ZZ_pEX& x, const ZZ_p& b) + { add(x, x, b); return x; } + +inline ZZ_pEX& operator+=(ZZ_pEX& x, long b) + { add(x, x, b); return x; } + +inline ZZ_pEX& operator-=(ZZ_pEX& x, const ZZ_pEX& b) + { sub(x, x, b); return x; } + +inline ZZ_pEX& operator-=(ZZ_pEX& x, const ZZ_pE& b) + { sub(x, x, b); return x; } + +inline ZZ_pEX& operator-=(ZZ_pEX& x, const ZZ_p& b) + { sub(x, x, b); return x; } + +inline ZZ_pEX& operator-=(ZZ_pEX& x, long b) + { sub(x, x, b); return x; } + + +inline ZZ_pEX operator-(const ZZ_pEX& a) + { ZZ_pEX x; negate(x, a); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX& operator++(ZZ_pEX& x) { add(x, x, 1); return x; } +inline void operator++(ZZ_pEX& x, int) { add(x, x, 1); } +inline ZZ_pEX& operator--(ZZ_pEX& x) { sub(x, x, 1); return x; } +inline void operator--(ZZ_pEX& x, int) { sub(x, x, 1); } + + + +/***************************************************************** + + Multiplication + +******************************************************************/ + + +void mul(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b); +// x = a * b + +void sqr(ZZ_pEX& x, const ZZ_pEX& a); +inline ZZ_pEX sqr(const ZZ_pEX& a) + { ZZ_pEX x; sqr(x, a); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = a^2 + + +void mul(ZZ_pEX & x, const ZZ_pEX& a, long b); +void mul(ZZ_pEX & x, const ZZ_pEX& a, const ZZ_p& b); +void mul(ZZ_pEX & x, const ZZ_pEX& a, const ZZ_pE& b); + +inline void mul(ZZ_pEX& x, long a, const ZZ_pEX& b) + { mul(x, b, a); } +inline void mul(ZZ_pEX& x, const ZZ_p& a, const ZZ_pEX& b) + { mul(x, b, a); } +inline void mul(ZZ_pEX& x, const ZZ_pE& a, const ZZ_pEX& b) + { mul(x, b, a); } + +void MulTrunc(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b, long n); +inline ZZ_pEX MulTrunc(const ZZ_pEX& a, const ZZ_pEX& b, long n) + { ZZ_pEX x; MulTrunc(x, a, b, n); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = a * b % X^n + +void SqrTrunc(ZZ_pEX& x, const ZZ_pEX& a, long n); +inline ZZ_pEX SqrTrunc(const ZZ_pEX& a, long n) + { ZZ_pEX x; SqrTrunc(x, a, n); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = a*a % X^n + + +inline ZZ_pEX operator*(const ZZ_pEX& a, const ZZ_pEX& b) + { ZZ_pEX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator*(const ZZ_pEX& a, const ZZ_pE& b) + { ZZ_pEX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator*(const ZZ_pEX& a, const ZZ_p& b) + { ZZ_pEX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator*(const ZZ_pEX& a, long b) + { ZZ_pEX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator*(const ZZ_pE& a, const ZZ_pEX& b) + { ZZ_pEX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator*(const ZZ_p& a, const ZZ_pEX& b) + { ZZ_pEX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator*(long a, const ZZ_pEX& b) + { ZZ_pEX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX& operator*=(ZZ_pEX& x, const ZZ_pEX& b) + { mul(x, x, b); return x; } + +inline ZZ_pEX& operator*=(ZZ_pEX& x, const ZZ_pE& b) + { mul(x, x, b); return x; } + +inline ZZ_pEX& operator*=(ZZ_pEX& x, const ZZ_p& b) + { mul(x, x, b); return x; } + +inline ZZ_pEX& operator*=(ZZ_pEX& x, long b) + { mul(x, x, b); return x; } + + +void power(ZZ_pEX& x, const ZZ_pEX& a, long e); +inline ZZ_pEX power(const ZZ_pEX& a, long e) + { ZZ_pEX x; power(x, a, e); NTL_OPT_RETURN(ZZ_pEX, x); } + + + + + +/************************************************************* + + Division + +**************************************************************/ + +void DivRem(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& a, const 
ZZ_pEX& b); +// q = a/b, r = a%b + +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEX& b); +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pE& b); +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_p& b); +void div(ZZ_pEX& q, const ZZ_pEX& a, long b); +// q = a/b + +void rem(ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b); +// r = a%b + +long divide(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEX& b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0 + +long divide(const ZZ_pEX& a, const ZZ_pEX& b); +// if b | a, returns 1; otherwise returns 0 + +void InvTrunc(ZZ_pEX& x, const ZZ_pEX& a, long m); +inline ZZ_pEX InvTrunc(const ZZ_pEX& a, long m) + { ZZ_pEX x; InvTrunc(x, a, m); NTL_OPT_RETURN(ZZ_pEX, x); } +// computes x = a^{-1} % X^m +// constant term must be invertible + + +inline ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_pEX& b) + { ZZ_pEX x; div(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_pE& b) + { ZZ_pEX x; div(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_p& b) + { ZZ_pEX x; div(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX operator/(const ZZ_pEX& a, long b) + { ZZ_pEX x; div(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_pEX& b) + { div(x, x, b); return x; } + +inline ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_pE& b) + { div(x, x, b); return x; } + +inline ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_p& b) + { div(x, x, b); return x; } + +inline ZZ_pEX& operator/=(ZZ_pEX& x, long b) + { div(x, x, b); return x; } + + +inline ZZ_pEX operator%(const ZZ_pEX& a, const ZZ_pEX& b) + { ZZ_pEX x; rem(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX& operator%=(ZZ_pEX& x, const ZZ_pEX& b) + { rem(x, x, b); return x; } + + + +/*********************************************************** + + GCD's + +************************************************************/ + + +void GCD(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b); +inline ZZ_pEX GCD(const ZZ_pEX& a, const ZZ_pEX& b) + { ZZ_pEX x; GCD(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = GCD(a, b), x is always monic (or zero if a==b==0). + +void XGCD(ZZ_pEX& d, ZZ_pEX& s, ZZ_pEX& t, const ZZ_pEX& a, const ZZ_pEX& b); +// d = gcd(a,b), a s + b t = d + + +/************************************************************* + + Modular Arithmetic without pre-conditioning + +**************************************************************/ + +// arithmetic mod f. +// all inputs and outputs are polynomials of degree less than deg(f). +// ASSUMPTION: f is assumed monic, and deg(f) > 0. +// NOTE: if you want to do many computations with a fixed f, +// use the ZZ_pEXModulus data structure and associated routines below. 
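For orientation, here is a minimal usage sketch of these one-off modular routines (MulMod, InvMod, and friends, declared just below); it is a hypothetical driver, not part of the header, and the small parameters are chosen only for illustration:

   #include <NTL/ZZ_pEX.h>
   #include <NTL/ZZ_pXFactoring.h>   // BuildIrred, to construct the extension ring

   using namespace NTL;

   int main()
   {
      ZZ_p::init(ZZ(17));           // ZZ_p = Z/17Z
      ZZ_pX P;
      BuildIrred(P, 2);             // monic irreducible of degree 2 over ZZ_p
      ZZ_pE::init(P);               // ZZ_pE = GF(17^2)

      ZZ_pEX f = random_ZZ_pEX(3);
      SetCoeff(f, 3);               // leading coefficient 1: f monic, deg(f) = 3

      ZZ_pEX a = random_ZZ_pEX(3), b = random_ZZ_pEX(3);
      ZZ_pEX c = MulMod(a, b, f);   // c = a*b mod f

      if (IsOne(GCD(a, f))) {       // a invertible mod f?
         ZZ_pEX ai = InvMod(a, f);  // ai*a = 1 mod f
      }
      return 0;
   }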
+ + + +void MulMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b, const ZZ_pEX& f); +inline ZZ_pEX MulMod(const ZZ_pEX& a, const ZZ_pEX& b, const ZZ_pEX& f) + { ZZ_pEX x; MulMod(x, a, b, f); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = (a * b) % f + +void SqrMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f); +inline ZZ_pEX SqrMod(const ZZ_pEX& a, const ZZ_pEX& f) + { ZZ_pEX x; SqrMod(x, a, f); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = a^2 % f + +void MulByXMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f); +inline ZZ_pEX MulByXMod(const ZZ_pEX& a, const ZZ_pEX& f) + { ZZ_pEX x; MulByXMod(x, a, f); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = (a * X) mod f + +void InvMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f); +inline ZZ_pEX InvMod(const ZZ_pEX& a, const ZZ_pEX& f) + { ZZ_pEX x; InvMod(x, a, f); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = a^{-1} % f, error if a is not invertible + +long InvModStatus(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f); +// if (a, f) = 1, returns 0 and sets x = a^{-1} % f +// otherwise, returns 1 and sets x = (a, f) + + + + + +/****************************************************************** + + Modular Arithmetic with Pre-conditioning + +*******************************************************************/ + + +// If you need to do a lot of arithmetic modulo a fixed f, +// build ZZ_pEXModulus F for f. This pre-computes information about f +// that speeds up the computation a great deal. + +class ZZ_pEXModulus { public: + ZZ_pEXModulus(); + ~ZZ_pEXModulus(); + + ZZ_pEXModulus(const ZZ_pEX& ff); + + ZZ_pEX f; // the modulus + + operator const ZZ_pEX& () const { return f; } + const ZZ_pEX& val() const { return f; } + + long n; // deg(f) + + long method; + + ZZ_pEX h0; + ZZ_pE hlc; + ZZ_pEX f0; + + OptionalVal< Lazy<vec_ZZ_pE> > tracevec; + // extra level of indirection to ensure relocatability + +}; + + + +inline long deg(const ZZ_pEXModulus& F) { return F.n; } + + +void build(ZZ_pEXModulus& F, const ZZ_pEX& f); + +void rem(ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEXModulus& F); + +void DivRem(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEXModulus& F); + +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEXModulus& F); + +void MulMod(ZZ_pEX& c, const ZZ_pEX& a, const ZZ_pEX& b, + const ZZ_pEXModulus& F); +inline ZZ_pEX MulMod(const ZZ_pEX& a, const ZZ_pEX& b, + const ZZ_pEXModulus& F) + { ZZ_pEX x; MulMod(x, a, b, F); NTL_OPT_RETURN(ZZ_pEX, x); } + +void SqrMod(ZZ_pEX& c, const ZZ_pEX& a, const ZZ_pEXModulus& F); +inline ZZ_pEX SqrMod(const ZZ_pEX& a, const ZZ_pEXModulus& F) + { ZZ_pEX x; SqrMod(x, a, F); NTL_OPT_RETURN(ZZ_pEX, x); } + + +void PowerMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ& e, const ZZ_pEXModulus& F); + +inline void PowerMod(ZZ_pEX& h, const ZZ_pEX& g, long e, + const ZZ_pEXModulus& F) + { PowerMod(h, g, ZZ_expo(e), F); } + +inline ZZ_pEX PowerMod(const ZZ_pEX& g, const ZZ& e, + const ZZ_pEXModulus& F) + { ZZ_pEX x; PowerMod(x, g, e, F); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX PowerMod(const ZZ_pEX& g, long e, const ZZ_pEXModulus& F) + { ZZ_pEX x; PowerMod(x, g, e, F); NTL_OPT_RETURN(ZZ_pEX, x); } + +void PowerXMod(ZZ_pEX& hh, const ZZ& e, const ZZ_pEXModulus& F); + +inline void PowerXMod(ZZ_pEX& h, long e, const ZZ_pEXModulus& F) + { PowerXMod(h, ZZ_expo(e), F); } + + +inline ZZ_pEX PowerXMod(const ZZ& e, const ZZ_pEXModulus& F) + { ZZ_pEX x; PowerXMod(x, e, F); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX PowerXMod(long e, const ZZ_pEXModulus& F) + { ZZ_pEX x; PowerXMod(x, e, F); NTL_OPT_RETURN(ZZ_pEX, x); } + + +inline ZZ_pEX operator%(const ZZ_pEX& a, const ZZ_pEXModulus& F) + 
{ ZZ_pEX x; rem(x, a, F); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX& operator%=(ZZ_pEX& x, const ZZ_pEXModulus& F) + { rem(x, x, F); return x; } + +inline ZZ_pEX operator/(const ZZ_pEX& a, const ZZ_pEXModulus& F) + { ZZ_pEX x; div(x, a, F); NTL_OPT_RETURN(ZZ_pEX, x); } + +inline ZZ_pEX& operator/=(ZZ_pEX& x, const ZZ_pEXModulus& F) + { div(x, x, F); return x; } + + + +/***************************************************************** + + vectors of ZZ_pEX's + +*****************************************************************/ + + + +typedef Vec<ZZ_pEX> vec_ZZ_pEX; + + + + + +/******************************************************* + + Evaluation and related problems + +********************************************************/ + + + + +void BuildFromRoots(ZZ_pEX& x, const vec_ZZ_pE& a); +inline ZZ_pEX BuildFromRoots(const vec_ZZ_pE& a) + { ZZ_pEX x; BuildFromRoots(x, a); NTL_OPT_RETURN(ZZ_pEX, x); } +// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n = a.length() + + +void eval(ZZ_pE& b, const ZZ_pEX& f, const ZZ_pE& a); +inline ZZ_pE eval(const ZZ_pEX& f, const ZZ_pE& a) + { ZZ_pE x; eval(x, f, a); NTL_OPT_RETURN(ZZ_pE, x); } +// b = f(a) + +void eval(vec_ZZ_pE& b, const ZZ_pEX& f, const vec_ZZ_pE& a); +inline vec_ZZ_pE eval(const ZZ_pEX& f, const vec_ZZ_pE& a) + { vec_ZZ_pE x; eval(x, f, a); NTL_OPT_RETURN(vec_ZZ_pE, x); } +// b[i] = f(a[i]) + +inline void eval(ZZ_pE& b, const ZZ_pX& f, const ZZ_pE& a) + { conv(b, CompMod(f, rep(a), ZZ_pE::modulus())); } + +inline ZZ_pE eval(const ZZ_pX& f, const ZZ_pE& a) + { ZZ_pE x; eval(x, f, a); NTL_OPT_RETURN(ZZ_pE, x); } +// b = f(a) + + +void interpolate(ZZ_pEX& f, const vec_ZZ_pE& a, const vec_ZZ_pE& b); +inline ZZ_pEX interpolate(const vec_ZZ_pE& a, const vec_ZZ_pE& b) + { ZZ_pEX x; interpolate(x, a, b); NTL_OPT_RETURN(ZZ_pEX, x); } +// computes f such that f(a[i]) = b[i] + + + + + +/********************************************************** + + Modular Composition and Minimal Polynomials + +***********************************************************/ + + + +void CompMod(ZZ_pEX& x, const ZZ_pEX& g, const ZZ_pEX& h, const ZZ_pEXModulus& F); +inline ZZ_pEX +CompMod(const ZZ_pEX& g, const ZZ_pEX& h, const ZZ_pEXModulus& F) + { ZZ_pEX x; CompMod(x, g, h, F); NTL_OPT_RETURN(ZZ_pEX, x); } +// x = g(h) mod f + +void Comp2Mod(ZZ_pEX& x1, ZZ_pEX& x2, const ZZ_pEX& g1, const ZZ_pEX& g2, + const ZZ_pEX& h, const ZZ_pEXModulus& F); +// xi = gi(h) mod f (i=1,2) + +void Comp3Mod(ZZ_pEX& x1, ZZ_pEX& x2, ZZ_pEX& x3, + const ZZ_pEX& g1, const ZZ_pEX& g2, const ZZ_pEX& g3, + const ZZ_pEX& h, const ZZ_pEXModulus& F); +// xi = gi(h) mod f (i=1..3) + + + +// The routine build (see below) which is implicitly called +// by the various compose and UpdateMap routines builds a table +// of polynomials. +// If ZZ_pEXArgBound > 0, then the table is limited in +// size to approximately that many KB. +// If ZZ_pEXArgBound <= 0, then it is ignored, and space is allocated +// so as to maximize speed. +// Initially, ZZ_pEXArgBound = 0. + + +// If a single h is going to be used with many g's +// then you should build a ZZ_pEXArgument for h, +// and then use the compose routine below. +// build computes and stores h, h^2, ..., h^m mod f. +// After this pre-computation, composing a polynomial of degree +// roughly n with h takes n/m multiplies mod f, plus n^2 +// scalar multiplies. +// Thus, increasing m increases the space requirement and the pre-computation +// time, but reduces the composition time. +// If ZZ_pEXArgBound > 0, a table of size less than m may be built. 
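A sketch of that amortization pattern, using the ZZ_pEXArgument machinery declared just below; the helper name and the choice of m are illustrative only, and each g[i] is assumed to have degree less than deg(F):

   #include <NTL/ZZ_pEX.h>

   using namespace NTL;

   // Compose many g's with one fixed h mod F, reusing a single
   // table of h, h^2, ..., h^m mod f.
   void ComposeMany(Vec<ZZ_pEX>& out, const Vec<ZZ_pEX>& g,
                    const ZZ_pEX& h, const ZZ_pEXModulus& F)
   {
      long m = SqrRoot(deg(F)) + 1;    // crude space/time compromise
      ZZ_pEXArgument H;
      build(H, h, F, m);               // the expensive step, done once

      out.SetLength(g.length());
      for (long i = 0; i < g.length(); i++)
         CompMod(out[i], g[i], H, F);  // out[i] = g[i](h) mod f
   }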
+ +struct ZZ_pEXArgument { + vec_ZZ_pEX H; +}; + +extern NTL_CHEAP_THREAD_LOCAL long ZZ_pEXArgBound; + + +void build(ZZ_pEXArgument& H, const ZZ_pEX& h, const ZZ_pEXModulus& F, long m); + +// m must be > 0, otherwise an error is raised + +void CompMod(ZZ_pEX& x, const ZZ_pEX& g, const ZZ_pEXArgument& H, + const ZZ_pEXModulus& F); + +inline ZZ_pEX +CompMod(const ZZ_pEX& g, const ZZ_pEXArgument& H, const ZZ_pEXModulus& F) + { ZZ_pEX x; CompMod(x, g, H, F); NTL_OPT_RETURN(ZZ_pEX, x); } + + + + +void MinPolySeq(ZZ_pEX& h, const vec_ZZ_pE& a, long m); +inline ZZ_pEX MinPolySeq(const vec_ZZ_pE& a, long m) + { ZZ_pEX x; MinPolySeq(x, a, m); NTL_OPT_RETURN(ZZ_pEX, x); } + + +void MinPolyMod(ZZ_pEX& hh, const ZZ_pEX& g, const ZZ_pEXModulus& F); +inline ZZ_pEX MinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F) + { ZZ_pEX x; MinPolyMod(x, g, F); NTL_OPT_RETURN(ZZ_pEX, x); } + + +void MinPolyMod(ZZ_pEX& hh, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); +inline ZZ_pEX MinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m) + { ZZ_pEX x; MinPolyMod(x, g, F, m); NTL_OPT_RETURN(ZZ_pEX, x); } + +void ProbMinPolyMod(ZZ_pEX& hh, const ZZ_pEX& g, const ZZ_pEXModulus& F); +inline ZZ_pEX ProbMinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F) + { ZZ_pEX x; ProbMinPolyMod(x, g, F); NTL_OPT_RETURN(ZZ_pEX, x); } + +void ProbMinPolyMod(ZZ_pEX& hh, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); +inline ZZ_pEX ProbMinPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m) + { ZZ_pEX x; ProbMinPolyMod(x, g, F, m); NTL_OPT_RETURN(ZZ_pEX, x); } + +void IrredPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F); +inline ZZ_pEX IrredPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F) + { ZZ_pEX x; IrredPolyMod(x, g, F); NTL_OPT_RETURN(ZZ_pEX, x); } + +void IrredPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m); +inline ZZ_pEX IrredPolyMod(const ZZ_pEX& g, const ZZ_pEXModulus& F, long m) + { ZZ_pEX x; IrredPolyMod(x, g, F, m); NTL_OPT_RETURN(ZZ_pEX, x); } + + +struct ZZ_pEXTransMultiplier { + ZZ_pEX f0, fbi, b; + long shamt, shamt_fbi, shamt_b; +}; + +void build(ZZ_pEXTransMultiplier& B, const ZZ_pEX& b, const ZZ_pEXModulus& F); + +void TransMulMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEXTransMultiplier& B, + const ZZ_pEXModulus& F); + +void UpdateMap(vec_ZZ_pE& x, const vec_ZZ_pE& a, + const ZZ_pEXTransMultiplier& B, const ZZ_pEXModulus& F); + +inline vec_ZZ_pE UpdateMap(const vec_ZZ_pE& a, + const ZZ_pEXTransMultiplier& B, const ZZ_pEXModulus& F) + { vec_ZZ_pE x; UpdateMap(x, a, B, F); NTL_OPT_RETURN(vec_ZZ_pE, x); } + +void ProjectPowers(vec_ZZ_pE& x, const vec_ZZ_pE& a, long k, + const ZZ_pEXArgument& H, const ZZ_pEXModulus& F); +inline vec_ZZ_pE ProjectPowers(const vec_ZZ_pE& a, long k, + const ZZ_pEXArgument& H, const ZZ_pEXModulus& F) + { vec_ZZ_pE x; ProjectPowers(x, a, k, H, F); NTL_OPT_RETURN(vec_ZZ_pE, x); } + +void ProjectPowers(vec_ZZ_pE& x, const vec_ZZ_pE& a, long k, const ZZ_pEX& h, + const ZZ_pEXModulus& F); +inline vec_ZZ_pE ProjectPowers(const vec_ZZ_pE& a, long k, + const ZZ_pEX& H, const ZZ_pEXModulus& F) + { vec_ZZ_pE x; ProjectPowers(x, a, k, H, F); NTL_OPT_RETURN(vec_ZZ_pE, x); } + +inline void project(ZZ_pE& x, const vec_ZZ_pE& a, const ZZ_pEX& b) + { InnerProduct(x, a, b.rep); } + +inline ZZ_pE project(const vec_ZZ_pE& a, const ZZ_pEX& b) + { ZZ_pE x; InnerProduct(x, a, b.rep); NTL_OPT_RETURN(ZZ_pE, x); } + + + +/***************************************************************** + + modular composition and minimal polynomials + in towers + 
+******************************************************************/ + + +// composition + +void CompTower(ZZ_pEX& x, const ZZ_pX& g, const ZZ_pEXArgument& A, + const ZZ_pEXModulus& F); + +inline ZZ_pEX CompTower(const ZZ_pX& g, const ZZ_pEXArgument& A, + const ZZ_pEXModulus& F) + { ZZ_pEX x; CompTower(x, g, A, F); NTL_OPT_RETURN(ZZ_pEX, x); } + +void CompTower(ZZ_pEX& x, const ZZ_pX& g, const ZZ_pEX& h, + const ZZ_pEXModulus& F); + +inline ZZ_pEX CompTower(const ZZ_pX& g, const ZZ_pEX& h, + const ZZ_pEXModulus& F) + { ZZ_pEX x; CompTower(x, g, h, F); NTL_OPT_RETURN(ZZ_pEX, x); } + +// prob min poly + +void ProbMinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, + long m); + +inline ZZ_pX ProbMinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F, + long m) + { ZZ_pX x; ProbMinPolyTower(x, g, F, m); NTL_OPT_RETURN(ZZ_pX, x); } + +inline void ProbMinPolyTower(ZZ_pX& h, const ZZ_pEX& g, + const ZZ_pEXModulus& F) + { ProbMinPolyTower(h, g, F, deg(F)*ZZ_pE::degree()); } + +inline ZZ_pX ProbMinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F) + { ZZ_pX x; ProbMinPolyTower(x, g, F); NTL_OPT_RETURN(ZZ_pX, x); } + + +// min poly + + +void MinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, + long m); + +inline ZZ_pX MinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F, + long m) + { ZZ_pX x; MinPolyTower(x, g, F, m); NTL_OPT_RETURN(ZZ_pX, x); } + +inline void MinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F) + { MinPolyTower(h, g, F, deg(F)*ZZ_pE::degree()); } + + +inline ZZ_pX MinPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F) + { ZZ_pX x; MinPolyTower(x, g, F); NTL_OPT_RETURN(ZZ_pX, x); } + +// irred poly + + +void IrredPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, + long m); + +inline ZZ_pX IrredPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F, + long m) + { ZZ_pX x; IrredPolyTower(x, g, F, m); NTL_OPT_RETURN(ZZ_pX, x); } + +inline void IrredPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F) + { IrredPolyTower(h, g, F, deg(F)*ZZ_pE::degree()); } + + +inline ZZ_pX IrredPolyTower(const ZZ_pEX& g, const ZZ_pEXModulus& F) + { ZZ_pX x; IrredPolyTower(x, g, F); NTL_OPT_RETURN(ZZ_pX, x); } + +/***************************************************************** + + Traces, norms, resultants + +******************************************************************/ + +void TraceVec(vec_ZZ_pE& S, const ZZ_pEX& f); + +inline vec_ZZ_pE TraceVec(const ZZ_pEX& f) + { vec_ZZ_pE x; TraceVec(x, f); NTL_OPT_RETURN(vec_ZZ_pE, x); } + + +void TraceMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEXModulus& F); + +inline ZZ_pE TraceMod(const ZZ_pEX& a, const ZZ_pEXModulus& F) + { ZZ_pE x; TraceMod(x, a, F); NTL_OPT_RETURN(ZZ_pE, x); } + +void TraceMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEX& f); + +inline ZZ_pE TraceMod(const ZZ_pEX& a, const ZZ_pEX& f) + { ZZ_pE x; TraceMod(x, a, f); NTL_OPT_RETURN(ZZ_pE, x); } + + + + + +void NormMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEX& f); + +inline ZZ_pE NormMod(const ZZ_pEX& a, const ZZ_pEX& f) + { ZZ_pE x; NormMod(x, a, f); NTL_OPT_RETURN(ZZ_pE, x); } + +void resultant(ZZ_pE& rres, const ZZ_pEX& a, const ZZ_pEX& b); + +inline ZZ_pE resultant(const ZZ_pEX& a, const ZZ_pEX& b) + { ZZ_pE x; resultant(x, a, b); NTL_OPT_RETURN(ZZ_pE, x); } + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/ZZ_pEXFactoring.h b/thirdparty/linux/ntl/include/NTL/ZZ_pEXFactoring.h new file mode 100644 index 0000000000..6eabd42efc --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/ZZ_pEXFactoring.h @@ -0,0 +1,192 @@ + 
+#ifndef NTL_ZZ_pEXFactoring__H +#define NTL_ZZ_pEXFactoring__H + +#include <NTL/pair_ZZ_pEX_long.h> + +NTL_OPEN_NNS + + +void SquareFreeDecomp(vec_pair_ZZ_pEX_long& u, const ZZ_pEX& f); +inline vec_pair_ZZ_pEX_long SquareFreeDecomp(const ZZ_pEX& f) + { vec_pair_ZZ_pEX_long x; SquareFreeDecomp(x, f); return x; } + + +// Performs square-free decomposition. +// f must be monic. +// If f = prod_i g_i^i, then u is set to a list of pairs (g_i, i). +// The list is in increasing order of i, with trivial terms +// (i.e., g_i = 1) deleted. + + +void FindRoots(vec_ZZ_pE& x, const ZZ_pEX& f); +inline vec_ZZ_pE FindRoots(const ZZ_pEX& f) + { vec_ZZ_pE x; FindRoots(x, f); return x; } + +// f is monic, and has deg(f) distinct roots. +// returns the list of roots + + +void FindRoot(ZZ_pE& root, const ZZ_pEX& f); +inline ZZ_pE FindRoot(const ZZ_pEX& f) + { ZZ_pE x; FindRoot(x, f); return x; } + + +// finds a single root of f. +// assumes that f is monic and splits into distinct linear factors + + +extern +NTL_CHEAP_THREAD_LOCAL +long ZZ_pEX_GCDTableSize; /* = 4 */ +// Controls GCD blocking for NewDDF + +extern +NTL_CHEAP_THREAD_LOCAL +double ZZ_pEXFileThresh; +// external files are used for baby/giant steps if size +// of these tables exceeds ZZ_pEXFileThresh KB. + + + +void NewDDF(vec_pair_ZZ_pEX_long& factors, + const ZZ_pEX& f, const ZZ_pEX& h, long verbose=0); +inline vec_pair_ZZ_pEX_long NewDDF(const ZZ_pEX& f, const ZZ_pEX& h, + long verbose=0) + { vec_pair_ZZ_pEX_long x; NewDDF(x, f, h, verbose); return x; } + + + + + +void EDF(vec_ZZ_pEX& factors, const ZZ_pEX& f, const ZZ_pEX& b, + long d, long verbose=0); +inline vec_ZZ_pEX EDF(const ZZ_pEX& f, const ZZ_pEX& b, + long d, long verbose=0) + { vec_ZZ_pEX x; EDF(x, f, b, d, verbose); return x; } + + +// Performs equal-degree factorization. +// f is monic, square-free, and all irreducible factors have the same degree. +// b = X^p mod f. +// d = degree of irreducible factors of f +// Space for the trace-map computation can be controlled via ComposeBound. + + + +void RootEDF(vec_ZZ_pEX& factors, const ZZ_pEX& f, long verbose=0); +inline vec_ZZ_pEX RootEDF(const ZZ_pEX& f, long verbose=0) + { vec_ZZ_pEX x; RootEDF(x, f, verbose); return x; } + + +// EDF for d==1 + +void SFCanZass(vec_ZZ_pEX& factors, const ZZ_pEX& f, long verbose=0); +inline vec_ZZ_pEX SFCanZass(const ZZ_pEX& f, long verbose=0) + { vec_ZZ_pEX x; SFCanZass(x, f, verbose); return x; } + + +// Assumes f is monic and square-free. +// returns list of factors of f. +// Uses "Cantor/Zassenhaus" approach. + + + +void CanZass(vec_pair_ZZ_pEX_long& factors, const ZZ_pEX& f, + long verbose=0); +inline vec_pair_ZZ_pEX_long CanZass(const ZZ_pEX& f, long verbose=0) + { vec_pair_ZZ_pEX_long x; CanZass(x, f, verbose); return x; } + + +// returns a list of factors, with multiplicities. +// f must be monic. +// Uses "Cantor/Zassenhaus" approach. + + +void mul(ZZ_pEX& f, const vec_pair_ZZ_pEX_long& v); +inline ZZ_pEX mul(const vec_pair_ZZ_pEX_long& v) + { ZZ_pEX x; mul(x, v); return x; } + + +// multiplies polynomials, with multiplicities + + +/************************************************************* + + irreducible poly's: tests and constructions + +**************************************************************/ + +long ProbIrredTest(const ZZ_pEX& f, long iter=1); + +// performs a fast, probabilistic irreducibility test +// the test can err only if f is reducible, and the +// error probability is bounded by p^{-iter}. + +long DetIrredTest(const ZZ_pEX& f); + +// performs a recursive deterministic irreducibility test +// fast in the worst-case (when input is irreducible). 
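As a concrete round trip through the factoring routines above (CanZass and the mul that multiplies a factorization back out), here is a hypothetical test program; the small parameters are picked only to keep it fast:

   #include <NTL/ZZ_pEXFactoring.h>
   #include <NTL/ZZ_pXFactoring.h>   // BuildIrred over ZZ_p

   using namespace NTL;

   int main()
   {
      ZZ_p::init(ZZ(5));
      ZZ_pX P;
      BuildIrred(P, 3);
      ZZ_pE::init(P);               // ZZ_pE = GF(5^3)

      ZZ_pEX f = random_ZZ_pEX(4);
      SetCoeff(f, 4);               // monic, deg(f) = 4
      f *= f;                       // force a repeated factor

      vec_pair_ZZ_pEX_long factors;
      CanZass(factors, f);          // irreducible factors + multiplicities

      ZZ_pEX g;
      mul(g, factors);              // multiply the factorization back out
      // g == f holds here
      return 0;
   }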
+ +long IterIrredTest(const ZZ_pEX& f); + +// performs an iterative deterministic irreducibility test, +// based on DDF. Fast on average (when f has a small factor). + +void BuildIrred(ZZ_pEX& f, long n); +inline ZZ_pEX BuildIrred_ZZ_pEX(long n) + { ZZ_pEX x; BuildIrred(x, n); NTL_OPT_RETURN(ZZ_pEX, x); } + + +// Build a monic irreducible poly of degree n. + +void BuildRandomIrred(ZZ_pEX& f, const ZZ_pEX& g); +inline ZZ_pEX BuildRandomIrred(const ZZ_pEX& g) + { ZZ_pEX x; BuildRandomIrred(x, g); NTL_OPT_RETURN(ZZ_pEX, x); } + + +// g is a monic irreducible polynomial. +// constructs a random monic irreducible polynomial f of the same degree. + + +long RecComputeDegree(const ZZ_pEX& h, const ZZ_pEXModulus& F); + +// f = F.f is assumed to be an "equal degree" polynomial +// h = X^p mod f +// the common degree of the irreducible factors of f is computed +// This routine is useful in counting points on elliptic curves + + +long IterComputeDegree(const ZZ_pEX& h, const ZZ_pEXModulus& F); + + +void TraceMap(ZZ_pEX& w, const ZZ_pEX& a, long d, const ZZ_pEXModulus& F, + const ZZ_pEX& b); + +inline ZZ_pEX TraceMap(const ZZ_pEX& a, long d, const ZZ_pEXModulus& F, + const ZZ_pEX& b) + { ZZ_pEX x; TraceMap(x, a, d, F, b); return x; } + + +// w = a+a^q+...+a^{q^{d-1}} mod f; +// it is assumed that d >= 0, and b = X^q mod f, q a power of p +// Space allocation can be controlled via ComposeBound (see <NTL/ZZ_pEX.h>) + + + +void PowerCompose(ZZ_pEX& w, const ZZ_pEX& a, long d, const ZZ_pEXModulus& F); + +inline ZZ_pEX PowerCompose(const ZZ_pEX& a, long d, const ZZ_pEXModulus& F) + { ZZ_pEX x; PowerCompose(x, a, d, F); return x; } + + +// w = X^{q^d} mod f; +// it is assumed that d >= 0, and a = X^q mod f, q a power of p +// Space allocation can be controlled via ComposeBound (see <NTL/ZZ_pEX.h>) + + + + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/ZZ_pX.h b/thirdparty/linux/ntl/include/NTL/ZZ_pX.h new file mode 100644 index 0000000000..445665ceae --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/ZZ_pX.h @@ -0,0 +1,1292 @@ + + +#ifndef NTL_ZZ_pX__H +#define NTL_ZZ_pX__H + +#include <NTL/vector.h> +#include <NTL/lzz_pX.h> +#include <NTL/vec_ZZ_p.h> +#include <NTL/FFT.h> +#include <NTL/ZZVec.h> +#include <NTL/Lazy.h> +#include <NTL/SmartPtr.h> + +NTL_OPEN_NNS + + + + +// some cross-over points +// macros are used so as to be consistent with zz_pX + +#define NTL_ZZ_pX_FFT_CROSSOVER (20) +#define NTL_ZZ_pX_NEWTON_CROSSOVER (45) +#define NTL_ZZ_pX_DIV_CROSSOVER (90) +#define NTL_ZZ_pX_HalfGCD_CROSSOVER (25) +#define NTL_ZZ_pX_GCD_CROSSOVER (180) +#define NTL_ZZ_pX_BERMASS_CROSSOVER (90) +#define NTL_ZZ_pX_TRACE_CROSSOVER (90) + + + +/************************************************************ + + ZZ_pX + +The class ZZ_pX implements polynomial arithmetic modulo p. +Polynomials are represented as vec_ZZ_p's. +If f is a ZZ_pX, then f.rep is a vec_ZZ_p. +The zero polynomial is represented as a zero length vector. +Otherwise, f.rep[0] is the constant-term, and f.rep[f.rep.length()-1] +is the leading coefficient, which is always non-zero. +The member f.rep is public, so the vector representation is fully +accessible. +Use the member function normalize() to strip leading zeros. 
+ +**************************************************************/ + +class ZZ_pE; // forward declaration +class ZZ_pXModulus; +class FFTRep; +class ZZ_pXMultiplier; + +class ZZ_pX { public: +typedef ZZ_p coeff_type; +typedef ZZ_pE residue_type; +typedef ZZ_pXModulus modulus_type; +typedef ZZ_pXMultiplier multiplier_type; +typedef FFTRep fft_type; + +typedef vec_ZZ_p VectorBaseType; + + +vec_ZZ_p rep; + + +/*************************************************************** + + Constructors, Destructors, and Assignment + +****************************************************************/ + + +ZZ_pX() { } +// initial value 0 + + +explicit ZZ_pX(long a) { *this = a; } +explicit ZZ_pX(const ZZ_p& a) { *this = a; } + + +ZZ_pX(INIT_SIZE_TYPE, long n) { rep.SetMaxLength(n); } + +ZZ_pX(const ZZ_pX& a) : rep(a.rep) { } +// initial value is a + + +ZZ_pX& operator=(const ZZ_pX& a) + { rep = a.rep; return *this; } + +~ZZ_pX() { } + +void normalize(); +// strip leading zeros + +void SetMaxLength(long n) +// pre-allocate space for n coefficients. +// Value is unchanged + + { rep.SetMaxLength(n); } + + +void kill() +// free space held by this polynomial. Value becomes 0. + + { rep.kill(); } + + +void SetLength(long n) { rep.SetLength(n); } +ZZ_p& operator[](long i) { return rep[i]; } +const ZZ_p& operator[](long i) const { return rep[i]; } + + +static const ZZ_pX& zero(); + + +ZZ_pX(ZZ_pX& x, INIT_TRANS_TYPE) : rep(x.rep, INIT_TRANS) { } + +inline ZZ_pX(long i, const ZZ_p& c); +inline ZZ_pX(long i, long c); + +inline ZZ_pX(INIT_MONO_TYPE, long i, const ZZ_p& c); +inline ZZ_pX(INIT_MONO_TYPE, long i, long c); +inline ZZ_pX(INIT_MONO_TYPE, long i); + +ZZ_pX& operator=(long a); +ZZ_pX& operator=(const ZZ_p& a); + + +void swap(ZZ_pX& x) +{ + rep.swap(x.rep); +} + + +}; + + + + +/******************************************************************** + + input and output + +I/O format: + + [a_0 a_1 ... a_n], + +represents the polynomial a_0 + a_1*X + ... + a_n*X^n. + +On output, all coefficients will be integers between 0 and p-1, +and a_n not zero (the zero polynomial is [ ]). +On input, the coefficients are arbitrary integers which are +then reduced modulo p, and leading zeros stripped. + +*********************************************************************/ + + +NTL_SNS istream& operator>>(NTL_SNS istream& s, ZZ_pX& x); +NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const ZZ_pX& a); + + + + +/********************************************************** + + Some utility routines + +***********************************************************/ + + +inline long deg(const ZZ_pX& a) { return a.rep.length() - 1; } +// degree of a polynomial. +// note that the zero polynomial has degree -1. 
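To make the representation and I/O format concrete, a small sketch (p = 7 is an arbitrary toy choice):

   #include <NTL/ZZ_pX.h>
   #include <iostream>

   using namespace NTL;

   int main()
   {
      ZZ_p::init(ZZ(7));

      ZZ_pX f;
      f.SetLength(3);          // coefficient vector [0 0 0]
      f[0] = 3;
      f[1] = 5;                // f = 3 + 5*X, leading zero still stored
      f.normalize();           // strip leading zeros: now deg(f) == 1

      std::cout << f << "\n";  // prints [3 5] in the I/O format above
      return 0;
   }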
+ +const ZZ_p& coeff(const ZZ_pX& a, long i); +// zero if i not in range + +void GetCoeff(ZZ_p& x, const ZZ_pX& a, long i); +// x = a[i], or zero if i not in range + +const ZZ_p& LeadCoeff(const ZZ_pX& a); +// zero if a == 0 + +const ZZ_p& ConstTerm(const ZZ_pX& a); +// zero if a == 0 + +void SetCoeff(ZZ_pX& x, long i, const ZZ_p& a); +// x[i] = a, error is raised if i < 0 + +void SetCoeff(ZZ_pX& x, long i, long a); + +void SetCoeff(ZZ_pX& x, long i); +// x[i] = 1, error is raised if i < 0 + +inline ZZ_pX::ZZ_pX(long i, const ZZ_p& a) { SetCoeff(*this, i, a); } +inline ZZ_pX::ZZ_pX(long i, long a) { SetCoeff(*this, i, a); } + +inline ZZ_pX::ZZ_pX(INIT_MONO_TYPE, long i, const ZZ_p& a) { SetCoeff(*this, i, a); } +inline ZZ_pX::ZZ_pX(INIT_MONO_TYPE, long i, long a) { SetCoeff(*this, i, a); } +inline ZZ_pX::ZZ_pX(INIT_MONO_TYPE, long i) { SetCoeff(*this, i); } + +void SetX(ZZ_pX& x); +// x is set to the monomial X + +long IsX(const ZZ_pX& a); +// test if a = X + +inline void clear(ZZ_pX& x) +// x = 0 + + { x.rep.SetLength(0); } + +inline void set(ZZ_pX& x) +// x = 1 + + { x.rep.SetLength(1); set(x.rep[0]); } + +inline void swap(ZZ_pX& x, ZZ_pX& y) +// swap x & y (only pointers are swapped) + + { x.swap(y); } + +void random(ZZ_pX& x, long n); +inline ZZ_pX random_ZZ_pX(long n) + { ZZ_pX x; random(x, n); NTL_OPT_RETURN(ZZ_pX, x); } +// generate a random polynomial of degree < n + +void trunc(ZZ_pX& x, const ZZ_pX& a, long m); +// x = a % X^m + +inline ZZ_pX trunc(const ZZ_pX& a, long m) + { ZZ_pX x; trunc(x, a, m); NTL_OPT_RETURN(ZZ_pX, x); } + +void RightShift(ZZ_pX& x, const ZZ_pX& a, long n); +// x = a/X^n + +inline ZZ_pX RightShift(const ZZ_pX& a, long n) + { ZZ_pX x; RightShift(x, a, n); NTL_OPT_RETURN(ZZ_pX, x); } + +void LeftShift(ZZ_pX& x, const ZZ_pX& a, long n); +// x = a*X^n + +inline ZZ_pX LeftShift(const ZZ_pX& a, long n) + { ZZ_pX x; LeftShift(x, a, n); NTL_OPT_RETURN(ZZ_pX, x); } + +#ifndef NTL_TRANSITION + +inline ZZ_pX operator>>(const ZZ_pX& a, long n) + { ZZ_pX x; RightShift(x, a, n); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator<<(const ZZ_pX& a, long n) + { ZZ_pX x; LeftShift(x, a, n); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX& operator<<=(ZZ_pX& x, long n) + { LeftShift(x, x, n); return x; } + +inline ZZ_pX& operator>>=(ZZ_pX& x, long n) + { RightShift(x, x, n); return x; } + +#endif + + + +void diff(ZZ_pX& x, const ZZ_pX& a); +// x = derivative of a + +inline ZZ_pX diff(const ZZ_pX& a) + { ZZ_pX x; diff(x, a); NTL_OPT_RETURN(ZZ_pX, x); } + + +void MakeMonic(ZZ_pX& x); + +void reverse(ZZ_pX& c, const ZZ_pX& a, long hi); + +inline ZZ_pX reverse(const ZZ_pX& a, long hi) + { ZZ_pX x; reverse(x, a, hi); NTL_OPT_RETURN(ZZ_pX, x); } + +inline void reverse(ZZ_pX& c, const ZZ_pX& a) +{ reverse(c, a, deg(a)); } + +inline ZZ_pX reverse(const ZZ_pX& a) + { ZZ_pX x; reverse(x, a); NTL_OPT_RETURN(ZZ_pX, x); } + +inline void VectorCopy(vec_ZZ_p& x, const ZZ_pX& a, long n) + { VectorCopy(x, a.rep, n); } + +inline vec_ZZ_p VectorCopy(const ZZ_pX& a, long n) + { return VectorCopy(a.rep, n); } + + + + +/******************************************************************* + + conversion routines + +********************************************************************/ + + + +void conv(ZZ_pX& x, long a); +void conv(ZZ_pX& x, const ZZ& a); +void conv(ZZ_pX& x, const ZZ_p& a); +void conv(ZZ_pX& x, const vec_ZZ_p& a); + +inline ZZ_pX to_ZZ_pX(long a) + { ZZ_pX x; conv(x, a); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX to_ZZ_pX(const ZZ& a) + { ZZ_pX x; conv(x, a); NTL_OPT_RETURN(ZZ_pX, 
x); } + +inline ZZ_pX to_ZZ_pX(const ZZ_p& a) + { ZZ_pX x; conv(x, a); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX to_ZZ_pX(const vec_ZZ_p& a) + { ZZ_pX x; conv(x, a); NTL_OPT_RETURN(ZZ_pX, x); } + + + + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(ZZ_pX& x, const ZZ_pX& a) + { x = a; } + +inline void conv(vec_ZZ_p& x, const ZZ_pX& a) + { x = a.rep; } + + +/* ------------------------------------- */ + + + +/************************************************************* + + Comparison + +**************************************************************/ + +long IsZero(const ZZ_pX& a); + +long IsOne(const ZZ_pX& a); + +inline long operator==(const ZZ_pX& a, const ZZ_pX& b) +{ + return a.rep == b.rep; +} + +inline long operator!=(const ZZ_pX& a, const ZZ_pX& b) +{ + return !(a == b); +} + +long operator==(const ZZ_pX& a, long b); +long operator==(const ZZ_pX& a, const ZZ_p& b); + +inline long operator==(long a, const ZZ_pX& b) { return b == a; } +inline long operator==(const ZZ_p& a, const ZZ_pX& b) { return b == a; } + +inline long operator!=(const ZZ_pX& a, long b) { return !(a == b); } +inline long operator!=(const ZZ_pX& a, const ZZ_p& b) { return !(a == b); } + +inline long operator!=(long a, const ZZ_pX& b) { return !(a == b); } +inline long operator!=(const ZZ_p& a, const ZZ_pX& b) { return !(a == b); } + + +/*************************************************************** + + Addition + +****************************************************************/ + +void add(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); +// x = a + b + +void sub(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); +// x = a - b + +void negate(ZZ_pX& x, const ZZ_pX& a); +// x = -a + +// scalar versions + +void add(ZZ_pX& x, const ZZ_pX& a, const ZZ_p& b); // x = a + b +void add(ZZ_pX& x, const ZZ_pX& a, long b); + +inline void add(ZZ_pX& x, const ZZ_p& a, const ZZ_pX& b) { add(x, b, a); } +inline void add(ZZ_pX& x, long a, const ZZ_pX& b) { add(x, b, a); } + + +void sub(ZZ_pX & x, const ZZ_pX& a, const ZZ_p& b); // x = a - b + +void sub(ZZ_pX& x, const ZZ_pX& a, long b); +void sub(ZZ_pX& x, const ZZ_pX& a, const ZZ_p& b); + +void sub(ZZ_pX& x, long a, const ZZ_pX& b); +void sub(ZZ_pX& x, const ZZ_p& a, const ZZ_pX& b); + +inline ZZ_pX operator+(const ZZ_pX& a, const ZZ_pX& b) + { ZZ_pX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator+(const ZZ_pX& a, const ZZ_p& b) + { ZZ_pX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator+(const ZZ_pX& a, long b) + { ZZ_pX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator+(const ZZ_p& a, const ZZ_pX& b) + { ZZ_pX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator+(long a, const ZZ_pX& b) + { ZZ_pX x; add(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + + +inline ZZ_pX operator-(const ZZ_pX& a, const ZZ_pX& b) + { ZZ_pX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator-(const ZZ_pX& a, const ZZ_p& b) + { ZZ_pX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator-(const ZZ_pX& a, long b) + { ZZ_pX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator-(const ZZ_p& a, const ZZ_pX& b) + { ZZ_pX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator-(long a, const ZZ_pX& b) + { ZZ_pX x; sub(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + + +inline ZZ_pX& operator+=(ZZ_pX& x, const ZZ_pX& b) + { add(x, x, b); return x; } + +inline ZZ_pX& operator+=(ZZ_pX& x, const ZZ_p& b) + { add(x, x, b); return x; } + +inline ZZ_pX& 
operator+=(ZZ_pX& x, long b) + { add(x, x, b); return x; } + +inline ZZ_pX& operator-=(ZZ_pX& x, const ZZ_pX& b) + { sub(x, x, b); return x; } + +inline ZZ_pX& operator-=(ZZ_pX& x, const ZZ_p& b) + { sub(x, x, b); return x; } + +inline ZZ_pX& operator-=(ZZ_pX& x, long b) + { sub(x, x, b); return x; } + + +inline ZZ_pX operator-(const ZZ_pX& a) + { ZZ_pX x; negate(x, a); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX& operator++(ZZ_pX& x) { add(x, x, 1); return x; } +inline void operator++(ZZ_pX& x, int) { add(x, x, 1); } +inline ZZ_pX& operator--(ZZ_pX& x) { sub(x, x, 1); return x; } +inline void operator--(ZZ_pX& x, int) { sub(x, x, 1); } + +/***************************************************************** + + Multiplication + +******************************************************************/ + + +void mul(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); +// x = a * b + +void sqr(ZZ_pX& x, const ZZ_pX& a); +inline ZZ_pX sqr(const ZZ_pX& a) + { ZZ_pX x; sqr(x, a); NTL_OPT_RETURN(ZZ_pX, x); } +// x = a^2 + +void mul(ZZ_pX & x, const ZZ_pX& a, const ZZ_p& b); +void mul(ZZ_pX& x, const ZZ_pX& a, long b); + + +inline void mul(ZZ_pX& x, const ZZ_p& a, const ZZ_pX& b) + { mul(x, b, a); } + +inline void mul(ZZ_pX& x, long a, const ZZ_pX& b) + { mul(x, b, a); } + +inline ZZ_pX operator*(const ZZ_pX& a, const ZZ_pX& b) + { ZZ_pX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator*(const ZZ_pX& a, const ZZ_p& b) + { ZZ_pX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator*(const ZZ_pX& a, long b) + { ZZ_pX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator*(const ZZ_p& a, const ZZ_pX& b) + { ZZ_pX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator*(long a, const ZZ_pX& b) + { ZZ_pX x; mul(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX& operator*=(ZZ_pX& x, const ZZ_pX& b) + { mul(x, x, b); return x; } + +inline ZZ_pX& operator*=(ZZ_pX& x, const ZZ_p& b) + { mul(x, x, b); return x; } + +inline ZZ_pX& operator*=(ZZ_pX& x, long b) + { mul(x, x, b); return x; } + + +void PlainMul(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); +// always uses the "classical" algorithm + +void PlainSqr(ZZ_pX& x, const ZZ_pX& a); +// always uses the "classical" algorithm + + +void FFTMul(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); +// always uses the FFT + +void FFTSqr(ZZ_pX& x, const ZZ_pX& a); +// always uses the FFT + +void MulTrunc(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, long n); +// x = a * b % X^n + +inline ZZ_pX MulTrunc(const ZZ_pX& a, const ZZ_pX& b, long n) + { ZZ_pX x; MulTrunc(x, a, b, n); NTL_OPT_RETURN(ZZ_pX, x); } + +void PlainMulTrunc(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, long n); +void FFTMulTrunc(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, long n); + +void SqrTrunc(ZZ_pX& x, const ZZ_pX& a, long n); +// x = a^2 % X^n + +inline ZZ_pX SqrTrunc(const ZZ_pX& a, long n) + { ZZ_pX x; SqrTrunc(x, a, n); NTL_OPT_RETURN(ZZ_pX, x); } + +void PlainSqrTrunc(ZZ_pX& x, const ZZ_pX& a, long n); +void FFTSqrTrunc(ZZ_pX& x, const ZZ_pX& a, long n); + + +void power(ZZ_pX& x, const ZZ_pX& a, long e); +inline ZZ_pX power(const ZZ_pX& a, long e) + { ZZ_pX x; power(x, a, e); NTL_OPT_RETURN(ZZ_pX, x); } + + +// The following data structures and routines allow one +// to hand-craft various algorithms, using the FFT convolution +// algorithms directly. +// Look in the file ZZ_pX.c for examples. 
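A sketch of such hand-crafting with the FFT primitives declared below; it is equivalent in effect to FFTMul(x, a, b), and assumes a and b are nonzero:

   #include <NTL/ZZ_pX.h>

   using namespace NTL;

   void HandMul(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b)
   {
      long d = deg(a) + deg(b);        // degree of the product
      long k = NextPowerOfTwo(d + 1);  // 2^k points hold d+1 coefficients

      FFTRep Ra, Rb, Rc;
      ToFFTRep(Ra, a, k);              // evaluate at the 2^k-th roots of unity
      ToFFTRep(Rb, b, k);
      mul(Rc, Ra, Rb);                 // pointwise product
      FromFFTRep(x, Rc, 0, d);         // interpolate coefficients 0..d
   }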
+ + + + +// FFT representation of polynomials + +class FFTRep { +public: + long k; // a 2^k point representation + long MaxK; // maximum space allocated + long NumPrimes; + Unique2DArray<long> tbl; + + FFTRep() : k(-1), MaxK(-1), NumPrimes(0) { } + + FFTRep(const FFTRep& R) : k(-1), MaxK(-1), NumPrimes(0) + { *this = R; } + + FFTRep(INIT_SIZE_TYPE, long InitK) : k(-1), MaxK(-1), NumPrimes(0) + { SetSize(InitK); } + + FFTRep& operator=(const FFTRep& R); + void SetSize(long NewK); + void DoSetSize(long NewK, long NewNumPrimes); +}; + + +void ToFFTRep(FFTRep& y, const ZZ_pX& x, long k, long lo, long hi); +// computes an n = 2^k point convolution of x[lo..hi]. + +inline void ToFFTRep(FFTRep& y, const ZZ_pX& x, long k) + + { ToFFTRep(y, x, k, 0, deg(x)); } + +void RevToFFTRep(FFTRep& y, const vec_ZZ_p& x, + long k, long lo, long hi, long offset); +// computes an n = 2^k point convolution of X^offset*x[lo..hi] mod X^n-1 +// using "inverted" evaluation points. + + +void FromFFTRep(ZZ_pX& x, FFTRep& y, long lo, long hi); +// converts from FFT-representation to coefficient representation +// only the coefficients lo..hi are computed +// NOTE: this version destroys the data in y + +// non-destructive versions of the above + +void NDFromFFTRep(ZZ_pX& x, const FFTRep& y, long lo, long hi, FFTRep& temp); +void NDFromFFTRep(ZZ_pX& x, const FFTRep& y, long lo, long hi); + +void RevFromFFTRep(vec_ZZ_p& x, FFTRep& y, long lo, long hi); + + // converts from FFT-representation to coefficient representation + // using "inverted" evaluation points. + // only the coefficients lo..hi are computed + + + + +void FromFFTRep(ZZ_p* x, FFTRep& y, long lo, long hi); +// convert out coefficients lo..hi of y, store result in x. +// no normalization is done. + + +// direct manipulation of FFT reps + +void mul(FFTRep& z, const FFTRep& x, const FFTRep& y); +void sub(FFTRep& z, const FFTRep& x, const FFTRep& y); +void add(FFTRep& z, const FFTRep& x, const FFTRep& y); + +void reduce(FFTRep& x, const FFTRep& a, long k); +// reduces a 2^l point FFT-rep to a 2^k point FFT-rep + +void AddExpand(FFTRep& x, const FFTRep& a); +// x = x + (an "expanded" version of a) + + + + + +// This data structure holds unconvoluted modular representations +// of polynomials + +class ZZ_pXModRep { +private: + ZZ_pXModRep(const ZZ_pXModRep&); // disabled + void operator=(const ZZ_pXModRep&); // disabled + +public: + long n; + long MaxN; + long NumPrimes; + Unique2DArray<long> tbl; + + void SetSize(long NewN); + + ZZ_pXModRep() : n(0), MaxN(0), NumPrimes(0) { } + ZZ_pXModRep(INIT_SIZE_TYPE, long NewN) : n(0), MaxN(0), NumPrimes(0) + { SetSize(NewN); } +}; + + +void ToZZ_pXModRep(ZZ_pXModRep& x, const ZZ_pX& a, long lo, long hi); + +void ToFFTRep(FFTRep& x, const ZZ_pXModRep& a, long k, long lo, long hi); +// converts coefficients lo..hi to a 2^k-point FFTRep.
+// must have hi-lo+1 < 2^k + + +void FromFFTRep(ZZ_pXModRep& x, const FFTRep& a); +// for testing and timing purposes only -- converts from FFTRep + +void FromZZ_pXModRep(ZZ_pX& x, const ZZ_pXModRep& a, long lo, long hi); +// for testing and timing purposes only -- converts from ZZ_pXModRep + + + + +/************************************************************* + + Division + +**************************************************************/ + +void DivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b); +// q = a/b, r = a%b + +void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b); +// q = a/b + +void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_p& b); +void div(ZZ_pX& q, const ZZ_pX& a, long b); + + +void rem(ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b); +// r = a%b + +long divide(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0 + +long divide(const ZZ_pX& a, const ZZ_pX& b); +// if b | a, returns 1; otherwise returns 0 + +void InvTrunc(ZZ_pX& x, const ZZ_pX& a, long m); +// computes x = a^{-1} % X^m +// constant term must be non-zero + +inline ZZ_pX InvTrunc(const ZZ_pX& a, long m) + { ZZ_pX x; InvTrunc(x, a, m); NTL_OPT_RETURN(ZZ_pX, x); } + + + +// These always use "classical" arithmetic +void PlainDivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b); +void PlainDiv(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b); +void PlainRem(ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b); + +void PlainRem(ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b, ZZVec& tmp); +void PlainDivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b, + ZZVec& tmp); + + +// These always use FFT arithmetic +void FFTDivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b); +void FFTDiv(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b); +void FFTRem(ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b); + +void PlainInvTrunc(ZZ_pX& x, const ZZ_pX& a, long m); +// always uses "classical" algorithm +// ALIAS RESTRICTION: input may not alias output + +void NewtonInvTrunc(ZZ_pX& x, const ZZ_pX& a, long m); +// uses a Newton Iteration with the FFT. +// ALIAS RESTRICTION: input may not alias output + + +inline ZZ_pX operator/(const ZZ_pX& a, const ZZ_pX& b) + { ZZ_pX x; div(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator/(const ZZ_pX& a, const ZZ_p& b) + { ZZ_pX x; div(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX operator/(const ZZ_pX& a, long b) + { ZZ_pX x; div(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX& operator/=(ZZ_pX& x, const ZZ_p& b) + { div(x, x, b); return x; } + +inline ZZ_pX& operator/=(ZZ_pX& x, long b) + { div(x, x, b); return x; } + +inline ZZ_pX& operator/=(ZZ_pX& x, const ZZ_pX& b) + { div(x, x, b); return x; } + + +inline ZZ_pX operator%(const ZZ_pX& a, const ZZ_pX& b) + { ZZ_pX x; rem(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX& operator%=(ZZ_pX& x, const ZZ_pX& b) + { rem(x, x, b); return x; } + + +/*********************************************************** + + GCD's + +************************************************************/ + + +void GCD(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); +// x = GCD(a, b), x is always monic (or zero if a==b==0).
+ +inline ZZ_pX GCD(const ZZ_pX& a, const ZZ_pX& b) + { ZZ_pX x; GCD(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + +void XGCD(ZZ_pX& d, ZZ_pX& s, ZZ_pX& t, const ZZ_pX& a, const ZZ_pX& b); +// d = gcd(a,b), a s + b t = d + +void PlainXGCD(ZZ_pX& d, ZZ_pX& s, ZZ_pX& t, const ZZ_pX& a, const ZZ_pX& b); +// same as above, but uses classical algorithm + + +void PlainGCD(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b); +// always uses "classical" arithmetic + + +class ZZ_pXMatrix { +private: + + ZZ_pXMatrix(const ZZ_pXMatrix&); // disable + ZZ_pX elts[2][2]; + +public: + + ZZ_pXMatrix() { } + ~ZZ_pXMatrix() { } + + void operator=(const ZZ_pXMatrix&); + ZZ_pX& operator() (long i, long j) { return elts[i][j]; } + const ZZ_pX& operator() (long i, long j) const { return elts[i][j]; } +}; + + +void HalfGCD(ZZ_pXMatrix& M_out, const ZZ_pX& U, const ZZ_pX& V, long d_red); +// deg(U) > deg(V), 1 <= d_red <= deg(U)+1. +// +// This computes a 2 x 2 polynomial matrix M_out such that +// M_out * (U, V)^T = (U', V')^T, +// where U', V' are consecutive polynomials in the Euclidean remainder +// sequence of U, V, and V' is the polynomial of highest degree +// satisfying deg(V') <= deg(U) - d_red. + +void XHalfGCD(ZZ_pXMatrix& M_out, ZZ_pX& U, ZZ_pX& V, long d_red); + +// same as above, except that U is replaced by U', and V by V' + + +/************************************************************* + + Modular Arithmetic without pre-conditioning + +**************************************************************/ + +// arithmetic mod f. +// all inputs and outputs are polynomials of degree less than deg(f). +// ASSUMPTION: f is assumed monic, and deg(f) > 0. +// NOTE: if you want to do many computations with a fixed f, +// use the ZZ_pXModulus data structure and associated routines below. + + + +void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, const ZZ_pX& f); +// x = (a * b) % f + +inline ZZ_pX MulMod(const ZZ_pX& a, const ZZ_pX& b, const ZZ_pX& f) + { ZZ_pX x; MulMod(x, a, b, f); NTL_OPT_RETURN(ZZ_pX, x); } + +void SqrMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f); +// x = a^2 % f + +inline ZZ_pX SqrMod(const ZZ_pX& a, const ZZ_pX& f) + { ZZ_pX x; SqrMod(x, a, f); NTL_OPT_RETURN(ZZ_pX, x); } + +void MulByXMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f); +// x = (a * X) mod f + +inline ZZ_pX MulByXMod(const ZZ_pX& a, const ZZ_pX& f) + { ZZ_pX x; MulByXMod(x, a, f); NTL_OPT_RETURN(ZZ_pX, x); } + + + +void InvMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f); +// x = a^{-1} % f, error if a is not invertible + +inline ZZ_pX InvMod(const ZZ_pX& a, const ZZ_pX& f) + { ZZ_pX x; InvMod(x, a, f); NTL_OPT_RETURN(ZZ_pX, x); } + +long InvModStatus(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f); +// if (a, f) = 1, returns 0 and sets x = a^{-1} % f +// otherwise, returns 1 and sets x = (a, f) + + + + +/****************************************************************** + + Modular Arithmetic with Pre-conditioning + +*******************************************************************/ + + +// If you need to do a lot of arithmetic modulo a fixed f, +// build ZZ_pXModulus F for f. This pre-computes information about f +// that speeds up the computation a great deal. + + +class ZZ_pXModulus { +public: + ZZ_pXModulus() : UseFFT(0), n(-1) { } + ~ZZ_pXModulus() { } + + + // the following members may become private in future + ZZ_pX f; // the modulus + long UseFFT; // flag indicating whether FFT should be used.
+ long n; // n = deg(f) + long k; // least k s/t 2^k >= n + long l; // least l s/t 2^l >= 2n-3 + FFTRep FRep; // 2^k point rep of f + // H = rev((rev(f))^{-1} rem X^{n-1}) + FFTRep HRep; // 2^l point rep of H + + OptionalVal< Lazy<vec_ZZ_p> > tracevec; + // an extra level of indirection to ensure the class + // can be used in a Vec (there may be a mutex in the Lazy object) + + // but these will remain public + ZZ_pXModulus(const ZZ_pX& ff); + + const ZZ_pX& val() const { return f; } + operator const ZZ_pX& () const { return f; } + +}; + +inline long deg(const ZZ_pXModulus& F) { return F.n; } + +void build(ZZ_pXModulus& F, const ZZ_pX& f); +// deg(f) > 0. + + +void rem21(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F); +// x = a % f +// deg(a) <= 2(n-1), where n = F.n = deg(f) + +void rem(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F); +// x = a % f, no restrictions on deg(a); makes repeated calls to rem21 + +inline ZZ_pX operator%(const ZZ_pX& a, const ZZ_pXModulus& F) + { ZZ_pX x; rem(x, a, F); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX& operator%=(ZZ_pX& x, const ZZ_pXModulus& F) + { rem(x, x, F); return x; } + +void DivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pXModulus& F); + +void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_pXModulus& F); + +inline ZZ_pX operator/(const ZZ_pX& a, const ZZ_pXModulus& F) + { ZZ_pX x; div(x, a, F); NTL_OPT_RETURN(ZZ_pX, x); } + +inline ZZ_pX& operator/=(ZZ_pX& x, const ZZ_pXModulus& F) + { div(x, x, F); return x; } + +void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, const ZZ_pXModulus& F); +// x = (a * b) % f +// deg(a), deg(b) < n + +inline ZZ_pX MulMod(const ZZ_pX& a, const ZZ_pX& b, const ZZ_pXModulus& F) + { ZZ_pX x; MulMod(x, a, b, F); NTL_OPT_RETURN(ZZ_pX, x); } + +void SqrMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F); +// x = a^2 % f +// deg(a) < n + +inline ZZ_pX SqrMod(const ZZ_pX& a, const ZZ_pXModulus& F) + { ZZ_pX x; SqrMod(x, a, F); NTL_OPT_RETURN(ZZ_pX, x); } + +void PowerMod(ZZ_pX& x, const ZZ_pX& a, const ZZ& e, const ZZ_pXModulus& F); +// x = a^e % f, e >= 0 + +inline ZZ_pX PowerMod(const ZZ_pX& a, const ZZ& e, const ZZ_pXModulus& F) + { ZZ_pX x; PowerMod(x, a, e, F); NTL_OPT_RETURN(ZZ_pX, x); } + +inline void PowerMod(ZZ_pX& x, const ZZ_pX& a, long e, const ZZ_pXModulus& F) + { PowerMod(x, a, ZZ_expo(e), F); } + +inline ZZ_pX PowerMod(const ZZ_pX& a, long e, const ZZ_pXModulus& F) + { ZZ_pX x; PowerMod(x, a, e, F); NTL_OPT_RETURN(ZZ_pX, x); } + + + +void PowerXMod(ZZ_pX& x, const ZZ& e, const ZZ_pXModulus& F); +// x = X^e % f, e >= 0 + +inline ZZ_pX PowerXMod(const ZZ& e, const ZZ_pXModulus& F) + { ZZ_pX x; PowerXMod(x, e, F); NTL_OPT_RETURN(ZZ_pX, x); } + +inline void PowerXMod(ZZ_pX& x, long e, const ZZ_pXModulus& F) + { PowerXMod(x, ZZ_expo(e), F); } + +inline ZZ_pX PowerXMod(long e, const ZZ_pXModulus& F) + { ZZ_pX x; PowerXMod(x, e, F); NTL_OPT_RETURN(ZZ_pX, x); } + +void PowerXPlusAMod(ZZ_pX& x, const ZZ_p& a, const ZZ& e, const ZZ_pXModulus& F); +// x = (X + a)^e % f, e >= 0 + +inline ZZ_pX PowerXPlusAMod(const ZZ_p& a, const ZZ& e, const ZZ_pXModulus& F) + { ZZ_pX x; PowerXPlusAMod(x, a, e, F); NTL_OPT_RETURN(ZZ_pX, x); } + +inline void PowerXPlusAMod(ZZ_pX& x, const ZZ_p& a, long e, const ZZ_pXModulus& F) + { PowerXPlusAMod(x, a, ZZ_expo(e), F); } + + +inline ZZ_pX PowerXPlusAMod(const ZZ_p& a, long e, const ZZ_pXModulus& F) + { ZZ_pX x; PowerXPlusAMod(x, a, e, F); NTL_OPT_RETURN(ZZ_pX, x); } + +// If you need to compute a * b % f for a fixed b, but for many a's +// (for example, computing powers of b modulo f), it is
+// much more efficient to first build a ZZ_pXMultiplier B for b, +// and then use the routine below. + +class ZZ_pXMultiplier { +public: + ZZ_pXMultiplier() : UseFFT(0) { } + ZZ_pXMultiplier(const ZZ_pX& b, const ZZ_pXModulus& F); + + ~ZZ_pXMultiplier() { } + + + // the following members may become private in the future + ZZ_pX b; + long UseFFT; + FFTRep B1; + FFTRep B2; + + // but this will remain public + const ZZ_pX& val() const { return b; } + +}; + +void build(ZZ_pXMultiplier& B, const ZZ_pX& b, const ZZ_pXModulus& F); + +void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXMultiplier& B, + const ZZ_pXModulus& F); + +inline ZZ_pX MulMod(const ZZ_pX& a, const ZZ_pXMultiplier& B, + const ZZ_pXModulus& F) + { ZZ_pX x; MulMod(x, a, B, F); NTL_OPT_RETURN(ZZ_pX, x); } + +// x = (a * b) % f + + +/******************************************************* + + Evaluation and related problems + +********************************************************/ + + +void BuildFromRoots(ZZ_pX& x, const vec_ZZ_p& a); +// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n = a.length() + +inline ZZ_pX BuildFromRoots(const vec_ZZ_p& a) + { ZZ_pX x; BuildFromRoots(x, a); NTL_OPT_RETURN(ZZ_pX, x); } + + +void eval(ZZ_p& b, const ZZ_pX& f, const ZZ_p& a); +// b = f(a) + +inline ZZ_p eval(const ZZ_pX& f, const ZZ_p& a) + { ZZ_p x; eval(x, f, a); NTL_OPT_RETURN(ZZ_p, x); } + +void eval(vec_ZZ_p& b, const ZZ_pX& f, const vec_ZZ_p& a); +// b[i] = f(a[i]) + +inline vec_ZZ_p eval(const ZZ_pX& f, const vec_ZZ_p& a) + { vec_ZZ_p x; eval(x, f, a); NTL_OPT_RETURN(vec_ZZ_p, x); } + + +void interpolate(ZZ_pX& f, const vec_ZZ_p& a, const vec_ZZ_p& b); +// computes f such that f(a[i]) = b[i] + +inline ZZ_pX interpolate(const vec_ZZ_p& a, const vec_ZZ_p& b) + { ZZ_pX x; interpolate(x, a, b); NTL_OPT_RETURN(ZZ_pX, x); } + + +/***************************************************************** + + vectors of ZZ_pX's + +*****************************************************************/ + +typedef Vec<ZZ_pX> vec_ZZ_pX; + + + +/********************************************************** + + Modular Composition and Minimal Polynomials + +***********************************************************/ + + +// algorithms for computing g(h) mod f + + + +void CompMod(ZZ_pX& x, const ZZ_pX& g, const ZZ_pX& h, const ZZ_pXModulus& F); +// x = g(h) mod f + +inline ZZ_pX CompMod(const ZZ_pX& g, const ZZ_pX& h, + const ZZ_pXModulus& F) + { ZZ_pX x; CompMod(x, g, h, F); NTL_OPT_RETURN(ZZ_pX, x); } + +void Comp2Mod(ZZ_pX& x1, ZZ_pX& x2, const ZZ_pX& g1, const ZZ_pX& g2, + const ZZ_pX& h, const ZZ_pXModulus& F); +// xi = gi(h) mod f (i=1,2) + +void Comp3Mod(ZZ_pX& x1, ZZ_pX& x2, ZZ_pX& x3, + const ZZ_pX& g1, const ZZ_pX& g2, const ZZ_pX& g3, + const ZZ_pX& h, const ZZ_pXModulus& F); +// xi = gi(h) mod f (i=1..3) + + + +// The routine build (see below) which is implicitly called +// by the various compose and UpdateMap routines builds a table +// of polynomials. +// If ZZ_pXArgBound > 0, then the table is limited in +// size to approximately that many KB. +// If ZZ_pXArgBound <= 0, then it is ignored, and space is allocated +// so as to maximize speed. +// Initially, ZZ_pXArgBound = 0. + + +// If a single h is going to be used with many g's +// then you should build a ZZ_pXArgument for h, +// and then use the compose routine below. +// build computes and stores h, h^2, ..., h^m mod f. +// After this pre-computation, composing a polynomial of degree +// roughly n with h takes n/m multiplies mod f, plus n^2 +// scalar multiplies.
+// Thus, increasing m increases the space requirement and the pre-computation +// time, but reduces the composition time. +// If ZZ_pXArgBound > 0, a table of size less than m may be built. + +struct ZZ_pXArgument { + vec_ZZ_pX H; +}; + +extern NTL_CHEAP_THREAD_LOCAL long ZZ_pXArgBound; + + +void build(ZZ_pXArgument& H, const ZZ_pX& h, const ZZ_pXModulus& F, long m); + +// m must be > 0, otherwise an error is raised + +void CompMod(ZZ_pX& x, const ZZ_pX& g, const ZZ_pXArgument& H, + const ZZ_pXModulus& F); + +inline ZZ_pX +CompMod(const ZZ_pX& g, const ZZ_pXArgument& H, const ZZ_pXModulus& F) + { ZZ_pX x; CompMod(x, g, H, F); NTL_OPT_RETURN(ZZ_pX, x); } + + +#ifndef NTL_TRANSITION + +void UpdateMap(vec_ZZ_p& x, const vec_ZZ_p& a, + const ZZ_pXMultiplier& B, const ZZ_pXModulus& F); + +inline vec_ZZ_p +UpdateMap(const vec_ZZ_p& a, + const ZZ_pXMultiplier& B, const ZZ_pXModulus& F) + { vec_ZZ_p x; UpdateMap(x, a, B, F); + NTL_OPT_RETURN(vec_ZZ_p, x); } + +#endif + + +/* computes (a, b), (a, (b*X)%f), ..., (a, (b*X^{n-1})%f), + where ( , ) denotes the vector inner product. + + This is really a "transposed" MulMod by B. +*/ + +void PlainUpdateMap(vec_ZZ_p& x, const vec_ZZ_p& a, + const ZZ_pX& b, const ZZ_pX& f); + +// same as above, but uses only classical arithmetic + + +void ProjectPowers(vec_ZZ_p& x, const vec_ZZ_p& a, long k, + const ZZ_pX& h, const ZZ_pXModulus& F); + +inline vec_ZZ_p ProjectPowers(const vec_ZZ_p& a, long k, + const ZZ_pX& h, const ZZ_pXModulus& F) +{ + vec_ZZ_p x; + ProjectPowers(x, a, k, h, F); + NTL_OPT_RETURN(vec_ZZ_p, x); +} + + +// computes (a, 1), (a, h), ..., (a, h^{k-1} % f) +// this is really a "transposed" compose. + +void ProjectPowers(vec_ZZ_p& x, const vec_ZZ_p& a, long k, + const ZZ_pXArgument& H, const ZZ_pXModulus& F); + +inline vec_ZZ_p ProjectPowers(const vec_ZZ_p& a, long k, + const ZZ_pXArgument& H, const ZZ_pXModulus& F) +{ + vec_ZZ_p x; + ProjectPowers(x, a, k, H, F); + NTL_OPT_RETURN(vec_ZZ_p, x); +} + +// same as above, but uses a pre-computed ZZ_pXArgument + +inline void project(ZZ_p& x, const vec_ZZ_p& a, const ZZ_pX& b) + { InnerProduct(x, a, b.rep); } + +inline ZZ_p project(const vec_ZZ_p& a, const ZZ_pX& b) + { ZZ_p x; project(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +void MinPolySeq(ZZ_pX& h, const vec_ZZ_p& a, long m); +// computes the minimum polynomial of a linearly generated sequence; +// m is a bound on the degree of the polynomial; +// required: a.length() >= 2*m + +inline ZZ_pX MinPolySeq(const vec_ZZ_p& a, long m) + { ZZ_pX x; MinPolySeq(x, a, m); NTL_OPT_RETURN(ZZ_pX, x); } + +void ProbMinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m); + +inline ZZ_pX ProbMinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F, long m) + { ZZ_pX x; ProbMinPolyMod(x, g, F, m); NTL_OPT_RETURN(ZZ_pX, x); } + + +inline void ProbMinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F) + { ProbMinPolyMod(h, g, F, F.n); } + +inline ZZ_pX ProbMinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F) + { ZZ_pX x; ProbMinPolyMod(x, g, F); NTL_OPT_RETURN(ZZ_pX, x); } + + +// computes the monic minimal polynomial of (g mod f). +// m = a bound on the degree of the minimal polynomial. +// If this argument is not supplied, it defaults to deg(f). +// The algorithm is probabilistic, always returns a divisor of +// the minimal polynomial, and returns a proper divisor with +// probability at most m/p.
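+//
+// A hedged usage sketch (illustrative only; it assumes ZZ_p::init has
+// been called and f is monic with deg(f) > 0):
+//
+//    ZZ_pXModulus F;
+//    build(F, f);
+//    ZZ_pX g;
+//    random(g, deg(f));               // a random element of ZZ_p[X]/(f)
+//    ZZ_pX h = ProbMinPolyMod(g, F);  // monic divisor of the min poly of g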
+ +void MinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m); + +inline ZZ_pX MinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F, long m) + { ZZ_pX x; MinPolyMod(x, g, F, m); NTL_OPT_RETURN(ZZ_pX, x); } + +inline void MinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F) + { MinPolyMod(h, g, F, F.n); } + +inline ZZ_pX MinPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F) + { ZZ_pX x; MinPolyMod(x, g, F); NTL_OPT_RETURN(ZZ_pX, x); } + +// same as above, but guarantees that result is correct + +void IrredPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m); + +inline ZZ_pX IrredPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F, long m) + { ZZ_pX x; IrredPolyMod(x, g, F, m); NTL_OPT_RETURN(ZZ_pX, x); } + + +inline void IrredPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F) + { IrredPolyMod(h, g, F, F.n); } + +inline ZZ_pX IrredPolyMod(const ZZ_pX& g, const ZZ_pXModulus& F) + { ZZ_pX x; IrredPolyMod(x, g, F); NTL_OPT_RETURN(ZZ_pX, x); } + +// same as above, but assumes that f is irreducible, +// or at least that the minimal poly of g is itself irreducible. +// The algorithm is deterministic (and is always correct). + +/***************************************************************** + + Traces, norms, resultants + +******************************************************************/ + +void TraceVec(vec_ZZ_p& S, const ZZ_pX& f); + +inline vec_ZZ_p TraceVec(const ZZ_pX& f) + { vec_ZZ_p x; TraceVec(x, f); NTL_OPT_RETURN(vec_ZZ_p, x); } + +void FastTraceVec(vec_ZZ_p& S, const ZZ_pX& f); +void PlainTraceVec(vec_ZZ_p& S, const ZZ_pX& f); + +void TraceMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pXModulus& F); + +inline ZZ_p TraceMod(const ZZ_pX& a, const ZZ_pXModulus& F) + { ZZ_p x; TraceMod(x, a, F); NTL_OPT_RETURN(ZZ_p, x); } + +void TraceMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pX& f); + +inline ZZ_p TraceMod(const ZZ_pX& a, const ZZ_pX& f) + { ZZ_p x; TraceMod(x, a, f); NTL_OPT_RETURN(ZZ_p, x); } + + + +void ComputeTraceVec(const ZZ_pXModulus& F); + + +void NormMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pX& f); + +inline ZZ_p NormMod(const ZZ_pX& a, const ZZ_pX& f) + { ZZ_p x; NormMod(x, a, f); NTL_OPT_RETURN(ZZ_p, x); } + +void resultant(ZZ_p& rres, const ZZ_pX& a, const ZZ_pX& b); + +inline ZZ_p resultant(const ZZ_pX& a, const ZZ_pX& b) + { ZZ_p x; resultant(x, a, b); NTL_OPT_RETURN(ZZ_p, x); } + +void CharPolyMod(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& f); +// g = char poly of (a mod f) + +inline ZZ_pX CharPolyMod(const ZZ_pX& a, const ZZ_pX& f) + { ZZ_pX x; CharPolyMod(x, a, f); NTL_OPT_RETURN(ZZ_pX, x); } + + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/ZZ_pXFactoring.h b/thirdparty/linux/ntl/include/NTL/ZZ_pXFactoring.h new file mode 100644 index 0000000000..6034536055 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/ZZ_pXFactoring.h @@ -0,0 +1,227 @@ + + +#ifndef NTL_ZZ_pXFactoring__H +#define NTL_ZZ_pXFactoring__H + +#include +#include +#include +#include + +NTL_OPEN_NNS + + + + +/************************************************************ + + factorization routines + +************************************************************/ + + + + + +void SquareFreeDecomp(vec_pair_ZZ_pX_long& u, const ZZ_pX& f); +inline vec_pair_ZZ_pX_long SquareFreeDecomp(const ZZ_pX& f) + { vec_pair_ZZ_pX_long x; SquareFreeDecomp(x, f); return x; } + +// Performs square-free decomposition. +// f must be monic. +// If f = prod_i g_i^i, then u is set to a list of pairs (g_i, i). +// The list is in increasing order of i, with trivial terms +// (i.e., g_i = 1) deleted.
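+//
+// Hedged sketch (illustrative only; f monic, ZZ_p::init already called):
+//
+//    vec_pair_ZZ_pX_long u;
+//    SquareFreeDecomp(u, f);
+//    // u[i].a is the square-free factor g_i and u[i].b its exponent i,
+//    // so f equals the product of u[i].a raised to u[i].b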
+ + +void FindRoots(vec_ZZ_p& x, const ZZ_pX& f); +inline vec_ZZ_p FindRoots(const ZZ_pX& f) + { vec_ZZ_p x; FindRoots(x, f); return x; } + +// f is monic, and has deg(f) distinct roots. +// returns the list of roots + +void FindRoot(ZZ_p& root, const ZZ_pX& f); +inline ZZ_p FindRoot(const ZZ_pX& f) + { ZZ_p x; FindRoot(x, f); return x; } + +// finds a single root of f. +// assumes that f is monic and splits into distinct linear factors + + +void SFBerlekamp(vec_ZZ_pX& factors, const ZZ_pX& f, long verbose=0); +inline vec_ZZ_pX SFBerlekamp(const ZZ_pX& f, long verbose=0) + { vec_ZZ_pX x; SFBerlekamp(x, f, verbose); return x; } + +// Assumes f is square-free and monic. +// returns list of factors of f. +// Uses "Berlekamp" approach. + + +void berlekamp(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f, long verbose=0); +inline vec_pair_ZZ_pX_long +berlekamp(const ZZ_pX& f, long verbose=0) + { vec_pair_ZZ_pX_long x; berlekamp(x, f, verbose); return x; } + +// returns a list of factors, with multiplicities. +// f must be monic. +// Uses "Berlekamp" approach. + + +extern NTL_CHEAP_THREAD_LOCAL long ZZ_pX_BlockingFactor; +// Controls GCD blocking for DDF. + +void DDF(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f, const ZZ_pX& h, + long verbose=0); + +inline vec_pair_ZZ_pX_long DDF(const ZZ_pX& f, const ZZ_pX& h, + long verbose=0) + { vec_pair_ZZ_pX_long x; DDF(x, f, h, verbose); return x; } + +// Performs distinct-degree factorization. +// Assumes f is monic and square-free, and h = X^p mod f +// Obsolete: see NewDDF, below. + +extern NTL_CHEAP_THREAD_LOCAL long ZZ_pX_GCDTableSize; /* = 4 */ +// Controls GCD blocking for NewDDF + + +extern NTL_CHEAP_THREAD_LOCAL double ZZ_pXFileThresh; +// external files are used for baby/giant steps if size +// of these tables exceeds ZZ_pXFileThresh KB. + +void NewDDF(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f, const ZZ_pX& h, + long verbose=0); + +inline vec_pair_ZZ_pX_long NewDDF(const ZZ_pX& f, const ZZ_pX& h, + long verbose=0) + { vec_pair_ZZ_pX_long x; NewDDF(x, f, h, verbose); return x; } + +// same as above, but uses baby-step/giant-step method + + +void EDF(vec_ZZ_pX& factors, const ZZ_pX& f, const ZZ_pX& b, + long d, long verbose=0); + +inline vec_ZZ_pX EDF(const ZZ_pX& f, const ZZ_pX& b, + long d, long verbose=0) + { vec_ZZ_pX x; EDF(x, f, b, d, verbose); return x; } + +// Performs equal-degree factorization. +// f is monic, square-free, and all irreducible factors have same degree. +// b = X^p mod f. +// d = degree of irreducible factors of f +// Space for the trace-map computation can be controlled via ComposeBound. + + + +void RootEDF(vec_ZZ_pX& factors, const ZZ_pX& f, long verbose=0); +inline vec_ZZ_pX RootEDF(const ZZ_pX& f, long verbose=0) + { vec_ZZ_pX x; RootEDF(x, f, verbose); return x; } + +// EDF for d==1 + +void SFCanZass(vec_ZZ_pX& factors, const ZZ_pX& f, long verbose=0); +inline vec_ZZ_pX SFCanZass(const ZZ_pX& f, long verbose=0) + { vec_ZZ_pX x; SFCanZass(x, f, verbose); return x; } + +// Assumes f is monic and square-free. +// returns list of factors of f. +// Uses "Cantor/Zassenhaus" approach. + + + +void CanZass(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f, + long verbose=0); + +inline vec_pair_ZZ_pX_long CanZass(const ZZ_pX& f, long verbose=0) + { vec_pair_ZZ_pX_long x; CanZass(x, f, verbose); return x; } + +// returns a list of factors, with multiplicities. +// f must be monic. +// Uses "Cantor/Zassenhaus" approach.
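+//
+// Hedged sketch of a complete factorization (illustrative only; assumes
+// ZZ_p::init has been called and f is monic; mul re-multiplies the
+// factors -- see its declaration, which directly follows):
+//
+//    vec_pair_ZZ_pX_long factors;
+//    CanZass(factors, f);   // factors[i].a irreducible, .b = multiplicity
+//    ZZ_pX check;
+//    mul(check, factors);   // check == f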
+ + +void mul(ZZ_pX& f, const vec_pair_ZZ_pX_long& v); +inline ZZ_pX mul(const vec_pair_ZZ_pX_long& v) + { ZZ_pX x; mul(x, v); return x; } + +// multiplies polynomials, with multiplicities + + +/************************************************************* + + irreducible poly's: tests and constructions + +**************************************************************/ + +long ProbIrredTest(const ZZ_pX& f, long iter=1); + +// performs a fast, probabilistic irreducibility test +// the test can err only if f is reducible, and the +// error probability is bounded by p^{-iter}. + +long DetIrredTest(const ZZ_pX& f); + +// performs a recursive deterministic irreducibility test +// fast in the worst-case (when input is irreducible). + +long IterIrredTest(const ZZ_pX& f); + +// performs an iterative deterministic irreducibility test, +// based on DDF. Fast on average (when f has a small factor). + +void BuildIrred(ZZ_pX& f, long n); +inline ZZ_pX BuildIrred_ZZ_pX(long n) + { ZZ_pX x; BuildIrred(x, n); NTL_OPT_RETURN(ZZ_pX, x); } + +// Build a monic irreducible poly of degree n. + +void BuildRandomIrred(ZZ_pX& f, const ZZ_pX& g); +inline ZZ_pX BuildRandomIrred(const ZZ_pX& g) + { ZZ_pX x; BuildRandomIrred(x, g); NTL_OPT_RETURN(ZZ_pX, x); } + +// g is a monic irreducible polynomial. +// constructs a random monic irreducible polynomial f of the same degree. + + +long ComputeDegree(const ZZ_pX& h, const ZZ_pXModulus& F); + +// f = F.f is assumed to be an "equal degree" polynomial +// h = X^p mod f +// the common degree of the irreducible factors of f is computed +// This routine is useful in counting points on elliptic curves + +long ProbComputeDegree(const ZZ_pX& h, const ZZ_pXModulus& F); + +// same as above, but uses a slightly faster probabilistic algorithm +// the return value may be 0 or may be too big, but for large p +// (relative to n), this happens with very low probability.
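+//
+// Hedged sketch (illustrative only; assumes ZZ_p::init has been called):
+//
+//    ZZ_pX f;
+//    BuildIrred(f, 64);           // monic irreducible of degree 64
+//    long ok = IterIrredTest(f);  // deterministic test: ok == 1 here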
+ + + +void TraceMap(ZZ_pX& w, const ZZ_pX& a, long d, const ZZ_pXModulus& F, + const ZZ_pX& b); + +inline ZZ_pX TraceMap(const ZZ_pX& a, long d, const ZZ_pXModulus& F, + const ZZ_pX& b) + { ZZ_pX x; TraceMap(x, a, d, F, b); return x; } + +// w = a+a^q+...+a^{q^{d-1}} mod f; +// it is assumed that d >= 0, and b = X^q mod f, q a power of p +// Space allocation can be controlled via ComposeBound (see <NTL/ZZ_pX.h>) + + + +void PowerCompose(ZZ_pX& w, const ZZ_pX& a, long d, const ZZ_pXModulus& F); +inline ZZ_pX PowerCompose(const ZZ_pX& a, long d, const ZZ_pXModulus& F) + { ZZ_pX x; PowerCompose(x, a, d, F); return x; } + +// w = X^{q^d} mod f; +// it is assumed that d >= 0, and a = X^q mod f, q a power of p +// Space allocation can be controlled via ComposeBound (see <NTL/ZZ_pX.h>) + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/c_lip.h b/thirdparty/linux/ntl/include/NTL/c_lip.h new file mode 100644 index 0000000000..a7eea62dc8 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/c_lip.h @@ -0,0 +1,566 @@ + + +typedef long *_ntl_verylong; +typedef long _ntl_verylong_body; + +#if (defined(NTL_AVOID_FLOAT) && defined(NTL_LONG_LONG)) +#error "at most one of NTL_AVOID_FLOAT NTL_LONG_LONG allowed" +#endif + + +#define NTL_NBITS NTL_NBITS_MAX + + +#define NTL_RADIX (1L<<NTL_NBITS) +#define NTL_NBITSH (NTL_NBITS>>1) +#define NTL_RADIXM (NTL_RADIX-1) +#define NTL_RADIXROOT (1L<<NTL_NBITSH) +#define NTL_RADIXROOTM (NTL_RADIXROOT-1) + + + void _ntl_zsadd(_ntl_verylong a, long d, _ntl_verylong *b); + /* *b = a + d */ + + void _ntl_zadd(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *c); + /* *c = a + b */ + + void _ntl_zsub(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *c); + /* *c = a - b */ + + void _ntl_zsubpos(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *c); + /* *c = a - b; assumes a >= b >= 0 */ + + void _ntl_zsmul(_ntl_verylong a, long d, _ntl_verylong *b); + /* *b = d * a */ + + void _ntl_zmul(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *c); + /* *c = a * b */ + + void _ntl_zsq(_ntl_verylong a, _ntl_verylong *c); + /* *c = a * a */ + + long _ntl_zsdiv(_ntl_verylong a, long b, _ntl_verylong *q); + /* (*q) = floor(a/b) and a - floor(a/b)*b is returned; + error is raised if b == 0; + if b does not divide a, then the remainder has the same sign as b */ + + void _ntl_zdiv(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *q, _ntl_verylong *r); + /* (*q) = floor(a/b) and (*r) = a - floor(a/b)*b; + error is raised if b == 0; + if b does not divide a, then sign(*r) == sign(b) */ + + void _ntl_zmultirem(_ntl_verylong a, long n, long* dd, long* rr); + /* rr[i] = a % dd[i], i = 0..n-1; + assumes a >= 0, 0 < dd[i] < NTL_RADIX */ + + long _ntl_zsfastrem(_ntl_verylong a, long d); + /* return a % d; + assumes a >= 0, 0 < d < NTL_RADIX */ + + + void _ntl_zmod(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *r); + /* same as _ntl_zdiv, but only remainder is computed */ + + long _ntl_zsmod(_ntl_verylong a, long d); + /* same as _ntl_zsdiv, but only remainder is computed */ + + void _ntl_zquickmod(_ntl_verylong *r, _ntl_verylong b); + /* *r = *r % b; + assumes b > 0 and *r >= 0; + The division is performed in place (but may sometimes + cause *r to grow by one digit) */ + + void _ntl_zsaddmul(_ntl_verylong x, long y, _ntl_verylong *ww); + /* *ww += x*y */ + + void _ntl_zaddmul(_ntl_verylong x, _ntl_verylong y, _ntl_verylong *ww); + /* *ww += x*y */ + + void _ntl_zssubmul(_ntl_verylong x, long y, _ntl_verylong *ww); + /* *ww -= x*y */ + + void _ntl_zsubmul(_ntl_verylong x, _ntl_verylong y, _ntl_verylong *ww); + /* *ww -= x*y */ + + +/******************************************************************** + + Shifting and bit manipulation + +*********************************************************************/ + + void _ntl_z2mul(_ntl_verylong n, _ntl_verylong *a); + /* *a = 2 * n */ + + long _ntl_z2div(_ntl_verylong n, _ntl_verylong *a); + /* *a = sign(n) * (|n|/2) */ + + void _ntl_zlshift(_ntl_verylong n, long k, _ntl_verylong *a); + /* *a = sign(n) * (|n| << k); + shift is in
reverse direction for negative k */ + + void _ntl_zrshift(_ntl_verylong n, long k, _ntl_verylong *a); + /* *a = sign(n) * (|n| >> k); + shift is in reverse direction for negative k */ + + long _ntl_zmakeodd(_ntl_verylong *n); + /* + if (n != 0) + *n = m; + return (k such that n == 2 ^ k * m with m odd); + else + return (0); + */ + + long _ntl_znumtwos(_ntl_verylong n); + /* return largest e such that 2^e divides n, or zero if n is zero */ + + + + long _ntl_zodd(_ntl_verylong a); + /* returns 1 if a is odd and 0 if it is even */ + + long _ntl_zbit(_ntl_verylong a, long p); + /* returns p-th bit of a, where the low order bit is indexed by 0; + p out of range returns 0 */ + + long _ntl_zsetbit(_ntl_verylong *a, long p); + /* returns original value of p-th bit of |a|, and replaces + p-th bit of a by 1 if it was zero; + error if p < 0 */ + + long _ntl_zswitchbit(_ntl_verylong *a, long p); + /* returns original value of p-th bit of |a|, and switches + the value of p-th bit of a; + p starts counting at 0; + error if p < 0 */ + + + void _ntl_zlowbits(_ntl_verylong a, long k, _ntl_verylong *b); + /* places k low order bits of |a| in b */ + + long _ntl_zslowbits(_ntl_verylong a, long k); + /* returns k low order bits of |a| */ + + long _ntl_zweights(long a); + /* returns Hamming weight of |a| */ + + long _ntl_zweight(_ntl_verylong a); + /* returns Hamming weight of |a| */ + + void _ntl_zand(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *c); + /* c gets bit pattern `bits of |a|` and `bits of |b|` */ + + void _ntl_zor(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *c); + /* c gets bit pattern `bits of |a|` inclusive or `bits of |b|` */ + + void _ntl_zxor(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *c); + /* c gets bit pattern `bits of |a|` exclusive or `bits of |b|` */ + + + + +/************************************************************************ + + Comparison + +*************************************************************************/ + + long _ntl_zcompare(_ntl_verylong a, _ntl_verylong b); + /* + if (a > b) + return (1); + if (a == b) + return (0); + if (a < b) + return (-1); + */ + + long _ntl_zscompare(_ntl_verylong a, long b); + /* single-precision version of the above */ + + long _ntl_ziszero (_ntl_verylong a); + /* test for 0 */ + + + long _ntl_zsign(_ntl_verylong a); + /* + if (a > 0) + return (1); + if (a == 0) + return (0); + if (a < 0) + return (-1); + */ + + void _ntl_zabs(_ntl_verylong *a); + /* *a = |a| */ + + void _ntl_znegate(_ntl_verylong *a); + /* *a = -a */ + + void _ntl_zcopy(_ntl_verylong a, _ntl_verylong *b); + /* *b = a; space is allocated */ + + void _ntl_zcopy1(_ntl_verylong a, _ntl_verylong *b); + /* *b = a; space not necessarily allocated */ + + void _ntl_zswap(_ntl_verylong *a, _ntl_verylong *b); + /* swap a and b (by swapping pointers) */ + + long _ntl_z2log(_ntl_verylong a); + /* number of bits in |a|; returns 0 if a = 0 */ + + long _ntl_z2logs(long a); + /* single-precision version of the above */ + + +/******************************************************************** + + Conversion + +*********************************************************************/ + + void _ntl_zzero(_ntl_verylong *a); + /* *a = 0; space is allocated */ + + void _ntl_zzero1(_ntl_verylong *a); + /* *a = 0; space not necessarily allocated */ + + void _ntl_zone(_ntl_verylong *a); + /* *a = 1 */ + + void _ntl_zintoz(long d, _ntl_verylong *a); + /* *a = d; space is allocated */ + + void _ntl_zintoz1(long d, _ntl_verylong *a); + /* *a = d; space not necessarily allocated */ + + void
_ntl_zuintoz(unsigned long d, _ntl_verylong *a); + /* *a = d; space is allocated */ + + long _ntl_ztoint(_ntl_verylong a); + /* converts a to a long; overflow results in value + mod 2^{NTL_BITS_PER_LONG}. */ + + unsigned long _ntl_ztouint(_ntl_verylong a); + /* converts a to an unsigned long; overflow results in value + mod 2^{NTL_BITS_PER_LONG}. */ + + + double _ntl_zdoub(_ntl_verylong n); + /* converts a to a double; no overflow check */ + + long _ntl_zround_correction(_ntl_verylong a, long k, long residual); + /* k >= 1, |a| >= 2^k, and residual is 0, 1, or -1. + The result is what we should add to (a >> k) to round + x = a/2^k to the nearest integer using IEEE-like rounding rules + (i.e., round to nearest, and round to even to break ties). + The result is either 0 or sign(a). + If residual is not zero, it is as if x were replaced by + x' = x + residual*2^{-(k+1)}. + This can be used to break ties when x is exactly + half way between two integers. */ + + double _ntl_zlog(_ntl_verylong a); + /* computes log(a), protecting against overflow */ + + + void _ntl_zdoubtoz(double a, _ntl_verylong *x); + /* x = floor(a); */ + + + + +/************************************************************************ + + Square roots + +*************************************************************************/ + + + long _ntl_zsqrts(long n); + /* return floor(sqrt(n)); error raised if n < 0 */ + + void _ntl_zsqrt(_ntl_verylong n, _ntl_verylong *r); + /* *r = floor(sqrt(n)); error raised if n < 0 */ + +/********************************************************************* + + Exponentiation + +**********************************************************************/ + + void _ntl_zexp(_ntl_verylong a, long e, _ntl_verylong *b); + /* *b = a^e; error raised if e < 0 */ + + void _ntl_zexps(long a, long e, _ntl_verylong *b); + /* *b = a^e; error raised if e < 0 */ + + +/********************************************************************* + + Modular Arithmetic + + Addition, subtraction, multiplication, squaring, division, inversion, + and exponentiation modulo a positive modulus n, where all operands + (except for the exponent in exponentiation) and results are in the + range [0, n-1].
+ +***********************************************************************/ + + void _ntl_zaddmod(_ntl_verylong a, _ntl_verylong b, _ntl_verylong n, _ntl_verylong *c); + /* *c = (a + b) % n */ + + void _ntl_zsubmod(_ntl_verylong a, _ntl_verylong b, _ntl_verylong n, _ntl_verylong *c); + /* *c = (a - b) % n */ + + void _ntl_zsmulmod(_ntl_verylong a, long b, _ntl_verylong n, _ntl_verylong *c); + /* *c = (a * b) % n */ + + void _ntl_zmulmod(_ntl_verylong a, _ntl_verylong b, _ntl_verylong n, _ntl_verylong *c); + /* *c = (a * b) % n */ + + void _ntl_zsqmod(_ntl_verylong a, _ntl_verylong n, _ntl_verylong *c); + /* *c = (a ^ 2) % n */ + + void _ntl_zinvmod(_ntl_verylong a, _ntl_verylong n, _ntl_verylong *c); + /* *c = (1 / a) % n; error raised if gcd(a, n) != 1 */ + + void _ntl_zpowermod(_ntl_verylong g, _ntl_verylong e, _ntl_verylong F, + _ntl_verylong *h); + + /* *h = (g ^ e) % F; */ + + + +/************************************************************************** + + Euclidean Algorithms + +***************************************************************************/ + void _ntl_zgcd(_ntl_verylong m1, _ntl_verylong m2, _ntl_verylong *r); + /* *r = greatest common divisor of m1 and m2; + uses binary gcd algorithm */ + + + void _ntl_zexteucl(_ntl_verylong a, _ntl_verylong *xa, + _ntl_verylong b, _ntl_verylong *xb, + _ntl_verylong *d); + /* + *d = a * *xa + b * *xb = gcd(a, b); + sets *d, *xa and *xb given a and b; + uses Lehmer's trick + */ + + + long _ntl_zinv(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *c); + /* + if (a and b coprime) + { + *c = inv; + return(0); + } + else + { + *c = gcd(a, b); + return(1); + } + + where inv is such that (inv * a) == 1 mod b; + error raised if b <= 1 or a < 0 or a >= b + */ + + long _ntl_zxxratrecon(_ntl_verylong x, _ntl_verylong m, + _ntl_verylong a_bound, _ntl_verylong b_bound, + _ntl_verylong *a, _ntl_verylong *b); + + /* rational reconstruction: see doc in ZZ.txt */ + + + +/********************************************************************** + + Storage Allocation + + These routines use malloc and free. + +***********************************************************************/ + + + long _ntl_zmaxalloc(_ntl_verylong x); + /* max allocation request, possibly rounded up a bit */ + + + void _ntl_zsetlength(_ntl_verylong *v, long len); + /* Allocates enough space to hold a len-digit number, + where each digit has NTL_NBITS bits. + If space must be allocated, space for one extra digit + is always allocated. */ + + void _ntl_zfree(_ntl_verylong *x); + /* Frees space held by x, and sets x back to 0.
*/ + + +/******************************************************************* + + Special routines + +********************************************************************/ + + + +long _ntl_zsize(_ntl_verylong n); +long _ntl_zisone(_ntl_verylong n); +long _ntl_zdigit(_ntl_verylong a, long i); + +long _ntl_zsptest(_ntl_verylong a); +long _ntl_zwsptest(_ntl_verylong a); + +long _ntl_zcrtinrange(_ntl_verylong g, _ntl_verylong a); + +void _ntl_zfrombytes(_ntl_verylong *x, const unsigned char *p, long n); +void _ntl_zbytesfromz(unsigned char *p, _ntl_verylong a, long nn); + +long _ntl_zblock_construct_alloc(_ntl_verylong *x, long d, long n); +void _ntl_zblock_construct_set(_ntl_verylong x, _ntl_verylong *y, long i); +long _ntl_zblock_destroy(_ntl_verylong x); +long _ntl_zblock_storage(long d); + + +#define NTL_verylong _ntl_verylong +#define NTL_verylong_body _ntl_verylong_body +#define NTL_z2log _ntl_z2log +#define NTL_zabs _ntl_zabs +#define NTL_zadd _ntl_zadd +#define NTL_zaddmod _ntl_zaddmod +#define NTL_zand _ntl_zand +#define NTL_zbit _ntl_zbit +#define NTL_zblock_construct_alloc _ntl_zblock_construct_alloc +#define NTL_zblock_construct_set _ntl_zblock_construct_set +#define NTL_zblock_destroy _ntl_zblock_destroy +#define NTL_zblock_storage _ntl_zblock_storage +#define NTL_zbytesfromz _ntl_zbytesfromz +#define NTL_zcompare _ntl_zcompare +#define NTL_zcopy _ntl_zcopy1 +#define NTL_zcrtinrange _ntl_zcrtinrange +#define NTL_zdigit _ntl_zdigit +#define NTL_zdiv _ntl_zdiv +#define NTL_zdoub _ntl_zdoub +#define NTL_zdoubtoz _ntl_zdoubtoz +#define NTL_zexp _ntl_zexp +#define NTL_zexps _ntl_zexps +#define NTL_zexteucl _ntl_zexteucl +#define NTL_zfree _ntl_zfree +#define NTL_zfrombytes _ntl_zfrombytes +#define NTL_zgcd _ntl_zgcd +#define NTL_zintoz _ntl_zintoz1 +#define NTL_zinv _ntl_zinv +#define NTL_zinvmod _ntl_zinvmod +#define NTL_zisone _ntl_zisone +#define NTL_ziszero _ntl_ziszero +#define NTL_zlog _ntl_zlog +#define NTL_zlowbits _ntl_zlowbits +#define NTL_zlshift _ntl_zlshift +#define NTL_zmakeodd _ntl_zmakeodd +#define NTL_zmod _ntl_zmod +#define NTL_zmul _ntl_zmul +#define NTL_zmulmod _ntl_zmulmod +#define NTL_znegate _ntl_znegate +#define NTL_znumtwos _ntl_znumtwos +#define NTL_zodd _ntl_zodd +#define NTL_zone _ntl_zone +#define NTL_zor _ntl_zor +#define NTL_zpowermod _ntl_zpowermod +#define NTL_zquickmod _ntl_zquickmod +#define NTL_zround_correction _ntl_zround_correction +#define NTL_zrshift _ntl_zrshift +#define NTL_zsadd _ntl_zsadd +#define NTL_zscompare _ntl_zscompare +#define NTL_zsdiv _ntl_zsdiv +#define NTL_zsetbit _ntl_zsetbit +#define NTL_zmaxalloc _ntl_zmaxalloc +#define NTL_zsetlength _ntl_zsetlength +#define NTL_zsign _ntl_zsign +#define NTL_zsize _ntl_zsize +#define NTL_zslowbits _ntl_zslowbits +#define NTL_zsmod _ntl_zsmod +#define NTL_zsmul _ntl_zsmul +#define NTL_zsmulmod _ntl_zsmulmod +#define NTL_zsptest _ntl_zsptest +#define NTL_zsq _ntl_zsq +#define NTL_zsqmod _ntl_zsqmod +#define NTL_zsqrt _ntl_zsqrt +#define NTL_zsqrts _ntl_zsqrts +#define NTL_zsub _ntl_zsub +#define NTL_zsubmod _ntl_zsubmod +#define NTL_zsubpos _ntl_zsubpos +#define NTL_zswap _ntl_zswap +#define NTL_zswitchbit _ntl_zswitchbit +#define NTL_ztoint _ntl_ztoint +#define NTL_ztouint _ntl_ztouint +#define NTL_zuintoz _ntl_zuintoz +#define NTL_zweight _ntl_zweight +#define NTL_zweights _ntl_zweights +#define NTL_zwsptest _ntl_zwsptest +#define NTL_zxor _ntl_zxor +#define NTL_zxxratrecon _ntl_zxxratrecon +#define NTL_zzero _ntl_zzero1 + +#define NTL_zsaddmul _ntl_zsaddmul +#define NTL_zaddmul _ntl_zaddmul 
+#define NTL_zssubmul _ntl_zssubmul +#define NTL_zsubmul _ntl_zsubmul + diff --git a/thirdparty/linux/ntl/include/NTL/config.h b/thirdparty/linux/ntl/include/NTL/config.h new file mode 100644 index 0000000000..30b9b4028e --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/config.h @@ -0,0 +1,632 @@ + +#ifndef NTL_config__H +#define NTL_config__H + +/************************************************************************* + + NTL Configuration File + ---------------------- + +This file may be modified prior to building NTL so as to specify +some basic configuration options, and to customize +how code is generated so as to improve performance. + +The Basic Configuration Options must be set by hand. If you use the +configuration wizard, then these flags should be set before +the installation process begins; their values will be retained +by the wizard. + +The Performance Options can be set either by hand, by editing this +file, or (on most Unix platforms) can be set automatically using +the configuration wizard which runs when NTL is installed. + +All NTL header files include this file. +By setting these flags here, instead of on the compiler command line, +it is easier to guarantee that NTL library and client code use +consistent settings. + + + How to do it + ------------ + +To set a flag, just replace the pre-processor directive +'if 0' by 'if 1' for that flag, which causes the appropriate macro +to be defined. Of course, to unset a flag, just replace the +'if 1' by an 'if 0'. + +You can also do this more conveniently via the command line +using the configure script. + + + *************************************************************************/ + + + +/************************************************************************* + * + * Basic Configuration Options + * + *************************************************************************/ + + + /* None of these flags are set by the configuration wizard; + * they must be set by hand, before installation begins. + */ + + +#if 0 +#define NTL_LEGACY_NO_NAMESPACE + +/* + * By default, NTL components are declared inside the namespace NTL. + * Set this flag if you want to instead have these components + * declared in the global namespace. This is for backward + * compatibility only -- not recommended. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_LEGACY_INPUT_ERROR + +/* + * Also for backward compatibility. Set if you want input + * operations to abort on error, instead of just setting the + * "fail bit" of the input stream. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + + +#endif + +#if 0 +#define NTL_DISABLE_TLS_HACK + +/* Set if you want to compile NTL without "TLS hack" + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if 0 +#define NTL_ENABLE_TLS_HACK + +/* Set if you want to compile NTL with "TLS hack" + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if 0 +#define NTL_THREADS + +/* Set if you want to compile NTL as a thread-safe library. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_EXCEPTIONS + +/* Set if you want to compile NTL with exceptions enabled + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if 0 +#define NTL_THREAD_BOOST + +/* Set if you want to compile NTL to exploit threads internally.
+ * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif +# + +#if 0 +#define NTL_GMP_LIP + +/* + * Use this flag if you want to use GMP as the long integer package. + * This can result in significantly faster code on some platforms. + * It requires that the GMP package (version >= 3.1) has already been + * installed. You will also have to set the variables GMP_OPT_INCDIR, + * GMP_OPT_LIBDIR, GMP_OPT_LIB in the makefile (these are set automatically + * by the configuration script when you pass the flag NTL_GMP_LIP=on + * to that script). + * + * Beware that setting this flag can break some very old NTL codes. + * + * To re-build after changing this flag: + * rm *.o; make setup3; make ntl.a + * You may also have to edit the makefile to modify the variables + * GMP_OPT_INCDIR, GMP_OPT_LIBDIR, and GMP_OPT_LIB. + */ + +#endif + +#if 0 +#define NTL_GF2X_LIB + +/* + * Use this flag if you want to use the gf2x library for + * faster GF2X arithmetic. + * This can result in significantly faster code, especially + * when working with polynomials of huge degree. + * You will also have to set the variables GF2X_OPT_INCDIR, + * GF2X_OPT_LIBDIR, GF2X_OPT_LIB in the makefile (these are set automatically + * by the configuration script when you pass the flag NTL_GF2X_LIB=on + * to that script). + * + * To re-build after changing this flag: + * rm GF2X.o GF2X1.o; make ntl.a + * You may also have to edit the makefile to modify the variables + * GF2X_OPT_INCDIR, GF2X_OPT_LIBDIR, and GF2X_OPT_LIB. + */ + +#endif + + +#if 0 +#define NTL_LONG_LONG_TYPE long long + +/* + * If you set the flag NTL_LONG_LONG, then the value of + * NTL_LONG_LONG_TYPE will be used + * to declare 'double word' signed integer types. + * Irrelevant when NTL_GMP_LIP is set. + * If left undefined, some "ifdef magic" will attempt + * to find the best choice for your platform, depending + * on the compiler and wordsize. On 32-bit machines, + * this is usually 'long long'. + * + * To re-build after changing this flag: rm lip.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_UNSIGNED_LONG_LONG_TYPE unsigned long long + +/* + * If you set the flag NTL_SPMM_ULL, then the value of + * NTL_UNSIGNED_LONG_LONG_TYPE will be used + * to declare 'double word' unsigned integer types. + * If left undefined, some "ifdef magic" will attempt + * to find the best choice for your platform, depending + * on the compiler and wordsize. On 32-bit machines, + * this is usually 'unsigned long long'. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_CLEAN_INT + +/* + * This will disallow the use of some non-standard integer arithmetic + * that may improve performance somewhat. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if 0 +#define NTL_CLEAN_PTR + +/* + * This will disallow the use of some non-standard pointer arithmetic + * that may improve performance somewhat. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_RANGE_CHECK + +/* + * This will generate vector subscript range-check code. + * Useful for debugging, but it slows things down of course. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + + + + +#if 0 +#define NTL_NO_INIT_TRANS + +/* + * Without this flag, NTL uses a special code sequence to avoid + * copying large objects in return statements.
However, if your + * compiler optimizes away the return of a *named* local object, + * this is not necessary, and setting this flag will result + * in *slightly* more compact and efficient code. Although + * the emerging C++ standard allows compilers to perform + * this optimization, I know of none that currently do. + * Most will avoid copying *temporary* objects in return statements, + * and NTL's default code sequence exploits this fact. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_X86_FIX + +/* + * Forces the "x86 floating point fix", overriding the default behavior. + * By default, NTL will apply the "fix" if it looks like it is + * necessary, and if knows how to fix it. + * The problem addressed here is that x86 processors sometimes + * run in a mode where FP registers have more precision than doubles. + * This will cause code in quad_float.c some trouble. + * NTL can normally correctly detect the problem, and fix it, + * so you shouldn't need to worry about this or the next flag. + + * To re-build after changing this flag: rm quad_float.o; make ntl.a + * + */ + +#elif 0 +#define NTL_NO_X86_FIX +/* + * Forces no "x86 floating point fix", overriding the default behavior. + + * To re-build after changing this flag: rm quad_float.o; make ntl.a + */ + +#endif + + + +#if 0 +#define NTL_LEGACY_SP_MULMOD + +/* Forces legacy single-precision MulMod implementation. + */ + +#endif + + +#if 0 +#define NTL_DISABLE_LONGDOUBLE + +/* Explicitly disables use of long double arithmetic + */ + +#endif + + +#if 0 +#define NTL_DISABLE_LONGLONG + +/* Explicitly disables use of long long arithmetic + */ + +#endif + +#if 0 +#define NTL_DISABLE_LL_ASM + +/* Explicitly disables use of inline assembly as a replacement + * for long long arithmetic. + */ + +#endif + + +#if 0 +#define NTL_MAXIMIZE_SP_NBITS + +/* Allows for 62-bit single-precision moduli on 64-bit platforms. + * By default, such moduli are restricted to 60 bits, which + * usually gives slightly better performance across a range + * of parameters. + */ + +#endif + +/************************************************************************* + * + * Performance Options + * + *************************************************************************/ + + +/* One can choose one of three different strategies for long integer + * arithmetic: the default, NTL_LONG_LONG, or NTL_AVOID_FLOAT. + * The configuration wizard will choose among them. + * + */ + +#if 0 +#define NTL_LONG_LONG + +/* + * + * For platforms that support it, this flag can be set to cause + * the low-level multiplication code to use the type "long long", + * which may yield a significant performance gain, + * but on others, it can yield no improvement and can even + * slow things down. + * + * + * See below (NTL_LONG_LONG_TYPE) for how to use a type name + * other than "long long". + * + * If you set NTL_LONG_LONG, you might also want to set + * the flag NTL_TBL_REM (see below). + * + * To re-build after changing this flag: rm lip.o; make ntl.a + */ + +#elif 0 +#define NTL_AVOID_FLOAT + +/* + * + * On machines with slow floating point or---more commonly---slow int/float + * conversions, this flag can lead to faster code. + * + * If you set NTL_AVOID_FLOAT, you should probably also + * set NTL_TBL_REM (see below).
+ * + * To re-build after changing this flag: rm lip.o; make ntl.a + */ + +#endif + + +/* There are three strategies to implement single-precision + * modular multiplication with preconditioning (see the MulModPrecon + * function in the ZZ module): the default, NTL_SPMM_ULL, + * and NTL_SPMM_ASM. + * This plays a crucial role in the "small prime FFT" used to + * implement polynomial arithmetic, and in other CRT-based methods + * (such as linear algebra over ZZ), as well as polynomial and matrix + * arithmetic over zz_p. + */ + + + +#if 0 +#define NTL_SPMM_ULL + +/* This also causes an "all integer" + * implementation of MulModPrecon to be used. + * It is usually a faster implementation, + * but it is not entirely portable. + * It relies on double-word unsigned multiplication + * (see NTL_UNSIGNED_LONG_LONG_TYPE above). + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#elif 0 +#define NTL_SPMM_ASM + +/* Like the previous flag, this also causes an "all integer" + * implementation of MulModPrecon to be used. + * It relies on assembler code to do double-word unsigned multiplication. + * This is only supported on select machines under GCC. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + + +#endif + + + +/* + * The following two flags provide additional control for how the + * FFT modulo single-precision primes is implemented. + */ + +#if 0 +#define NTL_FFT_BIGTAB + +/* + * Precomputed tables are used to store all the roots of unity + * used in FFT computations. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + + +#endif + + +#if 0 +#define NTL_FFT_LAZYMUL + +/* + * This flag only has an effect when combined with + * either the NTL_SPMM_ULL or NTL_SPMM_ASM flags. + * When set, a "lazy multiplication" strategy due to David Harvey is used: + * see his paper "FASTER ARITHMETIC FOR NUMBER-THEORETIC TRANSFORMS". + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + + +#endif + + + + + +/* The next six flags NTL_AVOID_BRANCHING, NTL_TBL_REM, NTL_TBL_REM_LL, + * NTL_GF2X_ALTCODE, NTL_GF2X_ALTCODE1, and NTL_GF2X_NOINLINE + * are also set by the configuration wizard. + */ + + + +#if 0 +#define NTL_AVOID_BRANCHING + +/* + * With this option, branches are replaced at several + * key points with equivalent code using shifts and masks. + * It may speed things up on machines with + * deep pipelines and high branch penalties. + * This flag mainly affects the implementation of the + * single-precision modular arithmetic routines. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + + +#if 0 +#define NTL_TBL_REM + +/* + * + * With this flag, some divisions are avoided in the + * ZZ_pX multiplication routines. If you use the NTL_AVOID_FLOAT + * or NTL_LONG_LONG flags, then you should probably use this one too. + * + * To re-build after changing this flag: + * rm lip.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_TBL_REM_LL + +/* + * + * This forces the LONG_LONG version of TBL_REM + * + * Irrelevant when NTL_GMP_LIP is set. + * + * To re-build after changing this flag: + * rm lip.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_CRT_ALTCODE + +/* + * Employs an alternative CRT strategy. + * Only relevant with GMP. + * Seems to be marginally faster on some x86_64 platforms. + * + * To re-build after changing this flag: + * rm lip.o; make ntl.a + */ + +#endif + +#if 0 +#define NTL_CRT_ALTCODE_SMALL + +/* + * Employs an alternative CRT strategy for small moduli. + * Only relevant with GMP.
+ * Seems to be marginally faster on some x86_64 platforms.
+ *
+ * To re-build after changing this flag:
+ *    rm lip.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_GF2X_ALTCODE
+
+/*
+ * With this option, the default strategy for implementing low-level
+ * GF2X multiplication is replaced with an alternative strategy.
+ * This alternative strategy seems to work better on RISC machines
+ * with deep pipelines and high branch penalties (like a powerpc),
+ * but does no better (or even worse) on x86s.
+ *
+ * To re-build after changing this flag: rm GF2X.o; make ntl.a
+ */
+
+#elif 0
+#define NTL_GF2X_ALTCODE1
+
+
+/*
+ * Yet another alternative strategy for implementing GF2X
+ * multiplication.
+ *
+ * To re-build after changing this flag: rm GF2X.o; make ntl.a
+ */
+
+
+#endif
+
+#if 0
+#define NTL_GF2X_NOINLINE
+
+/*
+ * By default, the low-level GF2X multiplication routine is inlined.
+ * This can potentially lead to some trouble on some platforms,
+ * and you can override the default by setting this flag.
+ *
+ * To re-build after changing this flag: rm GF2X.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_PCLMUL
+
+/*
+ * Use this flag for faster GF2X arithmetic.
+ * This enables the use of the PCLMUL instruction on x86-64
+ * machines.
+ *
+ * To re-build after changing this flag:
+ *    rm GF2X.o; make ntl.a
+ */
+
+#endif
+
+
+
+
+
+
+#endif
diff --git a/thirdparty/linux/ntl/include/NTL/config_log.h b/thirdparty/linux/ntl/include/NTL/config_log.h
new file mode 100644
index 0000000000..bd6e22409c
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/config_log.h
@@ -0,0 +1,2 @@
+// generated by ./configure
+// CXXAUTOFLAGS=" -march=native"
diff --git a/thirdparty/linux/ntl/include/NTL/ctools.h b/thirdparty/linux/ntl/include/NTL/ctools.h
new file mode 100644
index 0000000000..8338112af7
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/ctools.h
@@ -0,0 +1,480 @@
+
+#ifndef NTL_ctools__H
+#define NTL_ctools__H
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+/*
+ * Resolve double-word integer types.
+ *
+ * Unfortunately, there is no "standard" way to do this.
+ * On 32-bit machines, 'long long' usually works (but not
+ * on MSVC++ or BORLAND), and on 64-bit machines, there is
+ * no standard. However, most compilers do offer *some*
+ * non-standard double-word type.
+ *
+ * Note that C99 creates a standard header <stdint.h>,
+ * but it is not clear how widely this is implemented yet,
+ * and for example, GCC does not provide a type int128_t
+ * in <stdint.h> on 64-bit machines.
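+ *
+ * (Editor's illustration, not from the original header: once the
+ * NTL_ULL_TYPE macro is resolved below, its role is to make a full
+ * double-word product expressible directly, e.g., on a 64-bit platform:
+ *
+ *      unsigned long a, b;
+ *      NTL_ULL_TYPE t = ((NTL_ULL_TYPE) a) * b;             // full product
+ *      unsigned long hi = (unsigned long) (t >> NTL_BITS_PER_LONG);
+ *      unsigned long lo = (unsigned long) t;
+ *
+ * which is exactly the kind of double-word multiplication the
+ * NTL_SPMM_ULL code path relies on.)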
+ */
+
+
+#if (defined(NTL_LONG_LONG_TYPE))
+
+#define NTL_LL_TYPE NTL_LONG_LONG_TYPE
+
+#elif (NTL_BITS_PER_LONG == 64 && defined(__GNUC__))
+
+#define NTL_LL_TYPE __int128_t
+
+#elif (NTL_BITS_PER_LONG == 32 && (defined(_MSC_VER) || defined(__BORLANDC__)))
+
+#define NTL_LL_TYPE __int64
+
+#elif (NTL_BITS_PER_LONG == 64 && (defined(_MSC_VER) || defined(__BORLANDC__)))
+
+#define NTL_LL_TYPE __int128
+
+#endif
+
+#if (!defined(NTL_LL_TYPE))
+
+#define NTL_LL_TYPE long long
+
+#endif
+
+
+
+#if (defined(NTL_UNSIGNED_LONG_LONG_TYPE))
+
+#define NTL_ULL_TYPE NTL_UNSIGNED_LONG_LONG_TYPE
+
+#elif (NTL_BITS_PER_LONG == 64 && defined(__GNUC__))
+
+#define NTL_ULL_TYPE __uint128_t
+
+#elif (NTL_BITS_PER_LONG == 32 && (defined(_MSC_VER) || defined(__BORLANDC__)))
+
+#define NTL_ULL_TYPE unsigned __int64
+
+#elif (NTL_BITS_PER_LONG == 64 && (defined(_MSC_VER) || defined(__BORLANDC__)))
+
+#define NTL_ULL_TYPE unsigned __int128
+
+#endif
+
+#if (!defined(NTL_ULL_TYPE))
+
+#define NTL_ULL_TYPE unsigned long long
+
+#endif
+
+
+#ifdef NTL_HAVE_LL_TYPE
+
+typedef NTL_LL_TYPE _ntl_longlong;
+typedef NTL_ULL_TYPE _ntl_ulonglong;
+// typenames are more convenient than macros
+
+#else
+
+#undef NTL_LL_TYPE
+#undef NTL_ULL_TYPE
+// prevent any use of these macros
+
+class _ntl_longlong { private: _ntl_longlong() { } };
+class _ntl_ulonglong { private: _ntl_ulonglong() { } };
+// cannot create variables of these types
+
+
+#endif
+
+/********************************************************/
+
+
+
+// Define an unsigned type with at least 32 bits
+// there is no truly portable way to do this, yet...
+
+
+#if (NTL_BITS_PER_INT >= 32)
+
+typedef unsigned int _ntl_uint32; // 32-bit word
+#define NTL_BITS_PER_INT32 NTL_BITS_PER_INT
+
+#else
+
+// NOTE: C++ standard guarantees longs are at least 32-bits wide,
+// and this is also explicitly checked at build time
+
+typedef unsigned long _ntl_uint32; // 32-bit word
+#define NTL_BITS_PER_INT32 NTL_BITS_PER_LONG
+
+#endif
+
+
+
+// The usual token pasting stuff...
+
+#define NTL_PASTE_TOKENS2(a,b) a ## b
+#define NTL_PASTE_TOKENS(a,b) NTL_PASTE_TOKENS2(a,b)
+
+#define NTL_STRINGIFY(x) NTL_STRINGIFY_AUX(x)
+#define NTL_STRINGIFY_AUX(x) #x
+
+
+
+
+
+
+#define NTL_OVFBND (1L << (NTL_BITS_PER_LONG-4))
+
+/*
+ * NTL_OVFBND is the general bound used throughout NTL to keep various
+ * integer values comfortably bounded away from an integer overflow
+ * condition. Do not change this value!
+ */
+
+
+
+
+
+#if ((NTL_BITS_PER_SIZE_T-1) < (NTL_BITS_PER_LONG-4))
+#define NTL_OVFBND1 (1L << (NTL_BITS_PER_SIZE_T-1))
+#else
+#define NTL_OVFBND1 NTL_OVFBND
+#endif
+
+/*
+ * NTL_OVFBND1 is a smaller bound than NTL_OVFBND when size_t is
+ * narrower than long. This prevents overflow on calls to malloc
+ * and realloc.
+ */
+
+
+
+
+
+
+#define NTL_OVERFLOW(n, a, b) \
+   (((b) >= NTL_OVFBND) || (((long) (n)) > 0 && (((a) >= NTL_OVFBND) || \
+   (((long) (n)) >= (NTL_OVFBND-((long)(b))+((long)(a))-1)/((long)(a))))))
+
+/*
+ * NTL_OVERFLOW(n, a, b) returns 1 if n*a + b >= NTL_OVFBND,
+ * and returns 0 otherwise. The value n is effectively treated as type long,
+ * while the values a and b may be *any* integral type. It is assumed that
+ * n >= 0, a > 0, and b >= 0. Care is taken to ensure that overflow does
+ * not occur. If a and b are constants, and n has no side effects,
+ * a good optimizing compiler will translate this into a single test
+ * of the form n >= c, where c is a constant.
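+ *
+ * (Editor's sketch of the intended usage pattern, mirroring the
+ * NTL_MALLOC macro below; the error call is hypothetical:
+ *
+ *      if (NTL_OVERFLOW(n, sizeof(long), 0))
+ *         ReportError("vector too big");       // hypothetical handler
+ *      long nbytes = n*sizeof(long);           // now known < NTL_OVFBND
+ *
+ * i.e., a size computation is guarded before it is performed.)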
+ */
+
+
+
+
+
+
+#define NTL_OVERFLOW1(n, a, b) \
+   (((b) >= NTL_OVFBND1) || (((long) (n)) > 0 && (((a) >= NTL_OVFBND1) || \
+   (((long) (n)) >= (NTL_OVFBND1-((long)(b))+((long)(a))-1)/((long)(a))))))
+
+/*
+ * NTL_OVERFLOW1 is the same as NTL_OVERFLOW, except that it uses the
+ * bound NTL_OVFBND1 instead of NTL_OVFBND.
+ */
+
+
+
+
+#ifdef NTL_TEST_EXCEPTIONS
+
+extern unsigned long exception_counter;
+
+#define NTL_BASIC_MALLOC(n, a, b) \
+   (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
+    ((void *) malloc(((long)(n))*((long)(a)) + ((long)(b)))))
+
+#define NTL_MALLOC(n, a, b) \
+   (--exception_counter == 0 ? (void *) 0 : NTL_BASIC_MALLOC(n, a, b))
+
+#else
+
+#define NTL_MALLOC(n, a, b) \
+   (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
+    ((void *) malloc(((long)(n))*((long)(a)) + ((long)(b)))))
+
+
+#endif
+
+/*
+ * NTL_MALLOC(n, a, b) returns 0 if a*n + b >= NTL_OVFBND1, and otherwise
+ * returns malloc(n*a + b).
+ * The programmer must ensure that the name "malloc" is visible
+ * at the point in the source code where this macro is expanded.
+ */
+
+
+#ifdef NTL_TEST_EXCEPTIONS
+
+#define NTL_BASIC_SNS_MALLOC(n, a, b) \
+   (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
+    ((void *) NTL_SNS malloc(((long)(n))*((long)(a)) + ((long)(b)))))
+
+
+#define NTL_SNS_MALLOC(n, a, b) \
+   (--exception_counter == 0 ? (void *) 0 : NTL_BASIC_SNS_MALLOC(n, a, b))
+
+
+#else
+
+#define NTL_SNS_MALLOC(n, a, b) \
+   (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
+    ((void *) NTL_SNS malloc(((long)(n))*((long)(a)) + ((long)(b)))))
+
+#endif
+
+/*
+ * NTL_SNS_MALLOC is the same as NTL_MALLOC, except that the call
+ * to malloc is prefixed by NTL_SNS.
+ */
+
+
+
+
+
+
+
+
+#define NTL_REALLOC(p, n, a, b) \
+   (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
+    ((void *) realloc((p), ((long)(n))*((long)(a)) + ((long)(b)))))
+
+/*
+ * NTL_REALLOC(p, n, a, b) returns 0 if a*n + b >= NTL_OVFBND1, and otherwise
+ * returns realloc(p, n*a + b).
+ * The programmer must ensure that the name "realloc" is visible
+ * at the point in the source code where this macro is expanded.
+ */
+
+
+
+
+
+
+#define NTL_SNS_REALLOC(p, n, a, b) \
+   (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
+    ((void *) NTL_SNS realloc((p), ((long)(n))*((long)(a)) + ((long)(b)))))
+
+/*
+ * NTL_SNS_REALLOC is the same as NTL_REALLOC, except that the call
+ * to realloc is prefixed by NTL_SNS.
+ */
+
+
+
+
+
+#define NTL_MAX_ALLOC_BLOCK (40000)
+
+/*
+ * NTL_MAX_ALLOC_BLOCK is the number of bytes that are allocated in
+ * a single block in a number of places throughout NTL (for
+ * vec_ZZ_p, ZZVec, vec_GF2X, and GF2XVec).
+ */
+
+
+#define NTL_ULONG_TO_LONG(a) \
+   ((((unsigned long) a) >> (NTL_BITS_PER_LONG-1)) ? \
+    (((long) (((unsigned long) a) - ((unsigned long) NTL_MIN_LONG))) + \
+     NTL_MIN_LONG) : \
+    ((long) a))
+
+/*
+ * This macro converts from unsigned long to signed long. It is portable
+ * among platforms for which a long has a 2's complement representation
+ * of the same width as an unsigned long. While it avoids assumptions
+ * about the behavior of non-standard conversions, a good optimizing
+ * compiler should turn it into the identity function.
+ */
+
+
+#define NTL_UINT_TO_INT(a) \
+   ((((unsigned int) a) >> (NTL_BITS_PER_INT-1)) ? \
+    (((int) (((unsigned int) a) - ((unsigned int) NTL_MIN_INT))) + \
+     NTL_MIN_INT) : \
+    ((int) a))
+
+/*
+ * This macro converts from unsigned int to signed int. It is portable
+ * among platforms for which an int has a 2's complement representation
+ * of the same width as an unsigned int. While it avoids assumptions
+ * about the behavior of non-standard conversions, a good optimizing
+ * compiler should turn it into the identity function.
+ */
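+
+/* (Editor's illustration, assuming the usual two's complement
+ * representation:
+ *
+ *      unsigned long u = (unsigned long) -1;
+ *      long s = NTL_ULONG_TO_LONG(u);      // s == -1
+ *
+ * i.e., the macro inverts the standard long -> unsigned long
+ * conversion without relying on implementation-defined casts.)
+ */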
+
+
+#ifdef NTL_THREADS
+
+#define NTL_THREAD_LOCAL thread_local
+
+#ifdef __GNUC__
+#define NTL_CHEAP_THREAD_LOCAL __thread
+#else
+#define NTL_CHEAP_THREAD_LOCAL thread_local
+#endif
+
+#else
+
+#define NTL_THREAD_LOCAL
+#define NTL_CHEAP_THREAD_LOCAL
+
+#endif
+
+
+#define NTL_RELEASE_THRESH (128)
+
+/*
+ * threshold for releasing scratch memory.
+ */
+
+
+
+long _ntl_IsFinite(double *p);
+/* This forces a double into memory, and tests if it is "normal";
+   that means, not NaN, not +/- infinity, not denormalized, etc.
+   Forcing into memory is sometimes necessary on machines
+   with "extended" double precision registers (e.g., Intel x86s)
+   to force the standard IEEE format. */
+
+void _ntl_ForceToMem(double *p);
+/* This is a do-nothing routine that has the effect of forcing
+   a double into memory (see comment above). */
+
+double _ntl_ldexp(double x, long e);
+
+
+#define NTL_DEFINE_SWAP(T)\
+inline void _ntl_swap(T& a, T& b)\
+{\
+   T t = a; a = b; b = t;\
+}
+
+NTL_DEFINE_SWAP(long)
+NTL_DEFINE_SWAP(int)
+NTL_DEFINE_SWAP(short)
+NTL_DEFINE_SWAP(char)
+
+NTL_DEFINE_SWAP(unsigned long)
+NTL_DEFINE_SWAP(unsigned int)
+NTL_DEFINE_SWAP(unsigned short)
+NTL_DEFINE_SWAP(unsigned char)
+
+NTL_DEFINE_SWAP(double)
+NTL_DEFINE_SWAP(float)
+
+
+template<class T>
+void _ntl_swap(T*& a, T*& b)
+{
+   T* t = a; a = b; b = t;
+}
+
+/* These are convenience routines. I don't want to overload
+   the std library's swap function, nor do I want to rely on the latter,
+   as the C++ standard is kind of broken on the issue of where
+   swap is defined. And I also only want it defined for built-in types.
+ */
+
+
+// The following routine increments a pointer so that
+// it is properly aligned.
+// It is assumed that align > 0.
+// If align is a constant power of 2, it compiles
+// into a small handful of simple instructions.
+
+#if (NTL_BIG_POINTERS)
+
+#define NTL_UPTRINT_T unsigned long long
+// DIRT: this should really be std::uintptr_t, defined
+// in <cstdint>; however, that header is not widely available,
+// and even if it were, std::uintptr_t is not guaranteed
+// to be defined. Of course, unsigned long long may not
+// be defined in pre-C++11.
+
+#else
+
+#define NTL_UPTRINT_T unsigned long
+
+#endif
+
+
+#if (!defined(__GNUC__) || !defined(__x86_64__) || NTL_BITS_PER_LONG != 64)
+
+// DIRT: for now, we only attempt to implement this function properly when
+// we really need it, which is for AVX support.
+// The source file CheckAVX.c checks for these same conditions.
+// We still define it, to simplify the overall code structure.
+
+static inline
+char *_ntl_make_aligned(char *p, long align)
+{
+   return p;
+}
+
+#else
+
+// DIRT: in the limited range of platforms for which we attempt to
+// implement it, this should work fine.
+
+static inline
+char *_ntl_make_aligned(char *p, long align)
+{
+   unsigned long r = (unsigned long) (((NTL_UPTRINT_T) (p)) % ((NTL_UPTRINT_T) (align)));
+   return p + ((((unsigned long) (align)) - r) % ((unsigned long) (align)));
}
+
+#define NTL_HAVE_ALIGNED_ARRAY
+
+#endif
+
+
+
+
+
+// The following is for aligning small local arrays
+// Equivalent to type x[n], but aligns to align bytes
+// Only works for POD types
+// NOTE: the gcc aligned attribute might work, but there is
+// some chatter on the web that this was (at some point) buggy.
+// Not clear what the current status is.
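+// (Editor's illustration of the helper above, assuming align is a
+// power of 2:
+//
+//      char buf[8*sizeof(double) + 32];
+//      double *p = (double *) _ntl_make_aligned(buf, 32);
+//
+// p then points at the first 32-byte-aligned address inside buf,
+// which is what the macro below automates.)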
+// Anyway, this is only intended for use with gcc on intel
+// machines, so it should be OK.
+
+
+#define NTL_ALIGNED_LOCAL_ARRAY(align, x, type, n) \
+   char x##__ntl_hidden_variable_storage[n*sizeof(type)+align]; \
+   type *x = (type *) _ntl_make_aligned(&x##__ntl_hidden_variable_storage[0], align);
+
+
+#define NTL_AVX_BYTE_ALIGN (32)
+#define NTL_AVX_DBL_ALIGN (NTL_AVX_BYTE_ALIGN/long(sizeof(double)))
+
+#define NTL_AVX_LOCAL_ARRAY(x, type, n) NTL_ALIGNED_LOCAL_ARRAY(NTL_AVX_BYTE_ALIGN, x, type, n)
+
+#define NTL_DEFAULT_ALIGN (64)
+// this should be big enough to satisfy any SIMD instructions,
+// and it should also be as big as a cache line
+
+
+
+
+#endif
+
diff --git a/thirdparty/linux/ntl/include/NTL/def_config.h b/thirdparty/linux/ntl/include/NTL/def_config.h
new file mode 100644
index 0000000000..aaf48aa5a6
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/def_config.h
@@ -0,0 +1,632 @@
+
+#ifndef NTL_config__H
+#define NTL_config__H
+
+/*************************************************************************
+
+                          NTL Configuration File
+                          ----------------------
+
+This file may be modified prior to building NTL so as to specify
+some basic configuration options, and to customize
+how code is generated so as to improve performance.
+
+The Basic Configuration Options must be set by hand. If you use the
+configuration wizard, then these flags should be set before
+the installation process begins; their values will be retained
+by the wizard.
+
+The Performance Options can be set either by hand, by editing this
+file, or (on most Unix platforms) can be set automatically using
+the configuration wizard which runs when NTL is installed.
+
+All NTL header files include this file.
+By setting these flags here, instead of on the compiler command line,
+it is easier to guarantee that NTL library and client code use
+consistent settings.
+
+
+                                How to do it
+                                ------------
+
+To set a flag, just replace the pre-processor directive
+'if 0' by 'if 1' for that flag, which causes the appropriate macro
+to be defined. Of course, to unset a flag, just replace the
+'if 1' by an 'if 0'.
+
+You can also do this more conveniently via the command line
+using the configure script.
+
+
+ *************************************************************************/
+
+
+
+/*************************************************************************
+ *
+ * Basic Configuration Options
+ *
+ *************************************************************************/
+
+
+ /* None of these flags are set by the configuration wizard;
+  * they must be set by hand, before installation begins.
+  */
+
+
+#if 0
+#define NTL_LEGACY_NO_NAMESPACE
+
+/*
+ * By default, NTL components are declared inside the namespace NTL.
+ * Set this flag if you want to instead have these components
+ * declared in the global namespace. This is for backward
+ * compatibility only -- not recommended.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_LEGACY_INPUT_ERROR
+
+/*
+ * Also for backward compatibility. Set if you want input
+ * operations to abort on error, instead of just setting the
+ * "fail bit" of the input stream.
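+ *
+ * (Editor's note, restating the "How to do it" section above with a
+ * concrete example: to enable a flag such as this one, the guard is
+ * flipped from
+ *
+ *      #if 0
+ *      #define NTL_LEGACY_INPUT_ERROR
+ *
+ * to
+ *
+ *      #if 1
+ *      #define NTL_LEGACY_INPUT_ERROR
+ *
+ * and the library is rebuilt as indicated below.)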
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+
+#endif
+
+#if 0
+#define NTL_DISABLE_TLS_HACK
+
+/* Set if you want to compile NTL without "TLS hack"
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+#if 0
+#define NTL_ENABLE_TLS_HACK
+
+/* Set if you want to compile NTL with "TLS hack"
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+#if 0
+#define NTL_THREADS
+
+/* Set if you want to compile NTL as a thread-safe library.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_EXCEPTIONS
+
+/* Set if you want to compile NTL with exceptions enabled
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+#if 0
+#define NTL_THREAD_BOOST
+
+/* Set if you want to compile NTL to exploit threads internally.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+#
+
+#if 1
+#define NTL_GMP_LIP
+
+/*
+ * Use this flag if you want to use GMP as the long integer package.
+ * This can result in significantly faster code on some platforms.
+ * It requires that the GMP package (version >= 3.1) has already been
+ * installed. You will also have to set the variables GMP_OPT_INCDIR,
+ * GMP_OPT_LIBDIR, GMP_OPT_LIB in the makefile (these are set automatically
+ * by the configuration script when you pass the flag NTL_GMP_LIP=on
+ * to that script).
+ *
+ * Beware that setting this flag can break some very old NTL codes.
+ *
+ * To re-build after changing this flag:
+ *    rm *.o; make setup3; make ntl.a
+ * You may also have to edit the makefile to modify the variables
+ * GMP_OPT_INCDIR, GMP_OPT_LIBDIR, and GMP_OPT_LIB.
+ */
+
+#endif
+
+#if 0
+#define NTL_GF2X_LIB
+
+/*
+ * Use this flag if you want to use the gf2x library for
+ * faster GF2X arithmetic.
+ * This can result in significantly faster code, especially
+ * when working with polynomials of huge degree.
+ * You will also have to set the variables GF2X_OPT_INCDIR,
+ * GF2X_OPT_LIBDIR, GF2X_OPT_LIB in the makefile (these are set automatically
+ * by the configuration script when you pass the flag NTL_GF2X_LIB=on
+ * to that script).
+ *
+ * To re-build after changing this flag:
+ *    rm GF2X.o GF2X1.o; make ntl.a
+ * You may also have to edit the makefile to modify the variables
+ * GF2X_OPT_INCDIR, GF2X_OPT_LIBDIR, and GF2X_OPT_LIB.
+ */
+
+#endif
+
+
+#if 0
+#define NTL_LONG_LONG_TYPE long long
+
+/*
+ * If you set the flag NTL_LONG_LONG, then the value of
+ * NTL_LONG_LONG_TYPE will be used
+ * to declare 'double word' signed integer types.
+ * Irrelevant when NTL_GMP_LIP is set.
+ * If left undefined, some "ifdef magic" will attempt
+ * to find the best choice for your platform, depending
+ * on the compiler and wordsize. On 32-bit machines,
+ * this is usually 'long long'.
+ *
+ * To re-build after changing this flag: rm lip.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_UNSIGNED_LONG_LONG_TYPE unsigned long long
+
+/*
+ * If you set the flag NTL_SPMM_ULL, then the value of
+ * NTL_UNSIGNED_LONG_LONG_TYPE will be used
+ * to declare 'double word' unsigned integer types.
+ * If left undefined, some "ifdef magic" will attempt
+ * to find the best choice for your platform, depending
+ * on the compiler and wordsize. On 32-bit machines,
+ * this is usually 'unsigned long long'.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_CLEAN_INT
+
+/*
+ * This will disallow the use of some non-standard integer arithmetic
+ * that may improve performance somewhat.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+#if 0
+#define NTL_CLEAN_PTR
+
+/*
+ * This will disallow the use of some non-standard pointer arithmetic
+ * that may improve performance somewhat.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_RANGE_CHECK
+
+/*
+ * This will generate vector subscript range-check code.
+ * Useful for debugging, but it slows things down of course.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+
+
+
+
+#if 0
+#define NTL_NO_INIT_TRANS
+
+/*
+ * Without this flag, NTL uses a special code sequence to avoid
+ * copying large objects in return statements. However, if your
+ * compiler optimizes away the return of a *named* local object,
+ * this is not necessary, and setting this flag will result
+ * in *slightly* more compact and efficient code. Although
+ * the emerging C++ standard allows compilers to perform
+ * this optimization, I know of none that currently do.
+ * Most will avoid copying *temporary* objects in return statements,
+ * and NTL's default code sequence exploits this fact.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_X86_FIX
+
+/*
+ * Forces the "x86 floating point fix", overriding the default behavior.
+ * By default, NTL will apply the "fix" if it looks like it is
+ * necessary, and if it knows how to fix it.
+ * The problem addressed here is that x86 processors sometimes
+ * run in a mode where FP registers have more precision than doubles.
+ * This will cause code in quad_float.c some trouble.
+ * NTL can normally correctly detect the problem, and fix it,
+ * so you shouldn't need to worry about this or the next flag.
+
+ * To re-build after changing this flag: rm quad_float.o; make ntl.a
+ *
+ */
+
+#elif 0
+#define NTL_NO_X86_FIX
+/*
+ * Forces no "x86 floating point fix", overriding the default behavior.
+
+ * To re-build after changing this flag: rm quad_float.o; make ntl.a
+ */
+
+#endif
+
+
+
+#if 0
+#define NTL_LEGACY_SP_MULMOD
+
+/* Forces legacy single-precision MulMod implementation.
+ */
+
+#endif
+
+
+#if 0
+#define NTL_DISABLE_LONGDOUBLE
+
+/* Explicitly disables use of long double arithmetic
+ */
+
+#endif
+
+
+#if 0
+#define NTL_DISABLE_LONGLONG
+
+/* Explicitly disables use of long long arithmetic
+ */
+
+#endif
+
+#if 0
+#define NTL_DISABLE_LL_ASM
+
+/* Explicitly disables use of inline assembly as a replacement
+ * for long long arithmetic.
+ */
+
+#endif
+
+
+#if 0
+#define NTL_MAXIMIZE_SP_NBITS
+
+/* Allows for 62-bit single-precision moduli on 64-bit platforms.
+ * By default, such moduli are restricted to 60 bits, which
+ * usually gives slightly better performance across a range
+ * of parameters.
+ */
+
+#endif
+
+/*************************************************************************
+ *
+ * Performance Options
+ *
+ *************************************************************************/
+
+
+/* One can choose one of three different strategies for long integer
+ * arithmetic: the default, NTL_LONG_LONG, or NTL_AVOID_FLOAT.
+ * The configuration wizard will choose among them.
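+ *
+ * (Editor's sketch of the idea, not from the original file: on a
+ * 32-bit platform, NTL_LONG_LONG lets the low-level code form a
+ * double-length product in one multiplication,
+ *
+ *      long a, b;
+ *      long long t = (long long) a * b;   // full 64-bit product
+ *
+ * rather than simulating it through floating point or half-word
+ * splitting.)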
+ *
+ */
+
+#if 0
+#define NTL_LONG_LONG
+
+/*
+ *
+ * For platforms that support it, this flag can be set to cause
+ * the low-level multiplication code to use the type "long long",
+ * which on some platforms may yield a significant performance gain,
+ * but on others can yield no improvement and can even
+ * slow things down.
+ *
+ *
+ * See below (NTL_LONG_LONG_TYPE) for how to use a type name
+ * other than "long long".
+ *
+ * If you set NTL_LONG_LONG, you might also want to set
+ * the flag NTL_TBL_REM (see below).
+ *
+ * To re-build after changing this flag: rm lip.o; make ntl.a
+ */
+
+#elif 0
+#define NTL_AVOID_FLOAT
+
+/*
+ *
+ * On machines with slow floating point or---more commonly---slow int/float
+ * conversions, this flag can lead to faster code.
+ *
+ * If you set NTL_AVOID_FLOAT, you should probably also
+ * set NTL_TBL_REM (see below).
+ *
+ * To re-build after changing this flag: rm lip.o; make ntl.a
+ */
+
+#endif
+
+
+/* There are three strategies to implement single-precision
+ * modular multiplication with preconditioning (see the MulModPrecon
+ * function in the ZZ module): the default, NTL_SPMM_ULL,
+ * and NTL_SPMM_ASM. (A usage sketch appears further below.)
+ * This plays a crucial role in the "small prime FFT" used to
+ * implement polynomial arithmetic, and in other CRT-based methods
+ * (such as linear algebra over ZZ), as well as polynomial and matrix
+ * arithmetic over zz_p.
+ */
+
+
+
+#if 0
+#define NTL_SPMM_ULL
+
+/* This also causes an "all integer"
+ * implementation of MulModPrecon to be used.
+ * It is usually a faster implementation,
+ * but it is not entirely portable.
+ * It relies on double-word unsigned multiplication
+ * (see NTL_UNSIGNED_LONG_LONG_TYPE above).
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#elif 0
+#define NTL_SPMM_ASM
+
+/* Like the previous flag, this also causes an "all integer"
+ * implementation of MulModPrecon to be used.
+ * It relies on assembler code to do double-word unsigned multiplication.
+ * This is only supported on select machines under GCC.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+
+#endif
+
+
+
+/*
+ * The following two flags provide additional control for how the
+ * FFT modulo single-precision primes is implemented.
+ */
+
+#if 0
+#define NTL_FFT_BIGTAB
+
+/*
+ * Precomputed tables are used to store all the roots of unity
+ * used in FFT computations.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+
+#endif
+
+
+#if 0
+#define NTL_FFT_LAZYMUL
+
+/*
+ * This flag only has an effect when combined with
+ * either the NTL_SPMM_ULL or NTL_SPMM_ASM flags.
+ * When set, a "lazy multiplication" strategy due to David Harvey is used:
+ * see his paper "FASTER ARITHMETIC FOR NUMBER-THEORETIC TRANSFORMS".
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+
+#endif
+
+
+
+
+
+/* The next six flags NTL_AVOID_BRANCHING, NTL_TBL_REM, NTL_TBL_REM_LL,
+ * NTL_GF2X_ALTCODE, NTL_GF2X_ALTCODE1, and NTL_GF2X_NOINLINE
+ * are also set by the configuration wizard.
+ */
+
+
+
+#if 0
+#define NTL_AVOID_BRANCHING
+
+/*
+ * With this option, branches are replaced at several
+ * key points with equivalent code using shifts and masks.
+ * It may speed things up on machines with
+ * deep pipelines and high branch penalties.
+ * This flag mainly affects the implementation of the
+ * single-precision modular arithmetic routines.
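+ *
+ * (Editor's sketch of the single-precision preconditioned interface
+ * that the MulModPrecon paragraph above refers to; the names are from
+ * NTL's ZZ module, and x/k are assumed to be a caller's array and
+ * length:
+ *
+ *      long n = 12345;                          // the modulus
+ *      long b = 42;                             // fixed multiplier
+ *      mulmod_t ninv = PrepMulMod(n);
+ *      mulmod_precon_t bninv = PrepMulModPrecon(b, n, ninv);
+ *      for (long i = 0; i < k; i++)
+ *         x[i] = MulModPrecon(x[i], b, n, bninv);   // x[i] = x[i]*b % n
+ *
+ * The flags in this section select among implementations of exactly
+ * these primitives.)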
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+
+
+#if 0
+#define NTL_TBL_REM
+
+/*
+ *
+ * With this flag, some divisions are avoided in the
+ * ZZ_pX multiplication routines. If you use the NTL_AVOID_FLOAT
+ * or NTL_LONG_LONG flags, then you should probably use this one too.
+ *
+ * To re-build after changing this flag:
+ *    rm lip.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_TBL_REM_LL
+
+/*
+ *
+ * This forces the LONG_LONG version of TBL_REM.
+ *
+ * Irrelevant when NTL_GMP_LIP is set.
+ *
+ * To re-build after changing this flag:
+ *    rm lip.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_CRT_ALTCODE
+
+/*
+ * Employs an alternative CRT strategy.
+ * Only relevant with GMP.
+ * Seems to be marginally faster on some x86_64 platforms.
+ *
+ * To re-build after changing this flag:
+ *    rm lip.o; make ntl.a
+ */
+
+#endif
+
+#if 0
+#define NTL_CRT_ALTCODE_SMALL
+
+/*
+ * Employs an alternative CRT strategy for small moduli.
+ * Only relevant with GMP.
+ * Seems to be marginally faster on some x86_64 platforms.
+ *
+ * To re-build after changing this flag:
+ *    rm lip.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_GF2X_ALTCODE
+
+/*
+ * With this option, the default strategy for implementing low-level
+ * GF2X multiplication is replaced with an alternative strategy.
+ * This alternative strategy seems to work better on RISC machines
+ * with deep pipelines and high branch penalties (like a powerpc),
+ * but does no better (or even worse) on x86s.
+ *
+ * To re-build after changing this flag: rm GF2X.o; make ntl.a
+ */
+
+#elif 0
+#define NTL_GF2X_ALTCODE1
+
+
+/*
+ * Yet another alternative strategy for implementing GF2X
+ * multiplication.
+ *
+ * To re-build after changing this flag: rm GF2X.o; make ntl.a
+ */
+
+
+#endif
+
+#if 0
+#define NTL_GF2X_NOINLINE
+
+/*
+ * By default, the low-level GF2X multiplication routine is inlined.
+ * This can potentially lead to some trouble on some platforms,
+ * and you can override the default by setting this flag.
+ *
+ * To re-build after changing this flag: rm GF2X.o; make ntl.a
+ */
+
+#endif
+
+
+#if 0
+#define NTL_PCLMUL
+
+/*
+ * Use this flag for faster GF2X arithmetic.
+ * This enables the use of the PCLMUL instruction on x86-64
+ * machines.
+ * + * To re-build after changing this flag: + * rm GF2X.o; make ntl.a + */ + +#endif + + + + + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/fileio.h b/thirdparty/linux/ntl/include/NTL/fileio.h new file mode 100644 index 0000000000..7a8e955279 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/fileio.h @@ -0,0 +1,59 @@ + +#ifndef NTL_fileio__H +#define NTL_fileio__H + +#include +#include +#include +#include + + +NTL_OPEN_NNS + + +class FileList { +private: + Vec< Vec > data; + + FileList(const FileList&); // disable + void operator=(const FileList&); // disable + +public: + FileList() { } + void AddFile(const char *name); + void RemoveLast(); + + ~FileList(); + + +}; + + + +void OpenWrite(NTL_SNS ofstream& s, const char *name); +// opens file for writing...aborts if fails + +void OpenWrite(NTL_SNS ofstream& s, const char *name, FileList& flist); +// opens file for writing and adds name to flist + +void OpenRead(NTL_SNS ifstream& s, const char *name); +// opens file for reading + +void CloseWrite(NTL_SNS ofstream& s); +// closes s, checks for failure + + + +const char *FileName(const char* stem, long d); +// builds the name from stem-DDDDD, returns pointer to buffer + +const NTL_SNS string& UniqueID(); +// ideally, a unique ID (across all processes and threads), +// but it may not be perfect (useful for generating unique +// file names and seeding PRG). + +NTL_CLOSE_NNS + +#endif + + diff --git a/thirdparty/linux/ntl/include/NTL/g_lip.h b/thirdparty/linux/ntl/include/NTL/g_lip.h new file mode 100644 index 0000000000..f230aa10c2 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/g_lip.h @@ -0,0 +1,592 @@ + + + +#if 1 + +typedef void *_ntl_gbigint; +typedef void _ntl_gbigint_body; + + +#else + +/* + * This way of defining the bigint handle type is a bit non-standard, + * but better for debugging. + */ + +struct _ntl_gbigint_is_opaque { long _x_; }; +typedef struct _ntl_gbigint_is_opaque * _ntl_gbigint; + +typedef _ntl_gbigint_is_opaque _ntl_gbigint_body; + +#endif + + +#if (defined(NTL_HAVE_LL_TYPE) && !defined(NTL_LEGACY_SP_MULMOD)) + +#define NTL_LONGLONG_SP_MULMOD + +// on 64 bit machines, hold NTL_SP_NBITS to 60 bits, +// as certain operations (in particular, TBL_REM in g_lip_impl.h) +// are a bit faster + + +#if (!defined(NTL_MAXIMIZE_SP_NBITS) && NTL_BITS_PER_LONG >= 64) +#define NTL_SP_NBITS (NTL_BITS_PER_LONG-4) +#else +#define NTL_SP_NBITS (NTL_BITS_PER_LONG-2) +#endif + + +#define NTL_NSP_NBITS NTL_NBITS_MAX + +#if (NTL_NSP_NBITS > NTL_SP_NBITS) +#undef NTL_NSP_NBITS +#define NTL_NSP_NBITS NTL_SP_NBITS +#endif + + +#elif (NTL_LONGDOUBLE_OK && !defined(NTL_LEGACY_SP_MULMOD) && !defined(NTL_DISABLE_LONGDOUBLE)) + +#define NTL_LONGDOUBLE_SP_MULMOD + +#define NTL_SP_NBITS NTL_WNBITS_MAX + +// on 64 bit machines, hold NTL_SP_NBITS to 60 bits (see above) + +#if (!defined(NTL_MAXIMIZE_SP_NBITS) && NTL_BITS_PER_LONG >= 64 && NTL_SP_NBITS > NTL_BITS_PER_LONG-4) +#undef NTL_SP_NBITS +#define NTL_SP_NBITS (NTL_BITS_PER_LONG-4) +#endif + +#define NTL_NSP_NBITS NTL_NBITS_MAX +#if (NTL_NSP_NBITS > NTL_SP_NBITS) +#undef NTL_NSP_NBITS +#define NTL_NSP_NBITS NTL_SP_NBITS +#endif + + +#else + + +#define NTL_SP_NBITS NTL_NBITS_MAX +#define NTL_NSP_NBITS NTL_NBITS_MAX + + +#endif + +#define NTL_WSP_NBITS (NTL_BITS_PER_LONG-2) + +#define NTL_SP_BOUND (1L << NTL_SP_NBITS) +#define NTL_NSP_BOUND (1L << NTL_NSP_NBITS) +#define NTL_WSP_BOUND (1L << NTL_WSP_NBITS) + +/* define the following so an error is raised */ + +#define NTL_RADIX ...... +#define NTL_NBITSH ...... 
+#define NTL_RADIXM ......
+#define NTL_RADIXROOT ......
+#define NTL_RADIXROOTM ......
+#define NTL_FRADIX_INV ......
+
+
+
+
+
+
+/***********************************************************************
+
+   Basic Functions
+
+***********************************************************************/
+
+
+
+   void _ntl_gsadd(_ntl_gbigint a, long d, _ntl_gbigint *b);
+   /* *b = a + d */
+
+   void _ntl_gadd(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *c);
+   /* *c = a + b */
+
+   void _ntl_gsub(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *c);
+   /* *c = a - b */
+
+   void _ntl_gsubpos(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *c);
+   /* *c = a - b; assumes a >= b >= 0 */
+
+   void _ntl_gsmul(_ntl_gbigint a, long d, _ntl_gbigint *b);
+   /* *b = d * a */
+
+   void _ntl_gmul(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *c);
+   /* *c = a * b */
+
+   void _ntl_gsq(_ntl_gbigint a, _ntl_gbigint *c);
+   /* *c = a * a */
+
+   long _ntl_gsdiv(_ntl_gbigint a, long b, _ntl_gbigint *q);
+   /* (*q) = floor(a/b) and a - b*(*q) is returned;
+      error is raised if b == 0;
+      if b does not divide a, then the returned remainder
+      has the same sign as b */
+
+   void _ntl_gdiv(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *q, _ntl_gbigint *r);
+   /* (*q) = floor(a/b) and (*r) = a - b*(*q);
+      error is raised if b == 0;
+      if b does not divide a, then sign(*r) == sign(b) */
+
+   void _ntl_gmod(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *r);
+   /* same as _ntl_gdiv, but only remainder is computed */
+
+   long _ntl_gsmod(_ntl_gbigint a, long d);
+   /* same as _ntl_gsdiv, but only remainder is computed */
+
+   void _ntl_gquickmod(_ntl_gbigint *r, _ntl_gbigint b);
+   /* *r = *r % b;
+      assumes b > 0 and *r >= 0;
+      the division is performed in place (but may sometimes
+      cause *r to grow by one digit) */
+
+   void _ntl_gsaddmul(_ntl_gbigint x, long y, _ntl_gbigint *ww);
+   /* *ww += x*y */
+
+   void _ntl_gaddmul(_ntl_gbigint x, _ntl_gbigint y, _ntl_gbigint *ww);
+   /* *ww += x*y */
+
+   void _ntl_gssubmul(_ntl_gbigint x, long y, _ntl_gbigint *ww);
+   /* *ww -= x*y */
+
+   void _ntl_gsubmul(_ntl_gbigint x, _ntl_gbigint y, _ntl_gbigint *ww);
+   /* *ww -= x*y */
+
+
+
+
+
+/********************************************************************
+
+   Shifting and bit manipulation
+
+*********************************************************************/
+
+
+   void _ntl_glshift(_ntl_gbigint n, long k, _ntl_gbigint *a);
+   /* *a = sign(n) * (|n| << k);
+      shift is in reverse direction for negative k */
+
+   void _ntl_grshift(_ntl_gbigint n, long k, _ntl_gbigint *a);
+   /* *a = sign(n) * (|n| >> k);
+      shift is in reverse direction for negative k */
+
+   long _ntl_gmakeodd(_ntl_gbigint *n);
+   /*
+      if (n != 0)
+         *n = m;
+         return (k such that n == 2 ^ k * m with m odd);
+      else
+         return (0);
+   */
+
+   long _ntl_gnumtwos(_ntl_gbigint n);
+   /* return largest e such that 2^e divides n, or zero if n is zero */
+
+   long _ntl_godd(_ntl_gbigint a);
+   /* returns 1 if a is odd and 0 if it is even */
+
+   long _ntl_gbit(_ntl_gbigint a, long p);
+   /* returns p-th bit of a, where the low order bit is indexed by 0;
+      p out of range returns 0 */
+
+   long _ntl_gsetbit(_ntl_gbigint *a, long p);
+   /* returns original value of p-th bit of |a|, and replaces
+      p-th bit of a by 1 if it was zero;
+      error if p < 0 */
+
+   long _ntl_gswitchbit(_ntl_gbigint *a, long p);
+   /* returns original value of p-th bit of |a|, and switches
+      the value of p-th bit of a;
+      p starts counting at 0;
+      error if p < 0 */
+
+
+   void _ntl_glowbits(_ntl_gbigint a, long k, _ntl_gbigint *b);
+   /* places k low order bits of |a| in b */
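+
+   /* Editor's usage sketch (illustrative only): these routines follow a
+      handle-in, pointer-out calling convention, and a null handle is
+      assumed here to represent the value zero, with the routines
+      allocating as needed. E.g., to compute d = a*b + 1:
+
+         _ntl_gbigint a = 0, b = 0, c = 0, d = 0;
+         _ntl_gintoz(12345, &a);
+         _ntl_gintoz(67890, &b);
+         _ntl_gmul(a, b, &c);      // c = a * b
+         _ntl_gsadd(c, 1, &d);     // d = c + 1
+         _ntl_gfree(&a); _ntl_gfree(&b); _ntl_gfree(&c); _ntl_gfree(&d);
+
+      (_ntl_gintoz and _ntl_gfree are declared further below.) */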
+
+   long _ntl_gslowbits(_ntl_gbigint a, long k);
+   /* returns k low order bits of |a| */
+
+   long _ntl_gweights(long a);
+   /* returns Hamming weight of |a| */
+
+   long _ntl_gweight(_ntl_gbigint a);
+   /* returns Hamming weight of |a| */
+
+   void _ntl_gand(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *c);
+   /* c gets bit pattern `bits of |a|` and `bits of |b|` */
+
+   void _ntl_gor(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *c);
+   /* c gets bit pattern `bits of |a|` inclusive or `bits of |b|` */
+
+   void _ntl_gxor(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *c);
+   /* c gets bit pattern `bits of |a|` exclusive or `bits of |b|` */
+
+
+
+
+/************************************************************************
+
+   Comparison
+
+*************************************************************************/
+
+   long _ntl_gcompare(_ntl_gbigint a, _ntl_gbigint b);
+   /*
+      if (a > b)
+         return (1);
+      if (a == b)
+         return (0);
+      if (a < b)
+         return (-1);
+   */
+
+   long _ntl_gscompare(_ntl_gbigint a, long b);
+   /* single-precision version of the above */
+
+   long _ntl_giszero (_ntl_gbigint a);
+   /* test for 0 */
+
+
+   long _ntl_gsign(_ntl_gbigint a);
+   /*
+      if (a > 0)
+         return (1);
+      if (a == 0)
+         return (0);
+      if (a < 0)
+         return (-1);
+   */
+
+   void _ntl_gabs(_ntl_gbigint *a);
+   /* *a = |a| */
+
+   void _ntl_gnegate(_ntl_gbigint *a);
+   /* *a = -a */
+
+   void _ntl_gcopy(_ntl_gbigint a, _ntl_gbigint *b);
+   /* *b = a; */
+
+   void _ntl_gswap(_ntl_gbigint *a, _ntl_gbigint *b);
+   /* swap a and b (by swapping pointers) */
+
+   long _ntl_g2log(_ntl_gbigint a);
+   /* number of bits in |a|; returns 0 if a = 0 */
+
+   long _ntl_g2logs(long a);
+   /* single-precision version of the above */
+
+
+/********************************************************************
+
+   Conversion
+
+*********************************************************************/
+
+   void _ntl_gzero(_ntl_gbigint *a);
+   /* *a = 0; */
+
+   void _ntl_gone(_ntl_gbigint *a);
+   /* *a = 1 */
+
+   void _ntl_gintoz(long d, _ntl_gbigint *a);
+   /* *a = d; */
+
+
+   void _ntl_guintoz(unsigned long d, _ntl_gbigint *a);
+   /* *a = d; space is allocated */
+
+   long _ntl_gtoint(_ntl_gbigint a);
+   /* converts a to a long; overflow results in value
+      mod 2^{NTL_BITS_PER_LONG}. */
+
+   unsigned long _ntl_gtouint(_ntl_gbigint a);
+   /* converts a to an unsigned long; overflow results in value
+      mod 2^{NTL_BITS_PER_LONG}. */
+
+
+
+
+   double _ntl_gdoub(_ntl_gbigint n);
+   /* converts n to a double; no overflow check */
+
+   long _ntl_ground_correction(_ntl_gbigint a, long k, long residual);
+   /* k >= 1, |a| >= 2^k, and residual is 0, 1, or -1.
+      The result is what we should add to (a >> k) to round
+      x = a/2^k to the nearest integer using IEEE-like rounding rules
+      (i.e., round to nearest, and round to even to break ties).
+      The result is either 0 or sign(a).
+      If residual is not zero, it is as if x were replaced by
+      x' = x + residual*2^{-(k+1)}.
+      This can be used to break ties when x is exactly
+      half way between two integers.
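+      For instance (editor's worked example): for a = 5, k = 1,
+      residual = 0, x = 2.5 is rounded to the even value 2 = (a >> 1),
+      so the result is 0; for a = 3, k = 1, residual = 0, x = 1.5 is
+      rounded to 2 = (a >> 1) + 1, so the result is 1 = sign(a).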
*/
+
+   double _ntl_glog(_ntl_gbigint a);
+   /* computes log(a), protecting against overflow */
+
+   void _ntl_gdoubtoz(double a, _ntl_gbigint *x);
+   /* *x = floor(a); */
+
+
+
+
+/************************************************************************
+
+   Square roots
+
+*************************************************************************/
+
+
+   long _ntl_gsqrts(long n);
+   /* return floor(sqrt(n)); error raised if n < 0 */
+
+   void _ntl_gsqrt(_ntl_gbigint n, _ntl_gbigint *r);
+   /* *r = floor(sqrt(n)); error raised if n < 0 */
+
+/*********************************************************************
+
+   Exponentiation
+
+**********************************************************************/
+
+   void _ntl_gexp(_ntl_gbigint a, long e, _ntl_gbigint *b);
+   /* *b = a^e; error raised if e < 0 */
+
+   void _ntl_gexps(long a, long e, _ntl_gbigint *b);
+   /* *b = a^e; error raised if e < 0 */
+
+
+/*********************************************************************
+
+   Modular Arithmetic
+
+   Addition, subtraction, multiplication, squaring, division, inversion,
+   and exponentiation modulo a positive modulus n, where all operands
+   (except for the exponent in exponentiation) and results are in the
+   range [0, n-1].
+
+   ALIAS RESTRICTION: output parameters should not alias n
+
+***********************************************************************/
+
+   void _ntl_gaddmod(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint n, _ntl_gbigint *c);
+   /* *c = (a + b) % n */
+
+   void _ntl_gsubmod(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint n, _ntl_gbigint *c);
+   /* *c = (a - b) % n */
+
+   void _ntl_gsmulmod(_ntl_gbigint a, long b, _ntl_gbigint n, _ntl_gbigint *c);
+   /* *c = (a * b) % n */
+
+   void _ntl_gmulmod(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint n, _ntl_gbigint *c);
+   /* *c = (a * b) % n */
+
+   void _ntl_gsqmod(_ntl_gbigint a, _ntl_gbigint n, _ntl_gbigint *c);
+   /* *c = (a ^ 2) % n */
+
+   void _ntl_ginvmod(_ntl_gbigint a, _ntl_gbigint n, _ntl_gbigint *c);
+   /* *c = (1 / a) % n; error raised if gcd(a, n) != 1 */
+
+   void _ntl_gpowermod(_ntl_gbigint g, _ntl_gbigint e, _ntl_gbigint F,
+                       _ntl_gbigint *h);
+
+   /* *h = (g ^ e) % F; */
+
+
+
+
+/**************************************************************************
+
+   Euclidean Algorithms
+
+***************************************************************************/
+   void _ntl_ggcd(_ntl_gbigint m1, _ntl_gbigint m2, _ntl_gbigint *r);
+   /* *r = greatest common divisor of m1 and m2;
+      uses binary gcd algorithm */
+
+
+   void _ntl_gexteucl(_ntl_gbigint a, _ntl_gbigint *xa,
+                      _ntl_gbigint b, _ntl_gbigint *xb,
+                      _ntl_gbigint *d);
+   /*
+      *d = a * *xa + b * *xb = gcd(a, b);
+      sets *d, *xa and *xb given a and b;
+      uses Lehmer's trick
+   */
+
+
+   long _ntl_ginv(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *c);
+   /*
+      if (a and b coprime)
+      {
+         *c = inv;
+         return(0);
+      }
+      else
+      {
+         *c = gcd(a, b);
+         return(1);
+      }
+
+      where inv is such that (inv * a) == 1 mod b;
+      error raised if a < 0 or b <= 0
+   */
+
+   long _ntl_gxxratrecon(_ntl_gbigint x, _ntl_gbigint m,
+                         _ntl_gbigint a_bound, _ntl_gbigint b_bound,
+                         _ntl_gbigint *a, _ntl_gbigint *b);
+
+   /* rational reconstruction: see doc in ZZ.txt */
+
+
+
+/**********************************************************************
+
+   Storage Allocation
+
+   These routines use malloc and free.
+
+***********************************************************************/
+
+   inline
+   long _ntl_gmaxalloc(_ntl_gbigint x)
+   {
+      if (!x)
+         return 0;
+      else
+         return ((((long *) (x))[0]) >> 2);
+   }
+
+   /* DIRT: the above maxalloc routine is inlined, with the definition
+      of ALLOC copied and pasted. */
+
+
+   void _ntl_gsetlength(_ntl_gbigint *v, long len);
+   /* Allocates enough space to hold a len-digit number,
+      where each digit has NTL_NBITS bits.
+      If space must be allocated, space for one extra digit
+      is always allocated. */
+
+   void _ntl_gfree(_ntl_gbigint *x);
+   /* Frees space held by x, and sets x back to 0. */
+
+
+/*******************************************************************
+
+   Special routines
+
+********************************************************************/
+
+long _ntl_gsize(_ntl_gbigint n);
+long _ntl_gisone(_ntl_gbigint n);
+
+long _ntl_gsptest(_ntl_gbigint a);
+long _ntl_gwsptest(_ntl_gbigint a);
+long _ntl_gcrtinrange(_ntl_gbigint g, _ntl_gbigint a);
+
+void _ntl_gfrombytes(_ntl_gbigint *x, const unsigned char *p, long n);
+void _ntl_gbytesfromz(unsigned char *p, _ntl_gbigint a, long nn);
+
+
+long _ntl_gblock_construct_alloc(_ntl_gbigint *x, long d, long n);
+void _ntl_gblock_construct_set(_ntl_gbigint x, _ntl_gbigint *y, long i);
+long _ntl_gblock_destroy(_ntl_gbigint x);
+long _ntl_gblock_storage(long d);
+
+
+
+#define NTL_verylong _ntl_gbigint
+#define NTL_verylong_body _ntl_gbigint_body
+#define NTL_z2log _ntl_g2log
+#define NTL_zabs _ntl_gabs
+#define NTL_zadd _ntl_gadd
+#define NTL_zaddmod _ntl_gaddmod
+#define NTL_zand _ntl_gand
+#define NTL_zbit _ntl_gbit
+#define NTL_zblock_construct_alloc _ntl_gblock_construct_alloc
+#define NTL_zblock_construct_set _ntl_gblock_construct_set
+#define NTL_zblock_destroy _ntl_gblock_destroy
+#define NTL_zblock_storage _ntl_gblock_storage
+#define NTL_zbytesfromz _ntl_gbytesfromz
+#define NTL_zcompare _ntl_gcompare
+#define NTL_zcopy _ntl_gcopy
+#define NTL_zcrtinrange _ntl_gcrtinrange
+#define NTL_zdiv _ntl_gdiv
+#define NTL_zdoub _ntl_gdoub
+#define NTL_zdoubtoz _ntl_gdoubtoz
+#define NTL_zexp _ntl_gexp
+#define NTL_zexps _ntl_gexps
+#define NTL_zexteucl _ntl_gexteucl
+#define NTL_zfree _ntl_gfree
+#define NTL_zfrombytes _ntl_gfrombytes
+#define NTL_zgcd _ntl_ggcd
+#define NTL_zintoz _ntl_gintoz
+#define NTL_zinv _ntl_ginv
+#define NTL_zinvmod _ntl_ginvmod
+#define NTL_zisone _ntl_gisone
+#define NTL_ziszero _ntl_giszero
+#define NTL_zlog _ntl_glog
+#define NTL_zlowbits _ntl_glowbits
+#define NTL_zlshift _ntl_glshift
+#define NTL_zmakeodd _ntl_gmakeodd
+#define NTL_zmod _ntl_gmod
+#define NTL_zmul _ntl_gmul
+#define NTL_zmulmod _ntl_gmulmod
+#define NTL_znegate _ntl_gnegate
+#define NTL_znumtwos _ntl_gnumtwos
+#define NTL_zodd _ntl_godd
+#define NTL_zone _ntl_gone
+#define NTL_zor _ntl_gor
+#define NTL_zpowermod _ntl_gpowermod
+#define NTL_zquickmod _ntl_gquickmod
+#define NTL_zround_correction _ntl_ground_correction
+#define NTL_zrshift _ntl_grshift
+#define NTL_zsadd _ntl_gsadd
+#define NTL_zscompare _ntl_gscompare
+#define NTL_zsdiv _ntl_gsdiv
+#define NTL_zsetbit _ntl_gsetbit
+#define NTL_zmaxalloc _ntl_gmaxalloc
+#define NTL_zsetlength _ntl_gsetlength
+#define NTL_zsign _ntl_gsign
+#define NTL_zsize _ntl_gsize
+#define NTL_zslowbits _ntl_gslowbits
+#define NTL_zsmod _ntl_gsmod
+#define NTL_zsmul _ntl_gsmul
+#define NTL_zsmulmod _ntl_gsmulmod
+#define NTL_zsptest _ntl_gsptest
+#define NTL_zsq _ntl_gsq
+#define NTL_zsqmod _ntl_gsqmod
+#define NTL_zsqrt _ntl_gsqrt
+#define
NTL_zsqrts _ntl_gsqrts +#define NTL_zsub _ntl_gsub +#define NTL_zsubmod _ntl_gsubmod +#define NTL_zsubpos _ntl_gsubpos +#define NTL_zswap _ntl_gswap +#define NTL_zswitchbit _ntl_gswitchbit +#define NTL_ztoint _ntl_gtoint +#define NTL_ztouint _ntl_gtouint +#define NTL_zuintoz _ntl_guintoz +#define NTL_zweight _ntl_gweight +#define NTL_zweights _ntl_gweights +#define NTL_zwsptest _ntl_gwsptest +#define NTL_zxor _ntl_gxor +#define NTL_zxxratrecon _ntl_gxxratrecon +#define NTL_zzero _ntl_gzero + +#define NTL_zsaddmul _ntl_gsaddmul +#define NTL_zaddmul _ntl_gaddmul +#define NTL_zssubmul _ntl_gssubmul +#define NTL_zsubmul _ntl_gsubmul + + + +#define NTL_GMP_LIP + diff --git a/thirdparty/linux/ntl/include/NTL/gmp_aux.h b/thirdparty/linux/ntl/include/NTL/gmp_aux.h new file mode 100644 index 0000000000..e69de29bb2 diff --git a/thirdparty/linux/ntl/include/NTL/lip.h b/thirdparty/linux/ntl/include/NTL/lip.h new file mode 100644 index 0000000000..6be0bdab36 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/lip.h @@ -0,0 +1,81 @@ + +#ifndef NTL_g_lip__H +#define NTL_g_lip__H + +#include + + +#ifdef NTL_GMP_LIP + +#include +#include + + +#else + +#include + + +#endif + + +// These are common to both implementations + +class _ntl_tmp_vec { +public: + virtual ~_ntl_tmp_vec() { } +}; + +class _ntl_crt_struct { +public: + virtual ~_ntl_crt_struct() { } + virtual bool special() = 0; + virtual void insert(long i, NTL_verylong m) = 0; + virtual _ntl_tmp_vec *extract() = 0; + virtual _ntl_tmp_vec *fetch() = 0; + virtual void eval(NTL_verylong *x, const long *b, + _ntl_tmp_vec *tmp_vec) = 0; +}; + +_ntl_crt_struct * +_ntl_crt_struct_build(long n, NTL_verylong p, long (*primes)(long)); + +class _ntl_rem_struct { +public: + virtual ~_ntl_rem_struct() { } + virtual void eval(long *x, NTL_verylong a, _ntl_tmp_vec *tmp_vec) = 0; + virtual _ntl_tmp_vec *fetch() = 0; +}; + +_ntl_rem_struct * +_ntl_rem_struct_build(long n, NTL_verylong modulus, long (*p)(long)); + + +// montgomery +class _ntl_reduce_struct { +public: + virtual ~_ntl_reduce_struct() { } + virtual void eval(NTL_verylong *x, NTL_verylong *a) = 0; + virtual void adjust(NTL_verylong *x) = 0; +}; + +_ntl_reduce_struct * +_ntl_reduce_struct_build(NTL_verylong modulus, NTL_verylong excess); + + +// faster reduction with preconditioning -- general usage, single modulus + +class _ntl_general_rem_one_struct; + +_ntl_general_rem_one_struct * +_ntl_general_rem_one_struct_build(long p); + +long +_ntl_general_rem_one_struct_apply(NTL_verylong a, long p, _ntl_general_rem_one_struct *pinfo); + +void +_ntl_general_rem_one_struct_delete(_ntl_general_rem_one_struct *pinfo); + + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/lzz_p.h b/thirdparty/linux/ntl/include/NTL/lzz_p.h new file mode 100644 index 0000000000..77086d23e0 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/lzz_p.h @@ -0,0 +1,511 @@ + +#ifndef NTL_zz_p__H +#define NTL_zz_p__H + +#include +#include +#include +#include + + + + +NTL_OPEN_NNS + + +class zz_pInfoT { +private: + zz_pInfoT(); // disabled + zz_pInfoT(const zz_pInfoT&); // disabled + void operator=(const zz_pInfoT&); // disabled +public: + zz_pInfoT(long NewP, long maxroot); + zz_pInfoT(INIT_FFT_TYPE, FFTPrimeInfo *info); + zz_pInfoT(INIT_USER_FFT_TYPE, long q); + + long p; + mulmod_t pinv; + + sp_reduce_struct red_struct; + sp_ll_reduce_struct ll_red_struct; + sp_ZZ_reduce_struct ZZ_red_struct; + + FFTPrimeInfo* p_info; // non-null means we are directly using + // an FFT prime + + UniquePtr p_info_owner; + // for user-defined FFT 
primes, we store the corresponding + // FFTPrimeInfo object here + + + long PrimeCnt; // 0 for FFT prime; otherwise same as NumPrimes + // used for establishing crossover points + + long NumPrimes; + + long MaxRoot; + + long MinusMModP; // -M mod p, M = product of primes + mulmod_precon_t MinusMModPpinv; + + // the following arrays are indexed 0..NumPrimes-1 + // q = FFTPrime[i] + + + Vec CoeffModP; // coeff mod p + Vec CoeffModPpinv; + + Vec x; // u/q, where u = (M/q)^{-1} mod q + Vec u; // u, as above + Vec uqinv; // MulModPrecon for u +}; + +extern +NTL_CHEAP_THREAD_LOCAL +zz_pInfoT *zz_pInfo; +// current modulus, initially null + + +class zz_pContext { +private: +SmartPtr ptr; + +public: + +zz_pContext() { } + +// copy constructor, assignment, destructor: default + +explicit zz_pContext(long p, long maxroot=NTL_FFTMaxRoot); +zz_pContext(INIT_FFT_TYPE, long index); +zz_pContext(INIT_USER_FFT_TYPE, long q); + +void save(); +void restore() const; + + +// some hooks that are useful in helib... +// FIXME: generalize these to other context classes +// and document + +bool null() const { return ptr == 0; } +bool equals(const zz_pContext& other) const { return ptr == other.ptr; } +long modulus() const { return ptr->p; } + + +}; + + +class zz_pBak { +private: +zz_pContext c; +bool MustRestore; + +zz_pBak(const zz_pBak&); // disabled +void operator=(const zz_pBak&); // disabled + +public: +void save(); +void restore(); + +zz_pBak() : MustRestore(false) { } + +~zz_pBak(); + + +}; + + +class zz_pPush { +private: +zz_pBak bak; + +zz_pPush(const zz_pPush&); // disabled +void operator=(const zz_pPush&); // disabled + +public: +zz_pPush() { bak.save(); } +explicit zz_pPush(const zz_pContext& context) { bak.save(); context.restore(); } + +explicit zz_pPush(long p, long maxroot=NTL_FFTMaxRoot) + { bak.save(); zz_pContext c(p, maxroot); c.restore(); } + +zz_pPush(INIT_FFT_TYPE, long index) + { bak.save(); zz_pContext c(INIT_FFT, index); c.restore(); } + +zz_pPush(INIT_USER_FFT_TYPE, long q) + { bak.save(); zz_pContext c(INIT_USER_FFT, q); c.restore(); } + +}; + + + + +#define NTL_zz_pRegister(x) zz_p x + + +class zz_pX; // forward declaration + +class zz_p { +public: +typedef long rep_type; +typedef zz_pContext context_type; +typedef zz_pBak bak_type; +typedef zz_pPush push_type; +typedef zz_pX poly_type; + + + +long _zz_p__rep; + + +static void init(long NewP, long maxroot=NTL_FFTMaxRoot); +static void FFTInit(long index); +static void UserFFTInit(long q); + + + +// ****** constructors and assignment + +zz_p() : _zz_p__rep(0) { } + +explicit zz_p(long a) : _zz_p__rep(0) { *this = a; } + +zz_p(const zz_p& a) : _zz_p__rep(a._zz_p__rep) { } + +~zz_p() { } + +zz_p& operator=(const zz_p& a) { _zz_p__rep = a._zz_p__rep; return *this; } + +inline zz_p& operator=(long a); + +// a loop-hole for direct access to _zz_p__rep +long& LoopHole() { return _zz_p__rep; } + +static long modulus() { return zz_pInfo->p; } +static zz_p zero() { return zz_p(); } +static mulmod_t ModulusInverse() { return zz_pInfo->pinv; } +static sp_reduce_struct red_struct() { return zz_pInfo->red_struct; } +static sp_ll_reduce_struct ll_red_struct() { return zz_pInfo->ll_red_struct; } +static const sp_ZZ_reduce_struct& ZZ_red_struct() { return zz_pInfo->ZZ_red_struct; } +static long PrimeCnt() { return zz_pInfo->PrimeCnt; } + + +static long storage() { return sizeof(long); } + +zz_p(long a, INIT_LOOP_HOLE_TYPE) { _zz_p__rep = a; } + +// for consistency +zz_p(INIT_NO_ALLOC_TYPE) : _zz_p__rep(0) { } +zz_p(INIT_ALLOC_TYPE) : _zz_p__rep(0) { } 
+void allocate() { } + + +}; + +inline +zz_p to_zz_p(long a) +{ + return zz_p(rem(a, zz_pInfo->p, zz_pInfo->red_struct), INIT_LOOP_HOLE); +} + +inline +void conv(zz_p& x, long a) +{ + x._zz_p__rep = rem(a, zz_pInfo->p, zz_pInfo->red_struct); +} + +inline void VectorConv(long k, zz_p *x, const long *a) +{ + if (k <= 0) return; + sp_reduce_struct red_struct = zz_p::red_struct(); + long p = zz_p::modulus(); + for (long i = 0; i < k; i++) x[i].LoopHole() = rem(a[i], p, red_struct); +} + +inline zz_p& zz_p::operator=(long a) { conv(*this, a); return *this; } + +inline +zz_p to_zz_p(const ZZ& a) +{ + return zz_p(zz_p::ZZ_red_struct().rem(a), INIT_LOOP_HOLE); +} + +inline +void conv(zz_p& x, const ZZ& a) +{ + x._zz_p__rep = zz_p::ZZ_red_struct().rem(a); +} + + +inline void VectorConv(long k, zz_p *x, const ZZ *a) +{ + if (k <= 0) return; + const sp_ZZ_reduce_struct& ZZ_red_struct = zz_p::ZZ_red_struct(); + for (long i = 0; i < k; i++) x[i].LoopHole() = ZZ_red_struct.rem(a[i]); +} + +// read-only access to _zz_p__representation +inline long rep(zz_p a) { return a._zz_p__rep; } + +inline void clear(zz_p& x) +// x = 0 + { x._zz_p__rep = 0; } + +inline void set(zz_p& x) +// x = 1 + { x._zz_p__rep = 1; } + +inline void swap(zz_p& x, zz_p& y) +// swap x and y + + { long t; t = x._zz_p__rep; x._zz_p__rep = y._zz_p__rep; y._zz_p__rep = t; } + +// ****** addition + +inline void add(zz_p& x, zz_p a, zz_p b) +// x = a + b + + { x._zz_p__rep = AddMod(a._zz_p__rep, b._zz_p__rep, zz_p::modulus()); } + +inline void sub(zz_p& x, zz_p a, zz_p b) +// x = a - b + + { x._zz_p__rep = SubMod(a._zz_p__rep, b._zz_p__rep, zz_p::modulus()); } + + +inline void negate(zz_p& x, zz_p a) +// x = -a + + { x._zz_p__rep = SubMod(0, a._zz_p__rep, zz_p::modulus()); } + +// scalar versions + +inline void add(zz_p& x, zz_p a, long b) { add(x, a, to_zz_p(b)); } +inline void add(zz_p& x, long a, zz_p b) { add(x, to_zz_p(a), b); } + +inline void sub(zz_p& x, zz_p a, long b) { sub(x, a, to_zz_p(b)); } +inline void sub(zz_p& x, long a, zz_p b) { sub(x, to_zz_p(a), b); } + +inline zz_p operator+(zz_p a, zz_p b) + { zz_p x; add(x, a, b); return x; } + +inline zz_p operator+(zz_p a, long b) + { zz_p x; add(x, a, b); return x; } + +inline zz_p operator+(long a, zz_p b) + { zz_p x; add(x, a, b); return x; } + +inline zz_p operator-(zz_p a, zz_p b) + { zz_p x; sub(x, a, b); return x; } + +inline zz_p operator-(zz_p a, long b) + { zz_p x; sub(x, a, b); return x; } + +inline zz_p operator-(long a, zz_p b) + { zz_p x; sub(x, a, b); return x; } + + + +inline zz_p operator-(zz_p a) + { zz_p x; negate(x, a); return x; } + + + +inline zz_p& operator+=(zz_p& x, zz_p b) + { add(x, x, b); return x; } + +inline zz_p& operator+=(zz_p& x, long b) + { add(x, x, b); return x; } + + + +inline zz_p& operator-=(zz_p& x, zz_p b) + { sub(x, x, b); return x; } + +inline zz_p& operator-=(zz_p& x, long b) + { sub(x, x, b); return x; } + +inline zz_p& operator++(zz_p& x) { add(x, x, 1); return x; } +inline void operator++(zz_p& x, int) { add(x, x, 1); } +inline zz_p& operator--(zz_p& x) { sub(x, x, 1); return x; } +inline void operator--(zz_p& x, int) { sub(x, x, 1); } + +// ****** multiplication + +inline void mul(zz_p& x, zz_p a, zz_p b) +// x = a*b + + { x._zz_p__rep = MulMod(a._zz_p__rep, b._zz_p__rep, zz_p::modulus(), zz_p::ModulusInverse()); } + +inline void mul(zz_p& x, zz_p a, long b) { mul(x, a, to_zz_p(b)); } +inline void mul(zz_p& x, long a, zz_p b) { mul(x, to_zz_p(a), b); } + +inline zz_p operator*(zz_p a, zz_p b) + { zz_p x; mul(x, a, b); return x; } + 
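+
+// (Editor's usage sketch, illustrative only, placed here among the
+// arithmetic operators: with zz_p::init and the operators defined in
+// this header, zz_p behaves like an element of Z/pZ:
+//
+//      zz_p::init(17);           // set the current modulus p = 17
+//      zz_p a = to_zz_p(5);
+//      zz_p b = a * a + 3;       // arithmetic mod 17
+//      long r = rep(b);          // r in [0, 17)
+// )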
+inline zz_p operator*(zz_p a, long b) + { zz_p x; mul(x, a, b); return x; } + +inline zz_p operator*(long a, zz_p b) + { zz_p x; mul(x, a, b); return x; } + + +inline zz_p& operator*=(zz_p& x, zz_p b) + { mul(x, x, b); return x; } + +inline zz_p& operator*=(zz_p& x, long b) + { mul(x, x, b); return x; } + + + +inline void sqr(zz_p& x, zz_p a) +// x = a^2 + + { x._zz_p__rep = MulMod(a._zz_p__rep, a._zz_p__rep, zz_p::modulus(), zz_p::ModulusInverse()); } + +inline zz_p sqr(zz_p a) + { zz_p x; sqr(x, a); return x; } + + + +// ****** division + +inline void div(zz_p& x, zz_p a, zz_p b) +// x = a/b + + { x._zz_p__rep = MulMod(a._zz_p__rep, InvMod(b._zz_p__rep, zz_p::modulus()), zz_p::modulus(), + zz_p::ModulusInverse()); } + +inline void inv(zz_p& x, zz_p a) +// x = 1/a + + { x._zz_p__rep = InvMod(a._zz_p__rep, zz_p::modulus()); } + +inline zz_p inv(zz_p a) + { zz_p x; inv(x, a); return x; } + +inline void div(zz_p& x, zz_p a, long b) { div(x, a, to_zz_p(b)); } +inline void div(zz_p& x, long a, zz_p b) { div(x, to_zz_p(a), b); } + +inline zz_p operator/(zz_p a, zz_p b) + { zz_p x; div(x, a, b); return x; } + +inline zz_p operator/(zz_p a, long b) + { zz_p x; div(x, a, b); return x; } + +inline zz_p operator/(long a, zz_p b) + { zz_p x; div(x, a, b); return x; } + + +inline zz_p& operator/=(zz_p& x, zz_p b) + { div(x, x, b); return x; } + +inline zz_p& operator/=(zz_p& x, long b) + { div(x, x, b); return x; } + + +// ****** exponentiation + +inline void power(zz_p& x, zz_p a, long e) +// x = a^e + + { x._zz_p__rep = PowerMod(a._zz_p__rep, e, zz_p::modulus()); } + +inline zz_p power(zz_p a, long e) + { zz_p x; power(x, a, e); return x; } + +// ****** comparison + +inline long IsZero(zz_p a) + { return a._zz_p__rep == 0; } + +inline long IsOne(zz_p a) + { return a._zz_p__rep == 1; } + +inline long operator==(zz_p a, zz_p b) + { return a._zz_p__rep == b._zz_p__rep; } + +inline long operator!=(zz_p a, zz_p b) + { return !(a == b); } + +inline long operator==(zz_p a, long b) { return a == to_zz_p(b); } +inline long operator==(long a, zz_p b) { return to_zz_p(a) == b; } + +inline long operator!=(zz_p a, long b) { return !(a == b); } +inline long operator!=(long a, zz_p b) { return !(a == b); } + +// ****** random numbers + +inline void random(zz_p& x) +// x = random element in zz_p + + { x._zz_p__rep = RandomBnd(zz_p::modulus()); } + +inline zz_p random_zz_p() + { zz_p x; random(x); return x; } + +inline void VectorRandom(long k, zz_p* x) +{ + if (k <= 0) return; + RandomBndGenerator gen(zz_p::modulus()); + for (long i = 0; i < k; i++) x[i].LoopHole() = gen.next(); +} + + + +// ****** input/output + +NTL_SNS ostream& operator<<(NTL_SNS ostream& s, zz_p a); + +NTL_SNS istream& operator>>(NTL_SNS istream& s, zz_p& x); + + +void conv(Vec& x, const Vec& a); +void conv(Vec& x, const Vec& a); +// explicit instantiation of more efficient versions, +// defined in vec_lzz_p.c + + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(int& x, zz_p a) { conv(x, rep(a)); } +inline void conv(unsigned int& x, zz_p a) { conv(x, rep(a)); } +inline void conv(long& x, zz_p a) { conv(x, rep(a)); } +inline void conv(unsigned long& x, zz_p a) { conv(x, rep(a)); } +inline void conv(ZZ& x, zz_p a) { conv(x, rep(a)); } + + +inline void conv(zz_p& x, zz_p a) { x = a; } + +/* ------------------------------------- */ + + +// ********************************************************* +// *** specialized inner-product routines, for internal consumption +// 
********************************************************* + +#ifdef NTL_HAVE_LL_TYPE +long +InnerProd_LL(const long *ap, const zz_p *bp, long n, long d, + sp_ll_reduce_struct dinv); + +long +InnerProd_LL(const zz_p *ap, const zz_p *bp, long n, long d, + sp_ll_reduce_struct dinv); +#endif + + +long +InnerProd_L(const long *ap, const zz_p *bp, long n, long d, + sp_reduce_struct dinv); + +long +InnerProd_L(const zz_p *ap, const zz_p *bp, long n, long d, + sp_reduce_struct dinv); + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/lzz_pE.h b/thirdparty/linux/ntl/include/NTL/lzz_pE.h new file mode 100644 index 0000000000..1d789cdf57 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/lzz_pE.h @@ -0,0 +1,524 @@ + +#ifndef NTL_zz_pE__H +#define NTL_zz_pE__H + +#include +#include +#include +#include + +NTL_OPEN_NNS + + + +class zz_pEInfoT { +private: + zz_pEInfoT(); // disabled + zz_pEInfoT(const zz_pEInfoT&); // disabled + void operator=(const zz_pEInfoT&); // disabled + +public: + zz_pEInfoT(const zz_pX&); + ~zz_pEInfoT() { } + + zz_pXModulus p; + + long _card_base; + long _card_exp; + + Lazy _card; + +}; + +extern +NTL_CHEAP_THREAD_LOCAL +zz_pEInfoT *zz_pEInfo; +// info for current modulus, initially null +// fast TLS access + + + +class zz_pEContext { +private: +SmartPtr ptr; + +public: + +zz_pEContext() { } +explicit zz_pEContext(const zz_pX& p) : ptr(MakeSmart(p)) { } + +// copy constructor, assignment, destructor: default + +void save(); +void restore() const; + +}; + + +class zz_pEBak { +private: +zz_pEContext c; +bool MustRestore; + +zz_pEBak(const zz_pEBak&); // disabled +void operator=(const zz_pEBak&); // disabled + +public: +void save(); +void restore(); + +zz_pEBak() : MustRestore(false) { } + +~zz_pEBak(); + + +}; + + + + + + +class zz_pEPush { +private: +zz_pEBak bak; + +zz_pEPush(const zz_pEPush&); // disabled +void operator=(const zz_pEPush&); // disabled + +public: +zz_pEPush() { bak.save(); } +explicit zz_pEPush(const zz_pEContext& context) { bak.save(); context.restore(); } +explicit zz_pEPush(const zz_pX& p) { bak.save(); zz_pEContext c(p); c.restore(); } + + +}; + + + + +class zz_pEX; // forward declaration + +class zz_pE { +public: +typedef zz_pX rep_type; +typedef zz_pEContext context_type; +typedef zz_pEBak bak_type; +typedef zz_pEPush push_type; +typedef zz_pEX poly_type; + + +zz_pX _zz_pE__rep; + + +static long DivCross() { return 16; } +static long ModCross() { return 8; } + + +// ****** constructors and assignment + +zz_pE() { } // NO_ALLOC + +explicit zz_pE(long a) { *this = a; } // NO_ALLOC +explicit zz_pE(const zz_p& a) { *this = a; } // NO_ALLOC + +zz_pE(const zz_pE& a) { _zz_pE__rep = a._zz_pE__rep; } // NO_ALLOC + +zz_pE(INIT_NO_ALLOC_TYPE) { } // allocates no space +zz_pE(INIT_ALLOC_TYPE) { _zz_pE__rep.rep.SetMaxLength(zz_pE::degree()); } // allocates space +void allocate() { _zz_pE__rep.rep.SetMaxLength(zz_pE::degree()); } + +~zz_pE() { } + +zz_pE& operator=(const zz_pE& a) { _zz_pE__rep = a._zz_pE__rep; return *this; } + +zz_pE(zz_pE& x, INIT_TRANS_TYPE) : _zz_pE__rep(x._zz_pE__rep, INIT_TRANS) { } + + +// You can always access the _zz_pE__representation directly...if you dare. 
+zz_pX& LoopHole() { return _zz_pE__rep; } + +void swap(zz_pE& x) { _zz_pE__rep.swap(x._zz_pE__rep); } + + +static const zz_pXModulus& modulus() { return zz_pEInfo->p; } + +static long degree() { return deg(zz_pEInfo->p); } + +static const ZZ& cardinality(); + +static const zz_pE& zero(); + +static long initialized() { return (zz_pEInfo != 0); } + +static void init(const zz_pX&); + +inline zz_pE& operator=(long a); +inline zz_pE& operator=(const zz_p& a); + + +}; + +inline const zz_pX& _zz_pE__rep(const zz_pE& a) { return a._zz_pE__rep; } + +inline void clear(zz_pE& x) +// x = 0 + { clear(x._zz_pE__rep); } + +inline void set(zz_pE& x) +// x = 1 + { set(x._zz_pE__rep); } + +inline void swap(zz_pE& x, zz_pE& y) +// swap x and y + + { x.swap(y); } + +// ****** addition + +inline void add(zz_pE& x, const zz_pE& a, const zz_pE& b) +// x = a + b + + { add(x._zz_pE__rep, a._zz_pE__rep, b._zz_pE__rep); } + +inline void sub(zz_pE& x, const zz_pE& a, const zz_pE& b) +// x = a - b + + { sub(x._zz_pE__rep, a._zz_pE__rep, b._zz_pE__rep); } + + +inline void negate(zz_pE& x, const zz_pE& a) + + { negate(x._zz_pE__rep, a._zz_pE__rep); } + + +inline void add(zz_pE& x, const zz_pE& a, long b) + { add(x._zz_pE__rep, a._zz_pE__rep, b); } + +inline void add(zz_pE& x, const zz_pE& a, const zz_p& b) + { add(x._zz_pE__rep, a._zz_pE__rep, b); } + +inline void add(zz_pE& x, long a, const zz_pE& b) + { add(x._zz_pE__rep, a, b._zz_pE__rep); } + +inline void add(zz_pE& x, const zz_p& a, const zz_pE& b) + { add(x._zz_pE__rep, a, b._zz_pE__rep); } + + + + + +inline void sub(zz_pE& x, const zz_pE& a, long b) + { sub(x._zz_pE__rep, a._zz_pE__rep, b); } + +inline void sub(zz_pE& x, const zz_pE& a, const zz_p& b) + { sub(x._zz_pE__rep, a._zz_pE__rep, b); } + +inline void sub(zz_pE& x, long a, const zz_pE& b) + { sub(x._zz_pE__rep, a, b._zz_pE__rep); } + +inline void sub(zz_pE& x, const zz_p& a, const zz_pE& b) + { sub(x._zz_pE__rep, a, b._zz_pE__rep); } + + + + + +// ****** multiplication + +inline void mul(zz_pE& x, const zz_pE& a, const zz_pE& b) +// x = a*b + + { MulMod(x._zz_pE__rep, a._zz_pE__rep, b._zz_pE__rep, zz_pE::modulus()); } + + +inline void sqr(zz_pE& x, const zz_pE& a) +// x = a^2 + + { SqrMod(x._zz_pE__rep, a._zz_pE__rep, zz_pE::modulus()); } + +inline zz_pE sqr(const zz_pE& a) + { zz_pE x; sqr(x, a); NTL_OPT_RETURN(zz_pE, x); } + + +inline void mul(zz_pE& x, const zz_pE& a, long b) + { mul(x._zz_pE__rep, a._zz_pE__rep, b); } + +inline void mul(zz_pE& x, const zz_pE& a, const zz_p& b) + { mul(x._zz_pE__rep, a._zz_pE__rep, b); } + +inline void mul(zz_pE& x, long a, const zz_pE& b) + { mul(x._zz_pE__rep, a, b._zz_pE__rep); } + +inline void mul(zz_pE& x, const zz_p& a, const zz_pE& b) + { mul(x._zz_pE__rep, a, b._zz_pE__rep); } + + +// ****** division + + + +void div(zz_pE& x, const zz_pE& a, const zz_pE& b); +void div(zz_pE& x, const zz_pE& a, long b); +void div(zz_pE& x, const zz_pE& a, const zz_p& b); +void div(zz_pE& x, long a, const zz_pE& b); +void div(zz_pE& x, const zz_p& a, const zz_pE& b); + +void inv(zz_pE& x, const zz_pE& a); + +inline zz_pE inv(const zz_pE& a) + { zz_pE x; inv(x, a); NTL_OPT_RETURN(zz_pE, x); } + + + +// ****** exponentiation + +inline void power(zz_pE& x, const zz_pE& a, const ZZ& e) +// x = a^e + + { PowerMod(x._zz_pE__rep, a._zz_pE__rep, e, zz_pE::modulus()); } + +inline zz_pE power(const zz_pE& a, const ZZ& e) + { zz_pE x; power(x, a, e); NTL_OPT_RETURN(zz_pE, x); } + +inline void power(zz_pE& x, const zz_pE& a, long e) + { power(x, a, ZZ_expo(e)); } + +inline zz_pE power(const 
zz_pE& a, long e) + { zz_pE x; power(x, a, e); NTL_OPT_RETURN(zz_pE, x); } + + + + +// ****** conversion + +inline void conv(zz_pE& x, const zz_pX& a) + { rem(x._zz_pE__rep, a, zz_pE::modulus()); } + +inline void conv(zz_pE& x, long a) + { conv(x._zz_pE__rep, a); } + +inline void conv(zz_pE& x, const zz_p& a) + { conv(x._zz_pE__rep, a); } + +inline void conv(zz_pE& x, const ZZ& a) + { conv(x._zz_pE__rep, a); } + +inline zz_pE to_zz_pE(const zz_pX& a) + { zz_pE x; conv(x, a); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE to_zz_pE(long a) + { zz_pE x; conv(x, a); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE to_zz_pE(const zz_p& a) + { zz_pE x; conv(x, a); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE to_zz_pE(const ZZ& a) + { zz_pE x; conv(x, a); NTL_OPT_RETURN(zz_pE, x); } + + + +// ****** comparison + +inline long IsZero(const zz_pE& a) + { return IsZero(a._zz_pE__rep); } + +inline long IsOne(const zz_pE& a) + { return IsOne(a._zz_pE__rep); } + +inline long operator==(const zz_pE& a, const zz_pE& b) + { return a._zz_pE__rep == b._zz_pE__rep; } +inline long operator==(const zz_pE& a, long b) + { return a._zz_pE__rep == b; } +inline long operator==(const zz_pE& a, const zz_p& b) + { return a._zz_pE__rep == b; } +inline long operator==(long a, const zz_pE& b) + { return a == b._zz_pE__rep; } +inline long operator==(const zz_p& a, const zz_pE& b) + { return a == b._zz_pE__rep; } + +inline long operator!=(const zz_pE& a, const zz_pE& b) + { return !(a == b); } +inline long operator!=(const zz_pE& a, long b) + { return !(a == b); } +inline long operator!=(const zz_pE& a, const zz_p& b) + { return !(a == b); } +inline long operator!=(long a, const zz_pE& b) + { return !(a == b); } +inline long operator!=(const zz_p& a, const zz_pE& b) + { return !(a == b); } + + +// ****** norm and trace + +inline void trace(zz_p& x, const zz_pE& a) + { TraceMod(x, a._zz_pE__rep, zz_pE::modulus()); } +inline zz_p trace(const zz_pE& a) + { return TraceMod(a._zz_pE__rep, zz_pE::modulus()); } + +inline void norm(zz_p& x, const zz_pE& a) + { NormMod(x, a._zz_pE__rep, zz_pE::modulus()); } +inline zz_p norm(const zz_pE& a) + { return NormMod(a._zz_pE__rep, zz_pE::modulus()); } + + +// ****** random numbers + +inline void random(zz_pE& x) +// x = random element in zz_pE + + { random(x._zz_pE__rep, zz_pE::degree()); } + +inline zz_pE random_zz_pE() + { zz_pE x; random(x); NTL_OPT_RETURN(zz_pE, x); } + + +// ****** input/output + +inline NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const zz_pE& a) + { return s << a._zz_pE__rep; } + +NTL_SNS istream& operator>>(NTL_SNS istream& s, zz_pE& x); + +inline const zz_pX& rep(const zz_pE& a) { return a._zz_pE__rep; } + + + +inline zz_pE& zz_pE::operator=(long a) { conv(*this, a); return *this; } +inline zz_pE& zz_pE::operator=(const zz_p& a) { conv(*this, a); return *this; } + + + + +inline zz_pE operator+(const zz_pE& a, const zz_pE& b) + { zz_pE x; add(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator+(const zz_pE& a, const zz_p& b) + { zz_pE x; add(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator+(const zz_pE& a, long b) + { zz_pE x; add(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator+(const zz_p& a, const zz_pE& b) + { zz_pE x; add(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator+(long a, const zz_pE& b) + { zz_pE x; add(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + + +inline zz_pE operator-(const zz_pE& a, const zz_pE& b) + { zz_pE x; sub(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator-(const zz_pE& a, const zz_p& 
b) + { zz_pE x; sub(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator-(const zz_pE& a, long b) + { zz_pE x; sub(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator-(const zz_p& a, const zz_pE& b) + { zz_pE x; sub(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator-(long a, const zz_pE& b) + { zz_pE x; sub(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator-(const zz_pE& a) + { zz_pE x; negate(x, a); NTL_OPT_RETURN(zz_pE, x); } + + +inline zz_pE& operator+=(zz_pE& x, const zz_pE& b) + { add(x, x, b); return x; } + +inline zz_pE& operator+=(zz_pE& x, const zz_p& b) + { add(x, x, b); return x; } + +inline zz_pE& operator+=(zz_pE& x, long b) + { add(x, x, b); return x; } + + +inline zz_pE& operator-=(zz_pE& x, const zz_pE& b) + { sub(x, x, b); return x; } + +inline zz_pE& operator-=(zz_pE& x, const zz_p& b) + { sub(x, x, b); return x; } + +inline zz_pE& operator-=(zz_pE& x, long b) + { sub(x, x, b); return x; } + + +inline zz_pE& operator++(zz_pE& x) { add(x, x, 1); return x; } + +inline void operator++(zz_pE& x, int) { add(x, x, 1); } + +inline zz_pE& operator--(zz_pE& x) { sub(x, x, 1); return x; } + +inline void operator--(zz_pE& x, int) { sub(x, x, 1); } + + + +inline zz_pE operator*(const zz_pE& a, const zz_pE& b) + { zz_pE x; mul(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator*(const zz_pE& a, const zz_p& b) + { zz_pE x; mul(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator*(const zz_pE& a, long b) + { zz_pE x; mul(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator*(const zz_p& a, const zz_pE& b) + { zz_pE x; mul(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator*(long a, const zz_pE& b) + { zz_pE x; mul(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + + +inline zz_pE& operator*=(zz_pE& x, const zz_pE& b) + { mul(x, x, b); return x; } + +inline zz_pE& operator*=(zz_pE& x, const zz_p& b) + { mul(x, x, b); return x; } + +inline zz_pE& operator*=(zz_pE& x, long b) + { mul(x, x, b); return x; } + + + + +inline zz_pE operator/(const zz_pE& a, const zz_pE& b) + { zz_pE x; div(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator/(const zz_pE& a, const zz_p& b) + { zz_pE x; div(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator/(const zz_pE& a, long b) + { zz_pE x; div(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator/(const zz_p& a, const zz_pE& b) + { zz_pE x; div(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +inline zz_pE operator/(long a, const zz_pE& b) + { zz_pE x; div(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + + +inline zz_pE& operator/=(zz_pE& x, const zz_pE& b) + { div(x, x, b); return x; } + +inline zz_pE& operator/=(zz_pE& x, const zz_p& b) + { div(x, x, b); return x; } + +inline zz_pE& operator/=(zz_pE& x, long b) + { div(x, x, b); return x; } + + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(zz_pX& x, const zz_pE& a) { x = rep(a); } +inline void conv(zz_pE& x, const zz_pE& a) { x = a; } + + +/* ------------------------------------- */ + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/lzz_pEX.h b/thirdparty/linux/ntl/include/NTL/lzz_pEX.h new file mode 100644 index 0000000000..a67bc47c0c --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/lzz_pEX.h @@ -0,0 +1,1046 @@ + +#ifndef NTL_zz_pEX__H +#define NTL_zz_pEX__H + +#include +#include + +NTL_OPEN_NNS + +class zz_pEXModulus; // forward declaration + +class zz_pEX { +public: +typedef zz_pE coeff_type; +typedef zz_pEXModulus modulus_type; + + +vec_zz_pE rep; + 
+ +/*************************************************************** + + Constructors, Destructors, and Assignment + +****************************************************************/ + + +zz_pEX() { } +// initial value 0 + + +explicit zz_pEX(long a) { *this = a; } +explicit zz_pEX(const zz_p& a) { *this = a; } +explicit zz_pEX(const zz_pE& a) { *this = a; } + +zz_pEX(INIT_SIZE_TYPE, long n) { rep.SetMaxLength(n); } + +~zz_pEX() { } + +void normalize(); +// strip leading zeros + +void SetMaxLength(long n) +// pre-allocate space for n coefficients. +// Value is unchanged + + { rep.SetMaxLength(n); } + + +void kill() +// free space held by this polynomial. Value becomes 0. + + { rep.kill(); } + +void swap(zz_pEX& x) { rep.swap(x.rep); } + + + +void SetLength(long n) { rep.SetLength(n); } +zz_pE& operator[](long i) { return rep[i]; } +const zz_pE& operator[](long i) const { return rep[i]; } + + + + +static const zz_pEX& zero(); + +inline zz_pEX(long i, const zz_pE& c); +inline zz_pEX(long i, const zz_p& c); +inline zz_pEX(long i, long c); + +inline zz_pEX(INIT_MONO_TYPE, long i, const zz_pE& c); +inline zz_pEX(INIT_MONO_TYPE, long i, const zz_p& c); +inline zz_pEX(INIT_MONO_TYPE, long i, long c); +inline zz_pEX(INIT_MONO_TYPE, long i); + + +inline zz_pEX& operator=(long a); +inline zz_pEX& operator=(const zz_p& a); +inline zz_pEX& operator=(const zz_pE& a); + +zz_pEX(zz_pEX& x, INIT_TRANS_TYPE) : rep(x.rep, INIT_TRANS) { } + + +}; + + +NTL_SNS istream& operator>>(NTL_SNS istream& s, zz_pEX& x); +NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const zz_pEX& a); + + + + +/********************************************************** + + Some utility routines + +***********************************************************/ + + +inline long deg(const zz_pEX& a) { return a.rep.length() - 1; } +// degree of a polynomial. +// note that the zero polynomial has degree -1. 
+ +const zz_pE& coeff(const zz_pEX& a, long i); +// zero if i not in range + +const zz_pE& LeadCoeff(const zz_pEX& a); +// zero if a == 0 + +const zz_pE& ConstTerm(const zz_pEX& a); +// zero if a == 0 + +void SetCoeff(zz_pEX& x, long i, const zz_pE& a); +void SetCoeff(zz_pEX& x, long i, const zz_p& a); +void SetCoeff(zz_pEX& x, long i, long a); +// x[i] = a, error is raised if i < 0 + +void SetCoeff(zz_pEX& x, long i); +// x[i] = 1, error is raised if i < 0 + +inline zz_pEX::zz_pEX(long i, const zz_pE& a) { SetCoeff(*this, i, a); } +inline zz_pEX::zz_pEX(long i, const zz_p& a) { SetCoeff(*this, i, a); } +inline zz_pEX::zz_pEX(long i, long a) { SetCoeff(*this, i, a); } + +inline zz_pEX::zz_pEX(INIT_MONO_TYPE, long i, const zz_pE& a) { SetCoeff(*this, i, a); } +inline zz_pEX::zz_pEX(INIT_MONO_TYPE, long i, const zz_p& a) { SetCoeff(*this, i, a); } +inline zz_pEX::zz_pEX(INIT_MONO_TYPE, long i, long a) { SetCoeff(*this, i, a); } +inline zz_pEX::zz_pEX(INIT_MONO_TYPE, long i) { SetCoeff(*this, i); } + +void SetX(zz_pEX& x); +// x is set to the monomial X + +long IsX(const zz_pEX& a); +// test if x = X + +inline void clear(zz_pEX& x) +// x = 0 + + { x.rep.SetLength(0); } + +inline void set(zz_pEX& x) +// x = 1 + + { x.rep.SetLength(1); set(x.rep[0]); } + +inline void swap(zz_pEX& x, zz_pEX& y) +// swap x & y (only pointers are swapped) + + { x.swap(y); } + +void random(zz_pEX& x, long n); +inline zz_pEX random_zz_pEX(long n) + { zz_pEX x; random(x, n); NTL_OPT_RETURN(zz_pEX, x); } +// generate a random polynomial of degree < n + +void trunc(zz_pEX& x, const zz_pEX& a, long m); +inline zz_pEX trunc(const zz_pEX& a, long m) + { zz_pEX x; trunc(x, a, m); NTL_OPT_RETURN(zz_pEX, x); } +// x = a % X^m + +void RightShift(zz_pEX& x, const zz_pEX& a, long n); +inline zz_pEX RightShift(const zz_pEX& a, long n) + { zz_pEX x; RightShift(x, a, n); NTL_OPT_RETURN(zz_pEX, x); } +// x = a/X^n + +void LeftShift(zz_pEX& x, const zz_pEX& a, long n); +inline zz_pEX LeftShift(const zz_pEX& a, long n) + { zz_pEX x; LeftShift(x, a, n); NTL_OPT_RETURN(zz_pEX, x); } +// x = a*X^n + +#ifndef NTL_TRANSITION + +inline zz_pEX operator>>(const zz_pEX& a, long n) + { zz_pEX x; RightShift(x, a, n); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator<<(const zz_pEX& a, long n) + { zz_pEX x; LeftShift(x, a, n); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX& operator<<=(zz_pEX& x, long n) + { LeftShift(x, x, n); return x; } + +inline zz_pEX& operator>>=(zz_pEX& x, long n) + { RightShift(x, x, n); return x; } + +#endif + + + +void diff(zz_pEX& x, const zz_pEX& a); +inline zz_pEX diff(const zz_pEX& a) + { zz_pEX x; diff(x, a); NTL_OPT_RETURN(zz_pEX, x); } +// x = derivative of a + + + +void MakeMonic(zz_pEX& x); + +void reverse(zz_pEX& c, const zz_pEX& a, long hi); + +inline zz_pEX reverse(const zz_pEX& a, long hi) + { zz_pEX x; reverse(x, a, hi); NTL_OPT_RETURN(zz_pEX, x); } + +inline void reverse(zz_pEX& c, const zz_pEX& a) +{ reverse(c, a, deg(a)); } + +inline zz_pEX reverse(const zz_pEX& a) + { zz_pEX x; reverse(x, a); NTL_OPT_RETURN(zz_pEX, x); } + +inline void VectorCopy(vec_zz_pE& x, const zz_pEX& a, long n) + { VectorCopy(x, a.rep, n); } + +inline vec_zz_pE VectorCopy(const zz_pEX& a, long n) + { return VectorCopy(a.rep, n); } + + + + + + +/******************************************************************* + + conversion routines + +********************************************************************/ + + + +void conv(zz_pEX& x, long a); + +void conv(zz_pEX& x, const ZZ& a); + +void conv(zz_pEX& x, const zz_p& a); 
+void conv(zz_pEX& x, const zz_pX& a); +void conv(zz_pEX& x, const zz_pE& a); + + +void conv(zz_pEX& x, const vec_zz_pE& a); + +inline zz_pEX to_zz_pEX(long a) + { zz_pEX x; conv(x, a); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX to_zz_pEX(const ZZ& a) + { zz_pEX x; conv(x, a); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX to_zz_pEX(const zz_p& a) + { zz_pEX x; conv(x, a); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX to_zz_pEX(const zz_pX& a) + { zz_pEX x; conv(x, a); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX to_zz_pEX(const zz_pE& a) + { zz_pEX x; conv(x, a); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX to_zz_pEX(const vec_zz_pE& a) + { zz_pEX x; conv(x, a); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX& zz_pEX::operator=(long a) + { conv(*this, a); return *this; } + +inline zz_pEX& zz_pEX::operator=(const zz_p& a) + { conv(*this, a); return *this; } + +inline zz_pEX& zz_pEX::operator=(const zz_pE& a) + { conv(*this, a); return *this; } + + + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(zz_pEX& x, const zz_pEX& a) + { x = a; } + +inline void conv(vec_zz_pE& x, const zz_pEX& a) + { x = a.rep; } + +class ZZX; +void conv(zz_pEX& x, const ZZX& a); + + +/* ------------------------------------- */ + + + +/************************************************************* + + Comparison + +**************************************************************/ + +long IsZero(const zz_pEX& a); + +long IsOne(const zz_pEX& a); + +inline long operator==(const zz_pEX& a, const zz_pEX& b) +{ return a.rep == b.rep; } + +long operator==(const zz_pEX& a, long b); +long operator==(const zz_pEX& a, const zz_p& b); +long operator==(const zz_pEX& a, const zz_pE& b); + +inline long operator==(long a, const zz_pEX& b) + { return (b == a); } +inline long operator==(const zz_p& a, const zz_pEX& b) + { return (b == a); } +inline long operator==(const zz_pE& a, const zz_pEX& b) + { return (b == a); } + +inline long operator!=(const zz_pEX& a, const zz_pEX& b) + { return !(a == b); } +inline long operator!=(const zz_pEX& a, long b) + { return !(a == b); } +inline long operator!=(const zz_pEX& a, const zz_p& b) + { return !(a == b); } +inline long operator!=(const zz_pEX& a, const zz_pE& b) + { return !(a == b); } +inline long operator!=(const long a, const zz_pEX& b) + { return !(a == b); } +inline long operator!=(const zz_p& a, const zz_pEX& b) + { return !(a == b); } +inline long operator!=(const zz_pE& a, const zz_pEX& b) + { return !(a == b); } + + +/*************************************************************** + + Addition + +****************************************************************/ + +void add(zz_pEX& x, const zz_pEX& a, const zz_pEX& b); + +void sub(zz_pEX& x, const zz_pEX& a, const zz_pEX& b); + +void negate(zz_pEX& x, const zz_pEX& a); + +// scalar versions + +void add(zz_pEX & x, const zz_pEX& a, long b); +void add(zz_pEX & x, const zz_pEX& a, const zz_p& b); +void add(zz_pEX & x, const zz_pEX& a, const zz_pE& b); + +inline void add(zz_pEX& x, const zz_pE& a, const zz_pEX& b) + { add(x, b, a); } +inline void add(zz_pEX& x, const zz_p& a, const zz_pEX& b) + { add(x, b, a); } +inline void add(zz_pEX& x, long a, const zz_pEX& b) + { add(x, b, a); } + +void sub(zz_pEX & x, const zz_pEX& a, long b); +void sub(zz_pEX & x, const zz_pEX& a, const zz_p& b); +void sub(zz_pEX & x, const zz_pEX& a, const zz_pE& b); + +void sub(zz_pEX& x, const zz_pE& a, const zz_pEX& b); +void sub(zz_pEX& x, const zz_p& a, const zz_pEX& b); +void sub(zz_pEX& x, long a, const zz_pEX& b); + + 
+ +inline zz_pEX operator+(const zz_pEX& a, const zz_pEX& b) + { zz_pEX x; add(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator+(const zz_pEX& a, const zz_pE& b) + { zz_pEX x; add(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator+(const zz_pEX& a, const zz_p& b) + { zz_pEX x; add(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator+(const zz_pEX& a, long b) + { zz_pEX x; add(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator+(const zz_pE& a, const zz_pEX& b) + { zz_pEX x; add(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator+(const zz_p& a, const zz_pEX& b) + { zz_pEX x; add(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator+(long a, const zz_pEX& b) + { zz_pEX x; add(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + + +inline zz_pEX operator-(const zz_pEX& a, const zz_pEX& b) + { zz_pEX x; sub(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator-(const zz_pEX& a, const zz_pE& b) + { zz_pEX x; sub(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator-(const zz_pEX& a, const zz_p& b) + { zz_pEX x; sub(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator-(const zz_pEX& a, long b) + { zz_pEX x; sub(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator-(const zz_pE& a, const zz_pEX& b) + { zz_pEX x; sub(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator-(const zz_p& a, const zz_pEX& b) + { zz_pEX x; sub(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator-(long a, const zz_pEX& b) + { zz_pEX x; sub(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + + +inline zz_pEX& operator+=(zz_pEX& x, const zz_pEX& b) + { add(x, x, b); return x; } + +inline zz_pEX& operator+=(zz_pEX& x, const zz_pE& b) + { add(x, x, b); return x; } + +inline zz_pEX& operator+=(zz_pEX& x, const zz_p& b) + { add(x, x, b); return x; } + +inline zz_pEX& operator+=(zz_pEX& x, long b) + { add(x, x, b); return x; } + +inline zz_pEX& operator-=(zz_pEX& x, const zz_pEX& b) + { sub(x, x, b); return x; } + +inline zz_pEX& operator-=(zz_pEX& x, const zz_pE& b) + { sub(x, x, b); return x; } + +inline zz_pEX& operator-=(zz_pEX& x, const zz_p& b) + { sub(x, x, b); return x; } + +inline zz_pEX& operator-=(zz_pEX& x, long b) + { sub(x, x, b); return x; } + + +inline zz_pEX operator-(const zz_pEX& a) + { zz_pEX x; negate(x, a); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX& operator++(zz_pEX& x) { add(x, x, 1); return x; } +inline void operator++(zz_pEX& x, int) { add(x, x, 1); } +inline zz_pEX& operator--(zz_pEX& x) { sub(x, x, 1); return x; } +inline void operator--(zz_pEX& x, int) { sub(x, x, 1); } + + + +/***************************************************************** + + Multiplication + +******************************************************************/ + + +void mul(zz_pEX& x, const zz_pEX& a, const zz_pEX& b); +// x = a * b + +void sqr(zz_pEX& x, const zz_pEX& a); +inline zz_pEX sqr(const zz_pEX& a) + { zz_pEX x; sqr(x, a); NTL_OPT_RETURN(zz_pEX, x); } +// x = a^2 + + +void mul(zz_pEX & x, const zz_pEX& a, long b); +void mul(zz_pEX & x, const zz_pEX& a, const zz_p& b); +void mul(zz_pEX & x, const zz_pEX& a, const zz_pE& b); + +inline void mul(zz_pEX& x, long a, const zz_pEX& b) + { mul(x, b, a); } +inline void mul(zz_pEX& x, const zz_p& a, const zz_pEX& b) + { mul(x, b, a); } +inline void mul(zz_pEX& x, const zz_pE& a, const zz_pEX& b) + { mul(x, b, a); } + +void MulTrunc(zz_pEX& x, const zz_pEX& a, const zz_pEX& b, long n); +inline zz_pEX MulTrunc(const zz_pEX& a, const zz_pEX& b, long n) + { 
zz_pEX x; MulTrunc(x, a, b, n); NTL_OPT_RETURN(zz_pEX, x); } +// x = a * b % X^n + +void SqrTrunc(zz_pEX& x, const zz_pEX& a, long n); +inline zz_pEX SqrTrunc(const zz_pEX& a, long n) + { zz_pEX x; SqrTrunc(x, a, n); NTL_OPT_RETURN(zz_pEX, x); } +// x = a*a % X^n + + +inline zz_pEX operator*(const zz_pEX& a, const zz_pEX& b) + { zz_pEX x; mul(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator*(const zz_pEX& a, const zz_pE& b) + { zz_pEX x; mul(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator*(const zz_pEX& a, const zz_p& b) + { zz_pEX x; mul(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator*(const zz_pEX& a, long b) + { zz_pEX x; mul(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator*(const zz_pE& a, const zz_pEX& b) + { zz_pEX x; mul(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator*(const zz_p& a, const zz_pEX& b) + { zz_pEX x; mul(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator*(long a, const zz_pEX& b) + { zz_pEX x; mul(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX& operator*=(zz_pEX& x, const zz_pEX& b) + { mul(x, x, b); return x; } + +inline zz_pEX& operator*=(zz_pEX& x, const zz_pE& b) + { mul(x, x, b); return x; } + +inline zz_pEX& operator*=(zz_pEX& x, const zz_p& b) + { mul(x, x, b); return x; } + +inline zz_pEX& operator*=(zz_pEX& x, long b) + { mul(x, x, b); return x; } + + +void power(zz_pEX& x, const zz_pEX& a, long e); +inline zz_pEX power(const zz_pEX& a, long e) + { zz_pEX x; power(x, a, e); NTL_OPT_RETURN(zz_pEX, x); } + + + + + +/************************************************************* + + Division + +**************************************************************/ + +void DivRem(zz_pEX& q, zz_pEX& r, const zz_pEX& a, const zz_pEX& b); +// q = a/b, r = a%b + +void div(zz_pEX& q, const zz_pEX& a, const zz_pEX& b); +void div(zz_pEX& q, const zz_pEX& a, const zz_pE& b); +void div(zz_pEX& q, const zz_pEX& a, const zz_p& b); +void div(zz_pEX& q, const zz_pEX& a, long b); +// q = a/b + +void rem(zz_pEX& r, const zz_pEX& a, const zz_pEX& b); +// r = a%b + +long divide(zz_pEX& q, const zz_pEX& a, const zz_pEX& b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0 + +long divide(const zz_pEX& a, const zz_pEX& b); +// if b | a, sets q = a/b and returns 1; otherwise returns 0 + +void InvTrunc(zz_pEX& x, const zz_pEX& a, long m); +inline zz_pEX InvTrunc(const zz_pEX& a, long m) + { zz_pEX x; InvTrunc(x, a, m); NTL_OPT_RETURN(zz_pEX, x); } +// computes x = a^{-1} % X^m +// constant term must be invertible + + +inline zz_pEX operator/(const zz_pEX& a, const zz_pEX& b) + { zz_pEX x; div(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator/(const zz_pEX& a, const zz_pE& b) + { zz_pEX x; div(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator/(const zz_pEX& a, const zz_p& b) + { zz_pEX x; div(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX operator/(const zz_pEX& a, long b) + { zz_pEX x; div(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX& operator/=(zz_pEX& x, const zz_pEX& b) + { div(x, x, b); return x; } + +inline zz_pEX& operator/=(zz_pEX& x, const zz_pE& b) + { div(x, x, b); return x; } + +inline zz_pEX& operator/=(zz_pEX& x, const zz_p& b) + { div(x, x, b); return x; } + +inline zz_pEX& operator/=(zz_pEX& x, long b) + { div(x, x, b); return x; } + + +inline zz_pEX operator%(const zz_pEX& a, const zz_pEX& b) + { zz_pEX x; rem(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX& operator%=(zz_pEX& x, const zz_pEX& b) + { 
rem(x, x, b); return x; } + + + +/*********************************************************** + + GCD's + +************************************************************/ + + +void GCD(zz_pEX& x, const zz_pEX& a, const zz_pEX& b); +inline zz_pEX GCD(const zz_pEX& a, const zz_pEX& b) + { zz_pEX x; GCD(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } +// x = GCD(a, b), x is always monic (or zero if a==b==0). + +void XGCD(zz_pEX& d, zz_pEX& s, zz_pEX& t, const zz_pEX& a, const zz_pEX& b); +// d = gcd(a,b), a s + b t = d + + +/************************************************************* + + Modular Arithmetic without pre-conditioning + +**************************************************************/ + +// arithmetic mod f. +// all inputs and outputs are polynomials of degree less than deg(f). +// ASSUMPTION: f is assumed monic, and deg(f) > 0. +// NOTE: if you want to do many computations with a fixed f, +// use the zz_pEXModulus data structure and associated routines below. + + + +void MulMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& b, const zz_pEX& f); +inline zz_pEX MulMod(const zz_pEX& a, const zz_pEX& b, const zz_pEX& f) + { zz_pEX x; MulMod(x, a, b, f); NTL_OPT_RETURN(zz_pEX, x); } +// x = (a * b) % f + +void SqrMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& f); +inline zz_pEX SqrMod(const zz_pEX& a, const zz_pEX& f) + { zz_pEX x; SqrMod(x, a, f); NTL_OPT_RETURN(zz_pEX, x); } +// x = a^2 % f + +void MulByXMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& f); +inline zz_pEX MulByXMod(const zz_pEX& a, const zz_pEX& f) + { zz_pEX x; MulByXMod(x, a, f); NTL_OPT_RETURN(zz_pEX, x); } +// x = (a * X) mod f + +void InvMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& f); +inline zz_pEX InvMod(const zz_pEX& a, const zz_pEX& f) + { zz_pEX x; InvMod(x, a, f); NTL_OPT_RETURN(zz_pEX, x); } +// x = a^{-1} % f, error is a is not invertible + +long InvModStatus(zz_pEX& x, const zz_pEX& a, const zz_pEX& f); +// if (a, f) = 1, returns 0 and sets x = a^{-1} % f +// otherwise, returns 1 and sets x = (a, f) + + + + + +/****************************************************************** + + Modular Arithmetic with Pre-conditioning + +*******************************************************************/ + + +// If you need to do a lot of arithmetic modulo a fixed f, +// build zz_pEXModulus F for f. This pre-computes information about f +// that speeds up the computation a great deal. 
+ +class zz_pEXModulus { +public: + zz_pEXModulus(); + ~zz_pEXModulus(); + + zz_pEXModulus(const zz_pEX& ff); + + zz_pEX f; // the modulus + + operator const zz_pEX& () const { return f; } + const zz_pEX& val() const { return f; } + + long n; // deg(f) + + long method; + + zz_pEX h0; + zz_pE hlc; + zz_pEX f0; + + OptionalVal< Lazy > tracevec; + // extra level of indirection to ensure relocatability + +}; + + + +inline long deg(const zz_pEXModulus& F) { return F.n; } + + +void build(zz_pEXModulus& F, const zz_pEX& f); + +void rem(zz_pEX& r, const zz_pEX& a, const zz_pEXModulus& F); + +void DivRem(zz_pEX& q, zz_pEX& r, const zz_pEX& a, const zz_pEXModulus& F); + +void div(zz_pEX& q, const zz_pEX& a, const zz_pEXModulus& F); + +void MulMod(zz_pEX& c, const zz_pEX& a, const zz_pEX& b, + const zz_pEXModulus& F); +inline zz_pEX MulMod(const zz_pEX& a, const zz_pEX& b, + const zz_pEXModulus& F) + { zz_pEX x; MulMod(x, a, b, F); NTL_OPT_RETURN(zz_pEX, x); } + +void SqrMod(zz_pEX& c, const zz_pEX& a, const zz_pEXModulus& F); +inline zz_pEX SqrMod(const zz_pEX& a, const zz_pEXModulus& F) + { zz_pEX x; SqrMod(x, a, F); NTL_OPT_RETURN(zz_pEX, x); } + + +void PowerMod(zz_pEX& h, const zz_pEX& g, const ZZ& e, const zz_pEXModulus& F); + +inline void PowerMod(zz_pEX& h, const zz_pEX& g, long e, + const zz_pEXModulus& F) + { PowerMod(h, g, ZZ_expo(e), F); } + +inline zz_pEX PowerMod(const zz_pEX& g, const ZZ& e, + const zz_pEXModulus& F) + { zz_pEX x; PowerMod(x, g, e, F); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX PowerMod(const zz_pEX& g, long e, const zz_pEXModulus& F) + { zz_pEX x; PowerMod(x, g, e, F); NTL_OPT_RETURN(zz_pEX, x); } + +void PowerXMod(zz_pEX& hh, const ZZ& e, const zz_pEXModulus& F); + +inline void PowerXMod(zz_pEX& h, long e, const zz_pEXModulus& F) + { PowerXMod(h, ZZ_expo(e), F); } + + +inline zz_pEX PowerXMod(const ZZ& e, const zz_pEXModulus& F) + { zz_pEX x; PowerXMod(x, e, F); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX PowerXMod(long e, const zz_pEXModulus& F) + { zz_pEX x; PowerXMod(x, e, F); NTL_OPT_RETURN(zz_pEX, x); } + + +inline zz_pEX operator%(const zz_pEX& a, const zz_pEXModulus& F) + { zz_pEX x; rem(x, a, F); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX& operator%=(zz_pEX& x, const zz_pEXModulus& F) + { rem(x, x, F); return x; } + +inline zz_pEX operator/(const zz_pEX& a, const zz_pEXModulus& F) + { zz_pEX x; div(x, a, F); NTL_OPT_RETURN(zz_pEX, x); } + +inline zz_pEX& operator/=(zz_pEX& x, const zz_pEXModulus& F) + { div(x, x, F); return x; } + + + +/***************************************************************** + + vectors of zz_pEX's + +*****************************************************************/ + + + +typedef Vec vec_zz_pEX; + + + +/******************************************************* + + Evaluation and related problems + +********************************************************/ + + + + +void BuildFromRoots(zz_pEX& x, const vec_zz_pE& a); +inline zz_pEX BuildFromRoots(const vec_zz_pE& a) + { zz_pEX x; BuildFromRoots(x, a); NTL_OPT_RETURN(zz_pEX, x); } +// computes the polynomial (X-a[0]) ... 
(X-a[n-1]), where n = a.length() + + +void eval(zz_pE& b, const zz_pEX& f, const zz_pE& a); +inline zz_pE eval(const zz_pEX& f, const zz_pE& a) + { zz_pE x; eval(x, f, a); NTL_OPT_RETURN(zz_pE, x); } +// b = f(a) + +void eval(vec_zz_pE& b, const zz_pEX& f, const vec_zz_pE& a); +inline vec_zz_pE eval(const zz_pEX& f, const vec_zz_pE& a) + { vec_zz_pE x; eval(x, f, a); NTL_OPT_RETURN(vec_zz_pE, x); } +// b[i] = f(a[i]) + +inline void eval(zz_pE& b, const zz_pX& f, const zz_pE& a) + { conv(b, CompMod(f, rep(a), zz_pE::modulus())); } + +inline zz_pE eval(const zz_pX& f, const zz_pE& a) + { zz_pE x; eval(x, f, a); NTL_OPT_RETURN(zz_pE, x); } +// b = f(a) + + +void interpolate(zz_pEX& f, const vec_zz_pE& a, const vec_zz_pE& b); +inline zz_pEX interpolate(const vec_zz_pE& a, const vec_zz_pE& b) + { zz_pEX x; interpolate(x, a, b); NTL_OPT_RETURN(zz_pEX, x); } +// computes f such that f(a[i]) = b[i] + + + + + +/********************************************************** + + Modular Composition and Minimal Polynomials + +***********************************************************/ + + + +void CompMod(zz_pEX& x, const zz_pEX& g, const zz_pEX& h, const zz_pEXModulus& F); +inline zz_pEX +CompMod(const zz_pEX& g, const zz_pEX& h, const zz_pEXModulus& F) + { zz_pEX x; CompMod(x, g, h, F); NTL_OPT_RETURN(zz_pEX, x); } +// x = g(h) mod f + +void Comp2Mod(zz_pEX& x1, zz_pEX& x2, const zz_pEX& g1, const zz_pEX& g2, + const zz_pEX& h, const zz_pEXModulus& F); +// xi = gi(h) mod f (i=1,2) + +void Comp3Mod(zz_pEX& x1, zz_pEX& x2, zz_pEX& x3, + const zz_pEX& g1, const zz_pEX& g2, const zz_pEX& g3, + const zz_pEX& h, const zz_pEXModulus& F); +// xi = gi(h) mod f (i=1..3) + + + +// The routine build (see below) which is implicitly called +// by the various compose and UpdateMap routines builds a table +// of polynomials. +// If zz_pEXArgBound > 0, then the table is limited in +// size to approximamtely that many KB. +// If zz_pEXArgBound <= 0, then it is ignored, and space is allocated +// so as to maximize speed. +// Initially, zz_pEXArgBound = 0. + + +// If a single h is going to be used with many g's +// then you should build a zz_pEXArgument for h, +// and then use the compose routine below. +// build computes and stores h, h^2, ..., h^m mod f. +// After this pre-computation, composing a polynomial of degree +// roughly n with h takes n/m multiplies mod f, plus n^2 +// scalar multiplies. +// Thus, increasing m increases the space requirement and the pre-computation +// time, but reduces the composition time. +// If zz_pEXArgBound > 0, a table of size less than m may be built. 
+ +struct zz_pEXArgument { + vec_zz_pEX H; +}; + +extern +NTL_CHEAP_THREAD_LOCAL +long zz_pEXArgBound; + + +void build(zz_pEXArgument& H, const zz_pEX& h, const zz_pEXModulus& F, long m); + +// m must be > 0, otherwise an error is raised + +void CompMod(zz_pEX& x, const zz_pEX& g, const zz_pEXArgument& H, + const zz_pEXModulus& F); + +inline zz_pEX +CompMod(const zz_pEX& g, const zz_pEXArgument& H, const zz_pEXModulus& F) + { zz_pEX x; CompMod(x, g, H, F); NTL_OPT_RETURN(zz_pEX, x); } + + + + +void MinPolySeq(zz_pEX& h, const vec_zz_pE& a, long m); +inline zz_pEX MinPolySeq(const vec_zz_pE& a, long m) + { zz_pEX x; MinPolySeq(x, a, m); NTL_OPT_RETURN(zz_pEX, x); } + + +void MinPolyMod(zz_pEX& hh, const zz_pEX& g, const zz_pEXModulus& F); +inline zz_pEX MinPolyMod(const zz_pEX& g, const zz_pEXModulus& F) + { zz_pEX x; MinPolyMod(x, g, F); NTL_OPT_RETURN(zz_pEX, x); } + + +void MinPolyMod(zz_pEX& hh, const zz_pEX& g, const zz_pEXModulus& F, long m); +inline zz_pEX MinPolyMod(const zz_pEX& g, const zz_pEXModulus& F, long m) + { zz_pEX x; MinPolyMod(x, g, F, m); NTL_OPT_RETURN(zz_pEX, x); } + +void ProbMinPolyMod(zz_pEX& hh, const zz_pEX& g, const zz_pEXModulus& F); +inline zz_pEX ProbMinPolyMod(const zz_pEX& g, const zz_pEXModulus& F) + { zz_pEX x; ProbMinPolyMod(x, g, F); NTL_OPT_RETURN(zz_pEX, x); } + +void ProbMinPolyMod(zz_pEX& hh, const zz_pEX& g, const zz_pEXModulus& F, long m); +inline zz_pEX ProbMinPolyMod(const zz_pEX& g, const zz_pEXModulus& F, long m) + { zz_pEX x; ProbMinPolyMod(x, g, F, m); NTL_OPT_RETURN(zz_pEX, x); } + +void IrredPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F); +inline zz_pEX IrredPolyMod(const zz_pEX& g, const zz_pEXModulus& F) + { zz_pEX x; IrredPolyMod(x, g, F); NTL_OPT_RETURN(zz_pEX, x); } + +void IrredPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F, long m); +inline zz_pEX IrredPolyMod(const zz_pEX& g, const zz_pEXModulus& F, long m) + { zz_pEX x; IrredPolyMod(x, g, F, m); NTL_OPT_RETURN(zz_pEX, x); } + + +struct zz_pEXTransMultiplier { + zz_pEX f0, fbi, b; + long shamt, shamt_fbi, shamt_b; +}; + +void build(zz_pEXTransMultiplier& B, const zz_pEX& b, const zz_pEXModulus& F); + +void TransMulMod(zz_pEX& x, const zz_pEX& a, const zz_pEXTransMultiplier& B, + const zz_pEXModulus& F); + +void UpdateMap(vec_zz_pE& x, const vec_zz_pE& a, + const zz_pEXTransMultiplier& B, const zz_pEXModulus& F); + +inline vec_zz_pE UpdateMap(const vec_zz_pE& a, + const zz_pEXTransMultiplier& B, const zz_pEXModulus& F) + { vec_zz_pE x; UpdateMap(x, a, B, F); NTL_OPT_RETURN(vec_zz_pE, x); } + +void ProjectPowers(vec_zz_pE& x, const vec_zz_pE& a, long k, + const zz_pEXArgument& H, const zz_pEXModulus& F); +inline vec_zz_pE ProjectPowers(const vec_zz_pE& a, long k, + const zz_pEXArgument& H, const zz_pEXModulus& F) + { vec_zz_pE x; ProjectPowers(x, a, k, H, F); NTL_OPT_RETURN(vec_zz_pE, x); } + +void ProjectPowers(vec_zz_pE& x, const vec_zz_pE& a, long k, const zz_pEX& h, + const zz_pEXModulus& F); +inline vec_zz_pE ProjectPowers(const vec_zz_pE& a, long k, + const zz_pEX& H, const zz_pEXModulus& F) + { vec_zz_pE x; ProjectPowers(x, a, k, H, F); NTL_OPT_RETURN(vec_zz_pE, x); } + +inline void project(zz_pE& x, const vec_zz_pE& a, const zz_pEX& b) + { InnerProduct(x, a, b.rep); } + +inline zz_pE project(const vec_zz_pE& a, const zz_pEX& b) + { zz_pE x; InnerProduct(x, a, b.rep); NTL_OPT_RETURN(zz_pE, x); } + + + +/***************************************************************** + + modular composition and minimal polynonomials + in towers + 
+******************************************************************/ + + +// composition + +void CompTower(zz_pEX& x, const zz_pX& g, const zz_pEXArgument& A, + const zz_pEXModulus& F); + +inline zz_pEX CompTower(const zz_pX& g, const zz_pEXArgument& A, + const zz_pEXModulus& F) + { zz_pEX x; CompTower(x, g, A, F); NTL_OPT_RETURN(zz_pEX, x); } + +void CompTower(zz_pEX& x, const zz_pX& g, const zz_pEX& h, + const zz_pEXModulus& F); + +inline zz_pEX CompTower(const zz_pX& g, const zz_pEX& h, + const zz_pEXModulus& F) + { zz_pEX x; CompTower(x, g, h, F); NTL_OPT_RETURN(zz_pEX, x); } + +// prob min poly + +void ProbMinPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F, + long m); + +inline zz_pX ProbMinPolyTower(const zz_pEX& g, const zz_pEXModulus& F, + long m) + { zz_pX x; ProbMinPolyTower(x, g, F, m); NTL_OPT_RETURN(zz_pX, x); } + +inline void ProbMinPolyTower(zz_pX& h, const zz_pEX& g, + const zz_pEXModulus& F) + { ProbMinPolyTower(h, g, F, deg(F)*zz_pE::degree()); } + +inline zz_pX ProbMinPolyTower(const zz_pEX& g, const zz_pEXModulus& F) + { zz_pX x; ProbMinPolyTower(x, g, F); NTL_OPT_RETURN(zz_pX, x); } + + +// min poly + + +void MinPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F, + long m); + +inline zz_pX MinPolyTower(const zz_pEX& g, const zz_pEXModulus& F, + long m) + { zz_pX x; MinPolyTower(x, g, F, m); NTL_OPT_RETURN(zz_pX, x); } + +inline void MinPolyTower(zz_pX& h, const zz_pEX& g, + const zz_pEXModulus& F) + { MinPolyTower(h, g, F, deg(F)*zz_pE::degree()); } + +inline zz_pX MinPolyTower(const zz_pEX& g, const zz_pEXModulus& F) + { zz_pX x; MinPolyTower(x, g, F); NTL_OPT_RETURN(zz_pX, x); } + +// irred poly + + +void IrredPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F, + long m); + +inline zz_pX IrredPolyTower(const zz_pEX& g, const zz_pEXModulus& F, + long m) + { zz_pX x; IrredPolyTower(x, g, F, m); NTL_OPT_RETURN(zz_pX, x); } + +inline void IrredPolyTower(zz_pX& h, const zz_pEX& g, + const zz_pEXModulus& F) + { IrredPolyTower(h, g, F, deg(F)*zz_pE::degree()); } + +inline zz_pX IrredPolyTower(const zz_pEX& g, const zz_pEXModulus& F) + { zz_pX x; IrredPolyTower(x, g, F); NTL_OPT_RETURN(zz_pX, x); } + +/***************************************************************** + + Traces, norms, resultants + +******************************************************************/ + +void TraceVec(vec_zz_pE& S, const zz_pEX& f); + +inline vec_zz_pE TraceVec(const zz_pEX& f) + { vec_zz_pE x; TraceVec(x, f); NTL_OPT_RETURN(vec_zz_pE, x); } + + +void TraceMod(zz_pE& x, const zz_pEX& a, const zz_pEXModulus& F); + +inline zz_pE TraceMod(const zz_pEX& a, const zz_pEXModulus& F) + { zz_pE x; TraceMod(x, a, F); NTL_OPT_RETURN(zz_pE, x); } + +void TraceMod(zz_pE& x, const zz_pEX& a, const zz_pEX& f); + +inline zz_pE TraceMod(const zz_pEX& a, const zz_pEX& f) + { zz_pE x; TraceMod(x, a, f); NTL_OPT_RETURN(zz_pE, x); } + + + + + +void NormMod(zz_pE& x, const zz_pEX& a, const zz_pEX& f); + +inline zz_pE NormMod(const zz_pEX& a, const zz_pEX& f) + { zz_pE x; NormMod(x, a, f); NTL_OPT_RETURN(zz_pE, x); } + +void resultant(zz_pE& rres, const zz_pEX& a, const zz_pEX& b); + +inline zz_pE resultant(const zz_pEX& a, const zz_pEX& b) + { zz_pE x; resultant(x, a, b); NTL_OPT_RETURN(zz_pE, x); } + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/lzz_pEXFactoring.h b/thirdparty/linux/ntl/include/NTL/lzz_pEXFactoring.h new file mode 100644 index 0000000000..2da959f0d9 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/lzz_pEXFactoring.h @@ -0,0 +1,194 @@ + 
+#ifndef NTL_zz_pEXFactoring__H +#define NTL_zz_pEXFactoring__H + +#include + +NTL_OPEN_NNS + + +void SquareFreeDecomp(vec_pair_zz_pEX_long& u, const zz_pEX& f); +inline vec_pair_zz_pEX_long SquareFreeDecomp(const zz_pEX& f) + { vec_pair_zz_pEX_long x; SquareFreeDecomp(x, f); return x; } + + +// Performs square-free decomposition. +// f must be monic. +// If f = prod_i g_i^i, then u is set to a lest of pairs (g_i, i). +// The list is is increasing order of i, with trivial terms +// (i.e., g_i = 1) deleted. + + +void FindRoots(vec_zz_pE& x, const zz_pEX& f); +inline vec_zz_pE FindRoots(const zz_pEX& f) + { vec_zz_pE x; FindRoots(x, f); return x; } + +// f is monic, and has deg(f) distinct roots. +// returns the list of roots + + +void FindRoot(zz_pE& root, const zz_pEX& f); +inline zz_pE FindRoot(const zz_pEX& f) + { zz_pE x; FindRoot(x, f); return x; } + + +// finds a single root of f. +// assumes that f is monic and splits into distinct linear factors + + +extern +NTL_CHEAP_THREAD_LOCAL +long zz_pEX_GCDTableSize; /* = 4 */ +// Controls GCD blocking for NewDDF + + +extern +NTL_CHEAP_THREAD_LOCAL +double zz_pEXFileThresh; +// external files are used for baby/giant steps if size +// of these tables exceeds zz_pEXFileThresh KB. + + +void NewDDF(vec_pair_zz_pEX_long& factors, + const zz_pEX& f, const zz_pEX& h, long verbose=0); +inline vec_pair_zz_pEX_long NewDDF(const zz_pEX& f, const zz_pEX& h, + long verbose=0) + { vec_pair_zz_pEX_long x; NewDDF(x, f, h, verbose); return x; } + + + + + +void EDF(vec_zz_pEX& factors, const zz_pEX& f, const zz_pEX& b, + long d, long verbose=0); +inline vec_zz_pEX EDF(const zz_pEX& f, const zz_pEX& b, + long d, long verbose=0) + { vec_zz_pEX x; EDF(x, f, b, d, verbose); return x; } + + +// Performs equal-degree factorization. +// f is monic, square-free, and all irreducible factors have same degree. +// b = X^p mod f. +// d = degree of irreducible factors of f +// Space for the trace-map computation can be controlled via ComposeBound. + + + +void RootEDF(vec_zz_pEX& factors, const zz_pEX& f, long verbose=0); +inline vec_zz_pEX RootEDF(const zz_pEX& f, long verbose=0) + { vec_zz_pEX x; RootEDF(x, f, verbose); return x; } + + +// EDF for d==1 + +void SFCanZass(vec_zz_pEX& factors, const zz_pEX& f, long verbose=0); +inline vec_zz_pEX SFCanZass(const zz_pEX& f, long verbose=0) + { vec_zz_pEX x; SFCanZass(x, f, verbose); return x; } + + +// Assumes f is monic and square-free. +// returns list of factors of f. +// Uses "Cantor/Zassenhaus" approach. + + + +void CanZass(vec_pair_zz_pEX_long& factors, const zz_pEX& f, + long verbose=0); +inline vec_pair_zz_pEX_long CanZass(const zz_pEX& f, long verbose=0) + { vec_pair_zz_pEX_long x; CanZass(x, f, verbose); return x; } + + +// returns a list of factors, with multiplicities. +// f must be monic. +// Uses "Cantor/Zassenhaus" approach. + + +void mul(zz_pEX& f, const vec_pair_zz_pEX_long& v); +inline zz_pEX mul(const vec_pair_zz_pEX_long& v) + { zz_pEX x; mul(x, v); return x; } + + +// multiplies polynomials, with multiplicities + + +/************************************************************* + + irreducible poly's: tests and constructions + +**************************************************************/ + +long ProbIrredTest(const zz_pEX& f, long iter=1); + +// performs a fast, probabilistic irreduciblity test +// the test can err only if f is reducible, and the +// error probability is bounded by p^{-iter}. 
+ +long DetIrredTest(const zz_pEX& f); + +// performs a recursive deterministic irreducibility test +// fast in the worst-case (when input is irreducible). + +long IterIrredTest(const zz_pEX& f); + +// performs an iterative deterministic irreducibility test, +// based on DDF. Fast on average (when f has a small factor). + +void BuildIrred(zz_pEX& f, long n); +inline zz_pEX BuildIrred_zz_pEX(long n) + { zz_pEX x; BuildIrred(x, n); NTL_OPT_RETURN(zz_pEX, x); } + + +// Build a monic irreducible poly of degree n. + +void BuildRandomIrred(zz_pEX& f, const zz_pEX& g); +inline zz_pEX BuildRandomIrred(const zz_pEX& g) + { zz_pEX x; BuildRandomIrred(x, g); NTL_OPT_RETURN(zz_pEX, x); } + + +// g is a monic irreducible polynomial. +// constructs a random monic irreducible polynomial f of the same degree. + + +long RecComputeDegree(const zz_pEX& h, const zz_pEXModulus& F); + +// f = F.f is assumed to be an "equal degree" polynomial +// h = X^p mod f +// the common degree of the irreducible factors of f is computed +// This routine is useful in counting points on elliptic curves + + +long IterComputeDegree(const zz_pEX& h, const zz_pEXModulus& F); + + +void TraceMap(zz_pEX& w, const zz_pEX& a, long d, const zz_pEXModulus& F, + const zz_pEX& b); + +inline zz_pEX TraceMap(const zz_pEX& a, long d, const zz_pEXModulus& F, + const zz_pEX& b) + { zz_pEX x; TraceMap(x, a, d, F, b); return x; } + + +// w = a+a^q+...+^{q^{d-1}} mod f; +// it is assumed that d >= 0, and b = X^q mod f, q a power of p +// Space allocation can be controlled via ComposeBound (see "zz_pEX.h") + + + +void PowerCompose(zz_pEX& w, const zz_pEX& a, long d, const zz_pEXModulus& F); + +inline zz_pEX PowerCompose(const zz_pEX& a, long d, const zz_pEXModulus& F) + { zz_pEX x; PowerCompose(x, a, d, F); return x; } + + +// w = X^{q^d} mod f; +// it is assumed that d >= 0, and b = X^q mod f, q a power of p +// Space allocation can be controlled via ComposeBound (see "zz_pEX.h") + + + + + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/lzz_pX.h b/thirdparty/linux/ntl/include/NTL/lzz_pX.h new file mode 100644 index 0000000000..253ce3e713 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/lzz_pX.h @@ -0,0 +1,1333 @@ + +#ifndef NTL_zz_pX__H +#define NTL_zz_pX__H + +#include +#include +#include +#include +#include + +NTL_OPEN_NNS + +// some cross-over points + +#define NTL_zz_pX_MOD_CROSSOVER (zz_pX_mod_crossover[zz_pInfo->PrimeCnt]) +#define NTL_zz_pX_MUL_CROSSOVER (zz_pX_mul_crossover[zz_pInfo->PrimeCnt]) +#define NTL_zz_pX_NEWTON_CROSSOVER (zz_pX_newton_crossover[zz_pInfo->PrimeCnt]) +#define NTL_zz_pX_DIV_CROSSOVER (zz_pX_div_crossover[zz_pInfo->PrimeCnt]) +#define NTL_zz_pX_HalfGCD_CROSSOVER (zz_pX_halfgcd_crossover[zz_pInfo->PrimeCnt]) +#define NTL_zz_pX_GCD_CROSSOVER (zz_pX_gcd_crossover[zz_pInfo->PrimeCnt]) +#define NTL_zz_pX_BERMASS_CROSSOVER (zz_pX_bermass_crossover[zz_pInfo->PrimeCnt]) +#define NTL_zz_pX_TRACE_CROSSOVER (zz_pX_trace_crossover[zz_pInfo->PrimeCnt]) + +extern const long zz_pX_mod_crossover[]; +extern const long zz_pX_mul_crossover[]; +extern const long zz_pX_newton_crossover[]; +extern const long zz_pX_div_crossover[]; +extern const long zz_pX_halfgcd_crossover[]; +extern const long zz_pX_gcd_crossover[]; +extern const long zz_pX_bermass_crossover[]; +extern const long zz_pX_trace_crossover[]; + + + +/************************************************************ + + zz_pX + +The class zz_pX implements polynomial arithmetic modulo p. +Polynomials are represented as vec_zz_p's. 
+If f is a zz_pX, then f.rep is a vec_zz_p. +The zero polynomial is represented as a zero length vector. +Otherwise. f.rep[0] is the constant-term, and f.rep[f.rep.length()-1] +is the leading coefficient, which is always non-zero. +The member f.rep is public, so the vector representation is fully +accessible. +Use the member function normalize() to strip leading zeros. + +**************************************************************/ + + +class zz_pE; // forward declaration +class zz_pXModulus; +class fftRep; +class zz_pXMultiplier; + + +class zz_pX { +public: +typedef zz_p coeff_type; +typedef zz_pE residue_type; +typedef zz_pXModulus modulus_type; +typedef zz_pXMultiplier multiplier_type; +typedef fftRep fft_type; + + +vec_zz_p rep; + +typedef vec_zz_p VectorBaseType; + + + +/*************************************************************** + + Constructors, Destructors, and Assignment + +****************************************************************/ + + +zz_pX() {} +// initial value 0 + +explicit zz_pX(long a) { *this = a; } +explicit zz_pX(zz_p a) { *this = a; } + + +zz_pX(INIT_SIZE_TYPE, long n) { rep.SetMaxLength(n); } + +zz_pX(const zz_pX& a) : rep(a.rep) { } +// initial value is a + +inline zz_pX(long i, zz_p c); +inline zz_pX(long i, long c); + +inline zz_pX(INIT_MONO_TYPE, long i, zz_p c); +inline zz_pX(INIT_MONO_TYPE, long i, long c); +inline zz_pX(INIT_MONO_TYPE, long i); + +zz_pX& operator=(const zz_pX& a) + { rep = a.rep; return *this; } + +inline zz_pX& operator=(long a); +inline zz_pX& operator=(zz_p a); + +~zz_pX() { } + +void normalize(); +// strip leading zeros + +void SetMaxLength(long n) +// pre-allocate space for n coefficients. +// Value is unchanged + + { rep.SetMaxLength(n); } + + +void kill() +// free space held by this polynomial. Value becomes 0. + + { rep.kill(); } + + + +void SetLength(long n) { rep.SetLength(n); } +zz_p& operator[](long i) { return rep[i]; } +const zz_p& operator[](long i) const { return rep[i]; } + + +void swap(zz_pX& x) +{ + rep.swap(x.rep); +} + + +static const zz_pX& zero(); + +zz_pX(zz_pX& x, INIT_TRANS_TYPE) : rep(x.rep, INIT_TRANS) { } + +}; + + + + +/******************************************************************** + + input and output + +I/O format: + + [a_0 a_1 ... a_n], + +represents the polynomial a_0 + a_1*X + ... + a_n*X^n. + +On output, all coefficients will be integers between 0 and p-1, +amd a_n not zero (the zero polynomial is [ ]). +On input, the coefficients are arbitrary integers which are +then reduced modulo p, and leading zeros stripped. + +*********************************************************************/ + + +NTL_SNS istream& operator>>(NTL_SNS istream& s, zz_pX& x); +NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const zz_pX& a); + + + + +/********************************************************** + + Some utility routines + +***********************************************************/ + + +inline long deg(const zz_pX& a) { return a.rep.length() - 1; } +// degree of a polynomial. +// note that the zero polynomial has degree -1. 
+ +const zz_p coeff(const zz_pX& a, long i); +// zero if i not in range + +void GetCoeff(zz_p& x, const zz_pX& a, long i); +// x = a[i], or zero if i not in range + +const zz_p LeadCoeff(const zz_pX& a); +// zero if a == 0 + +const zz_p ConstTerm(const zz_pX& a); +// zero if a == 0 + +void SetCoeff(zz_pX& x, long i, zz_p a); +// x[i] = a, error is raised if i < 0 + +void SetCoeff(zz_pX& x, long i, long a); +// x[i] = a, error is raised if i < 0 + +void SetCoeff(zz_pX& x, long i); +// x[i] = 1, error is raised if i < 0 + +inline zz_pX::zz_pX(long i, zz_p a) { SetCoeff(*this, i, a); } +inline zz_pX::zz_pX(long i, long a) { SetCoeff(*this, i, a); } + +inline zz_pX::zz_pX(INIT_MONO_TYPE, long i, zz_p a) { SetCoeff(*this, i, a); } +inline zz_pX::zz_pX(INIT_MONO_TYPE, long i, long a) { SetCoeff(*this, i, a); } +inline zz_pX::zz_pX(INIT_MONO_TYPE, long i) { SetCoeff(*this, i); } + +void SetX(zz_pX& x); +// x is set to the monomial X + +long IsX(const zz_pX& a); +// test if x = X + +inline void clear(zz_pX& x) +// x = 0 + + { x.rep.SetLength(0); } + +inline void set(zz_pX& x) +// x = 1 + + { x.rep.SetLength(1); set(x.rep[0]); } + +inline void swap(zz_pX& x, zz_pX& y) +// swap x & y (only pointers are swapped) + + { x.swap(y); } + +void random(zz_pX& x, long n); +inline zz_pX random_zz_pX(long n) + { zz_pX x; random(x, n); NTL_OPT_RETURN(zz_pX, x); } + +// generate a random polynomial of degree < n + +void trunc(zz_pX& x, const zz_pX& a, long m); +// x = a % X^m + +inline zz_pX trunc(const zz_pX& a, long m) + { zz_pX x; trunc(x, a, m); NTL_OPT_RETURN(zz_pX, x); } + +void RightShift(zz_pX& x, const zz_pX& a, long n); +// x = a/X^n + +inline zz_pX RightShift(const zz_pX& a, long n) + { zz_pX x; RightShift(x, a, n); NTL_OPT_RETURN(zz_pX, x); } + +void LeftShift(zz_pX& x, const zz_pX& a, long n); +// x = a*X^n + +inline zz_pX LeftShift(const zz_pX& a, long n) + { zz_pX x; LeftShift(x, a, n); NTL_OPT_RETURN(zz_pX, x); } + + +#ifndef NTL_TRANSITION + +inline zz_pX operator>>(const zz_pX& a, long n) + { zz_pX x; RightShift(x, a, n); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator<<(const zz_pX& a, long n) + { zz_pX x; LeftShift(x, a, n); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX& operator<<=(zz_pX& x, long n) + { LeftShift(x, x, n); return x; } + +inline zz_pX& operator>>=(zz_pX& x, long n) + { RightShift(x, x, n); return x; } + +#endif + + + +void diff(zz_pX& x, const zz_pX& a); +// x = derivative of a + +inline zz_pX diff(const zz_pX& a) + { zz_pX x; diff(x, a); NTL_OPT_RETURN(zz_pX, x); } + +void MakeMonic(zz_pX& x); +// makes x monic + +void reverse(zz_pX& c, const zz_pX& a, long hi); + +inline zz_pX reverse(const zz_pX& a, long hi) + { zz_pX x; reverse(x, a, hi); NTL_OPT_RETURN(zz_pX, x); } + +inline void reverse(zz_pX& c, const zz_pX& a) +{ reverse(c, a, deg(a)); } + +inline zz_pX reverse(const zz_pX& a) + { zz_pX x; reverse(x, a); NTL_OPT_RETURN(zz_pX, x); } + + +inline void VectorCopy(vec_zz_p& x, const zz_pX& a, long n) + { VectorCopy(x, a.rep, n); } + +inline vec_zz_p VectorCopy(const zz_pX& a, long n) + { return VectorCopy(a.rep, n); } + + + + +/******************************************************************* + + conversion routines + +********************************************************************/ + + + +void conv(zz_pX& x, long a); + +inline zz_pX to_zz_pX(long a) + { zz_pX x; conv(x, a); NTL_OPT_RETURN(zz_pX, x); } + + +void conv(zz_pX& x, const ZZ& a); + +inline zz_pX to_zz_pX(const ZZ& a) + { zz_pX x; conv(x, a); NTL_OPT_RETURN(zz_pX, x); } + +void conv(zz_pX& x, zz_p a); + 
+inline zz_pX to_zz_pX(zz_p a) + { zz_pX x; conv(x, a); NTL_OPT_RETURN(zz_pX, x); } + + +void conv(zz_pX& x, const vec_zz_p& a); + +inline zz_pX to_zz_pX(const vec_zz_p& a) + { zz_pX x; conv(x, a); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX& zz_pX::operator=(zz_p a) + { conv(*this, a); return *this; } + +inline zz_pX& zz_pX::operator=(long a) + { conv(*this, a); return *this; } + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(zz_pX& x, const zz_pX& a) + { x = a; } + +inline void conv(vec_zz_p& x, const zz_pX& a) + { x = a.rep; } + + +/* ------------------------------------- */ + + + +/************************************************************* + + Comparison + +**************************************************************/ + +long IsZero(const zz_pX& a); + +long IsOne(const zz_pX& a); + +inline long operator==(const zz_pX& a, const zz_pX& b) +{ + return a.rep == b.rep; +} + +inline long operator!=(const zz_pX& a, const zz_pX& b) + { return !(a == b); } + +long operator==(const zz_pX& a, long b); +long operator==(const zz_pX& a, zz_p b); + +inline long operator==(long a, const zz_pX& b) { return b == a; } +inline long operator==(zz_p a, const zz_pX& b) { return b == a; } + +inline long operator!=(const zz_pX& a, long b) { return !(a == b); } +inline long operator!=(const zz_pX& a, zz_p b) { return !(a == b); } +inline long operator!=(long a, const zz_pX& b) { return !(a == b); } +inline long operator!=(zz_p a, const zz_pX& b) { return !(a == b); } + + + +/*************************************************************** + + Addition + +****************************************************************/ + +void add(zz_pX& x, const zz_pX& a, const zz_pX& b); +// x = a + b + +void sub(zz_pX& x, const zz_pX& a, const zz_pX& b); +// x = a - b + +void negate(zz_pX& x, const zz_pX& a); +// x = -a + +// scalar versions + +void add(zz_pX & x, const zz_pX& a, zz_p b); // x = a + b +inline void add(zz_pX& x, const zz_pX& a, long b) { add(x, a, to_zz_p(b)); } + +inline void add(zz_pX& x, zz_p a, const zz_pX& b) { add(x, b, a); } +inline void add(zz_pX& x, long a, const zz_pX& b) { add(x, b, a); } + +void sub(zz_pX & x, const zz_pX& a, zz_p b); // x = a - b +inline void sub(zz_pX& x, const zz_pX& a, long b) { sub(x, a, to_zz_p(b)); } + +void sub(zz_pX& x, zz_p a, const zz_pX& b); +inline void sub(zz_pX& x, long a, const zz_pX& b) { sub(x, to_zz_p(a), b); } + +inline zz_pX operator+(const zz_pX& a, const zz_pX& b) + { zz_pX x; add(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator+(const zz_pX& a, zz_p b) + { zz_pX x; add(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator+(const zz_pX& a, long b) + { zz_pX x; add(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator+(zz_p a, const zz_pX& b) + { zz_pX x; add(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator+(long a, const zz_pX& b) + { zz_pX x; add(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + + +inline zz_pX operator-(const zz_pX& a, const zz_pX& b) + { zz_pX x; sub(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator-(const zz_pX& a, zz_p b) + { zz_pX x; sub(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator-(const zz_pX& a, long b) + { zz_pX x; sub(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator-(zz_p a, const zz_pX& b) + { zz_pX x; sub(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator-(long a, const zz_pX& b) + { zz_pX x; sub(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + + +inline zz_pX& operator+=(zz_pX& x, const zz_pX& b) + { 
add(x, x, b); return x; } + +inline zz_pX& operator+=(zz_pX& x, zz_p b) + { add(x, x, b); return x; } + +inline zz_pX& operator+=(zz_pX& x, long b) + { add(x, x, b); return x; } + +inline zz_pX& operator-=(zz_pX& x, const zz_pX& b) + { sub(x, x, b); return x; } + +inline zz_pX& operator-=(zz_pX& x, zz_p b) + { sub(x, x, b); return x; } + +inline zz_pX& operator-=(zz_pX& x, long b) + { sub(x, x, b); return x; } + + +inline zz_pX operator-(const zz_pX& a) + { zz_pX x; negate(x, a); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX& operator++(zz_pX& x) { add(x, x, 1); return x; } +inline void operator++(zz_pX& x, int) { add(x, x, 1); } +inline zz_pX& operator--(zz_pX& x) { sub(x, x, 1); return x; } +inline void operator--(zz_pX& x, int) { sub(x, x, 1); } + + + +/***************************************************************** + + Multiplication + +******************************************************************/ + + +void mul(zz_pX& x, const zz_pX& a, const zz_pX& b); +// x = a * b + +void sqr(zz_pX& x, const zz_pX& a); +inline zz_pX sqr(const zz_pX& a) + { zz_pX x; sqr(x, a); NTL_OPT_RETURN(zz_pX, x); } +// x = a^2 + +void mul(zz_pX& x, const zz_pX& a, zz_p b); +inline void mul(zz_pX& x, const zz_pX& a, long b) { mul(x, a, to_zz_p(b)); } + +inline void mul(zz_pX& x, zz_p a, const zz_pX& b) { mul(x, b, a); } +inline void mul(zz_pX& x, long a, const zz_pX& b) { mul(x, b, a); } + + +inline zz_pX operator*(const zz_pX& a, const zz_pX& b) + { zz_pX x; mul(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator*(const zz_pX& a, zz_p b) + { zz_pX x; mul(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator*(const zz_pX& a, long b) + { zz_pX x; mul(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator*(zz_p a, const zz_pX& b) + { zz_pX x; mul(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX operator*(long a, const zz_pX& b) + { zz_pX x; mul(x, a, b); NTL_OPT_RETURN(zz_pX, x); } + +inline zz_pX& operator*=(zz_pX& x, const zz_pX& b) + { mul(x, x, b); return x; } + +inline zz_pX& operator*=(zz_pX& x, zz_p b) + { mul(x, x, b); return x; } + +inline zz_pX& operator*=(zz_pX& x, long b) + { mul(x, x, b); return x; } + + +void PlainMul(zz_pX& x, const zz_pX& a, const zz_pX& b); +// always uses the "classical" algorithm + +void PlainSqr(zz_pX& x, const zz_pX& a); +// always uses the "classical" algorithm + + +void FFTMul(zz_pX& x, const zz_pX& a, const zz_pX& b); +// always uses the FFT + +void FFTSqr(zz_pX& x, const zz_pX& a); +// always uses the FFT + +void MulTrunc(zz_pX& x, const zz_pX& a, const zz_pX& b, long n); +// x = a * b % X^n + +inline zz_pX MulTrunc(const zz_pX& a, const zz_pX& b, long n) + { zz_pX x; MulTrunc(x, a, b, n); NTL_OPT_RETURN(zz_pX, x); } + +void PlainMulTrunc(zz_pX& x, const zz_pX& a, const zz_pX& b, long n); +void FFTMulTrunc(zz_pX& x, const zz_pX& a, const zz_pX& b, long n); + +void SqrTrunc(zz_pX& x, const zz_pX& a, long n); +// x = a^2 % X^n + +inline zz_pX SqrTrunc(const zz_pX& a, long n) + { zz_pX x; SqrTrunc(x, a, n); NTL_OPT_RETURN(zz_pX, x); } + +void PlainSqrTrunc(zz_pX& x, const zz_pX& a, long n); +void FFTSqrTrunc(zz_pX& x, const zz_pX& a, long n); + +void power(zz_pX& x, const zz_pX& a, long e); +inline zz_pX power(const zz_pX& a, long e) + { zz_pX x; power(x, a, e); NTL_OPT_RETURN(zz_pX, x); } + + + + + + +// The following data structures and routines allow one +// to hand-craft various algorithms, using the FFT convolution +// algorithms directly. +// Look in the file zz_pX.c for examples. 
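+// Illustrative sketch (not from the NTL sources): a minimal use of the
+// multiplication routines above.  It assumes the modulus has already been
+// installed with zz_p::init(p) for some prime p; all names are local.
+//
+//    zz_pX a, b, c;
+//    SetCoeff(a, 3);  SetCoeff(a, 0, 2);   // a = X^3 + 2
+//    SetCoeff(b, 2);  SetCoeff(b, 1, 5);   // b = X^2 + 5*X
+//    mul(c, a, b);                         // c = a*b
+//    MulTrunc(c, a, b, 4);                 // c = a*b % X^4
+//    power(c, a, 10);                      // c = a^10
+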
+
+
+
+
+// FFT representation of polynomials
+
+class fftRep {
+
+public:
+   long k;          // a 2^k point representation
+   long MaxK;       // maximum space allocated
+   long NumPrimes;
+   UniqueArray<long> tbl[4];
+
+   fftRep() : k(-1), MaxK(-1), NumPrimes(0) { }
+
+   fftRep(const fftRep& R) : k(-1), MaxK(-1), NumPrimes(0)
+   { *this = R; }
+
+   fftRep(INIT_SIZE_TYPE, long InitK) : k(-1), MaxK(-1), NumPrimes(0)
+   { SetSize(InitK); }
+
+   fftRep& operator=(const fftRep&);
+   void SetSize(long NewK);
+   void DoSetSize(long NewK, long NewNumPrimes);
+};
+
+
+void TofftRep(fftRep& y, const zz_pX& x, long k, long lo, long hi);
+// computes an n = 2^k point convolution of x[lo..hi].
+
+inline void TofftRep(fftRep& y, const zz_pX& x, long k)
+
+   { TofftRep(y, x, k, 0, deg(x)); }
+
+void RevTofftRep(fftRep& y, const vec_zz_p& x,
+                 long k, long lo, long hi, long offset);
+// computes an n = 2^k point convolution of X^offset*x[lo..hi] mod X^n-1
+// using "inverted" evaluation points.
+
+
+
+void FromfftRep(zz_pX& x, fftRep& y, long lo, long hi);
+// converts from FFT-representation to coefficient representation
+// only the coefficients lo..hi are computed
+// NOTE: this version destroys the data in y
+
+// non-destructive versions of the above
+
+void NDFromfftRep(zz_pX& x, const fftRep& y, long lo, long hi, fftRep& temp);
+void NDFromfftRep(zz_pX& x, const fftRep& y, long lo, long hi);
+
+void RevFromfftRep(vec_zz_p& x, fftRep& y, long lo, long hi);
+
+   // converts from FFT-representation to coefficient representation
+   // using "inverted" evaluation points.
+   // only the coefficients lo..hi are computed
+
+
+
+
+void FromfftRep(zz_p* x, fftRep& y, long lo, long hi);
+// convert out coefficients lo..hi of y, store result in x.
+// no normalization is done.
+
+
+// direct manipulation of FFT reps
+
+void mul(fftRep& z, const fftRep& x, const fftRep& y);
+void sub(fftRep& z, const fftRep& x, const fftRep& y);
+void add(fftRep& z, const fftRep& x, const fftRep& y);
+
+void reduce(fftRep& x, const fftRep& a, long k);
+// reduces a 2^l point FFT-rep to a 2^k point FFT-rep
+
+void AddExpand(fftRep& x, const fftRep& a);
+//  x = x + (an "expanded" version of a)
+
+
+
+
+
+
+
+/*************************************************************
+
+                      Division
+
+**************************************************************/
+
+void DivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pX& b);
+// q = a/b, r = a%b
+
+void div(zz_pX& q, const zz_pX& a, const zz_pX& b);
+// q = a/b
+
+
+void div(zz_pX& q, const zz_pX& a, zz_p b);
+inline void div(zz_pX& q, const zz_pX& a, long b)
+   { div(q, a, to_zz_p(b)); }
+
+void rem(zz_pX& r, const zz_pX& a, const zz_pX& b);
+// r = a%b
+
+long divide(zz_pX& q, const zz_pX& a, const zz_pX& b);
+// if b | a, sets q = a/b and returns 1; otherwise returns 0
+
+long divide(const zz_pX& a, const zz_pX& b);
+// if b | a, returns 1; otherwise returns 0
+
+
+void InvTrunc(zz_pX& x, const zz_pX& a, long m);
+// computes x = a^{-1} % X^m
+// constant term must be non-zero
+
+inline zz_pX InvTrunc(const zz_pX& a, long m)
+   { zz_pX x; InvTrunc(x, a, m); NTL_OPT_RETURN(zz_pX, x); }
+
+
+
+
+// These always use "classical" arithmetic
+void PlainDivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pX& b);
+void PlainDiv(zz_pX& q, const zz_pX& a, const zz_pX& b);
+void PlainRem(zz_pX& r, const zz_pX& a, const zz_pX& b);
+
+
+// These always use FFT arithmetic
+void FFTDivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pX& b);
+void FFTDiv(zz_pX& q, const zz_pX& a, const zz_pX& b);
+void FFTRem(zz_pX& r, const zz_pX& a, const zz_pX& b);
+
+void PlainInvTrunc(zz_pX& x, const zz_pX& a, long m);
+// always uses "classical" algorithm
+// ALIAS RESTRICTION: input may not alias output
+
+void NewtonInvTrunc(zz_pX& x, const zz_pX& a, long m);
+// uses a Newton Iteration with the FFT.
+// ALIAS RESTRICTION: input may not alias output
+
+
+inline zz_pX operator/(const zz_pX& a, const zz_pX& b)
+   { zz_pX x; div(x, a, b); NTL_OPT_RETURN(zz_pX, x); }
+
+inline zz_pX operator/(const zz_pX& a, zz_p b)
+   { zz_pX x; div(x, a, b); NTL_OPT_RETURN(zz_pX, x); }
+
+inline zz_pX operator/(const zz_pX& a, long b)
+   { zz_pX x; div(x, a, b); NTL_OPT_RETURN(zz_pX, x); }
+
+inline zz_pX& operator/=(zz_pX& x, zz_p b)
+   { div(x, x, b); return x; }
+
+inline zz_pX& operator/=(zz_pX& x, long b)
+   { div(x, x, b); return x; }
+
+inline zz_pX& operator/=(zz_pX& x, const zz_pX& b)
+   { div(x, x, b); return x; }
+
+
+inline zz_pX operator%(const zz_pX& a, const zz_pX& b)
+   { zz_pX x; rem(x, a, b); NTL_OPT_RETURN(zz_pX, x); }
+
+inline zz_pX& operator%=(zz_pX& x, const zz_pX& b)
+   { rem(x, x, b); return x; }
+
+
+
+
+/***********************************************************
+
+                         GCD's
+
+************************************************************/
+
+
+void GCD(zz_pX& x, const zz_pX& a, const zz_pX& b);
+// x = GCD(a, b),  x is always monic (or zero if a==b==0).
+
+inline zz_pX GCD(const zz_pX& a, const zz_pX& b)
+   { zz_pX x; GCD(x, a, b); NTL_OPT_RETURN(zz_pX, x); }
+
+
+void XGCD(zz_pX& d, zz_pX& s, zz_pX& t, const zz_pX& a, const zz_pX& b);
+// d = gcd(a,b), a s + b t = d
+
+void PlainXGCD(zz_pX& d, zz_pX& s, zz_pX& t, const zz_pX& a, const zz_pX& b);
+// same as above, but uses classical algorithm
+
+
+void PlainGCD(zz_pX& x, const zz_pX& a, const zz_pX& b);
+// always uses "classical" arithmetic
+
+
+class zz_pXMatrix {
+private:
+
+   zz_pXMatrix(const zz_pXMatrix&);  // disable
+   zz_pX elts[2][2];
+
+public:
+
+   zz_pXMatrix() { }
+   ~zz_pXMatrix() { }
+
+   void operator=(const zz_pXMatrix&);
+   zz_pX& operator() (long i, long j) { return elts[i][j]; }
+   const zz_pX& operator() (long i, long j) const { return elts[i][j]; }
+};
+
+
+void HalfGCD(zz_pXMatrix& M_out, const zz_pX& U, const zz_pX& V, long d_red);
+// deg(U) > deg(V), 1 <= d_red <= deg(U)+1.
+//
+// This computes a 2 x 2 polynomial matrix M_out such that
+//    M_out * (U, V)^T = (U', V')^T,
+// where U', V' are consecutive polynomials in the Euclidean remainder
+// sequence of U, V, and V' is the polynomial of highest degree
+// satisfying deg(V') <= deg(U) - d_red.
+
+void XHalfGCD(zz_pXMatrix& M_out, zz_pX& U, zz_pX& V, long d_red);
+
+// same as above, except that U is replaced by U', and V by V'
+
+
+/*************************************************************
+
+          Modular Arithmetic without pre-conditioning
+
+**************************************************************/
+
+// arithmetic mod f.
+// all inputs and outputs are polynomials of degree less than deg(f).
+// ASSUMPTION: f is assumed monic, and deg(f) > 0.
+// NOTE: if you want to do many computations with a fixed f,
+//       use the zz_pXModulus data structure and associated routines below.
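+
+// Illustrative sketch (not from the NTL sources) of the routines declared
+// below; f is assumed monic with deg(f) > 0, and a, b reduced mod f:
+//
+//    zz_pX c, v;
+//    MulMod(c, a, b, f);            // c = a*b % f
+//    SqrMod(c, a, f);               // c = a^2 % f
+//    if (InvModStatus(v, a, f))
+//       { /* a not invertible; v = gcd(a, f) */ }
+//    else
+//       { /* v = a^{-1} % f */ }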
+
+
+
+void MulMod(zz_pX& x, const zz_pX& a, const zz_pX& b, const zz_pX& f);
+// x = (a * b) % f
+
+inline zz_pX MulMod(const zz_pX& a, const zz_pX& b, const zz_pX& f)
+   { zz_pX x; MulMod(x, a, b, f); NTL_OPT_RETURN(zz_pX, x); }
+
+void SqrMod(zz_pX& x, const zz_pX& a, const zz_pX& f);
+// x = a^2 % f
+
+inline zz_pX SqrMod(const zz_pX& a, const zz_pX& f)
+   { zz_pX x; SqrMod(x, a, f); NTL_OPT_RETURN(zz_pX, x); }
+
+void MulByXMod(zz_pX& x, const zz_pX& a, const zz_pX& f);
+// x = (a * X) mod f
+
+inline zz_pX MulByXMod(const zz_pX& a, const zz_pX& f)
+   { zz_pX x; MulByXMod(x, a, f); NTL_OPT_RETURN(zz_pX, x); }
+
+void InvMod(zz_pX& x, const zz_pX& a, const zz_pX& f);
+// x = a^{-1} % f, error if a is not invertible
+
+inline zz_pX InvMod(const zz_pX& a, const zz_pX& f)
+   { zz_pX x; InvMod(x, a, f); NTL_OPT_RETURN(zz_pX, x); }
+
+long InvModStatus(zz_pX& x, const zz_pX& a, const zz_pX& f);
+// if (a, f) = 1, returns 0 and sets x = a^{-1} % f
+// otherwise, returns 1 and sets x = (a, f)
+
+
+
+/******************************************************************
+
+             Modular Arithmetic with Pre-conditioning
+
+*******************************************************************/
+
+
+// If you need to do a lot of arithmetic modulo a fixed f,
+// build zz_pXModulus F for f.  This pre-computes information about f
+// that speeds up the computation a great deal.
+
+
+class zz_pXModulus {
+public:
+   zz_pXModulus() : UseFFT(0), n(-1) { }
+   ~zz_pXModulus() { }
+
+   zz_pX f;        // the modulus
+   long UseFFT;    // flag indicating whether FFT should be used.
+   long n;         // n = deg(f)
+   long k;         // least k s/t 2^k >= n
+   long l;         // least l s/t 2^l >= 2n-3
+   fftRep FRep;    // 2^k point rep of f
+                   // H = rev((rev(f))^{-1} rem X^{n-1})
+   fftRep HRep;    // 2^l point rep of H
+
+   OptionalVal< Lazy<vec_zz_p> > tracevec;
+   // extra level of indirection to ensure relocatability
+
+   zz_pXModulus(const zz_pX& ff);
+
+   operator const zz_pX& () const { return f; }
+   const zz_pX& val() const { return f; }
+
+};
+
+
+inline long deg(const zz_pXModulus& F) { return F.n; }
+
+void build(zz_pXModulus& F, const zz_pX& f);
+// deg(f) > 0
+
+
+void rem21(zz_pX& x, const zz_pX& a, const zz_pXModulus& F);
+// x = a % f
+// deg(a) <= 2(n-1), where n = F.n = deg(f)
+
+void rem(zz_pX& x, const zz_pX& a, const zz_pXModulus& F);
+// x = a % f, no restrictions on deg(a); makes repeated calls to rem21
+
+inline zz_pX operator%(const zz_pX& a, const zz_pXModulus& F)
+   { zz_pX x; rem(x, a, F); NTL_OPT_RETURN(zz_pX, x); }
+
+inline zz_pX& operator%=(zz_pX& x, const zz_pXModulus& F)
+   { rem(x, x, F); return x; }
+
+
+void DivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pXModulus& F);
+
+void div(zz_pX& q, const zz_pX& a, const zz_pXModulus& F);
+
+inline zz_pX operator/(const zz_pX& a, const zz_pXModulus& F)
+   { zz_pX x; div(x, a, F); NTL_OPT_RETURN(zz_pX, x); }
+
+inline zz_pX& operator/=(zz_pX& x, const zz_pXModulus& F)
+   { div(x, x, F); return x; }
+
+
+
+void MulMod(zz_pX& x, const zz_pX& a, const zz_pX& b, const zz_pXModulus& F);
+// x = (a * b) % f
+// deg(a), deg(b) < n
+
+inline zz_pX MulMod(const zz_pX& a, const zz_pX& b, const zz_pXModulus& F)
+   { zz_pX x; MulMod(x, a, b, F); NTL_OPT_RETURN(zz_pX, x); }
+
+
+void SqrMod(zz_pX& x, const zz_pX& a, const zz_pXModulus& F);
+// x = a^2 % f
+// deg(a) < n
+
+
+inline zz_pX SqrMod(const zz_pX& a, const zz_pXModulus& F)
+   { zz_pX x; SqrMod(x, a, F); NTL_OPT_RETURN(zz_pX, x); }
+
+
+void PowerMod(zz_pX& x, const zz_pX& a, const ZZ& e, const zz_pXModulus& F);
+// x = a^e % f, e >= 0
+
+inline zz_pX
PowerMod(const zz_pX& a, const ZZ& e, const zz_pXModulus& F) + { zz_pX x; PowerMod(x, a, e, F); NTL_OPT_RETURN(zz_pX, x); } + +inline void PowerMod(zz_pX& x, const zz_pX& a, long e, const zz_pXModulus& F) + { PowerMod(x, a, ZZ_expo(e), F); } + +inline zz_pX PowerMod(const zz_pX& a, long e, const zz_pXModulus& F) + { zz_pX x; PowerMod(x, a, e, F); NTL_OPT_RETURN(zz_pX, x); } + + + +void PowerXMod(zz_pX& x, const ZZ& e, const zz_pXModulus& F); +// x = X^e % f, e >= 0 + +inline zz_pX PowerXMod(const ZZ& e, const zz_pXModulus& F) + { zz_pX x; PowerXMod(x, e, F); NTL_OPT_RETURN(zz_pX, x); } + +inline void PowerXMod(zz_pX& x, long e, const zz_pXModulus& F) + { PowerXMod(x, ZZ_expo(e), F); } + +inline zz_pX PowerXMod(long e, const zz_pXModulus& F) + { zz_pX x; PowerXMod(x, e, F); NTL_OPT_RETURN(zz_pX, x); } + +void PowerXPlusAMod(zz_pX& x, zz_p a, const ZZ& e, const zz_pXModulus& F); +// x = (X + a)^e % f, e >= 0 + +inline zz_pX PowerXPlusAMod(zz_p a, const ZZ& e, const zz_pXModulus& F) + { zz_pX x; PowerXPlusAMod(x, a, e, F); NTL_OPT_RETURN(zz_pX, x); } + +inline void PowerXPlusAMod(zz_pX& x, zz_p a, long e, const zz_pXModulus& F) + { PowerXPlusAMod(x, a, ZZ_expo(e), F); } + + +inline zz_pX PowerXPlusAMod(zz_p a, long e, const zz_pXModulus& F) + { zz_pX x; PowerXPlusAMod(x, a, e, F); NTL_OPT_RETURN(zz_pX, x); } + +// If you need to compute a * b % f for a fixed b, but for many a's +// (for example, computing powers of b modulo f), it is +// much more efficient to first build a zz_pXMultiplier B for b, +// and then use the routine below. + +class zz_pXMultiplier { +public: + zz_pXMultiplier() : UseFFT(0) { } + zz_pXMultiplier(const zz_pX& b, const zz_pXModulus& F); + + ~zz_pXMultiplier() { } + + zz_pX b; + long UseFFT; + fftRep B1; + fftRep B2; + + const zz_pX& val() const { return b; } + +}; + +void build(zz_pXMultiplier& B, const zz_pX& b, const zz_pXModulus& F); + + + +void MulMod(zz_pX& x, const zz_pX& a, const zz_pXMultiplier& B, + const zz_pXModulus& F); + +// x = (a * b) % f + +inline zz_pX MulMod(const zz_pX& a, const zz_pXMultiplier& B, + const zz_pXModulus& F) + { zz_pX x; MulMod(x, a, B, F); NTL_OPT_RETURN(zz_pX, x); } + + + + +/******************************************************* + + Evaluation and related problems + +********************************************************/ + + +void BuildFromRoots(zz_pX& x, const vec_zz_p& a); +// computes the polynomial (X-a[0]) ... 
(X-a[n-1]), where n = a.length()
+
+inline zz_pX BuildFromRoots(const vec_zz_p& a)
+   { zz_pX x; BuildFromRoots(x, a); NTL_OPT_RETURN(zz_pX, x); }
+
+
+
+void eval(zz_p& b, const zz_pX& f, zz_p a);
+// b = f(a)
+
+
+inline zz_p eval(const zz_pX& f, zz_p a)
+   { zz_p x; eval(x, f, a); return x; }
+
+
+void eval(vec_zz_p& b, const zz_pX& f, const vec_zz_p& a);
+// b[i] = f(a[i])
+
+inline vec_zz_p eval(const zz_pX& f, const vec_zz_p& a)
+   { vec_zz_p x; eval(x, f, a); NTL_OPT_RETURN(vec_zz_p, x); }
+
+
+void interpolate(zz_pX& f, const vec_zz_p& a, const vec_zz_p& b);
+// computes f such that f(a[i]) = b[i]
+
+inline zz_pX interpolate(const vec_zz_p& a, const vec_zz_p& b)
+   { zz_pX x; interpolate(x, a, b); NTL_OPT_RETURN(zz_pX, x); }
+
+
+
+/*****************************************************************
+
+                       vectors of zz_pX's
+
+*****************************************************************/
+
+typedef Vec<zz_pX> vec_zz_pX;
+
+
+/**********************************************************
+
+         Modular Composition and Minimal Polynomials
+
+***********************************************************/
+
+
+// algorithms for computing g(h) mod f
+
+
+
+
+void CompMod(zz_pX& x, const zz_pX& g, const zz_pX& h, const zz_pXModulus& F);
+// x = g(h) mod f
+
+inline zz_pX CompMod(const zz_pX& g, const zz_pX& h,
+                     const zz_pXModulus& F)
+   { zz_pX x; CompMod(x, g, h, F); NTL_OPT_RETURN(zz_pX, x); }
+
+
+void Comp2Mod(zz_pX& x1, zz_pX& x2, const zz_pX& g1, const zz_pX& g2,
+              const zz_pX& h, const zz_pXModulus& F);
+// xi = gi(h) mod f (i=1,2)
+
+void Comp3Mod(zz_pX& x1, zz_pX& x2, zz_pX& x3,
+              const zz_pX& g1, const zz_pX& g2, const zz_pX& g3,
+              const zz_pX& h, const zz_pXModulus& F);
+// xi = gi(h) mod f (i=1..3)
+
+
+
+// The routine build (see below), which is implicitly called
+// by the various compose and UpdateMap routines, builds a table
+// of polynomials.
+// If zz_pXArgBound > 0, then the table is limited in
+// size to approximately that many KB.
+// If zz_pXArgBound <= 0, then it is ignored, and space is allocated
+// so as to maximize speed.
+// Initially, zz_pXArgBound = 0.
+
+
+// If a single h is going to be used with many g's
+// then you should build a zz_pXArgument for h,
+// and then use the compose routine below.
+// build computes and stores h, h^2, ..., h^m mod f.
+// After this pre-computation, composing a polynomial of degree
+// roughly n with h takes n/m multiplies mod f, plus n^2
+// scalar multiplies.
+// Thus, increasing m increases the space requirement and the pre-computation
+// time, but reduces the composition time.
+// If zz_pXArgBound > 0, a table of size less than m may be built.
+
+struct zz_pXArgument {
+   vec_zz_pX H;
+};
+
+extern
+NTL_CHEAP_THREAD_LOCAL
+long zz_pXArgBound;
+
+
+void build(zz_pXArgument& H, const zz_pX& h, const zz_pXModulus& F, long m);
+
+// m must be > 0, otherwise an error is raised
+
+void CompMod(zz_pX& x, const zz_pX& g, const zz_pXArgument& H,
+             const zz_pXModulus& F);
+
+inline zz_pX
+CompMod(const zz_pX& g, const zz_pXArgument& H, const zz_pXModulus& F)
+   { zz_pX x; CompMod(x, g, H, F); NTL_OPT_RETURN(zz_pX, x); }
+
+
+
+// experimental variant that yields a faster CompMod
+// Usage:
+//    zz_pXArgument H;
+//    build(H, h, F);
+//    zz_pXAltArgument H1;
+//    build(H1, H, F);      // this keeps a pointer to H, so H must remain alive
+//    CompMod(x, g, H1, F); // x = g(h) mod f
+
+struct zz_pXAltArgument {
+
+   const zz_pXArgument *orig;
+   zz_pXAltArgument() : orig(0) {}
+
+#ifdef NTL_HAVE_LL_TYPE
+   long strategy;
+
+   long n, m;
+   Vec< Vec<long> > mem;
+   Vec<const long*> row;
+
+   // NOTE: the following two members are used only if
+   // NTL_HAVE_AVX; however, we declare them unconditionally
+   // to facilitate the possibility of dynamic linking based
+   // on architecture
+   Vec< AlignedArray<double> > dmem;
+   Vec<const double*> drow;
+
+   sp_ll_reduce_struct pinv_LL;
+   sp_reduce_struct pinv_L;
+#endif
+};
+
+
+void build(zz_pXAltArgument& altH, const zz_pXArgument& H, const zz_pXModulus& F);
+void CompMod(zz_pX& x, const zz_pX& g, const zz_pXAltArgument& A,
+             const zz_pXModulus& F);
+
+
+
+
+#ifndef NTL_TRANSITION
+
+void UpdateMap(vec_zz_p& x, const vec_zz_p& a,
+               const zz_pXMultiplier& B, const zz_pXModulus& F);
+
+inline vec_zz_p
+UpdateMap(const vec_zz_p& a,
+          const zz_pXMultiplier& B, const zz_pXModulus& F)
+   { vec_zz_p x; UpdateMap(x, a, B, F);
+     NTL_OPT_RETURN(vec_zz_p, x); }
+
+#endif
+
+
+/* computes (a, b), (a, (b*X)%f), ..., (a, (b*X^{n-1})%f),
+   where ( , ) denotes the vector inner product.
+
+   This is really a "transposed" MulMod by B.
+*/
+
+void PlainUpdateMap(vec_zz_p& x, const vec_zz_p& a,
+                    const zz_pX& b, const zz_pX& f);
+
+
+// same as above, but uses only classical arithmetic
+
+
+void ProjectPowers(vec_zz_p& x, const vec_zz_p& a, long k,
+                   const zz_pX& h, const zz_pXModulus& F);
+
+// computes (a, 1), (a, h), ..., (a, h^{k-1} % f)
+// this is really a "transposed" compose.
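+
+// Illustrative sketch (not from the NTL sources): an end-to-end use of the
+// composition machinery above; all names are local, and f is assumed
+// monic with deg(f) > 0:
+//
+//    zz_pXModulus F;
+//    build(F, f);
+//    zz_pXArgument H;
+//    build(H, h, F, m);        // stores h, h^2, ..., h^m mod f, some m > 0
+//    zz_pX x;
+//    CompMod(x, g, H, F);      // x = g(h) mod f; H can be reused for many g's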
+
+inline vec_zz_p ProjectPowers(const vec_zz_p& a, long k,
+                              const zz_pX& h, const zz_pXModulus& F)
+{
+   vec_zz_p x;
+   ProjectPowers(x, a, k, h, F);
+   NTL_OPT_RETURN(vec_zz_p, x);
+}
+
+
+void ProjectPowers(vec_zz_p& x, const vec_zz_p& a, long k,
+                   const zz_pXArgument& H, const zz_pXModulus& F);
+
+inline vec_zz_p ProjectPowers(const vec_zz_p& a, long k,
+                              const zz_pXArgument& H, const zz_pXModulus& F)
+{
+   vec_zz_p x;
+   ProjectPowers(x, a, k, H, F);
+   NTL_OPT_RETURN(vec_zz_p, x);
+}
+
+// same as above, but uses a pre-computed zz_pXArgument
+
+inline void project(zz_p& x, const vec_zz_p& a, const zz_pX& b)
+   { InnerProduct(x, a, b.rep); }
+
+inline zz_p project(const vec_zz_p& a, const zz_pX& b)
+   { zz_p x; project(x, a, b); return x; }
+
+
+void MinPolySeq(zz_pX& h, const vec_zz_p& a, long m);
+// computes the minimum polynomial of a linearly generated sequence;
+// m is a bound on the degree of the polynomial;
+// required: a.length() >= 2*m
+
+inline zz_pX MinPolySeq(const vec_zz_p& a, long m)
+   { zz_pX x; MinPolySeq(x, a, m); NTL_OPT_RETURN(zz_pX, x); }
+
+void ProbMinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m);
+
+inline zz_pX ProbMinPolyMod(const zz_pX& g, const zz_pXModulus& F, long m)
+   { zz_pX x; ProbMinPolyMod(x, g, F, m); NTL_OPT_RETURN(zz_pX, x); }
+
+
+inline void ProbMinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F)
+   { ProbMinPolyMod(h, g, F, F.n); }
+
+inline zz_pX ProbMinPolyMod(const zz_pX& g, const zz_pXModulus& F)
+   { zz_pX x; ProbMinPolyMod(x, g, F); NTL_OPT_RETURN(zz_pX, x); }
+
+
+// computes the monic minimal polynomial of (g mod f).
+// m = a bound on the degree of the minimal polynomial.
+// If this argument is not supplied, it defaults to deg(f).
+// The algorithm is probabilistic, always returns a divisor of
+// the minimal polynomial, and returns a proper divisor with
+// probability at most m/p.
+
+void MinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m);
+
+inline zz_pX MinPolyMod(const zz_pX& g, const zz_pXModulus& F, long m)
+   { zz_pX x; MinPolyMod(x, g, F, m); NTL_OPT_RETURN(zz_pX, x); }
+
+
+inline void MinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F)
+   { MinPolyMod(h, g, F, F.n); }
+
+inline zz_pX MinPolyMod(const zz_pX& g, const zz_pXModulus& F)
+   { zz_pX x; MinPolyMod(x, g, F); NTL_OPT_RETURN(zz_pX, x); }
+
+
+// same as above, but guarantees that result is correct
+
+void IrredPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m);
+
+inline zz_pX IrredPolyMod(const zz_pX& g, const zz_pXModulus& F, long m)
+   { zz_pX x; IrredPolyMod(x, g, F, m); NTL_OPT_RETURN(zz_pX, x); }
+
+
+inline void IrredPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F)
+   { IrredPolyMod(h, g, F, F.n); }
+
+inline zz_pX IrredPolyMod(const zz_pX& g, const zz_pXModulus& F)
+   { zz_pX x; IrredPolyMod(x, g, F); NTL_OPT_RETURN(zz_pX, x); }
+
+
+// same as above, but assumes that f is irreducible,
+// or at least that the minimal poly of g is itself irreducible.
+// The algorithm is deterministic (and is always correct).
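+
+// Illustrative sketch (not from the NTL sources): the three variants above
+// trade speed against certainty -- ProbMinPolyMod may return a proper
+// divisor with small probability, MinPolyMod is always correct, and
+// IrredPolyMod is the fast deterministic choice when f is irreducible:
+//
+//    zz_pXModulus F;
+//    build(F, f);
+//    zz_pX h;
+//    MinPolyMod(h, g, F);      // monic minimal polynomial of (g mod f)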
+
+
+
+/*****************************************************************
+
+                  Traces, norms, resultants
+
+******************************************************************/
+
+void TraceVec(vec_zz_p& S, const zz_pX& f);
+
+inline vec_zz_p TraceVec(const zz_pX& f)
+   { vec_zz_p x; TraceVec(x, f); NTL_OPT_RETURN(vec_zz_p, x); }
+
+
+void FastTraceVec(vec_zz_p& S, const zz_pX& f);
+void PlainTraceVec(vec_zz_p& S, const zz_pX& f);
+
+void TraceMod(zz_p& x, const zz_pX& a, const zz_pXModulus& F);
+
+inline zz_p TraceMod(const zz_pX& a, const zz_pXModulus& F)
+   { zz_p x; TraceMod(x, a, F); return x; }
+
+void TraceMod(zz_p& x, const zz_pX& a, const zz_pX& f);
+
+inline zz_p TraceMod(const zz_pX& a, const zz_pX& f)
+   { zz_p x; TraceMod(x, a, f); return x; }
+
+
+
+
+void ComputeTraceVec(const zz_pXModulus& F);
+
+void NormMod(zz_p& x, const zz_pX& a, const zz_pX& f);
+
+
+inline zz_p NormMod(const zz_pX& a, const zz_pX& f)
+   { zz_p x; NormMod(x, a, f); return x; }
+
+void resultant(zz_p& rres, const zz_pX& a, const zz_pX& b);
+
+inline zz_p resultant(const zz_pX& a, const zz_pX& b)
+   { zz_p x; resultant(x, a, b); return x; }
+
+void CharPolyMod(zz_pX& g, const zz_pX& a, const zz_pX& f);
+// g = char poly of (a mod f)
+// only implemented for p >= deg(f)+1
+
+inline zz_pX CharPolyMod(const zz_pX& a, const zz_pX& f)
+   { zz_pX x; CharPolyMod(x, a, f); NTL_OPT_RETURN(zz_pX, x); }
+
+
+
+
+
+NTL_CLOSE_NNS
+
+#endif
diff --git a/thirdparty/linux/ntl/include/NTL/lzz_pXFactoring.h b/thirdparty/linux/ntl/include/NTL/lzz_pXFactoring.h
new file mode 100644
index 0000000000..774a2d2b76
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/lzz_pXFactoring.h
@@ -0,0 +1,255 @@
+
+
+#ifndef NTL_zz_pXFactoring__H
+#define NTL_zz_pXFactoring__H
+
+#include
+#include
+#include
+
+NTL_OPEN_NNS
+
+
+
+/************************************************************
+
+                  factorization routines
+
+************************************************************/
+
+
+
+
+
+void SquareFreeDecomp(vec_pair_zz_pX_long& u, const zz_pX& f);
+inline vec_pair_zz_pX_long SquareFreeDecomp(const zz_pX& f)
+   { vec_pair_zz_pX_long x; SquareFreeDecomp(x, f); return x; }
+
+
+// Performs square-free decomposition.
+// f must be monic.
+// If f = prod_i g_i^i, then u is set to a list of pairs (g_i, i).
+// The list is in increasing order of i, with trivial terms
+// (i.e., g_i = 1) deleted.
+
+
+
+void FindRoots(vec_zz_p& x, const zz_pX& f);
+inline vec_zz_p FindRoots(const zz_pX& f)
+   { vec_zz_p x; FindRoots(x, f); return x; }
+
+
+// f is monic, and has deg(f) distinct roots.
+// returns the list of roots
+
+void FindRoot(zz_p& root, const zz_pX& f);
+inline zz_p FindRoot(const zz_pX& f)
+   { zz_p x; FindRoot(x, f); return x; }
+
+
+// finds a single root of f.
+// assumes that f is monic and splits into distinct linear factors
+
+
+void SFBerlekamp(vec_zz_pX& factors, const zz_pX& f, long verbose=0);
+inline vec_zz_pX SFBerlekamp(const zz_pX& f, long verbose=0)
+   { vec_zz_pX x; SFBerlekamp(x, f, verbose); return x; }
+
+
+// Assumes f is square-free and monic.
+// returns list of factors of f.
+// Uses "Berlekamp" approach.
+
+
+void berlekamp(vec_pair_zz_pX_long& factors, const zz_pX& f, long verbose=0);
+inline vec_pair_zz_pX_long berlekamp(const zz_pX& f, long verbose=0)
+   { vec_pair_zz_pX_long x; berlekamp(x, f, verbose); return x; }
+
+
+// returns a list of factors, with multiplicities.
+// f must be monic.
+// Uses "Berlekamp" approach.
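+
+// Illustrative sketch (not from the NTL sources): a complete factorization
+// of a monic f via the Berlekamp routines above:
+//
+//    vec_pair_zz_pX_long factors;
+//    berlekamp(factors, f);
+//    for (long i = 0; i < factors.length(); i++) {
+//       // factors[i].a is an irreducible factor of f,
+//       // factors[i].b is its multiplicity
+//    }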
+
+
+
+
+
+extern
+NTL_CHEAP_THREAD_LOCAL
+long zz_pX_BlockingFactor;
+// Controls GCD blocking for DDF.
+
+
+void DDF(vec_pair_zz_pX_long& factors, const zz_pX& f, const zz_pX& h,
+         long verbose=0);
+inline vec_pair_zz_pX_long DDF(const zz_pX& f, const zz_pX& h,
+                               long verbose=0)
+   { vec_pair_zz_pX_long x; DDF(x, f, h, verbose); return x; }
+
+
+// Performs distinct-degree factorization.
+// Assumes f is monic and square-free, and h = X^p mod f
+// Obsolete: see NewDDF, below.
+
+
+extern
+NTL_CHEAP_THREAD_LOCAL
+long zz_pX_GCDTableSize; /* = 4 */
+// Controls GCD blocking for NewDDF
+
+
+void NewDDF(vec_pair_zz_pX_long& factors, const zz_pX& f, const zz_pX& h,
+            long verbose=0);
+inline vec_pair_zz_pX_long NewDDF(const zz_pX& f, const zz_pX& h,
+                                  long verbose=0)
+   { vec_pair_zz_pX_long x; NewDDF(x, f, h, verbose); return x; }
+
+
+// same as above, but uses baby-step/giant-step method
+
+
+void EDF(vec_zz_pX& factors, const zz_pX& f, const zz_pX& b,
+         long d, long verbose=0);
+
+inline vec_zz_pX EDF(const zz_pX& f, const zz_pX& b,
+                     long d, long verbose=0)
+   { vec_zz_pX x; EDF(x, f, b, d, verbose); return x; }
+
+
+// Performs equal-degree factorization.
+// f is monic, square-free, and all irreducible factors have same degree.
+// b = X^p mod f.
+// d = degree of irreducible factors of f
+// Space for the trace-map computation can be controlled via ComposeBound.
+
+
+
+void RootEDF(vec_zz_pX& factors, const zz_pX& f, long verbose=0);
+inline vec_zz_pX RootEDF(const zz_pX& f, long verbose=0)
+   { vec_zz_pX x; RootEDF(x, f, verbose); return x; }
+
+
+// EDF for d==1
+
+void SFCanZass(vec_zz_pX& factors, const zz_pX& f, long verbose=0);
+inline vec_zz_pX SFCanZass(const zz_pX& f, long verbose=0)
+   { vec_zz_pX x; SFCanZass(x, f, verbose); return x; }
+
+
+// Assumes f is square-free.
+// returns list of factors of f.
+// Uses "Cantor/Zassenhaus" approach.
+
+
+
+void SFCanZass1(vec_pair_zz_pX_long& u, zz_pX& h, const zz_pX& f,
+                long verbose=0);
+
+// Not intended for general use.
+
+void SFCanZass2(vec_zz_pX& factors, const vec_pair_zz_pX_long& u,
+                const zz_pX& h, long verbose=0);
+
+// Not intended for general use.
+
+
+void CanZass(vec_pair_zz_pX_long& factors, const zz_pX& f, long verbose=0);
+
+inline vec_pair_zz_pX_long CanZass(const zz_pX& f, long verbose=0)
+   { vec_pair_zz_pX_long x; CanZass(x, f, verbose); return x; }
+
+
+// returns a list of factors, with multiplicities.
+// f must be monic.
+// Uses "Cantor/Zassenhaus" approach.
+
+
+
+
+void mul(zz_pX& f, const vec_pair_zz_pX_long& v);
+inline zz_pX mul(const vec_pair_zz_pX_long& v)
+   { zz_pX x; mul(x, v); return x; }
+
+
+// multiplies polynomials, with multiplicities
+
+
+/*************************************************************
+
+            irreducible poly's: tests and constructions
+
+**************************************************************/
+
+long ProbIrredTest(const zz_pX& f, long iter=1);
+
+// performs a fast, probabilistic irreducibility test
+// the test can err only if f is reducible, and the
+// error probability is bounded by p^{-iter}.
+
+long DetIrredTest(const zz_pX& f);
+
+// performs a recursive deterministic irreducibility test
+// fast in the worst-case (when input is irreducible).
+
+long IterIrredTest(const zz_pX& f);
+
+// performs an iterative deterministic irreducibility test,
+// based on DDF.  Fast on average (when f has a small factor).
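+
+// Illustrative sketch (not from the NTL sources): a cheap probabilistic
+// screen followed by a deterministic confirmation:
+//
+//    if (ProbIrredTest(f, 10)) {    // error probability at most p^{-10}
+//       if (IterIrredTest(f)) {
+//          // f is certainly irreducible
+//       }
+//    }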
+
+void BuildIrred(zz_pX& f, long n);
+inline zz_pX BuildIrred_zz_pX(long n)
+   { zz_pX x; BuildIrred(x, n); NTL_OPT_RETURN(zz_pX, x); }
+
+
+// Build a monic irreducible poly of degree n.
+
+void BuildRandomIrred(zz_pX& f, const zz_pX& g);
+inline zz_pX BuildRandomIrred(const zz_pX& g)
+   { zz_pX x; BuildRandomIrred(x, g); NTL_OPT_RETURN(zz_pX, x); }
+
+
+// g is a monic irreducible polynomial.
+// constructs a random monic irreducible polynomial f of the same degree.
+
+
+long ComputeDegree(const zz_pX& h, const zz_pXModulus& F);
+
+// f = F.f is assumed to be an "equal degree" polynomial
+// h = X^p mod f
+// the common degree of the irreducible factors of f is computed
+// This routine is useful in counting points on elliptic curves
+
+long ProbComputeDegree(const zz_pX& h, const zz_pXModulus& F);
+
+// same as above, but uses a slightly faster probabilistic algorithm
+// the return value may be 0 or may be too big, but for large p
+// (relative to n), this happens with very low probability.
+
+
+
+void TraceMap(zz_pX& w, const zz_pX& a, long d, const zz_pXModulus& F,
+              const zz_pX& b);
+
+inline zz_pX TraceMap(const zz_pX& a, long d, const zz_pXModulus& F,
+                      const zz_pX& b)
+   { zz_pX x; TraceMap(x, a, d, F, b); return x; }
+
+
+// w = a + a^q + ... + a^{q^{d-1}} mod f;
+// it is assumed that d >= 0, and b = X^q mod f, q a power of p
+// Space allocation can be controlled via ComposeBound (see "zz_pX.h")
+
+
+
+void PowerCompose(zz_pX& w, const zz_pX& a, long d, const zz_pXModulus& F);
+inline zz_pX PowerCompose(const zz_pX& a, long d, const zz_pXModulus& F)
+   { zz_pX x; PowerCompose(x, a, d, F); return x; }
+
+
+// w = X^{q^d} mod f;
+// it is assumed that d >= 0, and a = X^q mod f, q a power of p
+// Space allocation can be controlled via ComposeBound (see "zz_pX.h")
+
+NTL_CLOSE_NNS
+
+#endif
diff --git a/thirdparty/linux/ntl/include/NTL/mach_desc.h b/thirdparty/linux/ntl/include/NTL/mach_desc.h
new file mode 100644
index 0000000000..7ac06a0305
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/mach_desc.h
@@ -0,0 +1,850 @@
+#ifndef NTL_mach_desc__H
+#define NTL_mach_desc__H
+
+
+#define NTL_BITS_PER_LONG (64)
+#define NTL_MAX_LONG (9223372036854775807L)
+#define NTL_MAX_INT (2147483647)
+#define NTL_BITS_PER_INT (32)
+#define NTL_BITS_PER_SIZE_T (64)
+#define NTL_ARITH_RIGHT_SHIFT (1)
+#define NTL_NBITS_MAX (50)
+#define NTL_WNBITS_MAX (62)
+#define NTL_DOUBLE_PRECISION (53)
+#define NTL_FDOUBLE_PRECISION (((double)(1L<<52)))
+#define NTL_LONGDOUBLE_OK (1)
+#define NTL_LONGDOUBLE_PRECISION (64)
+#define NTL_WIDE_DOUBLE_LDP ((wide_double(1L<<62))*(wide_double(1L<<1)))
+#define NTL_WIDE_DOUBLE_DP ((wide_double(1L<<52)))
+#define NTL_QUAD_FLOAT_SPLIT ((((double)(1L<<27)))+1.0)
+#define NTL_EXT_DOUBLE (0)
+#define NTL_FMA_DETECTED (1)
+#define NTL_BIG_POINTERS (0)
+#define NTL_MIN_LONG (-NTL_MAX_LONG - 1L)
+#define NTL_MIN_INT (-NTL_MAX_INT - 1)
+
+
+#define NTL_BB_MUL_CODE0 \
+   _ntl_ulong hi, lo, t;\
+   _ntl_ulong A[16];\
+   A[0] = 0;\
+   A[1] = a;\
+   A[2] = A[1] << 1;\
+   A[3] = A[2] ^ A[1];\
+   A[4] = A[2] << 1;\
+   A[5] = A[4] ^ A[1];\
+   A[6] = A[3] << 1;\
+   A[7] = A[6] ^ A[1];\
+   A[8] = A[4] << 1;\
+   A[9] = A[8] ^ A[1];\
+   A[10] = A[5] << 1;\
+   A[11] = A[10] ^ A[1];\
+   A[12] = A[6] << 1;\
+   A[13] = A[12] ^ A[1];\
+   A[14] = A[7] << 1;\
+   A[15] = A[14] ^ A[1];\
+   lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 60; lo ^= t << 4;\
+   t = A[(b >> 8) & 15]; hi ^= t >> 56; lo ^= t << 8;\
+   t = A[(b >> 12) & 15]; hi ^= t >> 52; lo ^= t << 12;\
+   t = A[(b >> 16) & 15]; hi ^= t >> 48; lo ^= t << 16;\
+   t
= A[(b >> 20) & 15]; hi ^= t >> 44; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 40; lo ^= t << 24;\ + t = A[(b >> 28) & 15]; hi ^= t >> 36; lo ^= t << 28;\ + t = A[(b >> 32) & 15]; hi ^= t >> 32; lo ^= t << 32;\ + t = A[(b >> 36) & 15]; hi ^= t >> 28; lo ^= t << 36;\ + t = A[(b >> 40) & 15]; hi ^= t >> 24; lo ^= t << 40;\ + t = A[(b >> 44) & 15]; hi ^= t >> 20; lo ^= t << 44;\ + t = A[(b >> 48) & 15]; hi ^= t >> 16; lo ^= t << 48;\ + t = A[(b >> 52) & 15]; hi ^= t >> 12; lo ^= t << 52;\ + t = A[(b >> 56) & 15]; hi ^= t >> 8; lo ^= t << 56;\ + t = A[b >> 60]; hi ^= t >> 4; lo ^= t << 60;\ + if (a >> 63) hi ^= ((b & 0xeeeeeeeeeeeeeeeeUL) >> 1);\ + if ((a >> 62) & 1) hi ^= ((b & 0xccccccccccccccccUL) >> 2);\ + if ((a >> 61) & 1) hi ^= ((b & 0x8888888888888888UL) >> 3);\ + c[0] = lo; c[1] = hi;\ + + + + + +#define NTL_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 60; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 56; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 52; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 48; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 44; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 40; lo ^= t << 24;\ + t = A[(b >> 28) & 15]; hi ^= t >> 36; lo ^= t << 28;\ + t = A[(b >> 32) & 15]; hi ^= t >> 32; lo ^= t << 32;\ + t = A[(b >> 36) & 15]; hi ^= t >> 28; lo ^= t << 36;\ + t = A[(b >> 40) & 15]; hi ^= t >> 24; lo ^= t << 40;\ + t = A[(b >> 44) & 15]; hi ^= t >> 20; lo ^= t << 44;\ + t = A[(b >> 48) & 15]; hi ^= t >> 16; lo ^= t << 48;\ + t = A[(b >> 52) & 15]; hi ^= t >> 12; lo ^= t << 52;\ + t = A[(b >> 56) & 15]; hi ^= t >> 8; lo ^= t << 56;\ + t = A[b >> 60]; hi ^= t >> 4; lo ^= t << 60;\ + if (a >> 63) hi ^= ((b & 0xeeeeeeeeeeeeeeeeUL) >> 1);\ + if ((a >> 62) & 1) hi ^= ((b & 0xccccccccccccccccUL) >> 2);\ + if ((a >> 61) & 1) hi ^= ((b & 0x8888888888888888UL) >> 3);\ + cp[i] = carry ^ lo; carry = hi;\ + }\ + cp[sb] = carry;\ + + + + + +#define NTL_BB_MUL_CODE2 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 60; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 56; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 52; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 48; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 44; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 40; lo ^= t << 24;\ + t = A[(b >> 28) & 15]; hi ^= t >> 36; lo ^= t << 28;\ + t = A[(b >> 32) & 15]; hi ^= t >> 32; lo ^= t << 32;\ + t = A[(b >> 36) & 15]; hi ^= t >> 28; lo ^= t << 36;\ + t = A[(b >> 40) & 15]; hi ^= t >> 24; lo ^= t << 40;\ + t = A[(b >> 44) & 15]; hi ^= t >> 20; lo ^= t << 44;\ + t = A[(b 
>> 48) & 15]; hi ^= t >> 16; lo ^= t << 48;\ + t = A[(b >> 52) & 15]; hi ^= t >> 12; lo ^= t << 52;\ + t = A[(b >> 56) & 15]; hi ^= t >> 8; lo ^= t << 56;\ + t = A[b >> 60]; hi ^= t >> 4; lo ^= t << 60;\ + if (a >> 63) hi ^= ((b & 0xeeeeeeeeeeeeeeeeUL) >> 1);\ + if ((a >> 62) & 1) hi ^= ((b & 0xccccccccccccccccUL) >> 2);\ + if ((a >> 61) & 1) hi ^= ((b & 0x8888888888888888UL) >> 3);\ + cp[i] ^= (carry ^ lo); carry = hi;\ + }\ + cp[sb] ^= carry;\ + + + + + +#define NTL_SHORT_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 60; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 56; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 52; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 48; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 44; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 40; lo ^= t << 24;\ + t = A[(b >> 28) & 15]; hi ^= t >> 36; lo ^= t << 28;\ + t = A[(b >> 32) & 15]; hi ^= t >> 32; lo ^= t << 32;\ + t = A[(b >> 36) & 15]; hi ^= t >> 28; lo ^= t << 36;\ + t = A[(b >> 40) & 15]; hi ^= t >> 24; lo ^= t << 40;\ + t = A[(b >> 44) & 15]; hi ^= t >> 20; lo ^= t << 44;\ + t = A[(b >> 48) & 15]; hi ^= t >> 16; lo ^= t << 48;\ + t = A[(b >> 52) & 15]; hi ^= t >> 12; lo ^= t << 52;\ + t = A[(b >> 56) & 15]; hi ^= t >> 8; lo ^= t << 56;\ + t = A[b >> 60]; hi ^= t >> 4; lo ^= t << 60;\ + cp[i] = carry ^ lo; carry = hi;\ + }\ + cp[sb] = carry;\ + + + + + +#define NTL_HALF_BB_MUL_CODE0 \ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[4];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + lo = A[b & 3]; t = A[(b >> 2) & 3]; hi = t >> 62; lo ^= t << 2;\ + t = A[(b >> 4) & 3]; hi ^= t >> 60; lo ^= t << 4;\ + t = A[(b >> 6) & 3]; hi ^= t >> 58; lo ^= t << 6;\ + t = A[(b >> 8) & 3]; hi ^= t >> 56; lo ^= t << 8;\ + t = A[(b >> 10) & 3]; hi ^= t >> 54; lo ^= t << 10;\ + t = A[(b >> 12) & 3]; hi ^= t >> 52; lo ^= t << 12;\ + t = A[(b >> 14) & 3]; hi ^= t >> 50; lo ^= t << 14;\ + t = A[(b >> 16) & 3]; hi ^= t >> 48; lo ^= t << 16;\ + t = A[(b >> 18) & 3]; hi ^= t >> 46; lo ^= t << 18;\ + t = A[(b >> 20) & 3]; hi ^= t >> 44; lo ^= t << 20;\ + t = A[(b >> 22) & 3]; hi ^= t >> 42; lo ^= t << 22;\ + t = A[(b >> 24) & 3]; hi ^= t >> 40; lo ^= t << 24;\ + t = A[(b >> 26) & 3]; hi ^= t >> 38; lo ^= t << 26;\ + t = A[(b >> 28) & 3]; hi ^= t >> 36; lo ^= t << 28;\ + t = A[b >> 30]; hi ^= t >> 34; lo ^= t << 30;\ + if (a >> 63) hi ^= ((b & 0xaaaaaaaaUL) >> 1);\ + c[0] = lo; c[1] = hi;\ + + + + + +#define NTL_ALT_BB_MUL_CODE0 \ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + const _ntl_ulong t4 = A[(b >> 4) & 15]; \ + const _ntl_ulong t8 = A[(b >> 8) & 15]; \ + const _ntl_ulong t12 = A[(b >> 12) & 15]; \ + const _ntl_ulong t16 = A[(b >> 16) & 15]; \ + const _ntl_ulong t20 = A[(b >> 20) & 15]; 
\ + const _ntl_ulong t24 = A[(b >> 24) & 15]; \ + const _ntl_ulong t28 = A[(b >> 28) & 15]; \ + const _ntl_ulong t32 = A[(b >> 32) & 15]; \ + const _ntl_ulong t36 = A[(b >> 36) & 15]; \ + const _ntl_ulong t40 = A[(b >> 40) & 15]; \ + const _ntl_ulong t44 = A[(b >> 44) & 15]; \ + const _ntl_ulong t48 = A[(b >> 48) & 15]; \ + const _ntl_ulong t52 = A[(b >> 52) & 15]; \ + const _ntl_ulong t56 = A[(b >> 56) & 15]; \ + const _ntl_ulong t60 = A[b >> 60]; \ + const _ntl_ulong lo = A[b & 15] \ + ^ (t4 << 4)\ + ^ (t8 << 8)\ + ^ (t12 << 12)\ + ^ (t16 << 16)\ + ^ (t20 << 20)\ + ^ (t24 << 24)\ + ^ (t28 << 28)\ + ^ (t32 << 32)\ + ^ (t36 << 36)\ + ^ (t40 << 40)\ + ^ (t44 << 44)\ + ^ (t48 << 48)\ + ^ (t52 << 52)\ + ^ (t56 << 56)\ + ^ (t60 << 60);\ + const _ntl_ulong hi = (t4 >> 60)\ + ^ (t8 >> 56)\ + ^ (t12 >> 52)\ + ^ (t16 >> 48)\ + ^ (t20 >> 44)\ + ^ (t24 >> 40)\ + ^ (t28 >> 36)\ + ^ (t32 >> 32)\ + ^ (t36 >> 28)\ + ^ (t40 >> 24)\ + ^ (t44 >> 20)\ + ^ (t48 >> 16)\ + ^ (t52 >> 12)\ + ^ (t56 >> 8)\ + ^ (t60 >> 4)\ + ^ (((b & 0xeeeeeeeeeeeeeeeeUL) >> 1) & (-(a >> 63)))\ + ^ (((b & 0xccccccccccccccccUL) >> 2) & (-((a >> 62) & 1UL)))\ + ^ (((b & 0x8888888888888888UL) >> 3) & (-((a >> 61) & 1UL)));\ + c[0] = lo; c[1] = hi;\ + + + + + +#define NTL_ALT_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + const _ntl_ulong b = bp[i];\ + const _ntl_ulong t4 = A[(b >> 4) & 15]; \ + const _ntl_ulong t8 = A[(b >> 8) & 15]; \ + const _ntl_ulong t12 = A[(b >> 12) & 15]; \ + const _ntl_ulong t16 = A[(b >> 16) & 15]; \ + const _ntl_ulong t20 = A[(b >> 20) & 15]; \ + const _ntl_ulong t24 = A[(b >> 24) & 15]; \ + const _ntl_ulong t28 = A[(b >> 28) & 15]; \ + const _ntl_ulong t32 = A[(b >> 32) & 15]; \ + const _ntl_ulong t36 = A[(b >> 36) & 15]; \ + const _ntl_ulong t40 = A[(b >> 40) & 15]; \ + const _ntl_ulong t44 = A[(b >> 44) & 15]; \ + const _ntl_ulong t48 = A[(b >> 48) & 15]; \ + const _ntl_ulong t52 = A[(b >> 52) & 15]; \ + const _ntl_ulong t56 = A[(b >> 56) & 15]; \ + const _ntl_ulong t60 = A[b >> 60]; \ + const _ntl_ulong lo = A[b & 15] \ + ^ (t4 << 4)\ + ^ (t8 << 8)\ + ^ (t12 << 12)\ + ^ (t16 << 16)\ + ^ (t20 << 20)\ + ^ (t24 << 24)\ + ^ (t28 << 28)\ + ^ (t32 << 32)\ + ^ (t36 << 36)\ + ^ (t40 << 40)\ + ^ (t44 << 44)\ + ^ (t48 << 48)\ + ^ (t52 << 52)\ + ^ (t56 << 56)\ + ^ (t60 << 60);\ + const _ntl_ulong hi = (t4 >> 60)\ + ^ (t8 >> 56)\ + ^ (t12 >> 52)\ + ^ (t16 >> 48)\ + ^ (t20 >> 44)\ + ^ (t24 >> 40)\ + ^ (t28 >> 36)\ + ^ (t32 >> 32)\ + ^ (t36 >> 28)\ + ^ (t40 >> 24)\ + ^ (t44 >> 20)\ + ^ (t48 >> 16)\ + ^ (t52 >> 12)\ + ^ (t56 >> 8)\ + ^ (t60 >> 4)\ + ^ (((b & 0xeeeeeeeeeeeeeeeeUL) >> 1) & (-(a >> 63)))\ + ^ (((b & 0xccccccccccccccccUL) >> 2) & (-((a >> 62) & 1UL)))\ + ^ (((b & 0x8888888888888888UL) >> 3) & (-((a >> 61) & 1UL)));\ + cp[i] = carry ^ lo; carry = hi;\ + }\ + cp[sb] = carry;\ + + + + + +#define NTL_ALT_BB_MUL_CODE2 \ + long i;\ + _ntl_ulong carry = 0;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ 
A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + const _ntl_ulong b = bp[i];\ + const _ntl_ulong t4 = A[(b >> 4) & 15]; \ + const _ntl_ulong t8 = A[(b >> 8) & 15]; \ + const _ntl_ulong t12 = A[(b >> 12) & 15]; \ + const _ntl_ulong t16 = A[(b >> 16) & 15]; \ + const _ntl_ulong t20 = A[(b >> 20) & 15]; \ + const _ntl_ulong t24 = A[(b >> 24) & 15]; \ + const _ntl_ulong t28 = A[(b >> 28) & 15]; \ + const _ntl_ulong t32 = A[(b >> 32) & 15]; \ + const _ntl_ulong t36 = A[(b >> 36) & 15]; \ + const _ntl_ulong t40 = A[(b >> 40) & 15]; \ + const _ntl_ulong t44 = A[(b >> 44) & 15]; \ + const _ntl_ulong t48 = A[(b >> 48) & 15]; \ + const _ntl_ulong t52 = A[(b >> 52) & 15]; \ + const _ntl_ulong t56 = A[(b >> 56) & 15]; \ + const _ntl_ulong t60 = A[b >> 60]; \ + const _ntl_ulong lo = A[b & 15] \ + ^ (t4 << 4)\ + ^ (t8 << 8)\ + ^ (t12 << 12)\ + ^ (t16 << 16)\ + ^ (t20 << 20)\ + ^ (t24 << 24)\ + ^ (t28 << 28)\ + ^ (t32 << 32)\ + ^ (t36 << 36)\ + ^ (t40 << 40)\ + ^ (t44 << 44)\ + ^ (t48 << 48)\ + ^ (t52 << 52)\ + ^ (t56 << 56)\ + ^ (t60 << 60);\ + const _ntl_ulong hi = (t4 >> 60)\ + ^ (t8 >> 56)\ + ^ (t12 >> 52)\ + ^ (t16 >> 48)\ + ^ (t20 >> 44)\ + ^ (t24 >> 40)\ + ^ (t28 >> 36)\ + ^ (t32 >> 32)\ + ^ (t36 >> 28)\ + ^ (t40 >> 24)\ + ^ (t44 >> 20)\ + ^ (t48 >> 16)\ + ^ (t52 >> 12)\ + ^ (t56 >> 8)\ + ^ (t60 >> 4)\ + ^ (((b & 0xeeeeeeeeeeeeeeeeUL) >> 1) & (-(a >> 63)))\ + ^ (((b & 0xccccccccccccccccUL) >> 2) & (-((a >> 62) & 1UL)))\ + ^ (((b & 0x8888888888888888UL) >> 3) & (-((a >> 61) & 1UL)));\ + cp[i] ^= (carry ^ lo); carry = hi;\ + }\ + cp[sb] ^= carry;\ + + + + + +#define NTL_ALT_SHORT_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + const _ntl_ulong b = bp[i];\ + const _ntl_ulong t4 = A[(b >> 4) & 15]; \ + const _ntl_ulong t8 = A[(b >> 8) & 15]; \ + const _ntl_ulong t12 = A[(b >> 12) & 15]; \ + const _ntl_ulong t16 = A[(b >> 16) & 15]; \ + const _ntl_ulong t20 = A[(b >> 20) & 15]; \ + const _ntl_ulong t24 = A[(b >> 24) & 15]; \ + const _ntl_ulong t28 = A[(b >> 28) & 15]; \ + const _ntl_ulong t32 = A[(b >> 32) & 15]; \ + const _ntl_ulong t36 = A[(b >> 36) & 15]; \ + const _ntl_ulong t40 = A[(b >> 40) & 15]; \ + const _ntl_ulong t44 = A[(b >> 44) & 15]; \ + const _ntl_ulong t48 = A[(b >> 48) & 15]; \ + const _ntl_ulong t52 = A[(b >> 52) & 15]; \ + const _ntl_ulong t56 = A[(b >> 56) & 15]; \ + const _ntl_ulong t60 = A[b >> 60]; \ + const _ntl_ulong lo = A[b & 15] \ + ^ (t4 << 4)\ + ^ (t8 << 8)\ + ^ (t12 << 12)\ + ^ (t16 << 16)\ + ^ (t20 << 20)\ + ^ (t24 << 24)\ + ^ (t28 << 28)\ + ^ (t32 << 32)\ + ^ (t36 << 36)\ + ^ (t40 << 40)\ + ^ (t44 << 44)\ + ^ (t48 << 48)\ + ^ (t52 << 52)\ + ^ (t56 << 56)\ + ^ (t60 << 60);\ + const _ntl_ulong hi = (t4 >> 60)\ + ^ (t8 >> 56)\ + ^ (t12 >> 52)\ + ^ (t16 >> 48)\ + ^ (t20 >> 44)\ + ^ (t24 >> 40)\ + ^ (t28 >> 36)\ + ^ (t32 >> 32)\ + ^ (t36 >> 28)\ + ^ (t40 >> 24)\ + ^ (t44 >> 20)\ + ^ (t48 >> 16)\ + ^ (t52 >> 12)\ + ^ (t56 >> 8)\ + ^ (t60 >> 4);\ + cp[i] = carry ^ lo; carry = hi;\ + }\ + cp[sb] = carry;\ + + + + + +#define NTL_ALT_HALF_BB_MUL_CODE0 \ + _ntl_ulong A[4];\ + A[0] = 0;\ + A[1] = a;\ + A[2] 
= A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + const _ntl_ulong t2 = A[(b >> 2) & 3]; \ + const _ntl_ulong t4 = A[(b >> 4) & 3]; \ + const _ntl_ulong t6 = A[(b >> 6) & 3]; \ + const _ntl_ulong t8 = A[(b >> 8) & 3]; \ + const _ntl_ulong t10 = A[(b >> 10) & 3]; \ + const _ntl_ulong t12 = A[(b >> 12) & 3]; \ + const _ntl_ulong t14 = A[(b >> 14) & 3]; \ + const _ntl_ulong t16 = A[(b >> 16) & 3]; \ + const _ntl_ulong t18 = A[(b >> 18) & 3]; \ + const _ntl_ulong t20 = A[(b >> 20) & 3]; \ + const _ntl_ulong t22 = A[(b >> 22) & 3]; \ + const _ntl_ulong t24 = A[(b >> 24) & 3]; \ + const _ntl_ulong t26 = A[(b >> 26) & 3]; \ + const _ntl_ulong t28 = A[(b >> 28) & 3]; \ + const _ntl_ulong t30 = A[b >> 30]; \ + const _ntl_ulong lo = A[b & 3] \ + ^ (t2 << 2)\ + ^ (t4 << 4)\ + ^ (t6 << 6)\ + ^ (t8 << 8)\ + ^ (t10 << 10)\ + ^ (t12 << 12)\ + ^ (t14 << 14)\ + ^ (t16 << 16)\ + ^ (t18 << 18)\ + ^ (t20 << 20)\ + ^ (t22 << 22)\ + ^ (t24 << 24)\ + ^ (t26 << 26)\ + ^ (t28 << 28)\ + ^ (t30 << 30);\ + const _ntl_ulong hi = (t2 >> 62)\ + ^ (t4 >> 60)\ + ^ (t6 >> 58)\ + ^ (t8 >> 56)\ + ^ (t10 >> 54)\ + ^ (t12 >> 52)\ + ^ (t14 >> 50)\ + ^ (t16 >> 48)\ + ^ (t18 >> 46)\ + ^ (t20 >> 44)\ + ^ (t22 >> 42)\ + ^ (t24 >> 40)\ + ^ (t26 >> 38)\ + ^ (t28 >> 36)\ + ^ (t30 >> 34)\ + ^ (((b & 0xaaaaaaaaUL) >> 1) & (-(a >> 63)));\ + c[0] = lo; c[1] = hi;\ + + + + + +#define NTL_ALT1_BB_MUL_CODE0 \ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 60; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 56; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 52; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 48; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 44; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 40; lo ^= t << 24;\ + t = A[(b >> 28) & 15]; hi ^= t >> 36; lo ^= t << 28;\ + t = A[(b >> 32) & 15]; hi ^= t >> 32; lo ^= t << 32;\ + t = A[(b >> 36) & 15]; hi ^= t >> 28; lo ^= t << 36;\ + t = A[(b >> 40) & 15]; hi ^= t >> 24; lo ^= t << 40;\ + t = A[(b >> 44) & 15]; hi ^= t >> 20; lo ^= t << 44;\ + t = A[(b >> 48) & 15]; hi ^= t >> 16; lo ^= t << 48;\ + t = A[(b >> 52) & 15]; hi ^= t >> 12; lo ^= t << 52;\ + t = A[(b >> 56) & 15]; hi ^= t >> 8; lo ^= t << 56;\ + t = A[b >> 60]; hi ^= t >> 4; lo ^= t << 60;\ + hi ^= (((b & 0xeeeeeeeeeeeeeeeeUL) >> 1) & (-(a >> 63)))\ + ^ (((b & 0xccccccccccccccccUL) >> 2) & (-((a >> 62) & 1UL)))\ + ^ (((b & 0x8888888888888888UL) >> 3) & (-((a >> 61) & 1UL)));\ + c[0] = lo; c[1] = hi;\ + + + + + +#define NTL_ALT1_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 60; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 56; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 52; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 48; lo ^= 
t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 44; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 40; lo ^= t << 24;\ + t = A[(b >> 28) & 15]; hi ^= t >> 36; lo ^= t << 28;\ + t = A[(b >> 32) & 15]; hi ^= t >> 32; lo ^= t << 32;\ + t = A[(b >> 36) & 15]; hi ^= t >> 28; lo ^= t << 36;\ + t = A[(b >> 40) & 15]; hi ^= t >> 24; lo ^= t << 40;\ + t = A[(b >> 44) & 15]; hi ^= t >> 20; lo ^= t << 44;\ + t = A[(b >> 48) & 15]; hi ^= t >> 16; lo ^= t << 48;\ + t = A[(b >> 52) & 15]; hi ^= t >> 12; lo ^= t << 52;\ + t = A[(b >> 56) & 15]; hi ^= t >> 8; lo ^= t << 56;\ + t = A[b >> 60]; hi ^= t >> 4; lo ^= t << 60;\ + hi ^= (((b & 0xeeeeeeeeeeeeeeeeUL) >> 1) & (-(a >> 63)))\ + ^ (((b & 0xccccccccccccccccUL) >> 2) & (-((a >> 62) & 1UL)))\ + ^ (((b & 0x8888888888888888UL) >> 3) & (-((a >> 61) & 1UL)));\ + cp[i] = carry ^ lo; carry = hi;\ + }\ + cp[sb] = carry;\ + + + + + +#define NTL_ALT1_BB_MUL_CODE2 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 60; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 56; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 52; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 48; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 44; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 40; lo ^= t << 24;\ + t = A[(b >> 28) & 15]; hi ^= t >> 36; lo ^= t << 28;\ + t = A[(b >> 32) & 15]; hi ^= t >> 32; lo ^= t << 32;\ + t = A[(b >> 36) & 15]; hi ^= t >> 28; lo ^= t << 36;\ + t = A[(b >> 40) & 15]; hi ^= t >> 24; lo ^= t << 40;\ + t = A[(b >> 44) & 15]; hi ^= t >> 20; lo ^= t << 44;\ + t = A[(b >> 48) & 15]; hi ^= t >> 16; lo ^= t << 48;\ + t = A[(b >> 52) & 15]; hi ^= t >> 12; lo ^= t << 52;\ + t = A[(b >> 56) & 15]; hi ^= t >> 8; lo ^= t << 56;\ + t = A[b >> 60]; hi ^= t >> 4; lo ^= t << 60;\ + hi ^= (((b & 0xeeeeeeeeeeeeeeeeUL) >> 1) & (-(a >> 63)))\ + ^ (((b & 0xccccccccccccccccUL) >> 2) & (-((a >> 62) & 1UL)))\ + ^ (((b & 0x8888888888888888UL) >> 3) & (-((a >> 61) & 1UL)));\ + cp[i] ^= (carry ^ lo); carry = hi;\ + }\ + cp[sb] ^= carry;\ + + + + + +#define NTL_ALT1_SHORT_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 60; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 56; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 52; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 48; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 44; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 40; lo ^= t << 24;\ + t = A[(b >> 28) & 15]; hi ^= t >> 36; lo ^= t << 28;\ + t = A[(b >> 32) & 15]; hi ^= t >> 32; lo ^= t << 32;\ + t = A[(b >> 36) & 15]; hi ^= t >> 28; lo ^= t << 36;\ + t = A[(b >> 40) & 15]; hi ^= t >> 24; lo 
^= t << 40;\
+ t = A[(b >> 44) & 15]; hi ^= t >> 20; lo ^= t << 44;\
+ t = A[(b >> 48) & 15]; hi ^= t >> 16; lo ^= t << 48;\
+ t = A[(b >> 52) & 15]; hi ^= t >> 12; lo ^= t << 52;\
+ t = A[(b >> 56) & 15]; hi ^= t >> 8; lo ^= t << 56;\
+ t = A[b >> 60]; hi ^= t >> 4; lo ^= t << 60;\
+ cp[i] = carry ^ lo; carry = hi;\
+ }\
+ cp[sb] = carry;\
+
+
+
+
+
+#define NTL_ALT1_HALF_BB_MUL_CODE0 \
+ _ntl_ulong hi, lo, t;\
+ _ntl_ulong A[4];\
+ A[0] = 0;\
+ A[1] = a;\
+ A[2] = A[1] << 1;\
+ A[3] = A[2] ^ A[1];\
+ lo = A[b & 3]; t = A[(b >> 2) & 3]; hi = t >> 62; lo ^= t << 2;\
+ t = A[(b >> 4) & 3]; hi ^= t >> 60; lo ^= t << 4;\
+ t = A[(b >> 6) & 3]; hi ^= t >> 58; lo ^= t << 6;\
+ t = A[(b >> 8) & 3]; hi ^= t >> 56; lo ^= t << 8;\
+ t = A[(b >> 10) & 3]; hi ^= t >> 54; lo ^= t << 10;\
+ t = A[(b >> 12) & 3]; hi ^= t >> 52; lo ^= t << 12;\
+ t = A[(b >> 14) & 3]; hi ^= t >> 50; lo ^= t << 14;\
+ t = A[(b >> 16) & 3]; hi ^= t >> 48; lo ^= t << 16;\
+ t = A[(b >> 18) & 3]; hi ^= t >> 46; lo ^= t << 18;\
+ t = A[(b >> 20) & 3]; hi ^= t >> 44; lo ^= t << 20;\
+ t = A[(b >> 22) & 3]; hi ^= t >> 42; lo ^= t << 22;\
+ t = A[(b >> 24) & 3]; hi ^= t >> 40; lo ^= t << 24;\
+ t = A[(b >> 26) & 3]; hi ^= t >> 38; lo ^= t << 26;\
+ t = A[(b >> 28) & 3]; hi ^= t >> 36; lo ^= t << 28;\
+ t = A[b >> 30]; hi ^= t >> 34; lo ^= t << 30;\
+ hi ^= (((b & 0xaaaaaaaaUL) >> 1) & (-(a >> 63)));\
+ c[0] = lo; c[1] = hi;\
+
+
+
+#define NTL_BB_MUL1_BITS (4)
+
+
+
+#define NTL_BB_SQR_CODE \
+lo=sqrtab[a&255];\
+lo=lo|(sqrtab[(a>>8)&255]<<16);\
+lo=lo|(sqrtab[(a>>16)&255]<<32);\
+lo=lo|(sqrtab[(a>>24)&255]<<48);\
+hi=sqrtab[(a>>32)&255];\
+hi=hi|(sqrtab[(a>>40)&255]<<16);\
+hi=hi|(sqrtab[(a>>48)&255]<<32);\
+hi=hi|(sqrtab[(a>>56)&255]<<48);\
+
+
+
+
+#define NTL_BB_REV_CODE (revtab[(a>>0)&255]<<56)\
+|(revtab[(a>>8)&255]<<48)\
+|(revtab[(a>>16)&255]<<40)\
+|(revtab[(a>>24)&255]<<32)\
+|(revtab[(a>>32)&255]<<24)\
+|(revtab[(a>>40)&255]<<16)\
+|(revtab[(a>>48)&255]<<8)\
+|(revtab[(a>>56)&255]<<0)
+
+#endif
+
diff --git a/thirdparty/linux/ntl/include/NTL/mat_GF2.h b/thirdparty/linux/ntl/include/NTL/mat_GF2.h
new file mode 100644
index 0000000000..62ba4f3e5b
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/mat_GF2.h
@@ -0,0 +1,186 @@
+
+#ifndef NTL_mat_GF2__H
+#define NTL_mat_GF2__H
+
+
+#include <NTL/matrix.h>
+#include <NTL/vec_vec_GF2.h>
+
+NTL_OPEN_NNS
+
+
+typedef Mat<GF2> mat_GF2;
+
+
+// some backward compatibility stuff
+
+inline void conv(mat_GF2& x, const vec_vec_GF2& a) {
+ MakeMatrix(x, a);
+}
+
+inline mat_GF2 to_mat_GF2(const vec_vec_GF2& a) {
+ mat_GF2 x; conv(x, a); NTL_OPT_RETURN(mat_GF2, x);
+}
+
+
+
+void add(mat_GF2& X, const mat_GF2& A, const mat_GF2& B);
+
+inline void sub(mat_GF2& X, const mat_GF2& A, const mat_GF2& B)
+ { add(X, A, B); }
+
+inline void negate(mat_GF2& X, const mat_GF2& A)
+ { X = A; }
+
+void mul(mat_GF2& X, const mat_GF2& A, const mat_GF2& B);
+void mul(vec_GF2& x, const mat_GF2& A, const vec_GF2& b);
+void mul(vec_GF2& x, const vec_GF2& a, const mat_GF2& B);
+
+void mul(mat_GF2& X, const mat_GF2& A, GF2 b);
+inline void mul(mat_GF2& X, GF2 a, const mat_GF2& B)
+ { mul(X, B, a); }
+
+inline void mul(mat_GF2& X, const mat_GF2& A, long b)
+ { mul(X, A, to_GF2(b)); }
+inline void mul(mat_GF2& X, long a, const mat_GF2& B)
+ { mul(X, B, a); }
+
+void ident(mat_GF2& X, long n);
+inline mat_GF2 ident_mat_GF2(long n)
+ { mat_GF2 X; ident(X, n); NTL_OPT_RETURN(mat_GF2, X); }
+
+long IsIdent(const mat_GF2& A, long n);
+void transpose(mat_GF2& X, const mat_GF2& A);
+void solve(ref_GF2 d, vec_GF2& X, const mat_GF2& A, const vec_GF2& b);
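+
+// Usage sketch (illustrative only, not part of the upstream header), showing
+// how the two solve() overloads are typically called. It assumes NTL is built
+// and installed; the matrix entries below are arbitrary examples. The overload
+// above solves x*A == b (x and b treated as row vectors), while the overload
+// that follows solves A*x == b (column-vector convention):
+//
+//    #include <NTL/mat_GF2.h>
+//    using namespace NTL;
+//
+//    int main()
+//    {
+//       mat_GF2 A;
+//       A.SetDims(2, 2);           // 2x2 matrix over GF(2), initially zero
+//       A[0][0] = 1; A[0][1] = 1;
+//       A[1][1] = 1;               // det(A) = 1, so A is invertible
+//
+//       vec_GF2 b;
+//       b.SetLength(2);
+//       b[0] = 1;
+//
+//       GF2 d;
+//       vec_GF2 x;
+//       solve(d, x, A, b);         // d = det(A); since d != 0, x*A == b
+//       return 0;
+//    }
+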
+void solve(ref_GF2 d, const mat_GF2& A, vec_GF2& x, const vec_GF2& b); +void inv(ref_GF2 d, mat_GF2& X, const mat_GF2& A); + +inline void sqr(mat_GF2& X, const mat_GF2& A) + { mul(X, A, A); } + +inline mat_GF2 sqr(const mat_GF2& A) + { mat_GF2 X; sqr(X, A); NTL_OPT_RETURN(mat_GF2, X); } + +void inv(mat_GF2& X, const mat_GF2& A); + +inline mat_GF2 inv(const mat_GF2& A) + { mat_GF2 X; inv(X, A); NTL_OPT_RETURN(mat_GF2, X); } + +void power(mat_GF2& X, const mat_GF2& A, const ZZ& e); +inline mat_GF2 power(const mat_GF2& A, const ZZ& e) + { mat_GF2 X; power(X, A, e); NTL_OPT_RETURN(mat_GF2, X); } + +inline void power(mat_GF2& X, const mat_GF2& A, long e) + { power(X, A, ZZ_expo(e)); } +inline mat_GF2 power(const mat_GF2& A, long e) + { mat_GF2 X; power(X, A, e); NTL_OPT_RETURN(mat_GF2, X); } + + +void diag(mat_GF2& X, long n, GF2 d); +inline mat_GF2 diag(long n, GF2 d) + { mat_GF2 X; diag(X, n, d); NTL_OPT_RETURN(mat_GF2, X); } + +long IsDiag(const mat_GF2& A, long n, GF2 d); + + +long gauss(mat_GF2& M); +long gauss(mat_GF2& M, long w); +void image(mat_GF2& X, const mat_GF2& A); +void kernel(mat_GF2& X, const mat_GF2& A); + + + + +void determinant(ref_GF2 x, const mat_GF2& a); +inline GF2 determinant(const mat_GF2& a) + { GF2 x; determinant(x, a); return x; } + +inline mat_GF2 transpose(const mat_GF2 & a) + { mat_GF2 x; transpose(x, a); NTL_OPT_RETURN(mat_GF2, x); } + + +void clear(mat_GF2& a); +// x = 0 (dimension unchanged) + +long IsZero(const mat_GF2& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_GF2 operator+(const mat_GF2& a, const mat_GF2& b); +mat_GF2 operator-(const mat_GF2& a, const mat_GF2& b); +mat_GF2 operator*(const mat_GF2& a, const mat_GF2& b); + +inline mat_GF2 operator-(const mat_GF2& a) + { return a; } + + +// matrix/scalar multiplication: + +inline mat_GF2 operator*(const mat_GF2& a, GF2 b) + { mat_GF2 x; mul(x, a, b); NTL_OPT_RETURN(mat_GF2, x); } + +inline mat_GF2 operator*(const mat_GF2& a, long b) + { mat_GF2 x; mul(x, a, b); NTL_OPT_RETURN(mat_GF2, x); } + +inline mat_GF2 operator*(GF2 a, const mat_GF2& b) + { mat_GF2 x; mul(x, a, b); NTL_OPT_RETURN(mat_GF2, x); } + +inline mat_GF2 operator*(long a, const mat_GF2& b) + { mat_GF2 x; mul(x, a, b); NTL_OPT_RETURN(mat_GF2, x); } + + + + +// matrix/vector multiplication: + +vec_GF2 operator*(const mat_GF2& a, const vec_GF2& b); + +vec_GF2 operator*(const vec_GF2& a, const mat_GF2& b); + + +// assignment operator notation: + +inline mat_GF2& operator+=(mat_GF2& x, const mat_GF2& a) +{ + add(x, x, a); + return x; +} + +inline mat_GF2& operator-=(mat_GF2& x, const mat_GF2& a) +{ + sub(x, x, a); + return x; +} + + +inline mat_GF2& operator*=(mat_GF2& x, const mat_GF2& a) +{ + mul(x, x, a); + return x; +} + +inline mat_GF2& operator*=(mat_GF2& x, GF2 a) +{ + mul(x, x, a); + return x; +} + +inline mat_GF2& operator*=(mat_GF2& x, long a) +{ + mul(x, x, a); + return x; +} + + +inline vec_GF2& operator*=(vec_GF2& x, const mat_GF2& a) +{ + mul(x, x, a); + return x; +} + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/mat_GF2E.h b/thirdparty/linux/ntl/include/NTL/mat_GF2E.h new file mode 100644 index 0000000000..b8c81f7aca --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/mat_GF2E.h @@ -0,0 +1,187 @@ + +#ifndef NTL_mat_GF2E__H +#define NTL_mat_GF2E__H + +#include +#include + +NTL_OPEN_NNS + +typedef Mat mat_GF2E; + +void add(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B); +inline void sub(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B) + { add(X, A, B); } 
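+
+// Usage sketch (illustrative only, not part of the upstream header). Over a
+// field of characteristic 2, subtraction and addition coincide, which is why
+// sub() above simply forwards to add(). The example assumes NTL is built and
+// installed; BuildIrred_GF2X comes from <NTL/GF2XFactoring.h>, and the degree
+// 8 is an arbitrary choice:
+//
+//    #include <NTL/mat_GF2E.h>
+//    #include <NTL/GF2XFactoring.h>
+//    using namespace NTL;
+//
+//    int main()
+//    {
+//       GF2X P = BuildIrred_GF2X(8);      // monic irreducible P of degree 8
+//       GF2E::init(P);                    // work in GF(2^8) = GF(2)[X]/(P)
+//
+//       mat_GF2E A = ident_mat_GF2E(3);   // 3x3 identity matrix
+//       mat_GF2E B;
+//       sub(B, A, A);                     // B = A - A = A + A = 0 here
+//       return 0;
+//    }
+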
+inline void negate(mat_GF2E& X, const mat_GF2E& A) + { X = A; } +void mul(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B); +void mul(vec_GF2E& x, const mat_GF2E& A, const vec_GF2E& b); +void mul(vec_GF2E& x, const vec_GF2E& a, const mat_GF2E& B); + +void mul(mat_GF2E& X, const mat_GF2E& A, const GF2E& b); +inline void mul(mat_GF2E& X, const GF2E& a, const mat_GF2E& B) + { mul(X, B, a); } + +void mul(mat_GF2E& X, const mat_GF2E& A, GF2 b); +inline void mul(mat_GF2E& X, GF2 a, const mat_GF2E& B) + { mul(X, B, a); } + +inline void mul(mat_GF2E& X, const mat_GF2E& A, long b) + { mul(X, A, to_GF2(b)); } +inline void mul(mat_GF2E& X, long a, const mat_GF2E& B) + { mul(X, B, a); } + +void ident(mat_GF2E& X, long n); +inline mat_GF2E ident_mat_GF2E(long n) + { mat_GF2E X; ident(X, n); NTL_OPT_RETURN(mat_GF2E, X); } + +void determinant(GF2E& d, const mat_GF2E& A); +long IsIdent(const mat_GF2E& A, long n); +void transpose(mat_GF2E& X, const mat_GF2E& A); +void solve(GF2E& d, vec_GF2E& x, const mat_GF2E& A, const vec_GF2E& b); +void solve(GF2E& d, const mat_GF2E& A, vec_GF2E& x, const vec_GF2E& b); +void inv(GF2E& d, mat_GF2E& X, const mat_GF2E& A); + +inline void sqr(mat_GF2E& X, const mat_GF2E& A) + { mul(X, A, A); } + +inline mat_GF2E sqr(const mat_GF2E& A) + { mat_GF2E X; sqr(X, A); NTL_OPT_RETURN(mat_GF2E, X); } + +void inv(mat_GF2E& X, const mat_GF2E& A); + +inline mat_GF2E inv(const mat_GF2E& A) + { mat_GF2E X; inv(X, A); NTL_OPT_RETURN(mat_GF2E, X); } + +void power(mat_GF2E& X, const mat_GF2E& A, const ZZ& e); +inline mat_GF2E power(const mat_GF2E& A, const ZZ& e) + { mat_GF2E X; power(X, A, e); NTL_OPT_RETURN(mat_GF2E, X); } + +inline void power(mat_GF2E& X, const mat_GF2E& A, long e) + { power(X, A, ZZ_expo(e)); } +inline mat_GF2E power(const mat_GF2E& A, long e) + { mat_GF2E X; power(X, A, e); NTL_OPT_RETURN(mat_GF2E, X); } + + +void diag(mat_GF2E& X, long n, const GF2E& d); +inline mat_GF2E diag(long n, const GF2E& d) + { mat_GF2E X; diag(X, n, d); NTL_OPT_RETURN(mat_GF2E, X); } + + +long IsDiag(const mat_GF2E& A, long n, const GF2E& d); + + +long gauss(mat_GF2E& M); +long gauss(mat_GF2E& M, long w); +void image(mat_GF2E& X, const mat_GF2E& A); +void kernel(mat_GF2E& X, const mat_GF2E& A); + + + +// miscellaneous: + +inline GF2E determinant(const mat_GF2E& a) + { GF2E x; determinant(x, a); return x; } +// functional variant of determinant + +inline mat_GF2E transpose(const mat_GF2E& a) + { mat_GF2E x; transpose(x, a); NTL_OPT_RETURN(mat_GF2E, x); } + +void clear(mat_GF2E& a); +// x = 0 (dimension unchanged) + +long IsZero(const mat_GF2E& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_GF2E operator+(const mat_GF2E& a, const mat_GF2E& b); +mat_GF2E operator-(const mat_GF2E& a, const mat_GF2E& b); +mat_GF2E operator*(const mat_GF2E& a, const mat_GF2E& b); + +mat_GF2E operator-(const mat_GF2E& a); + + +// matrix/scalar multiplication: + +inline mat_GF2E operator*(const mat_GF2E& a, const GF2E& b) + { mat_GF2E x; mul(x, a, b); NTL_OPT_RETURN(mat_GF2E, x); } + +inline mat_GF2E operator*(const mat_GF2E& a, GF2 b) + { mat_GF2E x; mul(x, a, b); NTL_OPT_RETURN(mat_GF2E, x); } + +inline mat_GF2E operator*(const mat_GF2E& a, long b) + { mat_GF2E x; mul(x, a, b); NTL_OPT_RETURN(mat_GF2E, x); } + +inline mat_GF2E operator*(const GF2E& a, const mat_GF2E& b) + { mat_GF2E x; mul(x, a, b); NTL_OPT_RETURN(mat_GF2E, x); } + +inline mat_GF2E operator*(GF2 a, const mat_GF2E& b) + { mat_GF2E x; mul(x, a, b); NTL_OPT_RETURN(mat_GF2E, x); } + +inline mat_GF2E 
operator*(long a, const mat_GF2E& b) + { mat_GF2E x; mul(x, a, b); NTL_OPT_RETURN(mat_GF2E, x); } + + +// matrix/vector multiplication: + +vec_GF2E operator*(const mat_GF2E& a, const vec_GF2E& b); + +vec_GF2E operator*(const vec_GF2E& a, const mat_GF2E& b); + + + + +// assignment operator notation: + +inline mat_GF2E& operator+=(mat_GF2E& x, const mat_GF2E& a) +{ + add(x, x, a); + return x; +} + +inline mat_GF2E& operator-=(mat_GF2E& x, const mat_GF2E& a) +{ + sub(x, x, a); + return x; +} + + +inline mat_GF2E& operator*=(mat_GF2E& x, const mat_GF2E& a) +{ + mul(x, x, a); + return x; +} + +inline mat_GF2E& operator*=(mat_GF2E& x, const GF2E& a) +{ + mul(x, x, a); + return x; +} + +inline mat_GF2E& operator*=(mat_GF2E& x, GF2 a) +{ + mul(x, x, a); + return x; +} + +inline mat_GF2E& operator*=(mat_GF2E& x, long a) +{ + mul(x, x, a); + return x; +} + + +inline vec_GF2E& operator*=(vec_GF2E& x, const mat_GF2E& a) +{ + mul(x, x, a); + return x; +} + + +NTL_CLOSE_NNS + + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/mat_RR.h b/thirdparty/linux/ntl/include/NTL/mat_RR.h new file mode 100644 index 0000000000..f7065c03aa --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/mat_RR.h @@ -0,0 +1,162 @@ + +#ifndef NTL_mat_RR__H +#define NTL_mat_RR__H + +#include +#include + +NTL_OPEN_NNS + +typedef Mat mat_RR; + +void add(mat_RR& X, const mat_RR& A, const mat_RR& B); +void sub(mat_RR& X, const mat_RR& A, const mat_RR& B); +void negate(mat_RR& X, const mat_RR& A); +void mul(mat_RR& X, const mat_RR& A, const mat_RR& B); +void mul(vec_RR& x, const mat_RR& A, const vec_RR& b); +void mul(vec_RR& x, const vec_RR& a, const mat_RR& B); + +void mul(mat_RR& X, const mat_RR& A, const RR& b); +void mul(mat_RR& X, const mat_RR& A, double b); + +inline void mul(mat_RR& X, const RR& a, const mat_RR& B) + { mul(X, B, a); } + +inline void mul(mat_RR& X, double a, const mat_RR& B) + { mul(X, B, a); } + +void ident(mat_RR& X, long n); +inline mat_RR ident_mat_RR(long n) + { mat_RR X; ident(X, n); NTL_OPT_RETURN(mat_RR, X); } + +void determinant(RR& d, const mat_RR& A); +long IsIdent(const mat_RR& A, long n); +void transpose(mat_RR& X, const mat_RR& A); +void solve(RR& d, vec_RR& X, + const mat_RR& A, const vec_RR& b); +void inv(RR& d, mat_RR& X, const mat_RR& A); + +inline void sqr(mat_RR& X, const mat_RR& A) + { mul(X, A, A); } + +inline mat_RR sqr(const mat_RR& A) + { mat_RR X; sqr(X, A); NTL_OPT_RETURN(mat_RR, X); } + +void inv(mat_RR& X, const mat_RR& A); + +inline mat_RR inv(const mat_RR& A) + { mat_RR X; inv(X, A); NTL_OPT_RETURN(mat_RR, X); } + +void power(mat_RR& X, const mat_RR& A, const ZZ& e); +inline mat_RR power(const mat_RR& A, const ZZ& e) + { mat_RR X; power(X, A, e); NTL_OPT_RETURN(mat_RR, X); } + +inline void power(mat_RR& X, const mat_RR& A, long e) + { power(X, A, ZZ_expo(e)); } +inline mat_RR power(const mat_RR& A, long e) + { mat_RR X; power(X, A, e); NTL_OPT_RETURN(mat_RR, X); } + + + +void diag(mat_RR& X, long n, const RR& d); +inline mat_RR diag(long n, const RR& d) + { mat_RR X; diag(X, n, d); NTL_OPT_RETURN(mat_RR, X); } + +long IsDiag(const mat_RR& A, long n, const RR& d); + + +// miscellaneous: + +RR determinant(const mat_RR& a); +// functional variant of determinant + +inline mat_RR transpose(const mat_RR & a) + { mat_RR x; transpose(x, a); NTL_OPT_RETURN(mat_RR, x); } + + +void clear(mat_RR& a); +// x = 0 (dimension unchanged) + +long IsZero(const mat_RR& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_RR operator+(const mat_RR& a, const 
mat_RR& b); +mat_RR operator-(const mat_RR& a, const mat_RR& b); +mat_RR operator*(const mat_RR& a, const mat_RR& b); + +mat_RR operator-(const mat_RR& a); + + +// matrix/vector multiplication: + +vec_RR operator*(const mat_RR& a, const vec_RR& b); + +vec_RR operator*(const vec_RR& a, const mat_RR& b); + + + +// matrix/scalar multiplication: + +inline mat_RR operator*(const mat_RR& a, const RR& b) + { mat_RR x; mul(x, a, b); NTL_OPT_RETURN(mat_RR, x); } + +inline mat_RR operator*(const mat_RR& a, double b) + { mat_RR x; mul(x, a, b); NTL_OPT_RETURN(mat_RR, x); } + +inline mat_RR operator*(const RR& a, const mat_RR& b) + { mat_RR x; mul(x, a, b); NTL_OPT_RETURN(mat_RR, x); } + +inline mat_RR operator*(double a, const mat_RR& b) + { mat_RR x; mul(x, a, b); NTL_OPT_RETURN(mat_RR, x); } + + + + +// assignment operator notation: + +inline mat_RR& operator+=(mat_RR& x, const mat_RR& a) +{ + add(x, x, a); + return x; +} + +inline mat_RR& operator-=(mat_RR& x, const mat_RR& a) +{ + sub(x, x, a); + return x; +} + + +inline mat_RR& operator*=(mat_RR& x, const mat_RR& a) +{ + mul(x, x, a); + return x; +} + +inline mat_RR& operator*=(mat_RR& x, const RR& a) +{ + mul(x, x, a); + return x; +} + +inline mat_RR& operator*=(mat_RR& x, double a) +{ + mul(x, x, a); + return x; +} + + +inline vec_RR& operator*=(vec_RR& x, const mat_RR& a) +{ + mul(x, x, a); + return x; +} + + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/mat_ZZ.h b/thirdparty/linux/ntl/include/NTL/mat_ZZ.h new file mode 100644 index 0000000000..6f750d1aea --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/mat_ZZ.h @@ -0,0 +1,184 @@ + +#ifndef NTL_mat_ZZ__H +#define NTL_mat_ZZ__H + +#include +#include +#include +#include + +NTL_OPEN_NNS + +typedef Mat mat_ZZ; + + +void add(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B); +void sub(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B); +void negate(mat_ZZ& X, const mat_ZZ& A); +void mul(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B); +void mul(vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& b); +void mul(vec_ZZ& x, const vec_ZZ& a, const mat_ZZ& B); + +void mul(mat_ZZ& X, const mat_ZZ& A, const ZZ& b); +inline void mul(mat_ZZ& X, const ZZ& a, const mat_ZZ& B) + { mul(X, B, a); } + +void mul(mat_ZZ& X, const mat_ZZ& A, long b); +inline void mul(mat_ZZ& X, long a, const mat_ZZ& B) + { mul(X, B, a); } + +void ident(mat_ZZ& X, long n); +inline mat_ZZ ident_mat_ZZ(long n) + { mat_ZZ X; ident(X, n); NTL_OPT_RETURN(mat_ZZ, X); } + +long IsIdent(const mat_ZZ& A, long n); +void diag(mat_ZZ& X, long n, const ZZ& d); +inline mat_ZZ diag(long n, const ZZ& d) + { mat_ZZ X; diag(X, n, d); NTL_OPT_RETURN(mat_ZZ, X); } + +long IsDiag(const mat_ZZ& A, long n, const ZZ& d); + +void determinant(ZZ& d, const mat_ZZ& A, long deterministic=0); +void solve(ZZ& d, vec_ZZ& x, + const mat_ZZ& A, const vec_ZZ& b, + long deterministic=0); + +void solve1(ZZ& d_out, vec_ZZ& x_out, const mat_ZZ& A, const vec_ZZ& b); + + +inline +void HenselSolve1(ZZ& d_out, vec_ZZ& x_out, const mat_ZZ& A, const vec_ZZ& b) + { solve1(d_out, x_out, A, b); } +// for backward compatability only + + +void inv(ZZ& d, mat_ZZ& X, const mat_ZZ& A, long deterministic=0); + +inline void sqr(mat_ZZ& X, const mat_ZZ& A) + { mul(X, A, A); } + +inline mat_ZZ sqr(const mat_ZZ& A) + { mat_ZZ X; sqr(X, A); NTL_OPT_RETURN(mat_ZZ, X); } + +void inv(mat_ZZ& X, const mat_ZZ& A); + +inline mat_ZZ inv(const mat_ZZ& A) + { mat_ZZ X; inv(X, A); NTL_OPT_RETURN(mat_ZZ, X); } + +void power(mat_ZZ& X, const mat_ZZ& A, const ZZ& e); +inline mat_ZZ power(const 
mat_ZZ& A, const ZZ& e) + { mat_ZZ X; power(X, A, e); NTL_OPT_RETURN(mat_ZZ, X); } + +inline void power(mat_ZZ& X, const mat_ZZ& A, long e) + { power(X, A, ZZ_expo(e)); } +inline mat_ZZ power(const mat_ZZ& A, long e) + { mat_ZZ X; power(X, A, e); NTL_OPT_RETURN(mat_ZZ, X); } + + + +void transpose(mat_ZZ& X, const mat_ZZ& A); +inline mat_ZZ transpose(const mat_ZZ& A) + { mat_ZZ x; transpose(x, A); NTL_OPT_RETURN(mat_ZZ, x); } + +void conv(mat_zz_p& x, const mat_ZZ& a); +inline mat_zz_p to_mat_zz_p(const mat_ZZ& a) + { mat_zz_p x; conv(x, a); NTL_OPT_RETURN(mat_zz_p, x); } + +void conv(mat_ZZ_p& x, const mat_ZZ& a); +inline mat_ZZ_p to_mat_ZZ_p(const mat_ZZ& a) + { mat_ZZ_p x; conv(x, a); NTL_OPT_RETURN(mat_ZZ_p, x); } + +long CRT(mat_ZZ& g, ZZ& a, const mat_zz_p& G); + + +// miscellaneous: + +inline ZZ determinant(const mat_ZZ& a, long deterministic=0) + { ZZ x; determinant(x, a, deterministic); return x; } + +// functional variant of determinant + +void clear(mat_ZZ& a); +// x = 0 (dimension unchanged) + +long IsZero(const mat_ZZ& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_ZZ operator+(const mat_ZZ& a, const mat_ZZ& b); +mat_ZZ operator-(const mat_ZZ& a, const mat_ZZ& b); +mat_ZZ operator*(const mat_ZZ& a, const mat_ZZ& b); + +mat_ZZ operator-(const mat_ZZ& a); + + +// matrix/scalar multiplication: + +inline mat_ZZ operator*(const mat_ZZ& a, const ZZ& b) + { mat_ZZ x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ, x); } + +inline mat_ZZ operator*(const mat_ZZ& a, long b) + { mat_ZZ x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ, x); } + +inline mat_ZZ operator*(const ZZ& a, const mat_ZZ& b) + { mat_ZZ x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ, x); } + +inline mat_ZZ operator*(long a, const mat_ZZ& b) + { mat_ZZ x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ, x); } + + +// matrix/vector multiplication: + +vec_ZZ operator*(const mat_ZZ& a, const vec_ZZ& b); + +vec_ZZ operator*(const vec_ZZ& a, const mat_ZZ& b); + + + +// assignment operator notation: + +inline mat_ZZ& operator+=(mat_ZZ& x, const mat_ZZ& a) +{ + add(x, x, a); + return x; +} + +inline mat_ZZ& operator-=(mat_ZZ& x, const mat_ZZ& a) +{ + sub(x, x, a); + return x; +} + + +inline mat_ZZ& operator*=(mat_ZZ& x, const mat_ZZ& a) +{ + mul(x, x, a); + return x; +} + +inline mat_ZZ& operator*=(mat_ZZ& x, const ZZ& a) +{ + mul(x, x, a); + return x; +} + +inline mat_ZZ& operator*=(mat_ZZ& x, long a) +{ + mul(x, x, a); + return x; +} + +inline vec_ZZ& operator*=(vec_ZZ& x, const mat_ZZ& a) +{ + mul(x, x, a); + return x; +} + +NTL_CLOSE_NNS + + + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/mat_ZZ_p.h b/thirdparty/linux/ntl/include/NTL/mat_ZZ_p.h new file mode 100644 index 0000000000..cf1e2479a5 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/mat_ZZ_p.h @@ -0,0 +1,168 @@ + +#ifndef NTL_mat_ZZ_p__H +#define NTL_mat_ZZ_p__H + +#include +#include +#include + +NTL_OPEN_NNS + +typedef Mat mat_ZZ_p; + +void add(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B); +void sub(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B); +void negate(mat_ZZ_p& X, const mat_ZZ_p& A); +void mul(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B); +void mul(vec_ZZ_p& x, const mat_ZZ_p& A, const vec_ZZ_p& b); +void mul(vec_ZZ_p& x, const vec_ZZ_p& a, const mat_ZZ_p& B); + +void mul(mat_ZZ_p& X, const mat_ZZ_p& A, const ZZ_p& b); +void mul(mat_ZZ_p& X, const mat_ZZ_p& A, long b); + +inline void mul(mat_ZZ_p& X, const ZZ_p& a, const mat_ZZ_p& B) + { mul(X, B, a); } + +inline void mul(mat_ZZ_p& X, long a, const mat_ZZ_p& B) + { mul(X, B, 
a); } + +void ident(mat_ZZ_p& X, long n); +inline mat_ZZ_p ident_mat_ZZ_p(long n) + { mat_ZZ_p X; ident(X, n); NTL_OPT_RETURN(mat_ZZ_p, X); } + + + +void determinant(ZZ_p& d, const mat_ZZ_p& A); +long IsIdent(const mat_ZZ_p& A, long n); +void transpose(mat_ZZ_p& X, const mat_ZZ_p& A); +void solve(ZZ_p& d, vec_ZZ_p& X, const mat_ZZ_p& A, const vec_ZZ_p& b); +void solve(ZZ_p& d, const mat_ZZ_p& A, vec_ZZ_p& x, const vec_ZZ_p& b); +void inv(ZZ_p& d, mat_ZZ_p& X, const mat_ZZ_p& A); + +inline void sqr(mat_ZZ_p& X, const mat_ZZ_p& A) + { mul(X, A, A); } + +inline mat_ZZ_p sqr(const mat_ZZ_p& A) + { mat_ZZ_p X; sqr(X, A); NTL_OPT_RETURN(mat_ZZ_p, X); } + +void inv(mat_ZZ_p& X, const mat_ZZ_p& A); + +inline mat_ZZ_p inv(const mat_ZZ_p& A) + { mat_ZZ_p X; inv(X, A); NTL_OPT_RETURN(mat_ZZ_p, X); } + +void power(mat_ZZ_p& X, const mat_ZZ_p& A, const ZZ& e); +inline mat_ZZ_p power(const mat_ZZ_p& A, const ZZ& e) + { mat_ZZ_p X; power(X, A, e); NTL_OPT_RETURN(mat_ZZ_p, X); } + +inline void power(mat_ZZ_p& X, const mat_ZZ_p& A, long e) + { power(X, A, ZZ_expo(e)); } +inline mat_ZZ_p power(const mat_ZZ_p& A, long e) + { mat_ZZ_p X; power(X, A, e); NTL_OPT_RETURN(mat_ZZ_p, X); } + + +void diag(mat_ZZ_p& X, long n, const ZZ_p& d); +inline mat_ZZ_p diag(long n, const ZZ_p& d) + { mat_ZZ_p X; diag(X, n, d); NTL_OPT_RETURN(mat_ZZ_p, X); } + +long IsDiag(const mat_ZZ_p& A, long n, const ZZ_p& d); + + +long gauss(mat_ZZ_p& M); +long gauss(mat_ZZ_p& M, long w); +void image(mat_ZZ_p& X, const mat_ZZ_p& A); +void kernel(mat_ZZ_p& X, const mat_ZZ_p& A); + + + + +inline ZZ_p determinant(const mat_ZZ_p& a) + { ZZ_p x; determinant(x, a); return x; } +// functional variant of determinant + +inline mat_ZZ_p transpose(const mat_ZZ_p & a) + { mat_ZZ_p x; transpose(x, a); NTL_OPT_RETURN(mat_ZZ_p, x); } + +void clear(mat_ZZ_p& a); +// x = 0 (dimension unchanged) + +long IsZero(const mat_ZZ_p& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_ZZ_p operator+(const mat_ZZ_p& a, const mat_ZZ_p& b); +mat_ZZ_p operator-(const mat_ZZ_p& a, const mat_ZZ_p& b); +mat_ZZ_p operator*(const mat_ZZ_p& a, const mat_ZZ_p& b); + +mat_ZZ_p operator-(const mat_ZZ_p& a); + + +// matrix/scalar multiplication: + +inline mat_ZZ_p operator*(const mat_ZZ_p& a, const ZZ_p& b) + { mat_ZZ_p x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ_p, x); } + +inline mat_ZZ_p operator*(const mat_ZZ_p& a, long b) + { mat_ZZ_p x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ_p, x); } + +inline mat_ZZ_p operator*(const ZZ_p& a, const mat_ZZ_p& b) + { mat_ZZ_p x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ_p, x); } + +inline mat_ZZ_p operator*(long a, const mat_ZZ_p& b) + { mat_ZZ_p x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ_p, x); } + +// matrix/vector multiplication: + +vec_ZZ_p operator*(const mat_ZZ_p& a, const vec_ZZ_p& b); + +vec_ZZ_p operator*(const vec_ZZ_p& a, const mat_ZZ_p& b); + + + + +// assignment operator notation: + +inline mat_ZZ_p& operator+=(mat_ZZ_p& x, const mat_ZZ_p& a) +{ + add(x, x, a); + return x; +} + +inline mat_ZZ_p& operator-=(mat_ZZ_p& x, const mat_ZZ_p& a) +{ + sub(x, x, a); + return x; +} + + +inline mat_ZZ_p& operator*=(mat_ZZ_p& x, const mat_ZZ_p& a) +{ + mul(x, x, a); + return x; +} + +inline mat_ZZ_p& operator*=(mat_ZZ_p& x, const ZZ_p& a) +{ + mul(x, x, a); + return x; +} + +inline mat_ZZ_p& operator*=(mat_ZZ_p& x, long a) +{ + mul(x, x, a); + return x; +} + + +inline vec_ZZ_p& operator*=(vec_ZZ_p& x, const mat_ZZ_p& a) +{ + mul(x, x, a); + return x; +} + +NTL_CLOSE_NNS + + + +#endif diff --git 
a/thirdparty/linux/ntl/include/NTL/mat_ZZ_pE.h b/thirdparty/linux/ntl/include/NTL/mat_ZZ_pE.h new file mode 100644 index 0000000000..9a0aa7f970 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/mat_ZZ_pE.h @@ -0,0 +1,178 @@ + +#ifndef NTL_mat_ZZ_pE__H +#define NTL_mat_ZZ_pE__H + +#include + +NTL_OPEN_NNS + +typedef Mat mat_ZZ_pE; + +void add(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B); +void sub(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B); +void negate(mat_ZZ_pE& X, const mat_ZZ_pE& A); +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B); +void mul(vec_ZZ_pE& x, const mat_ZZ_pE& A, const vec_ZZ_pE& b); +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const mat_ZZ_pE& B); + +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ_pE& b); + +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ_p& b); +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, long b); + +inline void mul(mat_ZZ_pE& X, const ZZ_pE& a, const mat_ZZ_pE& B) + { mul(X, B, a); } + +inline void mul(mat_ZZ_pE& X, const ZZ_p& a, const mat_ZZ_pE& B) + { mul(X, B, a); } + +inline void mul(mat_ZZ_pE& X, long a, const mat_ZZ_pE& B) + { mul(X, B, a); } + + +void ident(mat_ZZ_pE& X, long n); +inline mat_ZZ_pE ident_mat_ZZ_pE(long n) + { mat_ZZ_pE X; ident(X, n); NTL_OPT_RETURN(mat_ZZ_pE, X); } + + +void determinant(ZZ_pE& d, const mat_ZZ_pE& A); +inline ZZ_pE determinant(const mat_ZZ_pE& A) + { ZZ_pE d; determinant(d, A); NTL_OPT_RETURN(ZZ_pE, d); } + +long IsIdent(const mat_ZZ_pE& A, long n); + +void transpose(mat_ZZ_pE& X, const mat_ZZ_pE& A); +inline mat_ZZ_pE transpose(const mat_ZZ_pE& A) + { mat_ZZ_pE X; transpose(X, A); NTL_OPT_RETURN(mat_ZZ_pE, X); } + +void solve(ZZ_pE& d, vec_ZZ_pE& x, const mat_ZZ_pE& A, const vec_ZZ_pE& b); +void solve(ZZ_pE& d, const mat_ZZ_pE& A, vec_ZZ_pE& x, const vec_ZZ_pE& b); + +void inv(ZZ_pE& d, mat_ZZ_pE& X, const mat_ZZ_pE& A); + +inline void sqr(mat_ZZ_pE& X, const mat_ZZ_pE& A) + { mul(X, A, A); } + +inline mat_ZZ_pE sqr(const mat_ZZ_pE& A) + { mat_ZZ_pE X; sqr(X, A); NTL_OPT_RETURN(mat_ZZ_pE, X); } + +void inv(mat_ZZ_pE& X, const mat_ZZ_pE& A); + +inline mat_ZZ_pE inv(const mat_ZZ_pE& A) + { mat_ZZ_pE X; inv(X, A); NTL_OPT_RETURN(mat_ZZ_pE, X); } + +void power(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ& e); +inline mat_ZZ_pE power(const mat_ZZ_pE& A, const ZZ& e) + { mat_ZZ_pE X; power(X, A, e); NTL_OPT_RETURN(mat_ZZ_pE, X); } + +inline void power(mat_ZZ_pE& X, const mat_ZZ_pE& A, long e) + { power(X, A, ZZ_expo(e)); } +inline mat_ZZ_pE power(const mat_ZZ_pE& A, long e) + { mat_ZZ_pE X; power(X, A, e); NTL_OPT_RETURN(mat_ZZ_pE, X); } + + +void diag(mat_ZZ_pE& X, long n, const ZZ_pE& d); +inline mat_ZZ_pE diag(long n, const ZZ_pE& d) + { mat_ZZ_pE X; diag(X, n, d); NTL_OPT_RETURN(mat_ZZ_pE, X); } + +long IsDiag(const mat_ZZ_pE& A, long n, const ZZ_pE& d); + + +long gauss(mat_ZZ_pE& M); +long gauss(mat_ZZ_pE& M, long w); +void image(mat_ZZ_pE& X, const mat_ZZ_pE& A); +void kernel(mat_ZZ_pE& X, const mat_ZZ_pE& A); + + + + +void clear(mat_ZZ_pE& a); +// x = 0 (dimension unchanged) + +long IsZero(const mat_ZZ_pE& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_ZZ_pE operator+(const mat_ZZ_pE& a, const mat_ZZ_pE& b); +mat_ZZ_pE operator-(const mat_ZZ_pE& a, const mat_ZZ_pE& b); +mat_ZZ_pE operator*(const mat_ZZ_pE& a, const mat_ZZ_pE& b); + +mat_ZZ_pE operator-(const mat_ZZ_pE& a); + + +// matrix/scalar multiplication: + +inline mat_ZZ_pE operator*(const mat_ZZ_pE& a, const ZZ_pE& b) + { mat_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ_pE, 
x); } +inline mat_ZZ_pE operator*(const mat_ZZ_pE& a, const ZZ_p& b) + { mat_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ_pE, x); } +inline mat_ZZ_pE operator*(const mat_ZZ_pE& a, long b) + { mat_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ_pE, x); } + +inline mat_ZZ_pE operator*(const ZZ_pE& a, const mat_ZZ_pE& b) + { mat_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ_pE, x); } +inline mat_ZZ_pE operator*(const ZZ_p& a, const mat_ZZ_pE& b) + { mat_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ_pE, x); } +inline mat_ZZ_pE operator*(long a, const mat_ZZ_pE& b) + { mat_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_ZZ_pE, x); } + +// matrix/vector multiplication: + +vec_ZZ_pE operator*(const mat_ZZ_pE& a, const vec_ZZ_pE& b); + +vec_ZZ_pE operator*(const vec_ZZ_pE& a, const mat_ZZ_pE& b); + + + + +// assignment operator notation: + +inline mat_ZZ_pE& operator+=(mat_ZZ_pE& x, const mat_ZZ_pE& a) +{ + add(x, x, a); + return x; +} + +inline mat_ZZ_pE& operator-=(mat_ZZ_pE& x, const mat_ZZ_pE& a) +{ + sub(x, x, a); + return x; +} + + +inline mat_ZZ_pE& operator*=(mat_ZZ_pE& x, const mat_ZZ_pE& a) +{ + mul(x, x, a); + return x; +} + +inline mat_ZZ_pE& operator*=(mat_ZZ_pE& x, const ZZ_pE& a) +{ + mul(x, x, a); + return x; +} + +inline mat_ZZ_pE& operator*=(mat_ZZ_pE& x, const ZZ_p& a) +{ + mul(x, x, a); + return x; +} + +inline mat_ZZ_pE& operator*=(mat_ZZ_pE& x, long a) +{ + mul(x, x, a); + return x; +} + + +inline vec_ZZ_pE& operator*=(vec_ZZ_pE& x, const mat_ZZ_pE& a) +{ + mul(x, x, a); + return x; +} + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/mat_lzz_p.h b/thirdparty/linux/ntl/include/NTL/mat_lzz_p.h new file mode 100644 index 0000000000..e7f9d56174 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/mat_lzz_p.h @@ -0,0 +1,216 @@ + +#ifndef NTL_mat_zz_p__H +#define NTL_mat_zz_p__H + +#include +#include + +NTL_OPEN_NNS + +typedef Mat mat_zz_p; + + +void add(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B); +void sub(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B); +void negate(mat_zz_p& X, const mat_zz_p& A); +void mul(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B); +void mul(vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b); +void mul(vec_zz_p& x, const vec_zz_p& a, const mat_zz_p& B); + +void mul(mat_zz_p& X, const mat_zz_p& A, zz_p b); +void mul(mat_zz_p& X, const mat_zz_p& A, long b); + +inline void mul(mat_zz_p& X, zz_p a, const mat_zz_p& B) + { mul(X, B, a); } + +inline void mul(mat_zz_p& X, long a, const mat_zz_p& B) + { mul(X, B, a); } + + +void ident(mat_zz_p& X, long n); +inline mat_zz_p ident_mat_zz_p(long n) + { mat_zz_p X; ident(X, n); NTL_OPT_RETURN(mat_zz_p, X); } + +long IsIdent(const mat_zz_p& A, long n); +void transpose(mat_zz_p& X, const mat_zz_p& A); + + + +// ************************ + +void relaxed_solve(zz_p& d, vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b, bool relax=true); +void relaxed_solve(zz_p& d, const mat_zz_p& A, vec_zz_p& x, const vec_zz_p& b, bool relax=true); + +void relaxed_inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A, bool relax=true); +inline void relaxed_inv(mat_zz_p& X, const mat_zz_p& A, bool relax=true) + { zz_p d; relaxed_inv(d, X, A, relax); if (d == 0) ArithmeticError("inv: non-invertible matrix"); } +inline mat_zz_p relaxed_inv(const mat_zz_p& A, bool relax=true) + { mat_zz_p X; relaxed_inv(X, A, relax); NTL_OPT_RETURN(mat_zz_p, X); } + +void relaxed_determinant(zz_p& d, const mat_zz_p& A, bool relax=true); +inline zz_p relaxed_determinant(const mat_zz_p& a, bool relax=true) + { zz_p x; relaxed_determinant(x, a, 
relax); return x; } + +void relaxed_power(mat_zz_p& X, const mat_zz_p& A, const ZZ& e, bool relax=true); +inline mat_zz_p relaxed_power(const mat_zz_p& A, const ZZ& e, bool relax=true) + { mat_zz_p X; relaxed_power(X, A, e, relax); NTL_OPT_RETURN(mat_zz_p, X); } +inline void relaxed_power(mat_zz_p& X, const mat_zz_p& A, long e, bool relax=true) + { relaxed_power(X, A, ZZ_expo(e), relax); } +inline mat_zz_p relaxed_power(const mat_zz_p& A, long e, bool relax=true) + { mat_zz_p X; relaxed_power(X, A, e, relax); NTL_OPT_RETURN(mat_zz_p, X); } + +// *********************** + +inline void solve(zz_p& d, vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b) +{ relaxed_solve(d, x, A, b, false); } + +inline void solve(zz_p& d, const mat_zz_p& A, vec_zz_p& x, const vec_zz_p& b) +{ relaxed_solve(d, A, x, b, false); } + +inline void inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A) +{ relaxed_inv(d, X, A, false); } + +inline void inv(mat_zz_p& X, const mat_zz_p& A) +{ relaxed_inv(X, A, false); } + +inline mat_zz_p inv(const mat_zz_p& A) +{ return relaxed_inv(A, false); } + +inline void determinant(zz_p& d, const mat_zz_p& A) +{ relaxed_determinant(d, A, false); } + +inline zz_p determinant(const mat_zz_p& a) +{ return relaxed_determinant(a, false); } + +inline void power(mat_zz_p& X, const mat_zz_p& A, const ZZ& e) +{ relaxed_power(X, A, e, false); } + +inline mat_zz_p power(const mat_zz_p& A, const ZZ& e) +{ return relaxed_power(A, e, false); } + +inline void power(mat_zz_p& X, const mat_zz_p& A, long e) +{ relaxed_power(X, A, e, false); } + +inline mat_zz_p power(const mat_zz_p& A, long e) +{ return relaxed_power(A, e, false); } + +// ************************ + + +inline void sqr(mat_zz_p& X, const mat_zz_p& A) + { mul(X, A, A); } + +inline mat_zz_p sqr(const mat_zz_p& A) + { mat_zz_p X; sqr(X, A); NTL_OPT_RETURN(mat_zz_p, X); } + + + + + +void diag(mat_zz_p& X, long n, zz_p d); +inline mat_zz_p diag(long n, zz_p d) + { mat_zz_p X; diag(X, n, d); NTL_OPT_RETURN(mat_zz_p, X); } + +long IsDiag(const mat_zz_p& A, long n, zz_p d); + + +long gauss(mat_zz_p& M); +long gauss(mat_zz_p& M, long w); +void image(mat_zz_p& X, const mat_zz_p& A); +void kernel(mat_zz_p& X, const mat_zz_p& A); + + + +// miscellaneous: + + +inline mat_zz_p transpose(const mat_zz_p& a) + { mat_zz_p x; transpose(x, a); NTL_OPT_RETURN(mat_zz_p, x); } + +void clear(mat_zz_p& a); +// x = 0 (dimension unchanged) + +long IsZero(const mat_zz_p& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_zz_p operator+(const mat_zz_p& a, const mat_zz_p& b); +mat_zz_p operator-(const mat_zz_p& a, const mat_zz_p& b); +mat_zz_p operator*(const mat_zz_p& a, const mat_zz_p& b); + +mat_zz_p operator-(const mat_zz_p& a); + + +// matrix/scalar multiplication: + +inline mat_zz_p operator*(const mat_zz_p& a, zz_p b) + { mat_zz_p x; mul(x, a, b); NTL_OPT_RETURN(mat_zz_p, x); } + +inline mat_zz_p operator*(const mat_zz_p& a, long b) + { mat_zz_p x; mul(x, a, b); NTL_OPT_RETURN(mat_zz_p, x); } + +inline mat_zz_p operator*(zz_p a, const mat_zz_p& b) + { mat_zz_p x; mul(x, a, b); NTL_OPT_RETURN(mat_zz_p, x); } + +inline mat_zz_p operator*(long a, const mat_zz_p& b) + { mat_zz_p x; mul(x, a, b); NTL_OPT_RETURN(mat_zz_p, x); } + + + +// matrix/vector multiplication: + +vec_zz_p operator*(const mat_zz_p& a, const vec_zz_p& b); + +vec_zz_p operator*(const vec_zz_p& a, const mat_zz_p& b); + + + + +// assignment operator notation: + +inline mat_zz_p& operator+=(mat_zz_p& x, const mat_zz_p& a) +{ + add(x, x, a); + return x; +} + +inline 
mat_zz_p& operator-=(mat_zz_p& x, const mat_zz_p& a) +{ + sub(x, x, a); + return x; +} + + +inline mat_zz_p& operator*=(mat_zz_p& x, const mat_zz_p& a) +{ + mul(x, x, a); + return x; +} + +inline mat_zz_p& operator*=(mat_zz_p& x, zz_p a) +{ + mul(x, x, a); + return x; +} + +inline mat_zz_p& operator*=(mat_zz_p& x, long a) +{ + mul(x, x, a); + return x; +} + + +inline vec_zz_p& operator*=(vec_zz_p& x, const mat_zz_p& a) +{ + mul(x, x, a); + return x; +} + + + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/mat_lzz_pE.h b/thirdparty/linux/ntl/include/NTL/mat_lzz_pE.h new file mode 100644 index 0000000000..3e50f70f55 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/mat_lzz_pE.h @@ -0,0 +1,178 @@ + +#ifndef NTL_mat_zz_pE__H +#define NTL_mat_zz_pE__H + +#include + +NTL_OPEN_NNS + +typedef Mat mat_zz_pE; + +void add(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B); +void sub(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B); +void negate(mat_zz_pE& X, const mat_zz_pE& A); +void mul(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B); +void mul(vec_zz_pE& x, const mat_zz_pE& A, const vec_zz_pE& b); +void mul(vec_zz_pE& x, const vec_zz_pE& a, const mat_zz_pE& B); + +void mul(mat_zz_pE& X, const mat_zz_pE& A, const zz_pE& b); + +void mul(mat_zz_pE& X, const mat_zz_pE& A, const zz_p& b); +void mul(mat_zz_pE& X, const mat_zz_pE& A, long b); + +inline void mul(mat_zz_pE& X, const zz_pE& a, const mat_zz_pE& B) + { mul(X, B, a); } + +inline void mul(mat_zz_pE& X, const zz_p& a, const mat_zz_pE& B) + { mul(X, B, a); } + +inline void mul(mat_zz_pE& X, long a, const mat_zz_pE& B) + { mul(X, B, a); } + + +void ident(mat_zz_pE& X, long n); +inline mat_zz_pE ident_mat_zz_pE(long n) + { mat_zz_pE X; ident(X, n); NTL_OPT_RETURN(mat_zz_pE, X); } + + +void determinant(zz_pE& d, const mat_zz_pE& A); +inline zz_pE determinant(const mat_zz_pE& A) + { zz_pE d; determinant(d, A); NTL_OPT_RETURN(zz_pE, d); } + +long IsIdent(const mat_zz_pE& A, long n); + +void transpose(mat_zz_pE& X, const mat_zz_pE& A); +inline mat_zz_pE transpose(const mat_zz_pE& A) + { mat_zz_pE X; transpose(X, A); NTL_OPT_RETURN(mat_zz_pE, X); } + +void solve(zz_pE& d, vec_zz_pE& x, const mat_zz_pE& A, const vec_zz_pE& b); +void solve(zz_pE& d, const mat_zz_pE& A, vec_zz_pE& x, const vec_zz_pE& b); + +void inv(zz_pE& d, mat_zz_pE& X, const mat_zz_pE& A); + +inline void sqr(mat_zz_pE& X, const mat_zz_pE& A) + { mul(X, A, A); } + +inline mat_zz_pE sqr(const mat_zz_pE& A) + { mat_zz_pE X; sqr(X, A); NTL_OPT_RETURN(mat_zz_pE, X); } + +void inv(mat_zz_pE& X, const mat_zz_pE& A); + +inline mat_zz_pE inv(const mat_zz_pE& A) + { mat_zz_pE X; inv(X, A); NTL_OPT_RETURN(mat_zz_pE, X); } + +void power(mat_zz_pE& X, const mat_zz_pE& A, const ZZ& e); +inline mat_zz_pE power(const mat_zz_pE& A, const ZZ& e) + { mat_zz_pE X; power(X, A, e); NTL_OPT_RETURN(mat_zz_pE, X); } + +inline void power(mat_zz_pE& X, const mat_zz_pE& A, long e) + { power(X, A, ZZ_expo(e)); } +inline mat_zz_pE power(const mat_zz_pE& A, long e) + { mat_zz_pE X; power(X, A, e); NTL_OPT_RETURN(mat_zz_pE, X); } + + +void diag(mat_zz_pE& X, long n, const zz_pE& d); +inline mat_zz_pE diag(long n, const zz_pE& d) + { mat_zz_pE X; diag(X, n, d); NTL_OPT_RETURN(mat_zz_pE, X); } + +long IsDiag(const mat_zz_pE& A, long n, const zz_pE& d); + + +long gauss(mat_zz_pE& M); +long gauss(mat_zz_pE& M, long w); +void image(mat_zz_pE& X, const mat_zz_pE& A); +void kernel(mat_zz_pE& X, const mat_zz_pE& A); + + + + +void clear(mat_zz_pE& a); +// x = 0 (dimension unchanged) + 
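+
+// Usage sketch (illustrative only, not part of the upstream header): setting
+// up the extension field and computing a kernel. Assumes NTL is built and
+// installed; BuildIrred_zz_pX comes from <NTL/lzz_pXFactoring.h>, and the
+// parameters (p = 17, extension degree 3) are arbitrary examples:
+//
+//    #include <NTL/mat_lzz_pE.h>
+//    #include <NTL/lzz_pXFactoring.h>
+//    using namespace NTL;
+//
+//    int main()
+//    {
+//       zz_p::init(17);                    // base field GF(17)
+//       zz_pE::init(BuildIrred_zz_pX(3));  // extension field GF(17^3)
+//
+//       mat_zz_pE M;
+//       M.SetDims(2, 3);                   // 2x3 zero matrix
+//       mat_zz_pE K;
+//       kernel(K, M);                      // rows of K span { x : x*M == 0 };
+//                                          // for the zero matrix that is all
+//                                          // of GF(17^3)^2, so K has 2 rows
+//       return 0;
+//    }
+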
+long IsZero(const mat_zz_pE& a); +// test if a is the zero matrix (any dimension) + + +// operator notation: + +mat_zz_pE operator+(const mat_zz_pE& a, const mat_zz_pE& b); +mat_zz_pE operator-(const mat_zz_pE& a, const mat_zz_pE& b); +mat_zz_pE operator*(const mat_zz_pE& a, const mat_zz_pE& b); + +mat_zz_pE operator-(const mat_zz_pE& a); + + +// matrix/scalar multiplication: + +inline mat_zz_pE operator*(const mat_zz_pE& a, const zz_pE& b) + { mat_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_zz_pE, x); } +inline mat_zz_pE operator*(const mat_zz_pE& a, const zz_p& b) + { mat_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_zz_pE, x); } +inline mat_zz_pE operator*(const mat_zz_pE& a, long b) + { mat_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_zz_pE, x); } + +inline mat_zz_pE operator*(const zz_pE& a, const mat_zz_pE& b) + { mat_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_zz_pE, x); } +inline mat_zz_pE operator*(const zz_p& a, const mat_zz_pE& b) + { mat_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_zz_pE, x); } +inline mat_zz_pE operator*(long a, const mat_zz_pE& b) + { mat_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(mat_zz_pE, x); } + +// matrix/vector multiplication: + +vec_zz_pE operator*(const mat_zz_pE& a, const vec_zz_pE& b); + +vec_zz_pE operator*(const vec_zz_pE& a, const mat_zz_pE& b); + + + + +// assignment operator notation: + +inline mat_zz_pE& operator+=(mat_zz_pE& x, const mat_zz_pE& a) +{ + add(x, x, a); + return x; +} + +inline mat_zz_pE& operator-=(mat_zz_pE& x, const mat_zz_pE& a) +{ + sub(x, x, a); + return x; +} + + +inline mat_zz_pE& operator*=(mat_zz_pE& x, const mat_zz_pE& a) +{ + mul(x, x, a); + return x; +} + +inline mat_zz_pE& operator*=(mat_zz_pE& x, const zz_pE& a) +{ + mul(x, x, a); + return x; +} + +inline mat_zz_pE& operator*=(mat_zz_pE& x, const zz_p& a) +{ + mul(x, x, a); + return x; +} + +inline mat_zz_pE& operator*=(mat_zz_pE& x, long a) +{ + mul(x, x, a); + return x; +} + + +inline vec_zz_pE& operator*=(vec_zz_pE& x, const mat_zz_pE& a) +{ + mul(x, x, a); + return x; +} + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/mat_poly_ZZ.h b/thirdparty/linux/ntl/include/NTL/mat_poly_ZZ.h new file mode 100644 index 0000000000..30b00956d4 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/mat_poly_ZZ.h @@ -0,0 +1,14 @@ + +#ifndef NTL_mat_poly_ZZ__H +#define NTL_mat_poly_ZZ__H + +#include +#include + +NTL_OPEN_NNS + +void CharPoly(ZZX& f, const mat_ZZ& M, long deterministic=0); + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/mat_poly_ZZ_p.h b/thirdparty/linux/ntl/include/NTL/mat_poly_ZZ_p.h new file mode 100644 index 0000000000..603844a400 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/mat_poly_ZZ_p.h @@ -0,0 +1,15 @@ + +#ifndef NTL_mat_poly_ZZ_p__H +#define NTL_mat_poly_ZZ_p__H + +#include +#include + +NTL_OPEN_NNS + +void CharPoly(ZZ_pX& f, const mat_ZZ_p& M); + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/mat_poly_lzz_p.h b/thirdparty/linux/ntl/include/NTL/mat_poly_lzz_p.h new file mode 100644 index 0000000000..ea06a359d2 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/mat_poly_lzz_p.h @@ -0,0 +1,15 @@ + +#ifndef NTL_mat_poly_zz_p__H +#define NTL_mat_poly_zz_p__H + +#include +#include + +NTL_OPEN_NNS + +void CharPoly(zz_pX& f, const mat_zz_p& M); + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/matrix.h b/thirdparty/linux/ntl/include/NTL/matrix.h new file mode 100644 index 0000000000..2ada4aa017 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/matrix.h @@ -0,0 +1,254 
@@ +#ifndef NTL_matrix__H +#define NTL_matrix__H + +#include +#include + + +// matrix templates + +NTL_OPEN_NNS + + +template +class Mat { +private: + + struct Fixer { + long m; + + explicit Fixer(long _m) : m(_m) { } + void operator()(Vec& v) { v.FixLength(m); } + }; + +public: + + // pseudo-private fields + Vec< Vec > _mat__rep; + long _mat__numcols; + + + + // really public fields + + typedef typename Vec::value_type value_type; + typedef typename Vec::reference reference; + typedef typename Vec::const_reference const_reference; + + + Mat() : _mat__numcols(0) { } + Mat(const Mat& a); + Mat& operator=(const Mat& a); + ~Mat() { } + + Mat(INIT_SIZE_TYPE, long n, long m); + + void kill(); + + void SetDims(long n, long m); + + long NumRows() const { return _mat__rep.length(); } + long NumCols() const { return _mat__numcols; } + + Vec& operator[](long i) { return _mat__rep[i]; } + const Vec& operator[](long i) const { return _mat__rep[i]; } + + Vec& operator()(long i) { return _mat__rep[i-1]; } + const Vec& operator()(long i) const { return _mat__rep[i-1]; } + + reference operator()(long i, long j) { return _mat__rep[i-1][j-1]; } + const_reference operator()(long i, long j) const + { return _mat__rep[i-1][j-1]; } + + const_reference get(long i, long j) const { return _mat__rep[i].get(j); } + void put(long i, long j, const T& a) { _mat__rep[i].put(j, a); } + + template + void put(long i, long j, const U& a) { _mat__rep[i].put(j, a); } + + + long position(const Vec& a) const { return _mat__rep.position(a); } + long position1(const Vec& a) const { return _mat__rep.position1(a); } + Mat(Mat& x, INIT_TRANS_TYPE) : + _mat__rep(x._mat__rep, INIT_TRANS), _mat__numcols(x._mat__numcols) { } + + void swap(Mat& other) + { + _mat__rep.swap(other._mat__rep); + _ntl_swap(_mat__numcols, other._mat__numcols); + } +}; + +template +inline const Vec< Vec >& rep(const Mat& a) + { return a._mat__rep; } + + +template +Mat::Mat(const Mat& src) : + _mat__rep(src._mat__rep), _mat__numcols(src._mat__numcols) +{ + long i, nrows; + + nrows = _mat__rep.length(); + for (i = 0; i < nrows; i++) + _mat__rep[i].FixAtCurrentLength(); +} + +template +Mat& Mat::operator=(const Mat& src) +{ + if (this == &src) return *this; + + if (src.NumCols() == 0) + SetDims(src.NumRows(), src.NumCols()); + else if (NumCols() != src.NumCols()) { + Mat tmp(src); + this->swap(tmp); + } + else { + long i, init, len; + + init = _mat__rep.MaxLength(); + len = src._mat__rep.length(); + + _mat__rep = src._mat__rep; + + for (i = init; i < len; i++) + _mat__rep[i].FixAtCurrentLength(); + } + + return *this; +} + +template +Mat::Mat(INIT_SIZE_TYPE, long n, long m) : _mat__numcols(0) +{ + SetDims(n, m); +} + +template +void Mat::kill() +{ + Mat tmp; + this->swap(tmp); +} + + +// This is designed to provide strong ES +template +void Mat::SetDims(long n, long m) +{ + if (n < 0 || m < 0) + LogicError("SetDims: bad args"); + + long init = _mat__rep.MaxLength(); + + if (init > 0 && m != _mat__numcols) { + Mat tmp; + tmp._mat__rep.SetLengthAndApply(n, Fixer(m)); + tmp._mat__numcols = m; + this->swap(tmp); + } + else { + _mat__rep.SetLengthAndApply(n, Fixer(m)); + _mat__numcols = m; + } + +} + + +template +void MakeMatrix(Mat& x, const Vec< Vec >& a) +{ + long n = a.length(); + + if (n == 0) { + x.SetDims(0, 0); + return; + } + + long m = a[0].length(); + long i; + + for (i = 1; i < n; i++) + if (a[i].length() != m) + LogicError("nonrectangular matrix"); + + x.SetDims(n, m); + for (i = 0; i < n; i++) + x[i] = a[i]; +} + +template +void swap(Mat& X, Mat& Y) +{ + 
X.swap(Y); +} + +template +long operator==(const Mat& a, const Mat& b) +{ + if (a.NumCols() != b.NumCols()) + return 0; + + if (a.NumRows() != b.NumRows()) + return 0; + + long n = a.NumRows(); + long i; + + for (i = 0; i < n; i++) + if (a[i] != b[i]) + return 0; + + return 1; +} + + +template +long operator!=(const Mat& a, const Mat& b) +{ + return !(a == b); +} + + +template +NTL_SNS istream& operator>>(NTL_SNS istream& s, Mat& x) +{ + Vec< Vec > buf; + NTL_INPUT_CHECK_RET(s, s >> buf); + MakeMatrix(x, buf); + return s; +} + +template +NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const Mat& a) +{ + long n = a.NumRows(); + long i; + s << "["; + for (i = 0; i < n; i++) { + s << a[i]; + s << "\n"; + } + s << "]"; + return s; +} + + +// conversion + +template +void conv(Mat& x, const Mat& a) +{ + x.SetDims(a.NumRows(), a.NumCols()); + conv(x._mat__rep, a._mat__rep); +} + + + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/new.h b/thirdparty/linux/ntl/include/NTL/new.h new file mode 100644 index 0000000000..a344c389c9 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/new.h @@ -0,0 +1,11 @@ + +#ifndef NTL_new__H +#define NTL_new__H + +#include +#include + +#define NTL_NEW_OP new (std::nothrow) + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/pair.h b/thirdparty/linux/ntl/include/NTL/pair.h new file mode 100644 index 0000000000..20f2a6ed37 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/pair.h @@ -0,0 +1,90 @@ + +#ifndef NTL_pair__H +#define NTL_pair__H + +#include + +// pair templates + +NTL_OPEN_NNS + +template +class Pair { +public: + S a; + T b; + + Pair() { } + Pair(const Pair& x) : a(x.a), b(x.b) { } + Pair(const S& x, const T& y) : a(x), b(y) { } + Pair& operator=(const Pair& x) { a = x.a; b = x.b; return *this; } + ~Pair() { } +}; + +template +inline Pair cons(const S& x, const T& y) { return Pair(x, y); } + + + +template +inline long operator==(const Pair& x, const Pair& y) + { return x.a == y.a && x.b == y.b; } + +template +inline long operator!=(const Pair& x, const Pair& y) + { return !(x == y); } + + + +template +NTL_SNS istream& operator>>(NTL_SNS istream& s, Pair& x) +{ + long c; + S a; + T b; + + if (!s) NTL_INPUT_ERROR(s, "bad pair input"); + + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + + if (c != '[') + NTL_INPUT_ERROR(s, "bad pair input"); + + s.get(); + + if (!(s >> a)) + NTL_INPUT_ERROR(s, "bad pair input"); + if (!(s >> b)) + NTL_INPUT_ERROR(s, "bad pair input"); + + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + + if (c != ']') + NTL_INPUT_ERROR(s, "bad pair input"); + + s.get(); + + x.a = a; + x.b = b; + return s; +} + +template +NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const Pair& x) +{ + return s << "[" << x.a << " " << x.b << "]"; +} + + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/pair_GF2EX_long.h b/thirdparty/linux/ntl/include/NTL/pair_GF2EX_long.h new file mode 100644 index 0000000000..fe2126d61d --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/pair_GF2EX_long.h @@ -0,0 +1,18 @@ + +#ifndef NTL_pair_GF2EX_long__H +#define NTL_pair_GF2EX_long__H + +#include +#include +#include + +NTL_OPEN_NNS + + +typedef Pair pair_GF2EX_long; + +typedef Vec vec_pair_GF2EX_long; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/pair_GF2X_long.h b/thirdparty/linux/ntl/include/NTL/pair_GF2X_long.h new file mode 100644 index 0000000000..7ec0deeb71 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/pair_GF2X_long.h @@ 
-0,0 +1,16 @@ + +#ifndef NTL_pair_GF2X_long__H +#define NTL_pair_GF2X_long__H + +#include +#include +#include + +NTL_OPEN_NNS + +typedef Pair pair_GF2X_long; +typedef Vec vec_pair_GF2X_long; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/pair_ZZX_long.h b/thirdparty/linux/ntl/include/NTL/pair_ZZX_long.h new file mode 100644 index 0000000000..c6808b1dc0 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/pair_ZZX_long.h @@ -0,0 +1,16 @@ + +#ifndef NTL_pair_ZZX_long__H +#define NTL_pair_ZZX_long__H + +#include +#include +#include + +NTL_OPEN_NNS + +typedef Pair pair_ZZX_long; +typedef Vec vec_pair_ZZX_long; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/pair_ZZ_pEX_long.h b/thirdparty/linux/ntl/include/NTL/pair_ZZ_pEX_long.h new file mode 100644 index 0000000000..bc55f12460 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/pair_ZZ_pEX_long.h @@ -0,0 +1,16 @@ + +#ifndef NTL_pair_ZZ_pEX_long__H +#define NTL_pair_ZZ_pEX_long__H + +#include +#include + +NTL_OPEN_NNS + +typedef Pair pair_ZZ_pEX_long; +typedef Vec vec_pair_ZZ_pEX_long; + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/pair_ZZ_pX_long.h b/thirdparty/linux/ntl/include/NTL/pair_ZZ_pX_long.h new file mode 100644 index 0000000000..1a56efa081 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/pair_ZZ_pX_long.h @@ -0,0 +1,16 @@ + +#ifndef NTL_pair_ZZ_pX_long__H +#define NTL_pair_ZZ_pX_long__H + +#include +#include +#include + +NTL_OPEN_NNS + +typedef Pair pair_ZZ_pX_long; +typedef Vec vec_pair_ZZ_pX_long; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/pair_lzz_pEX_long.h b/thirdparty/linux/ntl/include/NTL/pair_lzz_pEX_long.h new file mode 100644 index 0000000000..ebe2e35326 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/pair_lzz_pEX_long.h @@ -0,0 +1,16 @@ + +#ifndef NTL_pair_zz_pEX_long__H +#define NTL_pair_zz_pEX_long__H + +#include +#include + +NTL_OPEN_NNS + +typedef Pair pair_zz_pEX_long; +typedef Vec vec_pair_zz_pEX_long; + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/pair_lzz_pX_long.h b/thirdparty/linux/ntl/include/NTL/pair_lzz_pX_long.h new file mode 100644 index 0000000000..1dfc686a23 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/pair_lzz_pX_long.h @@ -0,0 +1,16 @@ + +#ifndef NTL_pair_zz_pX_long__H +#define NTL_pair_zz_pX_long__H + +#include +#include +#include + +NTL_OPEN_NNS + +typedef Pair pair_zz_pX_long; +typedef Vec vec_pair_zz_pX_long; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/quad_float.h b/thirdparty/linux/ntl/include/NTL/quad_float.h new file mode 100644 index 0000000000..9c28e5b6d3 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/quad_float.h @@ -0,0 +1,325 @@ + +#ifndef NTL_quad_float__H +#define NTL_quad_float__H + + +/* +Copyright (C) 1997, 1998, 1999, 2000 Victor Shoup + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +***************************************************** + +The quad_float package is derived from the doubledouble package of +Keith Briggs. However, the version employed in NTL has been extensively +modified. Below, I attach the copyright notice from the original +doubledouble package, which is currently available at + + http://www.labs.bt.com/people/briggsk2 + +***************************************************** + +Copyright (C) 1997 Keith Martin Briggs + +Except where otherwise indicated, +this program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +*/ + +#include + +NTL_OPEN_NNS + + +class quad_float { +public: + double hi, lo; + + // Constructors + quad_float() : hi(0), lo(0) {} + + explicit quad_float(double a) : hi(0), lo(0) { *this = a; } + + + inline quad_float& operator=(double x); + + static + NTL_CHEAP_THREAD_LOCAL + long oprec; + + static void SetOutputPrecision(long p); + static long OutputPrecision() { return oprec; } + + quad_float(double x, double y) : hi(x), lo(y) { } // internal use only + // FIXME: add a special argument to this to make it more "internal" + + ~quad_float() {} + +}; // end class quad_float + + + + +#if (NTL_BITS_PER_LONG < NTL_DOUBLE_PRECISION) + +// FIXME: we could make this <=, and even BPL <= DP+1 for +// conversions from signed long...but this is mainly academic + +inline quad_float to_quad_float(long n) { return quad_float(n, 0); } +inline quad_float to_quad_float(unsigned long n) { return quad_float(n, 0); } + +#else + + +quad_float to_quad_float(long n); +quad_float to_quad_float(unsigned long n); + +#endif + + + +#if (NTL_BITS_PER_INT < NTL_DOUBLE_PRECISION) + +inline quad_float to_quad_float(int n) { return quad_float(n, 0); } +inline quad_float to_quad_float(unsigned int n) { return quad_float(n, 0); } + +#else + +inline quad_float to_quad_float(int n) + { return to_quad_float(long(n)); } +inline quad_float to_quad_float(unsigned int n) + { return to_quad_float((unsigned long) n); } + +#endif + + + + +inline quad_float to_quad_float(double x) { return quad_float(TrueDouble(x), 0); } + +inline quad_float to_quad_float(float x) + { return to_quad_float(double(x)); } + +inline quad_float& quad_float::operator=(double x) + { *this = to_quad_float(x); return *this; } + +quad_float operator+(const quad_float&, const quad_float& ); + +inline quad_float operator+(const quad_float& x, double y ) + { return x + to_quad_float(y); } + +inline quad_float operator+(double x, const quad_float& y) + { return to_quad_float(x) + y; } + +quad_float operator-(const quad_float&, const quad_float& ); + +inline quad_float operator-(const quad_float& x, double y ) + { return x - to_quad_float(y); } + +inline quad_float operator-(double x, const 
quad_float& y) + { return to_quad_float(x) - y; } + +quad_float operator*(const quad_float&, const quad_float& ); + +inline quad_float operator*(const quad_float& x, double y ) + { return x * to_quad_float(y); } + +inline quad_float operator*(double x, const quad_float& y) + { return to_quad_float(x) * y; } + +quad_float operator/(const quad_float&, const quad_float& ); + +inline quad_float operator/(const quad_float& x, double y ) + { return x / to_quad_float(y); } + +inline quad_float operator/(double x, const quad_float& y) + { return to_quad_float(x) / y; } + +quad_float operator-(const quad_float& x); + +quad_float& operator+= (quad_float& x, const quad_float& y); +inline quad_float& operator += (quad_float& x, double y) + { x += to_quad_float(y); return x; } + +quad_float& operator-= (quad_float& x, const quad_float& y); +inline quad_float& operator-= (quad_float& x, double y) + { x -= to_quad_float(y); return x; } + +quad_float& operator*= (quad_float& x, const quad_float& y); +inline quad_float& operator*= (quad_float& x, double y) + { x *= to_quad_float(y); return x; } + +quad_float& operator/= (quad_float& x, const quad_float& y); +inline quad_float& operator/= (quad_float& x, double y) + { x /= to_quad_float(y); return x; } + +inline quad_float& operator++(quad_float& a) { a += 1.0; return a; } +inline void operator++(quad_float& a, int) { a += 1.0; } + +inline quad_float& operator--(quad_float& a) { a -= 1.0; return a; } +inline void operator--(quad_float& a, int) { a -= 1.0; } + + +long operator> (const quad_float& x, const quad_float& y); +long operator>=(const quad_float& x, const quad_float& y); +long operator< (const quad_float& x, const quad_float& y); +long operator<=(const quad_float& x, const quad_float& y); +long operator==(const quad_float& x, const quad_float& y); +long operator!=(const quad_float& x, const quad_float& y); + +inline long operator> (const quad_float& x, double y) + { return x > to_quad_float(y); } +inline long operator> (double x, const quad_float& y) + { return to_quad_float(x) > y; } + +inline long operator>=(const quad_float& x, double y) + { return x >= to_quad_float(y); } +inline long operator>=(double x, const quad_float& y) + { return to_quad_float(x) >= y; } + +inline long operator< (const quad_float& x, double y) + { return x < to_quad_float(y); } +inline long operator< (double x, const quad_float& y) + { return to_quad_float(x) < y; } + +inline long operator<=(const quad_float& x, double y) + { return x <= to_quad_float(y); } +inline long operator<=(double x, const quad_float& y) + { return to_quad_float(x) <= y; } + +inline long operator!=(const quad_float& x, double y) + { return x != to_quad_float(y); } +inline long operator!=(double x, const quad_float& y) + { return to_quad_float(x) != y; } + +inline long operator==(const quad_float& x, double y) + { return x == to_quad_float(y); } +inline long operator==(double x, const quad_float& y) + { return to_quad_float(x) == y; } + + +inline long sign(const quad_float& x){ + if (x.hi>0.0) return 1; else if (x.hi<0.0) return -1; else return 0; +} + +long compare(const quad_float&, const quad_float&); + +inline long compare(const quad_float& x, double y) + { return compare(x, to_quad_float(y)); } + +inline long compare(double x, const quad_float& y) + { return compare(to_quad_float(x), y); } + + + +NTL_SNS istream& operator >> (NTL_SNS istream&, quad_float&); +NTL_SNS ostream& operator << (NTL_SNS ostream&, const quad_float&); + + +quad_float sqrt(const quad_float&); +quad_float floor(const 
quad_float&); +quad_float ceil(const quad_float&); +quad_float trunc(const quad_float&); +quad_float fabs(const quad_float&); + +void power(quad_float&, const quad_float&, long); +inline quad_float power(const quad_float& x, long e) + { quad_float z; power(z, x, e); return z; } + +void power2(quad_float&, long); +inline quad_float power2_quad_float(long e) + { quad_float z; power2(z, e); return z; } + + +long to_long(const quad_float&); +inline int to_int(const quad_float& x) { return to_int(to_long(x)); } + +inline double to_double(const quad_float& x) { return x.hi; } + +inline float to_float(const quad_float& x) { return float(x.hi); } + + +inline void conv(quad_float& x, int a) { x = to_quad_float(a); } +inline void conv(quad_float& x, long a) { x = to_quad_float(a); } + +inline void conv(quad_float& x, unsigned int a) { x = to_quad_float(a); } +inline void conv(quad_float& x, unsigned long a) { x = to_quad_float(a); } + +inline void conv(quad_float& x, float a) { x = to_quad_float(a); } +inline void conv(quad_float& x, double a) { x = to_quad_float(a); } + + +inline void conv(long& x, const quad_float& a) { x = to_long(a); } +inline void conv(int& x, const quad_float& a) { x = to_int(a); } +inline void conv(double& x, const quad_float& a) { x = to_double(a); } +inline void conv(float& x, const quad_float& a) { x = to_float(a); } + +void conv(quad_float&, const ZZ&); +inline quad_float to_quad_float(const ZZ& x) + { quad_float z; conv(z, x); return z; } + +void conv(ZZ&, const quad_float&); +inline ZZ to_ZZ(const quad_float& a) + { ZZ x; conv(x, a); NTL_OPT_RETURN(ZZ, x); } + +inline void conv(quad_float& x, const quad_float& a) + { x = a; } +inline quad_float to_quad_float(const quad_float& a) + { return a; } + +quad_float to_quad_float(const char *s); +inline void conv(quad_float& x, const char *s) + { x = to_quad_float(s); } + + + +/* additional legacy conversions for v6 conversion regime */ + +inline void conv(unsigned int& x, const quad_float& a) + { long z; conv(z, a); conv(x, z); } + +inline void conv(unsigned long& x, const quad_float& a) + { long z; conv(z, a); conv(x, z); } + + +/* ------------------------------------- */ + +long IsFinite(quad_float *x); + +long PrecisionOK(); + +quad_float ldexp(const quad_float& x, long exp); + +quad_float exp(const quad_float& x); +quad_float log(const quad_float& x); + +void random(quad_float& x); +quad_float random_quad_float(); + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/sp_arith.h b/thirdparty/linux/ntl/include/NTL/sp_arith.h new file mode 100644 index 0000000000..d3bd0bbb0d --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/sp_arith.h @@ -0,0 +1,1193 @@ + + +#ifndef NTL_sp_arith__H +#define NTL_sp_arith__H + + +/**************************************************************** + + Single-precision modular arithmetic + +*****************************************************************/ + + +/* +these routines implement single-precision modular arithmetic. +If n is the modulus, all inputs should be in the range 0..n-1. +The number n itself should be in the range 1..2^{NTL_SP_NBITS}-1. +*/ + +// I've declared these "static" so that the installation wizard +// has more flexibility, without worrying about the (rather esoteric) +// possibility of the linker complaining when the definitions +// are inconsistent across several files. + +// DIRT: undocumented feature: in all of these MulMod routines, +// the first argument, a, need only be in the range +// 0..2^{NTL_SP_NBITS}-1. 
This assumption is used internally
+// in some NTL routines...I've tried to mark all such uses with a
+// DIRT comment.  I may decide to make this feature part
+// of the documented interface at some point in the future.
+
+// NOTE: this header file is for internal use only, via the ZZ.h header.
+// It is also used in the LIP implementation files c/g_lip_impl.h.
+
+
+#include
+#include
+
+
+NTL_OPEN_NNS
+
+
+#define NTL_HAVE_MULMOD_T
+
+
+
+#if 0
+// the following code can be used to use new-style clients with old versions
+// of NTL
+
+
+#ifndef NTL_HAVE_MULMOD_T
+
+NTL_OPEN_NNS
+
+typedef double mulmod_t;
+typedef double muldivrem_t;
+
+
+static inline double PrepMulMod(long n)
+{
+   return double(1L)/double(n);
+}
+
+static inline double PrepMulDivRem(long b, long n, double ninv)
+{
+   return double(b)*ninv;
+}
+
+static inline double PrepMulDivRem(long b, long n)
+{
+   return double(b)/double(n);
+}
+
+
+static inline double PrepMulModPrecon(long b, long n)
+{
+   return PrepMulModPrecon(b, n, PrepMulMod(n));
+}
+
+NTL_CLOSE_NNS
+
+
+
+#endif
+
+
+
+#endif
+
+
+
+
+
+/*********************************************************
+
+
+HELPER ROUTINES:
+
+long sp_SignMask(long a)
+long sp_SignMask(unsigned long a)
+// if (long(a) < 0) then -1 else 0
+
+bool sp_Negative(unsigned long a)
+// long(a) < 0
+
+long sp_CorrectDeficit(long a, long n)
+long sp_CorrectDeficit(unsigned long a, long n):
+// if (long(a) >= 0) then a else a+n
+
+// it is assumed that n in (0..B), where B = 2^(NTL_BITS_PER_LONG-1),
+// and that long(a) >= -n
+
+long sp_CorrectExcess(long a, long n)
+long sp_CorrectExcess(unsigned long a, long n):
+// if (a < n) then a else a-n
+
+// For the signed version, it is assumed that a >= 0.
+// In either version, it is assumed that
+// n in (0..B) and a-n in (-B..B).
+
+
+These are designed to respect the flags NTL_CLEAN_INT,
+NTL_ARITH_RIGHT_SHIFT, and NTL_AVOID_BRANCHING.
+
+
+*********************************************************/
+
+
+#if (NTL_ARITH_RIGHT_SHIFT && !defined(NTL_CLEAN_INT))
+// DIRT: IMPL-DEF: arithmetic right shift and cast unsigned to signed
+
+static inline
+long sp_SignMask(long a)
+{
+   return a >> (NTL_BITS_PER_LONG-1);
+}
+
+static inline
+long sp_SignMask(unsigned long a)
+{
+   return cast_signed(a) >> (NTL_BITS_PER_LONG-1);
+}
+#else
+static inline
+long sp_SignMask(long a)
+{
+   return -long(cast_unsigned(a) >> (NTL_BITS_PER_LONG-1));
+}
+
+static inline
+long sp_SignMask(unsigned long a)
+{
+   return -long(a >> (NTL_BITS_PER_LONG-1));
+}
+#endif
+
+static inline
+bool sp_Negative(unsigned long a)
+{
+   return clean_cast_signed(a) < 0;
+}
+
+
+
+#if (!defined(NTL_AVOID_BRANCHING))
+
+// The C++ code is written using branching, but
+// on machines with large branch penalties, this code
+// should yield "predicated instructions" (i.e., on x86,
+// conditional moves).  The "branching" version of sp_CorrectExcess
+// is written in a particular way to get optimal machine code:
+// subtract, cmove (tested on clang, gcc, icc).
+
+static inline
+long sp_CorrectDeficit(long a, long n)
+{
+   return a >= 0 ? a : a+n;
+}
+
+template<class T> static inline
+long sp_CorrectDeficitQuo(T& q, long a, long n, long amt=1)
+{
+   return a >= 0 ? a : (q -= amt, a+n);
+}
+
+
+
+static inline
+long sp_CorrectDeficit(unsigned long a, long n)
+{
+   return !sp_Negative(a) ? a : a+n;
+}
+
+template<class T> static inline
+long sp_CorrectDeficitQuo(T& q, unsigned long a, long n, long amt=1)
+{
+   return !sp_Negative(a) ? a : (q -= amt, a+n);
+}
+
+
+
+static inline
+long sp_CorrectExcess(long a, long n)
+{
+   return a-n >= 0 ? a-n : a;
+}
+
+template<class T> static inline
+long sp_CorrectExcessQuo(T& q, long a, long n, long amt=1)
+{
+   return a-n >= 0 ? (q += amt, a-n) : a;
+}
+
+
+
+static inline
+long sp_CorrectExcess(unsigned long a, long n)
+{
+   return !sp_Negative(a-n) ? a-n : a;
+}
+
+template<class T> static inline
+long sp_CorrectExcessQuo(T& q, unsigned long a, long n, long amt=1)
+{
+   return !sp_Negative(a-n) ? (q += amt, a-n) : a;
+}
+
+#else
+
+
+// This C++ code uses traditional masking and adding
+// to avoid branching.
+
+static inline
+long sp_CorrectDeficit(long a, long n)
+{
+   return a + (sp_SignMask(a) & n);
+}
+
+template<class T> static inline
+long sp_CorrectDeficitQuo(T& q, long a, long n, long amt=1)
+{
+   q += sp_SignMask(a)*amt;
+   return a + (sp_SignMask(a) & n);
+}
+
+
+
+static inline
+long sp_CorrectDeficit(unsigned long a, long n)
+{
+   return a + (sp_SignMask(a) & n);
+}
+
+template<class T> static inline
+long sp_CorrectDeficitQuo(T& q, unsigned long a, long n, long amt=1)
+{
+   q += sp_SignMask(a)*amt;
+   return a + (sp_SignMask(a) & n);
+}
+
+
+
+static inline
+long sp_CorrectExcess(long a, long n)
+{
+   return (a-n) + (sp_SignMask(a-n) & n);
+}
+
+template<class T> static inline
+long sp_CorrectExcessQuo(T& q, long a, long n, long amt=1)
+{
+   q += (1L + sp_SignMask(a-n))*amt;
+   return (a-n) + (sp_SignMask(a-n) & n);
+}
+
+
+
+static inline
+long sp_CorrectExcess(unsigned long a, long n)
+{
+   return (a-n) + (sp_SignMask(a-n) & n);
+}
+
+template<class T> static inline
+long sp_CorrectExcessQuo(T& q, unsigned long a, long n, long amt=1)
+{
+   q += (1L + sp_SignMask(a-n))*amt;
+   return (a-n) + (sp_SignMask(a-n) & n);
+}
+
+#endif
+
+
+// **********************************************************************
+
+
+
+
+#ifdef NTL_HAVE_BUILTIN_CLZL
+
+static inline long
+sp_CountLeadingZeros(unsigned long x)
+{
+   return __builtin_clzl(x);
+}
+
+#else
+
+static inline long
+sp_CountLeadingZeros(unsigned long x)
+{
+   long res = NTL_BITS_PER_LONG-NTL_SP_NBITS;
+   x = x << (NTL_BITS_PER_LONG-NTL_SP_NBITS);
+   while (x < (1UL << (NTL_BITS_PER_LONG-1))) {
+      x <<= 1;
+      res++;
+   }
+
+   return res;
+}
+
+
+#endif
+
+
+
+
+static inline
+long AddMod(long a, long b, long n)
+{
+   long r = a+b;
+   return sp_CorrectExcess(r, n);
+}
+
+static inline
+long SubMod(long a, long b, long n)
+{
+   long r = a-b;
+   return sp_CorrectDeficit(r, n);
+}
+
+static inline
+long NegateMod(long a, long n)
+{
+   return SubMod(0, a, n);
+}
+
+// definition of MulHiUL, using either assembly or ULL type
+#if (defined(NTL_SPMM_ASM))
+#define NTL_HAVE_MULHI
+
+// assembly code versions
+#include
+
+
+#elif (defined(NTL_SPMM_ULL) || defined(NTL_HAVE_LL_TYPE))
+#define NTL_HAVE_MULHI
+
+static inline unsigned long
+MulHiUL(unsigned long a, unsigned long b)
+{
+   return (((NTL_ULL_TYPE)(a)) * ((NTL_ULL_TYPE)(b))) >> NTL_BITS_PER_LONG;
+}
+#endif
+
+
+
+
+
+#if (!defined(NTL_LONGLONG_SP_MULMOD))
+
+
+
+#ifdef NTL_LEGACY_SP_MULMOD
+
+#define NTL_WIDE_DOUBLE_PRECISION NTL_DOUBLE_PRECISION
+#define NTL_WIDE_FDOUBLE_PRECISION NTL_WIDE_DOUBLE_DP
+typedef double wide_double;
+
+
+#else
+
+
+#ifdef NTL_LONGDOUBLE_SP_MULMOD
+
+
+#define NTL_WIDE_DOUBLE_PRECISION NTL_LONGDOUBLE_PRECISION
+#define NTL_WIDE_FDOUBLE_PRECISION NTL_WIDE_DOUBLE_LDP
+typedef long double wide_double_impl_t;
+
+#else
+
+#define NTL_WIDE_DOUBLE_PRECISION NTL_DOUBLE_PRECISION
+#define NTL_WIDE_FDOUBLE_PRECISION NTL_WIDE_DOUBLE_DP
+typedef double wide_double_impl_t;
+
+#endif
+
+
+
+
+class wide_double {
+public:
+ wide_double_impl_t data; + + wide_double() { } + + wide_double(const wide_double& x) : data(x.data) { } + + template + explicit wide_double(const T& x) : data(x) { } + + operator wide_double_impl_t() const { return data; } + +}; + +inline wide_double operator+(wide_double x, wide_double y) +{ + return wide_double(x.data + y.data); +} + +inline wide_double operator-(wide_double x, wide_double y) +{ + return wide_double(x.data - y.data); +} + + + +inline wide_double operator*(wide_double x, wide_double y) +{ + return wide_double(x.data * y.data); +} + +inline wide_double operator/(wide_double x, wide_double y) +{ + return wide_double(x.data / y.data); +} + +inline wide_double floor(wide_double x) +{ + return wide_double(std::floor(x.data)); +} + +inline wide_double& operator+=(wide_double& x, wide_double y) +{ + return x = x + y; +} + +inline wide_double& operator-=(wide_double& x, wide_double y) +{ + return x = x - y; +} + +inline wide_double& operator*=(wide_double& x, wide_double y) +{ + return x = x * y; +} + + +inline wide_double& operator/=(wide_double& x, wide_double y) +{ + return x = x / y; +} + +#endif + + + +// old-style MulMod code using floating point arithmetic + +typedef wide_double mulmod_t; +typedef wide_double muldivrem_t; + +static inline wide_double PrepMulMod(long n) +{ + return wide_double(1L)/wide_double(n); +} + +static inline wide_double PrepMulDivRem(long b, long n, wide_double ninv) +{ + return wide_double(b)*ninv; +} + + +static inline +long MulMod(long a, long b, long n, wide_double ninv) +{ + long q = (long) ((((wide_double) a) * ((wide_double) b)) * ninv); + unsigned long rr = cast_unsigned(a)*cast_unsigned(b) - + cast_unsigned(q)*cast_unsigned(n); + long r = sp_CorrectDeficit(rr, n); + return sp_CorrectExcess(r, n); +} + +static inline +long NormalizedMulMod(long a, long b, long n, wide_double ninv) +{ + return MulMod(a, b, n, ninv); +} + +static inline bool NormalizedModulus(wide_double ninv) { return true; } + + + +static inline +long MulModWithQuo(long& qres, long a, long b, long n, wide_double ninv) +{ + long q = (long) ((((wide_double) a) * ((wide_double) b)) * ninv); + unsigned long rr = cast_unsigned(a)*cast_unsigned(b) - + cast_unsigned(q)*cast_unsigned(n); + + long r = sp_CorrectDeficitQuo(q, rr, n); + r = sp_CorrectExcessQuo(q, r, n); + qres = q; + return r; +} + + +static inline +long MulMod2_legacy(long a, long b, long n, wide_double bninv) +{ + long q = (long) (((wide_double) a) * bninv); + unsigned long rr = cast_unsigned(a)*cast_unsigned(b) - + cast_unsigned(q)*cast_unsigned(n); + long r = sp_CorrectDeficit(rr, n); + r = sp_CorrectExcess(r, n); + return r; +} + +static inline +long MulDivRem(long& qres, long a, long b, long n, wide_double bninv) +{ + long q = (long) (((wide_double) a) * bninv); + unsigned long rr = cast_unsigned(a)*cast_unsigned(b) - + cast_unsigned(q)*cast_unsigned(n); + + long r = sp_CorrectDeficitQuo(q, rr, n); + r = sp_CorrectExcessQuo(q, r, n); + qres = q; + return r; +} + +#else + +// new-style MulMod code using ULL arithmetic + + + +struct sp_inverse { + unsigned long inv; + long shamt; + + sp_inverse() { } + sp_inverse(unsigned long _inv, long _shamt) : inv(_inv), shamt(_shamt) { } +}; + +typedef sp_inverse mulmod_t; + + + +#if (NTL_BITS_PER_LONG >= NTL_SP_NBITS+4) + +#define NTL_PRE_SHIFT1 (NTL_BITS_PER_LONG-NTL_SP_NBITS-4) +#define NTL_POST_SHIFT (0) + +#define NTL_PRE_SHIFT2 (2*NTL_SP_NBITS+2) + +#else + +#define NTL_PRE_SHIFT1 (0) +#define NTL_POST_SHIFT (1) + +#define NTL_PRE_SHIFT2 (2*NTL_SP_NBITS+1) + +#endif + 
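+
+// (Editorial illustration, not part of NTL: the core trick used by both
+// the old-style and new-style MulMod is to estimate the quotient
+// q ~= a*b/n cheaply, subtract q*n using wrap-around unsigned
+// arithmetic, and then correct the remainder by at most one multiple
+// of n.  A minimal floating-point sketch, assuming n is small enough
+// that double(a)*double(b)*ninv lands within 1 of the true quotient:
+
+static inline long toy_MulMod(long a, long b, long n)
+{
+   double ninv = 1.0/double(n);
+   long q = (long) (double(a)*double(b)*ninv);   // estimate, off by at most 1
+   unsigned long r = cast_unsigned(a)*cast_unsigned(b) - cast_unsigned(q)*cast_unsigned(n);
+   if ((long) r < 0)
+      r += cast_unsigned(n);                     // q was one too big
+   else if (r >= cast_unsigned(n))
+      r -= cast_unsigned(n);                     // q was one too small
+   return (long) r;
+}
+
+// The production routines in this file implement the same pattern via
+// the sp_Correct* helpers and a precomputed reciprocal.)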
+ + + +#if (NTL_SP_NBITS <= 2*NTL_DOUBLE_PRECISION-10) + + +static inline unsigned long +sp_NormalizedPrepMulMod(long n) +{ + double ninv = 1/double(n); + unsigned long nn = n; + + // initial approximation to quotient + unsigned long qq = long((double(1L << (NTL_SP_NBITS-1)) * double(1L << NTL_SP_NBITS)) * ninv); + + // NOTE: the true quotient is <= 2^{NTL_SP_NBITS} + + // compute approximate remainder using ULL arithmetic + NTL_ULL_TYPE rr = (((NTL_ULL_TYPE)(1)) << (2*NTL_SP_NBITS-1)) - + (((NTL_ULL_TYPE)(nn)) * ((NTL_ULL_TYPE)(qq))); + + + rr = (rr << (NTL_PRE_SHIFT2-2*NTL_SP_NBITS+1)) - 1; + + // now compute a floating point approximation to r, + // but avoiding unsigned -> float conversions, + // as these are not as well supported in hardware as + // signed -> float conversions + + unsigned long rrlo = (unsigned long) rr; + unsigned long rrhi = ((unsigned long) (rr >> NTL_BITS_PER_LONG)) + + (rrlo >> (NTL_BITS_PER_LONG-1)); + + long rlo = clean_cast_signed(rrlo); // these should be No-Ops + long rhi = clean_cast_signed(rrhi); + + const double bpl_as_double (double(1L << NTL_SP_NBITS) * double(1L << (NTL_BITS_PER_LONG-NTL_SP_NBITS))); + double fr = double(rlo) + double(rhi)*bpl_as_double; + + // now convert fr*ninv to a long + // but we have to be careful: fr may be negative. + // the result should still give floor(r/n) pm 1, + // and is computed in a way that avoids branching + + long q1 = long(fr*ninv); + if (q1 < 0) q1--; + // This counteracts the round-to-zero behavior of conversion + // to long. It should be compiled into branch-free code. + + unsigned long qq1 = q1; + + unsigned long rr1 = rrlo - qq1*nn; + + qq1 += 1L + sp_SignMask(rr1) + sp_SignMask(rr1-n); + + unsigned long res = (qq << (NTL_PRE_SHIFT2-2*NTL_SP_NBITS+1)) + qq1; + + res = res << NTL_PRE_SHIFT1; + return res; +} + +#else + +static inline unsigned long +sp_NormalizedPrepMulMod(long n) +{ + return + (((unsigned long) ( ((((NTL_ULL_TYPE) 1) << NTL_PRE_SHIFT2) - 1)/n )) << NTL_PRE_SHIFT1); +} + +#endif + + +static inline sp_inverse +PrepMulMod(long n) +{ + long shamt = sp_CountLeadingZeros(n) - (NTL_BITS_PER_LONG-NTL_SP_NBITS); + unsigned long inv = sp_NormalizedPrepMulMod(n << shamt); + return sp_inverse(inv, shamt); +} + + + + + + + +static inline long +sp_NormalizedMulMod(long a, long b, long n, unsigned long ninv) +{ + ll_type U; + ll_imul(U, a, b); + unsigned long H = ll_rshift_get_lo(U); + unsigned long Q = MulHiUL(H, ninv); + Q = Q >> NTL_POST_SHIFT; + unsigned long L = ll_get_lo(U); + long r = L - Q*cast_unsigned(n); // r in [0..2*n) + + r = sp_CorrectExcess(r, n); + return r; +} + + + +static inline long +MulMod(long a, long b, long n, sp_inverse ninv) +{ + return sp_NormalizedMulMod(a, b << ninv.shamt, n << ninv.shamt, ninv.inv) >> ninv.shamt; +} + +// if you know what you're doing.... +// FIXME: eventually, put this is the documented interface... 
+// but for now, it's "experimental" +static inline long +NormalizedMulMod(long a, long b, long n, sp_inverse ninv) +{ + return sp_NormalizedMulMod(a, b, n, ninv.inv); +} + +static inline bool +NormalizedModulus(sp_inverse ninv) { return ninv.shamt == 0; } + + + + +static inline long +sp_NormalizedMulModWithQuo(long& qres, long a, long b, long n, unsigned long ninv) +{ + ll_type U; + ll_imul(U, a, b); + unsigned long H = ll_rshift_get_lo(U); + unsigned long Q = MulHiUL(H, ninv); + Q = Q >> NTL_POST_SHIFT; + unsigned long L = ll_get_lo(U); + long r = L - Q*cast_unsigned(n); // r in [0..2*n) + + r = sp_CorrectExcessQuo(Q, r, n); + qres = Q; + return r; +} + +static inline long +MulModWithQuo(long& qres, long a, long b, long n, sp_inverse ninv) +{ + return sp_NormalizedMulModWithQuo(qres, a, b << ninv.shamt, n << ninv.shamt, ninv.inv) >> ninv.shamt; +} + + + + +#endif + + + +#if (defined(NTL_SPMM_ULL) || defined(NTL_SPMM_ASM) || defined(NTL_LONGLONG_SP_MULMOD)) + + +typedef unsigned long mulmod_precon_t; + + +#if (!defined(NTL_LONGLONG_SP_MULMOD)) + +static inline unsigned long PrepMulModPrecon(long b, long n, wide_double ninv) +{ + long q = (long) ( (((wide_double) b) * wide_double(NTL_SP_BOUND)) * ninv ); + unsigned long rr = (cast_unsigned(b) << NTL_SP_NBITS) - cast_unsigned(q)*cast_unsigned(n); + + q += sp_SignMask(rr) + sp_SignMask(rr-n) + 1L; + + return cast_unsigned(q) << (NTL_BITS_PER_LONG - NTL_SP_NBITS); +} + +#else + + +static inline unsigned long +sp_NormalizedPrepMulModPrecon(long b, long n, unsigned long ninv) +{ + unsigned long H = cast_unsigned(b) << 2; + unsigned long Q = MulHiUL(H, ninv); + Q = Q >> NTL_POST_SHIFT; + unsigned long L = cast_unsigned(b) << NTL_SP_NBITS; + long r = L - Q*cast_unsigned(n); // r in [0..2*n) + + + Q += 1L + sp_SignMask(r-n); + return Q; // NOTE: not shifted +} + + +static inline unsigned long +PrepMulModPrecon(long b, long n, sp_inverse ninv) +{ + return sp_NormalizedPrepMulModPrecon(b << ninv.shamt, n << ninv.shamt, ninv.inv) << (NTL_BITS_PER_LONG-NTL_SP_NBITS); +} + + + + + +#endif + + + + + +static inline long MulModPrecon(long a, long b, long n, unsigned long bninv) +{ + unsigned long qq = MulHiUL(a, bninv); + unsigned long rr = cast_unsigned(a)*cast_unsigned(b) - qq*cast_unsigned(n); + return sp_CorrectExcess(long(rr), n); +} + + + +static inline long MulModPreconWithQuo(long& qres, long a, long b, long n, unsigned long bninv) +{ + unsigned long qq = MulHiUL(a, bninv); + unsigned long rr = cast_unsigned(a)*cast_unsigned(b) - qq*cast_unsigned(n); + long r = sp_CorrectExcessQuo(qq, long(rr), n); + qres = long(qq); + return r; +} + + + +#else + +// default, wide_double version + +typedef wide_double mulmod_precon_t; + + +static inline wide_double PrepMulModPrecon(long b, long n, wide_double ninv) +{ + return ((wide_double) b) * ninv; +} + +static inline long MulModPrecon(long a, long b, long n, wide_double bninv) +{ + return MulMod2_legacy(a, b, n, bninv); +} + +static inline long MulModPreconWithQuo(long& qq, long a, long b, long n, wide_double bninv) +{ + return MulDivRem(qq, a, b, n, bninv); +} + + + +#endif + + + + + + + + +#if (defined(NTL_LONGLONG_SP_MULMOD)) + +// some annoying backward-compatibiliy nonsense + +struct sp_muldivrem_struct { + unsigned long bninv; + + explicit sp_muldivrem_struct(unsigned long _bninv) : bninv(_bninv) { } + sp_muldivrem_struct() { } +}; + +typedef sp_muldivrem_struct muldivrem_t; + +static inline sp_muldivrem_struct PrepMulDivRem(long b, long n, sp_inverse ninv) +{ + return 
sp_muldivrem_struct(PrepMulModPrecon(b, n, ninv)); +} + + +static inline +long MulDivRem(long& qres, long a, long b, long n, sp_muldivrem_struct bninv) +{ + return MulModPreconWithQuo(qres, a, b, n, bninv.bninv); +} + +#endif + + + + + + +static inline mulmod_precon_t PrepMulModPrecon(long b, long n) +{ + return PrepMulModPrecon(b, n, PrepMulMod(n)); +} + + +static inline +long MulMod(long a, long b, long n) +{ + return MulMod(a, b, n, PrepMulMod(n)); +} + +static inline muldivrem_t PrepMulDivRem(long b, long n) +{ + return PrepMulDivRem(b, n, PrepMulMod(n)); +} + + + + + + +#ifdef NTL_LEGACY_SP_MULMOD + +static inline long MulMod2(long a, long b, long n, wide_double bninv) +{ + return MulMod2_legacy(a, b, n, bninv); +} + + +#endif + + + + +static inline +void VectorMulModPrecon(long k, long *x, const long *a, long b, long n, + mulmod_precon_t bninv) +{ + for (long i = 0; i < k; i++) + x[i] = MulModPrecon(a[i], b, n, bninv); +} + +static inline +void VectorMulMod(long k, long *x, const long *a, long b, long n, + mulmod_t ninv) +{ + mulmod_precon_t bninv; + bninv = PrepMulModPrecon(b, n, ninv); + VectorMulModPrecon(k, x, a, b, n, bninv); +} + + +static inline +void VectorMulMod(long k, long *x, const long *a, long b, long n) +{ + mulmod_t ninv = PrepMulMod(n); + VectorMulMod(k, x, a, b, n, ninv); +} + +#ifdef NTL_HAVE_MULHI + + +struct sp_reduce_struct { + unsigned long ninv; + long sgn; + + sp_reduce_struct(unsigned long _ninv, long _sgn) : + ninv(_ninv), sgn(_sgn) { } + + sp_reduce_struct() { } +}; + +static inline +sp_reduce_struct sp_PrepRem(long n) +{ + unsigned long q = (1UL << (NTL_BITS_PER_LONG-1))/cast_unsigned(n); + long r = (1UL << (NTL_BITS_PER_LONG-1)) - q*cast_unsigned(n); + + long r1 = 2*r; + q = 2*q; + r1 = sp_CorrectExcessQuo(q, r1, n); + + return sp_reduce_struct(q, r); +} + + + +static inline +long rem(unsigned long a, long n, sp_reduce_struct red) +{ + unsigned long Q = MulHiUL(a, red.ninv); + long r = a - Q*cast_unsigned(n); + r = sp_CorrectExcess(r, n); + return r; +} + + +static inline +long rem(long a, long n, sp_reduce_struct red) +{ + unsigned long a0 = cast_unsigned(a) & ((1UL << (NTL_BITS_PER_LONG-1))-1); + long r = rem(a0, n, red); + long s = sp_SignMask(a) & red.sgn; + return SubMod(r, s, n); +} +#else + +struct sp_reduce_struct { }; + + +static inline +sp_reduce_struct sp_PrepRem(long n) +{ + return sp_reduce_struct(); +} + + +static inline +long rem(unsigned long a, long n, sp_reduce_struct red) +{ + return a % cast_unsigned(n); +} + +static inline +long rem(long a, long n, sp_reduce_struct red) +{ + long r = a % n; + return sp_CorrectDeficit(r, n); +} + + +#endif + + +#ifdef NTL_HAVE_LL_TYPE + +#define NTL_HAVE_SP_LL_ROUTINES + + +// some routines that are currently not part of the documented +// interface. They currently are only defined when we have appropriate +// LL type. 
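+//
+// (Editorial note, not part of NTL: the reduction below is the usual
+// approximate-reciprocal idea.  With nbits = NumBits(d) and
+//
+//    inv = floor((2^(nbits + BPL) - 1) / d),
+//
+// a two-word value u = hi*2^BPL + lo with hi < d satisfies
+// u < 2^(nbits + BPL), so H = floor(u / 2^nbits) fits in one word,
+// and Q = MulHiUL(H, inv) + H approximates floor(u/d) closely enough
+// that lo - Q*d, computed mod 2^BPL, lies in [0..4*d); the two
+// sp_CorrectExcess calls then bring it back into [0..d).)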
+ + +struct sp_ll_reduce_struct { + unsigned long inv; + long nbits; + + sp_ll_reduce_struct() { } + + sp_ll_reduce_struct(unsigned long _inv, long _nbits) : inv(_inv), nbits(_nbits) { } + +}; + + +static inline sp_ll_reduce_struct +make_sp_ll_reduce_struct(long n) +{ + long nbits = NTL_BITS_PER_LONG - sp_CountLeadingZeros(n); + unsigned long inv = + (unsigned long) ( ((((NTL_ULL_TYPE) 1) << (nbits+NTL_BITS_PER_LONG))-1UL) / ((NTL_ULL_TYPE) n) ); + + return sp_ll_reduce_struct(inv, nbits); +} + + +// computes remainder (hi, lo) mod d, assumes hi < d +static inline long +sp_ll_red_21(unsigned long hi, unsigned long lo, long d, + sp_ll_reduce_struct dinv) +{ + unsigned long H = + (hi << (NTL_BITS_PER_LONG-dinv.nbits)) | (lo >> dinv.nbits); + unsigned long Q = MulHiUL(H, dinv.inv) + H; + unsigned long rr = lo - Q*cast_unsigned(d); // rr in [0..4*d) + long r = sp_CorrectExcess(rr, 2*d); // r in [0..2*d) + r = sp_CorrectExcess(r, d); + return r; +} + +// computes remainder (x[n-1], ..., x[0]) mod d +static inline long +sp_ll_red_n1(const unsigned long *x, long n, long d, sp_ll_reduce_struct dinv) +{ + long carry = 0; + long i; + for (i = n-1; i >= 0; i--) + carry = sp_ll_red_21(carry, x[i], d, dinv); + return carry; +} + +// computes remainder (x2, x1, x0) mod d, assumes x2 < d +static inline long +sp_ll_red_31(unsigned long x2, unsigned long x1, unsigned long x0, + long d, sp_ll_reduce_struct dinv) +{ + long carry = sp_ll_red_21(x2, x1, d, dinv); + return sp_ll_red_21(carry, x0, d, dinv); +} + + +// normalized versions of the above: assume NumBits(d) == NTL_SP_NBITS + +// computes remainder (hi, lo) mod d, assumes hi < d +static inline long +sp_ll_red_21_normalized(unsigned long hi, unsigned long lo, long d, + sp_ll_reduce_struct dinv) +{ + unsigned long H = + (hi << (NTL_BITS_PER_LONG-NTL_SP_NBITS)) | (lo >> NTL_SP_NBITS); + unsigned long Q = MulHiUL(H, dinv.inv) + H; + unsigned long rr = lo - Q*cast_unsigned(d); // rr in [0..4*d) + long r = sp_CorrectExcess(rr, 2*d); // r in [0..2*d) + r = sp_CorrectExcess(r, d); + return r; +} + +// computes remainder (x[n-1], ..., x[0]) mod d +static inline long +sp_ll_red_n1_normalized(const unsigned long *x, long n, long d, sp_ll_reduce_struct dinv) +{ + long carry = 0; + long i; + for (i = n-1; i >= 0; i--) + carry = sp_ll_red_21_normalized(carry, x[i], d, dinv); + return carry; +} + +// computes remainder (x2, x1, x0) mod d, assumes x2 < d +static inline long +sp_ll_red_31_normalized(unsigned long x2, unsigned long x1, unsigned long x0, + long d, sp_ll_reduce_struct dinv) +{ + long carry = sp_ll_red_21_normalized(x2, x1, d, dinv); + return sp_ll_red_21_normalized(carry, x0, d, dinv); +} + + +#else + +// provided to streamline some code + + +struct sp_ll_reduce_struct { }; + + +static inline sp_ll_reduce_struct +make_sp_ll_reduce_struct(long n) +{ + return sp_ll_reduce_struct(); +} + +#endif + + +NTL_CLOSE_NNS + +#endif + + + + + +/************************************************************************** + +Implementation notes -- the LONGLONG MulMod implementation + +I started out basing this on Granlund-Moeller multiplication, +but it evolved into something a little bit different. + +We assume that modulus n has w bits, so 2^{w-1} <= n < 2^w. +We also assume that 2 <= w <= BPL-2. + +As a precomputation step, we compute X = floor((2^v-1)/n), i.e., + 2^v - 1 = X n + Y, where 0 <= Y < n + +Now, we are given U to reduce mod n. +Write + U = H 2^s + L + H X = Q 2^t + R +where s + t = v. 
+
+Some simple calculations yield:
+   H <= U/2^s
+   2^{v-w} <= X < 2^v/n <= 2^{v-w+1}
+   H X < 2^t U / n
+
+Also:
+   U - Qn < n U / 2^v + L + n
+
+For the case where BPL >= 64, we generally work with w = BPL-4.
+In this case, we set v = 2w+2, s = w-2, t = w+4.
+Then we have:
+   U - Qn < n/4 + n/2 + n < 2n
+This choice of parameters allows us to do a MulMod with just a single
+correction.  It also allows us to do the LazyPrepMulModPrecon
+step with just a single correction.
+
+If w = BPL-2, we set v = 2w+1, s = w-2, t = w+3.
+Then we have:
+   U - Qn < n/2 + n/2 + n = 2n
+So again, we do a MulMod with just a single correction,
+although the LazyPrepMulModPrecon now takes two corrections.
+
+For the Lazy stuff, we are computing floor(2^{w+2}b/n), so
+U = 2^{w+2} b.
+For the case w = BPL-4, we are setting s = w+2 and t = w.
+L = 0 in this case, and we obtain U - Qn < n + n = 2n.
+In the case w = BPL-2, we set s = w, t = w+1.
+We obtain U - Qn < 2n + n = 3n.
+Also, we need to write X = 2^{w+1} + X_0 to perform the
+HX computation correctly.
+
+***************************************************************************/
+
diff --git a/thirdparty/linux/ntl/include/NTL/thread.h b/thirdparty/linux/ntl/include/NTL/thread.h
new file mode 100644
index 0000000000..6767abf60c
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/thread.h
@@ -0,0 +1,229 @@
+
+#ifndef NTL_thread__H
+#define NTL_thread__H
+
+#include
+#include
+
+#ifdef NTL_THREADS
+
+#include <atomic>
+#include <mutex>
+
+#endif
+
+NTL_OPEN_NNS
+
+
+#ifdef NTL_THREADS
+
+class AtomicLong {
+private:
+   NTL_SNS atomic_long data;
+
+   AtomicLong(const AtomicLong& other); // disabled
+   AtomicLong& operator=(const AtomicLong& other); // disabled
+
+public:
+
+   explicit AtomicLong(const long& _data) : data(_data) { }
+   AtomicLong& operator=(const long& _data)
+   {
+      data.store(_data, NTL_SNS memory_order_release);
+      return *this;
+   }
+   operator long() const { return data.load( NTL_SNS memory_order_acquire); }
+};
+
+
+class AtomicBool {
+private:
+   NTL_SNS atomic_bool data;
+
+   AtomicBool(const AtomicBool& other); // disabled
+   AtomicBool& operator=(const AtomicBool& other); // disabled
+
+public:
+
+   explicit AtomicBool(const bool& _data) : data(_data) { }
+   AtomicBool& operator=(const bool& _data)
+   {
+      data.store(_data, NTL_SNS memory_order_release);
+      return *this;
+   }
+   operator bool() const { return data.load( NTL_SNS memory_order_acquire); }
+};
+
+
+class AtomicCounter {
+private:
+   NTL_SNS atomic_ulong cnt;
+
+public:
+   AtomicCounter() : cnt(0) { }
+   unsigned long inc()
+   {
+      return cnt.fetch_add(1UL, NTL_SNS memory_order_relaxed);
+   }
+};
+
+
+
+
+class AtomicRefCount {
+private:
+   NTL_SNS atomic_long cnt;
+
+public:
+   AtomicRefCount() : cnt(0) { }
+   void inc() { cnt.fetch_add(1, NTL_SNS memory_order_relaxed); }
+   bool dec()
+   {
+      if (cnt.fetch_sub(1, NTL_SNS memory_order_release) == 1) {
+         NTL_SNS atomic_thread_fence(NTL_SNS memory_order_acquire);
+         return true;
+      }
+      else
+         return false;
+   }
+};
+
+class MutexProxy {
+private:
+   NTL_SNS mutex mtx;
+
+   MutexProxy(const MutexProxy&); // disabled
+   void operator=(const MutexProxy&); // disabled
+
+public:
+   MutexProxy() { }
+
+   friend class GuardProxy;
+};
+
+class GuardProxy {
+private:
+   NTL_SNS unique_lock<NTL_SNS mutex> lck;
+
+
+   GuardProxy(const GuardProxy&); // disabled
+   void operator=(const GuardProxy&); // disabled
+
+public:
+   GuardProxy(MutexProxy& mtx) : lck(mtx.mtx, NTL_SNS defer_lock) { }
+   void lock() { lck.lock(); }
+};
+
+
+#else
+
+class AtomicLong {
+private:
+   long data;
+
+   AtomicLong(const AtomicLong&
other); // disabled + AtomicLong& operator=(const AtomicLong& other); // disabled + +public: + + explicit AtomicLong(const long& _data) : data(_data) { } + AtomicLong& operator=(const long& _data) { data = _data; return *this; } + operator long() const { return data; } +}; + + +class AtomicBool { +private: + bool data; + + AtomicBool(const AtomicBool& other); // disabled + AtomicBool& operator=(const AtomicBool& other); // disabled + +public: + + explicit AtomicBool(const bool& _data) : data(_data) { } + AtomicBool& operator=(const bool& _data) { data = _data; return *this; } + operator bool() const { return data; } +}; + + +class AtomicCounter { +private: + unsigned long cnt; + + AtomicCounter(const AtomicCounter&); // disabled + void operator=(const AtomicCounter&); // disabled + + +public: + AtomicCounter() : cnt(0) { } + unsigned long inc() { return cnt++; } +}; + + +class AtomicRefCount { +private: + long cnt; + + AtomicRefCount(const AtomicRefCount&); // disabled + void operator=(const AtomicRefCount&); // disabled + + +public: + AtomicRefCount() : cnt(0) { } + void inc() { cnt++; } + bool dec() { cnt--; return cnt == 0; } +}; + +class MutexProxy { +private: + MutexProxy(const MutexProxy&); // disabled + void operator=(const MutexProxy&); // disabled + +public: + MutexProxy() { } +}; + +class GuardProxy { +private: + GuardProxy(const GuardProxy&); // disabled + void operator=(const GuardProxy&); // disabled + +public: + GuardProxy(MutexProxy&) { } + void lock() { } +}; + +#endif + + +const NTL_SNS string& CurrentThreadID(); + + + + +/********************************************************************* + +NOTES: See + + +http://preshing.com/20120913/acquire-and-release-semantics/ +http://preshing.com/20130922/acquire-and-release-fences/ +http://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/ +http://preshing.com/20131125/acquire-and-release-fences-dont-work-the-way-youd-expect/ + +for general information on C++11 atomics. + + +Also see + +http://www.chaoticmind.net/~hcb/projects/boost.atomic/doc/atomic/usage_examples.html#boost_atomic.usage_examples.example_reference_counters + +for reference counting in a multi-threaded environment. + +*********************************************************************/ + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/tools.h b/thirdparty/linux/ntl/include/NTL/tools.h new file mode 100644 index 0000000000..c0decfe660 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/tools.h @@ -0,0 +1,939 @@ + + + + +#ifndef NTL_tools__H +#define NTL_tools__H + +//#define NTL_TEST_EXCEPTIONS + +#include +#include + +#include +#include +#include + +#include +#include + + + +#if (defined(NTL_THREADS) && defined(__GNUC__) && !defined(NTL_DISABLE_TLS_HACK)) +#define NTL_TLS_HACK +#endif + + + +#ifdef NTL_TLS_HACK +#include +#endif + + + + +#define NTL_SNS std :: +#define NTL_USE_SNS using namespace std; + +#ifndef NTL_LEGACY_NO_NAMESPACE + +// This wraps NTL in the NTL namespace. +// This is the current default. + +#define NTL_NAMESPACE NTL +#define NTL_OPEN_NNS namespace NTL_NAMESPACE { +#define NTL_CLOSE_NNS } +#define NTL_USE_NNS using namespace NTL_NAMESPACE; +#define NTL_NNS NTL_NAMESPACE :: + +// To make things work, we have to apply using declarations of all std +// functions that are both overloaded by NTL and are used in +// the implementation of NTL. 
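+
+// (Editorial illustration, not part of NTL: inside namespace NTL an
+// unqualified call like sqrt(2.0) would otherwise see only NTL's own
+// sqrt overloads (e.g., sqrt(const RR&)) and fail to resolve, since
+// those declarations hide std::sqrt.  Hypothetically:
+//
+//    using std::sqrt;          // what NTL_START_IMPL arranges below
+//    double d = sqrt(2.0);     // now resolves to std::sqrt(double)
+//    RR r = sqrt(to_RR(2));    // still resolves to NTL's sqrt
+// )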
+
+#define NTL_START_IMPL NTL_USE_SNS NTL_OPEN_NNS \
+   using NTL_SNS abs; \
+   using NTL_SNS ceil; \
+   using NTL_SNS exp; \
+   using NTL_SNS fabs; \
+   using NTL_SNS floor; \
+   using NTL_SNS ldexp; \
+   using NTL_SNS log; \
+   using NTL_SNS sqrt;
+
+#define NTL_END_IMPL NTL_CLOSE_NNS
+
+#else
+
+// This puts NTL in the global namespace.
+// Provided only for backward compatibility.
+
+#define NTL_NAMESPACE
+#define NTL_OPEN_NNS
+#define NTL_CLOSE_NNS
+#define NTL_USE_NNS
+#define NTL_NNS
+
+#define NTL_START_IMPL
+#define NTL_END_IMPL
+
+#endif
+
+#define NTL_CLIENT NTL_USE_SNS NTL_USE_NNS
+
+
+
+double _ntl_GetTime();
+unsigned long _ntl_GetPID();
+
+typedef unsigned long _ntl_ulong;
+typedef _ntl_ulong *_ntl_ulong_ptr;
+// I made these have "obscure" names to avoid conflict with
+// (non-standard but common) definitions in standard headers.
+// Putting u_long inside namespace NTL only tends to create ambiguities,
+// for no good reason.
+
+
+
+
+NTL_OPEN_NNS
+
+#ifndef NTL_LEGACY_INPUT_ERROR
+
+// this newer version is more in line with wider C++
+// practice, setting the "fail bit" of an input stream
+// when an error is encountered.  This is now the default in NTL
+
+#define NTL_INPUT_ERROR(s, msg) \
+   do {\
+      s.setstate(NTL_SNS ios::failbit);\
+      return s;\
+   } while (0)\
+
+
+#else
+
+// this version provides full backward compatibility,
+// raising an error on ill-formed or missing input
+
+#define NTL_INPUT_ERROR(s, msg) \
+   do {\
+      InputError(msg);\
+   } while (0)\
+
+
+#endif
+
+
+#define NTL_INPUT_CHECK_ERR(stmt) \
+   do {\
+      if (!(stmt)) InputError("bad input\n");\
+   } while (0)\
+
+
+
+#define NTL_INPUT_CHECK_RET(s, stmt) \
+   do {\
+      if (!(stmt)) { s.setstate(NTL_SNS ios::failbit); return s; }\
+   } while (0)\
+
+
+
+
+
+#define NTL_FILE_THRESH (1e12)
+// threshold in KB for switching to external storage of certain tables
+
+
+
+
+struct INIT_SIZE_STRUCT { };
+const INIT_SIZE_STRUCT INIT_SIZE = INIT_SIZE_STRUCT();
+typedef const INIT_SIZE_STRUCT& INIT_SIZE_TYPE;
+
+struct INIT_VAL_STRUCT { };
+const INIT_VAL_STRUCT INIT_VAL = INIT_VAL_STRUCT();
+typedef const INIT_VAL_STRUCT& INIT_VAL_TYPE;
+
+struct INIT_TRANS_STRUCT { };
+const INIT_TRANS_STRUCT INIT_TRANS = INIT_TRANS_STRUCT();
+typedef const INIT_TRANS_STRUCT& INIT_TRANS_TYPE;
+
+
+struct INIT_LOOP_HOLE_STRUCT { };
+const INIT_LOOP_HOLE_STRUCT INIT_LOOP_HOLE = INIT_LOOP_HOLE_STRUCT();
+typedef const INIT_LOOP_HOLE_STRUCT& INIT_LOOP_HOLE_TYPE;
+
+struct INIT_FFT_STRUCT { };
+const INIT_FFT_STRUCT INIT_FFT = INIT_FFT_STRUCT();
+typedef const INIT_FFT_STRUCT& INIT_FFT_TYPE;
+
+struct INIT_USER_FFT_STRUCT { };
+const INIT_USER_FFT_STRUCT INIT_USER_FFT = INIT_USER_FFT_STRUCT();
+typedef const INIT_USER_FFT_STRUCT& INIT_USER_FFT_TYPE;
+
+struct INIT_NO_ALLOC_STRUCT { };
+const INIT_NO_ALLOC_STRUCT INIT_NO_ALLOC = INIT_NO_ALLOC_STRUCT();
+typedef const INIT_NO_ALLOC_STRUCT& INIT_NO_ALLOC_TYPE;
+
+struct INIT_ALLOC_STRUCT { };
+const INIT_ALLOC_STRUCT INIT_ALLOC = INIT_ALLOC_STRUCT();
+typedef const INIT_ALLOC_STRUCT& INIT_ALLOC_TYPE;
+
+struct INIT_MONO_STRUCT { };
+const INIT_MONO_STRUCT INIT_MONO = INIT_MONO_STRUCT();
+typedef const INIT_MONO_STRUCT& INIT_MONO_TYPE;
+
+
+
+#ifdef NTL_NO_INIT_TRANS
+#define NTL_OPT_RETURN(t, x) return x
+#else
+#define NTL_OPT_RETURN(t, x) return t(x, INIT_TRANS)
+#endif
+
+
+#ifndef NTL_NO_MIN_MAX
+
+inline int min(int a, int b) { return (a < b) ? a : b; }
+inline int max(int a, int b) { return (a < b) ? b : a; }
+
+inline long min(long a, long b) { return (a < b) ?
a : b; } +inline long max(long a, long b) { return (a < b) ? b : a; } + +inline long min(int a, long b) { return (a < b) ? long(a) : b; } +inline long max(int a, long b) { return (a < b) ? b : long(a); } + +inline long min(long a, int b) { return (a < b) ? a : long(b); } +inline long max(long a, int b) { return (a < b) ? long(b) : a; } + +inline unsigned int min(unsigned int a, unsigned int b) +{ return (a < b) ? a : b; } +inline unsigned int max(unsigned int a, unsigned int b) +{ return (a < b) ? b : a; } + +inline unsigned long min(unsigned long a, unsigned long b) +{ return (a < b) ? a : b; } +inline unsigned long max(unsigned long a, unsigned long b) +{ return (a < b) ? b : a; } + +inline unsigned long min(unsigned int a, unsigned long b) +{ return (a < b) ? (unsigned long)(a) : b; } +inline unsigned long max(unsigned int a, unsigned long b) +{ return (a < b) ? b : (unsigned long)(a); } + +inline unsigned long min(unsigned long a, unsigned int b) +{ return (a < b) ? a : (unsigned long)(b); } +inline unsigned long max(unsigned long a, unsigned int b) +{ return (a < b) ? (unsigned long)(b) : a; } + +#endif + + +// NOTE: these are here for historical reasons, so I'll leave them +// Since it is likely to lead to ambiguities with std::swap, +// I am not defining any more of these. +inline void swap(long& a, long& b) { long t; t = a; a = b; b = t; } +inline void swap(int& a, int& b) { int t; t = a; a = b; b = t; } + + + +inline void conv(int& x, int a) { x = a; } +inline void conv(int& x, long a) + { unsigned y = (unsigned) a; x = NTL_UINT_TO_INT(y); } +inline void conv(int& x, float a) { x = int(NTL_SNS floor(double(a))); } +inline void conv(int& x, double a) { x = int(NTL_SNS floor(a)); } + +inline void conv(int& x, unsigned a) + { x = NTL_UINT_TO_INT(a); } + +inline void conv(int& x, unsigned long a) + { unsigned y = (unsigned) a; x = NTL_UINT_TO_INT(y); } + +inline int to_int(int a) { return a; } +inline int to_int(long a) + { unsigned y = (unsigned) a; return NTL_UINT_TO_INT(y); } +inline int to_int(float a) { return int(NTL_SNS floor(double(a))); } +inline int to_int(double a) { return int(NTL_SNS floor(a)); } + +inline int to_int(unsigned a) + { return NTL_UINT_TO_INT(a); } + +inline int to_int(unsigned long a) + { unsigned y = (unsigned) a; return NTL_UINT_TO_INT(y); } + + +inline void conv(long& x, int a) { x = a; } +inline void conv(long& x, long a) { x = a; } +inline void conv(long& x, float a) { x = long(NTL_SNS floor(double(a))); } +inline void conv(long& x, double a) { x = long(NTL_SNS floor(a)); } + +inline void conv(long& x, unsigned a) + { unsigned long y = a; x = NTL_ULONG_TO_LONG(y); } + +inline void conv(long& x, unsigned long a) + { x = NTL_ULONG_TO_LONG(a); } + +inline long to_long(int a) { return a; } +inline long to_long(long a) { return a; } +inline long to_long(float a) { return long(NTL_SNS floor(double(a))); } +inline long to_long(double a) { return long(NTL_SNS floor(a)); } + +inline long to_long(unsigned a) + { unsigned long y = a; return NTL_ULONG_TO_LONG(y); } + +inline long to_long(unsigned long a) + { return NTL_ULONG_TO_LONG(a); } + +inline void conv(float& x, int a) { x = float(a); } +inline void conv(float& x, long a) { x = float(a); } +inline void conv(float& x, unsigned a) { x = float(a); } +inline void conv(float& x, unsigned long a) { x = float(a); } +inline void conv(float& x, float a) { x = a; } +inline void conv(float& x, double a) { x = float(a); } + +inline float to_float(int a) { return float(a); } +inline float to_float(long a) { return 
float(a); }
+inline float to_float(unsigned a) { return float(a); }
+inline float to_float(unsigned long a) { return float(a); }
+inline float to_float(float a) { return a; }
+inline float to_float(double a) { return float(a); }
+
+inline void conv(double& x, int a) { x = double(a); }
+inline void conv(double& x, long a) { x = double(a); }
+inline void conv(double& x, unsigned a) { x = double(a); }
+inline void conv(double& x, unsigned long a) { x = double(a); }
+inline void conv(double& x, float a) { x = double(a); }
+inline void conv(double& x, double a) { x = a; }
+
+inline double to_double(int a) { return double(a); }
+inline double to_double(long a) { return double(a); }
+inline double to_double(unsigned a) { return double(a); }
+inline double to_double(unsigned long a) { return double(a); }
+inline double to_double(float a) { return double(a); }
+inline double to_double(double a) { return a; }
+
+
+
+/* additional legacy conversions for v6 conversion regime */
+
+
+inline void conv(unsigned int& x, int a) { x = ((unsigned int)(a)); }
+inline void conv(unsigned int& x, long a) { x = ((unsigned int)(a)); }
+inline void conv(unsigned int& x, unsigned a) { x = a; }
+inline void conv(unsigned int& x, unsigned long a) { x = ((unsigned int)(a)); }
+inline void conv(unsigned int& x, float a) { x = ((unsigned int) to_long(a)); }
+inline void conv(unsigned int& x, double a) { x = ((unsigned int) to_long(a)); }
+
+inline void conv(unsigned long& x, int a) { x = ((unsigned long)(a)); }
+inline void conv(unsigned long& x, long a) { x = ((unsigned long)(a)); }
+inline void conv(unsigned long& x, unsigned a) { x = ((unsigned long)(a)); }
+inline void conv(unsigned long& x, unsigned long a) { x = a; }
+inline void conv(unsigned long& x, float a) { x = ((unsigned long) to_long(a)); }
+inline void conv(unsigned long& x, double a) { x = ((unsigned long) to_long(a)); }
+
+
+/* ------------------------------------- */
+
+
+// new style conversion function
+// example: ZZ x = conv<ZZ>(1);
+// note: modern C++ compilers should implement
+// "named return value optimization", so the
+// result statement should not create a temporary
+
+template<class T, class S>
+T conv(const S& a)
+{
+   T x;
+   conv(x, a);
+   return x;
+}
+
+
+// some convenience casting routines:
+
+inline long cast_signed(unsigned long a) { return long(a); }
+inline int cast_signed(unsigned int a) { return int(a); }
+// DIRT: IMPL-DEF: the behavior here is implementation defined,
+// but on a two's-complement machine, it should always work
+
+inline unsigned long cast_unsigned(long a) { return (unsigned long) a; }
+inline unsigned int cast_unsigned(int a) { return (unsigned int) a; }
+
+
+// these versions respect the NTL_CLEAN_INT flag: if set,
+// they use code that is guaranteed to work, under the
+// assumption that signed integers are two's complement.
+// A good compiler should optimize it all away and generate
+// the same code in either case (tested on gcc, clang, icc).
+// This is really an academic exercise...
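+
+// (Editorial note, not part of NTL: on a two's-complement machine both
+// branches below agree; e.g., clean_cast_signed(~0UL) == -1L and
+// clean_cast_signed(1UL) == 1L.  The NTL_CLEAN_INT branch just reaches
+// that result without relying on the implementation-defined
+// unsigned-to-signed conversion.)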
+
+#ifdef NTL_CLEAN_INT
+
+inline long clean_cast_signed(unsigned long a)
+{ return NTL_ULONG_TO_LONG(a); }
+
+inline int clean_cast_signed(unsigned int a)
+{ return NTL_UINT_TO_INT(a); }
+
+#else
+
+inline long clean_cast_signed(unsigned long a) { return long(a); }
+inline int clean_cast_signed(unsigned int a) { return int(a); }
+
+#endif
+
+
+
+
+
+
+
+long SkipWhiteSpace(NTL_SNS istream& s);
+long IsWhiteSpace(long c);
+long IsEOFChar(long c);
+
+long CharToIntVal(long c);
+char IntValToChar(long a);
+
+
+
+
+inline double GetTime() { return _ntl_GetTime(); }
+inline unsigned long GetPID() { return _ntl_GetPID(); }
+
+inline long IsFinite(double *p) { return _ntl_IsFinite(p); }
+
+
+#if (NTL_EXT_DOUBLE)
+
+inline void ForceToMem(double *p) { _ntl_ForceToMem(p); }
+
+#else
+
+inline void ForceToMem(double *p) { }
+
+#endif
+
+
+inline double TrueDouble(double x)
+{
+   ForceToMem(&x);
+   return x;
+}
+
+
+
+
+void PrintTime(NTL_SNS ostream& s, double t);
+
+
+
+#if (defined(__GNUC__) && (__GNUC__ >= 4))
+
+// on relatively modern versions of gcc, we can
+// declare "restricted" pointers in C++
+
+#define NTL_RESTRICT __restrict
+
+#else
+
+#define NTL_RESTRICT
+
+#endif
+
+// A very lightly wrapped pointer that does nothing more than provide
+// auto cleanup in a destructor.  Use the UniquePtr class (in SmartPtr.h)
+// for a class with more safety and convenience features.
+// This class is easiest to use to retrofit older code with RAII
+// semantics.
+
+// A call to Deleter::apply should free the pointed-to storage
+// and set the pointer itself to zero, so apply should
+// take an argument that is a reference to a T*.
+
+template<class T, class Deleter>
+class WrappedPtr {
+private:
+   WrappedPtr(const WrappedPtr&); // disable
+   void operator=(const WrappedPtr&); // disable
+public:
+   typedef T * raw_ptr;
+
+   raw_ptr rep;
+
+   WrappedPtr() : rep(0) { }
+   void operator=(const raw_ptr& _rep) { rep = _rep; }
+
+   ~WrappedPtr() { Deleter::apply(rep); }
+
+   operator const raw_ptr& () const { return rep; }
+   operator raw_ptr& () { return rep; }
+
+   const raw_ptr* operator&() const { return &rep; }
+   raw_ptr* operator&() { return &rep; }
+
+   void kill() { Deleter::apply(rep); }
+
+   void swap(WrappedPtr& other) { _ntl_swap(rep, other.rep); }
+
+};
+
+template<class T, class Deleter>
+void swap(WrappedPtr<T, Deleter>& x, WrappedPtr<T, Deleter>& y)
+{
+   x.swap(y);
+}
+
+
+
+// Error Handling
+
+
+
+class ErrorObject : public NTL_SNS runtime_error {
+public:
+   ErrorObject(const char *msg) : runtime_error(msg) { }
+};
+
+class LogicErrorObject : public ErrorObject {
+public:
+   LogicErrorObject(const char *msg) : ErrorObject(msg) { }
+};
+
+class ArithmeticErrorObject : public ErrorObject {
+public:
+   ArithmeticErrorObject(const char *msg) : ErrorObject(msg) { }
+};
+
+class ResourceErrorObject : public ErrorObject {
+public:
+   ResourceErrorObject(const char *msg) : ErrorObject(msg) { }
+};
+
+class FileErrorObject : public ErrorObject {
+public:
+   FileErrorObject(const char *msg) : ErrorObject(msg) { }
+};
+
+class InputErrorObject : public ErrorObject {
+public:
+   InputErrorObject(const char *msg) : ErrorObject(msg) { }
+};
+
+
+
+extern NTL_CHEAP_THREAD_LOCAL void (*ErrorCallback)();
+
+extern NTL_CHEAP_THREAD_LOCAL void (*ErrorMsgCallback)(const char *);
+
+
+void TerminalError(const char *s);
+
+#ifdef NTL_EXCEPTIONS
+
+inline void MemoryError() { throw NTL_SNS bad_alloc(); }
+inline void Error(const char *msg) { throw ErrorObject(msg); }
+inline void LogicError(const char *msg) { throw LogicErrorObject(msg); }
+inline void ArithmeticError(const char *msg) { throw ArithmeticErrorObject(msg); }
+inline void InvModError(const char *msg) { throw ArithmeticErrorObject(msg); }
+inline void ResourceError(const char *msg) { throw ResourceErrorObject(msg); }
+inline void FileError(const char *msg) { throw FileErrorObject(msg); }
+inline void InputError(const char *msg) { throw InputErrorObject(msg); }
+
+#else
+
+inline void MemoryError() { TerminalError("out of memory"); }
+inline void Error(const char *msg) { TerminalError(msg); }
+inline void LogicError(const char *msg) { TerminalError(msg); }
+inline void ArithmeticError(const char *msg) { TerminalError(msg); }
+inline void InvModError(const char *msg) { TerminalError(msg); }
+inline void ResourceError(const char *msg) { TerminalError(msg); }
+inline void FileError(const char *msg) { TerminalError(msg); }
+inline void InputError(const char *msg) { TerminalError(msg); }
+
+#endif
+
+
+
+
+
+#ifdef NTL_EXCEPTIONS
+
+
+template < typename F >
+class scope_guard
+{
+   typename std::remove_reference<F>::type f;
+   bool active;
+   const char *info;
+
+public:
+   scope_guard(F&& _f, const char *_info) :
+      f(std::forward<F>(_f)), active(true), info(_info) { }
+
+   ~scope_guard() {
+      if (active) {
+#ifdef NTL_TEST_EXCEPTIONS
+         NTL_SNS cerr << "*** ACTIVE SCOPE GUARD TRIGGERED: "
+                      << info << "\n";
+#endif
+         f();
+      }
+   }
+
+   void relax() { active = false; }
+};
+
+
+struct scope_guard_builder {
+   const char *info;
+   explicit scope_guard_builder(const char *_info) : info(_info) { }
+};
+
+template < typename F >
+scope_guard<F>
+operator+(scope_guard_builder b, F&& f)
+{
+   return scope_guard<F>(std::forward<F>(f), b.info);
+}
+
+
+#define NTL_SCOPE(var) auto var = \
+   scope_guard_builder(__FILE__ ":" NTL_STRINGIFY(__LINE__)) + [&]
+
+
+#else
+
+
+class DummyScopeGuard {
+public:
+   void relax() { }
+};
+
+#define NTL_SCOPE(var) DummyScopeGuard var; if (false)
+
+
+
+
+#endif
+
+
+
+
+
+#ifdef NTL_TLS_HACK
+
+
+namespace details_pthread {
+
+
+template<class T> void do_delete_aux(T* t) noexcept { delete t; }
+// an exception here would likely lead to a complete mess...
+// the noexcept specification should force an immediate termination
+
+template<class T> void do_delete(void* t) { do_delete_aux((T*)t); }
+
+using namespace std;
+// I'm not sure if pthread stuff might be placed in namespace std
+
+struct key_wrapper {
+   pthread_key_t key;
+
+   key_wrapper(void (*destructor)(void*))
+   {
+      if (pthread_key_create(&key, destructor))
+         ResourceError("pthread_key_create failed");
+   }
+
+   template<class T>
+   T* set(T *p)
+   {
+      if (!p) MemoryError();
+      if (pthread_setspecific(key, p)) {
+         do_delete_aux(p);
+         ResourceError("pthread_setspecific failed");
+      }
+      return p;
+   }
+
+};
+
+}
+
+
+#define NTL_TLS_LOCAL_INIT(type, var, init) \
+   static NTL_CHEAP_THREAD_LOCAL type *_ntl_hidden_variable_tls_local_ptr_ ## var = 0; \
+   type *_ntl_hidden_variable_tls_local_ptr1_ ## var = _ntl_hidden_variable_tls_local_ptr_ ## var; \
+   if (!_ntl_hidden_variable_tls_local_ptr1_ ## var) { \
+      static details_pthread::key_wrapper hidden_variable_key(details_pthread::do_delete<type>); \
+      type *_ntl_hidden_variable_tls_local_ptr2_ ## var = hidden_variable_key.set(NTL_NEW_OP type init); \
+      _ntl_hidden_variable_tls_local_ptr1_ ## var = _ntl_hidden_variable_tls_local_ptr2_ ## var; \
+      _ntl_hidden_variable_tls_local_ptr_ ## var = _ntl_hidden_variable_tls_local_ptr1_ ## var; \
+   } \
+   type &var = *_ntl_hidden_variable_tls_local_ptr1_ ## var \
+
+
+
+#else
+
+
+// NOTE: this definition of NTL_TLS_LOCAL_INIT ensures that var names
+// a local reference, regardless of the implementation
+#define NTL_TLS_LOCAL_INIT(type,var,init) \
+   static NTL_THREAD_LOCAL type _ntl_hidden_variable_tls_local ## var init; \
+   type &var = _ntl_hidden_variable_tls_local ## var
+
+
+
+
+#endif
+
+#define NTL_EMPTY_ARG
+#define NTL_TLS_LOCAL(type,var) NTL_TLS_LOCAL_INIT(type,var,NTL_EMPTY_ARG)
+
+#define NTL_TLS_GLOBAL_DECL_INIT(type,var,init) \
+   typedef type _ntl_hidden_typedef_tls_access_ ## var; \
+   static inline \
+   type& _ntl_hidden_function_tls_access_ ## var() { \
+      NTL_TLS_LOCAL_INIT(type,var,init); \
+      return var; \
+   } \
+
+
+#define NTL_TLS_GLOBAL_DECL(type,var) NTL_TLS_GLOBAL_DECL_INIT(type,var,NTL_EMPTY_ARG)
+
+#define NTL_TLS_GLOBAL_ACCESS(var) \
+_ntl_hidden_typedef_tls_access_ ## var & var = _ntl_hidden_function_tls_access_ ## var()
+
+
+// **************************************************************
+// Following is code for "long long" arithmetic that can
+// be implemented using NTL_ULL_TYPE or using assembly.
+// I have found that the assembly can be a bit faster.
+// For now, this code is only available if NTL_HAVE_LL_TYPE
+// is defined.  This could change.  In any case, this provides
+// a cleaner interface and might eventually allow for
+// implementation on systems that don't provide a long long type.
+// **************************************************************
+
+#ifdef NTL_HAVE_LL_TYPE
+
+
+#if (!defined(NTL_DISABLE_LL_ASM) \
+     && defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__INTEL_COMPILER) && !defined(__clang__) \
+     && defined (__x86_64__) && NTL_BITS_PER_LONG == 64)
+
+// NOTE: clang's and icc's inline asm code gen is pretty bad, so
+// we don't even try.
+
+// FIXME: probably, this should all be properly tested for speed (and correctness)
+// using the Wizard.
+
+
+struct ll_type {
+   unsigned long hi, lo;
+};
+
+
+static inline void
+ll_mul_add(ll_type& x, unsigned long a, unsigned long b)
+{
+   unsigned long hi, lo;
+   __asm__ (
+   "mulq %[b] \n\t"
+   "addq %[lo],%[xlo] \n\t"
+   "adcq %[hi],%[xhi]" :
+   [lo] "=a" (lo), [hi] "=d" (hi), [xhi] "+r" (x.hi), [xlo] "+r" (x.lo) :
+   [a] "%[lo]" (a), [b] "rm" (b) :
+   "cc"
+   );
+}
+
+static inline void
+ll_imul_add(ll_type& x, unsigned long a, unsigned long b)
+{
+   unsigned long hi, lo;
+   __asm__ (
+   "imulq %[b] \n\t"
+   "addq %[lo],%[xlo] \n\t"
+   "adcq %[hi],%[xhi]" :
+   [lo] "=a" (lo), [hi] "=d" (hi), [xhi] "+r" (x.hi), [xlo] "+r" (x.lo) :
+   [a] "%[lo]" (a), [b] "rm" (b) :
+   "cc"
+   );
+}
+
+static inline void
+ll_mul(ll_type& x, unsigned long a, unsigned long b)
+{
+   __asm__ (
+   "mulq %[b]" :
+   [lo] "=a" (x.lo), [hi] "=d" (x.hi) :
+   [a] "%[lo]" (a), [b] "rm" (b) :
+   "cc"
+   );
+}
+
+static inline void
+ll_imul(ll_type& x, unsigned long a, unsigned long b)
+{
+   __asm__ (
+   "imulq %[b]" :
+   [lo] "=a" (x.lo), [hi] "=d" (x.hi) :
+   [a] "%[lo]" (a), [b] "rm" (b) :
+   "cc"
+   );
+}
+
+static inline void
+ll_add(ll_type& x, unsigned long a)
+{
+   __asm__ (
+   "addq %[a],%[xlo] \n\t"
+   "adcq %[z],%[xhi]" :
+   [xhi] "+r" (x.hi), [xlo] "+r" (x.lo) :
+   [a] "rm" (a), [z] "i" (0) :
+   "cc"
+   );
+}
+
+
+
+// NOTE: an optimizing compiler will remove the conditional.
+// The alternative would be to make a specialization for shamt=0.
+// Unfortunately, this is impossible to do across a wide range
+// of compilers and still maintain internal linkage --- it is not
+// allowed to include static spec in the specialization (new compilers
+// will complain) and without it, some older compilers will generate
+// an external symbol.  In fact, NTL currently never calls
+// this with shamt=0, so it is all rather academic...but I want to
+// keep this general for future use.
+template<long shamt>
+static inline unsigned long
+ll_rshift_get_lo(ll_type x)
+{
+   if (shamt) {
+      __asm__ (
+      "shrdq %[shamt],%[hi],%[lo]" :
+      [lo] "+r" (x.lo) :
+      [shamt] "i" (shamt), [hi] "r" (x.hi) :
+      "cc"
+      );
+   }
+   return x.lo;
+}
+
+
+static inline unsigned long
+ll_get_lo(const ll_type& x)
+{
+   return x.lo;
+}
+
+static inline unsigned long
+ll_get_hi(const ll_type& x)
+{
+   return x.hi;
+}
+
+
+static inline void
+ll_init(ll_type& x, unsigned long a)
+{
+   x.lo = a;
+   x.hi = 0;
+}
+
+#else
+
+
+typedef NTL_ULL_TYPE ll_type;
+
+// NOTE: the following function definitions should serve as
+// documentation, as well.
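+//
+// (Editorial illustration, not part of NTL: the typical use of this
+// interface is double-word accumulation of word products, e.g.
+// summing a[i]*b[i] without losing the high words -- assuming arrays
+// a, b of unsigned long and a length k:
+//
+//    ll_type acc;
+//    ll_init(acc, 0);
+//    for (long i = 0; i < k; i++)
+//       ll_mul_add(acc, a[i], b[i]);
+//    unsigned long lo = ll_get_lo(acc);
+//    unsigned long hi = ll_get_hi(acc);
+// )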
+
+static inline void
+ll_mul_add(ll_type& x, unsigned long a, unsigned long b)
+{
+   x += ((ll_type) a)*((ll_type) b);
+}
+
+// a and b should be representable as positive long's,
+// to allow for the most flexible implementation
+static inline void
+ll_imul_add(ll_type& x, unsigned long a, unsigned long b)
+{
+   x += ((ll_type) long(a))*((ll_type) long(b));
+}
+static inline void
+ll_mul(ll_type& x, unsigned long a, unsigned long b)
+{
+   x = ((ll_type) a)*((ll_type) b);
+}
+
+// a and b should be representable as positive long's,
+// to allow for the most flexible implementation
+static inline void
+ll_imul(ll_type& x, unsigned long a, unsigned long b)
+{
+   x = ((ll_type) long(a))*((ll_type) long(b));
+}
+
+static inline void
+ll_add(ll_type& x, unsigned long a)
+{
+   x += a;
+}
+
+template<long shamt>
+static inline unsigned long
+ll_rshift_get_lo(const ll_type& x)
+{
+   return ((unsigned long) (x >> shamt));
+}
+
+static inline unsigned long
+ll_get_lo(const ll_type& x)
+{
+   return ((unsigned long) x);
+}
+
+static inline unsigned long
+ll_get_hi(const ll_type& x)
+{
+   return ((unsigned long) (x >> NTL_BITS_PER_LONG));
+}
+
+
+static inline void
+ll_init(ll_type& x, unsigned long a)
+{
+   x = a;
+}
+
+
+#endif
+
+
+
+#endif
+
+
+
+
+NTL_CLOSE_NNS
+
+
+#endif
+
diff --git a/thirdparty/linux/ntl/include/NTL/vec_GF2.h b/thirdparty/linux/ntl/include/NTL/vec_GF2.h
new file mode 100644
index 0000000000..fa3a0f00ba
--- /dev/null
+++ b/thirdparty/linux/ntl/include/NTL/vec_GF2.h
@@ -0,0 +1,214 @@
+
+#ifndef NTL_vec_GF2__H
+#define NTL_vec_GF2__H
+
+#include
+#include
+
+NTL_OPEN_NNS
+
+
+// Vec<GF2> is an explicit specialization of Vec<T>.
+// Vec<GF2> is declared, but not defined, in GF2.h,
+// to prevent the generic Vec from being used.
+
+template<>
+class Vec<GF2> {
+
+public:
+
+// these should be private, but they are not
+
+   WordVector rep;
+
+   long _len;     // length (in bits)
+   long _maxlen;  // (MaxLength << 1) | (fixed)
+
+   // invariants: rep.length() "tracks" length() ( = _len)
+   //             All bits in positions >= length are zero.
+
+   // Note: rep.MaxLength() may exceed the value
+   // indicated by MaxLength().
+
+
+// the following are "really" public
+
+
+   Vec() : _len(0), _maxlen(0) {}
+   Vec(INIT_SIZE_TYPE, long n) : _len(0), _maxlen(0) { SetLength(n); }
+   Vec(const Vec& a) : _len(0), _maxlen(0) { *this = a; }
+
+   Vec& operator=(const Vec& a);
+
+   ~Vec() {}
+
+   void kill();
+
+   void SetLength(long n);
+   void SetLength(long n, GF2 a);
+
+   void SetMaxLength(long n);
+   void FixLength(long n);
+   void FixAtCurrentLength();
+
+   long length() const { return _len; }
+   long MaxLength() const { return _maxlen >> 1; }
+   long allocated() const { return rep.MaxLength() * NTL_BITS_PER_LONG; }
+   long fixed() const { return _maxlen & 1; }
+
+
+   Vec(Vec& x, INIT_TRANS_TYPE) :
+      rep(x.rep, INIT_TRANS), _len(x._len), _maxlen(x._maxlen) { }
+
+   const GF2 get(long i) const;
+   void put(long i, GF2 a);
+   void put(long i, long a) { put(i, to_GF2(a)); }
+
+   ref_GF2 operator[](long i);
+
+   ref_GF2 operator()(long i)
+      { return (*this)[i-1]; }
+
+   const GF2 operator[](long i) const
+      { return get(i); }
+
+   const GF2 operator()(long i) const
+      { return get(i-1); }
+
+   void swap(Vec& y);
+   void append(GF2 a);
+   void append(const Vec& w);
+
+
+
+
+// Some partial STL compatibility...also used
+// to interface with the Matrix template class
+
+   typedef GF2 value_type;
+   typedef ref_GF2 reference;
+   typedef const GF2 const_reference;
+
+};
+
+typedef Vec<GF2> vec_GF2;
+
+
+// specialized conversion
+inline void conv(vec_GF2& x, const vec_GF2& a)
+{  x = a; }
+
+
+inline void swap(vec_GF2& x, vec_GF2& y) { x.swap(y); }
+inline void append(vec_GF2& v, const GF2 a) { v.append(a); }
+inline void append(vec_GF2& v, const vec_GF2& a) { v.append(a); }
+
+long operator==(const vec_GF2& a, const vec_GF2& b);
+inline long operator!=(const vec_GF2& a, const vec_GF2& b)
+   { return !(a == b); }
+
+NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const vec_GF2& a);
+NTL_SNS istream& operator>>(NTL_SNS istream& s, vec_GF2& a);
+
+void shift(vec_GF2& x, const vec_GF2& a, long n);
+// x = a shifted n places, i.e., if l = a.length(),
+// x.length() = l, x[i] = a[i-n] for 0 <= i-n < l,
+// and x[i] = 0 for all other i such that 0 <= i < l.
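// Editorial illustration, not NTL code: for a = [1 0 1 1] (length 4),
//
//    shift(x, a, 1);    // x = [0 1 0 1] -- entries move to higher indices
//    shift(x, a, -1);   // x = [0 1 1 0] -- entries move to lower indices
//
// and positions with no source entry are filled with zero.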
+ +inline vec_GF2 shift(const vec_GF2& a, long n) + { vec_GF2 x; shift(x, a, n); NTL_OPT_RETURN(vec_GF2, x); } + +void reverse(vec_GF2& x, const vec_GF2& a); + +inline vec_GF2 reverse(const vec_GF2& a) + { vec_GF2 x; reverse(x, a); NTL_OPT_RETURN(vec_GF2, x); } + +void random(vec_GF2& x, long n); +inline vec_GF2 random_vec_GF2(long n) + { vec_GF2 x; random(x, n); NTL_OPT_RETURN(vec_GF2, x); } + +long weight(const vec_GF2& a); + +void mul(vec_GF2& x, const vec_GF2& a, GF2 b); +inline void mul(vec_GF2& x, GF2 a, const vec_GF2& b) + { mul(x, b, a); } + +inline void mul(vec_GF2& x, const vec_GF2& a, long b) + { mul(x, a, to_GF2(b)); } +inline void mul(vec_GF2& x, long a, const vec_GF2& b) + { mul(x, b, a); } + +void add(vec_GF2& x, const vec_GF2& a, const vec_GF2& b); + +inline void sub(vec_GF2& x, const vec_GF2& a, const vec_GF2& b) + { add(x, a, b); } + +void clear(vec_GF2& x); + +inline void negate(vec_GF2& x, const vec_GF2& a) + { x = a; } + +inline void InnerProduct(ref_GF2 x, const vec_GF2& a, const vec_GF2& b) + { x = to_GF2(InnerProduct(a.rep, b.rep)); } + +long IsZero(const vec_GF2& a); + +vec_GF2 operator+(const vec_GF2& a, const vec_GF2& b); + +vec_GF2 operator-(const vec_GF2& a, const vec_GF2& b); + +inline vec_GF2 operator-(const vec_GF2& a) + { return a; } + +inline vec_GF2 operator*(const vec_GF2& a, GF2 b) + { vec_GF2 x; mul(x, a, b); NTL_OPT_RETURN(vec_GF2, x); } + +inline vec_GF2 operator*(const vec_GF2& a, long b) + { vec_GF2 x; mul(x, a, b); NTL_OPT_RETURN(vec_GF2, x); } + +inline vec_GF2 operator*(GF2 a, const vec_GF2& b) + { vec_GF2 x; mul(x, a, b); NTL_OPT_RETURN(vec_GF2, x); } + +inline vec_GF2 operator*(long a, const vec_GF2& b) + { vec_GF2 x; mul(x, a, b); NTL_OPT_RETURN(vec_GF2, x); } + + +inline GF2 operator*(const vec_GF2& a, const vec_GF2& b) + { return to_GF2(InnerProduct(a.rep, b.rep)); } + +// assignment operator notation: + +inline vec_GF2& operator+=(vec_GF2& x, const vec_GF2& a) +{ + add(x, x, a); + return x; +} + +inline vec_GF2& operator-=(vec_GF2& x, const vec_GF2& a) +{ + sub(x, x, a); + return x; +} + +inline vec_GF2& operator*=(vec_GF2& x, GF2 a) +{ + mul(x, x, a); + return x; +} + +inline vec_GF2& operator*=(vec_GF2& x, long a) +{ + mul(x, x, a); + return x; +} + +void VectorCopy(vec_GF2& x, const vec_GF2& a, long n); +inline vec_GF2 VectorCopy(const vec_GF2& a, long n) + { vec_GF2 x; VectorCopy(x, a, n); NTL_OPT_RETURN(vec_GF2, x); } + +NTL_CLOSE_NNS + + +#endif + + diff --git a/thirdparty/linux/ntl/include/NTL/vec_GF2E.h b/thirdparty/linux/ntl/include/NTL/vec_GF2E.h new file mode 100644 index 0000000000..f54926f799 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_GF2E.h @@ -0,0 +1,110 @@ + +#ifndef NTL_vec_GF2E__H +#define NTL_vec_GF2E__H + +#include + +NTL_OPEN_NNS + +typedef Vec vec_GF2E; + +void mul(vec_GF2E& x, const vec_GF2E& a, const GF2E& b); +inline void mul(vec_GF2E& x, const GF2E& a, const vec_GF2E& b) + { mul(x, b, a); } + +void mul(vec_GF2E& x, const vec_GF2E& a, GF2 b); +inline void mul(vec_GF2E& x, GF2 a, const vec_GF2E& b) + { mul(x, b, a); } + +inline void mul(vec_GF2E& x, const vec_GF2E& a, long b) + { mul(x, a, to_GF2(b)); } +inline void mul(vec_GF2E& x, long a, const vec_GF2E& b) + { mul(x, b, a); } + + + +void add(vec_GF2E& x, const vec_GF2E& a, const vec_GF2E& b); +inline void sub(vec_GF2E& x, const vec_GF2E& a, const vec_GF2E& b) + { add(x, a, b); } + +inline void negate(vec_GF2E& x, const vec_GF2E& a) { x = a; } + +void clear(vec_GF2E& x); + + +void InnerProduct(GF2E& x, const vec_GF2E& a, const vec_GF2E& b); +void 
InnerProduct(GF2E& x, const vec_GF2E& a, const vec_GF2E& b, + long offset); + + +long IsZero(const vec_GF2E& a); + +vec_GF2E +operator+(const vec_GF2E& a, const vec_GF2E& b); + +vec_GF2E +operator-(const vec_GF2E& a, const vec_GF2E& b); + +vec_GF2E operator-(const vec_GF2E& a); +GF2E operator*(const vec_GF2E& a, const vec_GF2E& b); + +inline vec_GF2E operator*(const vec_GF2E& a, const GF2E& b) + { vec_GF2E x; mul(x, a, b); NTL_OPT_RETURN(vec_GF2E, x); } + +inline vec_GF2E operator*(const vec_GF2E& a, GF2 b) + { vec_GF2E x; mul(x, a, b); NTL_OPT_RETURN(vec_GF2E, x); } + +inline vec_GF2E operator*(const vec_GF2E& a, long b) + { vec_GF2E x; mul(x, a, b); NTL_OPT_RETURN(vec_GF2E, x); } + +inline vec_GF2E operator*(const GF2E& a, const vec_GF2E& b) + { vec_GF2E x; mul(x, a, b); NTL_OPT_RETURN(vec_GF2E, x); } + +inline vec_GF2E operator*(GF2 a, const vec_GF2E& b) + { vec_GF2E x; mul(x, a, b); NTL_OPT_RETURN(vec_GF2E, x); } + +inline vec_GF2E operator*(long a, const vec_GF2E& b) + { vec_GF2E x; mul(x, a, b); NTL_OPT_RETURN(vec_GF2E, x); } + + + +// assignment operator notation: + +inline vec_GF2E& operator+=(vec_GF2E& x, const vec_GF2E& a) +{ + add(x, x, a); + return x; +} + +inline vec_GF2E& operator-=(vec_GF2E& x, const vec_GF2E& a) +{ + sub(x, x, a); + return x; +} + +inline vec_GF2E& operator*=(vec_GF2E& x, const GF2E& a) +{ + mul(x, x, a); + return x; +} + +inline vec_GF2E& operator*=(vec_GF2E& x, GF2 a) +{ + mul(x, x, a); + return x; +} + +inline vec_GF2E& operator*=(vec_GF2E& x, long a) +{ + mul(x, x, a); + return x; +} + +void VectorCopy(vec_GF2E& x, const vec_GF2E& a, long n); +inline vec_GF2E VectorCopy(const vec_GF2E& a, long n) + { vec_GF2E x; VectorCopy(x, a, n); NTL_OPT_RETURN(vec_GF2E, x); } + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_GF2XVec.h b/thirdparty/linux/ntl/include/NTL/vec_GF2XVec.h new file mode 100644 index 0000000000..cd93d72a45 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_GF2XVec.h @@ -0,0 +1,14 @@ + +#ifndef NTL_vec_GF2XVec__H +#define NTL_vec_GF2XVec__H + +#include +#include + +NTL_OPEN_NNS + +typedef Vec vec_GF2XVec; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_RR.h b/thirdparty/linux/ntl/include/NTL/vec_RR.h new file mode 100644 index 0000000000..7185e692d0 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_RR.h @@ -0,0 +1,88 @@ + +#ifndef NTL_vec_RR__H +#define NTL_vec_RR__H + +#include +#include + +NTL_OPEN_NNS + +typedef Vec vec_RR; + +void mul(vec_RR& x, const vec_RR& a, const RR& b); +inline void mul(vec_RR& x, const RR& a, const vec_RR& b) + { mul(x, b, a); } + +void mul(vec_RR& x, const vec_RR& a, double b); +inline void mul(vec_RR& x, double a, const vec_RR& b) + { mul(x, b, a); } + + + + +void add(vec_RR& x, const vec_RR& a, const vec_RR& b); + +void sub(vec_RR& x, const vec_RR& a, const vec_RR& b); +void clear(vec_RR& x); +void negate(vec_RR& x, const vec_RR& a); + + +void InnerProduct(RR& x, const vec_RR& a, const vec_RR& b); + +long IsZero(const vec_RR& a); + +void VectorCopy(vec_RR& x, const vec_RR& a, long n); +inline vec_RR VectorCopy(const vec_RR& a, long n) + { vec_RR x; VectorCopy(x, a, n); NTL_OPT_RETURN(vec_RR, x); } + + + +vec_RR operator+(const vec_RR& a, const vec_RR& b); +vec_RR operator-(const vec_RR& a, const vec_RR& b); +vec_RR operator-(const vec_RR& a); + +inline vec_RR operator*(const vec_RR& a, const RR& b) + { vec_RR x; mul(x, a, b); NTL_OPT_RETURN(vec_RR, x); } + +inline vec_RR operator*(const vec_RR& a, double b) + { vec_RR x; mul(x, a, b); 
NTL_OPT_RETURN(vec_RR, x); } + +inline vec_RR operator*(const RR& a, const vec_RR& b) + { vec_RR x; mul(x, a, b); NTL_OPT_RETURN(vec_RR, x); } + +inline vec_RR operator*(double a, const vec_RR& b) + { vec_RR x; mul(x, a, b); NTL_OPT_RETURN(vec_RR, x); } + +RR operator*(const vec_RR& a, const vec_RR& b); + + +// assignment operator notation: + +inline vec_RR& operator+=(vec_RR& x, const vec_RR& a) +{ + add(x, x, a); + return x; +} + +inline vec_RR& operator-=(vec_RR& x, const vec_RR& a) +{ + sub(x, x, a); + return x; +} + +inline vec_RR& operator*=(vec_RR& x, const RR& a) +{ + mul(x, x, a); + return x; +} + +inline vec_RR& operator*=(vec_RR& x, double a) +{ + mul(x, x, a); + return x; +} + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_ZZ.h b/thirdparty/linux/ntl/include/NTL/vec_ZZ.h new file mode 100644 index 0000000000..447ada5291 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_ZZ.h @@ -0,0 +1,90 @@ + +#ifndef NTL_vec_ZZ__H +#define NTL_vec_ZZ__H + +#include +#include + +NTL_OPEN_NNS + +typedef Vec vec_ZZ; + +void mul(vec_ZZ& x, const vec_ZZ& a, const ZZ& b); +inline void mul(vec_ZZ& x, const ZZ& a, const vec_ZZ& b) + { mul(x, b, a); } + +void mul(vec_ZZ& x, const vec_ZZ& a, long b); +inline void mul(vec_ZZ& x, long a, const vec_ZZ& b) + { mul(x, b, a); } + +void add(vec_ZZ& x, const vec_ZZ& a, const vec_ZZ& b); + +void sub(vec_ZZ& x, const vec_ZZ& a, const vec_ZZ& b); +void clear(vec_ZZ& x); +void negate(vec_ZZ& x, const vec_ZZ& a); + + + + +void InnerProduct(ZZ& x, const vec_ZZ& a, const vec_ZZ& b); + +long IsZero(const vec_ZZ& a); + +vec_ZZ operator+(const vec_ZZ& a, const vec_ZZ& b); +vec_ZZ operator-(const vec_ZZ& a, const vec_ZZ& b); +vec_ZZ operator-(const vec_ZZ& a); + +inline vec_ZZ operator*(const vec_ZZ& a, const ZZ& b) + { vec_ZZ x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ, x); } + +inline vec_ZZ operator*(const vec_ZZ& a, long b) + { vec_ZZ x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ, x); } + +inline vec_ZZ operator*(const ZZ& a, const vec_ZZ& b) + { vec_ZZ x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ, x); } + +inline vec_ZZ operator*(long a, const vec_ZZ& b) + { vec_ZZ x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ, x); } + + +ZZ operator*(const vec_ZZ& a, const vec_ZZ& b); + + + + + +// assignment operator notation: + +inline vec_ZZ& operator+=(vec_ZZ& x, const vec_ZZ& a) +{ + add(x, x, a); + return x; +} + +inline vec_ZZ& operator-=(vec_ZZ& x, const vec_ZZ& a) +{ + sub(x, x, a); + return x; +} + +inline vec_ZZ& operator*=(vec_ZZ& x, const ZZ& a) +{ + mul(x, x, a); + return x; +} + +inline vec_ZZ& operator*=(vec_ZZ& x, long a) +{ + mul(x, x, a); + return x; +} + +void VectorCopy(vec_ZZ& x, const vec_ZZ& a, long n); +inline vec_ZZ VectorCopy(const vec_ZZ& a, long n) + { vec_ZZ x; VectorCopy(x, a, n); NTL_OPT_RETURN(vec_ZZ, x); } + + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_ZZVec.h b/thirdparty/linux/ntl/include/NTL/vec_ZZVec.h new file mode 100644 index 0000000000..47d32e5ff5 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_ZZVec.h @@ -0,0 +1,14 @@ + +#ifndef NTL_vec_ZZVec__H +#define NTL_vec_ZZVec__H + +#include +#include + +NTL_OPEN_NNS + +typedef Vec vec_ZZVec; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_ZZ_p.h b/thirdparty/linux/ntl/include/NTL/vec_ZZ_p.h new file mode 100644 index 0000000000..de678ed729 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_ZZ_p.h @@ -0,0 +1,97 @@ + +#ifndef NTL_vec_ZZ_p__H +#define NTL_vec_ZZ_p__H + +#include +#include + +NTL_OPEN_NNS + 
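// Editorial usage sketch, not NTL code: assuming the modulus has been
// installed with ZZ_p::init(p), the declarations below support, e.g.,
//
//    vec_ZZ_p v;
//    v.SetLength(3);           // v = [0 0 0]
//    v[0] = 1; v[1] = 2; v[2] = 3;
//    vec_ZZ_p w = v + v;       // componentwise sum
//    ZZ_p ip = v * w;          // inner product
//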
+typedef Vec vec_ZZ_p; + + +// legacy conversion notation +inline vec_ZZ_p to_vec_ZZ_p(const vec_ZZ& a) + { vec_ZZ_p x; conv(x, a); NTL_OPT_RETURN(vec_ZZ_p, x); } +inline vec_ZZ to_vec_ZZ(const vec_ZZ_p& a) + { vec_ZZ x; conv(x, a); NTL_OPT_RETURN(vec_ZZ, x); } + + +void mul(vec_ZZ_p& x, const vec_ZZ_p& a, const ZZ_p& b); +inline void mul(vec_ZZ_p& x, const ZZ_p& a, const vec_ZZ_p& b) + { mul(x, b, a); } + +void mul(vec_ZZ_p& x, const vec_ZZ_p& a, long b); +inline void mul(vec_ZZ_p& x, long a, const vec_ZZ_p& b) + { mul(x, b, a); } + +void add(vec_ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b); + +void sub(vec_ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b); +void clear(vec_ZZ_p& x); +void negate(vec_ZZ_p& x, const vec_ZZ_p& a); + + + + +void InnerProduct(ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b); +void InnerProduct(ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b, + long offset); + +long IsZero(const vec_ZZ_p& a); + +void VectorCopy(vec_ZZ_p& x, const vec_ZZ_p& a, long n); +inline vec_ZZ_p VectorCopy(const vec_ZZ_p& a, long n) + { vec_ZZ_p x; VectorCopy(x, a, n); NTL_OPT_RETURN(vec_ZZ_p, x); } + +vec_ZZ_p operator+(const vec_ZZ_p& a, const vec_ZZ_p& b); +vec_ZZ_p operator-(const vec_ZZ_p& a, const vec_ZZ_p& b); +vec_ZZ_p operator-(const vec_ZZ_p& a); + +inline vec_ZZ_p operator*(const vec_ZZ_p& a, const ZZ_p& b) + { vec_ZZ_p x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ_p, x); } + +inline vec_ZZ_p operator*(const vec_ZZ_p& a, long b) + { vec_ZZ_p x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ_p, x); } + +inline vec_ZZ_p operator*(const ZZ_p& a, const vec_ZZ_p& b) + { vec_ZZ_p x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ_p, x); } + +inline vec_ZZ_p operator*(long a, const vec_ZZ_p& b) + { vec_ZZ_p x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ_p, x); } + +ZZ_p operator*(const vec_ZZ_p& a, const vec_ZZ_p& b); + + + +// assignment operator notation: + +inline vec_ZZ_p& operator+=(vec_ZZ_p& x, const vec_ZZ_p& a) +{ + add(x, x, a); + return x; +} + +inline vec_ZZ_p& operator-=(vec_ZZ_p& x, const vec_ZZ_p& a) +{ + sub(x, x, a); + return x; +} + +inline vec_ZZ_p& operator*=(vec_ZZ_p& x, const ZZ_p& a) +{ + mul(x, x, a); + return x; +} + +inline vec_ZZ_p& operator*=(vec_ZZ_p& x, long a) +{ + mul(x, x, a); + return x; +} + + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_ZZ_pE.h b/thirdparty/linux/ntl/include/NTL/vec_ZZ_pE.h new file mode 100644 index 0000000000..9770be9637 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_ZZ_pE.h @@ -0,0 +1,107 @@ + +#ifndef NTL_vec_ZZ_pE__H +#define NTL_vec_ZZ_pE__H + +#include + +NTL_OPEN_NNS + +typedef Vec vec_ZZ_pE; + + +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const ZZ_pE& b); +inline void mul(vec_ZZ_pE& x, const ZZ_pE& a, const vec_ZZ_pE& b) + { mul(x, b, a); } + +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const ZZ_p& b); +inline void mul(vec_ZZ_pE& x, const ZZ_p& a, const vec_ZZ_pE& b) + { mul(x, b, a); } + +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, long b); +inline void mul(vec_ZZ_pE& x, long a, const vec_ZZ_pE& b) + { mul(x, b, a); } + +void add(vec_ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b); +void sub(vec_ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b); + +void negate(vec_ZZ_pE& x, const vec_ZZ_pE& a); + +void clear(vec_ZZ_pE& x); + + +void InnerProduct(ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b); +void InnerProduct(ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b, + long offset); + + +long IsZero(const vec_ZZ_pE& a); + +void VectorCopy(vec_ZZ_pE& x, const vec_ZZ_pE& a, long n); +inline vec_ZZ_pE VectorCopy(const vec_ZZ_pE& a, 
long n) + { vec_ZZ_pE x; VectorCopy(x, a, n); NTL_OPT_RETURN(vec_ZZ_pE, x); } + + + +vec_ZZ_pE operator+(const vec_ZZ_pE& a, const vec_ZZ_pE& b); +vec_ZZ_pE operator-(const vec_ZZ_pE& a, const vec_ZZ_pE& b); +vec_ZZ_pE operator-(const vec_ZZ_pE& a); + +inline vec_ZZ_pE operator*(const vec_ZZ_pE& a, const ZZ_pE& b) + { vec_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ_pE, x); } + +inline vec_ZZ_pE operator*(const vec_ZZ_pE& a, const ZZ_p& b) + { vec_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ_pE, x); } + +inline vec_ZZ_pE operator*(const vec_ZZ_pE& a, long b) + { vec_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ_pE, x); } + +inline vec_ZZ_pE operator*(const ZZ_pE& a, const vec_ZZ_pE& b) + { vec_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ_pE, x); } + +inline vec_ZZ_pE operator*(const ZZ_p& a, const vec_ZZ_pE& b) + { vec_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ_pE, x); } + +inline vec_ZZ_pE operator*(long a, const vec_ZZ_pE& b) + { vec_ZZ_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_ZZ_pE, x); } + + + +ZZ_pE operator*(const vec_ZZ_pE& a, const vec_ZZ_pE& b); + + + +// assignment operator notation: + +inline vec_ZZ_pE& operator+=(vec_ZZ_pE& x, const vec_ZZ_pE& a) +{ + add(x, x, a); + return x; +} + +inline vec_ZZ_pE& operator-=(vec_ZZ_pE& x, const vec_ZZ_pE& a) +{ + sub(x, x, a); + return x; +} + +inline vec_ZZ_pE& operator*=(vec_ZZ_pE& x, const ZZ_pE& a) +{ + mul(x, x, a); + return x; +} + +inline vec_ZZ_pE& operator*=(vec_ZZ_pE& x, const ZZ_p& a) +{ + mul(x, x, a); + return x; +} + +inline vec_ZZ_pE& operator*=(vec_ZZ_pE& x, long a) +{ + mul(x, x, a); + return x; +} + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_double.h b/thirdparty/linux/ntl/include/NTL/vec_double.h new file mode 100644 index 0000000000..a53f4154b8 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_double.h @@ -0,0 +1,13 @@ + +#ifndef NTL_vec_double__H +#define NTL_vec_double__H + +#include + +NTL_OPEN_NNS + +typedef Vec vec_double; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_long.h b/thirdparty/linux/ntl/include/NTL/vec_long.h new file mode 100644 index 0000000000..2180da6688 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_long.h @@ -0,0 +1,13 @@ + +#ifndef NTL_vec_long__H +#define NTL_vec_long__H + +#include + +NTL_OPEN_NNS + +typedef Vec vec_long; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_lzz_p.h b/thirdparty/linux/ntl/include/NTL/vec_lzz_p.h new file mode 100644 index 0000000000..f6f8b7d9c8 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_lzz_p.h @@ -0,0 +1,107 @@ + +#ifndef NTL_vec_zz_p__H +#define NTL_vec_zz_p__H + +#include +#include + +NTL_OPEN_NNS + +typedef Vec vec_zz_p; + + +// legacy conversion notation +inline vec_zz_p to_vec_zz_p(const vec_ZZ& a) + { vec_zz_p x; conv(x, a); NTL_OPT_RETURN(vec_zz_p, x); } +inline vec_ZZ to_vec_ZZ(const vec_zz_p& a) + { vec_ZZ x; conv(x, a); NTL_OPT_RETURN(vec_ZZ, x); } + + + +long CRT(vec_ZZ& g, ZZ& a, const vec_zz_p& G); + +void add(vec_zz_p& x, const vec_zz_p& a, const vec_zz_p& b); + +void sub(vec_zz_p& x, const vec_zz_p& a, const vec_zz_p& b); +void clear(vec_zz_p& x); +void negate(vec_zz_p& x, const vec_zz_p& a); + + + +void InnerProduct(zz_p& x, const vec_zz_p& a, const vec_zz_p& b); +void InnerProduct(zz_p& x, const vec_zz_p& a, const vec_zz_p& b, + long offset); + + + +void mul(vec_zz_p& x, const vec_zz_p& a, zz_p b); +inline void mul(vec_zz_p& x, zz_p a, const vec_zz_p& b) + { mul(x, b, a); } + +void mul(vec_zz_p& x, const vec_zz_p& a, long b); 
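// Editorial note, not NTL code: throughout these vector headers, each
// scalar multiplication is implemented once, with the scalar on the
// right; the scalar-on-the-left form (like the wrapper immediately
// below) is a one-line inline that commutes the arguments, so only one
// implementation per scalar type is needed. A sketch of the pattern,
// with hypothetical names VecT and S:
//
//    void mul(VecT& x, const VecT& a, S b);     // the real implementation
//    inline void mul(VecT& x, S a, const VecT& b)
//       { mul(x, b, a); }                       // commuted wrapper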
+inline void mul(vec_zz_p& x, long a, const vec_zz_p& b) + { mul(x, b, a); } + + +long IsZero(const vec_zz_p& a); + +void VectorCopy(vec_zz_p& x, const vec_zz_p& a, long n); +inline vec_zz_p VectorCopy(const vec_zz_p& a, long n) + { vec_zz_p x; VectorCopy(x, a, n); NTL_OPT_RETURN(vec_zz_p, x); } + + + +vec_zz_p operator+(const vec_zz_p& a, const vec_zz_p& b); +vec_zz_p operator-(const vec_zz_p& a, const vec_zz_p& b); +vec_zz_p operator-(const vec_zz_p& a); +zz_p operator*(const vec_zz_p& a, const vec_zz_p& b); + +inline vec_zz_p operator*(const vec_zz_p& a, zz_p b) + { vec_zz_p x; mul(x, a, b); NTL_OPT_RETURN(vec_zz_p, x); } + +inline vec_zz_p operator*(const vec_zz_p& a, long b) + { vec_zz_p x; mul(x, a, b); NTL_OPT_RETURN(vec_zz_p, x); } + +inline vec_zz_p operator*(zz_p a, const vec_zz_p& b) + { vec_zz_p x; mul(x, a, b); NTL_OPT_RETURN(vec_zz_p, x); } + +inline vec_zz_p operator*(long a, const vec_zz_p& b) + { vec_zz_p x; mul(x, a, b); NTL_OPT_RETURN(vec_zz_p, x); } + + + + + + + +// assignment operator notation: + +inline vec_zz_p& operator+=(vec_zz_p& x, const vec_zz_p& a) +{ + add(x, x, a); + return x; +} + +inline vec_zz_p& operator-=(vec_zz_p& x, const vec_zz_p& a) +{ + sub(x, x, a); + return x; +} + +inline vec_zz_p& operator*=(vec_zz_p& x, zz_p a) +{ + mul(x, x, a); + return x; +} + +inline vec_zz_p& operator*=(vec_zz_p& x, long a) +{ + mul(x, x, a); + return x; +} + + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_lzz_pE.h b/thirdparty/linux/ntl/include/NTL/vec_lzz_pE.h new file mode 100644 index 0000000000..632224444c --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_lzz_pE.h @@ -0,0 +1,107 @@ + +#ifndef NTL_vec_zz_pE__H +#define NTL_vec_zz_pE__H + +#include + +NTL_OPEN_NNS + + +typedef Vec vec_zz_pE; + +void mul(vec_zz_pE& x, const vec_zz_pE& a, const zz_pE& b); +inline void mul(vec_zz_pE& x, const zz_pE& a, const vec_zz_pE& b) + { mul(x, b, a); } + +void mul(vec_zz_pE& x, const vec_zz_pE& a, const zz_p& b); +inline void mul(vec_zz_pE& x, const zz_p& a, const vec_zz_pE& b) + { mul(x, b, a); } + +void mul(vec_zz_pE& x, const vec_zz_pE& a, long b); +inline void mul(vec_zz_pE& x, long a, const vec_zz_pE& b) + { mul(x, b, a); } + +void add(vec_zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b); +void sub(vec_zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b); + +void negate(vec_zz_pE& x, const vec_zz_pE& a); + +void clear(vec_zz_pE& x); + + +void InnerProduct(zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b); +void InnerProduct(zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b, + long offset); + + +long IsZero(const vec_zz_pE& a); + +void VectorCopy(vec_zz_pE& x, const vec_zz_pE& a, long n); +inline vec_zz_pE VectorCopy(const vec_zz_pE& a, long n) + { vec_zz_pE x; VectorCopy(x, a, n); NTL_OPT_RETURN(vec_zz_pE, x); } + + + +vec_zz_pE operator+(const vec_zz_pE& a, const vec_zz_pE& b); +vec_zz_pE operator-(const vec_zz_pE& a, const vec_zz_pE& b); +vec_zz_pE operator-(const vec_zz_pE& a); + +inline vec_zz_pE operator*(const vec_zz_pE& a, const zz_pE& b) + { vec_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_zz_pE, x); } + +inline vec_zz_pE operator*(const vec_zz_pE& a, const zz_p& b) + { vec_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_zz_pE, x); } + +inline vec_zz_pE operator*(const vec_zz_pE& a, long b) + { vec_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_zz_pE, x); } + +inline vec_zz_pE operator*(const zz_pE& a, const vec_zz_pE& b) + { vec_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_zz_pE, x); } + +inline vec_zz_pE operator*(const zz_p& a, const vec_zz_pE& b) + 
{ vec_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_zz_pE, x); } + +inline vec_zz_pE operator*(long a, const vec_zz_pE& b) + { vec_zz_pE x; mul(x, a, b); NTL_OPT_RETURN(vec_zz_pE, x); } + + + +zz_pE operator*(const vec_zz_pE& a, const vec_zz_pE& b); + + + +// assignment operator notation: + +inline vec_zz_pE& operator+=(vec_zz_pE& x, const vec_zz_pE& a) +{ + add(x, x, a); + return x; +} + +inline vec_zz_pE& operator-=(vec_zz_pE& x, const vec_zz_pE& a) +{ + sub(x, x, a); + return x; +} + +inline vec_zz_pE& operator*=(vec_zz_pE& x, const zz_pE& a) +{ + mul(x, x, a); + return x; +} + +inline vec_zz_pE& operator*=(vec_zz_pE& x, const zz_p& a) +{ + mul(x, x, a); + return x; +} + +inline vec_zz_pE& operator*=(vec_zz_pE& x, long a) +{ + mul(x, x, a); + return x; +} + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_quad_float.h b/thirdparty/linux/ntl/include/NTL/vec_quad_float.h new file mode 100644 index 0000000000..76c48770ab --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_quad_float.h @@ -0,0 +1,14 @@ + +#ifndef NTL_vec_quad_float__H +#define NTL_vec_quad_float__H + +#include +#include + +NTL_OPEN_NNS + +typedef Vec vec_quad_float; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_ulong.h b/thirdparty/linux/ntl/include/NTL/vec_ulong.h new file mode 100644 index 0000000000..9cde5f30bd --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_ulong.h @@ -0,0 +1,13 @@ + +#ifndef NTL_vec_ulong__H +#define NTL_vec_ulong__H + +#include + +NTL_OPEN_NNS + +typedef Vec vec_ulong; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_vec_GF2.h b/thirdparty/linux/ntl/include/NTL/vec_vec_GF2.h new file mode 100644 index 0000000000..6d9e81fb62 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_vec_GF2.h @@ -0,0 +1,14 @@ + +#ifndef NTL_vec_vec_GF2__H +#define NTL_vec_vec_GF2__H + +#include +#include + +NTL_OPEN_NNS + +typedef Vec< Vec > vec_vec_GF2; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_vec_GF2E.h b/thirdparty/linux/ntl/include/NTL/vec_vec_GF2E.h new file mode 100644 index 0000000000..e9e38018d4 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_vec_GF2E.h @@ -0,0 +1,13 @@ + +#ifndef NTL_vec_vec_GF2E__H +#define NTL_vec_vec_GF2E__H + +#include + +NTL_OPEN_NNS + +typedef Vec< Vec > vec_vec_GF2E; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_vec_RR.h b/thirdparty/linux/ntl/include/NTL/vec_vec_RR.h new file mode 100644 index 0000000000..8b8472e978 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_vec_RR.h @@ -0,0 +1,13 @@ + +#ifndef NTL_vec_vec_RR__H +#define NTL_vec_vec_RR__H + +#include + +NTL_OPEN_NNS + +typedef Vec< Vec > vec_vec_RR; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_vec_ZZ.h b/thirdparty/linux/ntl/include/NTL/vec_vec_ZZ.h new file mode 100644 index 0000000000..9d28f3c05f --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_vec_ZZ.h @@ -0,0 +1,13 @@ + +#ifndef NTL_vec_vec_ZZ__H +#define NTL_vec_vec_ZZ__H + +#include + +NTL_OPEN_NNS + +typedef Vec< Vec > vec_vec_ZZ; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_vec_ZZ_p.h b/thirdparty/linux/ntl/include/NTL/vec_vec_ZZ_p.h new file mode 100644 index 0000000000..32d8d79783 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_vec_ZZ_p.h @@ -0,0 +1,13 @@ + +#ifndef NTL_vec_vec_ZZ_p__H +#define NTL_vec_vec_ZZ_p__H + +#include + +NTL_OPEN_NNS + +typedef Vec< Vec > vec_vec_ZZ_p; + +NTL_CLOSE_NNS + +#endif diff --git 
a/thirdparty/linux/ntl/include/NTL/vec_vec_ZZ_pE.h b/thirdparty/linux/ntl/include/NTL/vec_vec_ZZ_pE.h new file mode 100644 index 0000000000..850325fcc2 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_vec_ZZ_pE.h @@ -0,0 +1,13 @@ + +#ifndef NTL_vec_vec_ZZ_pE__H +#define NTL_vec_vec_ZZ_pE__H + +#include + +NTL_OPEN_NNS + +typedef Vec< Vec > vec_vec_ZZ_pE; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_vec_long.h b/thirdparty/linux/ntl/include/NTL/vec_vec_long.h new file mode 100644 index 0000000000..02525c8b9d --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_vec_long.h @@ -0,0 +1,14 @@ + +#ifndef NTL_vec_vec_long__H +#define NTL_vec_vec_long__H + +#include + +NTL_OPEN_NNS + +typedef Vec< Vec > vec_vec_long; + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_vec_lzz_p.h b/thirdparty/linux/ntl/include/NTL/vec_vec_lzz_p.h new file mode 100644 index 0000000000..3191608441 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_vec_lzz_p.h @@ -0,0 +1,13 @@ + +#ifndef NTL_vec_vec_zz_p__H +#define NTL_vec_vec_zz_p__H + +#include + +NTL_OPEN_NNS + +typedef Vec< Vec > vec_vec_zz_p; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_vec_lzz_pE.h b/thirdparty/linux/ntl/include/NTL/vec_vec_lzz_pE.h new file mode 100644 index 0000000000..4818de08b5 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_vec_lzz_pE.h @@ -0,0 +1,13 @@ + +#ifndef NTL_vec_vec_zz_pE__H +#define NTL_vec_vec_zz_pE__H + +#include + +NTL_OPEN_NNS + +typedef Vec< Vec > vec_vec_zz_pE; + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_vec_ulong.h b/thirdparty/linux/ntl/include/NTL/vec_vec_ulong.h new file mode 100644 index 0000000000..041be32bf8 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_vec_ulong.h @@ -0,0 +1,14 @@ + +#ifndef NTL_vec_vec_ulong__H +#define NTL_vec_vec_ulong__H + +#include + +NTL_OPEN_NNS + +typedef Vec< Vec > vec_vec_ulong; + +NTL_CLOSE_NNS + + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vec_xdouble.h b/thirdparty/linux/ntl/include/NTL/vec_xdouble.h new file mode 100644 index 0000000000..345f9505e4 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vec_xdouble.h @@ -0,0 +1,15 @@ + +#ifndef NTL_vec_xdouble__H +#define NTL_vec_xdouble__H + +#include +#include + +NTL_OPEN_NNS + +typedef Vec vec_xdouble; + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/include/NTL/vector.h b/thirdparty/linux/ntl/include/NTL/vector.h new file mode 100644 index 0000000000..228ba332d3 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/vector.h @@ -0,0 +1,808 @@ + +#ifndef NTL_vector__H +#define NTL_vector__H + +#include +#include + +struct _ntl_VectorHeader { + long length; + long alloc; + long init; + long fixed; +}; + +union _ntl_AlignedVectorHeader { + _ntl_VectorHeader h; + double x1; + long x2; + char *x3; + long double x4; +}; + +#define NTL_VECTOR_HEADER_SIZE (sizeof(_ntl_AlignedVectorHeader)) + +#define NTL_VEC_HEAD(p) (& (((_ntl_AlignedVectorHeader *) (p.rep))[-1].h)) + + +#ifndef NTL_RANGE_CHECK +#define NTL_RANGE_CHECK_CODE(i) +#else +#define NTL_RANGE_CHECK_CODE(i) if ((i) < 0 || !_vec__rep || (i) >= NTL_VEC_HEAD(_vec__rep)->length) LogicError("index out of range in Vec"); + +#endif + +// vectors are allocated in chunks of this size + +#ifndef NTL_VectorMinAlloc +#define NTL_VectorMinAlloc (4) +#endif + +// vectors are always expanded by at least this ratio + +#ifndef NTL_VectorExpansionRatio +#define NTL_VectorExpansionRatio (1.2) +#endif + +// controls 
initialization during input + +#ifndef NTL_VectorInputBlock +#define NTL_VectorInputBlock 50 +#endif + + +NTL_OPEN_NNS + + +template +void BlockDestroy(T* p, long n) +{ + for (long i = 0; i < n; i++) + p[i].~T(); + + // NOTE: this routine is only invoked through a Vec destructor + // or a scope guard destructor, both of which are noexcept destructors. + // therefore, if ~T() should throw, the program will terminate +} + + +template +void BlockConstruct(T* p, long n) +{ + long i; + + NTL_SCOPE(guard) { BlockDestroy(p, i); }; + + for (i = 0; i < n; i++) + (void) new(&p[i]) T; + + guard.relax(); + + // NOTE: we invoke T rather than T(), which would ensure + // POD types get zeroed out, but only in compilers that + // comply with C++03, which does not include MS compilers. + // So we just use T, which is less expensive, and it is better + // not to assume POD types get initialized. + +} + +template +void BlockConstructFromVec(T* p, long n, const T* q) +{ + long i; + + NTL_SCOPE(guard) { BlockDestroy(p, i); }; + + for (i = 0; i < n; i++) + (void) new(&p[i]) T(q[i]); + + guard.relax(); +} + +template +void BlockConstructFromObj(T* p, long n, const T& q) +{ + long i; + + NTL_SCOPE(guard) { BlockDestroy(p, i); }; + + for (i = 0; i < n; i++) + (void) new(&p[i]) T(q); + + guard.relax(); +} + + + +template +class Vec { +public: + + class _vec_deleter { + public: + static void apply(T*& p) { + if (p) { + NTL_SNS free(((char *) p) - sizeof(_ntl_AlignedVectorHeader)); + p = 0; + } + } + }; + + WrappedPtr _vec__rep; + + + Vec() { } + + Vec(INIT_SIZE_TYPE, long n) { SetLength(n); } + Vec(INIT_SIZE_TYPE, long n, const T& a) { SetLength(n, a); } + Vec(const Vec& a) { *this = a; } + + Vec& operator=(const Vec& a); + ~Vec(); + void kill(); + + void SetMaxLength(long n); + void FixLength(long n); + void FixAtCurrentLength(); + void QuickSetLength(long n) { NTL_VEC_HEAD(_vec__rep)->length = n; } + + void SetLength(long n) { + if (_vec__rep && !NTL_VEC_HEAD(_vec__rep)->fixed && + n >= 0 && n <= NTL_VEC_HEAD(_vec__rep)->init) + NTL_VEC_HEAD(_vec__rep)->length = n; + else + DoSetLength(n); + } + + void SetLength(long n, const T& a) { + if (_vec__rep && !NTL_VEC_HEAD(_vec__rep)->fixed && + n >= 0 && n <= NTL_VEC_HEAD(_vec__rep)->init) + NTL_VEC_HEAD(_vec__rep)->length = n; + else + DoSetLength(n, a); + } + + template + void SetLengthAndApply(long n, F f) { + if (_vec__rep && !NTL_VEC_HEAD(_vec__rep)->fixed && + n >= 0 && n <= NTL_VEC_HEAD(_vec__rep)->init) + NTL_VEC_HEAD(_vec__rep)->length = n; + else + DoSetLengthAndApply(n, f); + } + + + + long length() const + { return (!_vec__rep) ? 0 : NTL_VEC_HEAD(_vec__rep)->length; } + + long MaxLength() const + { return (!_vec__rep) ? 0 : NTL_VEC_HEAD(_vec__rep)->init; } + + long allocated() const + { return (!_vec__rep) ? 
0 : NTL_VEC_HEAD(_vec__rep)->alloc; } + + long fixed() const + { return _vec__rep && NTL_VEC_HEAD(_vec__rep)->fixed; } + + T& operator[](long i) + { + NTL_RANGE_CHECK_CODE(i) + return _vec__rep[i]; + } + + const T& operator[](long i) const + { + NTL_RANGE_CHECK_CODE(i) + return _vec__rep[i]; + } + + T& RawGet(long i) + { + return _vec__rep[i]; + } + + const T& RawGet(long i) const + { + return _vec__rep[i]; + } + + T& operator()(long i) { return (*this)[i-1]; } + const T& operator()(long i) const { return (*this)[i-1]; } + + + const T* elts() const { return _vec__rep; } + T* elts() { return _vec__rep; } + + Vec(Vec& x, INIT_TRANS_TYPE) + { _vec__rep.swap(x._vec__rep); } + + long position(const T& a) const; + long position1(const T& a) const; + + void swap(Vec& y); + void append(const T& a); + void append(const Vec& w); + + +// Some compatibility with vec_GF2 + + const T& get(long i) const + { return (*this)[i]; } + + void put(long i, const T& a) + { (*this)[i] = a; } + + +// Some STL compatibility + + typedef T value_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type *iterator; + typedef const value_type *const_iterator; + + const T* data() const { return elts(); } + T* data() { return elts(); } + + T* begin() { return elts(); } + const T* begin() const { return elts(); } + + T* end() { + if (elts()) + return elts() + length(); + else + return 0; + } + + const T* end() const { + if (elts()) + return elts() + length(); + else + return 0; + } + + T& at(long i) { + if ((i) < 0 || !_vec__rep || (i) >= NTL_VEC_HEAD(_vec__rep)->length) + LogicError("index out of range in Vec"); + return _vec__rep[i]; + } + + const T& at(long i) const { + if ((i) < 0 || !_vec__rep || (i) >= NTL_VEC_HEAD(_vec__rep)->length) + LogicError("index out of range in Vec"); + return _vec__rep[i]; + } + + class Watcher { + public: + Vec& watched; + explicit + Watcher(Vec& _watched) : watched(_watched) {} + + ~Watcher() + { + if (watched.MaxLength() > NTL_RELEASE_THRESH) watched.kill(); + } + }; + +private: + void DoSetLength(long n); + void DoSetLength(long n, const T& a); + + template + void DoSetLengthAndApply(long n, F& f); + + void AdjustLength(long n) { if (_vec__rep) NTL_VEC_HEAD(_vec__rep)->length = n; } + void AdjustAlloc(long n) { if (_vec__rep) NTL_VEC_HEAD(_vec__rep)->alloc = n; } + void AdjustMaxLength(long n) { if (_vec__rep) NTL_VEC_HEAD(_vec__rep)->init = n; } + + void AllocateTo(long n); // reserves space for n items + void Init(long n); // make sure first n entries are initialized + void Init(long n, const T* src); // same, but use src + void Init(long n, const T& src); // same, but use src + + template + void InitAndApply(long n, F& f); +}; + + + + +#if (!defined(NTL_CLEAN_PTR)) + +template +long Vec::position(const T& a) const +{ + if (!_vec__rep) return -1; + long num_alloc = NTL_VEC_HEAD(_vec__rep)->alloc; + long num_init = NTL_VEC_HEAD(_vec__rep)->init; + if (&a < _vec__rep || &a >= _vec__rep + num_alloc) return -1; + long res = (&a) - _vec__rep; + + if (res < 0 || res >= num_alloc || + _vec__rep + res != &a) return -1; + + if (res >= num_init) + LogicError("position: reference to uninitialized object"); + return res; +} + +template +long Vec::position1(const T& a) const +{ + if (!_vec__rep) return -1; + long len = NTL_VEC_HEAD(_vec__rep)->length; + if (&a < _vec__rep || &a >= _vec__rep + len) return -1; + long res = (&a) - _vec__rep; + + if (res < 0 || res >= len || + _vec__rep + res != &a) return -1; + + return res; +} + + +#else + +template 
+long Vec::position(const T& a) const +{ + if (!_vec__rep) return -1; + long num_alloc = NTL_VEC_HEAD(_vec__rep)->alloc; + long num_init = NTL_VEC_HEAD(_vec__rep)->init; + long res; + res = 0; + while (res < num_alloc && _vec__rep + res != &a) res++; + if (res >= num_alloc) return -1; + if (res >= num_init) + LogicError("position: reference to uninitialized object"); + return res; +} + +template +long Vec::position1(const T& a) const +{ + if (!_vec__rep) return -1; + long len = NTL_VEC_HEAD(_vec__rep)->length; + long res; + res = 0; + while (res < len && _vec__rep + res != &a) res++; + if (res >= len) return -1; + return res; +} + + +#endif + + +template +void Vec::AllocateTo(long n) +{ + long m; + + if (n < 0) { + LogicError("negative length in vector::SetLength"); + } + if (NTL_OVERFLOW(n, sizeof(T), 0)) + ResourceError("excessive length in vector::SetLength"); + + if (_vec__rep && NTL_VEC_HEAD(_vec__rep)->fixed) { + if (NTL_VEC_HEAD(_vec__rep)->length == n) + return; + else + LogicError("SetLength: can't change this vector's length"); + } + + if (n == 0) { + return; + } + + if (!_vec__rep) { + m = ((n+NTL_VectorMinAlloc-1)/NTL_VectorMinAlloc) * NTL_VectorMinAlloc; + char *p = (char *) NTL_SNS_MALLOC(m, sizeof(T), sizeof(_ntl_AlignedVectorHeader)); + if (!p) { + MemoryError(); + } + _vec__rep = (T *) (p + sizeof(_ntl_AlignedVectorHeader)); + + NTL_VEC_HEAD(_vec__rep)->length = 0; + NTL_VEC_HEAD(_vec__rep)->alloc = m; + NTL_VEC_HEAD(_vec__rep)->init = 0; + NTL_VEC_HEAD(_vec__rep)->fixed = 0; + } + else if (n > NTL_VEC_HEAD(_vec__rep)->alloc) { + m = max(n, long(NTL_VectorExpansionRatio*NTL_VEC_HEAD(_vec__rep)->alloc)); + m = ((m+NTL_VectorMinAlloc-1)/NTL_VectorMinAlloc) * NTL_VectorMinAlloc; + char *p = ((char *) _vec__rep.rep) - sizeof(_ntl_AlignedVectorHeader); + p = (char *) NTL_SNS_REALLOC(p, m, sizeof(T), sizeof(_ntl_AlignedVectorHeader)); + if (!p) { + MemoryError(); + } + _vec__rep = (T *) (p + sizeof(_ntl_AlignedVectorHeader)); + NTL_VEC_HEAD(_vec__rep)->alloc = m; + } +} + +template +void Vec::Init(long n) +{ + long num_init = MaxLength(); + if (n <= num_init) return; + + BlockConstruct(_vec__rep + num_init, n-num_init); + AdjustMaxLength(n); +} + +template +void Vec::Init(long n, const T *src) +{ + long num_init = MaxLength(); + if (n <= num_init) return; + + BlockConstructFromVec(_vec__rep + num_init, n-num_init, src); + AdjustMaxLength(n); + +} + +template +void Vec::Init(long n, const T& src) +{ + long num_init = MaxLength(); + if (n <= num_init) return; + + BlockConstructFromObj(_vec__rep + num_init, n-num_init, src); + AdjustMaxLength(n); +} + +template template +void Vec::InitAndApply(long n, F& f) +{ + long num_init = MaxLength(); + if (n <= num_init) return; + + BlockConstruct(_vec__rep + num_init, n-num_init); + + NTL_SCOPE(guard) { BlockDestroy(_vec__rep + num_init, n - num_init); }; + + long i; + for (i = num_init; i < n; i++) + f(_vec__rep[i]); + + guard.relax(); + + AdjustMaxLength(n); +} + +template +void Vec::DoSetLength(long n) +{ + AllocateTo(n); + Init(n); + AdjustLength(n); +} + +template +void Vec::DoSetLength(long n, const T& a) +{ + // if vector gets moved, we have to worry about + // a aliasing a vector element + const T *src = &a; + long pos = -1; + if (n >= allocated()) pos = position(a); + AllocateTo(n); + if (pos != -1) src = elts() + pos; + Init(n, *src); + AdjustLength(n); +} + +template template +void Vec::DoSetLengthAndApply(long n, F& f) +{ + AllocateTo(n); + InitAndApply(n, f); + AdjustLength(n); +} + + + + +template +void Vec::SetMaxLength(long 
n) +{ + long OldLength = length(); + SetLength(n); + SetLength(OldLength); +} + +template +void Vec::FixLength(long n) +{ + if (_vec__rep) LogicError("FixLength: can't fix this vector"); + if (n < 0) LogicError("FixLength: negative length"); + + NTL_SCOPE(guard) { _vec__rep.kill(); }; + + if (n > 0) + SetLength(n); + else { + char *p = (char *) NTL_SNS_MALLOC(0, sizeof(T), sizeof(_ntl_AlignedVectorHeader)); + if (!p) { + MemoryError(); + } + _vec__rep = (T *) (p + sizeof(_ntl_AlignedVectorHeader)); + + NTL_VEC_HEAD(_vec__rep)->length = 0; + NTL_VEC_HEAD(_vec__rep)->init = 0; + NTL_VEC_HEAD(_vec__rep)->alloc = 0; + } + NTL_VEC_HEAD(_vec__rep)->fixed = 1; + + guard.relax(); +} + +template +void Vec::FixAtCurrentLength() +{ + if (fixed()) return; + if (length() != MaxLength()) + LogicError("FixAtCurrentLength: can't fix this vector"); + + if (_vec__rep) + NTL_VEC_HEAD(_vec__rep)->fixed = 1; + else + FixLength(0); +} + +template +Vec& Vec::operator=(const Vec& a) +{ + if (this == &a) return *this; + + long init = MaxLength(); + long src_len = a.length(); + const T *src = a.elts(); + + AllocateTo(src_len); + T *dst = elts(); + + + if (src_len <= init) { + long i; + for (i = 0; i < src_len; i++) + dst[i] = src[i]; + } + else { + long i; + for (i = 0; i < init; i++) + dst[i] = src[i]; + Init(src_len, src+init); + } + + AdjustLength(src_len); + + return *this; +} + + +template +Vec::~Vec() +{ + if (!_vec__rep) return; + BlockDestroy(_vec__rep.rep, NTL_VEC_HEAD(_vec__rep)->init); +} + +template +void Vec::kill() +{ + Vec tmp; + this->swap(tmp); +} + + +template +void Vec::swap(Vec& y) +{ + long xf = fixed(); + long yf = y.fixed(); + if (xf != yf || + (xf && NTL_VEC_HEAD(_vec__rep)->length != NTL_VEC_HEAD(y._vec__rep)->length)) + LogicError("swap: can't swap these vectors"); + + _vec__rep.swap(y._vec__rep); +} + +template +void swap(Vec& x, Vec& y) +{ + x.swap(y); +} + +// EXCEPTIONS: provides strong ES +template +void Vec::append(const T& a) +{ + long len = length(); + long init = MaxLength(); + long src_len = 1; + + // if vector gets moved, we have to worry about + // a aliasing a vector element + const T *src = &a; + long pos = -1; + if (len >= allocated()) pos = position(a); + AllocateTo(len+src_len); + + // The logic here is copy-pasted from the append-vector + // logic...mostly + + long i; + T *dst = elts(); + if (pos != -1) src = dst + pos; + + if (len+src_len <= init) { + for (i = 0; i < src_len; i++) + dst[i+len] = src[i]; + } + else { + for (i = 0; i < init-len; i++) + dst[i+len] = src[i]; + + // make sure we use BlockConstructFromObj + Init(src_len+len, *src); + } + + AdjustLength(len+src_len); +} + +template +void append(Vec& v, const T& a) +{ + v.append(a); +} + +template +void Vec::append(const Vec& w) +{ + long len = length(); + long init = MaxLength(); + long src_len = w.length(); + + AllocateTo(len+src_len); + const T *src = w.elts(); + T *dst = elts(); + + if (len+src_len <= init) { + long i; + for (i = 0; i < src_len; i++) + dst[i+len] = src[i]; + } + else { + long i; + for (i = 0; i < init-len; i++) + dst[i+len] = src[i]; + Init(src_len+len, src+init-len); + } + + AdjustLength(len+src_len); +} + + +template +void append(Vec& v, const Vec& w) +{ + v.append(w); +} + + +template +NTL_SNS istream & operator>>(NTL_SNS istream& s, Vec& a) +{ + Vec ibuf; + long c; + long n; + if (!s) NTL_INPUT_ERROR(s, "bad vector input"); + + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + if (c != '[') { + NTL_INPUT_ERROR(s, "bad vector input"); + } + + n = 0; + 
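   // (editorial comment) elements are read one at a time; the
   // capacity of ibuf is grown in blocks of NTL_VectorInputBlock so
   // the buffer is not reallocated on every element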
ibuf.SetLength(0); + + s.get(); + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + while (c != ']' && !IsEOFChar(c)) { + if (n % NTL_VectorInputBlock == 0) ibuf.SetMaxLength(n + NTL_VectorInputBlock); + n++; + ibuf.SetLength(n); + if (!(s >> ibuf[n-1])) NTL_INPUT_ERROR(s, "bad vector input"); + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + } + + if (IsEOFChar(c)) NTL_INPUT_ERROR(s, "bad vector input"); + s.get(); + + a = ibuf; + return s; +} + + +template +NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const Vec& a) +{ + long i, n; + + n = a.length(); + + s << '['; + + for (i = 0; i < n; i++) { + s << a[i]; + if (i < n-1) s << " "; + } + + s << ']'; + + return s; +} + +template +long operator==(const Vec& a, const Vec& b) +{ + long n = a.length(); + if (b.length() != n) return 0; + const T* ap = a.elts(); + const T* bp = b.elts(); + long i; + for (i = 0; i < n; i++) if (ap[i] != bp[i]) return 0; + return 1; +} + + + +template +long operator!=(const Vec& a, const Vec& b) +{ return !(a == b); } + + + + +// conversions + +template +void conv(Vec& x, const Vec& a) +{ + long n = a.length(); + x.SetLength(n); + + for (long i = 0; i < n; i++) + conv(x[i], a[i]); +} + + +NTL_CLOSE_NNS + + + + + + + + + +#endif + diff --git a/thirdparty/linux/ntl/include/NTL/version.h b/thirdparty/linux/ntl/include/NTL/version.h new file mode 100644 index 0000000000..a04bc10c52 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/version.h @@ -0,0 +1,12 @@ + +#ifndef NTL_version__H +#define NTL_version__H + +#define NTL_VERSION "9.11.0" + +#define NTL_MAJOR_VERSION (9) +#define NTL_MINOR_VERSION (11) +#define NTL_REVISION (0) + +#endif + diff --git a/thirdparty/linux/ntl/include/NTL/wizard_log.h b/thirdparty/linux/ntl/include/NTL/wizard_log.h new file mode 100644 index 0000000000..502871e1e2 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/wizard_log.h @@ -0,0 +1,21 @@ + + +/*************************** +Basic Configuration Options: +NTL_THREADS +NTL_GMP_LIP + +Resolution of double-word types: +__int128_t +__uint128_t + +Performance Options: +NTL_SPMM_ASM +NTL_FFT_BIGTAB +NTL_FFT_LAZYMUL +NTL_TBL_REM +NTL_CRT_ALTCODE +NTL_PCLMUL +***************************/ + + diff --git a/thirdparty/linux/ntl/include/NTL/xdouble.h b/thirdparty/linux/ntl/include/NTL/xdouble.h new file mode 100644 index 0000000000..534d19b480 --- /dev/null +++ b/thirdparty/linux/ntl/include/NTL/xdouble.h @@ -0,0 +1,281 @@ +#ifndef NTL_xdouble__H +#define NTL_xdouble__H + +#include + +NTL_OPEN_NNS + +// NTL_XD_HBOUND = 2^{max(NTL_DOUBLE_PRECISION,NTL_BITS_PER_LONG)+4} + +#if (NTL_DOUBLE_PRECISION > NTL_BITS_PER_LONG) + +#define NTL_XD_HBOUND (NTL_FDOUBLE_PRECISION*32.0) +#define NTL_XD_HBOUND_LOG (NTL_DOUBLE_PRECISION+4) + +#else + +#define NTL_XD_HBOUND (double(1L << (NTL_BITS_PER_LONG - 2))*64.0) +#define NTL_XD_HBOUND_LOG (NTL_BITS_PER_LONG+4) + +#endif + +#define NTL_XD_HBOUND_INV (double(1)/NTL_XD_HBOUND) + +#define NTL_XD_BOUND (NTL_XD_HBOUND*NTL_XD_HBOUND) +#define NTL_XD_BOUND_INV (double(1)/NTL_XD_BOUND) + + +class xdouble { + +public: + +double x; +long e; + +xdouble() : x(0), e(0) { } +explicit xdouble(double a) : x(0), e(0) { *this = a; } + +inline xdouble& operator=(double a); + + +~xdouble() { } + +void normalize(); + +static +NTL_CHEAP_THREAD_LOCAL +long oprec; + +static void SetOutputPrecision(long p); +static long OutputPrecision() { return oprec; } + +double mantissa() const { return x; } +long exponent() const { return e; } + +xdouble(double xx, long ee) : x(xx), e(ee) { 
} // internal use only + +}; + +inline xdouble to_xdouble(int a) { return xdouble(a, 0); } +inline xdouble to_xdouble(long a) { return xdouble(a, 0); } +inline xdouble to_xdouble(unsigned int a) { return xdouble(a, 0); } +inline xdouble to_xdouble(unsigned long a) { return xdouble(a, 0); } + +xdouble to_xdouble(double a); +inline xdouble to_xdouble(float a) { return to_xdouble(double(a)); } +xdouble to_xdouble(const char *a); + + +inline xdouble& xdouble::operator=(double a) + { *this = to_xdouble(a); return *this; } + +xdouble operator+(const xdouble& a, const xdouble& b); +inline xdouble operator+(const xdouble& a, double b) + { return a + to_xdouble(b); } +inline xdouble operator+(double a, const xdouble& b) + { return to_xdouble(a) + b; } + +xdouble operator-(const xdouble& a, const xdouble& b); +inline xdouble operator-(const xdouble& a, double b) + { return a - to_xdouble(b); } +inline xdouble operator-(double a, const xdouble& b) + { return to_xdouble(a) - b; } + +xdouble operator*(const xdouble& a, const xdouble& b); +inline xdouble operator*(const xdouble& a, double b) + { return a * to_xdouble(b); } +inline xdouble operator*(double a, const xdouble& b) + { return to_xdouble(a) * b; } + +xdouble operator/(const xdouble& a, const xdouble& b); +inline xdouble operator/(const xdouble& a, double b) + { return a / to_xdouble(b); } +inline xdouble operator/(double a, const xdouble& b) + { return to_xdouble(a) / b; } + +xdouble operator-(const xdouble& a); + + + +inline xdouble& operator+=(xdouble& a, const xdouble& b) + { a = a + b; return a; } +inline xdouble& operator+=(xdouble& a, double b) + { a = a + b; return a; } + +inline xdouble& operator-=(xdouble& a, const xdouble& b) + { a = a - b; return a; } +inline xdouble& operator-=(xdouble& a, double b) + { a = a - b; return a; } + +inline xdouble& operator*=(xdouble& a, const xdouble& b) + { a = a * b; return a; } +inline xdouble& operator*=(xdouble& a, double b) + { a = a * b; return a; } + +inline xdouble& operator/=(xdouble& a, const xdouble& b) + { a = a / b; return a; } +inline xdouble& operator/=(xdouble& a, double b) + { a = a / b; return a; } + +inline xdouble& operator++(xdouble& a) { a = a + to_xdouble(1); return a; } +inline xdouble& operator--(xdouble& a) { a = a - to_xdouble(1); return a; } + +inline void operator++(xdouble& a, int) { a = a + to_xdouble(1); } +inline void operator--(xdouble& a, int) { a = a - to_xdouble(1); } + +long compare(const xdouble& a, const xdouble& b); +inline long compare(const xdouble& a, double b) + { return compare(a, to_xdouble(b)); } +inline long compare(double a, const xdouble& b) + { return compare(to_xdouble(a), b); } + +long sign(const xdouble& a); + +inline long operator==(const xdouble& a, const xdouble& b) + { return compare(a, b) == 0; } +inline long operator!=(const xdouble& a, const xdouble& b) + { return compare(a, b) != 0; } +inline long operator<=(const xdouble& a, const xdouble& b) + { return compare(a, b) <= 0; } +inline long operator>=(const xdouble& a, const xdouble& b) + { return compare(a, b) >= 0; } +inline long operator <(const xdouble& a, const xdouble& b) + { return compare(a, b) < 0; } +inline long operator >(const xdouble& a, const xdouble& b) + { return compare(a, b) > 0; } + +inline long operator==(const xdouble& a, double b) + { return compare(a, b) == 0; } +inline long operator!=(const xdouble& a, double b) + { return compare(a, b) != 0; } +inline long operator<=(const xdouble& a, double b) + { return compare(a, b) <= 0; } +inline long operator>=(const xdouble& 
a, double b) + { return compare(a, b) >= 0; } +inline long operator <(const xdouble& a, double b) + { return compare(a, b) < 0; } +inline long operator >(const xdouble& a, double b) + { return compare(a, b) > 0; } + +inline long operator==(double a, const xdouble& b) + { return compare(a, b) == 0; } +inline long operator!=(double a, const xdouble& b) + { return compare(a, b) != 0; } +inline long operator<=(double a, const xdouble& b) + { return compare(a, b) <= 0; } +inline long operator>=(double a, const xdouble& b) + { return compare(a, b) >= 0; } +inline long operator <(double a, const xdouble& b) + { return compare(a, b) < 0; } +inline long operator >(double a, const xdouble& b) + { return compare(a, b) > 0; } + + +void conv(ZZ& x, const xdouble& a); +// x = floor(a); + +inline ZZ to_ZZ(const xdouble& a) + { ZZ x; conv(x, a); NTL_OPT_RETURN(ZZ, x); } + + + +xdouble to_xdouble(const ZZ& a); +inline void conv(xdouble& z, const ZZ& a) + { z = to_xdouble(a); } + +void conv(double& x, const xdouble& a); +inline double to_double(const xdouble& a) + { double z; conv(z, a); return z; } + +inline void conv(float& x, const xdouble& a) + { double t; conv(t, a); x = float(t); } +inline float to_float(const xdouble& a) + { float z; conv(z, a); return z; } + + +inline void conv(long& x, const xdouble& a) + { double z; conv(z, a); x = long(NTL_SNS floor(z)); } +inline long to_long(const xdouble& a) + { long z; conv(z, a); return z; } + + + + +inline void conv(int& x, const xdouble& a) + { double z; conv(z, a); x = int(NTL_SNS floor(z)); } +inline int to_int(const xdouble& a) + { int z; conv(z, a); return z; } + +inline void conv(xdouble& x, const xdouble& a) + { x = a; } +inline xdouble to_xdouble(const xdouble& a) + { return a; } + + +inline void conv(xdouble& x, int a) { x = to_xdouble(a); } +inline void conv(xdouble& x, long a) { x = to_xdouble(a); } + +inline void conv(xdouble& x, unsigned int a) { x = to_xdouble(a); } +inline void conv(xdouble& x, unsigned long a) { x = to_xdouble(a); } + +inline void conv(xdouble& x, float a) { x = to_xdouble(a); } +inline void conv(xdouble& x, double a) { x = to_xdouble(a); } +inline void conv(xdouble& x, const char *a) { x = to_xdouble(a); } + + + +/* additional legacy conversions for v6 conversion regime */ + + +inline void conv(unsigned int& x, const xdouble& a) + { long z; conv(z, a); conv(x, z); } + +inline void conv(unsigned long& x, const xdouble& a) + { long z; conv(z, a); conv(x, z); } + +/* ------------------------------------- */ + +NTL_SNS ostream& operator<<(NTL_SNS ostream& s, const xdouble& a); + +NTL_SNS istream& operator>>(NTL_SNS istream& s, xdouble& x); + +xdouble trunc(const xdouble& a); +xdouble floor(const xdouble& a); +xdouble ceil(const xdouble& a); +xdouble fabs(const xdouble& a); +xdouble sqrt(const xdouble& a); + +void power(xdouble& z, const xdouble& a, const ZZ& e); +inline xdouble power(const xdouble& a, const ZZ& e) + { xdouble z; power(z, a, e); return z; } + +void power(xdouble& z, const xdouble& a, long e); +inline xdouble power(const xdouble& a, long e) + { xdouble z; power(z, a, e); return z; } + +void power2(xdouble& z, long e); +inline xdouble power2_xdouble(long e) + { xdouble z; power2(z, e); return z; } + + +void MulAdd(xdouble& z, const xdouble& a, const xdouble& b, const xdouble& c); +inline xdouble MulAdd(const xdouble& a, const xdouble& b, + const xdouble& c) + { xdouble z; MulAdd(z, a, b, c); return z; } + + +void MulSub(xdouble& z, const xdouble& a, const xdouble& b, const xdouble& c); +inline xdouble MulSub(const 
xdouble& a, const xdouble& b, + const xdouble& c) + { xdouble z; MulSub(z, a, b, c); return z; } + +double log(const xdouble& a); +xdouble xexp(double x); + + +inline xdouble exp(const xdouble& x) { return xexp(to_double(x)); } + + +NTL_CLOSE_NNS + +#endif diff --git a/thirdparty/linux/ntl/src/BasicThreadPool.c b/thirdparty/linux/ntl/src/BasicThreadPool.c new file mode 100644 index 0000000000..f87c979b7b --- /dev/null +++ b/thirdparty/linux/ntl/src/BasicThreadPool.c @@ -0,0 +1,32 @@ + +#include + +#ifdef NTL_THREAD_BOOST + +NTL_START_IMPL + + +NTL_TLS_GLOBAL_DECL(UniquePtr, NTLThreadPool_stg) + +NTL_CHEAP_THREAD_LOCAL BasicThreadPool *NTLThreadPool_ptr = 0; + +void ResetThreadPool(BasicThreadPool *pool) +{ + NTL_TLS_GLOBAL_ACCESS(NTLThreadPool_stg); + NTLThreadPool_stg.reset(pool); + NTLThreadPool_ptr = pool; +} + +BasicThreadPool *ReleaseThreadPool() +{ + NTL_TLS_GLOBAL_ACCESS(NTLThreadPool_stg); + BasicThreadPool *pool = NTLThreadPool_stg.release(); + NTLThreadPool_ptr = 0; + return pool; +} + + + +NTL_END_IMPL + +#endif diff --git a/thirdparty/linux/ntl/src/BerlekampTest.c b/thirdparty/linux/ntl/src/BerlekampTest.c new file mode 100644 index 0000000000..e65e33cb0e --- /dev/null +++ b/thirdparty/linux/ntl/src/BerlekampTest.c @@ -0,0 +1,81 @@ + +#include + +NTL_CLIENT + + +long compare(const ZZ_pX& a, const ZZ_pX& b) +{ + if (deg(a) < deg(b)) + return 0; + + if (deg(a) > deg(b)) + return 1; + + long n = a.rep.length(); + long i; + + for (i = 0; i < n; i++) { + if (rep(a.rep[i]) < rep(b.rep[i])) return 0; + if (rep(a.rep[i]) > rep(b.rep[i])) return 1; + } + + return 0; +} + +void sort(vec_pair_ZZ_pX_long& v) +{ + long n = v.length(); + long i, j; + + for (i = 0; i < n-1; i++) + for (j = 0; j < n-1-i; j++) + if (compare(v[j].a, v[j+1].a)) { + swap(v[j].a, v[j+1].a); + swap(v[j].b, v[j+1].b); + } +} + + +int main() +{ + ZZ p; + cin >> p; + ZZ_p::init(p); + ZZ_pX f; + cin >> f; + + vec_pair_ZZ_pX_long factors; + + double t = GetTime(); + berlekamp(factors, f, 1); + t = GetTime()-t; + cerr << "total time: " << t << "\n"; + + ZZ_pX ff; + + mul(ff, factors); + if (f != ff) + TerminalError("Incorrect factorization!!"); + + sort(factors); + + cerr << "factorization pattern:"; + long i; + + for (i = 0; i < factors.length(); i++) { + cerr << " "; + long k = factors[i].b; + if (k > 1) + cerr << k << "*"; + cerr << deg(factors[i].a); + } + + cerr << "\n"; + + + + cout << factors << "\n"; + + return 0; +} diff --git a/thirdparty/linux/ntl/src/BerlekampTestIn b/thirdparty/linux/ntl/src/BerlekampTestIn new file mode 100644 index 0000000000..25c359dcf5 --- /dev/null +++ b/thirdparty/linux/ntl/src/BerlekampTestIn @@ -0,0 +1,2 @@ +267257146016241686964920093290467696047 +[49837358131570447864817515087439013768 217790630152030295509630390043323452418 183007752636725657346015922031780246753 262113896575279912339769331241278344279 177558407200939953451023246629148421281 136929296553865076504681539657670073560 192415039389210093994081842103350165598 182557741530928548483637113503078591939 152312715846007778517225522567912229374 61070310616310966143036616435945529275 56882351798282916540663346247514573657 158686522480779859534523214608362876609 85210334586722150186842848782331419132 198362138483259282290069664523891695478 105962174491644106720737449885846770737 249033261315062656917484949663089856785 139280737583202876936726498752829998312 96203889545184151489078136782695324160 208694243930290456762944377328869542824 251198129775817736537493636046342370518 32491172002557837695014870030709955365 
157057260026057739908648668261127013585 32158111277873531187646273497929686990 58023085641078573288671168992626772711 203352616803671819759251328642179414361 176832214306605225027954988995962853393 197751743759386172324631466360221889957 250675485398129152936547054273008945084 89802180792752320286820790825168446402 221927328217203245795479583872431482139 181867644367245155189096649430804155534 196573951588049184839338282394294640421 126494998583021765272120647086463689743 174657163006615797502644605616894054311 234672242556809736618320732618464101882 205348109319692361504021855702077801974 201480844443360756410435617763952548343 170821441145829211070253605577359112378 187298115367502536593622759425713052026 201353915199638771306766167529913744292 110056325175807519097151055510442724504 84674679789619373292653213867492283966 168108975031341360548972497299114799167 188785266690312084670451157982206333021 6919124922283999072938889546284835561 95172401022443537916304700401182513359 242656351122617045586289402040975850488 258917690731498966483992090617969273135 255172357998710172667686768952981848252 118175616249074875462815777656446629825 209369380640707258048170718089924020429 216746678547888281943927295868928862244 63169707116329105567261084639495637564 202333112949022656860592608187835800947 80435337513539361770944989701279792581 28559403271199376081807425371890436795 256861001114471080744832531110449193687 38657959213601984487225303100688777103 264329692965823772141881317913001435480 218475193420063453264724104126186468822 55233548185953383662963228863699064965 142847894615807036044593258566061501298 40776137890228920841990607606744773281 207391812963267976992797930441533817374 131427136606359893450088207506169771951 82054365866591169413120690376061966303 254634913083682101845032880368012778683 113001824664061224418685032691163656798 117833438072061069588705088985207648396 198178618302450682247100216569460586289 198381479150163198990715018490680028045 40108560518858431892450501419935817741 249439423887278332139791602557219952471 221966937421423130761062580294130357251 27490538843743647962500092623737267888 199032039511054771408483525115006232955 153374408545054543308974562742599189798 204441596188139879934847569435872573583 72511171850979613061537038035887332270 188055003891843700481760891626319067311 246942866306387063440530450313249487213 154432671448051407822509238106814497392 10269235724501453720265397111909271779 148857768429934113454368585111610769515 108297143766697999083231078069833743328 220532835954576708526395590637539965227 47023132625007027307413233669814369642 84333086497358423101905076861010297068 109923194851260709892302191788390844119 145532132551656120297402397944248638204 213830252095553517168726834522180278894 102749312273606281835206463742368919982 84543296032499431865178411848912028082 255842553711064592614706368471411701728 92792788881700092973313938762496041466 226246741492184645916217086335680308632 47398189106172181220565717959082348717 534841639268407919453412423169225929 227881612330362332482913814220965533258 20468583267376817360819899952174792253 144516395639654137625068920238408162625 23063430578963026866276768667282988723 78622647090193125396541826334521992292 264903205883946296353652606189457860631 73321233545830123884053316326108652224 224457856516455546437636685796583263125 89690091316250629374456315479480007830 187208222107636159370431203716381339929 171228087913280313688978034913595496145 65181480163363913025022929713156523521 
260386206624679247021239706748428416618 233532508524755687017145839207222247623 104495928340502321945045270235830859361 142404893002257456916371093923595763470 70547706665310794838360176214164641180 250155930626141180499692089763382633396 173703133121986101552083136940445186732 101381348611023590273882716679386824878 52369949231407573392363550023930927779 164846348231869333212354656533341858054 167611977406913990206167373536253863095 44127887745906175987802 210066388901 458330 677 26 5 2 1] diff --git a/thirdparty/linux/ntl/src/BerlekampTestOut b/thirdparty/linux/ntl/src/BerlekampTestOut new file mode 100644 index 0000000000..4e9fbaa2e0 --- /dev/null +++ b/thirdparty/linux/ntl/src/BerlekampTestOut @@ -0,0 +1 @@ +[[[72398276494209883607569001664095082443 117364489934900288813772748912639262763 1] 1] [[194885164668670801306433413055322420208 46460887933725801021646400328802766319 1] 1] [[265214480819444708594123279944559159830 135555755881205794630508718609541287286 44692727376424019667265698780467837920 105348873456947058075098215336964742926 200420383267665008124992677631949344009 175037957226154573788077864523554954164 198784363762937430133582236807030005417 189197625224350476857712002941357479808 123176502074239231605912049355403842408 173219197680757673164794465537096242604 82502246015727196418180291241544180842 252148878141517772614419588889223095499 222861768237293427164610320331139899060 145279042477655847402483626711031219727 87910557872715526777045607382277034643 142072596850302561207634759593757462627 71205756108575268517401216273026274651 12651078513331907405242158198817330707 160266113454553114847517104170780094187 195464879027759626684158522704705329638 176940293918687704807235877854468104601 256684763930845904420213250872082486979 190679352948270926135966349300610845388 180925218261056184334641860647804951138 69890985699528053281337128635751875924 249647620923101369642976296912944075845 105942913538075089762334334130653298694 3898562296566255216819318123346284574 96036249681148056555888770829972641770 116954586547181158405632648906791331155 156714696811280091319378172855548989363 20102049324529059757649901773837738504 242046209169075775000997868731143868307 153110660874477601229072252534180746483 262645300558512784314570152020176734087 170644937290658478142860569172821089079 236324059282643408066541233152180268668 91420241446920934270222801792817823163 221895909267925472012615501139074144107 65585860673582603802310418767748147371 175284970274766593398967509698077212527 264577434882178626916502171362966809724 248619213343209942390691463132907556843 48694357302037897882508173061369838915 7590271812997530261785510569226703355 41579865942640367934632794995077059110 25903764683100553843924000316062727206 208277694495623391365783965784035058862 20574104178269334275866063132729651833 32390265141750340906945023778238315879 48391540618880119406049654232187869232 68678876632229582296123624388982684769 50350996346028914093248748742495458871 104801543658718378265450948780789553345 170541167825831826486226694019149131735 191712035005915681570453205945152435115 171132197785251136981628932361511268322 94274910720002633503096975918879251922 131705134092713179690105683659114095235 39198110458547272415690420119622383324 46934920344877609024617702381518055784 23165921134083299943549996062416170591 28459767746923491203355891737619983167 259003325998562294815931954475285084505 132573405345042766236588923626423134760 125044589325274435507575739483784144221 143842666056873673875855062404427652880 
77045649566194835393229740528339656886 230721416549574802093826739521838241759 136984872197606244217262196765704845330 179963548013928826293965633036986606846 81093794490723279885994296550509929282 175340915677491500317276597749413950329 57047311937998596889239670071739911078 78345967891418982524389386889543351652 93400063263712556706184675445068663367 180200784179611435611501496080960686407 135119048356203726778728767376260501672 10486454602716462586191022987435392902 266180484471226163583776411401032555923 177922567296382887995292645607281764774 147354638941110748901071777613832077504 11386236491379506830728620963094661745 42647393401281707520209780845057964211 560012345672962108635101650391927614 135105021891377379180374693260873457591 224760786679278206405412871787773939009 73961499887977539989001000821829880530 201511135175463460025181871183378346942 115951205938373310071143721922524848428 200405446663062724609631010980787818513 4084499686464546462830526469012407101 152724466051124045433884928207554220950 126901627861321624285024729051799564271 242367754280050045324139831677280684141 213094306936169441482299557482247052572 85651902354263777822663401884585899399 107061500515903509923074464269206283499 221594000868361748687063067883562138255 23369088685973206706623693970501448229 225855286644190676337782370996698625468 80297213561030314459788121000041822216 104375640259389070400243588809323828300 189446580253479282905484486251777171917 13339974796188630005869457676998089978 241064276410560038338723364620921385192 76395515368032998743463293241788398517 188914664855694160488405094145717027307 29475305348626328925169323294949894329 118126738456573750249388337944191859619 221689202784680185314494482532119456449 94812282609491148726311465288993793033 152692588155831901280608072033125732355 225123885569700674110694161715400402535 116499445366526195693200909257658932928 62753483122144112754233240781236179858 15444805687749127948434404171151371996 14095553723150444214265207378160628689 47449972008672602110053765929544594603 222342338045680629345204692875855824856 213944253857122075734729482547141604817 37079316985276538985773977257059183254 256448445640796358952644860664830285753 103431768147615597129500944049025666967 1] 1]]
diff --git a/thirdparty/linux/ntl/src/BitMatTest.c b/thirdparty/linux/ntl/src/BitMatTest.c
new file mode 100644
index 0000000000..c47617be81
--- /dev/null
+++ b/thirdparty/linux/ntl/src/BitMatTest.c
@@ -0,0 +1,99 @@
+
+#include <NTL/mat_GF2.h>
+#include <NTL/mat_lzz_p.h>
+
+NTL_CLIENT
+
+
+void random(mat_zz_p& X, long n, long m)
+{
+   X.SetDims(n, m);
+   long i, j;
+
+   for (i = 0; i < n; i++)
+      for (j = 0; j < m; j++)
+         random(X[i][j]);
+}
+
+void random(vec_zz_p& X, long n)
+{
+   X.SetLength(n);
+   long i;
+
+   for (i = 0; i < n; i++)
+      random(X[i]);
+}
+
+void cvt(mat_GF2& x, const mat_zz_p& a)
+{
+   long n = a.NumRows();
+   long m = a.NumCols();
+
+   x.SetDims(n, m);
+
+   long i, j;
+
+   for (i = 0; i < n; i++)
+      for (j = 0; j < m; j++)
+         x.put(i, j, rep(a[i][j]));
+}
+
+
+void cvt(vec_GF2& x, const vec_zz_p& a)
+{
+   long n = a.length();
+
+   x.SetLength(n);
+
+   long i;
+
+   for (i = 0; i < n; i++)
+      x.put(i, rep(a[i]));
+}
+
+int main()
+{
+   zz_p::init(2);
+
+   long i;
+
+   vec_GF2 v;
+   v.SetLength(5);
+   v[1] = 1;
+   v[0] = v[1];
+
+   if (v[0] != v[1]) TerminalError("BitMatTest not OK!!");
+
+   for (i=0; i < 8; i++) {
+      mat_zz_p a, x;
+      mat_GF2 A, X, X1;
+
+      long n = RandomBnd(500) + 1;
+      long m = RandomBnd(500) + 1;
+      cerr << n << " " << m << "\n";
+
+      double t;
+
+      random(a, n, m);
+
+      t = GetTime();
+      image(x, a);
+      t = GetTime() - t; cerr << t << "\n";
+
+      cvt(A, a);
+
+      t = GetTime();
+      image(X, A);
+      t = GetTime() - t; cerr << t << "\n";
+
+      cvt(X1, x);
+
+      if (X1 != X) TerminalError("BitMatTest NOT OK!!");
+
+      cerr << "\n";
+   }
+
+   cerr << "BitMatTest OK\n";
+
+}
+
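BitMatTest above cross-checks image() on a mat_zz_p with p = 2 against the packed-bit mat_GF2 implementation, timing both. A minimal sketch of the image() call it exercises (illustrative, not part of the upstream patch; assumes NTL is installed): image() computes a row-echelon basis for the row space of its argument, so the number of rows of the result is the rank.

    #include <NTL/mat_GF2.h>
    using namespace NTL;
    using namespace std;

    int main()
    {
       mat_GF2 A, X;
       A.SetDims(3, 3);
       A.put(0, 0, 1);               // rows: (1,0,0), (1,1,0), (0,0,0)
       A.put(1, 0, 1);
       A.put(1, 1, 1);
       image(X, A);                  // X = row-echelon basis of A's row space
       cout << X.NumRows() << "\n";  // rank of A: prints 2
       return 0;
    }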
diff --git a/thirdparty/linux/ntl/src/CanZassTest.c b/thirdparty/linux/ntl/src/CanZassTest.c
new file mode 100644
index 0000000000..d90fef42ec
--- /dev/null
+++ b/thirdparty/linux/ntl/src/CanZassTest.c
@@ -0,0 +1,80 @@
+
+#include <NTL/ZZ_pXFactoring.h>
+
+NTL_CLIENT
+
+
+long compare(const ZZ_pX& a, const ZZ_pX& b)
+{
+   if (deg(a) < deg(b))
+      return 0;
+
+   if (deg(a) > deg(b))
+      return 1;
+
+   long n = a.rep.length();
+   long i;
+
+   for (i = 0; i < n; i++) {
+      if (rep(a.rep[i]) < rep(b.rep[i])) return 0;
+      if (rep(a.rep[i]) > rep(b.rep[i])) return 1;
+   }
+
+   return 0;
+}
+
+void sort(vec_pair_ZZ_pX_long& v)
+{
+   long n = v.length();
+   long i, j;
+
+   for (i = 0; i < n-1; i++)
+      for (j = 0; j < n-1-i; j++)
+         if (compare(v[j].a, v[j+1].a)) {
+            swap(v[j].a, v[j+1].a);
+            swap(v[j].b, v[j+1].b);
+         }
+}
+
+
+int main()
+{
+   ZZ p;
+   cin >> p;
+   ZZ_p::init(p);
+   ZZ_pX f;
+   cin >> f;
+
+   vec_pair_ZZ_pX_long factors;
+
+   double t = GetTime();
+   CanZass(factors, f, 1);
+   t = GetTime()-t;
+   cerr << "total time: " << t << "\n";
+
+   ZZ_pX ff;
+
+   mul(ff, factors);
+   if (f != ff)
+      TerminalError("Incorrect factorization!!");
+
+   sort(factors);
+
+   cerr << "factorization pattern:";
+   long i;
+
+   for (i = 0; i < factors.length(); i++) {
+      cerr << " ";
+      long k = factors[i].b;
+      if (k > 1)
+         cerr << k << "*";
+      cerr << deg(factors[i].a);
+   }
+
+   cerr << "\n";
+
+
+   cout << factors << "\n";
+
+   return 0;
+}
diff --git a/thirdparty/linux/ntl/src/CanZassTestIn b/thirdparty/linux/ntl/src/CanZassTestIn
new file mode 100644
index 0000000000..25c359dcf5
--- /dev/null
+++ b/thirdparty/linux/ntl/src/CanZassTestIn
@@ -0,0 +1,2 @@
+267257146016241686964920093290467696047
+[49837358131570447864817515087439013768 217790630152030295509630390043323452418 183007752636725657346015922031780246753 262113896575279912339769331241278344279 177558407200939953451023246629148421281 136929296553865076504681539657670073560 192415039389210093994081842103350165598 182557741530928548483637113503078591939 152312715846007778517225522567912229374 61070310616310966143036616435945529275 56882351798282916540663346247514573657 158686522480779859534523214608362876609 85210334586722150186842848782331419132 198362138483259282290069664523891695478 105962174491644106720737449885846770737 249033261315062656917484949663089856785 139280737583202876936726498752829998312 96203889545184151489078136782695324160 208694243930290456762944377328869542824 251198129775817736537493636046342370518 32491172002557837695014870030709955365 157057260026057739908648668261127013585 32158111277873531187646273497929686990 58023085641078573288671168992626772711 203352616803671819759251328642179414361 176832214306605225027954988995962853393 197751743759386172324631466360221889957 250675485398129152936547054273008945084 89802180792752320286820790825168446402 221927328217203245795479583872431482139 181867644367245155189096649430804155534 196573951588049184839338282394294640421 126494998583021765272120647086463689743 174657163006615797502644605616894054311 234672242556809736618320732618464101882 205348109319692361504021855702077801974 201480844443360756410435617763952548343 170821441145829211070253605577359112378 187298115367502536593622759425713052026 201353915199638771306766167529913744292 110056325175807519097151055510442724504
84674679789619373292653213867492283966 168108975031341360548972497299114799167 188785266690312084670451157982206333021 6919124922283999072938889546284835561 95172401022443537916304700401182513359 242656351122617045586289402040975850488 258917690731498966483992090617969273135 255172357998710172667686768952981848252 118175616249074875462815777656446629825 209369380640707258048170718089924020429 216746678547888281943927295868928862244 63169707116329105567261084639495637564 202333112949022656860592608187835800947 80435337513539361770944989701279792581 28559403271199376081807425371890436795 256861001114471080744832531110449193687 38657959213601984487225303100688777103 264329692965823772141881317913001435480 218475193420063453264724104126186468822 55233548185953383662963228863699064965 142847894615807036044593258566061501298 40776137890228920841990607606744773281 207391812963267976992797930441533817374 131427136606359893450088207506169771951 82054365866591169413120690376061966303 254634913083682101845032880368012778683 113001824664061224418685032691163656798 117833438072061069588705088985207648396 198178618302450682247100216569460586289 198381479150163198990715018490680028045 40108560518858431892450501419935817741 249439423887278332139791602557219952471 221966937421423130761062580294130357251 27490538843743647962500092623737267888 199032039511054771408483525115006232955 153374408545054543308974562742599189798 204441596188139879934847569435872573583 72511171850979613061537038035887332270 188055003891843700481760891626319067311 246942866306387063440530450313249487213 154432671448051407822509238106814497392 10269235724501453720265397111909271779 148857768429934113454368585111610769515 108297143766697999083231078069833743328 220532835954576708526395590637539965227 47023132625007027307413233669814369642 84333086497358423101905076861010297068 109923194851260709892302191788390844119 145532132551656120297402397944248638204 213830252095553517168726834522180278894 102749312273606281835206463742368919982 84543296032499431865178411848912028082 255842553711064592614706368471411701728 92792788881700092973313938762496041466 226246741492184645916217086335680308632 47398189106172181220565717959082348717 534841639268407919453412423169225929 227881612330362332482913814220965533258 20468583267376817360819899952174792253 144516395639654137625068920238408162625 23063430578963026866276768667282988723 78622647090193125396541826334521992292 264903205883946296353652606189457860631 73321233545830123884053316326108652224 224457856516455546437636685796583263125 89690091316250629374456315479480007830 187208222107636159370431203716381339929 171228087913280313688978034913595496145 65181480163363913025022929713156523521 260386206624679247021239706748428416618 233532508524755687017145839207222247623 104495928340502321945045270235830859361 142404893002257456916371093923595763470 70547706665310794838360176214164641180 250155930626141180499692089763382633396 173703133121986101552083136940445186732 101381348611023590273882716679386824878 52369949231407573392363550023930927779 164846348231869333212354656533341858054 167611977406913990206167373536253863095 44127887745906175987802 210066388901 458330 677 26 5 2 1] diff --git a/thirdparty/linux/ntl/src/CanZassTestOut b/thirdparty/linux/ntl/src/CanZassTestOut new file mode 100644 index 0000000000..4e9fbaa2e0 --- /dev/null +++ b/thirdparty/linux/ntl/src/CanZassTestOut @@ -0,0 +1 @@ +[[[72398276494209883607569001664095082443 117364489934900288813772748912639262763 1] 1] 
[[194885164668670801306433413055322420208 46460887933725801021646400328802766319 1] 1] [[265214480819444708594123279944559159830 135555755881205794630508718609541287286 44692727376424019667265698780467837920 105348873456947058075098215336964742926 200420383267665008124992677631949344009 175037957226154573788077864523554954164 198784363762937430133582236807030005417 189197625224350476857712002941357479808 123176502074239231605912049355403842408 173219197680757673164794465537096242604 82502246015727196418180291241544180842 252148878141517772614419588889223095499 222861768237293427164610320331139899060 145279042477655847402483626711031219727 87910557872715526777045607382277034643 142072596850302561207634759593757462627 71205756108575268517401216273026274651 12651078513331907405242158198817330707 160266113454553114847517104170780094187 195464879027759626684158522704705329638 176940293918687704807235877854468104601 256684763930845904420213250872082486979 190679352948270926135966349300610845388 180925218261056184334641860647804951138 69890985699528053281337128635751875924 249647620923101369642976296912944075845 105942913538075089762334334130653298694 3898562296566255216819318123346284574 96036249681148056555888770829972641770 116954586547181158405632648906791331155 156714696811280091319378172855548989363 20102049324529059757649901773837738504 242046209169075775000997868731143868307 153110660874477601229072252534180746483 262645300558512784314570152020176734087 170644937290658478142860569172821089079 236324059282643408066541233152180268668 91420241446920934270222801792817823163 221895909267925472012615501139074144107 65585860673582603802310418767748147371 175284970274766593398967509698077212527 264577434882178626916502171362966809724 248619213343209942390691463132907556843 48694357302037897882508173061369838915 7590271812997530261785510569226703355 41579865942640367934632794995077059110 25903764683100553843924000316062727206 208277694495623391365783965784035058862 20574104178269334275866063132729651833 32390265141750340906945023778238315879 48391540618880119406049654232187869232 68678876632229582296123624388982684769 50350996346028914093248748742495458871 104801543658718378265450948780789553345 170541167825831826486226694019149131735 191712035005915681570453205945152435115 171132197785251136981628932361511268322 94274910720002633503096975918879251922 131705134092713179690105683659114095235 39198110458547272415690420119622383324 46934920344877609024617702381518055784 23165921134083299943549996062416170591 28459767746923491203355891737619983167 259003325998562294815931954475285084505 132573405345042766236588923626423134760 125044589325274435507575739483784144221 143842666056873673875855062404427652880 77045649566194835393229740528339656886 230721416549574802093826739521838241759 136984872197606244217262196765704845330 179963548013928826293965633036986606846 81093794490723279885994296550509929282 175340915677491500317276597749413950329 57047311937998596889239670071739911078 78345967891418982524389386889543351652 93400063263712556706184675445068663367 180200784179611435611501496080960686407 135119048356203726778728767376260501672 10486454602716462586191022987435392902 266180484471226163583776411401032555923 177922567296382887995292645607281764774 147354638941110748901071777613832077504 11386236491379506830728620963094661745 42647393401281707520209780845057964211 560012345672962108635101650391927614 135105021891377379180374693260873457591 224760786679278206405412871787773939009 
73961499887977539989001000821829880530 201511135175463460025181871183378346942 115951205938373310071143721922524848428 200405446663062724609631010980787818513 4084499686464546462830526469012407101 152724466051124045433884928207554220950 126901627861321624285024729051799564271 242367754280050045324139831677280684141 213094306936169441482299557482247052572 85651902354263777822663401884585899399 107061500515903509923074464269206283499 221594000868361748687063067883562138255 23369088685973206706623693970501448229 225855286644190676337782370996698625468 80297213561030314459788121000041822216 104375640259389070400243588809323828300 189446580253479282905484486251777171917 13339974796188630005869457676998089978 241064276410560038338723364620921385192 76395515368032998743463293241788398517 188914664855694160488405094145717027307 29475305348626328925169323294949894329 118126738456573750249388337944191859619 221689202784680185314494482532119456449 94812282609491148726311465288993793033 152692588155831901280608072033125732355 225123885569700674110694161715400402535 116499445366526195693200909257658932928 62753483122144112754233240781236179858 15444805687749127948434404171151371996 14095553723150444214265207378160628689 47449972008672602110053765929544594603 222342338045680629345204692875855824856 213944253857122075734729482547141604817 37079316985276538985773977257059183254 256448445640796358952644860664830285753 103431768147615597129500944049025666967 1] 1]]
diff --git a/thirdparty/linux/ntl/src/CharPolyTest.c b/thirdparty/linux/ntl/src/CharPolyTest.c
new file mode 100644
index 0000000000..88a2e651a5
--- /dev/null
+++ b/thirdparty/linux/ntl/src/CharPolyTest.c
@@ -0,0 +1,19 @@
+
+#include <NTL/ZZX.h>
+
+NTL_CLIENT
+
+int main()
+{
+   ZZX a, f, g;
+
+   cin >> a;
+   cin >> f;
+
+   double t = GetTime();
+   CharPolyMod(g, a, f);
+   cerr << GetTime()-t << "\n";
+
+   cout << g << "\n";
+   return 0;
+}
diff --git a/thirdparty/linux/ntl/src/CharPolyTestIn b/thirdparty/linux/ntl/src/CharPolyTestIn
new file mode 100644
index 0000000000..46bae0d2bc
--- /dev/null
+++ b/thirdparty/linux/ntl/src/CharPolyTestIn
@@ -0,0 +1,2 @@
+[0394820 309072154 109014622 1426290 175222167 825171846 88106125 543013 726393158 718508011 224282105 670970394 84911920 856082243 51432313 356689413 487830678 743695336 929502 439479925 136123276 707988349 831373101 66679668 746727 540936511 114305132 82147118 380194841 79736700 862619239 676125089 510124380 35667584 474394435 720924125 724948 222793534 105330644 871564016 68412077 165794770 279048705 126777275 17627 174553324 851984080 873153773 495650905 188357106 289635478 760525082 90604 467949471 828574359 866977171 812328920 825671785 9758 640977503 35747401 791343 602111985 393648]
+[560394820 309072154 109014622 281426290 175222167 825171846 88106125 595543013 726393158 718508011 224282105 670970394 784911920 856082243 51432313 356689413 487830678 743695336 630929502 439479925 136123276 707988349 831373101 66679668 182746727 540936511 114305132 82147118 380194841 79736700 862619239 676125089 510124380 35667584 474394435 720924125 338724948 222793534 105330644 871564016 68412077 165794770 279048705 126777275 73317627 174553324 851984080 873153773 495650905 188357106 289635478 760525082 63690604 467949471 828574359 866977171 812328920 825671785 368289758 640977503 35747401 791340303 602111985 393652348 1]
diff --git a/thirdparty/linux/ntl/src/CharPolyTestOut b/thirdparty/linux/ntl/src/CharPolyTestOut
new file mode 100644
index 0000000000..5e641e1700
--- /dev/null
+++
b/thirdparty/linux/ntl/src/CharPolyTestOut @@ -0,0 +1 @@ +[190508694806967015998600611097132060049717118365016280475020799440937327406646942789463171342229692206347643711656823487561893705323611973115765000429375831263108569315225427694150247540246831153992507299992825439755817111538589609613023922678205180207300350411328634748456398347317769763113112532949991488988450573239123152557436015405152850660488623917857833818683001387193609953938476094742107938276333134954685460879041352353213523705745901290368321559046533980676310568693475940388053682248147969674444106923575773142801027102175343542579052985992474969967454917969163643940460819332006220647042696260091357235019028610323559379428816498125971469436220619879921223810882259943059156398587216631554149611899809727132305526391913730249737864692929996282208822321711506302712970006968799992644118563580707422065774467792905866650907789662026726695059010485690597774061864567960192738380217011200884878157520759920017356366290889818183668529815140973598202522753939144473600027992971354118778520924921276625945271355017006545810816893146727901309540633127638217572327583960024077147518348286178124206625881051474510793410494138927402297512333148160000000 5510370375206376125805735185537274993180811627757919519755287408102657543105514120792053265823597303123658199603190909569821147417926587591837948891619607615404159338429979309010981827777265483023527116617038416675735836718068782894703173068052932387089155053446282556998354357342954371353946291827882920830186259111039667082687772480659627704424961644441004621954251872383257999920305246693623623498482718299450630387944250424028179677314085650306231332277967061921960251729426061804857726399930059931907366410943776901370704889872998997777631140261117929980793431899765308093602311276420817885590340576896739344657734289639054912398163280271557909299828203587370332167866138589662124644838451869911270013194124015287901127671853661852442880293741916634440272968369900740132355610706928640899686635103468378085757505718584806093099415444501421862714784284070333433948596923713685684574641540702890150015865159237374688805375049432465077932740662557791642851660645069380471195882146186453681686664702908783838072092969407751326548494273380663918625448882464696110955357483705033360786105019870179686630400090123377232360917098071205836248722702336 76636285697827388747309150397897338703479382655435292390554714660407941511174746464243025647528622769737495338417763000019752933198203429069288208786893348291060629771936723001636718081554593978950514639343299849165764060314063003740717219445956162206615008145807100960233499022750960786803534436409840076666609990348072618173778799055867435553418661212328637044438515447861390096923942145452184550926103620714191992966658028523149438488617327736796164690205303611559851533348391220497893652311305086830636423146920281968022984997613739234215642701976964164397239787490393250734733620138901318869290768097670428843847479492870130488315826039236794818526738146932716621531032665547834525578389396835279873989042511968000540704297800489965929294381610645655637781300868741853833969598348341278121723340700947141766103901608611245230634594534138275745968582584616004535088835018595756746851357210313362093691247845116569117168912286575004344006357633865556011340608321685351516589956019986482228918514350139373609635120134296139491552111152715937110193382344029831207490105756020517515406659981844923071514299766843216132915342254701398327296 
682967003344890835104695900793461228916913282023162349416019476885347047156759153830343120041870425125569604438946978617346346223802843129244409026989530580448709099715921025449151356852730096275479574615719078213208988161876991217023296146618565394073289073124848475354819462769506785090935269931460083937050792647715752262044025073338413934921518433111330878769086105978574924684859970370731689106411068075813771466717648400417984385910546076246317409340120988390231835922623872949359375373395547861053095973616176322399894174273765908103222264211747808396140882054612886289413912925004315543450775343461823364610689104041918562764276178075501439400697622053625550952618137968427279232226138077885052216249165524377571639876002893437253703273718596321975206629081443850727596346094491858960684321942142381936301394414333513107590064735382496422091136268562587050129410391248793338093640930328696656251923097684767440301807638687503824842067179628362144610969716679671925283856807610141684043053481248490183832573811408991949667380869760117396637297998369355376561865448872187244433527686282937515305111719666762008504477296919552 4396287874266350268417177805786841295493285845771288614320531722181450577440496198267540620122441281553812907400504909524660352843872107689903998501003247215720798299930569761579252919847263597017778263780200759304776124632551896764004200319773966307706700837585719362472911584420160829320074823113112187068424639830701017474825263597859572552083301841676783056997824812281207005222515987398155599961381284956593211460558976826936242990196571756335894460511674105502282700984315091337756595274747455563525695047962064436078517466962488134179515600566425940659015475067087631091550405699107099365563958969655433429595966567972539232512595888850734247448002102630529844063167928403354823170988961018692168827249173766875055077986870320850147504036659688993124424941419462548993704935179215624593583416622387732393558335009285828678194876074307762996487949549217902665276840258170080597450696368401354134918695158808862725965698566836610154549833757914692891695902811629791088761009326218529170961543557487512210617065814711103170567820807023070096232258273723831311098314998089425172517665091602134431378731762129472035179249 21975839272414395140361539785289148168737851601427458106777755540491334819163322484647244978641576412004122655167427242365378926150687855704244197490363755992292471167821947596597285923459298708017342763275484040196472357872457323212426003258309692000740218485926878522772170127883562943966756258953435551301344298636848316395708197448952847533805085919981476788000303763063749547614198262442705566581963433679537784550150274110413247581659422808793927285653952739466893622437618071127017592576972854442335113267274667707244651098414183394666701084263995550246669025925884806103308866095793683394213988768560031805699567081080363316032748368087823642161357731024813134467363964793281047599734182925994046660369640730321168593303294139003203933511829270132988039147532396132671420753632126315580576038889456326274419565551013232046578798578618873797907261992571005987755213987784888628036003990588206548067335221448237972961862770839037675891152426688093187411493332273636238909511476593797282376050324676361977699816044607823657737979943629751365692194020511409834121987811331317079116625509824147947161648994117662 
90614572248167832815398901237692019368525145085533138287685444807567873204605246604398275364599214250950175235171627805701057597182975632869431126594165858643501966801330401701884178826293853784654266932084418159299910396757430868083130186649756193170672642494752192628065980129443480078681559872292550125041244266137297279861048796846169326664796602071039267580150457326682548667162241050100846792062281616565917920117176525496157042264079387008296747997513691112950993064961357912157542541361239544658084051215442219429376780912474018936426931476238924451921866695962822487292868699840533224902464369267076667572591038734279554702463167517642637118155796167154515207970256621250915178345320909915299152054879019436390822689493822916282165358794664029287244374521577173641509213361383304713851144993302120088461588930231691757002754227719102412898327883927285380172840945990247971689534806358922800882578508024879529511841393273052597976594405438110148788980182813122865554195573932864901904909286094707181763733741079778836930551780931164321209391340730494169796298921330443829194579510911833655576601377 328970398417249875437738673791810670683647177007669554639061977661129100259388636124853047280349180968995650521486117611261304774855877172118818503217976563637794662123745036086469964386605974339488969452314068670779496333555667260276057696519162720936386665699718862715372641593041230458364666891030299116596377056920968571888893465794425500091218841221526095522810974059764947932904726898749021756246707937422584947654173499250106585907320417442000646966083556296607788743563855449319383344414945991420349690655656572763836066709064607426306646832925531075519272224784344606184131358705975839652153843794680601129001228882668623998957850199335696114626372129092315495250157484480620506477204266405136218111512712589632928130561672876426318364373974128796119771175230813968549877506567434438125790847566327516518294857166955570644984229507908647723196006884389541854025643867101067179678490320518453668197131386782690931516841798901968731850277321174688014066857404664758976402868866557275703580522731564202058255775248697080239282936435654805413030583016332080024310102484631101698158062067490199 1127579186957279465567668413182166261397126766366319885142562182868033829423390009947792480046827709615996364865149656955177256175360655982981652377844755656703086872891547668751079735240226870806580504325683552641781613734598976127577230632147939492768120951488677479596772843739320350792156288111647654894320229252992971612171484642080125139314340876334784023575117675940955402037348660795174532129778527784638963525831806417148407590099717948311458840122301529476398574966897559165380798926543906149343707861656819996067396890665167537769521520650155177445335334404782275667914595436068084005414865156078561007411769443544001978569847632042119527015851991803584322700647043343678885335970712148283137252705043154445174772568964759581374098650514877175166730024379788369366185937637692589623967096975465827342905551004697669216024080226292024812174472626656605850289484314826519488201803952735942230029839337765567769407453010191965826318974242809590315145991767303399763633467299753793823335835788081811850780569472659902942011532595431085475913394407208321374000736048501106892346552196 
3828041097219390857907082551067745839476775728326108881879204364051036739270222883898243519649048587642069576968281141303097991041018521880872948859074212972982480449636159997684984126299406495337515903672735412353899285441049046431315131720847467461635427950759940265793408225958463477923656798926652172053707784414886859466167575187298829948847616648829314671347189690358265482551663036686190713893382929400857167710163163729354824015596295132972609948785664123113633400305046225801228443620327395362882461723640091710316379011210504884717075728724641838344258079857344943803769697097560745957594234550289137121341427605700174589001506374718509179023501502389872884093440923152550892501817583961673278241805137409553957313416749430122047354932415388574412196293616806554972921398129524769926234010626224067096643334001194753251645415328660304032278529100608261378305772515847975694249308640869899779642831560727309583425352906457046981218535153118614520625245105811319924139353834363134046379556619709601106013471271498116343693680126950788286778727880803528173998527018709543211 12846134731688678874091515609227180027437690702746448213342751690753878562510042778660573488767873354769669317691187032722535030135303209764786747127868178272363478080136747815943752727880681013530676352679956461180005064025710332201101730891353638781103510027299392807972101297674408887762189593699894895525116998295886036859208784764517719293702188737006741979158231258249397710045725785608974164198068415408769382390899720139267750604996537552615497472442675629598714235521235200473357114765795449976837706509938246497769234249256837375183387716855184063918654748134789245470766987274600625684341809777905089658552539780640596719377160142093793100935591877956074240688187266602111532044043715224141229435638253334136063649115123041470933430553962665790743567847484538982628767295204492897880624550871686675533193752156303545638905826964767986887275149742469089443969949848597046642581030974447859327066316039701202403271834352137565019594158277845648681602629364580908320567325004601703035295876317848269027005261020499575466287921842640599487183076541840358945569793753 41046983192534522533335169525136765306600811060513966258767005515620968135643738818842371156102573321691144163061313175735289972587684800501673555777397097134195735202271088503178235858188882262649966998998275202228701215157055999496646719386034958002307593053418734615992027304115386834015123932695277057767984414997298159383099033467257898944764425338679791490906981961439153570377071417385946693210021092344133437630368644045821396458359968401212847680583808841391739885911779092086333396344701573836116255224700123293901585505971177624372765972351113536305140808608187662657753313060455900360496358254508194457882151721477763003798366482721585236424452141686305834178064765615407864281847488216470092971242615404371740176055518815045280121719735735059110329913104888812448649958627902271186994444227106172584304733521997451563549006785596680059304971930918971009039321422587031126996437792329230683473369797165351134164090806234697826881327259320681155748181192685414938903208953111115066829305023346231045250552876545675345461882235720979123913883979127988276 
120369962019205430978689855404124372900212866215205222090358103895296148759231283753806402533034855572533338321434922099149748684298577825503701909946986150956281910695782020429702123379243351854028821131399252666599392432737132350839146686531554935267725091513381349351580172040351650497931963524146888590733767996958117308106252499763150598507713880560527810362266122786934877928749238115161454112628523538128520755120190021061256052736063390560151830042273317211547890404880184101664658895849948400163661998484100081456221309297572839442948890490150388950696172232131277679766687139834183139867099921122832197907908704608609633735919558382436288880044543333475800536136548586775667302671905517449576956106304601772228184771059609741256642232027139529888618405957065145938709716970881425409138086093981132615138744117012266121299183849581741411838861370187737459710513819502849521366609082597000809326957480876650619264281372978374209662648437206201048779834875669900827395300764841978960389991164340598542920725526398768753169985590889374335926101687132 317390868282092512891464159675406335576019420847475400634608645062826040654058734124506912818537056545680509611337511961726525747794642469756898737520592378000699701527041763678373366705001530040418950720376667599563076056319177069208904205784897373553098400914043787719673249113833234104384140216539976216768156998966282134713978459662799222559304232484057407495764809394124534082232804034009824219659054459163496283843498363231386225782200151257443027821289142021696055027740906917259641134142410147082043581149683419536913715176895243754905296910385301950887106149817372861377425734209581178617134139131298654961272698290194979185991461943128575413334064152617241077546842897682343096444192163319882020087557959980674925732863827537642715815108799502024302456279843243886904130287868777656227533467734880375316555864413697218999377364829108910069903340205165650328282705764500301481604737256903378796730728725738484527988676341816454744103887175989280705236795599534469956241089552933313845021568007821694409715810476520818750680515460794538701 747434245949570578181292635933035612072425856921900130057009004535428379144294535890529282308866690434029416449462106518118891815528994087749187724165985386180766935349891861990225450975205044879827016279791936496889423120682367069304178872753667685540685718210843403271559230208965801434195430782360915502656776511706673003940873844377653500618983242456154260750622937180099955007684065048805422469220861226201819945087982634678888815225200547750765376191063482272608383516266927895863769579892065256681776776572607399582887640357212125973987543818748772546033474374828025683421617731569870956841430428988314092633981559350133569702272073683016627343537428688065365050085741641035677907679956032228224919385238963916553657262288136056541558536871478861223374976391048721949942648127124373533717817331484205349704512261697486979839580483756001358055941617723657425578852084665416464178580793318854279381654927677862383599569513100273095147087971352329954986495290824735159974762698434222178803981416011999330180898763302215807446645423983 
1573229387044305762810701712960729806419837957062370675243187252889663508622635279953927201928481514186795665445703580725453464896674494229451296953752915279032258877704866380340656463330520225616557456362548916196929799891892939772491827777453482562995342293736577079863986180605068401866298479779951383867420467993942838063897717911961821242956547279511050042672908226217000401726508158194050684966961607206726228989042465822012481387201661227837635268948627897540445353051617121609658117398950656618967708231227442021101460677583873814833561706174003930314667347819267409588490340674599298128875223335443745067282564160542254444421736070817445584293683064060596294818747936486825660644905498015073939646049793417694917491899903088730811553545967648558153695639996912855671927517014516169567623565359911684776740577788830188215200997801446988405027198937187633031620559718587346141186856087602712710548464323333040387186221593117079091042739689414270471754094931230742220271128440198059933290551778609649137642021409839835341117 2971907874675964671549636163311641138310784433581148857853989601197878243731153926191585920286210240130308181456314268777322887415707926752080313459518110385217099240288158920263313325284336482127032025866771153285721022236451893939337014509262983910116826065775258635967014804317466816241384669549178927805279837501266231783657632891911127980109553588901411240837224400758511748392930847870735759326294342860382213024404028808907082527919031793722454809035760904952040923541964548596397340536258852957923155006371208146939429957624594302691761080706402850196228052759672118684904575716923793865730777153497856557961915829536328784403195772125830995123543908831466409358326169617941315992792564523264795831787202945636171067425470986790499829208375227813497076628392374801256264013219847847492774280076711106157324861107587295447266262535339803381463965965567208942794446472646092479605887460884514525603020644973495711195391739803904863141835870139423068816249402847149849395676670827796577644471864843685875165512391560 5064909821165357427740898821451717695483108055661974648346952768086282713713977991223493769601282271687750969761106676324260369644937032721172647834053411318725230177794233414878578502338928635450773150648010274070822103872546340665820085840711611080101278821956202037290314619675514536838593067968292001669775958569667922188650775102898556223591120275182422470313223835900425198096918487898549701704968967220783964416498254761892847800809763841421585873530760927316572987006194345563486682545779693160483714677369544956000884966556507240737854220505337650937686902505200189197828056978701045101687081732223340117606938652988648936811684039194952857066228342412257685593198323699130618153473894239914572395221201296926905398262390985858080585938637932008742052196055693165012994572610160964714816378353854598776509896517550182301163413897772666255855095700964690826847498917941863456414161978632212826501333782135308398129733934965894915286694551486779552821180265732291733685086379205605191722998192110957403956 
7828767009830700810388369063141949193420751739123318377350285208094444722839264691835113093683409279394949645185739005030906169030574921283791443596713367509939556017213885179560203881438566815434500182048823743283565445493587354351422037110268635787310017932175291423325892602135674703135362540541144218470209056419495913201930453347722254458068899007384878064501018319521751379289455429554535297973498236575176125054965632206281397848261712959523930365695279750709452415208308787835689489622345848219917675523136827822736522266753451186354365896484744861090836536726383646816188698383194003119464375915713644409725671105610925074762108160249835887512288062278145924382470580379804107140678433878773988384631708991519625587839063383419702854416884968909873970835677056393967804133890679337444480944086237950264371000505596585858499722997817806488955630844451588439580569708408393267601197550129600466932017502547950688881193144529246904777212900147052561850901135573398893616614699741101556383957623950 11027953267970630909581668660180702525710166604151574536502753399243127761293423114309112958488322904185983630490091220267741942170658761332386880130095095125454650258548732386949971262756160799788021777165408980659842648391794646086289598388046259464106068726431666480099215700127335686498548360567464330941830674129636791458860061050735620157440243918265288400504675977112486836207180247649786719533244850209726594494136352246399581221659532993631182557778926663600571479225800466743290189697181244525247031633354445948798654874558656880756208532232459180636883494595773559367258215888851553181102434280048138396322523918175525076990189255053378524235643013682609382833799813511022259200406106460958595849240623546953821707886505604197670807955128967476318436911542336553838732957091404606373712245908845082101961130007946949779338071620201378912417794748209692121465900173950168225648981276821997095473570661279993984450045156764233849452643862629607472731563542306471474302903443075714153251 14215486739027793420511165578454333171006241012338248278469606836641329694572118755154901761978190884111496362048095252966437617057161427406418816792709322460071529668593343197811895530152598553015381739667361824459870247720392325637964754930578154018241965363055967450169545586365546219350652302155348965159555621306871674496658449554070146439545286362289367632698431493166454093445340646511612334716195370512972747468588052163179510556106635160442750086203472053538549820142810860107094525701425913909439615499351035570540082895206847289472896654925023941294491351446694497959815443728727224324009034933737141512611116982444023575965025569408040259517538518590287903358851325480448948694520552199855814106858663947641612058623218782306271114793408923480312872755079110435546043520071063500723171656595429586296939369269064327899133290007363581662742491263636752135839628283615863673769087375503322975245350538096932985710004443787649203391804337656263776622567064736118244226973732102 
16823213050615015574645995568954608900085789946415305967508645386810651530859610615616065900491935732108453200213727440212537069375711049806987407713976168906257897070162390521380099327284203646295831293800491535187702914650760875363281304319242813664884593086310791131029907755466417107264334129010572630304016513361342822611282389107467500492755313172690180294445757762601765851880161139661911942723832227015286754312110734894658235311290551881596546300232232079232112114203573243135529880770787769913441089596103299142135915478475807247938476721497374188525122484544188275396283889844535908320063459104525992961463986469133777633535151047812609187809171537464310826950489706486797722376935833050972156979834740358479035954635888238476246604558901239468585910262135004843451090056166810578799027304901579232798943645535248751413210862177964683750846679825599666126740068794727853373287568397275636482248940484195325918840105297298791284502764192171305684162491259529294673653 18320954461901719402603881313562377009401564547713869109606557402074913521520028052788231923042514752745365775977756102190024949830669781722226496457109336250918951619631113429495852595356739934223979784237958546063667407239316847406400854775481648161563380355501444518532682739440365957587538600264296062117645644496737763187940459929800354249562128637860956916219185440983067440511552333783961284247296332060225807120263868674689623688202286085250770772180092240965739044775498517677653674904084786480993044150365531892242656479038825840258257582394728294862433919812255124288857642814411260282234577545060045419677540238369142509478100014485181889715257019476786439121653908732334795199955735307484547356690458781965908744111000778635472303977725195451882719671045625635428640265745857452393288397994010335302724673974385975073182607618018347668468015370511487270543931601712325029537566884360257779823415403804835859406640065660402203302748500590476889654999464613 18385409832950842292089035324351512946826899541269027991837745030955087696606749499554272676240449719060302191374052313012228961211752592255454040803253928737727726936738557984533221126391304275252334846056219712825233396747961359388338667513684412893161471645863262625410801307258312354903026026916430547856903134400335537137322446299948000342910844080533731788372820688170314786054744569161064441938902823400044989834933543439709133316097990286158636499639698957989569897897124383299162476831249495725232812811854339884924944100062192642534375633185637177022263691127953716409675384018660772111790985733846554401935712519249677941862429341976348310688964181380802623013366514696908719963693895760771545029990937073407162270288935484888991334762556171132161513633483740446380732870015294065952020155266360129415940086393164737698848690808153653487667453028081404166887086250867887652949032793797062823425710167149144354011998616185500232294021330847431632410 
17008368686579424815117496574809293086074091906200875734261281730368169164195677739766001360143768714784473902151810175411370635081445571404597299311781474000733427923317658126082790270740349416026378289196380418656876582825319625475320334244109049019451781381757740864569470806856406275621080946305670591307306547075555171887105833184718962111267685090470855597150670173338533858819359847058438281289423534012711457290519652603986913369081957608105566157957669255030315498607014574209543801014131332350946295450349277969967349865892774454263245568238179693220751733142631728467679344802120247528751504448876276152614430717161456971333106897257575781667430646516639116334471151927273284783410666849755608697720558583918495399810421101111906614668099978333887197260202329116346757235004274794176212046262703347146126953621416572973980481108294531192965065010744898054719398893718784524278177908301700443673056846445287329606464105566315392002815069485 14497357221394085077679237312382517578002131539538402880910299082998787709042282643500220112505747809065216942735684464616997436596456210707171052151784415284352187935359785690436471162612776127363308998022463935122728340083040297531001084255157199129246945105927264346962922853326392527116403492287489688967222443842522365269884180589847605493628438243304413060992524883279591916116066671251664293319928981990900420136274181830012817405939932318234246069976826210875774808355838848388505435874539066897840423165045159463734219064931537037265114304080824736247495960867958491212146793572355849601409543553437250406324206796721546889918077010261300607244788558933370878687400568722382690322971223165957096828081578491778577327999810476622354466437474110184276347443188730954255334078129098694864340233712442930823399200090719099929536740930334720676398853202875442324468926705209405408751029421831194880226547414912038750111502298256090787705 11369205940614014000511209341577104029139488813173928331165498102870062495615440412257343209295222692145740554547698411460718303239702377282132942984199801799048218477384755039954678522458399884694232182445364912027847114296811679348753838355473877576486648933425751325474378559329111296939828399878140649232150509379939212677131781032777176621493759781596708530888631687056736526355368016552676962231642953990908159704289702837351384595540574238088840254312196958632615287990872838099525132185584492653092229927581707431087835167658843829150031565608952061507519076993785076894776269923554651122387223532126518498109540321249751015323838004828985545179954344807681348589906471132409371576933969260625224070366669064689270369805478575119218070905111170095895365973741280569798649044505099063614867202572623694159831195823552529727198410671569056999383625843172119910889677028868744020860940676245854774959414866894188679675540644808 
8184062434084091394962964843593603316635093530124292322517090443544531236393298689685854309670172660565195086967622398146628199959805961989300832190913900545951427121390544583722608062709704546689438940255432472297411829811994559208033004406483896540829932788946481933639848337863109713945483367335781335446931451625787771255071554306486025140044088404264862605362224110340698078767469783017133610874638771519302470345725128541447838809207062900290887964987309476327566189512266756113277905755207677892498922391151701604013059740001819713839827782451813907298281741186538488709114921032319443156997145350082754502785876921286166903106119125028253825880919239580124639103680943342932647665075820573240134269153059065639476637714675717376761411995139839244376239097568049127858577704968915499546972617299408212797077429745039519482273112174563321383380927156073675624553783074335045777880227373810704280398098443462711993836 5389665418118209000150519862758619702702796716527799183600217088520085003361602674992050943645314440105358346422312246816067105629738926364766520405849742807008110767254147665307597187958527051701200399865105214953893098414205292010616649329263312571541320981727215265893721318130542296957693112538916687699081307013502135884389833463076038609849713484163551834819122712718864775957427327052042642814585212056970675632794405380133936940619450338630092411739268407371099262951149642545583595282475626616290120777761678216367683507644309481321145955426684522604512781010945370392583330116177300449116818932610304255222506528552743766566836759151866937469480186772134336875955173266552776403672329112371317331509117947821249203157603581222864165325076403875057916007597248467667028651028975273998568213379299277468391054995596096089292454364381624186969883639445634841936025267327493697865842458752172772996435862085 3232653104105455965050963542144126174511394533404564441108024055494741938394025072191419617891117522318008347422011443414486128926583676768742567007693881344559331041118197192829056527517271092721789130219541733513936739042965284759338723988528222746197269437531287255281996931728692008497213985933191802690390832782941807565667827008827828868019579423203780580537598640335227507128786558318060894469586086822701421529196632338118637909214697798999651374849527946699078947574696434595717566197730108886278141589462144014534988130710310332989219299501490949224020558620508281843033139627563754250706198433133911650961955418863017030131341023979926769082692194917419036004919044502376249416391626435775606521801272070180851738672399053066070664843687291626156237856416982665263302877820820635441117202379057295745807639971171790511110931335841251926185678766926850270138494171674298460969125856781225969804 
1755247756793686037125732490059066277910149489025762834423145527432874025739125761941884492016061674022498025924511047596727422239038863131551670688610872058060738734000130507330666577046705837339130587397738963347954858660279717092215258144293548814421824751222931009512925509851666913374542449219620575212195972582798148250455192265371033912726180462100509079204886961746688451125175946298567953600218569930391908288832637786607099475856298774219287831317970501988231013389902222737281322164185181956360231747553828359498403106268362761178716230325560725400079538201401283334159116328057723717712307983348645400306259579201934549460399437162109646898477258632812280913813011696409598483985085790316679828522616690442288464185933265109991939372296309035691635903011421813463672199183154795640198865944109898522581573392715087750824944127153132074420403208172603619731015375948038328909949014997 855621346364583049651755757607034401608668601171035526615008394183360755976485046768332488472377521462437364624135706102913777896567219718258475968028581700716259853832867443620941557577913585507181981361217796323205229031158670411041206954610619447718306264994164037535022654187355050703716437296185335231206453055446869816323210990145998431922720366570959844305532141814946224854843672630182857817452557220708598197801656646734241181374309484500522597228251315609896556566380393728225284222435505582578592393838413269337311559191470804917715216536067471935181230227349219608219734884284241922718295241751292323143328651782377247442476747500839364622386507047792218289360952428688327744070211928954466048783528260472603016655380845396132147940531970315730714754079360586586841816873554225103294532336956078035854777578637392107725698094217917066263206726726753744938807983333850597407 369941957363852037053003410279474816391823722932690003175756528860045519249130968063544770941634391185834542616036146391891044901572694190660680392733079223643729457064359195610897759651304773059153335869333214535732770720091854494611367353084848345054530099749073829447108865619686992745378043158361669578481905536152613241868659072845113634748784650047173389926736945234966353579225402522626336486310504123400013885107478372476734699239691660562215943834658902176561797467065736993357030303383988109473321081011325270133620538522029945607924384180926482975698638977662198216859849847588248148419376879584793000801386469829651374306597297108837725900777097461546848814007352002160044746856465558350965300431977568087647992315521995815262220437569368388155784246104231391449094792974954412991788482382355036302105284200682137186308411972361148628078939929605525616329130486764 139191364598100843671027168761130115333739203230073508584504049198713306948029606668367340592877421569262202328711352214631816344333430888224866663927910750217690398198507767286579692931128957428406466870713547237180753804673266321549300865385850018747167487230201326536136239330735653833069283919924711009354326646524735118602093950463534501810069560814698094813381528415549430024065315782636001023528948712921233203403275760464752945553398567810045911741183985712636733519308837258429584395999947098335342825835755617783240120577928324385395355601072972532493189303029050906187213598549002054867041009615131670995094676076847869555668748313657507176520813190907440336547182478133230201744715454258846238282811591909788614229538626699301214779199384582376920251846275459729250954320869202310577206597462674216996364687134766803137799019145507774282641931338193917126 
44043562205733313414036828936504004429701054515845180285704927397182696140832887668786783975143554314666650588109907209444296723436732454131038103698094256184337863472429705445108750422844212892443517056326891764058203238993453983475342072912089462318310151640603493879178234343106906687646639974006265050741182575773918097627209932573998215394455430637451684558340227666732733171871174929978825049679206392052615486072747164309491396263119932669229470239722979076614818030839492587001718829636040617460890209483650534065684646884090968592356749997899187177334375897501229529442050888804562993595168864954254807250313656769086998073348710010793613307590002059463649318882670792705438035408867873165807303747666006673443131430948061577880104663642859799643744511302365497772748115064157215074175620549173005771474650437986127476366721735402899582825084713951 10861925002847327504320075403706795824019456223819973379272382447081129018235199666959659023101727367034539163597036405501200542617696994122772564647317696022147087009445291067309079115069929701338735350181801540429672590867240921627765049456452565474988868946933539776659454688256934520331273598832585642384750571654311572378032155149569838977608824214339423539102699201456694781585735937483863277218369590758105449347258392434502820444874497444771764310267081910137499708886201563652713494352196135379091471828692479382107370857606520827572904448137920679562258643533068128646856366248786885383763721493680392869952650819064642319983515151091744763609383560764830606666131134898471563533720399420844110910329565129960194288603446857171570761459348579411157619041698371464753094573881938944871956336061796983442128009603721161442952701445988490157 1594581635580909688870856131856948873962233789118578503093090414701998756676627323762619896703146499751113203432773544027609214366525969481917365329825852362231228051320747319565091520634757918269736022465030862748060849821127992742435418007246838382166242470327041621175989946906399383164995926127975797364033844659359519114943964391260672768238689425130122121072958719857430584745068854835057254870452296437218413852584338623949321798215333032181452173022958032143969953469954141519096247566523741119877846927660963667626122580477250091180649734659844292484949135918505832341910904689615744589748276674654430762667515177051483238717292311936381077164382595142828724182034867085752129370517700853572698774700025513525951609734112270450530335313883451375743271723233505227684663080786799962266443744364148697199454509544369192067795068620 -175258330374288488217699780681056007790738018516419452281991217832654531725817746986685120805050526934206221214211141521852994927206528251631399712058678982392880699800227124440195696214823380916708167445310792263345031242143035676279592625475734248832441286065902794254642514509741620998597884565902881353097461921842322070870984529159229514060165919622238464454116474141063130936752984696976799921351467288696273318318430019354280625161000802741257300306815204221600289221276550160895249928480353445064342116960056581099307365863716994163846551720672990602923358299428092665607545231959644829152413704506424911051622320036052874090548354320033672129426624281831899775321139548164347517142958538427511252378549150217840856725285327334555207840270205721245932505325761823123705947260438597139285994868809917490844945144025467690 
-220204516996925692407481639318962275158355359698005005318968831105137877410318035105710030735253493203113614601148492718480669119594955222926330988813747084051194096829288602013708139413490389065268147594635456948867658605504940394238031555482960370191602547657627988580473271317223440356283286912868992645933201703098438403232137920212420959440102724896369844165067242271981986193800359091094747723345076794962221197090724157544666059314055602239994576519447991895833036047648279835490528709218893463307393510253353796421833052938499596293567284485949691751840926801159600011504013566712863640442505163583444721673287818969800667583958363492414161365095498214335337281791062974362643892979140568974220436080857130413612179269433508941820057605917648593877126906070657521089338116118571692083335137346253597230363346438 -86736665632678925155482178717541416902076376005607891408754495900348933434918513209452724010018685361893158041473663225587808925011797029588521260149269601655264376023270221929857106249729817265899694017319158207730930109639129622351016832288730019468523759267204415540775380151255429425512565935792592185965233296489721362856086319211357984593146960452637758443160166637910243405020664850087829521695097428554200043291734159717457440793421662838516158714294447522963772443831263641844249855189329155555845332410144343086066556665296069125595886804982098096438335788033789366633479851394765637333769728445267065432985566657279853389925618818844008343107578319219160786523354188227450128473210990504657882345063523456224912915046059727664646559655991260080589840205997564008010527878595560007930655715317990862 -21582126454742306667993456149556677980675932460298650535328848994746168325095862852666722458536329447597423387096751009305244842139050184544491450038967368144228011746022914115583645360623692331233474989474489219444393742965914046539434679463380651421037041943699498012845273895029251517240309503539746153134122836376598573102967018125553483751216696097816100791852880103103911552601264050283041915993619118558454428361434079044285011654871484945504053877664789720457201070079729484136519080360281111860909422248778061447745643765587425823404328107356657353938906970458620401013459131295171413389624086947025266759645994382891917066435893359532835153676706612710486528253286986244761712486266844971524920224154894618138915830104817136858848835666377534542360360110300083222617164486932381133489689581 -3197721395912341565564081952412110136169530860919163386385656007202973837007705327433023312125711351763473924016018786188937755919484674858599014596439712903164844770941217866141250736344714983164806546715253297791739899663831078378960721000334785698452304134226379392593908714462220788715616932619290014117079613611578414674308820766751225609804210768585733858466066057240133880231960864180798374206349802727313893774585143539099322978308588247355508151954504889114732631366814096556279917706062023211143064458274960070737668337702316425759599067738690845954429429400298407857211499431969609899228498434622943215486709416512227495363888797264000251869126601212793734657197459434337040578441217069635718989402007332910889176409302675400938019591258921490266883491293314586761456215738961770 
-25183123559183134423074018635524921832567265127033408066598519830668308800738590152159316536479462511856139052375940167019153352366410191569916455774974730936481936472407740689794453537159299477945394874218179716458088007817339589177132979600490924502475782964795739644166657770467923215193227627376893261006656721851888476838139947219545442917759649093849334569809615727763573119590799216988467863818520639405700972431656022214085849172788952250025843472304147527886472940563732646744681254020329574575681719685485214262576685737122105672170725913417889623770432626301778650002211771426799598581241698478530425796305941118354222097953096626148205867114509728845372477611897334418262194839862537661129415586620561992889051163087412367496939111184741348290053889630495170354284007 128275949551782491640474118889911607242774013921143288004195427385429753191316410621073680915032415002991246233227241685218661767649500833260019371026766929922077186190252090680590646808375568580923518388358580405630116044119446285151876735370994962224616003501372549492788514039684921712846322649930896678422844654726772099788678741997475287649384944306743050933126902428578594909351375933898610844399632943765799134480756749732150738875295939401328991932923118218026009444193306285323722921890789771119859125621720604794898536055719867467868551264006568797001394242368280603685003713685417075488634011839908567853979278206769732648122467235571817039665159929122289138505819901539058492237967047125526640957265744234349635955895518407341265018302997163824746454137938087 36963842051408455627078292326095346501246419691467820566043752654928260360635708519143675203535094266715469760304060224887034201056737938410041473238542080972330131276773138012859711976027293161650646753000063680283470851169614128001621029560032089647000121540728734034873216463172295549679743717552505757877424913087495693058992361715922344628253564214783533834574720935823745285134657287539006424710364955973555231817000324626220956550772246888359548080627256851944674224116575128363053447942251208263767154007379582934196511400853751069963626767352734759344696048841379510765400102007519095595958932603531254720925841290118885854413489406727653366745404051980293900871756737680821341752735389489963057142251700893290170349570982879373958040232204173023894597 6067174578807453882753508695539020923333859968628303403913263562280670905602102213232710017631552907573770753069068140679475637462246560687301384947860220017158514402385174162671478980346080076597495562440866421626583532005762468447785804806628204738514111291246171469726298201141779388725308164318995081658226104112654075090713565816719599937473161020844843072726765709967526710443567524770323628072478886987184690619390671630875830952507603589427492242929410704976261817552061744654577645234599656211610253715579260456079722291995428674862563113864722845696773528951755788870714311116850396597345170291180807201764893656551450911123228743957132553672671394174588032604926396747714948205481344701433412957837221543194473981990173538847582486737301020 
971164695550327316892471881277319230144861081825771285644418561121126514102955689748783732656538341192704824708237150063888076727774611503864966179200318939765919288445046521087555430815379597081937522609887476758129473089655827989487053701225651356542513908590686236171081594017781764761922710349346758377262798118275462154489349177944578221743389445540203506895642378539922840587871907292652558453453396357065517291921665634437090215929395048200343232386724384404583333781856076099664543319224848254642268353089301170089910494152967988433817567975947425876339410081317280834758550575869803559163798201156301489358203554822339031417391872906622726932181112253772995176180540797206282250073555909994274582396693822760802046170444942349140186 305036345493182155889442536292403071932596904420670668853502291563573867944598082511166761027895898173116934410911879913377548692099530873229032000817703467966997420724171955656509974496220898794905850355504313531829000951395618367904698857322853686129408450855805854071328499658844029561336767988651095243219847939247053250383854294500676536313935416993391926170132383451904184245262833764118599919155947685509575938361073495014692445254191349734304706389836339450591131002258675833002573374737502780762133276227409467670512009376094428722486750470394353067572548145747953124406467507224507612572529561059009136195017360664320871069052010795988350844840031450055643345848625572356540106608016932772566377206894261963399408707535429 105402147299284393801684646885778765742279178573992233031002910205250604702107223611864499918268527819101366742356007311005598768213571349766642491482728851958450950455889076721535783401002573368065986641078538028177544079941158139701344311765223277908415309351243081916785925470763744013483181045308661510512983799198957649602467883671699837397899465611265192162955058717050114297847549743385238281013700095022129633406711965279543685618083033571213158068909888120832409670101576348978811101537536208970395785467601852766952819049221485815628398517759178058517844029811060975065473131245800169534764075588719469992424624467622166627042982349012632687637915493827861315497693792261057017521680719980397862367373442917876157 25690485425700007333327111291046118052916546194109933954523067798738017855304967811892753709170039580388138276001812699286681159645069291618895891194879761696331727008600641934194473897886852528388560036713296401415074448604856009511906586657616465393600319522201499868805163005338376243350208867769783685669787198200747184429863674508841940316235553259068361994071128057351734893274960785298347081660660264238009720132244399784201277754062818527274311389411709867520460740661039284659109334041614417751085450050575155730149368510175650199046168852037071844173108903183316159364494461747307840277224000732804339394664231823372983684233671675576111529458692852486624912591483861819847132408468500128390390412482138 
4215035667590888985965337359443737702195499617955407573077454563146784318779137203129821443296887722633551646953717495161745778610526621162333698225795054973314855581534019375834217718972908143471154846977571546017208423085406558162928278706853427103802761450898397243477928296062723848690389563178829255707841336335359415879160397344049578440289746047145057551995762998797567233587941639801361974253344177437179996074137443886617174425826822831268913377654925421417599260409139402113644684201977541552066007144688581988784173586755471581570836991346570105975945420711503849529541929295904156542683598760758845679378157072396937034110306179362927719659230904628961645530501360977522978808476694302943186 462608017710741149518921076515242367264310594099722535292736461315143127358867450486911423197949754642022766376343537286573284946456631788802043092001043896546292343099970117513140142876413958588930953599662628736745369506823323897452022806104104529802623340126278463345791040740606201163943262041386643332415085548334817258827802340131096308637534109003534454046336053946567998799673728380561979967733281395234989365130219066756264394109321296110934236015777796862896750313519753357650032471553283593689556428033365906555050953469040246556394965449213751121321069122824472412896675102149973952890085685970772980198278212100194335225120047097292109798954577716767576673399096865805426031898042 33862882529009830392485750866401109291648756085239885192082193097784920731593510260850294819451992370540561827534054561043033181886121065508178168097047466348896543655968913775469625023261852734567755690444784377082618901195121097163060014104094528589150920034706346712588776891700145293759544057581161523708488868539894989826251069867715958740770359882727101057333241529220124806976175891772510013657016401004102493186345636955722434001726286775167696494812419825444677061717883293199341538009742231099116825607061048095514686072084597278370412671931651845336784138496241689057151288722793849241157333200359325678533457807666714016815119119891017483729637924628747757394881277636790 1814623801974185696815411432306974014379062647175798499062528227782363002203191819942324519164433161232052472621761280070112514159919420240010712015103508983852372125429523943794433421628338145728876227573271925890034546001131071085439503633266645106075112295601727385107254180910165696906145047683869744641314125436224088096228900107268787766673078208658527478859567891322851448775940551764542001214799552683090377894121653548550575556075535747002343835038737644780867590842510832150540594815550414392332097232034612397903028036182050628308867653849432243505136662631356743179732676539609289709606155493542338238043241358032059032628463146842515700175757168778764095505759 93249888791181405042891390110774057102972740421065529154532451790004315096467048152350846217767211853115221918973072905637280639319499388476067438107304005045618477850818111196816958030142369907861726827052730264000569007382147991186409984517351604436769447197567185458368327875073308764506295090352637281036797615844448188127999213812845697180008086762047306094725568426355832344785271436502705237563126481219955894227622900528094243798610227925703776440230934671798963120546661856209134853604493722945315746084805142400631303978633284704971131134077611735198494799656712080010887074787301599408554520182158518438151437286098561879378410661311904076531922059185 
4921498129852006424675299227573952821415882781331655892547799121843146815123947125603369893485756274753305528741917211894425572123308839699198568100362792179918425427995889493742458117705487858667254313174540216219309981972579082976319232928821327039892861057638179204340332660957287682233833005671999449095166127905075871387601986746761580173192664891901026731422751457459655514767954157000108433632026130871403579293221376313661255899621749906507629683182995716794782056017691100468715575369671957055745623421358122499170675579034750975916470271385931509184453519776648405650846194058985391901895227982621183162123025473842137674656349700426683056077 201272902562305689472571200491331851750049636933880578899333598076293964505742493042459180484126457126465401121797451596202908612946695380686396377033841585914389410302477301705112170287889865975851157757507465549323652763901840790205622628817939616400535635536652521657953857053611322588643586675183084699607388843166404226187657419153219853333625020688501210708293131524312769920565161689113511055648748532848818271016123653266692893441555288882689645026034714316432372128195031585720729197776036330109543487202131022997674917997849083896635639725036480827904589680949947237809251437137288956412469527468501734114310918240063291480050930942 6074751253304373699478103005423767194392460180178532242779132131883134798971251952194610961512672663543035654127756153712450507211561368973571976634207783355252804686544103676265001237260184773446824873437506138389881580498386927743109268036309633256751542644893511555286815487627956463069829692824237568007690960531273076445032010133947772936218555810889901230346272520955630280919825571309963348627906976862989869796455699119995006635298335538115452044779862172192986061031087449594487565989656917008256321642347817336596876445523851568976128441660664521879336687329733154120379435756686684095884126156330120727152201024121141435 158835640368624927032590567603298033578619460428181240275271178311247955906385358780880609108729731593000528649866256208791406940775600121819889914361157014596934714258355297904117710616353860489019083482117241921698807099414430612049481575726358558043601754835057019630102310430472592841252337908017889440767202384995675110755961647212986318461061788135296343800480157889647383498289820679293341354940262895884389598393484839276117181543909642716707954164229246657614251709668652420572710512949807247063044678349724511089981459422690779865347519696887457564045446080141772939950269568858082012591313957710183996464958422 3378902776872134909204839634350780570489386444602833200454760900258461158456475968767786638826676577991767426735542388159200737328011394812485955461262633296784485901814533654752799406903518798599048703026651458655257379291725908577613016050003151556791844209730414928304386452743655025491000998238763687307956696603617748086915536390338655047635126787848262872245644469285183252398652508297220725721995236789120114906732141512842760857593190120638372023923729622526452382862969080559959232688153460133906800190053367105284950075055827365166620172993694924414174980373672898385726820481538485431659006278194509 
43079331259939515919275698578804185620986197244696798415016495720873670933657463215743464320826822818217043520103056534209916056157195395750471704735369170458016194728611156362683682500685550935485395203593729702769779759594200935064821306591638627011883484686976790112753212915600441815632077904449113748453399259468531763936142962291681638398372605510439393259145855884510266583260056334357613740785660705739276425104145786006056312697160438032838601633621767509204695962293044418377847996849526662006253108210889296421165933617690710605289828221283228905818261545639312850768818457519140627933059 241888549947281780453970332376533021011282816110362728271203139436475765057942802071196872840393868465027289459959410799788461175668613254139063157050636058392181269139232404141461880298738058796474652008228390097224085674472117318668135699123548934145057351803730650123864459215458336646869134720211474335685153578049826170902230696097090172541924249742742735455798580881018087903927097407092457336479159013029800043877603400653137762357381820397447743848915397214280255912384053813197361790665976579382707258006965027976397355870442671717523377265419561146645361069308842409716414749823 2376666192959846633516107374330794714748296292371435769889972320797571626802118600704583172570991283874821908434105402149381098559830077967475461633717911514260338342247661573203110589842201645894704836171721825311983421771649377974425053594782235622798898959134633315987735133144574981207375842994980940751142886625727144663673518413109414933454784015023169517143545340341136893308618175231432552370076990743951870797931924001487930023505152696548269229638467144136238737326349883098595437180422242838214426859842155775740707758296424184218989209744806567739431156071 12224062625832648183516693488530135118514258747173591610235394106191837482287498065840699381079070193230023620339082954952758518319905977223914718987561206209653761576001047130961967535872674537183498320220994771834545529715371523489357329178329639953948515526053154981117517445196850102059032579570738873131904056879673688661280476002767416938718011459524318277612243153293359296829432754457534753162164415110718998005028639349072782744247477763832792261907563795101315208763137730846936640573388864436242175078215481286793261658163010047075816510 1] diff --git a/thirdparty/linux/ntl/src/CheckAVX.c b/thirdparty/linux/ntl/src/CheckAVX.c new file mode 100644 index 0000000000..34eb0507d5 --- /dev/null +++ b/thirdparty/linux/ntl/src/CheckAVX.c @@ -0,0 +1,63 @@ +#include + +#include +#include +#include + + +#if (!defined(__GNUC__) || !defined(__x86_64__) || !defined(__AVX__)) +#error "AVX not supported" +#endif + +#if (NTL_BITS_PER_LONG != 64 || NTL_DOUBLE_PRECISION != 53) +#error "AVX not supported" +// sanity check -- code that uses this feature also relies on this +#endif + +using namespace std; + +void fun(double * x, const double *a, const double *b) +{ + __m256d xvec, avec, bvec, cvec; + + avec = _mm256_load_pd(a); + bvec = _mm256_load_pd(b); + xvec = _mm256_load_pd(x); + + xvec = _mm256_add_pd(_mm256_mul_pd(avec, bvec), xvec); + + _mm256_store_pd(x, xvec); +} +int main() +{ + NTL_AVX_LOCAL_ARRAY(vp, double, 12); + + double *a = vp + 0*4; + double *b = vp + 1*4; + double *x = vp + 2*4; + + a[0] = atoi("1"); + a[1] = atoi("2"); + a[2] = atoi("3"); + a[3] = atoi("4"); + + b[0] = atoi("2"); + b[1] = atoi("3"); + b[2] = atoi("4"); + b[3] = atoi("5"); + + x[0] = atoi("3"); + x[1] = atoi("4"); + x[2] = atoi("5"); + x[3] = atoi("6"); + + fun(x, a, b); + + if (x[0] == 5 && x[1] 
== 10 && x[2] == 17 && x[3] == 26) + return 0; + else + return -1; +} + + + diff --git a/thirdparty/linux/ntl/src/CheckCLZL.c b/thirdparty/linux/ntl/src/CheckCLZL.c new file mode 100644 index 0000000000..09526dc6c4 --- /dev/null +++ b/thirdparty/linux/ntl/src/CheckCLZL.c @@ -0,0 +1,21 @@ +#include + +void touch(unsigned long& x); + +long CountLeadingZeros(unsigned long x) +{ + return __builtin_clzl(x); +} + +int main() +{ + unsigned long x = 3; + touch(x); + if (CountLeadingZeros(x) == NTL_BITS_PER_LONG-2) + return 0; + else + return -1; +} + + + diff --git a/thirdparty/linux/ntl/src/CheckCLZLAux.c b/thirdparty/linux/ntl/src/CheckCLZLAux.c new file mode 100644 index 0000000000..5cf3beefb7 --- /dev/null +++ b/thirdparty/linux/ntl/src/CheckCLZLAux.c @@ -0,0 +1,2 @@ + +void touch(unsigned long& x) { } diff --git a/thirdparty/linux/ntl/src/CheckCompile.c b/thirdparty/linux/ntl/src/CheckCompile.c new file mode 100644 index 0000000000..d38ae81af6 --- /dev/null +++ b/thirdparty/linux/ntl/src/CheckCompile.c @@ -0,0 +1,2 @@ + +int main() { return 0; } diff --git a/thirdparty/linux/ntl/src/CheckFMA.c b/thirdparty/linux/ntl/src/CheckFMA.c new file mode 100644 index 0000000000..c665624737 --- /dev/null +++ b/thirdparty/linux/ntl/src/CheckFMA.c @@ -0,0 +1,65 @@ + +#include + +#include +#include +#include + + +#if (!defined(__GNUC__) || !defined(__x86_64__) || !defined(__AVX2__)) +#error "AVX2 with FMA not supported" +#endif + +#if (NTL_BITS_PER_LONG != 64 || NTL_DOUBLE_PRECISION != 53) +#error "AVX2 with FMA not supported" +// sanity check -- code that uses this feature also relies on this +#endif + +using namespace std; + + +void fun(double * x, const double *a, const double *b) +{ + __m256d xvec, avec, bvec, cvec; + + avec = _mm256_load_pd(a); + bvec = _mm256_load_pd(b); + xvec = _mm256_load_pd(x); + + xvec = _mm256_fmadd_pd(avec, bvec, xvec); + + _mm256_store_pd(x, xvec); +} +int main() +{ + NTL_AVX_LOCAL_ARRAY(vp, double, 12); + + double *a = vp + 0*4; + double *b = vp + 1*4; + double *x = vp + 2*4; + + a[0] = atoi("1"); + a[1] = atoi("2"); + a[2] = atoi("3"); + a[3] = atoi("4"); + + b[0] = atoi("2"); + b[1] = atoi("3"); + b[2] = atoi("4"); + b[3] = atoi("5"); + + x[0] = atoi("3"); + x[1] = atoi("4"); + x[2] = atoi("5"); + x[3] = atoi("6"); + + fun(x, a, b); + + if (x[0] == 5 && x[1] == 10 && x[2] == 17 && x[3] == 26) + return 0; + else + return -1; +} + + + diff --git a/thirdparty/linux/ntl/src/CheckFeature.log b/thirdparty/linux/ntl/src/CheckFeature.log new file mode 100644 index 0000000000..b538c81ab9 --- /dev/null +++ b/thirdparty/linux/ntl/src/CheckFeature.log @@ -0,0 +1,8 @@ +*** CheckFeature log *** +g++ -I../include -I. -std=c++11 -pthread -march=native -g -O2 -o TestGetTime TestGetTime.c GetTime1.c -lm +running +g++ -I../include -I. -std=c++11 -pthread -march=native -g -O2 -o TestGetPID TestGetPID.c GetPID1.c -lm +g++ -I../include -I. -std=c++11 -pthread -march=native -g -O2 -o CheckFeature CheckCLZL.c CheckCLZLAux.c -lm +g++ -I../include -I. -std=c++11 -pthread -march=native -g -O2 -o CheckFeature CheckLL.c CheckLLAux.c -lm +g++ -I../include -I. -std=c++11 -pthread -march=native -g -O2 -o CheckFeature CheckAVX.c -lm +g++ -I../include -I. -std=c++11 -pthread -march=native -g -O2 -o CheckFeature CheckFMA.c -lm diff --git a/thirdparty/linux/ntl/src/CheckFlag.log b/thirdparty/linux/ntl/src/CheckFlag.log new file mode 100644 index 0000000000..42c62195cf --- /dev/null +++ b/thirdparty/linux/ntl/src/CheckFlag.log @@ -0,0 +1,2 @@ +*** CheckFlag log *** +g++ -I../include -I. 
-march=native -g -O2 -o CheckCompile CheckCompile.c -lm diff --git a/thirdparty/linux/ntl/src/CheckLL.c b/thirdparty/linux/ntl/src/CheckLL.c new file mode 100644 index 0000000000..03b8f3a2f5 --- /dev/null +++ b/thirdparty/linux/ntl/src/CheckLL.c @@ -0,0 +1,57 @@ + + +#define NTL_HAVE_LL_TYPE +// DIRT: we need to define this here so that ctools.h +// does not undefine the LL type macros + +#include + +#ifdef NTL_DISABLE_LONGLONG +#error "LL_TYPE disabled" +#endif + +void touch(unsigned long& x); +void touch(long& x); + +int main() +{ + if (sizeof(NTL_LL_TYPE) != 2*sizeof(long)) return -1; + if (sizeof(NTL_ULL_TYPE) != 2*sizeof(long)) return -1; + + unsigned long x1 = -1L; + unsigned long x2 = -1L; + unsigned long x3 = -1L; + unsigned long x4 = -1L; + + touch(x1); + touch(x2); + touch(x3); + touch(x4); + + NTL_ULL_TYPE xx = ((NTL_ULL_TYPE) x1)*((NTL_ULL_TYPE) x2); + NTL_ULL_TYPE yy = xx - ((((NTL_ULL_TYPE) x3) << (NTL_BITS_PER_LONG+1)) + 1); + + if (yy != 0) + return -1; + + if (xx/x3 != x4) + return -1; + + long v1 = x1 >> 1; + long v2 = -(x2 >> 1); + + touch(v1); + touch(v2); + + NTL_LL_TYPE zz = ((NTL_LL_TYPE) v1)*((NTL_LL_TYPE) v2); + NTL_ULL_TYPE ww = -zz; + NTL_ULL_TYPE ww1 = ww - (((((NTL_ULL_TYPE) x3) << (NTL_BITS_PER_LONG+2)) >> 2) + 1); + + if (ww1 != 0) + return -1; + + return 0; +} + + + diff --git a/thirdparty/linux/ntl/src/CheckLLAux.c b/thirdparty/linux/ntl/src/CheckLLAux.c new file mode 100644 index 0000000000..1697701c47 --- /dev/null +++ b/thirdparty/linux/ntl/src/CheckLLAux.c @@ -0,0 +1,4 @@ + +void touch(unsigned long& x) { } +void touch(long& x) { } + diff --git a/thirdparty/linux/ntl/src/CopyFeatures b/thirdparty/linux/ntl/src/CopyFeatures new file mode 100644 index 0000000000..f8fbc4dff8 --- /dev/null +++ b/thirdparty/linux/ntl/src/CopyFeatures @@ -0,0 +1,5 @@ + +cp "$1/include/NTL/HAVE_LL_TYPE.h" "$2/include/NTL/HAVE_LL_TYPE.h" +cp "$1/include/NTL/HAVE_BUILTIN_CLZL.h" "$2/include/NTL/HAVE_BUILTIN_CLZL.h" +cp "$1/include/NTL/HAVE_AVX.h" "$2/include/NTL/HAVE_AVX.h" +cp "$1/include/NTL/HAVE_FMA.h" "$2/include/NTL/HAVE_FMA.h" diff --git a/thirdparty/linux/ntl/src/DIRNAME b/thirdparty/linux/ntl/src/DIRNAME new file mode 100644 index 0000000000..d9004c7775 --- /dev/null +++ b/thirdparty/linux/ntl/src/DIRNAME @@ -0,0 +1 @@ +ntl-9.11.0 diff --git a/thirdparty/linux/ntl/src/DispSettings.c b/thirdparty/linux/ntl/src/DispSettings.c new file mode 100644 index 0000000000..326c34d541 --- /dev/null +++ b/thirdparty/linux/ntl/src/DispSettings.c @@ -0,0 +1,199 @@ + +#include + +#include +using namespace std; + + +#define make_string_aux(x) #x +#define make_string(x) make_string_aux(x) + + +int main() +{ + + cout << "\n\n"; + cout << "/***************************\n"; + cout << "Basic Configuration Options:\n"; + + +#ifdef NTL_LEGACY_NO_NAMESPACE + cout << "NTL_LEGACY_NO_NAMESPACE\n"; +#endif + + +#ifdef NTL_LEGACY_INPUT_ERROR + cout << "NTL_LEGACY_INPUT_ERROR\n"; +#endif + + +#ifdef NTL_THREADS + cout << "NTL_THREADS\n"; +#endif + +#ifdef NTL_DISABLE_TLS_HACK + cout << "NTL_DISABLE_TLS_HACK\n"; +#endif + +#ifdef NTL_ENABLE_TLS_HACK + cout << "NTL_ENABLE_TLS_HACK\n"; +#endif + +#ifdef NTL_EXCEPTIONS + cout << "NTL_EXCEPTIONS\n"; +#endif + +#ifdef NTL_THREAD_BOOST + cout << "NTL_THREAD_BOOST\n"; +#endif + + +#ifdef NTL_GMP_LIP + cout << "NTL_GMP_LIP\n"; +#endif + + +#ifdef NTL_GF2X_LIB + cout << "NTL_GF2X_LIB\n"; +#endif + +#ifdef NTL_LONG_LONG_TYPE + cout << "NTL_LONG_LONG_TYPE: "; + cout << make_string(NTL_LONG_LONG_TYPE) << "\n"; +#endif + +#ifdef NTL_UNSIGNED_LONG_LONG_TYPE + cout << 
"NTL_UNSIGNED_LONG_LONG_TYPE: "; + cout << make_string(NTL_UNSIGNED_LONG_LONG_TYPE) << "\n"; +#endif + + +#ifdef NTL_X86_FIX + cout << "NTL_X86_FIX\n"; +#endif + +#ifdef NTL_NO_X86_FIX + cout << "NTL_NO_X86_FIX\n"; +#endif + +#ifdef NTL_NO_INIT_TRANS + cout << "NTL_NO_INIT_TRANS\n"; +#endif + +#ifdef NTL_CLEAN_INT + cout << "NTL_CLEAN_INT\n"; +#endif + +#ifdef NTL_CLEAN_PTR + cout << "NTL_CLEAN_PTR\n"; +#endif + +#ifdef NTL_RANGE_CHECK + cout << "NTL_RANGE_CHECK\n"; +#endif + + +#ifdef NTL_LEGACY_SP_MULMOD + cout << "NTL_LEGACY_SP_MULMOD\n"; +#endif + + +#ifdef NTL_DISABLE_LONGDOUBLE + cout << "NTL_DISABLE_LONGDOUBLE\n"; +#endif + +#ifdef NTL_DISABLE_LONGLONG + cout << "NTL_DISABLE_LONGLONG\n"; +#endif + +#ifdef NTL_DISABLE_LL_ASM + cout << "NTL_DISABLE_LL_ASM\n"; +#endif + +#ifdef NTL_MAXIMIZE_SP_NBITS + cout << "NTL_MAXIMIZE_SP_NBITS\n"; +#endif + + +cout << "\n"; +cout << "Resolution of double-word types:\n"; +cout << make_string(NTL_LL_TYPE) << "\n"; +cout << make_string(NTL_ULL_TYPE) << "\n"; + + +cout << "\n"; +cout << "Performance Options:\n"; + +#ifdef NTL_LONG_LONG + cout << "NTL_LONG_LONG\n"; +#endif + +#ifdef NTL_AVOID_FLOAT + cout << "NTL_AVOID_FLOAT\n"; +#endif + + +#ifdef NTL_SPMM_ULL + cout << "NTL_SPMM_ULL\n"; +#endif + + +#ifdef NTL_SPMM_ASM + cout << "NTL_SPMM_ASM\n"; +#endif + + +#ifdef NTL_AVOID_BRANCHING + cout << "NTL_AVOID_BRANCHING\n"; +#endif + +#ifdef NTL_FFT_BIGTAB + cout << "NTL_FFT_BIGTAB\n"; +#endif + +#ifdef NTL_FFT_LAZYMUL + cout << "NTL_FFT_LAZYMUL\n"; +#endif + + +#ifdef NTL_TBL_REM + cout << "NTL_TBL_REM\n"; +#endif + +#ifdef NTL_TBL_REM_LL + cout << "NTL_TBL_REM_LL\n"; +#endif + +#ifdef NTL_CRT_ALTCODE + cout << "NTL_CRT_ALTCODE\n"; +#endif + +#ifdef NTL_CRT_ALTCODE_SMALL + cout << "NTL_CRT_ALTCODE_SMALL\n"; +#endif + + +#ifdef NTL_GF2X_ALTCODE + cout << "NTL_GF2X_ALTCODE\n"; +#endif + + +#ifdef NTL_GF2X_ALTCODE1 + cout << "NTL_GF2X_ALTCODE1\n"; +#endif + + +#ifdef NTL_GF2X_NOINLINE + cout << "NTL_GF2X_NOINLINE\n"; +#endif + +#ifdef NTL_PCLMUL + cout << "NTL_PCLMUL\n"; +#endif + + + cout << "***************************/\n"; + cout << "\n\n"; + + return 0; +} diff --git a/thirdparty/linux/ntl/src/DoConfig b/thirdparty/linux/ntl/src/DoConfig new file mode 100644 index 0000000000..d94aa615bf --- /dev/null +++ b/thirdparty/linux/ntl/src/DoConfig @@ -0,0 +1,461 @@ +# This is a perl script, invoked from a shell + +# use warnings; # this doesn't work on older versions of perl + + + +%MakeFlag = ( + +'WIZARD' => 'on', +'SHARED' => 'off', +'NATIVE' => 'on' + +); + +%MakeVal = ( + +'CXX' => 'g++', +'CXXFLAGS' => '-g -O2', +'CXXAUTOFLAGS'=> '', +'AR' => 'ar', +'ARFLAGS' => 'ruv', +'RANLIB' => 'ranlib', +'LIBTOOL' => 'libtool', + +'LDFLAGS' => '', +'LDLIBS' => '-lm', +'CPPFLAGS' => '', + +'DEF_PREFIX' => '/usr/local', + +'PREFIX' => '$(DEF_PREFIX)', +'LIBDIR' => '$(PREFIX)/lib', +'INCLUDEDIR' => '$(PREFIX)/include', +'DOCDIR' => '$(PREFIX)/share/doc', + +'GMP_PREFIX' => '$(DEF_PREFIX)', +'GMP_INCDIR' => '$(GMP_PREFIX)/include', +'GMP_LIBDIR' => '$(GMP_PREFIX)/lib', + +'GF2X_PREFIX' => '$(DEF_PREFIX)', +'GF2X_INCDIR' => '$(GF2X_PREFIX)/include', +'GF2X_LIBDIR' => '$(GF2X_PREFIX)/lib', + +); + + + + +%ConfigFlag = ( + +'NTL_LEGACY_NO_NAMESPACE' => 'off', +'NTL_LEGACY_INPUT_ERROR' => 'off', +'NTL_DISABLE_LONGDOUBLE' => 'off', +'NTL_DISABLE_LONGLONG' => 'off', +'NTL_DISABLE_LL_ASM' => 'off', +'NTL_MAXIMIZE_SP_NBITS' => 'off', +'NTL_LEGACY_SP_MULMOD' => 'off', +'NTL_THREADS' => 'off', +'NTL_DISABLE_TLS_HACK' => 'off', +'NTL_ENABLE_TLS_HACK' => 'off', +'NTL_EXCEPTIONS' => 
'off',
+'NTL_THREAD_BOOST' => 'off',
+'NTL_GMP_LIP' => 'on',
+'NTL_GF2X_LIB' => 'off',
+'NTL_X86_FIX' => 'off',
+'NTL_NO_X86_FIX' => 'off',
+'NTL_AVOID_FLOAT' => 'off',
+'NTL_LONG_LONG' => 'off',
+'NTL_SPMM_ULL' => 'off',
+'NTL_SPMM_ASM' => 'off',
+'NTL_AVOID_BRANCHING' => 'off',
+'NTL_TBL_REM' => 'off',
+'NTL_TBL_REM_LL' => 'off',
+'NTL_CRT_ALTCODE' => 'off',
+'NTL_CRT_ALTCODE_SMALL' => 'off',
+'NTL_GF2X_NOINLINE' => 'off',
+'NTL_GF2X_ALTCODE' => 'off',
+'NTL_GF2X_ALTCODE1' => 'off',
+'NTL_PCLMUL' => 'off',
+'NTL_NO_INIT_TRANS' => 'off',
+'NTL_CLEAN_INT' => 'off',
+'NTL_CLEAN_PTR' => 'off',
+'NTL_RANGE_CHECK' => 'off',
+'NTL_FFT_BIGTAB' => 'off',
+'NTL_FFT_LAZYMUL' => 'off',
+
+);
+
+
+%ConfigVal = (
+
+'NTL_LONG_LONG_TYPE' => undef,
+'NTL_UNSIGNED_LONG_LONG_TYPE' => undef,
+
+);
+
+
+%Variable = ();
+
+$nowrite = 0;
+
+foreach $arg (@ARGV) {
+
+   if ($arg =~ '^(-h|help|-help|--help)$') {
+      system("more ../doc/config.txt");
+      exit 0;
+   }
+
+   if ($arg =~ '^--nowrite$') {
+      $nowrite = 1;
+      next;
+   }
+
+
+   if (($name, $val) = ($arg =~ /(.*?)=(.*)/)) {
+
+      $Variable{$name} = 0;
+
+      if (exists($MakeFlag{$name}) && ($val =~ 'on|off')) {
+         $MakeFlag{$name} = $val;
+         next;
+      }
+      elsif (exists($MakeVal{$name})) {
+         $MakeVal{$name} = $val;
+         next;
+      }
+      elsif (exists($ConfigFlag{$name}) && ($val =~ 'on|off')) {
+         $ConfigFlag{$name} = $val;
+         next;
+      }
+      elsif (exists($ConfigVal{$name})) {
+         $ConfigVal{$name} = $val;
+         next;
+      }
+   }
+
+   die "Error: unrecognized option: $arg\ntype \"./configure -h\" for help\n";
+
+}
+
+# special processing: NTL_THREAD_BOOST => NTL_THREADS
+
+if ($ConfigFlag{'NTL_THREAD_BOOST'} eq 'on') {
+   $ConfigFlag{'NTL_THREADS'} = 'on';
+}
+
+if ($ConfigFlag{'NTL_THREADS'} eq 'on' && $ConfigFlag{'NTL_GMP_LIP'} eq 'off') {
+   die "Error: NTL_THREADS currently only available with NTL_GMP_LIP...sorry\n";
+}
+
+
+
+# some special MakeVal values that are determined by SHARED
+
+if ($MakeFlag{'SHARED'} eq 'off') {
+
+   $MakeVal{'LSTAT'} = '';
+   $MakeVal{'LSHAR'} = '# ';
+
+} else {
+
+   # sanity check for libtool
+
+   print("***** checking for libtool *****\n");
+   if (system("$MakeVal{'LIBTOOL'} --version")) {
+      die "Error: bad libtool ($MakeVal{'LIBTOOL'}) -- try glibtool?";
+   }
+   print("***** libtool OK *****\n\n");
+
+   $MakeVal{'LSTAT'} = '# ';
+   $MakeVal{'LSHAR'} = '';
+
+}
+
+# special GMP variables
+
+$MakeVal{'GMPI'} = '# ';
+$MakeVal{'GMPL'} = '# ';
+$MakeVal{'GMP'} = '# ';
+
+if ($ConfigFlag{'NTL_GMP_LIP'} eq 'on') {
+   $MakeVal{'GMP'} = '';
+   if (exists($Variable{'DEF_PREFIX'}) ||
+       exists($Variable{'GMP_PREFIX'}) ||
+       exists($Variable{'GMP_INCDIR'})) {
+      $MakeVal{'GMPI'} = '';
+   }
+   if (exists($Variable{'DEF_PREFIX'}) ||
+       exists($Variable{'GMP_PREFIX'}) ||
+       exists($Variable{'GMP_LIBDIR'})) {
+      $MakeVal{'GMPL'} = '';
+   }
+}
+
+# special GF2X variables
+
+
+$MakeVal{'GF2XI'} = '# ';
+$MakeVal{'GF2XL'} = '# ';
+$MakeVal{'GF2X'} = '# ';
+
+
+if ($ConfigFlag{'NTL_GF2X_LIB'} eq 'on') {
+   $MakeVal{'GF2X'} = '';
+   if (exists($Variable{'DEF_PREFIX'}) ||
+       exists($Variable{'GF2X_PREFIX'}) ||
+       exists($Variable{'GF2X_INCDIR'})) {
+      $MakeVal{'GF2XI'} = '';
+   }
+   if (exists($Variable{'DEF_PREFIX'}) ||
+       exists($Variable{'GF2X_PREFIX'}) ||
+       exists($Variable{'GF2X_LIBDIR'})) {
+      $MakeVal{'GF2XL'} = '';
+   }
+}
+
+
+# copy %MakeVal and %MakeFlag as is into %MakeSub
+
+%MakeSub = (%MakeVal, %MakeFlag);
+
+
+# copy %ConfigFlag into %ConfigSub, substituting 0 for off and 1 for on
+
+
+%ConfigSub = ( );
+
+foreach $name (keys %ConfigFlag) {
+
+   if ($ConfigFlag{$name} eq 'on') {
+      $ConfigSub{$name} = 1;
+   }
+   else {
+      $ConfigSub{$name} = 0;
+   }
+
+}
+
+# special logic for NTL_LONG_LONG_TYPE
+
+if (defined($ConfigVal{'NTL_LONG_LONG_TYPE'})) {
+
+   $ConfigSub{'NTL_LONG_LONG_TYPE'} = $ConfigVal{'NTL_LONG_LONG_TYPE'};
+   $ConfigSub{'FLAG_LONG_LONG_TYPE'} = 1;
+
+}
+else {
+
+   $ConfigSub{'NTL_LONG_LONG_TYPE'} = 'long long';
+   $ConfigSub{'FLAG_LONG_LONG_TYPE'} = 0;
+
+}
+
+
+# special logic for NTL_UNSIGNED_LONG_LONG_TYPE
+
+if (defined($ConfigVal{'NTL_UNSIGNED_LONG_LONG_TYPE'})) {
+
+   $ConfigSub{'NTL_UNSIGNED_LONG_LONG_TYPE'} = $ConfigVal{'NTL_UNSIGNED_LONG_LONG_TYPE'};
+   $ConfigSub{'FLAG_UNSIGNED_LONG_LONG_TYPE'} = 1;
+
+}
+else {
+
+   $ConfigSub{'NTL_UNSIGNED_LONG_LONG_TYPE'} = 'unsigned long long';
+   $ConfigSub{'FLAG_UNSIGNED_LONG_LONG_TYPE'} = 0;
+
+}
+
+# special logic for WIZARD_HACK
+
+$ConfigSub{'WIZARD_HACK'} = '';
+
+
+# some extra logic consistency checks
+
+if ($ConfigSub{'NTL_X86_FIX'} + $ConfigSub{'NTL_NO_X86_FIX'} > 1) {
+
+   die "Error: at most one of NTL_X86_FIX and NTL_NO_X86_FIX may be on\n";
+
+}
+
+
+if ($ConfigSub{'NTL_AVOID_FLOAT'} + $ConfigSub{'NTL_LONG_LONG'} > 1) {
+
+   die "Error: at most one of NTL_AVOID_FLOAT and NTL_LONG_LONG may be on\n";
+
+}
+
+
+if ($ConfigSub{'NTL_SPMM_ULL'} + $ConfigSub{'NTL_SPMM_ASM'} > 1) {
+
+   die "Error: at most one of NTL_SPMM_ULL and NTL_SPMM_ASM may be on\n";
+
+}
+
+
+if ($ConfigSub{'NTL_GF2X_ALTCODE'} + $ConfigSub{'NTL_GF2X_ALTCODE1'} > 1) {
+
+   die "Error: at most one of NTL_GF2X_ALTCODE and NTL_GF2X_ALTCODE1 may be on\n";
+
+}
+
+
+
+
+#
+#
+# code to set CXXAUTOFLAGS
+
+sub RemoveProg {
+# This should work on unix and cygwin on windows
+
+   my ($name) = @_;
+   unlink($name); unlink("$name.exe");
+   return 1;
+}
+
+sub GenFiles {
+
+   open(MFILE, "< mfile");
+   open(MFILEOUT, "> mfileout");
+
+   while ($line = <MFILE>) {
+
+      $line =~ s/@\{(.*?)\}/$MakeSub{$1}/ge;
+
+      print MFILEOUT $line;
+
+   }
+
+   close(MFILE);
+   close(MFILEOUT);
+
+
+   # generate config.h
+
+
+   open(CFILE, "< cfile");
+   open(CFILEOUT, "> cfileout");
+
+   while ($line = <CFILE>) {
+
+      $line =~ s/@\{(.*?)\}/$ConfigSub{$1}/ge;
+
+      print CFILEOUT $line;
+
+   }
+
+   close(CFILE);
+   close(CFILEOUT);
+
+   open(HFILEOUT, "> hfileout");
+   $argstr = join(' ', @ARGV);
+   print HFILEOUT "// generated by ./configure $argstr\n";
+   print HFILEOUT "// CXXAUTOFLAGS=\"$MakeSub{'CXXAUTOFLAGS'}\" \n";
+   close(HFILEOUT);
+
+
+   return 1;
+}
+
+sub CopyFiles {
+
+   system("cp mfileout makefile");
+   system("cp cfileout ../include/NTL/config.h");
+   system("cp hfileout ../include/NTL/config_log.h");
+
+   return 1;
+}
+
+sub CheckCompile {
+   GenFiles();
+   CopyFiles();
+   RemoveProg("CheckCompile");
+   system("make CheckCompile >> CheckFlag.log 2>&1") and return 0;
+   system("./CheckCompile") and RemoveProg("CheckCompile") and return 0;
+   RemoveProg("CheckCompile");
+   return 1;
+}
+
+sub CheckFlag {
+   my ($flag) = @_;
+   my $try_flags = $MakeSub{'CXXAUTOFLAGS'};
+   print "*** checking $flag flag\n";
+   $MakeSub{'CXXAUTOFLAGS'} = $MakeSub{'CXXAUTOFLAGS'} . ' ' . $flag;
+   print("CXXAUTOFLAGS=\"$MakeSub{'CXXAUTOFLAGS'}\"\n");
+   if (CheckCompile()) {
+      print "*** $flag works\n";
+   }
+   else {
+      $MakeSub{'CXXAUTOFLAGS'} = $try_flags;
+      print "*** $flag does not work\n";
+   }
+   return 1;
+}
+
+
+
+if ($nowrite) {
+   GenFiles();
+   exit 0;
+}
+
+
+if (exists($Variable{'CXXAUTOFLAGS'})) {
+   print("CXXAUTOFLAGS=\"$MakeSub{'CXXAUTOFLAGS'}\"\n");
+   GenFiles();
+   CopyFiles();
+   exit 0;
+}
+
+$std_flag = 0;
+$pthread_flag = 0;
+$native_flag = 0;
+
+# special processing for NTL_THREADS
+
+if ($ConfigFlag{'NTL_THREADS'} eq 'on') {
+   $std_flag = 1;      # ' -std=c++11';
+   $pthread_flag = 1;  # ' -pthread';
+}
+
+# special processing for NTL_EXCEPTIONS
+
+if ($ConfigFlag{'NTL_EXCEPTIONS'} eq 'on') {
+   $std_flag = 1;      # ' -std=c++11';
+}
+
+# special processing for NATIVE
+
+if ($MakeFlag{'NATIVE'} eq 'on') {
+   $native_flag = 1;   # ' -march=native';
+}
+
+
+system("echo '*** CheckFlag log ***' > CheckFlag.log");
+
+if ($std_flag) {
+   CheckFlag('-std=c++11');
+}
+
+if ($pthread_flag) {
+   CheckFlag('-pthread');
+}
+
+if ($native_flag) {
+   CheckFlag('-march=native');
+}
+
+print("CXXAUTOFLAGS=\"$MakeSub{'CXXAUTOFLAGS'}\"\n");
+print("generating makefile\n");
+print("generating ../include/NTL/config.h\n");
+print("generating ../include/NTL/config_log.h\n");
+
+GenFiles();
+CopyFiles();
+exit 0;
+
+
diff --git a/thirdparty/linux/ntl/src/ExceptionTest.c b/thirdparty/linux/ntl/src/ExceptionTest.c
new file mode 100644
index 0000000000..6d99f6a756
--- /dev/null
+++ b/thirdparty/linux/ntl/src/ExceptionTest.c
@@ -0,0 +1,70 @@
+
+#include
+#include
+
+unsigned long exception_counter = 0;
+
+NTL_CLIENT
+
+int main()
+{
+   ZZ_p::init(to_ZZ(17));
+
+   ZZ_pX P;
+   BuildIrred(P, 10);
+
+   ZZ_pE::init(P);
+
+   ZZ_pEX f, g, h;
+
+   random(f, 20);
+   SetCoeff(f, 20);
+
+   random(h, 20);
+
+   g = MinPolyMod(h, f);
+
+   if (deg(g) < 0) TerminalError("bad ZZ_pEXTest (1)");
+   if (CompMod(g, h, f) != 0)
+      TerminalError("bad ZZ_pEXTest (2)");
+
+
+
+   vec_pair_ZZ_pEX_long v;
+
+   long n = 100;
+
+   random(f, n);
+   SetCoeff(f, n);
+
+   double running_counter = 100;
+
+   bool done = false;
+
+   while (!done) {
+      done = true;
+      running_counter *= 1.521;
+      exception_counter = running_counter;
+      cerr << "counter = " << exception_counter << "\n";
+      try {
+         CanZass(v, f, 1);
+      }
+      catch(...) {
+         cerr << "\n**** caught exception -- retry...\n";
+         done = false;
+      }
+   }
+
+   exception_counter = 0;
+
+
+   g = mul(v);
+   if (f != g) cerr << "oops1\n";
+
+   long i;
+   for (i = 0; i < v.length(); i++)
+      if (!DetIrredTest(v[i].a))
+         TerminalError("bad ZZ_pEXTest (3)");
+
+
+}
diff --git a/thirdparty/linux/ntl/src/FFT.c b/thirdparty/linux/ntl/src/FFT.c
new file mode 100644
index 0000000000..f49e4fff43
--- /dev/null
+++ b/thirdparty/linux/ntl/src/FFT.c
@@ -0,0 +1,2288 @@
+
+#include
+#include
+
+
+/********************************************************************
+
+This is an implementation of a "small prime" FFT, which lies at the heart of
+the ZZ_pX arithmetic, as well as some other applications.
+
+The basic algorithm is loosely based on the routine in the Cormen, Leiserson,
+Rivest, and Stein book on algorithms.
+
+
+CACHE PERFORMANCE
+
+Some attention has been paid to cache performance, but there is still more that
+could be done.
+
+
+The bit-reverse-copy (BRC) algorithm is a simple table-driven algorithm up to
+a certain threshold, and then switches to the COBRA algorithm from Carter and
+Gatlin, "Towards an optimal bit-reversal permutation algorithm", FOCS 1998.
+I've found that COBRA helps, but not much: just 5-10%. I've also found that
+getting rid of BRC altogether leads to another 5-10% improvement. These
+numbers are based on experiments with 2^{17}- and 2^{19}-point FFTs, looping
+over 50 different primes on a Core 2 Duo machine.
+
+One could get rid of bit-reverse-copy altogether. The current FFT routines all
+implement what is called Decimation-In-Time (DIT), which means that inputs are
+bit reversed. One can also implement the FFT using Decimation-In-Frequency
+(DIF), which means that the outputs are bit reversed. One can get rid of the
+bit reversals for doing convolutions by simply doing the forward FFT using
+DIF-FFT and the reverse FFT using DIT-FFT. This would allow one to simply
+eliminate all of the bit-reversal steps, which would lead to some nontrivial
+savings. However, there are a few places in NTL where I rely on the ordering
+of elements within an FFTRep to be their "natural ordering". The reduce and
+AddExpand routines in ZZ_pX come to mind (which actually may become simpler),
+along with routines like RevToFFTRep and RevFromFFTRep (which may be trickier).
+Anyway, because BRC doesn't seem to be a big problem right now, it doesn't
+seem worth worrying about this.
+
+
+Within the FFT algorithm itself, I have not tried anything like Bailey's 4-step
+algorithm. Maybe this should be tested. However, I somehow doubt that
+anything more than modest gains will be achieved, since most modern processors
+now employ a kind of memory prefetching technique, to keep the cache filled
+with memory locations that are likely to be used next. Moreover, the FFT
+algorithm used here accesses memory for the most part in small, sequential
+strides, which meshes well with hardware prefetching. The paper "Algorithms to
+Take Advantage of Hardware Prefetching" [Pan, Cherng, Dick, Ladner, Workshop on
+Algorithm Engineering and Experiments, 2007] contains some interesting
+experiments and useful background information. Anyway, there is still room for
+more experimentation.
+
+
+
+SINGLE-PRECISION MODULAR ARITHMETIC
+
+The implementation of arithmetic modulo n, where n is a "word sized" integer,
+is critical to the performance of the FFT. Such word-sized modular arithmetic
+is used throughout many other parts of NTL, and is a part of the external,
+documented interface.
+
+As NTL was initially built on top of Arjen Lenstra's LIP software, I stole a
+lot of ideas from LIP. One very nice idea was LIP's way of handling
+single-precision modular arithmetic. Back in those days (the early 1990's), I
+was targeting 32-bit machines, mainly SPARC stations. LIP's strategy was to
+restrict n to 30 bits; then, to compute a*b % n, where 0 <= a, b < n, the
+following was computed:
+
+   long q = long(double(a) * double(b) / double(n));
+   long r = a*b - q*n;
+   if (r >= n)
+      r -= n;
+   else if (r < 0)
+      r += n;
+
+With quite reasonable assumptions about floating point (certainly, anything
+even remotely close to IEEE 64-bit doubles), the computation of q always gives
+the true quotient floor(a*b / n), plus or minus 1. The computation of r is
+done modulo 2^{word size}, and the following if/then/else adjusts r as
+necessary. To be more portable, some of these computations should really be
+done using unsigned arithmetic, but that is not so important here. Also, the
+adjustment steps can be replaced by simple non-branching instruction sequences
+involving SHIFT, AND, and ADD/SUB instructions. On many modern machines, this
+is usually faster and NTL uses this non-branching strategy.
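+
+For illustration, a branch-free version of those adjustment steps might look
+like the following sketch (the helper names here are illustrative, not NTL's
+actual internal names; it assumes that right-shifting a negative signed long
+is an arithmetic shift, which holds on all mainstream platforms):
+
+   // add n if r is negative; assumes r in (-n, n)
+   long CorrectDeficit(long r, long n)
+   {
+      return r + (n & (r >> (NTL_BITS_PER_LONG-1)));
+   }
+
+   // subtract n if r >= n; assumes r in [0, 2n)
+   long CorrectExcess(long r, long n)
+   {
+      r -= n;
+      return r + (n & (r >> (NTL_BITS_PER_LONG-1)));
+   }
+
+The shift yields an all-ones mask exactly when r is negative, so the AND
+selects either n or 0; each conditional branch becomes a SHIFT, an AND, and
+an ADD, as described above.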
+
+Other simple optimizations can be done, such as precomputing 1/double(n) when n
+remains fixed for many computations, as is often the case.
+
+Note also that this strategy works perfectly well even when a or b is larger
+than n, but the quotient itself is bounded by 2^30.
+
+This strategy worked well for many years. I had considered employing
+"Montgomery multiplication", but did not do so for a couple of reasons:
+   1) it would require non-portable code, because Montgomery multiplication
+      requires the computation of two-word products,
+   2) I did not like the idea of working with "alternative representations"
+      for integers mod n, as this would make the interfaces more awkward.
+
+At some point in the early 2000's, this strategy was starting to slow things
+down, as floating point arithmetic, especially the integer/floating point
+conversions, was starting to slow down relative to integer arithmetic. This
+was especially true on x86 machines, which by this time were starting to become
+the most important target. As it happens, later in the 2000's, as the x86
+platforms started to use SSE instructions in lieu of the old x87 FPU
+instructions, this speed differential again became less of a problem.
+Nevertheless, I introduced a new technique in NTL 5.4 back in 2005 that speeds
+things up across a variety of platforms. I never claimed it was particularly
+new, and I never really documented many details about it, but since then, it
+has come to be known as "Shoup multiplication" in a few papers, so I'll accept
+that. :-) The paper "Faster arithmetic for number-theoretic transforms" [David
+Harvey, J. Symb. Comp. 60 (2014) 113–119] seems to be the first place where it
+is discussed in detail, and Harvey's paper also contains some improvements
+which I discuss below.
+
+The basic idea is that in many computations, not only n, but one of the
+arguments, say b, remains fixed for many computations of a*b % n, and so we
+can afford to do a little precomputation, based on b and n, to speed things up.
+This approach does require the ability to compute double-word products
+(actually, just the high word of the product), but it still presents the same
+basic interface as before (i.e., no awkward, alternative representations);
+moreover, on platforms where we can't get double-word products, the
+implementation falls back to the old floating point strategy, and client code
+need not be aware of this.
+
+The basic idea is this: suppose 0 <= n < 2^w, and 0 <= a < 2^w, and 0 <= b < n.
+We precompute bninv = floor(2^w*b/n). Then if we compute q =
+floor(a*bninv/2^w), it can be argued that q is either floor(a*b/n), or is 1 too
+small. The computation of bninv can be done using the floating point
+techniques described above. The computation of q can be done by computing the
+high word of a double-word product (it helps if bninv is left-shifted an
+appropriate amount first). Once we have q, we can compute a*b - q*n as before,
+and adjust (but now only one adjustment is needed). So after the
+precomputation, the whole operation takes 3 multiplies (one double-word and two
+single-word), and a small handful of simple instructions (adds, shifts, etc).
+Moreover, two of the three multiplies can start in parallel, on platforms where
+this is possible.
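+
+In code, the idea looks roughly like this (a sketch only, assuming w = 64, a
+compiler with a 128-bit integer type such as gcc's unsigned __int128, and
+n < 2^62 or so; NTL's actual MulModPrecon/PrepMulModPrecon are organized
+somewhat differently, e.g. the left-shifting of bninv mentioned above is
+omitted here):
+
+   typedef unsigned long u64;
+
+   u64 PrepMulModPreconSketch(u64 b, u64 n)  // bninv = floor(2^64 * b / n)
+   {
+      return (u64) ((((unsigned __int128) b) << 64) / n);
+   }
+
+   u64 MulModPreconSketch(u64 a, u64 b, u64 n, u64 bninv)
+   {
+      u64 q = (u64) (((unsigned __int128) a * bninv) >> 64); // high word
+      u64 r = a*b - q*n;   // computed mod 2^64; true value lies in [0, 2n)
+      if (r >= n) r -= n;  // the single adjustment
+      return r;
+   }
+
+Since q is floor(a*b/n) or one less, r lands in [0, 2n), and one conditional
+subtraction (or its branch-free equivalent) finishes the job.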
+
+David Harvey noticed that because on modern machines, multiplies are really not
+that slow compared to additions, the cost of all of the adjustments (in the
+MulMod, as well as in the AddMod and SubMod's in the basic FFT butterfly steps)
+starts to dominate the cost of the FFT. Indeed, with a straightforward
+implementation of the above ideas, there are three multiplies and three
+adjustment steps in each butterfly step. David's idea was to work with
+redundant representations mod n, in the range [0..4*n), and thus reduce the
+number of adjustments per butterfly from three to one. I've implemented this
+idea here, and it does indeed make a significant difference, which is even more
+pronounced when all of the FFT multipliers b and corresponding bninv values are
+precomputed. My initial implementation of David's ideas (v6.0 in 2013) only
+implemented his approach with these precomputed tables: it seemed that
+without these tables, it was not a significant improvement. However, I later
+figured out how to reduce the cost of computing all the necessary data "on the
+fly", in a way that seems only slightly (10-15%) slower overall. I introduced
+this in v9.1 in 2015, and set things up so that now the pre-computed tables are
+still used, but not exclusively, in such a way as to reduce the memory used by
+these tables for very large polynomials (either very high degree or lots of FFT
+primes). The idea here is simple, but I haven't seen it discussed elsewhere,
+so I'll document the basic idea here.
+
+Suppose we have the preconditioners for a and b, and want a*b % n along with
+the preconditioner for a*b % n.
+
+For a, let us suppose that we have both q1 and r1, where:
+   2^w*a = n*q1 + r1
+We can obtain both q1 and r1 using floating point techniques.
+
+Step 1. Compute a*b % n, using the integer-only MulMod, using
+the preconditioner for either a or b.
+
+Step 2. Compute q2 and r2 such that
+   r1*b = n*q2 + r2
+We can obtain these using the integer-only MulMod, preconditioned on b.
+Actually, we only need q2, not r2.
+
+Step 3. Compute
+   q3 = q1*b + q2 mod 2^w
+which we can compute with just a single-word multiply and an addition.
+
+One can easily show that the value q3 computed above is indeed the
+preconditioner for a*b % n.
+
+Note that, in theory, if the computation in Step 2 is done using the
+preconditioner for a (i.e., q1), then the multiplication q1*b in Step 3 should
+not really be necessary (assuming that computing both high and low words of a
+double-word product is no more expensive than just computing the low word).
+However, none of the compilers I've used have been able to perform that
+optimization.
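+
+Continuing the w = 64 sketch from above, Steps 1-3 might be rendered as
+follows (again illustrative only; the helper that returns the exact quotient
+is hypothetical, not an NTL routine):
+
+   // like MulModPreconSketch, but also return quo = floor(x*b / n)
+   u64 MulModPreconWithQuo(u64& quo, u64 x, u64 b, u64 n, u64 bninv)
+   {
+      u64 q = (u64) (((unsigned __int128) x * bninv) >> 64);
+      u64 r = x*b - q*n;
+      if (r >= n) { r -= n; q++; }  // after this, q is the exact quotient
+      quo = q;
+      return r;
+   }
+
+   // given 2^64*a = n*q1 + r1 and bninv for b, compute c = a*b % n
+   // together with its preconditioner q3 = floor(2^64*c / n) mod 2^64
+   void MulModAndPrecon(u64& c, u64& q3, u64 a, u64 q1, u64 r1,
+                        u64 b, u64 bninv, u64 n)
+   {
+      u64 q2;
+      c = MulModPreconSketch(a, b, n, bninv);    // Step 1
+      MulModPreconWithQuo(q2, r1, b, n, bninv);  // Step 2: q2 = floor(r1*b/n)
+      q3 = q1*b + q2;                            // Step 3, mod 2^64
+   }
+
+One can check the algebra: from 2^64*a = n*q1 + r1 and r1*b = n*q2 + r2 it
+follows that 2^64*a*b = n*(q1*b + q2) + r2 with 0 <= r2 < n, and subtracting
+the appropriate multiple of 2^64*n shows that q1*b + q2 mod 2^64 is indeed
+floor(2^64*(a*b % n)/n).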
+
+
+64-BIT MACHINES
+
+Prior to v9.0 of NTL, on 64-bit machines, the modulus n was restricted to 50
+bits, in order to allow the use of double-precision techniques, as double's
+have 53 bits of precision. However, since the x86-64 is such an important
+target, and one can still access the old x87 FPU, which provides 64-bit
+precision, the bound on n on such platforms is now 60 bits. Actually, 62 bits
+could be supported, but other things (namely, the TBL_REM implementation in
+g_lip_impl.h) start to slow down if 62 bits are used, so 60 seems like a good
+compromise. Currently, 60-bit moduli are available only when using gcc on
+x86-64 machines, and when compiling NTL with GMP.
+
+Now, the FPU-based multiplies are in fact a bit slower than the SSE-based
+multiplies. However, with the preconditioned all-integer MulMod's now used
+extensively on almost all critical paths within NTL, this does not really
+matter, and in fact, many things get faster with the wider moduli, so overall,
+it is a net performance gain.
+
+
+FUTURE TRENDS
+
+In the future, I might also experiment with other MulMod techniques, such as
+those described in "Improved Division by Invariant Integers" [Moeller,
+Granlund, IEEE Transactions on Computers, June 2010]. This might allow for,
+say, 60-bit moduli on 64-bit machines that don't have extended double
+precision. It is not clear how the performance of this would compare with the
+floating-point methods; however, it probably doesn't matter too much, as the
+preconditioned MulMod's are the most important ones.
+
+It might also be useful to go back and reconsider Montgomery multiplication, at
+least for "internal" use, like the FFT. However, I doubt that this will help
+significantly.
+
+As mentioned above, it could be useful to experiment with more cache-friendly
+variants of the FFT, like Bailey's 4-step method. I could also experiment with
+using the DIF/DIT approach. This affects some code outside of FFT as well (in
+ZZ_pX and zz_pX, like reduce and AddExpand), but should not affect any
+documented interfaces.
+
+Another direction to consider is exploiting concurrency. Besides using
+multiple cores to parallelize things at a higher level, it would be nice to
+exploit newer SIMD instructions. Unfortunately, as of now (early 2015), these
+don't seem to have the functionality I need. A 64-bit x 64-bit -> low order
+64-bit instruction is supposed to be available soon in the new AVX-512
+instruction set. That would be a good start, but I would really like to get
+the high-order 64-bits too. Maybe that will come someday. In the mean time,
+it might be fun to experiment with using the AVX-512 instructions that will be
+available, which will allow at least a floating-point-based implementation, or
+an all-integer implementation with emulated MulHi. I have no idea how
+performance will compare.
+
+
+
+********************************************************************/
+
+
+
+
+
+
+
+// #define NTL_BRC_TEST
+// Flag to test the cost of "bit reverse copy"
+
+
+#define NTL_FFT_BIGTAB_LIMIT (200)
+#ifndef NTL_BRC_TEST
+#define NTL_FFT_BIGTAB_MAXROOT (17)
+#else
+#define NTL_FFT_BIGTAB_MAXROOT (25)
+#endif
+// big tables are only used for the first NTL_FFT_BIGTAB_LIMIT primes,
+// and then only for k-values at most NTL_FFT_BIGTAB_MAXROOT
+
+// NOTE: in newer versions of NTL (v9.1 and later), the BIGTAB
+// code is only about 5-15% faster than the non-BIGTAB code, so
+// this is not a great time/space trade-off.
+
+// NOTE: NTL_FFT_BIGTAB_MAXROOT is set independently of the parameter
+// NTL_FFTMaxRoot defined in FFT.h (and which is typically 25).
+// The space for the LazyTable FFTMultipliers could be reduced a bit
+// by using min(NTL_FFT_BIGTAB_MAXROOT, NTL_FFTMaxRoot) + 1 for the
+// size of these tables.
+
+
+
+NTL_START_IMPL
+
+
+FFTTablesType FFTTables;
+// a truly GLOBAL variable, shared among all threads
+
+
+
+long IsFFTPrime(long n, long& w)
+{
+   long m, x, y, z;
+   long j, k;
+
+
+   if (n <= 1 || n >= NTL_SP_BOUND) return 0;
+
+   if (n % 2 == 0) return 0;
+
+   if (n % 3 == 0) return 0;
+
+   if (n % 5 == 0) return 0;
+
+   if (n % 7 == 0) return 0;
+
+   m = n - 1;
+   k = 0;
+   while ((m & 1) == 0) {
+      m = m >> 1;
+      k++;
+   }
+
+   for (;;) {
+      x = RandomBnd(n);
+
+      if (x == 0) continue;
+      z = PowerMod(x, m, n);
+      if (z == 1) continue;
+
+      x = z;
+      j = 0;
+      do {
+         y = z;
+         z = MulMod(y, y, n);
+         j++;
+      } while (j != k && z != 1);
+
+      if (z != 1 || y != n-1) return 0;
+
+      if (j == k)
+         break;
+   }
+
+   /* x^{2^k} = 1 mod n, x^{2^{k-1}} = -1 mod n */
+
+   long TrialBound;
+
+   TrialBound = m >> k;
+   if (TrialBound > 0) {
+      if (!ProbPrime(n, 5)) return 0;
+
+      /* we have to do trial division by special numbers */
+
+      TrialBound = SqrRoot(TrialBound);
+
+      long a, b;
+
+      for (a = 1; a <= TrialBound; a++) {
+         b = (a << k) + 1;
+         if (n % b == 0) return 0;
+      }
+   }
+
+   /* n is an FFT prime */
+
+
+   for (j = NTL_FFTMaxRoot; j < k; j++) {
+      x = MulMod(x, x, n);
+   }
+
+   w = x;
+
+   return 1;
+}
+
+
+static
+void NextFFTPrime(long& q, long& w, long index)
+{
+   static long m = NTL_FFTMaxRootBnd + 1;
+   static long k = 0;
+   // m and k are truly GLOBAL variables, shared among
+   // all threads. Access is protected by a critical section
+   // guarding FFTTables
+
+   static long last_index = -1;
+   static long last_m = 0;
+   static long last_k = 0;
+
+   if (index == last_index) {
+      // roll back m and k...part of a simple error recovery
+      // strategy if an exception was thrown in the last
+      // invocation of UseFFTPrime...probably of academic
+      // interest only
+
+      m = last_m;
+      k = last_k;
+   }
+   else {
+      last_index = index;
+      last_m = m;
+      last_k = k;
+   }
+
+   long t, cand;
+
+   for (;;) {
+      if (k == 0) {
+         m--;
+         if (m < 5) ResourceError("ran out of FFT primes");
+         k = 1L << (NTL_SP_NBITS-m-2);
+      }
+
+      k--;
+
+      cand = (1L << (NTL_SP_NBITS-1)) + (k << (m+1)) + (1L << m) + 1;
+
+      if (!IsFFTPrime(cand, t)) continue;
+      q = cand;
+      w = t;
+      return;
+   }
+}
+
+
+long CalcMaxRoot(long p)
+{
+   p = p-1;
+   long k = 0;
+   while ((p & 1) == 0) {
+      p = p >> 1;
+      k++;
+   }
+
+   if (k > NTL_FFTMaxRoot)
+      return NTL_FFTMaxRoot;
+   else
+      return k;
+}
+
+
+void InitFFTPrimeInfo(FFTPrimeInfo& info, long q, long w, bool bigtab)
+{
+   mulmod_t qinv = PrepMulMod(q);
+
+   long mr = CalcMaxRoot(q);
+
+   info.q = q;
+   info.qinv = qinv;
+   info.qrecip = 1/double(q);
+   info.zz_p_context = 0;
+
+
+   info.RootTable[0].SetLength(mr+1);
+   info.RootTable[1].SetLength(mr+1);
+   info.TwoInvTable.SetLength(mr+1);
+   info.TwoInvPreconTable.SetLength(mr+1);
+
+   long *rt = &info.RootTable[0][0];
+   long *rit = &info.RootTable[1][0];
+   long *tit = &info.TwoInvTable[0];
+   mulmod_precon_t *tipt = &info.TwoInvPreconTable[0];
+
+   long j;
+   long t;
+
+   rt[mr] = w;
+   for (j = mr-1; j >= 0; j--)
+      rt[j] = MulMod(rt[j+1], rt[j+1], q);
+
+   rit[mr] = InvMod(w, q);
+   for (j = mr-1; j >= 0; j--)
+      rit[j] = MulMod(rit[j+1], rit[j+1], q);
+
+   t = InvMod(2, q);
+   tit[0] = 1;
+   for (j = 1; j <= mr; j++)
+      tit[j] = MulMod(tit[j-1], t, q);
+
+   for (j = 0; j <= mr; j++)
+      tipt[j] = PrepMulModPrecon(tit[j], q, qinv);
+
+   if (bigtab)
+      info.bigtab.make();
+}
+
+
+#ifndef NTL_WIZARD_HACK
+SmartPtr<zz_pInfoT> Build_zz_pInfo(FFTPrimeInfo *info);
+#else
+SmartPtr<zz_pInfoT> Build_zz_pInfo(FFTPrimeInfo *info) { return 0; }
+#endif
+
+void UseFFTPrime(long index)
+{
+   if (index < 0) LogicError("invalid FFT prime index");
+
+   do { // NOTE: thread safe lazy init
+      FFTTablesType::Builder bld(FFTTables, index+1);
+      long amt = bld.amt();
+      if (!amt) break;
+
+      long first = index+1-amt;
+      // initialize entries first..index
+
+      long i;
+      for (i = first; i <= index; i++) {
+         UniquePtr<FFTPrimeInfo> info;
+         info.make();
+
+         long q, w;
+         NextFFTPrime(q, w, i);
+
+         bool bigtab = false;
+
+#ifdef NTL_FFT_BIGTAB
+         if (i < NTL_FFT_BIGTAB_LIMIT)
+            bigtab = true;
+#endif
+
+         InitFFTPrimeInfo(*info, q, w, bigtab);
+         info->zz_p_context = Build_zz_pInfo(info.get());
+         bld.move(info);
+      }
+
+   } while (0);
+}
+
+
+
+
+
+#define NTL_PIPELINE
+// Define to get some software pipelining...actually seems
+// to help somewhat
+
+#define NTL_LOOP_UNROLL
+// Define to unroll some loops.  Seems to help a little
+
+// FIXME: maybe the above two should be tested by the wizard
+
+
+static
+long RevInc(long a, long k)
+{
+   long j, m;
+
+   j = k;
+   m = 1L << (k-1);
+
+   while (j && (m & a)) {
+      a ^= m;
+      m >>= 1;
+      j--;
+   }
+   if (j) a ^= m;
+   return a;
+}
+
+
+// FIXME: This could potentially be shared across threads, using
+// a "lazy table".
+static inline
+Vec<long> *get_brc_mem()
+{
+   NTL_TLS_LOCAL_INIT(Vec< Vec<long> >, brc_mem_vec, (INIT_SIZE, NTL_FFTMaxRoot+1));
+   return brc_mem_vec.elts();
+}
+
+
+
+#if 0
+
+
+static
+void BitReverseCopy(long * NTL_RESTRICT A, const long * NTL_RESTRICT a, long k)
+{
+   Vec<long> *brc_mem = get_brc_mem();
+
+   long n = 1L << k;
+   long* NTL_RESTRICT rev;
+   long i, j;
+
+   rev = brc_mem[k].elts();
+   if (!rev) {
+      brc_mem[k].SetLength(n);
+      rev = brc_mem[k].elts();
+      for (i = 0, j = 0; i < n; i++, j = RevInc(j, k))
+         rev[i] = j;
+   }
+
+   for (i = 0; i < n; i++)
+      A[rev[i]] = a[i];
+}
+
+
+static
+void BitReverseCopy(unsigned long * NTL_RESTRICT A, const long * NTL_RESTRICT a, long k)
+{
+   Vec<long> *brc_mem = get_brc_mem();
+
+   long n = 1L << k;
+   long* NTL_RESTRICT rev;
+   long i, j;
+
+   rev = brc_mem[k].elts();
+   if (!rev) {
+      brc_mem[k].SetLength(n);
+      rev = brc_mem[k].elts();
+      for (i = 0, j = 0; i < n; i++, j = RevInc(j, k))
+         rev[i] = j;
+   }
+
+   for (i = 0; i < n; i++)
+      A[rev[i]] = a[i];
+}
+
+#else
+
+
+
+#define NTL_BRC_THRESH (12)
+#define NTL_BRC_Q (5)
+
+// Must have NTL_BRC_THRESH > 2*NTL_BRC_Q
+// Should also have (1L << (2*NTL_BRC_Q)) small enough
+// so that we can fit that many long's into the cache
+
+
+static
+long *BRC_init(long k)
+{
+   Vec<long> *brc_mem = get_brc_mem();
+
+   long n = (1L << k);
+   brc_mem[k].SetLength(n);
+   long *rev = brc_mem[k].elts();
+   long i, j;
+   for (i = 0, j = 0; i < n; i++, j = RevInc(j, k))
+      rev[i] = j;
+   return rev;
+}
+
+
+static
+void BasicBitReverseCopy(long * NTL_RESTRICT B,
+                         const long * NTL_RESTRICT A, long k)
+{
+   Vec<long> *brc_mem = get_brc_mem();
+
+   long n = 1L << k;
+   long* NTL_RESTRICT rev;
+   long i, j;
+
+   rev = brc_mem[k].elts();
+   if (!rev) rev = BRC_init(k);
+
+   for (i = 0; i < n; i++)
+      B[rev[i]] = A[i];
+}
+
+
+
+static
+void COBRA(long * NTL_RESTRICT B, const long * NTL_RESTRICT A, long k)
+{
+   Vec<long> *brc_mem = get_brc_mem();
+
+   NTL_TLS_LOCAL(Vec<long>, BRC_temp);
+
+   long q = NTL_BRC_Q;
+   long k1 = k - 2*q;
+   long * NTL_RESTRICT rev_k1, * NTL_RESTRICT rev_q;
+   long *NTL_RESTRICT T;
+   long a, b, c, a1, b1, c1;
+   long i, j;
+
+   rev_k1 = brc_mem[k1].elts();
+   if (!rev_k1) rev_k1 = BRC_init(k1);
+
+   rev_q = brc_mem[q].elts();
+   if (!rev_q) rev_q = BRC_init(q);
+
+   T = BRC_temp.elts();
+   if (!T) {
+      BRC_temp.SetLength(1L << (2*q));
+      T = BRC_temp.elts();
+   }
+
+   for (b = 0; b < (1L << k1); b++) {
+      b1 = rev_k1[b];
+      for (a = 0; a
< (1L << q); a++) { + a1 = rev_q[a]; + for (c = 0; c < (1L << q); c++) + T[(a1 << q) + c] = A[(a << (k1+q)) + (b << q) + c]; + } + + for (c = 0; c < (1L << q); c++) { + c1 = rev_q[c]; + for (a1 = 0; a1 < (1L << q); a1++) + B[(c1 << (k1+q)) + (b1 << q) + a1] = T[(a1 << q) + c]; + } + } +} + +static +void BitReverseCopy(long * NTL_RESTRICT B, const long * NTL_RESTRICT A, long k) +{ + if (k <= NTL_BRC_THRESH) + BasicBitReverseCopy(B, A, k); + else + COBRA(B, A, k); +} + + +static +void BasicBitReverseCopy(unsigned long * NTL_RESTRICT B, + const long * NTL_RESTRICT A, long k) +{ + Vec *brc_mem = get_brc_mem(); + + long n = 1L << k; + long* NTL_RESTRICT rev; + long i, j; + + rev = brc_mem[k].elts(); + if (!rev) rev = BRC_init(k); + + for (i = 0; i < n; i++) + B[rev[i]] = A[i]; +} + + + +static +void COBRA(unsigned long * NTL_RESTRICT B, const long * NTL_RESTRICT A, long k) +{ + Vec *brc_mem = get_brc_mem(); + + NTL_TLS_LOCAL(Vec, BRC_temp); + + long q = NTL_BRC_Q; + long k1 = k - 2*q; + long * NTL_RESTRICT rev_k1, * NTL_RESTRICT rev_q; + unsigned long *NTL_RESTRICT T; + long a, b, c, a1, b1, c1; + long i, j; + + rev_k1 = brc_mem[k1].elts(); + if (!rev_k1) rev_k1 = BRC_init(k1); + + rev_q = brc_mem[q].elts(); + if (!rev_q) rev_q = BRC_init(q); + + T = BRC_temp.elts(); + if (!T) { + BRC_temp.SetLength(1L << (2*q)); + T = BRC_temp.elts(); + } + + for (b = 0; b < (1L << k1); b++) { + b1 = rev_k1[b]; + for (a = 0; a < (1L << q); a++) { + a1 = rev_q[a]; + for (c = 0; c < (1L << q); c++) + T[(a1 << q) + c] = A[(a << (k1+q)) + (b << q) + c]; + } + + for (c = 0; c < (1L << q); c++) { + c1 = rev_q[c]; + for (a1 = 0; a1 < (1L << q); a1++) + B[(c1 << (k1+q)) + (b1 << q) + a1] = T[(a1 << q) + c]; + } + } +} + +static +void BitReverseCopy(unsigned long * NTL_RESTRICT B, const long * NTL_RESTRICT A, long k) +{ + if (k <= NTL_BRC_THRESH) + BasicBitReverseCopy(B, A, k); + else + COBRA(B, A, k); +} + + + + +#endif + + +#ifdef NTL_FFT_LAZYMUL +// we only honor the FFT_LAZYMUL flag if either the SPMM_ULL, SPMM_ASM, or LONGLONG_SP_MULMOD +// flags are set + +#if (!defined(NTL_SPMM_ULL) && !defined(NTL_SPMM_ASM) && !defined(NTL_LONGLONG_SP_MULMOD)) +#undef NTL_FFT_LAZYMUL +#endif + +#endif + +#ifndef NTL_FFT_BIGTAB + +#define NTL_FFT_ROUTINE_TAB FFT_aux +#define NTL_FFT_ROUTINE_NOTAB FFT + +#else + +#define NTL_FFT_ROUTINE_TAB FFT +#define NTL_FFT_ROUTINE_NOTAB FFT_aux + +#endif + + + + + + +#ifndef NTL_FFT_LAZYMUL + + +// A basic FFT, no tables, no lazy strategy + +void NTL_FFT_ROUTINE_NOTAB(long* A, const long* a, long k, const FFTPrimeInfo& info, long dir) +// performs a 2^k-point convolution modulo q + +{ + long q = info.q; + const long *root = info.RootTable[dir].elts(); + mulmod_t qinv = info.qinv; + + if (k <= 1) { + if (k == 0) { + A[0] = a[0]; + return; + } + if (k == 1) { + long a0 = AddMod(a[0], a[1], q); + long a1 = SubMod(a[0], a[1], q); + A[0] = a0; + A[1] = a1; + return; + } + } + + // assume k > 1 + + NTL_TLS_LOCAL(Vec, wtab_store); + NTL_TLS_LOCAL(Vec, wqinvtab_store); + NTL_TLS_LOCAL(Vec, AA_store); + + wtab_store.SetLength(1L << (k-2)); + wqinvtab_store.SetLength(1L << (k-2)); + AA_store.SetLength(1L << k); + + long * NTL_RESTRICT wtab = wtab_store.elts(); + mulmod_precon_t * NTL_RESTRICT wqinvtab = wqinvtab_store.elts(); + long *AA = AA_store.elts(); + + wtab[0] = 1; + wqinvtab[0] = PrepMulModPrecon(1, q, qinv); + + + BitReverseCopy(AA, a, k); + + long n = 1L << k; + + long s, m, m_half, m_fourth, i, j, t, u, t1, u1, tt, tt1; + + long w; + mulmod_precon_t wqinv; + + // s = 1 + + for (i = 0; i < 
n; i += 2) { + t = AA[i + 1]; + u = AA[i]; + AA[i] = AddMod(u, t, q); + AA[i+1] = SubMod(u, t, q); + } + + + + for (s = 2; s < k; s++) { + m = 1L << s; + m_half = 1L << (s-1); + m_fourth = 1L << (s-2); + + w = root[s]; + wqinv = PrepMulModPrecon(w, q, qinv); + + // prepare wtab... + +#if 1 + // plain version... + + for (i = m_half-1, j = m_fourth-1; i >= 0; i -= 2, j--) { + long w_j = wtab[j]; + mulmod_precon_t wqi_j = wqinvtab[j]; + long w_i = MulModPrecon(w_j, w, q, wqinv); + mulmod_precon_t wqi_i = PrepMulModPrecon(w_i, q, qinv); + + wtab[i-1] = w_j; + wqinvtab[i-1] = wqi_j; + wtab[i] = w_i; + wqinvtab[i] = wqi_i; + } +#else + // software pipeline version...doesn't seem to make a big difference + + if (s == 2) { + wtab[1] = MulModPrecon(wtab[0], w, q, wqinv); + wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv); + } + else { + i = m_half-1; j = m_fourth-1; + wtab[i-1] = wtab[j]; + wqinvtab[i-1] = wqinvtab[j]; + wtab[i] = MulModPrecon(wtab[i-1], w, q, wqinv); + + i -= 2; j --; + + for (; i >= 0; i -= 2, j --) { + long wp2 = wtab[i+2]; + long wm1 = wtab[j]; + wqinvtab[i+2] = PrepMulModPrecon(wp2, q, qinv); + wtab[i-1] = wm1; + wqinvtab[i-1] = wqinvtab[j]; + wtab[i] = MulModPrecon(wm1, w, q, wqinv); + } + + wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv); + } + + +#endif + + + for (i = 0; i < n; i+= m) { + + long * NTL_RESTRICT AA0 = &AA[i]; + long * NTL_RESTRICT AA1 = &AA[i + m_half]; + + + +#if 1 + // loop unrolling and pipelining + + t = AA1[0]; + u = AA0[0]; + t1 = MulModPrecon(AA1[1], w, q, wqinv); + u1 = AA0[1]; + + + + for (j = 0; j < m_half-2; j += 2) { + long a02 = AA0[j+2]; + long a03 = AA0[j+3]; + long a12 = AA1[j+2]; + long a13 = AA1[j+3]; + long w2 = wtab[j+2]; + long w3 = wtab[j+3]; + mulmod_precon_t wqi2 = wqinvtab[j+2]; + mulmod_precon_t wqi3 = wqinvtab[j+3]; + + tt = MulModPrecon(a12, w2, q, wqi2); + long b00 = AddMod(u, t, q); + long b10 = SubMod(u, t, q); + t = tt; + u = a02; + + tt1 = MulModPrecon(a13, w3, q, wqi3); + long b01 = AddMod(u1, t1, q); + long b11 = SubMod(u1, t1, q); + t1 = tt1; + u1 = a03; + + AA0[j] = b00; + AA1[j] = b10; + AA0[j+1] = b01; + AA1[j+1] = b11; + } + + + AA0[j] = AddMod(u, t, q); + AA1[j] = SubMod(u, t, q); + AA0[j + 1] = AddMod(u1, t1, q); + AA1[j + 1] = SubMod(u1, t1, q); + + +#else + // no loop unrolling, but still some pipelining + + + t = AA1[0]; + u = AA0[0]; + + for (j = 0; j < m_half-1; j++) { + long a02 = AA0[j+1]; + long a12 = AA1[j+1]; + long w2 = wtab[j+1]; + mulmod_precon_t wqi2 = wqinvtab[j+1]; + + tt = MulModPrecon(a12, w2, q, wqi2); + long b00 = AddMod(u, t, q); + long b10 = SubMod(u, t, q); + t = tt; + u = a02; + + AA0[j] = b00; + AA1[j] = b10; + } + + + AA0[j] = AddMod(u, t, q); + AA1[j] = SubMod(u, t, q); + + +#endif + } + } + + + // s == k...special case + + m = 1L << s; + m_half = 1L << (s-1); + m_fourth = 1L << (s-2); + + + w = root[s]; + wqinv = PrepMulModPrecon(w, q, qinv); + + // j = 0, 1 + + t = AA[m_half]; + u = AA[0]; + t1 = MulModPrecon(AA[1+ m_half], w, q, wqinv); + u1 = AA[1]; + + A[0] = AddMod(u, t, q); + A[m_half] = SubMod(u, t, q); + A[1] = AddMod(u1, t1, q); + A[1 + m_half] = SubMod(u1, t1, q); + + for (j = 2; j < m_half; j += 2) { + t = MulModPrecon(AA[j + m_half], wtab[j >> 1], q, wqinvtab[j >> 1]); + u = AA[j]; + t1 = MulModPrecon(AA[j + 1+ m_half], wtab[j >> 1], q, + wqinvtab[j >> 1]); + t1 = MulModPrecon(t1, w, q, wqinv); + u1 = AA[j + 1]; + + A[j] = AddMod(u, t, q); + A[j + m_half] = SubMod(u, t, q); + A[j + 1] = AddMod(u1, t1, q); + A[j + 1 + m_half] = SubMod(u1, t1, q); + + } +} + + + + + + + +#else + 
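+// A minimal sketch of the "lazy butterfly" strategy implemented below --
+// illustration only, not the routines actually used; the toy_* names are made
+// up.  Values stay in a redundant range [0, 4q), only a cheap partial
+// reduction to [0, 2q) is applied per butterfly, and the final pass reduces
+// everything fully mod q.
+#if 0
+// partial reduction: [0, 4q) -> [0, 2q)
+static inline unsigned long toy_LazyReduce(unsigned long x, long q)
+{
+   return (x >= 2*cast_unsigned(q)) ? x - 2*q : x;
+}
+
+// one butterfly on (a0, a1), where t = a1 * w mod q is assumed in [0, 2q):
+// outputs a0 + t and a0 - t + 2q, both back in [0, 4q)
+static inline void toy_LazyButterfly(unsigned long& a0, unsigned long& a1,
+                                     unsigned long t, long q)
+{
+   unsigned long u = toy_LazyReduce(a0, q);
+   a0 = u + t;
+   a1 = u - t + 2*q;
+}
+#endif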
+
+
+// FFT with lazy multiplication
+
+#if (defined(NTL_LONGLONG_SP_MULMOD))
+
+
+#if (NTL_BITS_PER_LONG >= NTL_SP_NBITS+4)
+
+static inline unsigned long
+sp_NormalizedLazyPrepMulModPreconWithRem(unsigned long& rres, long b, long n, unsigned long ninv)
+{
+   unsigned long H = cast_unsigned(b);
+   unsigned long Q = MulHiUL(H << 4, ninv);
+   unsigned long L = cast_unsigned(b) << (NTL_SP_NBITS+2);
+   long r = L - Q*cast_unsigned(n);  // r in [0..2*n)
+
+   r = sp_CorrectExcessQuo(Q, r, n);
+   rres = r;
+   return Q;  // NOTE: not shifted
+}
+
+static inline unsigned long
+sp_NormalizedLazyPrepMulModPrecon(long b, long n, unsigned long ninv)
+{
+   unsigned long H = cast_unsigned(b);
+   unsigned long Q = MulHiUL(H << 4, ninv);
+   unsigned long L = cast_unsigned(b) << (NTL_SP_NBITS+2);
+   long r = L - Q*cast_unsigned(n);  // r in [0..2*n)
+
+   Q += 1L + sp_SignMask(r-n);
+   return Q;  // NOTE: not shifted
+}
+
+
+#else
+
+// NTL_BITS_PER_LONG == NTL_SP_NBITS+2
+static inline unsigned long
+sp_NormalizedLazyPrepMulModPreconWithRem(unsigned long& rres, long b, long n, unsigned long ninv)
+{
+   unsigned long H = cast_unsigned(b) << 2;
+   unsigned long Q = MulHiUL(H, (ninv << 1)) + H;
+   unsigned long rr = -Q*cast_unsigned(n);  // r in [0..3*n)
+
+   long r = sp_CorrectExcessQuo(Q, rr, n);
+   r = sp_CorrectExcessQuo(Q, r, n);
+   rres = r;
+   return Q;  // NOTE: not shifted
+}
+
+static inline unsigned long
+sp_NormalizedLazyPrepMulModPrecon(long b, long n, unsigned long ninv)
+{
+   unsigned long H = cast_unsigned(b) << 2;
+   unsigned long Q = MulHiUL(H, (ninv << 1)) + H;
+   unsigned long rr = -Q*cast_unsigned(n);  // r in [0..3*n)
+   Q += 2L + sp_SignMask(rr-n) + sp_SignMask(rr-2*n);
+   return Q;  // NOTE: not shifted
+}
+
+
+#endif
+
+
+static inline unsigned long
+LazyPrepMulModPrecon(long b, long n, sp_inverse ninv)
+{
+   return sp_NormalizedLazyPrepMulModPrecon(b << ninv.shamt, n << ninv.shamt, ninv.inv) << (NTL_BITS_PER_LONG-NTL_SP_NBITS-2);
+}
+
+
+static inline unsigned long
+LazyPrepMulModPreconWithRem(unsigned long& rres, long b, long n, sp_inverse ninv)
+{
+   unsigned long qq, rr;
+   qq = sp_NormalizedLazyPrepMulModPreconWithRem(rr, b << ninv.shamt, n << ninv.shamt, ninv.inv);
+   rres = rr >> ninv.shamt;
+   return qq << (NTL_BITS_PER_LONG-NTL_SP_NBITS-2);
+}
+
+
+
+
+
+
+
+
+#elif (NTL_BITS_PER_LONG - NTL_SP_NBITS >= 4 && NTL_WIDE_DOUBLE_PRECISION - NTL_SP_NBITS >= 4)
+
+
+// slightly faster functions, which should kick in on x86-64, where
+// NTL_BITS_PER_LONG == 64
+// NTL_SP_NBITS == 60 (another reason for holding this back to 60 bits)
+// NTL_WIDE_DOUBLE_PRECISION == 64
+
+// DIRT: if the relative error in floating point calculations (muls and reciprocals)
+// is <= epsilon, the relative error in the calculations is <= 3*epsilon +
+// O(epsilon^2), and we require that this relative error is at most
+// 2^{-(NTL_SP_NBITS+2)}, so it should be pretty safe as long as
+// epsilon is at most, or not much greater than, 2^{-NTL_WIDE_DOUBLE_PRECISION}.
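+// Worked check of the bound above (illustrative, assuming NTL_SP_NBITS == 60
+// and epsilon == 2^{-64}): the requirement becomes
+// 3*epsilon + O(epsilon^2) <= 2^{-62}, and indeed 3*2^{-64} ~ 2^{-62.4} < 2^{-62},
+// so the floating-point quotient estimate is off by at most one unit, and the
+// sp_SignMask/correction steps in the functions below repair it.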
+ +static inline +unsigned long LazyPrepMulModPrecon(long b, long n, wide_double ninv) +{ + long q = (long) ( (((wide_double) b) * wide_double(4*NTL_SP_BOUND)) * ninv ); + + unsigned long rr = (cast_unsigned(b) << (NTL_SP_NBITS+2)) + - cast_unsigned(q)*cast_unsigned(n); + + q += sp_SignMask(rr) + sp_SignMask(rr-n) + 1L; + + return cast_unsigned(q) << (NTL_BITS_PER_LONG - NTL_SP_NBITS - 2); +} + +static inline +unsigned long LazyPrepMulModPreconWithRem(unsigned long& rres, long b, long n, wide_double ninv) +{ + long q = (long) ( (((wide_double) b) * wide_double(4*NTL_SP_BOUND)) * ninv ); + + unsigned long rr = (cast_unsigned(b) << (NTL_SP_NBITS+2)) + - cast_unsigned(q)*cast_unsigned(n); + + long r = sp_CorrectDeficitQuo(q, rr, n); + r = sp_CorrectExcessQuo(q, r, n); + + unsigned long qres = cast_unsigned(q) << (NTL_BITS_PER_LONG - NTL_SP_NBITS - 2); + rres = r; + return qres; +} + +#else + + +static inline +unsigned long LazyPrepMulModPrecon(long b, long n, wide_double ninv) +{ + long q = (long) ( (((wide_double) b) * wide_double(NTL_SP_BOUND)) * ninv ); + + unsigned long rr = (cast_unsigned(b) << (NTL_SP_NBITS)) + - cast_unsigned(q)*cast_unsigned(n); + + long r = sp_CorrectDeficitQuo(q, rr, n); + r = sp_CorrectExcessQuo(q, r, n); + + unsigned long qq = q; + + qq = 2*qq; + r = 2*r; + r = sp_CorrectExcessQuo(qq, r, n); + + qq = 2*qq; + r = 2*r; + qq += sp_SignMask(r-n) + 1L; + + return qq << (NTL_BITS_PER_LONG - NTL_SP_NBITS - 2); +} + + + + + +static inline +unsigned long LazyPrepMulModPreconWithRem(unsigned long& rres, long b, long n, wide_double ninv) +{ + long q = (long) ( (((wide_double) b) * wide_double(NTL_SP_BOUND)) * ninv ); + + unsigned long rr = (cast_unsigned(b) << (NTL_SP_NBITS)) + - cast_unsigned(q)*cast_unsigned(n); + + long r = sp_CorrectDeficitQuo(q, rr, n); + r = sp_CorrectExcessQuo(q, r, n); + + unsigned long qq = q; + + qq = 2*qq; + r = 2*r; + r = sp_CorrectExcessQuo(qq, r, n); + + qq = 2*qq; + r = 2*r; + r = sp_CorrectExcessQuo(qq, r, n); + + rres = r; + return qq << (NTL_BITS_PER_LONG - NTL_SP_NBITS - 2); +} + +#endif + + + +static inline +unsigned long LazyMulModPreconQuo(unsigned long a, unsigned long b, + unsigned long n, unsigned long bninv) +{ + unsigned long q = MulHiUL(a, bninv); + unsigned long r = a*b - q*n; + q += sp_SignMask(r-n) + 1L; + return q << (NTL_BITS_PER_LONG - NTL_SP_NBITS - 2); +} + + +static inline +unsigned long LazyMulModPrecon(unsigned long a, unsigned long b, + unsigned long n, unsigned long bninv) +{ + unsigned long q = MulHiUL(a, bninv); + unsigned long res = a*b - q*n; + return res; +} + + +static inline +unsigned long LazyReduce1(unsigned long a, long q) +{ + return sp_CorrectExcess(long(a), q); +} + +static inline +unsigned long LazyReduce2(unsigned long a, long q) +{ + return sp_CorrectExcess(a, 2*q); +} + + + + +// FFT: Lazy, no tables + +void NTL_FFT_ROUTINE_NOTAB(long* A, const long* a, long k, const FFTPrimeInfo& info, long dir) + +// performs a 2^k-point convolution modulo q + +{ + long q = info.q; + const long *root = info.RootTable[dir].elts(); + mulmod_t qinv = info.qinv; + + if (k <= 1) { + if (k == 0) { + A[0] = a[0]; + return; + } + if (k == 1) { + long a0 = AddMod(a[0], a[1], q); + long a1 = SubMod(a[0], a[1], q); + A[0] = a0; + A[1] = a1; + return; + } + } + + // assume k >= 2 + + NTL_TLS_LOCAL(Vec, AA_store); + AA_store.SetLength(1L << k); + unsigned long *AA = AA_store.elts(); + + NTL_TLS_LOCAL(Vec, wtab_store); + wtab_store.SetLength(max(2, 1L << (k-2))); + // allocate space for at least 2 elements, to deal with a corner 
case when k == 2
+   long * NTL_RESTRICT wtab = wtab_store.elts();
+
+   NTL_TLS_LOCAL(Vec<mulmod_precon_t>, wqinvtab_store);
+   wqinvtab_store.SetLength(max(2, 1L << (k-2)));
+   // allocate space for at least 2 elements, to deal with a corner case when k == 2
+   mulmod_precon_t * NTL_RESTRICT wqinvtab = wqinvtab_store.elts();
+
+
+   BitReverseCopy(AA, a, k);
+
+   long n = 1L << k;
+
+
+   /* we work with redundant representations, in the range [0, 4q) */
+
+   long s, m, m_half, m_fourth, i, j;
+   unsigned long t, u, t1, u1;
+
+
+   wtab[0] = 1;
+   wqinvtab[0] = LazyPrepMulModPrecon(1, q, qinv);
+
+   // s = 1
+   for (i = 0; i < n; i += 2) {
+      t = AA[i + 1];
+      u = AA[i];
+      AA[i] = u + t;
+      AA[i+1] = u - t + q;
+   }
+
+   // s = 2
+   {
+      long w = root[2];
+      mulmod_precon_t wqinv = LazyPrepMulModPrecon(w, q, qinv);
+
+      wtab[1] = w;
+      wqinvtab[1] = wqinv;
+
+
+      for (i = 0; i < n; i += 4) {
+
+         unsigned long * NTL_RESTRICT AA0 = &AA[i];
+         unsigned long * NTL_RESTRICT AA1 = &AA[i + 2];
+
+         {
+            const unsigned long a11 = AA1[0];
+            const unsigned long a01 = AA0[0];
+
+            const unsigned long tt1 = a11;
+            const unsigned long uu1 = a01;
+            const unsigned long b01 = uu1 + tt1;
+            const unsigned long b11 = uu1 - tt1 + 2*q;
+
+            AA0[0] = b01;
+            AA1[0] = b11;
+         }
+         {
+            const unsigned long a11 = AA1[1];
+            const unsigned long a01 = AA0[1];
+
+            const unsigned long tt1 = LazyMulModPrecon(a11, w, q, wqinv);
+            const unsigned long uu1 = a01;
+            const unsigned long b01 = uu1 + tt1;
+            const unsigned long b11 = uu1 - tt1 + 2*q;
+
+            AA0[1] = b01;
+            AA1[1] = b11;
+         }
+      }
+   }
+
+
+   // s = 3..k-1
+
+   for (s = 3; s < k; s++) {
+      m = 1L << s;
+      m_half = 1L << (s-1);
+      m_fourth = 1L << (s-2);
+
+      long w = root[s];
+
+#if 0
+      // This computes all the multipliers in a straightforward fashion.
+      // It's a bit slower than the strategy used below, even if
+      // NTL_LONGLONG_SP_MULMOD is set
+
+      mulmod_precon_t wqinv = LazyPrepMulModPrecon(w, q, qinv);
+
+
+      for (i = m_half-1, j = m_fourth-1; i >= 0; i -= 2, j--) {
+         long w_j = wtab[j];
+         mulmod_precon_t wqi_j = wqinvtab[j];
+
+         long w_i = LazyReduce1(LazyMulModPrecon(w_j, w, q, wqinv), q);
+         mulmod_precon_t wqi_i = LazyPrepMulModPrecon(w_i, q, qinv);
+
+         wtab[i-1] = w_j;
+         wqinvtab[i-1] = wqi_j;
+         wtab[i] = w_i;
+         wqinvtab[i] = wqi_i;
+      }
+#else
+      unsigned long wqinv_rem;
+      mulmod_precon_t wqinv = LazyPrepMulModPreconWithRem(wqinv_rem, w, q, qinv);
+
+
+      for (i = m_half-1, j = m_fourth-1; i >= 0; i -= 2, j--) {
+         long w_j = wtab[j];
+         mulmod_precon_t wqi_j = wqinvtab[j];
+
+         // The next two lines are equivalent, but the first involves
+         // a computation of hi(w_j*wqinv), which pairs with the
+         // computation of lo(w_j*wqinv) below...but I don't think
+         // the compiler sees this...oh well...
+ + long w_i = LazyReduce1(LazyMulModPrecon(w_j, w, q, wqinv), q); + // long w_i = LazyReduce1(LazyMulModPrecon(w, w_j, q, wqi_j), q); + + mulmod_precon_t wqi_i = LazyMulModPreconQuo(wqinv_rem, w_j, q, wqi_j) + + cast_unsigned(w_j)*wqinv; + + wtab[i-1] = w_j; + wqinvtab[i-1] = wqi_j; + wtab[i] = w_i; + wqinvtab[i] = wqi_i; + } + + +#endif + + for (i = 0; i < n; i += m) { + + unsigned long * NTL_RESTRICT AA0 = &AA[i]; + unsigned long * NTL_RESTRICT AA1 = &AA[i + m_half]; + + + for (j = 0; j < m_half; j += 4) { + { + const long w1 = wtab[j+0]; + const mulmod_precon_t wqi1 = wqinvtab[j+0]; + const unsigned long a11 = AA1[j+0]; + const unsigned long a01 = AA0[j+0]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+0] = b01; + AA1[j+0] = b11; + } + { + const long w1 = wtab[j+1]; + const mulmod_precon_t wqi1 = wqinvtab[j+1]; + const unsigned long a11 = AA1[j+1]; + const unsigned long a01 = AA0[j+1]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+1] = b01; + AA1[j+1] = b11; + } + { + const long w1 = wtab[j+2]; + const mulmod_precon_t wqi1 = wqinvtab[j+2]; + const unsigned long a11 = AA1[j+2]; + const unsigned long a01 = AA0[j+2]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+2] = b01; + AA1[j+2] = b11; + } + { + const long w1 = wtab[j+3]; + const mulmod_precon_t wqi1 = wqinvtab[j+3]; + const unsigned long a11 = AA1[j+3]; + const unsigned long a01 = AA0[j+3]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+3] = b01; + AA1[j+3] = b11; + } + } + } + } + + + + // special case: s == k to avoid extraneous computation of constants + + if (k > 2) { + s = k; + + m = 1L << s; + m_half = 1L << (s-1); + m_fourth = 1L << (s-2); + + long w = root[s]; + mulmod_precon_t wqinv = LazyPrepMulModPrecon(w, q, qinv); + + + for (i = 0; i < n; i += m) { + + unsigned long * NTL_RESTRICT AA0 = &AA[i]; + unsigned long * NTL_RESTRICT AA1 = &AA[i + m_half]; + + long half_j; + + for (j = 0, half_j = 0; j < m_half; j += 4, half_j += 2) { + { + const long w1 = wtab[half_j+0]; + const mulmod_precon_t wqi1 = wqinvtab[half_j+0]; + const unsigned long a11 = AA1[j+0]; + const unsigned long a01 = AA0[j+0]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+0] = b01; + AA1[j+0] = b11; + } + { + const long w1 = wtab[half_j+0]; + const mulmod_precon_t wqi1 = wqinvtab[half_j+0]; + const unsigned long a11 = AA1[j+1]; + const unsigned long a01 = AA0[j+1]; + + const unsigned long tt1 = LazyMulModPrecon(LazyMulModPrecon(a11, w1, q, wqi1), + w, q, wqinv); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+1] = b01; + AA1[j+1] = b11; + } + { + const long w1 = wtab[half_j+1]; + const mulmod_precon_t wqi1 = wqinvtab[half_j+1]; + const unsigned long a11 = AA1[j+2]; + 
const unsigned long a01 = AA0[j+2]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+2] = b01; + AA1[j+2] = b11; + } + { + const long w1 = wtab[half_j+1]; + const mulmod_precon_t wqi1 = wqinvtab[half_j+1]; + const unsigned long a11 = AA1[j+3]; + const unsigned long a01 = AA0[j+3]; + + const unsigned long tt1 = LazyMulModPrecon(LazyMulModPrecon(a11, w1, q, wqi1), + w, q, wqinv); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+3] = b01; + AA1[j+3] = b11; + } + } + } + } + + + /* need to reduce redundant representations */ + + for (i = 0; i < n; i++) { + unsigned long tmp = LazyReduce2(AA[i], q); + A[i] = LazyReduce1(tmp, q); + } +} + + +#endif + + + + + + + +#ifndef NTL_FFT_LAZYMUL + +// FFT with precomputed tables, no lazy mul + +static +void PrecompFFTMultipliers(long k, long q, mulmod_t qinv, const long *root, const FFTMultipliers& tab) +{ + if (k < 1) LogicError("PrecompFFTMultipliers: bad input"); + + do { // NOTE: thread safe lazy init + FFTMultipliers::Builder bld(tab, k+1); + long amt = bld.amt(); + if (!amt) break; + + long first = k+1-amt; + // initialize entries first..k + + + for (long s = first; s <= k; s++) { + UniquePtr item; + + if (s == 0) { + bld.move(item); // position 0 not used + continue; + } + + if (s == 1) { + item.make(); + item->wtab_precomp.SetLength(1); + item->wqinvtab_precomp.SetLength(1); + item->wtab_precomp[0] = 1; + item->wqinvtab_precomp[0] = PrepMulModPrecon(1, q, qinv); + bld.move(item); + continue; + } + + item.make(); + item->wtab_precomp.SetLength(1L << (s-1)); + item->wqinvtab_precomp.SetLength(1L << (s-1)); + + long m = 1L << s; + long m_half = 1L << (s-1); + long m_fourth = 1L << (s-2); + + const long *wtab_last = tab[s-1]->wtab_precomp.elts(); + const mulmod_precon_t *wqinvtab_last = tab[s-1]->wqinvtab_precomp.elts(); + + long *wtab = item->wtab_precomp.elts(); + mulmod_precon_t *wqinvtab = item->wqinvtab_precomp.elts(); + + for (long i = 0; i < m_fourth; i++) { + wtab[i] = wtab_last[i]; + wqinvtab[i] = wqinvtab_last[i]; + } + + long w = root[s]; + mulmod_precon_t wqinv = PrepMulModPrecon(w, q, qinv); + + // prepare wtab... 
+ + if (s == 2) { + wtab[1] = MulModPrecon(wtab[0], w, q, wqinv); + wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv); + } + else { + // some software pipelining + long i, j; + + i = m_half-1; j = m_fourth-1; + wtab[i-1] = wtab[j]; + wqinvtab[i-1] = wqinvtab[j]; + wtab[i] = MulModPrecon(wtab[i-1], w, q, wqinv); + + i -= 2; j --; + + for (; i >= 0; i -= 2, j --) { + long wp2 = wtab[i+2]; + long wm1 = wtab[j]; + wqinvtab[i+2] = PrepMulModPrecon(wp2, q, qinv); + wtab[i-1] = wm1; + wqinvtab[i-1] = wqinvtab[j]; + wtab[i] = MulModPrecon(wm1, w, q, wqinv); + } + + wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv); + } + + bld.move(item); + } + } while (0); +} + + +// FFT: no lazy, table + +void NTL_FFT_ROUTINE_TAB(long* A, const long* a, long k, const FFTPrimeInfo& info, long dir) +// performs a 2^k-point convolution modulo q + +{ + if (!info.bigtab || k > NTL_FFT_BIGTAB_MAXROOT) { + NTL_FFT_ROUTINE_NOTAB(A, a, k, info, dir); + return; + } + + + long q = info.q; + const long *root = info.RootTable[dir].elts(); + mulmod_t qinv = info.qinv; + const FFTMultipliers& tab = info.bigtab->MulTab[dir]; + + + if (k <= 1) { + if (k == 0) { + A[0] = a[0]; + return; + } + if (k == 1) { + long a0 = AddMod(a[0], a[1], q); + long a1 = SubMod(a[0], a[1], q); + A[0] = a0; + A[1] = a1; + return; + } + } + + // assume k > 1 + + if (k >= tab.length()) PrecompFFTMultipliers(k, q, qinv, root, tab); + + NTL_TLS_LOCAL(Vec, AA_store); + AA_store.SetLength(1L << k); + long *AA = AA_store.elts(); + + BitReverseCopy(AA, a, k); + + long n = 1L << k; + + long s, m, m_half, m_fourth, i, j, t, u, t1, u1, tt, tt1; + + // s = 1 + + for (i = 0; i < n; i += 2) { + t = AA[i + 1]; + u = AA[i]; + AA[i] = AddMod(u, t, q); + AA[i+1] = SubMod(u, t, q); + } + + + for (s = 2; s < k; s++) { + m = 1L << s; + m_half = 1L << (s-1); + m_fourth = 1L << (s-2); + + const long* NTL_RESTRICT wtab = tab[s]->wtab_precomp.elts(); + const mulmod_precon_t * NTL_RESTRICT wqinvtab = tab[s]->wqinvtab_precomp.elts(); + + for (i = 0; i < n; i+= m) { + + long * NTL_RESTRICT AA0 = &AA[i]; + long * NTL_RESTRICT AA1 = &AA[i + m_half]; + +#ifdef NTL_PIPELINE + +// pipelining: seems to be faster + + t = AA1[0]; + u = AA0[0]; + t1 = MulModPrecon(AA1[1], wtab[1], q, wqinvtab[1]); + u1 = AA0[1]; + + for (j = 0; j < m_half-2; j += 2) { + long a02 = AA0[j+2]; + long a03 = AA0[j+3]; + long a12 = AA1[j+2]; + long a13 = AA1[j+3]; + long w2 = wtab[j+2]; + long w3 = wtab[j+3]; + mulmod_precon_t wqi2 = wqinvtab[j+2]; + mulmod_precon_t wqi3 = wqinvtab[j+3]; + + tt = MulModPrecon(a12, w2, q, wqi2); + long b00 = AddMod(u, t, q); + long b10 = SubMod(u, t, q); + + tt1 = MulModPrecon(a13, w3, q, wqi3); + long b01 = AddMod(u1, t1, q); + long b11 = SubMod(u1, t1, q); + + AA0[j] = b00; + AA1[j] = b10; + AA0[j+1] = b01; + AA1[j+1] = b11; + + + t = tt; + u = a02; + t1 = tt1; + u1 = a03; + } + + + AA0[j] = AddMod(u, t, q); + AA1[j] = SubMod(u, t, q); + AA0[j + 1] = AddMod(u1, t1, q); + AA1[j + 1] = SubMod(u1, t1, q); + } +#else + for (j = 0; j < m_half; j += 2) { + const long a00 = AA0[j]; + const long a01 = AA0[j+1]; + const long a10 = AA1[j]; + const long a11 = AA1[j+1]; + + const long w0 = wtab[j]; + const long w1 = wtab[j+1]; + const mulmod_precon_t wqi0 = wqinvtab[j]; + const mulmod_precon_t wqi1 = wqinvtab[j+1]; + + const long tt = MulModPrecon(a10, w0, q, wqi0); + const long uu = a00; + const long b00 = AddMod(uu, tt, q); + const long b10 = SubMod(uu, tt, q); + + const long tt1 = MulModPrecon(a11, w1, q, wqi1); + const long uu1 = a01; + const long b01 = AddMod(uu1, tt1, q); + const 
long b11 = SubMod(uu1, tt1, q); + + AA0[j] = b00; + AA0[j+1] = b01; + AA1[j] = b10; + AA1[j+1] = b11; + } + } +#endif + } + + + // s == k, special case + { + m = 1L << s; + m_half = 1L << (s-1); + m_fourth = 1L << (s-2); + + const long* NTL_RESTRICT wtab = tab[s]->wtab_precomp.elts(); + const mulmod_precon_t * NTL_RESTRICT wqinvtab = tab[s]->wqinvtab_precomp.elts(); + + for (i = 0; i < n; i+= m) { + + long * NTL_RESTRICT AA0 = &AA[i]; + long * NTL_RESTRICT AA1 = &AA[i + m_half]; + long * NTL_RESTRICT A0 = &A[i]; + long * NTL_RESTRICT A1 = &A[i + m_half]; + +#ifdef NTL_PIPELINE + +// pipelining: seems to be faster + + t = AA1[0]; + u = AA0[0]; + t1 = MulModPrecon(AA1[1], wtab[1], q, wqinvtab[1]); + u1 = AA0[1]; + + for (j = 0; j < m_half-2; j += 2) { + long a02 = AA0[j+2]; + long a03 = AA0[j+3]; + long a12 = AA1[j+2]; + long a13 = AA1[j+3]; + long w2 = wtab[j+2]; + long w3 = wtab[j+3]; + mulmod_precon_t wqi2 = wqinvtab[j+2]; + mulmod_precon_t wqi3 = wqinvtab[j+3]; + + tt = MulModPrecon(a12, w2, q, wqi2); + long b00 = AddMod(u, t, q); + long b10 = SubMod(u, t, q); + + tt1 = MulModPrecon(a13, w3, q, wqi3); + long b01 = AddMod(u1, t1, q); + long b11 = SubMod(u1, t1, q); + + A0[j] = b00; + A1[j] = b10; + A0[j+1] = b01; + A1[j+1] = b11; + + + t = tt; + u = a02; + t1 = tt1; + u1 = a03; + } + + + A0[j] = AddMod(u, t, q); + A1[j] = SubMod(u, t, q); + A0[j + 1] = AddMod(u1, t1, q); + A1[j + 1] = SubMod(u1, t1, q); + } +#else + for (j = 0; j < m_half; j += 2) { + const long a00 = AA0[j]; + const long a01 = AA0[j+1]; + const long a10 = AA1[j]; + const long a11 = AA1[j+1]; + + const long w0 = wtab[j]; + const long w1 = wtab[j+1]; + const mulmod_precon_t wqi0 = wqinvtab[j]; + const mulmod_precon_t wqi1 = wqinvtab[j+1]; + + const long tt = MulModPrecon(a10, w0, q, wqi0); + const long uu = a00; + const long b00 = AddMod(uu, tt, q); + const long b10 = SubMod(uu, tt, q); + + const long tt1 = MulModPrecon(a11, w1, q, wqi1); + const long uu1 = a01; + const long b01 = AddMod(uu1, tt1, q); + const long b11 = SubMod(uu1, tt1, q); + + A0[j] = b00; + A0[j+1] = b01; + A1[j] = b10; + A1[j+1] = b11; + } + } +#endif + } + +} + + + + + + +#else + +// FFT with precomputed tables, lazy mul + + +static +void LazyPrecompFFTMultipliers(long k, long q, mulmod_t qinv, const long *root, const FFTMultipliers& tab) +{ + if (k < 1) LogicError("LazyPrecompFFTMultipliers: bad input"); + + do { // NOTE: thread safe lazy init + FFTMultipliers::Builder bld(tab, k+1); + long amt = bld.amt(); + if (!amt) break; + + long first = k+1-amt; + // initialize entries first..k + + + for (long s = first; s <= k; s++) { + UniquePtr item; + + if (s == 0) { + bld.move(item); // position 0 not used + continue; + } + + if (s == 1) { + item.make(); + item->wtab_precomp.SetLength(1); + item->wqinvtab_precomp.SetLength(1); + item->wtab_precomp[0] = 1; + item->wqinvtab_precomp[0] = LazyPrepMulModPrecon(1, q, qinv); + bld.move(item); + continue; + } + + item.make(); + item->wtab_precomp.SetLength(1L << (s-1)); + item->wqinvtab_precomp.SetLength(1L << (s-1)); + + long m = 1L << s; + long m_half = 1L << (s-1); + long m_fourth = 1L << (s-2); + + const long *wtab_last = tab[s-1]->wtab_precomp.elts(); + const mulmod_precon_t *wqinvtab_last = tab[s-1]->wqinvtab_precomp.elts(); + + long *wtab = item->wtab_precomp.elts(); + mulmod_precon_t *wqinvtab = item->wqinvtab_precomp.elts(); + + for (long i = 0; i < m_fourth; i++) { + wtab[i] = wtab_last[i]; + wqinvtab[i] = wqinvtab_last[i]; + } + + long w = root[s]; + mulmod_precon_t wqinv = LazyPrepMulModPrecon(w, q, 
qinv); + + // prepare wtab... + + if (s == 2) { + wtab[1] = LazyReduce1(LazyMulModPrecon(wtab[0], w, q, wqinv), q); + wqinvtab[1] = LazyPrepMulModPrecon(wtab[1], q, qinv); + } + else { + // some software pipelining + long i, j; + + i = m_half-1; j = m_fourth-1; + wtab[i-1] = wtab[j]; + wqinvtab[i-1] = wqinvtab[j]; + wtab[i] = LazyReduce1(LazyMulModPrecon(wtab[i-1], w, q, wqinv), q); + + i -= 2; j --; + + for (; i >= 0; i -= 2, j --) { + long wp2 = wtab[i+2]; + long wm1 = wtab[j]; + wqinvtab[i+2] = LazyPrepMulModPrecon(wp2, q, qinv); + wtab[i-1] = wm1; + wqinvtab[i-1] = wqinvtab[j]; + wtab[i] = LazyReduce1(LazyMulModPrecon(wm1, w, q, wqinv), q); + } + + wqinvtab[1] = LazyPrepMulModPrecon(wtab[1], q, qinv); + } + + bld.move(item); + } + } while (0); +} + + + + +#ifdef NTL_BRC_TEST +bool BRC_test_flag = false; +#endif + + +// FFT: lazy, tables + +void NTL_FFT_ROUTINE_TAB(long* A, const long* a, long k, const FFTPrimeInfo& info, long dir) + +// performs a 2^k-point convolution modulo q + +{ + if (!info.bigtab || k > NTL_FFT_BIGTAB_MAXROOT) { + NTL_FFT_ROUTINE_NOTAB(A, a, k, info, dir); + return; + } + + long q = info.q; + const long *root = info.RootTable[dir].elts(); + mulmod_t qinv = info.qinv; + const FFTMultipliers& tab = info.bigtab->MulTab[dir]; + + if (k <= 1) { + if (k == 0) { + A[0] = a[0]; + return; + } + if (k == 1) { + long a0 = AddMod(a[0], a[1], q); + long a1 = SubMod(a[0], a[1], q); + A[0] = a0; + A[1] = a1; + return; + } + } + + // assume k > 1 + + if (k >= tab.length()) LazyPrecompFFTMultipliers(k, q, qinv, root, tab); + + NTL_TLS_LOCAL(Vec, AA_store); + AA_store.SetLength(1L << k); + unsigned long *AA = AA_store.elts(); + + + long n = 1L << k; + +#ifndef NTL_BRC_TEST + BitReverseCopy(AA, a, k); +#else + if (BRC_test_flag) + for (long i = 0; i < n; i++) AA[i] = a[i]; + else + BitReverseCopy(AA, a, k); +#endif + + + + /* we work with redundant representations, in the range [0, 4q) */ + + + + long s, m, m_half, m_fourth, i, j; + unsigned long t, u, t1, u1; + + + // s = 1 + for (i = 0; i < n; i += 2) { + t = AA[i + 1]; + u = AA[i]; + AA[i] = u + t; + AA[i+1] = u - t + q; + } + + + // s = 2 + { + const long * NTL_RESTRICT wtab = tab[2]->wtab_precomp.elts(); + const mulmod_precon_t * NTL_RESTRICT wqinvtab = tab[2]->wqinvtab_precomp.elts(); + + const long w1 = wtab[1]; + const mulmod_precon_t wqi1 = wqinvtab[1]; + + for (i = 0; i < n; i += 4) { + + unsigned long * NTL_RESTRICT AA0 = &AA[i]; + unsigned long * NTL_RESTRICT AA1 = &AA[i + 2]; + + { + const unsigned long a11 = AA1[0]; + const unsigned long a01 = AA0[0]; + + const unsigned long tt1 = a11; + const unsigned long uu1 = a01; + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[0] = b01; + AA1[0] = b11; + } + { + const unsigned long a11 = AA1[1]; + const unsigned long a01 = AA0[1]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = a01; + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[1] = b01; + AA1[1] = b11; + } + } + } + + + // s = 3..k + + for (s = 3; s <= k; s++) { + m = 1L << s; + m_half = 1L << (s-1); + m_fourth = 1L << (s-2); + + const long* NTL_RESTRICT wtab = tab[s]->wtab_precomp.elts(); + const mulmod_precon_t * NTL_RESTRICT wqinvtab = tab[s]->wqinvtab_precomp.elts(); + + for (i = 0; i < n; i += m) { + + unsigned long * NTL_RESTRICT AA0 = &AA[i]; + unsigned long * NTL_RESTRICT AA1 = &AA[i + m_half]; + +#if 1 + + // a little loop unrolling: this gives the best code + + for (j = 0; j < m_half; j 
+= 4) { + { + const long w1 = wtab[j+0]; + const mulmod_precon_t wqi1 = wqinvtab[j+0]; + const unsigned long a11 = AA1[j+0]; + const unsigned long a01 = AA0[j+0]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+0] = b01; + AA1[j+0] = b11; + } + { + const long w1 = wtab[j+1]; + const mulmod_precon_t wqi1 = wqinvtab[j+1]; + const unsigned long a11 = AA1[j+1]; + const unsigned long a01 = AA0[j+1]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+1] = b01; + AA1[j+1] = b11; + } + { + const long w1 = wtab[j+2]; + const mulmod_precon_t wqi1 = wqinvtab[j+2]; + const unsigned long a11 = AA1[j+2]; + const unsigned long a01 = AA0[j+2]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+2] = b01; + AA1[j+2] = b11; + } + { + const long w1 = wtab[j+3]; + const mulmod_precon_t wqi1 = wqinvtab[j+3]; + const unsigned long a11 = AA1[j+3]; + const unsigned long a01 = AA0[j+3]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j+3] = b01; + AA1[j+3] = b11; + } + } + +#else + + // a plain loop: not as good as the unrolled version + + for (j = 0; j < m_half; j++) { + const long w1 = wtab[j]; + const mulmod_precon_t wqi1 = wqinvtab[j]; + const unsigned long a11 = AA1[j]; + const unsigned long a01 = AA0[j]; + + const unsigned long tt1 = LazyMulModPrecon(a11, w1, q, wqi1); + const unsigned long uu1 = LazyReduce2(a01, q); + const unsigned long b01 = uu1 + tt1; + const unsigned long b11 = uu1 - tt1 + 2*q; + + AA0[j] = b01; + AA1[j] = b11; + } + +#endif + + } + } + + /* need to reduce redundant representations */ + + for (i = 0; i < n; i++) { + unsigned long tmp = LazyReduce2(AA[i], q); + A[i] = LazyReduce1(tmp, q); + } +} + + + + + +#endif + + + + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/FacVec.c b/thirdparty/linux/ntl/src/FacVec.c new file mode 100644 index 0000000000..68e06df046 --- /dev/null +++ b/thirdparty/linux/ntl/src/FacVec.c @@ -0,0 +1,85 @@ + +#include +#include + + +NTL_START_IMPL + + +static +void swap(IntFactor& x, IntFactor& y) +{ + IntFactor t; + + t = x; x = y; y = t; +} + +static +void FindMin(FacVec& v, long lo, long hi) +{ + long minv = 0; + long minp = -1; + long i; + + for (i = lo; i <= hi; i++) { + if (minv == 0 || v[i].val < minv) { + minv = v[i].val; + minp = i; + } + } + + swap(v[lo], v[minp]); +} + + +void FactorInt(FacVec& fvec, long n) +{ + if (n <= 1) LogicError("internal error: FactorInt(FacVec,long n) with n<=1"); + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("internal error: FactorInt(FacVec,long n) with n too large"); + + long NumFactors; + long q; + + fvec.SetLength(2*NextPowerOfTwo(n)); + + NumFactors = 0; + q = 2; + + while (n != 1) { + if (n%q == 0) { + fvec[NumFactors].q = q; + n = n/q; + fvec[NumFactors].a = 1; + fvec[NumFactors].val = q; + while (n%q == 0) { + n = n/q; + (fvec[NumFactors].a)++; + fvec[NumFactors].val *= q; + } + fvec[NumFactors].link = -1; + NumFactors++; + } + + q++; + } + + fvec.SetLength(2*NumFactors-1); + + long lo = 
0;
+   long hi = NumFactors - 1;
+
+   while (lo < hi) {
+      FindMin(fvec, lo, hi);
+      FindMin(fvec, lo+1, hi);
+      hi++;
+      fvec[hi].link = lo;
+      fvec[hi].val = fvec[lo].val * fvec[lo+1].val;
+      lo += 2;
+   }
+}
+
+
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/GF2.c b/thirdparty/linux/ntl/src/GF2.c
new file mode 100644
index 0000000000..e25d1cadf5
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GF2.c
@@ -0,0 +1,41 @@
+
+#include
+
+#include
+
+NTL_START_IMPL
+
+
+GF2 power(GF2 a, long e)
+{
+   if (e == 0) {
+      return to_GF2(1);
+   }
+
+   if (e < 0 && IsZero(a))
+      ArithmeticError("GF2: division by zero");
+
+   return a;
+}
+
+ostream& operator<<(ostream& s, GF2 a)
+{
+   if (a == 0)
+      s << "0";
+   else
+      s << "1";
+
+   return s;
+}
+
+istream& operator>>(istream& s, ref_GF2 x)
+{
+   NTL_ZZRegister(a);
+
+   NTL_INPUT_CHECK_RET(s, s >> a);
+
+   conv(x, a);
+   return s;
+}
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/GF2E.c b/thirdparty/linux/ntl/src/GF2E.c
new file mode 100644
index 0000000000..3cc502805f
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GF2E.c
@@ -0,0 +1,177 @@
+
+
+#include
+
+#include
+
+NTL_START_IMPL
+
+NTL_TLS_GLOBAL_DECL(SmartPtr<GF2EInfoT>, GF2EInfo_stg)
+
+NTL_CHEAP_THREAD_LOCAL
+GF2EInfoT *GF2EInfo = 0;
+
+
+GF2EInfoT::GF2EInfoT(const GF2X& NewP)
+{
+   build(p, NewP);
+
+   if (p.size == 1) {
+      if (deg(p) <= NTL_BITS_PER_LONG/2)
+         KarCross = 4;
+      else
+         KarCross = 8;
+   }
+   else if (p.size == 2)
+      KarCross = 8;
+   else if (p.size <= 5)
+      KarCross = 4;
+   else if (p.size == 6)
+      KarCross = 3;
+   else
+      KarCross = 2;
+
+
+   if (p.size <= 1) {
+      if (deg(p) <= NTL_BITS_PER_LONG/2)
+         ModCross = 20;
+      else
+         ModCross = 40;
+   }
+   else if (p.size <= 2)
+      ModCross = 75;
+   else if (p.size <= 4)
+      ModCross = 50;
+   else
+      ModCross = 25;
+
+   if (p.size == 1) {
+      if (deg(p) <= NTL_BITS_PER_LONG/2)
+         DivCross = 100;
+      else
+         DivCross = 200;
+   }
+   else if (p.size == 2)
+      DivCross = 400;
+   else if (p.size <= 4)
+      DivCross = 200;
+   else if (p.size == 5)
+      DivCross = 150;
+   else if (p.size <= 13)
+      DivCross = 100;
+   else
+      DivCross = 75;
+
+   _card_exp = p.n;
+}
+
+
+const ZZ& GF2E::cardinality()
+{
+   if (!GF2EInfo) LogicError("GF2E::cardinality: undefined modulus");
+
+   do { // NOTE: thread safe lazy init
+      Lazy<ZZ>::Builder builder(GF2EInfo->_card);
+      if (!builder()) break;
+      UniquePtr<ZZ> p;
+      p.make();
+      power(*p, 2, GF2EInfo->_card_exp);
+      builder.move(p);
+   } while (0);
+
+   return *GF2EInfo->_card;
+}
+
+
+
+
+
+
+
+void GF2E::init(const GF2X& p)
+{
+   GF2EContext c(p);
+   c.restore();
+}
+
+
+void GF2EContext::save()
+{
+   NTL_TLS_GLOBAL_ACCESS(GF2EInfo_stg);
+   ptr = GF2EInfo_stg;
+}
+
+void GF2EContext::restore() const
+{
+   NTL_TLS_GLOBAL_ACCESS(GF2EInfo_stg);
+   GF2EInfo_stg = ptr;
+   GF2EInfo = GF2EInfo_stg.get();
+}
+
+
+
+GF2EBak::~GF2EBak()
+{
+   if (MustRestore) c.restore();
+}
+
+void GF2EBak::save()
+{
+   c.save();
+   MustRestore = true;
+}
+
+
+void GF2EBak::restore()
+{
+   c.restore();
+   MustRestore = false;
+}
+
+
+
+const GF2E& GF2E::zero()
+{
+   static const GF2E z(INIT_NO_ALLOC); // GLOBAL (assumes C++11 thread-safe init)
+   return z;
+}
+
+
+
+istream& operator>>(istream& s, GF2E& x)
+{
+   GF2X y;
+
+   NTL_INPUT_CHECK_RET(s, s >> y);
+   conv(x, y);
+
+   return s;
+}
+
+void div(GF2E& x, const GF2E& a, const GF2E& b)
+{
+   GF2E t;
+
+   inv(t, b);
+   mul(x, a, t);
+}
+
+void div(GF2E& x, GF2 a, const GF2E& b)
+{
+   inv(x, b);
+   mul(x, x, a);
+}
+
+void div(GF2E& x, long a, const GF2E& b)
+{
+   inv(x, b);
+   mul(x, x, a);
+}
+
+
+void inv(GF2E& x, const GF2E& a)
+{
+   InvMod(x._GF2E__rep, a._GF2E__rep, GF2E::modulus());
+}
+
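+// A minimal usage sketch of the context machinery above (illustration only,
+// not part of GF2E.c; toy_demo is a made-up name, and the modulus chosen is
+// x^8 + x^4 + x^3 + x + 1, which is irreducible over GF(2)):
+#if 0
+static void toy_demo()
+{
+   GF2X P;
+   SetCoeff(P, 8); SetCoeff(P, 4); SetCoeff(P, 3); SetCoeff(P, 1); SetCoeff(P, 0);
+   GF2E::init(P);          // installs GF(2^8) via GF2EContext::restore()
+
+   GF2EBak bak;
+   bak.save();             // remember the current modulus
+   // ... switch to another modulus with GF2E::init(...) and work there ...
+   bak.restore();          // put GF(2^8) back (~GF2EBak would do this too)
+}
+#endif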
+NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/GF2EX.c b/thirdparty/linux/ntl/src/GF2EX.c new file mode 100644 index 0000000000..24347d190b --- /dev/null +++ b/thirdparty/linux/ntl/src/GF2EX.c @@ -0,0 +1,3457 @@ + + +#include +#include +#include + +#include + +NTL_START_IMPL + + + +const GF2EX& GF2EX::zero() +{ + static const GF2EX z; // GLOBAL (assumes C++11 thread-safe init) + return z; +} + + + +istream& operator>>(istream& s, GF2EX& x) +{ + NTL_INPUT_CHECK_RET(s, s >> x.rep); + x.normalize(); + return s; +} + +ostream& operator<<(ostream& s, const GF2EX& a) +{ + return s << a.rep; +} + + +void GF2EX::normalize() +{ + long n; + const GF2E* p; + + n = rep.length(); + if (n == 0) return; + p = rep.elts() + n; + while (n > 0 && IsZero(*--p)) { + n--; + } + rep.SetLength(n); +} + + +long IsZero(const GF2EX& a) +{ + return a.rep.length() == 0; +} + + +long IsOne(const GF2EX& a) +{ + return a.rep.length() == 1 && IsOne(a.rep[0]); +} + +void GetCoeff(GF2E& x, const GF2EX& a, long i) +{ + if (i < 0 || i > deg(a)) + clear(x); + else + x = a.rep[i]; +} + +void SetCoeff(GF2EX& x, long i, const GF2E& a) +{ + long j, m; + + if (i < 0) + LogicError("SetCoeff: negative index"); + + if (NTL_OVERFLOW(i, 1, 0)) + LogicError("overflow in SetCoeff"); + + m = deg(x); + + if (i > m && IsZero(a)) return; + + if (i > m) { + /* careful: a may alias a coefficient of x */ + + long alloc = x.rep.allocated(); + + if (alloc > 0 && i >= alloc) { + GF2E aa = a; + x.rep.SetLength(i+1); + x.rep[i] = aa; + } + else { + x.rep.SetLength(i+1); + x.rep[i] = a; + } + + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + else + x.rep[i] = a; + + x.normalize(); +} + +void SetCoeff(GF2EX& x, long i, GF2 a) +{ + if (i < 0) + LogicError("SetCoeff: negative index"); + + if (a == 1) + SetCoeff(x, i); + else + SetCoeff(x, i, GF2E::zero()); +} + +void SetCoeff(GF2EX& x, long i, long a) +{ + if (i < 0) + LogicError("SetCoeff: negative index"); + + if ((a & 1) == 1) + SetCoeff(x, i); + else + SetCoeff(x, i, GF2E::zero()); +} + +void SetCoeff(GF2EX& x, long i) +{ + long j, m; + + if (i < 0) + LogicError("coefficient index out of range"); + + if (NTL_OVERFLOW(i, 1, 0)) + ResourceError("overflow in SetCoeff"); + + m = deg(x); + + if (i > m) { + x.rep.SetLength(i+1); + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + set(x.rep[i]); + x.normalize(); +} + + +void SetX(GF2EX& x) +{ + clear(x); + SetCoeff(x, 1); +} + + +long IsX(const GF2EX& a) +{ + return deg(a) == 1 && IsOne(LeadCoeff(a)) && IsZero(ConstTerm(a)); +} + + + +const GF2E& coeff(const GF2EX& a, long i) +{ + if (i < 0 || i > deg(a)) + return GF2E::zero(); + else + return a.rep[i]; +} + + +const GF2E& LeadCoeff(const GF2EX& a) +{ + if (IsZero(a)) + return GF2E::zero(); + else + return a.rep[deg(a)]; +} + +const GF2E& ConstTerm(const GF2EX& a) +{ + if (IsZero(a)) + return GF2E::zero(); + else + return a.rep[0]; +} + + + +void conv(GF2EX& x, const GF2E& a) +{ + if (IsZero(a)) + x.rep.SetLength(0); + else { + x.rep.SetLength(1); + x.rep[0] = a; + } +} + +void conv(GF2EX& x, long a) +{ + if (a & 1) + set(x); + else + clear(x); +} + +void conv(GF2EX& x, GF2 a) +{ + if (a == 1) + set(x); + else + clear(x); +} + +void conv(GF2EX& x, const ZZ& a) +{ + if (IsOdd(a)) + set(x); + else + clear(x); +} + +void conv(GF2EX& x, const GF2X& aa) +{ + GF2X a = aa; // in case a aliases the rep of a coefficient of x + + long n = deg(a)+1; + long i; + + x.rep.SetLength(n); + for (i = 0; i < n; i++) + conv(x.rep[i], coeff(a, i)); +} + +void conv(GF2EX& x, const vec_GF2E& a) +{ + x.rep = a; + 
x.normalize(); +} + + + +/* additional legacy conversions for v6 conversion regime */ + +void conv(GF2EX& x, const ZZX& a) +{ + long n = a.rep.length(); + long i; + + x.rep.SetLength(n); + for (i = 0; i < n; i++) + conv(x.rep[i], a.rep[i]); + + x.normalize(); +} + + +/* ------------------------------------- */ + + + + +void add(GF2EX& x, const GF2EX& a, const GF2EX& b) +{ + long da = deg(a); + long db = deg(b); + long minab = min(da, db); + long maxab = max(da, db); + x.rep.SetLength(maxab+1); + + long i; + const GF2E *ap, *bp; + GF2E* xp; + + for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts(); + i; i--, ap++, bp++, xp++) + add(*xp, (*ap), (*bp)); + + if (da > minab && &x != &a) + for (i = da-minab; i; i--, xp++, ap++) + *xp = *ap; + else if (db > minab && &x != &b) + for (i = db-minab; i; i--, xp++, bp++) + *xp = *bp; + else + x.normalize(); +} + +void add(GF2EX& x, const GF2EX& a, const GF2E& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + } + else if (&x == &a) { + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + GF2E *xp = x.rep.elts(); + add(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const GF2E *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + +void add(GF2EX& x, const GF2EX& a, GF2 b) +{ + if (a.rep.length() == 0) { + conv(x, b); + } + else { + if (&x != &a) x = a; + add(x.rep[0], x.rep[0], b); + x.normalize(); + } +} + +void add(GF2EX& x, const GF2EX& a, long b) +{ + if (a.rep.length() == 0) { + conv(x, b); + } + else { + if (&x != &a) x = a; + add(x.rep[0], x.rep[0], b); + x.normalize(); + } +} + + +void PlainMul(GF2EX& x, const GF2EX& a, const GF2EX& b) +{ + long da = deg(a); + long db = deg(b); + + if (da < 0 || db < 0) { + clear(x); + return; + } + + if (&a == &b) { + sqr(x, a); + return; + } + + long d = da+db; + + const GF2E *ap, *bp; + GF2E *xp; + + GF2EX la, lb; + + if (&x == &a) { + la = a; + ap = la.rep.elts(); + } + else + ap = a.rep.elts(); + + if (&x == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + x.rep.SetLength(d+1); + + xp = x.rep.elts(); + + long i, j, jmin, jmax; + GF2X t, accum; + + for (i = 0; i <= d; i++) { + jmin = max(0, i-db); + jmax = min(da, i); + clear(accum); + for (j = jmin; j <= jmax; j++) { + mul(t, rep(ap[j]), rep(bp[i-j])); + add(accum, accum, t); + } + conv(xp[i], accum); + } + x.normalize(); +} + + +void sqr(GF2EX& x, const GF2EX& a) +{ + long da = deg(a); + + if (da < 0) { + clear(x); + return; + } + + x.rep.SetLength(2*da+1); + long i; + + for (i = da; i > 0; i--) { + sqr(x.rep[2*i], a.rep[i]); + clear(x.rep[2*i-1]); + } + + sqr(x.rep[0], a.rep[0]); + + x.normalize(); +} + + + +static +void PlainMul1(GF2X *xp, const GF2X *ap, long sa, const GF2X& b) +{ + long i; + + for (i = 0; i < sa; i++) + mul(xp[i], ap[i], b); +} + + + + +static inline +void q_add(GF2X& x, const GF2X& a, const GF2X& b) + +// This is a quick-and-dirty add routine used by the karatsuba routine. +// It assumes that the output already has enough space allocated, +// thus avoiding any procedure calls. +// WARNING: it also accesses the underlying WordVector representation +// directly...that is dirty!. +// It shaves a few percent off the running time. 
+ +{ + _ntl_ulong *xp = x.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + const _ntl_ulong *bp = b.xrep.elts(); + + long sa = ap[-1]; + long sb = bp[-1]; + + long i; + + if (sa == sb) { + for (i = 0; i < sa; i++) + xp[i] = ap[i] ^ bp[i]; + + i = sa-1; + while (i >= 0 && !xp[i]) i--; + xp[-1] = i+1; + } + else if (sa < sb) { + for (i = 0; i < sa; i++) + xp[i] = ap[i] ^ bp[i]; + + for (; i < sb; i++) + xp[i] = bp[i]; + + xp[-1] = sb; + } + else { // sa > sb + for (i = 0; i < sb; i++) + xp[i] = ap[i] ^ bp[i]; + + for (; i < sa; i++) + xp[i] = ap[i]; + + xp[-1] = sa; + } +} + + +static inline +void q_copy(GF2X& x, const GF2X& a) +// see comments for q_add above + +{ + _ntl_ulong *xp = x.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + + long sa = ap[-1]; + long i; + + for (i = 0; i < sa; i++) + xp[i] = ap[i]; + + xp[-1] = sa; +} + + + +static +void KarFold(GF2X *T, const GF2X *b, long sb, long hsa) +{ + long m = sb - hsa; + long i; + + for (i = 0; i < m; i++) + q_add(T[i], b[i], b[hsa+i]); + + for (i = m; i < hsa; i++) + q_copy(T[i], b[i]); +} + + +static +void KarAdd(GF2X *T, const GF2X *b, long sb) +{ + long i; + + for (i = 0; i < sb; i++) + q_add(T[i], T[i], b[i]); +} + +static +void KarFix(GF2X *c, const GF2X *b, long sb, long hsa) +{ + long i; + + for (i = 0; i < hsa; i++) + q_copy(c[i], b[i]); + + for (i = hsa; i < sb; i++) + q_add(c[i], c[i], b[i]); +} + + + +static +void KarMul(GF2X *c, const GF2X *a, + long sa, const GF2X *b, long sb, GF2X *stk) +{ + if (sa < sb) { + { long t = sa; sa = sb; sb = t; } + { const GF2X *t = a; a = b; b = t; } + } + + if (sb == 1) { + if (sa == 1) + mul(*c, *a, *b); + else + PlainMul1(c, a, sa, *b); + + return; + } + + if (sb == 2 && sa == 2) { + mul(c[0], a[0], b[0]); + mul(c[2], a[1], b[1]); + q_add(stk[0], a[0], a[1]); + q_add(stk[1], b[0], b[1]); + mul(c[1], stk[0], stk[1]); + q_add(c[1], c[1], c[0]); + q_add(c[1], c[1], c[2]); + + return; + } + + long hsa = (sa + 1) >> 1; + + if (hsa < sb) { + /* normal case */ + + long hsa2 = hsa << 1; + + GF2X *T1, *T2, *T3; + + T1 = stk; stk += hsa; + T2 = stk; stk += hsa; + T3 = stk; stk += hsa2 - 1; + + /* compute T1 = a_lo + a_hi */ + + KarFold(T1, a, sa, hsa); + + /* compute T2 = b_lo + b_hi */ + + KarFold(T2, b, sb, hsa); + + /* recursively compute T3 = T1 * T2 */ + + KarMul(T3, T1, hsa, T2, hsa, stk); + + /* recursively compute a_hi * b_hi into high part of c */ + /* and subtract from T3 */ + + KarMul(c + hsa2, a+hsa, sa-hsa, b+hsa, sb-hsa, stk); + KarAdd(T3, c + hsa2, sa + sb - hsa2 - 1); + + + /* recursively compute a_lo*b_lo into low part of c */ + /* and subtract from T3 */ + + KarMul(c, a, hsa, b, hsa, stk); + KarAdd(T3, c, hsa2 - 1); + + clear(c[hsa2 - 1]); + + /* finally, add T3 * X^{hsa} to c */ + + KarAdd(c+hsa, T3, hsa2-1); + } + else { + /* degenerate case */ + + GF2X *T; + + T = stk; stk += hsa + sb - 1; + + /* recursively compute b*a_hi into high part of c */ + + KarMul(c + hsa, a + hsa, sa - hsa, b, sb, stk); + + /* recursively compute b*a_lo into T */ + + KarMul(T, a, hsa, b, sb, stk); + + KarFix(c, T, hsa + sb - 1, hsa); + } +} + +void ExtractBits(_ntl_ulong *cp, const _ntl_ulong *ap, long k, long n) + +// extract k bits from a at position n + +{ + long sc = (k + NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG; + + long wn = n/NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + long i; + + if (bn == 0) { + for (i = 0; i < sc; i++) + cp[i] = ap[i+wn]; + } + else { + for (i = 0; i < sc-1; i++) + cp[i] = (ap[i+wn] >> bn) | (ap[i+wn+1] << (NTL_BITS_PER_LONG - bn)); + + if (k > 
sc*NTL_BITS_PER_LONG - bn) + cp[sc-1] = (ap[sc+wn-1] >> bn)|(ap[sc+wn] << (NTL_BITS_PER_LONG - bn)); + else + cp[sc-1] = ap[sc+wn-1] >> bn; + } + + long p = k % NTL_BITS_PER_LONG; + if (p != 0) + cp[sc-1] &= ((1UL << p) - 1UL); + +} + + +void KronSubst(GF2X& aa, const GF2EX& a) +{ + long sa = a.rep.length(); + long blocksz = 2*GF2E::degree() - 1; + + long saa = sa*blocksz; + + long wsaa = (saa + NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG; + + aa.xrep.SetLength(wsaa+1); + + _ntl_ulong *paa = aa.xrep.elts(); + + + long i; + for (i = 0; i < wsaa+1; i++) + paa[i] = 0; + + for (i = 0; i < sa; i++) + ShiftAdd(paa, rep(a.rep[i]).xrep.elts(), rep(a.rep[i]).xrep.length(), + blocksz*i); + + aa.normalize(); +} + +void KronMul(GF2EX& x, const GF2EX& a, const GF2EX& b) +{ + if (a == 0 || b == 0) { + clear(x); + return; + } + + GF2X aa, bb, xx; + + long sx = deg(a) + deg(b) + 1; + long blocksz = 2*GF2E::degree() - 1; + + if (NTL_OVERFLOW(blocksz, sx, 0)) + ResourceError("overflow in GF2EX KronMul"); + + KronSubst(aa, a); + KronSubst(bb, b); + mul(xx, aa, bb); + + GF2X c; + + long wc = (blocksz + NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG; + + x.rep.SetLength(sx); + + long i; + for (i = 0; i < sx-1; i++) { + c.xrep.SetLength(wc); + ExtractBits(c.xrep.elts(), xx.xrep.elts(), blocksz, i*blocksz); + c.normalize(); + conv(x.rep[i], c); + } + + long last_blocksz = deg(xx) - (sx-1)*blocksz + 1; + wc = (last_blocksz + NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG; + c.xrep.SetLength(wc); + + ExtractBits(c.xrep.elts(), xx.xrep.elts(), last_blocksz, (sx-1)*blocksz); + c.normalize(); + conv(x.rep[sx-1], c); + + x.normalize(); +} + + + +void mul(GF2EX& c, const GF2EX& a, const GF2EX& b) +{ + if (IsZero(a) || IsZero(b)) { + clear(c); + return; + } + + if (&a == &b) { + sqr(c, a); + return; + } + + long sa = a.rep.length(); + long sb = b.rep.length(); + + if (sa == 1) { + mul(c, b, a.rep[0]); + return; + } + + if (sb == 1) { + mul(c, a, b.rep[0]); + return; + } + + if (sa < GF2E::KarCross() || sb < GF2E::KarCross()) { + PlainMul(c, a, b); + return; + } + + if (GF2E::WordLength() <= 1) { + KronMul(c, a, b); + return; + } + + + /* karatsuba */ + + long n, hn, sp; + + n = max(sa, sb); + sp = 0; + do { + hn = (n+1) >> 1; + sp += (hn << 2) - 1; + n = hn; + } while (n > 1); + + GF2XVec stk; + stk.SetSize(sp + 2*(sa+sb)-1, 2*GF2E::WordLength()); + + long i; + + for (i = 0; i < sa; i++) + stk[i+sa+sb-1] = rep(a.rep[i]); + + for (i = 0; i < sb; i++) + stk[i+2*sa+sb-1] = rep(b.rep[i]); + + KarMul(&stk[0], &stk[sa+sb-1], sa, &stk[2*sa+sb-1], sb, + &stk[2*(sa+sb)-1]); + + c.rep.SetLength(sa+sb-1); + + for (i = 0; i < sa+sb-1; i++) + conv(c.rep[i], stk[i]); + + c.normalize(); +} + + +void MulTrunc(GF2EX& x, const GF2EX& a, const GF2EX& b, long n) +{ + GF2EX t; + mul(t, a, b); + trunc(x, t, n); +} + +void SqrTrunc(GF2EX& x, const GF2EX& a, long n) +{ + GF2EX t; + sqr(t, a); + trunc(x, t, n); +} + + + +void PlainDivRem(GF2EX& q, GF2EX& r, const GF2EX& a, const GF2EX& b) +{ + long da, db, dq, i, j, LCIsOne; + const GF2E *bp; + GF2E *qp; + GF2X *xp; + + + GF2E LCInv, t; + GF2X s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("GF2EX: division by zero"); + + if (da < db) { + r = a; + clear(q); + return; + } + + GF2EX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + GF2XVec x(da + 1, 2*GF2E::WordLength()); + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + 
q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainRem(GF2EX& r, const GF2EX& a, const GF2EX& b, GF2XVec& x) +{ + long da, db, dq, i, j, LCIsOne; + const GF2E *bp; + GF2X *xp; + + + GF2E LCInv, t; + GF2X s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("GF2EX: division by zero"); + + if (da < db) { + r = a; + return; + } + + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainDivRem(GF2EX& q, GF2EX& r, const GF2EX& a, const GF2EX& b, GF2XVec& x) +{ + long da, db, dq, i, j, LCIsOne; + const GF2E *bp; + GF2E *qp; + GF2X *xp; + + + GF2E LCInv, t; + GF2X s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("GF2EX: division by zero"); + + if (da < db) { + r = a; + clear(q); + return; + } + + GF2EX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainDiv(GF2EX& q, const GF2EX& a, const GF2EX& b) +{ + long da, db, dq, i, j, LCIsOne; + const GF2E *bp; + GF2E *qp; + GF2X *xp; + + + GF2E LCInv, t; + GF2X s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("GF2EX: division by zero"); + + if (da < db) { + clear(q); + return; + } + + GF2EX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + GF2XVec x(da + 1 - db, 2*GF2E::WordLength()); + + for (i = db; i <= da; i++) + x[i-db] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i]); + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + + long lastj = max(0, db-i); + + for (j = db-1; j >= lastj; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j-db], xp[i+j-db], s); + } + } +} + +void PlainRem(GF2EX& r, const GF2EX& a, const GF2EX& b) +{ + long da, db, dq, i, j, LCIsOne; + const GF2E *bp; + GF2X *xp; + + + GF2E LCInv, t; + GF2X s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("GF2EX: division by zero"); + + if (da < db) { + r = a; + return; + } + + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + GF2XVec x(da + 1, 2*GF2E::WordLength()); + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq 
= da - db; + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + +void mul(GF2EX& x, const GF2EX& a, const GF2E& b) +{ + if (IsZero(a) || IsZero(b)) { + clear(x); + return; + } + + GF2X bb, t; + long i, da; + + const GF2E *ap; + GF2E* xp; + + bb = rep(b); + da = deg(a); + x.rep.SetLength(da+1); + ap = a.rep.elts(); + xp = x.rep.elts(); + + for (i = 0; i <= da; i++) { + mul(t, rep(ap[i]), bb); + conv(xp[i], t); + } + + x.normalize(); +} + +void mul(GF2EX& x, const GF2EX& a, GF2 b) +{ + if (b == 0) + clear(x); + else + x = a; +} + +void mul(GF2EX& x, const GF2EX& a, long b) +{ + if ((b & 1) == 0) + clear(x); + else + x = a; +} + + +void GCD(GF2EX& x, const GF2EX& a, const GF2EX& b) +{ + GF2E t; + + if (IsZero(b)) + x = a; + else if (IsZero(a)) + x = b; + else { + long n = max(deg(a),deg(b)) + 1; + GF2EX u(INIT_SIZE, n), v(INIT_SIZE, n); + GF2XVec tmp(n, 2*GF2E::WordLength()); + + u = a; + v = b; + do { + PlainRem(u, u, v, tmp); + swap(u, v); + } while (!IsZero(v)); + + x = u; + } + + if (IsZero(x)) return; + if (IsOne(LeadCoeff(x))) return; + + /* make gcd monic */ + + + inv(t, LeadCoeff(x)); + mul(x, x, t); +} + + + + + +void XGCD(GF2EX& d, GF2EX& s, GF2EX& t, const GF2EX& a, const GF2EX& b) +{ + GF2E z; + + + if (IsZero(b)) { + set(s); + clear(t); + d = a; + } + else if (IsZero(a)) { + clear(s); + set(t); + d = b; + } + else { + long e = max(deg(a), deg(b)) + 1; + + GF2EX temp(INIT_SIZE, e), u(INIT_SIZE, e), v(INIT_SIZE, e), + u0(INIT_SIZE, e), v0(INIT_SIZE, e), + u1(INIT_SIZE, e), v1(INIT_SIZE, e), + u2(INIT_SIZE, e), v2(INIT_SIZE, e), q(INIT_SIZE, e); + + + set(u1); clear(v1); + clear(u2); set(v2); + u = a; v = b; + + do { + DivRem(q, u, u, v); + swap(u, v); + u0 = u2; + v0 = v2; + mul(temp, q, u2); + add(u2, u1, temp); + mul(temp, q, v2); + add(v2, v1, temp); + u1 = u0; + v1 = v0; + } while (!IsZero(v)); + + d = u; + s = u1; + t = v1; + } + + if (IsZero(d)) return; + if (IsOne(LeadCoeff(d))) return; + + /* make gcd monic */ + + inv(z, LeadCoeff(d)); + mul(d, d, z); + mul(s, s, z); + mul(t, t, z); +} + + +void MulMod(GF2EX& x, const GF2EX& a, const GF2EX& b, const GF2EX& f) +{ + if (deg(a) >= deg(f) || deg(b) >= deg(f) || deg(f) == 0) + LogicError("MulMod: bad args"); + + GF2EX t; + + mul(t, a, b); + rem(x, t, f); +} + +void SqrMod(GF2EX& x, const GF2EX& a, const GF2EX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("SqrMod: bad args"); + + GF2EX t; + + sqr(t, a); + rem(x, t, f); +} + + +void InvMod(GF2EX& x, const GF2EX& a, const GF2EX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("InvMod: bad args"); + + GF2EX d, xx, t; + + XGCD(d, xx, t, a, f); + if (!IsOne(d)) + InvModError("GF2EX InvMod: can't compute multiplicative inverse"); + + x = xx; +} + +long InvModStatus(GF2EX& x, const GF2EX& a, const GF2EX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("InvModStatus: bad args"); + + GF2EX d, t; + + XGCD(d, x, t, a, f); + if (!IsOne(d)) { + x = d; + return 1; + } + else + return 0; +} + + + + +static +void MulByXModAux(GF2EX& h, const GF2EX& a, const GF2EX& f) +{ + long i, n, m; + GF2E* hh; + const GF2E *aa, *ff; + + GF2E t, z; + + n = deg(f); + m = deg(a); + + if (m >= n || n == 0) LogicError("MulByXMod: bad args"); + + if (m < 0) { + clear(h); + return; + } + + if (m < n-1) { + h.rep.SetLength(m+2); + hh = h.rep.elts(); + aa = 
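+ // in this branch deg(a) < deg(f)-1, so multiplying by X cannot
+ // reach degree deg(f) and no reduction is needed -- the
+ // coefficients are simply shifted up one slot: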
a.rep.elts(); + for (i = m+1; i >= 1; i--) + hh[i] = aa[i-1]; + clear(hh[0]); + } + else { + h.rep.SetLength(n); + hh = h.rep.elts(); + aa = a.rep.elts(); + ff = f.rep.elts(); + z = aa[n-1]; + if (!IsOne(ff[n])) + div(z, z, ff[n]); + for (i = n-1; i >= 1; i--) { + mul(t, z, ff[i]); + add(hh[i], aa[i-1], t); + } + mul(hh[0], z, ff[0]); + h.normalize(); + } +} + +void MulByXMod(GF2EX& h, const GF2EX& a, const GF2EX& f) +{ + if (&h == &f) { + GF2EX hh; + MulByXModAux(hh, a, f); + h = hh; + } + else + MulByXModAux(h, a, f); +} + + + + +void random(GF2EX& x, long n) +{ + long i; + + x.rep.SetLength(n); + + for (i = 0; i < n; i++) + random(x.rep[i]); + + x.normalize(); +} + + +void CopyReverse(GF2EX& x, const GF2EX& a, long hi) + + // x[0..hi] = reverse(a[0..hi]), with zero fill + // input may not alias output + +{ + long i, j, n, m; + + n = hi+1; + m = a.rep.length(); + + x.rep.SetLength(n); + + const GF2E* ap = a.rep.elts(); + GF2E* xp = x.rep.elts(); + + for (i = 0; i < n; i++) { + j = hi-i; + if (j < 0 || j >= m) + clear(xp[i]); + else + xp[i] = ap[j]; + } + + x.normalize(); +} + + + +void trunc(GF2EX& x, const GF2EX& a, long m) + +// x = a % X^m, output may alias input + +{ + if (m < 0) LogicError("trunc: bad args"); + + if (&x == &a) { + if (x.rep.length() > m) { + x.rep.SetLength(m); + x.normalize(); + } + } + else { + long n; + long i; + GF2E* xp; + const GF2E* ap; + + n = min(a.rep.length(), m); + x.rep.SetLength(n); + + xp = x.rep.elts(); + ap = a.rep.elts(); + + for (i = 0; i < n; i++) xp[i] = ap[i]; + + x.normalize(); + } +} + +void NewtonInvTrunc(GF2EX& c, const GF2EX& a, long e) +{ + GF2E x; + + inv(x, ConstTerm(a)); + + if (e == 1) { + conv(c, x); + return; + } + + vec_long E; + E.SetLength(0); + append(E, e); + while (e > 1) { + e = (e+1)/2; + append(E, e); + } + + long L = E.length(); + + GF2EX g, g0, g1, g2; + + + g.rep.SetMaxLength(E[0]); + g0.rep.SetMaxLength(E[0]); + g1.rep.SetMaxLength((3*E[0]+1)/2); + g2.rep.SetMaxLength(E[0]); + + conv(g, x); + + long i; + + for (i = L-1; i > 0; i--) { + // lift from E[i] to E[i-1] + + long k = E[i]; + long l = E[i-1]-E[i]; + + trunc(g0, a, k+l); + + mul(g1, g0, g); + RightShift(g1, g1, k); + trunc(g1, g1, l); + + mul(g2, g1, g); + trunc(g2, g2, l); + LeftShift(g2, g2, k); + + add(g, g, g2); + } + + c = g; +} + + +void InvTrunc(GF2EX& c, const GF2EX& a, long e) +{ + if (e < 0) LogicError("InvTrunc: bad args"); + if (e == 0) { + clear(c); + return; + } + + if (NTL_OVERFLOW(e, 1, 0)) + ResourceError("overflow in InvTrunc"); + + NewtonInvTrunc(c, a, e); +} + + + +const long GF2EX_MOD_PLAIN = 0; +const long GF2EX_MOD_MUL = 1; + +void build(GF2EXModulus& F, const GF2EX& f) +{ + long n = deg(f); + + if (n <= 0) LogicError("build(GF2EXModulus,GF2EX): deg(f) <= 0"); + + if (NTL_OVERFLOW(n, GF2E::degree(), 0)) + ResourceError("build(GF2EXModulus,GF2EX): overflow"); + + F.tracevec.make(); + + F.f = f; + F.n = n; + + if (F.n < GF2E::ModCross()) { + F.method = GF2EX_MOD_PLAIN; + } + else { + F.method = GF2EX_MOD_MUL; + GF2EX P1; + GF2EX P2; + + CopyReverse(P1, f, n); + InvTrunc(P2, P1, n-1); + CopyReverse(P1, P2, n-2); + trunc(F.h0, P1, n-2); + trunc(F.f0, f, n); + F.hlc = ConstTerm(P2); + } +} + +GF2EXModulus::GF2EXModulus() +{ + n = -1; + method = GF2EX_MOD_PLAIN; +} + + + +GF2EXModulus::GF2EXModulus(const GF2EX& ff) +{ + n = -1; + method = GF2EX_MOD_PLAIN; + + build(*this, ff); +} + + + + + +void UseMulRem21(GF2EX& r, const GF2EX& a, const GF2EXModulus& F) +{ + GF2EX P1; + GF2EX P2; + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, 
P2, F.n-2); + if (!IsOne(F.hlc)) mul(P1, P1, F.hlc); + add(P2, P2, P1); + mul(P1, P2, F.f0); + trunc(P1, P1, F.n); + trunc(r, a, F.n); + add(r, r, P1); +} + +void UseMulDivRem21(GF2EX& q, GF2EX& r, const GF2EX& a, const GF2EXModulus& F) +{ + GF2EX P1; + GF2EX P2; + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, P2, F.n-2); + if (!IsOne(F.hlc)) mul(P1, P1, F.hlc); + add(P2, P2, P1); + mul(P1, P2, F.f0); + trunc(P1, P1, F.n); + trunc(r, a, F.n); + add(r, r, P1); + q = P2; +} + +void UseMulDiv21(GF2EX& q, const GF2EX& a, const GF2EXModulus& F) +{ + GF2EX P1; + GF2EX P2; + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, P2, F.n-2); + if (!IsOne(F.hlc)) mul(P1, P1, F.hlc); + add(P2, P2, P1); + q = P2; + +} + +void rem(GF2EX& x, const GF2EX& a, const GF2EXModulus& F) +{ + if (F.method == GF2EX_MOD_PLAIN) { + PlainRem(x, a, F.f); + return; + } + + long da = deg(a); + long n = F.n; + + if (da <= 2*n-2) { + UseMulRem21(x, a, F); + return; + } + + GF2EX buf(INIT_SIZE, 2*n-1); + + long a_len = da+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + UseMulRem21(buf, buf, F); + + a_len -= amt; + } + + x = buf; +} + +void DivRem(GF2EX& q, GF2EX& r, const GF2EX& a, const GF2EXModulus& F) +{ + if (F.method == GF2EX_MOD_PLAIN) { + PlainDivRem(q, r, a, F.f); + return; + } + + long da = deg(a); + long n = F.n; + + if (da <= 2*n-2) { + UseMulDivRem21(q, r, a, F); + return; + } + + GF2EX buf(INIT_SIZE, 2*n-1); + GF2EX qbuf(INIT_SIZE, n-1); + + GF2EX qq; + qq.rep.SetLength(da-n+1); + + long a_len = da+1; + long q_hi = da-n+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + UseMulDivRem21(qbuf, buf, buf, F); + long dl = qbuf.rep.length(); + a_len = a_len - amt; + for(i = 0; i < dl; i++) + qq.rep[a_len+i] = qbuf.rep[i]; + for(i = dl+a_len; i < q_hi; i++) + clear(qq.rep[i]); + q_hi = a_len; + } + + r = buf; + + qq.normalize(); + q = qq; +} + +void div(GF2EX& q, const GF2EX& a, const GF2EXModulus& F) +{ + if (F.method == GF2EX_MOD_PLAIN) { + PlainDiv(q, a, F.f); + return; + } + + long da = deg(a); + long n = F.n; + + if (da <= 2*n-2) { + UseMulDiv21(q, a, F); + return; + } + + GF2EX buf(INIT_SIZE, 2*n-1); + GF2EX qbuf(INIT_SIZE, n-1); + + GF2EX qq; + qq.rep.SetLength(da-n+1); + + long a_len = da+1; + long q_hi = da-n+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + a_len = a_len - amt; + if (a_len > 0) + UseMulDivRem21(qbuf, buf, buf, F); + else + UseMulDiv21(qbuf, buf, F); + + long dl = qbuf.rep.length(); + for(i = 0; i < dl; i++) + qq.rep[a_len+i] = qbuf.rep[i]; + for(i = dl+a_len; i < q_hi; i++) + clear(qq.rep[i]); + q_hi = a_len; + } + + qq.normalize(); + q = qq; +} + + + + +void MulMod(GF2EX& c, const GF2EX& a, const GF2EX& b, const GF2EXModulus& 
F) +{ + if (deg(a) >= F.n || deg(b) >= F.n) LogicError("MulMod: bad args"); + + GF2EX t; + mul(t, a, b); + rem(c, t, F); +} + + +void SqrMod(GF2EX& c, const GF2EX& a, const GF2EXModulus& F) +{ + if (deg(a) >= F.n) LogicError("MulMod: bad args"); + + GF2EX t; + sqr(t, a); + rem(c, t, F); +} + + + +static +long OptWinSize(long n) +// finds k that minimizes n/(k+1) + 2^{k-1} + +{ + long k; + double v, v_new; + + + v = n/2.0 + 1.0; + k = 1; + + for (;;) { + v_new = n/(double(k+2)) + double(1L << k); + if (v_new >= v) break; + v = v_new; + k++; + } + + return k; +} + + + +void PowerMod(GF2EX& h, const GF2EX& g, const ZZ& e, const GF2EXModulus& F) +// h = g^e mod f using "sliding window" algorithm +{ + if (deg(g) >= F.n) LogicError("PowerMod: bad args"); + + if (e == 0) { + set(h); + return; + } + + if (e == 1) { + h = g; + return; + } + + if (e == -1) { + InvMod(h, g, F); + return; + } + + if (e == 2) { + SqrMod(h, g, F); + return; + } + + if (e == -2) { + SqrMod(h, g, F); + InvMod(h, h, F); + return; + } + + + long n = NumBits(e); + + GF2EX res; + res.SetMaxLength(F.n); + set(res); + + long i; + + if (n < 16) { + // plain square-and-multiply algorithm + + for (i = n - 1; i >= 0; i--) { + SqrMod(res, res, F); + if (bit(e, i)) + MulMod(res, res, g, F); + } + + if (e < 0) InvMod(res, res, F); + + h = res; + return; + } + + long k = OptWinSize(n); + k = min(k, 5); + + vec_GF2EX v; + + v.SetLength(1L << (k-1)); + + v[0] = g; + + if (k > 1) { + GF2EX t; + SqrMod(t, g, F); + + for (i = 1; i < (1L << (k-1)); i++) + MulMod(v[i], v[i-1], t, F); + } + + + long val; + long cnt; + long m; + + val = 0; + for (i = n-1; i >= 0; i--) { + val = (val << 1) | bit(e, i); + if (val == 0) + SqrMod(res, res, F); + else if (val >= (1L << (k-1)) || i == 0) { + cnt = 0; + while ((val & 1) == 0) { + val = val >> 1; + cnt++; + } + + m = val; + while (m > 0) { + SqrMod(res, res, F); + m = m >> 1; + } + + MulMod(res, res, v[val >> 1], F); + + while (cnt > 0) { + SqrMod(res, res, F); + cnt--; + } + + val = 0; + } + } + + if (e < 0) InvMod(res, res, F); + + h = res; +} + + + + +void PowerXMod(GF2EX& hh, const ZZ& e, const GF2EXModulus& F) +{ + if (F.n < 0) LogicError("PowerXMod: uninitialized modulus"); + + if (IsZero(e)) { + set(hh); + return; + } + + long n = NumBits(e); + long i; + + GF2EX h; + + h.SetMaxLength(F.n+1); + set(h); + + for (i = n - 1; i >= 0; i--) { + SqrMod(h, h, F); + if (bit(e, i)) { + MulByXMod(h, h, F.f); + } + } + + if (e < 0) InvMod(h, h, F); + + hh = h; +} + + + + + +void UseMulRem(GF2EX& r, const GF2EX& a, const GF2EX& b) +{ + GF2EX P1; + GF2EX P2; + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + mul(P1, P2, b); + add(P1, P1, a); + + r = P1; +} + +void UseMulDivRem(GF2EX& q, GF2EX& r, const GF2EX& a, const GF2EX& b) +{ + GF2EX P1; + GF2EX P2; + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + mul(P1, P2, b); + add(P1, P1, a); + + r = P1; + q = P2; +} + +void UseMulDiv(GF2EX& q, const GF2EX& a, const GF2EX& b) +{ + GF2EX P1; + GF2EX P2; + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + + q = P2; +} + + + +void DivRem(GF2EX& q, GF2EX& r, const 
GF2EX& a, const GF2EX& b) +{ + long sa = a.rep.length(); + long sb = b.rep.length(); + + if (sb < GF2E::DivCross() || sa-sb < GF2E::DivCross()) + PlainDivRem(q, r, a, b); + else if (sa < 4*sb) + UseMulDivRem(q, r, a, b); + else { + GF2EXModulus B; + build(B, b); + DivRem(q, r, a, B); + } +} + +void div(GF2EX& q, const GF2EX& a, const GF2EX& b) +{ + long sa = a.rep.length(); + long sb = b.rep.length(); + + if (sb < GF2E::DivCross() || sa-sb < GF2E::DivCross()) + PlainDiv(q, a, b); + else if (sa < 4*sb) + UseMulDiv(q, a, b); + else { + GF2EXModulus B; + build(B, b); + div(q, a, B); + } +} + +void div(GF2EX& q, const GF2EX& a, const GF2E& b) +{ + GF2E t; + inv(t, b); + mul(q, a, t); +} + +void div(GF2EX& q, const GF2EX& a, GF2 b) +{ + if (b == 0) + ArithmeticError("div: division by zero"); + + q = a; +} + +void div(GF2EX& q, const GF2EX& a, long b) +{ + if ((b & 1) == 0) + ArithmeticError("div: division by zero"); + + q = a; +} + + + +void rem(GF2EX& r, const GF2EX& a, const GF2EX& b) +{ + long sa = a.rep.length(); + long sb = b.rep.length(); + + if (sb < GF2E::DivCross() || sa-sb < GF2E::DivCross()) + PlainRem(r, a, b); + else if (sa < 4*sb) + UseMulRem(r, a, b); + else { + GF2EXModulus B; + build(B, b); + rem(r, a, B); + } +} + + +void diff(GF2EX& x, const GF2EX& a) +{ + long n = deg(a); + long i; + + if (n <= 0) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(n); + + for (i = 0; i <= n-1; i++) { + if ((i+1)&1) + x.rep[i] = a.rep[i+1]; + else + clear(x.rep[i]); + } + + if (&x == &a) + x.rep.SetLength(n); + + x.normalize(); +} + + +void RightShift(GF2EX& x, const GF2EX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) ResourceError("overflow in RightShift"); + LeftShift(x, a, -n); + return; + } + + long da = deg(a); + long i; + + if (da < n) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(da-n+1); + + for (i = 0; i <= da-n; i++) + x.rep[i] = a.rep[i+n]; + + if (&x == &a) + x.rep.SetLength(da-n+1); + + x.normalize(); +} + +void LeftShift(GF2EX& x, const GF2EX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) + clear(x); + else + RightShift(x, a, -n); + return; + } + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in LeftShift"); + + long m = a.rep.length(); + + x.rep.SetLength(m+n); + + long i; + for (i = m-1; i >= 0; i--) + x.rep[i+n] = a.rep[i]; + + for (i = 0; i < n; i++) + clear(x.rep[i]); +} + + +void ShiftAdd(GF2EX& U, const GF2EX& V, long n) +// assumes input does not alias output +{ + if (IsZero(V)) + return; + + long du = deg(U); + long dv = deg(V); + + long d = max(du, n+dv); + + U.rep.SetLength(d+1); + long i; + + for (i = du+1; i <= d; i++) + clear(U.rep[i]); + + for (i = 0; i <= dv; i++) + add(U.rep[i+n], U.rep[i+n], V.rep[i]); + + U.normalize(); +} + + +void IterBuild(GF2E* a, long n) +{ + long i, k; + GF2E b, t; + + if (n <= 0) return; + + for (k = 1; k <= n-1; k++) { + b = a[k]; + add(a[k], b, a[k-1]); + for (i = k-1; i >= 1; i--) { + mul(t, a[i], b); + add(a[i], t, a[i-1]); + } + mul(a[0], a[0], b); + } +} + + + +void BuildFromRoots(GF2EX& x, const vec_GF2E& a) +{ + long n = a.length(); + + if (n == 0) { + set(x); + return; + } + + x.rep.SetMaxLength(n+1); + x.rep = a; + IterBuild(&x.rep[0], n); + x.rep.SetLength(n+1); + SetCoeff(x, n); +} + + + +void eval(GF2E& b, const GF2EX& f, const GF2E& a) +// does a Horner evaluation +{ + GF2E acc; + long i; + + clear(acc); + for (i = deg(f); i >= 0; i--) { + mul(acc, acc, a); + add(acc, acc, 
f.rep[i]); + } + + b = acc; +} + + + +void eval(vec_GF2E& b, const GF2EX& f, const vec_GF2E& a) +// naive algorithm: repeats Horner +{ + if (&b == &f.rep) { + vec_GF2E bb; + eval(bb, f, a); + b = bb; + return; + } + + long m = a.length(); + b.SetLength(m); + long i; + for (i = 0; i < m; i++) + eval(b[i], f, a[i]); +} + + + + +void interpolate(GF2EX& f, const vec_GF2E& a, const vec_GF2E& b) +{ + long m = a.length(); + if (b.length() != m) LogicError("interpolate: vector length mismatch"); + + if (m == 0) { + clear(f); + return; + } + + vec_GF2E prod; + prod = a; + + GF2E t1, t2; + + long k, i; + + vec_GF2E res; + res.SetLength(m); + + for (k = 0; k < m; k++) { + + const GF2E& aa = a[k]; + + set(t1); + for (i = k-1; i >= 0; i--) { + mul(t1, t1, aa); + add(t1, t1, prod[i]); + } + + clear(t2); + for (i = k-1; i >= 0; i--) { + mul(t2, t2, aa); + add(t2, t2, res[i]); + } + + + inv(t1, t1); + sub(t2, b[k], t2); + mul(t1, t1, t2); + + for (i = 0; i < k; i++) { + mul(t2, prod[i], t1); + add(res[i], res[i], t2); + } + + res[k] = t1; + + if (k < m-1) { + if (k == 0) + negate(prod[0], prod[0]); + else { + negate(t1, a[k]); + add(prod[k], t1, prod[k-1]); + for (i = k-1; i >= 1; i--) { + mul(t2, prod[i], t1); + add(prod[i], t2, prod[i-1]); + } + mul(prod[0], prod[0], t1); + } + } + } + + while (m > 0 && IsZero(res[m-1])) m--; + res.SetLength(m); + f.rep = res; +} + + +void InnerProduct(GF2EX& x, const vec_GF2E& v, long low, long high, + const vec_GF2EX& H, long n, GF2XVec& t) +{ + GF2X s; + long i, j; + + for (j = 0; j < n; j++) + clear(t[j]); + + high = min(high, v.length()-1); + for (i = low; i <= high; i++) { + const vec_GF2E& h = H[i-low].rep; + long m = h.length(); + const GF2X& w = rep(v[i]); + + for (j = 0; j < m; j++) { + mul(s, w, rep(h[j])); + add(t[j], t[j], s); + } + } + + x.rep.SetLength(n); + for (j = 0; j < n; j++) + conv(x.rep[j], t[j]); + x.normalize(); +} + + +void CompMod(GF2EX& x, const GF2EX& g, const GF2EXArgument& A, + const GF2EXModulus& F) +{ + if (deg(g) <= 0) { + x = g; + return; + } + + + GF2EX s, t; + GF2XVec scratch(F.n, 2*GF2E::WordLength()); + + long m = A.H.length() - 1; + long l = ((g.rep.length()+m-1)/m) - 1; + + const GF2EX& M = A.H[m]; + + InnerProduct(t, g.rep, l*m, l*m + m - 1, A.H, F.n, scratch); + for (long i = l-1; i >= 0; i--) { + InnerProduct(s, g.rep, i*m, i*m + m - 1, A.H, F.n, scratch); + MulMod(t, t, M, F); + add(t, t, s); + } + + x = t; +} + + +void build(GF2EXArgument& A, const GF2EX& h, const GF2EXModulus& F, long m) +{ + long i; + + if (m <= 0 || deg(h) >= F.n) + LogicError("build GF2EXArgument: bad args"); + + if (m > F.n) m = F.n; + + if (GF2EXArgBound > 0) { + double sz = GF2E::storage(); + sz = sz*F.n; + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_GF2E); + sz = sz/1024; + m = min(m, long(GF2EXArgBound/sz)); + m = max(m, 1); + } + + A.H.SetLength(m+1); + + set(A.H[0]); + A.H[1] = h; + for (i = 2; i <= m; i++) + MulMod(A.H[i], A.H[i-1], h, F); +} + + + + +NTL_CHEAP_THREAD_LOCAL +long GF2EXArgBound = 0; + + +void CompMod(GF2EX& x, const GF2EX& g, const GF2EX& h, const GF2EXModulus& F) + // x = g(h) mod f +{ + long m = SqrRoot(g.rep.length()); + + if (m == 0) { + clear(x); + return; + } + + GF2EXArgument A; + + build(A, h, F, m); + + CompMod(x, g, A, F); +} + + + + +void Comp2Mod(GF2EX& x1, GF2EX& x2, const GF2EX& g1, const GF2EX& g2, + const GF2EX& h, const GF2EXModulus& F) + +{ + long m = SqrRoot(g1.rep.length() + g2.rep.length()); + + if (m == 0) { + clear(x1); + clear(x2); + return; + } + + GF2EXArgument A; + + build(A, h, F, m); + + GF2EX xx1, 
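+ // the two compositions below share the one precomputed table
+ // A.H = (1, h, h^2, ..., h^m) mod f, which is the point of
+ // Comp2Mod over two independent CompMod calls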
xx2; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + + x1 = xx1; + x2 = xx2; +} + +void Comp3Mod(GF2EX& x1, GF2EX& x2, GF2EX& x3, + const GF2EX& g1, const GF2EX& g2, const GF2EX& g3, + const GF2EX& h, const GF2EXModulus& F) + +{ + long m = SqrRoot(g1.rep.length() + g2.rep.length() + g3.rep.length()); + + if (m == 0) { + clear(x1); + clear(x2); + clear(x3); + return; + } + + GF2EXArgument A; + + build(A, h, F, m); + + GF2EX xx1, xx2, xx3; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + CompMod(xx3, g3, A, F); + + x1 = xx1; + x2 = xx2; + x3 = xx3; +} + + + + + +void build(GF2EXTransMultiplier& B, const GF2EX& b, const GF2EXModulus& F) +{ + long db = deg(b); + + if (db >= F.n) LogicError("build TransMultiplier: bad args"); + + GF2EX t; + + LeftShift(t, b, F.n-1); + div(t, t, F); + + // we optimize for low degree b + + long d; + + d = deg(t); + if (d < 0) + B.shamt_fbi = 0; + else + B.shamt_fbi = F.n-2 - d; + + CopyReverse(B.fbi, t, d); + + // The following code optimizes the case when + // f = X^n + low degree poly + + trunc(t, F.f, F.n); + d = deg(t); + if (d < 0) + B.shamt = 0; + else + B.shamt = d; + + CopyReverse(B.f0, t, d); + + if (db < 0) + B.shamt_b = 0; + else + B.shamt_b = db; + + CopyReverse(B.b, b, db); +} + +void TransMulMod(GF2EX& x, const GF2EX& a, const GF2EXTransMultiplier& B, + const GF2EXModulus& F) +{ + if (deg(a) >= F.n) LogicError("TransMulMod: bad args"); + + GF2EX t1, t2; + + mul(t1, a, B.b); + RightShift(t1, t1, B.shamt_b); + + mul(t2, a, B.f0); + RightShift(t2, t2, B.shamt); + trunc(t2, t2, F.n-1); + + mul(t2, t2, B.fbi); + if (B.shamt_fbi > 0) LeftShift(t2, t2, B.shamt_fbi); + trunc(t2, t2, F.n-1); + LeftShift(t2, t2, 1); + + add(x, t1, t2); +} + + +void UpdateMap(vec_GF2E& x, const vec_GF2E& a, + const GF2EXTransMultiplier& B, const GF2EXModulus& F) +{ + GF2EX xx; + TransMulMod(xx, to_GF2EX(a), B, F); + x = xx.rep; +} + + + +static +void ProjectPowers(vec_GF2E& x, const GF2EX& a, long k, + const GF2EXArgument& H, const GF2EXModulus& F) +{ + if (k < 0 || deg(a) >= F.n) + LogicError("ProjectPowers: bad args"); + + if (NTL_OVERFLOW(k, 1, 0)) + ResourceError("ProjectPowers: excessive args"); + + long m = H.H.length()-1; + long l = (k+m-1)/m - 1; + + GF2EXTransMultiplier M; + build(M, H.H[m], F); + + GF2EX s; + s = a; + + x.SetLength(k); + + long i; + + for (i = 0; i <= l; i++) { + long m1 = min(m, k-i*m); + for (long j = 0; j < m1; j++) + InnerProduct(x[i*m+j], H.H[j].rep, s.rep); + if (i < l) + TransMulMod(s, s, M, F); + } +} + +static +void ProjectPowers(vec_GF2E& x, const GF2EX& a, long k, const GF2EX& h, + const GF2EXModulus& F) +{ + if (k < 0 || deg(a) >= F.n || deg(h) >= F.n) + LogicError("ProjectPowers: bad args"); + + if (k == 0) { + x.SetLength(0);; + return; + } + + long m = SqrRoot(k); + + GF2EXArgument H; + build(H, h, F, m); + + ProjectPowers(x, a, k, H, F); +} + +void ProjectPowers(vec_GF2E& x, const vec_GF2E& a, long k, + const GF2EXArgument& H, const GF2EXModulus& F) +{ + ProjectPowers(x, to_GF2EX(a), k, H, F); +} + +void ProjectPowers(vec_GF2E& x, const vec_GF2E& a, long k, + const GF2EX& h, const GF2EXModulus& F) +{ + ProjectPowers(x, to_GF2EX(a), k, h, F); +} + + + + +void BerlekampMassey(GF2EX& h, const vec_GF2E& a, long m) +{ + GF2EX Lambda, Sigma, Temp; + long L; + GF2E Delta, Delta1, t1; + long shamt; + GF2X tt1, tt2; + + // cerr << "*** " << m << "\n"; + + Lambda.SetMaxLength(m+1); + Sigma.SetMaxLength(m+1); + Temp.SetMaxLength(m+1); + + L = 0; + set(Lambda); + clear(Sigma); + set(Delta); + shamt = 0; + + long i, r, dl; + + for 
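+ // main Berlekamp-Massey loop: Lambda is the connection polynomial
+ // of the current shortest LFSR (length L), Sigma is the copy of
+ // Lambda saved at the last length change, Delta is the (nonzero)
+ // discrepancy seen there, and shamt counts the iterations since;
+ // each new discrepancy Delta1 is cancelled by adding a shifted
+ // multiple of Sigma scaled by Delta1/Delta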
(r = 1; r <= 2*m; r++) { + // cerr << r << "--"; + clear(tt1); + dl = deg(Lambda); + for (i = 0; i <= dl; i++) { + mul(tt2, rep(Lambda.rep[i]), rep(a[r-i-1])); + add(tt1, tt1, tt2); + } + + conv(Delta1, tt1); + + if (IsZero(Delta1)) { + shamt++; + // cerr << "case 1: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + else if (2*L < r) { + div(t1, Delta1, Delta); + mul(Temp, Sigma, t1); + Sigma = Lambda; + ShiftAdd(Lambda, Temp, shamt+1); + shamt = 0; + L = r-L; + Delta = Delta1; + // cerr << "case 2: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + else { + shamt++; + div(t1, Delta1, Delta); + mul(Temp, Sigma, t1); + ShiftAdd(Lambda, Temp, shamt); + // cerr << "case 3: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + } + + // cerr << "finished: " << L << " " << deg(Lambda) << "\n"; + + dl = deg(Lambda); + h.rep.SetLength(L + 1); + + for (i = 0; i < L - dl; i++) + clear(h.rep[i]); + + for (i = L - dl; i <= L; i++) + h.rep[i] = Lambda.rep[L - i]; +} + + +void MinPolySeq(GF2EX& h, const vec_GF2E& a, long m) +{ + if (m < 0 || NTL_OVERFLOW(m, 1, 0)) LogicError("MinPoly: bad args"); + if (a.length() < 2*m) LogicError("MinPoly: sequence too short"); + + BerlekampMassey(h, a, m); +} + + +void DoMinPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F, long m, + const GF2EX& R) +{ + vec_GF2E x; + + ProjectPowers(x, R, 2*m, g, F); + MinPolySeq(h, x, m); +} + +void ProbMinPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F, long m) +{ + long n = F.n; + if (m < 1 || m > n) LogicError("ProbMinPoly: bad args"); + + GF2EX R; + random(R, n); + + DoMinPolyMod(h, g, F, m, R); +} + +void ProbMinPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F) +{ + ProbMinPolyMod(h, g, F, F.n); +} + +void MinPolyMod(GF2EX& hh, const GF2EX& g, const GF2EXModulus& F, long m) +{ + GF2EX h, h1; + long n = F.n; + if (m < 1 || m > n) LogicError("MinPoly: bad args"); + + /* probabilistically compute min-poly */ + + ProbMinPolyMod(h, g, F, m); + if (deg(h) == m) { hh = h; return; } + CompMod(h1, h, g, F); + if (IsZero(h1)) { hh = h; return; } + + /* not completely successful...must iterate */ + + + GF2EX h2, h3; + GF2EX R; + GF2EXTransMultiplier H1; + + + for (;;) { + random(R, n); + build(H1, h1, F); + TransMulMod(R, R, H1, F); + DoMinPolyMod(h2, g, F, m-deg(h), R); + + mul(h, h, h2); + if (deg(h) == m) { hh = h; return; } + CompMod(h3, h2, g, F); + MulMod(h1, h3, h1, F); + if (IsZero(h1)) { hh = h; return; } + } +} + +void IrredPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F, long m) +{ + if (m < 1 || m > F.n) LogicError("IrredPoly: bad args"); + + GF2EX R; + set(R); + + DoMinPolyMod(h, g, F, m, R); +} + + + +void IrredPolyMod(GF2EX& h, const GF2EX& g, const GF2EXModulus& F) +{ + IrredPolyMod(h, g, F, F.n); +} + + + +void MinPolyMod(GF2EX& hh, const GF2EX& g, const GF2EXModulus& F) +{ + MinPolyMod(hh, g, F, F.n); +} + + +void MakeMonic(GF2EX& x) +{ + if (IsZero(x)) + return; + + if (IsOne(LeadCoeff(x))) + return; + + GF2E t; + + inv(t, LeadCoeff(x)); + mul(x, x, t); +} + + +long divide(GF2EX& q, const GF2EX& a, const GF2EX& b) +{ + if (IsZero(b)) { + if (IsZero(a)) { + clear(q); + return 1; + } + else + return 0; + } + + GF2EX lq, r; + DivRem(lq, r, a, b); + if (!IsZero(r)) return 0; + q = lq; + return 1; +} + +long divide(const GF2EX& a, const GF2EX& b) +{ + if (IsZero(b)) return IsZero(a); + GF2EX lq, r; + DivRem(lq, r, a, b); + if (!IsZero(r)) return 0; + return 1; +} + + +long operator==(const GF2EX& a, long b) +{ + if (b & 1) + return IsOne(a); + 
else + return IsZero(a); +} + + +long operator==(const GF2EX& a, GF2 b) +{ + if (b == 1) + return IsOne(a); + else + return IsZero(a); +} + +long operator==(const GF2EX& a, const GF2E& b) +{ + if (IsZero(b)) + return IsZero(a); + + if (deg(a) != 0) + return 0; + + return a.rep[0] == b; +} + + + +void power(GF2EX& x, const GF2EX& a, long e) +{ + if (e < 0) { + ArithmeticError("power: negative exponent"); + } + + if (e == 0) { + x = 1; + return; + } + + if (a == 0 || a == 1) { + x = a; + return; + } + + long da = deg(a); + + if (da == 0) { + x = power(ConstTerm(a), e); + return; + } + + + if (da > (NTL_MAX_LONG-1)/e) + ResourceError("overflow in power"); + + GF2EX res; + res.SetMaxLength(da*e + 1); + res = 1; + + long k = NumBits(e); + long i; + + for (i = k - 1; i >= 0; i--) { + sqr(res, res); + if (bit(e, i)) + mul(res, res, a); + } + + x = res; +} + +void reverse(GF2EX& x, const GF2EX& a, long hi) +{ + if (hi < 0) { clear(x); return; } + if (NTL_OVERFLOW(hi, 1, 0)) ResourceError("overflow in reverse"); + + if (&x == &a) { + GF2EX tmp; + CopyReverse(tmp, a, hi); + x = tmp; + } + else + CopyReverse(x, a, hi); +} + + +static +void FastTraceVec(vec_GF2E& S, const GF2EXModulus& f) +{ + long n = deg(f); + + GF2EX x = reverse(-LeftShift(reverse(diff(reverse(f)), n-1), n-1)/f, n-1); + + S.SetLength(n); + S[0] = n; + + long i; + for (i = 1; i < n; i++) + S[i] = coeff(x, i); +} + + +void PlainTraceVec(vec_GF2E& S, const GF2EX& ff) +{ + if (deg(ff) <= 0) + LogicError("TraceVec: bad args"); + + GF2EX f; + f = ff; + + MakeMonic(f); + + long n = deg(f); + + S.SetLength(n); + + if (n == 0) + return; + + long k, i; + GF2X acc, t; + GF2E t1; + + S[0] = n; + + for (k = 1; k < n; k++) { + mul(acc, rep(f.rep[n-k]), k); + + for (i = 1; i < k; i++) { + mul(t, rep(f.rep[n-i]), rep(S[k-i])); + add(acc, acc, t); + } + + conv(t1, acc); + negate(S[k], t1); + } +} + +void TraceVec(vec_GF2E& S, const GF2EX& f) +{ + if (deg(f) < GF2E::DivCross()) + PlainTraceVec(S, f); + else + FastTraceVec(S, f); +} + +static +void ComputeTraceVec(vec_GF2E& S, const GF2EXModulus& F) +{ + if (F.method == GF2EX_MOD_PLAIN) { + PlainTraceVec(S, F.f); + } + else { + FastTraceVec(S, F); + } +} + +void TraceMod(GF2E& x, const GF2EX& a, const GF2EXModulus& F) +{ + long n = F.n; + + if (deg(a) >= n) + LogicError("trace: bad args"); + + do { // NOTE: thread safe lazy init + Lazy::Builder builder(F.tracevec.val()); + if (!builder()) break; + UniquePtr p; + p.make(); + ComputeTraceVec(*p, F); + builder.move(p); + } while (0); + + InnerProduct(x, a.rep, *F.tracevec.val()); +} + +void TraceMod(GF2E& x, const GF2EX& a, const GF2EX& f) +{ + if (deg(a) >= deg(f) || deg(f) <= 0) + LogicError("trace: bad args"); + + project(x, TraceVec(f), a); +} + + +void PlainResultant(GF2E& rres, const GF2EX& a, const GF2EX& b) +{ + GF2E res; + + if (IsZero(a) || IsZero(b)) + clear(res); + else if (deg(a) == 0 && deg(b) == 0) + set(res); + else { + long d0, d1, d2; + GF2E lc; + set(res); + + long n = max(deg(a),deg(b)) + 1; + GF2EX u(INIT_SIZE, n), v(INIT_SIZE, n); + GF2XVec tmp(n, 2*GF2E::WordLength()); + + u = a; + v = b; + + for (;;) { + d0 = deg(u); + d1 = deg(v); + lc = LeadCoeff(v); + + PlainRem(u, u, v, tmp); + swap(u, v); + + d2 = deg(v); + if (d2 >= 0) { + power(lc, lc, d0-d2); + mul(res, res, lc); + if (d0 & d1 & 1) negate(res, res); + } + else { + if (d1 == 0) { + power(lc, lc, d0); + mul(res, res, lc); + } + else + clear(res); + + break; + } + } + + rres = res; + } +} + +void resultant(GF2E& rres, const GF2EX& a, const GF2EX& b) +{ + PlainResultant(rres, 
a, b); +} + + +void NormMod(GF2E& x, const GF2EX& a, const GF2EX& f) +{ + if (deg(f) <= 0 || deg(a) >= deg(f)) + LogicError("norm: bad args"); + + if (IsZero(a)) { + clear(x); + return; + } + + GF2E t; + resultant(t, f, a); + if (!IsOne(LeadCoeff(f))) { + GF2E t1; + power(t1, LeadCoeff(f), deg(a)); + inv(t1, t1); + mul(t, t, t1); + } + + x = t; +} + + + +// tower stuff... + +void InnerProduct(GF2EX& x, const GF2X& v, long low, long high, + const vec_GF2EX& H, long n, vec_GF2E& t) +{ + long i, j; + + for (j = 0; j < n; j++) + clear(t[j]); + + high = min(high, deg(v)); + for (i = low; i <= high; i++) { + const vec_GF2E& h = H[i-low].rep; + long m = h.length(); + + if (coeff(v, i) != 0) { + for (j = 0; j < m; j++) { + add(t[j], t[j], h[j]); + } + } + } + + x.rep.SetLength(n); + for (j = 0; j < n; j++) + x.rep[j] = t[j]; + + x.normalize(); +} + + + +void CompTower(GF2EX& x, const GF2X& g, const GF2EXArgument& A, + const GF2EXModulus& F) +{ + if (deg(g) <= 0) { + conv(x, g); + return; + } + + + GF2EX s, t; + vec_GF2E scratch; + scratch.SetLength(deg(F)); + + long m = A.H.length() - 1; + long l = (((deg(g)+1)+m-1)/m) - 1; + + const GF2EX& M = A.H[m]; + + InnerProduct(t, g, l*m, l*m + m - 1, A.H, F.n, scratch); + for (long i = l-1; i >= 0; i--) { + InnerProduct(s, g, i*m, i*m + m - 1, A.H, F.n, scratch); + MulMod(t, t, M, F); + add(t, t, s); + } + x = t; +} + + +void CompTower(GF2EX& x, const GF2X& g, const GF2EX& h, + const GF2EXModulus& F) + // x = g(h) mod f +{ + long m = SqrRoot(deg(g)+1); + + if (m == 0) { + clear(x); + return; + } + + + GF2EXArgument A; + + build(A, h, F, m); + + CompTower(x, g, A, F); +} + +void PrepareProjection(vec_vec_GF2& tt, const vec_GF2E& s, + const vec_GF2& proj) +{ + long l = s.length(); + tt.SetLength(l); + + GF2XTransMultiplier M; + long i; + + for (i = 0; i < l; i++) { + build(M, rep(s[i]), GF2E::modulus()); + UpdateMap(tt[i], proj, M, GF2E::modulus()); + } +} + +void ProjectedInnerProduct(ref_GF2 x, const vec_GF2E& a, + const vec_vec_GF2& b) +{ + long n = min(a.length(), b.length()); + + GF2 t, res; + + res = 0; + + long i; + for (i = 0; i < n; i++) { + project(t, b[i], rep(a[i])); + res += t; + } + + x = res; +} + + + +void PrecomputeProj(vec_GF2& proj, const GF2X& f) +{ + long n = deg(f); + + if (n <= 0) LogicError("PrecomputeProj: bad args"); + + if (ConstTerm(f) != 0) { + proj.SetLength(1); + proj[0] = 1; + } + else { + proj.SetLength(n); + clear(proj); + proj[n-1] = 1; + } +} + +void ProjectPowersTower(vec_GF2& x, const vec_GF2E& a, long k, + const GF2EXArgument& H, const GF2EXModulus& F, + const vec_GF2& proj) + +{ + long n = F.n; + + if (a.length() > n || k < 0) LogicError("ProjectPowers: bad args"); + + long m = H.H.length()-1; + long l = (k+m-1)/m - 1; + + GF2EXTransMultiplier M; + build(M, H.H[m], F); + + vec_GF2E s(INIT_SIZE, n); + s = a; + + x.SetLength(k); + + vec_vec_GF2 tt; + + for (long i = 0; i <= l; i++) { + long m1 = min(m, k-i*m); + + PrepareProjection(tt, s, proj); + + for (long j = 0; j < m1; j++) { + GF2 r; + ProjectedInnerProduct(r, H.H[j].rep, tt); + x.put(i*m + j, r); + } + if (i < l) + UpdateMap(s, s, M, F); + } +} + + + + +void ProjectPowersTower(vec_GF2& x, const vec_GF2E& a, long k, + const GF2EX& h, const GF2EXModulus& F, + const vec_GF2& proj) + +{ + if (a.length() > F.n || k < 0) LogicError("ProjectPowers: bad args"); + + if (k == 0) { + x.SetLength(0); + return; + } + + long m = SqrRoot(k); + + GF2EXArgument H; + + build(H, h, F, m); + ProjectPowersTower(x, a, k, H, F, proj); +} + + +void DoMinPolyTower(GF2X& h, const GF2EX& 
g, const GF2EXModulus& F, long m,
+                   const vec_GF2E& R, const vec_GF2& proj)
+{
+   vec_GF2 x;
+
+   ProjectPowersTower(x, R, 2*m, g, F, proj);
+
+   MinPolySeq(h, x, m);
+}
+
+
+void ProbMinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F,
+                      long m)
+{
+   long n = F.n;
+   if (m < 1 || m > n*GF2E::degree()) LogicError("ProbMinPoly: bad args");
+
+   vec_GF2E R;
+   R.SetLength(n);
+   long i;
+   for (i = 0; i < n; i++) random(R[i]);
+
+   vec_GF2 proj;
+   PrecomputeProj(proj, GF2E::modulus());
+
+   DoMinPolyTower(h, g, F, m, R, proj);
+}
+
+void ProbMinPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F,
+                      long m, const vec_GF2& proj)
+{
+   long n = F.n;
+   if (m < 1 || m > n*GF2E::degree()) LogicError("ProbMinPoly: bad args");
+
+   vec_GF2E R;
+   R.SetLength(n);
+   long i;
+   for (i = 0; i < n; i++) random(R[i]);
+
+   DoMinPolyTower(h, g, F, m, R, proj);
+}
+
+void MinPolyTower(GF2X& hh, const GF2EX& g, const GF2EXModulus& F, long m)
+{
+   GF2X h;
+   GF2EX h1;
+   long n = F.n;
+   if (m < 1 || m > n*GF2E::degree()) {
+      LogicError("MinPoly: bad args");
+   }
+
+   vec_GF2 proj;
+   PrecomputeProj(proj, GF2E::modulus());
+
+   /* probabilistically compute min-poly */
+
+   ProbMinPolyTower(h, g, F, m, proj);
+   if (deg(h) == m) { hh = h; return; }
+   CompTower(h1, h, g, F);
+   if (IsZero(h1)) { hh = h; return; }
+
+   /* not completely successful...must iterate */
+
+   long i;
+
+   GF2X h2;
+   GF2EX h3;
+   vec_GF2E R;
+   GF2EXTransMultiplier H1;
+
+
+   for (;;) {
+      R.SetLength(n);
+      for (i = 0; i < n; i++) random(R[i]);
+      build(H1, h1, F);
+      UpdateMap(R, R, H1, F);
+      DoMinPolyTower(h2, g, F, m-deg(h), R, proj);
+
+      mul(h, h, h2);
+      if (deg(h) == m) { hh = h; return; }
+      CompTower(h3, h2, g, F);
+      MulMod(h1, h3, h1, F);
+      if (IsZero(h1)) {
+         hh = h;
+         return;
+      }
+   }
+}
+
+void IrredPolyTower(GF2X& h, const GF2EX& g, const GF2EXModulus& F, long m)
+{
+   if (m < 1 || m > deg(F)*GF2E::degree()) LogicError("IrredPoly: bad args");
+
+   vec_GF2E R;
+   R.SetLength(1);
+   R[0] = 1;
+
+   vec_GF2 proj;
+   proj.SetLength(1);
+   proj.put(0, 1);
+
+   DoMinPolyTower(h, g, F, m, R, proj);
+}
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/GF2EXFactoring.c b/thirdparty/linux/ntl/src/GF2EXFactoring.c
new file mode 100644
index 0000000000..36479cf5be
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GF2EXFactoring.c
@@ -0,0 +1,2163 @@
+
+
+#include <NTL/GF2EXFactoring.h>
+#include <NTL/vec_GF2XVec.h>
+#include <NTL/fileio.h>
+#include <NTL/FacVec.h>
+#include <NTL/BasicThreadPool.h>
+
+
+NTL_START_IMPL
+
+
+
+
+static
+void IterSqr(GF2E& c, const GF2E& a, long n)
+{
+   GF2E res;
+
+   long i;
+
+   res = a;
+
+   for (i = 0; i < n; i++)
+      sqr(res, res);
+
+   c = res;
+}
+
+
+
+void SquareFreeDecomp(vec_pair_GF2EX_long& u, const GF2EX& ff)
+{
+   GF2EX f = ff;
+
+   if (!IsOne(LeadCoeff(f)))
+      LogicError("SquareFreeDecomp: bad args");
+
+   GF2EX r, t, v, tmp1;
+   long m, j, finished, done;
+
+   u.SetLength(0);
+
+   if (deg(f) == 0)
+      return;
+
+   m = 1;
+   finished = 0;
+
+   do {
+      j = 1;
+      diff(tmp1, f);
+      GCD(r, f, tmp1);
+      div(t, f, r);
+
+      if (deg(t) > 0) {
+         done = 0;
+         do {
+            GCD(v, r, t);
+            div(tmp1, t, v);
+            if (deg(tmp1) > 0) append(u, cons(tmp1, j*m));
+            if (deg(v) > 0) {
+               div(r, r, v);
+               t = v;
+               j++;
+            }
+            else
+               done = 1;
+         } while (!done);
+         if (deg(r) == 0) finished = 1;
+      }
+
+      if (!finished) {
+         /* r is a square */
+
+         long k, d;
+         d = deg(r)/2;
+         f.rep.SetLength(d+1);
+         for (k = 0; k <= d; k++)
+            IterSqr(f.rep[k], r.rep[k*2], GF2E::degree()-1);
+         m = m*2;
+      }
+   } while (!finished);
+}
+
+
+
+static
+void NullSpace(long& r, vec_long& D, vec_GF2XVec& M, long verbose)
+{
+   long k, l, n;
+   long i, j;
+   long pos;
+   GF2X t1, t2;
+   GF2X *x, *y;
+
+   const
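+ // Gaussian elimination over GF(2^d) (d = GF2E::degree()) on the
+ // Berlekamp matrix Q - I assembled by BuildMatrix, whose column j
+ // holds X^(q*j) mod f for q = 2^d; D[k] records which row defines
+ // variable k, and r comes back as the nullity -- the number of
+ // irreducible factors of f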
GF2XModulus& p = GF2E::modulus(); + + n = M.length(); + + D.SetLength(n); + for (j = 0; j < n; j++) D[j] = -1; + + r = 0; + + l = 0; + for (k = 0; k < n; k++) { + + if (verbose && k % 10 == 0) cerr << "+"; + + pos = -1; + for (i = l; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) + pos = i; + } + + if (pos != -1) { + swap(M[pos], M[l]); + + // make M[l, k] == -1 mod p, and make row l reduced + + InvMod(t1, M[l][k], p); + for (j = k+1; j < n; j++) { + rem(t2, M[l][j], p); + MulMod(M[l][j], t2, t1, p); + } + + for (i = l+1; i < n; i++) { + // M[i] = M[i] + M[l]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[l].elts() + (k+1); + + for (j = k+1; j < n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + + D[k] = l; // variable k is defined by row l + l++; + + } + else { + r++; + } + } +} + + + +static +void BuildMatrix(vec_GF2XVec& M, long n, const GF2EX& g, const GF2EXModulus& F, + long verbose) +{ + long i, j, m; + GF2EX h; + + + M.SetLength(n); + for (i = 0; i < n; i++) + M[i].SetSize(n, 2*GF2E::WordLength()); + + set(h); + for (j = 0; j < n; j++) { + if (verbose && j % 10 == 0) cerr << "+"; + + m = deg(h); + for (i = 0; i < n; i++) { + if (i <= m) + M[i][j] = rep(h.rep[i]); + else + clear(M[i][j]); + } + + if (j < n-1) + MulMod(h, h, g, F); + } + + for (i = 0; i < n; i++) + add(M[i][i], M[i][i], 1); + +} + + +static +void TraceMap(GF2EX& h, const GF2EX& a, const GF2EXModulus& F) + +// one could consider making a version based on modular composition, +// as in ComposeFrobeniusMap... + +{ + GF2EX res, tmp; + + res = a; + tmp = a; + + long i; + for (i = 0; i < GF2E::degree()-1; i++) { + SqrMod(tmp, tmp, F); + add(res, res, tmp); + } + + h = res; +} + +void PlainFrobeniusMap(GF2EX& h, const GF2EXModulus& F) +{ + GF2EX res; + + SetX(res); + long i; + for (i = 0; i < GF2E::degree(); i++) + SqrMod(res, res, F); + + h = res; +} + +long UseComposeFrobenius(long d, long n) +{ + long i; + i = 1; + while (i <= d) i = i << 1; + i = i >> 1; + + i = i >> 1; + long m = 1; + + long dz; + + if (n == 2) { + dz = 1; + } + else { + while (i) { + long m1 = 2*m; + if (i & d) m1++; + + if (m1 >= NTL_BITS_PER_LONG-1 || (1L << m1) >= n) break; + + m = m1; + i = i >> 1; + } + + dz = 1L << m; + } + + long rootn = SqrRoot(n); + long cnt = 0; + + if (i) { + cnt += SqrRoot(dz+1); + i = i >> 1; + } + + while (i) { + cnt += rootn; + i = i >> 1; + } + + return 4*cnt <= d; +} + +void ComposeFrobeniusMap(GF2EX& y, const GF2EXModulus& F) +{ + long d = GF2E::degree(); + long n = deg(F); + + long i; + i = 1; + while (i <= d) i = i << 1; + i = i >> 1; + + GF2EX z(INIT_SIZE, n), z1(INIT_SIZE, n); + + i = i >> 1; + long m = 1; + + if (n == 2) { + SetX(z); + SqrMod(z, z, F); + } + else { + while (i) { + long m1 = 2*m; + if (i & d) m1++; + + if (m1 >= NTL_BITS_PER_LONG-1 || (1L << m1) >= n) break; + + m = m1; + i = i >> 1; + } + + clear(z); + SetCoeff(z, 1L << m); + } + + + while (i) { + z1 = z; + + long j, k, dz; + dz = deg(z); + + for (j = 0; j <= dz; j++) + for (k = 0; k < m; k++) + sqr(z1.rep[j], z1.rep[j]); + + CompMod(z, z1, z, F); + m = 2*m; + + if (d & i) { + SqrMod(z, z, F); + m++; + } + + i = i >> 1; + } + + y = z; +} + +void FrobeniusMap(GF2EX& h, const GF2EXModulus& F) +{ + long n = deg(F); + long d = GF2E::degree(); + + if (n == 1) { + h = ConstTerm(F); + return; + } + + if (UseComposeFrobenius(d, n)) + ComposeFrobeniusMap(h, F); + else + PlainFrobeniusMap(h, F); +} + + + + + + + +static +void 
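+// RecFindRoots(x, f): f is monic with distinct roots in GF(2^d);
+// taking gcd(f, TraceMap(r*X)) for random r collects exactly the
+// roots a with Tr(r*a) = 0 (about half, on average), and the two
+// halves are split recursively until only linear factors remain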
RecFindRoots(vec_GF2E& x, const GF2EX& f) +{ + if (deg(f) == 0) return; + + if (deg(f) == 1) { + long k = x.length(); + x.SetLength(k+1); + x[k] = ConstTerm(f); + return; + } + + GF2EX h; + + GF2E r; + + + { + GF2EXModulus F; + build(F, f); + + do { + random(r); + clear(h); + SetCoeff(h, 1, r); + TraceMap(h, h, F); + GCD(h, h, f); + } while (deg(h) <= 0 || deg(h) == deg(f)); + } + + RecFindRoots(x, h); + div(h, f, h); + RecFindRoots(x, h); +} + +void FindRoots(vec_GF2E& x, const GF2EX& ff) +{ + GF2EX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("FindRoots: bad args"); + + x.SetMaxLength(deg(f)); + x.SetLength(0); + RecFindRoots(x, f); +} + + +static +void RandomBasisElt(GF2EX& g, const vec_long& D, const vec_GF2XVec& M) +{ + GF2X t1, t2; + + long n = D.length(); + + long i, j, s; + + g.rep.SetLength(n); + + vec_GF2E& v = g.rep; + + for (j = n-1; j >= 0; j--) { + if (D[j] == -1) + random(v[j]); + else { + i = D[j]; + + // v[j] = sum_{s=j+1}^{n-1} v[s]*M[i,s] + + clear(t1); + + for (s = j+1; s < n; s++) { + mul(t2, rep(v[s]), M[i][s]); + add(t1, t1, t2); + } + + conv(v[j], t1); + } + } + + g.normalize(); +} + + + +static +void split(GF2EX& f1, GF2EX& g1, GF2EX& f2, GF2EX& g2, + const GF2EX& f, const GF2EX& g, + const vec_GF2E& roots, long lo, long mid) +{ + long r = mid-lo+1; + + GF2EXModulus F; + build(F, f); + + vec_GF2E lroots(INIT_SIZE, r); + long i; + + for (i = 0; i < r; i++) + lroots[i] = roots[lo+i]; + + + GF2EX h, a, d; + BuildFromRoots(h, lroots); + CompMod(a, h, g, F); + + + GCD(f1, a, f); + + div(f2, f, f1); + + rem(g1, g, f1); + rem(g2, g, f2); +} + +static +void RecFindFactors(vec_GF2EX& factors, const GF2EX& f, const GF2EX& g, + const vec_GF2E& roots, long lo, long hi) +{ + long r = hi-lo+1; + + if (r == 0) return; + + if (r == 1) { + append(factors, f); + return; + } + + GF2EX f1, g1, f2, g2; + + long mid = (lo+hi)/2; + + split(f1, g1, f2, g2, f, g, roots, lo, mid); + + RecFindFactors(factors, f1, g1, roots, lo, mid); + RecFindFactors(factors, f2, g2, roots, mid+1, hi); +} + + +static +void FindFactors(vec_GF2EX& factors, const GF2EX& f, const GF2EX& g, + const vec_GF2E& roots) +{ + long r = roots.length(); + + factors.SetMaxLength(r); + factors.SetLength(0); + + RecFindFactors(factors, f, g, roots, 0, r-1); +} + +#if 0 + +static +void IterFindFactors(vec_GF2EX& factors, const GF2EX& f, + const GF2EX& g, const vec_GF2E& roots) +{ + long r = roots.length(); + long i; + GF2EX h; + + factors.SetLength(r); + + for (i = 0; i < r; i++) { + add(h, g, roots[i]); + GCD(factors[i], f, h); + } +} + +#endif + + + + +void SFBerlekamp(vec_GF2EX& factors, const GF2EX& ff, long verbose) +{ + GF2EX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SFBerlekamp: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + double t; + + long n = deg(f); + + GF2EXModulus F; + + build(F, f); + + GF2EX g, h; + + if (verbose) { cerr << "computing X^p..."; t = GetTime(); } + FrobeniusMap(g, F); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + vec_long D; + long r; + + vec_GF2XVec M; + + if (verbose) { cerr << "building matrix..."; t = GetTime(); } + BuildMatrix(M, n, g, F, verbose); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + if (verbose) { cerr << "diagonalizing..."; t = GetTime(); } + NullSpace(r, D, M, verbose); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + + if (verbose) cerr << "number of factors = " << r << "\n"; + + if (r == 1) { + factors.SetLength(1); + factors[0] = 
f; + return; + } + + if (verbose) { cerr << "factor extraction..."; t = GetTime(); } + + vec_GF2E roots; + + RandomBasisElt(g, D, M); + MinPolyMod(h, g, F, r); + if (deg(h) == r) M.kill(); + FindRoots(roots, h); + FindFactors(factors, f, g, roots); + + GF2EX g1; + vec_GF2EX S, S1; + long i; + + while (factors.length() < r) { + if (verbose) cerr << "+"; + RandomBasisElt(g, D, M); + S.kill(); + for (i = 0; i < factors.length(); i++) { + const GF2EX& f = factors[i]; + if (deg(f) == 1) { + append(S, f); + continue; + } + build(F, f); + rem(g1, g, F); + if (deg(g1) <= 0) { + append(S, f); + continue; + } + MinPolyMod(h, g1, F, min(deg(f), r-factors.length()+1)); + FindRoots(roots, h); + S1.kill(); + FindFactors(S1, f, g1, roots); + append(S, S1); + } + swap(factors, S); + } + + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + if (verbose) { + cerr << "degrees:"; + long i; + for (i = 0; i < factors.length(); i++) + cerr << " " << deg(factors[i]); + cerr << "\n"; + } +} + + +void berlekamp(vec_pair_GF2EX_long& factors, const GF2EX& f, long verbose) +{ + double t; + vec_pair_GF2EX_long sfd; + vec_GF2EX x; + + if (!IsOne(LeadCoeff(f))) + LogicError("berlekamp: bad args"); + + + if (verbose) { cerr << "square-free decomposition..."; t = GetTime(); } + SquareFreeDecomp(sfd, f); + if (verbose) cerr << (GetTime()-t) << "\n"; + + factors.SetLength(0); + + long i, j; + + for (i = 0; i < sfd.length(); i++) { + if (verbose) { + cerr << "factoring multiplicity " << sfd[i].b + << ", deg = " << deg(sfd[i].a) << "\n"; + } + + SFBerlekamp(x, sfd[i].a, verbose); + + for (j = 0; j < x.length(); j++) + append(factors, cons(x[j], sfd[i].b)); + } +} + + + +static +void AddFactor(vec_pair_GF2EX_long& factors, const GF2EX& g, long d, long verbose) +{ + if (verbose) + cerr << "degree=" << d << ", number=" << deg(g)/d << "\n"; + append(factors, cons(g, d)); +} + +static +void ProcessTable(GF2EX& f, vec_pair_GF2EX_long& factors, + const GF2EXModulus& F, long limit, const vec_GF2EX& tbl, + long d, long verbose) + +{ + if (limit == 0) return; + + if (verbose) cerr << "+"; + + GF2EX t1; + + if (limit == 1) { + GCD(t1, f, tbl[0]); + if (deg(t1) > 0) { + AddFactor(factors, t1, d, verbose); + div(f, f, t1); + } + + return; + } + + long i; + + t1 = tbl[0]; + for (i = 1; i < limit; i++) + MulMod(t1, t1, tbl[i], F); + + GCD(t1, f, t1); + + if (deg(t1) == 0) return; + + div(f, f, t1); + + GF2EX t2; + + i = 0; + d = d - limit + 1; + + while (2*d <= deg(t1)) { + GCD(t2, tbl[i], t1); + if (deg(t2) > 0) { + AddFactor(factors, t2, d, verbose); + div(t1, t1, t2); + } + + i++; + d++; + } + + if (deg(t1) > 0) + AddFactor(factors, t1, deg(t1), verbose); +} + + +void TraceMap(GF2EX& w, const GF2EX& a, long d, const GF2EXModulus& F, + const GF2EX& b) + +{ + if (d < 0) LogicError("TraceMap: bad args"); + + GF2EX y, z, t; + + z = b; + y = a; + clear(w); + + while (d) { + if (d == 1) { + if (IsZero(w)) + w = y; + else { + CompMod(w, w, z, F); + add(w, w, y); + } + } + else if ((d & 1) == 0) { + Comp2Mod(z, t, z, y, z, F); + add(y, t, y); + } + else if (IsZero(w)) { + w = y; + Comp2Mod(z, t, z, y, z, F); + add(y, t, y); + } + else { + Comp3Mod(z, t, w, z, y, w, z, F); + add(w, w, y); + add(y, t, y); + } + + d = d >> 1; + } +} + + +void PowerCompose(GF2EX& y, const GF2EX& h, long q, const GF2EXModulus& F) +{ + if (q < 0) LogicError("powerCompose: bad args"); + + GF2EX z(INIT_SIZE, F.n); + long sw; + + z = h; + SetX(y); + + while (q) { + sw = 0; + + if (q > 1) sw = 2; + if (q & 1) { + if (IsX(y)) + y = z; + else + sw = sw | 1; + } + + switch 
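+ // sw is a 2-bit action code: bit 0 set means compose y <- y(z),
+ // bit 1 set means advance z <- z(z); case 3 performs both with a
+ // single Comp2Mod so the power table for z is built only once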
(sw) { + case 0: + break; + + case 1: + CompMod(y, y, z, F); + break; + + case 2: + CompMod(z, z, z, F); + break; + + case 3: + Comp2Mod(y, z, y, z, z, F); + break; + } + + q = q >> 1; + } +} + + +long ProbIrredTest(const GF2EX& f, long iter) +{ + long n = deg(f); + + if (n <= 0) return 0; + if (n == 1) return 1; + + GF2EXModulus F; + + build(F, f); + + GF2EX b, r, s; + + FrobeniusMap(b, F); + + long all_zero = 1; + + long i; + + for (i = 0; i < iter; i++) { + random(r, n); + TraceMap(s, r, n, F, b); + + all_zero = all_zero && IsZero(s); + + if (deg(s) > 0) return 0; + } + + if (!all_zero || (n & 1)) return 1; + + PowerCompose(s, b, n/2, F); + return !IsX(s); +} + + +NTL_CHEAP_THREAD_LOCAL +long GF2EX_BlockingFactor = 10; + +void DDF(vec_pair_GF2EX_long& factors, const GF2EX& ff, const GF2EX& hh, + long verbose) +{ + GF2EX f = ff; + GF2EX h = hh; + + if (!IsOne(LeadCoeff(f))) + LogicError("DDF: bad args"); + + factors.SetLength(0); + + if (deg(f) == 0) + return; + + if (deg(f) == 1) { + AddFactor(factors, f, 1, verbose); + return; + } + + long CompTableSize = 2*SqrRoot(deg(f)); + + long GCDTableSize = GF2EX_BlockingFactor; + + GF2EXModulus F; + build(F, f); + + GF2EXArgument H; + + build(H, h, F, min(CompTableSize, deg(f))); + + long i, d, limit, old_n; + GF2EX g, X; + + + vec_GF2EX tbl(INIT_SIZE, GCDTableSize); + + SetX(X); + + i = 0; + g = h; + d = 1; + limit = GCDTableSize; + + + while (2*d <= deg(f)) { + + old_n = deg(f); + add(tbl[i], g, X); + i++; + if (i == limit) { + ProcessTable(f, factors, F, i, tbl, d, verbose); + i = 0; + } + + d = d + 1; + if (2*d <= deg(f)) { + // we need to go further + + if (deg(f) < old_n) { + // f has changed + + build(F, f); + rem(h, h, f); + rem(g, g, f); + build(H, h, F, min(CompTableSize, deg(f))); + } + + CompMod(g, g, H, F); + } + } + + ProcessTable(f, factors, F, i, tbl, d-1, verbose); + + if (!IsOne(f)) AddFactor(factors, f, deg(f), verbose); +} + + + +void RootEDF(vec_GF2EX& factors, const GF2EX& f, long verbose) +{ + vec_GF2E roots; + double t; + + if (verbose) { cerr << "finding roots..."; t = GetTime(); } + FindRoots(roots, f); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + long r = roots.length(); + factors.SetLength(r); + for (long j = 0; j < r; j++) { + SetX(factors[j]); + add(factors[j], factors[j], roots[j]); + } +} + +static +void EDFSplit(vec_GF2EX& v, const GF2EX& f, const GF2EX& b, long d) +{ + GF2EX a, g, h; + GF2EXModulus F; + vec_GF2E roots; + + build(F, f); + long n = F.n; + long r = n/d; + random(a, n); + TraceMap(g, a, d, F, b); + MinPolyMod(h, g, F, r); + FindRoots(roots, h); + FindFactors(v, f, g, roots); +} + +static +void RecEDF(vec_GF2EX& factors, const GF2EX& f, const GF2EX& b, long d, + long verbose) +{ + vec_GF2EX v; + long i; + GF2EX bb; + + if (verbose) cerr << "+"; + + EDFSplit(v, f, b, d); + for (i = 0; i < v.length(); i++) { + if (deg(v[i]) == d) { + append(factors, v[i]); + } + else { + GF2EX bb; + rem(bb, b, v[i]); + RecEDF(factors, v[i], bb, d, verbose); + } + } +} + + +void EDF(vec_GF2EX& factors, const GF2EX& ff, const GF2EX& bb, + long d, long verbose) + +{ + GF2EX f = ff; + GF2EX b = bb; + + if (!IsOne(LeadCoeff(f))) + LogicError("EDF: bad args"); + + long n = deg(f); + long r = n/d; + + if (r == 0) { + factors.SetLength(0); + return; + } + + if (r == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + if (d == 1) { + RootEDF(factors, f, verbose); + return; + } + + + double t; + if (verbose) { + cerr << "computing EDF(" << d << "," << r << ")..."; + t = GetTime(); + } + + 
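+ // general equal-degree splitting: EDFSplit pushes a random element
+ // through the d-fold TraceMap, finds the roots of its minimal
+ // polynomial, and cuts f apart with gcds; RecEDF then recurses
+ // until every piece has degree exactly d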
factors.SetLength(0); + + RecEDF(factors, f, b, d, verbose); + + if (verbose) cerr << (GetTime()-t) << "\n"; +} + + +void SFCanZass(vec_GF2EX& factors, const GF2EX& ff, long verbose) +{ + GF2EX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SFCanZass: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + factors.SetLength(0); + + double t; + + + GF2EXModulus F; + build(F, f); + + GF2EX h; + + if (verbose) { cerr << "computing X^p..."; t = GetTime(); } + FrobeniusMap(h, F); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + vec_pair_GF2EX_long u; + if (verbose) { cerr << "computing DDF..."; t = GetTime(); } + NewDDF(u, f, h, verbose); + if (verbose) { + t = GetTime()-t; + cerr << "DDF time: " << t << "\n"; + } + + GF2EX hh; + vec_GF2EX v; + + long i; + for (i = 0; i < u.length(); i++) { + const GF2EX& g = u[i].a; + long d = u[i].b; + long r = deg(g)/d; + + if (r == 1) { + // g is already irreducible + + append(factors, g); + } + else { + // must perform EDF + + if (d == 1) { + // root finding + RootEDF(v, g, verbose); + append(factors, v); + } + else { + // general case + rem(hh, h, g); + EDF(v, g, hh, d, verbose); + append(factors, v); + } + } + } +} + +void CanZass(vec_pair_GF2EX_long& factors, const GF2EX& f, long verbose) +{ + if (!IsOne(LeadCoeff(f))) + LogicError("CanZass: bad args"); + + double t; + vec_pair_GF2EX_long sfd; + vec_GF2EX x; + + + if (verbose) { cerr << "square-free decomposition..."; t = GetTime(); } + SquareFreeDecomp(sfd, f); + if (verbose) cerr << (GetTime()-t) << "\n"; + + factors.SetLength(0); + + long i, j; + + for (i = 0; i < sfd.length(); i++) { + if (verbose) { + cerr << "factoring multiplicity " << sfd[i].b + << ", deg = " << deg(sfd[i].a) << "\n"; + } + + SFCanZass(x, sfd[i].a, verbose); + + for (j = 0; j < x.length(); j++) + append(factors, cons(x[j], sfd[i].b)); + } +} + +void mul(GF2EX& f, const vec_pair_GF2EX_long& v) +{ + long i, j, n; + + n = 0; + for (i = 0; i < v.length(); i++) + n += v[i].b*deg(v[i].a); + + GF2EX g(INIT_SIZE, n+1); + + set(g); + for (i = 0; i < v.length(); i++) + for (j = 0; j < v[i].b; j++) { + mul(g, g, v[i].a); + } + + f = g; +} + + + + +static +long BaseCase(const GF2EX& h, long q, long a, const GF2EXModulus& F) +{ + long b, e; + GF2EX lh(INIT_SIZE, F.n); + + lh = h; + b = 1; + e = 0; + while (e < a-1 && !IsX(lh)) { + e++; + b *= q; + PowerCompose(lh, lh, q, F); + } + + if (!IsX(lh)) b *= q; + + return b; +} + + + +static +void TandemPowerCompose(GF2EX& y1, GF2EX& y2, const GF2EX& h, + long q1, long q2, const GF2EXModulus& F) +{ + GF2EX z(INIT_SIZE, F.n); + long sw; + + z = h; + SetX(y1); + SetX(y2); + + while (q1 || q2) { + sw = 0; + + if (q1 > 1 || q2 > 1) sw = 4; + + if (q1 & 1) { + if (IsX(y1)) + y1 = z; + else + sw = sw | 2; + } + + if (q2 & 1) { + if (IsX(y2)) + y2 = z; + else + sw = sw | 1; + } + + switch (sw) { + case 0: + break; + + case 1: + CompMod(y2, y2, z, F); + break; + + case 2: + CompMod(y1, y1, z, F); + break; + + case 3: + Comp2Mod(y1, y2, y1, y2, z, F); + break; + + case 4: + CompMod(z, z, z, F); + break; + + case 5: + Comp2Mod(z, y2, z, y2, z, F); + break; + + case 6: + Comp2Mod(z, y1, z, y1, z, F); + break; + + case 7: + Comp3Mod(z, y1, y2, z, y1, y2, z, F); + break; + } + + q1 = q1 >> 1; + q2 = q2 >> 1; + } +} + + + +static +long RecComputeDegree(long u, const GF2EX& h, const GF2EXModulus& F, + FacVec& fvec) +{ + if (IsX(h)) return 1; + + if (fvec[u].link == -1) return BaseCase(h, fvec[u].q, 
fvec[u].a, F); + + GF2EX h1, h2; + long q1, q2, r1, r2; + + q1 = fvec[fvec[u].link].val; + q2 = fvec[fvec[u].link+1].val; + + TandemPowerCompose(h1, h2, h, q1, q2, F); + r1 = RecComputeDegree(fvec[u].link, h2, F, fvec); + r2 = RecComputeDegree(fvec[u].link+1, h1, F, fvec); + return r1*r2; +} + + + + +long RecComputeDegree(const GF2EX& h, const GF2EXModulus& F) + // f = F.f is assumed to be an "equal degree" polynomial + // h = X^p mod f + // the common degree of the irreducible factors of f is computed +{ + if (F.n == 1 || IsX(h)) + return 1; + + FacVec fvec; + + FactorInt(fvec, F.n); + + return RecComputeDegree(fvec.length()-1, h, F, fvec); +} + + +void FindRoot(GF2E& root, const GF2EX& ff) +// finds a root of ff. +// assumes that ff is monic and splits into distinct linear factors + +{ + GF2EXModulus F; + GF2EX h, h1, f; + GF2E r; + + f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("FindRoot: bad args"); + + if (deg(f) == 0) + LogicError("FindRoot: bad args"); + + + while (deg(f) > 1) { + build(F, f); + random(r); + clear(h); + SetCoeff(h, 1, r); + TraceMap(h, h, F); + GCD(h, h, f); + if (deg(h) > 0 && deg(h) < deg(f)) { + if (deg(h) > deg(f)/2) + div(f, f, h); + else + f = h; + } + } + + root = ConstTerm(f); +} + + +static +long power(long a, long e) +{ + long i, res; + + res = 1; + for (i = 1; i <= e; i++) + res = res * a; + + return res; +} + + +static +long IrredBaseCase(const GF2EX& h, long q, long a, const GF2EXModulus& F) +{ + long e; + GF2EX X, s, d; + + e = power(q, a-1); + PowerCompose(s, h, e, F); + SetX(X); + add(s, s, X); + GCD(d, F.f, s); + return IsOne(d); +} + + +static +long RecIrredTest(long u, const GF2EX& h, const GF2EXModulus& F, + const FacVec& fvec) +{ + long q1, q2; + GF2EX h1, h2; + + if (IsX(h)) return 0; + + if (fvec[u].link == -1) { + return IrredBaseCase(h, fvec[u].q, fvec[u].a, F); + } + + + q1 = fvec[fvec[u].link].val; + q2 = fvec[fvec[u].link+1].val; + + TandemPowerCompose(h1, h2, h, q1, q2, F); + return RecIrredTest(fvec[u].link, h2, F, fvec) + && RecIrredTest(fvec[u].link+1, h1, F, fvec); +} + +long DetIrredTest(const GF2EX& f) +{ + if (deg(f) <= 0) return 0; + if (deg(f) == 1) return 1; + + GF2EXModulus F; + + build(F, f); + + GF2EX h; + + FrobeniusMap(h, F); + + GF2EX s; + PowerCompose(s, h, F.n, F); + if (!IsX(s)) return 0; + + FacVec fvec; + + FactorInt(fvec, F.n); + + return RecIrredTest(fvec.length()-1, h, F, fvec); +} + + + +long IterIrredTest(const GF2EX& f) +{ + if (deg(f) <= 0) return 0; + if (deg(f) == 1) return 1; + + GF2EXModulus F; + + build(F, f); + + GF2EX h; + + FrobeniusMap(h, F); + + long CompTableSize = 2*SqrRoot(deg(f)); + + GF2EXArgument H; + + build(H, h, F, CompTableSize); + + long i, d, limit, limit_sqr; + GF2EX g, X, t, prod; + + + SetX(X); + + i = 0; + g = h; + d = 1; + limit = 2; + limit_sqr = limit*limit; + + set(prod); + + + while (2*d <= deg(f)) { + add(t, g, X); + MulMod(prod, prod, t, F); + i++; + if (i == limit_sqr) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + + set(prod); + limit++; + limit_sqr = limit*limit; + i = 0; + } + + d = d + 1; + if (2*d <= deg(f)) { + CompMod(g, g, H, F); + } + } + + if (i > 0) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + } + + return 1; +} + +static +void MulByXPlusY(vec_GF2EX& h, const GF2EX& f, const GF2EX& g) +// h represents the bivariate polynomial h[0] + h[1]*Y + ... + h[n-1]*Y^k, +// where the h[i]'s are polynomials in X, each of degree < deg(f), +// and k < deg(g). +// h is replaced by the bivariate polynomial h*(X+Y) (mod f(X), g(Y)). 
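+// +// Illustration (k = 1, n > 2): for h = h0 + h1*Y, +// h*(X+Y) = X*h0 + (h0 + X*h1)*Y + h1*Y^2, +// i.e. the coefficient array becomes [X*h0 mod f, h0 + X*h1 mod f, h1]; +// the else branch below additionally folds in g(Y) once the Y-degree +// reaches n.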
+ +{ + long n = deg(g); + long k = h.length()-1; + + if (k < 0) return; + + if (k < n-1) { + h.SetLength(k+2); + h[k+1] = h[k]; + for (long i = k; i >= 1; i--) { + MulByXMod(h[i], h[i], f); + add(h[i], h[i], h[i-1]); + } + MulByXMod(h[0], h[0], f); + } + else { + GF2EX b, t; + + b = h[n-1]; + for (long i = n-1; i >= 1; i--) { + mul(t, b, g.rep[i]); + MulByXMod(h[i], h[i], f); + add(h[i], h[i], h[i-1]); + add(h[i], h[i], t); + } + mul(t, b, g.rep[0]); + MulByXMod(h[0], h[0], f); + add(h[0], h[0], t); + } + + // normalize + + k = h.length()-1; + while (k >= 0 && IsZero(h[k])) k--; + h.SetLength(k+1); +} + + +static +void IrredCombine(GF2EX& x, const GF2EX& f, const GF2EX& g) +{ + if (deg(f) < deg(g)) { + IrredCombine(x, g, f); + return; + } + + // deg(f) >= deg(g)...not necessary, but maybe a little more + // time & space efficient + + long df = deg(f); + long dg = deg(g); + long m = df*dg; + + vec_GF2EX h(INIT_SIZE, dg); + + long i; + for (i = 0; i < dg; i++) h[i].SetMaxLength(df); + + h.SetLength(1); + set(h[0]); + + vec_GF2E a; + + a.SetLength(2*m); + + for (i = 0; i < 2*m; i++) { + a[i] = ConstTerm(h[0]); + if (i < 2*m-1) + MulByXPlusY(h, f, g); + } + + MinPolySeq(x, a, m); +} + + +static +void BuildPrimePowerIrred(GF2EX& f, long q, long e) +{ + long n = power(q, e); + + do { + random(f, n); + SetCoeff(f, n); + } while (!IterIrredTest(f)); +} + +static +void RecBuildIrred(GF2EX& f, long u, const FacVec& fvec) +{ + if (fvec[u].link == -1) + BuildPrimePowerIrred(f, fvec[u].q, fvec[u].a); + else { + GF2EX g, h; + RecBuildIrred(g, fvec[u].link, fvec); + RecBuildIrred(h, fvec[u].link+1, fvec); + IrredCombine(f, g, h); + } +} + + +void BuildIrred(GF2EX& f, long n) +{ + if (n <= 0) + LogicError("BuildIrred: n must be positive"); + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in BuildIrred"); + + if (n == 1) { + SetX(f); + return; + } + + FacVec fvec; + + FactorInt(fvec, n); + + RecBuildIrred(f, fvec.length()-1, fvec); +} + + + +#if 0 +void BuildIrred(GF2EX& f, long n) +{ + if (n <= 0) + LogicError("BuildIrred: n must be positive"); + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in BuildIrred"); + + if (n == 1) { + SetX(f); + return; + } + + GF2EX g; + + do { + random(g, n); + SetCoeff(g, n); + } while (!IterIrredTest(g)); + + f = g; + +} +#endif + + + +void BuildRandomIrred(GF2EX& f, const GF2EX& g) +{ + GF2EXModulus G; + GF2EX h, ff; + + build(G, g); + do { + random(h, deg(g)); + IrredPolyMod(ff, h, G); + } while (deg(ff) < deg(g)); + + f = ff; +} + + +/************* NEW DDF ****************/ + +NTL_CHEAP_THREAD_LOCAL long GF2EX_GCDTableSize = 4; +NTL_CHEAP_THREAD_LOCAL double GF2EXFileThresh = NTL_FILE_THRESH; +static NTL_CHEAP_THREAD_LOCAL vec_GF2EX *BabyStepFile = 0; +static NTL_CHEAP_THREAD_LOCAL vec_GF2EX *GiantStepFile = 0; +static NTL_CHEAP_THREAD_LOCAL long use_files; + + +static +double CalcTableSize(long n, long k) +{ + double sz = GF2E::storage(); + sz = sz * n; + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_GF2E); + sz = sz * k; + sz = sz/1024; + return sz; +} + + + +static +void GenerateBabySteps(GF2EX& h1, const GF2EX& f, const GF2EX& h, long k, + FileList& flist, long verbose) + +{ + double t; + + if (verbose) { cerr << "generating baby steps..."; t = GetTime(); } + + GF2EXModulus F; + build(F, f); + + GF2EXArgument H; + +#if 0 + double n2 = sqrt(double(F.n)); + double n4 = sqrt(n2); + double n34 = n2*n4; + long sz = long(ceil(n34/sqrt(sqrt(2.0)))); +#else + long sz = 2*SqrRoot(F.n); +#endif + + build(H, h, F, sz); + + + h1 = h; + + long i; + + 
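// the i-th baby step is h composed with itself i times mod f + // (i.e. X^(q^i) mod f when h = X^q); steps are kept in memory or + // written to disk in hex form, depending on the use_files flag +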
GF2XHexOutputPush push; + GF2X::HexOutput = 1; + + if (!use_files) { + (*BabyStepFile).SetLength(k-1); + } + + for (i = 1; i <= k-1; i++) { + if (use_files) { + ofstream s; + OpenWrite(s, FileName("baby", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*BabyStepFile)(i) = h1; + + CompMod(h1, h1, H, F); + if (verbose) cerr << "+"; + } + + if (verbose) + cerr << (GetTime()-t) << "\n"; + +} + + +static +void GenerateGiantSteps(const GF2EX& f, const GF2EX& h, long l, + FileList& flist, long verbose) +{ + + double t; + + if (verbose) { cerr << "generating giant steps..."; t = GetTime(); } + + GF2EXModulus F; + build(F, f); + + GF2EXArgument H; + +#if 0 + double n2 = sqrt(double(F.n)); + double n4 = sqrt(n2); + double n34 = n2*n4; + long sz = long(ceil(n34/sqrt(sqrt(2.0)))); +#else + long sz = 2*SqrRoot(F.n); +#endif + + build(H, h, F, sz); + + GF2EX h1; + + h1 = h; + + long i; + + GF2XHexOutputPush push; + GF2X::HexOutput = 1; + + if (!use_files) { + (*GiantStepFile).SetLength(l); + } + + for (i = 1; i <= l-1; i++) { + if (use_files) { + ofstream s; + OpenWrite(s, FileName("giant", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*GiantStepFile)(i) = h1; + + CompMod(h1, h1, H, F); + if (verbose) cerr << "+"; + } + + if (use_files) { + ofstream s; + OpenWrite(s, FileName("giant", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*GiantStepFile)(i) = h1; + + if (verbose) + cerr << (GetTime()-t) << "\n"; +} + +static +void NewAddFactor(vec_pair_GF2EX_long& u, const GF2EX& g, long m, long verbose) +{ + long len = u.length(); + + u.SetLength(len+1); + u[len].a = g; + u[len].b = m; + + if (verbose) { + cerr << "split " << m << " " << deg(g) << "\n"; + } +} + + + + +static +void NewProcessTable(vec_pair_GF2EX_long& u, GF2EX& f, const GF2EXModulus& F, + vec_GF2EX& buf, long size, long StartInterval, + long IntervalLength, long verbose) + +{ + if (size == 0) return; + + GF2EX& g = buf[size-1]; + + long i; + + for (i = 0; i < size-1; i++) + MulMod(g, g, buf[i], F); + + GCD(g, f, g); + + if (deg(g) == 0) return; + + div(f, f, g); + + long d = (StartInterval-1)*IntervalLength + 1; + i = 0; + long interval = StartInterval; + + while (i < size-1 && 2*d <= deg(g)) { + GCD(buf[i], buf[i], g); + if (deg(buf[i]) > 0) { + NewAddFactor(u, buf[i], interval, verbose); + div(g, g, buf[i]); + } + + i++; + interval++; + d += IntervalLength; + } + + if (deg(g) > 0) { + if (i == size-1) + NewAddFactor(u, g, interval, verbose); + else + NewAddFactor(u, g, (deg(g)+IntervalLength-1)/IntervalLength, verbose); + } +} + + +static +void FetchGiantStep(GF2EX& g, long gs, const GF2EXModulus& F) +{ + if (use_files) { + ifstream s; + OpenRead(s, FileName("giant", gs)); + NTL_INPUT_CHECK_ERR(s >> g); + } + else + g = (*GiantStepFile)(gs); + + rem(g, g, F); +} + + +static +void FetchBabySteps(vec_GF2EX& v, long k) +{ + v.SetLength(k); + + SetX(v[0]); + + long i; + for (i = 1; i <= k-1; i++) { + if (use_files) { + ifstream s; + OpenRead(s, FileName("baby", i)); + NTL_INPUT_CHECK_ERR(s >> v[i]); + } + else + v[i] = (*BabyStepFile)(i); + } +} + + + +static +void GiantRefine(vec_pair_GF2EX_long& u, const GF2EX& ff, long k, long l, + long verbose) + +{ + double t; + + if (verbose) { + cerr << "giant refine..."; + t = GetTime(); + } + + u.SetLength(0); + + vec_GF2EX BabyStep; + + FetchBabySteps(BabyStep, k); + + vec_GF2EX buf(INIT_SIZE, GF2EX_GCDTableSize); + + GF2EX f; + f = ff; + + GF2EXModulus F; + build(F, f); + + GF2EX g; + GF2EX h; + + long size = 0; + + long first_gs; + + long d = 1; + + while 
(2*d <= deg(f)) { + + long old_n = deg(f); + + long gs = (d+k-1)/k; + long bs = gs*k - d; + + if (bs == k-1) { + size++; + if (size == 1) first_gs = gs; + FetchGiantStep(g, gs, F); + add(buf[size-1], g, BabyStep[bs]); + } + else { + add(h, g, BabyStep[bs]); + MulMod(buf[size-1], buf[size-1], h, F); + } + + if (verbose && bs == 0) cerr << "+"; + + if (size == GF2EX_GCDTableSize && bs == 0) { + NewProcessTable(u, f, F, buf, size, first_gs, k, verbose); + if (verbose) cerr << "*"; + size = 0; + } + + d++; + + if (2*d <= deg(f) && deg(f) < old_n) { + build(F, f); + + long i; + for (i = 1; i <= k-1; i++) + rem(BabyStep[i], BabyStep[i], F); + } + } + + if (size > 0) { + NewProcessTable(u, f, F, buf, size, first_gs, k, verbose); + if (verbose) cerr << "*"; + } + + if (deg(f) > 0) + NewAddFactor(u, f, 0, verbose); + + if (verbose) { + t = GetTime()-t; + cerr << "giant refine time: " << t << "\n"; + } +} + + +static +void IntervalRefine(vec_pair_GF2EX_long& factors, const GF2EX& ff, + long k, long gs, const vec_GF2EX& BabyStep, long verbose) + +{ + vec_GF2EX buf(INIT_SIZE, GF2EX_GCDTableSize); + + GF2EX f; + f = ff; + + GF2EXModulus F; + build(F, f); + + GF2EX g; + + FetchGiantStep(g, gs, F); + + long size = 0; + + long first_d; + + long d = (gs-1)*k + 1; + long bs = k-1; + + while (bs >= 0 && 2*d <= deg(f)) { + + long old_n = deg(f); + + if (size == 0) first_d = d; + rem(buf[size], BabyStep[bs], F); + add(buf[size], buf[size], g); + size++; + + if (size == GF2EX_GCDTableSize) { + NewProcessTable(factors, f, F, buf, size, first_d, 1, verbose); + size = 0; + } + + d++; + bs--; + + if (bs >= 0 && 2*d <= deg(f) && deg(f) < old_n) { + build(F, f); + rem(g, g, F); + } + } + + NewProcessTable(factors, f, F, buf, size, first_d, 1, verbose); + + if (deg(f) > 0) + NewAddFactor(factors, f, deg(f), verbose); +} + + + + +static +void BabyRefine(vec_pair_GF2EX_long& factors, const vec_pair_GF2EX_long& u, + long k, long l, long verbose) + +{ + double t; + + if (verbose) { + cerr << "baby refine..."; + t = GetTime(); + } + + factors.SetLength(0); + + vec_GF2EX BabyStep; + + long i; + for (i = 0; i < u.length(); i++) { + const GF2EX& g = u[i].a; + long gs = u[i].b; + + if (gs == 0 || 2*((gs-1)*k+1) > deg(g)) + NewAddFactor(factors, g, deg(g), verbose); + else { + if (BabyStep.length() == 0) + FetchBabySteps(BabyStep, k); + IntervalRefine(factors, g, k, gs, BabyStep, verbose); + } + } + + if (verbose) { + t = GetTime()-t; + cerr << "baby refine time: " << t << "\n"; + } +} + + + + + + +void NewDDF(vec_pair_GF2EX_long& factors, + const GF2EX& f, + const GF2EX& h, + long verbose) + +{ + if (!IsOne(LeadCoeff(f))) + LogicError("NewDDF: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(0); + append(factors, cons(f, 1L)); + return; + } + + long B = deg(f)/2; + long k = SqrRoot(B); + long l = (B+k-1)/k; + + GF2EX h1; + + if (CalcTableSize(deg(f), k + l - 1) > GF2EXFileThresh) + use_files = 1; + else + use_files = 0; + + FileList flist; + + vec_GF2EX local_BabyStepFile; + vec_GF2EX local_GiantStepFile; + + BabyStepFile = &local_BabyStepFile; + GiantStepFile = &local_GiantStepFile; + + + GenerateBabySteps(h1, f, h, k, flist, verbose); + + GenerateGiantSteps(f, h1, l, flist, verbose); + + vec_pair_GF2EX_long u; + GiantRefine(u, f, k, l, verbose); + BabyRefine(factors, u, k, l, verbose); +} + +long IterComputeDegree(const GF2EX& h, const GF2EXModulus& F) +{ + long n = deg(F); + + if (n == 1 || IsX(h)) return 1; + + long B = n/2; + long k = SqrRoot(B); + long l 
= (B+k-1)/k; + + + GF2EXArgument H; + +#if 0 + double n2 = sqrt(double(n)); + double n4 = sqrt(n2); + double n34 = n2*n4; + long sz = long(ceil(n34/sqrt(sqrt(2.0)))); +#else + long sz = 2*SqrRoot(F.n); +#endif + + build(H, h, F, sz); + + GF2EX h1; + h1 = h; + + vec_GF2EX baby; + baby.SetLength(k); + + SetX(baby[0]); + + long i; + + for (i = 1; i <= k-1; i++) { + baby[i] = h1; + CompMod(h1, h1, H, F); + if (IsX(h1)) return i+1; + } + + build(H, h1, F, sz); + + long j; + + for (j = 2; j <= l; j++) { + CompMod(h1, h1, H, F); + + for (i = k-1; i >= 0; i--) { + if (h1 == baby[i]) + return j*k-i; + } + } + + return n; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/GF2EXTest.c b/thirdparty/linux/ntl/src/GF2EXTest.c new file mode 100644 index 0000000000..67e24df2c1 --- /dev/null +++ b/thirdparty/linux/ntl/src/GF2EXTest.c @@ -0,0 +1,133 @@ + +#include +#include + + +NTL_OPEN_NNS + +void PlainMul(GF2EX&, const GF2EX&, const GF2EX&); + +NTL_CLOSE_NNS + +NTL_CLIENT + +int main() +{ + GF2X p; + + BuildIrred(p, 200); + + GF2E::init(p); + + GF2EX f; + + SetCoeff(f, 41); + SetCoeff(f, 1); + SetCoeff(f, 0); + + GF2X a; + SetCoeff(a, 117); + SetCoeff(a, 10); + SetCoeff(a, 0); + + GF2EX g, h; + SetX(g); + SetCoeff(g, 0, to_GF2E(a)); + + MinPolyMod(h, g, f); + + f = h; + + vec_pair_GF2EX_long u; + + CanZass(u, f, 1); + + cerr << "factorization pattern:"; + long i; + + for (i = 0; i < u.length(); i++) { + cerr << " "; + long k = u[i].b; + if (k > 1) + cerr << k << "*"; + cerr << deg(u[i].a); + } + + cerr << "\n\n\n"; + + GF2EX ff; + mul(ff, u); + + if (f != ff || u.length() != 11) { + cerr << "GF2EXTest NOT OK\n"; + return 1; + } + + { + + cerr << "multiplication test...\n"; + + BuildIrred(p, 512); + GF2E::init(p); + + GF2EX A, B, C, C1; + + + random(A, 512); + random(B, 512); + + double t; + long i; + + t = GetTime(); + for (i = 0; i < 10; i++) PlainMul(C, A, B); + t = GetTime() - t; + cerr << "time for plain mul of degree 511 over GF(2^512): " << (t/10) << "s\n"; + + t = GetTime(); + for (i = 0; i < 10; i++) mul(C1, A, B); + t = GetTime() - t; + cerr << "time for karatsuba mul of degree 511 over GF(2^512): " << (t/10) << "s\n"; + + if (C != C1) { + cerr << "GF2EXTest NOT OK\n"; + return 1; + } + + } + + { + + cerr << "multiplication test...\n"; + + BuildIrred(p, 16); + GF2E::init(p); + + GF2EX A, B, C, C1; + + + random(A, 512); + random(B, 512); + + double t; + + t = GetTime(); + for (i = 0; i < 10; i++) PlainMul(C, A, B); + t = GetTime() - t; + cerr << "time for plain mul of degree 511 over GF(2^16): " << (t/10) << "s\n"; + + t = GetTime(); + for (i = 0; i < 10; i++) mul(C1, A, B); + t = GetTime() - t; + cerr << "time for karatsuba mul of degree 511 over GF(2^16): " << (t/10) << "s\n"; + + if (C != C1) { + cerr << "GF2EXTest NOT OK\n"; + return 1; + } + + } + + cerr << "GF2EXTest OK\n"; + return 0; +} diff --git a/thirdparty/linux/ntl/src/GF2X.c b/thirdparty/linux/ntl/src/GF2X.c new file mode 100644 index 0000000000..3bb7c533bf --- /dev/null +++ b/thirdparty/linux/ntl/src/GF2X.c @@ -0,0 +1,2038 @@ + +#include +#include + +#include +#include + +#if (defined(NTL_WIZARD_HACK) && defined(NTL_GF2X_LIB)) +#undef NTL_GF2X_LIB +#endif + +#ifdef NTL_GF2X_LIB +#include +#endif + + +#ifdef NTL_PCLMUL + +#if (NTL_BITS_PER_LONG != 64) +#error "NTL_PCLMUL only works on 64-bit machines" +#endif + +#include + +#define NTL_INLINE inline + +static inline void +pclmul_mul1 (unsigned long *c, unsigned long a, unsigned long b) +{ + __m128i aa = _mm_setr_epi64( _mm_cvtsi64_m64(a), _mm_cvtsi64_m64(0)); + __m128i bb = 
_mm_setr_epi64( _mm_cvtsi64_m64(b), _mm_cvtsi64_m64(0)); + _mm_storeu_si128((__m128i*)c, _mm_clmulepi64_si128(aa, bb, 0)); +} +#else + + +#define NTL_INLINE + +#endif + +NTL_START_IMPL + +NTL_CHEAP_THREAD_LOCAL +long GF2X::HexOutput = 0; + + +void GF2X::SetMaxLength(long n) +{ + if (n < 0) LogicError("GF2X::SetMaxLength: negative length"); + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("GF2X::SetMaxLength: excessive length"); + long w = (n + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG; + xrep.SetMaxLength(w); +} + +GF2X::GF2X(INIT_SIZE_TYPE, long n) +{ + SetMaxLength(n); +} + + + +const GF2X& GF2X::zero() +{ + static const GF2X z; // GLOBAL (assumes C++11 thread-safe init) + return z; +} + +void GF2X::normalize() +{ + long n; + const _ntl_ulong *p; + + n = xrep.length(); + if (n == 0) return; + p = xrep.elts() + n; + while (n > 0 && (*--p) == 0) { + n--; + } + xrep.QuickSetLength(n); +} + + + +void GF2X::SetLength(long n) +{ + if (n < 0) { + LogicError("SetLength: negative index"); + return; // NOTE: this helps the compiler optimize + } + + long w = (n + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG; + long old_w = xrep.length(); + + xrep.SetLength(w); + + long i; + + if (w > old_w) { + // zero out new words + + for (i = old_w; i < w; i++) + xrep[i] = 0; + } + else { + // zero out high order bits of last word + + + long wi = n/NTL_BITS_PER_LONG; + long bi = n - wi*NTL_BITS_PER_LONG; + + if (bi == 0) return; + unsigned long mask = (1UL << bi) - 1UL; + xrep[wi] &= mask; + } +} + + +ref_GF2 GF2X::operator[](long i) +{ + if (i < 0) LogicError("GF2X: subscript out of range"); + long wi = i/NTL_BITS_PER_LONG; + if (wi >= xrep.length()) LogicError("GF2X: subscript out of range"); + long bi = i - wi*NTL_BITS_PER_LONG; + return ref_GF2(INIT_LOOP_HOLE, &xrep[wi], bi); +} + + +const GF2 GF2X::operator[](long i) const +{ + if (i < 0) LogicError("GF2X: subscript out of range"); + long wi = i/NTL_BITS_PER_LONG; + if (wi >= xrep.length()) LogicError("GF2X: subscript out of range"); + long bi = i - wi*NTL_BITS_PER_LONG; + return to_GF2((xrep[wi] & (1UL << bi)) != 0); +} + + + + + +long IsZero(const GF2X& a) + { return a.xrep.length() == 0; } + +long IsOne(const GF2X& a) + { return a.xrep.length() == 1 && a.xrep[0] == 1; } + + + + + + + +long IsX(const GF2X& a) +{ + return a.xrep.length() == 1 && a.xrep[0] == 2; +} + +const GF2 coeff(const GF2X& a, long i) +{ + if (i < 0) return to_GF2(0); + long wi = i/NTL_BITS_PER_LONG; + if (wi >= a.xrep.length()) return to_GF2(0); + long bi = i - wi*NTL_BITS_PER_LONG; + + return to_GF2((a.xrep[wi] & (1UL << bi)) != 0); +} + +const GF2 LeadCoeff(const GF2X& a) +{ + if (IsZero(a)) + return to_GF2(0); + else + return to_GF2(1); +} + +const GF2 ConstTerm(const GF2X& a) +{ + if (IsZero(a)) + return to_GF2(0); + else + return to_GF2((a.xrep[0] & 1) != 0); +} + + +void set(GF2X& x) +{ + x.xrep.SetLength(1); + x.xrep[0] = 1; +} + +void SetX(GF2X& x) +{ + x.xrep.SetLength(1); + x.xrep[0] = 2; +} + +void SetCoeff(GF2X& x, long i) +{ + if (i < 0) { + LogicError("SetCoeff: negative index"); + return; // NOTE: helps the compiler optimize + } + + long n, j; + + n = x.xrep.length(); + long wi = i/NTL_BITS_PER_LONG; + + if (wi >= n) { + x.xrep.SetLength(wi+1); + for (j = n; j <= wi; j++) + x.xrep[j] = 0; + } + + long bi = i - wi*NTL_BITS_PER_LONG; + + x.xrep[wi] |= (1UL << bi); +} + + + +void SetCoeff(GF2X& x, long i, long val) +{ + if (i < 0) { + LogicError("SetCoeff: negative index"); + return; // NOTE: helps the compiler optimize + } + + val = val & 1; + + if (val) { + SetCoeff(x, i); +
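// a one bit was just set, which cannot create leading zero words, so + // no normalization is needed before returning +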
return; + } + + // we want to clear position i + + long n; + + n = x.xrep.length(); + long wi = i/NTL_BITS_PER_LONG; + + if (wi >= n) + return; + + long bi = i - wi*NTL_BITS_PER_LONG; + + x.xrep[wi] &= ~(1UL << bi); + if (wi == n-1 && !x.xrep[wi]) x.normalize(); +} + +void SetCoeff(GF2X& x, long i, GF2 a) +{ + SetCoeff(x, i, rep(a)); +} + + + +long deg(const GF2X& aa) +{ + long n = aa.xrep.length(); + + if (n == 0) + return -1; + + _ntl_ulong a = aa.xrep[n-1]; + long i = 0; + + if (a == 0) LogicError("GF2X: unnormalized polynomial detected in deg"); + + while (a>=256) + i += 8, a >>= 8; + if (a >=16) + i += 4, a >>= 4; + if (a >= 4) + i += 2, a >>= 2; + if (a >= 2) + i += 2; + else if (a >= 1) + i++; + + return NTL_BITS_PER_LONG*(n-1) + i - 1; +} + + +long operator==(const GF2X& a, const GF2X& b) +{ + return a.xrep == b.xrep; +} + +long operator==(const GF2X& a, long b) +{ + if (b & 1) + return IsOne(a); + else + return IsZero(a); +} + +long operator==(const GF2X& a, GF2 b) +{ + if (b == 1) + return IsOne(a); + else + return IsZero(a); +} + +static +istream & HexInput(istream& s, GF2X& a) +{ + long n; + long c; + long i; + long val; + GF2X ibuf; + + n = 0; + clear(ibuf); + + c = s.peek(); + val = CharToIntVal(c); + while (val != -1) { + for (i = 0; i < 4; i++) + if (val & (1L << i)) + SetCoeff(ibuf, n+i); + + n += 4; + s.get(); + c = s.peek(); + val = CharToIntVal(c); + } + + a = ibuf; + return s; +} + + + + + + + +istream & operator>>(istream& s, GF2X& a) +{ + NTL_ZZRegister(ival); + + long c; + if (!s) NTL_INPUT_ERROR(s, "bad GF2X input"); + + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + + if (c == '0') { + s.get(); + c = s.peek(); + if (c == 'x' || c == 'X') { + s.get(); + return HexInput(s, a); + } + else { + NTL_INPUT_ERROR(s, "bad GF2X input"); + } + } + + if (c != '[') { + NTL_INPUT_ERROR(s, "bad GF2X input"); + } + + GF2X ibuf; + long n; + + n = 0; + clear(ibuf); + + s.get(); + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + + while (c != ']' && c != EOF) { + if (!(s >> ival)) NTL_INPUT_ERROR(s, "bad GF2X input"); + SetCoeff(ibuf, n, to_GF2(ival)); + n++; + + c = s.peek(); + + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + } + + if (c == EOF) NTL_INPUT_ERROR(s, "bad GF2X input"); + s.get(); + + a = ibuf; + return s; +} + + + +static +ostream & HexOutput(ostream& s, const GF2X& a) +{ + s << "0x"; + + long da = deg(a); + + if (da < 0) { + s << '0'; + return s; + } + + long i, n, val; + + val = 0; + n = 0; + for (i = 0; i <= da; i++) { + val = val | (rep(coeff(a, i)) << n); + n++; + + if (n == 4) { + s << IntValToChar(val); + val = 0; + n = 0; + } + } + + if (val) + s << IntValToChar(val); + + return s; +} + + +ostream& operator<<(ostream& s, const GF2X& a) +{ + if (GF2X::HexOutput) + return HexOutput(s, a); + + long i, da; + GF2 c; + + da = deg(a); + + s << '['; + + for (i = 0; i <= da; i++) { + c = coeff(a, i); + if (c == 0) + s << "0"; + else + s << "1"; + if (i < da) s << " "; + } + + s << ']'; + + return s; +} + +void random(GF2X& x, long n) +{ + if (n < 0) LogicError("GF2X random: negative length"); + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("GF2X random: excessive length"); + + long wl = (n+NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG; + + x.xrep.SetLength(wl); + + long i; + for (i = 0; i < wl-1; i++) { + x.xrep[i] = RandomWord(); + } + + if (n > 0) { + long pos = n % NTL_BITS_PER_LONG; + if (pos == 0) pos = NTL_BITS_PER_LONG; + x.xrep[wl-1] = RandomBits_ulong(pos); + } + + x.normalize(); +} + +void add(GF2X& x, 
const GF2X& a, const GF2X& b) +{ + long sa = a.xrep.length(); + long sb = b.xrep.length(); + + long i; + + if (sa == sb) { + x.xrep.SetLength(sa); + if (sa == 0) return; + + _ntl_ulong *xp = x.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + const _ntl_ulong *bp = b.xrep.elts(); + + for (i = 0; i < sa; i++) + xp[i] = ap[i] ^ bp[i]; + + i = sa-1; + while (i >= 0 && !xp[i]) i--; + x.xrep.QuickSetLength(i+1); + } + + else if (sa < sb) { + x.xrep.SetLength(sb); + _ntl_ulong *xp = x.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + const _ntl_ulong *bp = b.xrep.elts(); + + for (i = 0; i < sa; i++) + xp[i] = ap[i] ^ bp[i]; + + for (; i < sb; i++) + xp[i] = bp[i]; + } + else { // sa > sb + x.xrep.SetLength(sa); + _ntl_ulong *xp = x.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + const _ntl_ulong *bp = b.xrep.elts(); + + for (i = 0; i < sb; i++) + xp[i] = ap[i] ^ bp[i]; + + for (; i < sa; i++) + xp[i] = ap[i]; + } +} + + + + + +/* + * The bodies of mul1, Mul1, AddMul1, and mul_half + * are generated by the MakeDesc program, and the + * macros NTL_BB_MUL_CODE... are defined in mach_desc.h. + * Thanks to Paul Zimmermann for providing improvements + * to this approach. + */ + + +#if (defined(NTL_GF2X_ALTCODE1)) + +#define NTL_EFF_BB_MUL_CODE0 NTL_ALT1_BB_MUL_CODE0 +#define NTL_EFF_BB_MUL_CODE1 NTL_ALT1_BB_MUL_CODE1 +#define NTL_EFF_BB_MUL_CODE2 NTL_ALT1_BB_MUL_CODE2 +#define NTL_EFF_SHORT_BB_MUL_CODE1 NTL_ALT1_SHORT_BB_MUL_CODE1 +#define NTL_EFF_HALF_BB_MUL_CODE0 NTL_ALT1_HALF_BB_MUL_CODE0 + +#elif (defined(NTL_GF2X_ALTCODE)) + +#define NTL_EFF_BB_MUL_CODE0 NTL_ALT_BB_MUL_CODE0 +#define NTL_EFF_BB_MUL_CODE1 NTL_ALT_BB_MUL_CODE1 +#define NTL_EFF_BB_MUL_CODE2 NTL_ALT_BB_MUL_CODE2 +#define NTL_EFF_SHORT_BB_MUL_CODE1 NTL_ALT_SHORT_BB_MUL_CODE1 +#define NTL_EFF_HALF_BB_MUL_CODE0 NTL_ALT_HALF_BB_MUL_CODE0 + +#else + +#define NTL_EFF_BB_MUL_CODE0 NTL_BB_MUL_CODE0 +#define NTL_EFF_BB_MUL_CODE1 NTL_BB_MUL_CODE1 +#define NTL_EFF_BB_MUL_CODE2 NTL_BB_MUL_CODE2 +#define NTL_EFF_SHORT_BB_MUL_CODE1 NTL_SHORT_BB_MUL_CODE1 +#define NTL_EFF_HALF_BB_MUL_CODE0 NTL_HALF_BB_MUL_CODE0 + +#endif + + + +static +void mul1(_ntl_ulong *c, _ntl_ulong a, _ntl_ulong b) +{ + +#ifdef NTL_PCLMUL +pclmul_mul1(c, a, b); +#else +NTL_EFF_BB_MUL_CODE0 +#endif + + +} + + +#ifdef NTL_GF2X_NOINLINE + +#define mul1_IL mul1 + +#else + +static inline +void mul1_inline(_ntl_ulong *c, _ntl_ulong a, _ntl_ulong b) +{ + +#ifdef NTL_PCLMUL +pclmul_mul1(c, a, b); +#else +NTL_EFF_BB_MUL_CODE0 +#endif + + +} + +#define mul1_IL mul1_inline +#endif + + +static +void Mul1(_ntl_ulong *cp, const _ntl_ulong *bp, long sb, _ntl_ulong a) +{ + +#ifdef NTL_PCLMUL + + long i; + unsigned long carry, prod[2]; + + carry = 0; + for (i = 0; i < sb; i++) { + pclmul_mul1(prod, bp[i], a); + cp[i] = carry ^ prod[0]; + carry = prod[1]; + } + + cp[sb] = carry; + +#else + +NTL_EFF_BB_MUL_CODE1 + +#endif + + +} + +static +void AddMul1(_ntl_ulong *cp, const _ntl_ulong* bp, long sb, _ntl_ulong a) +{ + +#ifdef NTL_PCLMUL + + long i; + unsigned long carry, prod[2]; + + carry = 0; + for (i = 0; i < sb; i++) { + pclmul_mul1(prod, bp[i], a); + cp[i] ^= carry ^ prod[0]; + carry = prod[1]; + } + + cp[sb] ^= carry; + +#else + +NTL_EFF_BB_MUL_CODE2 + +#endif + + + +} + + +static +void Mul1_short(_ntl_ulong *cp, const _ntl_ulong *bp, long sb, _ntl_ulong a) +{ + +#ifdef NTL_PCLMUL + + long i; + unsigned long carry, prod[2]; + + carry = 0; + for (i = 0; i < sb; i++) { + pclmul_mul1(prod, bp[i], a); + cp[i] = carry ^ prod[0]; + carry = prod[1]; + } + + cp[sb] = carry; + 
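// carry-less schoolbook multiply by a single word: the low half of + // each 64x64 product is XORed with the high half carried out of the + // previous word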
+#else + +NTL_EFF_SHORT_BB_MUL_CODE1 + +#endif + + +} + + + + + +static +void mul_half(_ntl_ulong *c, _ntl_ulong a, _ntl_ulong b) +{ + +#ifdef NTL_PCLMUL +pclmul_mul1(c, a, b); +#else +NTL_EFF_HALF_BB_MUL_CODE0 +#endif + + +} + + +// mul2...mul8 hard-code 2x2...8x8 word multiplies. +// I adapted these routines from LiDIA (except mul3, see below). +// NOTE: Generally, inlining these functions seems to hurt performance, +// at least when using software mul1; for hardware mul1, I've +// switched to making mul2, 3, 4 inline, although this should +// really be profiled. + +static NTL_INLINE +void mul2(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b) +{ + _ntl_ulong hs0, hs1; + _ntl_ulong hl2[2]; + + hs0 = a[0] ^ a[1]; + hs1 = b[0] ^ b[1]; + + mul1_IL(c, a[0], b[0]); + mul1_IL(c+2, a[1], b[1]); + mul1_IL(hl2, hs0, hs1); + + + hl2[0] = hl2[0] ^ c[0] ^ c[2]; + hl2[1] = hl2[1] ^ c[1] ^ c[3]; + + c[1] ^= hl2[0]; + c[2] ^= hl2[1]; +} + + +/* + * This version of mul3 I got from Weimerskirch, Stebila, + * and Shantz, "Generic GF(2^m) arithmetic in software + * and its application to ECC" (ACISP 2003). + */ + +static NTL_INLINE +void mul3 (_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b) +{ + _ntl_ulong d0[2], d1[2], d2[2], d01[2], d02[2], d12[2]; + + mul1_IL(d0, a[0], b[0]); + mul1_IL(d1, a[1], b[1]); + mul1_IL(d2, a[2], b[2]); + mul1_IL(d01, a[0]^a[1], b[0]^b[1]); + mul1_IL(d02, a[0]^a[2], b[0]^b[2]); + mul1_IL(d12, a[1]^a[2], b[1]^b[2]); + + + c[0] = d0[0]; + c[1] = d0[1] ^ d01[0] ^ d1[0] ^ d0[0]; + c[2] = d01[1] ^ d1[1] ^ d0[1] ^ d02[0] ^ d2[0] ^ d0[0] ^ d1[0]; + c[3] = d02[1] ^ d2[1] ^ d0[1] ^ d1[1] ^ d12[0] ^ d1[0] ^ d2[0]; + c[4] = d12[1] ^ d1[1] ^ d2[1] ^ d2[0]; + c[5] = d2[1]; + +} + +static NTL_INLINE +void mul4(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b) +{ + _ntl_ulong hs0[2], hs1[2]; + _ntl_ulong hl2[4]; + + hs0[0] = a[0] ^ a[2]; + hs0[1] = a[1] ^ a[3]; + hs1[0] = b[0] ^ b[2]; + hs1[1] = b[1] ^ b[3]; + + mul2(c, a, b); + mul2(c+4, a+2, b+2); + mul2(hl2, hs0, hs1); + + hl2[0] = hl2[0] ^ c[0] ^ c[4]; + hl2[1] = hl2[1] ^ c[1] ^ c[5]; + hl2[2] = hl2[2] ^ c[2] ^ c[6]; + hl2[3] = hl2[3] ^ c[3] ^ c[7]; + + c[2] ^= hl2[0]; + c[3] ^= hl2[1]; + c[4] ^= hl2[2]; + c[5] ^= hl2[3]; +} + +static +void mul5 (_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b) +{ + _ntl_ulong hs0[3], hs1[3]; + _ntl_ulong hl2[6]; + + hs0[0] = a[0] ^ a[3]; + hs0[1] = a[1] ^ a[4]; + hs0[2] = a[2]; + hs1[0] = b[0] ^ b[3]; + hs1[1] = b[1] ^ b[4]; + hs1[2] = b[2]; + + mul3(c, a, b); + mul3(hl2, hs0, hs1); + mul2(c+6, a+3, b+3); + + hl2[0] = hl2[0] ^ c[0] ^ c[6]; + hl2[1] = hl2[1] ^ c[1] ^ c[7]; + hl2[2] = hl2[2] ^ c[2] ^ c[8]; + hl2[3] = hl2[3] ^ c[3] ^ c[9]; + hl2[4] = hl2[4] ^ c[4]; + hl2[5] = hl2[5] ^ c[5]; + + + c[3] ^= hl2[0]; + c[4] ^= hl2[1]; + c[5] ^= hl2[2]; + c[6] ^= hl2[3]; + c[7] ^= hl2[4]; + c[8] ^= hl2[5]; +} + +static +void mul6(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b) +{ + _ntl_ulong hs0[3], hs1[3]; + _ntl_ulong hl2[6]; + + hs0[0] = a[0] ^ a[3]; + hs0[1] = a[1] ^ a[4]; + hs0[2] = a[2] ^ a[5]; + hs1[0] = b[0] ^ b[3]; + hs1[1] = b[1] ^ b[4]; + hs1[2] = b[2] ^ b[5]; + + mul3(c, a, b); + mul3(c+6, a+3, b+3); + mul3(hl2, hs0, hs1); + + hl2[0] = hl2[0] ^ c[0] ^ c[6]; + hl2[1] = hl2[1] ^ c[1] ^ c[7]; + hl2[2] = hl2[2] ^ c[2] ^ c[8]; + hl2[3] = hl2[3] ^ c[3] ^ c[9]; + hl2[4] = hl2[4] ^ c[4] ^ c[10]; + hl2[5] = hl2[5] ^ c[5] ^ c[11]; + + c[3] ^= hl2[0]; + c[4] ^= hl2[1]; + c[5] ^= hl2[2]; + c[6] ^= hl2[3]; + c[7] ^= hl2[4]; + c[8] ^= hl2[5]; +} + +static +void
mul7(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b) +{ + _ntl_ulong hs0[4], hs1[4]; + _ntl_ulong hl2[8]; + + hs0[0] = a[0] ^ a[4]; + hs0[1] = a[1] ^ a[5]; + hs0[2] = a[2] ^ a[6]; + hs0[3] = a[3]; + hs1[0] = b[0] ^ b[4]; + hs1[1] = b[1] ^ b[5]; + hs1[2] = b[2] ^ b[6]; + hs1[3] = b[3]; + + mul4(c, a, b); + mul4(hl2, hs0, hs1); + mul3(c+8, a+4, b+4); + + hl2[0] = hl2[0] ^ c[0] ^ c[8]; + hl2[1] = hl2[1] ^ c[1] ^ c[9]; + hl2[2] = hl2[2] ^ c[2] ^ c[10]; + hl2[3] = hl2[3] ^ c[3] ^ c[11]; + hl2[4] = hl2[4] ^ c[4] ^ c[12]; + hl2[5] = hl2[5] ^ c[5] ^ c[13]; + hl2[6] = hl2[6] ^ c[6]; + hl2[7] = hl2[7] ^ c[7]; + + c[4] ^= hl2[0]; + c[5] ^= hl2[1]; + c[6] ^= hl2[2]; + c[7] ^= hl2[3]; + c[8] ^= hl2[4]; + c[9] ^= hl2[5]; + c[10] ^= hl2[6]; + c[11] ^= hl2[7]; +} + +static +void mul8(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b) +{ + _ntl_ulong hs0[4], hs1[4]; + _ntl_ulong hl2[8]; + + hs0[0] = a[0] ^ a[4]; + hs0[1] = a[1] ^ a[5]; + hs0[2] = a[2] ^ a[6]; + hs0[3] = a[3] ^ a[7]; + hs1[0] = b[0] ^ b[4]; + hs1[1] = b[1] ^ b[5]; + hs1[2] = b[2] ^ b[6]; + hs1[3] = b[3] ^ b[7]; + + mul4(c, a, b); + mul4(c+8, a+4, b+4); + mul4(hl2, hs0, hs1); + + hl2[0] = hl2[0] ^ c[0] ^ c[8]; + hl2[1] = hl2[1] ^ c[1] ^ c[9]; + hl2[2] = hl2[2] ^ c[2] ^ c[10]; + hl2[3] = hl2[3] ^ c[3] ^ c[11]; + hl2[4] = hl2[4] ^ c[4] ^ c[12]; + hl2[5] = hl2[5] ^ c[5] ^ c[13]; + hl2[6] = hl2[6] ^ c[6] ^ c[14]; + hl2[7] = hl2[7] ^ c[7] ^ c[15]; + + c[4] ^= hl2[0]; + c[5] ^= hl2[1]; + c[6] ^= hl2[2]; + c[7] ^= hl2[3]; + c[8] ^= hl2[4]; + c[9] ^= hl2[5]; + c[10] ^= hl2[6]; + c[11] ^= hl2[7]; +} + +static +void KarMul(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b, + long len, _ntl_ulong *stk) +{ + if (len <= 8) { + switch (len) { + case 1: mul1(c, a[0], b[0]); break; + case 2: mul2(c, a, b); break; + case 3: mul3(c, a, b); break; + case 4: mul4(c, a, b); break; + case 5: mul5(c, a, b); break; + case 6: mul6(c, a, b); break; + case 7: mul7(c, a, b); break; + case 8: mul8(c, a, b); break; + } + + return; + } + + long ll, lh, i, ll2, lh2; + const _ntl_ulong *a0, *a1, *b0, *b1; + _ntl_ulong *a01, *b01, *h; + + + lh = len >> 1; + ll = (len+1) >> 1; + + ll2 = ll << 1; + lh2 = lh << 1; + + a01 = stk; stk += ll+1; + b01 = stk; stk += ll+1; + h = stk; stk += ll2+1; + + a0 = a; + a1 = a+ll; + b0 = b; + b1 = b+ll; + + KarMul(c, a0, b0, ll, stk); + KarMul(c+ll2, a1, b1, lh, stk); + + for (i = 0; i < lh; i++) { + a01[i] = a[i] ^ a[i+ll]; + b01[i] = b[i] ^ b[i+ll]; + } + + if (lh < ll) { + a01[lh] = a[lh]; + b01[lh] = b[lh]; + } + + KarMul(h, a01, b01, ll, stk); + + for (i = 0; i < ll2; i++) + h[i] ^= c[i]; + + for (i = 0; i < lh2; i++) + h[i] ^= c[i+ll2]; + + for (i = 0; i < ll2; i++) + c[i+ll] ^= h[i]; +} + + + +#ifdef NTL_GF2X_LIB + + +void mul(GF2X& c, const GF2X& a, const GF2X& b) +{ + long sa = a.xrep.length(); + long sb = b.xrep.length(); + + if (sa <= 0 || sb <= 0) { + clear(c); + return; + } + + _ntl_ulong a0 = a.xrep[0]; + _ntl_ulong b0 = b.xrep[0]; + + if (sb == 1 && b0 == 1) { + c = a; + return; + } + + if (sa == 1 && a0 == 1) { + c = b; + return; + } + + if (&a == &b) { + sqr(c, a); + return; + } + + // finally: the general case + + + NTL_TLS_LOCAL(WordVector, mem); + WordVectorWatcher watch_mem(mem); + + const _ntl_ulong *ap = a.xrep.elts(), *bp = b.xrep.elts(); + _ntl_ulong *cp; + + + long sc = sa + sb; + long in_mem = 0; + + if (&a == &c || &b == &c) { + mem.SetLength(sc); + cp = mem.elts(); + in_mem = 1; + } + else { + c.xrep.SetLength(sc); + cp = c.xrep.elts(); + } + + gf2x_mul(cp, ap, sa, bp, sb); + + if (in_mem) { + 
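// the product was accumulated in scratch space because c aliases an + // input; copy it back now that a and b are no longer needed +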
c.xrep = mem; + } + + c.normalize(); +} +#else +void OldMul(GF2X& c, const GF2X& a, const GF2X& b) +{ + mul(c, a, b); +} +#endif + + + +#ifdef NTL_GF2X_LIB +void OldMul(GF2X& c, const GF2X& a, const GF2X& b) +#else +void mul(GF2X& c, const GF2X& a, const GF2X& b) +#endif +{ + long sa = a.xrep.length(); + long sb = b.xrep.length(); + + if (sa <= 0 || sb <= 0) { + clear(c); + return; + } + + _ntl_ulong a0 = a.xrep[0]; + _ntl_ulong b0 = b.xrep[0]; + + if (sb == 1 && b0 == 1) { + c = a; + return; + } + + if (sa == 1 && a0 == 1) { + c = b; + return; + } + + if (&a == &b) { + sqr(c, a); + return; + } + + if (sa == sb && sa <= 8) { + // we treat these cases specially for efficiency reasons + + switch (sa) { + case 1: { + _ntl_ulong v[2]; + if (!(a0 >> NTL_BITS_PER_LONG/2)) + mul_half(v, b0, a0); + else if (!(b0 >> NTL_BITS_PER_LONG/2)) + mul_half(v, a0, b0); + else + mul1(v, a0, b0); + + if (v[1]) { + c.xrep.SetLength(2); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + } + else { + c.xrep.SetLength(1); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + } + } + return; + + case 2: { + _ntl_ulong v[4]; + mul2(v, &a.xrep[0], &b.xrep[0]); + if (v[3]) { + c.xrep.SetLength(4); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + } + else { + c.xrep.SetLength(3); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + } + } + return; + + case 3: { + _ntl_ulong v[6]; + mul3(v, &a.xrep[0], &b.xrep[0]); + if (v[5]) { + c.xrep.SetLength(6); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + cp[5] = v[5]; + } + else { + c.xrep.SetLength(5); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + } + } + return; + + case 4: { + _ntl_ulong v[8]; + mul4(v, &a.xrep[0], &b.xrep[0]); + if (v[7]) { + c.xrep.SetLength(8); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + cp[5] = v[5]; + cp[6] = v[6]; + cp[7] = v[7]; + } + else { + c.xrep.SetLength(7); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + cp[5] = v[5]; + cp[6] = v[6]; + } + } + return; + + case 5: { + _ntl_ulong v[10]; + mul5(v, &a.xrep[0], &b.xrep[0]); + if (v[9]) { + c.xrep.SetLength(10); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + cp[5] = v[5]; + cp[6] = v[6]; + cp[7] = v[7]; + cp[8] = v[8]; + cp[9] = v[9]; + } + else { + c.xrep.SetLength(9); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + cp[5] = v[5]; + cp[6] = v[6]; + cp[7] = v[7]; + cp[8] = v[8]; + } + } + return; + + case 6: { + _ntl_ulong v[12]; + mul6(v, &a.xrep[0], &b.xrep[0]); + if (v[11]) { + c.xrep.SetLength(12); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + cp[5] = v[5]; + cp[6] = v[6]; + cp[7] = v[7]; + cp[8] = v[8]; + cp[9] = v[9]; + cp[10] = v[10]; + cp[11] = v[11]; + } + else { + c.xrep.SetLength(11); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + cp[5] = v[5]; + cp[6] = v[6]; + cp[7] = v[7]; + cp[8] = v[8]; + cp[9] = v[9]; + cp[10] = v[10]; + } + } + return; + + case 7: { + _ntl_ulong v[14]; + mul7(v, &a.xrep[0], &b.xrep[0]); + if (v[13]) { + c.xrep.SetLength(14); + _ntl_ulong *cp = &c.xrep[0]; + 
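// v[13] != 0: the product needs all 14 words, copied unrolled as in + // the smaller cases +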
cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + cp[5] = v[5]; + cp[6] = v[6]; + cp[7] = v[7]; + cp[8] = v[8]; + cp[9] = v[9]; + cp[10] = v[10]; + cp[11] = v[11]; + cp[12] = v[12]; + cp[13] = v[13]; + } + else { + c.xrep.SetLength(13); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + cp[5] = v[5]; + cp[6] = v[6]; + cp[7] = v[7]; + cp[8] = v[8]; + cp[9] = v[9]; + cp[10] = v[10]; + cp[11] = v[11]; + cp[12] = v[12]; + } + } + return; + + case 8: { + _ntl_ulong v[16]; + mul8(v, &a.xrep[0], &b.xrep[0]); + if (v[15]) { + c.xrep.SetLength(16); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + cp[5] = v[5]; + cp[6] = v[6]; + cp[7] = v[7]; + cp[8] = v[8]; + cp[9] = v[9]; + cp[10] = v[10]; + cp[11] = v[11]; + cp[12] = v[12]; + cp[13] = v[13]; + cp[14] = v[14]; + cp[15] = v[15]; + } + else { + c.xrep.SetLength(15); + _ntl_ulong *cp = &c.xrep[0]; + cp[0] = v[0]; + cp[1] = v[1]; + cp[2] = v[2]; + cp[3] = v[3]; + cp[4] = v[4]; + cp[5] = v[5]; + cp[6] = v[6]; + cp[7] = v[7]; + cp[8] = v[8]; + cp[9] = v[9]; + cp[10] = v[10]; + cp[11] = v[11]; + cp[12] = v[12]; + cp[13] = v[13]; + cp[14] = v[14]; + } + } + return; + + } + } + + // another special case: one of the two inputs + // has length 1 (or less). + + if (sa == 1) { + c.xrep.SetLength(sb + 1); + _ntl_ulong *cp = c.xrep.elts(); + const _ntl_ulong *bp = b.xrep.elts(); + + if (a0 >> (NTL_BITS_PER_LONG-NTL_BB_MUL1_BITS+1)) + Mul1(cp, bp, sb, a0); + else + Mul1_short(cp, bp, sb, a0); + + + c.normalize(); + return; + } + + if (sb == 1) { + c.xrep.SetLength(sa + 1); + _ntl_ulong *cp = c.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + + + if (b0 >> (NTL_BITS_PER_LONG-NTL_BB_MUL1_BITS+1)) + Mul1(cp, ap, sa, b0); + else + Mul1_short(cp, ap, sa, b0); + + c.normalize(); + return; + } + + // finally: the general case + + + NTL_TLS_LOCAL(WordVector, mem); + NTL_TLS_LOCAL(WordVector, stk); + NTL_TLS_LOCAL(WordVector, vec); + + WordVectorWatcher watch_mem(mem); + WordVectorWatcher watch_stk(stk); + WordVectorWatcher watch_vec(vec); + + const _ntl_ulong *ap, *bp; + _ntl_ulong *cp; + + + long sc = sa + sb; + long in_mem = 0; + + if (&a == &c || &b == &c) { + mem.SetLength(sc); + cp = mem.elts(); + in_mem = 1; + } + else { + c.xrep.SetLength(sc); + cp = c.xrep.elts(); + } + + + long n, hn, sp; + + n = min(sa, sb); + sp = 0; + while (n > 8) { + hn = (n+1) >> 1; + sp += (hn << 2) + 3; + n = hn; + } + + stk.SetLength(sp); + _ntl_ulong *stk_p = stk.elts(); + + if (sa > sb) { + { long t; t = sa; sa = sb; sb = t; } + ap = b.xrep.elts(); + bp = a.xrep.elts(); + } + else { + ap = a.xrep.elts(); + bp = b.xrep.elts(); + } + + + vec.SetLength(2*sa); + + _ntl_ulong *v = vec.elts(); + + long i, j; + + for (i = 0; i < sc; i++) + cp[i] = 0; + + do { + if (sa == 0) break; + + if (sa == 1) { + AddMul1(cp, bp, sb, ap[0]); + + break; + } + + // general case + + for (i = 0; i+sa <= sb; i += sa) { + KarMul(v, ap, bp + i, sa, stk_p); + for (j = 0; j < 2*sa; j++) + cp[i+j] ^= v[j]; + } + + { const _ntl_ulong *t; t = ap; ap = bp + i; bp = t; } + { long t; t = sa; sa = sb - i; sb = t; } + cp = cp + i; + } while (1); + + if (in_mem) + c.xrep = mem; + + c.normalize(); +} + + + +void mul(GF2X& c, const GF2X& a, long b) +{ + if (b & 1) + c = a; + else + clear(c); +} + +void mul(GF2X& c, const GF2X& a, GF2 b) +{ + if (b == 1) + c = a; + else + clear(c); +} + + +void trunc(GF2X& x, const GF2X& a, long m) +{ + if (m < 0) LogicError("trunc: 
bad args"); + + long n = a.xrep.length(); + if (n == 0 || m == 0) { + clear(x); + return; + } + + if (&x == &a) { + if (n*NTL_BITS_PER_LONG > m) { + long wm = (m-1)/NTL_BITS_PER_LONG; + long bm = m - NTL_BITS_PER_LONG*wm; + _ntl_ulong msk; + if (bm == NTL_BITS_PER_LONG) + msk = ~(0UL); + else + msk = ((1UL << bm) - 1UL); + x.xrep[wm] &= msk; + x.xrep.QuickSetLength(wm+1); + x.normalize(); + } + } + else if (n*NTL_BITS_PER_LONG <= m) + x = a; + else { + long wm = (m-1)/NTL_BITS_PER_LONG; + long bm = m - NTL_BITS_PER_LONG*wm; + x.xrep.SetLength(wm+1); + _ntl_ulong *xp = &x.xrep[0]; + const _ntl_ulong *ap = &a.xrep[0]; + long i; + for (i = 0; i < wm; i++) + xp[i] = ap[i]; + _ntl_ulong msk; + if (bm == NTL_BITS_PER_LONG) + msk = ~(0UL); + else + msk = ((1UL << bm) - 1UL); + xp[wm] = ap[wm] & msk; + x.normalize(); + } +} + + +void MulByX(GF2X& x, const GF2X& a) +{ + long n = a.xrep.length(); + if (n == 0) { + clear(x); + return; + } + + if (a.xrep[n-1] & (1UL << (NTL_BITS_PER_LONG-1))) { + x.xrep.SetLength(n+1); + x.xrep[n] = 1; + } + else if (&x != &a) + x.xrep.SetLength(n); + + _ntl_ulong *xp = &x.xrep[0]; + const _ntl_ulong *ap = &a.xrep[0]; + + long i; + for (i = n-1; i > 0; i--) + xp[i] = (ap[i] << 1) | (ap[i-1] >> (NTL_BITS_PER_LONG-1)); + + xp[0] = ap[0] << 1; + + // no need to normalize +} + + + +static const _ntl_ulong sqrtab[256] = { + +0UL, 1UL, 4UL, 5UL, 16UL, 17UL, 20UL, 21UL, 64UL, +65UL, 68UL, 69UL, 80UL, 81UL, 84UL, 85UL, 256UL, +257UL, 260UL, 261UL, 272UL, 273UL, 276UL, 277UL, 320UL, +321UL, 324UL, 325UL, 336UL, 337UL, 340UL, 341UL, 1024UL, +1025UL, 1028UL, 1029UL, 1040UL, 1041UL, 1044UL, 1045UL, 1088UL, +1089UL, 1092UL, 1093UL, 1104UL, 1105UL, 1108UL, 1109UL, 1280UL, +1281UL, 1284UL, 1285UL, 1296UL, 1297UL, 1300UL, 1301UL, 1344UL, +1345UL, 1348UL, 1349UL, 1360UL, 1361UL, 1364UL, 1365UL, 4096UL, +4097UL, 4100UL, 4101UL, 4112UL, 4113UL, 4116UL, 4117UL, 4160UL, +4161UL, 4164UL, 4165UL, 4176UL, 4177UL, 4180UL, 4181UL, 4352UL, +4353UL, 4356UL, 4357UL, 4368UL, 4369UL, 4372UL, 4373UL, 4416UL, +4417UL, 4420UL, 4421UL, 4432UL, 4433UL, 4436UL, 4437UL, 5120UL, +5121UL, 5124UL, 5125UL, 5136UL, 5137UL, 5140UL, 5141UL, 5184UL, +5185UL, 5188UL, 5189UL, 5200UL, 5201UL, 5204UL, 5205UL, 5376UL, +5377UL, 5380UL, 5381UL, 5392UL, 5393UL, 5396UL, 5397UL, 5440UL, +5441UL, 5444UL, 5445UL, 5456UL, 5457UL, 5460UL, 5461UL, 16384UL, +16385UL, 16388UL, 16389UL, 16400UL, 16401UL, 16404UL, 16405UL, 16448UL, +16449UL, 16452UL, 16453UL, 16464UL, 16465UL, 16468UL, 16469UL, 16640UL, +16641UL, 16644UL, 16645UL, 16656UL, 16657UL, 16660UL, 16661UL, 16704UL, +16705UL, 16708UL, 16709UL, 16720UL, 16721UL, 16724UL, 16725UL, 17408UL, +17409UL, 17412UL, 17413UL, 17424UL, 17425UL, 17428UL, 17429UL, 17472UL, +17473UL, 17476UL, 17477UL, 17488UL, 17489UL, 17492UL, 17493UL, 17664UL, +17665UL, 17668UL, 17669UL, 17680UL, 17681UL, 17684UL, 17685UL, 17728UL, +17729UL, 17732UL, 17733UL, 17744UL, 17745UL, 17748UL, 17749UL, 20480UL, +20481UL, 20484UL, 20485UL, 20496UL, 20497UL, 20500UL, 20501UL, 20544UL, +20545UL, 20548UL, 20549UL, 20560UL, 20561UL, 20564UL, 20565UL, 20736UL, +20737UL, 20740UL, 20741UL, 20752UL, 20753UL, 20756UL, 20757UL, 20800UL, +20801UL, 20804UL, 20805UL, 20816UL, 20817UL, 20820UL, 20821UL, 21504UL, +21505UL, 21508UL, 21509UL, 21520UL, 21521UL, 21524UL, 21525UL, 21568UL, +21569UL, 21572UL, 21573UL, 21584UL, 21585UL, 21588UL, 21589UL, 21760UL, +21761UL, 21764UL, 21765UL, 21776UL, 21777UL, 21780UL, 21781UL, 21824UL, +21825UL, 21828UL, 21829UL, 21840UL, 21841UL, 21844UL, 21845UL }; + + + + + +static inline +void 
sqr1(_ntl_ulong *c, _ntl_ulong a) +{ +#ifdef NTL_PCLMUL + // this appears to be marginally faster than the + // table-driven code + pclmul_mul1(c, a, a); +#else + _ntl_ulong hi, lo; + + NTL_BB_SQR_CODE + + c[0] = lo; + c[1] = hi; +#endif +} + + + + +void sqr(GF2X& c, const GF2X& a) +{ + long sa = a.xrep.length(); + if (sa <= 0) { + clear(c); + return; + } + + c.xrep.SetLength(sa << 1); + _ntl_ulong *cp = c.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + long i; + + for (i = sa-1; i >= 0; i--) + sqr1(cp + (i << 1), ap[i]); + + c.normalize(); + return; +} + + + +void LeftShift(GF2X& c, const GF2X& a, long n) +{ + if (IsZero(a)) { + clear(c); + return; + } + + if (n == 1) { + MulByX(c, a); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) + clear(c); + else + RightShift(c, a, -n); + return; + } + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in LeftShift"); + + if (n == 0) { + c = a; + return; + } + + long sa = a.xrep.length(); + + long wn = n/NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + long sc = sa + wn; + if (bn) sc++; + + c.xrep.SetLength(sc); + + _ntl_ulong *cp = c.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + + long i; + + if (bn == 0) { + for (i = sa+wn-1; i >= wn; i--) + cp[i] = ap[i-wn]; + for (i = wn-1; i >= 0; i--) + cp[i] = 0; + } + else { + cp[sa+wn] = ap[sa-1] >> (NTL_BITS_PER_LONG-bn); + for (i = sa+wn-1; i >= wn+1; i--) + cp[i] = (ap[i-wn] << bn) | (ap[i-wn-1] >> (NTL_BITS_PER_LONG-bn)); + cp[wn] = ap[0] << bn; + for (i = wn-1; i >= 0; i--) + cp[i] = 0; + } + + c.normalize(); +} + +void ShiftAdd(GF2X& c, const GF2X& a, long n) +// c = c + a*X^n +{ + if (n < 0) LogicError("ShiftAdd: negative argument"); + + if (n == 0) { + add(c, c, a); + return; + } + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in ShiftAdd"); + + long sa = a.xrep.length(); + if (sa <= 0) { + return; + } + + long sc = c.xrep.length(); + + long wn = n/NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + long ss = sa + wn; + if (bn) ss++; + + if (ss > sc) + c.xrep.SetLength(ss); + + _ntl_ulong *cp = c.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + + long i; + + for (i = sc; i < ss; i++) + cp[i] = 0; + + if (bn == 0) { + for (i = sa+wn-1; i >= wn; i--) + cp[i] ^= ap[i-wn]; + } + else { + cp[sa+wn] ^= ap[sa-1] >> (NTL_BITS_PER_LONG-bn); + for (i = sa+wn-1; i >= wn+1; i--) + cp[i] ^= (ap[i-wn] << bn) | (ap[i-wn-1] >> (NTL_BITS_PER_LONG-bn)); + cp[wn] ^= ap[0] << bn; + } + + c.normalize(); +} + + + + +void RightShift(GF2X& c, const GF2X& a, long n) +{ + if (IsZero(a)) { + clear(c); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) ResourceError("overflow in RightShift"); + LeftShift(c, a, -n); + return; + } + + if (n == 0) { + c = a; + return; + } + + long sa = a.xrep.length(); + long wn = n/NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + if (wn >= sa) { + clear(c); + return; + } + + c.xrep.SetLength(sa-wn); + _ntl_ulong *cp = c.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + + long i; + + if (bn == 0) { + for (i = 0; i < sa-wn; i++) + cp[i] = ap[i+wn]; + } + else { + for (i = 0; i < sa-wn-1; i++) + cp[i] = (ap[i+wn] >> bn) | (ap[i+wn+1] << (NTL_BITS_PER_LONG - bn)); + + cp[sa-wn-1] = ap[sa-1] >> bn; + } + + c.normalize(); +} + + + +static const _ntl_ulong revtab[256] = { + +0UL, 128UL, 64UL, 192UL, 32UL, 160UL, 96UL, 224UL, 16UL, 144UL, +80UL, 208UL, 48UL, 176UL, 112UL, 240UL, 8UL, 136UL, 72UL, 200UL, +40UL, 168UL, 104UL, 232UL, 24UL, 152UL, 88UL, 216UL, 56UL, 184UL, +120UL, 248UL, 4UL, 132UL, 68UL, 196UL, 36UL, 
164UL, 100UL, 228UL, +20UL, 148UL, 84UL, 212UL, 52UL, 180UL, 116UL, 244UL, 12UL, 140UL, +76UL, 204UL, 44UL, 172UL, 108UL, 236UL, 28UL, 156UL, 92UL, 220UL, +60UL, 188UL, 124UL, 252UL, 2UL, 130UL, 66UL, 194UL, 34UL, 162UL, +98UL, 226UL, 18UL, 146UL, 82UL, 210UL, 50UL, 178UL, 114UL, 242UL, +10UL, 138UL, 74UL, 202UL, 42UL, 170UL, 106UL, 234UL, 26UL, 154UL, +90UL, 218UL, 58UL, 186UL, 122UL, 250UL, 6UL, 134UL, 70UL, 198UL, +38UL, 166UL, 102UL, 230UL, 22UL, 150UL, 86UL, 214UL, 54UL, 182UL, +118UL, 246UL, 14UL, 142UL, 78UL, 206UL, 46UL, 174UL, 110UL, 238UL, +30UL, 158UL, 94UL, 222UL, 62UL, 190UL, 126UL, 254UL, 1UL, 129UL, +65UL, 193UL, 33UL, 161UL, 97UL, 225UL, 17UL, 145UL, 81UL, 209UL, +49UL, 177UL, 113UL, 241UL, 9UL, 137UL, 73UL, 201UL, 41UL, 169UL, +105UL, 233UL, 25UL, 153UL, 89UL, 217UL, 57UL, 185UL, 121UL, 249UL, +5UL, 133UL, 69UL, 197UL, 37UL, 165UL, 101UL, 229UL, 21UL, 149UL, +85UL, 213UL, 53UL, 181UL, 117UL, 245UL, 13UL, 141UL, 77UL, 205UL, +45UL, 173UL, 109UL, 237UL, 29UL, 157UL, 93UL, 221UL, 61UL, 189UL, +125UL, 253UL, 3UL, 131UL, 67UL, 195UL, 35UL, 163UL, 99UL, 227UL, +19UL, 147UL, 83UL, 211UL, 51UL, 179UL, 115UL, 243UL, 11UL, 139UL, +75UL, 203UL, 43UL, 171UL, 107UL, 235UL, 27UL, 155UL, 91UL, 219UL, +59UL, 187UL, 123UL, 251UL, 7UL, 135UL, 71UL, 199UL, 39UL, 167UL, +103UL, 231UL, 23UL, 151UL, 87UL, 215UL, 55UL, 183UL, 119UL, 247UL, +15UL, 143UL, 79UL, 207UL, 47UL, 175UL, 111UL, 239UL, 31UL, 159UL, +95UL, 223UL, 63UL, 191UL, 127UL, 255UL }; + +static inline +_ntl_ulong rev1(_ntl_ulong a) +{ + return NTL_BB_REV_CODE; +} + + + +void CopyReverse(GF2X& c, const GF2X& a, long hi) +// c[0..hi] = reverse(a[0..hi]), with zero fill as necessary +// input may alias output + +{ + if (hi < 0) { clear(c); return; } + + if (NTL_OVERFLOW(hi, 1, 0)) + ResourceError("overflow in CopyReverse"); + + long n = hi+1; + long sa = a.xrep.length(); + if (n <= 0 || sa <= 0) { + clear(c); + return; + } + + long wn = n/NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + if (bn != 0) { + wn++; + bn = NTL_BITS_PER_LONG - bn; + } + + c.xrep.SetLength(wn); + + _ntl_ulong *cp = c.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + + long mm = min(sa, wn); + long i; + + for (i = 0; i < mm; i++) + cp[i] = ap[i]; + + for (i = mm; i < wn; i++) + cp[i] = 0; + + if (bn != 0) { + for (i = wn-1; i >= 1; i--) + cp[i] = (cp[i] << bn) | (cp[i-1] >> (NTL_BITS_PER_LONG-bn)); + cp[0] = cp[0] << bn; + } + + for (i = 0; i < wn/2; i++) { + _ntl_ulong t; t = cp[i]; cp[i] = cp[wn-1-i]; cp[wn-1-i] = t; + } + + for (i = 0; i < wn; i++) + cp[i] = rev1(cp[i]); + + c.normalize(); +} + + +void div(GF2X& q, const GF2X& a, GF2 b) +{ + if (b == 0) + ArithmeticError("div: division by zero"); + + q = a; +} + +void div(GF2X& q, const GF2X& a, long b) +{ + if ((b & 1) == 0) + ArithmeticError("div: division by zero"); + + q = a; +} + + + +void GF2XFromBytes(GF2X& x, const unsigned char *p, long n) +{ + if (n <= 0) { + x = 0; + return; + } + + const long BytesPerLong = NTL_BITS_PER_LONG/8; + + long lw, r, i, j; + + lw = n/BytesPerLong; + r = n - lw*BytesPerLong; + + if (r != 0) + lw++; + else + r = BytesPerLong; + + x.xrep.SetLength(lw); + unsigned long *xp = x.xrep.elts(); + + for (i = 0; i < lw-1; i++) { + unsigned long t = 0; + for (j = 0; j < BytesPerLong; j++) { + t >>= 8; + t += (((unsigned long)(*p)) & 255UL) << ((BytesPerLong-1)*8); + p++; + } + xp[i] = t; + } + + unsigned long t = 0; + for (j = 0; j < r; j++) { + t >>= 8; + t += (((unsigned long)(*p)) & 255UL) << ((BytesPerLong-1)*8); + p++; + } + + t >>= (BytesPerLong-r)*8; + 
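// the loop leaves the r trailing bytes in the high end of t; shift + // them down so the final partial word is right-justified +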
xp[lw-1] = t; + + x.normalize(); +} + +void BytesFromGF2X(unsigned char *p, const GF2X& a, long n) +{ + if (n < 0) n = 0; + + const long BytesPerLong = NTL_BITS_PER_LONG/8; + + long lbits = deg(a) + 1; + long lbytes = (lbits+7)/8; + + long min_bytes = min(lbytes, n); + + long min_words = min_bytes/BytesPerLong; + long r = min_bytes - min_words*BytesPerLong; + if (r != 0) + min_words++; + else + r = BytesPerLong; + + const unsigned long *ap = a.xrep.elts(); + + long i, j; + + for (i = 0; i < min_words-1; i++) { + unsigned long t = ap[i]; + for (j = 0; j < BytesPerLong; j++) { + *p = t & 255UL; + t >>= 8; + p++; + } + } + + if (min_words > 0) { + unsigned long t = ap[min_words-1]; + for (j = 0; j < r; j++) { + *p = t & 255UL; + t >>= 8; + p++; + } + } + + for (j = min_bytes; j < n; j++) { + *p = 0; + p++; + } +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/GF2X1.c b/thirdparty/linux/ntl/src/GF2X1.c new file mode 100644 index 0000000000..0e1c81a9ba --- /dev/null +++ b/thirdparty/linux/ntl/src/GF2X1.c @@ -0,0 +1,3705 @@ + + +#include +#include + +#ifndef NTL_WIZARD_HACK + +#include + +#endif + +#include + +#if (defined(NTL_WIZARD_HACK) && defined(NTL_GF2X_LIB)) +#undef NTL_GF2X_LIB +#endif + + +// simple scaling factor for some crossover points: +// we use a lower crossover if the underlying multiplication +// is faster +#if (defined(NTL_GF2X_LIB) || defined(NTL_PCLMUL)) +#define XOVER_SCALE (1L) +#else +#define XOVER_SCALE (2L) +#endif + + + +#define NTL_GF2X_GCD_CROSSOVER (XOVER_SCALE*300L*NTL_BITS_PER_LONG) + +#define NTL_GF2X_BERMASS_CROSSOVER (XOVER_SCALE*200L*NTL_BITS_PER_LONG) + +#define NTL_GF2X_HalfGCD_CROSSOVER (4L*NTL_BITS_PER_LONG) + + + +NTL_START_IMPL + + + +NTL_TLS_GLOBAL_DECL(vec_GF2X, stab) +// used by PlainDivRem and PlainRem + +NTL_TLS_GLOBAL_DECL(WordVector, GF2X_rembuf) + + +void PlainDivRem(GF2X& q, GF2X& r, const GF2X& a, const GF2X& b) +{ + NTL_TLS_GLOBAL_ACCESS(stab); + + long da, sa, posa, db, sb, posb, dq, sq, posq; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("GF2X: division by zero"); + + if (da < db) { + r = a; + clear(q); + return; + } + + sa = a.xrep.length(); + posa = da - NTL_BITS_PER_LONG*(sa-1); + sb = b.xrep.length(); + posb = db - NTL_BITS_PER_LONG*(sb-1); + + dq = da - db; + sq = dq/NTL_BITS_PER_LONG + 1; + posq = dq - NTL_BITS_PER_LONG*(sq-1); + + NTL_TLS_GLOBAL_ACCESS(GF2X_rembuf); + WordVectorWatcher watch_GF2X_rembuf(GF2X_rembuf); + + _ntl_ulong *ap; + if (&r == &a) + ap = r.xrep.elts(); + else { + GF2X_rembuf = a.xrep; + ap = GF2X_rembuf.elts(); + } + + stab.SetLength(NTL_BITS_PER_LONG); + long i; + + NTL_SCOPE(guard) { + for (i = 0; i <= min(dq, NTL_BITS_PER_LONG-1); i++) { + WordVector& st = stab[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG].xrep; + st.KillBig(); + } + }; + + stab[posb] = b; + for (i = 1; i <= min(dq, NTL_BITS_PER_LONG-1); i++) + MulByX(stab[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG], + stab[((_ntl_ulong)(posb+i-1))%NTL_BITS_PER_LONG]); + + _ntl_ulong *stab_ptr[NTL_BITS_PER_LONG]; + long stab_cnt[NTL_BITS_PER_LONG]; + + for (i = 0; i <= min(dq, NTL_BITS_PER_LONG-1); i++) { + WordVector& st = stab[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG].xrep; + long k = st.length(); + stab_ptr[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG] = &st[k-1]; + stab_cnt[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG] = -k+1; + } + + q.xrep.SetLength(sq); + _ntl_ulong *qp = q.xrep.elts(); + for (i = 0; i < sq; i++) + qp[i] = 0; + + _ntl_ulong *atop = &ap[sa-1]; + _ntl_ulong *qtop = &qp[sq-1]; + _ntl_ulong *stab_top; + + while (1) { + if
(atop[0] & (1UL << posa)) { + qtop[0] |= (1UL << posq); + stab_top = stab_ptr[posa]; + for (i = stab_cnt[posa]; i <= 0; i++) + atop[i] ^= stab_top[i]; + } + + da--; + if (da < db) break; + + posa--; + if (posa < 0) { + posa = NTL_BITS_PER_LONG-1; + atop--; + } + + posq--; + if (posq < 0) { + posq = NTL_BITS_PER_LONG-1; + qtop--; + } + } + + if (posb == 0) sb--; + + r.xrep.SetLength(sb); + if (&r != &a) { + _ntl_ulong *rp = r.xrep.elts(); + for (i = 0; i < sb; i++) + rp[i] = ap[i]; + } + r.normalize(); + + guard.relax(); + for (i = 0; i <= min(dq, NTL_BITS_PER_LONG-1); i++) { + WordVector& st = stab[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG].xrep; + st.KillBig(); + } +} + + + +void PlainDiv(GF2X& q, const GF2X& a, const GF2X& b) +{ + NTL_GF2XRegister(r); + PlainDivRem(q, r, a, b); +} + + +void PlainRem(GF2X& r, const GF2X& a, const GF2X& b) +{ + NTL_TLS_GLOBAL_ACCESS(stab); + + long da, sa, posa, db, sb, posb; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("GF2X: division by zero"); + + if (da < db) { + r = a; + return; + } + + sa = a.xrep.length(); + posa = da - NTL_BITS_PER_LONG*(sa-1); + sb = b.xrep.length(); + posb = db - NTL_BITS_PER_LONG*(sb-1); + + + NTL_TLS_GLOBAL_ACCESS(GF2X_rembuf); + WordVectorWatcher watch_GF2X_rembuf(GF2X_rembuf); + + _ntl_ulong *ap; + if (&r == &a) + ap = r.xrep.elts(); + else { + GF2X_rembuf = a.xrep; + ap = GF2X_rembuf.elts(); + } + + stab.SetLength(NTL_BITS_PER_LONG); + long i; + + NTL_SCOPE(guard) { + for (i = 0; i <= min(da-db, NTL_BITS_PER_LONG-1); i++) { + WordVector& st = stab[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG].xrep; + st.KillBig(); + } + }; + + stab[posb] = b; + for (i = 1; i <= min(da-db, NTL_BITS_PER_LONG-1); i++) + MulByX(stab[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG], + stab[((_ntl_ulong)(posb+i-1))%NTL_BITS_PER_LONG]); + + _ntl_ulong *stab_ptr[NTL_BITS_PER_LONG]; + long stab_cnt[NTL_BITS_PER_LONG]; + + for (i = 0; i <= min(da-db, NTL_BITS_PER_LONG-1); i++) { + WordVector& st = stab[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG].xrep; + long k = st.length(); + stab_ptr[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG] = &st[k-1]; + stab_cnt[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG] = -k+1; + } + + + _ntl_ulong *atop = &ap[sa-1]; + _ntl_ulong *stab_top; + + while (1) { + if (atop[0] & (1UL << posa)) { + stab_top = stab_ptr[posa]; + for (i = stab_cnt[posa]; i <= 0; i++) + atop[i] ^= stab_top[i]; + } + + da--; + if (da < db) break; + + posa--; + if (posa < 0) { + posa = NTL_BITS_PER_LONG-1; + atop--; + } + } + + if (posb == 0) sb--; + + r.xrep.SetLength(sb); + if (&r != &a) { + _ntl_ulong *rp = r.xrep.elts(); + for (i = 0; i < sb; i++) + rp[i] = ap[i]; + } + r.normalize(); + + guard.relax(); + for (i = 0; i <= min(da-db, NTL_BITS_PER_LONG-1); i++) { + WordVector& st = stab[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG].xrep; + st.KillBig(); + } +} + +#define MASK8 ((1UL << 8)-1UL) + +static const _ntl_ulong invtab[128] = { +1UL, 255UL, 85UL, 219UL, 73UL, 151UL, 157UL, 51UL, 17UL, 175UL, +69UL, 139UL, 89UL, 199UL, 141UL, 99UL, 33UL, 95UL, 117UL, 123UL, +105UL, 55UL, 189UL, 147UL, 49UL, 15UL, 101UL, 43UL, 121UL, 103UL, +173UL, 195UL, 65UL, 191UL, 21UL, 155UL, 9UL, 215UL, 221UL, 115UL, +81UL, 239UL, 5UL, 203UL, 25UL, 135UL, 205UL, 35UL, 97UL, 31UL, +53UL, 59UL, 41UL, 119UL, 253UL, 211UL, 113UL, 79UL, 37UL, 107UL, +57UL, 39UL, 237UL, 131UL, 129UL, 127UL, 213UL, 91UL, 201UL, 23UL, +29UL, 179UL, 145UL, 47UL, 197UL, 11UL, 217UL, 71UL, 13UL, 227UL, +161UL, 223UL, 245UL, 251UL, 233UL, 183UL, 61UL, 19UL, 177UL, 143UL, +229UL, 171UL, 249UL, 231UL, 45UL, 
67UL, 193UL, 63UL, 149UL, 27UL, +137UL, 87UL, 93UL, 243UL, 209UL, 111UL, 133UL, 75UL, 153UL, 7UL, +77UL, 163UL, 225UL, 159UL, 181UL, 187UL, 169UL, 247UL, 125UL, 83UL, +241UL, 207UL, 165UL, 235UL, 185UL, 167UL, 109UL, 3UL }; + + + +void NewtonInvTrunc(GF2X& c, const GF2X& a, long e) +{ + if (e == 1) { + set(c); + return; + } + + NTL_TLS_LOCAL(vec_long, E); + E.SetLength(0); + append(E, e); + while (e > 8) { + e = (e+1)/2; + append(E, e); + } + + long L = E.length(); + + NTL_GF2XRegister(g); + NTL_GF2XRegister(g0); + NTL_GF2XRegister(g1); + NTL_GF2XRegister(g2); + + g.xrep.SetMaxLength((E[0]+NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG + 1); + g0.xrep.SetMaxLength((E[0]+NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG + 1); + g1.xrep.SetMaxLength(((3*E[0]+1)/2+NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG+1); + g2.xrep.SetMaxLength((E[0]+NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG + 1); + + g.xrep.SetLength(1); + g.xrep[0] = invtab[(a.xrep[0] & MASK8) >> 1] & ((1UL<<e)-1UL); + + long i; + + for (i = L-1; i > 0; i--) { + // lift from E[i] to E[i-1] + + long k = E[i]; + long l = E[i-1]-E[i]; + + trunc(g0, a, k+l); + + mul(g1, g0, g); + RightShift(g1, g1, k); + trunc(g1, g1, l); + + mul(g2, g1, g); + trunc(g2, g2, l); + LeftShift(g2, g2, k); + + add(g, g, g2); + } + + c = g; +} + +void InvTrunc(GF2X& c, const GF2X& a, long e) +{ + if (ConstTerm(a) == 0 || e < 0) + LogicError("inv: bad args"); + + if (NTL_OVERFLOW(e, 1, 0)) + ResourceError("overflow in InvTrunc"); + + if (e == 0) { + clear(c); + return; + } + + NewtonInvTrunc(c, a, e); +} + + + +static +long weight1(_ntl_ulong a) +{ + long res = 0; + while (a) { + if (a & 1) res ++; + a >>= 1; + } + return res; +} + +long weight(const GF2X& a) +{ + long wlen = a.xrep.length(); + long res = 0; + long i; + for (i = 0; i < wlen; i++) + res += weight1(a.xrep[i]); + + return res; +} + + + +static +void SparsityCheck(const GF2X& f, long& k3, long& k2, long& k1) +{ + long w = weight(f); + if (w != 3 && w != 5) { + k3 = 0; + return; + } + + if (ConstTerm(f) != 1) { + k3 = 0; + return; + } + + GF2X g = f; + + long n = deg(f); + + trunc(g, g, n); + + long t = deg(g); + + if (n-t < NTL_BITS_PER_LONG || t > (n+1)/2) { + k3 = 0; + return; + } + + if (w == 3) { + k3 = t; + k2 = 0; + return; + } + + k3 = t; + trunc(g, g, t); + t = deg(g); + k2 = t; + trunc(g, g, t); + t = deg(g); + k1 = t; +} + + + + +const long GF2X_MOD_PLAIN = 0; +const long GF2X_MOD_MUL = 1; +const long GF2X_MOD_SPECIAL = 2; +const long GF2X_MOD_TRI = 3; +const long GF2X_MOD_PENT = 4; + +void build(GF2XModulus& F, const GF2X& f) +{ + long n = deg(f); + long i; + + if (n <= 0) LogicError("build(GF2XModulus,GF2X): deg(f) <= 0"); + + F.tracevec.make(); + + F.f = f; + F.n = n; + F.sn = f.xrep.length(); + + long sb = F.sn; + long posb = n - NTL_BITS_PER_LONG*(sb-1); + + F.posn = posb; + + if (F.posn > 0) { + F.size = F.sn; + F.msk = (1UL << F.posn) - 1UL; + } + else { + F.size = F.sn-1; + F.msk = ~0UL; + } + + SparsityCheck(f, F.k3, F.k2, F.k1); + + if (F.k3 != 0) { + if (F.k2 == 0) + F.method = GF2X_MOD_TRI; + else + F.method = GF2X_MOD_PENT; + + return; + } + + + GF2X f0; + trunc(f0, f, n); + + if (F.n >= (NTL_BITS_PER_LONG/2)*XOVER_SCALE) + F.method = GF2X_MOD_MUL; + else + F.method = GF2X_MOD_PLAIN; + + + // NOTE: I've run some tests which indicate that the GF2X_MOD_SPECIAL + // method is not worth it. + // FIXME: in a future version, I should eliminate all code + // and data associated with GF2X_MOD_SPECIAL + + // NOTE: I've run some tests which indicate that the crossover + // for GF2X_MOD_MUL is extremely low, even without PCLMUL support. 
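+ // To summarize the dispatch: TRI/PENT (chosen above for sparse f) use + // dedicated word-level reduction; MUL reduces with two multiplications + // via the precomputed h0/f0 pair; PLAIN and the deprecated SPECIAL + // method use the shift-and-XOR tables built in the branches below.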
+ + + if (F.method == GF2X_MOD_SPECIAL) { + if (!F.stab_cnt) F.stab_cnt.SetLength(NTL_BITS_PER_LONG); + long *stab_cnt = F.stab_cnt.get(); + + if (!F.stab1) F.stab1.SetLength(2*NTL_BITS_PER_LONG); + _ntl_ulong *stab1 = F.stab1.get(); + + stab1[posb<<1] = f.xrep[0]; + stab1[(posb<<1)+1] = 0; + + stab_cnt[posb] = -sb+1; + + for (i = 1; i < NTL_BITS_PER_LONG; i++) { + long kk0 = ((_ntl_ulong)(posb+i-1))%NTL_BITS_PER_LONG; + long kk1 = ((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG; + + stab1[kk1<<1] = stab1[kk0<<1] << 1; + stab1[(kk1<<1)+1] = (stab1[(kk0<<1)+1] << 1) + | (stab1[kk0<<1] >> (NTL_BITS_PER_LONG-1)); + + if (kk1 < posb) + stab_cnt[kk1] = -sb; + else + stab_cnt[kk1] = -sb+1; + } + } + else if (F.method == GF2X_MOD_PLAIN) { + vec_GF2X& stab = F.stab; + stab.SetLength(NTL_BITS_PER_LONG); + + + if (!F.stab_ptr) F.stab_ptr.SetLength(NTL_BITS_PER_LONG); + _ntl_ulong **stab_ptr = F.stab_ptr.get(); + + if (!F.stab_cnt) F.stab_cnt.SetLength(NTL_BITS_PER_LONG); + long *stab_cnt = F.stab_cnt.get(); + + + stab[posb] = f; + for (i = 1; i < NTL_BITS_PER_LONG; i++) + MulByX(stab[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG], + stab[((_ntl_ulong)(posb+i-1))%NTL_BITS_PER_LONG]); + + + for (i = 0; i < NTL_BITS_PER_LONG; i++) { + WordVector& st = stab[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG].xrep; + long k = st.length(); + stab_ptr[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG] = &st[k-1]; + stab_cnt[((_ntl_ulong)(posb+i))%NTL_BITS_PER_LONG] = -k+1; + } + } + else if (F.method == GF2X_MOD_MUL) { + GF2X P1, P2; + + CopyReverse(P1, f, n); + InvTrunc(P2, P1, n-1); + CopyReverse(P1, P2, n-2); + trunc(F.h0, P1, n-2); + F.f0 = f0; + } +} + +GF2XModulus::GF2XModulus() +{ + n = -1; + method = GF2X_MOD_PLAIN; +} + + +// The following two routines are total spaghetti...unfortunately, +// cleaning them up would require too much re-coding in other +// places. 
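+// One constraint worth noting for the two routines below: stab_ptr holds +// raw pointers into this object's own stab entries, so it must be +// recomputed from the freshly copied stab; copying the pointers verbatim +// would leave them aliasing the source object's storage.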
+ +GF2XModulus::GF2XModulus(const GF2XModulus& F) : + f(F.f), n(F.n), sn(F.sn), posn(F.posn), k3(F.k3), k2(F.k2), k1(F.k1), + size(F.size), + msk(F.msk), method(F.method), stab(F.stab), h0(F.h0), f0(F.f0), + tracevec(F.tracevec) +{ + if (method == GF2X_MOD_SPECIAL) { + long i; + stab1.SetLength(2*NTL_BITS_PER_LONG); + for (i = 0; i < 2*NTL_BITS_PER_LONG; i++) + stab1[i] = F.stab1[i]; + stab_cnt.SetLength(NTL_BITS_PER_LONG); + for (i = 0; i < NTL_BITS_PER_LONG; i++) + stab_cnt[i] = F.stab_cnt[i]; + } + else if (method == GF2X_MOD_PLAIN) { + long i; + + if (F.stab_cnt) { + stab_cnt.SetLength(NTL_BITS_PER_LONG); + for (i = 0; i < NTL_BITS_PER_LONG; i++) + stab_cnt[i] = F.stab_cnt[i]; + } + + if (F.stab_ptr) { + stab_ptr.SetLength(NTL_BITS_PER_LONG); + for (i = 0; i < NTL_BITS_PER_LONG; i++) { + WordVector& st = stab[((_ntl_ulong)(posn+i))%NTL_BITS_PER_LONG].xrep; + long k = st.length(); + stab_ptr[((_ntl_ulong)(posn+i))%NTL_BITS_PER_LONG] = &st[k-1]; + stab_cnt[((_ntl_ulong)(posn+i))%NTL_BITS_PER_LONG] = -k+1; + } + } + } +} + +GF2XModulus& GF2XModulus::operator=(const GF2XModulus& F) +{ + if (this == &F) return *this; + + f=F.f; n=F.n; sn=F.sn; posn=F.posn; + k3=F.k3; k2=F.k2; k1=F.k1; + size=F.size; + msk=F.msk; method=F.method; stab=F.stab; h0=F.h0; f0 = F.f0; + tracevec=F.tracevec; + + if (method == GF2X_MOD_SPECIAL) { + long i; + if (!stab1) stab1.SetLength(2*NTL_BITS_PER_LONG); + for (i = 0; i < 2*NTL_BITS_PER_LONG; i++) + stab1[i] = F.stab1[i]; + if (!stab_cnt) stab_cnt.SetLength(NTL_BITS_PER_LONG); + for (i = 0; i < NTL_BITS_PER_LONG; i++) + stab_cnt[i] = F.stab_cnt[i]; + } + else if (method == GF2X_MOD_PLAIN) { + long i; + + if (F.stab_cnt) { + if (!stab_cnt) stab_cnt.SetLength(NTL_BITS_PER_LONG); + for (i = 0; i < NTL_BITS_PER_LONG; i++) + stab_cnt[i] = F.stab_cnt[i]; + } + + if (F.stab_ptr) { + if (!stab_ptr) stab_ptr.SetLength(NTL_BITS_PER_LONG); + + for (i = 0; i < NTL_BITS_PER_LONG; i++) { + WordVector& st = stab[((_ntl_ulong)(posn+i))%NTL_BITS_PER_LONG].xrep; + long k = st.length(); + stab_ptr[((_ntl_ulong)(posn+i))%NTL_BITS_PER_LONG] = &st[k-1]; + stab_cnt[((_ntl_ulong)(posn+i))%NTL_BITS_PER_LONG] = -k+1; + } + } + } + + return *this; +} + + + + +GF2XModulus::GF2XModulus(const GF2X& ff) +{ + n = -1; + method = GF2X_MOD_PLAIN; + + build(*this, ff); +} + + + + + +void UseMulRem21(GF2X& r, const GF2X& a, const GF2XModulus& F) +{ + NTL_GF2XRegister(P1); + NTL_GF2XRegister(P2); + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, P2, F.n-2); + add(P2, P2, P1); + mul(P1, P2, F.f0); + trunc(P1, P1, F.n); + trunc(r, a, F.n); + add(r, r, P1); +} + +void UseMulDivRem21(GF2X& q, GF2X& r, const GF2X& a, const GF2XModulus& F) +{ + NTL_GF2XRegister(P1); + NTL_GF2XRegister(P2); + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, P2, F.n-2); + add(P2, P2, P1); + mul(P1, P2, F.f0); + trunc(P1, P1, F.n); + trunc(r, a, F.n); + add(r, r, P1); + q = P2; +} + +void UseMulDiv21(GF2X& q, const GF2X& a, const GF2XModulus& F) +{ + NTL_GF2XRegister(P1); + NTL_GF2XRegister(P2); + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, P2, F.n-2); + add(P2, P2, P1); + q = P2; +} + + +void UseMulRemX1(GF2X& r, const GF2X& aa, const GF2XModulus& F) +{ + NTL_GF2XRegister(buf); + NTL_GF2XRegister(tmp); + NTL_GF2XRegister(a); + + clear(buf); + a = aa; + + long n = F.n; + long a_len = deg(a) + 1; + + while (a_len > 0) { + long old_buf_len = deg(buf) + 1; + long amt = min(2*n-1-old_buf_len, a_len); + + LeftShift(buf, buf, amt); + RightShift(tmp, a, a_len-amt); + add(buf, buf, 
tmp); + trunc(a, a, a_len-amt); + + UseMulRem21(buf, buf, F); + a_len -= amt; + } + + r = buf; +} + + +void UseMulDivRemX1(GF2X& q, GF2X& r, const GF2X& aa, const GF2XModulus& F) +{ + NTL_GF2XRegister(buf); + NTL_GF2XRegister(tmp); + NTL_GF2XRegister(a); + NTL_GF2XRegister(qq); + NTL_GF2XRegister(qbuf); + + clear(buf); + a = aa; + clear(qq); + + long n = F.n; + long a_len = deg(a) + 1; + + while (a_len > 0) { + long old_buf_len = deg(buf) + 1; + long amt = min(2*n-1-old_buf_len, a_len); + + LeftShift(buf, buf, amt); + RightShift(tmp, a, a_len-amt); + add(buf, buf, tmp); + trunc(a, a, a_len-amt); + + UseMulDivRem21(qbuf, buf, buf, F); + a_len -= amt; + + ShiftAdd(qq, qbuf, a_len); + } + + r = buf; + q = qq; +} + + +void UseMulDivX1(GF2X& q, const GF2X& aa, const GF2XModulus& F) +{ + NTL_GF2XRegister(buf); + NTL_GF2XRegister(tmp); + NTL_GF2XRegister(a); + NTL_GF2XRegister(qq); + NTL_GF2XRegister(qbuf); + + clear(buf); + a = aa; + clear(qq); + + long n = F.n; + long a_len = deg(a) + 1; + + while (a_len > 0) { + long old_buf_len = deg(buf) + 1; + long amt = min(2*n-1-old_buf_len, a_len); + + LeftShift(buf, buf, amt); + RightShift(tmp, a, a_len-amt); + add(buf, buf, tmp); + trunc(a, a, a_len-amt); + + UseMulDivRem21(qbuf, buf, buf, F); + a_len -= amt; + + ShiftAdd(qq, qbuf, a_len); + } + + q = qq; +} + +static +void TrinomReduce(GF2X& x, const GF2X& a, long n, long k) +{ + long wn = n / NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + long wdiff = (n-k)/NTL_BITS_PER_LONG; + long bdiff = (n-k) - wdiff*NTL_BITS_PER_LONG; + + long m = a.xrep.length()-1; + + if (wn > m) { + x = a; + return; + } + + NTL_GF2XRegister(r); + + r = a; + + _ntl_ulong *p = r.xrep.elts(); + + _ntl_ulong *pp; + + + _ntl_ulong w; + + if (bn == 0) { + if (bdiff == 0) { + // bn == 0 && bdiff == 0 + + while (m >= wn) { + w = p[m]; + p[m-wdiff] ^= w; + p[m-wn] ^= w; + m--; + } + } + else { + // bn == 0 && bdiff != 0 + + while (m >= wn) { + w = p[m]; + pp = &p[m-wdiff]; + *pp ^= (w >> bdiff); + *(pp-1) ^= (w << (NTL_BITS_PER_LONG-bdiff)); + p[m-wn] ^= w; + m--; + } + } + } + else { + if (bdiff == 0) { + // bn != 0 && bdiff == 0 + + while (m > wn) { + w = p[m]; + p[m-wdiff] ^= w; + pp = &p[m-wn]; + *pp ^= (w >> bn); + *(pp-1) ^= (w << (NTL_BITS_PER_LONG-bn)); + m--; + } + + w = (p[m] >> bn) << bn; + + p[m-wdiff] ^= w; + p[0] ^= (w >> bn); + + p[m] &= ((1UL<<bn)-1UL); + } + else { + // bn != 0 && bdiff != 0 + + while (m > wn) { + w = p[m]; + pp = &p[m-wdiff]; + *pp ^= (w >> bdiff); + *(pp-1) ^= (w << (NTL_BITS_PER_LONG-bdiff)); + pp = &p[m-wn]; + *pp ^= (w >> bn); + *(pp-1) ^= (w << (NTL_BITS_PER_LONG-bn)); + m--; + } + + w = (p[m] >> bn) << bn; + + p[m-wdiff] ^= (w >> bdiff); + if (m-wdiff-1 >= 0) p[m-wdiff-1] ^= (w << (NTL_BITS_PER_LONG-bdiff)); + p[0] ^= (w >> bn); + p[m] &= ((1UL<<bn)-1UL); + } + } + + while (wn >= 0 && p[wn] == 0) + wn--; + + r.xrep.QuickSetLength(wn+1); + + x = r; +} + +static +void PentReduce(GF2X& x, const GF2X& a, long n, long k3, long k2, long k1) +{ + long wn = n / NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + long m = a.xrep.length()-1; + + if (wn > m) { + x = a; + return; + } + + long wdiff1 = (n-k1)/NTL_BITS_PER_LONG; + long bdiff1 = (n-k1) - wdiff1*NTL_BITS_PER_LONG; + + long wdiff2 = (n-k2)/NTL_BITS_PER_LONG; + long bdiff2 = (n-k2) - wdiff2*NTL_BITS_PER_LONG; + + long wdiff3 = (n-k3)/NTL_BITS_PER_LONG; + long bdiff3 = (n-k3) - wdiff3*NTL_BITS_PER_LONG; + + NTL_GF2XRegister(r); + r = a; + + _ntl_ulong *p = r.xrep.elts(); + + _ntl_ulong *pp; + + _ntl_ulong w; + + while (m > wn) { + w = p[m]; + + if (bn == 0) + p[m-wn] ^= w; + else { + pp = &p[m-wn]; + *pp ^= (w >> bn); 
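+ // the bn-bit shift splits w across two machine words: one part was + // just folded into *pp, and the remainder goes into the word below it.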
+ *(pp-1) ^= (w << (NTL_BITS_PER_LONG-bn)); + } + + if (bdiff1 == 0) + p[m-wdiff1] ^= w; + else { + pp = &p[m-wdiff1]; + *pp ^= (w >> bdiff1); + *(pp-1) ^= (w << (NTL_BITS_PER_LONG-bdiff1)); + } + + if (bdiff2 == 0) + p[m-wdiff2] ^= w; + else { + pp = &p[m-wdiff2]; + *pp ^= (w >> bdiff2); + *(pp-1) ^= (w << (NTL_BITS_PER_LONG-bdiff2)); + } + + if (bdiff3 == 0) + p[m-wdiff3] ^= w; + else { + pp = &p[m-wdiff3]; + *pp ^= (w >> bdiff3); + *(pp-1) ^= (w << (NTL_BITS_PER_LONG-bdiff3)); + } + + m--; + } + + w = (p[m] >> bn) << bn; + + p[0] ^= (w >> bn); + + if (bdiff1 == 0) + p[m-wdiff1] ^= w; + else { + p[m-wdiff1] ^= (w >> bdiff1); + if (m-wdiff1-1 >= 0) p[m-wdiff1-1] ^= (w << (NTL_BITS_PER_LONG-bdiff1)); + } + + if (bdiff2 == 0) + p[m-wdiff2] ^= w; + else { + p[m-wdiff2] ^= (w >> bdiff2); + if (m-wdiff2-1 >= 0) p[m-wdiff2-1] ^= (w << (NTL_BITS_PER_LONG-bdiff2)); + } + + if (bdiff3 == 0) + p[m-wdiff3] ^= w; + else { + p[m-wdiff3] ^= (w >> bdiff3); + if (m-wdiff3-1 >= 0) p[m-wdiff3-1] ^= (w << (NTL_BITS_PER_LONG-bdiff3)); + } + + if (bn != 0) + p[m] &= ((1UL<<bn)-1UL); + + while (wn >= 0 && p[wn] == 0) + wn--; + + r.xrep.QuickSetLength(wn+1); + + x = r; +} + + + + +static +void RightShiftAdd(GF2X& c, const GF2X& a, long n) +{ + if (n < 0) { + LogicError("RightShiftAdd: negative shamt"); + } + + if (n == 0) { + add(c, c, a); + return; + } + + long sa = a.xrep.length(); + long wn = n/NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + if (wn >= sa) { + return; + } + + long sc = c.xrep.length(); + long i; + + if (sa-wn > sc) + c.xrep.SetLength(sa-wn); + + _ntl_ulong *cp = c.xrep.elts(); + const _ntl_ulong *ap = a.xrep.elts(); + + for (i = sc; i < sa-wn; i++) + cp[i] = 0; + + + if (bn == 0) { + for (i = 0; i < sa-wn; i++) + cp[i] ^= ap[i+wn]; + } + else { + for (i = 0; i < sa-wn-1; i++) + cp[i] ^= (ap[i+wn] >> bn) | (ap[i+wn+1] << (NTL_BITS_PER_LONG - bn)); + + cp[sa-wn-1] ^= ap[sa-1] >> bn; + } + + c.normalize(); +} + + +static +void TriDiv21(GF2X& q, const GF2X& a, long n, long k) +{ + NTL_GF2XRegister(P1); + + RightShift(P1, a, n); + if (k != 1) + RightShiftAdd(P1, P1, n-k); + + q = P1; +} + +static +void TriDivRem21(GF2X& q, GF2X& r, const GF2X& a, long n, long k) +{ + NTL_GF2XRegister(Q); + TriDiv21(Q, a, n, k); + TrinomReduce(r, a, n, k); + q = Q; +} + + +static +void PentDiv21(GF2X& q, const GF2X& a, long n, long k3, long k2, long k1) +{ + if (deg(a) < n) { + clear(q); + return; + } + + NTL_GF2XRegister(P1); + NTL_GF2XRegister(P2); + + RightShift(P1, a, n); + + RightShift(P2, P1, n-k3); + RightShiftAdd(P2, P1, n-k2); + if (k1 != 1) { + RightShiftAdd(P2, P1, n-k1); + } + + add(P2, P2, P1); + + q = P2; +} + +static +void PentDivRem21(GF2X& q, GF2X& r, const GF2X& a, long n, + long k3, long k2, long k1) +{ + NTL_GF2XRegister(Q); + PentDiv21(Q, a, n, k3, k2, k1); + PentReduce(r, a, n, k3, k2, k1); + q = Q; +} + +static +void TriDivRemX1(GF2X& q, GF2X& r, const GF2X& aa, long n, long k) +{ + NTL_GF2XRegister(buf); + NTL_GF2XRegister(tmp); + NTL_GF2XRegister(a); + NTL_GF2XRegister(qq); + NTL_GF2XRegister(qbuf); + + clear(buf); + a = aa; + clear(qq); + + long a_len = deg(a) + 1; + + while (a_len > 0) { + long old_buf_len = deg(buf) + 1; + long amt = min(2*n-1-old_buf_len, a_len); + + LeftShift(buf, buf, amt); + RightShift(tmp, a, a_len-amt); + add(buf, buf, tmp); + trunc(a, a, a_len-amt); + + TriDivRem21(qbuf, buf, buf, n, k); + a_len -= amt; + + ShiftAdd(qq, qbuf, a_len); + } + + r = buf; + q = qq; +} + + +static +void TriDivX1(GF2X& q, const GF2X& aa, long n, long k) +{ + NTL_GF2XRegister(buf); + 
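+ // same blocking scheme as the X1 routines above: consume the dividend + // in chunks of at most 2n-1 bits, reduce each chunk with TriDivRem21, + // and accumulate the quotient words with ShiftAdd.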
NTL_GF2XRegister(tmp); + NTL_GF2XRegister(a); + NTL_GF2XRegister(qq); + NTL_GF2XRegister(qbuf); + + clear(buf); + a = aa; + clear(qq); + + long a_len = deg(a) + 1; + + while (a_len > 0) { + long old_buf_len = deg(buf) + 1; + long amt = min(2*n-1-old_buf_len, a_len); + + LeftShift(buf, buf, amt); + RightShift(tmp, a, a_len-amt); + add(buf, buf, tmp); + trunc(a, a, a_len-amt); + + TriDivRem21(qbuf, buf, buf, n, k); + a_len -= amt; + + ShiftAdd(qq, qbuf, a_len); + } + + q = qq; +} + +static +void PentDivRemX1(GF2X& q, GF2X& r, const GF2X& aa, long n, + long k3, long k2, long k1) +{ + NTL_GF2XRegister(buf); + NTL_GF2XRegister(tmp); + NTL_GF2XRegister(a); + NTL_GF2XRegister(qq); + NTL_GF2XRegister(qbuf); + + clear(buf); + a = aa; + clear(qq); + + long a_len = deg(a) + 1; + + while (a_len > 0) { + long old_buf_len = deg(buf) + 1; + long amt = min(2*n-1-old_buf_len, a_len); + + LeftShift(buf, buf, amt); + RightShift(tmp, a, a_len-amt); + add(buf, buf, tmp); + trunc(a, a, a_len-amt); + + PentDivRem21(qbuf, buf, buf, n, k3, k2, k1); + a_len -= amt; + + ShiftAdd(qq, qbuf, a_len); + } + + r = buf; + q = qq; +} + + +static +void PentDivX1(GF2X& q, const GF2X& aa, long n, long k3, long k2, long k1) +{ + NTL_GF2XRegister(buf); + NTL_GF2XRegister(tmp); + NTL_GF2XRegister(a); + NTL_GF2XRegister(qq); + NTL_GF2XRegister(qbuf); + + clear(buf); + a = aa; + clear(qq); + + long a_len = deg(a) + 1; + + while (a_len > 0) { + long old_buf_len = deg(buf) + 1; + long amt = min(2*n-1-old_buf_len, a_len); + + LeftShift(buf, buf, amt); + RightShift(tmp, a, a_len-amt); + add(buf, buf, tmp); + trunc(a, a, a_len-amt); + + PentDivRem21(qbuf, buf, buf, n, k3, k2, k1); + a_len -= amt; + + ShiftAdd(qq, qbuf, a_len); + } + + q = qq; +} + + + +void rem(GF2X& r, const GF2X& a, const GF2XModulus& F) +{ + long n = F.n; + + if (n < 0) LogicError("rem: uninitialized modulus"); + + if (F.method == GF2X_MOD_TRI) { + TrinomReduce(r, a, n, F.k3); + return; + } + + if (F.method == GF2X_MOD_PENT) { + PentReduce(r, a, n, F.k3, F.k2, F.k1); + return; + } + + long da = deg(a); + + + if (da < n) { + r = a; + } + else if (F.method == GF2X_MOD_MUL) { + if (da <= 2*(n-1)) + UseMulRem21(r, a, F); + else + UseMulRemX1(r, a, F); + } + else if (F.method == GF2X_MOD_SPECIAL) { + NTL_TLS_GLOBAL_ACCESS(GF2X_rembuf); + WordVectorWatcher watch_GF2X_rembuf(GF2X_rembuf); + + long sa = a.xrep.length(); + long posa = da - NTL_BITS_PER_LONG*(sa-1); + + _ntl_ulong *ap; + if (&r == &a) + ap = r.xrep.elts(); + else { + GF2X_rembuf = a.xrep; + ap = GF2X_rembuf.elts(); + } + + _ntl_ulong *atop = &ap[sa-1]; + _ntl_ulong *stab_top; + + long i; + + while (1) { + if (atop[0] & (1UL << posa)) { + stab_top = &F.stab1[posa << 1]; + i = F.stab_cnt[posa]; + atop[i] ^= stab_top[0]; + atop[i+1] ^= stab_top[1]; + } + + da--; + if (da < n) break; + + posa--; + if (posa < 0) { + posa = NTL_BITS_PER_LONG-1; + atop--; + } + } + + long sn = F.size; + r.xrep.SetLength(sn); + if (&r != &a) { + _ntl_ulong *rp = r.xrep.elts(); + for (i = 0; i < sn; i++) + rp[i] = ap[i]; + } + r.xrep[sn-1] &= F.msk; + r.normalize(); + } + else { + NTL_TLS_GLOBAL_ACCESS(GF2X_rembuf); + WordVectorWatcher watch_GF2X_rembuf(GF2X_rembuf); + + long sa = a.xrep.length(); + long posa = da - NTL_BITS_PER_LONG*(sa-1); + + _ntl_ulong *ap; + if (&r == &a) + ap = r.xrep.elts(); + else { + GF2X_rembuf = a.xrep; + ap = GF2X_rembuf.elts(); + } + + _ntl_ulong *atop = &ap[sa-1]; + _ntl_ulong *stab_top; + + long i; + + while (1) { + if (atop[0] & (1UL << posa)) { + stab_top = F.stab_ptr[posa]; + for (i = 
F.stab_cnt[posa]; i <= 0; i++) + atop[i] ^= stab_top[i]; + } + + da--; + if (da < n) break; + + posa--; + if (posa < 0) { + posa = NTL_BITS_PER_LONG-1; + atop--; + } + } + + long sn = F.size; + r.xrep.SetLength(sn); + if (&r != &a) { + _ntl_ulong *rp = r.xrep.elts(); + for (i = 0; i < sn; i++) + rp[i] = ap[i]; + } + r.normalize(); + } + +} + +void DivRem(GF2X& q, GF2X& r, const GF2X& a, const GF2XModulus& F) +{ + long da = deg(a); + long n = F.n; + + if (n < 0) LogicError("DivRem: uninitialized modulus"); + + if (da < n) { + r = a; + clear(q); + } + else if (F.method == GF2X_MOD_TRI) { + if (da <= 2*(n-1)) + TriDivRem21(q, r, a, F.n, F.k3); + else + TriDivRemX1(q, r, a, F.n, F.k3); + } + else if (F.method == GF2X_MOD_PENT) { + if (da <= 2*(n-1)) + PentDivRem21(q, r, a, F.n, F.k3, F.k2, F.k1); + else + PentDivRemX1(q, r, a, F.n, F.k3, F.k2, F.k1); + } + else if (F.method == GF2X_MOD_MUL) { + if (da <= 2*(n-1)) + UseMulDivRem21(q, r, a, F); + else + UseMulDivRemX1(q, r, a, F); + } + else if (F.method == GF2X_MOD_SPECIAL) { + NTL_TLS_GLOBAL_ACCESS(GF2X_rembuf); + WordVectorWatcher watch_GF2X_rembuf(GF2X_rembuf); + + long sa = a.xrep.length(); + long posa = da - NTL_BITS_PER_LONG*(sa-1); + + long dq = da - n; + long sq = dq/NTL_BITS_PER_LONG + 1; + long posq = dq - NTL_BITS_PER_LONG*(sq-1); + + _ntl_ulong *ap; + if (&r == &a) + ap = r.xrep.elts(); + else { + GF2X_rembuf = a.xrep; + ap = GF2X_rembuf.elts(); + } + + _ntl_ulong *atop = &ap[sa-1]; + _ntl_ulong *stab_top; + + long i; + + q.xrep.SetLength(sq); + _ntl_ulong *qp = q.xrep.elts(); + for (i = 0; i < sq; i++) + qp[i] = 0; + + _ntl_ulong *qtop = &qp[sq-1]; + + + while (1) { + if (atop[0] & (1UL << posa)) { + qtop[0] |= (1UL << posq); + stab_top = &F.stab1[posa << 1]; + i = F.stab_cnt[posa]; + atop[i] ^= stab_top[0]; + atop[i+1] ^= stab_top[1]; + } + + da--; + if (da < n) break; + + posa--; + if (posa < 0) { + posa = NTL_BITS_PER_LONG-1; + atop--; + } + + posq--; + if (posq < 0) { + posq = NTL_BITS_PER_LONG-1; + qtop--; + } + } + + long sn = F.size; + r.xrep.SetLength(sn); + if (&r != &a) { + _ntl_ulong *rp = r.xrep.elts(); + for (i = 0; i < sn; i++) + rp[i] = ap[i]; + } + r.xrep[sn-1] &= F.msk; + r.normalize(); + } + else { + NTL_TLS_GLOBAL_ACCESS(GF2X_rembuf); + WordVectorWatcher watch_GF2X_rembuf(GF2X_rembuf); + + long sa = a.xrep.length(); + long posa = da - NTL_BITS_PER_LONG*(sa-1); + + long dq = da - n; + long sq = dq/NTL_BITS_PER_LONG + 1; + long posq = dq - NTL_BITS_PER_LONG*(sq-1); + + _ntl_ulong *ap; + if (&r == &a) + ap = r.xrep.elts(); + else { + GF2X_rembuf = a.xrep; + ap = GF2X_rembuf.elts(); + } + + _ntl_ulong *atop = &ap[sa-1]; + _ntl_ulong *stab_top; + + long i; + + q.xrep.SetLength(sq); + _ntl_ulong *qp = q.xrep.elts(); + for (i = 0; i < sq; i++) + qp[i] = 0; + + _ntl_ulong *qtop = &qp[sq-1]; + + while (1) { + if (atop[0] & (1UL << posa)) { + qtop[0] |= (1UL << posq); + stab_top = F.stab_ptr[posa]; + for (i = F.stab_cnt[posa]; i <= 0; i++) + atop[i] ^= stab_top[i]; + } + + da--; + if (da < n) break; + + posa--; + if (posa < 0) { + posa = NTL_BITS_PER_LONG-1; + atop--; + } + + posq--; + if (posq < 0) { + posq = NTL_BITS_PER_LONG-1; + qtop--; + } + } + + long sn = F.size; + r.xrep.SetLength(sn); + if (&r != &a) { + _ntl_ulong *rp = r.xrep.elts(); + for (i = 0; i < sn; i++) + rp[i] = ap[i]; + } + r.normalize(); + } +} + + + +void div(GF2X& q, const GF2X& a, const GF2XModulus& F) +{ + long da = deg(a); + long n = F.n; + + if (n < 0) LogicError("div: uninitialized modulus"); + + + if (da < n) { + clear(q); + } + else if 
(F.method == GF2X_MOD_TRI) { + if (da <= 2*(n-1)) + TriDiv21(q, a, F.n, F.k3); + else + TriDivX1(q, a, F.n, F.k3); + } + else if (F.method == GF2X_MOD_PENT) { + if (da <= 2*(n-1)) + PentDiv21(q, a, F.n, F.k3, F.k2, F.k1); + else + PentDivX1(q, a, F.n, F.k3, F.k2, F.k1); + } + else if (F.method == GF2X_MOD_MUL) { + if (da <= 2*(n-1)) + UseMulDiv21(q, a, F); + else + UseMulDivX1(q, a, F); + } + else if (F.method == GF2X_MOD_SPECIAL) { + NTL_TLS_GLOBAL_ACCESS(GF2X_rembuf); + WordVectorWatcher watch_GF2X_rembuf(GF2X_rembuf); + + long sa = a.xrep.length(); + long posa = da - NTL_BITS_PER_LONG*(sa-1); + + long dq = da - n; + long sq = dq/NTL_BITS_PER_LONG + 1; + long posq = dq - NTL_BITS_PER_LONG*(sq-1); + + _ntl_ulong *ap; + GF2X_rembuf = a.xrep; + ap = GF2X_rembuf.elts(); + + + _ntl_ulong *atop = &ap[sa-1]; + _ntl_ulong *stab_top; + + long i; + + q.xrep.SetLength(sq); + _ntl_ulong *qp = q.xrep.elts(); + for (i = 0; i < sq; i++) + qp[i] = 0; + + _ntl_ulong *qtop = &qp[sq-1]; + + while (1) { + if (atop[0] & (1UL << posa)) { + qtop[0] |= (1UL << posq); + stab_top = &F.stab1[posa << 1]; + i = F.stab_cnt[posa]; + atop[i] ^= stab_top[0]; + atop[i+1] ^= stab_top[1]; + } + + da--; + if (da < n) break; + + posa--; + if (posa < 0) { + posa = NTL_BITS_PER_LONG-1; + atop--; + } + + posq--; + if (posq < 0) { + posq = NTL_BITS_PER_LONG-1; + qtop--; + } + } + } + else { + NTL_TLS_GLOBAL_ACCESS(GF2X_rembuf); + WordVectorWatcher watch_GF2X_rembuf(GF2X_rembuf); + + long sa = a.xrep.length(); + long posa = da - NTL_BITS_PER_LONG*(sa-1); + + long dq = da - n; + long sq = dq/NTL_BITS_PER_LONG + 1; + long posq = dq - NTL_BITS_PER_LONG*(sq-1); + + _ntl_ulong *ap; + GF2X_rembuf = a.xrep; + ap = GF2X_rembuf.elts(); + + _ntl_ulong *atop = &ap[sa-1]; + _ntl_ulong *stab_top; + + long i; + + q.xrep.SetLength(sq); + _ntl_ulong *qp = q.xrep.elts(); + for (i = 0; i < sq; i++) + qp[i] = 0; + + _ntl_ulong *qtop = &qp[sq-1]; + + while (1) { + if (atop[0] & (1UL << posa)) { + qtop[0] |= (1UL << posq); + stab_top = F.stab_ptr[posa]; + for (i = F.stab_cnt[posa]; i <= 0; i++) + atop[i] ^= stab_top[i]; + } + + da--; + if (da < n) break; + + posa--; + if (posa < 0) { + posa = NTL_BITS_PER_LONG-1; + atop--; + } + + posq--; + if (posq < 0) { + posq = NTL_BITS_PER_LONG-1; + qtop--; + } + } + } +} + + +void MulMod(GF2X& c, const GF2X& a, const GF2X& b, const GF2XModulus& F) +{ + if (F.n < 0) LogicError("MulMod: uninitialized modulus"); + + NTL_GF2XRegister(t); + mul(t, a, b); + rem(c, t, F); +} + + +void SqrMod(GF2X& c, const GF2X& a, const GF2XModulus& F) +{ + if (F.n < 0) LogicError("SqrMod: uninitialized modulus"); + + NTL_GF2XRegister(t); + sqr(t, a); + rem(c, t, F); +} + + +// we need these two versions to prevent a GF2XModulus +// from being constructed. 
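+// Illustrative usage (a sketch, not from the NTL sources): when the +// modulus f is reused, build a GF2XModulus once and pass that instead: +// +//    GF2XModulus F; +//    build(F, f);          // one-time precomputation +//    MulMod(c, a, b, F);   // repeated calls amortize the build +// +// The plain-GF2X versions below are the cheaper choice for a one-off +// product.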
+ + +void MulMod(GF2X& c, const GF2X& a, const GF2X& b, const GF2X& f) +{ + NTL_GF2XRegister(t); + mul(t, a, b); + rem(c, t, f); +} + +void SqrMod(GF2X& c, const GF2X& a, const GF2X& f) +{ + NTL_GF2XRegister(t); + sqr(t, a); + rem(c, t, f); +} + + +static +long OptWinSize(long n) +// finds k that minimizes n/(k+1) + 2^{k-1} + +{ + long k; + double v, v_new; + + + v = n/2.0 + 1.0; + k = 1; + + for (;;) { + v_new = n/(double(k+2)) + double(1L << k); + if (v_new >= v) break; + v = v_new; + k++; + } + + return k; +} + + + +void PowerMod(GF2X& h, const GF2X& g, const ZZ& e, const GF2XModulus& F) +// h = g^e mod f using "sliding window" algorithm +{ + if (deg(g) >= F.n) LogicError("PowerMod: bad args"); + + if (e == 0) { + set(h); + return; + } + + if (e == 1) { + h = g; + return; + } + + if (e == -1) { + InvMod(h, g, F); + return; + } + + if (e == 2) { + SqrMod(h, g, F); + return; + } + + if (e == -2) { + SqrMod(h, g, F); + InvMod(h, h, F); + return; + } + + + long n = NumBits(e); + + GF2X res; + res.SetMaxLength(F.n); + set(res); + + long i; + + if (n < 16) { + // plain square-and-multiply algorithm + + for (i = n - 1; i >= 0; i--) { + SqrMod(res, res, F); + if (bit(e, i)) + MulMod(res, res, g, F); + } + + if (e < 0) InvMod(res, res, F); + + h = res; + return; + } + + long k = OptWinSize(n); + + k = min(k, 9); + + vec_GF2X v; + + v.SetLength(1L << (k-1)); + + v[0] = g; + + if (k > 1) { + GF2X t; + SqrMod(t, g, F); + + for (i = 1; i < (1L << (k-1)); i++) + MulMod(v[i], v[i-1], t, F); + } + + + long val; + long cnt; + long m; + + val = 0; + for (i = n-1; i >= 0; i--) { + val = (val << 1) | bit(e, i); + if (val == 0) + SqrMod(res, res, F); + else if (val >= (1L << (k-1)) || i == 0) { + cnt = 0; + while ((val & 1) == 0) { + val = val >> 1; + cnt++; + } + + m = val; + while (m > 0) { + SqrMod(res, res, F); + m = m >> 1; + } + + MulMod(res, res, v[val >> 1], F); + + while (cnt > 0) { + SqrMod(res, res, F); + cnt--; + } + + val = 0; + } + } + + if (e < 0) InvMod(res, res, F); + + h = res; +} + + + + +void PowerXMod(GF2X& hh, const ZZ& e, const GF2XModulus& F) +{ + if (F.n < 0) LogicError("PowerXMod: uninitialized modulus"); + + if (IsZero(e)) { + set(hh); + return; + } + + long n = NumBits(e); + long i; + + GF2X h; + + h.SetMaxLength(F.n+1); + set(h); + + for (i = n - 1; i >= 0; i--) { + SqrMod(h, h, F); + if (bit(e, i)) { + MulByX(h, h); + if (coeff(h, F.n) != 0) + add(h, h, F.f); + } + } + + if (e < 0) InvMod(h, h, F); + + hh = h; +} + + + + + +void UseMulRem(GF2X& r, const GF2X& a, const GF2X& b) +{ + NTL_GF2XRegister(P1); + NTL_GF2XRegister(P2); + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + mul(P1, P2, b); + add(P1, P1, a); + + r = P1; +} + +void UseMulDivRem(GF2X& q, GF2X& r, const GF2X& a, const GF2X& b) +{ + NTL_GF2XRegister(P1); + NTL_GF2XRegister(P2); + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + mul(P1, P2, b); + add(P1, P1, a); + + r = P1; + q = P2; +} + +void UseMulDiv(GF2X& q, const GF2X& a, const GF2X& b) +{ + NTL_GF2XRegister(P1); + NTL_GF2XRegister(P2); + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + + q = P2; +} + 
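+// The three UseMul* routines above divide by power-series inversion: +// writing rev_k(h) = X^k * h(1/X), the relation a = q*b + r yields +// rev_{da-db}(q) = rev_{da}(a) * rev_{db}(b)^{-1} mod X^{da-db+1}, +// so a CopyReverse/InvTrunc pair plus multiplications replaces long +// division, and the remainder follows as r = a + q*b (characteristic 2).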
+ +const long GF2X_DIV_CROSS = (NTL_BITS_PER_LONG/2)*XOVER_SCALE; + +void DivRem(GF2X& q, GF2X& r, const GF2X& a, const GF2X& b) +{ + long da = deg(a); + long db = deg(b); + + if (db < GF2X_DIV_CROSS || da-db < GF2X_DIV_CROSS) + PlainDivRem(q, r, a, b); + else if (da < 4*db) + UseMulDivRem(q, r, a, b); + else { + GF2XModulus B; + build(B, b); + DivRem(q, r, a, B); + } +} + +void div(GF2X& q, const GF2X& a, const GF2X& b) +{ + long da = deg(a); + long db = deg(b); + + if (db < GF2X_DIV_CROSS || da-db < GF2X_DIV_CROSS) + PlainDiv(q, a, b); + else if (da < 4*db) + UseMulDiv(q, a, b); + else { + GF2XModulus B; + build(B, b); + div(q, a, B); + } +} + +void rem(GF2X& r, const GF2X& a, const GF2X& b) +{ + long da = deg(a); + long db = deg(b); + + if (db < GF2X_DIV_CROSS || da-db < GF2X_DIV_CROSS) + PlainRem(r, a, b); + else if (da < 4*db) + UseMulRem(r, a, b); + else { + GF2XModulus B; + build(B, b); + rem(r, a, B); + } +} + + +static inline +void swap(_ntl_ulong_ptr& a, _ntl_ulong_ptr& b) +{ _ntl_ulong_ptr t; t = a; a = b; b = t; } + + + + +static +void BaseGCD(GF2X& d, const GF2X& a_in, const GF2X& b_in) +{ + NTL_GF2XRegister(a); + NTL_GF2XRegister(b); + + if (IsZero(a_in)) { + d = b_in; + return; + } + + if (IsZero(b_in)) { + d = a_in; + return; + } + + a.xrep.SetMaxLength(a_in.xrep.length()+1); + b.xrep.SetMaxLength(b_in.xrep.length()+1); + + a = a_in; + b = b_in; + + _ntl_ulong *ap = a.xrep.elts(); + _ntl_ulong *bp = b.xrep.elts(); + + long da = deg(a); + long wa = da/NTL_BITS_PER_LONG; + long ba = da - wa*NTL_BITS_PER_LONG; + + long db = deg(b); + long wb = db/NTL_BITS_PER_LONG; + long bb = db - wb*NTL_BITS_PER_LONG; + + long parity = 0; + + for (;;) { + if (da < db) { + swap(ap, bp); + swap(da, db); + swap(wa, wb); + swap(ba, bb); + parity = 1 - parity; + } + + // da >= db + + if (db == -1) break; + + ShiftAdd(ap, bp, wb+1, da-db); + + _ntl_ulong msk = 1UL << ba; + _ntl_ulong aa = ap[wa]; + + while ((aa & msk) == 0) { + da--; + msk = msk >> 1; + ba--; + if (!msk) { + wa--; + ba = NTL_BITS_PER_LONG-1; + msk = 1UL << (NTL_BITS_PER_LONG-1); + if (wa < 0) break; + aa = ap[wa]; + } + } + } + + a.normalize(); + b.normalize(); + + if (!parity) { + d = a; + } + else { + d = b; + } +} + + +void OldGCD(GF2X& d, const GF2X& a, const GF2X& b) +{ + long sa = a.xrep.length(); + long sb = b.xrep.length(); + + if (sb >= 10 && 2*sa > 3*sb) { + NTL_GF2XRegister(r); + + rem(r, a, b); + BaseGCD(d, b, r); + } + else if (sa >= 10 && 2*sb > 3*sa) { + NTL_GF2XRegister(r); + + rem(r, b, a); + BaseGCD(d, a, r); + } + else { + BaseGCD(d, a, b); + } +} + + + + + +#define XX_STEP(ap,da,wa,ba,rp,sr,bp,db,wb,bb,sp,ss) \ + long delta = da-db; \ + \ + if (delta == 0) { \ + long i; \ + for (i = wb; i >= 0; i--) ap[i] ^= bp[i]; \ + for (i = ss-1; i >= 0; i--) rp[i] ^= sp[i]; \ + if (ss > sr) sr = ss; \ + } \ + else if (delta == 1) { \ + long i; \ + _ntl_ulong tt, tt1; \ + \ + tt = bp[wb] >> (NTL_BITS_PER_LONG-1); \ + if (tt) ap[wb+1] ^= tt; \ + tt = bp[wb]; \ + for (i = wb; i >= 1; i--) \ + tt1 = bp[i-1], ap[i] ^= (tt << 1) | (tt1 >> (NTL_BITS_PER_LONG-1)), \ + tt = tt1; \ + ap[0] ^= tt << 1; \ + \ + if (ss > 0) { \ + long t = ss; \ + tt = sp[ss-1] >> (NTL_BITS_PER_LONG-1); \ + if (tt) rp[ss] ^= tt, t++; \ + tt = sp[ss-1]; \ + for (i = ss-1; i >= 1; i--) \ + tt1=sp[i-1], \ + rp[i] ^= (tt << 1) | (tt1 >> (NTL_BITS_PER_LONG-1)), \ + tt = tt1; \ + rp[0] ^= tt << 1; \ + if (t > sr) sr = t; \ + } \ + } \ + else if (delta < NTL_BITS_PER_LONG) { \ + long i; \ + _ntl_ulong tt, tt1; \ + long rdelta = NTL_BITS_PER_LONG-delta; \ + \ + 
tt = bp[wb] >> rdelta; \ + if (tt) ap[wb+1] ^= tt; \ + tt=bp[wb]; \ + for (i = wb; i >= 1; i--) \ + tt1=bp[i-1], ap[i] ^= (tt << delta) | (tt1 >> rdelta), \ + tt=tt1; \ + ap[0] ^= tt << delta; \ + \ + if (ss > 0) { \ + long t = ss; \ + tt = sp[ss-1] >> rdelta; \ + if (tt) rp[ss] ^= tt, t++; \ + tt=sp[ss-1]; \ + for (i = ss-1; i >= 1; i--) \ + tt1=sp[i-1], rp[i] ^= (tt << delta) | (tt1 >> rdelta), \ + tt=tt1; \ + rp[0] ^= tt << delta; \ + if (t > sr) sr = t; \ + } \ + } \ + else { \ + ShiftAdd(ap, bp, wb+1, da-db); \ + ShiftAdd(rp, sp, ss, da-db); \ + long t = ss + (da-db+NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG; \ + if (t > sr) { \ + while (t > 0 && rp[t-1] == 0) t--; \ + sr = t; \ + } \ + } \ + \ + _ntl_ulong msk = 1UL << ba; \ + _ntl_ulong aa = ap[wa]; \ + \ + while ((aa & msk) == 0) { \ + da--; \ + msk = msk >> 1; \ + ba--; \ + if (!msk) { \ + wa--; \ + ba = NTL_BITS_PER_LONG-1; \ + msk = 1UL << (NTL_BITS_PER_LONG-1); \ + if (wa < 0) break; \ + aa = ap[wa]; \ + } \ + } \ + + + + +static +void XXGCD(GF2X& d, GF2X& r_out, const GF2X& a_in, const GF2X& b_in) +{ + NTL_GF2XRegister(a); + NTL_GF2XRegister(b); + NTL_GF2XRegister(r); + NTL_GF2XRegister(s); + + if (IsZero(b_in)) { + d = a_in; + set(r_out); + return; + } + + if (IsZero(a_in)) { + d = b_in; + clear(r_out); + return; + } + + a.xrep.SetMaxLength(a_in.xrep.length()+1); + b.xrep.SetMaxLength(b_in.xrep.length()+1); + + long max_sz = max(a_in.xrep.length(), b_in.xrep.length()); + r.xrep.SetLength(max_sz+1); + s.xrep.SetLength(max_sz+1); + + _ntl_ulong *rp = r.xrep.elts(); + _ntl_ulong *sp = s.xrep.elts(); + + long i; + for (i = 0; i <= max_sz; i++) { + rp[i] = sp[i] = 0; + } + + rp[0] = 1; + + long sr = 1; + long ss = 0; + + a = a_in; + b = b_in; + + _ntl_ulong *ap = a.xrep.elts(); + _ntl_ulong *bp = b.xrep.elts(); + + long da = deg(a); + long wa = da/NTL_BITS_PER_LONG; + long ba = da - wa*NTL_BITS_PER_LONG; + + long db = deg(b); + long wb = db/NTL_BITS_PER_LONG; + long bb = db - wb*NTL_BITS_PER_LONG; + + long parity = 0; + + + for (;;) { + if (da == -1 || db == -1) break; + + if (da < db || (da == db && parity)) { + if (da < db && !parity) parity = 1; + XX_STEP(bp,db,wb,bb,sp,ss,ap,da,wa,ba,rp,sr) + + } + else { + parity = 0; + XX_STEP(ap,da,wa,ba,rp,sr,bp,db,wb,bb,sp,ss) + } + } + + a.normalize(); + b.normalize(); + r.normalize(); + s.normalize(); + + if (db == -1) { + d = a; + r_out = r; + } + else { + d = b; + r_out = s; + } +} + + + +static +void BaseXGCD(GF2X& d, GF2X& s, GF2X& t, const GF2X& a, const GF2X& b) +{ + if (IsZero(b)) { + d = a; + set(s); + clear(t); + } + else { + NTL_GF2XRegister(t1); + NTL_GF2XRegister(b1); + + b1 = b; + XXGCD(d, s, a, b); + mul(t1, a, s); + add(t1, t1, d); + div(t, t1, b1); + } +} + + + + +void OldXGCD(GF2X& d, GF2X& s, GF2X& t, const GF2X& a, const GF2X& b) +{ + long sa = a.xrep.length(); + long sb = b.xrep.length(); + + + if (sb >= 10 && 2*sa > 3*sb) { + NTL_GF2XRegister(r); + NTL_GF2XRegister(q); + NTL_GF2XRegister(s1); + NTL_GF2XRegister(t1); + + + DivRem(q, r, a, b); + BaseXGCD(d, s1, t1, b, r); + + + mul(r, t1, q); + add(r, r, s1); // r = s1 - t1*q, but sign doesn't matter + + s = t1; + t = r; + } + else if (sa >= 10 && 2*sb > 3*sa) { + NTL_GF2XRegister(r); + NTL_GF2XRegister(q); + NTL_GF2XRegister(s1); + NTL_GF2XRegister(t1); + + + DivRem(q, r, b, a); + BaseXGCD(d, s1, t1, a, r); + + + mul(r, t1, q); + add(r, r, s1); // r = s1 - t1*q, but sign doesn't matter + + t = t1; + s = r; + } + else { + BaseXGCD(d, s, t, a, b); + } + +} + + + + +static +void BaseInvMod(GF2X& d, GF2X& s, const GF2X& a, 
const GF2X& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("InvMod: bad args"); + + long sa = a.xrep.length(); + long sf = f.xrep.length(); + + if ((sa >= 10 && 2*sf > 3*sa) || + sf > NTL_GF2X_GCD_CROSSOVER/NTL_BITS_PER_LONG) { + NTL_GF2XRegister(t); + + XGCD(d, s, t, a, f); + } + else { + XXGCD(d, s, a, f); + } + +} + + + +void InvMod(GF2X& c, const GF2X& a, const GF2X& f) +{ + NTL_GF2XRegister(d); + NTL_GF2XRegister(s); + BaseInvMod(d, s, a, f); + + if (!IsOne(d)) InvModError("InvMod: inverse undefined"); + + c = s; +} + + + +long InvModStatus(GF2X& c, const GF2X& a, const GF2X& f) +{ + NTL_GF2XRegister(d); + NTL_GF2XRegister(s); + BaseInvMod(d, s, a, f); + + if (!IsOne(d)) { + c = d; + return 1; + } + + c = s; + return 0; +} + + + + +void diff(GF2X& c, const GF2X& a) +{ + RightShift(c, a, 1); + + // clear odd coeffs + + long dc = deg(c); + long i; + for (i = 1; i <= dc; i += 2) + SetCoeff(c, i, 0); +} + +void conv(GF2X& c, long a) +{ + if (a & 1) + set(c); + else + clear(c); +} + +void conv(GF2X& c, GF2 a) +{ + if (a == 1) + set(c); + else + clear(c); +} + +void conv(GF2X& x, const vec_GF2& a) +{ + x.xrep = a.rep; + x.normalize(); +} + +void conv(vec_GF2& x, const GF2X& a) +{ + VectorCopy(x, a, deg(a)+1); +} + + + +/* additional legacy conversions for v6 conversion regime */ + +#ifndef NTL_WIZARD_HACK +void conv(GF2X& x, const ZZX& a) +{ + long n = deg(a) + 1; + long i; + + x.SetLength(n); + for (i = 0; i < n; i++) + conv(x[i], a[i]); + x.normalize(); +} + +void conv(ZZX& x, const GF2X& a) +{ + long n = deg(a) + 1; + long i; + + x.rep.SetLength(n); + for (i = 0; i < n; i++) + x.rep[i] = rep(coeff(a, i)); + + x.normalize(); +} +#endif + +/* ------------------------------------- */ + +void VectorCopy(vec_GF2& x, const GF2X& a, long n) +{ + if (n < 0) LogicError("VectorCopy: negative length"); + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in VectorCopy"); + + long wa = a.xrep.length(); + long wx = (n + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG; + + long wmin = min(wa, wx); + + x.SetLength(n); + + const _ntl_ulong *ap = a.xrep.elts(); + _ntl_ulong *xp = x.rep.elts(); + + long i; + for (i = 0; i < wmin; i++) + xp[i] = ap[i]; + + if (wa < wx) { + for (i = wa; i < wx; i++) + xp[i] = 0; + } + else { + long p = n % NTL_BITS_PER_LONG; + if (p != 0) + xp[wx-1] &= (1UL << p) - 1UL; + } +} + + +void add(GF2X& c, const GF2X& a, long b) +{ + c = a; + if (b & 1) { + long n = c.xrep.length(); + if (n == 0) + set(c); + else { + c.xrep[0] ^= 1; + if (n == 1 && !c.xrep[0]) c.xrep.SetLength(0); + } + } +} + +void add(GF2X& c, const GF2X& a, GF2 b) +{ + add(c, a, rep(b)); +} + + +void MulTrunc(GF2X& c, const GF2X& a, const GF2X& b, long n) +{ + NTL_GF2XRegister(t); + + mul(t, a, b); + trunc(c, t, n); +} + +void SqrTrunc(GF2X& c, const GF2X& a, long n) +{ + NTL_GF2XRegister(t); + + sqr(t, a); + trunc(c, t, n); +} + + +long divide(GF2X& q, const GF2X& a, const GF2X& b) +{ + if (IsZero(b)) { + if (IsZero(a)) { + clear(q); + return 1; + } + else + return 0; + } + + NTL_GF2XRegister(lq); + NTL_GF2XRegister(r); + + DivRem(lq, r, a, b); + if (!IsZero(r)) return 0; + q = lq; + return 1; +} + +long divide(const GF2X& a, const GF2X& b) +{ + if (IsZero(b)) return IsZero(a); + NTL_GF2XRegister(r); + rem(r, a, b); + if (!IsZero(r)) return 0; + return 1; +} + + + +/*** modular composition routines and data structures ***/ + + +void InnerProduct(GF2X& x, const GF2X& v, long dv, long low, long high, + const vec_GF2X& H, long n, WordVector& t) +{ + long i, j; + + _ntl_ulong *tp = t.elts(); + + for (i = 0; 
i < n; i++) + tp[i] = 0; + + + long w_low = low/NTL_BITS_PER_LONG; + long b_low = low - w_low*NTL_BITS_PER_LONG; + + + const _ntl_ulong *vp = &v.xrep[w_low]; + _ntl_ulong msk = 1UL << b_low; + _ntl_ulong vv = *vp; + + high = min(high, dv); + + i = low; + for (;;) { + if (vv & msk) { + const WordVector& h = H[i-low].xrep; + long m = h.length(); + const _ntl_ulong *hp = h.elts(); + for (j = 0; j < m; j++) + tp[j] ^= hp[j]; + } + + i++; + if (i > high) break; + + msk = msk << 1; + if (!msk) { + msk = 1UL; + vp++; + vv = *vp; + } + } + + x.xrep = t; + x.normalize(); +} + + +void CompMod(GF2X& x, const GF2X& g, const GF2XArgument& A, const GF2XModulus& F) +{ + long dg = deg(g); + if (dg <= 0) { + x = g; + return; + } + + GF2X s, t; + WordVector scratch(INIT_SIZE, F.size); + + long m = A.H.length() - 1; + long l = (((dg+1)+m-1)/m) - 1; + + InnerProduct(t, g, dg, l*m, l*m + m - 1, A.H, F.size, scratch); + for (long i = l-1; i >= 0; i--) { + InnerProduct(s, g, dg, i*m, i*m + m - 1, A.H, F.size, scratch); + MulMod(t, t, A.H[m], F); + add(t, t, s); + } + + x = t; +} + +void build(GF2XArgument& A, const GF2X& h, const GF2XModulus& F, long m) +{ + if (m <= 0 || deg(h) >= F.n) LogicError("build GF2XArgument: bad args"); + + if (m > F.n) m = F.n; + + long i; + + A.H.SetLength(m+1); + + set(A.H[0]); + A.H[1] = h; + for (i = 2; i <= m; i++) + MulMod(A.H[i], A.H[i-1], h, F); +} + + +void CompMod(GF2X& x, const GF2X& g, const GF2X& h, const GF2XModulus& F) + // x = g(h) mod f +{ + long m = SqrRoot(deg(g)+1); + + if (m == 0) { + clear(x); + return; + } + + GF2XArgument A; + + build(A, h, F, m); + + CompMod(x, g, A, F); +} + + + + +void Comp2Mod(GF2X& x1, GF2X& x2, const GF2X& g1, const GF2X& g2, + const GF2X& h, const GF2XModulus& F) + +{ + long m = SqrRoot(deg(g1) + deg(g2) + 2); + + if (m == 0) { + clear(x1); + clear(x2); + return; + } + + GF2XArgument A; + + build(A, h, F, m); + + GF2X xx1, xx2; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + + x1 = xx1; + x2 = xx2; +} + +void Comp3Mod(GF2X& x1, GF2X& x2, GF2X& x3, + const GF2X& g1, const GF2X& g2, const GF2X& g3, + const GF2X& h, const GF2XModulus& F) + +{ + long m = SqrRoot(deg(g1) + deg(g2) + deg(g3) + 3); + + if (m == 0) { + clear(x1); + clear(x2); + clear(x3); + return; + } + + GF2XArgument A; + + build(A, h, F, m); + + GF2X xx1, xx2, xx3; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + CompMod(xx3, g3, A, F); + + x1 = xx1; + x2 = xx2; + x3 = xx3; +} + + + +void build(GF2XTransMultiplier& B, const GF2X& b, const GF2XModulus& F) +{ + long db = deg(b); + + if (db >= F.n) LogicError("build TransMultiplier: bad args"); + + GF2X t; + + LeftShift(t, b, F.n-1); + div(t, t, F); + + // we optimize for low degree b + + long d; + + d = deg(t); + if (d < 0) + B.shamt_fbi = 0; + else + B.shamt_fbi = F.n-2 - d; + + CopyReverse(B.fbi, t, d); + + if (F.method != GF2X_MOD_TRI && F.method != GF2X_MOD_PENT) { + + // The following code optimizes the case when + // f = X^n + low degree poly + + trunc(t, F.f, F.n); + d = deg(t); + if (d < 0) + B.shamt = 0; + else + B.shamt = d; + + CopyReverse(B.f0, t, d); + } + + + if (db < 0) + B.shamt_b = 0; + else + B.shamt_b = db; + + CopyReverse(B.b, b, db); +} + +void TransMulMod(GF2X& x, const GF2X& a, const GF2XTransMultiplier& B, + const GF2XModulus& F) +{ + if (deg(a) >= F.n) LogicError("TransMulMod: bad args"); + + NTL_GF2XRegister(t1); + NTL_GF2XRegister(t2); + NTL_GF2XRegister(t3); + + mul(t1, a, B.b); + RightShift(t1, t1, B.shamt_b); + + if (F.method == GF2X_MOD_TRI) { + RightShift(t2, a, F.k3); + add(t2, 
t2, a); + } + else if (F.method == GF2X_MOD_PENT) { + RightShift(t2, a, F.k3); + RightShift(t3, a, F.k2); + add(t2, t2, t3); + RightShift(t3, a, F.k1); + add(t2, t2, t3); + add(t2, t2, a); + } + else { + mul(t2, a, B.f0); + RightShift(t2, t2, B.shamt); + } + + trunc(t2, t2, F.n-1); + + mul(t2, t2, B.fbi); + if (B.shamt_fbi > 0) LeftShift(t2, t2, B.shamt_fbi); + trunc(t2, t2, F.n-1); + MulByX(t2, t2); + + add(x, t1, t2); +} + +void UpdateMap(vec_GF2& x, const vec_GF2& a, const GF2XTransMultiplier& B, + const GF2XModulus& F) +{ + NTL_GF2XRegister(xx); + NTL_GF2XRegister(aa); + conv(aa, a); + TransMulMod(xx, aa, B, F); + conv(x, xx); +} + + +void ProjectPowers(GF2X& x, const GF2X& a, long k, const GF2XArgument& H, + const GF2XModulus& F) +{ + long n = F.n; + + if (deg(a) >= n || k < 0) + LogicError("ProjectPowers: bad args"); + + if (NTL_OVERFLOW(k, 1, 0)) + ResourceError("ProjectPowers: excessive parameter"); + + long m = H.H.length()-1; + long l = (k+m-1)/m - 1; + + GF2XTransMultiplier M; + build(M, H.H[m], F); + + GF2X s; + s = a; + + x.SetMaxLength(k); + clear(x); + + long i; + + for (i = 0; i <= l; i++) { + long m1 = min(m, k-i*m); + for (long j = 0; j < m1; j++) + SetCoeff(x, i*m+j, InnerProduct(H.H[j].xrep, s.xrep)); + if (i < l) + TransMulMod(s, s, M, F); + } +} + + +void ProjectPowers(vec_GF2& x, const vec_GF2& a, long k, + const GF2XArgument& H, const GF2XModulus& F) +{ + GF2X xx; + ProjectPowers(xx, to_GF2X(a), k, H, F); + VectorCopy(x, xx, k); +} + + +void ProjectPowers(GF2X& x, const GF2X& a, long k, const GF2X& h, + const GF2XModulus& F) +{ + if (deg(a) >= F.n || k < 0) LogicError("ProjectPowers: bad args"); + + if (k == 0) { + clear(x); + return; + } + + long m = SqrRoot(k); + + GF2XArgument H; + build(H, h, F, m); + + ProjectPowers(x, a, k, H, F); +} + +void ProjectPowers(vec_GF2& x, const vec_GF2& a, long k, const GF2X& H, + const GF2XModulus& F) +{ + GF2X xx; + ProjectPowers(xx, to_GF2X(a), k, H, F); + VectorCopy(x, xx, k); +} + + +void OldMinPolyInternal(GF2X& h, const GF2X& x, long m) +{ + GF2X a, b, r, s; + GF2X a_in, b_in; + + if (IsZero(x)) { + set(h); + return; + } + + clear(a_in); + SetCoeff(a_in, 2*m); + + CopyReverse(b_in, x, 2*m-1); + + a.xrep.SetMaxLength(a_in.xrep.length()+1); + b.xrep.SetMaxLength(b_in.xrep.length()+1); + + long max_sz = max(a_in.xrep.length(), b_in.xrep.length()); + r.xrep.SetLength(max_sz+1); + s.xrep.SetLength(max_sz+1); + + _ntl_ulong *rp = r.xrep.elts(); + _ntl_ulong *sp = s.xrep.elts(); + + long i; + for (i = 0; i <= max_sz; i++) { + rp[i] = sp[i] = 0; + } + + sp[0] = 1; + + long sr = 0; + long ss = 1; + + a = a_in; + b = b_in; + + _ntl_ulong *ap = a.xrep.elts(); + _ntl_ulong *bp = b.xrep.elts(); + + long da = deg(a); + long wa = da/NTL_BITS_PER_LONG; + long ba = da - wa*NTL_BITS_PER_LONG; + + long db = deg(b); + long wb = db/NTL_BITS_PER_LONG; + long bb = db - wb*NTL_BITS_PER_LONG; + + long parity = 0; + + for (;;) { + if (da < db) { + swap(ap, bp); + swap(da, db); + swap(wa, wb); + swap(ba, bb); + parity = 1 - parity; + + swap(rp, sp); + swap(sr, ss); + } + + // da >= db + + if (db < m) break; + + ShiftAdd(ap, bp, wb+1, da-db); + ShiftAdd(rp, sp, ss, da-db); + long t = ss + (da-db+NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG; + if (t > sr) { + while (t > 0 && rp[t-1] == 0) t--; + sr = t; + } + + _ntl_ulong msk = 1UL << ba; + _ntl_ulong aa = ap[wa]; + + while ((aa & msk) == 0) { + da--; + msk = msk >> 1; + ba--; + if (!msk) { + wa--; + ba = NTL_BITS_PER_LONG-1; + msk = 1UL << (NTL_BITS_PER_LONG-1); + if (wa < 0) break; + aa = ap[wa]; + } + } + 
} + + a.normalize(); + b.normalize(); + r.normalize(); + s.normalize(); + + if (!parity) { + h = s; + } + else { + h = r; + } +} + + +void DoMinPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F, long m, + const GF2X& R) +{ + GF2X x; + + ProjectPowers(x, R, 2*m, g, F); + MinPolyInternal(h, x, m); +} + +void MinPolySeq(GF2X& h, const vec_GF2& a, long m) +{ + if (m < 0 || NTL_OVERFLOW(m, 1, 0)) LogicError("MinPoly: bad args"); + if (a.length() < 2*m) LogicError("MinPoly: sequence too short"); + GF2X x; + x.xrep = a.rep; + x.normalize(); + MinPolyInternal(h, x, m); +} + +void ProbMinPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F, long m) +{ + long n = F.n; + if (m < 1 || m > n) LogicError("ProbMinPoly: bad args"); + + GF2X R; + random(R, n); + + DoMinPolyMod(h, g, F, m, R); +} + +void ProbMinPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F) +{ + ProbMinPolyMod(h, g, F, F.n); +} + +void MinPolyMod(GF2X& hh, const GF2X& g, const GF2XModulus& F, long m) +{ + GF2X h, h1; + long n = F.n; + if (m < 1 || m > n) LogicError("MinPoly: bad args"); + + /* probabilistically compute min-poly */ + + ProbMinPolyMod(h, g, F, m); + if (deg(h) == m) { hh = h; return; } + CompMod(h1, h, g, F); + if (IsZero(h1)) { hh = h; return; } + + /* not completely successful...must iterate */ + + + GF2X h2, h3; + GF2X R; + GF2XTransMultiplier H1; + + + for (;;) { + random(R, n); + build(H1, h1, F); + TransMulMod(R, R, H1, F); + DoMinPolyMod(h2, g, F, m-deg(h), R); + + mul(h, h, h2); + if (deg(h) == m) { hh = h; return; } + CompMod(h3, h2, g, F); + MulMod(h1, h3, h1, F); + if (IsZero(h1)) { hh = h; return; } + } +} + +void IrredPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F, long m) +{ + if (m < 1 || m > F.n) LogicError("IrredPoly: bad args"); + + GF2X R; + set(R); + + DoMinPolyMod(h, g, F, m, R); +} + + + +void IrredPolyMod(GF2X& h, const GF2X& g, const GF2XModulus& F) +{ + IrredPolyMod(h, g, F, F.n); +} + + + +void MinPolyMod(GF2X& hh, const GF2X& g, const GF2XModulus& F) +{ + MinPolyMod(hh, g, F, F.n); +} + + + +void MulByXMod(GF2X& c, const GF2X& a, const GF2XModulus& F) +{ + long da = deg(a); + long df = deg(F); + if (da >= df) LogicError("MulByXMod: bad args"); + + MulByX(c, a); + + if (da >= 0 && da == df-1) + add(c, c, F); +} + +static +void MulByXModAux(GF2X& c, const GF2X& a, const GF2X& f) +{ + long da = deg(a); + long df = deg(f); + if (da >= df) LogicError("MulByXMod: bad args"); + + MulByX(c, a); + + if (da >= 0 && da == df-1) + add(c, c, f); +} + +void MulByXMod(GF2X& h, const GF2X& a, const GF2X& f) +{ + if (&h == &f) { + GF2X hh; + MulByXModAux(hh, a, f); + h = hh; + } + else + MulByXModAux(h, a, f); +} + + + + +void power(GF2X& x, const GF2X& a, long e) +{ + if (e < 0) { + ArithmeticError("power: negative exponent"); + } + + if (e == 0) { + x = 1; + return; + } + + if (a == 0 || a == 1) { + x = a; + return; + } + + long da = deg(a); + + if (da > (NTL_MAX_LONG-1)/e) + ResourceError("overflow in power"); + + GF2X res; + res.SetMaxLength(da*e + 1); + res = 1; + + long k = NumBits(e); + long i; + + for (i = k - 1; i >= 0; i--) { + sqr(res, res); + if (bit(e, i)) + mul(res, res, a); + } + + x = res; +} + + +static +void FastTraceVec(vec_GF2& S, const GF2XModulus& f) +{ + long n = deg(f); + + if (n <= 0) LogicError("TraceVec: bad args"); + + GF2X x = reverse(-LeftShift(reverse(diff(reverse(f)), n-1), n-1)/f, n-1); + + VectorCopy(S, x, n); + S.put(0, to_GF2(n)); +} + +static +void PlainTraceVec(vec_GF2& S, const GF2X& f) +{ + long n = deg(f); + + if (n <= 0) + LogicError("TraceVec: bad args"); + 
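+ // (n == 0 is already excluded by the check above, so the branch below + // is defensive.) The formula that follows reads the traces off the + // power-sum series rev(f)'/rev(f); the constant term Tr(1) = n mod 2 + // is patched in afterwards via S.put(0, to_GF2(n)).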
+ if (n == 0) { + S.SetLength(0); + return; + } + + GF2X x = reverse(-LeftShift(reverse(diff(reverse(f)), n-1), n-1)/f, n-1); + + VectorCopy(S, x, n); + S.put(0, to_GF2(n)); +} + + +void TraceVec(vec_GF2& S, const GF2X& f) +{ + PlainTraceVec(S, f); +} + +static +void ComputeTraceVec(vec_GF2& S, const GF2XModulus& F) +{ + if (F.method == GF2X_MOD_PLAIN) { + PlainTraceVec(S, F.f); + } + else { + FastTraceVec(S, F); + } +} + +void TraceMod(ref_GF2 x, const GF2X& a, const GF2XModulus& F) +{ + long n = F.n; + + if (deg(a) >= n) + LogicError("trace: bad args"); + + do { // NOTE: thread safe lazy init + Lazy<vec_GF2>::Builder builder(F.tracevec.val()); + if (!builder()) break; + UniquePtr<vec_GF2> p; + p.make(); + ComputeTraceVec(*p, F); + builder.move(p); + } while (0); + + project(x, *F.tracevec.val(), a); +} + +void TraceMod(ref_GF2 x, const GF2X& a, const GF2X& f) +{ + if (deg(a) >= deg(f) || deg(f) <= 0) + LogicError("trace: bad args"); + + project(x, TraceVec(f), a); +} + + + +// New versions of GCD, XGCD, and MinPolyInternal +// and support routines + +class _NTL_GF2XMatrix { +private: + + _NTL_GF2XMatrix(const _NTL_GF2XMatrix&); // disable + GF2X elts[2][2]; + +public: + + _NTL_GF2XMatrix() { } + ~_NTL_GF2XMatrix() { } + + void operator=(const _NTL_GF2XMatrix&); + GF2X& operator() (long i, long j) { return elts[i][j]; } + const GF2X& operator() (long i, long j) const { return elts[i][j]; } +}; + + +void _NTL_GF2XMatrix::operator=(const _NTL_GF2XMatrix& M) +{ + elts[0][0] = M.elts[0][0]; + elts[0][1] = M.elts[0][1]; + elts[1][0] = M.elts[1][0]; + elts[1][1] = M.elts[1][1]; +} + + +static +void mul(GF2X& U, GF2X& V, const _NTL_GF2XMatrix& M) +// (U, V)^T = M*(U, V)^T +{ + GF2X t1, t2, t3; + + mul(t1, M(0,0), U); + mul(t2, M(0,1), V); + add(t3, t1, t2); + mul(t1, M(1,0), U); + mul(t2, M(1,1), V); + add(V, t1, t2); + U = t3; +} + + +static +void mul(_NTL_GF2XMatrix& A, _NTL_GF2XMatrix& B, _NTL_GF2XMatrix& C) +// A = B*C, B and C are destroyed +{ + GF2X t1, t2; + + mul(t1, B(0,0), C(0,0)); + mul(t2, B(0,1), C(1,0)); + add(A(0,0), t1, t2); + + mul(t1, B(1,0), C(0,0)); + mul(t2, B(1,1), C(1,0)); + add(A(1,0), t1, t2); + + mul(t1, B(0,0), C(0,1)); + mul(t2, B(0,1), C(1,1)); + add(A(0,1), t1, t2); + + mul(t1, B(1,0), C(0,1)); + mul(t2, B(1,1), C(1,1)); + add(A(1,1), t1, t2); + + long i, j; + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) { + B(i,j).kill(); + C(i,j).kill(); + } + } +} + +static +void IterHalfGCD(_NTL_GF2XMatrix& M_out, GF2X& U, GF2X& V, long d_red) +{ + M_out(0,0).SetMaxLength(d_red); + M_out(0,1).SetMaxLength(d_red); + M_out(1,0).SetMaxLength(d_red); + M_out(1,1).SetMaxLength(d_red); + + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + long goal = deg(U) - d_red; + + if (deg(V) <= goal) + return; + + GF2X Q, t(INIT_SIZE, d_red); + + while (deg(V) > goal) { + DivRem(Q, U, U, V); + swap(U, V); + + mul(t, Q, M_out(1,0)); + sub(t, M_out(0,0), t); + M_out(0,0) = M_out(1,0); + M_out(1,0) = t; + + mul(t, Q, M_out(1,1)); + sub(t, M_out(0,1), t); + M_out(0,1) = M_out(1,1); + M_out(1,1) = t; + } +} + + + +static +void HalfGCD(_NTL_GF2XMatrix& M_out, const GF2X& U, const GF2X& V, long d_red) +{ + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + return; + } + + + long n = deg(U) - 2*d_red + 2; + if (n < 0) n = 0; + + GF2X U1, V1; + + RightShift(U1, U, n); + RightShift(V1, V, n); + + if (d_red <= NTL_GF2X_HalfGCD_CROSSOVER) { + IterHalfGCD(M_out, U1, V1, d_red); + return; + } + + long d1 = (d_red + 
1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + _NTL_GF2XMatrix M1; + + HalfGCD(M1, U1, V1, d1); + mul(U1, V1, M1); + + + long d2 = deg(V1) - deg(U) + n + d_red; + + if (IsZero(V1) || d2 <= 0) { + M_out = M1; + return; + } + + + GF2X Q; + _NTL_GF2XMatrix M2; + + DivRem(Q, U1, U1, V1); + swap(U1, V1); + + HalfGCD(M2, U1, V1, d2); + + GF2X t(INIT_SIZE, deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,0)); + sub(t, M1(0,0), t); + swap(M1(0,0), M1(1,0)); + swap(M1(1,0), t); + + t.kill(); + + t.SetMaxLength(deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,1)); + sub(t, M1(0,1), t); + swap(M1(0,1), M1(1,1)); + swap(M1(1,1), t); + + t.kill(); + + mul(M_out, M2, M1); +} + +static +void HalfGCD(GF2X& U, GF2X& V) +{ + long d_red = (deg(U)+1)/2; + + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + return; + } + + long du = deg(U); + + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + _NTL_GF2XMatrix M1; + + HalfGCD(M1, U, V, d1); + mul(U, V, M1); + + long d2 = deg(V) - du + d_red; + + if (IsZero(V) || d2 <= 0) { + return; + } + + M1(0,0).kill(); + M1(0,1).kill(); + M1(1,0).kill(); + M1(1,1).kill(); + + + GF2X Q; + + DivRem(Q, U, U, V); + swap(U, V); + + HalfGCD(M1, U, V, d2); + + mul(U, V, M1); +} + + +void GCD(GF2X& d, const GF2X& u, const GF2X& v) +{ + long su = u.xrep.length(); + long sv = v.xrep.length(); + + if (su <= NTL_GF2X_GCD_CROSSOVER/NTL_BITS_PER_LONG && + sv <= NTL_GF2X_GCD_CROSSOVER/NTL_BITS_PER_LONG) { + OldGCD(d, u, v); + return; + } + + GF2X u1, v1; + + u1 = u; + v1 = v; + + long du1 = deg(u1); + long dv1 = deg(v1); + + if (du1 == dv1) { + if (IsZero(u1)) { + clear(d); + return; + } + + rem(v1, v1, u1); + } + else if (du1 < dv1) { + swap(u1, v1); + du1 = dv1; + } + + // deg(u1) > deg(v1) + + while (du1 >= NTL_GF2X_GCD_CROSSOVER && !IsZero(v1)) { + HalfGCD(u1, v1); + + if (!IsZero(v1)) { + rem(u1, u1, v1); + swap(u1, v1); + } + + du1 = deg(u1); + } + + OldGCD(d, u1, v1); +} + +static +void XHalfGCD(_NTL_GF2XMatrix& M_out, GF2X& U, GF2X& V, long d_red) +{ + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + return; + } + + long du = deg(U); + + if (d_red <= NTL_GF2X_HalfGCD_CROSSOVER) { + IterHalfGCD(M_out, U, V, d_red); + return; + } + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + _NTL_GF2XMatrix M1; + + HalfGCD(M1, U, V, d1); + mul(U, V, M1); + + long d2 = deg(V) - du + d_red; + + if (IsZero(V) || d2 <= 0) { + M_out = M1; + return; + } + + + GF2X Q; + _NTL_GF2XMatrix M2; + + DivRem(Q, U, U, V); + swap(U, V); + + XHalfGCD(M2, U, V, d2); + + + GF2X t(INIT_SIZE, deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,0)); + sub(t, M1(0,0), t); + swap(M1(0,0), M1(1,0)); + swap(M1(1,0), t); + + t.kill(); + + t.SetMaxLength(deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,1)); + sub(t, M1(0,1), t); + swap(M1(0,1), M1(1,1)); + swap(M1(1,1), t); + + t.kill(); + + mul(M_out, M2, M1); +} + + + + +void XGCD(GF2X& d, GF2X& s, GF2X& t, const GF2X& a, const GF2X& b) +{ + // GF2 w; + + long sa = a.xrep.length(); + long sb = b.xrep.length(); + + if (sa <= NTL_GF2X_GCD_CROSSOVER/NTL_BITS_PER_LONG && + sb <= NTL_GF2X_GCD_CROSSOVER/NTL_BITS_PER_LONG) { + OldXGCD(d, s, t, a, b); + return; + } + + GF2X U, V, Q; + + U = a; + V = b; + + long flag = 0; + + if (deg(U) == deg(V)) { + DivRem(Q, U, U, V); + swap(U, V); + flag = 1; + } + else if (deg(U) < deg(V)) { + swap(U, V); + flag = 2; + } + + _NTL_GF2XMatrix M; + + XHalfGCD(M, U, V, deg(U)+1); + + d = U; + + + if (flag 
== 0) { + s = M(0,0); + t = M(0,1); + } + else if (flag == 1) { + s = M(0,1); + mul(t, Q, M(0,1)); + sub(t, M(0,0), t); + } + else { /* flag == 2 */ + s = M(0,1); + t = M(0,0); + } + + // normalize + + // inv(w, LeadCoeff(d)); + // mul(d, d, w); + // mul(s, s, w); + // mul(t, t, w); +} + + +void MinPolyInternal(GF2X& h, const GF2X& x, long m) +{ + if (m < NTL_GF2X_BERMASS_CROSSOVER) { + OldMinPolyInternal(h, x, m); + return; + } + + GF2X a, b; + _NTL_GF2XMatrix M; + + SetCoeff(b, 2*m); + CopyReverse(a, x, 2*m-1); + HalfGCD(M, b, a, m+1); + + h = M(1,1); +} + + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/GF2XFactoring.c b/thirdparty/linux/ntl/src/GF2XFactoring.c new file mode 100644 index 0000000000..610cac1d03 --- /dev/null +++ b/thirdparty/linux/ntl/src/GF2XFactoring.c @@ -0,0 +1,964 @@ + +#include + +#include + +NTL_START_IMPL + + +long IterIrredTest(const GF2X& f) +{ + long df = deg(f); + + if (df <= 0) return 0; + if (df == 1) return 1; + + GF2XModulus F; + + build(F, f); + + GF2X h; + SetX(h); + SqrMod(h, h, F); + + long i, d, limit, limit_sqr; + GF2X g, X, t, prod; + + + SetX(X); + + i = 0; + g = h; + d = 1; + limit = 2; + limit_sqr = limit*limit; + + set(prod); + + while (2*d <= df) { + add(t, g, X); + MulMod(prod, prod, t, F); + i++; + if (i == limit_sqr) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + + set(prod); + limit++; + limit_sqr = limit*limit; + i = 0; + } + + d = d + 1; + if (2*d <= deg(f)) { + SqrMod(g, g, F); + } + } + + if (i > 0) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + } + + return 1; +} + + +void SquareFreeDecomp(vec_pair_GF2X_long& u, const GF2X& ff) +{ + GF2X f = ff; + + if (IsZero(f)) LogicError("SquareFreeDecomp: bad args"); + + GF2X r, t, v, tmp1; + long m, j, finished, done; + + u.SetLength(0); + + if (deg(f) == 0) + return; + + m = 1; + finished = 0; + + do { + j = 1; + diff(tmp1, f); + GCD(r, f, tmp1); + div(t, f, r); + + if (deg(t) > 0) { + done = 0; + do { + GCD(v, r, t); + div(tmp1, t, v); + if (deg(tmp1) > 0) append(u, cons(tmp1, j*m)); + if (deg(v) > 0) { + div(r, r, v); + t = v; + j++; + } + else + done = 1; + } while (!done); + if (deg(r) == 0) finished = 1; + } + + if (!finished) { + /* r is a p-th power */ + long p, k, d; + p = 2; + d = deg(r)/p; + clear(f); + for (k = 0; k <= d; k++) + if (coeff(r, k*p) == 1) + SetCoeff(f, k); + + m = m*p; + } + } while (!finished); +} + + + + +static +void AddFactor(vec_pair_GF2X_long& factors, const GF2X& g, long d, long verbose) +{ + if (verbose) + cerr << "degree=" << d << ", number=" << deg(g)/d << "\n"; + append(factors, cons(g, d)); +} + +static +void ProcessTable(GF2X& f, vec_pair_GF2X_long& factors, + const GF2XModulus& F, long limit, const vec_GF2X& tbl, + long d, long verbose) + +{ + if (limit == 0) return; + + if (verbose) cerr << "+"; + + GF2X t1; + + if (limit == 1) { + GCD(t1, f, tbl[0]); + if (deg(t1) > 0) { + AddFactor(factors, t1, d, verbose); + div(f, f, t1); + } + + return; + } + + long i; + + t1 = tbl[0]; + for (i = 1; i < limit; i++) + MulMod(t1, t1, tbl[i], F); + + GCD(t1, f, t1); + + if (deg(t1) == 0) return; + + div(f, f, t1); + + GF2X t2; + + i = 0; + d = d - limit + 1; + + while (2*d <= deg(t1)) { + GCD(t2, tbl[i], t1); + if (deg(t2) > 0) { + AddFactor(factors, t2, d, verbose); + div(t1, t1, t2); + } + + i++; + d++; + } + + if (deg(t1) > 0) + AddFactor(factors, t1, deg(t1), verbose); +} + + +static +void TraceMap(GF2X& w, const GF2X& a, long d, const GF2XModulus& F) + +{ + GF2X y, z; + + y = a; + z = a; + + long i; + + for (i = 1; i < d; i++) { + SqrMod(z, z, F); + 
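+      // z holds a^(2^i) mod f at this point; accumulating it into y gives,
+      // on exit, y = a + a^2 + a^4 + ... + a^(2^(d-1)) mod f -- the trace
+      // map that EDFSplit below uses to draw a random splitting polynomial.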
add(y, y, z); + } + + w = y; +} + + +const long GF2X_BlockingFactor = 40; + +void DDF(vec_pair_GF2X_long& factors, const GF2X& ff, long verbose) +{ + GF2X f = ff; + + if (IsZero(f)) LogicError("DDF: bad args"); + + factors.SetLength(0); + + if (deg(f) == 0) + return; + + if (deg(f) == 1) { + AddFactor(factors, f, 1, verbose); + return; + } + + + long GCDTableSize = GF2X_BlockingFactor; + + GF2XModulus F; + build(F, f); + + long i, d, limit, old_n; + GF2X g, X; + + + vec_GF2X tbl(INIT_SIZE, GCDTableSize); + + SetX(X); + + i = 0; + SqrMod(g, X, F); + d = 1; + limit = GCDTableSize; + + + while (2*d <= deg(f)) { + + old_n = deg(f); + add(tbl[i], g, X); + i++; + if (i == limit) { + ProcessTable(f, factors, F, i, tbl, d, verbose); + i = 0; + } + + d = d + 1; + if (2*d <= deg(f)) { + // we need to go further + + if (deg(f) < old_n) { + // f has changed + + build(F, f); + rem(g, g, F); + } + + SqrMod(g, g, F); + } + } + + ProcessTable(f, factors, F, i, tbl, d-1, verbose); + + if (!IsOne(f)) AddFactor(factors, f, deg(f), verbose); +} + + + + +static +void EDFSplit(GF2X& f1, GF2X& f2, const GF2X& f, long d) +{ + GF2X a, g; + GF2XModulus F; + + build(F, f); + long n = F.n; + + do { + random(a, n); + TraceMap(g, a, d, F); + } while (deg(g) <= 0); + + GCD(f1, f, g); + div(f2, f, f1); +} + +static +void RecEDF(vec_GF2X& factors, const GF2X& f, long d) +{ + if (deg(f) == d) { + append(factors, f); + return; + } + + GF2X f1, f2; + + EDFSplit(f1, f2, f, d); + RecEDF(factors, f1, d); + RecEDF(factors, f2, d); +} + + +void EDF(vec_GF2X& factors, const GF2X& ff, long d, long verbose) + +{ + GF2X f = ff; + + if (IsZero(f)) LogicError("EDF: bad args"); + + long n = deg(f); + long r = n/d; + + if (r == 0) { + factors.SetLength(0); + return; + } + + if (r == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + if (d == 1) { + // factors are X and X+1 + + factors.SetLength(2); + SetX(factors[0]); + SetX(factors[1]); + SetCoeff(factors[1], 0); + return; + } + + + double t; + if (verbose) { + cerr << "computing EDF(" << d << "," << r << ")..."; + t = GetTime(); + } + + factors.SetLength(0); + + RecEDF(factors, f, d); + + if (verbose) cerr << (GetTime()-t) << "\n"; +} + + +void SFCanZass(vec_GF2X& factors, const GF2X& ff, long verbose) +{ + GF2X f = ff; + + if (IsZero(f)) LogicError("SFCanZass: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + factors.SetLength(0); + + double t; + + + vec_pair_GF2X_long u; + if (verbose) { cerr << "computing DDF..."; t = GetTime(); } + DDF(u, f, verbose); + if (verbose) { + t = GetTime()-t; + cerr << "DDF time: " << t << "\n"; + } + + vec_GF2X v; + + long i; + for (i = 0; i < u.length(); i++) { + const GF2X& g = u[i].a; + long d = u[i].b; + long r = deg(g)/d; + + if (r == 1) { + // g is already irreducible + + append(factors, g); + } + else { + // must perform EDF + + EDF(v, g, d, verbose); + append(factors, v); + } + } +} + +void CanZass(vec_pair_GF2X_long& factors, const GF2X& f, long verbose) +{ + if (IsZero(f)) + LogicError("CanZass: bad args"); + + double t; + vec_pair_GF2X_long sfd; + vec_GF2X x; + + + if (verbose) { cerr << "square-free decomposition..."; t = GetTime(); } + SquareFreeDecomp(sfd, f); + if (verbose) cerr << (GetTime()-t) << "\n"; + + factors.SetLength(0); + + long i, j; + + for (i = 0; i < sfd.length(); i++) { + if (verbose) { + cerr << "factoring multiplicity " << sfd[i].b + << ", deg = " << deg(sfd[i].a) << "\n"; + } + + SFCanZass(x, sfd[i].a, 
verbose); + + for (j = 0; j < x.length(); j++) + append(factors, cons(x[j], sfd[i].b)); + } +} + +void mul(GF2X& f, const vec_pair_GF2X_long& v) +{ + long i, j, n; + + n = 0; + for (i = 0; i < v.length(); i++) + n += v[i].b*deg(v[i].a); + + GF2X g; + + set(g); + for (i = 0; i < v.length(); i++) + for (j = 0; j < v[i].b; j++) { + mul(g, g, v[i].a); + } + + f = g; +} + + + +static +void ConvertBits(GF2X& x, _ntl_ulong b) +{ + clear(x); + long i; + + for (i = NTL_BITS_PER_LONG-1; i >= 0; i--) + if (b & (1UL << i)) + SetCoeff(x, i); + +} + +void BuildIrred(GF2X& f, long n) +{ + if (n <= 0) + LogicError("BuildIrred: n must be positive"); + + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in BuildIrred"); + + if (n == 1) { + SetX(f); + return; + } + + GF2X g; + + _ntl_ulong i; + + i = 0; + do { + ConvertBits(g, 2*i+1); + SetCoeff(g, n); + i++; + } while (!IterIrredTest(g)); + + f = g; + +} + +void BuildRandomIrred(GF2X& f, const GF2X& g) +{ + GF2XModulus G; + GF2X h, ff; + + build(G, g); + do { + random(h, deg(g)); + IrredPolyMod(ff, h, G); + } while (deg(ff) < deg(g)); + + f = ff; +} + + + + +static int GF2X_irred_tab[][3] = + +{{0,0,0}, {0,0,0}, +{1,0,0}, {1,0,0}, {1,0,0}, {2,0,0}, {1,0,0}, {1,0,0}, +{4,3,1}, {1,0,0}, {3,0,0}, {2,0,0}, {3,0,0}, {4,3,1}, +{5,0,0}, {1,0,0}, {5,3,1}, {3,0,0}, {3,0,0}, {5,2,1}, +{3,0,0}, {2,0,0}, {1,0,0}, {5,0,0}, {4,3,1}, {3,0,0}, +{4,3,1}, {5,2,1}, {1,0,0}, {2,0,0}, {1,0,0}, {3,0,0}, +{7,3,2}, {10,0,0}, {7,0,0}, {2,0,0}, {9,0,0}, {6,4,1}, +{6,5,1}, {4,0,0}, {5,4,3}, {3,0,0}, {7,0,0}, {6,4,3}, +{5,0,0}, {4,3,1}, {1,0,0}, {5,0,0}, {5,3,2}, {9,0,0}, +{4,3,2}, {6,3,1}, {3,0,0}, {6,2,1}, {9,0,0}, {7,0,0}, +{7,4,2}, {4,0,0}, {19,0,0}, {7,4,2}, {1,0,0}, {5,2,1}, +{29,0,0}, {1,0,0}, {4,3,1}, {18,0,0}, {3,0,0}, {5,2,1}, +{9,0,0}, {6,5,2}, {5,3,1}, {6,0,0}, {10,9,3}, {25,0,0}, +{35,0,0}, {6,3,1}, {21,0,0}, {6,5,2}, {6,5,3}, {9,0,0}, +{9,4,2}, {4,0,0}, {8,3,1}, {7,4,2}, {5,0,0}, {8,2,1}, +{21,0,0}, {13,0,0}, {7,6,2}, {38,0,0}, {27,0,0}, {8,5,1}, +{21,0,0}, {2,0,0}, {21,0,0}, {11,0,0}, {10,9,6}, {6,0,0}, +{11,0,0}, {6,3,1}, {15,0,0}, {7,6,1}, {29,0,0}, {9,0,0}, +{4,3,1}, {4,0,0}, {15,0,0}, {9,7,4}, {17,0,0}, {5,4,2}, +{33,0,0}, {10,0,0}, {5,4,3}, {9,0,0}, {5,3,2}, {8,7,5}, +{4,2,1}, {5,2,1}, {33,0,0}, {8,0,0}, {4,3,1}, {18,0,0}, +{6,2,1}, {2,0,0}, {19,0,0}, {7,6,5}, {21,0,0}, {1,0,0}, +{7,2,1}, {5,0,0}, {3,0,0}, {8,3,2}, {17,0,0}, {9,8,2}, +{57,0,0}, {11,0,0}, {5,3,2}, {21,0,0}, {8,7,1}, {8,5,3}, +{15,0,0}, {10,4,1}, {21,0,0}, {5,3,2}, {7,4,2}, {52,0,0}, +{71,0,0}, {14,0,0}, {27,0,0}, {10,9,7}, {53,0,0}, {3,0,0}, +{6,3,2}, {1,0,0}, {15,0,0}, {62,0,0}, {9,0,0}, {6,5,2}, +{8,6,5}, {31,0,0}, {5,3,2}, {18,0,0}, {27,0,0}, {7,6,3}, +{10,8,7}, {9,8,3}, {37,0,0}, {6,0,0}, {15,3,2}, {34,0,0}, +{11,0,0}, {6,5,2}, {1,0,0}, {8,5,2}, {13,0,0}, {6,0,0}, +{11,3,2}, {8,0,0}, {31,0,0}, {4,2,1}, {3,0,0}, {7,6,1}, +{81,0,0}, {56,0,0}, {9,8,7}, {24,0,0}, {11,0,0}, {7,6,5}, +{6,5,2}, {6,5,2}, {8,7,6}, {9,0,0}, {7,2,1}, {15,0,0}, +{87,0,0}, {8,3,2}, {3,0,0}, {9,4,2}, {9,0,0}, {34,0,0}, +{5,3,2}, {14,0,0}, {55,0,0}, {8,7,1}, {27,0,0}, {9,5,2}, +{10,9,5}, {43,0,0}, {9,3,1}, {6,0,0}, {7,0,0}, {11,10,8}, +{105,0,0}, {6,5,2}, {73,0,0}, {23,0,0}, {7,3,1}, {45,0,0}, +{11,0,0}, {8,4,1}, {7,0,0}, {8,6,2}, {5,4,2}, {33,0,0}, +{9,8,3}, {32,0,0}, {10,7,3}, {10,9,4}, {113,0,0}, {10,4,1}, +{8,7,6}, {26,0,0}, {9,4,2}, {74,0,0}, {31,0,0}, {9,6,1}, +{5,0,0}, {7,4,1}, {73,0,0}, {36,0,0}, {8,5,3}, {70,0,0}, +{95,0,0}, {8,5,1}, {111,0,0}, {6,4,1}, {11,2,1}, {82,0,0}, +{15,14,10}, {35,0,0}, {103,0,0}, {7,4,2}, 
{15,0,0}, {46,0,0}, +{7,2,1}, {52,0,0}, {10,5,2}, {12,0,0}, {71,0,0}, {10,6,2}, +{15,0,0}, {7,6,4}, {9,8,4}, {93,0,0}, {9,6,2}, {42,0,0}, +{47,0,0}, {8,6,3}, {25,0,0}, {7,6,1}, {53,0,0}, {58,0,0}, +{9,3,2}, {23,0,0}, {67,0,0}, {11,10,9}, {63,0,0}, {12,6,3}, +{5,0,0}, {5,0,0}, {9,5,2}, {93,0,0}, {35,0,0}, {12,7,5}, +{53,0,0}, {10,7,5}, {69,0,0}, {71,0,0}, {11,10,1}, {21,0,0}, +{5,3,2}, {12,11,5}, {37,0,0}, {11,6,1}, {33,0,0}, {48,0,0}, +{7,3,2}, {5,0,0}, {11,8,4}, {11,6,4}, {5,0,0}, {9,5,2}, +{41,0,0}, {1,0,0}, {11,2,1}, {102,0,0}, {7,3,1}, {8,4,2}, +{15,0,0}, {10,6,4}, {93,0,0}, {7,5,3}, {9,7,4}, {79,0,0}, +{15,0,0}, {10,9,1}, {63,0,0}, {7,4,2}, {45,0,0}, {36,0,0}, +{4,3,1}, {31,0,0}, {67,0,0}, {10,3,1}, {51,0,0}, {10,5,2}, +{10,3,1}, {34,0,0}, {8,3,1}, {50,0,0}, {99,0,0}, {10,6,2}, +{89,0,0}, {2,0,0}, {5,2,1}, {10,7,2}, {7,4,1}, {55,0,0}, +{4,3,1}, {16,10,7}, {45,0,0}, {10,8,6}, {125,0,0}, {75,0,0}, +{7,2,1}, {22,0,0}, {63,0,0}, {11,10,3}, {103,0,0}, {6,5,2}, +{53,0,0}, {34,0,0}, {13,11,6}, {69,0,0}, {99,0,0}, {6,5,1}, +{10,9,7}, {11,10,2}, {57,0,0}, {68,0,0}, {5,3,2}, {7,4,1}, +{63,0,0}, {8,5,3}, {9,0,0}, {9,6,5}, {29,0,0}, {21,0,0}, +{7,3,2}, {91,0,0}, {139,0,0}, {8,3,2}, {111,0,0}, {8,7,2}, +{8,6,5}, {16,0,0}, {8,7,5}, {41,0,0}, {43,0,0}, {10,8,5}, +{47,0,0}, {5,2,1}, {81,0,0}, {90,0,0}, {12,3,2}, {6,0,0}, +{83,0,0}, {8,7,1}, {159,0,0}, {10,9,5}, {9,0,0}, {28,0,0}, +{13,10,6}, {7,0,0}, {135,0,0}, {11,6,5}, {25,0,0}, {12,7,6}, +{7,6,2}, {26,0,0}, {5,3,2}, {152,0,0}, {171,0,0}, {9,8,5}, +{65,0,0}, {13,8,2}, {141,0,0}, {71,0,0}, {5,3,2}, {87,0,0}, +{10,4,3}, {12,10,3}, {147,0,0}, {10,7,6}, {13,0,0}, {102,0,0}, +{9,5,2}, {107,0,0}, {199,0,0}, {15,5,4}, {7,0,0}, {5,4,2}, +{149,0,0}, {25,0,0}, {9,7,2}, {12,0,0}, {63,0,0}, {11,6,5}, +{105,0,0}, {10,8,7}, {14,6,1}, {120,0,0}, {13,4,3}, {33,0,0}, +{12,11,5}, {12,9,5}, {165,0,0}, {6,2,1}, {65,0,0}, {49,0,0}, +{4,3,1}, {7,0,0}, {7,5,2}, {10,6,1}, {81,0,0}, {7,6,4}, +{105,0,0}, {73,0,0}, {11,6,4}, {134,0,0}, {47,0,0}, {16,10,1}, +{6,5,4}, {15,6,4}, {8,6,1}, {38,0,0}, {18,9,6}, {16,0,0}, +{203,0,0}, {12,5,2}, {19,0,0}, {7,6,1}, {73,0,0}, {93,0,0}, +{19,18,13}, {31,0,0}, {14,11,6}, {11,6,1}, {27,0,0}, {9,5,2}, +{9,0,0}, {1,0,0}, {11,3,2}, {200,0,0}, {191,0,0}, {9,8,4}, +{9,0,0}, {16,15,7}, {121,0,0}, {104,0,0}, {15,9,6}, {138,0,0}, +{9,6,5}, {9,6,4}, {105,0,0}, {17,16,6}, {81,0,0}, {94,0,0}, +{4,3,1}, {83,0,0}, {219,0,0}, {11,6,3}, {7,0,0}, {10,5,3}, +{17,0,0}, {76,0,0}, {16,5,2}, {78,0,0}, {155,0,0}, {11,6,5}, +{27,0,0}, {5,4,2}, {8,5,4}, {3,0,0}, {15,14,6}, {156,0,0}, +{23,0,0}, {13,6,3}, {9,0,0}, {8,7,3}, {69,0,0}, {10,0,0}, +{8,5,2}, {26,0,0}, {67,0,0}, {14,7,4}, {21,0,0}, {12,10,2}, +{33,0,0}, {79,0,0}, {15,11,2}, {32,0,0}, {39,0,0}, {13,6,2}, +{167,0,0}, {6,4,1}, {97,0,0}, {47,0,0}, {11,6,2}, {42,0,0}, +{10,7,3}, {10,5,4}, {1,0,0}, {4,3,2}, {161,0,0}, {8,6,2}, +{7,5,3}, {94,0,0}, {195,0,0}, {10,5,4}, {9,0,0}, {13,10,4}, +{8,6,1}, {16,0,0}, {8,3,1}, {122,0,0}, {8,2,1}, {13,7,4}, +{10,5,3}, {16,4,3}, {193,0,0}, {135,0,0}, {19,16,9}, {39,0,0}, +{10,8,7}, {10,9,4}, {153,0,0}, {7,6,5}, {73,0,0}, {34,0,0}, +{11,9,6}, {71,0,0}, {11,4,2}, {14,7,3}, {163,0,0}, {11,6,1}, +{153,0,0}, {28,0,0}, {15,7,6}, {77,0,0}, {67,0,0}, {10,5,2}, +{12,8,1}, {10,6,4}, {13,0,0}, {146,0,0}, {13,4,3}, {25,0,0}, +{23,22,16}, {12,9,7}, {237,0,0}, {13,7,6}, {85,0,0}, {130,0,0}, +{14,13,3}, {88,0,0}, {7,5,2}, {11,6,1}, {35,0,0}, {10,4,3}, +{93,0,0}, {9,6,4}, {13,6,3}, {86,0,0}, {19,0,0}, {9,2,1}, +{273,0,0}, {14,12,9}, {7,6,1}, {30,0,0}, {9,5,2}, {201,0,0}, +{215,0,0}, 
{6,4,3}, {105,0,0}, {10,7,5}, {165,0,0}, {105,0,0}, +{19,13,6}, {31,0,0}, {127,0,0}, {10,4,2}, {81,0,0}, {19,10,4}, +{45,0,0}, {211,0,0}, {19,10,3}, {200,0,0}, {295,0,0}, {9,8,5}, +{9,0,0}, {12,6,5}, {297,0,0}, {68,0,0}, {11,6,5}, {133,0,0}, +{251,0,0}, {13,8,4}, {223,0,0}, {6,5,2}, {7,4,2}, {307,0,0}, +{9,2,1}, {101,0,0}, {39,0,0}, {14,10,4}, {217,0,0}, {14,9,1}, +{6,5,1}, {16,0,0}, {14,3,2}, {11,0,0}, {119,0,0}, {11,3,2}, +{11,6,5}, {11,8,4}, {249,0,0}, {5,0,0}, {13,3,1}, {37,0,0}, +{3,0,0}, {14,0,0}, {93,0,0}, {10,8,7}, {33,0,0}, {88,0,0}, +{7,5,4}, {38,0,0}, {55,0,0}, {15,4,2}, {11,0,0}, {12,11,4}, +{21,0,0}, {107,0,0}, {11,9,8}, {33,0,0}, {10,7,2}, {18,7,3}, +{147,0,0}, {5,4,2}, {153,0,0}, {15,0,0}, {11,6,5}, {28,0,0}, +{11,7,4}, {6,3,1}, {31,0,0}, {8,4,3}, {15,5,3}, {66,0,0}, +{23,16,9}, {11,9,3}, {171,0,0}, {11,6,1}, {209,0,0}, {4,3,1}, +{197,0,0}, {13,0,0}, {19,14,6}, {14,0,0}, {79,0,0}, {13,6,2}, +{299,0,0}, {15,8,2}, {169,0,0}, {177,0,0}, {23,10,2}, {267,0,0}, +{215,0,0}, {15,10,1}, {75,0,0}, {16,4,2}, {37,0,0}, {12,7,1}, +{8,3,2}, {17,0,0}, {12,11,8}, {15,8,5}, {15,0,0}, {4,3,1}, +{13,12,4}, {92,0,0}, {5,4,3}, {41,0,0}, {23,0,0}, {7,4,1}, +{183,0,0}, {16,7,1}, {165,0,0}, {150,0,0}, {9,6,4}, {9,0,0}, +{231,0,0}, {16,10,4}, {207,0,0}, {9,6,5}, {5,0,0}, {180,0,0}, +{4,3,2}, {58,0,0}, {147,0,0}, {8,6,2}, {343,0,0}, {8,7,2}, +{11,6,1}, {44,0,0}, {13,8,6}, {5,0,0}, {347,0,0}, {18,16,8}, +{135,0,0}, {9,8,3}, {85,0,0}, {90,0,0}, {13,11,1}, {258,0,0}, +{351,0,0}, {10,6,4}, {19,0,0}, {7,6,1}, {309,0,0}, {18,0,0}, +{13,10,3}, {158,0,0}, {19,0,0}, {12,10,1}, {45,0,0}, {7,6,1}, +{233,0,0}, {98,0,0}, {11,6,5}, {3,0,0}, {83,0,0}, {16,14,9}, +{6,5,3}, {9,7,4}, {22,19,9}, {168,0,0}, {19,17,4}, {120,0,0}, +{14,5,2}, {17,15,6}, {7,0,0}, {10,8,6}, {185,0,0}, {93,0,0}, +{15,14,7}, {29,0,0}, {375,0,0}, {10,8,3}, {13,0,0}, {17,16,2}, +{329,0,0}, {68,0,0}, {13,9,6}, {92,0,0}, {12,10,3}, {7,6,3}, +{17,10,3}, {5,2,1}, {9,6,1}, {30,0,0}, {9,7,3}, {253,0,0}, +{143,0,0}, {7,4,1}, {9,4,1}, {12,10,4}, {53,0,0}, {25,0,0}, +{9,7,1}, {217,0,0}, {15,13,9}, {14,9,2}, {75,0,0}, {8,7,2}, +{21,0,0}, {7,0,0}, {14,3,2}, {15,0,0}, {159,0,0}, {12,10,8}, +{29,0,0}, {10,3,1}, {21,0,0}, {333,0,0}, {11,8,2}, {52,0,0}, +{119,0,0}, {16,9,7}, {123,0,0}, {15,11,2}, {17,0,0}, {9,0,0}, +{11,6,4}, {38,0,0}, {255,0,0}, {12,10,7}, {189,0,0}, {4,3,1}, +{17,10,7}, {49,0,0}, {13,5,2}, {149,0,0}, {15,0,0}, {14,7,5}, +{10,9,2}, {8,6,5}, {61,0,0}, {54,0,0}, {11,5,1}, {144,0,0}, +{47,0,0}, {11,10,7}, {105,0,0}, {2,0,0}, {105,0,0}, {136,0,0}, +{11,4,1}, {253,0,0}, {111,0,0}, {13,10,5}, {159,0,0}, {10,7,1}, +{7,5,3}, {29,0,0}, {19,10,3}, {119,0,0}, {207,0,0}, {17,15,4}, +{35,0,0}, {14,0,0}, {349,0,0}, {6,3,2}, {21,10,6}, {1,0,0}, +{75,0,0}, {9,5,2}, {145,0,0}, {11,7,6}, {301,0,0}, {378,0,0}, +{13,3,1}, {352,0,0}, {12,7,4}, {12,8,1}, {149,0,0}, {6,5,4}, +{12,9,8}, {11,0,0}, {15,7,5}, {78,0,0}, {99,0,0}, {17,16,12}, +{173,0,0}, {8,7,1}, {13,9,8}, {147,0,0}, {19,18,10}, {127,0,0}, +{183,0,0}, {12,4,1}, {31,0,0}, {11,8,6}, {173,0,0}, {12,0,0}, +{7,5,3}, {113,0,0}, {207,0,0}, {18,15,5}, {1,0,0}, {13,7,6}, +{21,0,0}, {35,0,0}, {12,7,2}, {117,0,0}, {123,0,0}, {12,10,2}, +{143,0,0}, {14,4,1}, {15,9,7}, {204,0,0}, {7,5,1}, {91,0,0}, +{4,2,1}, {8,6,3}, {183,0,0}, {12,10,7}, {77,0,0}, {36,0,0}, +{14,9,6}, {221,0,0}, {7,6,5}, {16,14,13}, {31,0,0}, {16,15,7}, +{365,0,0}, {403,0,0}, {10,3,2}, {11,4,3}, {31,0,0}, {10,9,4}, +{177,0,0}, {16,6,1}, {22,6,5}, {417,0,0}, {15,13,12}, {217,0,0}, +{207,0,0}, {7,5,4}, {10,7,1}, {11,6,1}, {45,0,0}, {24,0,0}, 
+{12,11,9}, {77,0,0}, {21,20,13}, {9,6,5}, {189,0,0}, {8,3,2}, +{13,12,10}, {260,0,0}, {16,9,7}, {168,0,0}, {131,0,0}, {7,6,3}, +{305,0,0}, {10,9,6}, {13,9,4}, {143,0,0}, {12,9,3}, {18,0,0}, +{15,8,5}, {20,9,6}, {103,0,0}, {15,4,2}, {201,0,0}, {36,0,0}, +{9,5,2}, {31,0,0}, {11,7,2}, {6,2,1}, {7,0,0}, {13,6,4}, +{9,8,7}, {19,0,0}, {17,10,6}, {15,0,0}, {9,3,1}, {178,0,0}, +{8,7,6}, {12,6,5}, {177,0,0}, {230,0,0}, {24,9,3}, {222,0,0}, +{3,0,0}, {16,13,12}, {121,0,0}, {10,4,2}, {161,0,0}, {39,0,0}, +{17,15,13}, {62,0,0}, {223,0,0}, {15,12,2}, {65,0,0}, {12,6,3}, +{101,0,0}, {59,0,0}, {5,4,3}, {17,0,0}, {5,3,2}, {13,8,3}, +{10,9,7}, {12,8,2}, {5,4,3}, {75,0,0}, {19,17,8}, {55,0,0}, +{99,0,0}, {10,7,4}, {115,0,0}, {9,8,6}, {385,0,0}, {186,0,0}, +{15,6,3}, {9,4,1}, {12,10,5}, {10,8,1}, {135,0,0}, {5,2,1}, +{317,0,0}, {7,0,0}, {19,6,1}, {294,0,0}, {35,0,0}, {13,12,6}, +{119,0,0}, {98,0,0}, {93,0,0}, {68,0,0}, {21,15,3}, {108,0,0}, +{75,0,0}, {12,6,5}, {411,0,0}, {12,7,2}, {13,7,2}, {21,0,0}, +{15,10,8}, {412,0,0}, {439,0,0}, {10,7,6}, {41,0,0}, {13,9,6}, +{8,5,2}, {10,0,0}, {15,7,2}, {141,0,0}, {159,0,0}, {13,12,10}, +{291,0,0}, {10,9,1}, {105,0,0}, {24,0,0}, {11,2,1}, {198,0,0}, +{27,0,0}, {6,3,1}, {439,0,0}, {10,3,1}, {49,0,0}, {168,0,0}, +{13,11,9}, {463,0,0}, {10,9,3}, {13,9,8}, {15,8,3}, {18,16,8}, +{15,14,11}, {7,0,0}, {19,9,8}, {12,6,3}, {7,4,3}, {15,14,5}, +{8,6,3}, {10,9,7}, {361,0,0}, {230,0,0}, {15,9,6}, {24,0,0}, +{407,0,0}, {16,7,2}, {189,0,0}, {62,0,0}, {189,0,0}, {112,0,0}, +{22,21,10}, {91,0,0}, {79,0,0}, {12,10,5}, {23,0,0}, {7,6,1}, +{57,0,0}, {139,0,0}, {24,15,6}, {14,0,0}, {83,0,0}, {16,9,1}, +{35,0,0}, {9,7,4}, {117,0,0}, {65,0,0}, {21,9,6}, {21,0,0}, +{195,0,0}, {23,11,10}, {327,0,0}, {17,14,3}, {417,0,0}, {13,0,0}, +{15,8,6}, {107,0,0}, {19,10,6}, {18,15,3}, {59,0,0}, {12,10,4}, +{9,7,5}, {283,0,0}, {13,9,6}, {62,0,0}, {427,0,0}, {14,7,3}, +{8,7,4}, {15,8,3}, {105,0,0}, {27,0,0}, {7,3,1}, {103,0,0}, +{551,0,0}, {10,6,1}, {6,4,1}, {11,6,4}, {129,0,0}, {9,0,0}, +{9,4,2}, {277,0,0}, {31,0,0}, {13,12,5}, {141,0,0}, {12,7,3}, +{357,0,0}, {7,2,1}, {11,9,7}, {227,0,0}, {131,0,0}, {7,6,3}, +{23,0,0}, {20,17,3}, {13,4,1}, {90,0,0}, {15,3,2}, {241,0,0}, +{75,0,0}, {13,6,1}, {307,0,0}, {8,7,3}, {245,0,0}, {66,0,0}, +{15,11,2}, {365,0,0}, {18,16,11}, {11,10,1}, {19,0,0}, {8,6,1}, +{189,0,0}, {133,0,0}, {12,7,2}, {114,0,0}, {27,0,0}, {6,5,1}, +{15,5,2}, {17,14,5}, {133,0,0}, {476,0,0}, {11,9,3}, {16,0,0}, +{375,0,0}, {15,8,6}, {25,0,0}, {17,11,6}, {77,0,0}, {87,0,0}, +{5,3,2}, {134,0,0}, {171,0,0}, {13,8,4}, {75,0,0}, {8,3,1}, +{233,0,0}, {196,0,0}, {9,8,7}, {173,0,0}, {15,14,12}, {13,6,5}, +{281,0,0}, {9,8,2}, {405,0,0}, {114,0,0}, {15,9,6}, {171,0,0}, +{287,0,0}, {8,4,2}, {43,0,0}, {4,2,1}, {513,0,0}, {273,0,0}, +{11,10,6}, {118,0,0}, {243,0,0}, {14,7,1}, {203,0,0}, {9,5,2}, +{257,0,0}, {302,0,0}, {27,25,9}, {393,0,0}, {91,0,0}, {12,10,6}, +{413,0,0}, {15,14,9}, {18,16,1}, {255,0,0}, {12,9,7}, {234,0,0}, +{167,0,0}, {16,13,10}, {27,0,0}, {15,6,2}, {433,0,0}, {105,0,0}, +{25,10,2}, {151,0,0}, {427,0,0}, {13,9,8}, {49,0,0}, {10,6,4}, +{153,0,0}, {4,0,0}, {17,7,5}, {54,0,0}, {203,0,0}, {16,15,1}, +{16,14,7}, {13,6,1}, {25,0,0}, {14,0,0}, {15,5,3}, {187,0,0}, +{15,13,10}, {13,10,5}, {97,0,0}, {11,10,9}, {19,10,4}, {589,0,0}, +{31,30,2}, {289,0,0}, {9,6,4}, {11,8,6}, {21,0,0}, {7,4,1}, +{7,4,2}, {77,0,0}, {5,3,2}, {119,0,0}, {7,0,0}, {9,5,2}, +{345,0,0}, {17,10,8}, {333,0,0}, {17,0,0}, {16,9,7}, {168,0,0}, +{15,13,4}, {11,10,1}, {217,0,0}, {18,11,10}, {189,0,0}, {216,0,0}, +{12,7,5}, 
{229,0,0}, {231,0,0}, {12,9,3}, {223,0,0}, {10,9,1}, +{153,0,0}, {470,0,0}, {23,16,6}, {99,0,0}, {10,4,3}, {9,8,4}, +{12,10,1}, {14,9,6}, {201,0,0}, {38,0,0}, {15,14,2}, {198,0,0}, +{399,0,0}, {14,11,5}, {75,0,0}, {11,10,1}, {77,0,0}, {16,12,8}, +{20,17,15}, {326,0,0}, {39,0,0}, {14,12,9}, {495,0,0}, {8,3,2}, +{333,0,0}, {476,0,0}, {15,14,2}, {164,0,0}, {19,0,0}, {12,4,2}, +{8,6,3}, {13,12,3}, {12,11,5}, {129,0,0}, {12,9,3}, {52,0,0}, +{10,8,3}, {17,16,2}, {337,0,0}, {12,9,3}, {397,0,0}, {277,0,0}, +{21,11,3}, {73,0,0}, {11,6,1}, {7,5,4}, {95,0,0}, {11,3,2}, +{617,0,0}, {392,0,0}, {8,3,2}, {75,0,0}, {315,0,0}, {15,6,4}, +{125,0,0}, {6,5,2}, {15,9,7}, {348,0,0}, {15,6,1}, {553,0,0}, +{6,3,2}, {10,9,7}, {553,0,0}, {14,10,4}, {237,0,0}, {39,0,0}, +{17,14,6}, {371,0,0}, {255,0,0}, {8,4,1}, {131,0,0}, {14,6,1}, +{117,0,0}, {98,0,0}, {5,3,2}, {56,0,0}, {655,0,0}, {9,5,2}, +{239,0,0}, {11,8,4}, {1,0,0}, {134,0,0}, {15,9,5}, {88,0,0}, +{10,5,3}, {10,9,4}, {181,0,0}, {15,11,2}, {609,0,0}, {52,0,0}, +{19,18,10}, {100,0,0}, {7,6,3}, {15,8,2}, {183,0,0}, {18,7,6}, +{10,9,2}, {130,0,0}, {11,5,1}, {12,0,0}, {219,0,0}, {13,10,7}, +{11,0,0}, {19,9,4}, {129,0,0}, {3,0,0}, {17,15,5}, {300,0,0}, +{17,13,9}, {14,6,5}, {97,0,0}, {13,8,3}, {601,0,0}, {55,0,0}, +{8,3,1}, {92,0,0}, {127,0,0}, {12,11,2}, {81,0,0}, {15,10,8}, +{13,2,1}, {47,0,0}, {14,13,6}, {194,0,0}, {383,0,0}, {25,14,11}, +{125,0,0}, {20,19,16}, {429,0,0}, {282,0,0}, {10,9,6}, {342,0,0}, +{5,3,2}, {15,9,4}, {33,0,0}, {9,4,2}, {49,0,0}, {15,0,0}, +{11,6,2}, {28,0,0}, {103,0,0}, {18,17,8}, {27,0,0}, {11,6,5}, +{33,0,0}, {17,0,0}, {11,10,6}, {387,0,0}, {363,0,0}, {15,10,9}, +{83,0,0}, {7,6,4}, {357,0,0}, {13,12,4}, {14,13,7}, {322,0,0}, +{395,0,0}, {16,5,1}, {595,0,0}, {13,10,3}, {421,0,0}, {195,0,0}, +{11,3,2}, {13,0,0}, {16,12,3}, {14,3,1}, {315,0,0}, {26,10,5}, +{297,0,0}, {52,0,0}, {9,4,2}, {314,0,0}, {243,0,0}, {16,14,9}, +{185,0,0}, {12,5,3}, {13,5,2}, {575,0,0}, {12,9,3}, {39,0,0}, +{311,0,0}, {13,5,2}, {181,0,0}, {20,18,14}, {49,0,0}, {25,0,0}, +{11,4,1}, {77,0,0}, {17,11,10}, {15,14,8}, {21,0,0}, {17,10,5}, +{69,0,0}, {49,0,0}, {11,10,2}, {32,0,0}, {411,0,0}, {21,16,3}, +{11,7,4}, {22,10,3}, {85,0,0}, {140,0,0}, {9,8,6}, {252,0,0}, +{279,0,0}, {9,5,2}, {307,0,0}, {17,10,4}, {13,12,9}, {94,0,0}, +{13,11,4}, {49,0,0}, {17,11,10}, {16,12,5}, {25,0,0}, {6,5,2}, +{12,5,1}, {80,0,0}, {8,3,2}, {246,0,0}, {11,5,2}, {11,10,2}, +{599,0,0}, {18,12,10}, {189,0,0}, {278,0,0}, {10,9,3}, {399,0,0}, +{299,0,0}, {13,10,6}, {277,0,0}, {13,10,6}, {69,0,0}, {220,0,0}, +{13,10,3}, {229,0,0}, {18,11,10}, {16,15,1}, {27,0,0}, {18,9,3}, +{473,0,0}, {373,0,0}, {18,17,7}, {60,0,0}, {207,0,0}, {13,9,8}, +{22,20,13}, {25,18,7}, {225,0,0}, {404,0,0}, {21,6,2}, {46,0,0}, +{6,2,1}, {17,12,6}, {75,0,0}, {4,2,1}, {365,0,0}, {445,0,0}, +{11,7,1}, {44,0,0}, {10,8,5}, {12,5,2}, {63,0,0}, {17,4,2}, +{189,0,0}, {557,0,0}, {19,12,2}, {252,0,0}, {99,0,0}, {10,8,5}, +{65,0,0}, {14,9,3}, {9,0,0}, {119,0,0}, {8,5,2}, {339,0,0}, +{95,0,0}, {12,9,7}, {7,0,0}, {13,10,2}, {77,0,0}, {127,0,0}, +{21,10,7}, {319,0,0}, {667,0,0}, {17,10,3}, {501,0,0}, {18,12,9}, +{9,8,5}, {17,0,0}, {20,9,2}, {341,0,0}, {731,0,0}, {7,6,5}, +{647,0,0}, {10,4,2}, {121,0,0}, {20,0,0}, {21,19,13}, {574,0,0}, +{399,0,0}, {15,10,7}, {85,0,0}, {16,8,3}, {169,0,0}, {15,0,0}, +{12,7,5}, {568,0,0}, {10,7,1}, {18,2,1}, {3,0,0}, {14,3,2}, +{13,7,3}, {643,0,0}, {14,11,1}, {548,0,0}, {783,0,0}, {14,11,1}, +{317,0,0}, {7,6,4}, {153,0,0}, {87,0,0}, {15,13,1}, {231,0,0}, +{11,5,3}, {18,13,7}, {771,0,0}, {30,20,11}, {15,6,3}, 
{103,0,0}, +{13,4,3}, {182,0,0}, {211,0,0}, {17,6,1}, {27,0,0}, {13,12,10}, +{15,14,10}, {17,0,0}, {13,11,5}, {69,0,0}, {11,5,1}, {18,6,1}, +{603,0,0}, {10,4,2}, {741,0,0}, {668,0,0}, {17,15,3}, {147,0,0}, +{227,0,0}, {15,10,9}, {37,0,0}, {16,6,1}, {173,0,0}, {427,0,0}, +{7,5,1}, {287,0,0}, {231,0,0}, {20,15,10}, {18,9,1}, {14,12,5}, +{16,5,1}, {310,0,0}, {18,13,1}, {434,0,0}, {579,0,0}, {18,13,8}, +{45,0,0}, {12,8,3}, {16,9,5}, {53,0,0}, {19,15,10}, {16,0,0}, +{17,6,5}, {17,10,1}, {37,0,0}, {17,10,9}, {21,13,7}, {99,0,0}, +{17,9,6}, {176,0,0}, {271,0,0}, {18,17,13}, {459,0,0}, {21,17,10}, +{6,5,2}, {202,0,0}, {5,4,3}, {90,0,0}, {755,0,0}, {15,7,2}, +{363,0,0}, {8,4,2}, {129,0,0}, {20,0,0}, {11,6,2}, {135,0,0}, +{15,8,7}, {14,13,2}, {10,4,3}, {24,13,10}, {19,14,11}, {31,0,0}, +{15,8,6}, {758,0,0}, {16,11,5}, {16,5,1}, {359,0,0}, {23,18,17}, +{501,0,0}, {29,0,0}, {15,6,3}, {201,0,0}, {459,0,0}, {12,10,7}, +{225,0,0}, {22,17,13}, {24,22,5}, {161,0,0}, {14,11,3}, {52,0,0}, +{19,17,6}, {21,14,12}, {93,0,0}, {13,10,3}, {201,0,0}, {178,0,0}, +{15,12,5}, {250,0,0}, {7,6,4}, {17,13,6}, {221,0,0}, {13,11,8}, +{17,14,9}, {113,0,0}, {17,14,10}, {300,0,0}, {39,0,0}, {18,13,3}, +{261,0,0}, {15,14,8}, {753,0,0}, {8,4,3}, {11,10,5}, {94,0,0}, +{15,13,1}, {10,4,2}, {14,11,10}, {8,6,2}, {461,0,0}, {418,0,0}, +{19,14,6}, {403,0,0}, {267,0,0}, {10,9,2}, {259,0,0}, {20,4,3}, +{869,0,0}, {173,0,0}, {19,18,2}, {369,0,0}, {255,0,0}, {22,12,9}, +{567,0,0}, {20,11,7}, {457,0,0}, {482,0,0}, {6,3,2}, {775,0,0}, +{19,17,6}, {6,4,3}, {99,0,0}, {15,14,8}, {6,5,2}, {165,0,0}, +{8,3,2}, {13,12,10}, {25,21,17}, {17,14,9}, {105,0,0}, {17,15,14}, +{10,3,2}, {250,0,0}, {25,6,5}, {327,0,0}, {279,0,0}, {13,6,5}, +{371,0,0}, {15,9,4}, {117,0,0}, {486,0,0}, {10,9,3}, {217,0,0}, +{635,0,0}, {30,27,17}, {457,0,0}, {16,6,2}, {57,0,0}, {439,0,0}, +{23,21,6}, {214,0,0}, {20,13,6}, {20,16,1}, {819,0,0}, {15,11,8}, +{593,0,0}, {190,0,0}, {17,14,3}, {114,0,0}, {21,18,3}, {10,5,2}, +{12,9,5}, {8,6,3}, {69,0,0}, {312,0,0}, {22,5,2}, {502,0,0}, +{843,0,0}, {15,10,3}, {747,0,0}, {6,5,2}, {101,0,0}, {123,0,0}, +{19,16,9}, {521,0,0}, {171,0,0}, {16,7,2}, {12,6,5}, {22,21,20}, +{545,0,0}, {163,0,0}, {23,18,1}, {479,0,0}, {495,0,0}, {13,6,5}, +{11,0,0}, {17,5,2}, {18,8,1}, {684,0,0}, {7,5,1}, {9,0,0}, +{18,11,3}, {22,20,13}, {273,0,0}, {4,3,2}, {381,0,0}, {51,0,0}, +{18,13,7}, {518,0,0}, {9,5,1}, {14,12,3}, {243,0,0}, {21,17,2}, +{53,0,0}, {836,0,0}, {21,10,2}, {66,0,0}, {12,10,7}, {13,9,8}, +{339,0,0}, {16,11,5}, {901,0,0}, {180,0,0}, {16,13,3}, {49,0,0}, +{6,3,2}, {15,4,1}, {16,13,6}, {18,15,12}, {885,0,0}, {39,0,0}, +{11,9,4}, {688,0,0}, {16,15,7}, {13,10,6}, {13,0,0}, {25,23,12}, +{149,0,0}, {260,0,0}, {11,9,1}, {53,0,0}, {11,0,0}, {12,4,2}, +{9,7,5}, {11,8,1}, {121,0,0}, {261,0,0}, {10,5,2}, {199,0,0}, +{20,4,3}, {17,9,2}, {13,9,4}, {12,8,7}, {253,0,0}, {174,0,0}, +{15,4,2}, {370,0,0}, {9,6,1}, {16,10,9}, {669,0,0}, {20,10,9}, +{833,0,0}, {353,0,0}, {17,13,2}, {29,0,0}, {371,0,0}, {9,8,5}, +{8,7,1}, {19,8,7}, {12,11,10}, {873,0,0}, {26,11,2}, {12,9,1}, +{10,7,2}, {13,6,1}, {235,0,0}, {26,24,19}, {733,0,0}, {778,0,0}, +{12,11,1}, {344,0,0}, {931,0,0}, {16,6,4}, {945,0,0}, {21,19,14}, +{18,13,11}, {67,0,0}, {20,15,10}, {462,0,0}, {14,5,1}, {10,9,6}, +{18,11,10}, {16,9,7}, {477,0,0}, {105,0,0}, {11,3,2}, {468,0,0}, +{23,16,15}, {16,15,6}, {327,0,0}, {23,10,4}, {357,0,0}, {25,0,0}, +{17,16,7}, {31,0,0}, {7,5,2}, {16,7,6}, {277,0,0}, {14,13,6}, +{413,0,0}, {103,0,0}, {15,10,1}, {231,0,0}, {747,0,0}, {5,2,1}, +{113,0,0}, {20,10,7}, {15,9,6}, 
{11,0,0}, {27,22,18}, {91,0,0}, +{51,0,0}, {18,13,12}, {603,0,0}, {10,7,3}, {9,0,0}, {121,0,0}, +{15,14,6}, {17,0,0}, {16,11,2}, {23,15,6}, {279,0,0}, {16,12,6}, +{89,0,0}, {371,0,0}, {17,15,2}, {771,0,0}, {99,0,0}, {7,6,3}, +{21,0,0}, {10,7,5}, {801,0,0}, {26,0,0}, {25,19,14}, {175,0,0}, +{10,7,2}, {20,5,4}, {12,11,1}, {22,5,1}, {165,0,0}, {841,0,0}, +{25,19,17}, {238,0,0}, {11,8,6}, {22,21,4}, {33,0,0}, {8,7,6}, +{14,9,2}, {113,0,0}, {13,11,5}, {311,0,0}, {891,0,0}, {20,16,14}, +{555,0,0}, {23,14,8}, {133,0,0}, {546,0,0}, {6,3,2}, {103,0,0}, +{15,0,0}, {10,7,3}, {307,0,0}, {14,10,1}, {15,12,2}, {367,0,0}, +{13,10,6}, {169,0,0}, {22,21,11}, {12,10,8}, {441,0,0}, {17,12,7}, +{917,0,0}, {205,0,0}, {26,23,13}, {54,0,0}, {459,0,0}, {17,15,4}, +{19,15,4}, {5,4,2}, {9,7,6}, {42,0,0}, {21,15,7}, {330,0,0}, +{20,7,3}, {20,7,2}, {81,0,0}, {19,14,1}, {349,0,0}, {165,0,0}, +{40,35,9}, {274,0,0}, {475,0,0}, {11,10,3}, {93,0,0}, {12,7,4}, +{13,12,2}, {386,0,0}, {7,6,2}, {881,0,0}, {143,0,0}, {9,8,4}, +{71,0,0}, {19,18,3}, {16,11,6}, {155,0,0}, {7,2,1}, {735,0,0}, +{16,8,7}, {9,7,4}, {45,0,0}, {7,6,4}, {12,11,3}, {3,0,0}, +{19,14,13} +}; + + + + +static +long FindTrinom(long n) +{ + if (n < 2) LogicError("tri--bad n"); + + long k; + + for (k = 1; k <= n/2; k++) + if (IterIrredTest(1 + GF2X(k,1) + GF2X(n,1))) + return k; + + return 0; +} + + +static +long FindPent(long n, long& kk2, long& kk1) +{ + if (n < 4) LogicError("pent--bad n"); + + long k1, k2, k3; + + for (k3 = 3; k3 < n; k3++) + for (k2 = 2; k2 < k3; k2++) + for (k1 = 1; k1 < k2; k1++) + if (IterIrredTest(1+GF2X(k1,1)+GF2X(k2,1)+GF2X(k3,1)+GF2X(n,1))) { + kk2 = k2; + kk1 = k1; + return k3; + } + + return 0; +} + +void BuildSparseIrred(GF2X& f, long n) +{ + if (n <= 0) LogicError("SparseIrred: n <= 0"); + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in BuildSparseIrred"); + + if (n == 1) { + SetX(f); + return; + } + + if (n <= 2048) { + if (GF2X_irred_tab[n][1] == 0) { + clear(f); + SetCoeff(f, n); + SetCoeff(f, GF2X_irred_tab[n][0]); + SetCoeff(f, 0); + } + else { + clear(f); + SetCoeff(f, n); + SetCoeff(f, GF2X_irred_tab[n][0]); + SetCoeff(f, GF2X_irred_tab[n][1]); + SetCoeff(f, GF2X_irred_tab[n][2]); + SetCoeff(f, 0); + } + + return; + } + + long k3, k2, k1; + + k3 = FindTrinom(n); + if (k3) { + clear(f); + SetCoeff(f, n); + SetCoeff(f, k3); + SetCoeff(f, 0); + return; + } + + k3 = FindPent(n, k2, k1); + if (k3) { + clear(f); + SetCoeff(f, n); + SetCoeff(f, k3); + SetCoeff(f, k2); + SetCoeff(f, k1); + SetCoeff(f, 0); + return; + } + + // the following is probably of only theoretical value... + // it is reasonable to conjecture that for all n >= 2, + // there is either an irreducible trinomial or pentanomial + // of degree n. 
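+   // Example of the table-driven path above: for n = 8 the entry {4,3,1}
+   // yields f = X^8 + X^4 + X^3 + X + 1, the pentanomial used as the
+   // AES/Rijndael field modulus.  The fallback below can only run for
+   // n > 2048 when both FindTrinom and FindPent fail, which is not known
+   // to ever happen.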
+ + BuildIrred(f, n); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/GF2XTest.c b/thirdparty/linux/ntl/src/GF2XTest.c new file mode 100644 index 0000000000..43acfe0031 --- /dev/null +++ b/thirdparty/linux/ntl/src/GF2XTest.c @@ -0,0 +1,97 @@ + +#include + +NTL_CLIENT + +struct wd { + int amt; + + wd(int x) { amt = x; } +}; + +#define WD(x,y) wd(x) << (y) + +ostream& operator<<(ostream& s, const wd& w) +{ + s.width(w.amt); + return s; +} + +int main() +{ + long n; + GF2X a, b, c, c1, ss, ss1, tt, tt1; + double t; + long iter, i; + + cout << WD(12,"n") << WD(12,"OldGCD") << WD(12,"GCD") << WD(12,"OldXGCD") + << WD(12, "XGCD") << "\n"; + + cout.precision(3); + cout.setf(ios::scientific); + + + for (n = 32; n <= (1L << 18); n = n << 3) { + random(a, n); + random(b, n); + OldGCD(c, a, b); + GCD(c1, a, b); + OldXGCD(c, ss, tt, a, b); + XGCD(c1, ss1, tt1, a, b); + if (c1 != c || ss1 != ss || tt1 != tt || + ss*a + tt*b != c) { + cerr << "**** GF2XTest FAILED!\n"; + return 1; + } + + cout << WD(12,n); + + iter = 0; + do { + iter = iter ? (2*iter) : 1; + t = GetTime(); + for (i = 0; i < iter; i++) + OldGCD(c, a, b); + t = GetTime()-t; + } while (t < 0.5); + + cout << WD(12,t/iter); + + iter = 0; + do { + iter = iter ? (2*iter) : 1; + t = GetTime(); + for (i = 0; i < iter; i++) + GCD(c, a, b); + t = GetTime()-t; + } while (t < 0.5); + + cout << WD(12,t/iter); + + iter = 0; + do { + iter = iter ? (2*iter) : 1; + t = GetTime(); + for (i = 0; i < iter; i++) + OldXGCD(c, ss, tt, a, b); + t = GetTime()-t; + } while (t < 0.5); + + cout << WD(12,t/iter); + + iter = 0; + do { + iter = iter ? (2*iter) : 1; + t = GetTime(); + for (i = 0; i < iter; i++) + XGCD(c, ss, tt, a, b); + t = GetTime()-t; + } while (t < 0.5); + + cout << WD(12,t/iter); + + cout << "\n"; + } + + return 0; +} diff --git a/thirdparty/linux/ntl/src/GF2XTimeTest.c b/thirdparty/linux/ntl/src/GF2XTimeTest.c new file mode 100644 index 0000000000..0a38dc644a --- /dev/null +++ b/thirdparty/linux/ntl/src/GF2XTimeTest.c @@ -0,0 +1,149 @@ + +#include + +#include + +NTL_CLIENT + + +double clean_data(double *t) +{ + double x, y, z; + long i, ix, iy, n; + + x = t[0]; ix = 0; + y = t[0]; iy = 0; + + for (i = 1; i < 5; i++) { + if (t[i] < x) { + x = t[i]; + ix = i; + } + if (t[i] > y) { + y = t[i]; + iy = i; + } + } + + z = 0; n = 0; + for (i = 0; i < 5; i++) { + if (i != ix && i != iy) z+= t[i], n++; + } + + z = z/n; + + return z; +} + +void print_flag() +{ + + +#ifdef NTL_GF2X_ALTCODE +printf("NTL_GF2X_ALTCODE "); +#endif + + +#ifdef NTL_GF2X_ALTCODE1 +printf("NTL_GF2X_ALTCODE1 "); +#endif + + +#ifdef NTL_GF2X_NOINLINE +printf("NTL_GF2X_NOINLINE "); +#endif + +#ifdef NTL_PCLMUL +printf("NTL_PCLMUL "); +#endif + + +printf("\n"); + +} + +int main() +{ + long n, i, j, iter, s, k; + double t; + + SetSeed(ZZ(0)); + + + for (i = 0; i < 10000; i++) { + GF2X a, b, c, d; + long da = RandomBnd(5*NTL_BITS_PER_LONG); + long db = RandomBnd(5*NTL_BITS_PER_LONG); + long dc = RandomBnd(5*NTL_BITS_PER_LONG); + long dd = RandomBnd(5*NTL_BITS_PER_LONG); + random(a, da); random(b, db); random(c, dc); random(d, dd); + + if ((a + b)*(c + d) != c*a + d*a + c*b + d*b) { + printf("999999999999999 "); + print_flag(); + return 0; + } + } + + + n = 16; + s = 56; + + GF2X *a = new GF2X[s]; + GF2X *b = new GF2X[s]; + + GF2X c; + + for (k = 0; k < s; k++) { + random(a[k], (n + (k % 7))*NTL_BITS_PER_LONG); + random(b[k], (n + (k % 8))*NTL_BITS_PER_LONG); + } + + for (k = 0; k < s; k++) mul(c, a[k], b[k]); + + + iter = 1; + + do { + t = GetTime(); + for (i = 0; i < iter; i++) 
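+      // calibration pass: keep doubling iter until one timing run of the
+      // s products takes at least a second, so the per-iteration time
+      // measured below is meaningful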
{ + for (j = 0; j < 1; j++) for (k = 0; k < s; k++) mul(c, a[k], b[k]); + } + t = GetTime() - t; + iter = 2*iter; + } while(t < 1); + + iter = iter/2; + + iter = long((3/t)*iter) + 1; + + double tvec[5]; + long w; + + for (w = 0; w < 5; w++) { + t = GetTime(); + for (i = 0; i < iter; i++) { + for (j = 0; j < 1; j++) for (k = 0; k < s; k++) mul(c, a[k], b[k]); + } + t = GetTime() - t; + tvec[w] = t; + } + + + t = clean_data(tvec); + + t = floor((t/iter)*1e14); + + if (t < 0 || t >= 1e15) + printf("999999999999999 "); + else + printf("%015.0f ", t); + + printf(" [%ld] ", iter); + + print_flag(); + + return 0; +} + + diff --git a/thirdparty/linux/ntl/src/GF2XVec.c b/thirdparty/linux/ntl/src/GF2XVec.c new file mode 100644 index 0000000000..38727e40fd --- /dev/null +++ b/thirdparty/linux/ntl/src/GF2XVec.c @@ -0,0 +1,94 @@ + +#include + +#include + +NTL_START_IMPL + + +void GF2XVec::SetSize(long n, long d) +{ + if (n < 0 || d <= 0) LogicError("bad args to GF2XVec::SetSize()"); + + if (v) + LogicError("illegal GF2XVec initialization"); + + + if (n == 0) { + len = n; + bsize = d; + return; + } + + + GF2XVec tmp; + tmp.len = 0; + tmp.bsize = d; + + + tmp.v = (GF2X*) NTL_MALLOC(n, sizeof(GF2X), 0); + if (!tmp.v) MemoryError(); + + long i = 0; + long m; + long j; + + while (i < n) { + m = WV_BlockConstructAlloc(tmp.v[i].xrep, d, n-i); + for (j = 1; j < m; j++) + WV_BlockConstructSet(tmp.v[i].xrep, tmp.v[i+j].xrep, j); + i += m; + tmp.len = i; + } + + tmp.swap(*this); +} + + +void GF2XVec::kill() +{ + long n = len; + long i = 0; + while (i < n) { + long m = WV_BlockDestroy(v[i].xrep); + i += m; + } + + len = 0; + bsize = 0; + if (v) { + free(v); + v = 0; + } +} + + +GF2XVec& GF2XVec::operator=(const GF2XVec& a) +{ + if (this == &a) return *this; + GF2XVec tmp(a); + tmp.swap(*this); + return *this; +} + +GF2XVec::GF2XVec(const GF2XVec& a) : v(0), len(0), bsize(0) + +{ + SetSize(a.len, a.bsize); + + long i; + for (i = 0; i < a.len; i++) + v[i] = (a.v)[i]; +} + + + +void GF2XVec::swap(GF2XVec& x) +{ + _ntl_swap(v, x.v); + _ntl_swap(len, x.len); + _ntl_swap(bsize, x.bsize); +} + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/G_LLL_FP.c b/thirdparty/linux/ntl/src/G_LLL_FP.c new file mode 100644 index 0000000000..636fc06584 --- /dev/null +++ b/thirdparty/linux/ntl/src/G_LLL_FP.c @@ -0,0 +1,1570 @@ + +#include +#include +#include + + +#include + +NTL_START_IMPL + +static inline +void CheckFinite(double *p) +{ + if (!IsFinite(p)) ResourceError("G_LLL_FP: numbers too big...use G_LLL_XD"); +} + + + + +static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x - y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + if (k > 0) { + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + LeftShift(T, T, k); + sub(A(i), A(i), T); + } + + } + else { + + for (i = 1; i <= n; i++) { + MulSubFrom(A(i), B(i), mu1); + } + + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } +} + + + +#define TR_BND (NTL_FDOUBLE_PRECISION/2.0) +// Just to be safe!! 
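+
+// Row entries are tracked as doubles only while they stay inside
+// (-TR_BND, TR_BND); RowTransformStart/RowTransformFinish below switch a
+// row between the exact vec_ZZ form and its double approximation, and any
+// entry at risk of crossing the bound falls back to exact ZZ arithmetic.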
+ +static double max_abs(double *v, long n) +{ + long i; + double res, t; + + res = 0; + + for (i = 1; i <= n; i++) { + t = fabs(v[i]); + if (t > res) res = t; + } + + return res; +} + + +static void RowTransformStart(double *a, long *in_a, long& in_float, long n) +{ + long i; + long inf = 1; + + for (i = 1; i <= n; i++) { + in_a[i] = (a[i] < TR_BND && a[i] > -TR_BND); + inf = inf & in_a[i]; + } + + in_float = inf; +} + + +static void RowTransformFinish(vec_ZZ& A, double *a, long *in_a) +{ + long n = A.length(); + long i; + + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i]); + } + else { + conv(a[i], A(i)); + CheckFinite(&a[i]); + } + } +} + + +static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1, + double *a, double *b, long *in_a, + double& max_a, double max_b, long& in_float) +// x = x - y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + double mu; + + conv(mu, MU1); + CheckFinite(&mu); + + long n = A.length(); + long i; + + if (in_float) { + double mu_abs = fabs(mu); + if (mu_abs > 0 && max_b > 0 && (mu_abs >= TR_BND || max_b >= TR_BND)) { + in_float = 0; + } + else { + max_a += mu_abs*max_b; + if (max_a >= TR_BND) + in_float = 0; + } + } + + if (in_float) { + if (mu == 1) { + for (i = 1; i <= n; i++) + a[i] -= b[i]; + + return; + } + + if (mu == -1) { + for (i = 1; i <= n; i++) + a[i] += b[i]; + + return; + } + + if (mu == 0) return; + + for (i = 1; i <= n; i++) + a[i] -= mu*b[i]; + + + return; + } + + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i] < TR_BND && a[i] > -TR_BND && + b[i] < TR_BND && b[i] > -TR_BND) { + + a[i] -= b[i]; + } + else { + if (in_a[i]) { + conv(A(i), a[i]); + in_a[i] = 0; + } + + sub(A(i), A(i), B(i)); + } + } + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i] < TR_BND && a[i] > -TR_BND && + b[i] < TR_BND && b[i] > -TR_BND) { + + a[i] += b[i]; + } + else { + if (in_a[i]) { + conv(A(i), a[i]); + in_a[i] = 0; + } + + add(A(i), A(i), B(i)); + } + } + return; + } + + if (MU == 0) return; + + double b_bnd = fabs(TR_BND/mu) - 1; + if (b_bnd < 0) b_bnd = 0; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + if (k > 0) { + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i]); + in_a[i] = 0; + } + + mul(T, B(i), mu1); + LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i] < TR_BND && a[i] > -TR_BND && + b[i] < b_bnd && b[i] > -b_bnd) { + + a[i] -= b[i]*mu; + } + else { + if (in_a[i]) { + conv(A(i), a[i]); + in_a[i] = 0; + } + MulSubFrom(A(i), B(i), mu1); + } + } + } + } + else { + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i]); + in_a[i] = 0; + } + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } +} + +static void RowTransform2(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x + y*MU + +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { 
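+         // general case: T = B(i) * MU, shifted left k bits to restore the
+         // power of two stripped by MakeOdd above (k may be 0), then
+         // A(i) += T, all in exact multiprecision arithmetic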
+ mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } +} + + + +class GivensCache_FP { +public: + GivensCache_FP(long m, long n); + + void flush(); + void selective_flush(long l); + void swap(long l); + void swap(); + void touch(); + void incr(); + + long sz; + + Unique2DArray buf; + UniqueArray bl; + UniqueArray bv; + + long bp; +}; + + +GivensCache_FP::GivensCache_FP(long m, long n) +{ + sz = min(m, n)/10; + if (sz < 2) + sz = 2; + else if (sz > 20) + sz = 20; + + + buf.SetDims(sz, n+1); + bl.SetLength(sz); + bv.SetLength(sz); + + long i; + for (i = 0; i < sz; i++) bl[i] = 0; + for (i = 0; i < sz; i++) bv[i] = 0; + + bp = 0; +} + +void GivensCache_FP::flush() +{ + long i; + for (i = 0; i < sz; i++) bl[i] = 0; +} + +void GivensCache_FP::selective_flush(long l) +{ + long i; + + for (i = 0; i < sz; i++) + if (bl[i] && bv[i] >= l) + bl[i] = 0; +} + +void GivensCache_FP::swap(long l) +{ + long k = bl[bp]; + long i; + + i = 0; + while (i < sz && bl[i] != l) + i++; + + if (i < sz) { + bl[bp] = l; + bl[i] = k; + } + else + bl[bp] = l; + + selective_flush(l); +} + +void GivensCache_FP::swap() +{ + swap(bl[bp] - 1); +} + +void GivensCache_FP::touch() +{ + long k = bl[bp]; + bl[bp] = 0; + selective_flush(k); +} + +void GivensCache_FP::incr() +{ + long k = bl[bp]; + long k1 = k+1; + long i; + + i = 0; + while (i < sz && bl[i] != k1) + i++; + + if (i < sz) { + bp = i; + return; + } + + i = 0; + while (i < sz && bl[i] != 0) + i++; + + if (i < sz) { + bp = i; + return; + } + + long max_val = 0; + long max_index = 0; + for (i = 0; i < sz; i++) { + long t = labs(bl[i]-k1); + if (t > max_val) { + max_val = t; + max_index = i; + } + } + + bp = max_index; + bl[max_index] = 0; +} + + +static +void GivensComputeGS(double **B1, double **mu, double **aux, long k, long n, + GivensCache_FP& cache) +{ + long i, j; + + double c, s, a, b, t; + + double *p = mu[k]; + + double *pp = cache.buf[cache.bp]; + + if (!cache.bl[cache.bp]) { + for (j = 1; j <= n; j++) + pp[j] = B1[k][j]; + + long backoff; + backoff = k/4; + if (backoff < 2) + backoff = 2; + else if (backoff > cache.sz + 2) + backoff = cache.sz + 2; + + long ub = k-(backoff-1); + + for (i = 1; i < ub; i++) { + double *cptr = mu[i]; + double *sptr = aux[i]; + + for (j = n; j > i; j--) { + c = cptr[j]; + s = sptr[j]; + + a = c*pp[j-1] - s*pp[j]; + b = s*pp[j-1] + c*pp[j]; + + pp[j-1] = a; + pp[j] = b; + } + + pp[i] = pp[i]/mu[i][i]; + } + + cache.bl[cache.bp] = k; + cache.bv[cache.bp] = k-backoff; + } + + for (j = 1; j <= n; j++) + p[j] = pp[j]; + + for (i = max(cache.bv[cache.bp]+1, 1); i < k; i++) { + double *cptr = mu[i]; + double *sptr = aux[i]; + + for (j = n; j > i; j--) { + c = cptr[j]; + s = sptr[j]; + + a = c*p[j-1] - s*p[j]; + b = s*p[j-1] + c*p[j]; + + p[j-1] = a; + p[j] = b; + } + + p[i] = p[i]/mu[i][i]; + } + + for (j = n; j > k; j--) { + a = p[j-1]; + b = p[j]; + + if (b == 0) { + c = 1; + s = 0; + } + else if (fabs(b) > fabs(a)) { + t = -a/b; + s = 1/sqrt(1 + t*t); + c = s*t; + } + else { + t = -b/a; + c = 1/sqrt(1 + t*t); + s = c*t; + } + + p[j-1] = c*a - s*b; + p[j] = c; + aux[k][j] = s; + } + + if (k > n+1) LogicError("G_LLL_FP: internal error"); + if (k > n) p[k] = 0; + + for (i = 1; i <= k; i++) + CheckFinite(&p[i]); +} + +static NTL_CHEAP_THREAD_LOCAL double red_fudge = 0; +static NTL_CHEAP_THREAD_LOCAL long log_red = 0; +static NTL_CHEAP_THREAD_LOCAL long verbose = 0; +static NTL_CHEAP_THREAD_LOCAL unsigned long NumSwaps = 0; +static NTL_CHEAP_THREAD_LOCAL double StartTime = 0; +static NTL_CHEAP_THREAD_LOCAL 
double LastTime = 0; + + + +static void G_LLLStatus(long max_k, double t, long m, const mat_ZZ& B) +{ + cerr << "---- G_LLL_FP status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, t-StartTime); + cerr << ", stage: " << max_k; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = t; + +} + +static void init_red_fudge() +{ + long i; + + log_red = long(0.50*NTL_DOUBLE_PRECISION); + red_fudge = 1; + + for (i = log_red; i > 0; i--) + red_fudge = red_fudge*0.5; +} + +static void inc_red_fudge() +{ + + red_fudge = red_fudge * 2; + log_red--; + + + cerr << "G_LLL_FP: warning--relaxing reduction (" << log_red << ")\n"; + + if (log_red < 4) + ResourceError("G_LLL_FP: too much loss of precision...stop!"); +} + + +#if 0 + +static void print_mus(double **mu, long k) +{ + long i; + + for (i = k-1; i >= 1; i--) + cerr << mu[k][i] << " "; + cerr << "\n"; +} + +#endif + + + +static +long ll_G_LLL_FP(mat_ZZ& B, mat_ZZ* U, double delta, long deep, + LLLCheckFct check, double **B1, double **mu, + double **aux, + long m, long init_k, long &quit, GivensCache_FP& cache) +{ + long n = B.NumCols(); + + long i, j, k, Fc1; + ZZ MU; + double mu1; + + double t1; + ZZ T1; + double *tp; + + double half_plus_fudge = 0.5 + red_fudge; + + quit = 0; + k = init_k; + + vec_long in_vec_mem; + in_vec_mem.SetLength(n+1); + long *in_vec = in_vec_mem.elts(); + + UniqueArray max_b_store; + max_b_store.SetLength(m+1); + double *max_b = max_b_store.get(); + + for (i = 1; i <= m; i++) + max_b[i] = max_abs(B1[i], n); + + long in_float; + + long counter; + + long trigger_index; + long small_trigger; + long cnt; + + + long max_k = 0; + + double tt; + + long swap_cnt = 0; + + cache.flush(); + + while (k <= m) { + + if (k > max_k) { + max_k = k; + swap_cnt = 0; + } + + if (verbose) { + tt = GetTime(); + + if (tt > LastTime + LLLStatusInterval) + G_LLLStatus(max_k, tt, m, B); + } + + GivensComputeGS(B1, mu, aux, k, n, cache); + + if (swap_cnt > 200000) { + cerr << "G_LLL_FP: swap loop?\n"; + swap_cnt = 0; + } + + counter = 0; + trigger_index = k; + small_trigger = 0; + cnt = 0; + + long sz=0, new_sz; + + do { + // size reduction + + counter++; + if ((counter & 127) == 0) { + + new_sz = 0; + for (j = 1; j <= n; j++) + new_sz += NumBits(B(k,j)); + + if ((counter >> 7) == 1 || new_sz < sz) { + sz = new_sz; + } + else { + cerr << "G_LLL_FP: warning--infinite loop? 
(" << k << ")\n"; + } + } + + Fc1 = 0; + + for (j = k-1; j >= 1; j--) { + t1 = fabs(mu[k][j]); + if (t1 > half_plus_fudge) { + + + if (!Fc1) { + if (j > trigger_index || + (j == trigger_index && small_trigger)) { + + cnt++; + + if (cnt > 10) { + inc_red_fudge(); + half_plus_fudge = 0.5 + red_fudge; + cnt = 0; + } + } + + trigger_index = j; + small_trigger = (t1 < 4); + + Fc1 = 1; + RowTransformStart(B1[k], in_vec, in_float, n); + } + + + mu1 = mu[k][j]; + if (mu1 >= 0) + mu1 = ceil(mu1-0.5); + else + mu1 = floor(mu1+0.5); + + double *mu_k = mu[k]; + double *mu_j = mu[j]; + + if (mu1 == 1) { + for (i = 1; i <= j-1; i++) + mu_k[i] -= mu_j[i]; + } + else if (mu1 == -1) { + for (i = 1; i <= j-1; i++) + mu_k[i] += mu_j[i]; + } + else { + for (i = 1; i <= j-1; i++) + mu_k[i] -= mu1*mu_j[i]; + } + + mu_k[j] -= mu1; + + conv(MU, mu1); + + RowTransform(B(k), B(j), MU, B1[k], B1[j], in_vec, + max_b[k], max_b[j], in_float); + if (U) RowTransform((*U)(k), (*U)(j), MU); + } + } + + + if (Fc1) { + RowTransformFinish(B(k), B1[k], in_vec); + max_b[k] = max_abs(B1[k], n); + cache.touch(); + GivensComputeGS(B1, mu, aux, k, n, cache); + } + } while (Fc1); + + if (check && (*check)(B(k))) + quit = 1; + + if (IsZero(B(k))) { + for (i = k; i < m; i++) { + // swap i, i+1 + swap(B(i), B(i+1)); + tp = B1[i]; B1[i] = B1[i+1]; B1[i+1] = tp; + t1 = max_b[i]; max_b[i] = max_b[i+1]; max_b[i+1] = t1; + if (U) swap((*U)(i), (*U)(i+1)); + } + + cache.flush(); + + m--; + if (quit) break; + continue; + } + + if (quit) break; + + if (deep > 0) { + // deep insertions + + LogicError("sorry...deep insertions not implemented"); + } // end deep insertions + + // test G_LLL reduction condition + + if (k > 1 && + sqrt(delta - mu[k][k-1]*mu[k][k-1])*fabs(mu[k-1][k-1]) > + fabs(mu[k][k])) { + // swap rows k, k-1 + + swap(B(k), B(k-1)); + tp = B1[k]; B1[k] = B1[k-1]; B1[k-1] = tp; + t1 = max_b[k]; max_b[k] = max_b[k-1]; max_b[k-1] = t1; + if (U) swap((*U)(k), (*U)(k-1)); + + cache.swap(); + + k--; + NumSwaps++; + swap_cnt++; + // cout << "-\n"; + } + else { + + cache.incr(); + + k++; + // cout << "+\n"; + } + + } + + if (verbose) { + G_LLLStatus(m+1, GetTime(), m, B); + } + + + return m; +} + + + + + +static +long G_LLL_FP(mat_ZZ& B, mat_ZZ* U, double delta, long deep, + LLLCheckFct check) +{ + long m = B.NumRows(); + long n = B.NumCols(); + + long i, j; + long new_m, dep, quit; + ZZ MU; + + ZZ T1; + + init_red_fudge(); + + if (U) ident(*U, m); + + Unique2DArray B1_store; + B1_store.SetDimsFrom1(m+1, n+1); + double **B1 = B1_store.get(); // approximates B + + Unique2DArray mu_store; + mu_store.SetDimsFrom1(m+1, n+2); + double **mu = mu_store.get(); + + Unique2DArray aux_store; + aux_store.SetDimsFrom1(m+1, n+1); + double **aux = aux_store.get(); + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) { + conv(B1[i][j], B(i, j)); + CheckFinite(&B1[i][j]); + } + + + GivensCache_FP cache(m, n); + + new_m = ll_G_LLL_FP(B, U, delta, deep, check, B1, mu, aux, m, 1, quit, cache); + dep = m - new_m; + m = new_m; + + if (dep > 0) { + // for consistency, we move all of the zero rows to the front + + for (i = 0; i < m; i++) { + swap(B(m+dep-i), B(m-i)); + if (U) swap((*U)(m+dep-i), (*U)(m-i)); + } + } + + return m; +} + + + +long G_LLL_FP(mat_ZZ& B, double delta, long deep, LLLCheckFct check, + long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("G_LLL_FP: bad delta"); + if (deep < 0) LogicError("G_LLL_FP: bad deep"); + return 
G_LLL_FP(B, 0, delta, deep, check); +} + +long G_LLL_FP(mat_ZZ& B, mat_ZZ& U, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("G_LLL_FP: bad delta"); + if (deep < 0) LogicError("G_LLL_FP: bad deep"); + return G_LLL_FP(B, &U, delta, deep, check); +} + + + +NTL_TLS_GLOBAL_DECL(vec_double, G_BKZConstant) + +static +void ComputeG_BKZConstant(long beta, long p) +{ + NTL_TLS_GLOBAL_ACCESS(G_BKZConstant); + + const double c_PI = 3.14159265358979323846264338328; + const double LogPI = 1.14472988584940017414342735135; + + G_BKZConstant.SetLength(beta-1); + + vec_double Log; + Log.SetLength(beta); + + + long i, j, k; + double x, y; + + for (j = 1; j <= beta; j++) + Log(j) = log(double(j)); + + for (i = 1; i <= beta-1; i++) { + // First, we compute x = gamma(i/2)^{2/i} + + k = i/2; + + if ((i & 1) == 0) { // i even + x = 0; + for (j = 1; j <= k; j++) + x = x + Log(j); + + x = x * (1/double(k)); + + x = exp(x); + } + else { // i odd + x = 0; + for (j = k + 2; j <= 2*k + 2; j++) + x = x + Log(j); + + x = 0.5*LogPI + x - 2*(k+1)*Log(2); + + x = x * (2.0/double(i)); + + x = exp(x); + } + + // Second, we compute y = 2^{2*p/i} + + y = -(2*p/double(i))*Log(2); + y = exp(y); + + G_BKZConstant(i) = x*y/c_PI; + } +} + +NTL_TLS_GLOBAL_DECL(vec_double, G_BKZThresh) + +static +void ComputeG_BKZThresh(double *c, long beta) +{ + NTL_TLS_GLOBAL_ACCESS(G_BKZConstant); + NTL_TLS_GLOBAL_ACCESS(G_BKZThresh); + + G_BKZThresh.SetLength(beta-1); + + long i; + double x; + + x = 0; + + for (i = 1; i <= beta-1; i++) { + x += log(c[i-1]); + G_BKZThresh(i) = exp(x/double(i))*G_BKZConstant(i); + if (!IsFinite(&G_BKZThresh(i))) G_BKZThresh(i) = 0; + } +} + +static +void G_BKZStatus(double tt, double enum_time, unsigned long NumIterations, + unsigned long NumTrivial, unsigned long NumNonTrivial, + unsigned long NumNoOps, long m, + const mat_ZZ& B) +{ + cerr << "---- G_BKZ_FP status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, tt-StartTime); + cerr << ", enum time: "; + PrintTime(cerr, enum_time); + cerr << ", iter: " << NumIterations << "\n"; + cerr << "triv: " << NumTrivial; + cerr << ", nontriv: " << NumNonTrivial; + cerr << ", no ops: " << NumNoOps; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = tt; + +} + + + +static +long G_BKZ_FP(mat_ZZ& BB, mat_ZZ* UU, double delta, + long beta, long prune, LLLCheckFct check) +{ + NTL_TLS_GLOBAL_ACCESS(G_BKZThresh); + + + + + long m = BB.NumRows(); + long n = BB.NumCols(); + long m_orig = m; + + long i, j; + ZZ MU; + + double t1; + ZZ T1; + double *tp; + + init_red_fudge(); + + mat_ZZ B; + B = BB; + + B.SetDims(m+1, n); + + Unique2DArray B1_store; + B1_store.SetDimsFrom1(m+2, n+1); + double **B1 = B1_store.get(); // approximates B + + Unique2DArray mu_store; + mu_store.SetDimsFrom1(m+2, n+2); + double **mu = mu_store.get(); + + Unique2DArray aux_store; + aux_store.SetDimsFrom1(m+2, n+1); + double **aux = aux_store.get(); + + 
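+
+   // The vectors allocated below carry the state of the enumeration
+   // (Schnorr-Euchner style) in the ENUM phase: ctilda[] accumulates
+   // partial squared norms, uvec[]/utildavec[] hold the best and current
+   // integer coefficient vectors, yvec[] the projection targets, and
+   // Deltavec[]/deltavec[] the zig-zag step pattern around each
+   // rounding target.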
UniqueArray c_store; + c_store.SetLength(m+2); + double *c = c_store.get(); // squared lengths of Gramm-Schmidt basis vectors + + + double cbar; + + UniqueArray ctilda_store; + ctilda_store.SetLength(m+2); + double *ctilda = ctilda_store.get(); + + UniqueArray vvec_store; + vvec_store.SetLength(m+2); + double *vvec = vvec_store.get(); + + + UniqueArray yvec_store; + yvec_store.SetLength(m+2); + double *yvec = yvec_store.get(); + + UniqueArray uvec_store; + uvec_store.SetLength(m+2); + double *uvec = uvec_store.get(); + + UniqueArray utildavec_store; + utildavec_store.SetLength(m+2); + double *utildavec = utildavec_store.get(); + + + UniqueArray Deltavec_store; + Deltavec_store.SetLength(m+2); + long *Deltavec = Deltavec_store.get(); + + UniqueArray deltavec_store; + deltavec_store.SetLength(m+2); + long *deltavec = deltavec_store.get(); + + + mat_ZZ Ulocal; + mat_ZZ *U; + + if (UU) { + Ulocal.SetDims(m+1, m); + for (i = 1; i <= m; i++) + conv(Ulocal(i, i), 1); + U = &Ulocal; + } + else + U = 0; + + long quit; + long new_m; + long z, jj, kk; + long s, t; + long h; + double eta; + + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) { + conv(B1[i][j], B(i, j)); + CheckFinite(&B1[i][j]); + } + + + GivensCache_FP cache(m, n); + + m = ll_G_LLL_FP(B, U, delta, 0, check, B1, mu, aux, m, 1, quit, cache); + + double tt; + + double enum_time = 0; + unsigned long NumIterations = 0; + unsigned long NumTrivial = 0; + unsigned long NumNonTrivial = 0; + unsigned long NumNoOps = 0; + + long verb = verbose; + + verbose = 0; + + long clean = 1; + + if (m < m_orig) { + for (i = m_orig+1; i >= m+2; i--) { + // swap i, i-1 + + swap(B(i), B(i-1)); + if (U) swap((*U)(i), (*U)(i-1)); + } + } + + if (!quit && m > 1) { + if (beta > m) beta = m; + + if (prune > 0) + ComputeG_BKZConstant(beta, prune); + + z = 0; + jj = 0; + + while (z < m-1) { + jj++; + kk = min(jj+beta-1, m); + + if (jj == m) { + jj = 1; + kk = beta; + clean = 1; + } + + if (verb) { + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) + G_BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + + + // ENUM + + double tt1; + + if (verb) { + tt1 = GetTime(); + } + + for (i = jj; i <= kk; i++) { + c[i] = mu[i][i]*mu[i][i]; + CheckFinite(&c[i]); + } + + if (prune > 0) + ComputeG_BKZThresh(&c[jj], kk-jj+1); + + cbar = c[jj]; + utildavec[jj] = uvec[jj] = 1; + + yvec[jj] = vvec[jj] = 0; + Deltavec[jj] = 0; + + + s = t = jj; + deltavec[jj] = 1; + + for (i = jj+1; i <= kk+1; i++) { + ctilda[i] = uvec[i] = utildavec[i] = yvec[i] = 0; + Deltavec[i] = 0; + vvec[i] = 0; + deltavec[i] = 1; + } + + long enum_cnt = 0; + + while (t <= kk) { + if (verb) { + enum_cnt++; + if (enum_cnt > 100000) { + enum_cnt = 0; + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) { + enum_time += tt - tt1; + tt1 = tt; + G_BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + } + } + + ctilda[t] = ctilda[t+1] + + (yvec[t]+utildavec[t])*(yvec[t]+utildavec[t])*c[t]; + + ForceToMem(&ctilda[t]); // prevents an infinite loop + + if (prune > 0 && t > jj) { + eta = G_BKZThresh(t-jj); + } + else + eta = 0; + + if (ctilda[t] < cbar - eta) { + if (t > jj) { + t--; + t1 = 0; + for (i = t+1; i <= s; i++) + t1 += utildavec[i]*mu[i][t]; + yvec[t] = t1; + t1 = -t1; + if (t1 >= 0) + t1 = ceil(t1-0.5); + else + t1 = floor(t1+0.5); + utildavec[t] = vvec[t] = t1; + Deltavec[t] = 0; + if (utildavec[t] > -yvec[t]) + deltavec[t] = -1; + else + deltavec[t] = 1; + } + else { + cbar = ctilda[jj]; + for (i = jj; i <= kk; 
i++) { + uvec[i] = utildavec[i]; + } + } + } + else { + t++; + s = max(s, t); + if (t < s) Deltavec[t] = -Deltavec[t]; + if (Deltavec[t]*deltavec[t] >= 0) Deltavec[t] += deltavec[t]; + utildavec[t] = vvec[t] + Deltavec[t]; + } + } + + if (verb) { + tt1 = GetTime() - tt1; + enum_time += tt1; + } + + NumIterations++; + + h = min(kk+1, m); + + if ((delta - 8*red_fudge)*c[jj] > cbar) { + + clean = 0; + + // we treat the case that the new vector is b_s (jj < s <= kk) + // as a special case that appears to occur most of the time. + + s = 0; + for (i = jj+1; i <= kk; i++) { + if (uvec[i] != 0) { + if (s == 0) + s = i; + else + s = -1; + } + } + + if (s == 0) LogicError("G_BKZ_FP: internal error"); + + if (s > 0) { + // special case + + NumTrivial++; + + for (i = s; i > jj; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + // cerr << "special case\n"; + new_m = ll_G_LLL_FP(B, U, delta, 0, check, + B1, mu, aux, h, jj, quit, cache); + if (new_m != h) LogicError("G_BKZ_FP: internal error"); + if (quit) break; + } + else { + // the general case + + NumNonTrivial++; + + for (i = 1; i <= n; i++) conv(B(m+1, i), 0); + + if (U) { + for (i = 1; i <= m_orig; i++) + conv((*U)(m+1, i), 0); + } + + for (i = jj; i <= kk; i++) { + if (uvec[i] == 0) continue; + conv(MU, uvec[i]); + RowTransform2(B(m+1), B(i), MU); + if (U) RowTransform2((*U)(m+1), (*U)(i), MU); + } + + for (i = m+1; i >= jj+1; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + for (i = 1; i <= n; i++) { + conv(B1[jj][i], B(jj, i)); + CheckFinite(&B1[jj][i]); + } + + if (IsZero(B(jj))) LogicError("G_BKZ_FP: internal error"); + + // remove linear dependencies + + // cerr << "general case\n"; + new_m = ll_G_LLL_FP(B, U, delta, 0, 0, B1, mu, aux, + kk+1, jj, quit, cache); + + if (new_m != kk) LogicError("G_BKZ_FP: internal error"); + + // remove zero vector + + for (i = kk+2; i <= m+1; i++) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + quit = 0; + if (check) { + for (i = 1; i <= kk; i++) + if ((*check)(B(i))) { + quit = 1; + break; + } + } + + if (quit) break; + + if (h > kk) { + // extend reduced basis + + new_m = ll_G_LLL_FP(B, U, delta, 0, check, + B1, mu, aux, h, h, quit, cache); + + if (new_m != h) LogicError("G_BKZ_FP: internal error"); + if (quit) break; + } + } + + z = 0; + } + else { + // G_LLL_FP + // cerr << "progress\n"; + + NumNoOps++; + + if (!clean) { + new_m = ll_G_LLL_FP(B, U, delta, 0, check, B1, mu, aux, + h, h, quit, cache); + if (new_m != h) LogicError("G_BKZ_FP: internal error"); + if (quit) break; + } + + z++; + } + } + } + + + if (verb) { + G_BKZStatus(GetTime(), enum_time, NumIterations, NumTrivial, NumNonTrivial, + NumNoOps, m, B); + } + + // clean up + + + if (m_orig > m) { + // for consistency, we move zero vectors to the front + + for (i = m+1; i <= m_orig; i++) { + swap(B(i), B(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = 0; i < m; i++) { + swap(B(m_orig-i), B(m-i)); + if (U) swap((*U)(m_orig-i), (*U)(m-i)); + } + } + + B.SetDims(m_orig, n); + BB = B; + + if (U) { + U->SetDims(m_orig, m_orig); + *UU = *U; + } + + return m; +} + +long G_BKZ_FP(mat_ZZ& BB, mat_ZZ& UU, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || 
delta >= 1) LogicError("G_BKZ_FP: bad delta"); + if (beta < 2) LogicError("G_BKZ_FP: bad block size"); + + return G_BKZ_FP(BB, &UU, delta, beta, prune, check); +} + +long G_BKZ_FP(mat_ZZ& BB, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("G_BKZ_FP: bad delta"); + if (beta < 2) LogicError("G_BKZ_FP: bad block size"); + + return G_BKZ_FP(BB, 0, delta, beta, prune, check); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/G_LLL_QP.c b/thirdparty/linux/ntl/src/G_LLL_QP.c new file mode 100644 index 0000000000..3887d5743a --- /dev/null +++ b/thirdparty/linux/ntl/src/G_LLL_QP.c @@ -0,0 +1,2063 @@ + +#include +#include +#include + + +#include + +NTL_START_IMPL + + +static inline +void CheckFinite(double *p) +{ + if (!IsFinite(p)) ResourceError("G_LLL_QP: numbers too big...use G_LLL_XD"); +} + + +static inline +void CheckFinite(quad_float *p) +{ + if (!IsFinite(p)) ResourceError("G_LLL_QP: numbers too big...use G_LLL_XD"); +} + + +static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x - y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + if (k > 0) { + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + LeftShift(T, T, k); + sub(A(i), A(i), T); + } + + } + else { + + for (i = 1; i <= n; i++) { + MulSubFrom(A(i), B(i), mu1); + } + + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } +} + + + + +#define TR_BND (NTL_FDOUBLE_PRECISION/2.0) +// Just to be safe!! 
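The TR_BND bound drives the quad_float code's fast path: vector entries are kept in
the high double word (a[i].hi) and updated with plain double arithmetic as long as
every operand provably stays below the bound, with a fallback to exact ZZ arithmetic
otherwise (see RowTransform below). A minimal standalone sketch of the same idea,
with a hypothetical helper and long long standing in for ZZ (illustrative only, not
NTL code):

    #include <cmath>

    // Threshold below which integers are represented exactly by a double:
    // 2^52 here, so a difference of two such values stays under 2^53.
    const double BND = 4503599627370496.0; // plays the role of TR_BND

    // A[i] -= mu * B[i], taking the float path only when provably exact.
    void mul_sub(long long* A, const long long* B, long long mu, int n) {
        bool in_float = true;
        for (int i = 0; i < n; i++) {
            double prod = std::fabs((double)mu * (double)B[i]);
            if (std::fabs((double)A[i]) >= BND || prod >= BND) in_float = false;
        }
        if (in_float) {
            for (int i = 0; i < n; i++)  // exact: all quantities < 2^53
                A[i] = (long long)((double)A[i] - (double)mu * (double)B[i]);
        } else {
            for (int i = 0; i < n; i++)  // fallback: exact integer path
                A[i] -= mu * B[i];
        }
    }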
+ +static double max_abs(quad_float *v, long n) +{ + long i; + double res, t; + + res = 0; + + for (i = 1; i <= n; i++) { + t = fabs(v[i].hi); + if (t > res) res = t; + } + + return res; +} + + +static void RowTransformStart(quad_float *a, long *in_a, long& in_float, long n) +{ + long i; + long inf = 1; + + for (i = 1; i <= n; i++) { + in_a[i] = (a[i].hi < TR_BND && a[i].hi > -TR_BND); + inf = inf & in_a[i]; + } + + in_float = inf; +} + + +static void RowTransformFinish(vec_ZZ& A, quad_float *a, long *in_a) +{ + long n = A.length(); + long i; + + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i].hi); + } + else { + conv(a[i], A(i)); + CheckFinite(&a[i]); + } + } +} + + + + + + + +static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1, + quad_float *a, quad_float *b, long *in_a, + double& max_a, double max_b, long& in_float) +// x = x - y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + double mu; + + + long n = A.length(); + long i; + + conv(mu, MU1); + CheckFinite(&mu); + + if (in_float) { + double mu_abs = fabs(mu); + if (mu_abs > 0 && max_b > 0 && (mu_abs >= TR_BND || max_b >= TR_BND)) { + in_float = 0; + } + else { + max_a += mu_abs*max_b; + if (max_a >= TR_BND) + in_float = 0; + } + } + + if (in_float) { + if (mu == 1) { + for (i = 1; i <= n; i++) + a[i].hi -= b[i].hi; + + return; + } + + if (mu == -1) { + for (i = 1; i <= n; i++) + a[i].hi += b[i].hi; + + return; + } + + if (mu == 0) return; + + for (i = 1; i <= n; i++) + a[i].hi -= mu*b[i].hi; + + + return; + } + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i].hi < TR_BND && a[i].hi > -TR_BND && + b[i].hi < TR_BND && b[i].hi > -TR_BND) { + + a[i].hi -= b[i].hi; + } + else { + if (in_a[i]) { + conv(A(i), a[i].hi); + in_a[i] = 0; + } + + sub(A(i), A(i), B(i)); + } + } + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i].hi < TR_BND && a[i].hi > -TR_BND && + b[i].hi < TR_BND && b[i].hi > -TR_BND) { + + a[i].hi += b[i].hi; + } + else { + if (in_a[i]) { + conv(A(i), a[i].hi); + in_a[i] = 0; + } + + add(A(i), A(i), B(i)); + } + } + + return; + } + + if (MU == 0) return; + + double b_bnd = fabs(TR_BND/mu) - 1; + if (b_bnd < 0) b_bnd = 0; + + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + if (k > 0) { + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i].hi); + in_a[i] = 0; + } + + mul(T, B(i), mu1); + LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i].hi < TR_BND && a[i].hi > -TR_BND && + b[i].hi < b_bnd && b[i].hi > -b_bnd) { + + a[i].hi -= b[i].hi*mu; + } + else { + if (in_a[i]) { + conv(A(i), a[i].hi); + in_a[i] = 0; + } + MulSubFrom(A(i), B(i), mu1); + } + } + } + } + else { + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i].hi); + in_a[i] = 0; + } + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } +} + +static void RowTransform2(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x + y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + for (i = 1; i 
<= n; i++) { + mul(T, B(i), mu1); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } +} + + +class GivensCache_QP { +public: + GivensCache_QP(long m, long n); + + void flush(); + void selective_flush(long l); + void swap(long l); + void swap(); + void touch(); + void incr(); + + long sz; + + + Unique2DArray buf; + UniqueArray bl; + UniqueArray bv; + + long bp; +}; + + +GivensCache_QP::GivensCache_QP(long m, long n) +{ + sz = min(m, n)/10; + if (sz < 2) + sz = 2; + else if (sz > 20) + sz = 20; + + buf.SetDims(sz, n+1); + bl.SetLength(sz); + bv.SetLength(sz); + + long i; + for (i = 0; i < sz; i++) bl[i] = 0; + for (i = 0; i < sz; i++) bv[i] = 0; + + bp = 0; +} + +void GivensCache_QP::flush() +{ + long i; + for (i = 0; i < sz; i++) bl[i] = 0; +} + +void GivensCache_QP::selective_flush(long l) +{ + long i; + + for (i = 0; i < sz; i++) + if (bl[i] && bv[i] >= l) + bl[i] = 0; +} + +void GivensCache_QP::swap(long l) +{ + long k = bl[bp]; + long i; + + i = 0; + while (i < sz && bl[i] != l) + i++; + + if (i < sz) { + bl[bp] = l; + bl[i] = k; + } + else + bl[bp] = l; + + selective_flush(l); +} + +void GivensCache_QP::swap() +{ + swap(bl[bp] - 1); +} + +void GivensCache_QP::touch() +{ + long k = bl[bp]; + bl[bp] = 0; + selective_flush(k); +} + +void GivensCache_QP::incr() +{ + long k = bl[bp]; + long k1 = k+1; + long i; + + i = 0; + while (i < sz && bl[i] != k1) + i++; + + if (i < sz) { + bp = i; + return; + } + + i = 0; + while (i < sz && bl[i] != 0) + i++; + + if (i < sz) { + bp = i; + return; + } + + long max_val = 0; + long max_index = 0; + for (i = 0; i < sz; i++) { + long t = labs(bl[i]-k1); + if (t > max_val) { + max_val = t; + max_index = i; + } + } + + bp = max_index; + bl[max_index] = 0; +} + + +static +void GivensComputeGS(quad_float **B1, quad_float **mu, quad_float **aux, long k, long n, + GivensCache_QP& cache) +{ + long i, j; + + quad_float c, s, a, b, t; + + quad_float *p = mu[k]; + + quad_float *pp = cache.buf[cache.bp]; + + if (!cache.bl[cache.bp]) { + for (j = 1; j <= n; j++) + pp[j] = B1[k][j]; + + long backoff; + backoff = k/4; + if (backoff < 2) + backoff = 2; + else if (backoff > cache.sz + 2) + backoff = cache.sz + 2; + + long ub = k-(backoff-1); + + for (i = 1; i < ub; i++) { + quad_float *cptr = mu[i]; + quad_float *sptr = aux[i]; + + for (j = n; j > i; j--) { + c = cptr[j]; + s = sptr[j]; + + a = c*pp[j-1] - s*pp[j]; + b = s*pp[j-1] + c*pp[j]; + + pp[j-1] = a; + pp[j] = b; + } + + pp[i] = pp[i]/mu[i][i]; + } + + cache.bl[cache.bp] = k; + cache.bv[cache.bp] = k-backoff; + } + + for (j = 1; j <= n; j++) + p[j] = pp[j]; + + for (i = max(cache.bv[cache.bp]+1, 1); i < k; i++) { + quad_float *cptr = mu[i]; + quad_float *sptr = aux[i]; + + for (j = n; j > i; j--) { + c = cptr[j]; + s = sptr[j]; + + a = c*p[j-1] - s*p[j]; + b = s*p[j-1] + c*p[j]; + + p[j-1] = a; + p[j] = b; + } + + p[i] = p[i]/mu[i][i]; + } + + for (j = n; j > k; j--) { + a = p[j-1]; + b = p[j]; + + if (b == 0) { + c = 1; + s = 0; + } + else if (fabs(b) > fabs(a)) { + t = -a/b; + s = 1/sqrt(1 + t*t); + c = s*t; + } + else { + t = -b/a; + c = 1/sqrt(1 + t*t); + s = c*t; + } + + p[j-1] = c*a - s*b; + p[j] = c; + aux[k][j] = s; + } + + if (k > n+1) LogicError("G_LLL_QP: internal error"); + if (k > n) p[k] = 0; + + for (i = 1; i <= k; i++) + CheckFinite(&p[i]); +} + +NTL_TLS_GLOBAL_DECL_INIT(quad_float, red_fudge, (to_quad_float(0))) + +static NTL_CHEAP_THREAD_LOCAL long log_red = 0; 
+static NTL_CHEAP_THREAD_LOCAL long verbose = 0; +static NTL_CHEAP_THREAD_LOCAL unsigned long NumSwaps = 0; +static NTL_CHEAP_THREAD_LOCAL double StartTime = 0; +static NTL_CHEAP_THREAD_LOCAL double LastTime = 0; + + + +static void G_LLLStatus(long max_k, double t, long m, const mat_ZZ& B) +{ + cerr << "---- G_LLL_QP status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, t-StartTime); + cerr << ", stage: " << max_k; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = t; + +} + + +static void init_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + long i; + + // initial log_red should be <= NTL_DOUBLE_PRECISION-2, + // to help ensure stability in G_BKZ_QP1 + + log_red = NTL_DOUBLE_PRECISION-2; + + red_fudge = 1; + + for (i = log_red; i > 0; i--) + red_fudge = red_fudge*0.5; +} + +static void inc_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + + red_fudge = red_fudge * 2; + log_red--; + + cerr << "G_LLL_QP: warning--relaxing reduction (" << log_red << ")\n"; + + if (log_red < 4) + ResourceError("G_LLL_QP: too much loss of precision...stop!"); +} + + +static +long ll_G_LLL_QP(mat_ZZ& B, mat_ZZ* U, quad_float delta, long deep, + LLLCheckFct check, quad_float **B1, quad_float **mu, + quad_float **aux, + long m, long init_k, long &quit, GivensCache_QP& cache) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + long n = B.NumCols(); + + long i, j, k, Fc1; + ZZ MU; + quad_float mu1; + + quad_float t1; + double dt1; + ZZ T1; + quad_float *tp; + + quad_float half = to_quad_float(0.5); + quad_float half_plus_fudge = 0.5 + red_fudge; + + quit = 0; + k = init_k; + + vec_long in_vec_mem; + in_vec_mem.SetLength(n+1); + long *in_vec = in_vec_mem.elts(); + + + UniqueArray max_b_store; + max_b_store.SetLength(m+1); + double *max_b = max_b_store.get(); + + for (i = 1; i <= m; i++) + max_b[i] = max_abs(B1[i], n); + + long in_float; + + + long counter; + + long trigger_index; + long small_trigger; + long cnt; + + long max_k = 0; + + double tt; + + cache.flush(); + + while (k <= m) { + + if (k > max_k) { + max_k = k; + } + + if (verbose) { + tt = GetTime(); + + if (tt > LastTime + LLLStatusInterval) + G_LLLStatus(max_k, tt, m, B); + } + + + GivensComputeGS(B1, mu, aux, k, n, cache); + + counter = 0; + trigger_index = k; + small_trigger = 0; + cnt = 0; + + do { + // size reduction + + counter++; + if (counter > 10000) { + cerr << "G_LLL_QP: warning--possible infinite loop\n"; + counter = 0; + } + + + Fc1 = 0; + + for (j = k-1; j >= 1; j--) { + t1 = fabs(mu[k][j]); + if (t1 > half_plus_fudge) { + + if (!Fc1) { + if (j > trigger_index || + (j == trigger_index && small_trigger)) { + + cnt++; + + if (cnt > 10) { + inc_red_fudge(); + half_plus_fudge = 0.5 + red_fudge; + cnt = 0; + } + } + + trigger_index = j; + small_trigger = (t1 < 4); + + Fc1 = 1; + RowTransformStart(B1[k], in_vec, in_float, n); + } + + + + mu1 = mu[k][j]; + if (mu1 >= 0) + mu1 = ceil(mu1-half); + else + mu1 = floor(mu1+half); + + + quad_float *mu_k = mu[k]; + quad_float *mu_j = mu[j]; + + if (mu1 == 1) { + for (i = 1; i <= j-1; i++) + mu_k[i] -= mu_j[i]; 
+ } + else if (mu1 == -1) { + for (i = 1; i <= j-1; i++) + mu_k[i] += mu_j[i]; + } + else { + for (i = 1; i <= j-1; i++) + mu_k[i] -= mu1*mu_j[i]; + } + + // cout << j << " " << mu[k][j] << " " << mu1 << "\n"; + + mu_k[j] -= mu1; + + conv(MU, mu1); + + + RowTransform(B(k), B(j), MU, B1[k], B1[j], in_vec, + max_b[k], max_b[j], in_float); + + if (U) RowTransform((*U)(k), (*U)(j), MU); + } + } + + if (Fc1) { + RowTransformFinish(B(k), B1[k], in_vec); + max_b[k] = max_abs(B1[k], n); + cache.touch(); + GivensComputeGS(B1, mu, aux, k, n, cache); + } + } while (Fc1); + + if (check && (*check)(B(k))) + quit = 1; + + if (IsZero(B(k))) { + for (i = k; i < m; i++) { + // swap i, i+1 + swap(B(i), B(i+1)); + tp = B1[i]; B1[i] = B1[i+1]; B1[i+1] = tp; + dt1 = max_b[i]; max_b[i] = max_b[i+1]; max_b[i+1] = dt1; + if (U) swap((*U)(i), (*U)(i+1)); + } + + cache.flush(); + + m--; + if (quit) break; + continue; + } + + if (quit) break; + + if (deep > 0) { + // deep insertions + + LogicError("sorry...deep insertions not implemented"); + } // end deep insertions + + // test LLL reduction condition + + if (k > 1 && + sqrt(delta - mu[k][k-1]*mu[k][k-1])*fabs(mu[k-1][k-1]) > + fabs(mu[k][k])) { + + // swap rows k, k-1 + swap(B(k), B(k-1)); + tp = B1[k]; B1[k] = B1[k-1]; B1[k-1] = tp; + dt1 = max_b[k]; max_b[k] = max_b[k-1]; max_b[k-1] = dt1; + if (U) swap((*U)(k), (*U)(k-1)); + + cache.swap(); + + k--; + NumSwaps++; + // cout << "- " << k << "\n"; + } + else { + cache.incr(); + k++; + // cout << "+ " << k << "\n"; + } + } + + if (verbose) { + G_LLLStatus(m+1, GetTime(), m, B); + } + + + return m; +} + +static +long G_LLL_QP(mat_ZZ& B, mat_ZZ* U, quad_float delta, long deep, + LLLCheckFct check) +{ + long m = B.NumRows(); + long n = B.NumCols(); + + long i, j; + long new_m, dep, quit; + quad_float s; + ZZ MU; + quad_float mu1; + + quad_float t1; + ZZ T1; + + init_red_fudge(); + + if (U) ident(*U, m); + + Unique2DArray B1_store; + B1_store.SetDimsFrom1(m+1, n+1); + quad_float **B1 = B1_store.get(); // approximates B + + Unique2DArray mu_store; + mu_store.SetDimsFrom1(m+1, n+2); + quad_float **mu = mu_store.get(); + + Unique2DArray aux_store; + aux_store.SetDimsFrom1(m+1, n+1); + quad_float **aux = aux_store.get(); + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) { + conv(B1[i][j], B(i, j)); + CheckFinite(&B1[i][j]); + } + + + GivensCache_QP cache(m, n); + + new_m = + ll_G_LLL_QP(B, U, delta, deep, check, B1, mu, aux, m, 1, quit, cache); + + + + dep = m - new_m; + m = new_m; + + if (dep > 0) { + // for consistency, we move all of the zero rows to the front + + for (i = 0; i < m; i++) { + swap(B(m+dep-i), B(m-i)); + if (U) swap((*U)(m+dep-i), (*U)(m-i)); + } + } + + return m; +} + + + +long G_LLL_QP(mat_ZZ& B, double delta, long deep, LLLCheckFct check, + long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("G_LLL_QP: bad delta"); + if (deep < 0) LogicError("G_LLL_QP: bad deep"); + return G_LLL_QP(B, 0, to_quad_float(delta), deep, check); +} + +long G_LLL_QP(mat_ZZ& B, mat_ZZ& U, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + if (delta < 0.50 || delta >= 1) LogicError("G_LLL_QP: bad delta"); + if (deep < 0) LogicError("G_LLL_QP: bad deep"); + return G_LLL_QP(B, &U, to_quad_float(delta), deep, check); +} + + + +NTL_TLS_GLOBAL_DECL(vec_quad_float, G_BKZConstant) + +static +void 
ComputeG_BKZConstant(long beta, long p) +{ + NTL_TLS_GLOBAL_ACCESS(G_BKZConstant); + + const quad_float c_PI = + to_quad_float("3.141592653589793238462643383279502884197"); + const quad_float LogPI = + to_quad_float("1.144729885849400174143427351353058711647"); + + G_BKZConstant.SetLength(beta-1); + + vec_quad_float Log; + Log.SetLength(beta); + + + long i, j, k; + quad_float x, y; + + for (j = 1; j <= beta; j++) + Log(j) = log(to_quad_float(j)); + + for (i = 1; i <= beta-1; i++) { + // First, we compute x = gamma(i/2)^{2/i} + + k = i/2; + + if ((i & 1) == 0) { // i even + x = 0; + for (j = 1; j <= k; j++) + x = x + Log(j); + + x = x * (1/to_quad_float(k)); + + x = exp(x); + } + else { // i odd + x = 0; + for (j = k + 2; j <= 2*k + 2; j++) + x = x + Log(j); + + x = 0.5*LogPI + x - 2*(k+1)*Log(2); + + x = x * (2.0/to_quad_float(i)); + + x = exp(x); + } + + // Second, we compute y = 2^{2*p/i} + + y = -(2*p/to_quad_float(i))*Log(2); + y = exp(y); + + G_BKZConstant(i) = x*y/c_PI; + } +} + +NTL_TLS_GLOBAL_DECL(vec_quad_float, G_BKZThresh) + +static +void ComputeG_BKZThresh(quad_float *c, long beta) +{ + NTL_TLS_GLOBAL_ACCESS(G_BKZConstant); + NTL_TLS_GLOBAL_ACCESS(G_BKZThresh); + + G_BKZThresh.SetLength(beta-1); + + long i; + quad_float x; + + x = 0; + + for (i = 1; i <= beta-1; i++) { + x += log(c[i-1]); + G_BKZThresh(i) = exp(x/to_quad_float(i))*G_BKZConstant(i); + if (!IsFinite(&G_BKZThresh(i))) G_BKZThresh(i) = 0; + } +} + + +static +void G_BKZStatus(double tt, double enum_time, unsigned long NumIterations, + unsigned long NumTrivial, unsigned long NumNonTrivial, + unsigned long NumNoOps, long m, + const mat_ZZ& B) +{ + cerr << "---- G_BKZ_QP status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, tt-StartTime); + cerr << ", enum time: "; + PrintTime(cerr, enum_time); + cerr << ", iter: " << NumIterations << "\n"; + cerr << "triv: " << NumTrivial; + cerr << ", nontriv: " << NumNonTrivial; + cerr << ", no ops: " << NumNoOps; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = tt; + +} + + +static +long G_BKZ_QP(mat_ZZ& BB, mat_ZZ* UU, quad_float delta, + long beta, long prune, LLLCheckFct check) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + NTL_TLS_GLOBAL_ACCESS(G_BKZThresh); + + + + long m = BB.NumRows(); + long n = BB.NumCols(); + long m_orig = m; + + long i, j; + ZZ MU; + + quad_float t1; + ZZ T1; + quad_float *tp; + + init_red_fudge(); + + mat_ZZ B; + B = BB; + + B.SetDims(m+1, n); + + Unique2DArray B1_store; + B1_store.SetDimsFrom1(m+2, n+1); + quad_float **B1 = B1_store.get(); // approximates B + + Unique2DArray mu_store; + mu_store.SetDimsFrom1(m+2, n+2); + quad_float **mu = mu_store.get(); + + Unique2DArray aux_store; + aux_store.SetDimsFrom1(m+2, n+1); + quad_float **aux = aux_store.get(); + + UniqueArray c_store; + c_store.SetLength(m+2); + quad_float *c = c_store.get(); // squared lengths of Gramm-Schmidt basis vectors + + + quad_float cbar; + + UniqueArray ctilda_store; + ctilda_store.SetLength(m+2); + quad_float *ctilda = ctilda_store.get(); + + UniqueArray vvec_store; + 
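+ // Enumeration state for the block [jj..kk]: ctilda accumulates the squared
+ // length of the partial vector from level t upward, yvec holds the running
+ // projections, utildavec the candidate coefficients being explored, uvec the
+ // best full coefficient vector found so far, and vvec/Deltavec/deltavec
+ // drive the zig-zag search around each rounded center.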
vvec_store.SetLength(m+2); + quad_float *vvec = vvec_store.get(); + + + UniqueArray yvec_store; + yvec_store.SetLength(m+2); + quad_float *yvec = yvec_store.get(); + + UniqueArray uvec_store; + uvec_store.SetLength(m+2); + quad_float *uvec = uvec_store.get(); + + UniqueArray utildavec_store; + utildavec_store.SetLength(m+2); + quad_float *utildavec = utildavec_store.get(); + + + UniqueArray Deltavec_store; + Deltavec_store.SetLength(m+2); + long *Deltavec = Deltavec_store.get(); + + UniqueArray deltavec_store; + deltavec_store.SetLength(m+2); + long *deltavec = deltavec_store.get(); + + mat_ZZ Ulocal; + mat_ZZ *U; + + if (UU) { + Ulocal.SetDims(m+1, m); + for (i = 1; i <= m; i++) + conv(Ulocal(i, i), 1); + U = &Ulocal; + } + else + U = 0; + + long quit; + long new_m; + long z, jj, kk; + long s, t; + long h; + quad_float eta; + + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) { + conv(B1[i][j], B(i, j)); + CheckFinite(&B1[i][j]); + } + + + GivensCache_QP cache(m, n); + + m = ll_G_LLL_QP(B, U, delta, 0, check, B1, mu, aux, m, 1, quit, cache); + + double tt; + + double enum_time = 0; + unsigned long NumIterations = 0; + unsigned long NumTrivial = 0; + unsigned long NumNonTrivial = 0; + unsigned long NumNoOps = 0; + + long verb = verbose; + + verbose = 0; + + long clean = 1; + + if (m < m_orig) { + for (i = m_orig+1; i >= m+2; i--) { + // swap i, i-1 + + swap(B(i), B(i-1)); + if (U) swap((*U)(i), (*U)(i-1)); + } + } + + if (!quit && m > 1) { + // cerr << "continuing\n"; + if (beta > m) beta = m; + + if (prune > 0) + ComputeG_BKZConstant(beta, prune); + + z = 0; + jj = 0; + + while (z < m-1) { + jj++; + kk = min(jj+beta-1, m); + + if (jj == m) { + jj = 1; + kk = beta; + clean = 1; + } + + if (verb) { + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) + G_BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + + + + + // ENUM + + double tt1; + + if (verb) { + tt1 = GetTime(); + } + + for (i = jj; i <= kk; i++) { + c[i] = mu[i][i]*mu[i][i]; + CheckFinite(&c[i]); + } + + if (prune > 0) + ComputeG_BKZThresh(&c[jj], kk-jj+1); + + + cbar = c[jj]; + utildavec[jj] = uvec[jj] = 1; + + yvec[jj] = vvec[jj] = 0; + Deltavec[jj] = 0; + + + s = t = jj; + deltavec[jj] = 1; + + for (i = jj+1; i <= kk+1; i++) { + ctilda[i] = uvec[i] = utildavec[i] = yvec[i] = 0; + Deltavec[i] = 0; + vvec[i] = 0; + deltavec[i] = 1; + } + + long enum_cnt = 0; + + while (t <= kk) { + if (verb) { + enum_cnt++; + if (enum_cnt > 100000) { + enum_cnt = 0; + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) { + enum_time += tt - tt1; + tt1 = tt; + G_BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + } + } + + ctilda[t] = ctilda[t+1] + + (yvec[t]+utildavec[t])*(yvec[t]+utildavec[t])*c[t]; + + if (prune > 0 && t > jj) { + eta = G_BKZThresh(t-jj); + } + else + eta = 0; + + if (ctilda[t] < cbar - eta) { + if (t > jj) { + t--; + t1 = 0; + for (i = t+1; i <= s; i++) { + t1 += utildavec[i]*mu[i][t]; + } + + + yvec[t] = t1; + t1 = -t1; + if (t1 >= 0) + t1 = ceil(t1-0.5); + else + t1 = floor(t1+0.5); + + utildavec[t] = vvec[t] = t1; + Deltavec[t] = 0; + if (utildavec[t] > -yvec[t]) + deltavec[t] = -1; + else + deltavec[t] = 1; + } + else { + cbar = ctilda[jj]; + for (i = jj; i <= kk; i++) { + uvec[i] = utildavec[i]; + } + } + } + else { + t++; + s = max(s, t); + if (t < s) Deltavec[t] = -Deltavec[t]; + if (Deltavec[t]*deltavec[t] >= 0) Deltavec[t] += deltavec[t]; + utildavec[t] = vvec[t] + Deltavec[t]; + } + } + + if (verb) { + tt1 = GetTime() - 
tt1; + enum_time += tt1; + } + + NumIterations++; + + h = min(kk+1, m); + + if ((delta-8*red_fudge)*c[jj] > cbar) { + + clean = 0; + + // we treat the case that the new vector is b_s (jj < s <= kk) + // as a special case that appears to occur most of the time. + + s = 0; + for (i = jj+1; i <= kk; i++) { + if (uvec[i] != 0) { + if (s == 0) + s = i; + else + s = -1; + } + } + + if (s == 0) LogicError("G_BKZ_QP: internal error"); + + if (s > 0) { + // special case + + NumTrivial++; + + for (i = s; i > jj; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + // cerr << "special case\n"; + new_m = ll_G_LLL_QP(B, U, delta, 0, check, + B1, mu, aux, h, jj, quit, cache); + if (new_m != h) LogicError("G_BKZ_QP: internal error"); + if (quit) break; + } + else { + // the general case + + NumNonTrivial++; + + + for (i = 1; i <= n; i++) conv(B(m+1, i), 0); + + if (U) { + for (i = 1; i <= m_orig; i++) + conv((*U)(m+1, i), 0); + } + + for (i = jj; i <= kk; i++) { + if (uvec[i] == 0) continue; + conv(MU, uvec[i]); + RowTransform2(B(m+1), B(i), MU); + if (U) RowTransform2((*U)(m+1), (*U)(i), MU); + } + + for (i = m+1; i >= jj+1; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + for (i = 1; i <= n; i++) { + conv(B1[jj][i], B(jj, i)); + CheckFinite(&B1[jj][i]); + } + + if (IsZero(B(jj))) LogicError("G_BKZ_QP: internal error"); + + // remove linear dependencies + + // cerr << "general case\n"; + new_m = ll_G_LLL_QP(B, U, delta, 0, 0, B1, mu, aux, + kk+1, jj, quit, cache); + + if (new_m != kk) LogicError("G_BKZ_QP: internal error"); + + // remove zero vector + + for (i = kk+2; i <= m+1; i++) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + quit = 0; + if (check) { + for (i = 1; i <= kk; i++) + if ((*check)(B(i))) { + quit = 1; + break; + } + } + + if (quit) break; + + if (h > kk) { + // extend reduced basis + + new_m = ll_G_LLL_QP(B, U, delta, 0, check, + B1, mu, aux, h, h, quit, cache); + + if (new_m != h) LogicError("G_BKZ_QP: internal error"); + if (quit) break; + } + } + + z = 0; + } + else { + // G_LLL_QP + // cerr << "progress\n"; + + NumNoOps++; + + + if (!clean) { + new_m = + ll_G_LLL_QP(B, U, delta, 0, check, B1, mu, aux, + h, h, quit, cache); + if (new_m != h) LogicError("G_BKZ_QP: internal error"); + if (quit) break; + } + + z++; + } + } + } + + + if (verb) { + G_BKZStatus(GetTime(), enum_time, NumIterations, NumTrivial, NumNonTrivial, + NumNoOps, m, B); + } + + + // clean up + + + if (m_orig > m) { + // for consistency, we move zero vectors to the front + + for (i = m+1; i <= m_orig; i++) { + swap(B(i), B(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = 0; i < m; i++) { + swap(B(m_orig-i), B(m-i)); + if (U) swap((*U)(m_orig-i), (*U)(m-i)); + } + } + + B.SetDims(m_orig, n); + BB = B; + + if (U) { + U->SetDims(m_orig, m_orig); + *UU = *U; + } + + return m; +} + +long G_BKZ_QP(mat_ZZ& BB, mat_ZZ& UU, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + if (delta < 0.50 || delta >= 1) LogicError("G_BKZ_QP: bad delta"); + if (beta < 2) LogicError("G_BKZ_QP: bad block size"); + + return G_BKZ_QP(BB, &UU, to_quad_float(delta), beta, prune, check); +} + +long G_BKZ_QP(mat_ZZ& BB, double delta, + long beta, long prune, LLLCheckFct 
check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + + if (delta < 0.50 || delta >= 1) LogicError("G_BKZ_QP: bad delta"); + if (beta < 2) LogicError("G_BKZ_QP: bad block size"); + + return G_BKZ_QP(BB, 0, to_quad_float(delta), beta, prune, check); +} + + + +static +long G_BKZ_QP1(mat_ZZ& BB, mat_ZZ* UU, quad_float delta, + long beta, long prune, LLLCheckFct check) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + NTL_TLS_GLOBAL_ACCESS(G_BKZThresh); + + + long m = BB.NumRows(); + long n = BB.NumCols(); + long m_orig = m; + + long i, j; + ZZ MU; + + ZZ T1; + quad_float *tp; + + init_red_fudge(); + + mat_ZZ B; + B = BB; + + B.SetDims(m+1, n); + + + Unique2DArray B1_store; + B1_store.SetDimsFrom1(m+2, n+1); + quad_float **B1 = B1_store.get(); // approximates B + + Unique2DArray mu_store; + mu_store.SetDimsFrom1(m+2, n+2); + quad_float **mu = mu_store.get(); + + Unique2DArray aux_store; + aux_store.SetDimsFrom1(m+2, n+1); + quad_float **aux = aux_store.get(); + + UniqueArray c_store; + c_store.SetLength(m+2); + quad_float *c = c_store.get(); // squared lengths of Gramm-Schmidt basis vectors + + + double cbar; + + UniqueArray ctilda_store; + ctilda_store.SetLength(m+2); + double *ctilda = ctilda_store.get(); + + UniqueArray vvec_store; + vvec_store.SetLength(m+2); + double *vvec = vvec_store.get(); + + + UniqueArray yvec_store; + yvec_store.SetLength(m+2); + double *yvec = yvec_store.get(); + + UniqueArray uvec_store; + uvec_store.SetLength(m+2); + double *uvec = uvec_store.get(); + + UniqueArray utildavec_store; + utildavec_store.SetLength(m+2); + double *utildavec = utildavec_store.get(); + + + UniqueArray Deltavec_store; + Deltavec_store.SetLength(m+2); + long *Deltavec = Deltavec_store.get(); + + UniqueArray deltavec_store; + deltavec_store.SetLength(m+2); + long *deltavec = deltavec_store.get(); + + + mat_ZZ Ulocal; + mat_ZZ *U; + + if (UU) { + Ulocal.SetDims(m+1, m); + for (i = 1; i <= m; i++) + conv(Ulocal(i, i), 1); + U = &Ulocal; + } + else + U = 0; + + long quit; + long new_m; + long z, jj, kk; + long s, t; + long h; + + double eta; + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) { + conv(B1[i][j], B(i, j)); + CheckFinite(&B1[i][j]); + } + + + GivensCache_QP cache(m, n); + + m = ll_G_LLL_QP(B, U, delta, 0, check, B1, mu, aux, m, 1, quit, cache); + + + + double tt; + + double enum_time = 0; + unsigned long NumIterations = 0; + unsigned long NumTrivial = 0; + unsigned long NumNonTrivial = 0; + unsigned long NumNoOps = 0; + + long verb = verbose; + + verbose = 0; + + long clean = 1; + + if (m < m_orig) { + for (i = m_orig+1; i >= m+2; i--) { + // swap i, i-1 + + swap(B(i), B(i-1)); + if (U) swap((*U)(i), (*U)(i-1)); + } + } + + if (!quit && m > 1) { + // cerr << "continuing\n"; + if (beta > m) beta = m; + + if (prune > 0) + ComputeG_BKZConstant(beta, prune); + + z = 0; + jj = 0; + + while (z < m-1) { + jj++; + kk = min(jj+beta-1, m); + + if (jj == m) { + jj = 1; + kk = beta; + clean = 1; + } + + if (verb) { + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) + G_BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + + + + + // ENUM + + double tt1; + + if (verb) { + tt1 = GetTime(); + } + + for (i = jj; i <= kk; i++) { + c[i] = mu[i][i]*mu[i][i]; + CheckFinite(&c[i]); + } + + if (prune > 0) + ComputeG_BKZThresh(&c[jj], kk-jj+1); + + + cbar = to_double(c[jj]); + utildavec[jj] = uvec[jj] = 1; + + yvec[jj] = vvec[jj] = 0; + Deltavec[jj] = 0; + + + s = t = jj; + deltavec[jj] 
= 1; + + for (i = jj+1; i <= kk+1; i++) { + ctilda[i] = uvec[i] = utildavec[i] = yvec[i] = 0; + Deltavec[i] = 0; + vvec[i] = 0; + deltavec[i] = 1; + } + + long enum_cnt = 0; + + while (t <= kk) { + if (verb) { + enum_cnt++; + if (enum_cnt > 100000) { + enum_cnt = 0; + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) { + enum_time += tt - tt1; + tt1 = tt; + G_BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + } + } + + ctilda[t] = ctilda[t+1] + + (yvec[t]+utildavec[t])*(yvec[t]+utildavec[t])*to_double(c[t]); + + ForceToMem(&ctilda[t]); // prevents an infinite loop + + if (prune > 0 && t > jj) { + eta = to_double(G_BKZThresh(t-jj)); + } + else + eta = 0; + + if (ctilda[t] < cbar - eta) { + if (t > jj) { + double t1; + + t--; + t1 = 0; + for (i = t+1; i <= s; i++) { + t1 += utildavec[i]*to_double(mu[i][t]); + } + + + yvec[t] = t1; + t1 = -t1; + if (t1 >= 0) + t1 = ceil(t1-0.5); + else + t1 = floor(t1+0.5); + + utildavec[t] = vvec[t] = t1; + Deltavec[t] = 0; + if (utildavec[t] > -yvec[t]) + deltavec[t] = -1; + else + deltavec[t] = 1; + } + else { + cbar = ctilda[jj]; + for (i = jj; i <= kk; i++) { + uvec[i] = utildavec[i]; + } + } + } + else { + t++; + s = max(s, t); + if (t < s) Deltavec[t] = -Deltavec[t]; + if (Deltavec[t]*deltavec[t] >= 0) Deltavec[t] += deltavec[t]; + utildavec[t] = vvec[t] + Deltavec[t]; + } + } + + if (verb) { + tt1 = GetTime() - tt1; + enum_time += tt1; + } + + NumIterations++; + + h = min(kk+1, m); + + quad_float t1; + + if ((delta-8*red_fudge)*c[jj] > cbar*(1+64/NTL_FDOUBLE_PRECISION)) { + + clean = 0; + + // we treat the case that the new vector is b_s (jj < s <= kk) + // as a special case that appears to occur most of the time. + + s = 0; + for (i = jj+1; i <= kk; i++) { + if (uvec[i] != 0) { + if (s == 0) + s = i; + else + s = -1; + } + } + + if (s == 0) LogicError("G_BKZ_QP: internal error"); + + if (s > 0) { + // special case + + NumTrivial++; + + for (i = s; i > jj; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + // cerr << "special case\n"; + new_m = ll_G_LLL_QP(B, U, delta, 0, check, + B1, mu, aux, h, jj, quit, cache); + if (new_m != h) LogicError("G_BKZ_QP: internal error"); + if (quit) break; + } + else { + // the general case + + NumNonTrivial++; + + + for (i = 1; i <= n; i++) conv(B(m+1, i), 0); + + if (U) { + for (i = 1; i <= m_orig; i++) + conv((*U)(m+1, i), 0); + } + + for (i = jj; i <= kk; i++) { + if (uvec[i] == 0) continue; + conv(MU, uvec[i]); + RowTransform2(B(m+1), B(i), MU); + if (U) RowTransform2((*U)(m+1), (*U)(i), MU); + } + + for (i = m+1; i >= jj+1; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + for (i = 1; i <= n; i++) { + conv(B1[jj][i], B(jj, i)); + CheckFinite(&B1[jj][i]); + } + + if (IsZero(B(jj))) LogicError("G_BKZ_QP: internal error"); + + // remove linear dependencies + + // cerr << "general case\n"; + new_m = ll_G_LLL_QP(B, U, delta, 0, 0, B1, mu, aux, + kk+1, jj, quit, cache); + + if (new_m != kk) LogicError("G_BKZ_QP: internal error"); + + // remove zero vector + + for (i = kk+2; i <= m+1; i++) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + quit = 0; + if (check) { + for (i = 1; i <= kk; i++) + if ((*check)(B(i))) { + quit = 1; + break; + } + } + + if (quit) break; + + if (h > kk) { + // extend reduced basis + + new_m = 
ll_G_LLL_QP(B, U, delta, 0, check, + B1, mu, aux, h, h, quit, cache); + + if (new_m != h) LogicError("G_BKZ_QP: internal error"); + if (quit) break; + } + } + + z = 0; + } + else { + // G_LLL_QP + // cerr << "progress\n"; + + NumNoOps++; + + + if (!clean) { + new_m = ll_G_LLL_QP(B, U, delta, 0, check, B1, mu, aux, + h, h, quit, cache); + + if (new_m != h) LogicError("G_BKZ_QP: internal error"); + if (quit) break; + } + + z++; + } + } + } + + + if (verb) { + G_BKZStatus(GetTime(), enum_time, NumIterations, NumTrivial, NumNonTrivial, + NumNoOps, m, B); + } + + + // clean up + + + if (m_orig > m) { + // for consistency, we move zero vectors to the front + + for (i = m+1; i <= m_orig; i++) { + swap(B(i), B(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = 0; i < m; i++) { + swap(B(m_orig-i), B(m-i)); + if (U) swap((*U)(m_orig-i), (*U)(m-i)); + } + } + + B.SetDims(m_orig, n); + BB = B; + + if (U) { + U->SetDims(m_orig, m_orig); + *UU = *U; + } + + return m; +} + +long G_BKZ_QP1(mat_ZZ& BB, mat_ZZ& UU, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + if (delta < 0.50 || delta >= 1) LogicError("G_BKZ_QP: bad delta"); + if (beta < 2) LogicError("G_BKZ_QP: bad block size"); + + return G_BKZ_QP1(BB, &UU, to_quad_float(delta), beta, prune, check); +} + +long G_BKZ_QP1(mat_ZZ& BB, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + + if (delta < 0.50 || delta >= 1) LogicError("G_BKZ_QP: bad delta"); + if (beta < 2) LogicError("G_BKZ_QP: bad block size"); + + return G_BKZ_QP1(BB, 0, to_quad_float(delta), beta, prune, check); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/G_LLL_RR.c b/thirdparty/linux/ntl/src/G_LLL_RR.c new file mode 100644 index 0000000000..14b318a586 --- /dev/null +++ b/thirdparty/linux/ntl/src/G_LLL_RR.c @@ -0,0 +1,1367 @@ + +#include +#include + +#include + +NTL_START_IMPL + + + + +static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x - y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } +} + +static void RowTransform2(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x + y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, 
B(i), MU); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } +} + +class GivensCache_RR { +public: + GivensCache_RR(long m, long n); + + void flush(); + void selective_flush(long l); + void swap(long l); + void swap(); + void touch(); + void incr(); + + long sz; + + mat_RR buf; + + UniqueArray bl; + UniqueArray bv; + + long bp; +}; + + +GivensCache_RR::GivensCache_RR(long m, long n) +{ + sz = min(m, n)/10; + if (sz < 2) + sz = 2; + else if (sz > 20) + sz = 20; + + + buf.SetDims(sz, n); + + bl.SetLength(sz); + bv.SetLength(sz); + + long i; + for (i = 0; i < sz; i++) bl[i] = 0; + for (i = 0; i < sz; i++) bv[i] = 0; + + bp = 0; +} + +void GivensCache_RR::flush() +{ + long i; + for (i = 0; i < sz; i++) bl[i] = 0; +} + +void GivensCache_RR::selective_flush(long l) +{ + long i; + + for (i = 0; i < sz; i++) + if (bl[i] && bv[i] >= l) + bl[i] = 0; +} + +void GivensCache_RR::swap(long l) +{ + long k = bl[bp]; + long i; + + i = 0; + while (i < sz && bl[i] != l) + i++; + + if (i < sz) { + bl[bp] = l; + bl[i] = k; + } + else + bl[bp] = l; + + selective_flush(l); +} + +void GivensCache_RR::swap() +{ + swap(bl[bp] - 1); +} + +void GivensCache_RR::touch() +{ + long k = bl[bp]; + bl[bp] = 0; + selective_flush(k); +} + +void GivensCache_RR::incr() +{ + long k = bl[bp]; + long k1 = k+1; + long i; + + i = 0; + while (i < sz && bl[i] != k1) + i++; + + if (i < sz) { + bp = i; + return; + } + + i = 0; + while (i < sz && bl[i] != 0) + i++; + + if (i < sz) { + bp = i; + return; + } + + long max_val = 0; + long max_index = 0; + for (i = 0; i < sz; i++) { + long t = labs(bl[i]-k1); + if (t > max_val) { + max_val = t; + max_index = i; + } + } + + bp = max_index; + bl[max_index] = 0; +} + + +static +void GivensComputeGS(mat_RR& B1, mat_RR& mu, mat_RR& aux, long k, long n, + GivensCache_RR& cache) +{ + long i, j; + + RR c, s, a, b, t; + RR T1, T2; + + vec_RR& p = mu(k); + + vec_RR& pp = cache.buf[cache.bp]; + + if (!cache.bl[cache.bp]) { + for (j = 1; j <= n; j++) + pp(j) = B1(k,j); + + long backoff; + backoff = k/4; + if (backoff < 2) + backoff = 2; + else if (backoff > cache.sz + 2) + backoff = cache.sz + 2; + + long ub = k-(backoff-1); + + for (i = 1; i < ub; i++) { + vec_RR& cptr = mu(i); + vec_RR& sptr = aux(i); + + for (j = n; j > i; j--) { + c = cptr(j); + s = sptr(j); + + // a = c*pp(j-1) - s*pp(j); + mul(T1, c, pp(j-1)); + mul(T2, s, pp(j)); + sub(a, T1, T2); + + // b = s*pp(j-1) + c*pp(j); + mul(T1, s, pp(j-1)); + mul(T2, c, pp(j)); + add(b, T1, T2); + + pp(j-1) = a; + pp(j) = b; + } + + div(pp(i), pp(i), mu(i,i)); + } + + cache.bl[cache.bp] = k; + cache.bv[cache.bp] = k-backoff; + } + + for (j = 1; j <= n; j++) + p(j) = pp(j); + + for (i = max(cache.bv[cache.bp]+1, 1); i < k; i++) { + vec_RR& cptr = mu(i); + vec_RR& sptr = aux(i); + + for (j = n; j > i; j--) { + c = cptr(j); + s = sptr(j); + + // a = c*p(j-1) - s*p(j); + mul(T1, c, p(j-1)); + mul(T2, s, p(j)); + sub(a, T1, T2); + + // b = s*p(j-1) + c*p(j); + mul(T1, s, p(j-1)); + mul(T2, c, p(j)); + add(b, T1, T2); + + p(j-1) = a; + p(j) = b; + } + + div(p(i), p(i), mu(i,i)); + } + + for (j = n; j > k; j--) { + a = p(j-1); + b = p(j); + + if (b == 0) { + c = 1; + s = 0; + } + else { + abs(T1, b); + abs(T2, a); + + if (T1 > T2) { + // t = -a/b; + div(T1, a, b); + negate(t, T1); + + // s = 1/sqrt(1 + t*t); + sqr(T1, t); + add(T1, T1, 1); + SqrRoot(T1, T1); + inv(s, T1); + + // c = s*t; + mul(c, s, t); + } + else { + // t = -b/a; + div(T1, b, a); + negate(t, T1); + + // c = 1/sqrt(1 + t*t); + sqr(T1, t); + add(T1, T1, 1); + SqrRoot(T1, T1); + 
inv(c, T1); + + // s = c*t; + mul(s, c, t); + } + } + + // p(j-1) = c*a - s*b; + mul(T1, c, a); + mul(T2, s, b); + sub(p(j-1), T1, T2); + + p(j) = c; + aux(k,j) = s; + } + + if (k > n+1) LogicError("G_LLL_RR: internal error"); + if (k > n) p(k) = 0; + +} + +NTL_TLS_GLOBAL_DECL(RR, red_fudge) + +static NTL_CHEAP_THREAD_LOCAL long log_red = 0; + +static void init_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + log_red = long(0.50*RR::precision()); + + power2(red_fudge, -log_red); +} + +static void inc_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + + mul(red_fudge, red_fudge, 2); + log_red--; + + cerr << "G_LLL_RR: warning--relaxing reduction (" << log_red << ")\n"; + + if (log_red < 4) + ResourceError("G_LLL_RR: can not continue...sorry"); +} + + + + +static NTL_CHEAP_THREAD_LOCAL long verbose = 0; +static NTL_CHEAP_THREAD_LOCAL unsigned long NumSwaps = 0; +static NTL_CHEAP_THREAD_LOCAL double StartTime = 0; +static NTL_CHEAP_THREAD_LOCAL double LastTime = 0; + + + +static void G_LLLStatus(long max_k, double t, long m, const mat_ZZ& B) +{ + cerr << "---- G_LLL_RR status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, t-StartTime); + cerr << ", stage: " << max_k; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = t; + +} + + + +static +long ll_G_LLL_RR(mat_ZZ& B, mat_ZZ* U, const RR& delta, long deep, + LLLCheckFct check, mat_RR& B1, mat_RR& mu, + mat_RR& aux, long m, long init_k, long &quit, + GivensCache_RR& cache) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + long n = B.NumCols(); + + long i, j, k, Fc1; + ZZ MU; + RR mu1, t1, t2, cc; + ZZ T1; + + + quit = 0; + k = init_k; + + long counter; + + long trigger_index; + long small_trigger; + long cnt; + + RR half; + conv(half, 0.5); + RR half_plus_fudge; + add(half_plus_fudge, half, red_fudge); + + long max_k = 0; + double tt; + + cache.flush(); + + while (k <= m) { + + if (k > max_k) { + max_k = k; + } + + if (verbose) { + tt = GetTime(); + + if (tt > LastTime + LLLStatusInterval) + G_LLLStatus(max_k, tt, m, B); + } + + GivensComputeGS(B1, mu, aux, k, n, cache); + + counter = 0; + trigger_index = k; + small_trigger = 0; + cnt = 0; + + do { + // size reduction + + counter++; + if (counter > 10000) { + cerr << "G_LLL_XD: warning--possible infinite loop\n"; + counter = 0; + } + + + Fc1 = 0; + + for (j = k-1; j >= 1; j--) { + abs(t1, mu(k,j)); + if (t1 > half_plus_fudge) { + + if (!Fc1) { + if (j > trigger_index || + (j == trigger_index && small_trigger)) { + + cnt++; + + if (cnt > 10) { + inc_red_fudge(); + add(half_plus_fudge, half, red_fudge); + cnt = 0; + } + } + + trigger_index = j; + small_trigger = (t1 < 4); + } + + Fc1 = 1; + + mu1 = mu(k,j); + if (sign(mu1) >= 0) { + sub(mu1, mu1, half); + ceil(mu1, mu1); + } + else { + add(mu1, mu1, half); + floor(mu1, mu1); + } + + if (mu1 == 1) { + for (i = 1; i <= j-1; i++) + sub(mu(k,i), mu(k,i), mu(j,i)); + } + else if (mu1 == -1) { + for (i = 1; i <= j-1; i++) + add(mu(k,i), mu(k,i), mu(j,i)); + } + else { + for (i = 1; i <= j-1; i++) { + mul(t2, mu1, mu(j,i)); + sub(mu(k,i), mu(k,i), t2); + } 
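+ // Subtracting mu1*mu(j,i) from row k keeps the cached mu coefficients
+ // consistent with the basis update b_k <- b_k - mu1*b_j carried out by
+ // the RowTransform call below.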
+ } + + + conv(MU, mu1); + + sub(mu(k,j), mu(k,j), mu1); + + RowTransform(B(k), B(j), MU); + if (U) RowTransform((*U)(k), (*U)(j), MU); + } + } + + if (Fc1) { + for (i = 1; i <= n; i++) + conv(B1(k, i), B(k, i)); + cache.touch(); + GivensComputeGS(B1, mu, aux, k, n, cache); + } + } while (Fc1); + + if (check && (*check)(B(k))) + quit = 1; + + if (IsZero(B(k))) { + for (i = k; i < m; i++) { + // swap i, i+1 + swap(B(i), B(i+1)); + swap(B1(i), B1(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + cache.flush(); + + m--; + if (quit) break; + continue; + } + + if (quit) break; + + if (deep > 0) { + // deep insertions + + LogicError("sorry...deep insertions not implemented"); + + } // end deep insertions + + // test G_LLL reduction condition + + if (k <= 1) { + cache.incr(); + k++; + } + else { + sqr(t1, mu(k,k-1)); + sub(t1, delta, t1); + sqr(t2, mu(k-1,k-1)); + mul(t1, t1, t2); + sqr(t2, mu(k, k)); + if (t1 > t2) { + // swap rows k, k-1 + swap(B(k), B(k-1)); + swap(B1(k), B1(k-1)); + if (U) swap((*U)(k), (*U)(k-1)); + + cache.swap(); + + k--; + NumSwaps++; + } + else { + cache.incr(); + k++; + } + } + } + + if (verbose) { + G_LLLStatus(m+1, GetTime(), m, B); + } + + + return m; +} + +static +long G_LLL_RR(mat_ZZ& B, mat_ZZ* U, const RR& delta, long deep, + LLLCheckFct check) +{ + long m = B.NumRows(); + long n = B.NumCols(); + + long i, j; + long new_m, dep, quit; + RR s; + ZZ MU; + RR mu1; + + RR t1; + ZZ T1; + + init_red_fudge(); + + if (U) ident(*U, m); + + mat_RR B1; // approximates B + B1.SetDims(m, n); + + + mat_RR mu; + mu.SetDims(m, n+1); + + mat_RR aux; + aux.SetDims(m, n); + + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) + conv(B1(i, j), B(i, j)); + + GivensCache_RR cache(m, n); + + new_m = ll_G_LLL_RR(B, U, delta, deep, check, B1, mu, aux, m, 1, quit, cache); + + dep = m - new_m; + m = new_m; + + if (dep > 0) { + // for consistency, we move all of the zero rows to the front + + for (i = 0; i < m; i++) { + swap(B(m+dep-i), B(m-i)); + if (U) swap((*U)(m+dep-i), (*U)(m-i)); + } + } + + + return m; +} + + + +long G_LLL_RR(mat_ZZ& B, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("G_LLL_RR: bad delta"); + if (deep < 0) LogicError("G_LLL_RR: bad deep"); + RR Delta; + conv(Delta, delta); + return G_LLL_RR(B, 0, Delta, deep, check); +} + +long G_LLL_RR(mat_ZZ& B, mat_ZZ& U, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("G_LLL_RR: bad delta"); + if (deep < 0) LogicError("G_LLL_RR: bad deep"); + RR Delta; + conv(Delta, delta); + return G_LLL_RR(B, &U, Delta, deep, check); +} + + + +NTL_TLS_GLOBAL_DECL(vec_RR, G_BKZConstant) + +static +void ComputeG_BKZConstant(long beta, long p) +{ + NTL_TLS_GLOBAL_ACCESS(G_BKZConstant); + + RR c_PI; + ComputePi(c_PI); + + RR LogPI = log(c_PI); + + G_BKZConstant.SetLength(beta-1); + + vec_RR Log; + Log.SetLength(beta); + + + long i, j, k; + RR x, y; + + for (j = 1; j <= beta; j++) + Log(j) = log(to_RR(j)); + + for (i = 1; i <= beta-1; i++) { + // First, we compute x = gamma(i/2)^{2/i} + + k = i/2; + + if ((i & 1) == 0) { // i even + x = 0; + for (j = 1; j <= k; j++) + x += Log(j); + + x = exp(x/k); + + } + else { // i odd + x = 0; + for (j = k + 2; j <= 2*k + 2; j++) + x += Log(j); + + x += 0.5*LogPI - 2*(k+1)*Log(2); + + x = 
exp(2*x/i); + } + + // Second, we compute y = 2^{2*p/i} + + y = exp(-(2*p/to_RR(i))*Log(2)); + + G_BKZConstant(i) = x*y/c_PI; + } + +} + +NTL_TLS_GLOBAL_DECL(vec_RR, G_BKZThresh) + +static +void ComputeG_BKZThresh(RR *c, long beta) +{ + NTL_TLS_GLOBAL_ACCESS(G_BKZConstant); + NTL_TLS_GLOBAL_ACCESS(G_BKZThresh); + + G_BKZThresh.SetLength(beta-1); + + long i; + RR x; + RR t1; + + x = 0; + + for (i = 1; i <= beta-1; i++) { + log(t1, c[i-1]); + add(x, x, t1); + div(t1, x, i); + exp(t1, t1); + mul(G_BKZThresh(i), t1, G_BKZConstant(i)); + } +} + + + + +static +void G_BKZStatus(double tt, double enum_time, unsigned long NumIterations, + unsigned long NumTrivial, unsigned long NumNonTrivial, + unsigned long NumNoOps, long m, + const mat_ZZ& B) +{ + cerr << "---- G_BKZ_RR status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, tt-StartTime); + cerr << ", enum time: "; + PrintTime(cerr, enum_time); + cerr << ", iter: " << NumIterations << "\n"; + cerr << "triv: " << NumTrivial; + cerr << ", nontriv: " << NumNonTrivial; + cerr << ", no ops: " << NumNoOps; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = tt; + +} + + + + +static +long G_BKZ_RR(mat_ZZ& BB, mat_ZZ* UU, const RR& delta, + long beta, long prune, LLLCheckFct check) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + NTL_TLS_GLOBAL_ACCESS(G_BKZThresh); + + + long m = BB.NumRows(); + long n = BB.NumCols(); + long m_orig = m; + + long i, j; + ZZ MU; + + RR t1, t2; + ZZ T1; + + init_red_fudge(); + + mat_ZZ B; + B = BB; + + B.SetDims(m+1, n); + + + mat_RR B1; + B1.SetDims(m+1, n); + + mat_RR mu; + mu.SetDims(m+1, n+1); + + mat_RR aux; + aux.SetDims(m+1, n); + + vec_RR c; + c.SetLength(m+1); + + RR cbar; + + vec_RR ctilda; + ctilda.SetLength(m+1); + + vec_RR vvec; + vvec.SetLength(m+1); + + vec_RR yvec; + yvec.SetLength(m+1); + + vec_RR uvec; + uvec.SetLength(m+1); + + vec_RR utildavec; + utildavec.SetLength(m+1); + + vec_long Deltavec; + Deltavec.SetLength(m+1); + + vec_long deltavec; + deltavec.SetLength(m+1); + + mat_ZZ Ulocal; + mat_ZZ *U; + + if (UU) { + Ulocal.SetDims(m+1, m); + for (i = 1; i <= m; i++) + conv(Ulocal(i, i), 1); + U = &Ulocal; + } + else + U = 0; + + long quit; + long new_m; + long z, jj, kk; + long s, t; + long h; + + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) + conv(B1(i, j), B(i, j)); + + // cerr << "\n"; + // cerr << "first G_LLL\n"; + + GivensCache_RR cache(m, n); + + m = ll_G_LLL_RR(B, U, delta, 0, check, B1, mu, aux, m, 1, quit, cache); + + + double tt; + + double enum_time = 0; + unsigned long NumIterations = 0; + unsigned long NumTrivial = 0; + unsigned long NumNonTrivial = 0; + unsigned long NumNoOps = 0; + + long verb = verbose; + + verbose = 0; + + + if (m < m_orig) { + for (i = m_orig+1; i >= m+2; i--) { + // swap i, i-1 + + swap(B(i), B(i-1)); + if (U) swap((*U)(i), (*U)(i-1)); + } + } + + long clean = 1; + + if (!quit && m > 1) { + // cerr << "continuing\n"; + + if (beta > m) beta = m; + + if (prune > 0) + ComputeG_BKZConstant(beta, prune); + + z = 0; + jj = 0; + + while (z < m-1) { + jj++; + kk = 
min(jj+beta-1, m); + + if (jj == m) { + jj = 1; + kk = beta; + clean = 1; + } + + if (verb) { + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) + G_BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + + // ENUM + + double tt1; + + if (verb) { + tt1 = GetTime(); + } + + for (i = jj; i <= kk; i++) + sqr(c(i), mu(i,i)); + + + if (prune > 0) + ComputeG_BKZThresh(&c(jj), kk-jj+1); + + cbar = c(jj); + conv(utildavec(jj), 1); + conv(uvec(jj), 1); + + conv(yvec(jj), 0); + conv(vvec(jj), 0); + Deltavec(jj) = 0; + + + s = t = jj; + deltavec(jj) = 1; + + for (i = jj+1; i <= kk+1; i++) { + conv(ctilda(i), 0); + conv(uvec(i), 0); + conv(utildavec(i), 0); + conv(yvec(i), 0); + Deltavec(i) = 0; + conv(vvec(i), 0); + deltavec(i) = 1; + } + + long enum_cnt = 0; + + while (t <= kk) { + if (verb) { + enum_cnt++; + if (enum_cnt > 100000) { + enum_cnt = 0; + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) { + enum_time += tt - tt1; + tt1 = tt; + G_BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + } + } + + + add(t1, yvec(t), utildavec(t)); + sqr(t1, t1); + mul(t1, t1, c(t)); + add(ctilda(t), ctilda(t+1), t1); + + if (prune > 0 && t > jj) + sub(t1, cbar, G_BKZThresh(t-jj)); + else + t1 = cbar; + + + if (ctilda(t) < t1) { + if (t > jj) { + t--; + clear(t1); + for (i = t+1; i <= s; i++) { + mul(t2, utildavec(i), mu(i,t)); + add(t1, t1, t2); + } + + yvec(t) = t1; + negate(t1, t1); + if (sign(t1) >= 0) { + sub(t1, t1, 0.5); + ceil(t1, t1); + } + else { + add(t1, t1, 0.5); + floor(t1, t1); + } + + utildavec(t) = t1; + vvec(t) = t1; + Deltavec(t) = 0; + + negate(t1, t1); + + if (t1 < yvec(t)) + deltavec(t) = -1; + else + deltavec(t) = 1; + } + else { + cbar = ctilda(jj); + for (i = jj; i <= kk; i++) { + uvec(i) = utildavec(i); + } + } + } + else { + t++; + s = max(s, t); + if (t < s) Deltavec(t) = -Deltavec(t); + if (Deltavec(t)*deltavec(t) >= 0) Deltavec(t) += deltavec(t); + add(utildavec(t), vvec(t), Deltavec(t)); + } + } + + if (verb) { + tt1 = GetTime() - tt1; + enum_time += tt1; + } + + NumIterations++; + + h = min(kk+1, m); + + mul(t1, red_fudge, -8); + add(t1, t1, delta); + mul(t1, t1, c(jj)); + + if (t1 > cbar) { + + clean = 0; + + // we treat the case that the new vector is b_s (jj < s <= kk) + // as a special case that appears to occur most of the time.
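+ // The scan below leaves s = 0 if no coefficient beyond jj is nonzero
+ // (treated as an internal error), s = i if uvec(i) is the unique nonzero
+ // coefficient (the enumerated vector is a multiple of b_s, handled by
+ // rotating b_s to position jj), and s = -1 if several coefficients are
+ // nonzero (the general insertion path).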
+ + s = 0; + for (i = jj+1; i <= kk; i++) { + if (uvec(i) != 0) { + if (s == 0) + s = i; + else + s = -1; + } + } + + if (s == 0) LogicError("G_BKZ_RR: internal error"); + + if (s > 0) { + // special case + // cerr << "special case\n"; + + NumTrivial++; + + for (i = s; i > jj; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + swap(B1(i-1), B1(i)); + if (U) swap((*U)(i-1), (*U)(i)); + } + + new_m = ll_G_LLL_RR(B, U, delta, 0, check, + B1, mu, aux, h, jj, quit, cache); + if (new_m != h) LogicError("G_BKZ_RR: internal error"); + if (quit) break; + } + else { + // the general case + + NumNonTrivial++; + + for (i = 1; i <= n; i++) conv(B(m+1, i), 0); + + if (U) { + for (i = 1; i <= m_orig; i++) + conv((*U)(m+1, i), 0); + } + + for (i = jj; i <= kk; i++) { + if (uvec(i) == 0) continue; + conv(MU, uvec(i)); + RowTransform2(B(m+1), B(i), MU); + if (U) RowTransform2((*U)(m+1), (*U)(i), MU); + } + + for (i = m+1; i >= jj+1; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + swap(B1(i-1), B1(i)); + if (U) swap((*U)(i-1), (*U)(i)); + } + + for (i = 1; i <= n; i++) + conv(B1(jj, i), B(jj, i)); + + if (IsZero(B(jj))) LogicError("G_BKZ_RR: internal error"); + + // remove linear dependencies + + // cerr << "general case\n"; + new_m = ll_G_LLL_RR(B, U, delta, 0, 0, B1, mu, aux, + kk+1, jj, quit, cache); + + + if (new_m != kk) LogicError("G_BKZ_RR: internal error"); + + // remove zero vector + + for (i = kk+2; i <= m+1; i++) { + // swap i, i-1 + swap(B(i-1), B(i)); + swap(B1(i-1), B1(i)); + if (U) swap((*U)(i-1), (*U)(i)); + } + + quit = 0; + if (check) { + for (i = 1; i <= kk; i++) + if ((*check)(B(i))) { + quit = 1; + break; + } + } + + if (quit) break; + + if (h > kk) { + // extend reduced basis + + new_m = ll_G_LLL_RR(B, U, delta, 0, check, + B1, mu, aux, h, h, quit, cache); + + if (new_m != h) LogicError("G_BKZ_RR: internal error"); + if (quit) break; + } + } + + z = 0; + } + else { + // G_LLL_RR + // cerr << "progress\n"; + + NumNoOps++; + + if (!clean) { + new_m = ll_G_LLL_RR(B, U, delta, 0, check, B1, mu, aux, + h, h, quit, cache); + if (new_m != h) LogicError("G_BKZ_RR: internal error"); + if (quit) break; + } + + z++; + } + } + } + + if (verb) { + G_BKZStatus(GetTime(), enum_time, NumIterations, NumTrivial, NumNonTrivial, + NumNoOps, m, B); + } + + + // clean up + + if (m_orig > m) { + // for consistency, we move zero vectors to the front + + for (i = m+1; i <= m_orig; i++) { + swap(B(i), B(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = 0; i < m; i++) { + swap(B(m_orig-i), B(m-i)); + if (U) swap((*U)(m_orig-i), (*U)(m-i)); + } + } + + B.SetDims(m_orig, n); + BB = B; + + if (U) { + U->SetDims(m_orig, m_orig); + *UU = *U; + } + + return m; +} + +long G_BKZ_RR(mat_ZZ& BB, mat_ZZ& UU, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("G_BKZ_RR: bad delta"); + if (beta < 2) LogicError("G_BKZ_RR: bad block size"); + + RR Delta; + conv(Delta, delta); + + return G_BKZ_RR(BB, &UU, Delta, beta, prune, check); +} + +long G_BKZ_RR(mat_ZZ& BB, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("G_BKZ_RR: bad delta"); + if (beta < 2) LogicError("G_BKZ_RR: bad block size"); + + RR Delta; + conv(Delta, delta); + + return G_BKZ_RR(BB, 0, Delta, beta, prune, check); +} 
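+
+// Editorial usage sketch (not from the NTL sources): with the two wrappers
+// above, a caller that just wants a BKZ-reduced basis might write the
+// following; the matrix name and parameter values are illustrative only.
+//
+//    mat_ZZ B;
+//    // ... fill the rows of B with the input basis vectors ...
+//    long rank = G_BKZ_RR(B, 0.99, 20, 0, 0, 0);
+//    // delta = 0.99, block size beta = 20, prune = 0 (no pruning),
+//    // check = 0 (no early-exit test), verb = 0 (quiet)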
+
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/G_LLL_XD.c b/thirdparty/linux/ntl/src/G_LLL_XD.c
new file mode 100644
index 0000000000..0807f25147
--- /dev/null
+++ b/thirdparty/linux/ntl/src/G_LLL_XD.c
@@ -0,0 +1,1320 @@
+
+#include <NTL/LLL.h>
+#include <NTL/fileio.h>
+#include <NTL/vec_xdouble.h>
+#include <NTL/vec_double.h>
+
+#include <NTL/new.h>
+
+NTL_START_IMPL
+
+
+static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1)
+// x = x - y*MU
+{
+   NTL_ZZRegister(T);
+   NTL_ZZRegister(MU);
+   long k;
+
+   long n = A.length();
+   long i;
+
+   MU = MU1;
+
+   if (MU == 1) {
+      for (i = 1; i <= n; i++)
+         sub(A(i), A(i), B(i));
+
+      return;
+   }
+
+   if (MU == -1) {
+      for (i = 1; i <= n; i++)
+         add(A(i), A(i), B(i));
+
+      return;
+   }
+
+   if (MU == 0) return;
+
+   if (NumTwos(MU) >= NTL_ZZ_NBITS)
+      k = MakeOdd(MU);
+   else
+      k = 0;
+
+
+   if (MU.WideSinglePrecision()) {
+      long mu1;
+      conv(mu1, MU);
+
+      if (k > 0) {
+
+         for (i = 1; i <= n; i++) {
+            mul(T, B(i), mu1);
+            LeftShift(T, T, k);
+            sub(A(i), A(i), T);
+         }
+
+      }
+      else {
+
+         for (i = 1; i <= n; i++) {
+            MulSubFrom(A(i), B(i), mu1);
+         }
+
+      }
+   }
+   else {
+      for (i = 1; i <= n; i++) {
+         mul(T, B(i), MU);
+         if (k > 0) LeftShift(T, T, k);
+         sub(A(i), A(i), T);
+      }
+   }
+}
+
+
+static void RowTransform2(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1)
+// x = x + y*MU
+{
+   NTL_ZZRegister(T);
+   NTL_ZZRegister(MU);
+   long k;
+
+   long n = A.length();
+   long i;
+
+   MU = MU1;
+
+   if (MU == 1) {
+      for (i = 1; i <= n; i++)
+         add(A(i), A(i), B(i));
+
+      return;
+   }
+
+   if (MU == -1) {
+      for (i = 1; i <= n; i++)
+         sub(A(i), A(i), B(i));
+
+      return;
+   }
+
+   if (MU == 0) return;
+
+   if (NumTwos(MU) >= NTL_ZZ_NBITS)
+      k = MakeOdd(MU);
+   else
+      k = 0;
+
+   if (MU.WideSinglePrecision()) {
+      long mu1;
+      conv(mu1, MU);
+
+      for (i = 1; i <= n; i++) {
+         mul(T, B(i), mu1);
+         if (k > 0) LeftShift(T, T, k);
+         add(A(i), A(i), T);
+      }
+   }
+   else {
+      for (i = 1; i <= n; i++) {
+         mul(T, B(i), MU);
+         if (k > 0) LeftShift(T, T, k);
+         add(A(i), A(i), T);
+      }
+   }
+}
+
+
+class GivensCache_XD {
+public:
+   GivensCache_XD(long m, long n);
+
+   void flush();
+   void selective_flush(long l);
+   void swap(long l);
+   void swap();
+   void touch();
+   void incr();
+
+   long sz;
+
+   Unique2DArray<xdouble> buf;
+   UniqueArray<long> bl;
+   UniqueArray<long> bv;
+
+   long bp;
+};
+
+
+GivensCache_XD::GivensCache_XD(long m, long n)
+{
+   sz = min(m, n)/10;
+   if (sz < 2)
+      sz = 2;
+   else if (sz > 20)
+      sz = 20;
+
+
+   buf.SetDims(sz, n+1);
+   bl.SetLength(sz);
+   bv.SetLength(sz);
+
+   long i;
+   for (i = 0; i < sz; i++) bl[i] = 0;
+   for (i = 0; i < sz; i++) bv[i] = 0;
+
+   bp = 0;
+}
+
+void GivensCache_XD::flush()
+{
+   long i;
+   for (i = 0; i < sz; i++) bl[i] = 0;
+}
+
+void GivensCache_XD::selective_flush(long l)
+{
+   long i;
+
+   for (i = 0; i < sz; i++)
+      if (bl[i] && bv[i] >= l)
+         bl[i] = 0;
+}
+
+void GivensCache_XD::swap(long l)
+{
+   long k = bl[bp];
+   long i;
+
+   i = 0;
+   while (i < sz && bl[i] != l)
+      i++;
+
+   if (i < sz) {
+      bl[bp] = l;
+      bl[i] = k;
+   }
+   else
+      bl[bp] = l;
+
+   selective_flush(l);
+}
+
+void GivensCache_XD::swap()
+{
+   swap(bl[bp] - 1);
+}
+
+void GivensCache_XD::touch()
+{
+   long k = bl[bp];
+   bl[bp] = 0;
+   selective_flush(k);
+}
+
+void GivensCache_XD::incr()
+{
+   long k = bl[bp];
+   long k1 = k+1;
+   long i;
+
+   i = 0;
+   while (i < sz && bl[i] != k1)
+      i++;
+
+   if (i < sz) {
+      bp = i;
+      return;
+   }
+
+   i = 0;
+   while (i < sz && bl[i] != 0)
+      i++;
+
+   if (i < sz) {
+      bp = i;
+      return;
+   }
+
+   long max_val = 0;
+   long max_index = 0;
+   for (i = 0; i < sz; i++) {
+      long t = labs(bl[i]-k1);
+      if (t > max_val) {
+         max_val = t;
+         max_index = i;
+      }
+   }
+
+   bp = max_index;
+   bl[max_index] =
0; +} + + +static +void GivensComputeGS(xdouble **B1, xdouble **mu, xdouble **aux, long k, long n, + GivensCache_XD& cache) +{ + long i, j; + + xdouble c, s, a, b, t; + + xdouble *p = mu[k]; + + xdouble *pp = cache.buf[cache.bp]; + + if (!cache.bl[cache.bp]) { + for (j = 1; j <= n; j++) + pp[j] = B1[k][j]; + + long backoff; + backoff = k/4; + if (backoff < 2) + backoff = 2; + else if (backoff > cache.sz + 2) + backoff = cache.sz + 2; + + long ub = k-(backoff-1); + + for (i = 1; i < ub; i++) { + xdouble *cptr = mu[i]; + xdouble *sptr = aux[i]; + + for (j = n; j > i; j--) { + c = cptr[j]; + s = sptr[j]; + + a = c*pp[j-1] - s*pp[j]; + b = s*pp[j-1] + c*pp[j]; + + pp[j-1] = a; + pp[j] = b; + } + + pp[i] = pp[i]/mu[i][i]; + } + + cache.bl[cache.bp] = k; + cache.bv[cache.bp] = k-backoff; + } + + for (j = 1; j <= n; j++) + p[j] = pp[j]; + + for (i = max(cache.bv[cache.bp]+1, 1); i < k; i++) { + xdouble *cptr = mu[i]; + xdouble *sptr = aux[i]; + + for (j = n; j > i; j--) { + c = cptr[j]; + s = sptr[j]; + + a = c*p[j-1] - s*p[j]; + b = s*p[j-1] + c*p[j]; + + p[j-1] = a; + p[j] = b; + } + + p[i] = p[i]/mu[i][i]; + } + + for (j = n; j > k; j--) { + a = p[j-1]; + b = p[j]; + + if (b == 0) { + c = 1; + s = 0; + } + else if (fabs(b) > fabs(a)) { + t = -a/b; + s = 1/sqrt(1 + t*t); + c = s*t; + } + else { + t = -b/a; + c = 1/sqrt(1 + t*t); + s = c*t; + } + + p[j-1] = c*a - s*b; + p[j] = c; + aux[k][j] = s; + } + + if (k > n+1) LogicError("G_LLL_XD: internal error"); + if (k > n) p[k] = 0; +} + +NTL_TLS_GLOBAL_DECL_INIT(xdouble, red_fudge, (to_xdouble(0))) + +static NTL_CHEAP_THREAD_LOCAL long log_red = 0; + +static void init_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + long i; + + log_red = long(0.50*NTL_DOUBLE_PRECISION); + red_fudge = 1; + + for (i = log_red; i > 0; i--) + red_fudge = red_fudge*0.5; +} + +static void inc_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + + red_fudge = red_fudge * 2; + log_red--; + + cerr << "G_LLL_XD: warning--relaxing reduction (" << log_red << ")\n"; + + if (log_red < 4) + ResourceError("G_LLL_XD: can not continue...sorry"); +} + + + +static NTL_CHEAP_THREAD_LOCAL long verbose = 0; +static NTL_CHEAP_THREAD_LOCAL unsigned long NumSwaps = 0; +static NTL_CHEAP_THREAD_LOCAL double StartTime = 0; +static NTL_CHEAP_THREAD_LOCAL double LastTime = 0; + + + +static void G_LLLStatus(long max_k, double t, long m, const mat_ZZ& B) +{ + cerr << "---- G_LLL_XD status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, t-StartTime); + cerr << ", stage: " << max_k; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = t; + +} + + +static +long ll_G_LLL_XD(mat_ZZ& B, mat_ZZ* U, xdouble delta, long deep, + LLLCheckFct check, xdouble **B1, xdouble **mu, + xdouble **aux, + long m, long init_k, long &quit, GivensCache_XD& cache) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + long n = B.NumCols(); + + long i, j, k, Fc1; + ZZ MU; + xdouble mu1; + + xdouble t1; + ZZ T1; + xdouble *tp; + + + xdouble half = to_xdouble(0.5); + xdouble half_plus_fudge = 0.5 + red_fudge; + + quit = 0; + k = init_k; + 
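+
+   // main loop: for each k, Givens-orthogonalize row k, size-reduce it
+   // against rows k-1..1, then either swap rows k and k-1 (when the
+   // reduction condition below fails) and retreat, or advance k; zero
+   // rows are rotated to the end and m is decremented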
+   long counter;
+
+   long trigger_index;
+   long small_trigger;
+   long cnt;
+
+   long max_k = 0;
+
+   double tt;
+
+   cache.flush();
+
+   while (k <= m) {
+
+      if (k > max_k) {
+         max_k = k;
+      }
+
+      if (verbose) {
+         tt = GetTime();
+
+         if (tt > LastTime + LLLStatusInterval)
+            G_LLLStatus(max_k, tt, m, B);
+      }
+
+
+      GivensComputeGS(B1, mu, aux, k, n, cache);
+
+      counter = 0;
+      trigger_index = k;
+      small_trigger = 0;
+      cnt = 0;
+
+      do {
+         // size reduction
+
+         counter++;
+         if (counter > 10000) {
+            cerr << "G_LLL_XD: warning--possible infinite loop\n";
+            counter = 0;
+         }
+
+
+         Fc1 = 0;
+
+         for (j = k-1; j >= 1; j--) {
+            t1 = fabs(mu[k][j]);
+            if (t1 > half_plus_fudge) {
+
+               if (!Fc1) {
+                  if (j > trigger_index ||
+                      (j == trigger_index && small_trigger)) {
+
+                     cnt++;
+
+                     if (cnt > 10) {
+                        inc_red_fudge();
+                        half_plus_fudge = 0.5 + red_fudge;
+                        cnt = 0;
+                     }
+                  }
+
+                  trigger_index = j;
+                  small_trigger = (t1 < 4);
+               }
+
+
+               Fc1 = 1;
+
+               mu1 = mu[k][j];
+               if (mu1 >= 0)
+                  mu1 = ceil(mu1-half);
+               else
+                  mu1 = floor(mu1+half);
+
+
+               xdouble *mu_k = mu[k];
+               xdouble *mu_j = mu[j];
+
+               if (mu1 == 1) {
+                  for (i = 1; i <= j-1; i++)
+                     mu_k[i] -= mu_j[i];
+               }
+               else if (mu1 == -1) {
+                  for (i = 1; i <= j-1; i++)
+                     mu_k[i] += mu_j[i];
+               }
+               else {
+                  for (i = 1; i <= j-1; i++)
+                     MulSub(mu_k[i], mu_k[i], mu1, mu_j[i]);
+               }
+
+               mu_k[j] -= mu1;
+
+               conv(MU, mu1);
+
+               // cout << j << " " << MU << "\n";
+
+               RowTransform(B(k), B(j), MU);
+               if (U) RowTransform((*U)(k), (*U)(j), MU);
+            }
+         }
+
+         if (Fc1) {
+            for (i = 1; i <= n; i++)
+               conv(B1[k][i], B(k, i));
+            cache.touch();
+            GivensComputeGS(B1, mu, aux, k, n, cache);
+         }
+      } while (Fc1);
+
+      if (check && (*check)(B(k)))
+         quit = 1;
+
+      if (IsZero(B(k))) {
+         for (i = k; i < m; i++) {
+            // swap i, i+1
+            swap(B(i), B(i+1));
+            tp = B1[i]; B1[i] = B1[i+1]; B1[i+1] = tp;
+            if (U) swap((*U)(i), (*U)(i+1));
+         }
+
+         cache.flush();
+
+         m--;
+         if (quit) break;
+         continue;
+      }
+
+      if (quit) break;
+
+      if (deep > 0) {
+         // deep insertions
+
+         LogicError("sorry...deep insertions not implemented");
+      } // end deep insertions
+
+      // test G_LLL reduction condition
+
+      if (k > 1 &&
+          (delta - mu[k][k-1]*mu[k][k-1])*(mu[k-1][k-1])*(mu[k-1][k-1]) >
+          (mu[k][k])*(mu[k][k])) {
+
+         // swap rows k, k-1
+         swap(B(k), B(k-1));
+         tp = B1[k]; B1[k] = B1[k-1]; B1[k-1] = tp;
+         if (U) swap((*U)(k), (*U)(k-1));
+
+         cache.swap();
+
+         k--;
+         NumSwaps++;
+
+         // cout << "- " << k << "\n";
+      }
+      else {
+         cache.incr();
+         k++;
+         // cout << "+ " << k << "\n";
+      }
+   }
+
+   if (verbose) {
+      G_LLLStatus(m+1, GetTime(), m, B);
+   }
+
+
+   return m;
+}
+
+
+
+
+static
+long G_LLL_XD(mat_ZZ& B, mat_ZZ* U, xdouble delta, long deep,
+              LLLCheckFct check)
+{
+   long m = B.NumRows();
+   long n = B.NumCols();
+
+   long i, j;
+   long new_m, dep, quit;
+   xdouble s;
+   ZZ MU;
+   xdouble mu1;
+
+   xdouble t1;
+   ZZ T1;
+
+   init_red_fudge();
+
+   if (U) ident(*U, m);
+
+   Unique2DArray<xdouble> B1_store;
+   B1_store.SetDimsFrom1(m+1, n+1);
+   xdouble **B1 = B1_store.get();  // approximates B
+
+   Unique2DArray<xdouble> mu_store;
+   mu_store.SetDimsFrom1(m+1, n+2);
+   xdouble **mu = mu_store.get();
+
+   Unique2DArray<xdouble> aux_store;
+   aux_store.SetDimsFrom1(m+1, n+1);
+   xdouble **aux = aux_store.get();
+
+   for (i = 1; i <=m; i++)
+      for (j = 1; j <= n; j++)
+         conv(B1[i][j], B(i, j));
+
+   GivensCache_XD cache(m, n);
+
+   new_m =
+      ll_G_LLL_XD(B, U, delta, deep, check, B1, mu, aux, m, 1, quit, cache);
+
+   dep = m - new_m;
+   m = new_m;
+
+   if (dep > 0) {
+      // for consistency, we move all of the zero rows to the front
+
+      for (i = 0; i < m; i++) {
+         swap(B(m+dep-i), B(m-i));
+         if
(U) swap((*U)(m+dep-i), (*U)(m-i)); + } + } + + return m; +} + + + +long G_LLL_XD(mat_ZZ& B, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("G_LLL_XD: bad delta"); + if (deep < 0) LogicError("G_LLL_XD: bad deep"); + return G_LLL_XD(B, 0, to_xdouble(delta), deep, check); +} + +long G_LLL_XD(mat_ZZ& B, mat_ZZ& U, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + if (delta < 0.50 || delta >= 1) LogicError("G_LLL_XD: bad delta"); + if (deep < 0) LogicError("G_LLL_XD: bad deep"); + return G_LLL_XD(B, &U, to_xdouble(delta), deep, check); +} + + + +NTL_TLS_GLOBAL_DECL(vec_xdouble, G_BKZConstant) + +static +void ComputeG_BKZConstant(long beta, long p) +{ + NTL_TLS_GLOBAL_ACCESS(G_BKZConstant); + + const double c_PI = 3.14159265358979323846264338328; + const double LogPI = 1.14472988584940017414342735135; + + G_BKZConstant.SetLength(beta-1); + + vec_double Log; + Log.SetLength(beta); + + + long i, j, k; + double x, y; + + for (j = 1; j <= beta; j++) + Log(j) = log(double(j)); + + for (i = 1; i <= beta-1; i++) { + // First, we compute x = gamma(i/2)^{2/i} + + k = i/2; + + if ((i & 1) == 0) { // i even + x = 0; + for (j = 1; j <= k; j++) + x = x + Log(j); + + x = x * (1/double(k)); + + x = exp(x); + } + else { // i odd + x = 0; + for (j = k + 2; j <= 2*k + 2; j++) + x = x + Log(j); + + x = 0.5*LogPI + x - 2*(k+1)*Log(2); + + x = x * (2.0/double(i)); + + x = exp(x); + } + + // Second, we compute y = 2^{2*p/i} + + y = -(2*p/double(i))*Log(2); + y = exp(y); + + G_BKZConstant(i) = x*y/c_PI; + } +} + +NTL_TLS_GLOBAL_DECL(vec_xdouble, G_BKZThresh) + +static +void ComputeG_BKZThresh(xdouble *c, long beta) +{ + NTL_TLS_GLOBAL_ACCESS(G_BKZConstant); + NTL_TLS_GLOBAL_ACCESS(G_BKZThresh); + + G_BKZThresh.SetLength(beta-1); + + long i; + double x; + + x = 0; + + for (i = 1; i <= beta-1; i++) { + x += log(c[i-1]); + G_BKZThresh(i) = xexp(x/double(i))*G_BKZConstant(i); + } +} + + +static +void G_BKZStatus(double tt, double enum_time, unsigned long NumIterations, + unsigned long NumTrivial, unsigned long NumNonTrivial, + unsigned long NumNoOps, long m, + const mat_ZZ& B) +{ + cerr << "---- G_BKZ_XD status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, tt-StartTime); + cerr << ", enum time: "; + PrintTime(cerr, enum_time); + cerr << ", iter: " << NumIterations << "\n"; + cerr << "triv: " << NumTrivial; + cerr << ", nontriv: " << NumNonTrivial; + cerr << ", no ops: " << NumNoOps; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = tt; + +} + + +static +long G_BKZ_XD(mat_ZZ& BB, mat_ZZ* UU, xdouble delta, + long beta, long prune, LLLCheckFct check) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + NTL_TLS_GLOBAL_ACCESS(G_BKZThresh); + + + long m = BB.NumRows(); + long n = BB.NumCols(); + long m_orig = m; + + long i, j; + ZZ MU; + + xdouble t1; + ZZ 
T1;
+   xdouble *tp;
+
+   init_red_fudge();
+
+   mat_ZZ B;
+   B = BB;
+
+   B.SetDims(m+1, n);
+
+   Unique2DArray<xdouble> B1_store;
+   B1_store.SetDimsFrom1(m+2, n+1);
+   xdouble **B1 = B1_store.get();  // approximates B
+
+   Unique2DArray<xdouble> mu_store;
+   mu_store.SetDimsFrom1(m+2, n+2);
+   xdouble **mu = mu_store.get();
+
+   Unique2DArray<xdouble> aux_store;
+   aux_store.SetDimsFrom1(m+2, n+1);
+   xdouble **aux = aux_store.get();
+
+   UniqueArray<xdouble> c_store;
+   c_store.SetLength(m+2);
+   xdouble *c = c_store.get();  // squared lengths of Gram-Schmidt basis vectors
+
+
+   xdouble cbar;
+
+   UniqueArray<xdouble> ctilda_store;
+   ctilda_store.SetLength(m+2);
+   xdouble *ctilda = ctilda_store.get();
+
+   UniqueArray<xdouble> vvec_store;
+   vvec_store.SetLength(m+2);
+   xdouble *vvec = vvec_store.get();
+
+
+   UniqueArray<xdouble> yvec_store;
+   yvec_store.SetLength(m+2);
+   xdouble *yvec = yvec_store.get();
+
+   UniqueArray<xdouble> uvec_store;
+   uvec_store.SetLength(m+2);
+   xdouble *uvec = uvec_store.get();
+
+   UniqueArray<xdouble> utildavec_store;
+   utildavec_store.SetLength(m+2);
+   xdouble *utildavec = utildavec_store.get();
+
+
+   UniqueArray<long> Deltavec_store;
+   Deltavec_store.SetLength(m+2);
+   long *Deltavec = Deltavec_store.get();
+
+   UniqueArray<long> deltavec_store;
+   deltavec_store.SetLength(m+2);
+   long *deltavec = deltavec_store.get();
+
+   mat_ZZ Ulocal;
+   mat_ZZ *U;
+
+   if (UU) {
+      Ulocal.SetDims(m+1, m);
+      for (i = 1; i <= m; i++)
+         conv(Ulocal(i, i), 1);
+      U = &Ulocal;
+   }
+   else
+      U = 0;
+
+   long quit;
+   long new_m;
+   long z, jj, kk;
+   long s, t;
+   long h;
+   xdouble eta;
+
+
+   for (i = 1; i <=m; i++)
+      for (j = 1; j <= n; j++)
+         conv(B1[i][j], B(i, j));
+
+   // cerr << "\n";
+   // cerr << "first G_LLL\n";
+
+   GivensCache_XD cache(m, n);
+
+   m = ll_G_LLL_XD(B, U, delta, 0, check, B1, mu, aux, m, 1, quit, cache);
+
+
+   double tt;
+
+   double enum_time = 0;
+   unsigned long NumIterations = 0;
+   unsigned long NumTrivial = 0;
+   unsigned long NumNonTrivial = 0;
+   unsigned long NumNoOps = 0;
+
+   long verb = verbose;
+
+   verbose = 0;
+
+
+
+   if (m < m_orig) {
+      for (i = m_orig+1; i >= m+2; i--) {
+         // swap i, i-1
+
+         swap(B(i), B(i-1));
+         if (U) swap((*U)(i), (*U)(i-1));
+      }
+   }
+
+   long clean = 1;
+
+   if (!quit && m > 1) {
+      // cerr << "continuing\n";
+      if (beta > m) beta = m;
+
+      if (prune > 0)
+         ComputeG_BKZConstant(beta, prune);
+
+      z = 0;
+      jj = 0;
+
+      while (z < m-1) {
+         jj++;
+         kk = min(jj+beta-1, m);
+
+         if (jj == m) {
+            jj = 1;
+            kk = beta;
+            clean = 1;
+         }
+
+         if (verb) {
+            tt = GetTime();
+            if (tt > LastTime + LLLStatusInterval)
+               G_BKZStatus(tt, enum_time, NumIterations, NumTrivial,
+                           NumNonTrivial, NumNoOps, m, B);
+         }
+
+         // ENUM
+
+         double tt1;
+
+         if (verb) {
+            tt1 = GetTime();
+         }
+
+         for (i = jj; i <= kk; i++)
+            c[i] = mu[i][i]*mu[i][i];
+
+         if (prune > 0)
+            ComputeG_BKZThresh(&c[jj], kk-jj+1);
+
+         cbar = c[jj];
+         utildavec[jj] = uvec[jj] = 1;
+
+         yvec[jj] = vvec[jj] = 0;
+         Deltavec[jj] = 0;
+
+
+         s = t = jj;
+         deltavec[jj] = 1;
+
+         for (i = jj+1; i <= kk+1; i++) {
+            ctilda[i] = uvec[i] = utildavec[i] = yvec[i] = 0;
+            Deltavec[i] = 0;
+            vvec[i] = 0;
+            deltavec[i] = 1;
+         }
+
+         long enum_cnt = 0;
+
+         while (t <= kk) {
+            if (verb) {
+               enum_cnt++;
+               if (enum_cnt > 100000) {
+                  enum_cnt = 0;
+                  tt = GetTime();
+                  if (tt > LastTime + LLLStatusInterval) {
+                     enum_time += tt - tt1;
+                     tt1 = tt;
+                     G_BKZStatus(tt, enum_time, NumIterations, NumTrivial,
+                                 NumNonTrivial, NumNoOps, m, B);
+                  }
+               }
+            }
+
+
+            ctilda[t] = ctilda[t+1] +
+               (yvec[t]+utildavec[t])*(yvec[t]+utildavec[t])*c[t];
+
+            if (prune > 0 && t > jj) {
+               eta = G_BKZThresh(t-jj);
+            }
+            else
+               eta = 0;
+
+            if (ctilda[t] < cbar - eta) {
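+               // the partial squared length at level t is still below the
+               // (possibly pruned) bound: descend one level if t > jj,
+               // otherwise record the improved candidate in uvec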
+ if (t > jj) { + t--; + t1 = 0; + for (i = t+1; i <= s; i++) { + t1 += utildavec[i]*mu[i][t]; + } + + + yvec[t] = t1; + t1 = -t1; + if (t1 >= 0) + t1 = ceil(t1-0.5); + else + t1 = floor(t1+0.5); + + utildavec[t] = vvec[t] = t1; + Deltavec[t] = 0; + if (utildavec[t] > -yvec[t]) + deltavec[t] = -1; + else + deltavec[t] = 1; + } + else { + cbar = ctilda[jj]; + for (i = jj; i <= kk; i++) { + uvec[i] = utildavec[i]; + } + } + } + else { + t++; + s = max(s, t); + if (t < s) Deltavec[t] = -Deltavec[t]; + if (Deltavec[t]*deltavec[t] >= 0) Deltavec[t] += deltavec[t]; + utildavec[t] = vvec[t] + Deltavec[t]; + } + } + + if (verb) { + tt1 = GetTime() - tt1; + enum_time += tt1; + } + + NumIterations++; + + h = min(kk+1, m); + + if ((delta-8*red_fudge)*c[jj] > cbar) { + + clean = 0; + + // we treat the case that the new vector is b_s (jj < s <= kk) + // as a special case that appears to occur most of the time. + + s = 0; + for (i = jj+1; i <= kk; i++) { + if (uvec[i] != 0) { + if (s == 0) + s = i; + else + s = -1; + } + } + + if (s == 0) LogicError("G_BKZ_XD: internal error"); + + if (s > 0) { + // special case + + NumTrivial++; + + for (i = s; i > jj; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + // cerr << "special case\n"; + new_m = ll_G_LLL_XD(B, U, delta, 0, check, + B1, mu, aux, h, jj, quit, cache); + if (new_m != h) LogicError("G_BKZ_XD: internal error"); + if (quit) break; + } + else { + // the general case + + NumNonTrivial++; + + for (i = 1; i <= n; i++) conv(B(m+1, i), 0); + + if (U) { + for (i = 1; i <= m_orig; i++) + conv((*U)(m+1, i), 0); + } + + for (i = jj; i <= kk; i++) { + if (uvec[i] == 0) continue; + conv(MU, uvec[i]); + RowTransform2(B(m+1), B(i), MU); + if (U) RowTransform2((*U)(m+1), (*U)(i), MU); + } + + for (i = m+1; i >= jj+1; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + for (i = 1; i <= n; i++) + conv(B1[jj][i], B(jj, i)); + + if (IsZero(B(jj))) LogicError("G_BKZ_XD: internal error"); + + // remove linear dependencies + + // cerr << "general case\n"; + new_m = ll_G_LLL_XD(B, U, delta, 0, 0, B1, mu, aux, + kk+1, jj, quit, cache); + + + if (new_m != kk) LogicError("G_BKZ_XD: internal error"); + + // remove zero vector + + for (i = kk+2; i <= m+1; i++) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + } + + quit = 0; + if (check) { + for (i = 1; i <= kk; i++) + if ((*check)(B(i))) { + quit = 1; + break; + } + } + + if (quit) break; + + if (h > kk) { + // extend reduced basis + + new_m = ll_G_LLL_XD(B, U, delta, 0, check, + B1, mu, aux, h, h, quit, cache); + + + if (new_m != h) LogicError("G_BKZ_XD: internal error"); + if (quit) break; + } + } + + z = 0; + } + else { + // G_LLL_XD + // cerr << "progress\n"; + + NumNoOps++; + + if (!clean) { + new_m = ll_G_LLL_XD(B, U, delta, 0, check, B1, mu, aux, + h, h, quit, cache); + if (new_m != h) LogicError("G_BKZ_XD: internal error"); + if (quit) break; + } + + z++; + } + } + } + + if (verb) { + G_BKZStatus(GetTime(), enum_time, NumIterations, NumTrivial, NumNonTrivial, + NumNoOps, m, B); + } + + + // clean up + + if (m_orig > m) { + // for consistency, we move zero vectors to the front + + for (i = m+1; i <= m_orig; i++) { + swap(B(i), B(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = 0; i < m; i++) { + swap(B(m_orig-i), B(m-i)); + if (U) swap((*U)(m_orig-i), (*U)(m-i)); + } + } + 
+   B.SetDims(m_orig, n);
+   BB = B;
+
+   if (U) {
+      U->SetDims(m_orig, m_orig);
+      *UU = *U;
+   }
+
+   return m;
+}
+
+long G_BKZ_XD(mat_ZZ& BB, mat_ZZ& UU, double delta,
+              long beta, long prune, LLLCheckFct check, long verb)
+{
+   verbose = verb;
+   NumSwaps = 0;
+   if (verbose) {
+      StartTime = GetTime();
+      LastTime = StartTime;
+   }
+
+
+   if (delta < 0.50 || delta >= 1) LogicError("G_BKZ_XD: bad delta");
+   if (beta < 2) LogicError("G_BKZ_XD: bad block size");
+
+   return G_BKZ_XD(BB, &UU, to_xdouble(delta), beta, prune, check);
+}
+
+long G_BKZ_XD(mat_ZZ& BB, double delta,
+              long beta, long prune, LLLCheckFct check, long verb)
+{
+   verbose = verb;
+   NumSwaps = 0;
+   if (verbose) {
+      StartTime = GetTime();
+      LastTime = StartTime;
+   }
+
+
+
+   if (delta < 0.50 || delta >= 1) LogicError("G_BKZ_XD: bad delta");
+   if (beta < 2) LogicError("G_BKZ_XD: bad block size");
+
+   return G_BKZ_XD(BB, 0, to_xdouble(delta), beta, prune, check);
+}
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/GetPID.c b/thirdparty/linux/ntl/src/GetPID.c
new file mode 100644
index 0000000000..dc332f18d7
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GetPID.c
@@ -0,0 +1,9 @@
+
+#include <NTL/config.h>
+#include <unistd.h>
+
+unsigned long _ntl_GetPID()
+{
+   return getpid();
+}
+
diff --git a/thirdparty/linux/ntl/src/GetPID1.c b/thirdparty/linux/ntl/src/GetPID1.c
new file mode 100644
index 0000000000..dc332f18d7
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GetPID1.c
@@ -0,0 +1,9 @@
+
+#include <NTL/config.h>
+#include <unistd.h>
+
+unsigned long _ntl_GetPID()
+{
+   return getpid();
+}
+
diff --git a/thirdparty/linux/ntl/src/GetPID2.c b/thirdparty/linux/ntl/src/GetPID2.c
new file mode 100644
index 0000000000..283191c180
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GetPID2.c
@@ -0,0 +1,6 @@
+
+unsigned long _ntl_GetPID()
+{
+   return 0;
+}
+
diff --git a/thirdparty/linux/ntl/src/GetTime.c b/thirdparty/linux/ntl/src/GetTime.c
new file mode 100644
index 0000000000..67320a51f1
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GetTime.c
@@ -0,0 +1,19 @@
+#include <NTL/config.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+// FIXME: it would be nice to have a per-thread
+// timing function, but it seems very difficult
+// to get a cross-platform solution to this.
+
+
+double _ntl_GetTime()
+{
+   struct rusage used;
+
+   getrusage(RUSAGE_SELF, &used);
+   return (used.ru_utime.tv_sec + used.ru_stime.tv_sec +
+           (used.ru_utime.tv_usec + used.ru_stime.tv_usec) / 1e6);
+}
+
diff --git a/thirdparty/linux/ntl/src/GetTime0.c b/thirdparty/linux/ntl/src/GetTime0.c
new file mode 100644
index 0000000000..fde10bb4e1
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GetTime0.c
@@ -0,0 +1,126 @@
+
+/*
+ * Author:  David Robert Nadeau
+ * Site:    http://NadeauSoftware.com/
+ * License: Creative Commons Attribution 3.0 Unported License
+ *          http://creativecommons.org/licenses/by/3.0/deed.en_US
+ */
+
+
+// NTL NOTES: I've adapted this code from the above source.
+// The reason is that for some multithreaded benchmarking, I want
+// to use wall clock time, and this seemed like the best multiplatform
+// solution to getting a high-resolution wall clock timer.
+// The only change I made to the original code is to initialize
+// timeConvert for the OSX case using a thread-safe initialization
+// C++ idiom.
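+
+// Editorial note: the "thread-safe initialization C++ idiom" above is the
+// C++11 guarantee that a block-scope static is initialized exactly once,
+// even if several threads reach its first use concurrently ("magic
+// statics").  A minimal sketch, with hypothetical names:
+//
+//    double conversion_factor()
+//    {
+//       static double factor = init_factor();  // runs once, thread-safe
+//       return factor;
+//    }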
+
+
+#if defined(_WIN32)
+#include <windows.h>
+
+#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__))
+#include <unistd.h>   /* POSIX flags */
+#include <time.h>     /* clock_gettime(), time() */
+#include <sys/time.h> /* gethrtime(), gettimeofday() */
+
+#if defined(__MACH__) && defined(__APPLE__)
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+
+static inline double InitTimeConvert()
+{
+   mach_timebase_info_data_t timeBase;
+   (void)mach_timebase_info( &timeBase );
+   return (double)timeBase.numer / (double)timeBase.denom / 1000000000.0;
+}
+
+#endif
+
+#else
+#error "Unable to define GetTime( ) for an unknown OS."
+#endif
+
+
+
+
+
+
+/**
+ * Returns the real time, in seconds, or -1.0 if an error occurred.
+ *
+ * Time is measured since an arbitrary and OS-dependent start time.
+ * The returned real time is only useful for computing an elapsed time
+ * between two calls to this function.
+ */
+double _ntl_GetTime( )
+{
+#if defined(_WIN32)
+   FILETIME tm;
+   ULONGLONG t;
+#if defined(NTDDI_WIN8) && NTDDI_VERSION >= NTDDI_WIN8
+   /* Windows 8, Windows Server 2012 and later. ---------------- */
+   GetSystemTimePreciseAsFileTime( &tm );
+#else
+   /* Windows 2000 and later. ---------------------------------- */
+   GetSystemTimeAsFileTime( &tm );
+#endif
+   t = ((ULONGLONG)tm.dwHighDateTime << 32) | (ULONGLONG)tm.dwLowDateTime;
+   return (double)t / 10000000.0;
+
+#elif (defined(__hpux) || defined(hpux)) || ((defined(__sun__) || defined(__sun) || defined(sun)) && (defined(__SVR4) || defined(__svr4__)))
+   /* HP-UX, Solaris. ------------------------------------------ */
+   return (double)gethrtime( ) / 1000000000.0;
+
+#elif defined(__MACH__) && defined(__APPLE__)
+   /* OSX. ----------------------------------------------------- */
+   static double timeConvert = InitTimeConvert();
+   // even in a multi-threaded environment, this will
+   // be safely initialized, according to C++11 standard
+
+   return (double)mach_absolute_time( ) * timeConvert;
+
+#elif defined(_POSIX_VERSION)
+   /* POSIX. --------------------------------------------------- */
+#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0)
+   {
+      struct timespec ts;
+#if defined(CLOCK_MONOTONIC_PRECISE)
+      /* BSD. --------------------------------------------- */
+      const clockid_t id = CLOCK_MONOTONIC_PRECISE;
+#elif defined(CLOCK_MONOTONIC_RAW)
+      /* Linux. ------------------------------------------- */
+      const clockid_t id = CLOCK_MONOTONIC_RAW;
+#elif defined(CLOCK_HIGHRES)
+      /* Solaris. ----------------------------------------- */
+      const clockid_t id = CLOCK_HIGHRES;
+#elif defined(CLOCK_MONOTONIC)
+      /* AIX, BSD, Linux, POSIX, Solaris. ----------------- */
+      const clockid_t id = CLOCK_MONOTONIC;
+#elif defined(CLOCK_REALTIME)
+      /* AIX, BSD, HP-UX, Linux, POSIX. ------------------- */
+      const clockid_t id = CLOCK_REALTIME;
+#else
+      const clockid_t id = (clockid_t)-1; /* Unknown. */
+#endif /* CLOCK_* */
+      if ( id != (clockid_t)-1 && clock_gettime( id, &ts ) != -1 )
+         return (double)ts.tv_sec +
+            (double)ts.tv_nsec / 1000000000.0;
+      /* Fall thru. */
+   }
+#endif /* _POSIX_TIMERS */
+
+   /* AIX, BSD, Cygwin, HP-UX, Linux, OSX, POSIX, Solaris. ----- */
+   struct timeval tm;
+   gettimeofday( &tm, NULL );
+   return (double)tm.tv_sec + (double)tm.tv_usec / 1000000.0;
+#else
+   return -1.0; /* Failed.
*/
+#endif
+}
+
+
+
+
+
diff --git a/thirdparty/linux/ntl/src/GetTime1.c b/thirdparty/linux/ntl/src/GetTime1.c
new file mode 100644
index 0000000000..67320a51f1
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GetTime1.c
@@ -0,0 +1,19 @@
+#include <NTL/config.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+// FIXME: it would be nice to have a per-thread
+// timing function, but it seems very difficult
+// to get a cross-platform solution to this.
+
+
+double _ntl_GetTime()
+{
+   struct rusage used;
+
+   getrusage(RUSAGE_SELF, &used);
+   return (used.ru_utime.tv_sec + used.ru_stime.tv_sec +
+           (used.ru_utime.tv_usec + used.ru_stime.tv_usec) / 1e6);
+}
+
diff --git a/thirdparty/linux/ntl/src/GetTime2.c b/thirdparty/linux/ntl/src/GetTime2.c
new file mode 100644
index 0000000000..278767ece1
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GetTime2.c
@@ -0,0 +1,21 @@
+#include <NTL/config.h>
+
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+// some (old?) Solaris systems don't seem
+// to supply a getrusage prototype
+
+extern "C" int getrusage(int, struct rusage*);
+
+
+double _ntl_GetTime()
+{
+   struct rusage used;
+
+   getrusage(RUSAGE_SELF, &used);
+   return (used.ru_utime.tv_sec + used.ru_stime.tv_sec +
+           (used.ru_utime.tv_usec + used.ru_stime.tv_usec) / 1e6);
+}
+
diff --git a/thirdparty/linux/ntl/src/GetTime3.c b/thirdparty/linux/ntl/src/GetTime3.c
new file mode 100644
index 0000000000..dbf9daff13
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GetTime3.c
@@ -0,0 +1,18 @@
+#include <NTL/config.h>
+
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+
+
+
+double _ntl_GetTime()
+{
+   struct rusage used;
+
+   syscall(SYS_getrusage, RUSAGE_SELF, &used);
+   return (used.ru_utime.tv_sec + used.ru_stime.tv_sec +
+           (used.ru_utime.tv_usec + used.ru_stime.tv_usec) / 1e6);
+}
+
diff --git a/thirdparty/linux/ntl/src/GetTime4.c b/thirdparty/linux/ntl/src/GetTime4.c
new file mode 100644
index 0000000000..78940f53d8
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GetTime4.c
@@ -0,0 +1,35 @@
+#include <NTL/ctools.h>
+
+#include <cstdlib>
+#include <ctime>
+
+using namespace std;
+
+// FIXME: this is the GetTime that ends up getting used
+// on Windows.  However, it returns the wall time, not CPU time.
+// We could perhaps switch to using GetProcessTimes.
+// See: http://nadeausoftware.com/articles/2012/03/c_c_tip_how_measure_cpu_time_benchmarking
+
+// NOTE: in this version, because clock_t can overflow fairly
+// quickly (in less than an hour on some systems), we provide
+// a partial work-around, by tracking the differences between calls
+
+double _ntl_GetTime()
+{
+   static NTL_CHEAP_THREAD_LOCAL clock_t last_clock = 0;
+   static NTL_CHEAP_THREAD_LOCAL double acc = 0;
+
+   clock_t this_clock;
+   double delta;
+
+   this_clock = clock();
+
+   delta = (this_clock - last_clock)/((double)CLOCKS_PER_SEC);
+   if (delta < 0) delta = 0;
+
+   acc += delta;
+   last_clock = this_clock;
+
+   return acc;
+}
+
diff --git a/thirdparty/linux/ntl/src/GetTime5.c b/thirdparty/linux/ntl/src/GetTime5.c
new file mode 100644
index 0000000000..7951e413cd
--- /dev/null
+++ b/thirdparty/linux/ntl/src/GetTime5.c
@@ -0,0 +1,7 @@
+#include <NTL/config.h>
+
+
+double _ntl_GetTime()
+{
+   return 0;
+}
diff --git a/thirdparty/linux/ntl/src/HNF.c b/thirdparty/linux/ntl/src/HNF.c
new file mode 100644
index 0000000000..c4400c9e2e
--- /dev/null
+++ b/thirdparty/linux/ntl/src/HNF.c
@@ -0,0 +1,128 @@
+
+#include <NTL/HNF.h>
+
+#include <NTL/new.h>
+
+NTL_START_IMPL
+
+
+// This implements a variation of an algorithm in
+// [P. Domich, R. Kannan and L. Trotter, Math. Oper. Research 12:50-59, 1987].
+// I started with the description in Henri Cohen's book, but had to modify
+// that because Cohen does not actually keep the numbers reduced modulo
+// the determinant, which leads to larger than necessary numbers.
+// This modification was put in place in v3.9b.
+
+static
+void EuclUpdate(vec_ZZ& u, vec_ZZ& v,
+                const ZZ& a, const ZZ& b, const ZZ& c, const ZZ& d,
+                const ZZ& M)
+
+{
+   long m = u.length();
+   long i;
+
+   ZZ M1;
+   RightShift(M1, M, 1);
+
+   ZZ t1, t2, t3;
+
+   for (i = 1; i <= m; i++) {
+      mul(t1, u(i), a);
+      mul(t2, v(i), b);
+      add(t1, t1, t2);
+      rem(t1, t1, M);
+      if (t1 > M1)
+         sub(t1, t1, M);
+
+      t3 = t1;
+
+      mul(t1, u(i), c);
+      mul(t2, v(i), d);
+      add(t1, t1, t2);
+      rem(t1, t1, M);
+      if (t1 > M1)
+         sub(t1, t1, M);
+
+      u(i) = t3;
+      v(i) = t1;
+   }
+}
+
+
+static
+void FixDiag(vec_ZZ& u, const ZZ& a, const vec_ZZ& v, const ZZ& M, long m)
+{
+   long i;
+   ZZ t1;
+
+   for (i = 1; i <= m; i++) {
+      mul(t1, a, v(i));
+      rem(u(i), t1, M);
+   }
+}
+
+
+static
+void ReduceW(vec_ZZ& u, const ZZ& a, const vec_ZZ& v, const ZZ& M, long m)
+{
+   long i;
+   ZZ t1, t2;
+
+   for (i = 1; i <= m; i++) {
+      mul(t1, a, v(i));
+      sub(t2, u(i), t1);
+      rem(u(i), t2, M);
+   }
+}
+
+
+
+void HNF(mat_ZZ& W, const mat_ZZ& A_in, const ZZ& D_in)
+{
+   mat_ZZ A = A_in;
+
+   long n = A.NumRows();
+   long m = A.NumCols();
+
+   ZZ D = D_in;
+   if (D < 0)
+      negate(D, D);
+
+   if (n == 0 || m == 0 || D == 0)
+      LogicError("HNF: bad input");
+
+   W.SetDims(m, m);
+   clear(W);
+
+   long i, j, k;
+   ZZ d, u, v, c1, c2;
+
+   k = n;
+
+   for (i = m; i >= 1; i--) {
+      for (j = k-1; j >= 1; j--) {
+         if (A(j, i) != 0) {
+            XGCD(d, u, v, A(k, i), A(j, i));
+            div(c1, A(k, i), d);
+            div(c2, A(j, i), d);
+            negate(c2, c2);
+            EuclUpdate(A(j), A(k), c1, c2, v, u, D);
+         }
+      }
+
+      XGCD(d, u, v, A(k, i), D);
+      FixDiag(W(i), u, A(k), D, i);
+      if (W(i, i) == 0) W(i, i) = D;
+
+      for (j = i+1; j <= m; j++) {
+         div(c1, W(j, i), W(i, i));
+         ReduceW(W(j), c1, W(i), D, i);
+      }
+
+      div(D, D, d);
+      k--;
+   }
+}
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/InitSettings.c b/thirdparty/linux/ntl/src/InitSettings.c
new file mode 100644
index 0000000000..0b138e721c
--- /dev/null
+++ b/thirdparty/linux/ntl/src/InitSettings.c
@@ -0,0 +1,169 @@
+
+#include
+
+NTL_CLIENT
+
+#define make_string_aux(x) #x
+#define make_string(x) make_string_aux(x)
+
+
+int main()
+{
+
+#ifdef NTL_LEGACY_NO_NAMESPACE
+   cout << "NTL_LEGACY_NO_NAMESPACE=1\n";
+#else
+   cout << "NTL_LEGACY_NO_NAMESPACE=0\n";
+#endif
+
+#ifdef NTL_LEGACY_INPUT_ERROR
+   cout << "NTL_LEGACY_INPUT_ERROR=1\n";
+#else
+   cout << "NTL_LEGACY_INPUT_ERROR=0\n";
+#endif
+
+#ifdef NTL_THREADS
+   cout << "NTL_THREADS=1\n";
+#else
+   cout << "NTL_THREADS=0\n";
+#endif
+
+#ifdef NTL_DISABLE_TLS_HACK
+   cout << "NTL_DISABLE_TLS_HACK=1\n";
+#else
+   cout << "NTL_DISABLE_TLS_HACK=0\n";
+#endif
+
+#ifdef NTL_ENABLE_TLS_HACK
+   cout << "NTL_ENABLE_TLS_HACK=1\n";
+#else
+   cout << "NTL_ENABLE_TLS_HACK=0\n";
+#endif
+
+#ifdef NTL_EXCEPTIONS
+   cout << "NTL_EXCEPTIONS=1\n";
+#else
+   cout << "NTL_EXCEPTIONS=0\n";
+#endif
+
+#ifdef NTL_THREAD_BOOST
+   cout << "NTL_THREAD_BOOST=1\n";
+#else
+   cout << "NTL_THREAD_BOOST=0\n";
+#endif
+
+
+#ifdef NTL_LEGACY_SP_MULMOD
+   cout << "NTL_LEGACY_SP_MULMOD=1\n";
+#else
+   cout << "NTL_LEGACY_SP_MULMOD=0\n";
+#endif
+
+
+#ifdef NTL_DISABLE_LONGDOUBLE
+   cout << "NTL_DISABLE_LONGDOUBLE=1\n";
+#else
+   cout << "NTL_DISABLE_LONGDOUBLE=0\n";
+#endif
+
+
+#ifdef NTL_DISABLE_LONGLONG
+   cout << "NTL_DISABLE_LONGLONG=1\n";
+#else
+   cout << "NTL_DISABLE_LONGLONG=0\n";
+#endif
+
+
+#ifdef NTL_DISABLE_LL_ASM
+
cout << "NTL_DISABLE_LL_ASM=1\n"; +#else + cout << "NTL_DISABLE_LL_ASM=0\n"; +#endif + +#ifdef NTL_MAXIMIZE_SP_NBITS + cout << "NTL_MAXIMIZE_SP_NBITS=1\n"; +#else + cout << "NTL_MAXIMIZE_SP_NBITS=0\n"; +#endif + + + +#ifdef NTL_GMP_LIP + cout << "NTL_GMP_LIP=1\n"; +#else + cout << "NTL_GMP_LIP=0\n"; +#endif + + +#ifdef NTL_GF2X_LIB + cout << "NTL_GF2X_LIB=1\n"; +#else + cout << "NTL_GF2X_LIB=0\n"; +#endif + +#ifdef NTL_LONG_LONG_TYPE + cout << "FLAG_LONG_LONG_TYPE=1\n"; + cout << "NTL_LONG_LONG_TYPE=" make_string(NTL_LONG_LONG_TYPE) "\n"; +#else + cout << "FLAG_LONG_LONG_TYPE=0\n"; + cout << "NTL_LONG_LONG_TYPE=long long\n"; +#endif + + +#ifdef NTL_UNSIGNED_LONG_LONG_TYPE + cout << "FLAG_UNSIGNED_LONG_LONG_TYPE=1\n"; + cout << "NTL_UNSIGNED_LONG_LONG_TYPE=" make_string(NTL_UNSIGNED_LONG_LONG_TYPE) "\n"; +#else + cout << "FLAG_UNSIGNED_LONG_LONG_TYPE=0\n"; + cout << "NTL_UNSIGNED_LONG_LONG_TYPE=unsigned long long\n"; +#endif + + +#ifdef NTL_X86_FIX + cout << "NTL_X86_FIX=1\n"; +#else + cout << "NTL_X86_FIX=0\n"; +#endif + +#ifdef NTL_NO_X86_FIX + cout << "NTL_NO_X86_FIX=1\n"; +#else + cout << "NTL_NO_X86_FIX=0\n"; +#endif + + +#ifdef NTL_NO_INIT_TRANS + cout << "NTL_NO_INIT_TRANS=1\n"; +#else + cout << "NTL_NO_INIT_TRANS=0\n"; +#endif + +#ifdef NTL_CLEAN_INT + cout << "NTL_CLEAN_INT=1\n"; +#else + cout << "NTL_CLEAN_INT=0\n"; +#endif + +#ifdef NTL_CLEAN_PTR + cout << "NTL_CLEAN_PTR=1\n"; +#else + cout << "NTL_CLEAN_PTR=0\n"; +#endif + +#ifdef NTL_RANGE_CHECK + cout << "NTL_RANGE_CHECK=1\n"; +#else + cout << "NTL_RANGE_CHECK=0\n"; +#endif + + +// the following is synthetically defined +#ifdef NTL_LONGLONG_SP_MULMOD + cout << "NTL_LONGLONG_SP_MULMOD=1\n"; +#else + cout << "NTL_LONGLONG_SP_MULMOD=0\n"; +#endif + + + return 0; +} diff --git a/thirdparty/linux/ntl/src/LLL.c b/thirdparty/linux/ntl/src/LLL.c new file mode 100644 index 0000000000..57b29d7a02 --- /dev/null +++ b/thirdparty/linux/ntl/src/LLL.c @@ -0,0 +1,706 @@ + +#include + +#include + +NTL_START_IMPL + + +static void ExactDiv(ZZ& qq, const ZZ& a, const ZZ& b) +{ + NTL_ZZRegister(q); + NTL_ZZRegister(r); + + DivRem(q, r, a, b); + if (!IsZero(r)) { + cerr << "a = " << a << "\n"; + cerr << "b = " << b << "\n"; + LogicError("ExactDiv: nonzero remainder"); + } + qq = q; +} + + +static void BalDiv(ZZ& q, const ZZ& a, const ZZ& d) + +// rounds a/d to nearest integer, breaking ties +// by rounding towards zero. Assumes d > 0. 
+ +{ + NTL_ZZRegister(r); + DivRem(q, r, a, d); + + + add(r, r, r); + + long cmp = compare(r, d); + if (cmp > 0 || (cmp == 0 && q < 0)) + add(q, q, 1); +} + + + +static void MulAddDiv(ZZ& c, const ZZ& c1, const ZZ& c2, + const ZZ& x, const ZZ& y, const ZZ& z) + +// c = (x*c1 + y*c2)/z + +{ + NTL_ZZRegister(t1); + NTL_ZZRegister(t2); + + mul(t1, x, c1); + mul(t2, y, c2); + add(t1, t1, t2); + ExactDiv(c, t1, z); +} + + +static void MulSubDiv(ZZ& c, const ZZ& c1, const ZZ& c2, + const ZZ& x, const ZZ& y, const ZZ& z) + +// c = (x*c1 - y*c2)/z + +{ + NTL_ZZRegister(t1); + NTL_ZZRegister(t2); + + mul(t1, x, c1); + mul(t2, y, c2); + sub(t1, t1, t2); + ExactDiv(c, t1, z); +} + + + + + +#if 0 + +static void MulSubDiv(vec_ZZ& c, const vec_ZZ& c1, const vec_ZZ& c2, + const ZZ& x, const ZZ& y, const ZZ& z) + +// c = (x*c1 + y*c2)/z + +{ + long n = c1.length(); + if (c2.length() != n) LogicError("MulSubDiv: length mismatch"); + c.SetLength(n); + + long i; + for (i = 1; i <= n; i++) + MulSubDiv(c(i), c1(i), c2(i), x, y, z); +} + +#endif + +static void RowTransform(vec_ZZ& c1, vec_ZZ& c2, + const ZZ& x, const ZZ& y, const ZZ& u, const ZZ& v) + +// (c1, c2) = (x*c1 + y*c2, u*c1 + v*c2) + +{ + long n = c1.length(); + if (c2.length() != n) LogicError("MulSubDiv: length mismatch"); + NTL_ZZRegister(t1); + NTL_ZZRegister(t2); + NTL_ZZRegister(t3); + NTL_ZZRegister(t4); + + long i; + for (i = 1; i <= n; i++) { + mul(t1, x, c1(i)); + mul(t2, y, c2(i)); + add(t1, t1, t2); + + mul(t3, u, c1(i)); + mul(t4, v, c2(i)); + add(t3, t3, t4); + + c1(i) = t1; + c2(i) = t3; + } +} + +static void RowTransform(ZZ& c1, ZZ& c2, + const ZZ& x, const ZZ& y, const ZZ& u, const ZZ& v) + +// (c1, c2) = (x*c1 + y*c2, u*c1 + v*c2) + +{ + NTL_ZZRegister(t1); + NTL_ZZRegister(t2); + NTL_ZZRegister(t3); + NTL_ZZRegister(t4); + + mul(t1, x, c1); + mul(t2, y, c2); + add(t1, t1, t2); + + mul(t3, u, c1); + mul(t4, v, c2); + add(t3, t3, t4); + + c1 = t1; + c2 = t3; +} + + + +static void MulSubFrom(vec_ZZ& c, const vec_ZZ& c2, const ZZ& x) + +// c = c - x*c2 + +{ + long n = c.length(); + if (c2.length() != n) LogicError("MulSubFrom: length mismatch"); + + long i; + for (i = 1; i <= n; i++) + MulSubFrom(c(i), c2(i), x); +} + +static void MulSubFrom(vec_ZZ& c, const vec_ZZ& c2, long x) + +// c = c - x*c2 + +{ + long n = c.length(); + if (c2.length() != n) LogicError("MulSubFrom: length mismatch"); + + long i; + for (i = 1; i <= n; i++) + MulSubFrom(c(i), c2(i), x); +} + + + + + +static long SwapTest(const ZZ& d0, const ZZ& d1, const ZZ& d2, const ZZ& lam, + long a, long b) + +// test if a*d1^2 > b*(d0*d2 + lam^2) + +{ + NTL_ZZRegister(t1); + NTL_ZZRegister(t2); + + mul(t1, d0, d2); + sqr(t2, lam); + add(t1, t1, t2); + mul(t1, t1, b); + + sqr(t2, d1); + mul(t2, t2, a); + + return t2 > t1; +} + + + + + + +static +void reduce(long k, long l, + mat_ZZ& B, vec_long& P, vec_ZZ& D, + vec_vec_ZZ& lam, mat_ZZ* U) +{ + NTL_ZZRegister(t1); + NTL_ZZRegister(r); + + if (P(l) == 0) return; + add(t1, lam(k)(P(l)), lam(k)(P(l))); + abs(t1, t1); + if (t1 <= D[P(l)]) return; + + long j; + long rr, small_r; + + BalDiv(r, lam(k)(P(l)), D[P(l)]); + + if (r.WideSinglePrecision()) { + small_r = 1; + rr = to_long(r); + } + else { + small_r = 0; + } + + if (small_r) { + MulSubFrom(B(k), B(l), rr); + + if (U) MulSubFrom((*U)(k), (*U)(l), rr); + + for (j = 1; j <= l-1; j++) + if (P(j) != 0) + MulSubFrom(lam(k)(P(j)), lam(l)(P(j)), rr); + MulSubFrom(lam(k)(P(l)), D[P(l)], rr); + } + else { + MulSubFrom(B(k), B(l), r); + + if (U) MulSubFrom((*U)(k), (*U)(l), r); + + for 
(j = 1; j <= l-1; j++) + if (P(j) != 0) + MulSubFrom(lam(k)(P(j)), lam(l)(P(j)), r); + MulSubFrom(lam(k)(P(l)), D[P(l)], r); + } + + +} + + +static +long swap(long k, mat_ZZ& B, vec_long& P, vec_ZZ& D, + vec_vec_ZZ& lam, mat_ZZ* U, long m, long verbose) + +// swaps vectors k-1 and k; assumes P(k-1) != 0 +// returns 1 if vector k-1 need to be reduced after the swap... +// this only occurs in 'case 2' when there are linear dependencies + +{ + long i, j; + NTL_ZZRegister(t1); + NTL_ZZRegister(t2); + NTL_ZZRegister(t3); + NTL_ZZRegister(e); + NTL_ZZRegister(x); + NTL_ZZRegister(y); + + + if (P(k) != 0) { + if (verbose) cerr << "swap case 1: " << k << "\n"; + + swap(B(k-1), B(k)); + if (U) swap((*U)(k-1), (*U)(k)); + + for (j = 1; j <= k-2; j++) + if (P(j) != 0) + swap(lam(k-1)(P(j)), lam(k)(P(j))); + + for (i = k+1; i <= m; i++) { + MulAddDiv(t1, lam(i)(P(k)-1), lam(i)(P(k)), + lam(k)(P(k)-1), D[P(k)-2], D[P(k)-1]); + MulSubDiv(t2, lam(i)(P(k)-1), lam(i)(P(k)), + D[P(k)], lam(k)(P(k)-1), D[P(k)-1]); + lam(i)(P(k)-1) = t1; + lam(i)(P(k)) = t2; + } + + MulAddDiv(D[P(k)-1], D[P(k)], lam(k)(P(k)-1), + D[P(k)-2], lam(k)(P(k)-1), D[P(k)-1]); + + return 0; + } + else if (!IsZero(lam(k)(P(k-1)))) { + if (verbose) cerr << "swap case 2: " << k << "\n"; + XGCD(e, x, y, lam(k)(P(k-1)), D[P(k-1)]); + + ExactDiv(t1, lam(k)(P(k-1)), e); + ExactDiv(t2, D[P(k-1)], e); + + t3 = t2; + negate(t2, t2); + RowTransform(B(k-1), B(k), t1, t2, y, x); + if (U) RowTransform((*U)(k-1), (*U)(k), t1, t2, y, x); + for (j = 1; j <= k-2; j++) + if (P(j) != 0) + RowTransform(lam(k-1)(P(j)), lam(k)(P(j)), t1, t2, y, x); + + sqr(t2, t2); + ExactDiv(D[P(k-1)], D[P(k-1)], t2); + + for (i = k+1; i <= m; i++) + if (P(i) != 0) { + ExactDiv(D[P(i)], D[P(i)], t2); + for (j = i+1; j <= m; j++) { + ExactDiv(lam(j)(P(i)), lam(j)(P(i)), t2); + } + } + + for (i = k+1; i <= m; i++) { + ExactDiv(lam(i)(P(k-1)), lam(i)(P(k-1)), t3); + } + + swap(P(k-1), P(k)); + + return 1; + } + else { + if (verbose) cerr << "swap case 3: " << k << "\n"; + + swap(B(k-1), B(k)); + if (U) swap((*U)(k-1), (*U)(k)); + + for (j = 1; j <= k-2; j++) + if (P(j) != 0) + swap(lam(k-1)(P(j)), lam(k)(P(j))); + + swap(P(k-1), P(k)); + + return 0; + } +} + + + + +static +void IncrementalGS(mat_ZZ& B, vec_long& P, vec_ZZ& D, vec_vec_ZZ& lam, + long& s, long k) +{ + long n = B.NumCols(); + long m = B.NumRows(); + + NTL_ZZRegister(u); + NTL_ZZRegister(t1); + NTL_ZZRegister(t2); + + long i, j; + + for (j = 1; j <= k-1; j++) { + long posj = P(j); + if (posj == 0) continue; + + InnerProduct(u, B(k), B(j)); + for (i = 1; i <= posj-1; i++) { + mul(t1, D[i], u); + mul(t2, lam(k)(i), lam(j)(i)); + sub(t1, t1, t2); + div(t1, t1, D[i-1]); + u = t1; + } + + lam(k)(posj) = u; + } + + InnerProduct(u, B(k), B(k)); + for (i = 1; i <= s; i++) { + mul(t1, D[i], u); + mul(t2, lam(k)(i), lam(k)(i)); + sub(t1, t1, t2); + div(t1, t1, D[i-1]); + u = t1; + } + + if (u == 0) { + P(k) = 0; + } + else { + s++; + P(k) = s; + D[s] = u; + } +} + + +static +long LLL(vec_ZZ& D, mat_ZZ& B, mat_ZZ* U, long a, long b, long verbose) +{ + long m = B.NumRows(); + long n = B.NumCols(); + + long force_reduce = 1; + + vec_long P; + P.SetLength(m); + + D.SetLength(m+1); + D[0] = 1; + + vec_vec_ZZ lam; + + lam.SetLength(m); + + long j; + for (j = 1; j <= m; j++) + lam(j).SetLength(m); + + if (U) ident(*U, m); + + long s = 0; + + long k = 1; + long max_k = 0; + + + while (k <= m) { + if (k > max_k) { + IncrementalGS(B, P, D, lam, s, k); + max_k = k; + } + + if (k == 1) { + force_reduce = 1; + k++; + continue; + } + + 
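+      // force_reduce is 0 only when the preceding swap() returned 0
+      // ('case 1' or 'case 3'); swap() returns 1 exactly when row k-1
+      // still needs to be reduced after the swap ('case 2')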
if (force_reduce) + for (j = k-1; j >= 1; j--) + reduce(k, j, B, P, D, lam, U); + + if (P(k-1) != 0 && + (P(k) == 0 || + SwapTest(D[P(k)], D[P(k)-1], D[P(k)-2], lam(k)(P(k)-1), a, b))) { + force_reduce = swap(k, B, P, D, lam, U, max_k, verbose); + k--; + } + else { + force_reduce = 1; + k++; + } + } + + D.SetLength(s+1); + return s; +} + + + +static +long image(ZZ& det, mat_ZZ& B, mat_ZZ* U, long verbose) +{ + long m = B.NumRows(); + long n = B.NumCols(); + + long force_reduce = 1; + + vec_long P; + P.SetLength(m); + + vec_ZZ D; + D.SetLength(m+1); + D[0] = 1; + + vec_vec_ZZ lam; + + lam.SetLength(m); + + long j; + for (j = 1; j <= m; j++) + lam(j).SetLength(m); + + if (U) ident(*U, m); + + long s = 0; + + long k = 1; + long max_k = 0; + + + while (k <= m) { + if (k > max_k) { + IncrementalGS(B, P, D, lam, s, k); + max_k = k; + } + + if (k == 1) { + force_reduce = 1; + k++; + continue; + } + + if (force_reduce) + for (j = k-1; j >= 1; j--) + reduce(k, j, B, P, D, lam, U); + + if (P(k-1) != 0 && P(k) == 0) { + force_reduce = swap(k, B, P, D, lam, U, max_k, verbose); + k--; + } + else { + force_reduce = 1; + k++; + } + } + + det = D[s]; + return s; +} + +long LLL(ZZ& det, mat_ZZ& B, mat_ZZ& U, long verbose) +{ + vec_ZZ D; + long s; + s = LLL(D, B, &U, 3, 4, verbose); + det = D[s]; + return s; +} + +long LLL(ZZ& det, mat_ZZ& B, long verbose) +{ + vec_ZZ D; + long s; + s = LLL(D, B, 0, 3, 4, verbose); + det = D[s]; + return s; +} + +long LLL(ZZ& det, mat_ZZ& B, mat_ZZ& U, long a, long b, long verbose) +{ + if (a <= 0 || b <= 0 || a > b || b/4 >= a) LogicError("LLL: bad args"); + + vec_ZZ D; + long s; + s = LLL(D, B, &U, a, b, verbose); + det = D[s]; + return s; +} + +long LLL(ZZ& det, mat_ZZ& B, long a, long b, long verbose) +{ + if (a <= 0 || b <= 0 || a > b || b/4 >= a) LogicError("LLL: bad args"); + + vec_ZZ D; + long s; + s = LLL(D, B, 0, a, b, verbose); + det = D[s]; + return s; +} + + +long LLL_plus(vec_ZZ& D_out, mat_ZZ& B, mat_ZZ& U, long verbose) +{ + vec_ZZ D; + long s; + s = LLL(D, B, &U, 3, 4, verbose); + D_out = D; + return s; +} + +long LLL_plus(vec_ZZ& D_out, mat_ZZ& B, long verbose) +{ + vec_ZZ D; + long s; + s = LLL(D, B, 0, 3, 4, verbose); + D_out = D; + return s; +} + +long LLL_plus(vec_ZZ& D_out, mat_ZZ& B, mat_ZZ& U, long a, long b, long verbose) +{ + if (a <= 0 || b <= 0 || a > b || b/4 >= a) LogicError("LLL_plus: bad args"); + + vec_ZZ D; + long s; + s = LLL(D, B, &U, a, b, verbose); + D_out = D; + return s; +} + +long LLL_plus(vec_ZZ& D_out, mat_ZZ& B, long a, long b, long verbose) +{ + if (a <= 0 || b <= 0 || a > b || b/4 >= a) LogicError("LLL_plus: bad args"); + + vec_ZZ D; + long s; + s = LLL(D, B, 0, a, b, verbose); + D_out = D; + return s; +} + + +long image(ZZ& det, mat_ZZ& B, mat_ZZ& U, long verbose) +{ + return image(det, B, &U, verbose); +} + +long image(ZZ& det, mat_ZZ& B, long verbose) +{ + return image(det, B, 0, verbose); +} + +long LatticeSolve(vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& y, long reduce) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (y.length() != m) + LogicError("LatticeSolve: dimension mismatch"); + + if (reduce < 0 || reduce > 2) + LogicError("LatticeSolve: bad reduce parameter"); + + if (IsZero(y)) { + x.SetLength(n); + clear(x); + return 1; + } + + mat_ZZ A1, U1; + ZZ det2; + long im_rank, ker_rank; + + A1 = A; + + im_rank = image(det2, A1, U1); + ker_rank = n - im_rank; + + mat_ZZ A2, U2; + long new_rank; + long i; + + A2.SetDims(im_rank + 1, m); + for (i = 1; i <= im_rank; i++) + A2(i) = A1(ker_rank + i); + + A2(im_rank + 
1) = y; + + new_rank = image(det2, A2, U2); + + if (new_rank != im_rank || + (U2(1)(im_rank+1) != 1 && U2(1)(im_rank+1) != -1)) + return 0; + + vec_ZZ x1; + x1.SetLength(im_rank); + + for (i = 1; i <= im_rank; i++) + x1(i) = U2(1)(i); + + if (U2(1)(im_rank+1) == 1) + negate(x1, x1); + + vec_ZZ x2, tmp; + x2.SetLength(n); + clear(x2); + tmp.SetLength(n); + + for (i = 1; i <= im_rank; i++) { + mul(tmp, U1(ker_rank+i), x1(i)); + add(x2, x2, tmp); + } + + if (reduce == 0) { + x = x2; + return 1; + } + else if (reduce == 1) { + U1.SetDims(ker_rank+1, n); + U1(ker_rank+1) = x2; + image(det2, U1); + + x = U1(ker_rank + 1); + return 1; + } + else if (reduce == 2) { + U1.SetDims(ker_rank, n); + LLL(det2, U1); + U1.SetDims(ker_rank+1, n); + U1(ker_rank+1) = x2; + image(det2, U1); + + x = U1(ker_rank + 1); + return 1; + } + + return 0; +} + + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/LLLTest.c b/thirdparty/linux/ntl/src/LLLTest.c new file mode 100644 index 0000000000..ae6ea77cf7 --- /dev/null +++ b/thirdparty/linux/ntl/src/LLLTest.c @@ -0,0 +1,139 @@ + +#include + +NTL_CLIENT + +int main() +{ + mat_ZZ B; + long s; + +#if 1 + cin >> B; +#else + long i, j; + long n; + cerr << "n: "; + cin >> n; + + long m; + cerr << "m: "; + cin >> m; + + long k; + cerr << "k: "; + cin >> k; + + B.SetDims(n, m); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) { + RandomLen(B(i,j), k); + if (RandomBnd(2)) negate(B(i,j), B(i,j)); + } + + +#endif + + mat_ZZ U, B0, B1, B2; + + B0 = B; + + double t; + ZZ d; + + B = B0; + cerr << "LLL_FP..."; + t = GetTime(); + s = LLL_FP(B, U, 0.99); + cerr << (GetTime()-t) << "\n"; + mul(B1, U, B0); + if (B1 != B) TerminalError("bad LLLTest (1)"); + LLL(d, B, 90, 100); + if (B1 != B) TerminalError("bad LLLTest (2)"); + + B = B0; + cerr << "LLL_QP..."; + t = GetTime(); + s = LLL_QP(B, U, 0.99); + cerr << (GetTime()-t) << "\n"; + mul(B1, U, B0); + if (B1 != B) TerminalError("bad LLLTest (1)"); + LLL(d, B, 90, 100); + if (B1 != B) TerminalError("bad LLLTest (2)"); + + + B = B0; + cerr << "LLL_XD..."; + t = GetTime(); + s = LLL_XD(B, U, 0.99); + cerr << (GetTime()-t) << "\n"; + mul(B1, U, B0); + if (B1 != B) TerminalError("bad LLLTest (1)"); + LLL(d, B, 90, 100); + if (B1 != B) TerminalError("bad LLLTest (2)"); + + B = B0; + cerr << "LLL_RR..."; + t = GetTime(); + s = LLL_RR(B, U, 0.99); + cerr << (GetTime()-t) << "\n"; + mul(B1, U, B0); + if (B1 != B) TerminalError("bad LLLTest (1)"); + LLL(d, B, 90, 100); + if (B1 != B) TerminalError("bad LLLTest (2)"); + + B = B0; + cerr << "G_LLL_FP..."; + t = GetTime(); + s = G_LLL_FP(B, U, 0.99); + cerr << (GetTime()-t) << "\n"; + mul(B1, U, B0); + if (B1 != B) TerminalError("bad LLLTest (1)"); + LLL(d, B, 90, 100); + if (B1 != B) TerminalError("bad LLLTest (2)"); + + B = B0; + cerr << "G_LLL_QP..."; + t = GetTime(); + s = G_LLL_QP(B, U, 0.99); + cerr << (GetTime()-t) << "\n"; + mul(B1, U, B0); + if (B1 != B) TerminalError("bad LLLTest (1)"); + LLL(d, B, 90, 100); + if (B1 != B) TerminalError("bad LLLTest (2)"); + + B = B0; + cerr << "G_LLL_XD..."; + t = GetTime(); + s = G_LLL_XD(B, U, 0.99); + cerr << (GetTime()-t) << "\n"; + mul(B1, U, B0); + if (B1 != B) TerminalError("bad LLLTest (1)"); + LLL(d, B, 90, 100); + if (B1 != B) TerminalError("bad LLLTest (2)"); + + B = B0; + cerr << "G_LLL_RR..."; + t = GetTime(); + s = G_LLL_RR(B, U, 0.99); + cerr << (GetTime()-t) << "\n"; + mul(B1, U, B0); + if (B1 != B) TerminalError("bad LLLTest (1)"); + LLL(d, B, 90, 100); + if (B1 != B) TerminalError("bad LLLTest (2)"); + + + B = B0; + cerr << 
"LLL..."; + t = GetTime(); + s = LLL(d, B, U); + cerr << (GetTime()-t) << "\n"; + mul(B1, U, B0); + if (B1 != B) TerminalError("bad LLLTest (1)"); + + cout << "rank = " << s << "\n"; + cout << "det = " << d << "\n"; + cout << "B = " << B << "\n"; + cout << "U = " << U << "\n"; +} + diff --git a/thirdparty/linux/ntl/src/LLLTestIn b/thirdparty/linux/ntl/src/LLLTestIn new file mode 100644 index 0000000000..aaf5c3eb86 --- /dev/null +++ b/thirdparty/linux/ntl/src/LLLTestIn @@ -0,0 +1,13 @@ +[[927267 -895605 -866862 -733022 647694 -555086 970641 524600 582869 890322] +[-749289 -533762 -754674 -564542 874399 888872 860097 -801459 731651 -920001] +[-1008354 -839027 -531044 592717 543848 647360 641018 957632 893065 -813238] +[-750708 -783256 -868889 -649872 -807570 579545 840467 -734946 -720279 760893] +[648723 -1016200 -587545 -1025537 710862 987663 -1047329 -803105 910327 803227] +[-824476 -863571 -978793 -550626 -1000451 -780190 734624 -746905 620723 766901] +[-900849 -593349 686359 1031502 832388 835860 -1034307 975079 -541187 -935991] +[1015281 -971840 -970316 -851433 848978 -656104 -1044347 1014101 760024 -726970] +[674372 -809805 713198 896663 590902 -783974 -651080 627852 1008582 -681953] +[617949 -803220 -947289 786228 -540550 635343 -641246 536407 731378 -545576] +[-800345 -595182 -795484 -711756 1040656 -709216 -585028 1046975 -555068 -834202] +[-855857 -955395 -560560 -784720 810320 906673 657648 865224 -871327 -819494] +] diff --git a/thirdparty/linux/ntl/src/LLLTestOut b/thirdparty/linux/ntl/src/LLLTestOut new file mode 100644 index 0000000000..61d039effe --- /dev/null +++ b/thirdparty/linux/ntl/src/LLLTestOut @@ -0,0 +1,28 @@ +rank = 10 +det = 1 +B = [[0 0 0 0 0 0 0 0 0 0] +[0 0 0 0 0 0 0 0 0 0] +[0 0 0 -1 0 0 0 0 0 0] +[1 0 0 0 0 0 0 0 0 0] +[0 0 0 0 0 0 0 1 0 0] +[0 0 0 0 0 0 0 0 0 1] +[0 0 0 0 0 -1 0 0 0 0] +[0 1 0 0 0 0 0 0 0 0] +[0 0 0 0 0 0 0 0 1 0] +[0 0 0 0 -1 0 0 0 0 0] +[0 0 0 0 0 0 -1 0 0 0] +[0 0 1 0 0 0 0 0 0 0] +] +U = [[34654477538697060592273730261281361547125354042773753220424839 28642286990591936277350498595925990982433158292923417688624947 -46292844372743573462683566502729081625278709999456220510041506 -46483581193179866866502987050643114999497045917731148961241548 -30819781133464209851049806557946364538588971500372543352093774 55800458369363297482114583398843232983453109850341699695655921 85229498391108473380429143088097764479688244750584949192968451 44215190928961391993560815775038041607000619023229549814555005 -56225191928762141199604234326567899079861735856027790129591349 -2821038261866733485761289760067843518755252876151402111579092 -56600445704851358896081983000177726626583766185875526650047702 0] +[-44233997260522469446630760966822805324915027230445793900466469759669698951691849173534141789641880281231629593715630462491 -36559859916000261740784545629542534300396368966718562880888161447225079537906619812511890470709036923023819006565007070077 59089551959891482174305390308903681612869974820305118965879971390764354535195791774074234539014315392027344972399643936881 59333014063258601208900846773024008569829778426598034900594241114874316807176942590203248943103962849505271914727134722286 39339277664920601829814667137981330171852255478171631432913015028631637094451166058502577427743509963087332978102132712013 -71225350891240604745234148163467114628379186495659185673035976466933787742875646533693468838507251113274610193687025645555 -108789445581401845151765240024205495705983252808075763658353787231418340792157910611357034591406063587534375174967882313739 
-56437573824080537369952168169996434511863668021220885631961274608555736934676053780501098026581810918251033094164409424562 71767493333928453425105748941069605855746310582760948904765028225255282167945264249214149704451263342227593036109931588710 3600856443670218308994302975953359579968407731026791968117691834796054492519277872560955703914855475043394551433564729451 72246478321798948850577554837322604664167598323834547771479811997403832547905027333681929696255633027929662067742578983231 -2] +[125234120541160085842632749926653225634551766876524582006028424843484622194344320904579593839076411029404574664662280 103507306308365590027731941844086431359931849897778184895383663429734212976552474675311287906480625174713280911796070 -167292773232422692290873819302822809788580775243595360607886223586801568934200920219320139539793909807299946571865339 -167982056686068274771198629623975762574405900229864145018321156388563290848293367974047543703500321756056697397808087 -111376320165568563716960194394217204009497051997154016140407042457009356929790310037864769292832069406625894388081734 201651325485357982987177428627741759299902792422495664374922776611537162429093348402370409973362309815950903742012622 308001794667252566229877100719231320739885156000660615371570077973945762030596461023423400653916810871232997557136660 159784563029835496486081864097482073959471110329746088885480563201812998253921876718515499356728955876875677166062989 -203186224798620884720827760514543747892116556820405494415111131126230598240582006841437806531067375712068132709639097 -10194649316046974623147872141714216501808326519648445055647762247398874459348988827542798424154186365421254334752367 -204542314399908507768050204400982686728664993936396150465899930418015991987429643495566957206595889365562455444467236 0] +[117406180154362091704893784913336690477979880467536357921992495076718262314767269465800610097261430551877526673389718 97037431965185887559012922676605420319160080674769369497859390855838086773442869902436519430495301700307644470099817 -156835895742912043834505276761716856620216220455948837576689484072169330962930089070645919481870579319266560657936921 -157482094534315194630350944280907639148675019860016386578358064988608494742302353618213409083875091356825096789041568 -104414581695337104199516758593256656150631980061085066179466246604570244774540083398860127033478158546601957820316608 189046816841889857824529919703125611619464894447684183696344378546425154323022711856741408055999939323119404268759879 288749695660499599304687364907411012977880491651500456730556451199679154887314292361559548458418384377876914670459809 149796997111511904474896701860710428924926799189384533547824516206929039299615462035121863019315993952889513336090450 -190485775046834677583737953925886140274939370647060111612422543129207174767287734478778953367518799193141856139551621 -9557417970743614074200161666047379107650857796491658811031063573656186757249092363542271789262571204111436620148956 -191757100300257955899359220207710378426399506731196957340564333479560530939548121099520261804626622451067507554363017 0] +[73035797907264384077316534420898282956915523440058269044064282535678431421994628093967682497961029421433604701746016 60364848435842041921117879698021186285922135359173644461033509004761716380394572231043877636205057555599865940090685 -97564155234621410025624133552441192542313207767418081033424528869476009124672806246048273422760088309009090106859677 
-97966141265295328644121770542693765239102796551562119163446725510238003071761558904008924488064290659338565068496890 -64954010744968883036471143150709270576924698601509028795560228245970519820743472249545494316271851527539446620380960 117601859559033634910429000337822326308176042912609036185312781464573883941098699959645771741163601648433405334510815 179624823755590045790591952845455863170943141584331059893377778240813361093280915945443353579875938285753970612777449 93185411481466777188891779348289203097498945293920870500561184425467302748245335505487257097601354445268983530933842 -118497003754298087354915781404459045617256582021951448024985932763497472034023641766065231235211080753949336848293425 -5945459144558948732219203680407171785434064224193181813855869279170462463328659377381532461937249106502560576684314 -119287867183815553129800918062743050747458913189589892309216739151776284517086701782854139263494793319089107314952196 0] +[22116948787709292451755413138721499937621477015550187399463111658534488617623454723953276983701528060983112260902487883503 18279888762611296054253726594245795436400797306449304779068322746822808728985535458880689650339032888184073615608554247606 -29544709398260946461414597289584781605061386839569616430645469198301621818162658212095774093091114089590446387689367730747 -29666440175613148096203192662539678219828408353830989207759006299227190283963653699949964146180299346173001374982331394381 -19669594505243413030854403906336544072742227946076287665009332770427542418819281578472395975271609929597793052771952543071 35612595189404014827312653100910247613374630671337597217249287386090047934406649015172875467311834692358198989187389340086 54394600207531234648367619575172013285723048867058373569710215719983131117417842202228289979502291925011129071688657031671 28218723318585481974291578652110737060825667769496011305732812951103464900898179952215567584889398988014847614759250794360 -35883665799865709812516319059015872398539390158641006272137020544727087216277289141230835824630860703705756254642536532272 -1800424164415806231290703180738687586416564250163791135791055881961473408487473823233110267368564458618532565323136728831 -36123157754084096828826529108520741951654074068994587072706523674892820260017731262804924516844091386422555031894574423151 1] +[22116830653621455269329674626411237231333121413157877260771090397930206762489851361720911243273767451929856187884282420099 18279791123556041909075925908830851637273283657519891437322185361321102197429374012947504573542399361677591327954021371502 -29544551589996855450677963067392157679443869254136780112428394379143628600464138846222444209460236575630890155348516221094 -29666281717143891705467732813339896546766600449693886805102814652828190257890280434650931653681562254063388162197680820711 -19669489443300768597379753476273166132097435607912366511525700482161871454815355005271391361905596831441503184800390074263 35612404970514031549016136713644509491000005266892172786278762489755441637069471908475698692072977516672625660760817076234 54394309667613614734293552419159883065077688642516640977585399882272310759667200151711241849576401159532215686530231917862 28218572592860556325818374992390914765197304496518115725741311657885647833865968211694999732146914273433174919549646744383 -35883474133096111053263329996097477660410794824552359280155328352138246671173363420934053261939427835577472268504909453428 -1800414547742709738055458978930857855143695843843291531147854531034858091518345202288034474984979093416728299123459454532 
-36122964808107143226134762730577956091489643889686636199849623829341228699446234605848428967112410571722733085472857110886 1] +[-22117023380848843894955719104751852279167260342562341244588102147725396364737486271559605492399277785601823392747228913780 -18279950414618837531655188544852365185646782426105987540363504650824383191861686249521380836424084969138850962998898485395 29544809042775761995817368936080946712630952784433599128802067610816305258634725980815194933097482721350163922600341502889 29666540230685538723626075013853570545506324890393343787760756085260975475088472251726602859990799036817275799602506662984 19669660844267929164645385989468002112503365473200220495440043099758826047967383984559018056033241228116597598658924968576 -35612715298886939004403301768136001873828323263435140242039463785209734933829719734598728561242091983696417681580185647515 -54394783662493448067695136004038168938701712579958850663398255295035974785741328413108159813590100547731798887218500168752 -28218818490988631749969922548235570768825878496731963303347718291272430620287819880244931503557810700026800198748193389337 35883786823579980661651678621121954803899252344219915842875991760372388584339006075992123931085342566207654712588151189674 1800430236649921214222020230653257220323071011803408768411755740005619963761788931207375877012053019259209084139042397341 36123279585525370798189988095931342561802762312080955538308325854210385432477026910734886131837716589106185637551656424490 -1] +[-22116877685835941126068449879581093156316879986795531671412000168217514541225745280456471005097220528629797966180975183514 -18279829996171668640949742120910399838099879919574547338299475711813145813260186707165945382249826805516492588696591997373 29544614417519886724816901953107832462425743018308566011804026585174846333167795048305565480828046063653548497688385384853 29666344803530298977494463562348795770610233864730846307314261420540044916775933640669283262407924039189118245372116171405 19669531271158512452686322981022392967127144032423100227199176837691899475722993080263082682695916547821344885330712436262 -35612480701539654614499931031648241565661080952333248717647741863275093121664274944949666893231588209025582316214258320509 -54394425339016952916271602018519968703971602499883549539435640522904050606323006287764747376334638139380788003303543991192 -28218632600642840213799892006870873703561382912197567715772157189133402781631597407814705923821812691958515900204006464272 35883550440559795915562052093985262043386958585867777192193545816411200764645415450152461262139285129456188186138525980991 1800418376387260987382720314694111287225894201886546400181385999479518025757906522437177939966534204299229200706868902782 36123041624856130801420614393353178598187710244588209189547586240441183040056537127889355422639945446034535458974459336466 -1] +[153300751389883906614715908684381595158854815019076330345965058748651401702935137858908440184753029760178436175776411 126704669325322889255155789303293256412204062617542426056038953236258878888078642336167657842284239352332104584617386 -204785307133600825926434947310539905959072578028218815433605540749531852825825065706317237736688008547806253849730194 -205629068170192708496023229829517659217250886529629638446491929871144234919221241905465442433196135071898116245497041 -136337233771768195750865800819175426698042373162934615788134775700140045270064573206827216859721165828160770141236872 
246844067591877261901801021470632147053267081469760073274846382835180760806891203047811592085222603991006855646882271 377029090378001311022910299866042631109246528636811983342383233064557544085727080326250391921776065889271142719262077 195594407236064424442510470319951889917030725594690088094850130062786363258143173351832535868737612955693003921996835 -248722958240961065994294913864181843189905849244726166540687462459351767490199361396229100579949739182283697568449591 -12479405720685472871472679567692945622700070662844098966857399677574753129862397644704023430096898152822295568524135 -250382965545129219181227396113613345269638584922714186640342543946336771926751766478153625156845831899729315417747166 0] +[301145885839173819969703925479551813714060284167821933699853492199176116578711831992112605650315588049932068866510920 248900214369411457257922182414032693208395856920884823652804586240084496340274134344788924927011673597465847882543276 -402282781815934612630097787584272164188824371923361697829784861696748517383064848502033956699176061679884775285097360 -403940276397646163213060821209201385741465514653213934881581120753320425216660197172071260459590844140665355066530515 -267822542713061851602734767163253276833168588548648525325329956594452875903239142834446226474341945519480441271372013 484903529337861454020020797144731410464811339116524622140159263189250033132961385879210654032167035729974891033194678 740640592949588900414058268737287704350900234998920233112817592621562582316034237140851683599274553158342791390570328 384228064756762946206145773822526049238325477857776979702964428605396119140722353068188655960146888718381949083005303 -488594445290871062793246858987640328764845862805397138586352857562752368868285194578505080188543446932519701074818646 -24514698437089807263685819578779252693447679115202653667306007102834836796754613466881439945875757407900806631737462 -491855384102852680467538672897022957290337831412350521756365759325684235785140881811626973421906591392201175156517166 0] +[-22117226546116886109763322378844674622546324478431531135520811093364799400159840054921920560371132378642101991444495496654 -18280118332831059537068894499955495779228723067314165021046052970446590296867097673757907471648548586088198965665579492705 29545080439109033006559438795794252035612382706199285879517042079101293622464404015193348065694130279126870635558928144364 29666812745232097070406529987146813886760386808585192821066868530490495570649424591147935348711988942139718240704905610017 19669841528252790593070716736908131779276597652400348575771848510586757775959546738047964468508521533989044678518519608527 -35613042434538279612693088687007121375294759538525284151480712416479855084589328414372756965781242033206739341540434565345 -54395283328773697061562245921178021577369817175915744351482042640890919485397031773763669422211981196377459202217084356977 -28219077706875138457597460806660379260640059631307801018393953494350195313523278187138079380937456827669589568226031170718 35884116449273404444698288136788131834030229653398959030597574377784837675636371229369314291573973612525744122253682274033 1800446775262976035289944018144445300810930029492874702563867024782422136675722395287536367476434895753122371640566089769 36123611411180802892777524434718239367700912623868195289065665696626684802255818207220263750824955164746347612612248556209 -1] +] diff --git a/thirdparty/linux/ntl/src/LLL_FP.c b/thirdparty/linux/ntl/src/LLL_FP.c new file mode 100644 index 0000000000..013afcaab9 --- /dev/null +++ 
b/thirdparty/linux/ntl/src/LLL_FP.c @@ -0,0 +1,1692 @@ + +#include <NTL/LLL.h> +#include <NTL/vec_double.h> +#include <NTL/fileio.h> + + +#include <NTL/new.h> + +NTL_START_IMPL + +static inline +void CheckFinite(double *p) +{ + if (!IsFinite(p)) ResourceError("LLL_FP: numbers too big...use LLL_XD"); +} + +static double InnerProduct(double *a, double *b, long n) +{ + double s; + long i; + + s = 0; + for (i = 1; i <= n; i++) + s += a[i]*b[i]; + + return s; +} + +static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x - y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + if (k > 0) { + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + LeftShift(T, T, k); + sub(A(i), A(i), T); + } + + } + else { + + for (i = 1; i <= n; i++) { + MulSubFrom(A(i), B(i), mu1); + } + + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } +} + + +#define TR_BND (NTL_FDOUBLE_PRECISION/2.0) +// Just to be safe!! + +static double max_abs(double *v, long n) +{ + long i; + double res, t; + + res = 0; + + for (i = 1; i <= n; i++) { + t = fabs(v[i]); + if (t > res) res = t; + } + + return res; +} + + +static void RowTransformStart(double *a, long *in_a, long& in_float, long n) +{ + long i; + long inf = 1; + + for (i = 1; i <= n; i++) { + in_a[i] = (a[i] < TR_BND && a[i] > -TR_BND); + inf = inf & in_a[i]; + } + + in_float = inf; +} + + +static void RowTransformFinish(vec_ZZ& A, double *a, long *in_a) +{ + long n = A.length(); + long i; + + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i]); + } + else { + conv(a[i], A(i)); + CheckFinite(&a[i]); + } + } +} + + +static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1, + double *a, double *b, long *in_a, + double& max_a, double max_b, long& in_float) +// x = x - y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + double mu; + + conv(mu, MU1); + CheckFinite(&mu); + + long n = A.length(); + long i; + + if (in_float) { + double mu_abs = fabs(mu); + if (mu_abs > 0 && max_b > 0 && (mu_abs >= TR_BND || max_b >= TR_BND)) { + in_float = 0; + } + else { + max_a += mu_abs*max_b; + if (max_a >= TR_BND) + in_float = 0; + } + } + + if (in_float) { + if (mu == 1) { + for (i = 1; i <= n; i++) + a[i] -= b[i]; + + return; + } + + if (mu == -1) { + for (i = 1; i <= n; i++) + a[i] += b[i]; + + return; + } + + if (mu == 0) return; + + for (i = 1; i <= n; i++) + a[i] -= mu*b[i]; + + + return; + } + + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i] < TR_BND && a[i] > -TR_BND && + b[i] < TR_BND && b[i] > -TR_BND) { + + a[i] -= b[i]; + } + else { + if (in_a[i]) { + conv(A(i), a[i]); + in_a[i] = 0; + } + + sub(A(i), A(i), B(i)); + } + } + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i] < TR_BND && a[i] > -TR_BND && + b[i] < TR_BND && b[i] > -TR_BND) { + + a[i] += b[i]; + } + else { + if (in_a[i]) { + conv(A(i), a[i]); + in_a[i] = 0; + } + + add(A(i), A(i), B(i)); + } + } + return; + } + + if (MU == 0) return; + + double b_bnd = fabs(TR_BND/mu) - 1; + if (b_bnd < 0) b_bnd = 0; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + + if 
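+// Illustration (not part of the NTL sources): every RowTransform variant
+// in this file computes A = A - MU*B over vec_ZZ; the branches only pick
+// progressively cheaper ways to do it (MU = 0 or +-1, MU odd times a
+// power of two via NumTwos/MakeOdd, MU single precision, full ZZ).
+// All of them must agree with this reference version:
+//
+//     void row_sub_mul(vec_ZZ& A, const vec_ZZ& B, const ZZ& MU)
+//     {
+//         ZZ T;
+//         for (long i = 1; i <= A.length(); i++) {
+//             mul(T, B(i), MU);     // T = MU*B(i)
+//             sub(A(i), A(i), T);   // A(i) -= T
+//         }
+//     }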
(MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + if (k > 0) { + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i]); + in_a[i] = 0; + } + + mul(T, B(i), mu1); + LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i] < TR_BND && a[i] > -TR_BND && + b[i] < b_bnd && b[i] > -b_bnd) { + + a[i] -= b[i]*mu; + } + else { + if (in_a[i]) { + conv(A(i), a[i]); + in_a[i] = 0; + } + MulSubFrom(A(i), B(i), mu1); + } + } + } + } + else { + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i]); + in_a[i] = 0; + } + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } +} + +static void RowTransform2(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x + y*MU + +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } +} + +static +void ComputeGS(mat_ZZ& B, double **B1, double **mu, double *b, + double *c, long k, double bound, long st, double *buf) + +{ + long n = B.NumCols(); + long i, j; + double s, t1, y, t; + + ZZ T1; + long test; + + double *mu_k = mu[k]; + + if (st < k) { + for (i = 1; i < st; i++) + buf[i] = mu_k[i]*c[i]; + } + + for (j = st; j <= k-1; j++) { + s = InnerProduct(B1[k], B1[j], n); + + // test = b[k]*b[j] >= NTL_FDOUBLE_PRECISION^2 + + test = (b[k]/NTL_FDOUBLE_PRECISION >= NTL_FDOUBLE_PRECISION/b[j]); + + // test = test && s^2 <= b[k]*b[j]/bound, + // but we compute it in a strange way to avoid overflow + + if (test && (y = fabs(s)) != 0) { + t = y/b[j]; + t1 = b[k]/y; + if (t <= 1) + test = (t*bound <= t1); + else if (t1 >= 1) + test = (t <= t1/bound); + else + test = 0; + } + + if (test) { + InnerProduct(T1, B(k), B(j)); + conv(s, T1); + } + + double *mu_j = mu[j]; + + t1 = 0; + for (i = 1; i <= j-1; i++) { + t1 += mu_j[i]*buf[i]; + } + + mu_k[j] = (buf[j] = (s - t1))/c[j]; + } + +#if (!NTL_EXT_DOUBLE) + + // Kahan summation + + double c1; + + s = c1 = 0; + for (j = 1; j <= k-1; j++) { + y = mu_k[j]*buf[j] - c1; + t = s+y; + c1 = t-s; + c1 = c1-y; + s = t; + } + + +#else + + s = 0; + for (j = 1; j <= k-1; j++) + s += mu_k[j]*buf[j]; + +#endif + + c[k] = b[k] - s; +} + +NTL_CHEAP_THREAD_LOCAL double LLLStatusInterval = 900.0; +NTL_CHEAP_THREAD_LOCAL char *LLLDumpFile = 0; + +static NTL_CHEAP_THREAD_LOCAL double red_fudge = 0; +static NTL_CHEAP_THREAD_LOCAL long log_red = 0; +static NTL_CHEAP_THREAD_LOCAL long verbose = 0; + +static NTL_CHEAP_THREAD_LOCAL unsigned long NumSwaps = 0; +static NTL_CHEAP_THREAD_LOCAL double RR_GS_time = 0; +static NTL_CHEAP_THREAD_LOCAL double StartTime = 0; +static NTL_CHEAP_THREAD_LOCAL double LastTime = 0; + + + +static void LLLStatus(long max_k, double t, long m, const mat_ZZ& B) +{ + cerr << "---- LLL_FP status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, t-StartTime); + cerr << ", stage: " << max_k; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 
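+// Illustration (not part of the NTL sources): the !NTL_EXT_DOUBLE branch
+// of ComputeGS above is Kahan compensated summation; c1 carries the
+// low-order bits each addition would otherwise lose.  Generic form:
+//
+//     double kahan_sum(const double *x, long n)
+//     {
+//         double s = 0, c = 0;
+//         for (long i = 0; i < n; i++) {
+//             double y = x[i] - c;  // re-inject previously lost bits
+//             double t = s + y;     // low bits of y are dropped here
+//             c = (t - s) - y;      // recover exactly what was dropped
+//             s = t;
+//         }
+//         return s;
+//     }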
1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = t; + +} + +static void init_red_fudge() +{ + long i; + + log_red = long(0.50*NTL_DOUBLE_PRECISION); + red_fudge = 1; + + for (i = log_red; i > 0; i--) + red_fudge = red_fudge*0.5; +} + +static void inc_red_fudge() +{ + + red_fudge = red_fudge * 2; + log_red--; + + + cerr << "LLL_FP: warning--relaxing reduction (" << log_red << ")\n"; + + if (log_red < 4) + ResourceError("LLL_FP: too much loss of precision...stop!"); +} + + +#if 0 + +static void print_mus(double **mu, long k) +{ + long i; + + for (i = k-1; i >= 1; i--) + cerr << mu[k][i] << " "; + cerr << "\n"; +} + +#endif + +void ComputeGS(const mat_ZZ& B, mat_RR& B1, + mat_RR& mu, vec_RR& b, + vec_RR& c, long k, const RR& bound, long st, + vec_RR& buf, const RR& bound2); + + + +static void RR_GS(mat_ZZ& B, double **B1, double **mu, + double *b, double *c, double *buf, long prec, + long rr_st, long k, long m_orig, + mat_RR& rr_B1, mat_RR& rr_mu, + vec_RR& rr_b, vec_RR& rr_c) +{ + double tt; + + cerr << "LLL_FP: RR refresh " << rr_st << "..." << k << "..."; + tt = GetTime(); + + if (rr_st > k) ResourceError("LLL_FP: can not continue!!!"); + + RRPush push; + RR::SetPrecision(prec); + + long n = B.NumCols(); + + rr_B1.SetDims(k, n); + rr_mu.SetDims(k, m_orig); + rr_b.SetLength(k); + rr_c.SetLength(k); + + vec_RR rr_buf; + rr_buf.SetLength(k); + + long i, j; + + for (i = rr_st; i <= k; i++) + for (j = 1; j <= n; j++) + conv(rr_B1(i, j), B(i, j)); + + for (i = rr_st; i <= k; i++) + InnerProduct(rr_b(i), rr_B1(i), rr_B1(i)); + + + + RR bound; + power2(bound, 2*long(0.15*RR::precision())); + + RR bound2; + power2(bound2, 2*RR::precision()); + + for (i = rr_st; i <= k; i++) + ComputeGS(B, rr_B1, rr_mu, rr_b, rr_c, i, bound, 1, rr_buf, bound2); + + for (i = rr_st; i <= k; i++) + for (j = 1; j <= n; j++) { + conv(B1[i][j], rr_B1(i,j)); + CheckFinite(&B1[i][j]); + } + + for (i = rr_st; i <= k; i++) + for (j = 1; j <= i-1; j++) { + conv(mu[i][j], rr_mu(i,j)); + } + + for (i = rr_st; i <= k; i++) { + conv(b[i], rr_b(i)); + CheckFinite(&b[i]); + } + + + for (i = rr_st; i <= k; i++) { + conv(c[i], rr_c(i)); + CheckFinite(&c[i]); + } + + for (i = 1; i <= k-1; i++) { + conv(buf[i], rr_buf[i]); + } + + + tt = GetTime()-tt; + RR_GS_time += tt; + cerr << tt << " (" << RR_GS_time << ")\n"; +} + +void ComputeGS(const mat_ZZ& B, mat_RR& mu, vec_RR& c) +{ + long n = B.NumCols(); + long k = B.NumRows(); + + mat_RR B1; + vec_RR b; + + B1.SetDims(k, n); + mu.SetDims(k, k); + b.SetLength(k); + c.SetLength(k); + + vec_RR buf; + buf.SetLength(k); + + long i, j; + + for (i = 1; i <= k; i++) + for (j = 1; j <= n; j++) + conv(B1(i, j), B(i, j)); + + for (i = 1; i <= k; i++) + InnerProduct(b(i), B1(i), B1(i)); + + + + RR bound; + power2(bound, 2*long(0.15*RR::precision())); + + RR bound2; + power2(bound2, 2*RR::precision()); + + + for (i = 1; i <= k; i++) + ComputeGS(B, B1, mu, b, c, i, bound, 1, buf, bound2); + +} + + + + + +static +long ll_LLL_FP(mat_ZZ& B, mat_ZZ* U, double delta, long deep, + LLLCheckFct check, double **B1, double **mu, + double *b, double *c, + long m, long init_k, long &quit) +{ + long n = B.NumCols(); + + long i, j, k, Fc1; + ZZ MU; + 
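+// Note (not part of the NTL sources): the size-reduction threshold used
+// below is 0.5 + red_fudge rather than an exact 0.5.  init_red_fudge
+// (above) starts from red_fudge = 2^(-log_red) with log_red about half
+// of NTL_DOUBLE_PRECISION; every inc_red_fudge doubles it, relaxing
+//
+//     if (fabs(mu[k][j]) > 0.5 + red_fudge)   // entry still too large
+//
+// and the computation aborts once log_red falls below 4.  RR_GS (above)
+// is the escape hatch: it recomputes the Gram-Schmidt data in prec-bit
+// RR arithmetic and copies it back into the double arrays.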
double mu1; + + double t1; + ZZ T1; + double *tp; + + + static double bound = 0; + + if (bound == 0) { + // we tolerate a 15% loss of precision in computing + // inner products in ComputeGS. + + bound = 1; + for (i = 2*long(0.15*NTL_DOUBLE_PRECISION); i > 0; i--) + bound = bound * 2; + } + + double half_plus_fudge = 0.5 + red_fudge; + + quit = 0; + k = init_k; + + + vec_long st_mem; + st_mem.SetLength(m+2); + long *st = st_mem.elts(); + + for (i = 1; i < k; i++) + st[i] = i; + + for (i = k; i <= m+1; i++) + st[i] = 1; + + UniqueArray buf_store; + buf_store.SetLength(m+1); + double *buf = buf_store.get(); + + vec_long in_vec_mem; + in_vec_mem.SetLength(n+1); + long *in_vec = in_vec_mem.elts(); + + UniqueArray max_b_store; + max_b_store.SetLength(m+1); + double *max_b = max_b_store.get(); + + + for (i = 1; i <= m; i++) + max_b[i] = max_abs(B1[i], n); + + long in_float; + + long rst; + long counter; + long start_over; + + long trigger_index; + long small_trigger; + long cnt; + + mat_RR rr_B1; + mat_RR rr_mu; + vec_RR rr_c; + vec_RR rr_b; + + long m_orig = m; + + long rr_st = 1; + + long max_k = 0; + + long prec = RR::precision(); + + double tt; + + long swap_cnt = 0; + + + while (k <= m) { + + if (k > max_k) { + max_k = k; + swap_cnt = 0; + } + + if (verbose) { + tt = GetTime(); + + if (tt > LastTime + LLLStatusInterval) + LLLStatus(max_k, tt, m, B); + } + + if (k < rr_st) rr_st = k; + + if (st[k] == k) + rst = 1; + else + rst = k; + + if (st[k] < st[k+1]) st[k+1] = st[k]; + ComputeGS(B, B1, mu, b, c, k, bound, st[k], buf); + CheckFinite(&c[k]); + st[k] = k; + + if (swap_cnt > 200000) { + cerr << "LLL_FP: swap loop?\n"; + RR_GS(B, B1, mu, b, c, buf, prec, + rr_st, k, m_orig, rr_B1, rr_mu, rr_b, rr_c); + if (rr_st < st[k+1]) st[k+1] = rr_st; + rr_st = k+1; + rst = k; + swap_cnt = 0; + } + + counter = 0; + trigger_index = k; + small_trigger = 0; + cnt = 0; + + long thresh = 10; + long sz=0, new_sz; + + long did_rr_gs = 0; + + + do { + // size reduction + + counter++; + if ((counter & 127) == 0) { + + new_sz = 0; + for (j = 1; j <= n; j++) + new_sz += NumBits(B(k,j)); + + if ((counter >> 7) == 1 || new_sz < sz) { + sz = new_sz; + } + else { + cerr << "LLL_FP: warning--infinite loop?\n"; + } + } + + Fc1 = 0; + start_over = 0; + + for (j = rst-1; j >= 1; j--) { + t1 = fabs(mu[k][j]); + if (t1 > half_plus_fudge) { + + + if (!Fc1) { + if (j > trigger_index || + (j == trigger_index && small_trigger)) { + + cnt++; + + if (cnt > thresh) { + if (log_red <= 15) { + + while (log_red > 10) + inc_red_fudge(); + + half_plus_fudge = 0.5 + red_fudge; + + if (!did_rr_gs) { + RR_GS(B, B1, mu, b, c, buf, prec, + rr_st, k, m_orig, rr_B1, rr_mu, rr_b, rr_c); + if (rr_st < st[k+1]) st[k+1] = rr_st; + rr_st = k+1; + did_rr_gs = 1; + rst = k; + trigger_index = k; + small_trigger = 0; + start_over = 1; + break; + } + } + else { + inc_red_fudge(); + half_plus_fudge = 0.5 + red_fudge; + cnt = 0; + } + } + } + + trigger_index = j; + small_trigger = (t1 < 4); + + Fc1 = 1; + if (k < rr_st) rr_st = k; + RowTransformStart(B1[k], in_vec, in_float, n); + } + + + mu1 = mu[k][j]; + if (mu1 >= 0) + mu1 = ceil(mu1-0.5); + else + mu1 = floor(mu1+0.5); + + double *mu_k = mu[k]; + double *mu_j = mu[j]; + + if (mu1 == 1) { + for (i = 1; i <= j-1; i++) + mu_k[i] -= mu_j[i]; + } + else if (mu1 == -1) { + for (i = 1; i <= j-1; i++) + mu_k[i] += mu_j[i]; + } + else { + for (i = 1; i <= j-1; i++) + mu_k[i] -= mu1*mu_j[i]; + } + + mu_k[j] -= mu1; + + conv(MU, mu1); + + RowTransform(B(k), B(j), MU, B1[k], B1[j], in_vec, + max_b[k], max_b[j], 
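+// Illustration (not part of the NTL sources): mu1 above is mu[k][j]
+// rounded to a nearest integer without calling rint():
+//
+//     double round_nearest(double x)
+//     {
+//         return (x >= 0) ? ceil(x - 0.5) : floor(x + 0.5);
+//     }
+//
+// e.g. 1.7 -> 2, -1.7 -> -2, 0.3 -> 0, and halves go toward zero
+// (2.5 -> 2).  Any integer within 0.5 + red_fudge of mu[k][j] leaves the
+// entry size-reduced, so the tie-breaking does not matter.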
in_float); + if (U) RowTransform((*U)(k), (*U)(j), MU); + } + } + + + if (Fc1) { + RowTransformFinish(B(k), B1[k], in_vec); + max_b[k] = max_abs(B1[k], n); + + if (!did_rr_gs) { + b[k] = InnerProduct(B1[k], B1[k], n); + CheckFinite(&b[k]); + + ComputeGS(B, B1, mu, b, c, k, bound, 1, buf); + CheckFinite(&c[k]); + } + else { + RR_GS(B, B1, mu, b, c, buf, prec, + rr_st, k, m_orig, rr_B1, rr_mu, rr_b, rr_c); + rr_st = k+1; + } + + rst = k; + } + } while (Fc1 || start_over); + + if (check && (*check)(B(k))) + quit = 1; + + if (b[k] == 0) { + for (i = k; i < m; i++) { + // swap i, i+1 + swap(B(i), B(i+1)); + tp = B1[i]; B1[i] = B1[i+1]; B1[i+1] = tp; + t1 = b[i]; b[i] = b[i+1]; b[i+1] = t1; + t1 = max_b[i]; max_b[i] = max_b[i+1]; max_b[i+1] = t1; + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = k; i <= m+1; i++) st[i] = 1; + if (k < rr_st) rr_st = k; + + m--; + if (quit) break; + continue; + } + + if (quit) break; + + if (deep > 0) { + // deep insertions + + double cc = b[k]; + long l = 1; + while (l <= k-1 && delta*c[l] <= cc) { + cc = cc - mu[k][l]*mu[k][l]*c[l]; + l++; + } + + if (l <= k-1 && (l <= deep || k-l <= deep)) { + // deep insertion at position l + + for (i = k; i > l; i--) { + // swap rows i, i-1 + swap(B(i), B(i-1)); + tp = B1[i]; B1[i] = B1[i-1]; B1[i-1] = tp; + tp = mu[i]; mu[i] = mu[i-1]; mu[i-1] = tp; + t1 = b[i]; b[i] = b[i-1]; b[i-1] = t1; + t1 = max_b[i]; max_b[i] = max_b[i-1]; max_b[i-1] = t1; + if (U) swap((*U)(i), (*U)(i-1)); + } + + k = l; + NumSwaps++; + swap_cnt++; + continue; + } + } // end deep insertions + + // test LLL reduction condition + + if (k > 1 && delta*c[k-1] > c[k] + mu[k][k-1]*mu[k][k-1]*c[k-1]) { + // swap rows k, k-1 + swap(B(k), B(k-1)); + tp = B1[k]; B1[k] = B1[k-1]; B1[k-1] = tp; + tp = mu[k]; mu[k] = mu[k-1]; mu[k-1] = tp; + t1 = b[k]; b[k] = b[k-1]; b[k-1] = t1; + t1 = max_b[k]; max_b[k] = max_b[k-1]; max_b[k-1] = t1; + if (U) swap((*U)(k), (*U)(k-1)); + + k--; + NumSwaps++; + swap_cnt++; + // cout << "-\n"; + } + else { + + k++; + // cout << "+\n"; + } + + } + + if (verbose) { + LLLStatus(m+1, GetTime(), m, B); + } + + + return m; +} + + + + + +static +long LLL_FP(mat_ZZ& B, mat_ZZ* U, double delta, long deep, + LLLCheckFct check) +{ + long m = B.NumRows(); + long n = B.NumCols(); + + long i, j; + long new_m, dep, quit; + ZZ MU; + + ZZ T1; + + init_red_fudge(); + + if (U) ident(*U, m); + + Unique2DArray B1_store; + B1_store.SetDimsFrom1(m+1, n+1); + double **B1 = B1_store.get(); // approximates B + + + Unique2DArray mu_store; + mu_store.SetDimsFrom1(m+1, m+1); + double **mu = mu_store.get(); + + UniqueArray c_store; + c_store.SetLength(m+1); + double *c = c_store.get(); // squared lengths of Gramm-Schmidt basis vectors + + UniqueArray b_store; + b_store.SetLength(m+1); + double *b = b_store.get(); // squared lengths of basis vectors + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) { + conv(B1[i][j], B(i, j)); + CheckFinite(&B1[i][j]); + } + + + for (i = 1; i <= m; i++) { + b[i] = InnerProduct(B1[i], B1[i], n); + CheckFinite(&b[i]); + } + + new_m = ll_LLL_FP(B, U, delta, deep, check, B1, mu, b, c, m, 1, quit); + dep = m - new_m; + m = new_m; + + if (dep > 0) { + // for consistency, we move all of the zero rows to the front + + for (i = 0; i < m; i++) { + swap(B(m+dep-i), B(m-i)); + if (U) swap((*U)(m+dep-i), (*U)(m-i)); + } + } + + return m; +} + + + +long LLL_FP(mat_ZZ& B, double delta, long deep, LLLCheckFct check, + long verb) +{ + verbose = verb; + RR_GS_time = 0; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = 
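+// Note (not part of the NTL sources): the exchange test in the main loop
+// above,
+//
+//     delta*c[k-1] > c[k] + mu[k][k-1]*mu[k][k-1]*c[k-1],
+//
+// is the Lovasz condition.  With c[i] = |b*_i|^2 the right-hand side is
+// the squared Gram-Schmidt length position k-1 would get if b_k were
+// moved in front of b_{k-1}, so rows are swapped exactly when that move
+// shrinks the Gram-Schmidt vector by a factor better than delta.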
StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("LLL_FP: bad delta"); + if (deep < 0) LogicError("LLL_FP: bad deep"); + return LLL_FP(B, 0, delta, deep, check); +} + +long LLL_FP(mat_ZZ& B, mat_ZZ& U, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + RR_GS_time = 0; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("LLL_FP: bad delta"); + if (deep < 0) LogicError("LLL_FP: bad deep"); + return LLL_FP(B, &U, delta, deep, check); +} + + + +static vec_double BKZConstant; + +static +void ComputeBKZConstant(long beta, long p) +{ + const double c_PI = 3.14159265358979323846264338328; + const double LogPI = 1.14472988584940017414342735135; + + BKZConstant.SetLength(beta-1); + + vec_double Log; + Log.SetLength(beta); + + + long i, j, k; + double x, y; + + for (j = 1; j <= beta; j++) + Log(j) = log(double(j)); + + for (i = 1; i <= beta-1; i++) { + // First, we compute x = gamma(i/2)^{2/i} + + k = i/2; + + if ((i & 1) == 0) { // i even + x = 0; + for (j = 1; j <= k; j++) + x = x + Log(j); + + x = x * (1/double(k)); + + x = exp(x); + } + else { // i odd + x = 0; + for (j = k + 2; j <= 2*k + 2; j++) + x = x + Log(j); + + x = 0.5*LogPI + x - 2*(k+1)*Log(2); + + x = x * (2.0/double(i)); + + x = exp(x); + } + + // Second, we compute y = 2^{2*p/i} + + y = -(2*p/double(i))*Log(2); + y = exp(y); + + BKZConstant(i) = x*y/c_PI; + } +} + +static vec_double BKZThresh; + +static +void ComputeBKZThresh(double *c, long beta) +{ + BKZThresh.SetLength(beta-1); + + long i; + double x; + + x = 0; + + for (i = 1; i <= beta-1; i++) { + x += log(c[i-1]); + BKZThresh(i) = exp(x/double(i))*BKZConstant(i); + if (!IsFinite(&BKZThresh(i))) BKZThresh(i) = 0; + } +} + +static +void BKZStatus(double tt, double enum_time, unsigned long NumIterations, + unsigned long NumTrivial, unsigned long NumNonTrivial, + unsigned long NumNoOps, long m, + const mat_ZZ& B) +{ + cerr << "---- BKZ_FP status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, tt-StartTime); + cerr << ", enum time: "; + PrintTime(cerr, enum_time); + cerr << ", iter: " << NumIterations << "\n"; + cerr << "triv: " << NumTrivial; + cerr << ", nontriv: " << NumNonTrivial; + cerr << ", no ops: " << NumNoOps; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = tt; + +} + + + +static +long BKZ_FP(mat_ZZ& BB, mat_ZZ* UU, double delta, + long beta, long prune, LLLCheckFct check) +{ + + + + long m = BB.NumRows(); + long n = BB.NumCols(); + long m_orig = m; + + long i, j; + ZZ MU; + + double t1; + ZZ T1; + double *tp; + + init_red_fudge(); + + mat_ZZ B; + B = BB; + + B.SetDims(m+1, n); + + Unique2DArray B1_store; + B1_store.SetDimsFrom1(m+2, n+1); + double **B1 = B1_store.get(); // approximates B + + + Unique2DArray mu_store; + mu_store.SetDimsFrom1(m+2, m+1); + double **mu = mu_store.get(); + + UniqueArray c_store; + c_store.SetLength(m+2); + double *c = c_store.get(); // squared lengths of Gramm-Schmidt basis vectors + + UniqueArray b_store; + 
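+// Note (not part of the NTL sources): written out, ComputeBKZConstant
+// above tabulates, for i = 1..beta-1,
+//
+//     BKZConstant(i) = Gamma(i/2 + 1)^(2/i) * 2^(-2p/i) / pi,
+//
+// working in logs: for even i = 2k, Gamma(k+1) = k!; for odd i = 2k+1,
+// Gamma(i/2 + 1) = sqrt(pi)*(2k+2)!/(4^(k+1)*(k+1)!).  Since the unit
+// ball in dimension i has volume pi^(i/2)/Gamma(i/2 + 1), this is
+// vol(ball)^(-2/i) scaled by the pruning factor 2^(-2p/i); BKZThresh
+// then multiplies in the geometric mean of the first i values c[.] of
+// the block to get the pruning bound eta used during enumeration.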
b_store.SetLength(m+2); + double *b = b_store.get(); // squared lengths of basis vectors + + double cbar; + + + UniqueArray ctilda_store; + ctilda_store.SetLength(m+2); + double *ctilda = ctilda_store.get(); + + + UniqueArray vvec_store; + vvec_store.SetLength(m+2); + double *vvec = vvec_store.get(); + + UniqueArray yvec_store; + yvec_store.SetLength(m+2); + double *yvec = yvec_store.get(); + + UniqueArray uvec_store; + uvec_store.SetLength(m+2); + double *uvec = uvec_store.get(); + + UniqueArray utildavec_store; + utildavec_store.SetLength(m+2); + double *utildavec = utildavec_store.get(); + + UniqueArray Deltavec_store; + Deltavec_store.SetLength(m+2); + long *Deltavec = Deltavec_store.get(); + + UniqueArray deltavec_store; + deltavec_store.SetLength(m+2); + long *deltavec = deltavec_store.get();; + + mat_ZZ Ulocal; + mat_ZZ *U; + + if (UU) { + Ulocal.SetDims(m+1, m); + for (i = 1; i <= m; i++) + conv(Ulocal(i, i), 1); + U = &Ulocal; + } + else + U = 0; + + long quit; + long new_m; + long z, jj, kk; + long s, t; + long h; + double eta; + + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) { + conv(B1[i][j], B(i, j)); + CheckFinite(&B1[i][j]); + } + + + for (i = 1; i <= m; i++) { + b[i] = InnerProduct(B1[i], B1[i], n); + CheckFinite(&b[i]); + } + + + + m = ll_LLL_FP(B, U, delta, 0, check, B1, mu, b, c, m, 1, quit); + + double tt; + + double enum_time = 0; + unsigned long NumIterations = 0; + unsigned long NumTrivial = 0; + unsigned long NumNonTrivial = 0; + unsigned long NumNoOps = 0; + + long verb = verbose; + + verbose = 0; + + long clean = 1; + + if (m < m_orig) { + for (i = m_orig+1; i >= m+2; i--) { + // swap i, i-1 + + swap(B(i), B(i-1)); + if (U) swap((*U)(i), (*U)(i-1)); + } + } + + if (!quit && m > 1) { + if (beta > m) beta = m; + + if (prune > 0) + ComputeBKZConstant(beta, prune); + + z = 0; + jj = 0; + + while (z < m-1) { + jj++; + kk = min(jj+beta-1, m); + + if (jj == m) { + jj = 1; + kk = beta; + clean = 1; + } + + if (verb) { + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) + BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + + + // ENUM + + double tt1; + + if (verb) { + tt1 = GetTime(); + } + + + if (prune > 0) + ComputeBKZThresh(&c[jj], kk-jj+1); + + + cbar = c[jj]; + utildavec[jj] = uvec[jj] = 1; + + yvec[jj] = vvec[jj] = 0; + Deltavec[jj] = 0; + + + s = t = jj; + deltavec[jj] = 1; + + for (i = jj+1; i <= kk+1; i++) { + ctilda[i] = uvec[i] = utildavec[i] = yvec[i] = 0; + Deltavec[i] = 0; + vvec[i] = 0; + deltavec[i] = 1; + } + + long enum_cnt = 0; + + while (t <= kk) { + if (verb) { + enum_cnt++; + if (enum_cnt > 100000) { + enum_cnt = 0; + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) { + enum_time += tt - tt1; + tt1 = tt; + BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + } + } + + ctilda[t] = ctilda[t+1] + + (yvec[t]+utildavec[t])*(yvec[t]+utildavec[t])*c[t]; + + ForceToMem(&ctilda[t]); // prevents an infinite loop + + if (prune > 0 && t > jj) { + eta = BKZThresh(t-jj); + } + else + eta = 0; + + if (ctilda[t] < cbar - eta) { + if (t > jj) { + t--; + t1 = 0; + for (i = t+1; i <= s; i++) + t1 += utildavec[i]*mu[i][t]; + yvec[t] = t1; + t1 = -t1; + if (t1 >= 0) + t1 = ceil(t1-0.5); + else + t1 = floor(t1+0.5); + utildavec[t] = vvec[t] = t1; + Deltavec[t] = 0; + if (utildavec[t] > -yvec[t]) + deltavec[t] = -1; + else + deltavec[t] = 1; + } + else { + cbar = ctilda[jj]; + for (i = jj; i <= kk; i++) { + uvec[i] = utildavec[i]; + } + } + } + else { + t++; + s = max(s, 
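+// Note (not part of the NTL sources): this else-branch climbs back up
+// the enumeration tree.  At level t the integer coordinate utildavec[t]
+// scans candidates in a zig-zag around the real center vvec[t]:
+//
+//     v, v+1, v-1, v+2, v-2, ...   (or the mirror image),
+//
+// with Deltavec[t] the current offset and deltavec[t] = +-1 the first
+// step direction, so the closest candidates are tried first and a
+// subtree is cut as soon as ctilda[t] >= cbar - eta.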
t); + if (t < s) Deltavec[t] = -Deltavec[t]; + if (Deltavec[t]*deltavec[t] >= 0) Deltavec[t] += deltavec[t]; + utildavec[t] = vvec[t] + Deltavec[t]; + } + } + + if (verb) { + tt1 = GetTime() - tt1; + enum_time += tt1; + } + + NumIterations++; + + h = min(kk+1, m); + + if ((delta - 8*red_fudge)*c[jj] > cbar) { + + clean = 0; + + // we treat the case that the new vector is b_s (jj < s <= kk) + // as a special case that appears to occur most of the time. + + s = 0; + for (i = jj+1; i <= kk; i++) { + if (uvec[i] != 0) { + if (s == 0) + s = i; + else + s = -1; + } + } + + if (s == 0) LogicError("BKZ_FP: internal error"); + + if (s > 0) { + // special case + + NumTrivial++; + + for (i = s; i > jj; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + // cerr << "special case\n"; + new_m = ll_LLL_FP(B, U, delta, 0, check, + B1, mu, b, c, h, jj, quit); + if (new_m != h) LogicError("BKZ_FP: internal error"); + if (quit) break; + } + else { + // the general case + + NumNonTrivial++; + + for (i = 1; i <= n; i++) conv(B(m+1, i), 0); + + if (U) { + for (i = 1; i <= m_orig; i++) + conv((*U)(m+1, i), 0); + } + + for (i = jj; i <= kk; i++) { + if (uvec[i] == 0) continue; + conv(MU, uvec[i]); + RowTransform2(B(m+1), B(i), MU); + if (U) RowTransform2((*U)(m+1), (*U)(i), MU); + } + + for (i = m+1; i >= jj+1; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + for (i = 1; i <= n; i++) { + conv(B1[jj][i], B(jj, i)); + CheckFinite(&B1[jj][i]); + } + + b[jj] = InnerProduct(B1[jj], B1[jj], n); + CheckFinite(&b[jj]); + + if (b[jj] == 0) LogicError("BKZ_FP: internal error"); + + // remove linear dependencies + + // cerr << "general case\n"; + new_m = ll_LLL_FP(B, U, delta, 0, 0, B1, mu, b, c, kk+1, jj, quit); + + if (new_m != kk) LogicError("BKZ_FP: internal error"); + + // remove zero vector + + for (i = kk+2; i <= m+1; i++) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + quit = 0; + if (check) { + for (i = 1; i <= kk; i++) + if ((*check)(B(i))) { + quit = 1; + break; + } + } + + if (quit) break; + + if (h > kk) { + // extend reduced basis + + new_m = ll_LLL_FP(B, U, delta, 0, check, + B1, mu, b, c, h, h, quit); + + if (new_m != h) LogicError("BKZ_FP: internal error"); + if (quit) break; + } + } + + z = 0; + } + else { + // LLL_FP + // cerr << "progress\n"; + + NumNoOps++; + + if (!clean) { + new_m = + ll_LLL_FP(B, U, delta, 0, check, B1, mu, b, c, h, h, quit); + if (new_m != h) LogicError("BKZ_FP: internal error"); + if (quit) break; + } + + z++; + } + } + } + + + if (verb) { + BKZStatus(GetTime(), enum_time, NumIterations, NumTrivial, NumNonTrivial, + NumNoOps, m, B); + } + + // clean up + + + if (m_orig > m) { + // for consistency, we move zero vectors to the front + + for (i = m+1; i <= m_orig; i++) { + swap(B(i), B(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = 0; i < m; i++) { + swap(B(m_orig-i), B(m-i)); + if (U) swap((*U)(m_orig-i), (*U)(m-i)); + } + } + + B.SetDims(m_orig, n); + BB = B; + + if (U) { + U->SetDims(m_orig, m_orig); + *UU = *U; + } + + return m; +} + +long BKZ_FP(mat_ZZ& BB, mat_ZZ& UU, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + RR_GS_time = 0; + NumSwaps = 0; + if 
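+// Note (not part of the NTL sources): after a successful enumeration the
+// code above distinguishes two cases.  If the minimizing combination
+// sum_i uvec[i]*b_i has exactly one nonzero coefficient, the new vector
+// is (up to sign) the block vector b_s itself, so it is simply rotated
+// to the front of the block by swaps.  Otherwise the combination is
+// accumulated into the scratch row B(m+1) via RowTransform2
+// (x = x + y*MU), rotated down to position jj, and a single LLL pass
+// removes the linear dependency this creates, shrinking the block back
+// from kk+1 to kk rows.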
(verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("BKZ_FP: bad delta"); + if (beta < 2) LogicError("BKZ_FP: bad block size"); + + return BKZ_FP(BB, &UU, delta, beta, prune, check); +} + +long BKZ_FP(mat_ZZ& BB, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + RR_GS_time = 0; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("BKZ_FP: bad delta"); + if (beta < 2) LogicError("BKZ_FP: bad block size"); + + return BKZ_FP(BB, 0, delta, beta, prune, check); +} + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/LLL_QP.c b/thirdparty/linux/ntl/src/LLL_QP.c new file mode 100644 index 0000000000..c6350d3562 --- /dev/null +++ b/thirdparty/linux/ntl/src/LLL_QP.c @@ -0,0 +1,1994 @@ + +#include <NTL/LLL.h> +#include <NTL/fileio.h> +#include <NTL/vec_quad_float.h> + +#include <NTL/new.h> + +NTL_START_IMPL + +static inline +void CheckFinite(double *p) +{ + if (!IsFinite(p)) ResourceError("LLL_QP: numbers too big...use LLL_XD"); +} + + +static inline +void CheckFinite(quad_float *p) +{ + if (!IsFinite(p)) ResourceError("LLL_QP: numbers too big...use LLL_XD"); +} + + + +static quad_float InnerProduct(quad_float *a, quad_float *b, long n) +{ + quad_float s; + long i; + + s = 0; + for (i = 1; i <= n; i++) + s += a[i]*b[i]; + + return s; +} + +static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x - y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + if (k > 0) { + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + LeftShift(T, T, k); + sub(A(i), A(i), T); + } + + } + else { + + for (i = 1; i <= n; i++) { + MulSubFrom(A(i), B(i), mu1); + } + + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } +} + + + +#define TR_BND (NTL_FDOUBLE_PRECISION/2.0) +// Just to be safe!! 
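+// Note (not part of the NTL sources): TR_BND = NTL_FDOUBLE_PRECISION/2
+// marks the range in which a double is guaranteed to hold an integer
+// row entry exactly, with headroom for one add or subtract.  While every
+// entry involved in an update stays inside (-TR_BND, TR_BND), the
+// RowTransform below runs purely on the quad_float .hi parts
+// (in_float == 1) and the exact vec_ZZ rows are only resynchronized
+// afterwards by RowTransformFinish.  The guard is just
+//
+//     int fits(double x) { return x < TR_BND && x > -TR_BND; }
+//
+// applied to a[i], b[i], and a bound on the product before each step.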
+ +static double max_abs(quad_float *v, long n) +{ + long i; + double res, t; + + res = 0; + + for (i = 1; i <= n; i++) { + t = fabs(v[i].hi); + if (t > res) res = t; + } + + return res; +} + + +static void RowTransformStart(quad_float *a, long *in_a, long& in_float, long n) +{ + long i; + long inf = 1; + + for (i = 1; i <= n; i++) { + in_a[i] = (a[i].hi < TR_BND && a[i].hi > -TR_BND); + inf = inf & in_a[i]; + } + + in_float = inf; +} + + +static void RowTransformFinish(vec_ZZ& A, quad_float *a, long *in_a) +{ + long n = A.length(); + long i; + + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i].hi); + } + else { + conv(a[i], A(i)); + CheckFinite(&a[i]); + } + } +} + + + + + + + +static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1, + quad_float *a, quad_float *b, long *in_a, + double& max_a, double max_b, long& in_float) +// x = x - y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + double mu; + + + long n = A.length(); + long i; + + conv(mu, MU1); + CheckFinite(&mu); + + if (in_float) { + double mu_abs = fabs(mu); + if (mu_abs > 0 && max_b > 0 && (mu_abs >= TR_BND || max_b >= TR_BND)) { + in_float = 0; + } + else { + max_a += mu_abs*max_b; + if (max_a >= TR_BND) + in_float = 0; + } + } + + if (in_float) { + if (mu == 1) { + for (i = 1; i <= n; i++) + a[i].hi -= b[i].hi; + + return; + } + + if (mu == -1) { + for (i = 1; i <= n; i++) + a[i].hi += b[i].hi; + + return; + } + + if (mu == 0) return; + + for (i = 1; i <= n; i++) + a[i].hi -= mu*b[i].hi; + + + return; + } + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i].hi < TR_BND && a[i].hi > -TR_BND && + b[i].hi < TR_BND && b[i].hi > -TR_BND) { + + a[i].hi -= b[i].hi; + } + else { + if (in_a[i]) { + conv(A(i), a[i].hi); + in_a[i] = 0; + } + + sub(A(i), A(i), B(i)); + } + } + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i].hi < TR_BND && a[i].hi > -TR_BND && + b[i].hi < TR_BND && b[i].hi > -TR_BND) { + + a[i].hi += b[i].hi; + } + else { + if (in_a[i]) { + conv(A(i), a[i].hi); + in_a[i] = 0; + } + + add(A(i), A(i), B(i)); + } + } + + return; + } + + if (MU == 0) return; + + double b_bnd = fabs(TR_BND/mu) - 1; + if (b_bnd < 0) b_bnd = 0; + + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + if (k > 0) { + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i].hi); + in_a[i] = 0; + } + + mul(T, B(i), mu1); + LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + if (in_a[i] && a[i].hi < TR_BND && a[i].hi > -TR_BND && + b[i].hi < b_bnd && b[i].hi > -b_bnd) { + + a[i].hi -= b[i].hi*mu; + } + else { + if (in_a[i]) { + conv(A(i), a[i].hi); + in_a[i] = 0; + } + MulSubFrom(A(i), B(i), mu1); + } + } + } + } + else { + for (i = 1; i <= n; i++) { + if (in_a[i]) { + conv(A(i), a[i].hi); + in_a[i] = 0; + } + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } +} + +static void RowTransform2(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x + y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + for (i = 1; i 
<= n; i++) { + mul(T, B(i), mu1); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } +} + +static +void ComputeGS(mat_ZZ& B, quad_float **B1, quad_float **mu, quad_float *b, + quad_float *c, long k, double bound, long st, quad_float *buf) +{ + long n = B.NumCols(); + long i, j; + quad_float s, t1, y, t; + ZZ T1; + long test; + + quad_float *mu_k = mu[k]; + + if (st < k) { + for (i = 1; i < st; i++) + buf[i] = mu_k[i]*c[i]; + } + + for (j = st; j <= k-1; j++) { + if (b[k].hi/NTL_FDOUBLE_PRECISION < NTL_FDOUBLE_PRECISION/b[j].hi) { + // we can compute inner product exactly in double precision + + double z = 0; + quad_float *B1_k = B1[k]; + quad_float *B1_j = B1[j]; + + for (i = 1; i <= n; i++) + z += B1_k[i].hi * B1_j[i].hi; + + s = z; + } + else { + s = InnerProduct(B1[k], B1[j], n); + + y = fabs(s); + if (y.hi == 0) + test = (b[k].hi != 0); + else { + double t = y.hi/b[j].hi; + double t1 = b[k].hi/y.hi; + if (t <= 1) + test = (t*bound <= t1); + else if (t1 >= 1) + test = (t <= t1/bound); + else + test = 0; + } + + if (test) { + InnerProduct(T1, B(k), B(j)); + conv(s, T1); + } + } + + + quad_float *mu_j = mu[j]; + + t1 = 0; + for (i = 1; i <= j-1; i++) + t1 += mu_j[i]*buf[i]; + + mu_k[j] = (buf[j] = (s - t1))/c[j]; + } + + s = 0; + for (j = 1; j <= k-1; j++) + s += mu_k[j]*buf[j]; + + c[k] = b[k] - s; +} + +NTL_TLS_GLOBAL_DECL_INIT(quad_float, red_fudge, (to_quad_float(0))) + +static NTL_CHEAP_THREAD_LOCAL long log_red = 0; +static NTL_CHEAP_THREAD_LOCAL long verbose = 0; +static NTL_CHEAP_THREAD_LOCAL unsigned long NumSwaps = 0; +static NTL_CHEAP_THREAD_LOCAL double StartTime = 0; +static NTL_CHEAP_THREAD_LOCAL double LastTime = 0; + + +static void LLLStatus(long max_k, double t, long m, const mat_ZZ& B) +{ + cerr << "---- LLL_QP status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, t-StartTime); + cerr << ", stage: " << max_k; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = t; + +} + + +static void init_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + long i; + + // initial log_red should be <= NTL_DOUBLE_PRECISION-2, + // to help ensure stability in BKZ_QP1 + + log_red = NTL_DOUBLE_PRECISION-2; + + red_fudge = 1; + + for (i = log_red; i > 0; i--) + red_fudge = red_fudge*0.5; +} + +static void inc_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + + red_fudge = red_fudge * 2; + log_red--; + + cerr << "LLL_QP: warning--relaxing reduction (" << log_red << ")\n"; + + if (log_red < 4) + ResourceError("LLL_QP: too much loss of precision...stop!"); +} + + +static +long ll_LLL_QP(mat_ZZ& B, mat_ZZ* U, quad_float delta, long deep, + LLLCheckFct check, quad_float **B1, quad_float **mu, + quad_float *b, quad_float *c, + long m, long init_k, long &quit) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + long n = B.NumCols(); + + long i, j, k, Fc1; + ZZ MU; + quad_float mu1; + + quad_float t1; + double dt1; + ZZ T1; + quad_float *tp; + + + static NTL_CHEAP_THREAD_LOCAL 
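+// Note (not part of the NTL sources): in ComputeGS above, the guard
+//
+//     b[k].hi/NTL_FDOUBLE_PRECISION < NTL_FDOUBLE_PRECISION/b[j].hi
+//
+// checks b[k]*b[j] < NTL_FDOUBLE_PRECISION^2 without overflowing.  By
+// Cauchy-Schwarz every prefix sum of the inner product is bounded by
+// sqrt(b[k]*b[j]), so when the guard holds the whole inner product of
+// the .hi parts is computed exactly in plain doubles and the quad_float
+// (or exact ZZ) fallback can be skipped.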
double bound = 0; + + + if (bound == 0) { + // we tolerate a 15% loss of precision in computing + // inner products in ComputeGS. + + bound = 1; + for (i = 2*long(0.15*2*NTL_DOUBLE_PRECISION); i > 0; i--) { + bound = bound * 2; + } + } + + + quad_float half = to_quad_float(0.5); + quad_float half_plus_fudge = 0.5 + red_fudge; + + quit = 0; + k = init_k; + + vec_long st_mem; + st_mem.SetLength(m+2); + long *st = st_mem.elts(); + + for (i = 1; i < k; i++) + st[i] = i; + + for (i = k; i <= m+1; i++) + st[i] = 1; + + UniqueArray buf_store; + buf_store.SetLength(m+1); + quad_float *buf = buf_store.get(); + + vec_long in_vec_mem; + in_vec_mem.SetLength(n+1); + long *in_vec = in_vec_mem.elts(); + + UniqueArray max_b_store; + max_b_store.SetLength(m+1); + double *max_b = max_b_store.get(); + + for (i = 1; i <= m; i++) + max_b[i] = max_abs(B1[i], n); + + long in_float; + + + long rst; + long counter; + + long trigger_index; + long small_trigger; + long cnt; + + long max_k = 0; + + double tt; + + while (k <= m) { + + if (k > max_k) { + max_k = k; + } + + if (verbose) { + tt = GetTime(); + + if (tt > LastTime + LLLStatusInterval) + LLLStatus(max_k, tt, m, B); + } + + + if (st[k] == k) + rst = 1; + else + rst = k; + + if (st[k] < st[k+1]) st[k+1] = st[k]; + ComputeGS(B, B1, mu, b, c, k, bound, st[k], buf); + CheckFinite(&c[k]); + st[k] = k; + + counter = 0; + trigger_index = k; + small_trigger = 0; + cnt = 0; + + do { + // size reduction + + counter++; + if (counter > 10000) { + cerr << "LLL_QP: warning--possible infinite loop\n"; + counter = 0; + } + + + Fc1 = 0; + + for (j = rst-1; j >= 1; j--) { + t1 = fabs(mu[k][j]); + if (t1 > half_plus_fudge) { + + if (!Fc1) { + if (j > trigger_index || + (j == trigger_index && small_trigger)) { + + cnt++; + + if (cnt > 10) { + inc_red_fudge(); + half_plus_fudge = 0.5 + red_fudge; + cnt = 0; + } + } + + trigger_index = j; + small_trigger = (t1 < 4); + + Fc1 = 1; + RowTransformStart(B1[k], in_vec, in_float, n); + } + + + + mu1 = mu[k][j]; + if (mu1 >= 0) + mu1 = ceil(mu1-half); + else + mu1 = floor(mu1+half); + + + quad_float *mu_k = mu[k]; + quad_float *mu_j = mu[j]; + + if (mu1 == 1) { + for (i = 1; i <= j-1; i++) + mu_k[i] -= mu_j[i]; + } + else if (mu1 == -1) { + for (i = 1; i <= j-1; i++) + mu_k[i] += mu_j[i]; + } + else { + for (i = 1; i <= j-1; i++) + mu_k[i] -= mu1*mu_j[i]; + } + + // cout << j << " " << mu[k][j] << " " << mu1 << "\n"; + + mu_k[j] -= mu1; + + conv(MU, mu1); + + + RowTransform(B(k), B(j), MU, B1[k], B1[j], in_vec, + max_b[k], max_b[j], in_float); + + if (U) RowTransform((*U)(k), (*U)(j), MU); + } + } + + if (Fc1) { + RowTransformFinish(B(k), B1[k], in_vec); + max_b[k] = max_abs(B1[k], n); + + b[k] = InnerProduct(B1[k], B1[k], n); + CheckFinite(&b[k]); + + ComputeGS(B, B1, mu, b, c, k, bound, 1, buf); + CheckFinite(&c[k]); + + } + } while (Fc1); + + if (check && (*check)(B(k))) + quit = 1; + + if (b[k] == 0) { + for (i = k; i < m; i++) { + // swap i, i+1 + swap(B(i), B(i+1)); + tp = B1[i]; B1[i] = B1[i+1]; B1[i+1] = tp; + t1 = b[i]; b[i] = b[i+1]; b[i+1] = t1; + dt1 = max_b[i]; max_b[i] = max_b[i+1]; max_b[i+1] = dt1; + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = k; i <= m+1; i++) st[i] = 1; + + m--; + if (quit) break; + continue; + } + + if (quit) break; + + if (deep > 0) { + // deep insertions + + quad_float cc = b[k]; + long l = 1; + while (l <= k-1 && delta*c[l] <= cc) { + cc = cc - mu[k][l]*mu[k][l]*c[l]; + l++; + } + + if (l <= k-1 && (l <= deep || k-l <= deep)) { + // deep insertion at position l + + for (i = k; i > l; i--) { + 
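+// Note (not part of the NTL sources): in the deep-insertion search
+// above, cc starts as b[k] = |b_k|^2 and the preceding while-loop peels
+// off one Gram-Schmidt component per level,
+//
+//     cc <- cc - mu[k][l]^2 * c[l],
+//
+// so when level l is tested, cc is the squared length of b_k projected
+// orthogonally to b_1..b_{l-1}.  The first l with delta*c[l] > cc is
+// where inserting b_k beats the current basis vector by a factor delta,
+// and this rotation moves it there.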
// swap rows i, i-1 + swap(B(i), B(i-1)); + tp = B1[i]; B1[i] = B1[i-1]; B1[i-1] = tp; + tp = mu[i]; mu[i] = mu[i-1]; mu[i-1] = tp; + t1 = b[i]; b[i] = b[i-1]; b[i-1] = t1; + dt1 = max_b[i]; max_b[i] = max_b[i-1]; max_b[i-1] = dt1; + if (U) swap((*U)(i), (*U)(i-1)); + } + + k = l; + NumSwaps++; + continue; + } + } // end deep insertions + + // test LLL reduction condition + + if (k > 1 && delta*c[k-1] > c[k] + mu[k][k-1]*mu[k][k-1]*c[k-1]) { + // swap rows k, k-1 + swap(B(k), B(k-1)); + tp = B1[k]; B1[k] = B1[k-1]; B1[k-1] = tp; + tp = mu[k]; mu[k] = mu[k-1]; mu[k-1] = tp; + t1 = b[k]; b[k] = b[k-1]; b[k-1] = t1; + dt1 = max_b[k]; max_b[k] = max_b[k-1]; max_b[k-1] = dt1; + if (U) swap((*U)(k), (*U)(k-1)); + + k--; + NumSwaps++; + // cout << "- " << k << "\n"; + } + else { + k++; + // cout << "+ " << k << "\n"; + } + } + + if (verbose) { + LLLStatus(m+1, GetTime(), m, B); + } + + return m; +} + +static +long LLL_QP(mat_ZZ& B, mat_ZZ* U, quad_float delta, long deep, + LLLCheckFct check) +{ + long m = B.NumRows(); + long n = B.NumCols(); + + long i, j; + long new_m, dep, quit; + quad_float s; + ZZ MU; + quad_float mu1; + + quad_float t1; + ZZ T1; + + init_red_fudge(); + + if (U) ident(*U, m); + + Unique2DArray B1_store; + B1_store.SetDimsFrom1(m+1, n+1); + quad_float **B1 = B1_store.get(); // approximates B + + + Unique2DArray mu_store; + mu_store.SetDimsFrom1(m+1, m+1); + quad_float **mu = mu_store.get(); + + UniqueArray c_store; + c_store.SetLength(m+1); + quad_float *c = c_store.get(); // squared lengths of Gramm-Schmidt basis vectors + + UniqueArray b_store; + b_store.SetLength(m+1); + quad_float *b = b_store.get(); // squared lengths of basis vectors + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) { + conv(B1[i][j], B(i, j)); + CheckFinite(&B1[i][j]); + } + + + + for (i = 1; i <= m; i++) { + b[i] = InnerProduct(B1[i], B1[i], n); + CheckFinite(&b[i]); + } + + + new_m = ll_LLL_QP(B, U, delta, deep, check, B1, mu, b, c, m, 1, quit); + dep = m - new_m; + m = new_m; + + if (dep > 0) { + // for consistency, we move all of the zero rows to the front + + for (i = 0; i < m; i++) { + swap(B(m+dep-i), B(m-i)); + if (U) swap((*U)(m+dep-i), (*U)(m-i)); + } + } + + return m; +} + + + +long LLL_QP(mat_ZZ& B, double delta, long deep, LLLCheckFct check, + long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("LLL_QP: bad delta"); + if (deep < 0) LogicError("LLL_QP: bad deep"); + return LLL_QP(B, 0, to_quad_float(delta), deep, check); +} + +long LLL_QP(mat_ZZ& B, mat_ZZ& U, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + if (delta < 0.50 || delta >= 1) LogicError("LLL_QP: bad delta"); + if (deep < 0) LogicError("LLL_QP: bad deep"); + return LLL_QP(B, &U, to_quad_float(delta), deep, check); +} + + + +NTL_TLS_GLOBAL_DECL(vec_quad_float, BKZConstant) + +static +void ComputeBKZConstant(long beta, long p) +{ + NTL_TLS_GLOBAL_ACCESS(BKZConstant); + + + const quad_float c_PI = + to_quad_float("3.141592653589793238462643383279502884197"); + const quad_float LogPI = + to_quad_float("1.144729885849400174143427351353058711647"); + + BKZConstant.SetLength(beta-1); + + vec_quad_float Log; + Log.SetLength(beta); + + + long i, j, k; + quad_float x, y; + + for (j = 1; j <= beta; j++) + Log(j) = log(to_quad_float(j)); + + for (i = 1; i <= beta-1; i++) { + // First, we compute x = 
gamma(i/2)^{2/i} + + k = i/2; + + if ((i & 1) == 0) { // i even + x = 0; + for (j = 1; j <= k; j++) + x = x + Log(j); + + x = x * (1/to_quad_float(k)); + + x = exp(x); + } + else { // i odd + x = 0; + for (j = k + 2; j <= 2*k + 2; j++) + x = x + Log(j); + + x = 0.5*LogPI + x - 2*(k+1)*Log(2); + + x = x * (2.0/to_quad_float(i)); + + x = exp(x); + } + + // Second, we compute y = 2^{2*p/i} + + y = -(2*p/to_quad_float(i))*Log(2); + y = exp(y); + + BKZConstant(i) = x*y/c_PI; + } +} + + +NTL_TLS_GLOBAL_DECL(vec_quad_float, BKZThresh) + +static +void ComputeBKZThresh(quad_float *c, long beta) +{ + NTL_TLS_GLOBAL_ACCESS(BKZConstant); + NTL_TLS_GLOBAL_ACCESS(BKZThresh); + + BKZThresh.SetLength(beta-1); + + long i; + quad_float x; + + x = 0; + + for (i = 1; i <= beta-1; i++) { + x += log(c[i-1]); + BKZThresh(i) = exp(x/to_quad_float(i))*BKZConstant(i); + if (!IsFinite(&BKZThresh(i))) BKZThresh(i) = 0; + } +} + + +static +void BKZStatus(double tt, double enum_time, unsigned long NumIterations, + unsigned long NumTrivial, unsigned long NumNonTrivial, + unsigned long NumNoOps, long m, + const mat_ZZ& B) +{ + cerr << "---- BKZ_QP status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, tt-StartTime); + cerr << ", enum time: "; + PrintTime(cerr, enum_time); + cerr << ", iter: " << NumIterations << "\n"; + cerr << "triv: " << NumTrivial; + cerr << ", nontriv: " << NumNonTrivial; + cerr << ", no ops: " << NumNoOps; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = tt; + +} + + +static +long BKZ_QP(mat_ZZ& BB, mat_ZZ* UU, quad_float delta, + long beta, long prune, LLLCheckFct check) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + NTL_TLS_GLOBAL_ACCESS(BKZThresh); + + + + long m = BB.NumRows(); + long n = BB.NumCols(); + long m_orig = m; + + long i, j; + ZZ MU; + + quad_float t1; + ZZ T1; + quad_float *tp; + + init_red_fudge(); + + mat_ZZ B; + B = BB; + + B.SetDims(m+1, n); + + Unique2DArray<quad_float> B1_store; + B1_store.SetDimsFrom1(m+2, n+1); + quad_float **B1 = B1_store.get(); // approximates B + + + Unique2DArray<quad_float> mu_store; + mu_store.SetDimsFrom1(m+2, m+1); + quad_float **mu = mu_store.get(); + + UniqueArray<quad_float> c_store; + c_store.SetLength(m+2); + quad_float *c = c_store.get(); // squared lengths of Gramm-Schmidt basis vectors + + UniqueArray<quad_float> b_store; + b_store.SetLength(m+2); + quad_float *b = b_store.get(); // squared lengths of basis vectors + + quad_float cbar; + + + UniqueArray<quad_float> ctilda_store; + ctilda_store.SetLength(m+2); + quad_float *ctilda = ctilda_store.get(); + + + UniqueArray<quad_float> vvec_store; + vvec_store.SetLength(m+2); + quad_float *vvec = vvec_store.get(); + + UniqueArray<quad_float> yvec_store; + yvec_store.SetLength(m+2); + quad_float *yvec = yvec_store.get(); + + UniqueArray<quad_float> uvec_store; + uvec_store.SetLength(m+2); + quad_float *uvec = uvec_store.get(); + + UniqueArray<quad_float> utildavec_store; + utildavec_store.SetLength(m+2); + quad_float *utildavec = utildavec_store.get(); + + UniqueArray<long> Deltavec_store; + Deltavec_store.SetLength(m+2); + long *Deltavec = Deltavec_store.get(); + + UniqueArray<long> deltavec_store; + 
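// NOTE (editor): the arrays above are the Schnorr-Euchner enumeration state for one block [jj..kk]: utildavec is the partial coefficient vector being built, yvec[t] caches sum_{i>t} utildavec[i]*mu[i][t], ctilda[t] is the squared projected length accumulated from level kk down to t, vvec[t] is the rounded center of the interval searched at level t, Deltavec/deltavec drive the zig-zag walk around that center, and uvec records the best complete solution found (squared length cbar) + 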
deltavec_store.SetLength(m+2); + long *deltavec = deltavec_store.get(); + + + mat_ZZ Ulocal; + mat_ZZ *U; + + if (UU) { + Ulocal.SetDims(m+1, m); + for (i = 1; i <= m; i++) + conv(Ulocal(i, i), 1); + U = &Ulocal; + } + else + U = 0; + + long quit; + long new_m; + long z, jj, kk; + long s, t; + long h; + quad_float eta; + + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) { + conv(B1[i][j], B(i, j)); + CheckFinite(&B1[i][j]); + } + + + for (i = 1; i <= m; i++) { + b[i] = InnerProduct(B1[i], B1[i], n); + CheckFinite(&b[i]); + } + + // cerr << "\n"; + // cerr << "first LLL\n"; + + + m = ll_LLL_QP(B, U, delta, 0, check, B1, mu, b, c, m, 1, quit); + + double tt; + + double enum_time = 0; + unsigned long NumIterations = 0; + unsigned long NumTrivial = 0; + unsigned long NumNonTrivial = 0; + unsigned long NumNoOps = 0; + + long verb = verbose; + + verbose = 0; + + long clean = 1; + + if (m < m_orig) { + for (i = m_orig+1; i >= m+2; i--) { + // swap i, i-1 + + swap(B(i), B(i-1)); + if (U) swap((*U)(i), (*U)(i-1)); + } + } + + if (!quit && m > 1) { + // cerr << "continuing\n"; + if (beta > m) beta = m; + + if (prune > 0) + ComputeBKZConstant(beta, prune); + + z = 0; + jj = 0; + + while (z < m-1) { + jj++; + kk = min(jj+beta-1, m); + + if (jj == m) { + jj = 1; + kk = beta; + clean = 1; + } + + if (verb) { + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) + BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + + + + + // ENUM + + double tt1; + + if (verb) { + tt1 = GetTime(); + } + + + if (prune > 0) + ComputeBKZThresh(&c[jj], kk-jj+1); + + + cbar = c[jj]; + utildavec[jj] = uvec[jj] = 1; + + yvec[jj] = vvec[jj] = 0; + Deltavec[jj] = 0; + + + s = t = jj; + deltavec[jj] = 1; + + for (i = jj+1; i <= kk+1; i++) { + ctilda[i] = uvec[i] = utildavec[i] = yvec[i] = 0; + Deltavec[i] = 0; + vvec[i] = 0; + deltavec[i] = 1; + } + + long enum_cnt = 0; + + while (t <= kk) { + if (verb) { + enum_cnt++; + if (enum_cnt > 100000) { + enum_cnt = 0; + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) { + enum_time += tt - tt1; + tt1 = tt; + BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + } + } + + ctilda[t] = ctilda[t+1] + + (yvec[t]+utildavec[t])*(yvec[t]+utildavec[t])*c[t]; + + if (prune > 0 && t > jj) { + eta = BKZThresh(t-jj); + } + else + eta = 0; + + if (ctilda[t] < cbar - eta) { + if (t > jj) { + t--; + t1 = 0; + for (i = t+1; i <= s; i++) { + t1 += utildavec[i]*mu[i][t]; + } + + + yvec[t] = t1; + t1 = -t1; + if (t1 >= 0) + t1 = ceil(t1-0.5); + else + t1 = floor(t1+0.5); + + utildavec[t] = vvec[t] = t1; + Deltavec[t] = 0; + if (utildavec[t] > -yvec[t]) + deltavec[t] = -1; + else + deltavec[t] = 1; + } + else { + cbar = ctilda[jj]; + for (i = jj; i <= kk; i++) { + uvec[i] = utildavec[i]; + } + } + } + else { + t++; + s = max(s, t); + if (t < s) Deltavec[t] = -Deltavec[t]; + if (Deltavec[t]*deltavec[t] >= 0) Deltavec[t] += deltavec[t]; + utildavec[t] = vvec[t] + Deltavec[t]; + } + } + + if (verb) { + tt1 = GetTime() - tt1; + enum_time += tt1; + } + + NumIterations++; + + h = min(kk+1, m); + + if ((delta-8*red_fudge)*c[jj] > cbar) { + + clean = 0; + + // we treat the case that the new vector is b_s (jj < s <= kk) + // as a special case that appears to occur most of the time.
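+ // NOTE (editor): s stays positive only if uvec has exactly one nonzero entry in (jj, kk], say at s; the candidate vector is then treated as b_s and simply rotated into slot jj before re-reducing, which is much cheaper than the general case below (append sum_i uvec[i]*b_i as row m+1, then let LLL remove the resulting linear dependency)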
+ + s = 0; + for (i = jj+1; i <= kk; i++) { + if (uvec[i] != 0) { + if (s == 0) + s = i; + else + s = -1; + } + } + + if (s == 0) LogicError("BKZ_QP: internal error"); + + if (s > 0) { + // special case + + NumTrivial++; + + for (i = s; i > jj; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + // cerr << "special case\n"; + new_m = ll_LLL_QP(B, U, delta, 0, check, + B1, mu, b, c, h, jj, quit); + if (new_m != h) LogicError("BKZ_QP: internal error"); + if (quit) break; + } + else { + // the general case + + NumNonTrivial++; + + + for (i = 1; i <= n; i++) conv(B(m+1, i), 0); + + if (U) { + for (i = 1; i <= m_orig; i++) + conv((*U)(m+1, i), 0); + } + + for (i = jj; i <= kk; i++) { + if (uvec[i] == 0) continue; + conv(MU, uvec[i]); + RowTransform2(B(m+1), B(i), MU); + if (U) RowTransform2((*U)(m+1), (*U)(i), MU); + } + + for (i = m+1; i >= jj+1; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + for (i = 1; i <= n; i++) { + conv(B1[jj][i], B(jj, i)); + CheckFinite(&B1[jj][i]); + } + + b[jj] = InnerProduct(B1[jj], B1[jj], n); + CheckFinite(&b[jj]); + + if (b[jj] == 0) LogicError("BKZ_QP: internal error"); + + // remove linear dependencies + + // cerr << "general case\n"; + new_m = ll_LLL_QP(B, U, delta, 0, 0, B1, mu, b, c, kk+1, jj, quit); + + if (new_m != kk) LogicError("BKZ_QP: internal error"); + + // remove zero vector + + for (i = kk+2; i <= m+1; i++) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + quit = 0; + if (check) { + for (i = 1; i <= kk; i++) + if ((*check)(B(i))) { + quit = 1; + break; + } + } + + if (quit) break; + + if (h > kk) { + // extend reduced basis + + new_m = ll_LLL_QP(B, U, delta, 0, check, + B1, mu, b, c, h, h, quit); + + if (new_m != h) LogicError("BKZ_QP: internal error"); + if (quit) break; + } + } + + z = 0; + } + else { + // LLL_QP + // cerr << "progress\n"; + + NumNoOps++; + + + if (!clean) { + new_m = + ll_LLL_QP(B, U, delta, 0, check, B1, mu, b, c, h, h, quit); + if (new_m != h) LogicError("BKZ_QP: internal error"); + if (quit) break; + } + + z++; + } + } + } + + + if (verb) { + BKZStatus(GetTime(), enum_time, NumIterations, NumTrivial, NumNonTrivial, + NumNoOps, m, B); + } + + + // clean up + + + if (m_orig > m) { + // for consistency, we move zero vectors to the front + + for (i = m+1; i <= m_orig; i++) { + swap(B(i), B(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = 0; i < m; i++) { + swap(B(m_orig-i), B(m-i)); + if (U) swap((*U)(m_orig-i), (*U)(m-i)); + } + } + + B.SetDims(m_orig, n); + BB = B; + + if (U) { + U->SetDims(m_orig, m_orig); + *UU = *U; + } + + return m; +} + +long BKZ_QP(mat_ZZ& BB, mat_ZZ& UU, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + if (delta < 0.50 || delta >= 1) LogicError("BKZ_QP: bad delta"); + if (beta < 2) LogicError("BKZ_QP: bad block size"); + + return BKZ_QP(BB, &UU, to_quad_float(delta), beta, prune, check); +} + +long BKZ_QP(mat_ZZ& BB, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + + if 
(delta < 0.50 || delta >= 1) LogicError("BKZ_QP: bad delta"); + if (beta < 2) LogicError("BKZ_QP: bad block size"); + + return BKZ_QP(BB, 0, to_quad_float(delta), beta, prune, check); +} + +static +long BKZ_QP1(mat_ZZ& BB, mat_ZZ* UU, quad_float delta, + long beta, long prune, LLLCheckFct check) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + NTL_TLS_GLOBAL_ACCESS(BKZThresh); + + + + long m = BB.NumRows(); + long n = BB.NumCols(); + long m_orig = m; + + long i, j; + ZZ MU; + + ZZ T1; + quad_float *tp; + + init_red_fudge(); + + mat_ZZ B; + B = BB; + + B.SetDims(m+1, n); + + Unique2DArray<quad_float> B1_store; + B1_store.SetDimsFrom1(m+2, n+1); + quad_float **B1 = B1_store.get(); // approximates B + + + Unique2DArray<quad_float> mu_store; + mu_store.SetDimsFrom1(m+2, m+1); + quad_float **mu = mu_store.get(); + + UniqueArray<quad_float> c_store; + c_store.SetLength(m+2); + quad_float *c = c_store.get(); // squared lengths of Gramm-Schmidt basis vectors + + UniqueArray<quad_float> b_store; + b_store.SetLength(m+2); + quad_float *b = b_store.get(); // squared lengths of basis vectors + + double cbar; + + + UniqueArray<double> ctilda_store; + ctilda_store.SetLength(m+2); + double *ctilda = ctilda_store.get(); + + + UniqueArray<double> vvec_store; + vvec_store.SetLength(m+2); + double *vvec = vvec_store.get(); + + UniqueArray<double> yvec_store; + yvec_store.SetLength(m+2); + double *yvec = yvec_store.get(); + + UniqueArray<double> uvec_store; + uvec_store.SetLength(m+2); + double *uvec = uvec_store.get(); + + UniqueArray<double> utildavec_store; + utildavec_store.SetLength(m+2); + double *utildavec = utildavec_store.get(); + + UniqueArray<long> Deltavec_store; + Deltavec_store.SetLength(m+2); + long *Deltavec = Deltavec_store.get(); + + UniqueArray<long> deltavec_store; + deltavec_store.SetLength(m+2); + long *deltavec = deltavec_store.get(); + + mat_ZZ Ulocal; + mat_ZZ *U; + + if (UU) { + Ulocal.SetDims(m+1, m); + for (i = 1; i <= m; i++) + conv(Ulocal(i, i), 1); + U = &Ulocal; + } + else + U = 0; + + long quit; + long new_m; + long z, jj, kk; + long s, t; + long h; + + double eta; + + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) { + conv(B1[i][j], B(i, j)); + CheckFinite(&B1[i][j]); + } + + + for (i = 1; i <= m; i++) { + b[i] = InnerProduct(B1[i], B1[i], n); + CheckFinite(&b[i]); + } + + // cerr << "\n"; + // cerr << "first LLL\n"; + + + m = ll_LLL_QP(B, U, delta, 0, check, B1, mu, b, c, m, 1, quit); + + double tt; + + double enum_time = 0; + unsigned long NumIterations = 0; + unsigned long NumTrivial = 0; + unsigned long NumNonTrivial = 0; + unsigned long NumNoOps = 0; + + long verb = verbose; + + verbose = 0; + + long clean = 1; + + if (m < m_orig) { + for (i = m_orig+1; i >= m+2; i--) { + // swap i, i-1 + + swap(B(i), B(i-1)); + if (U) swap((*U)(i), (*U)(i-1)); + } + } + + if (!quit && m > 1) { + // cerr << "continuing\n"; + if (beta > m) beta = m; + + if (prune > 0) + ComputeBKZConstant(beta, prune); + + z = 0; + jj = 0; + + while (z < m-1) { + jj++; + kk = min(jj+beta-1, m); + + if (jj == m) { + jj = 1; + kk = beta; + clean = 1; + } + + if (verb) { + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) + BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + + + + + // ENUM + + double tt1; + + if (verb) { + tt1 = GetTime(); + } + + + if (prune > 0) + ComputeBKZThresh(&c[jj], kk-jj+1); + + + cbar = to_double(c[jj]); + utildavec[jj] = uvec[jj] = 1; + + yvec[jj] = vvec[jj] = 0; + Deltavec[jj] = 0; + + + s = t = jj; + deltavec[jj] = 1; + + for (i = jj+1; i <= kk+1; i++) { + ctilda[i] = uvec[i] = utildavec[i] = yvec[i] = 0; + Deltavec[i] = 0; + 
vvec[i] = 0; + deltavec[i] = 1; + } + + long enum_cnt = 0; + + while (t <= kk) { + if (verb) { + enum_cnt++; + if (enum_cnt > 100000) { + enum_cnt = 0; + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) { + enum_time += tt - tt1; + tt1 = tt; + BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + } + } + + ctilda[t] = ctilda[t+1] + + (yvec[t]+utildavec[t])*(yvec[t]+utildavec[t])*to_double(c[t]); + + ForceToMem(&ctilda[t]); // prevents an infinite loop + + if (prune > 0 && t > jj) { + eta = to_double(BKZThresh(t-jj)); + } + else + eta = 0; + + if (ctilda[t] < cbar - eta) { + if (t > jj) { + double t1; + + t--; + t1 = 0; + for (i = t+1; i <= s; i++) { + t1 += utildavec[i]*to_double(mu[i][t]); + } + + + yvec[t] = t1; + t1 = -t1; + if (t1 >= 0) + t1 = ceil(t1-0.5); + else + t1 = floor(t1+0.5); + + utildavec[t] = vvec[t] = t1; + Deltavec[t] = 0; + if (utildavec[t] > -yvec[t]) + deltavec[t] = -1; + else + deltavec[t] = 1; + } + else { + cbar = ctilda[jj]; + for (i = jj; i <= kk; i++) { + uvec[i] = utildavec[i]; + } + } + } + else { + t++; + s = max(s, t); + if (t < s) Deltavec[t] = -Deltavec[t]; + if (Deltavec[t]*deltavec[t] >= 0) Deltavec[t] += deltavec[t]; + utildavec[t] = vvec[t] + Deltavec[t]; + } + } + + if (verb) { + tt1 = GetTime() - tt1; + enum_time += tt1; + } + + NumIterations++; + + h = min(kk+1, m); + + quad_float t1; + + if ((delta-8*red_fudge)*c[jj] > cbar*(1+64/NTL_FDOUBLE_PRECISION)) { + + clean = 0; + + // we treat the case that the new vector is b_s (jj < s <= kk) + // as a special case that appears to occur most of the time. + + s = 0; + for (i = jj+1; i <= kk; i++) { + if (uvec[i] != 0) { + if (s == 0) + s = i; + else + s = -1; + } + } + + if (s == 0) LogicError("BKZ_QP: internal error"); + + if (s > 0) { + // special case + + NumTrivial++; + + for (i = s; i > jj; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + // cerr << "special case\n"; + new_m = ll_LLL_QP(B, U, delta, 0, check, + B1, mu, b, c, h, jj, quit); + if (new_m != h) LogicError("BKZ_QP: internal error"); + if (quit) break; + } + else { + // the general case + + NumNonTrivial++; + + + for (i = 1; i <= n; i++) conv(B(m+1, i), 0); + + if (U) { + for (i = 1; i <= m_orig; i++) + conv((*U)(m+1, i), 0); + } + + for (i = jj; i <= kk; i++) { + if (uvec[i] == 0) continue; + conv(MU, uvec[i]); + RowTransform2(B(m+1), B(i), MU); + if (U) RowTransform2((*U)(m+1), (*U)(i), MU); + } + + for (i = m+1; i >= jj+1; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + for (i = 1; i <= n; i++) { + conv(B1[jj][i], B(jj, i)); + CheckFinite(&B1[jj][i]); + } + + b[jj] = InnerProduct(B1[jj], B1[jj], n); + CheckFinite(&b[jj]); + + if (b[jj] == 0) LogicError("BKZ_QP: internal error"); + + // remove linear dependencies + + // cerr << "general case\n"; + new_m = ll_LLL_QP(B, U, delta, 0, 0, B1, mu, b, c, kk+1, jj, quit); + + if (new_m != kk) LogicError("BKZ_QP: internal error"); + + // remove zero vector + + for (i = kk+2; i <= m+1; i++) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + quit = 0; + if (check) { + for (i = 1; i <= kk; i++) + if ((*check)(B(i))) { + quit = 1; + break; + } + } + + if (quit) break; + + if (h > kk) { + // 
extend reduced basis + + new_m = ll_LLL_QP(B, U, delta, 0, check, + B1, mu, b, c, h, h, quit); + + if (new_m != h) LogicError("BKZ_QP: internal error"); + if (quit) break; + } + } + + z = 0; + } + else { + // LLL_QP + // cerr << "progress\n"; + + NumNoOps++; + + + if (!clean) { + new_m = + ll_LLL_QP(B, U, delta, 0, check, B1, mu, b, c, h, h, quit); + if (new_m != h) LogicError("BKZ_QP: internal error"); + if (quit) break; + } + + z++; + } + } + } + + + if (verb) { + BKZStatus(GetTime(), enum_time, NumIterations, NumTrivial, NumNonTrivial, + NumNoOps, m, B); + } + + + // clean up + + + if (m_orig > m) { + // for consistency, we move zero vectors to the front + + for (i = m+1; i <= m_orig; i++) { + swap(B(i), B(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = 0; i < m; i++) { + swap(B(m_orig-i), B(m-i)); + if (U) swap((*U)(m_orig-i), (*U)(m-i)); + } + } + + B.SetDims(m_orig, n); + BB = B; + + if (U) { + U->SetDims(m_orig, m_orig); + *UU = *U; + } + + return m; +} + +long BKZ_QP1(mat_ZZ& BB, mat_ZZ& UU, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + if (delta < 0.50 || delta >= 1) LogicError("BKZ_QP: bad delta"); + if (beta < 2) LogicError("BKZ_QP: bad block size"); + + return BKZ_QP1(BB, &UU, to_quad_float(delta), beta, prune, check); +} + +long BKZ_QP1(mat_ZZ& BB, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + + if (delta < 0.50 || delta >= 1) LogicError("BKZ_QP: bad delta"); + if (beta < 2) LogicError("BKZ_QP: bad block size"); + + return BKZ_QP1(BB, 0, to_quad_float(delta), beta, prune, check); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/LLL_RR.c b/thirdparty/linux/ntl/src/LLL_RR.c new file mode 100644 index 0000000000..bfc5f9c234 --- /dev/null +++ b/thirdparty/linux/ntl/src/LLL_RR.c @@ -0,0 +1,1357 @@ + + +#include <NTL/LLL.h> +#include <NTL/fileio.h> + +#include <NTL/new.h> + +NTL_START_IMPL + + + + +static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x - y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } +} + +static void RowTransform2(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x + y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if 
(k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } +} + +void ComputeGS(const mat_ZZ& B, mat_RR& B1, + mat_RR& mu, vec_RR& b, + vec_RR& c, long k, const RR& bound, long st, + vec_RR& buf, const RR& bound2) +{ + long i, j; + RR s, t, t1; + ZZ T1; + + if (st < k) { + for (i = 1; i < st; i++) + mul(buf(i), mu(k,i), c(i)); + } + + for (j = st; j <= k-1; j++) { + InnerProduct(s, B1(k), B1(j)); + + sqr(t1, s); + mul(t1, t1, bound); + mul(t, b(k), b(j)); + + if (t >= bound2 && t >= t1) { + InnerProduct(T1, B(k), B(j)); + conv(s, T1); + } + + clear(t1); + for (i = 1; i <= j-1; i++) { + mul(t, mu(j, i), buf(i)); + add(t1, t1, t); + } + + sub(t, s, t1); + buf(j) = t; + div(mu(k, j), t, c(j)); + } + + + clear(s); + for (j = 1; j <= k-1; j++) { + mul(t, mu(k, j), buf(j)); + add(s, s, t); + } + + sub(c(k), b(k), s); +} + +NTL_TLS_GLOBAL_DECL(RR, red_fudge) + +static NTL_CHEAP_THREAD_LOCAL long log_red = 0; + +static void init_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + log_red = long(0.50*RR::precision()); + + power2(red_fudge, -log_red); +} + +static void inc_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + + mul(red_fudge, red_fudge, 2); + log_red--; + + cerr << "LLL_RR: warning--relaxing reduction (" << log_red << ")\n"; + + if (log_red < 4) + ResourceError("LLL_RR: can not continue...sorry"); +} + + + + +static NTL_CHEAP_THREAD_LOCAL long verbose = 0; +static NTL_CHEAP_THREAD_LOCAL unsigned long NumSwaps = 0; +static NTL_CHEAP_THREAD_LOCAL double StartTime = 0; +static NTL_CHEAP_THREAD_LOCAL double LastTime = 0; + + + +static void LLLStatus(long max_k, double t, long m, const mat_ZZ& B) +{ + cerr << "---- LLL_RR status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, t-StartTime); + cerr << ", stage: " << max_k; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = t; + +} + + + +static +long ll_LLL_RR(mat_ZZ& B, mat_ZZ* U, const RR& delta, long deep, + LLLCheckFct check, mat_RR& B1, mat_RR& mu, + vec_RR& b, vec_RR& c, long m, long init_k, long &quit) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + long n = B.NumCols(); + + long i, j, k, Fc1; + ZZ MU; + RR mu1, t1, t2, cc; + ZZ T1; + + RR bound; + + // we tolerate a 15% loss of precision in computing + // inner products in ComputeGS. 
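+ // NOTE (editor): at NTL's default RR precision of p = 150 bits this sets bound = 2^44 and bound2 = 2^300; ComputeGS above recomputes an inner product exactly over ZZ whenever b(k)*b(j) >= s^2 * bound (and b(k)*b(j) >= bound2), i.e. whenever cancellation may have destroyed more than about 0.15*p bits of the approximate product s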
+ + power2(bound, 2*long(0.15*RR::precision())); + + + RR bound2; + + power2(bound2, 2*RR::precision()); + + + quit = 0; + k = init_k; + + vec_long st_mem; + st_mem.SetLength(m+2); + long *st = st_mem.elts(); + + for (i = 1; i < k; i++) + st[i] = i; + + for (i = k; i <= m+1; i++) + st[i] = 1; + + vec_RR buf; + buf.SetLength(m); + + long rst; + long counter; + + long trigger_index; + long small_trigger; + long cnt; + + RR half; + conv(half, 0.5); + RR half_plus_fudge; + add(half_plus_fudge, half, red_fudge); + + long max_k = 0; + double tt; + + while (k <= m) { + + if (k > max_k) { + max_k = k; + } + + if (verbose) { + tt = GetTime(); + + if (tt > LastTime + LLLStatusInterval) + LLLStatus(max_k, tt, m, B); + } + + + if (st[k] == k) + rst = 1; + else + rst = k; + + if (st[k] < st[k+1]) st[k+1] = st[k]; + ComputeGS(B, B1, mu, b, c, k, bound, st[k], buf, bound2); + st[k] = k; + + counter = 0; + trigger_index = k; + small_trigger = 0; + cnt = 0; + + do { + // size reduction + + counter++; + if (counter > 10000) { + cerr << "LLL_RR: warning--possible infinite loop\n"; + counter = 0; + } + + + Fc1 = 0; + + for (j = rst-1; j >= 1; j--) { + abs(t1, mu(k,j)); + if (t1 > half_plus_fudge) { + + if (!Fc1) { + if (j > trigger_index || + (j == trigger_index && small_trigger)) { + + cnt++; + + if (cnt > 10) { + inc_red_fudge(); + add(half_plus_fudge, half, red_fudge); + cnt = 0; + } + } + + trigger_index = j; + small_trigger = (t1 < 4); + } + + Fc1 = 1; + + mu1 = mu(k,j); + if (sign(mu1) >= 0) { + sub(mu1, mu1, half); + ceil(mu1, mu1); + } + else { + add(mu1, mu1, half); + floor(mu1, mu1); + } + + if (mu1 == 1) { + for (i = 1; i <= j-1; i++) + sub(mu(k,i), mu(k,i), mu(j,i)); + } + else if (mu1 == -1) { + for (i = 1; i <= j-1; i++) + add(mu(k,i), mu(k,i), mu(j,i)); + } + else { + for (i = 1; i <= j-1; i++) { + mul(t2, mu1, mu(j,i)); + sub(mu(k,i), mu(k,i), t2); + } + } + + + conv(MU, mu1); + + sub(mu(k,j), mu(k,j), mu1); + + RowTransform(B(k), B(j), MU); + if (U) RowTransform((*U)(k), (*U)(j), MU); + } + } + + if (Fc1) { + for (i = 1; i <= n; i++) + conv(B1(k, i), B(k, i)); + + InnerProduct(b(k), B1(k), B1(k)); + ComputeGS(B, B1, mu, b, c, k, bound, 1, buf, bound2); + } + } while (Fc1); + + if (check && (*check)(B(k))) + quit = 1; + + if (IsZero(b(k))) { + for (i = k; i < m; i++) { + // swap i, i+1 + swap(B(i), B(i+1)); + swap(B1(i), B1(i+1)); + swap(b(i), b(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = k; i <= m+1; i++) st[i] = 1; + + m--; + if (quit) break; + continue; + } + + if (quit) break; + + if (deep > 0) { + // deep insertions + + cc = b(k); + long l = 1; + while (l <= k-1) { + mul(t1, delta, c(l)); + if (t1 > cc) break; + sqr(t1, mu(k,l)); + mul(t1, t1, c(l)); + sub(cc, cc, t1); + l++; + } + + if (l <= k-1 && (l <= deep || k-l <= deep)) { + // deep insertion at position l + + for (i = k; i > l; i--) { + // swap rows i, i-1 + swap(B(i), B(i-1)); + swap(B1(i), B1(i-1)); + swap(mu(i), mu(i-1)); + swap(b(i), b(i-1)); + if (U) swap((*U)(i), (*U)(i-1)); + } + + k = l; + continue; + } + } // end deep insertions + + // test LLL reduction condition + + if (k <= 1) { + k++; + } + else { + sqr(t1, mu(k,k-1)); + mul(t1, t1, c(k-1)); + add(t1, t1, c(k)); + mul(t2, delta, c(k-1)); + if (t2 > t1) { + // swap rows k, k-1 + swap(B(k), B(k-1)); + swap(B1(k), B1(k-1)); + swap(mu(k), mu(k-1)); + swap(b(k), b(k-1)); + if (U) swap((*U)(k), (*U)(k-1)); + + k--; + NumSwaps++; + } + else { + k++; + } + } + } + + if (verbose) { + LLLStatus(m+1, GetTime(), m, B); + } + + + return m; +} + +static +long 
LLL_RR(mat_ZZ& B, mat_ZZ* U, const RR& delta, long deep, + LLLCheckFct check) +{ + long m = B.NumRows(); + long n = B.NumCols(); + + long i, j; + long new_m, dep, quit; + RR s; + ZZ MU; + RR mu1; + + RR t1; + ZZ T1; + + init_red_fudge(); + + if (U) ident(*U, m); + + mat_RR B1; // approximates B + B1.SetDims(m, n); + + + mat_RR mu; + mu.SetDims(m, m); + + vec_RR c; // squared lengths of Gramm-Schmidt basis vectors + c.SetLength(m); + + vec_RR b; // squared lengths of basis vectors + b.SetLength(m); + + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) + conv(B1(i, j), B(i, j)); + + + + for (i = 1; i <= m; i++) { + InnerProduct(b(i), B1(i), B1(i)); + } + + + new_m = ll_LLL_RR(B, U, delta, deep, check, B1, mu, b, c, m, 1, quit); + dep = m - new_m; + m = new_m; + + if (dep > 0) { + // for consistency, we move all of the zero rows to the front + + for (i = 0; i < m; i++) { + swap(B(m+dep-i), B(m-i)); + if (U) swap((*U)(m+dep-i), (*U)(m-i)); + } + } + + + return m; +} + + + +long LLL_RR(mat_ZZ& B, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("LLL_RR: bad delta"); + if (deep < 0) LogicError("LLL_RR: bad deep"); + RR Delta; + conv(Delta, delta); + return LLL_RR(B, 0, Delta, deep, check); +} + +long LLL_RR(mat_ZZ& B, mat_ZZ& U, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("LLL_RR: bad delta"); + if (deep < 0) LogicError("LLL_RR: bad deep"); + RR Delta; + conv(Delta, delta); + return LLL_RR(B, &U, Delta, deep, check); +} + + + +NTL_TLS_GLOBAL_DECL(vec_RR, BKZConstant) + +static +void ComputeBKZConstant(long beta, long p) +{ + NTL_TLS_GLOBAL_ACCESS(BKZConstant); + + RR c_PI; + ComputePi(c_PI); + + RR LogPI = log(c_PI); + + BKZConstant.SetLength(beta-1); + + vec_RR Log; + Log.SetLength(beta); + + + long i, j, k; + RR x, y; + + for (j = 1; j <= beta; j++) + Log(j) = log(to_RR(j)); + + for (i = 1; i <= beta-1; i++) { + // First, we compute x = gamma(i/2)^{2/i} + + k = i/2; + + if ((i & 1) == 0) { // i even + x = 0; + for (j = 1; j <= k; j++) + x += Log(j); + + x = exp(x/k); + + } + else { // i odd + x = 0; + for (j = k + 2; j <= 2*k + 2; j++) + x += Log(j); + + x += 0.5*LogPI - 2*(k+1)*Log(2); + + x = exp(2*x/i); + } + + // Second, we compute y = 2^{2*p/i} + + y = exp(-(2*p/to_RR(i))*Log(2)); + + BKZConstant(i) = x*y/c_PI; + } + +} + +NTL_TLS_GLOBAL_DECL(vec_RR, BKZThresh) + +static +void ComputeBKZThresh(RR *c, long beta) +{ + NTL_TLS_GLOBAL_ACCESS(BKZConstant); + NTL_TLS_GLOBAL_ACCESS(BKZThresh); + + BKZThresh.SetLength(beta-1); + + long i; + RR x; + RR t1; + + x = 0; + + for (i = 1; i <= beta-1; i++) { + log(t1, c[i-1]); + add(x, x, t1); + div(t1, x, i); + exp(t1, t1); + mul(BKZThresh(i), t1, BKZConstant(i)); + } +} + + + + +static +void BKZStatus(double tt, double enum_time, unsigned long NumIterations, + unsigned long NumTrivial, unsigned long NumNonTrivial, + unsigned long NumNoOps, long m, + const mat_ZZ& B) +{ + cerr << "---- BKZ_RR status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, tt-StartTime); + cerr << ", enum time: "; + PrintTime(cerr, enum_time); + cerr << ", iter: " << NumIterations << "\n"; + cerr << "triv: " << NumTrivial; + cerr << ", nontriv: " << NumNonTrivial; + cerr << ", no ops: " << NumNoOps; + cerr << ", rank: " << m; + cerr << ", swaps: 
" << NumSwaps << "\n"; + + + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = tt; + +} + + + + +static +long BKZ_RR(mat_ZZ& BB, mat_ZZ* UU, const RR& delta, + long beta, long prune, LLLCheckFct check) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + NTL_TLS_GLOBAL_ACCESS(BKZThresh); + + + long m = BB.NumRows(); + long n = BB.NumCols(); + long m_orig = m; + + long i, j; + ZZ MU; + + RR t1, t2; + ZZ T1; + + init_red_fudge(); + + mat_ZZ B; + B = BB; + + B.SetDims(m+1, n); + + + mat_RR B1; + B1.SetDims(m+1, n); + + mat_RR mu; + mu.SetDims(m+1, m); + + vec_RR c; + c.SetLength(m+1); + + vec_RR b; + b.SetLength(m+1); + + RR cbar; + + vec_RR ctilda; + ctilda.SetLength(m+1); + + vec_RR vvec; + vvec.SetLength(m+1); + + vec_RR yvec; + yvec.SetLength(m+1); + + vec_RR uvec; + uvec.SetLength(m+1); + + vec_RR utildavec; + utildavec.SetLength(m+1); + + vec_long Deltavec; + Deltavec.SetLength(m+1); + + vec_long deltavec; + deltavec.SetLength(m+1); + + mat_ZZ Ulocal; + mat_ZZ *U; + + if (UU) { + Ulocal.SetDims(m+1, m); + for (i = 1; i <= m; i++) + conv(Ulocal(i, i), 1); + U = &Ulocal; + } + else + U = 0; + + long quit; + long new_m; + long z, jj, kk; + long s, t; + long h; + + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) + conv(B1(i, j), B(i, j)); + + + for (i = 1; i <= m; i++) { + InnerProduct(b(i), B1(i), B1(i)); + } + + // cerr << "\n"; + // cerr << "first LLL\n"; + + m = ll_LLL_RR(B, U, delta, 0, check, B1, mu, b, c, m, 1, quit); + + double tt; + + double enum_time = 0; + unsigned long NumIterations = 0; + unsigned long NumTrivial = 0; + unsigned long NumNonTrivial = 0; + unsigned long NumNoOps = 0; + + long verb = verbose; + + verbose = 0; + + + if (m < m_orig) { + for (i = m_orig+1; i >= m+2; i--) { + // swap i, i-1 + + swap(B(i), B(i-1)); + if (U) swap((*U)(i), (*U)(i-1)); + } + } + + long clean = 1; + + if (!quit && m > 1) { + // cerr << "continuing\n"; + + if (beta > m) beta = m; + + if (prune > 0) + ComputeBKZConstant(beta, prune); + + z = 0; + jj = 0; + + while (z < m-1) { + jj++; + kk = min(jj+beta-1, m); + + if (jj == m) { + jj = 1; + kk = beta; + clean = 1; + } + + if (verb) { + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) + BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + + // ENUM + + double tt1; + + if (verb) { + tt1 = GetTime(); + } + + if (prune > 0) + ComputeBKZThresh(&c(jj), kk-jj+1); + + cbar = c(jj); + conv(utildavec(jj), 1); + conv(uvec(jj), 1); + + conv(yvec(jj), 0); + conv(vvec(jj), 0); + Deltavec(jj) = 0; + + + s = t = jj; + deltavec(jj) = 1; + + for (i = jj+1; i <= kk+1; i++) { + conv(ctilda(i), 0); + conv(uvec(i), 0); + conv(utildavec(i), 0); + conv(yvec(i), 0); + Deltavec(i) = 0; + conv(vvec(i), 0); + deltavec(i) = 1; + } + + long enum_cnt = 0; + + while (t <= kk) { + if (verb) { + enum_cnt++; + if (enum_cnt > 100000) { + enum_cnt = 0; + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) { + enum_time += tt - tt1; + tt1 = tt; + BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + } + } + + + add(t1, yvec(t), utildavec(t)); + sqr(t1, t1); + mul(t1, t1, c(t)); + 
add(ctilda(t), ctilda(t+1), t1); + + if (prune > 0 && t > jj) + sub(t1, cbar, BKZThresh(t-jj)); + else + t1 = cbar; + + + if (ctilda(t) < t1) { + if (t > jj) { + t--; + clear(t1); + for (i = t+1; i <= s; i++) { + mul(t2, utildavec(i), mu(i,t)); + add(t1, t1, t2); + } + + yvec(t) = t1; + negate(t1, t1); + if (sign(t1) >= 0) { + sub(t1, t1, 0.5); + ceil(t1, t1); + } + else { + add(t1, t1, 0.5); + floor(t1, t1); + } + + utildavec(t) = t1; + vvec(t) = t1; + Deltavec(t) = 0; + + negate(t1, t1); + + if (t1 < yvec(t)) + deltavec(t) = -1; + else + deltavec(t) = 1; + } + else { + cbar = ctilda(jj); + for (i = jj; i <= kk; i++) { + uvec(i) = utildavec(i); + } + } + } + else { + t++; + s = max(s, t); + if (t < s) Deltavec(t) = -Deltavec(t); + if (Deltavec(t)*deltavec(t) >= 0) Deltavec(t) += deltavec(t); + add(utildavec(t), vvec(t), Deltavec(t)); + } + } + + if (verb) { + tt1 = GetTime() - tt1; + enum_time += tt1; + } + + NumIterations++; + + h = min(kk+1, m); + + mul(t1, red_fudge, -8); + add(t1, t1, delta); + mul(t1, t1, c(jj)); + + if (t1 > cbar) { + + clean = 0; + + // we treat the case that the new vector is b_s (jj < s <= kk) + // as a special case that appears to occur most of the time. + + s = 0; + for (i = jj+1; i <= kk; i++) { + if (uvec(i) != 0) { + if (s == 0) + s = i; + else + s = -1; + } + } + + if (s == 0) LogicError("BKZ_RR: internal error"); + + if (s > 0) { + // special case + // cerr << "special case\n"; + + NumTrivial++; + + for (i = s; i > jj; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + swap(B1(i-1), B1(i)); + swap(b(i-1), b(i)); + if (U) swap((*U)(i-1), (*U)(i)); + } + + new_m = ll_LLL_RR(B, U, delta, 0, check, + B1, mu, b, c, h, jj, quit); + if (new_m != h) LogicError("BKZ_RR: internal error"); + if (quit) break; + } + else { + // the general case + + NumNonTrivial++; + + for (i = 1; i <= n; i++) conv(B(m+1, i), 0); + + if (U) { + for (i = 1; i <= m_orig; i++) + conv((*U)(m+1, i), 0); + } + + for (i = jj; i <= kk; i++) { + if (uvec(i) == 0) continue; + conv(MU, uvec(i)); + RowTransform2(B(m+1), B(i), MU); + if (U) RowTransform2((*U)(m+1), (*U)(i), MU); + } + + for (i = m+1; i >= jj+1; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + swap(B1(i-1), B1(i)); + swap(b(i-1), b(i)); + if (U) swap((*U)(i-1), (*U)(i)); + } + + for (i = 1; i <= n; i++) + conv(B1(jj, i), B(jj, i)); + + InnerProduct(b(jj), B1(jj), B1(jj)); + + if (b(jj) == 0) LogicError("BKZ_RR: internal error"); + + // remove linear dependencies + + // cerr << "general case\n"; + new_m = ll_LLL_RR(B, U, delta, 0, 0, B1, mu, b, c, kk+1, jj, quit); + + if (new_m != kk) LogicError("BKZ_RR: internal error"); + + // remove zero vector + + for (i = kk+2; i <= m+1; i++) { + // swap i, i-1 + swap(B(i-1), B(i)); + swap(B1(i-1), B1(i)); + swap(b(i-1), b(i)); + if (U) swap((*U)(i-1), (*U)(i)); + } + + quit = 0; + if (check) { + for (i = 1; i <= kk; i++) + if ((*check)(B(i))) { + quit = 1; + break; + } + } + + if (quit) break; + + if (h > kk) { + // extend reduced basis + + new_m = ll_LLL_RR(B, U, delta, 0, check, + B1, mu, b, c, h, h, quit); + + if (new_m != h) LogicError("BKZ_RR: internal error"); + if (quit) break; + } + } + + z = 0; + } + else { + // LLL_RR + // cerr << "progress\n"; + + NumNoOps++; + + if (!clean) { + new_m = + ll_LLL_RR(B, U, delta, 0, check, B1, mu, b, c, h, h, quit); + if (new_m != h) LogicError("BKZ_RR: internal error"); + if (quit) break; + } + + z++; + } + } + } + + if (verb) { + BKZStatus(GetTime(), enum_time, NumIterations, NumTrivial, NumNonTrivial, + NumNoOps, m, B); + } + + + // clean up + + if (m_orig > m) { + // 
for consistency, we move zero vectors to the front + + for (i = m+1; i <= m_orig; i++) { + swap(B(i), B(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = 0; i < m; i++) { + swap(B(m_orig-i), B(m-i)); + if (U) swap((*U)(m_orig-i), (*U)(m-i)); + } + } + + B.SetDims(m_orig, n); + BB = B; + + if (U) { + U->SetDims(m_orig, m_orig); + *UU = *U; + } + + return m; +} + +long BKZ_RR(mat_ZZ& BB, mat_ZZ& UU, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("BKZ_RR: bad delta"); + if (beta < 2) LogicError("BKZ_RR: bad block size"); + + RR Delta; + conv(Delta, delta); + + return BKZ_RR(BB, &UU, Delta, beta, prune, check); +} + +long BKZ_RR(mat_ZZ& BB, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("BKZ_RR: bad delta"); + if (beta < 2) LogicError("BKZ_RR: bad block size"); + + RR Delta; + conv(Delta, delta); + + return BKZ_RR(BB, 0, Delta, beta, prune, check); +} + + + + +void NearVector(vec_ZZ& ww, const mat_ZZ& BB, const vec_ZZ& a) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + long n = BB.NumCols(); + + if (n != BB.NumRows()) + LogicError("NearVector: matrix must be square"); + + if (n != a.length()) + LogicError("NearVector: dimension mismatch"); + + long i, j; + mat_ZZ B; + + B.SetDims(n+1, n); + for (i = 1; i <= n; i++) + B(i) = BB(i); + + B(n+1) = a; + + mat_RR B1, mu; + vec_RR b, c; + + B1.SetDims(n+1, n); + mu.SetDims(n+1, n+1); + b.SetLength(n+1); + c.SetLength(n+1); + + vec_RR buf; + buf.SetLength(n+1); + + + for (i = 1; i <= n+1; i++) + for (j = 1; j <= n; j++) + conv(B1(i, j), B(i, j)); + + for (i = 1; i <= n+1; i++) + InnerProduct(b(i), B1(i), B1(i)); + + + + RR bound; + power2(bound, 2*long(0.15*RR::precision())); + + RR bound2; + power2(bound2, 2*RR::precision()); + + + for (i = 1; i <= n+1; i++) + ComputeGS(B, B1, mu, b, c, i, bound, 1, buf, bound2); + + init_red_fudge(); + + RR half; + conv(half, 0.5); + RR half_plus_fudge; + add(half_plus_fudge, half, red_fudge); + + RR t1, t2, mu1; + ZZ MU; + + long trigger_index = n+1; + long small_trigger = 0; + long cnt = 0; + + long Fc1; + + vec_ZZ w; + w.SetLength(n); + clear(w); + + do { + Fc1 = 0; + + for (j = n; j >= 1; j--) { + abs(t1, mu(n+1,j)); + if (t1 > half_plus_fudge) { + + if (!Fc1) { + if (j > trigger_index || + (j == trigger_index && small_trigger)) { + + cnt++; + + if (cnt > 10) { + inc_red_fudge(); + add(half_plus_fudge, half, red_fudge); + cnt = 0; + } + } + + trigger_index = j; + small_trigger = (t1 < 4); + } + + Fc1 = 1; + + mu1 = mu(n+1,j); + if (sign(mu1) >= 0) { + sub(mu1, mu1, half); + ceil(mu1, mu1); + } + else { + add(mu1, mu1, half); + floor(mu1, mu1); + } + + if (mu1 == 1) { + for (i = 1; i <= j-1; i++) + sub(mu(n+1,i), mu(n+1,i), mu(j,i)); + } + else if (mu1 == -1) { + for (i = 1; i <= j-1; i++) + add(mu(n+1,i), mu(n+1,i), mu(j,i)); + } + else { + for (i = 1; i <= j-1; i++) { + mul(t2, mu1, mu(j,i)); + sub(mu(n+1,i), mu(n+1,i), t2); + } + } + + + conv(MU, mu1); + + sub(mu(n+1,j), mu(n+1,j), mu1); + + RowTransform(B(n+1), B(j), MU); + RowTransform2(w, B(j), MU); + } + } + + if (Fc1) { + for (i = 1; i <= n; i++) + conv(B1(n+1, i), B(n+1, i)); + + InnerProduct(b(n+1), B1(n+1), B1(n+1)); + ComputeGS(B, B1, mu, b, c, n+1, bound, 1, buf, bound2); + } + } while (Fc1); + + ww = 
w; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/LLL_XD.c b/thirdparty/linux/ntl/src/LLL_XD.c new file mode 100644 index 0000000000..28968111ba --- /dev/null +++ b/thirdparty/linux/ntl/src/LLL_XD.c @@ -0,0 +1,1227 @@ + +#include <NTL/LLL.h> +#include <NTL/vec_xdouble.h> +#include <NTL/vec_double.h> +#include <NTL/fileio.h> + +#include <NTL/new.h> + +NTL_START_IMPL + + +static xdouble InnerProduct(xdouble *a, xdouble *b, long n) +{ + xdouble s; + long i; + + s = 0; + for (i = 1; i <= n; i++) + MulAdd(s, s, a[i], b[i]); + + return s; +} + + +static void RowTransform(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x - y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + if (k > 0) { + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + LeftShift(T, T, k); + sub(A(i), A(i), T); + } + + } + else { + + for (i = 1; i <= n; i++) { + MulSubFrom(A(i), B(i), mu1); + } + + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + sub(A(i), A(i), T); + } + } +} + +static void RowTransform2(vec_ZZ& A, vec_ZZ& B, const ZZ& MU1) +// x = x + y*MU +{ + NTL_ZZRegister(T); + NTL_ZZRegister(MU); + long k; + + long n = A.length(); + long i; + + MU = MU1; + + if (MU == 1) { + for (i = 1; i <= n; i++) + add(A(i), A(i), B(i)); + + return; + } + + if (MU == -1) { + for (i = 1; i <= n; i++) + sub(A(i), A(i), B(i)); + + return; + } + + if (MU == 0) return; + + if (NumTwos(MU) >= NTL_ZZ_NBITS) + k = MakeOdd(MU); + else + k = 0; + + if (MU.WideSinglePrecision()) { + long mu1; + conv(mu1, MU); + + for (i = 1; i <= n; i++) { + mul(T, B(i), mu1); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } + else { + for (i = 1; i <= n; i++) { + mul(T, B(i), MU); + if (k > 0) LeftShift(T, T, k); + add(A(i), A(i), T); + } + } +} + +static +void ComputeGS(mat_ZZ& B, xdouble **B1, xdouble **mu, xdouble *b, + xdouble *c, long k, xdouble bound, long st, xdouble *buf) +{ + long n = B.NumCols(); + long i, j; + xdouble s, t1, y, t; + ZZ T1; + + xdouble *mu_k = mu[k]; + + if (st < k) { + for (i = 1; i < st; i++) + buf[i] = mu_k[i]*c[i]; + } + + for (j = st; j <= k-1; j++) { + if (b[k]*b[j] < NTL_FDOUBLE_PRECISION*NTL_FDOUBLE_PRECISION) { + double z = 0; + xdouble *B1_k = B1[k]; + xdouble *B1_j = B1[j]; + + for (i = 1; i <= n; i++) + z += B1_k[i].x * B1_j[i].x; + + s = z; + } + else { + s = InnerProduct(B1[k], B1[j], n); + + if (s*s <= b[k]*b[j]/bound) { + InnerProduct(T1, B(k), B(j)); + conv(s, T1); + } + } + + xdouble *mu_j = mu[j]; + + t1 = 0; + for (i = 1; i <= j-1; i++) + MulAdd(t1, t1, mu_j[i], buf[i]); + + mu_k[j] = (buf[j] = (s - t1))/c[j]; + } + + s = 0; + for (j = 1; j <= k-1; j++) + MulAdd(s, s, mu_k[j], buf[j]); + + c[k] = b[k] - s; +} + +NTL_TLS_GLOBAL_DECL_INIT(xdouble, red_fudge, (to_xdouble(0))) + + +static NTL_CHEAP_THREAD_LOCAL long log_red = 0; + +static void init_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + long i; + + log_red = long(0.50*NTL_DOUBLE_PRECISION); + red_fudge = 1; + + for (i = log_red; i > 0; i--) + red_fudge = red_fudge*0.5; +} + +static void inc_red_fudge() +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + + red_fudge = red_fudge * 2; + log_red--; + + cerr << "LLL_XD: warning--relaxing reduction (" << log_red << ")\n"; + + if 
(log_red < 4) + ResourceError("LLL_XD: can not continue...sorry"); +} + + + +static NTL_CHEAP_THREAD_LOCAL long verbose = 0; +static NTL_CHEAP_THREAD_LOCAL unsigned long NumSwaps = 0; +static NTL_CHEAP_THREAD_LOCAL double StartTime = 0; +static NTL_CHEAP_THREAD_LOCAL double LastTime = 0; + + + +static void LLLStatus(long max_k, double t, long m, const mat_ZZ& B) +{ + cerr << "---- LLL_XD status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, t-StartTime); + cerr << ", stage: " << max_k; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = t; + +} + + +static +long ll_LLL_XD(mat_ZZ& B, mat_ZZ* U, xdouble delta, long deep, + LLLCheckFct check, xdouble **B1, xdouble **mu, + xdouble *b, xdouble *c, + long m, long init_k, long &quit) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + + long n = B.NumCols(); + + long i, j, k, Fc1; + ZZ MU; + xdouble mu1; + + xdouble t1; + ZZ T1; + xdouble *tp; + + + NTL_TLS_LOCAL_INIT(xdouble, bound, (to_xdouble(0))); + + + if (bound == 0) { + // we tolerate a 15% loss of precision in computing + // inner products in ComputeGS. + + bound = 1; + for (i = 2*long(0.15*NTL_DOUBLE_PRECISION); i > 0; i--) { + bound = bound * 2; + } + } + + + xdouble half = to_xdouble(0.5); + xdouble half_plus_fudge = 0.5 + red_fudge; + + quit = 0; + k = init_k; + + vec_long st_mem; + st_mem.SetLength(m+2); + long *st = st_mem.elts(); + + for (i = 1; i < k; i++) + st[i] = i; + + for (i = k; i <= m+1; i++) + st[i] = 1; + + UniqueArray<xdouble> buf_store; + buf_store.SetLength(m+1); + xdouble *buf = buf_store.get(); + + long rst; + long counter; + + long trigger_index; + long small_trigger; + long cnt; + + long max_k = 0; + + double tt; + + + while (k <= m) { + + if (k > max_k) { + max_k = k; + } + + if (verbose) { + tt = GetTime(); + + if (tt > LastTime + LLLStatusInterval) + LLLStatus(max_k, tt, m, B); + } + + + if (st[k] == k) + rst = 1; + else + rst = k; + + if (st[k] < st[k+1]) st[k+1] = st[k]; + ComputeGS(B, B1, mu, b, c, k, bound, st[k], buf); + st[k] = k; + + counter = 0; + trigger_index = k; + small_trigger = 0; + cnt = 0; + + do { + // size reduction + + counter++; + if (counter > 10000) { + cerr << "LLL_XD: warning--possible infinite loop\n"; + counter = 0; + } + + + Fc1 = 0; + + for (j = rst-1; j >= 1; j--) { + t1 = fabs(mu[k][j]); + if (t1 > half_plus_fudge) { + + if (!Fc1) { + if (j > trigger_index || + (j == trigger_index && small_trigger)) { + + cnt++; + + if (cnt > 10) { + inc_red_fudge(); + half_plus_fudge = 0.5 + red_fudge; + cnt = 0; + } + } + + trigger_index = j; + small_trigger = (t1 < 4); + } + + + Fc1 = 1; + + mu1 = mu[k][j]; + if (mu1 >= 0) + mu1 = ceil(mu1-half); + else + mu1 = floor(mu1+half); + + + xdouble *mu_k = mu[k]; + xdouble *mu_j = mu[j]; + + if (mu1 == 1) { + for (i = 1; i <= j-1; i++) + mu_k[i] -= mu_j[i]; + } + else if (mu1 == -1) { + for (i = 1; i <= j-1; i++) + mu_k[i] += mu_j[i]; + } + else { + for (i = 1; i <= j-1; i++) + MulSub(mu_k[i], mu_k[i], mu1, mu_j[i]); + } + + mu_k[j] -= mu1; + + conv(MU, mu1); + + // cout << j << " " << MU << "\n"; + + RowTransform(B(k), B(j), MU); 
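+ // NOTE (editor): the exact basis is kept in lockstep with the floating-point data: B(k) <- B(k) - MU*B(j) mirrors the mu_k updates above, and the same unimodular transform is applied to U below so U keeps tracking the overall change of basis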
+ if (U) RowTransform((*U)(k), (*U)(j), MU); + } + } + + if (Fc1) { + for (i = 1; i <= n; i++) + conv(B1[k][i], B(k, i)); + + b[k] = InnerProduct(B1[k], B1[k], n); + ComputeGS(B, B1, mu, b, c, k, bound, 1, buf); + } + } while (Fc1); + + if (check && (*check)(B(k))) + quit = 1; + + if (b[k] == 0) { + for (i = k; i < m; i++) { + // swap i, i+1 + swap(B(i), B(i+1)); + tp = B1[i]; B1[i] = B1[i+1]; B1[i+1] = tp; + t1 = b[i]; b[i] = b[i+1]; b[i+1] = t1; + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = k; i <= m+1; i++) st[i] = 1; + + m--; + if (quit) break; + continue; + } + + if (quit) break; + + if (deep > 0) { + // deep insertions + + xdouble cc = b[k]; + long l = 1; + while (l <= k-1 && delta*c[l] <= cc) { + cc = cc - mu[k][l]*mu[k][l]*c[l]; + l++; + } + + if (l <= k-1 && (l <= deep || k-l <= deep)) { + // deep insertion at position l + + for (i = k; i > l; i--) { + // swap rows i, i-1 + swap(B(i), B(i-1)); + tp = B1[i]; B1[i] = B1[i-1]; B1[i-1] = tp; + tp = mu[i]; mu[i] = mu[i-1]; mu[i-1] = tp; + t1 = b[i]; b[i] = b[i-1]; b[i-1] = t1; + if (U) swap((*U)(i), (*U)(i-1)); + } + + k = l; + continue; + } + } // end deep insertions + + // test LLL reduction condition + + if (k > 1 && delta*c[k-1] > c[k] + mu[k][k-1]*mu[k][k-1]*c[k-1]) { + // swap rows k, k-1 + swap(B(k), B(k-1)); + tp = B1[k]; B1[k] = B1[k-1]; B1[k-1] = tp; + tp = mu[k]; mu[k] = mu[k-1]; mu[k-1] = tp; + t1 = b[k]; b[k] = b[k-1]; b[k-1] = t1; + if (U) swap((*U)(k), (*U)(k-1)); + + k--; + NumSwaps++; + + // cout << "- " << k << "\n"; + } + else { + k++; + // cout << "+ " << k << "\n"; + } + } + + if (verbose) { + LLLStatus(m+1, GetTime(), m, B); + } + + + return m; +} + + + + +static +long LLL_XD(mat_ZZ& B, mat_ZZ* U, xdouble delta, long deep, + LLLCheckFct check) +{ + long m = B.NumRows(); + long n = B.NumCols(); + + long i, j; + long new_m, dep, quit; + xdouble s; + ZZ MU; + xdouble mu1; + + xdouble t1; + ZZ T1; + + init_red_fudge(); + + if (U) ident(*U, m); + + Unique2DArray<xdouble> B1_store; + B1_store.SetDimsFrom1(m+1, n+1); + xdouble **B1 = B1_store.get(); // approximates B + + + Unique2DArray<xdouble> mu_store; + mu_store.SetDimsFrom1(m+1, m+1); + xdouble **mu = mu_store.get(); + + UniqueArray<xdouble> c_store; + c_store.SetLength(m+1); + xdouble *c = c_store.get(); // squared lengths of Gramm-Schmidt basis vectors + + UniqueArray<xdouble> b_store; + b_store.SetLength(m+1); + xdouble *b = b_store.get(); // squared lengths of basis vectors + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) + conv(B1[i][j], B(i, j)); + + + + for (i = 1; i <= m; i++) { + b[i] = InnerProduct(B1[i], B1[i], n); + } + + + new_m = ll_LLL_XD(B, U, delta, deep, check, B1, mu, b, c, m, 1, quit); + dep = m - new_m; + m = new_m; + + if (dep > 0) { + // for consistency, we move all of the zero rows to the front + + for (i = 0; i < m; i++) { + swap(B(m+dep-i), B(m-i)); + if (U) swap((*U)(m+dep-i), (*U)(m-i)); + } + } + + return m; +} + + + +long LLL_XD(mat_ZZ& B, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + if (delta < 0.50 || delta >= 1) LogicError("LLL_XD: bad delta"); + if (deep < 0) LogicError("LLL_XD: bad deep"); + return LLL_XD(B, 0, to_xdouble(delta), deep, check); +} + +long LLL_XD(mat_ZZ& B, mat_ZZ& U, double delta, long deep, + LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + if (delta < 0.50 || delta >= 1) LogicError("LLL_XD: bad delta"); + if (deep < 0) 
LogicError("LLL_XD: bad deep"); + return LLL_XD(B, &U, to_xdouble(delta), deep, check); +} + + + +NTL_TLS_GLOBAL_DECL(vec_xdouble, BKZConstant) + +static +void ComputeBKZConstant(long beta, long p) +{ + NTL_TLS_GLOBAL_ACCESS(BKZConstant); + + const double c_PI = 3.14159265358979323846264338328; + const double LogPI = 1.14472988584940017414342735135; + + BKZConstant.SetLength(beta-1); + + vec_double Log; + Log.SetLength(beta); + + + long i, j, k; + double x, y; + + for (j = 1; j <= beta; j++) + Log(j) = log(double(j)); + + for (i = 1; i <= beta-1; i++) { + // First, we compute x = gamma(i/2)^{2/i} + + k = i/2; + + if ((i & 1) == 0) { // i even + x = 0; + for (j = 1; j <= k; j++) + x = x + Log(j); + + x = x * (1/double(k)); + + x = exp(x); + } + else { // i odd + x = 0; + for (j = k + 2; j <= 2*k + 2; j++) + x = x + Log(j); + + x = 0.5*LogPI + x - 2*(k+1)*Log(2); + + x = x * (2.0/double(i)); + + x = exp(x); + } + + // Second, we compute y = 2^{2*p/i} + + y = -(2*p/double(i))*Log(2); + y = exp(y); + + BKZConstant(i) = x*y/c_PI; + } +} + +NTL_TLS_GLOBAL_DECL(vec_xdouble, BKZThresh) + +static +void ComputeBKZThresh(xdouble *c, long beta) +{ + NTL_TLS_GLOBAL_ACCESS(BKZConstant); + NTL_TLS_GLOBAL_ACCESS(BKZThresh); + + BKZThresh.SetLength(beta-1); + + long i; + double x; + + x = 0; + + for (i = 1; i <= beta-1; i++) { + x += log(c[i-1]); + BKZThresh(i) = xexp(x/double(i))*BKZConstant(i); + } +} + + +static +void BKZStatus(double tt, double enum_time, unsigned long NumIterations, + unsigned long NumTrivial, unsigned long NumNonTrivial, + unsigned long NumNoOps, long m, + const mat_ZZ& B) +{ + cerr << "---- BKZ_XD status ----\n"; + cerr << "elapsed time: "; + PrintTime(cerr, tt-StartTime); + cerr << ", enum time: "; + PrintTime(cerr, enum_time); + cerr << ", iter: " << NumIterations << "\n"; + cerr << "triv: " << NumTrivial; + cerr << ", nontriv: " << NumNonTrivial; + cerr << ", no ops: " << NumNoOps; + cerr << ", rank: " << m; + cerr << ", swaps: " << NumSwaps << "\n"; + + + + ZZ t1; + long i; + double prodlen = 0; + + for (i = 1; i <= m; i++) { + InnerProduct(t1, B(i), B(i)); + if (!IsZero(t1)) + prodlen += log(t1); + } + + cerr << "log of prod of lengths: " << prodlen/(2.0*log(2.0)) << "\n"; + + + if (LLLDumpFile) { + cerr << "dumping to " << LLLDumpFile << "..."; + + ofstream f; + OpenWrite(f, LLLDumpFile); + + f << "["; + for (i = 1; i <= m; i++) { + f << B(i) << "\n"; + } + f << "]\n"; + + f.close(); + + cerr << "\n"; + } + + LastTime = tt; + +} + + +static +long BKZ_XD(mat_ZZ& BB, mat_ZZ* UU, xdouble delta, + long beta, long prune, LLLCheckFct check) +{ + NTL_TLS_GLOBAL_ACCESS(red_fudge); + NTL_TLS_GLOBAL_ACCESS(BKZThresh); + + + long m = BB.NumRows(); + long n = BB.NumCols(); + long m_orig = m; + + long i, j; + ZZ MU; + + xdouble t1; + ZZ T1; + xdouble *tp; + + init_red_fudge(); + + mat_ZZ B; + B = BB; + + B.SetDims(m+1, n); + + Unique2DArray B1_store; + B1_store.SetDimsFrom1(m+2, n+1); + xdouble **B1 = B1_store.get(); // approximates B + + + Unique2DArray mu_store; + mu_store.SetDimsFrom1(m+2, m+1); + xdouble **mu = mu_store.get(); + + UniqueArray c_store; + c_store.SetLength(m+2); + xdouble *c = c_store.get(); // squared lengths of Gramm-Schmidt basis vectors + + UniqueArray b_store; + b_store.SetLength(m+2); + xdouble *b = b_store.get(); // squared lengths of basis vectors + + xdouble cbar; + + + UniqueArray ctilda_store; + ctilda_store.SetLength(m+2); + xdouble *ctilda = ctilda_store.get(); + + + UniqueArray vvec_store; + vvec_store.SetLength(m+2); + xdouble *vvec = vvec_store.get(); + + 
UniqueArray<xdouble> yvec_store; + yvec_store.SetLength(m+2); + xdouble *yvec = yvec_store.get(); + + UniqueArray<xdouble> uvec_store; + uvec_store.SetLength(m+2); + xdouble *uvec = uvec_store.get(); + + UniqueArray<xdouble> utildavec_store; + utildavec_store.SetLength(m+2); + xdouble *utildavec = utildavec_store.get(); + + UniqueArray<long> Deltavec_store; + Deltavec_store.SetLength(m+2); + long *Deltavec = Deltavec_store.get(); + + UniqueArray<long> deltavec_store; + deltavec_store.SetLength(m+2); + long *deltavec = deltavec_store.get(); + + + + mat_ZZ Ulocal; + mat_ZZ *U; + + if (UU) { + Ulocal.SetDims(m+1, m); + for (i = 1; i <= m; i++) + conv(Ulocal(i, i), 1); + U = &Ulocal; + } + else + U = 0; + + long quit; + long new_m; + long z, jj, kk; + long s, t; + long h; + xdouble eta; + + + for (i = 1; i <=m; i++) + for (j = 1; j <= n; j++) + conv(B1[i][j], B(i, j)); + + + for (i = 1; i <= m; i++) { + b[i] = InnerProduct(B1[i], B1[i], n); + } + + // cerr << "\n"; + // cerr << "first LLL\n"; + + m = ll_LLL_XD(B, U, delta, 0, check, B1, mu, b, c, m, 1, quit); + + double tt; + + double enum_time = 0; + unsigned long NumIterations = 0; + unsigned long NumTrivial = 0; + unsigned long NumNonTrivial = 0; + unsigned long NumNoOps = 0; + + long verb = verbose; + + verbose = 0; + + + + if (m < m_orig) { + for (i = m_orig+1; i >= m+2; i--) { + // swap i, i-1 + + swap(B(i), B(i-1)); + if (U) swap((*U)(i), (*U)(i-1)); + } + } + + long clean = 1; + + if (!quit && m > 1) { + // cerr << "continuing\n"; + if (beta > m) beta = m; + + if (prune > 0) + ComputeBKZConstant(beta, prune); + + z = 0; + jj = 0; + + while (z < m-1) { + jj++; + kk = min(jj+beta-1, m); + + if (jj == m) { + jj = 1; + kk = beta; + clean = 1; + } + + if (verb) { + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) + BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + + // ENUM + + double tt1; + + if (verb) { + tt1 = GetTime(); + } + + if (prune > 0) + ComputeBKZThresh(&c[jj], kk-jj+1); + + cbar = c[jj]; + utildavec[jj] = uvec[jj] = 1; + + yvec[jj] = vvec[jj] = 0; + Deltavec[jj] = 0; + + + s = t = jj; + deltavec[jj] = 1; + + for (i = jj+1; i <= kk+1; i++) { + ctilda[i] = uvec[i] = utildavec[i] = yvec[i] = 0; + Deltavec[i] = 0; + vvec[i] = 0; + deltavec[i] = 1; + } + + long enum_cnt = 0; + + while (t <= kk) { + if (verb) { + enum_cnt++; + if (enum_cnt > 100000) { + enum_cnt = 0; + tt = GetTime(); + if (tt > LastTime + LLLStatusInterval) { + enum_time += tt - tt1; + tt1 = tt; + BKZStatus(tt, enum_time, NumIterations, NumTrivial, + NumNonTrivial, NumNoOps, m, B); + } + } + } + + + ctilda[t] = ctilda[t+1] + + (yvec[t]+utildavec[t])*(yvec[t]+utildavec[t])*c[t]; + + if (prune > 0 && t > jj) { + eta = BKZThresh(t-jj); + } + else + eta = 0; + + if (ctilda[t] < cbar - eta) { + if (t > jj) { + t--; + t1 = 0; + for (i = t+1; i <= s; i++) { + t1 += utildavec[i]*mu[i][t]; + } + + + yvec[t] = t1; + t1 = -t1; + if (t1 >= 0) + t1 = ceil(t1-0.5); + else + t1 = floor(t1+0.5); + + utildavec[t] = vvec[t] = t1; + Deltavec[t] = 0; + if (utildavec[t] > -yvec[t]) + deltavec[t] = -1; + else + deltavec[t] = 1; + } + else { + cbar = ctilda[jj]; + for (i = jj; i <= kk; i++) { + uvec[i] = utildavec[i]; + } + } + } + else { + t++; + s = max(s, t); + if (t < s) Deltavec[t] = -Deltavec[t]; + if (Deltavec[t]*deltavec[t] >= 0) Deltavec[t] += deltavec[t]; + utildavec[t] = vvec[t] + Deltavec[t]; + } + } + + if (verb) { + tt1 = GetTime() - tt1; + enum_time += tt1; + } + + NumIterations++; + + h = min(kk+1, m); + + if ((delta-8*red_fudge)*c[jj] > cbar) { + + clean = 0; 
+ + // we treat the case that the new vector is b_s (jj < s <= kk) + // as a special case that appears to occur most of the time. + + s = 0; + for (i = jj+1; i <= kk; i++) { + if (uvec[i] != 0) { + if (s == 0) + s = i; + else + s = -1; + } + } + + if (s == 0) LogicError("BKZ_XD: internal error"); + + if (s > 0) { + // special case + + NumTrivial++; + + for (i = s; i > jj; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + // cerr << "special case\n"; + new_m = ll_LLL_XD(B, U, delta, 0, check, + B1, mu, b, c, h, jj, quit); + if (new_m != h) LogicError("BKZ_XD: internal error"); + if (quit) break; + } + else { + // the general case + + NumNonTrivial++; + + for (i = 1; i <= n; i++) conv(B(m+1, i), 0); + + if (U) { + for (i = 1; i <= m_orig; i++) + conv((*U)(m+1, i), 0); + } + + for (i = jj; i <= kk; i++) { + if (uvec[i] == 0) continue; + conv(MU, uvec[i]); + RowTransform2(B(m+1), B(i), MU); + if (U) RowTransform2((*U)(m+1), (*U)(i), MU); + } + + for (i = m+1; i >= jj+1; i--) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + for (i = 1; i <= n; i++) + conv(B1[jj][i], B(jj, i)); + + b[jj] = InnerProduct(B1[jj], B1[jj], n); + + if (b[jj] == 0) LogicError("BKZ_XD: internal error"); + + // remove linear dependencies + + // cerr << "general case\n"; + new_m = ll_LLL_XD(B, U, delta, 0, 0, B1, mu, b, c, kk+1, jj, quit); + + if (new_m != kk) LogicError("BKZ_XD: internal error"); + + // remove zero vector + + for (i = kk+2; i <= m+1; i++) { + // swap i, i-1 + swap(B(i-1), B(i)); + if (U) swap((*U)(i-1), (*U)(i)); + tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp; + t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1; + } + + quit = 0; + if (check) { + for (i = 1; i <= kk; i++) + if ((*check)(B(i))) { + quit = 1; + break; + } + } + + if (quit) break; + + if (h > kk) { + // extend reduced basis + + new_m = ll_LLL_XD(B, U, delta, 0, check, + B1, mu, b, c, h, h, quit); + + if (new_m != h) LogicError("BKZ_XD: internal error"); + if (quit) break; + } + } + + z = 0; + } + else { + // LLL_XD + // cerr << "progress\n"; + + NumNoOps++; + + if (!clean) { + new_m = + ll_LLL_XD(B, U, delta, 0, check, B1, mu, b, c, h, h, quit); + if (new_m != h) LogicError("BKZ_XD: internal error"); + if (quit) break; + } + + z++; + } + } + } + + if (verb) { + BKZStatus(GetTime(), enum_time, NumIterations, NumTrivial, NumNonTrivial, + NumNoOps, m, B); + } + + + // clean up + + if (m_orig > m) { + // for consistency, we move zero vectors to the front + + for (i = m+1; i <= m_orig; i++) { + swap(B(i), B(i+1)); + if (U) swap((*U)(i), (*U)(i+1)); + } + + for (i = 0; i < m; i++) { + swap(B(m_orig-i), B(m-i)); + if (U) swap((*U)(m_orig-i), (*U)(m-i)); + } + } + + B.SetDims(m_orig, n); + BB = B; + + if (U) { + U->SetDims(m_orig, m_orig); + *UU = *U; + } + + return m; +} + +long BKZ_XD(mat_ZZ& BB, mat_ZZ& UU, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { + StartTime = GetTime(); + LastTime = StartTime; + } + + + if (delta < 0.50 || delta >= 1) LogicError("BKZ_XD: bad delta"); + if (beta < 2) LogicError("BKZ_XD: bad block size"); + + return BKZ_XD(BB, &UU, to_xdouble(delta), beta, prune, check); +} + +long BKZ_XD(mat_ZZ& BB, double delta, + long beta, long prune, LLLCheckFct check, long verb) +{ + verbose = verb; + NumSwaps = 0; + if (verbose) { 
StartTime = GetTime();
+      LastTime = StartTime;
+   }
+
+
+
+   if (delta < 0.50 || delta >= 1) LogicError("BKZ_XD: bad delta");
+   if (beta < 2) LogicError("BKZ_XD: bad block size");
+
+   return BKZ_XD(BB, 0, to_xdouble(delta), beta, prune, check);
+}
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/MakeCheckFeature b/thirdparty/linux/ntl/src/MakeCheckFeature
new file mode 100644
index 0000000000..906087fc8b
--- /dev/null
+++ b/thirdparty/linux/ntl/src/MakeCheckFeature
@@ -0,0 +1,26 @@
+
+printf '*** Checking for feature: %s ' "$1"
+
+echo "" > "../include/NTL/HAVE_$1.h"
+
+sh RemoveProg CheckFeature
+echo $3 -o CheckFeature $2 $4 >> "CheckFeature.log" 2>&1
+$3 -o CheckFeature $2 $4 >> "CheckFeature.log" 2>&1
+
+if test -f CheckFeature
+then
+   if ./CheckFeature
+   then
+      echo "[yes]"
+      echo "#ifndef NTL_HAVE_$1" > "../include/NTL/HAVE_$1.h"
+      echo "#define NTL_HAVE_$1" >> "../include/NTL/HAVE_$1.h"
+      echo "#endif" >> "../include/NTL/HAVE_$1.h"
+      sh RemoveProg CheckFeature
+      exit 0
+   fi
+fi
+
+echo "[no]"
+sh RemoveProg CheckFeature
+exit 0
+
diff --git a/thirdparty/linux/ntl/src/MakeDesc.c b/thirdparty/linux/ntl/src/MakeDesc.c
new file mode 100644
index 0000000000..14a302b368
--- /dev/null
+++ b/thirdparty/linux/ntl/src/MakeDesc.c
@@ -0,0 +1,1206 @@
+
+
+#include <cstddef>
+#include <cstdio>
+#include <cmath>
+#include <cfloat>
+#include <climits>
+#include <NTL/version.h>
+
+
+#include <limits>
+
+using namespace std;
+
+
+#if (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+
+
+#define GNUC_INTEL (1)
+
+#else
+
+#define GNUC_INTEL (0)
+
+#endif
+
+
+int val_int(int x);
+unsigned int val_uint(unsigned int x);
+
+long val_long(long x);
+unsigned long val_ulong(unsigned long x);
+
+size_t val_size_t(size_t x);
+
+double val_double(double x);
+long double val_ldouble(double x);
+
+void touch_int(int* x);
+void touch_uint(unsigned int* x);
+
+void touch_long(long* x);
+void touch_ulong(unsigned long* x);
+
+void touch_size_t(size_t* x);
+
+void touch_double(double* x);
+void touch_ldouble(long double* x);
+
+double sum_double(double *x, long n);
+
+double fma_test(double a, double b, double c);
+
+
+double power2(long k);
+
+
+long FMADetected(long dp)
+{
+   double x = power2(0) + power2(dp-1);
+   double y = power2(0) + power2(dp-1);
+
+   touch_double(&x);
+   touch_double(&y);
+
+   double z = x*y;
+   touch_double(&z);
+   z = -z;
+   touch_double(&z);
+
+   double lo = fma_test(x, y, z);
+   return lo != 0;
+}
+
+long DoubleRounding(long dp)
+{
+   double a = power2(dp-1) + 1;
+   double b = (power2(dp)-1)/power2(dp+1);
+
+   double vec[2];
+   vec[0] = a;
+   vec[1] = b;
+
+   double sum = sum_double(vec, 2);
+
+   touch_double(&sum);
+
+   if (sum != a)
+      return 1;
+   else
+      return 0;
+}
+
+
+
+long DoublePrecision()
+{
+   double eps, one, res;
+   long k;
+
+   one = val_double(1.0);
+   eps = val_double(1.0);
+
+   k = 0;
+
+   do {
+      double tmp;
+
+      k++;
+      eps *= 1.0/2.0;
+      tmp = 1.0 + eps;
+      touch_double(&tmp);
+      res = tmp - one;
+   } while (res == eps);
+
+   return k;
+}
+
+long LongDoublePrecision()
+{
+   long double eps, one, res;
+   long k;
+
+   one = val_ldouble(1.0);
+   eps = val_ldouble(1.0);
+
+   k = 0;
+
+   do {
+      long double tmp;
+
+      k++;
+      eps *= 1.0/2.0;
+      tmp = 1.0 + eps;
+      touch_ldouble(&tmp);
+      res = tmp - one;
+   } while (res == eps && k < 500);
+
+   // if k >= 500, then most likely this is some
+   // weird double/double implementation.
+   // We also check what numeric_limits says about long doubles.
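+   // (A long double that never stops matching here is typically a
+   // "double-double" format, as used for long double on some PowerPC
+   // ABIs; such formats are not IEC 559/IEEE 754 conformant, which the
+   // numeric_limits check below detects.)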
+
+   if (k >= 500 || !numeric_limits<long double>::is_iec559) k = 0;
+   return k;
+}
+
+void print2k(FILE *f, long k, long bpl)
+{
+   long m, l;
+   long first;
+
+   if (k <= 0) {
+      fprintf(f, "((double) 1.0)");
+      return;
+   }
+
+   m = bpl - 2;
+   first = 1;
+
+   fprintf(f, "(");
+
+   while (k > 0) {
+      if (k > m)
+         l = m;
+      else
+         l = k;
+
+      k = k - l;
+
+      if (first)
+         first = 0;
+      else
+         fprintf(f, "*");
+
+      fprintf(f, "((double)(1L<<%ld))", l);
+   }
+
+   fprintf(f, ")");
+}
+
+
+void print2k_WD(FILE *f, long k, long bpl)
+{
+   long m, l;
+   long first;
+
+   if (k <= 0) {
+      fprintf(f, "(wide_double(1L))");
+      return;
+   }
+
+   m = bpl - 2;
+   first = 1;
+
+   fprintf(f, "(");
+
+   while (k > 0) {
+      if (k > m)
+         l = m;
+      else
+         l = k;
+
+      k = k - l;
+
+      if (first)
+         first = 0;
+      else
+         fprintf(f, "*");
+
+      fprintf(f, "(wide_double(1L<<%ld))", l);
+   }
+
+   fprintf(f, ")");
+}
+
+
+
+
+
+
+
+
+void print_mul_body(FILE *f, long n1, long k, long fn,
+                    long half_flag, long short_flag)
+{
+
+   long n, i, chop, r;
+   unsigned long mask, mask2;
+
+   if (half_flag)
+      n = n1/2;
+   else
+      n = n1;
+
+   chop = n % k; /* first block */
+   if (chop == 0)
+      chop = k;
+   r = n - k;
+   mask = (1UL << k) - 1UL;
+
+   fprintf(f, "\n\n#define NTL_");
+   if (half_flag) fprintf(f, "HALF_");
+   if (short_flag) fprintf(f, "SHORT_");
+
+   fprintf(f, "BB_MUL_CODE%ld \\\n", fn);
+
+
+   if (fn > 0) /* Mul1/AddMul1 */
+   {
+      fprintf(f, " long i;\\\n");
+      fprintf(f, " _ntl_ulong carry = 0, b;\\\n");
+   }
+   fprintf(f, " _ntl_ulong hi, lo, t;\\\n");
+   fprintf(f, " _ntl_ulong A[%ld];\\\n", 1L << k);
+   fprintf(f, " A[0] = 0;\\\n");
+
+   fprintf(f, " A[1] = a;\\\n");
+
+   for (i = 2; i < (1L << k); i++)
+   {
+      if (i % 2 == 0)
+         fprintf(f, " A[%ld] = A[%ld] << 1;\\\n", i, i / 2);
+      else
+         fprintf(f, " A[%ld] = A[%ld] ^ A[1];\\\n", i, i - 1);
+   }
+
+   if (fn > 0)
+   {
+      fprintf(f, " for (i = 0; i < sb; i++) {\\\n");
+      fprintf(f, " b = bp[i];\\\n");
+      fprintf(f, " ");
+   }
+
+   fprintf(f, " lo = A[b & %lu]; ", mask);
+   fprintf(f, "t = A[(b >> %ld) & %lu]; ", k, mask);
+   fprintf(f, "hi = t >> %ld; lo ^= t << %ld;\\\n", n1-k, k);
+
+   for (i = 2*k; i < n - chop; i += k) {
+      if (fn > 0) fprintf(f, " ");
+      fprintf(f, " t = A[(b >> %ld) & %lu]; ", i, mask);
+      fprintf(f, "hi ^= t >> %ld; lo ^= t << %ld;\\\n", n1-i, i);
+   }
+
+   if (fn > 0) fprintf(f, " ");
+   fprintf(f, " t = A[b >> %ld]; ", n-chop);
+   fprintf(f, "hi ^= t >> %ld; lo ^= t << %ld;\\\n", n1-i, i);
+
+   mask = 0;
+   for (i = 0; i < n; i += k)
+      mask |= 1UL << i;
+   mask = ~mask;
+   if (half_flag) mask &= (1UL << n) - 1UL;
+   mask2 = mask;
+
+   if (!short_flag) {
+      for (i = 1; i < k; i++)
+      {
+         if (fn > 0) fprintf(f, " ");
+
+         if (i == 1)
+            fprintf(f, " if (a >> %ld) ", n1-i);
+         else
+            fprintf(f, " if ((a >> %ld) & 1) ", n1-i);
+
+         /* bit n1-i from a was not considered in blocks of
+            k bits from b for index j >= i */
+         fprintf(f, "hi ^= ((b & 0x%lxUL) >> %ld);\\\n", mask2, i);
+         mask2 = (mask2 << 1) & mask;
+      }
+   }
+
+   if (fn > 0) fprintf(f, " ");
+
+   if (fn == 0)
+   {
+      fprintf(f, " c[0] = lo; ");
+      fprintf(f, " c[1] = hi;\\\n");
+   }
+   else if (fn == 1 || fn == 3)
+   {
+      fprintf(f, " cp[i] = carry ^ lo; ");
+      fprintf(f, " carry = hi;\\\n");
+   }
+   else if (fn == 2)
+   {
+      fprintf(f, " cp[i] ^= (carry ^ lo); ");
+      fprintf(f, " carry = hi;\\\n");
+   }
+   if (fn > 0)
+   {
+
+      fprintf(f, " }\\\n");
+
+      if (fn == 1 || fn == 3)
+         fprintf(f, " cp[sb] = carry;\\\n");
+      else
+         fprintf(f, " cp[sb] ^= carry;\\\n");
+
+   }
+   fprintf(f, "\n\n\n");
+
+
+}
+
+
+
+
+
+/*
+ * This generates alternative code that runs significantly faster
+ * on some machines,
like a PowerPC (and probably other RISC machines). + * It makes it easier for the compiler to schedule instrucyions better, + * and it avoids branches. It seems like this does not help + * on x86 machines (and can even make things worse). + */ + + +void print_alt_mul_body(FILE *f, long n1, long k, long fn, + long half_flag, long short_flag) +{ + + long n, i, chop, r; + unsigned long mask, mask2; + + if (half_flag) + n = n1/2; + else + n = n1; + + chop = n % k; /* first block */ + if (chop == 0) + chop = k; + r = n - k; + mask = (1UL << k) - 1UL; + + fprintf(f, "\n\n#define NTL_ALT_"); + if (half_flag) fprintf(f, "HALF_"); + if (short_flag) fprintf(f, "SHORT_"); + + fprintf(f, "BB_MUL_CODE%ld \\\n", fn); + + if (fn > 0) /* Mul1/AddMul1 */ + { + fprintf(f, " long i;\\\n"); + fprintf(f, " _ntl_ulong carry = 0;\\\n"); + } + fprintf(f, " _ntl_ulong A[%ld];\\\n", 1L << k); + fprintf(f, " A[0] = 0;\\\n"); + + fprintf(f, " A[1] = a;\\\n"); + + for (i = 2; i < (1L << k); i++) + { + if (i % 2 == 0) + fprintf(f, " A[%ld] = A[%ld] << 1;\\\n", i, i / 2); + else + fprintf(f, " A[%ld] = A[%ld] ^ A[1];\\\n", i, i - 1); + } + + if (fn > 0) + { + fprintf(f, " for (i = 0; i < sb; i++) {\\\n"); + fprintf(f, " const _ntl_ulong b = bp[i];\\\n"); + } + + for (i = k; i < n - chop; i += k) { + if (fn > 0) fprintf(f, " "); + fprintf(f, " const _ntl_ulong t%ld = A[(b >> %ld) & %lu]; \\\n", i, i, mask); + + } + if (fn > 0) fprintf(f, " "); + fprintf(f, " const _ntl_ulong t%ld = A[b >> %ld]; \\\n", n-chop, n-chop); + + if (fn > 0) fprintf(f, " "); + fprintf(f, " const _ntl_ulong lo = A[b & %lu] \\\n", mask); + + for (i = k; i < n; i += k) { + if (fn > 0) fprintf(f, " "); + fprintf(f, " ^ (t%ld << %ld)", i, i); + if (i == n - chop) + fprintf(f, ";\\\n"); + else + fprintf(f, "\\\n"); + } + + for (i = k; i < n; i += k) { + if (fn > 0) fprintf(f, " "); + if (i == k) + fprintf(f, " const _ntl_ulong hi = "); + else + fprintf(f, " ^ "); + fprintf(f, "(t%ld >> %ld)", i, n1-i); + if (i == n - chop && short_flag) + fprintf(f, ";\\\n"); + else + fprintf(f, "\\\n"); + + + } + + mask = 0; + for (i = 0; i < n; i += k) + mask |= 1UL << i; + mask = ~mask; + if (half_flag) mask &= (1UL << n) - 1UL; + mask2 = mask; + + if (!short_flag) { + for (i = 1; i < k; i++) + { + + /* bit n1-i from a was not considered in blocks of + k bits from b for index j >= i */ + + if (fn > 0) fprintf(f, " "); + + + if (i == 1) + fprintf(f, + " ^ (((b & 0x%lxUL) >> %ld) & (-(a >> %ld)))", + mask2, i, n1-1); + else { + fprintf(f, + " ^ (((b & 0x%lxUL) >> %ld) & (-((a >> %ld) & 1UL)))", + mask2, i, n1-i); + } + if (i == k-1) + fprintf(f, ";\\\n"); + else + fprintf(f, "\\\n"); + + + + mask2 = (mask2 << 1) & mask; + } + } + + if (fn > 0) fprintf(f, " "); + + if (fn == 0) + { + fprintf(f, " c[0] = lo; "); + fprintf(f, " c[1] = hi;\\\n"); + } + else if (fn == 1) + { + fprintf(f, " cp[i] = carry ^ lo; "); + fprintf(f, " carry = hi;\\\n"); + } + else if (fn == 2) + { + fprintf(f, " cp[i] ^= (carry ^ lo); "); + fprintf(f, " carry = hi;\\\n"); + } + if (fn > 0) + { + + fprintf(f, " }\\\n"); + + if (fn == 1 || fn == 3) + fprintf(f, " cp[sb] = carry;\\\n"); + else + fprintf(f, " cp[sb] ^= carry;\\\n"); + + } + fprintf(f, "\n\n\n"); + + +} + + + + +void print_alt1_mul_body(FILE *f, long n1, long k, long fn, + long half_flag, long short_flag) +{ + + long n, i, chop, r; + unsigned long mask, mask2; + + if (half_flag) + n = n1/2; + else + n = n1; + + chop = n % k; /* first block */ + if (chop == 0) + chop = k; + r = n - k; + mask = (1UL << k) - 1UL; + + fprintf(f, 
"\n\n#define NTL_ALT1_"); + if (half_flag) fprintf(f, "HALF_"); + if (short_flag) fprintf(f, "SHORT_"); + + fprintf(f, "BB_MUL_CODE%ld \\\n", fn); + + + if (fn > 0) /* Mul1/AddMul1 */ + { + fprintf(f, " long i;\\\n"); + fprintf(f, " _ntl_ulong carry = 0, b;\\\n"); + } + fprintf(f, " _ntl_ulong hi, lo, t;\\\n"); + fprintf(f, " _ntl_ulong A[%ld];\\\n", 1L << k); + fprintf(f, " A[0] = 0;\\\n"); + + fprintf(f, " A[1] = a;\\\n"); + + for (i = 2; i < (1L << k); i++) + { + if (i % 2 == 0) + fprintf(f, " A[%ld] = A[%ld] << 1;\\\n", i, i / 2); + else + fprintf(f, " A[%ld] = A[%ld] ^ A[1];\\\n", i, i - 1); + } + + if (fn > 0) + { + fprintf(f, " for (i = 0; i < sb; i++) {\\\n"); + fprintf(f, " b = bp[i];\\\n"); + fprintf(f, " "); + } + + fprintf(f, " lo = A[b & %lu]; ", mask); + fprintf(f, "t = A[(b >> %ld) & %lu]; ", k, mask); + fprintf(f, "hi = t >> %ld; lo ^= t << %ld;\\\n", n1-k, k); + + for (i = 2*k; i < n - chop; i += k) { + if (fn > 0) fprintf(f, " "); + fprintf(f, " t = A[(b >> %ld) & %lu]; ", i, mask); + fprintf(f, "hi ^= t >> %ld; lo ^= t << %ld;\\\n", n1-i, i); + } + + if (fn > 0) fprintf(f, " "); + fprintf(f, " t = A[b >> %ld]; ", n-chop); + fprintf(f, "hi ^= t >> %ld; lo ^= t << %ld;\\\n", n1-i, i); + + mask = 0; + for (i = 0; i < n; i += k) + mask |= 1UL << i; + mask = ~mask; + if (half_flag) mask &= (1UL << n) - 1UL; + mask2 = mask; + + + if (!short_flag) { + for (i = 1; i < k; i++) + { + + /* bit n1-i from a was not considered in blocks of + k bits from b for index j >= i */ + + if (fn > 0) fprintf(f, " "); + + + if (i == 1) + fprintf(f, + " hi ^= (((b & 0x%lxUL) >> %ld) & (-(a >> %ld)))", + mask2, i, n1-1); + else { + fprintf(f, + " ^ (((b & 0x%lxUL) >> %ld) & (-((a >> %ld) & 1UL)))", + mask2, i, n1-i); + } + if (i == k-1) + fprintf(f, ";\\\n"); + else + fprintf(f, "\\\n"); + + + + mask2 = (mask2 << 1) & mask; + } + } + + + + + if (fn > 0) fprintf(f, " "); + + if (fn == 0) + { + fprintf(f, " c[0] = lo; "); + fprintf(f, " c[1] = hi;\\\n"); + } + else if (fn == 1 || fn == 3) + { + fprintf(f, " cp[i] = carry ^ lo; "); + fprintf(f, " carry = hi;\\\n"); + } + else if (fn == 2) + { + fprintf(f, " cp[i] ^= (carry ^ lo); "); + fprintf(f, " carry = hi;\\\n"); + } + if (fn > 0) + { + + fprintf(f, " }\\\n"); + + if (fn == 1 || fn == 3) + fprintf(f, " cp[sb] = carry;\\\n"); + else + fprintf(f, " cp[sb] ^= carry;\\\n"); + + } + fprintf(f, "\n\n\n"); + + +} + + + + + + + +void print_BB_mul_code(FILE *f, long n) +{ + long k; + + if (n >= 64) + k = 4; + else + k = 3; + + + print_mul_body(f, n, k, 0, 0, 0); + print_mul_body(f, n, 4, 1, 0, 0); + print_mul_body(f, n, 4, 2, 0, 0); + print_mul_body(f, n, 4, 1, 0, 1); + print_mul_body(f, n, 2, 0, 1, 0); + + + + print_alt_mul_body(f, n, k, 0, 0, 0); + print_alt_mul_body(f, n, 4, 1, 0, 0); + print_alt_mul_body(f, n, 4, 2, 0, 0); + print_alt_mul_body(f, n, 4, 1, 0, 1); + print_alt_mul_body(f, n, 2, 0, 1, 0); + + + + print_alt1_mul_body(f, n, k, 0, 0, 0); + print_alt1_mul_body(f, n, 4, 1, 0, 0); + print_alt1_mul_body(f, n, 4, 2, 0, 0); + print_alt1_mul_body(f, n, 4, 1, 0, 1); + print_alt1_mul_body(f, n, 2, 0, 1, 0); + + fprintf(f, "#define NTL_BB_MUL1_BITS (4)\n\n"); +} + + + + + +void print_BB_sqr_code(FILE *f, long n) +{ + long i, pos; + + fprintf(f, "\n\n"); + fprintf(f, "#define NTL_BB_SQR_CODE \\\n"); + fprintf(f, "lo=sqrtab[a&255];\\\n"); + pos = 16; + + for (i = 8; i < n; i += 8) { + if (2*(i+8) <= n) { + fprintf(f, "lo=lo|(sqrtab[(a>>%ld)&255]<<%ld);\\\n", i, pos); + pos += 16; + } + else if (2*i == n) { + fprintf(f, "hi=sqrtab[(a>>%ld)&255];\\\n", 
i);
+         pos = 16;
+      }
+      else if (2*i > n) {
+         fprintf(f, "hi=hi|(sqrtab[(a>>%ld)&255]<<%ld);\\\n", i, pos);
+         pos += 16;
+      }
+      else { /* only applies if word size is not a multiple of 16 */
+         fprintf(f, "_ntl_ulong t=sqrtab[(a>>%ld)&255];\\\n", i);
+         fprintf(f, "lo=lo|(t<<%ld);\\\n", pos);
+         fprintf(f, "hi=t>>%ld;\\\n", n-8);
+         pos = 8;
+      }
+   }
+
+   fprintf(f, "\n\n");
+}
+
+
+void print_BB_rev_code(FILE *f, long n)
+{
+   long i;
+
+   fprintf(f, "\n\n");
+   fprintf(f, "#define NTL_BB_REV_CODE ");
+
+   for (i = 0; i < n; i += 8) {
+      if (i != 0) fprintf(f, "\\\n|");
+      fprintf(f, "(revtab[(a>>%ld)&255]<<%ld)", i, n-8-i);
+   }
+
+   fprintf(f, "\n\n");
+}
+
+
+
+
+const char *yn_vec[2] = { "no", "yes" };
+
+
+
+int main()
+{
+   long bpl, bpi, bpt, rs_arith, nbits, wnbits;
+   long dp, dr;
+   long fma_detected;
+   long big_pointers;
+   long ldp;
+   FILE *f;
+   long warnings = 0;
+
+   unsigned long ulval;
+   unsigned int uival;
+   size_t tval;
+   long slval;
+
+   fprintf(stderr, "This is NTL version %s\n\n", NTL_VERSION);
+
+
+
+
+   /*
+    * compute bpl = bits per long
+    */
+
+   ulval = val_ulong(1);
+   bpl = 0;
+
+   while (ulval) {
+      ulval <<= 1;
+      touch_ulong(&ulval);
+      bpl++;
+   }
+
+
+
+
+   /*
+    * compute bpi = bits per int
+    */
+
+   uival = val_uint(1);
+   bpi = 0;
+
+   while (uival) {
+      uival <<= 1;
+      touch_uint(&uival);
+      bpi++;
+   }
+
+
+
+   /*
+    * compute bpt = bits per size_t
+    */
+
+   tval = val_size_t(1);
+   bpt = 0;
+
+   while (tval) {
+      tval <<= 1;
+      touch_size_t(&tval);
+      bpt++;
+   }
+
+
+   /*
+    * check if bpl and bpi are not too small --- any standard conforming
+    * platform should pass this test.
+    */
+
+   if (bpi < 16) {
+      fprintf(stderr, "BAD NEWS: int type too short.\n");
+      return 1;
+   }
+
+   if (bpl < 32) {
+      fprintf(stderr, "BAD NEWS: long type too short.\n");
+      return 1;
+   }
+
+   /*
+    * check that there are 8 bits in a char. This is a POSIX requirement.
+    */
+
+   if (CHAR_BIT != 8) {
+      fprintf(stderr, "BAD NEWS: char type must have 8 bits.\n");
+      return 1;
+   }
+
+
+   /*
+    * check that bpi is a multiple of 8.
+    */
+
+   if (bpi % 8 != 0) {
+      fprintf(stderr, "BAD NEWS: int type must be multiple of 8 bits.\n");
+      return 1;
+   }
+
+
+   /*
+    * check that bpl is a multiple of 8.
+    */
+
+   if (bpl % 8 != 0) {
+      fprintf(stderr, "BAD NEWS: long type must be multiple of 8 bits.\n");
+      return 1;
+   }
+
+
+   /*
+    * check if width of signed versions of int and long agree with that of
+    * the unsigned versions, and that negative numbers are represented
+    * using 2's complement.
+    *
+    * The C99 standard, at least, is very precise about the possible
+    * representations of unsigned and signed integer types, and so if
+    * the following tests pass, we can be sure that the desired
+    * properties hold. NTL relies implicitly and crucially on
+    * these properties.
+    *
+    * I know of no machines for which these properties do not hold.
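+    *
+    * (Concretely, the four checks below compare INT_MIN/INT_MAX and
+    * LONG_MIN/LONG_MAX against the bit patterns a two's complement
+    * representation would produce, computed in the corresponding
+    * unsigned types.)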
+    */
+
+   if (((unsigned int) val_int(INT_MIN)) != val_uint(1U << (bpi-1))) {
+      fprintf(stderr, "BAD NEWS: machine must be 2's complement.\n");
+      return 1;
+   }
+
+   if (((unsigned int) val_int(INT_MAX)) != val_uint((1U << (bpi-1)) - 1U)) {
+      fprintf(stderr, "BAD NEWS: machine must be 2's complement.\n");
+      return 1;
+   }
+
+   if (((unsigned long) val_long(LONG_MIN)) != val_ulong(1UL << (bpl-1))) {
+      fprintf(stderr, "BAD NEWS: machine must be 2's complement.\n");
+      return 1;
+   }
+
+   if (((unsigned long) val_long(LONG_MAX)) != val_ulong((1UL<<(bpl-1))-1UL)) {
+      fprintf(stderr, "BAD NEWS: machine must be 2's complement.\n");
+      return 1;
+   }
+
+
+
+   /*
+    * check that floating point to integer conversions truncate toward zero
+    * --- any standard conforming platform should pass this test.
+    */
+
+   if (((long) val_double(1.75)) != 1L) {
+      fprintf(stderr,
+              "BAD NEWS: machine must truncate floating point toward zero.\n");
+      return 1;
+   }
+
+   if (((long) val_double(-1.75)) != -1L) {
+      fprintf(stderr,
+              "BAD NEWS: machine must truncate floating point toward zero.\n");
+      return 1;
+   }
+
+
+
+
+
+
+   /*
+    * Test if right shift is arithmetic or not. According to the
+    * standards, the result of right-shifting a negative number is
+    * "implementation defined", which almost surely means the right shift
+    * is *always* arithmetic or *always* logical. However, this cannot
+    * be guaranteed, and so this test is *not* 100% portable --- but I
+    * know of no machine for which this test does not correctly
+    * predict the general behavior. One should set the NTL_CLEAN_INT
+    * flag if one wants to avoid such machine dependencies.
+    */
+
+   slval = val_long(-1);
+   if ((slval >> 1) == slval)
+      rs_arith = 1;
+   else
+      rs_arith = 0;
+
+
+
+   /*
+    * Next, we check some properties of floating point arithmetic.
+    * An implementation should conform to the IEEE floating
+    * point standard --- essentially all modern platforms do,
+    * except for a few very old Crays. There is no easy way
+    * to check this, so we simply make a few simple sanity checks,
+    * calculate the precision, and check whether the platform performs
+    * double precision arithmetic in extended double precision registers.
+    * The last property is one that the IEEE standard allows, and which
+    * some important platforms (like x86) have --- this is quite
+    * unfortunate, as it really makes many of the other properties
+    * of the IEEE standard unusable.
+    */
+
+   /*
+    * First, we simply check that we are using a machine with radix 2.
+    */
+
+   if (FLT_RADIX != 2) {
+      fprintf(stderr, "BAD NEWS: machine must use IEEE floating point.\n");
+      return 1;
+   }
+
+   /*
+    * Next, we calculate the precision of "in memory" doubles,
+    * and check that it is at least 53.
+    */
+
+   dp = DoublePrecision();
+
+   if (dp < 53) {
+      fprintf(stderr, "BAD NEWS: machine must use IEEE floating point (*).\n");
+      return 1;
+   }
+
+   /*
+    * Next, we check that the *range* of doubles is sufficiently large.
+    * Specifically, we require that DBL_MAX > 2^{7*max(bpl, dp)}
+    * and 1/DBL_MIN > 2^{7*max(bpl, dp)}.
+    * On IEEE floating point compliant machines, this
+    * will hold, and the following test will pass, if bpl is at most 128, which
+    * should be true for the foreseeable future.
+    */
+
+   if (log(DBL_MAX)/log(2.0) < 7.01*bpl || log(DBL_MAX)/log(2.0) < 7.01*dp ||
+       -log(DBL_MIN)/log(2.0) < 7.01*bpl || -log(DBL_MIN)/log(2.0) < 7.01*dp) {
+      fprintf(stderr, "BAD NEWS: range of doubles too small.\n");
+      return 1;
+   }
+
+
+
+   /*
+    * Next, we check if the machine has wider "in-register" doubles or not.
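+    * (Extended-precision registers, as on the classic x87 FPU, let an
+    * intermediate sum carry more precision than a stored double, which
+    * is exactly what the DoubleRounding() probe below is designed to
+    * expose.)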
+ * This test almost always yields the correct result --- if not, + * you will have to set the NTL_EXT_DOUBLE in "mach_desc.h" + * by hand. + */ + + + dr = DoubleRounding(dp); + + + /* + * Next, we check if the platform uses FMA (fused multiply add), + * even across statement boundaries. + */ + + fma_detected = FMADetected(dp); + + + + /* + * Next, we test the precision of long doubles. + * If long doubles don't look good or useful, ldp == 0. + * Right now, we ony enable long double usage on Intel/gcc + * platforms. + */ + + ldp = LongDoublePrecision(); + if (ldp <= dp || !GNUC_INTEL) ldp = 0; + + // Disable if it looks like rounding doesn't work right + if (((long) val_ldouble(1.75)) != 1L) ldp = 0; + if (((long) val_ldouble(-1.75)) != -1L) ldp = 0; + + + /* + * Set nbits = min(bpl-2, dp-3) [and even] + + * Given the minimum size of blp and dp, the smallest possible + * value of nbits is 30. + */ + + + if (bpl-2 < dp-3) + nbits = bpl-2; + else + nbits = dp-3; + + if (nbits % 2 != 0) nbits--; + + /* + * Set wnbits = min(bpl-2, ldp-2) [and even] + */ + + if (ldp) { + if (bpl-2 < ldp-2) + wnbits = bpl-2; + else + wnbits = ldp-2; + + if (wnbits % 2 != 0) wnbits--; + } + else { + wnbits = nbits; + } + + if (wnbits <= nbits) ldp = 0; + // disable long doubles if it doesn't increase nbits... + // (for example, on 32-bit machines) + + + big_pointers = 0; + if (sizeof(char*) > sizeof(long)) big_pointers = 1; + + + /* + * That's it! All tests have passed. + */ + + fprintf(stderr, "GOOD NEWS: compatible machine.\n"); + fprintf(stderr, "summary of machine characteristics:\n"); + fprintf(stderr, "bits per long = %ld\n", bpl); + fprintf(stderr, "bits per int = %ld\n", bpi); + fprintf(stderr, "bits per size_t = %ld\n", bpt); + fprintf(stderr, "arith right shift = %s\n", yn_vec[rs_arith]); + fprintf(stderr, "double precision = %ld\n", dp); + fprintf(stderr, "long double precision = %ld\n", ldp); + fprintf(stderr, "NBITS (maximum) = %ld\n", nbits); + fprintf(stderr, "WNBITS (maximum) = %ld\n", wnbits); + fprintf(stderr, "double rounding detected = %s\n", yn_vec[dr]); + fprintf(stderr, "FMA detected = %s\n", yn_vec[fma_detected]); + fprintf(stderr, "big pointers = %s\n", yn_vec[big_pointers]); + + if (dr && GNUC_INTEL) + fprintf(stderr, "-- auto x86 fix\n"); + + if (dp != 53) { + warnings = 1; + + fprintf(stderr, "\n\nWARNING:\n\n"); + fprintf(stderr, "Nonstandard floating point precision.\n"); + fprintf(stderr, "IEEE standard is 53 bits.\n"); + } + +#if (defined(__sparc__) && !defined(__sparc_v8__) && \ + !defined(__sparcv8) && !defined(__sparc_v9__) && !defined(__sparcv9)) + + warnings = 1; + + fprintf(stderr, "\n\nWARNING:\n\n"); + fprintf(stderr, "If this Sparc is a Sparc-10 or later (so it has\n"); + fprintf(stderr, "a hardware integer multiply instruction) you\n"); + fprintf(stderr, "should specify the -mv8 option in the makefile\n"); + fprintf(stderr, "to obtain more efficient code.\n"); + +#endif + + if (dr && !GNUC_INTEL) { + warnings = 1; + fprintf(stderr, "\n\nWARNING:\n\n"); + fprintf(stderr, "This platform has extended double precision registers.\n"); + fprintf(stderr, "While that may sound like a good thing, it actually is not.\n"); + fprintf(stderr, "If this is a Pentium or other x86 and your compiler\n"); + fprintf(stderr, "is g++ or supports GNU 'asm' constructs, it is recommended\n"); + fprintf(stderr, "to compile NTL with the NTL_X86_FIX flag to get true IEEE floating point.\n"); + fprintf(stderr, "Set this flag by editing the file config.h.\n"); + fprintf(stderr, "The code should still 
work even if you don't set\n"); + fprintf(stderr, "this flag. See quad_float.txt for details.\n\n"); + } + + +#if 0 + + /* better not to be interactive */ + + if (warnings) { + int c; + + fprintf(stderr, "Do you want to continue anyway[y/n]? "); + c = getchar(); + if (c == 'n' || c == 'N') { + fprintf(stderr, "Make the necessary changes to the makefile and/or config.h,\n"); + fprintf(stderr, "then type 'make clobber' and then 'make'.\n\n\n"); + return 1; + } + } + +#endif + + f = fopen("mach_desc.h", "w"); + if (!f) { + fprintf(stderr, "can't open mach_desc.h for writing\n"); + return 1; + } + + fprintf(f, "#ifndef NTL_mach_desc__H\n"); + fprintf(f, "#define NTL_mach_desc__H\n\n\n"); + fprintf(f, "#define NTL_BITS_PER_LONG (%ld)\n", bpl); + fprintf(f, "#define NTL_MAX_LONG (%ldL)\n", ((long) ((1UL<<(bpl-1))-1UL))); + fprintf(f, "#define NTL_MAX_INT (%ld)\n", ((long) ((1UL<<(bpi-1))-1UL))); + fprintf(f, "#define NTL_BITS_PER_INT (%ld)\n", bpi); + fprintf(f, "#define NTL_BITS_PER_SIZE_T (%ld)\n", bpt); + fprintf(f, "#define NTL_ARITH_RIGHT_SHIFT (%ld)\n", rs_arith); + fprintf(f, "#define NTL_NBITS_MAX (%ld)\n", nbits); + fprintf(f, "#define NTL_WNBITS_MAX (%ld)\n", wnbits); + fprintf(f, "#define NTL_DOUBLE_PRECISION (%ld)\n", dp); + fprintf(f, "#define NTL_FDOUBLE_PRECISION "); + print2k(f, dp-1, bpl); + fprintf(f, "\n"); + + + if (ldp) { + fprintf(f, "#define NTL_LONGDOUBLE_OK (1)\n"); + fprintf(f, "#define NTL_LONGDOUBLE_PRECISION (%ld)\n", ldp); + fprintf(f, "#define NTL_WIDE_DOUBLE_LDP "); + print2k_WD(f, ldp-1, bpl); + fprintf(f, "\n"); + fprintf(f, "#define NTL_WIDE_DOUBLE_DP "); + print2k_WD(f, dp-1, bpl); + fprintf(f, "\n"); + } + else { + fprintf(f, "#define NTL_LONGDOUBLE_OK (0)\n"); + fprintf(f, "#define NTL_WIDE_DOUBLE_DP "); + print2k_WD(f, dp-1, bpl); + fprintf(f, "\n"); + } + + + fprintf(f, "#define NTL_QUAD_FLOAT_SPLIT ("); + print2k(f, dp - (dp/2), bpl); + fprintf(f, "+1.0)\n"); + fprintf(f, "#define NTL_EXT_DOUBLE (%ld)\n", dr); + + fprintf(f, "#define NTL_FMA_DETECTED (%ld)\n", fma_detected); + fprintf(f, "#define NTL_BIG_POINTERS (%ld)\n", big_pointers); + + fprintf(f, "#define NTL_MIN_LONG (-NTL_MAX_LONG - 1L)\n"); + fprintf(f, "#define NTL_MIN_INT (-NTL_MAX_INT - 1)\n"); + + + print_BB_mul_code(f, bpl); + print_BB_sqr_code(f, bpl); + print_BB_rev_code(f, bpl); + + + fprintf(f, "#endif\n\n"); + + fclose(f); + + fprintf(stderr, "\n\n"); + + return 0; +} diff --git a/thirdparty/linux/ntl/src/MakeDescAux.c b/thirdparty/linux/ntl/src/MakeDescAux.c new file mode 100644 index 0000000000..d343142f25 --- /dev/null +++ b/thirdparty/linux/ntl/src/MakeDescAux.c @@ -0,0 +1,56 @@ + +#include +using namespace std; + +int val_int(int x) { return x; } +unsigned int val_uint(unsigned int x) { return x; } + +long val_long(long x) { return x; } +unsigned long val_ulong(unsigned long x) { return x; } + +size_t val_size_t(size_t x) { return x; } + +double val_double(double x) { return x; } +long double val_ldouble(double x) { return x; } + +void touch_int(int* x) {} +void touch_uint(unsigned int* x) {} + +void touch_long(long* x) {} +void touch_ulong(unsigned long* x) {} + +void touch_size_t(size_t* x) {} + +void touch_double(double* x) {} +void touch_ldouble(long double* x) {} + +double sum_double(double *x, long n) +{ + long i; + double acc = 0; + + for (i = 0; i < n; i++) + acc += x[i]; + + return acc; +} + +double fma_test(double a, double b, double c) +{ + double t1 = a*b; + double t2 = t1 + c; + return t2; +} + +double power2(long k) +{ + long i; + double res; + + res = 1; + + for (i = 
1; i <= k; i++) + res = res * 2; + + return res; +} diff --git a/thirdparty/linux/ntl/src/MakeGetPID b/thirdparty/linux/ntl/src/MakeGetPID new file mode 100644 index 0000000000..2f7ff0b849 --- /dev/null +++ b/thirdparty/linux/ntl/src/MakeGetPID @@ -0,0 +1,26 @@ + +if test -f GetPID.c +then + rm GetPID.c +fi + +echo "who am I?" + + +sh RemoveProg TestGetPID +echo $1 -o TestGetPID TestGetPID.c GetPID1.c $2 >> "CheckFeature.log" 2>&1 +$1 -o TestGetPID TestGetPID.c GetPID1.c $2 >> "CheckFeature.log" 2>&1 + +if test -f TestGetPID +then +cp GetPID1.c GetPID.c +echo "using GetPID1.c" +exit 0 +fi + + +cp GetPID2.c GetPID.c +echo "using GetPID2.c" +echo "warning: this GetPID function always returns 0" +exit 0 + diff --git a/thirdparty/linux/ntl/src/MakeGetTime b/thirdparty/linux/ntl/src/MakeGetTime new file mode 100644 index 0000000000..ad30a9fa20 --- /dev/null +++ b/thirdparty/linux/ntl/src/MakeGetTime @@ -0,0 +1,83 @@ + +if test -f GetTime.c +then + rm GetTime.c +fi + +echo "does anybody really know what time it is?" + + +sh RemoveProg TestGetTime +echo $1 -o TestGetTime TestGetTime.c GetTime1.c $2 >> "CheckFeature.log" 2>&1 +$1 -o TestGetTime TestGetTime.c GetTime1.c $2 >> "CheckFeature.log" 2>&1 + +if test -f TestGetTime +then +if ./TestGetTime 1 1048576 1048575 >> "CheckFeature.log" 2>&1 +then +cp GetTime1.c GetTime.c +echo "using GetTime1.c" +exit 0 +fi +fi + + +sh RemoveProg TestGetTime +echo $1 -o TestGetTime TestGetTime.c GetTime2.c $2 >> "CheckFeature.log" 2>&1 +$1 -o TestGetTime TestGetTime.c GetTime2.c $2 >> "CheckFeature.log" 2>&1 + +if test -f TestGetTime +then +if ./TestGetTime 1 1048576 1048575 >> "CheckFeature.log" 2>&1 +then +cp GetTime2.c GetTime.c +echo "using GetTime2.c" +exit 0 +fi +fi + +sh RemoveProg TestGetTime +echo $1 -o TestGetTime TestGetTime.c GetTime3.c $2 >> "CheckFeature.log" 2>&1 +$1 -o TestGetTime TestGetTime.c GetTime3.c $2 >> "CheckFeature.log" 2>&1 + +if test -f TestGetTime +then +if ./TestGetTime 1 1048576 1048575 >> "CheckFeature.log" 2>&1 +then +cp GetTime3.c GetTime.c +echo "using GetTime3.c" +exit 0 +fi +fi + +sh RemoveProg TestGetTime +echo $1 -o TestGetTime TestGetTime.c GetTime4.c $2 >> "CheckFeature.log" 2>&1 +$1 -o TestGetTime TestGetTime.c GetTime4.c $2 >> "CheckFeature.log" 2>&1 + + +if test -f TestGetTime +then +if ./TestGetTime 1 1048576 1048575 >> "CheckFeature.log" 2>&1 +then +cp GetTime4.c GetTime.c +echo "using GetTime4.c" +exit 0 +fi +fi + +sh RemoveProg TestGetTime +echo $1 -o TestGetTime TestGetTime.c GetTime5.c $2 >> "CheckFeature.log" 2>&1 +$1 -o TestGetTime TestGetTime.c GetTime5.c $2 >> "CheckFeature.log" 2>&1 + + +if test -f TestGetTime +then +cp GetTime5.c GetTime.c +echo "using GetTime5.c" +echo "warning: this GetTime function always returns 0" +exit 0 +else +echo "something is wrong..." 
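+# reaching this point means none of GetTime1.c through GetTime5.c compiled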
+exit 1
+fi
+
diff --git a/thirdparty/linux/ntl/src/MatrixTest.c b/thirdparty/linux/ntl/src/MatrixTest.c
new file mode 100644
index 0000000000..9c265fc18a
--- /dev/null
+++ b/thirdparty/linux/ntl/src/MatrixTest.c
@@ -0,0 +1,58 @@
+
+#include <NTL/mat_ZZ.h>
+#include <NTL/mat_poly_ZZ.h>
+#include <NTL/HNF.h>
+
+NTL_CLIENT
+
+int main()
+{
+   mat_ZZ B, X;
+   vec_ZZ v, w;
+
+   cin >> B;
+   cin >> v;
+
+   ZZ d;
+
+   double t;
+   cerr << "matrix inverse...";
+   t = GetTime();
+   inv(d, X, B);
+   cerr << (GetTime()-t) << "\n";
+
+   cout << d << "\n";
+   cout << X << "\n";
+
+   cout << "\n\n\n";
+
+   cerr << "hensel solve...";
+   t = GetTime();
+   HenselSolve1(d, w, B, v);
+   cerr << (GetTime()-t) << "\n";
+
+   cout << d << "\n";
+   cout << w << "\n";
+
+   cout << "\n\n\n";
+
+   ZZX f;
+
+   cerr << "char poly...";
+   t = GetTime();
+   CharPoly(f, B);
+   cerr << (GetTime()-t) << "\n";
+
+   cout << f << "\n";
+
+   cout << "\n\n\n";
+
+   cerr << "HNF...";
+   t = GetTime();
+   HNF(X, B, d);
+   cerr << (GetTime()-t) << "\n";
+
+   cout << X;
+
+   return 0;
+}
diff --git a/thirdparty/linux/ntl/src/MatrixTestIn b/thirdparty/linux/ntl/src/MatrixTestIn
new file mode 100644
index 0000000000..e3cb89851b
--- /dev/null
+++ b/thirdparty/linux/ntl/src/MatrixTestIn
@@ -0,0 +1,13 @@
+[[927267 -895605 -866862 -733022 647694 -555086 970641 524600 582869 890322]
+[-749289 -533762 -754674 -564542 874399 888872 860097 -801459 731651 -920001]
+[-1008354 -839027 -531044 592717 543848 647360 641018 957632 893065 -813238]
+[-750708 -783256 -868889 -649872 -807570 579545 840467 -734946 -720279 760893]
+[648723 -1016200 -587545 -1025537 710862 987663 -1047329 -803105 910327 803227]
+[-824476 -863571 -978793 -550626 -1000451 -780190 734624 -746905 620723 766901]
+[-900849 -593349 686359 1031502 832388 835860 -1034307 975079 -541187 -935991]
+[1015281 -971840 -970316 -851433 848978 -656104 -1044347 1014101 760024 -726970]
+[674372 -809805 713198 896663 590902 -783974 -651080 627852 1008582 -681953]
+[617949 -803220 -947289 786228 -540550 635343 -641246 536407 731378 -545576]
+]
+
+[1 0 1 0 1 0 1 0 1 1]
diff --git a/thirdparty/linux/ntl/src/MatrixTestOut b/thirdparty/linux/ntl/src/MatrixTestOut
new file mode 100644
index 0000000000..71956a9050
--- /dev/null
+++ b/thirdparty/linux/ntl/src/MatrixTestOut
@@ -0,0 +1,35 @@
+113200891409702717792163966000355453253167532371751053300095404
+[[9509068573319153106464382357375229187086855611933097546 4271842113282984626360758539562365814114171429102170870 -17759702680062972863550010512920362656015043038991243880 41477187861721572946030655775707662755085404597108307160 -5243782742622400377848684512280147647926604264242557316 -57570474516915330032297900074382277948092226082851451198 -39691442023468050071303361769224768064783020685116735498 1368136574325345397683214068314548753685759225204791630 38882325913404281065622952126474660414786444511334629982 21658060525924366067499408569988309603526842936659482904]
+[16472827960331056248258838159209192114875841064752417973 11055661592473954088842425168207875905160844070216428777 -16560334922191139189855127652632587481364130161107253300 -102245748658385867081609185652450631512925122490140100688 -6927806773133223877423268518425302220467823310751166148 28742921711161354730773870543456692789867725621589693379 14310736605755305933866385638819253964858157392606846769 -16014991501246590459421014324140905882300657048699826879 -81503308043560647324620876343032886277665012480242791525 19194058609377655982717031197566907183658664910378329254]
+[-109734933934562270503406590168469216229765499568865356641
-79814977936376262987151168741382893862364782954378186677 133626969697143446178706879427077311304614297868917769972 149665059613481916985855546923739615387602354833519713620 41594932401764115975248510994547285258748104347283084044 -114293087661479606464120160022800358925423770750498443543 -131521483304858915292609610805812270795903190662579776593 27868073260247900348789264908353779013914728480966707387 126507466382585557035788800218240658029902198985527596649 -103983285416770288102268920890479984309962375465970108130] +[114179091124182582838159635859138807708117275312431988888 72039857186790938018866342099151764006275450407675210704 -135958647564984313864674762976737220386273609465893392876 -137836977113748623086616532582520232134322908706353110508 -39648958505403130424111817393832633444137482306335426792 98771210737273227760225008543985447809800217211255619424 138383817476246957183754907890374698153472269727036432232 -83531568830256521066176037083463270650302126963488770196 -59665850209941010245210238755312622861489834786890672540 104212488172405994217125642352348699856251768748581561844] +[111146907229695951696474016292279176346380873649543797504 78969317235163683361563886953576359244786258829455206460 -111284423156656542447821201425798324474500022987488315616 -128864395236737761888784942727936809950578458670337206372 -16132840533073216704190053885210668883271756416346952680 71936575188688673331139096261137395113789612697561557288 128028235167433369167870171440603042287857319552486125624 -41129844043092203151024600561803347120823946971906314680 -69716278056658512473084306613953260253123063966900954104 34043655739979891222203807318846715642905966692040868996] +[-48945544119598748666062390519512449564008849843608859415 -34927798213824985216852923656744552110662885021091481427 83872849154007409489147877549998397111409710950909271768 72378117750764994652170299394193193907241706518396524004 39167881720986278038950691657480446660498428518175599392 -81609637303540982160086561536058086094023691612652712665 -66440879526238756393022955803236662628829375990239458479 1111020514420903761458906973190717225987391378209788497 20190606337847249085182187793374022035135207033785218683 -14837054054733076096806488318719796374279547144082285446] +[4565965807956488541051226759685506863651410011624760868 3492181881011202418943817267527580110911157147323976900 39461193147298576391825815620320144432845605945305549448 56604265994474462404748264295892220624225863674908427124 -17328413096863500505028926621982940665124422268469920716 -48643771179225048379840010236577072580175169863772980140 -52021419276660697100984251779634298701037261640215712780 -9349417465113435540652912465753929600296084265070495752 38749631289242781437940266166713604911714618202430909116 -18933915915475912601355036112428373924790403131648561896] +[-41336383131401391642050443152991618920841425678015082424 -83981921540218745933168166698961730340818141341644355484 119619051324150226809528823887692424873092665569791069844 57359568975642082106242309019095290886689391528980785256 13601689001352389599706696797494222236044903367813347568 -59795682186578959961110105467067404963104249848788910772 -65397723614377571502938854693721405692307224823682592396 42592878104789608838213389518482176731590188510637180208 9920812157887076126172384077761201801497607298814386108 -45132518927593441552501636215301078568022033762084120792] +[-49861066123459998006862729910986171295203945428483340835 -35407425167837977411438500961193686418300361423162989791 
92357206488938784494418325514759738413689140887758688444 -5657564760989076343828111309403153669939072934734661300 41671230880738697588228732473944374615246021756976674148 -10711124317120463934382132614621248039431313315612206405 -76224317951311690961135035362182609056216309727633080639 2637208115428887814017590413315049349698597330449554141 25858155834234711795118681073700474922768148144251824295 -25990119790243388449513847601985281200137341894872589166] +[61623531631064187432686831211234110772009391741788753961 -22095530335186280470348249242411284415648788206132842467 -20711755361024954539607118196944853953156730345410464400 -66746268941133150887857051757708016432030879372710964244 26099945908427902073513340584391341626437984932690918396 50975963872583708863130921554269663855103316623806906743 52201087513253571112258091209051975785803391414176589285 -45191083604629728008798145680113826847397615310533055439 -50146971408392286617891235891675861782320000106664060689 11541953231519243604644211417760923555459588623123390410] +] + + + +113200891409702717792163966000355453253167532371751053300095404 +[27249473184013512266407136541118635644159086017541712403 -50584592209942650462069456184321559526502345177792664705 115689488135636337213972690426493653067477248330091983968 46478284530817963216164361200291518714365212127753615988 70661072818371598049922919033409244304109328087891245876 -108305918613468066616370185132973898524229550691366617255 -119229339875612413145903997067198928544279071748882590601 -63796927162833233139985589317687726451218561510888812713 110134329954416532243495157079500276243728345769979945249 -81663651625066088258790377298297690632997722240667516882] + + + +[113200891409702717792163966000355453253167532371751053300095404 53416187193543401439887430820819294205975695546488797425 -123607612902745582703514051875473670204659819545865 -68402251288605920205399779446887509578119997 -40944447194334612294903085651515816465 -15153544924233995088343823419092 -22979394554373573500894590 -8722575497586664894 -6104870715982 413939 1] + + + +[[56600445704851358896081983000177726626583766185875526650047702 0 0 0 0 0 0 0 0 0] +[11465750313487428661702512911842922520691216342563125571848443 2 0 0 0 0 0 0 0 0] +[55231767316270678911483408554295434801191847687452448852191136 1 1 0 0 0 0 0 0 0] +[37908835104003064912155774912601500340365754030028784231954718 0 0 1 0 0 0 0 0 0] +[48615901178068930937993265623819971877392545512335892026898174 0 0 0 1 0 0 0 0 0] +[51840818598597540130642998688588606375873071993964789114593617 1 0 0 0 1 0 0 0 0] +[55361570761366016585700727191488280019398548582423308017837536 0 0 0 0 0 1 0 0 0] +[36090811750869350784108074019471312745428867031617225757665622 0 0 0 0 0 0 1 0 0] +[19232281111318501488032229905889533180588029146643536978782361 1 0 0 0 0 0 0 1 0] +[42496144125982001757158835167287232900763450071403701240310105 1 0 0 0 0 0 0 0 1] +] \ No newline at end of file diff --git a/thirdparty/linux/ntl/src/MoreFacTest.c b/thirdparty/linux/ntl/src/MoreFacTest.c new file mode 100644 index 0000000000..75b005c241 --- /dev/null +++ b/thirdparty/linux/ntl/src/MoreFacTest.c @@ -0,0 +1,66 @@ +#include + +NTL_CLIENT + +long NumFacs(const vec_pair_ZZX_long& v) +{ + long i; + long res; + + res = 0; + + for (i = 0; i < v.length(); i++) + res += v[i].b; + + return res; +} + + +int main() +{ + long cnt = 0; + while (SkipWhiteSpace(cin)) { + cnt++; + cerr << "."; + + vec_ZZ w; + ZZX f1, f; + long nfacs; + + cin >> w; + cin >> nfacs; + + long i, n; + n = w.length(); + 
f.rep.SetLength(n); + for (i = 0; i < n; i++) + f.rep[i] = w[n-1-i]; + f.normalize(); + + vec_pair_ZZX_long factors; + ZZ c; + + factor(c, factors, f, 0); + + + mul(f1, factors); + mul(f1, f1, c); + + if (f != f1) { + cerr << f << "\n"; + cerr << c << " " << factors << "\n"; + TerminalError("FACTORIZATION INCORRECT (1) !!!"); + } + + long nfacs1 = NumFacs(factors); + + if (nfacs1 != nfacs) + TerminalError("FACTORIZATION INCORRECT (2) !!!"); + } + + + cerr << "\n"; + cerr << "MoreFacTest OK\n"; + + return 0; +} diff --git a/thirdparty/linux/ntl/src/MoreFacTestIn b/thirdparty/linux/ntl/src/MoreFacTestIn new file mode 100644 index 0000000000..0b6e9519cf --- /dev/null +++ b/thirdparty/linux/ntl/src/MoreFacTestIn @@ -0,0 +1,1161 @@ +[ +1 +] +0 +[ +-1 +] +0 +[ +1234567890987654321 +] +0 +[ +1 +0 +] +1 +[ +1 +1 +] +1 +[ +1 +1234567890987654321 +] +1 +[ +1234567890987654321 +1 +] +1 +[ +1 +-1234567890987654321 +] +1 +[ +-1234567890987654321 +1 +] +1 +[ +1234567890987654321 +1234567890987654321 +] +1 +[ +-1234567890987654321 +-1234567890987654321 +] +1 +[ +1234500000 +6789000000 +] +1 +[ +1234500000 +-6789000000 +] +1 +[ +-1234500000 +6789000000 +] +1 +[ +-1234500000 +-6789000000 +] +1 +[ +1 +0 +1 +] +1 +[ +1 +0 +-1 +] +2 +[ +1 +1125899906842624 +1267650600228229401496703205376 +] +1 +[ +1267650600228229401496703205376 +1125899906842624 +1 +] +1 +[ +1 +0 +0 +] +2 +[ +-1 +0 +0 +] +2 +[ +1 +2 +1 +] +2 +[ +-9 +24 +-16 +] +2 +[ +1 +15716643102160534111758180 +61753217600172574271106391194796676956532699228100 +] +2 +[ +1 +23574964653240801167637270 +123506435200345148542212782389593353913065398456200 +] +2 +[ +123506435200345148542212782389593353913065398456200 +23574964653240801167637270 +1 +] +2 +[ +1267167468929855903854750250191966490209 +0 +-61753217600172574271106391194796676956532699228100 +] +2 +[ +35597295809230452047 +0 +-7858321551080267055879090 +] +1 +[ +1 +1 +0 +] +2 +[ +35597295809230452047 +7858321551080267055879090 +0 +] +2 +[ +35597295809230452047 +7858285953784457825427043 +-7858321551080267055879090 +] +2 +[ +1 +0 +279734996817854936178276161872067809674997229 +] +1 +[ +279734996817854936178276161872067809674997231 +0 +279734996817854936178276161872067809674997229 +] +1 +[ +348678440100000000000000000000 +291733167875766667063796853374976 +7979226629761200100000000000000000000 +] +1 +[ +120 +-720 +810 +] +2 +[ +1 +1 +-6 +] +2 +[ +-1 +-1 +6 +] +2 +[ +5 +19 +-4 +] +2 +[ +-5 +-19 +4 +] +2 +[ +42 +-1 +-1 +] +2 +[ +-42 +1 +1 +] +2 +[ +1 +0 +0 +0 +] +3 +[ +1 +-6 +12 +-8 +] +3 +[ +-8 +12 +-6 +1 +] +3 +[ +955593817727321453093807642925081991552428315714137911219172409259950196321 +2910517013546164872066111470835330246421044768278430000000000000000000000000 +2954919802742283328413552300000000000000000000000000000000000000000000000000 +1000000000000000000000000000000000000000000000000000000000000000000000000000 +] +3 +[ +1 +1 +0 +0 +] +3 +[ +3 +17 +21 +-9 +] +3 +[ +4787767769400 +-56802290702175 +190362236823900 +-125287863234300 +] +3 +[ +1 +6 +11 +6 +] +3 +[ +-6 +-11 +-6 +-1 +] +3 +[ +7 +48 +77 +-12 +] +3 +[ +219912 +657951 +-859605 +108732 +] +3 +[ +41606516273784 +76486500495993 +-30324946543980 +-57977141944800 +] +3 +[ +1 +2 +2 +1 +] +2 +[ +3 +8 +0 +-1 +] +2 +[ +1005406248 +-2867492493 +797202216 +805731303 +] +2 +[ +199 +-211 +55667264366753132299476956212541494125324448571 +-59024084328567391533616270155006307841424415319 +] +2 +[ +1 +6 +11 +279734996817854936178276161872067809674997236 +] +1 +[ +61753217600172574271106391194796676956532699228100 +-1 
+-3813459883974263793031618518285054261914116064020745192095813534545111079038096283794818335829610000 +61753217600172574271106391194796676956532699228100 +] +3 +[ +1 +2 +-2 +-1 +] +2 +[ +1000000 +1000001 +-1000001 +-1000000 +] +2 +[ +16 +-96 +216 +-216 +81 +] +4 +[ +-16 +96 +-216 +216 +-81 +] +4 +[ +1 +0 +0 +0 +1 +] +1 +[ +2339825252947983196289989414023384155425975650625 +0 +0 +0 +1257565912098743428344355615835487157290616629841 +] +1 +[ +1 +0 +0 +0 +4 +] +2 +[ +2339825252947983196289989414023384155425975650625 +0 +0 +0 +5030263648394973713377422463341948629162466519364 +] +2 +[ +1 +0 +-2 +0 +9 +] +1 +[ +1606938044258990275541962092341162602522202993782792835301376 +0 +-1306637247000141812193380534316115641074287420945909743086143932738994282954752 +0 +2390525899882872924049031898322016641463101073880550463771174655651832418111719646949462291396009 +] +1 +[ +4 +-28 +61 +-42 +9 +] +4 +[ +78251668444685311269707371487528656450521930098767964925231152371415252349959400507672900 +34549072257044214828682415315874081146403635560456663308003943263668841080562387370131482083860 +3813459883661257119254482986600538873291451020849207247624613697324211759570921656132843895891782081 +-34549072257044214828682415315874081146403635560456663308003943263668841080562387370131482083860 +78251668444685311269707371487528656450521930098767964925231152371415252349959400507672900 +] +4 +[ +16 +-48 +64 +-64 +52 +-28 +12 +-4 +] +5 +[ +27648 +-857088 +11887488 +-97206208 +519469068 +-1897564792 +4798209187 +-8242846327 +8864358367 +-3955897247 +-3955897247 +8864358367 +-8242846327 +4798209187 +-1897564792 +519469068 +-97206208 +11887488 +-857088 +27648 +] +19 +[ +-24016 +-32548 +0 +211584 +284066 +0 +-53325 +307600 +384808 +469800 +0 +228140 +266152 +630450 +0 +21964 +0 +436050 +] +2 +[ +-190188 +-247212 +0 +0 +-376854 +-489846 +309744 +0 +0 +-264247 +133825 +-90985 +-118265 +208008 +0 +601324 +0 +148180 +] +1 +[ +-190188 +-247212 +0 +0 +-376854 +-489846 +309744 +0 +0 +-264247 +133825 +-90985 +-118265 +208008 +0 +601324 +0 +148180 +203796 +0 +50220 +] +2 +[ +-23 + 0 + 70 + 0 +-242 + 0 + 284 + 0 +-309 + 0 + 301 + 0 +-272 + 0 + 249 + 0 +-189 + 0 + 177 + 0 +-149 + 0 + 129 + 0 +-116 + 0 + 108 + 0 +-104 + 0 + 97 + 0 +-75 + 0 + 7 + 0 +-41 + 0 +-110 + 0 + 51 + 0 + 19 + 0 +-34 + 0 +-11 + 0 + 115 + 0 +-103 + 0 + 134 + 0 +-135 + 0 + 134 + 0 +-132 + 0 + 131 + 0 +-141 + 0 + 156 + 0 +-144 + 0 + 230 + 0 +-220 + 0 + 255 + 0 +-284 + 0 + 260 + 0 +-181 + 0 + 53 + 0 +-15 +] +3 +[ +-23 + 140883096 +-45 + 275640840 +-7 + 42877464 + 22 +-134757744 +-10 + 61253520 +-9 + 55128168 + 22 +-134757744 + 15 +-91880280 +] +2 +[ +1 +0 +-200000000000136 +0 +27200000000006477 +0 +-1295200000000142048 +0 +28382400000001519810 +0 +-302666800000007595088 +0 +1490635200000015464098 +0 +-2790152800000013050016 +0 +1119368000000013996989 +0 +-9245000000005596840 +0 +46225 +] +2 +[ +1 +0 +-256 +0 +-999999999999999999999971968 +0 +255999999999999999999998270976 +0 +-28031999999999999999999932845823 +0 +1729023999999999999999998267416320 +0 +-67154175999999999999999969586180736 +0 + 1732583423999999999999999635453582848 +0 +-30413791231999999999999997060769836800 +0 + 364544688127999999999999984700590055424 +0 +-2939163009023999999999999952829480067072 +0 + 15297677361151999999999999933076097564672 +0 +-47140106141695999999999999994797371400192 +0 + 66559357747200000000000000178002258296832 +0 +-2263465590783999999999999325149651795968 +0 +-193299935657983999999999998732715790696448 +0 +-627710242062335999999999999173537662238720 +0 
+-1333843567050751999999999999806700064342016 +0 +-824198872170495999999999999372289757937664 +0 + 1333843567050752 +0 + 824198872170496 +] +2 +[ +1 +0 +-256 +0 +28032 +0 +-1729024 +0 +67154176 +0 +-1732583424 +0 +30413791232 +0 +-364544688128 +0 +2939163009024 +0 +-15297677361152 +0 +47140106141696 +0 +-66559357747200 +0 +2263465590784 +0 +193299935657984 +0 +627710242062336 +0 +1333843567050752 +0 +824198872170496 +] +1 +[ +1 +0 +-456 +0 +80684 +0 +-7580440 +0 +426504758 +0 +-15173272376 +0 +348965414828 +0 +-5222990961896 +0 +51398572708049 +0 +-342604051286656 +0 +1550317051426208 +0 +-4831933058958336 +0 +10198629967583488 +0 +-15932586425303040 +0 +20876094152884224 +0 +-13926929280270336 +0 +316045337296896 +] +2 +[ +1 +0 +0 +0 +-456 +0 +0 +0 +80684 +0 +0 +0 +-7580440 +0 +0 +0 +426504758 +0 +0 +0 +-15173272376 +0 +0 +0 +348965414828 +0 +0 +0 +-5222990961896 +0 +0 +0 +51398572708049 +0 +0 +0 +-342604051286656 +0 +0 +0 +1550317051426208 +0 +0 +0 +-4831933058958336 +0 +0 +0 +10198629967583488 +0 +0 +0 +-15932586425303040 +0 +0 +0 +20876094152884224 +0 +0 +0 +-13926929280270336 +0 +0 +0 +316045337296896 +] +2 +[ +-1 + 0 + 0 + 0 + 208 + 0 +-432 + 0 +-14944 + 0 + 63520 + 0 + 411752 + 0 +-3169952 + 0 + 32544 + 0 + 60405744 + 0 +-193237168 + 0 +-153920768 + 0 + 2515813476 + 0 +-6391147264 + 0 + 789453360 + 0 + 36373780048 + 0 +-103370680864 + 0 + 114447129888 + 0 + 85495132376 + 0 +-537480834720 + 0 + 952639088992 + 0 +-815573687440 + 0 +-94716509008 + 0 + 1280699815936 + 0 +-1830275916102 + 0 + 1280699815936 + 0 +-94716509008 + 0 +-815573687440 + 0 + 952639088992 + 0 +-537480834720 + 0 + 85495132376 + 0 + 114447129888 + 0 +-103370680864 + 0 + 36373780048 + 0 + 789453360 + 0 +-6391147264 + 0 + 2515813476 + 0 +-153920768 + 0 +-193237168 + 0 + 60405744 + 0 + 32544 + 0 +-3169952 + 0 + 411752 + 0 + 63520 + 0 +-14944 + 0 +-432 + 0 + 208 + 0 + 0 + 0 +-1 +] +1 +[ + 1 + 0 + 0 + 0 +-288 + 0 + 592 + 0 + 27660 + 0 +-115824 + 0 +-1058592 + 0 + 7626336 + 0 + 8880418 + 0 +-215487264 + 0 + 488513248 + 0 + 2157251280 + 0 +-13734423844 + 0 + 15735635856 + 0 + 97082316256 + 0 +-437410264064 + 0 + 547254790511 + 0 + 1350779531008 + 0 +-7133429310144 + 0 + 13591562590752 + 0 +-7381113216744 + 0 +-29077015274080 + 0 + 97676724196928 + 0 +-169734262965824 + 0 + 200990761577180 + 0 +-169734262965824 + 0 + 97676724196928 + 0 +-29077015274080 + 0 +-7381113216744 + 0 + 13591562590752 + 0 +-7133429310144 + 0 + 1350779531008 + 0 + 547254790511 + 0 +-437410264064 + 0 + 97082316256 + 0 + 15735635856 + 0 +-13734423844 + 0 + 2157251280 + 0 + 488513248 + 0 +-215487264 + 0 + 8880418 + 0 + 7626336 + 0 +-1058592 + 0 +-115824 + 0 + 27660 + 0 + 592 + 0 +-288 + 0 + 0 + 0 + 1 +] +2 +[ +10000000000 +0 +10000000000 +0 +4700000000 +0 +1380000000 +0 +282000000 +0 +42200000 +0 +4740000 +0 +402000 +0 +24900 +0 +1080 +0 +27 +] +4 +[ + 1 + 0 +-1 + 0 +-2 + 0 + 9 + 0 +-10 + 0 +-20 + 0 + 89 + 0 + 112 + 0 +-409 + 0 + 460 + 0 + 918 + 0 +-4072 + 0 + 4520 + 0 + 9058 + 0 + 4171 + 0 + 327 + 0 + 770 + 0 + 871 + 0 +-790 + 0 +-440 + 0 + 467 + 0 + 300 + 0 +-119 + 0 +-44 + 0 + 110 + 0 + 20 + 0 +-36 + 0 + 10 + 0 + 29 + 0 + 1 + 0 +-10 + 0 + 1 + 0 + 2 + 0 + 0 + 0 + 1 + 0 + 2 + 0 + 1 +] +2 diff --git a/thirdparty/linux/ntl/src/MulTimeTest.c b/thirdparty/linux/ntl/src/MulTimeTest.c new file mode 100644 index 0000000000..34751aba3f --- /dev/null +++ b/thirdparty/linux/ntl/src/MulTimeTest.c @@ -0,0 +1,161 @@ + +#include + +#include + +NTL_CLIENT + + +double clean_data(double *t) +{ + double x, y, z; + long i, ix, iy, n; + + x = t[0]; ix = 0; + y = t[0]; iy = 
0; + + for (i = 1; i < 5; i++) { + if (t[i] < x) { + x = t[i]; + ix = i; + } + if (t[i] > y) { + y = t[i]; + iy = i; + } + } + + z = 0; n = 0; + for (i = 0; i < 5; i++) { + if (i != ix && i != iy) z+= t[i], n++; + } + + z = z/n; + + return z; +} + +void print_flag() +{ + +#if defined(NTL_LONG_LONG) +printf("LONG_LONG\n"); +#elif defined(NTL_AVOID_FLOAT) +printf("AVOID_FLOAT\n"); +#else +printf("DEFAULT\n"); +#endif + +} + + +int main() +{ + +#ifdef NTL_LONG_LONG + + + if (sizeof(NTL_LL_TYPE) < 2*sizeof(long)) { + printf("999999999999999 "); + print_flag(); + return 0; + } + +#endif + + SetSeed(ZZ(0)); + + long i, k; + + + k = 10*NTL_ZZ_NBITS; + + for (i = 0; i < 10000; i++) { + ZZ a, b, c, d; + long da = RandomBnd(k); + long db = RandomBnd(k); + long dc = RandomBnd(k); + long dd = RandomBnd(k); + RandomLen(a, da); RandomLen(b, db); RandomLen(c, dc); RandomLen(d, dd); + + if ((a + b)*(c + d) != c*a + d*a + c*b + d*b) { + printf("999999999999999 "); + print_flag(); + return 0; + } + } + + + + for (i = 0; i < 10000; i++) { + ZZ a, b, c; + + long da = RandomBnd(k); + long db = RandomBnd(k); + long dc = RandomBnd(k) + 2; + + RandomLen(a, da); RandomLen(b, db); RandomLen(c, dc); + + if ( ( a * b ) % c != ((a % c) * (b % c)) % c ) { + printf("999999999999999 "); + print_flag(); + return 0; + } + } + + k = 1024; + + ZZ x1, x2, x3; + double t; + long j; + + RandomLen(x1, k); + RandomLen(x2, k); + + + long iter; + + mul(x3, x1, x2); + + iter = 1; + + do { + t = GetTime(); + for (i = 0; i < iter; i++) { + for (j = 0; j < 500; j++) mul(x3, x1, x2); + } + t = GetTime() - t; + iter = 2*iter; + } while(t < 1); + + + iter = iter/2; + iter = long((3/t)*iter) + 1; + + double tvec[5]; + long w; + + for (w = 0; w < 5; w++) { + t = GetTime(); + for (i = 0; i < iter; i++) { + for (j = 0; j < 500; j++) mul(x3, x1, x2); + } + t = GetTime() - t; + tvec[w] = t; + } + + t = clean_data(tvec); + + t = floor((t/iter)*1e14); + + if (t < 0 || t >= 1e15) + printf("999999999999999 "); + else + printf("%015.0f ", t); + + printf(" [%ld] ", iter); + + print_flag(); + + return 0; +} diff --git a/thirdparty/linux/ntl/src/NOTES b/thirdparty/linux/ntl/src/NOTES new file mode 100644 index 0000000000..a3cf532783 --- /dev/null +++ b/thirdparty/linux/ntl/src/NOTES @@ -0,0 +1,49 @@ + +===================================== + +FIXME: maybe it would make more sense to take the +1/-1 logic +out of [cg]_lip_impl block_construct routines and just put it in +the caller: the ZZ_p and ZZVec BlockConstruct stuff: add 1 there... +that would mean in the ZZ_p ConstructFromVec we don't do the -1 either... +not critical... + +FIXME: maybe add a reserve method to Vec, for compatibility +with STL vectors. + + +TODO list: + * allow 32-bit SP MulMod...this would allow GPU/SSE + support...both this and previous would require + a complete scan to introduce new, special types + * add template functions clear(), to clear multiple + entries in a Vec or Poly. The important thing is + to provide specialized ones for Vec and GF2X. + + + +----------------------------- +These are basically notes to myself on preparing a new +distribution of NTL. +----------------------------- + +- make sure I update tour-time.html + +- update ../README and ../doc/copying.txt + +- change version numbers in ../include/NTL/version.h, DIRNAME, and WINDIR + +- change the libtool soname in VERSION_INFO. 
See: + +http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html + +- if changes were made to makefile or ../include/NTL/config.h, + make sure these changes are implemented in the template files + mfile and cfile, and then run: + +export COPYFILE_DISABLE=1 +make ppdoc +make ppclean +make package +make winpack + + diff --git a/thirdparty/linux/ntl/src/Poly1TimeTest.c b/thirdparty/linux/ntl/src/Poly1TimeTest.c new file mode 100644 index 0000000000..1248f94ab6 --- /dev/null +++ b/thirdparty/linux/ntl/src/Poly1TimeTest.c @@ -0,0 +1,237 @@ + +#include +#include + +#include + +NTL_CLIENT + + +double clean_data(double *t) +{ + double x, y, z; + long i, ix, iy, n; + + x = t[0]; ix = 0; + y = t[0]; iy = 0; + + for (i = 1; i < 5; i++) { + if (t[i] < x) { + x = t[i]; + ix = i; + } + if (t[i] > y) { + y = t[i]; + iy = i; + } + } + + z = 0; n = 0; + for (i = 0; i < 5; i++) { + if (i != ix && i != iy) z+= t[i], n++; + } + + z = z/n; + + return z; +} + + +void print_flag() +{ + +#if defined(NTL_FFT_LAZYMUL) +printf("FFT_LAZYMUL "); +#endif + +#if defined(NTL_SPMM_ULL) +printf("SPMM_ULL "); +#elif defined(NTL_SPMM_ASM) +printf("SPMM_ASM "); +#endif + +#if defined(NTL_AVOID_BRANCHING) +printf("AVOID_BRANCHING "); +#endif + +#if defined(NTL_FFT_BIGTAB) +printf("FFT_BIGTAB "); +#endif + +printf("\n"); + +} + + +int main() +{ + +#ifdef NTL_SPMM_ULL + + if (sizeof(NTL_ULL_TYPE) < 2*sizeof(long)) { + printf("999999999999999 "); + print_flag(); + return 0; + } + +#endif + + SetSeed(ZZ(0)); + + + long n, k; + + n = 200; + k = 10*NTL_ZZ_NBITS; + + ZZ p; + + RandomLen(p, k); + if (!IsOdd(p)) p++; + + + ZZ_p::init(p); // initialization + + ZZ_pX f, g, h, r1, r2, r3; + + random(g, n); // g = random polynomial of degree < n + random(h, n); // h = " " + random(f, n); // f = " " + + SetCoeff(f, n); // Sets coefficient of X^n to 1 + + + // For doing arithmetic mod f quickly, one must pre-compute + // some information. + + ZZ_pXModulus F; + build(F, f); + + PlainMul(r1, g, h); // this uses classical arithmetic + PlainRem(r1, r1, f); + + MulMod(r2, g, h, F); // this uses the FFT + + MulMod(r3, g, h, f); // uses FFT, but slower + + // compare the results... 
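+ // r1, r2, and r3 were computed by three independent routes
+ // (classical multiply-and-remainder, MulMod against the precomputed
+ // ZZ_pXModulus, and MulMod against the raw modulus), so any
+ // disagreement below means the build is broken; in that case the
+ // test prints the sentinel 999999999999999 instead of a timing.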
+ + if (r1 != r2) { + printf("999999999999999 "); + print_flag(); + return 0; + } + else if (r1 != r3) { + printf("999999999999999 "); + print_flag(); + return 0; + } + + double t; + long i, j; + long iter; + + const int nprimes = 30; + const long L = 12; + const long N = 1L << L; + long r; + + + for (r = 0; r < nprimes; r++) UseFFTPrime(r); + + vec_long A1[nprimes], A2[nprimes]; + vec_long B1[nprimes], B2[nprimes]; + + for (r = 0; r < nprimes; r++) { + A1[r].SetLength(N); + A2[r].SetLength(N); + B1[r].SetLength(N); + B2[r].SetLength(N); + + for (i = 0; i < N; i++) { + A1[r][i] = RandomBnd(GetFFTPrime(r)); + A2[r][i] = RandomBnd(GetFFTPrime(r)); + } + } + + for (r = 0; r < nprimes; r++) { + long *A1p = A1[r].elts(); + long *A2p = A2[r].elts(); + long *B1p = B1[r].elts(); + long *B2p = B2[r].elts(); + long q = GetFFTPrime(r); + mulmod_t qinv = GetFFTPrimeInv(r); + + FFTFwd(B1p, A1p, L, r); + FFTFwd(B2p, A2p, L, r); + for (i = 0; i < N; i++) B1p[i] = NormalizedMulMod(B1p[i], B2p[i], q, qinv); + FFTRev1(B1p, B1p, L, r); + } + + iter = 1; + + do { + t = GetTime(); + for (j = 0; j < iter; j++) { + for (r = 0; r < nprimes; r++) { + long *A1p = A1[r].elts(); + long *A2p = A2[r].elts(); + long *B1p = B1[r].elts(); + long *B2p = B2[r].elts(); + long q = GetFFTPrime(r); + mulmod_t qinv = GetFFTPrimeInv(r); + + FFTFwd(B1p, A1p, L, r); + FFTFwd(B2p, A2p, L, r); + for (i = 0; i < N; i++) B1p[i] = NormalizedMulMod(B1p[i], B2p[i], q, qinv); + FFTRev1(B1p, B1p, L, r); + } + } + t = GetTime() - t; + iter = 2*iter; + } while(t < 1); + + iter = iter/2; + + iter = long((3/t)*iter) + 1; + + + double tvec[5]; + long w; + + for (w = 0; w < 5; w++) { + t = GetTime(); + for (j = 0; j < iter; j++) { + for (r = 0; r < nprimes; r++) { + long *A1p = A1[r].elts(); + long *A2p = A2[r].elts(); + long *B1p = B1[r].elts(); + long *B2p = B2[r].elts(); + long q = GetFFTPrime(r); + mulmod_t qinv = GetFFTPrimeInv(r); + + FFTFwd(B1p, A1p, L, r); + FFTFwd(B2p, A2p, L, r); + for (i = 0; i < N; i++) B1p[i] = NormalizedMulMod(B1p[i], B2p[i], q, qinv); + FFTRev1(B1p, B1p, L, r); + } + } + t = GetTime() - t; + tvec[w] = t; + } + + t = clean_data(tvec); + + t = floor((t/iter)*1e13); + + if (t < 0 || t >= 1e15) + printf("999999999999999 "); + else + printf("%015.0f ", t); + + printf(" [%ld] ", iter); + + print_flag(); + + return 0; +} diff --git a/thirdparty/linux/ntl/src/Poly2TimeTest.c b/thirdparty/linux/ntl/src/Poly2TimeTest.c new file mode 100644 index 0000000000..605f1c7bdc --- /dev/null +++ b/thirdparty/linux/ntl/src/Poly2TimeTest.c @@ -0,0 +1,186 @@ + +#include + +#include + +NTL_CLIENT + + +double clean_data(double *t) +{ + double x, y, z; + long i, ix, iy, n; + + x = t[0]; ix = 0; + y = t[0]; iy = 0; + + for (i = 1; i < 5; i++) { + if (t[i] < x) { + x = t[i]; + ix = i; + } + if (t[i] > y) { + y = t[i]; + iy = i; + } + } + + z = 0; n = 0; + for (i = 0; i < 5; i++) { + if (i != ix && i != iy) z+= t[i], n++; + } + + z = z/n; + + return z; +} + +void print_flag() +{ + + +#if (defined(NTL_TBL_REM)) +printf("TBL_REM "); +#elif (defined(NTL_TBL_REM_LL)) +printf("TBL_REM_LL "); +#else +printf("DEFAULT "); +#endif + + +printf("\n"); + +} + + +int main() +{ + +#if (defined(NTL_TBL_REM) && defined(NTL_GMP_LIP) && !(defined(NTL_HAVE_LL_TYPE) && NTL_ZZ_NBITS == NTL_BITS_PER_LONG)) + { + printf("999999999999999 "); + print_flag(); + return 0; + } + +#endif + +#if (defined(NTL_TBL_REM_LL) && !defined(NTL_GMP_LIP) && !defined(NTL_HAVE_LL_TYPE)) + { + printf("999999999999999 "); + print_flag(); + return 0; + } + +#endif + + SetSeed(ZZ(0)); + + 
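+ // seeding the generator with a fixed value makes each run of this
+ // benchmark deterministic, so timing differences between builds
+ // reflect the configuration flags rather than different random inputs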
long n, k; + + n = 200; + k = 10*NTL_ZZ_NBITS; + + ZZ p; + + RandomLen(p, k); + if (!IsOdd(p)) p++; + + + ZZ_p::init(p); // initialization + + ZZ_pX f, g, h, r1, r2, r3; + + random(g, n); // g = random polynomial of degree < n + random(h, n); // h = " " + random(f, n); // f = " " + + SetCoeff(f, n); // Sets coefficient of X^n to 1 + + // For doing arithmetic mod f quickly, one must pre-compute + // some information. + + ZZ_pXModulus F; + build(F, f); + + PlainMul(r1, g, h); // this uses classical arithmetic + PlainRem(r1, r1, f); + + MulMod(r2, g, h, F); // this uses the FFT + + MulMod(r3, g, h, f); // uses FFT, but slower + + // compare the results... + + if (r1 != r2) { + printf("999999999999999 "); + print_flag(); + return 0; + } + else if (r1 != r3) { + printf("999999999999999 "); + print_flag(); + return 0; + } + + double t; + long i; + long iter; + + n = 1024; + k = 1600; + RandomLen(p, k); + if (!IsOdd(p)) p++; + + ZZ_p::init(p); + + ZZ_pX a; + random(a, n); + long da = deg(a); + + ZZ_pXModRep modrep; + ToZZ_pXModRep(modrep, a, 0, da); + + iter = 1; + + do { + t = GetTime(); + for (i = 0; i < iter; i++) { + ToZZ_pXModRep(modrep, a, 0, da); + } + t = GetTime() - t; + iter = 2*iter; + } while(t < 1); + + iter = iter/2; + + iter = long((3/t)*iter) + 1; + + double tvec[5]; + long w; + + for (w = 0; w < 5; w++) { + t = GetTime(); + for (i = 0; i < iter; i++) { + ToZZ_pXModRep(modrep, a, 0, da); + } + t = GetTime() - t; + tvec[w] = t; + } + + + t = clean_data(tvec); + + t = floor((t/iter)*1e12); + + if (t < 0 || t >= 1e15) + printf("999999999999999 "); + else + printf("%015.0f ", t); + + printf(" [%ld] ", iter); + + print_flag(); + + return 0; +} diff --git a/thirdparty/linux/ntl/src/Poly3TimeTest.c b/thirdparty/linux/ntl/src/Poly3TimeTest.c new file mode 100644 index 0000000000..8a814bd3d1 --- /dev/null +++ b/thirdparty/linux/ntl/src/Poly3TimeTest.c @@ -0,0 +1,188 @@ + +#include + +#include + +NTL_CLIENT + + +double clean_data(double *t) +{ + double x, y, z; + long i, ix, iy, n; + + x = t[0]; ix = 0; + y = t[0]; iy = 0; + + for (i = 1; i < 5; i++) { + if (t[i] < x) { + x = t[i]; + ix = i; + } + if (t[i] > y) { + y = t[i]; + iy = i; + } + } + + z = 0; n = 0; + for (i = 0; i < 5; i++) { + if (i != ix && i != iy) z+= t[i], n++; + } + + z = z/n; + + return z; +} + +void print_flag() +{ + + +#if (defined(NTL_CRT_ALTCODE)) +printf("CRT_ALTCODE "); +#else +printf("DEFAULT "); +#endif + + +printf("\n"); + +} + + +int main() +{ + +#if (defined(NTL_CRT_ALTCODE) && !(defined(NTL_HAVE_LL_TYPE) && NTL_ZZ_NBITS == NTL_BITS_PER_LONG)) + + { + printf("999999999999999 "); + print_flag(); + return 0; + } + + +#endif + + SetSeed(ZZ(0)); + + long n, k; + + n = 1024; + k = 30*NTL_SP_NBITS; + + ZZ p; + + RandomLen(p, k); + if (!IsOdd(p)) p++; + + + ZZ_p::init(p); // initialization + + ZZ_pX f, g, h, r1, r2, r3; + + random(g, n); // g = random polynomial of degree < n + random(h, n); // h = " " + random(f, n); // f = " " + + SetCoeff(f, n); // Sets coefficient of X^n to 1 + + // For doing arithmetic mod f quickly, one must pre-compute + // some information. + + ZZ_pXModulus F; + build(F, f); + + PlainMul(r1, g, h); // this uses classical arithmetic + PlainRem(r1, r1, f); + + MulMod(r2, g, h, F); // this uses the FFT + + MulMod(r3, g, h, f); // uses FFT, but slower + + // compare the results... 
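+ // as in the other *TimeTest programs, a mismatch below falls through
+ // to the sentinel value rather than aborting, so the harness that
+ // reads this output (see the tuning-Wizard note near the end of this
+ // file) can tell "misconfigured" apart from "slow"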
+ + if (r1 != r2) { + printf("999999999999999 "); + print_flag(); + return 0; + } + else if (r1 != r3) { + printf("999999999999999 "); + print_flag(); + return 0; + } + + double t; + long i; + long iter; + + ZZ_pX a, b, c; + random(a, n); + random(b, n); + long da = deg(a); + long db = deg(b); + long dc = da + db; + long l = NextPowerOfTwo(dc+1); + + FFTRep arep, brep, crep; + ToFFTRep(arep, a, l, 0, da); + ToFFTRep(brep, b, l, 0, db); + + mul(crep, arep, brep); + + ZZ_pXModRep modrep; + FromFFTRep(modrep, crep); + + FromZZ_pXModRep(c, modrep, 0, dc); + + iter = 1; + + do { + t = GetTime(); + for (i = 0; i < iter; i++) { + FromZZ_pXModRep(c, modrep, 0, dc); + } + t = GetTime() - t; + iter = 2*iter; + } while(t < 1); + + iter = iter/2; + + iter = long((3/t)*iter) + 1; + + double tvec[5]; + long w; + + for (w = 0; w < 5; w++) { + t = GetTime(); + for (i = 0; i < iter; i++) { + FromZZ_pXModRep(c, modrep, 0, dc); + } + t = GetTime() - t; + tvec[w] = t; + } + + + t = clean_data(tvec); + + t = floor((t/iter)*1e12); + + // The following is just to test some tuning Wizard logic -- + // be sure to get rid of this!! +#if (defined(NTL_CRT_ALTCODE)) + // t *= 1.12; +#endif + + if (t < 0 || t >= 1e15) + printf("999999999999999 "); + else + printf("%015.0f ", t); + + printf(" [%ld] ", iter); + + print_flag(); + + return 0; +} diff --git a/thirdparty/linux/ntl/src/QuadTest.c b/thirdparty/linux/ntl/src/QuadTest.c new file mode 100644 index 0000000000..20cd6caf18 --- /dev/null +++ b/thirdparty/linux/ntl/src/QuadTest.c @@ -0,0 +1,108 @@ + +#include + +NTL_CLIENT + +int main() +{ + quad_float a, b, c, d; + + quad_float::SetOutputPrecision(25); + + if (PrecisionOK()) + cout << "Precision OK\n"; + else + cout << "Precision not OK\n"; + + + cin >> a; + cout << a << "\n"; + + cin >> b; + cout << b << "\n"; + + c = a + b; + d = a; + d += b; + cout << c << "\n"; + cout << d << "\n"; + + c = a - b; + d = a; + d -= b; + cout << c << "\n"; + cout << d << "\n"; + + c = a * b; + d = a; + d *= b; + cout << c << "\n"; + cout << d << "\n"; + + c = a / b; + d = a; + d /= b; + cout << c << "\n"; + cout << d << "\n"; + + c = -a; + cout << c << "\n"; + + c = sqrt(a); + cout << c << "\n"; + + power(c, to_quad_float(10), 20); + cout << c << "\n"; + + { + + long n, n1; + int shamt = min(NTL_BITS_PER_LONG,2*NTL_DOUBLE_PRECISION); + + n = to_long((1UL << (shamt-1)) - 1UL); + c = to_quad_float(n); + n1 = to_long(c); + + if (n1 == n) + cout << "long conversion OK\n"; + else + cout << "long conversion not OK\n"; + + n = to_long(1UL << (shamt-1)); + c = to_quad_float(n); + n1 = to_long(c); + + if (n1 == n) + cout << "long conversion OK\n"; + else + cout << "long conversion not OK\n"; + + } + + { + + unsigned long n; + ZZ n1; + int shamt = min(NTL_BITS_PER_LONG,2*NTL_DOUBLE_PRECISION); + + n = (1UL << (shamt-1)) - 1UL; + c = to_quad_float(n); + n1 = to_ZZ(c); + + if (n1 == to_ZZ(n)) + cout << "ulong conversion OK\n"; + else + cout << "ulong conversion not OK\n"; + + n = 1UL << (shamt-1); + c = to_quad_float(n); + n1 = to_ZZ(c); + + if (n1 == to_ZZ(n)) + cout << "ulong conversion OK\n"; + else + cout << "ulong conversion not OK\n"; + + } + +} diff --git a/thirdparty/linux/ntl/src/QuadTestIn b/thirdparty/linux/ntl/src/QuadTestIn new file mode 100644 index 0000000000..1e1e0a092a --- /dev/null +++ b/thirdparty/linux/ntl/src/QuadTestIn @@ -0,0 +1,2 @@ +1.333333333333333333333333 +2.555555555555555555555555 diff --git a/thirdparty/linux/ntl/src/QuadTestOut b/thirdparty/linux/ntl/src/QuadTestOut new file mode 100644 index 
0000000000..929fe735b5 --- /dev/null +++ b/thirdparty/linux/ntl/src/QuadTestOut @@ -0,0 +1,18 @@ +Precision OK +1.333333333333333333333333 +2.555555555555555555555555 +3.888888888888888888888888 +3.888888888888888888888888 +-1.222222222222222222222222 +-1.222222222222222222222222 +3.407407407407407407407406 +3.407407407407407407407406 +0.5217391304347826086956522 +0.5217391304347826086956522 +-1.333333333333333333333333 +1.154700538379251529018297 +0.1e21 +long conversion OK +long conversion OK +ulong conversion OK +ulong conversion OK diff --git a/thirdparty/linux/ntl/src/QuickTest.c b/thirdparty/linux/ntl/src/QuickTest.c new file mode 100644 index 0000000000..7e6f2b283f --- /dev/null +++ b/thirdparty/linux/ntl/src/QuickTest.c @@ -0,0 +1,478 @@ + +#include +#include +#include +#include + +#include + +NTL_CLIENT + + +#define make_string_aux(x) #x +#define make_string(x) make_string_aux(x) + +int SmallModulusTest(long p, long n) +{ + zz_pBak bak; + + bak.save(); + + + zz_p::init(p); + + zz_pX a, b, c, cc; + + random(a, n); + random(b, n); + PlainMul(c, a, b); + FFTMul(cc, a, b); + + int res; + res = (c != cc); + + bak.restore(); + + return res; +} + + +int GF2X_test() +{ + GF2X a, b, c, c1; + + long n; + +#ifdef NTL_GF2X_LIB + for (n = 32; n <= (1L << 18); n = n << 1) { + random(a, n); + random(b, n); + OldMul(c, a, b); + mul(c1, a, b); + if (c1 != c) return 1; + } +#endif + + return 0; +} + +void GF2X_time() +{ + long n = 1000000L; + long iter; + + GF2X a, b, c; + + double t; + long i; + + random(a, n); + random(b, n); + + mul(c, a, b); + + iter = 0; + do { + iter = iter ? (2*iter) : 1; + t = GetTime(); + for (i = 0; i < iter; i++) + mul(c, a, b); + t = GetTime() - t; + } while (t < 0.5); + + cerr << "time to multiply polynomials over GF(2) \n of degree < 1000000: " + << (t/iter) << "s\n"; + +#ifdef NTL_GF2X_LIB + OldMul(c, a, b); + + iter = 0; + do { + iter = iter ? 
(2*iter) : 1; + t = GetTime(); + for (i = 0; i < iter; i++) + OldMul(c, a, b); + t = GetTime() - t; + } while (t < 0.5); + + cerr << " **** using old code: " << (t/iter) << "s\n"; +#endif + +} + + + +ZZX KarMul(const ZZX& a, const ZZX& b) +{ + ZZX res; + KarMul(res, a, b); + return res; +} + + + +int main() +{ + SetSeed(ZZ(0)); + + + cerr << "This is NTL version " << NTL_VERSION << "\n"; + + cerr << "Hardware charactersitics:\n"; + cerr << "NTL_BITS_PER_LONG = " << NTL_BITS_PER_LONG << "\n"; + cerr << "NTL_ZZ_NBITS = " << NTL_ZZ_NBITS << "\n"; + cerr << "NTL_SP_NBITS = " << NTL_SP_NBITS << "\n"; + +#ifdef NTL_HAVE_LL_TYPE + cerr << "NTL_HAVE_LL_TYPE\n"; +#endif + +#ifdef NTL_HAVE_BUILTIN_CLZL + cerr << "NTL_HAVE_BUILTIN_CLZL\n"; +#endif + +#ifdef NTL_HAVE_AVX + cerr << "NTL_HAVE_AVX\n"; +#endif + +#ifdef NTL_HAVE_FMA + cerr << "NTL_HAVE_FMA\n"; +#endif + + + +#ifdef NTL_LONGDOUBLE_SP_MULMOD + cerr << "NTL_LONGDOUBLE_SP_MULMOD\n"; +#endif + +#ifdef NTL_LONGLONG_SP_MULMOD + cerr << "NTL_LONGLONG_SP_MULMOD\n"; +#endif + + cerr << "\n"; + + + + + cerr << "Basic Configuration Options:\n"; + + + +#ifdef NTL_LEGACY_NO_NAMESPACE + cerr << "NTL_LEGACY_NO_NAMESPACE\n"; +#endif + + +#ifdef NTL_LEGACY_INPUT_ERROR + cerr << "NTL_LEGACY_INPUT_ERROR\n"; +#endif + + +#ifdef NTL_THREADS + cerr << "NTL_THREADS\n"; +#endif + +#ifdef NTL_DISABLE_TLS_HACK + cerr << "NTL_DISABLE_TLS_HACK\n"; +#endif + +#ifdef NTL_ENABLE_TLS_HACK + cerr << "NTL_ENABLE_TLS_HACK\n"; +#endif + + +#ifdef NTL_EXCEPTIONS + cerr << "NTL_EXCEPTIONS\n"; +#endif + +#ifdef NTL_THREAD_BOOST + cerr << "NTL_THREAD_BOOST\n"; +#endif + + +#ifdef NTL_LEGACY_SP_MULMOD + cerr << "NTL_LEGACY_SP_MULMOD\n"; +#endif + + +#ifdef NTL_DISABLE_LONGDOUBLE + cerr << "NTL_DISABLE_LONGDOUBLE\n"; +#endif + + +#ifdef NTL_DISABLE_LONGLONG + cerr << "NTL_DISABLE_LONGLONG\n"; +#endif + +#ifdef NTL_DISABLE_LL_ASM + cerr << "NTL_DISABLE_LL_ASM\n"; +#endif + +#ifdef NTL_MAXIMIZE_SP_NBITS + cerr << "NTL_MAXIMIZE_SP_NBITS\n"; +#endif + + + + +#ifdef NTL_GMP_LIP + cerr << "NTL_GMP_LIP\n"; +#endif + + +#ifdef NTL_GF2X_LIB + cerr << "NTL_GF2X_LIB\n"; +#endif + + + +#ifdef NTL_LONG_LONG_TYPE + cerr << "NTL_LONG_LONG_TYPE: "; + cerr << make_string(NTL_LONG_LONG_TYPE) << "\n"; +#endif + +#ifdef NTL_UNSIGNED_LONG_LONG_TYPE + cerr << "NTL_UNSIGNED_LONG_LONG_TYPE: "; + cerr << make_string(NTL_UNSIGNED_LONG_LONG_TYPE) << "\n"; +#endif + + +#ifdef NTL_X86_FIX + cerr << "NTL_X86_FIX\n"; +#endif + +#ifdef NTL_NO_X86_FIX + cerr << "NTL_NO_X86_FIX\n"; +#endif + +#ifdef NTL_NO_INIT_TRANS + cerr << "NTL_NO_INIT_TRANS\n"; +#endif + +#ifdef NTL_CLEAN_INT + cerr << "NTL_CLEAN_INT\n"; +#endif + +#ifdef NTL_CLEAN_PTR + cerr << "NTL_CLEAN_PTR\n"; +#endif + +#ifdef NTL_RANGE_CHECK + cerr << "NTL_RANGE_CHECK\n"; +#endif + + +cerr << "\n"; +cerr << "Resolution of double-word types:\n"; +cerr << make_string(NTL_LL_TYPE) << "\n"; +cerr << make_string(NTL_ULL_TYPE) << "\n"; + + +cerr << "\n"; +cerr << "Performance Options:\n"; + +#ifdef NTL_LONG_LONG + cerr << "NTL_LONG_LONG\n"; +#endif + +#ifdef NTL_AVOID_FLOAT + cerr << "NTL_AVOID_FLOAT\n"; +#endif + + +#ifdef NTL_SPMM_ULL + cerr << "NTL_SPMM_ULL\n"; +#endif + + +#ifdef NTL_SPMM_ASM + cerr << "NTL_SPMM_ASM\n"; +#endif + + + + +#ifdef NTL_AVOID_BRANCHING + cerr << "NTL_AVOID_BRANCHING\n"; +#endif + + +#ifdef NTL_FFT_BIGTAB + cerr << "NTL_FFT_BIGTAB\n"; +#endif + +#ifdef NTL_FFT_LAZYMUL + cerr << "NTL_FFT_LAZYMUL\n"; +#endif + + +#ifdef NTL_TBL_REM + cerr << "NTL_TBL_REM\n"; +#endif + + +#ifdef NTL_TBL_REM_LL + cerr << "NTL_TBL_REM_LL\n"; +#endif 
+ +#ifdef NTL_CRT_ALTCODE + cerr << "NTL_CRT_ALTCODE\n"; +#endif + +#ifdef NTL_CRT_ALTCODE_SMALL + cerr << "NTL_CRT_ALTCODE_SMALL\n"; +#endif + +#ifdef NTL_GF2X_ALTCODE + cerr << "NTL_GF2X_ALTCODE\n"; +#endif + +#ifdef NTL_GF2X_ALTCODE1 + cerr << "NTL_GF2X_ALTCODE1\n"; +#endif + +#ifdef NTL_GF2X_NOINLINE + cerr << "NTL_GF2X_NOINLINE\n"; +#endif + +#ifdef NTL_PCLMUL + cerr << "NTL_PCLMUL\n"; +#endif + + + cerr << "\n\n"; + + cerr << "running tests"; + + long n, k, i; + + n = 250; + k = 16000; + + ZZ p; + + + for (i = 0; i < 15; i++) { + // cerr << n << "/" << k; + cerr << "."; + RandomLen(p, k); + ZZ_p::init(p); + + + ZZ_pX a, b, c, c1; + + + random(a, n); + random(b, n); + + FFTMul(c, a, b); + //cerr << ZZ_pInfo->FFTInfo->NumPrimes; + + c1 = conv( KarMul( conv(a), conv(b) ) ); + + if (c1 != c) { + cerr << "ZZ_pX mul failed!\n"; + return 1; + } + + n = long(n * 1.35); + k = long(k / 1.414); + } + + + // small prime tests...I've made some changes in v5.3 + // that should be checked on various platforms, so + // we might as well check them here. + + if (SmallModulusTest(17, 1000)) { + cerr << "first SmallModulusTest failed!!\n"; + return 1; + } + + if (SmallModulusTest((1L << (NTL_SP_NBITS))-1, 1000)) { + cerr << "second SmallModulusTest failed!!\n"; + return 1; + } + + // Test gf2x code.... + + if (GF2X_test()) { + cerr << "GF2X test failed!\n"; + return 1; + } + + + cerr << "OK\n"; + + ZZ x1, x2, x3, x4; + double t; + + RandomLen(x1, 1024); + RandomBnd(x2, x1); + RandomBnd(x3, x1); + + mul(x4, x2, x3); + + t = GetTime(); + for (i = 0; i < 100000; i++) + mul(x4, x2, x3); + t = GetTime()-t; + + cerr << "time for 1024-bit mul: " << t*10 << "us"; + cerr << "\n"; + + rem(x2, x4, x1); + + t = GetTime(); + for (i = 0; i < 100000; i++) + rem(x2, x4, x1); + t = GetTime()-t; + + cerr << "time for 2048/1024-bit rem: " << t*10 << "us"; + cerr << "\n"; + + + GenPrime(p, 1024); + RandomBnd(x1, p); + if (IsZero(x1)) set(x1); + + InvMod(x2, x1, p); + + t = GetTime(); + for (i = 0; i < 1000; i++) + InvMod(x2, x1, p); + t = GetTime()-t; + + cerr << "time for 1024-bit modular inverse: " << t*1000 << "us"; + cerr << "\n"; + + + + // test modulus switching + + n = 1024; + k = 1024; + RandomLen(p, k); + + ZZ_p::init(p); + if (!IsOdd(p)) p++; + + ZZ_pX j1, j2, j3; + + random(j1, n); + random(j2, n); + + mul(j3, j1, j2); + + t = GetTime(); + for (i = 0; i < 500; i++) mul(j3, j1, j2); + t = GetTime()-t; + + cerr << "time to multiply degree 1023 polynomials\n modulo a 1024-bit number: "; + cerr << (t/500) << "s"; + cerr << "\n"; + + GF2X_time(); + + return 0; +} diff --git a/thirdparty/linux/ntl/src/RR.c b/thirdparty/linux/ntl/src/RR.c new file mode 100644 index 0000000000..252b14c84f --- /dev/null +++ b/thirdparty/linux/ntl/src/RR.c @@ -0,0 +1,2123 @@ + +#include + + +#include + +NTL_START_IMPL + + +// FIXME: I just converted all the static RR's to thread local static RR's. +// Perhaps I should at some point make the equivalent of an RR Register. +// But be careful: certain computations, like ComputePi, actually cache +// results, so that will take more work. In any case, RR is not a high +// priority right now. 
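+// Representation note: an RR holds a pair (x, e) standing for x * 2^e.
+// normalize1 below re-establishes the class invariant after every
+// operation: x carries at most RR::prec bits, trailing zero bits are
+// folded into e via MakeOdd (so a nonzero x is odd), rounding is done
+// through ZZ_RoundCorrection, and exponents at or beyond +/-NTL_OVFBND
+// raise overflow/underflow errors.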
+ + +NTL_CHEAP_THREAD_LOCAL +long RR::prec = 150; + +void RR::SetPrecision(long p) +{ + if (p < 53) + p = 53; + + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("RR: precision too high"); + + prec = p; +} + +NTL_CHEAP_THREAD_LOCAL +long RR::oprec = 10; + +void RR::SetOutputPrecision(long p) +{ + if (p < 1) + p = 1; + + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("RR: output precision too high"); + + oprec = p; +} + + + +static +void normalize1(RR& z, const ZZ& y_x, long y_e, long prec, long residual) +{ + long len = NumBits(y_x); + + if (len > prec) { + long correction = ZZ_RoundCorrection(y_x, len - prec, residual); + + RightShift(z.x, y_x, len - prec); + + if (correction) + add(z.x, z.x, correction); + + z.e = y_e + len - prec; + } + else if (len == 0) { + clear(z.x); + z.e = 0; + } + else { + z.x = y_x; + z.e = y_e; + } + + if (!IsOdd(z.x)) + z.e += MakeOdd(z.x); + + if (z.e >= NTL_OVFBND) + ResourceError("RR: overflow"); + + if (z.e <= -NTL_OVFBND) + ResourceError("RR: underflow"); +} + +void normalize(RR& z, const RR& y, long residual = 0) +{ + normalize1(z, y.x, y.e, RR::prec, residual); +} + +void MakeRR(RR& z, const ZZ& a, long e) +{ + if (e >= NTL_OVFBND) + ResourceError("MakeRR: e too big"); + + if (e <= -NTL_OVFBND) + ResourceError("MakeRR: e too small"); + + normalize1(z, a, e, RR::prec, 0); +} + +void MakeRRPrec(RR& x, const ZZ& a, long e, long p) +{ + if (p < 1) + LogicError("MakeRRPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("MakeRRPrec: precsion too big"); + + RRPush push; + RR::prec = p; + MakeRR(x, a, e); +} + +void random(RR& z) +{ + NTL_TLS_LOCAL(RR, t); + RandomBits(t.x, RR::prec); + t.e = -RR::prec; + normalize(z, t); +} + + +static inline +void xcopy(RR& x, const RR& a) + { normalize(x, a); } + +// xcopy emulates old assignment semantics... +// many routines here implicitly assume assignment normalizes, +// but that is no longer the case as of v3.0. 
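+// For example (a sketch, with RR::prec at its default of 150):
+//
+//    RR a, b, c;
+//    InvPrec(a, to_RR(3), 200);  // a holds 1/3 rounded to 200 bits
+//    b = a;                      // plain copy: b keeps all 200 bits
+//    xcopy(c, a);                // c is rounded back to 150 bits
+//
+// hence routines below reach for xcopy (i.e. normalize) whenever the
+// result must be rounded to the current precision.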
+ +void ConvPrec(RR& x, const RR& a, long p) +{ + if (p < 1) + LogicError("ConvPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("ConvPrec: precsion too big"); + + RRPush push; + RR::prec = p; + normalize(x, a); +} + +void RoundToPrecision(RR& x, const RR& a, long p) +{ + ConvPrec(x, a, p); +} + + +void conv(RR& x, const RR& a) +{ + normalize(x, a); +} + + +long IsZero(const RR& a) +{ + return IsZero(a.x); +} + +long IsOne(const RR& a) +{ + return a.e == 0 && IsOne(a.x); +} + +long sign(const RR& a) +{ + return sign(a.x); +} + +void clear(RR& z) +{ + z.e = 0; + clear(z.x); +} + +void set(RR& z) +{ + z.e = 0; + set(z.x); +} + + +void add(RR& z, const RR& a, const RR& b) +{ + NTL_TLS_LOCAL(RR, t); + + if (IsZero(a.x)) { + xcopy(z, b); + return; + } + + if (IsZero(b.x)) { + xcopy(z, a); + return; + } + + if (a.e > b.e) { + if (a.e-b.e - max(RR::prec-NumBits(a.x),0) >= NumBits(b.x) + 2) + normalize(z, a, sign(b)); + else { + LeftShift(t.x, a.x, a.e-b.e); + add(t.x, t.x, b.x); + t.e = b.e; + normalize(z, t); + } + } + else if (a.e < b.e) { + if (b.e-a.e - max(RR::prec-NumBits(b.x),0) >= NumBits(a.x) + 2) + normalize(z, b, sign(a)); + else { + LeftShift(t.x, b.x, b.e-a.e); + add(t.x, t.x, a.x); + t.e = a.e; + normalize(z, t); + } + } + else { + add(t.x, a.x, b.x); + t.e = a.e; + normalize(z, t); + } +} + +void AddPrec(RR& x, const RR& a, const RR& b, long p) +{ + if (p < 1) + LogicError("AddPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("AddPrec: bad precsion"); + + RRPush push; + RR::prec = p; + add(x, a, b); +} + +void sub(RR& z, const RR& a, const RR& b) +{ + NTL_TLS_LOCAL(RR, t); + + if (IsZero(a.x)) { + negate(z, b); + return; + } + + if (IsZero(b.x)) { + xcopy(z, a); + return; + } + + if (a.e > b.e) { + if (a.e-b.e - max(RR::prec-NumBits(a.x),0) >= NumBits(b.x) + 2) + normalize(z, a, -sign(b)); + else { + LeftShift(t.x, a.x, a.e-b.e); + sub(t.x, t.x, b.x); + t.e = b.e; + xcopy(z, t); + } + } + else if (a.e < b.e) { + if (b.e-a.e - max(RR::prec-NumBits(b.x),0) >= NumBits(a.x) + 2) { + normalize(z, b, -sign(a)); + negate(z.x, z.x); + } + else { + LeftShift(t.x, b.x, b.e-a.e); + sub(t.x, a.x, t.x); + t.e = a.e; + xcopy(z, t); + } + } + else { + sub(t.x, a.x, b.x); + t.e = a.e; + normalize(z, t); + } +} + +void SubPrec(RR& x, const RR& a, const RR& b, long p) +{ + if (p < 1) + LogicError("SubPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("SubPrec: bad precsion"); + + RRPush push; + RR::prec = p; + sub(x, a, b); +} + +void negate(RR& z, const RR& a) +{ + xcopy(z, a); + negate(z.x, z.x); +} + +void NegatePrec(RR& x, const RR& a, long p) +{ + if (p < 1) + LogicError("NegatePrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("NegatePrec: bad precsion"); + + RRPush push; + RR::prec = p; + negate(x, a); +} + +void abs(RR& z, const RR& a) +{ + xcopy(z, a); + abs(z.x, z.x); +} + +void AbsPrec(RR& x, const RR& a, long p) +{ + if (p < 1) + LogicError("AbsPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("AbsPrec: bad precsion"); + + RRPush push; + RR::prec = p; + abs(x, a); +} + + +void mul(RR& z, const RR& a, const RR& b) +{ + NTL_TLS_LOCAL(RR, t); + + mul(t.x, a.x, b.x); + t.e = a.e + b.e; + xcopy(z, t); +} + +void MulPrec(RR& x, const RR& a, const RR& b, long p) +{ + if (p < 1) + LogicError("MulPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("MulPrec: bad precsion"); + + RRPush push; + RR::prec = p; + mul(x, a, b); +} + + +void sqr(RR& z, const RR& a) +{ + NTL_TLS_LOCAL(RR, t); + + sqr(t.x, 
a.x); + t.e = a.e + a.e; + xcopy(z, t); +} + +void SqrPrec(RR& x, const RR& a, long p) +{ + if (p < 1) + LogicError("SqrPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("SqrPrec: bad precsion"); + + RRPush push; + RR::prec = p; + sqr(x, a); +} + + + +void div(RR& z, const RR& a, const RR& b) +{ + if (IsZero(b)) + ArithmeticError("RR: division by zero"); + + if (IsZero(a)) { + clear(z); + return; + } + + long la = NumBits(a.x); + long lb = NumBits(b.x); + + long neg = (sign(a) != sign(b)); + + long k = RR::prec - la + lb + 1; + if (k < 0) k = 0; + + NTL_TLS_LOCAL(RR, t); + NTL_ZZRegister(A); + NTL_ZZRegister(B); + NTL_ZZRegister(R); + + abs(A, a.x); + LeftShift(A, A, k); + + abs(B, b.x); + DivRem(t.x, R, A, B); + + t.e = a.e - b.e - k; + + normalize(z, t, !IsZero(R)); + + if (neg) + negate(z.x, z.x); +} + +void DivPrec(RR& x, const RR& a, const RR& b, long p) +{ + if (p < 1) + LogicError("DivPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("DivPrec: bad precsion"); + + RRPush push; + RR::prec = p; + div(x, a, b); +} + + +void SqrRoot(RR& z, const RR& a) +{ + if (sign(a) < 0) + ArithmeticError("RR: attempt to take square root of negative number"); + + if (IsZero(a)) { + clear(z); + return; + } + + RR t; + ZZ T1, T2; + long k; + + k = 2*RR::prec - NumBits(a.x) + 1; + + if (k < 0) k = 0; + + if ((a.e - k) & 1) k++; + + LeftShift(T1, a.x, k); + // since k >= 2*prec - bits(a) + 1, T1 has at least 2*prec+1 bits, + // thus T1 >= 2^(2*prec) + + SqrRoot(t.x, T1); // t.x >= 2^prec thus t.x contains the round bit + t.e = (a.e - k)/2; + sqr(T2, t.x); + + // T1-T2 is the (lower part of the) sticky bit + normalize(z, t, T2 < T1); +} + + + + +void SqrRootPrec(RR& x, const RR& a, long p) +{ + if (p < 1) + LogicError("SqrRootPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("SqrRootPrec: bad precsion"); + + RRPush push; + RR::prec = p; + SqrRoot(x, a); +} + + + + +long compare(const RR& a, const RR& b) +{ + NTL_TLS_LOCAL(RR, t); + + SubPrec(t, a, b, 1); + return sign(t); +} + + + +long operator==(const RR& a, const RR& b) +{ + return a.e == b.e && a.x == b.x; +} + + +void trunc(RR& z, const RR& a) +{ + NTL_TLS_LOCAL(RR, t); + + if (a.e >= 0) + xcopy(z, a); + else { + RightShift(t.x, a.x, -a.e); + t.e = 0; + xcopy(z, t); + } +} + +void TruncPrec(RR& x, const RR& a, long p) +{ + if (p < 1) + LogicError("TruncPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("TruncPrec: bad precsion"); + + RRPush push; + RR::prec = p; + trunc(x, a); +} + +void floor(RR& z, const RR& a) +{ + NTL_TLS_LOCAL(RR, t); + + if (a.e >= 0) + xcopy(z, a); + else { + RightShift(t.x, a.x, -a.e); + if (sign(a.x) < 0) + add(t.x, t.x, -1); + t.e = 0; + xcopy(z, t); + } +} + +void FloorPrec(RR& x, const RR& a, long p) +{ + if (p < 1) + LogicError("FloorPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("FloorPrec: bad precsion"); + + RRPush push; + RR::prec = p; + floor(x, a); +} + +void ceil(RR& z, const RR& a) +{ + NTL_TLS_LOCAL(RR, t); + + if (a.e >= 0) + xcopy(z, a); + else { + RightShift(t.x, a.x, -a.e); + if (sign(a.x) > 0) + add(t.x, t.x, 1); + t.e = 0; + xcopy(z, t); + } +} + +void CeilPrec(RR& x, const RR& a, long p) +{ + if (p < 1) + LogicError("CeilPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("CeilPrec: bad precsion"); + + RRPush push; + RR::prec = p; + ceil(x, a); +} + +void round(RR& z, const RR& a) +{ + if (a.e >= 0) { + xcopy(z, a); + return; + } + + long len = NumBits(a.x); + + if (-a.e > len) { + z = 0; + return; + } + + 
if (-a.e == len) { + if (len == 1) + z = 0; + else + z = sign(a.x); + + return; + } + + NTL_TLS_LOCAL(RR, t); + ConvPrec(t, a, len+a.e); + xcopy(z, t); +} + +void RoundPrec(RR& x, const RR& a, long p) +{ + if (p < 1) + LogicError("RoundPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("RoundPrec: bad precsion"); + + RRPush push; + RR::prec = p; + round(x, a); +} + + + + +void conv(RR& z, const ZZ& a) +{ + normalize1(z, a, 0, RR::prec, 0); +} + +void ConvPrec(RR& x, const ZZ& a, long p) +{ + if (p < 1) + LogicError("ConvPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("ConvPrec: bad precsion"); + + RRPush push; + RR::prec = p; + conv(x, a); +} + + +void conv(RR& z, long a) +{ + if (a == 0) { + clear(z); + return; + } + + if (a == 1) { + set(z); + return; + } + + NTL_ZZRegister(t); + t = a; + conv(z, t); +} + +void ConvPrec(RR& x, long a, long p) +{ + if (p < 1) + LogicError("ConvPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("ConvPrec: bad precsion"); + + RRPush push; + RR::prec = p; + conv(x, a); +} + +void conv(RR& z, unsigned long a) +{ + if (a == 0) { + clear(z); + return; + } + + if (a == 1) { + set(z); + return; + } + + NTL_ZZRegister(t); + conv(t, a); + conv(z, t); +} + +void ConvPrec(RR& x, unsigned long a, long p) +{ + if (p < 1) + LogicError("ConvPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("ConvPrec: bad precsion"); + + RRPush push; + RR::prec = p; + conv(x, a); +} + + +void conv(RR& z, double a) +{ + if (a == 0) { + clear(z); + return; + } + + if (a == 1) { + set(z); + return; + } + + if (!IsFinite(&a)) + ArithmeticError("RR: conversion of a non-finite double"); + + int e; + double f; + NTL_TLS_LOCAL(RR, t); + + f = frexp(a, &e); + + f = f * NTL_FDOUBLE_PRECISION; + f = f * 4; + + conv(t.x, f); + t.e = e - (NTL_DOUBLE_PRECISION + 1); + + xcopy(z, t); +} + +void ConvPrec(RR& x, double a, long p) +{ + if (p < 1) + LogicError("ConvPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("ConvPrec: bad precsion"); + + RRPush push; + RR::prec = p; + conv(x, a); +} + + +void conv(ZZ& z, const RR& a) +{ + if (a.e >= 0) + LeftShift(z, a.x, a.e); + else { + long sgn = sign(a.x); + RightShift(z, a.x, -a.e); + if (sgn < 0) + sub(z, z, 1); + } +} + +void CeilToZZ(ZZ& z, const RR& a) +{ + if (a.e >= 0) + LeftShift(z, a.x, a.e); + else { + long sgn = sign(a.x); + RightShift(z, a.x, -a.e); + if (sgn > 0) + add(z, z, 1); + } +} + +void TruncToZZ(ZZ& z, const RR& a) +{ + if (a.e >= 0) + LeftShift(z, a.x, a.e); + else + RightShift(z, a.x, -a.e); +} + + +void RoundToZZ(ZZ& z, const RR& a) +{ + if (a.e >= 0) { + LeftShift(z, a.x, a.e); + return; + } + + long len = NumBits(a.x); + + if (-a.e > len) { + z = 0; + return; + } + + if (-a.e == len) { + if (len == 1) + z = 0; + else + z = sign(a.x); + + return; + } + + NTL_TLS_LOCAL(RR, t); + + ConvPrec(t, a, len+a.e); + + LeftShift(z, t.x, t.e); +} + + +void conv(long& z, const RR& a) +{ + ZZ t; + if (a.e >= NTL_BITS_PER_LONG) + z = 0; + else { + conv(t, a); + conv(z, t); + } +} + +void conv(double& z, const RR& aa) +{ + double x; + NTL_TLS_LOCAL(RR, a); + + ConvPrec(a, aa, NTL_DOUBLE_PRECISION); + // round to NTL_DOUBLE_PRECISION bits to avoid double overflow + + conv(x, a.x); + z = _ntl_ldexp(x, a.e); +} + + + + +void add(RR& z, const RR& a, double b) +{ + NTL_TLS_LOCAL(RR, B); + B = b; + add(z, a, B); +} + + + +void sub(RR& z, const RR& a, double b) +{ + NTL_TLS_LOCAL(RR, B); + B = b; + sub(z, a, B); +} + +void sub(RR& z, double a, const RR& b) +{ + 
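+   // as in the other mixed RR/double routines here, promote the
+   // double to a thread-local RR once and defer to the RR/RR version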
NTL_TLS_LOCAL(RR, A); + A = a; + sub(z, A, b); +} + + + +void mul(RR& z, const RR& a, double b) +{ + NTL_TLS_LOCAL(RR, B); + B = b; + mul(z, a, B); +} + + +void div(RR& z, const RR& a, double b) +{ + NTL_TLS_LOCAL(RR, B); + B = b; + div(z, a, B); +} + +void div(RR& z, double a, const RR& b) +{ + NTL_TLS_LOCAL(RR, A); + A = a; + div(z, A, b); +} + + +void inv(RR& z, const RR& a) +{ + NTL_TLS_LOCAL_INIT(RR, one, (to_RR(1))); + div(z, one, a); +} + +void InvPrec(RR& x, const RR& a, long p) +{ + if (p < 1) + LogicError("InvPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("InvPrec: bad precsion"); + + RRPush push; + RR::prec = p; + inv(x, a); +} + + +long compare(const RR& a, double b) +{ + if (b == 0) return sign(a); + + NTL_TLS_LOCAL(RR, B); + B = b; + return compare(a, B); +} + + +long operator==(const RR& a, double b) +{ + if (b == 0) return IsZero(a); + if (b == 1) return IsOne(a); + + NTL_TLS_LOCAL(RR, B); + B = b; + return a == B; +} + + +void power(RR& z, const RR& a, long e) +{ + RR b, res; + + long n = NumBits(e); + + RRPush push; + long p = RR::precision(); + RR::SetPrecision(p + n + 10); + + xcopy(b, a); + + set(res); + long i; + + for (i = n-1; i >= 0; i--) { + sqr(res, res); + if (bit(e, i)) + mul(res, res, b); + } + + RR::SetPrecision(p); + + if (e < 0) + inv(z, res); + else + xcopy(z, res); +} + + +istream& operator>>(istream& s, RR& x) +{ + RR v; + + { + RRPush push; + + long c; + long cval; + long sign; + ZZ a, b; + + if (!s) NTL_INPUT_ERROR(s, "bad RR input"); + + + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + + if (c == '-') { + sign = -1; + s.get(); + c = s.peek(); + } + else + sign = 1; + + long got1 = 0; + long got_dot = 0; + long got2 = 0; + + a = 0; + b = 1; + + cval = CharToIntVal(c); + + if (cval >= 0 && cval <= 9) { + got1 = 1; + + while (cval >= 0 && cval <= 9) { + mul(a, a, 10); + add(a, a, cval); + s.get(); + c = s.peek(); + cval = CharToIntVal(c); + } + } + + if (c == '.') { + got_dot = 1; + + s.get(); + c = s.peek(); + cval = CharToIntVal(c); + + if (cval >= 0 && cval <= 9) { + got2 = 1; + + while (cval >= 0 && cval <= 9) { + mul(a, a, 10); + add(a, a, cval); + mul(b, b, 10); + s.get(); + c = s.peek(); + cval = CharToIntVal(c); + } + } + } + + if (got_dot && !got1 && !got2) NTL_INPUT_ERROR(s, "bad RR input"); + + ZZ e; + + long got_e = 0; + long e_sign; + + if (c == 'e' || c == 'E') { + got_e = 1; + + s.get(); + c = s.peek(); + + if (c == '-') { + e_sign = -1; + s.get(); + c = s.peek(); + } + else if (c == '+') { + e_sign = 1; + s.get(); + c = s.peek(); + } + else + e_sign = 1; + + cval = CharToIntVal(c); + + if (cval < 0 || cval > 9) NTL_INPUT_ERROR(s, "bad RR input"); + + e = 0; + while (cval >= 0 && cval <= 9) { + mul(e, e, 10); + add(e, e, cval); + s.get(); + c = s.peek(); + cval = CharToIntVal(c); + } + } + + if (!got1 && !got2 && !got_e) NTL_INPUT_ERROR(s, "bad RR input"); + + RR t1, t2; + + + long old_p = RR::precision(); + + if (got1 || got2) { + ConvPrec(t1, a, max(NumBits(a), 1)); + ConvPrec(t2, b, NumBits(b)); + if (got_e) + RR::SetPrecision(old_p + 10); + + div(v, t1, t2); + } + else + set(v); + + if (sign < 0) + negate(v, v); + + if (got_e) { + if (e >= NTL_OVFBND) ResourceError("RR input overflow"); + long E; + conv(E, e); + if (e_sign < 0) E = -E; + RR::SetPrecision(old_p + 10); + power(t1, to_RR(10), E); + mul(v, v, t1); + } + } + + xcopy(x, v); + return s; +} + +istream& InputPrec(RR& x, istream& s, long p) +{ + if (p < 1) + LogicError("InputPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + 
ResourceError("InputPrec: bad precsion"); + + RRPush push; + RR::prec = p; + s >> x; + return s; +} + + +void conv(RR& z, const xdouble& a) +{ + conv(z, a.mantissa()); + + if (a.exponent() > ((2*NTL_OVFBND)/(2*NTL_XD_HBOUND_LOG))) + ResourceError("RR: overlow"); + + if (a.exponent() < -((2*NTL_OVFBND)/(2*NTL_XD_HBOUND_LOG))) + ResourceError("RR: underflow"); + + z.e += a.exponent()*(2*NTL_XD_HBOUND_LOG); + + if (z.e >= NTL_OVFBND) + ResourceError("RR: overflow"); + + if (z.e <= -NTL_OVFBND) + ResourceError("RR: underflow"); +} + +void ConvPrec(RR& x, const xdouble& a, long p) +{ + if (p < 1) + LogicError("ConvPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("ConvPrec: bad precsion"); + + RRPush push; + RR::prec = p; + conv(x, a); +} + + + +void conv(xdouble& z, const RR& a) +{ + xdouble x; + xdouble y; + + conv(x, a.x); + power2(y, a.e); + z = x*y; +} + +void power2(RR& z, long e) +{ + if (e >= NTL_OVFBND) + ResourceError("RR: overflow"); + + if (e <= -NTL_OVFBND) + ResourceError("RR: underflow"); + + set(z.x); + z.e = e; +} + +void conv(RR& z, const quad_float& a) +{ + NTL_TLS_LOCAL(RR, hi); + NTL_TLS_LOCAL(RR, lo); + NTL_TLS_LOCAL(RR, res); + + ConvPrec(hi, a.hi, NTL_DOUBLE_PRECISION); + ConvPrec(lo, a.lo, NTL_DOUBLE_PRECISION); + + add(res, hi, lo); + + z = res; +} + +void ConvPrec(RR& x, const quad_float& a, long p) +{ + if (p < 1) + LogicError("ConvPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("ConvPrec: bad precsion"); + + RRPush push; + RR::prec = p; + conv(x, a); +} + + +void conv(quad_float& z, const RR& a) +{ + NTL_TLS_LOCAL(RR, a_hi); + NTL_TLS_LOCAL(RR, a_lo); + + ConvPrec(a_hi, a, NTL_DOUBLE_PRECISION); // high order bits + SubPrec(a_lo, a, a_hi, NTL_DOUBLE_PRECISION); // low order bits + + z = to_quad_float(a_hi.x)*power2_quad_float(a_hi.e) + + to_quad_float(a_lo.x)*power2_quad_float(a_lo.e); +} + +void conv(RR& x, const char *s) +{ + long c; + long cval; + long sign; + ZZ a, b; + long i = 0; + + if (!s) InputError("bad RR input"); + + RRPush push; + + + c = s[i]; + while (IsWhiteSpace(c)) { + i++; + c = s[i]; + } + + if (c == '-') { + sign = -1; + i++; + c = s[i]; + } + else + sign = 1; + + long got1 = 0; + long got_dot = 0; + long got2 = 0; + + a = 0; + b = 1; + + cval = CharToIntVal(c); + + if (cval >= 0 && cval <= 9) { + got1 = 1; + + while (cval >= 0 && cval <= 9) { + mul(a, a, 10); + add(a, a, cval); + i++; + c = s[i]; + cval = CharToIntVal(c); + } + } + + if (c == '.') { + got_dot = 1; + + i++; + c = s[i]; + cval = CharToIntVal(c); + + if (cval >= 0 && cval <= 9) { + got2 = 1; + + while (cval >= 0 && cval <= 9) { + mul(a, a, 10); + add(a, a, cval); + mul(b, b, 10); + i++; + c = s[i]; + cval = CharToIntVal(c); + } + } + } + + if (got_dot && !got1 && !got2) InputError("bad RR input"); + + ZZ e; + + long got_e = 0; + long e_sign; + + if (c == 'e' || c == 'E') { + got_e = 1; + + i++; + c = s[i]; + + if (c == '-') { + e_sign = -1; + i++; + c = s[i]; + } + else if (c == '+') { + e_sign = 1; + i++; + c = s[i]; + } + else + e_sign = 1; + + + cval = CharToIntVal(c); + + if (cval < 0 || cval > 9) InputError("bad RR input"); + + e = 0; + while (cval >= 0 && cval <= 9) { + mul(e, e, 10); + add(e, e, cval); + i++; + c = s[i]; + cval = CharToIntVal(c); + } + } + + if (!got1 && !got2 && !got_e) InputError("bad RR input"); + + RR t1, t2, v; + + long old_p = RR::precision(); + + if (got1 || got2) { + ConvPrec(t1, a, max(NumBits(a), 1)); + ConvPrec(t2, b, NumBits(b)); + if (got_e) + RR::SetPrecision(old_p + 10); + + div(v, t1, t2); + } + else + 
set(v); + + if (sign < 0) + negate(v, v); + + if (got_e) { + if (e >= NTL_OVFBND) ResourceError("RR input overflow"); + long E; + conv(E, e); + if (e_sign < 0) E = -E; + RR::SetPrecision(old_p + 10); + power(t1, to_RR(10), E); + mul(v, v, t1); + RR::prec = old_p; + } + + xcopy(x, v); +} + +void ConvPrec(RR& x, const char *s, long p) +{ + if (p < 1) + LogicError("ConvPrec: bad precsion"); + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("ConvPrec: bad precsion"); + + RRPush push; + RR::prec = p; + conv(x, s); +} + + +void ReallyComputeE(RR& res) +{ + RRPush push; + long p = RR::precision(); + RR::SetPrecision(p + NumBits(p) + 10); + + RR s, s1, t; + + s = 1; + t = 1; + + long i; + + for (i = 2; ; i++) { + add(s1, s, t); + if (s == s1) break; + xcopy(s, s1); + div(t, t, i); + } + + RR::SetPrecision(p); + xcopy(res, s); +} + +void ComputeE(RR& res) +{ + static NTL_CHEAP_THREAD_LOCAL long prec = 0; + + NTL_TLS_LOCAL(RR, e); + + RRPush push; + long p = RR::precision(); + + if (prec <= p + 10) { + prec = p + 20; + RR::SetPrecision(prec); + ReallyComputeE(e); + RR::SetPrecision(p); + } + + xcopy(res, e); +} + + +void exp(RR& res, const RR& x) +{ + if (x >= NTL_OVFBND || x <= -NTL_OVFBND) + ResourceError("RR: overflow"); + + RRPush push; + long p = RR::precision(); + + // step 0: write x = n + f, n an integer and |f| <= 1/2 + // careful -- we want to compute f to > p bits of precision + + + RR f, nn; + RR::SetPrecision(NTL_BITS_PER_LONG); + round(nn, x); + RR::SetPrecision(p + 10); + sub(f, x, nn); + long n = to_long(nn); + + // step 1: calculate t1 = e^n by repeated squaring + + RR::SetPrecision(p + NumBits(n) + 10); + + RR e; + ComputeE(e); + + RR::SetPrecision(p + 10); + + RR t1; + power(t1, e, n); + + // step 2: calculate t2 = e^f using Taylor series expansion + + RR::SetPrecision(p + NumBits(p) + 10); + + RR t2, s, s1, t; + long i; + + s = 0; + t = 1; + + for (i = 1; ; i++) { + add(s1, s, t); + if (s == s1) break; + xcopy(s, s1); + mul(t, t, f); + div(t, t, i); + } + + xcopy(t2, s); + + RR::SetPrecision(p); + + mul(res, t1, t2); +} + + + +void ReallyComputeLn2(RR& res) +{ + RRPush push; + long p = RR::precision(); + RR::SetPrecision(p + NumBits(p) + 10); + + RR s, s1, t, t1; + + s = 0; + t = 0.5; + t1 = 0.5; + + long i; + + for (i = 2; ; i++) { + add(s1, s, t); + if (s == s1) break; + xcopy(s, s1); + mul(t1, t1, 0.5); + div(t, t1, i); + } + + RR::SetPrecision(p); + xcopy(res, s); +} + + +void ComputeLn2(RR& res) +{ + static NTL_CHEAP_THREAD_LOCAL long prec = 0; + + NTL_TLS_LOCAL(RR, ln2); + + RRPush push; + long p = RR::precision(); + + if (prec <= p + 10) { + prec = p + 20; + RR::SetPrecision(prec); + ReallyComputeLn2(ln2); + RR::SetPrecision(p); + } + + xcopy(res, ln2); +} + +long Lg2(const RR& x) +{ + return NumBits(x.mantissa()) + x.exponent(); +} + +void log(RR& res, const RR& x) +{ + if (x <= 0) ArithmeticError("argument to log must be positive"); + + RRPush push; + long p = RR::precision(); + + RR::SetPrecision(p + NumBits(p) + 10); + + RR y; + long n; + + // re-write x = 2^n * (1 - y), where -1/2 < y < 1/4 (so 3/4 < 1-y < 3/2) + + if (x > 0.75 && x < 1.5) { + n = 0; + sub(y, 1, x); + } + else { + n = Lg2(x) - 1; + RR t; + power2(t, -n); + mul(t, t, x); + while (t > 1.5) { + mul(t, t, 0.5); + n++; + } + + sub(y, 1, t); + } + + // compute s = - ln(1-y) by power series expansion + + RR s, s1, t, t1; + + s = 0; + xcopy(t, y); + xcopy(t1, y); + + long i; + + for (i = 2; ; i++) { + add(s1, s, t); + if (s == s1) break; + xcopy(s, s1); + mul(t1, t1, y); + div(t, t1, i); + } + + if (n == 0) + t = 
0; + else { + ComputeLn2(t); + mul(t, t, n); + } + + RR::SetPrecision(p); + + sub(res, t, s); +} + + +void ComputeLn10(RR& res) +{ + static NTL_CHEAP_THREAD_LOCAL long prec = 0; + + NTL_TLS_LOCAL(RR, ln10); + + RRPush push; + long p = RR::precision(); + + if (prec <= p + 10) { + prec = p + 20; + RR::SetPrecision(prec); + log(ln10, to_RR(10)); + RR::SetPrecision(p); + } + + xcopy(res, ln10); +} + +void log10(RR& res, const RR& x) +{ + RRPush push; + long p = RR::precision(); + RR::SetPrecision(p + 10); + + RR ln10, t1, t2; + ComputeLn10(ln10); + + log(t1, x); + div(t2, t1, ln10); + + RR::SetPrecision(p); + + xcopy(res, t2); +} + + +void expm1(RR& res, const RR& x) +{ + RRPush push; + long p = RR::precision(); + + if (x < -0.5 || x > 0.5) { + RR t; + RR::SetPrecision(p + 10); + exp(t, x); + RR::SetPrecision(p); + sub(res, t, 1); + return; + } + + + RR::SetPrecision(p + NumBits(p) + 10); + + RR f; + + xcopy(f, x); + + RR s, s1, t; + long i; + + s = 0; + xcopy(t, f); + + for (i = 2; ; i++) { + add(s1, s, t); + if (s == s1) break; + xcopy(s, s1); + mul(t, t, f); + div(t, t, i); + } + + RR::SetPrecision(p); + + xcopy(res, s); +} + + + +void log1p(RR& res, const RR& x) +{ + RRPush push; + long p = RR::precision(); + RR y; + + if (x < -0.5 || x > 0.5) { + RR::SetPrecision(p + 10); + log(y, 1 + x); + RR::SetPrecision(p); + xcopy(res, y); + return; + } + + + RR::SetPrecision(p + NumBits(p) + 10); + + + negate(y, x); + + // compute s = - ln(1-y) by power series expansion + + RR s, s1, t, t1; + + s = 0; + xcopy(t, y); + xcopy(t1, y); + + long i; + + for (i = 2; ; i++) { + add(s1, s, t); + if (s == s1) break; + xcopy(s, s1); + mul(t1, t1, y); + div(t, t1, i); + } + + RR::SetPrecision(p); + + negate(res, s); + +} + + +void pow(RR& res, const RR& x, const RR& y) +{ + + if (y == 0) { + res = 1; + return; + } + + if (x == 0) { + res = 0; + return; + } + + if (x == 1) { + res = 1; + return; + } + + if (x < 0) { + ArithmeticError("pow: sorry...first argument to pow must be nonnegative"); + } + + RRPush push; + long p = RR::precision(); + + // calculate working precison...one could use p + NTL_BITS_PER_LONG + 10, + // for example, but we want the behaviour to be machine independent. 
+ // so we calculate instead a rough approximation to log |y log(x)| + + RR t1, t2; + long k; + + if (x > 0.5 && x < 1.5) { + xcopy(t1, x - 1); + k = Lg2(t1); + } + else { + k = NumBits(Lg2(x)); + } + + k += Lg2(y); + + if (k > NTL_BITS_PER_LONG+10) ResourceError("RR: overflow"); + + if (k < 0) k = 0; + + + RR::SetPrecision(p + k + 10); + + t1 = y*log(x); + + RR::SetPrecision(p); + + t2 = exp(t1); + + res = t2; +} + + +void ReallyComputePi(RR& res) +{ + RRPush push; + long p = RR::precision(); + RR::SetPrecision(p + NumBits(p) + 10); + + + RR sum1; + + RR s, s1, t, t1; + + s = 0; + t = 0.5; + t1 = 0.5; + + long i; + + for (i = 3; ; i+=2) { + add(s1, s, t); + if (s == s1) break; + xcopy(s, s1); + mul(t1, t1, -0.25); + div(t, t1, i); + } + + xcopy(sum1, s); + + + RR g; + + inv(g, to_RR(3)); // g = 1/3 + + s = 0; + xcopy(t, g); + xcopy(t1, g); + + sqr(g, g); + negate(g, g); // g = -1/9 + + for (i = 3; ; i+=2) { + add(s1, s, t); + if (s == s1) break; + xcopy(s, s1); + mul(t1, t1, g); + div(t, t1, i); + } + + add(s, s, sum1); + mul(s, s, 4); + + RR::SetPrecision(p); + xcopy(res, s); +} + +void ComputePi(RR& res) +{ + static NTL_CHEAP_THREAD_LOCAL long prec = 0; + + NTL_TLS_LOCAL(RR, pi); + + RRPush push; + long p = RR::precision(); + + if (prec <= p + 10) { + prec = p + 20; + RR::SetPrecision(prec); + ReallyComputePi(pi); + RR::SetPrecision(p); + } + + xcopy(res, pi); +} + + + +void sin(RR& res, const RR& x) +{ + if (x == 0) { + res = 0; + return; + } + + if (Lg2(x) > 1000) + ResourceError("sin: sorry...argument too large in absolute value"); + + RRPush push; + long p = RR::precision(); + + RR pi, t1, f; + RR n; + + + // we want to make x^2 < 3, so that the series for sin(x) + // converges nicely, without any nasty cancellations in the + // first terms of the series. + + RR::SetPrecision(p + NumBits(p) + 10); + + if (x*x < 3) { + xcopy(f, x); + } + else { + + // we want to write x/pi = n + f, |f| < 1/2.... + // but we have to do *this* very carefully, so that f is computed + // to precision > p. I know, this is sick! + + long p1; + + p1 = p + Lg2(x) + 20; + + + for (;;) { + RR::SetPrecision(p1); + ComputePi(pi); + xcopy(t1, x/pi); + xcopy(n, floor(t1)); + xcopy(f, t1 - n); + if (f > 0.5) { + n++; + xcopy(f, t1 - n); + } + + if (f == 0 || p1 < p - Lg2(f) + Lg2(n) + 10) { + // we don't have enough bits of f...increase p1 and continue + + p1 = p1 + max(20, p1/10); + } + else + break; + } + + RR::SetPrecision(p + NumBits(p) + 10); + ComputePi(pi); + + xcopy(f, pi * f); + + if (n != 0 && n.exponent() == 0) { + // n is odd, so we negate f, which negates sin(f) + + xcopy(f, -f); + } + + } + + // Boy, that was painful, but now its over, and we can simply apply + // the series for sin(f) + + RR t2, s, s1, t; + long i; + + s = 0; + xcopy(t, f); + + for (i = 3; ; i=i+2) { + add(s1, s, t); + if (s == s1) break; + xcopy(s, s1); + mul(t, t, f); + mul(t, t, f); + div(t, t, i-1); + div(t, t, i); + negate(t, t); + } + + RR::SetPrecision(p); + + xcopy(res, s); + +} + +void cos(RR& res, const RR& x) +{ + if (x == 0) { + res = 1; + return; + } + + if (Lg2(x) > 1000) + ResourceError("cos: sorry...argument too large in absolute value"); + + RRPush push; + long p = RR::precision(); + + RR pi, t1, f; + RR n; + + // we want to write x/pi = (n+1/2) + f, |f| < 1/2.... + // but we have to do *this* very carefully, so that f is computed + // to precision > p. I know, this is sick! 
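+   // the loop below escalates the working precision p1 until the
+   // fractional part f is known to more than p significant bits:
+   // f == 0 (total cancellation) or a failing estimate
+   // p1 < p - Lg2(f) + Lg2(n) + 10 grows p1 by max(20, p1/10)
+   // and redoes the reduction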
+ + long p1; + + p1 = p + Lg2(x) + 20; + + + for (;;) { + RR::SetPrecision(p1); + ComputePi(pi); + xcopy(t1, x/pi); + xcopy(n, floor(t1)); + xcopy(f, t1 - (n + 0.5)); + + if (f == 0 || p1 < p - Lg2(f) + Lg2(n) + 10) { + // we don't have enough bits of f...increase p1 and continue + + p1 = p1 + max(20, p1/10); + } + else + break; + } + + RR::SetPrecision(p + NumBits(p) + 10); + ComputePi(pi); + + xcopy(f, pi * f); + + if (n == 0 || n.exponent() != 0) { + // n is even, so we negate f, which negates sin(f) + + xcopy(f, -f); + } + + // Boy, that was painful, but now its over, and we can simply apply + // the series for sin(f) + + RR t2, s, s1, t; + long i; + + s = 0; + xcopy(t, f); + + for (i = 3; ; i=i+2) { + add(s1, s, t); + if (s == s1) break; + xcopy(s, s1); + mul(t, t, f); + mul(t, t, f); + div(t, t, i-1); + div(t, t, i); + negate(t, t); + } + + RR::SetPrecision(p); + + xcopy(res, s); + +} + + +ostream& operator<<(ostream& s, const RR& a) +{ + if (IsZero(a)) { + s << "0"; + return s; + } + + RRPush push; + + // we compute new_p and log_10_a precisely using sufficient + // precision---this is necessary to achieve accuracy and + // platform independent behaviour + + long temp_p = max(NumBits(RR::OutputPrecision()), + NumBits(Lg2(a))) + 10; + + RR::SetPrecision(temp_p); + + RR ln2, ln10, log_2_10; + ComputeLn2(ln2); + ComputeLn10(ln10); + log_2_10 = ln10/ln2; + long new_p = to_long(RR::OutputPrecision()*log_2_10) + 20; + long log_10_a = to_long(Lg2(a)/log_2_10); + + RR::SetPrecision(new_p); + + RR b; + long neg; + + if (a < 0) { + negate(b, a); + neg = 1; + } + else { + xcopy(b, a); + neg = 0; + } + + long k = RR::OutputPrecision() - log_10_a; + + RR c, d; + + power(c, to_RR(10), RR::OutputPrecision()); + power(d, to_RR(10), log_10_a); + + div(b, b, d); + mul(b, b, c); + + while (b < c) { + mul(b, b, 10); + k++; + } + + while (b >= c) { + div(b, b, 10); + k--; + } + + add(b, b, 0.5); + k = -k; + + ZZ B; + conv(B, b); + + long bp_len = RR::OutputPrecision()+10; + + UniqueArray bp_store; + bp_store.SetLength(bp_len); + char *bp = bp_store.get(); + + long len, i; + + len = 0; + do { + if (len >= bp_len) LogicError("RR output: buffer overflow"); + bp[len] = IntValToChar(DivRem(B, B, 10)); + len++; + } while (B > 0); + + for (i = 0; i < len/2; i++) { + char tmp; + tmp = bp[i]; + bp[i] = bp[len-1-i]; + bp[len-1-i] = tmp; + } + + i = len-1; + while (bp[i] == '0') i--; + + k += (len-1-i); + len = i+1; + + bp[len] = '\0'; + + if (k > 3 || k < -len - 3) { + // use scientific notation + + if (neg) s << "-"; + s << "0." 
<< bp << "e" << (k + len); + } + else if (k >= 0) { + if (neg) s << "-"; + s << bp; + for (i = 0; i < k; i++) + s << "0"; + } + else if (k <= -len) { + if (neg) s << "-"; + s << "0."; + for (i = 0; i < -len-k; i++) + s << "0"; + s << bp; + } + else { + if (neg) s << "-"; + for (i = 0; i < len+k; i++) + s << bp[i]; + + s << "."; + + for (i = len+k; i < len; i++) + s << bp[i]; + } + + return s; +} + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/RRTest.c b/thirdparty/linux/ntl/src/RRTest.c new file mode 100644 index 0000000000..9e5a8681d0 --- /dev/null +++ b/thirdparty/linux/ntl/src/RRTest.c @@ -0,0 +1,27 @@ + +#include + +NTL_CLIENT + +int main() +{ + mat_RR A; + vec_RR x, y, z; + RR d; + + RR::SetPrecision(200); + + cin >> A; + cin >> y; + + solve(d, x, A, y); + + // mul(z, x, A); + // sub(z, z, y); + + z = x*A - y; + + cout << d << "\n"; + cout << x << "\n"; + cout << z << "\n"; +} diff --git a/thirdparty/linux/ntl/src/RRTestIn b/thirdparty/linux/ntl/src/RRTestIn new file mode 100644 index 0000000000..023f9ba227 --- /dev/null +++ b/thirdparty/linux/ntl/src/RRTestIn @@ -0,0 +1,22 @@ +[[-1007377 -621256 -685733 -1029120 -1011952 540589 -891039 -526851 665990 -628992 -641479 -812282 732497 679539 -941076 649927 586413 761030 821315 995148] +[-789365 -622518 987275 -594200 -835589 571387 -995798 -993857 806554 -915946 906504 -680376 -545294 -720939 812630 -900506 -918548 575268 688388 -593592] +[550743 1012525 -742758 684636 -819566 -922450 -931004 862846 -1037626 1023813 -844078 525899 -812455 -681704 -628061 -918116 -909821 -644178 827337 -786322] +[-564945 -733107 979752 773955 -548908 -642504 -793003 1042300 984860 567651 -627125 -933584 732283 528600 -615886 631186 964459 922892 -843197 774273] +[-938526 576037 -864612 653669 -1018155 597732 544943 -894518 -933942 621892 -859594 -942483 -590467 775614 873241 -917781 -1000228 658893 -677585 889401] +[1007616 -810354 605966 -530965 870440 672791 677405 -842554 955096 856668 985158 929574 837102 -954192 -772025 -766460 -680720 -794914 -740091 -672270] +[852689 -651039 -535517 903809 982092 532512 -682583 -915492 -571826 -811136 -749897 785952 -954046 537266 883213 630761 -902395 -607566 935742 633521] +[660404 591376 873522 -1001549 871542 1041954 -537328 -699583 -675351 963615 572679 833723 -832887 -623849 940286 -820470 -783690 663914 -631307 -779979] +[672814 -758937 802563 -928035 1047989 620898 931085 879974 655815 1007647 557164 843862 929963 -931350 -1017250 -695547 775528 594474 845613 -1031318] +[-793811 -539625 -618683 -958438 -943420 979026 985199 -705064 538218 599551 -635710 848659 -692530 578628 -581393 739362 703204 936610 -828619 -760384] +[-828152 -596634 -759545 878054 611482 1028872 553542 -915076 899691 846870 -535336 -591759 693682 -613243 -808226 911793 996273 696763 -837040 -703417] +[697478 629327 -594833 924068 842000 775153 675791 -543387 -978721 -575508 598224 760540 783232 849017 -1024693 -570191 583682 735309 -633097 -701871] +[-992571 -948854 -911900 -764507 684692 -898657 813746 -635634 744436 854601 998034 -664419 819562 1004955 -780684 -892501 -678615 -799675 -573309 -882355] +[-953063 548712 559591 922389 -558574 -967290 -655590 -687331 1047546 711682 -954689 -821640 -786920 786979 762762 603011 593903 -756313 693749 -643021] +[-748707 933192 700160 -975435 995281 -646390 -1035167 973363 614204 -967837 -586649 833688 -647661 711361 615899 887422 -534256 -722406 663119 594950] +[630840 -726745 991278 -809600 -665945 -660710 -603207 -954200 938302 991768 -650427 789303 905344 -728251 824408 924197 
843327 -850701 728323 617049] +[-780719 -1036907 -896496 729651 -780826 -554285 -604816 596378 -949904 -579762 -710465 806954 701677 946362 946408 968898 799263 -574346 -968732 635951] +[-593706 622808 978139 -741721 -801289 -771952 901691 1034952 -839341 882675 570078 -666109 -794509 -629324 983777 -789268 989966 -988905 -952690 757890] +[-954640 -667081 -1017441 -907522 601534 854519 937315 936736 -945500 -587586 -577805 946046 -808901 -529612 -969288 -886033 -569067 -639671 576920 733108] +[-758077 585690 732142 -770057 892395 -900234 698793 -910499 -584416 -593236 -549557 -989525 -697743 -676367 1019901 -766922 752620 1037353 812504 761751] +] +[947891 538090 995215 -681544 752658 -692090 904199 -892030 -819336 705526 -745006 649281 810836 -727461 612233 -739736 989072 -1028677 -646961 651303] diff --git a/thirdparty/linux/ntl/src/RRTestOut b/thirdparty/linux/ntl/src/RRTestOut new file mode 100644 index 0000000000..5e37023a5f --- /dev/null +++ b/thirdparty/linux/ntl/src/RRTestOut @@ -0,0 +1,3 @@ +-0.3543709603e127 +[0.6342777424 -1.66630001 -0.7490057533 -0.4878568653 -0.2446744714 1.996508497 -1.44462438 1.021627692 -0.4039028959 -0.6198159641 -1.253578169 -0.8264701067 -0.9749456962 1.80856531 -1.155185633 -0.6781996511 1.113882043 -1.103882646 0.6760286159 1.027918795] +[0.3262652234e-53 0.7177834915e-53 -0.8482895808e-53 0.195759134e-53 0.6525304468e-53 0.3915182681e-53 0.6525304468e-54 -0.1174554804e-52 -0.5872774021e-53 0.4567713128e-53 0.7830365362e-53 -0.195759134e-53 -0.7830365362e-53 0.195759134e-53 0.6525304468e-54 0.5220243574e-53 0.7177834915e-53 -0.1044048715e-52 0.1305060894e-53 0.4567713128e-53] diff --git a/thirdparty/linux/ntl/src/RemoveProg b/thirdparty/linux/ntl/src/RemoveProg new file mode 100644 index 0000000000..99b8caa2ae --- /dev/null +++ b/thirdparty/linux/ntl/src/RemoveProg @@ -0,0 +1,12 @@ + +for i in $* +do + rm -f "$i" + rm -f "$i.exe" + rm -f ".libs/$i" + rm -f ".libs/$i.exe" + rm -rf "$i.dSYM" +done + +exit 0 + diff --git a/thirdparty/linux/ntl/src/ResetFeatures b/thirdparty/linux/ntl/src/ResetFeatures new file mode 100644 index 0000000000..96a495bbcb --- /dev/null +++ b/thirdparty/linux/ntl/src/ResetFeatures @@ -0,0 +1,5 @@ + +echo "" > "$1/include/NTL/HAVE_LL_TYPE.h" +echo "" > "$1/include/NTL/HAVE_BUILTIN_CLZL.h" +echo "" > "$1/include/NTL/HAVE_AVX.h" +echo "" > "$1/include/NTL/HAVE_FMA.h" diff --git a/thirdparty/linux/ntl/src/TestGetPID.c b/thirdparty/linux/ntl/src/TestGetPID.c new file mode 100644 index 0000000000..f087abb236 --- /dev/null +++ b/thirdparty/linux/ntl/src/TestGetPID.c @@ -0,0 +1,17 @@ + + +#include + +#include +#include + +using namespace std; + +unsigned long _ntl_GetPID(); + + +int main() +{ + printf("%lu\n", _ntl_GetPID()); + return 0; +} diff --git a/thirdparty/linux/ntl/src/TestGetTime.c b/thirdparty/linux/ntl/src/TestGetTime.c new file mode 100644 index 0000000000..30965be883 --- /dev/null +++ b/thirdparty/linux/ntl/src/TestGetTime.c @@ -0,0 +1,56 @@ + + +#include + +#include +#include + +using namespace std; + +double _ntl_GetTime(); + +/* Assuming the processor speed is at most 200GHz, and that + * the clock resolution is at least 1 millisecond, the following + * code should correctly determine if the GetTime function + * is working properly, and should not run for more than + * a few seconds on a machine with a speed of at least 100MHz. 
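+ * Concretely, the driver passes x = 1, n = 2^20 and m = 2^20 - 1, so
+ * each outer iteration performs 2^20 masked additions; the program
+ * exits successfully the first time _ntl_GetTime() reports that any
+ * time has elapsed, and returns -1 only if the clock never advances
+ * across all LOOP_COUNT = 400 iterations.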
+ */ + +#define LOOP_COUNT (400) + +int main(int argc, char **argv) +{ + long a, x, n, m; + long i, j, k; + double t0, t1; + + fprintf(stderr, "running"); + + x = atol(argv[1]); /* = 1 */ + + n = atol(argv[2]); /* = 1048576 = 2^20 */ + + m = atol(argv[3]); /* = 1048575 = 2^20 - 1 */ + + k = -1; + t0 = _ntl_GetTime(); + + a = 1; + + for (i = 1; i <= LOOP_COUNT; i++) { + for (j = 0; j < n; j++) + a = (a + x) & m; + + if (a == 17) return -2; /* keeps the compiler honest! */ + + t1 = _ntl_GetTime(); + if (t1 > t0) { fprintf(stderr, "\n"); return 0; } + + if ((i % 10) == 0) { + fprintf(stderr, "."); + } + } + + fprintf(stderr, "\n"); + return -1; +} diff --git a/thirdparty/linux/ntl/src/TestScript b/thirdparty/linux/ntl/src/TestScript new file mode 100644 index 0000000000..101ab8e95a --- /dev/null +++ b/thirdparty/linux/ntl/src/TestScript @@ -0,0 +1,204 @@ + +echo +echo "---------------------------------" +echo "making CanZassTest" +make CanZassTest +echo "running CanZassTest" +./CanZassTest < CanZassTestIn > XXX +sh RemoveProg CanZassTest +if diff -b XXX CanZassTestOut +then +echo "CanZassTest OK" +else +echo "bad CanZassTest" +fi + +echo +echo "---------------------------------" +echo "making BerlekampTest" +make BerlekampTest +echo "running BerlekampTest" +./BerlekampTest < BerlekampTestIn > XXX +sh RemoveProg BerlekampTest +if diff -b XXX BerlekampTestOut +then +echo "BerlekampTest OK" +else +echo "bad BerlekampTest" +fi + + +echo +echo "---------------------------------" +echo "making ZZXFacTest" +make ZZXFacTest +echo "running ZZXFacTest" +./ZZXFacTest < ZZXFacTestIn > XXX +sh RemoveProg ZZXFacTest +if diff -b XXX ZZXFacTestOut +then +echo "ZZXFacTest OK" +else +echo "bad ZZXFacTest" +fi + +echo +echo "---------------------------------" +echo "making MoreFacTest" +make MoreFacTest +echo "running MoreFacTest" +./MoreFacTest < MoreFacTestIn +sh RemoveProg MoreFacTest + +echo +echo "---------------------------------" +echo "making GF2XTest" +make GF2XTest +echo "running GF2XTest" +./GF2XTest +sh RemoveProg GF2XTest + +echo +echo "---------------------------------" +echo "making GF2EXTest" +make GF2EXTest +echo "running GF2EXTest" +./GF2EXTest +sh RemoveProg GF2EXTest + + +echo +echo "---------------------------------" +echo "making MatrixTest" +make MatrixTest +echo "running MatrixTest" +./MatrixTest < MatrixTestIn > XXX +sh RemoveProg MatrixTest +if diff -b XXX MatrixTestOut +then +echo "MatrixTest OK" +else +echo "bad MatrixTest" +fi + +echo +echo "---------------------------------" +echo "making mat_lzz_pTest" +make mat_lzz_pTest +echo "running mat_lzz_pTest" +./mat_lzz_pTest +sh RemoveProg mat_lzz_pTest + + +echo +echo "---------------------------------" +echo "making CharPolyTest" +make CharPolyTest +echo "running CharPolyTest" +./CharPolyTest < CharPolyTestIn > XXX +sh RemoveProg CharPolyTest +if diff -b XXX CharPolyTestOut +then +echo "CharPolyTest OK" +else +echo "bad CharPolyTest" +fi + +echo +echo "---------------------------------" +echo "making BitMatTest" +make BitMatTest +echo "running BitMatTest" +./BitMatTest +sh RemoveProg BitMatTest + + +echo +echo "---------------------------------" +echo "making RRTest" +make RRTest +echo "running RRTest" +./RRTest < RRTestIn > XXX +sh RemoveProg RRTest +if diff -b XXX RRTestOut +then +echo "RRTest OK" +else +echo "bad RRTest" +fi + +echo +echo "---------------------------------" +echo "making QuadTest" +make QuadTest +echo "running QuadTest" +./QuadTest < QuadTestIn > XXX +sh RemoveProg QuadTest +if diff -b XXX QuadTestOut +then +echo 
"QuadTest OK" +else +echo "bad QuadTest" +fi + + +echo +echo "---------------------------------" +echo "making LLLTest" +make LLLTest +echo "running LLLTest" +./LLLTest < LLLTestIn > XXX +sh RemoveProg LLLTest +if diff -b XXX LLLTestOut +then +echo "LLLTest OK" +else +echo "bad LLLTest" +fi + +echo +echo "---------------------------------" +echo "making subset" +make subset +echo "subset 40 40 20 10 999999 f" +./subset < + +#ifdef NTL_THREADS + + +#include +#include +#include +#include + +NTL_CLIENT + +#if 1 + + +long mobius(long n) +{ + long p,e,arity=0; + PrimeSeq s; + while (n!=1) + { p=s.next(); + e=0; + while ((n%p==0)) { n=n/p; e++; } + if (e>1) { return 0; } + if (e!=0) { arity^=1; } + } + if (arity==0) { return 1; } + return -1; +} + + +ZZX Cyclotomic(long N) +{ + ZZX Num,Den,G,F; + set(Num); set(Den); + long m,d; + for (d=1; d<=N; d++) + { if ((N%d)==0) + { clear(G); + SetCoeff(G,N/d,1); SetCoeff(G,0,-1); + m=mobius(d); + if (m==1) { Num*=G; } + else if (m==-1) { Den*=G; } + } + } + F=Num/Den; + return F; +} + +long multOrd(const ZZ& p, long m) +{ + long pp = rem(p, m); + if (GCD(pp, m) != 1) return 0; + + long ord = 1; + long val = pp; + while (val != 1) { + ord++; + val = MulMod(val, pp, m); + } + return ord; +} + +#endif + + + + + +int main() +{ + SetSeed(ZZ(0)); + + long NumContexts = 3; + long NumPolys = 6; + long n = 2000; + + Vec context_vec; + context_vec.SetLength(NumContexts); + + for (long i = 0; i < NumContexts; i++) { + ZZ p; + GenPrime(p, 150 + i*20); + context_vec[i] = ZZ_pContext(p); + } + + Vec poly_vec; + Vec res_vec; + + poly_vec.SetLength(NumPolys); + res_vec.SetLength(NumPolys); + + + for (long i = 0; i < NumPolys; i++) { + context_vec[i % NumContexts].restore(); + ZZX f = Cyclotomic(n+i); + conv(poly_vec[i], f); + } + + + cerr << "START\n"; + + BasicThreadPool pool(NumPolys); + + pool.exec_index(NumPolys, + [&](long i) { + fprintf(stderr, "starting %ld: %s\n", i, CurrentThreadID().c_str()); + context_vec[i % NumContexts].restore(); + CanZass(res_vec[i], poly_vec[i]); + fprintf(stderr, "stopping %ld: %s\n", i, CurrentThreadID().c_str()); + }); + + cerr << "checking results...\n"; + + + for (long i = 0; i < NumPolys; i++) { + context_vec[i % NumContexts].restore(); + if (res_vec[i].length() == deg(poly_vec[i])/multOrd(ZZ_p::modulus(), n+i)) + cerr << i << " GOOD\n"; + else + cerr << i << " BAD\n"; + } +} + +#else + +#include + +NTL_CLIENT + +int main() +{ + cerr << "threads not enabled\n"; +} + + +#endif + + diff --git a/thirdparty/linux/ntl/src/Timing.c b/thirdparty/linux/ntl/src/Timing.c new file mode 100644 index 0000000000..c8168f7c13 --- /dev/null +++ b/thirdparty/linux/ntl/src/Timing.c @@ -0,0 +1,194 @@ + +#include +#include +#include +#include + +NTL_CLIENT + + +#define TIME_IT(t, action) \ +do { \ + double _t0, _t1; \ + long _iter = 1; \ + long _cnt = 0; \ + do { \ + _t0 = GetTime(); \ + for (long _i = 0; _i < _iter; _i++) { action; _cnt++; } \ + _t1 = GetTime(); \ + } while ( _t1 - _t0 < 4 && (_iter *= 2)); \ + t = (_t1 - _t0)/_iter; \ +} while(0) + + + + +int main() +{ + double t; + + + long k = 1000; + long n = 1000; + + { + SetSeed(conv(1)); + ZZ p = RandomPrime_ZZ(k); + + ZZ_p::init(p); + + ZZ x, y, z, w; + + SetSeed(conv(2)); + RandomBnd(x, p); + + + SetSeed(conv(3)); + RandomBnd(y, p); + + TIME_IT(t, mul(z, x, y)); + cout << "multiply 1000-bit ints: " << t << "\n"; + + + TIME_IT(t, rem(w, z, p)); + cout << "remainder 2000/1000-bit ints: " << t << "\n"; + + TIME_IT(t, GCD(w, x, y)); + cout << "gcd 1000-bit ints: " << t << "\n"; + + ZZ_pX a, b, c; 
+ + SetSeed(conv(4)); + random(a, n); + + SetSeed(conv(5)); + random(b, n); + + mul(c, a, b); + + TIME_IT(t, mul(c, a, b)); + cout << "multiply degree-1000 poly mod 1000-bit prime: " << t << "\n"; + + + ZZ_pX f; + SetSeed(conv(6)); + random(f, n); + SetCoeff(f, n); + + ZZ_pX A, B; + + SetSeed(conv(7)); + random(A, 2*(deg(f)-1)); + + TIME_IT(t, rem(B, A, f)); + cout << "remainder degree-2000/1000 poly mod 1000-bit prime: " << t << "\n"; + + ZZ_pXModulus F(f); + + TIME_IT(t, rem(B, A, F)); + cout << "preconditioned remainder degree-2000/1000 poly mod 1000-bit prime: " << t << "\n"; + + + TIME_IT(t, GCD(a, b)); + cout << "gcd degree-1000 poly mod 1000-bit prime: " << t << "\n"; + + + ZZX AA = conv(a); + ZZX BB = conv(b); + ZZX CC; + + + TIME_IT(t, mul(CC, AA, BB)); + cout << "multiply degree-1000 int poly with 1000-bit coeffs: " << t << "\n"; + + cout << "\n"; + cout << "factoring degree-1000 poly mod 1000-bit prime...\n"; + TIME_IT(t, CanZass(f, _cnt == 0)); + cout << "...total time = " << t << "\n\n"; + } + { + n = 500; + k = 500; + + SetSeed(conv(8)); + GF2X p = BuildRandomIrred(BuildIrred_GF2X(k)); + + GF2E::init(p); + + GF2X x, y, z, w; + + SetSeed(conv(9)); + random(x, deg(p)); + + + SetSeed(conv(10)); + random(y, deg(p)); + + TIME_IT(t, mul(z, x, y)); + cout << "multiply 500-bit GF2Xs: " << t << "\n"; + + + TIME_IT(t, rem(w, z, p)); + cout << "remainder 1000/500-bit GF2Xs: " << t << "\n"; + + TIME_IT(t, GCD(w, x, y)); + cout << "gcd 500-bit GF2Xs: " << t << "\n"; + + SetSeed(conv(11)); + GF2X fff; + random(fff, k); + SetCoeff(fff, k); + + cout << "\n"; + TIME_IT(t, CanZass(fff, 0)); + cout << "factoring degree-500 GF2X: " << t << "\n"; + + + TIME_IT(t, GCD(w, x, y)); + cout << "gcd 500-bit GF2X: " << t << "\n"; + + GF2EX a, b, c; + + SetSeed(conv(12)); + random(a, n); + + SetSeed(conv(13)); + random(b, n); + + mul(c, a, b); + + TIME_IT(t, mul(c, a, b)); + cout << "multiply degree-500 poly mod 500-bit GF2X: " << t << "\n"; + + + + GF2EX f; + SetSeed(conv(14)); + random(f, n); + SetCoeff(f, n); + + GF2EX A, B; + + SetSeed(conv(15)); + random(A, 2*(deg(f)-1)); + + TIME_IT(t, rem(B, A, f)); + cout << "remainder degree-1000/500 poly mod 500-bit GF2X: " << t << "\n"; + + GF2EXModulus F(f); + + TIME_IT(t, rem(B, A, F)); + cout << "preconditioned remainder degree-1000/500 poly mod 500-bit GF2X: " << t << "\n"; + + + TIME_IT(t, GCD(a, b)); + cout << "gcd degree-500 poly mod 500-bit GF2X: " << t << "\n"; + + + f = f >> n/2; + cout << "\n"; + cout << "factoring degree-500 poly mod 500-bit GF2X...\n"; + TIME_IT(t, CanZass(f, _cnt == 0)); + cout << "\n...total time = " << t << "\n"; + } +} diff --git a/thirdparty/linux/ntl/src/VERSION_INFO b/thirdparty/linux/ntl/src/VERSION_INFO new file mode 100644 index 0000000000..3857442876 --- /dev/null +++ b/thirdparty/linux/ntl/src/VERSION_INFO @@ -0,0 +1 @@ +29:0:0 diff --git a/thirdparty/linux/ntl/src/WINDIR b/thirdparty/linux/ntl/src/WINDIR new file mode 100644 index 0000000000..c10616198e --- /dev/null +++ b/thirdparty/linux/ntl/src/WINDIR @@ -0,0 +1 @@ +WinNTL-9_11_0 diff --git a/thirdparty/linux/ntl/src/Wizard b/thirdparty/linux/ntl/src/Wizard new file mode 100644 index 0000000000..0157bfae17 --- /dev/null +++ b/thirdparty/linux/ntl/src/Wizard @@ -0,0 +1,119 @@ + +if test "$1" = "on" +then + echo "" + echo "*" + echo "*" + echo "* The wizard is going to run." + echo "* It will perform some timing experiments, and then automatically" + echo "* update your config.h file." + echo "* Please be patient, and don't be spooked by any error messages." 
+ echo "*" + echo "*" +else + echo "" + echo "*" + echo "*" + echo "* You have chosen not to run the wizard." + echo "*" + echo "*" + exit 0 +fi + +rm -rf small + +mkdir small +mkdir small/src +mkdir small/include +mkdir small/include/NTL + +cp MulTimeTest.c small/src +cp Poly1TimeTest.c small/src +cp Poly2TimeTest.c small/src +cp Poly3TimeTest.c small/src +cp GF2XTimeTest.c small/src +cp InitSettings.c small/src +cp DispSettings.c small/src + +cp FFT.c small/src +cp GetTime.c small/src +cp GetPID.c small/src +cp ctools.c small/src +cp ZZ.c small/src +cp ZZVec.c small/src +cp ZZ_p.c small/src +cp ZZ_pX.c small/src +cp ZZ_pX1.c small/src +cp lip.c small/src +cp g_lip_impl.h small/src +cp c_lip_impl.h small/src +cp tools.c small/src +cp vec_ZZ.c small/src +cp vec_ZZ_p.c small/src +cp GF2.c small/src +cp WordVector.c small/src +cp vec_GF2.c small/src +cp GF2X.c small/src +cp GF2X1.c small/src +cp thread.c small/src +cp BasicThreadPool.c small/src +cp fileio.c small/src + + + +sh CopyFeatures '..' small +cp ../include/NTL/FFT.h small/include/NTL +cp ../include/NTL/SPMM_ASM.h small/include/NTL +cp ../include/NTL/ctools.h small/include/NTL +cp ../include/NTL/ZZ.h small/include/NTL +cp ../include/NTL/sp_arith.h small/include/NTL +cp ../include/NTL/ZZVec.h small/include/NTL +cp ../include/NTL/ZZ_p.h small/include/NTL +cp ../include/NTL/ZZ_pX.h small/include/NTL +cp ../include/NTL/config.h small/include/NTL +cp ../include/NTL/lip.h small/include/NTL +cp ../include/NTL/g_lip.h small/include/NTL +cp ../include/NTL/c_lip.h small/include/NTL +cp ../include/NTL/gmp_aux.h small/include/NTL +cp ../include/NTL/mach_desc.h small/include/NTL +cp ../include/NTL/new.h small/include/NTL +cp ../include/NTL/SmartPtr.h small/include/NTL +cp ../include/NTL/Lazy.h small/include/NTL +cp ../include/NTL/LazyTable.h small/include/NTL +cp ../include/NTL/thread.h small/include/NTL +cp ../include/NTL/BasicThreadPool.h small/include/NTL +cp ../include/NTL/fileio.h small/include/NTL +cp ../include/NTL/tools.h small/include/NTL +cp ../include/NTL/vec_ZZ.h small/include/NTL +cp ../include/NTL/vec_ZZ_p.h small/include/NTL +cp ../include/NTL/vec_long.h small/include/NTL +cp ../include/NTL/vector.h small/include/NTL +cp ../include/NTL/GF2.h small/include/NTL +cp ../include/NTL/WordVector.h small/include/NTL +cp ../include/NTL/vec_GF2.h small/include/NTL +cp ../include/NTL/GF2X.h small/include/NTL + +cp cfile small/src +cp WizardAux small/src + +cp makefile small/src + + +cd small/src +perl WizardAux + +cd ../.. 
+ +echo "*" +echo "*" +echo "* Updating config.h and wizard_log.h" +echo "*" +echo "*" + +cp small/include/NTL/config.h ../include/NTL/config.h +cp small/src/wizard_log.h ../include/NTL/wizard_log.h + +rm -r small + +exit 0 + diff --git a/thirdparty/linux/ntl/src/WizardAux b/thirdparty/linux/ntl/src/WizardAux new file mode 100644 index 0000000000..c42905f7f0 --- /dev/null +++ b/thirdparty/linux/ntl/src/WizardAux @@ -0,0 +1,360 @@ +# This is a perl script, invoked from a shell + +use warnings; # this doesn't work on older versions of perl + + +sub GenConfigHeader { + + my $line; + local *CFILE; + local *CFILEOUT; + + open(CFILE, "< cfile"); + open(CFILEOUT, "> cfileout"); + + while ($line = ) { + + $line =~ s/@\{(.*?)\}/$Config{$1}/ge; + + print CFILEOUT $line; + + } + + close(CFILE); + close(CFILEOUT); + + system("cp cfileout ../include/NTL/config.h"); + +} + + + + +sub RemoveProg { + +# This should work on unix and cygwin on windows + + my ($name) = @_; + + unlink($name); unlink("$name.exe"); + +} + + +sub RunProg { + + my ($name) = @_; + my $val; + my $res; + + system("make wntl.a") and return "999999999999999"; + RemoveProg($name); + system("make $name") and return "999999999999999"; + + print "\n*** running $name..."; + + $val = `./$name`; + + if ($? != 0) { + $res = "999999999999999"; + } + else { + ($res) = ( $val =~ /^([0-9]*)/ ); + } + + print $val, "\n"; + + return $res; + +} + + +############################################################ + + + +system("make InitSettings"); + +@lines = `./InitSettings`; + +%Config = ( + +'NTL_LONG_LONG' => 0, +'NTL_AVOID_FLOAT' => 0, +'NTL_SPMM_ULL' => 0, +'NTL_SPMM_ASM' => 0, +'NTL_TBL_REM' => 0, +'NTL_TBL_REM_LL' => 0, +'NTL_CRT_ALTCODE' => 0, +'NTL_CRT_ALTCODE_SMALL'=> 0, +'NTL_AVOID_BRANCHING' => 0, +'NTL_GF2X_ALTCODE' => 0, +'NTL_GF2X_ALTCODE1' => 0, +'NTL_GF2X_NOINLINE' => 0, +'NTL_PCLMUL' => 0, +'NTL_FFT_BIGTAB' => 0, +'NTL_FFT_LAZYMUL' => 0, + +'WIZARD_HACK' => '#define NTL_WIZARD_HACK', + +); + +foreach $line (@lines) { + chomp($line); + ($name, $val) = ($line =~ /(.*?)=(.*)/); + $Config{$name} = $val; +} + + + + + +# set AVOID_BRANCHING, SPMM, and FFT flags...try all combinations + + +$time = "999999999999999"; +$aflag = "default"; +$bflag = "default"; +$cflag = "default"; + +$Config{"NTL_FFT_BIGTAB"} = 1; + +foreach $aflag1 ("default", "NTL_FFT_LAZYMUL") { + foreach $bflag1 ("default", "NTL_SPMM_ULL", "NTL_SPMM_ASM") { + foreach $cflag1 ("default", "NTL_AVOID_BRANCHING") { + + + $Config{$aflag1} = 1; + $Config{$bflag1} = 1; + $Config{$cflag1} = 1; + + + if ($Config{"NTL_FFT_LAZYMUL"} == 1 && $Config{"NTL_LONGLONG_SP_MULMOD"} == 0 && + $Config{"NTL_SPMM_ULL"} == 0 && $Config{"NTL_SPMM_ASM"} == 0) { + + $Config{$aflag1} = 0; + $Config{$bflag1} = 0; + $Config{$cflag1} = 0; + + print "skip: $aflag1 $bflag1 $cflag1\n"; + next; + } + + if ($Config{"NTL_LONGLONG_SP_MULMOD"} == 1 && $Config{"NTL_SPMM_ULL"} == 1) { + + $Config{$aflag1} = 0; + $Config{$bflag1} = 0; + $Config{$cflag1} = 0; + + print "skip: $aflag1 $bflag1 $cflag1\n"; + next; + } + + print "run: $aflag1 $bflag1 $cflag1 NTL_FFT_BIGTAB\n"; + GenConfigHeader(); + $time1 = RunProg("Poly1TimeTest"); + + if ($time1 < $time) { + $aflag = $aflag1; + $bflag = $bflag1; + $cflag = $cflag1; + $time = $time1; + } + + $Config{$aflag1} = 0; + $Config{$bflag1} = 0; + $Config{$cflag1} = 0; + + # these are the files that include class definitions + # from FFT.h and ZZ_p.h, which have memvers defined + # in terms of mulmod_precon_t + unlink("FFT.o"); unlink("ZZ_p.o"); unlink("vec_ZZ_p.o"); + 
unlink("ZZ_pX.o"); unlink("ZZ_pX1.o"); + } + } +} + +$Config{$aflag} = 1; +$Config{$bflag} = 1; +$Config{$cflag} = 1; + +# now see if BIGTAB really helps + + +$Config{"NTL_FFT_BIGTAB"} = 0; +print "run: $aflag $bflag $cflag default\n"; +GenConfigHeader(); +$time1 = RunProg("Poly1TimeTest"); + +if ($time1*1.0 > $time*1.04) { + # stick with BIGTABs + $Config{"NTL_FFT_BIGTAB"} = 1; +} + + +unlink("FFT.o"); unlink("ZZ_p.o"); unlink("vec_ZZ_p.o"); +unlink("ZZ_pX.o"); unlink("ZZ_pX1.o"); + +# also unlink lip.o, since that may depend on the MulMod impl +unlink("lip.o"); + + + + +# set flags NTL_GF2X_NOINLINE, NTL_GF2X_ALTCODE, NTL_GF2X_ALTCODE1 + +$time = "999999999999999"; +$aflag = "default"; +$bflag = "default"; + +foreach $aflag1 ("default", "NTL_GF2X_NOINLINE") { + foreach $bflag1 ("default", "NTL_GF2X_ALTCODE", "NTL_GF2X_ALTCODE1") { + + $Config{$aflag1} = 1; + $Config{$bflag1} = 1; + GenConfigHeader(); + $time1 = RunProg("GF2XTimeTest"); + + if ($time1 < $time) { + $aflag = $aflag1; + $bflag = $bflag1; + $time = $time1; + } + + $Config{$aflag1} = 0; + $Config{$bflag1} = 0; + unlink("GF2X.o"); + } +} + + + +# now try NTL_PCLMUL instead +unlink("GF2X.o"); +unlink("GF2X1.o"); +$Config{"NTL_PCLMUL"} = 1; +GenConfigHeader(); +$time1 = RunProg("GF2XTimeTest"); +unlink("GF2X.o"); +unlink("GF2X1.o"); +if ($time1 >= $time) { + $Config{"NTL_PCLMUL"} = 0; + $Config{$aflag} = 1; + $Config{$bflag} = 1; +} + +if ($Config{"NTL_GMP_LIP"} == 0) { + + # GMP is not the primary long integer package + # Choose between default, AVOID_FLOAT, and LONG_LONG implementatsions + + $time = "999999999999999"; + $flag = "default"; + + foreach $flag1 ("default", "NTL_AVOID_FLOAT", "NTL_LONG_LONG") { + $Config{$flag1} = 1; + GenConfigHeader(); + $time1 = RunProg("MulTimeTest"); + + if ($time1 < $time) { + $flag = $flag1; + $time = $time1; + } + + $Config{$flag1} = 0; + unlink("lip.o"); + } + + $Config{$flag} = 1; + + + # finally, now set TBL_REM and TBL_REM_LL + + $time = "999999999999999"; + $flag = "default"; + + foreach $flag1 ("default", "NTL_TBL_REM", "NTL_TBL_REM_LL") { + $Config{$flag1} = 1; + GenConfigHeader(); + $time1 = RunProg("Poly2TimeTest"); + + if ($time1 < $time) { + $flag = $flag1; + $time = $time1; + } + + $Config{$flag1} = 0; + unlink("lip.o"); + } + + $Config{$flag} = 1; +} +else { + + # set NTL_TBL_REM + + $time = "999999999999999"; + $flag = "default"; + + foreach $flag1 ("default", "NTL_TBL_REM") { + $Config{$flag1} = 1; + GenConfigHeader(); + $time1 = RunProg("Poly2TimeTest"); + + if ($time1 < $time) { + $flag = $flag1; + $time = $time1; + } + + $Config{$flag1} = 0; + unlink("lip.o"); + } + + $Config{$flag} = 1; + + + # set NTL_CRT_ALTCODE + + $time = "999999999999999"; + $flag = "default"; + + foreach $flag1 ("default", "NTL_CRT_ALTCODE") { + $Config{$flag1} = 1; + GenConfigHeader(); + $time1 = RunProg("Poly3TimeTest"); + + if ($time1 < $time) { + $flag = $flag1; + $time = $time1; + } + + $Config{$flag1} = 0; + unlink("lip.o"); + } + + $Config{$flag} = 1; + + # set NTL_CRT_ALTCODE_SMALL, if NTL_CRT_ALTCODE + # not set but it did not perform too badly + + if ($Config{"NTL_CRT_ALTCODE"} == 0) { + # time measures default and time1 measures ALTCODE + if (1.0*$time1 < 1.15*$time) { + $Config{"NTL_CRT_ALTCODE_SMALL"} = 1; + } + } + + + +} + +$Config{'WIZARD_HACK'} = ""; +GenConfigHeader(); + +print "\n\n*** the wizard is done!!\n\n"; + +system("make DispSettings"); +system("./DispSettings"); +system("./DispSettings > wizard_log.h"); + + diff --git a/thirdparty/linux/ntl/src/WordVector.c 
b/thirdparty/linux/ntl/src/WordVector.c new file mode 100644 index 0000000000..4cd71ca48f --- /dev/null +++ b/thirdparty/linux/ntl/src/WordVector.c @@ -0,0 +1,398 @@ + +#include + +#include +#include + +NTL_START_IMPL + + + +void WordVector::DoSetLength(long n) +{ + long m; + + if (n < 0) { + LogicError("negative length in vector::SetLength"); + } + + if (NTL_OVERFLOW(n, NTL_BITS_PER_LONG, 0)) + ResourceError("length too big in vector::SetLength"); + + if (n == 0) { + if (rep) rep[-1] = 0; + return; + } + + if (!rep) { + m = ((n+NTL_WordVectorMinAlloc-1)/NTL_WordVectorMinAlloc) * NTL_WordVectorMinAlloc; + + if (NTL_OVERFLOW(m, NTL_BITS_PER_LONG, 0)) + ResourceError("length too big in vector::SetLength"); + + _ntl_ulong *p = (_ntl_ulong *) + NTL_MALLOC(m, sizeof(_ntl_ulong), 2*sizeof(_ntl_ulong)); + + if (!p) { + MemoryError(); + } + + rep = p+2; + + rep[-1] = n; + rep[-2] = m << 1; + + return; + } + + long max_length = (rep[-2] >> 1); + + if (n <= max_length) { + rep[-1] = n; + return; + } + + long frozen = (rep[-2] & 1); + + if (frozen) LogicError("Cannot grow this WordVector"); + + m = max(n, long(NTL_WordVectorExpansionRatio*max_length)); + + m = ((m+NTL_WordVectorMinAlloc-1)/NTL_WordVectorMinAlloc)*NTL_WordVectorMinAlloc; + _ntl_ulong *p = rep - 2; + + if (NTL_OVERFLOW(m, NTL_BITS_PER_LONG, 0)) + ResourceError("length too big in vector::SetLength"); + + p = (_ntl_ulong *) + NTL_REALLOC(p, m, sizeof(_ntl_ulong), 2*sizeof(_ntl_ulong)); + + if (!p) { + MemoryError(); + } + + rep = p+2; + + rep[-1] = n; + rep[-2] = m << 1; +} + + +void WordVector::SetMaxLength(long n) +{ + long OldLength = length(); + DoSetLength(n); + if (rep) rep[-1] = OldLength; +} + + +WordVector& WordVector::operator=(const WordVector& a) +{ + long i, n; + _ntl_ulong *p; + const _ntl_ulong *ap; + + if (this == &a) return *this; + + n = a.length(); + ap = a.elts(); + + SetLength(n); + p = elts(); + + for (i = 0; i < n; i++) + p[i] = ap[i]; + + return *this; +} + + +WordVector::~WordVector() +{ + if (!rep) return; + if (rep[-2] & 1) TerminalError("Cannot free this WordVector"); + free(rep-2); +} + +void WordVector::kill() +{ + if (!rep) return; + if (rep[-2] & 1) LogicError("Cannot free this WordVector"); + free(rep-2); + rep = 0; +} + +void CopySwap(WordVector& x, WordVector& y) +{ + NTL_TLS_LOCAL(WordVector, t); + WordVectorWatcher watch_t(t); + + long sz_x = x.length(); + long sz_y = y.length(); + long sz = (sz_x > sz_y) ? 
sz_x : sz_y; + + x.SetMaxLength(sz); + y.SetMaxLength(sz); + + // EXCEPTIONS: all of the above ensures that swap provides strong ES + + t = x; + x = y; + y = t; +} + +void WordVector::swap(WordVector& y) +{ + if ((this->rep && (this->rep[-2] & 1)) || + (y.rep && (y.rep[-2] & 1))) { + CopySwap(*this, y); + return; + } + + _ntl_swap(this->rep, y.rep); +} + +void WordVector::append(_ntl_ulong a) +{ + long l = this->length(); + this->SetLength(l+1); + (*this)[l] = a; +} + +void WordVector::append(const WordVector& w) +{ + long l = this->length(); + long m = w.length(); + long i; + this->SetLength(l+m); + for (i = 0; i < m; i++) + (*this)[l+i] = w[i]; +} + + +istream & operator>>(istream& s, WordVector& a) +{ + WordVector ibuf; + long c; + long n; + if (!s) NTL_INPUT_ERROR(s, "bad vector input"); + + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + if (c != '[') { + NTL_INPUT_ERROR(s, "bad vector input"); + } + + n = 0; + ibuf.SetLength(0); + + s.get(); + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + while (c != ']' && c != EOF) { + if (n % NTL_WordVectorInputBlock == 0) ibuf.SetMaxLength(n + NTL_WordVectorInputBlock); + n++; + ibuf.SetLength(n); + if (!(s >> ibuf[n-1])) NTL_INPUT_ERROR(s, "bad vector input"); + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + } + if (c == EOF) NTL_INPUT_ERROR(s, "bad vector input"); + s.get(); + + a = ibuf; + return s; +} + + +ostream& operator<<(ostream& s, const WordVector& a) +{ + long i, n; + + n = a.length(); + + s << '['; + + for (i = 0; i < n; i++) { + s << a[i]; + if (i < n-1) s << " "; + } + + s << ']'; + + return s; +} + +long operator==(const WordVector& a, const WordVector& b) +{ + long n = a.length(); + if (b.length() != n) return 0; + const _ntl_ulong* ap = a.elts(); + const _ntl_ulong* bp = b.elts(); + long i; + for (i = 0; i < n; i++) if (ap[i] != bp[i]) return 0; + return 1; +} + +long operator!=(const WordVector& a, const WordVector& b) +{ return !(a == b); } + + + + + +long InnerProduct(const WordVector& a, const WordVector& b) +{ + long n = min(a.length(), b.length()); + const _ntl_ulong *ap = a.elts(); + const _ntl_ulong *bp = b.elts(); + + _ntl_ulong acc; + long i; + + acc = 0; + for (i = 0; i < n; i++) + acc ^= ap[i] & bp[i]; + +#if (NTL_BITS_PER_LONG == 32) + acc ^= acc >> 16; + acc ^= acc >> 8; + acc ^= acc >> 4; + acc ^= acc >> 2; + acc ^= acc >> 1; + acc &= 1; +#elif (NTL_BITS_PER_LONG == 64) + acc ^= acc >> 32; + acc ^= acc >> 16; + acc ^= acc >> 8; + acc ^= acc >> 4; + acc ^= acc >> 2; + acc ^= acc >> 1; + acc &= 1; +#else + _ntl_ulong t = acc; + while (t) { + t = t >> 8; + acc ^= t; + } + + acc ^= acc >> 4; + acc ^= acc >> 2; + acc ^= acc >> 1; + acc &= 1; +#endif + + return long(acc); +} + + +void ShiftAdd(_ntl_ulong *cp, const _ntl_ulong* ap, long sa, long n) +// c = c + (a << n) +{ + if (sa == 0) return; + + long i; + + long wn = n/NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + if (bn == 0) { + for (i = sa+wn-1; i >= wn; i--) + cp[i] ^= ap[i-wn]; + } + else { + _ntl_ulong t = ap[sa-1] >> (NTL_BITS_PER_LONG-bn); + if (t) cp[sa+wn] ^= t; + for (i = sa+wn-1; i >= wn+1; i--) + cp[i] ^= (ap[i-wn] << bn) | (ap[i-wn-1] >> (NTL_BITS_PER_LONG-bn)); + cp[wn] ^= ap[0] << bn; + } +} + +long WV_BlockConstructAlloc(WordVector& x, long d, long n) +{ + long nwords, nbytes, AllocAmt, m, j; + _ntl_ulong *p, *q; + + + /* check n value */ + + if (n <= 0) + LogicError("block construct: n must be positive"); + + /* check d value */ + + if (d <= 0) + 
LogicError("block construct: d must be positive"); + + if (NTL_OVERFLOW(d, NTL_BITS_PER_LONG, 0) || + NTL_OVERFLOW(d, sizeof(_ntl_ulong), 2*sizeof(_ntl_ulong))) + ResourceError("block construct: d too large"); + + nwords = d + 2; + nbytes = nwords*sizeof(_ntl_ulong); + + AllocAmt = (NTL_MAX_ALLOC_BLOCK - sizeof(_ntl_ulong)) / nbytes; + if (AllocAmt == 0) AllocAmt = 1; + + if (AllocAmt < n) + m = AllocAmt; + else + m = n; + + p = (_ntl_ulong *) NTL_MALLOC(m, nbytes, sizeof(_ntl_ulong)); + if (!p) MemoryError(); + + *p = m; + + q = p+3; + x.rep = q; + + for (j = 0; j < m; j++) { + q[-2] = (d << 1) | 1; + q[-1] = 0; + q += nwords; + } + + return m; +} + +void WV_BlockConstructSet(WordVector& x, WordVector& y, long i) +{ + long d, size; + + d = x.rep[-2] >> 1; + size = d + 2; + + y.rep = x.rep + i*size; +} + +long WV_BlockDestroy(WordVector& x) +{ + long m; + _ntl_ulong *p; + + p = x.rep - 3; + m = (long) *p; + free(p); + return m; +} + +long WV_storage(long d) +{ + return (d + 2)*sizeof(_ntl_ulong) + sizeof(WordVector); +} + + + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZ.c b/thirdparty/linux/ntl/src/ZZ.c new file mode 100644 index 0000000000..58bab47f7f --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZ.c @@ -0,0 +1,2393 @@ + +#include +#include +#include +#include + +#include + + + +NTL_START_IMPL + + + + + +const ZZ& ZZ::zero() +{ + + static const ZZ z; // GLOBAL (relies on C++11 thread-safe init) + return z; +} + + +const ZZ& ZZ_expo(long e) +{ + NTL_TLS_LOCAL(ZZ, expo_helper); + + conv(expo_helper, e); + return expo_helper; +} + + + +void AddMod(ZZ& x, const ZZ& a, long b, const ZZ& n) +{ + NTL_ZZRegister(B); + conv(B, b); + AddMod(x, a, B, n); +} + + +void SubMod(ZZ& x, const ZZ& a, long b, const ZZ& n) +{ + NTL_ZZRegister(B); + conv(B, b); + SubMod(x, a, B, n); +} + +void SubMod(ZZ& x, long a, const ZZ& b, const ZZ& n) +{ + NTL_ZZRegister(A); + conv(A, a); + SubMod(x, A, b, n); +} + + + +// ****** input and output + + +static NTL_CHEAP_THREAD_LOCAL long iodigits = 0; +static NTL_CHEAP_THREAD_LOCAL long ioradix = 0; +// iodigits is the greatest integer such that 10^{iodigits} < NTL_WSP_BOUND +// ioradix = 10^{iodigits} + +static void InitZZIO() +{ + long x; + + x = (NTL_WSP_BOUND-1)/10; + iodigits = 0; + ioradix = 1; + + while (x) { + x = x / 10; + iodigits++; + ioradix = ioradix * 10; + } + + if (iodigits <= 0) TerminalError("problem with I/O"); +} + + +istream& operator>>(istream& s, ZZ& x) +{ + long c; + long cval; + long sign; + long ndigits; + long acc; + NTL_ZZRegister(a); + + if (!s) NTL_INPUT_ERROR(s, "bad ZZ input"); + + if (!iodigits) InitZZIO(); + + a = 0; + + SkipWhiteSpace(s); + c = s.peek(); + + if (c == '-') { + sign = -1; + s.get(); + c = s.peek(); + } + else + sign = 1; + + cval = CharToIntVal(c); + + if (cval < 0 || cval > 9) NTL_INPUT_ERROR(s, "bad ZZ input"); + + ndigits = 0; + acc = 0; + while (cval >= 0 && cval <= 9) { + acc = acc*10 + cval; + ndigits++; + + if (ndigits == iodigits) { + mul(a, a, ioradix); + add(a, a, acc); + ndigits = 0; + acc = 0; + } + + s.get(); + c = s.peek(); + cval = CharToIntVal(c); + } + + if (ndigits != 0) { + long mpy = 1; + while (ndigits > 0) { + mpy = mpy * 10; + ndigits--; + } + + mul(a, a, mpy); + add(a, a, acc); + } + + if (sign == -1) + negate(a, a); + + x = a; + return s; +} + + +// The class _ZZ_local_stack should be defined in an empty namespace, +// but since I don't want to rely on namespaces, we just give it a funny +// name to avoid accidental name clashes. 
+ +struct _ZZ_local_stack { + long top; + Vec<long> data; + + _ZZ_local_stack() { top = -1; } + + long pop() { return data[top--]; } + long empty() { return (top == -1); } + void push(long x); +}; + +void _ZZ_local_stack::push(long x) +{ + if (top+1 >= data.length()) + data.SetLength(max(32, long(1.414*data.length()))); + + top++; + data[top] = x; +} + + +static +void PrintDigits(ostream& s, long d, long justify) +{ + NTL_TLS_LOCAL_INIT(Vec<char>, buf, (INIT_SIZE, iodigits)); + + long i = 0; + + while (d) { + buf[i] = IntValToChar(d % 10); + d = d / 10; + i++; + } + + if (justify) { + long j = iodigits - i; + while (j > 0) { + s << "0"; + j--; + } + } + + while (i > 0) { + i--; + s << buf[i]; + } +} + + + + +ostream& operator<<(ostream& s, const ZZ& a) +{ + ZZ b; + _ZZ_local_stack S; + long r; + long k; + + if (!iodigits) InitZZIO(); + + b = a; + + k = sign(b); + + if (k == 0) { + s << "0"; + return s; + } + + if (k < 0) { + s << "-"; + negate(b, b); + } + + do { + r = DivRem(b, b, ioradix); + S.push(r); + } while (!IsZero(b)); + + r = S.pop(); + PrintDigits(s, r, 0); + + while (!S.empty()) { + r = S.pop(); + PrintDigits(s, r, 1); + } + + return s; +} + + + +long GCD(long a, long b) +{ + long u, v, t, x; + + if (a < 0) { + if (a < -NTL_MAX_LONG) ResourceError("GCD: integer overflow"); + a = -a; + } + + if (b < 0) { + if (b < -NTL_MAX_LONG) ResourceError("GCD: integer overflow"); + b = -b; + } + + + if (b==0) + x = a; + else { + u = a; + v = b; + do { + t = u % v; + u = v; + v = t; + } while (v != 0); + + x = u; + } + + return x; +} + + + +void XGCD(long& d, long& s, long& t, long a, long b) +{ + long u, v, u0, v0, u1, v1, u2, v2, q, r; + + long aneg = 0, bneg = 0; + + if (a < 0) { + if (a < -NTL_MAX_LONG) ResourceError("XGCD: integer overflow"); + a = -a; + aneg = 1; + } + + if (b < 0) { + if (b < -NTL_MAX_LONG) ResourceError("XGCD: integer overflow"); + b = -b; + bneg = 1; + } + + u1=1; v1=0; + u2=0; v2=1; + u = a; v = b; + + while (v != 0) { + q = u / v; + r = u % v; + u = v; + v = r; + u0 = u2; + v0 = v2; + u2 = u1 - q*u2; + v2 = v1- q*v2; + u1 = u0; + v1 = v0; + } + + if (aneg) + u1 = -u1; + + if (bneg) + v1 = -v1; + + d = u; + s = u1; + t = v1; +} + +long InvModStatus(long& x, long a, long n) +{ + long d, s, t; + + XGCD(d, s, t, a, n); + if (d != 1) { + x = d; + return 1; + } + else { + if (s < 0) + x = s + n; + else + x = s; + + return 0; + } +} + +long InvMod(long a, long n) +{ + long d, s, t; + + XGCD(d, s, t, a, n); + if (d != 1) { + InvModError("InvMod: inverse undefined"); + } + if (s < 0) + return s + n; + else + return s; +} + + +long PowerMod(long a, long ee, long n) +{ + long x, y; + + unsigned long e; + + if (ee < 0) + e = - ((unsigned long) ee); + else + e = ee; + + x = 1; + y = a; + while (e) { + if (e & 1) x = MulMod(x, y, n); + y = MulMod(y, y, n); + e = e >> 1; + } + + if (ee < 0) x = InvMod(x, n); + + return x; +} + +long ProbPrime(long n, long NumTests) +{ + long m, x, y, z; + long i, j, k; + + if (n <= 1) return 0; + + + if (n == 2) return 1; + if (n % 2 == 0) return 0; + + if (n == 3) return 1; + if (n % 3 == 0) return 0; + + if (n == 5) return 1; + if (n % 5 == 0) return 0; + + if (n == 7) return 1; + if (n % 7 == 0) return 0; + + if (n >= NTL_SP_BOUND) { + return ProbPrime(to_ZZ(n), NumTests); + } + + m = n - 1; + k = 0; + while((m & 1) == 0) { + m = m >> 1; + k++; + } + + // n - 1 == 2^k * m, m odd + + for (i = 0; i < NumTests; i++) { + do { + x = RandomBnd(n); + } while (x == 0); + // x == 0 is not a useful candidate for a witness!
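+ // Worked instance of the witness test below (illustration, not NTL
+ // code): n = 561 = 3*11*17 gives n-1 = 560 = 2^4 * 35, so k = 4 and
+ // m = 35. For x = 2: z = 2^35 = 263 (mod 561), and repeated squaring
+ // yields 166, 67, 1. The final 1 arises as 67^2 with 67 != n-1, a
+ // nontrivial square root of 1 mod 561, so the test correctly reports
+ // 561 composite.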
+ + + if (x == 0) continue; + z = PowerMod(x, m, n); + if (z == 1) continue; + + j = 0; + do { + y = z; + z = MulMod(y, y, n); + j++; + } while (j != k && z != 1); + + if (z != 1 || y != n-1) return 0; + } + + return 1; +} + + +long MillerWitness(const ZZ& n, const ZZ& x) +{ + ZZ m, y, z; + long j, k; + + if (x == 0) return 0; + + add(m, n, -1); + k = MakeOdd(m); + // n - 1 == 2^k * m, m odd + + PowerMod(z, x, m, n); + if (z == 1) return 0; + + j = 0; + do { + y = z; + SqrMod(z, y, n); + j++; + } while (j != k && z != 1); + + if (z != 1) return 1; + add(y, y, 1); + if (y != n) return 1; + return 0; +} + + +// ComputePrimeBound computes a reasonable bound for trial +// division in the Miller-Rabin test. +// It is computed a bit on the "low" side, since being a bit +// low doesn't hurt much, but being too high can hurt a lot. + +static +long ComputePrimeBound(long bn) +{ + long wn = (bn+NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS; + + long fn; + + if (wn <= 36) + fn = wn/4 + 1; + else + fn = long(1.67*sqrt(double(wn))); + + long prime_bnd; + + if (NumBits(bn) + NumBits(fn) > NTL_SP_NBITS) + prime_bnd = NTL_SP_BOUND; + else + prime_bnd = bn*fn; + + return prime_bnd; +} + + +long ProbPrime(const ZZ& n, long NumTrials) +{ + if (n <= 1) return 0; + + if (n.SinglePrecision()) { + return ProbPrime(to_long(n), NumTrials); + } + + + long prime_bnd = ComputePrimeBound(NumBits(n)); + + + PrimeSeq s; + long p; + + p = s.next(); + while (p && p < prime_bnd) { + if (rem(n, p) == 0) + return 0; + + p = s.next(); + } + + ZZ W; + W = 2; + + // first try W == 2....the exponentiation + // algorithm runs slightly faster in this case + + if (MillerWitness(n, W)) + return 0; + + + long i; + + for (i = 0; i < NumTrials; i++) { + do { + RandomBnd(W, n); + } while (W == 0); + // W == 0 is not a useful candidate for a witness! 
+ + if (MillerWitness(n, W)) + return 0; + } + + return 1; +} + + +void RandomPrime(ZZ& n, long l, long NumTrials) +{ + if (l <= 1) + LogicError("RandomPrime: l out of range"); + + if (l == 2) { + if (RandomBnd(2)) + n = 3; + else + n = 2; + + return; + } + + do { + RandomLen(n, l); + if (!IsOdd(n)) add(n, n, 1); + } while (!ProbPrime(n, NumTrials)); +} + +void NextPrime(ZZ& n, const ZZ& m, long NumTrials) +{ + ZZ x; + + if (m <= 2) { + n = 2; + return; + } + + x = m; + + while (!ProbPrime(x, NumTrials)) + add(x, x, 1); + + n = x; +} + +long NextPrime(long m, long NumTrials) +{ + long x; + + if (m <= 2) + return 2; + + x = m; + + while (x < NTL_SP_BOUND && !ProbPrime(x, NumTrials)) + x++; + + if (x >= NTL_SP_BOUND) + ResourceError("NextPrime: no more primes"); + + return x; +} + + + +long NextPowerOfTwo(long m) +{ + long k; + unsigned long n, um; + + if (m < 0) return 0; + + um = m; + n = 1; + k = 0; + + while (n < um) { + n = n << 1; + k++; + } + + if (k >= NTL_BITS_PER_LONG-1) + ResourceError("NextPowerOfTwo: overflow"); + + return k; +} + + + +long NumBits(long a) +{ + unsigned long aa; + if (a < 0) + aa = - ((unsigned long) a); + else + aa = a; + + long k = 0; + while (aa) { + k++; + aa = aa >> 1; + } + + return k; +} + + +long bit(long a, long k) +{ + unsigned long aa; + if (a < 0) + aa = - ((unsigned long) a); + else + aa = a; + + if (k < 0 || k >= NTL_BITS_PER_LONG) + return 0; + else + return long((aa >> k) & 1); +} + + + +long divide(ZZ& q, const ZZ& a, const ZZ& b) +{ + NTL_ZZRegister(qq); + NTL_ZZRegister(r); + + if (IsZero(b)) { + if (IsZero(a)) { + clear(q); + return 1; + } + else + return 0; + } + + + if (IsOne(b)) { + q = a; + return 1; + } + + DivRem(qq, r, a, b); + if (!IsZero(r)) return 0; + q = qq; + return 1; +} + +long divide(const ZZ& a, const ZZ& b) +{ + NTL_ZZRegister(r); + + if (IsZero(b)) return IsZero(a); + if (IsOne(b)) return 1; + + rem(r, a, b); + return IsZero(r); +} + +long divide(ZZ& q, const ZZ& a, long b) +{ + NTL_ZZRegister(qq); + + if (!b) { + if (IsZero(a)) { + clear(q); + return 1; + } + else + return 0; + } + + if (b == 1) { + q = a; + return 1; + } + + long r = DivRem(qq, a, b); + if (r) return 0; + q = qq; + return 1; +} + +long divide(const ZZ& a, long b) +{ + if (!b) return IsZero(a); + if (b == 1) { + return 1; + } + + long r = rem(a, b); + return (r == 0); +} + + +void InvMod(ZZ& x, const ZZ& a, const ZZ& n) +{ + // NOTE: the underlying LIP routines write to the first argument, + // even if inverse is undefined + + NTL_ZZRegister(xx); + if (InvModStatus(xx, a, n)) + InvModError("InvMod: inverse undefined", a, n); + x = xx; +} + +void PowerMod(ZZ& x, const ZZ& a, const ZZ& e, const ZZ& n) +{ + // NOTE: this ensures that all modular inverses are computed + // in the routine InvMod above, rather than the LIP-internal + // modular inverse routine + if (e < 0) { + ZZ a_inv; + ZZ e_neg; + + InvMod(a_inv, a, n); + negate(e_neg, e); + LowLevelPowerMod(x, a_inv, e_neg, n); + } + else + LowLevelPowerMod(x, a, e, n); +} + +#ifdef NTL_EXCEPTIONS + +void InvModError(const char *s, const ZZ& a, const ZZ& n) +{ + throw InvModErrorObject(s, a, n); +} + +#else + +void InvModError(const char *s, const ZZ& a, const ZZ& n) +{ + TerminalError(s); +} + + +#endif + +long RandomPrime_long(long l, long NumTrials) +{ + if (l <= 1 || l >= NTL_BITS_PER_LONG) + ResourceError("RandomPrime: length out of range"); + + long n; + do { + n = RandomLen_long(l); + } while (!ProbPrime(n, NumTrials)); + + return n; +} + + +static Lazy< Vec > lowsieve_storage; +// This is a GLOBAL 
VARIABLE + + +PrimeSeq::PrimeSeq() +{ + movesieve = 0; + pshift = -1; + pindex = -1; + exhausted = 0; +} + + +long PrimeSeq::next() +{ + if (exhausted) { + return 0; + } + + if (pshift < 0) { + shift(0); + return 2; + } + + for (;;) { + const char *p = movesieve; + long i = pindex; + + while ((++i) < NTL_PRIME_BND) { + if (p[i]) { + pindex = i; + return pshift + 2 * i + 3; + } + } + + long newshift = pshift + 2*NTL_PRIME_BND; + + if (newshift > 2 * NTL_PRIME_BND * (2 * NTL_PRIME_BND + 1)) { + /* end of the road */ + exhausted = 1; + return 0; + } + + shift(newshift); + } +} + +void PrimeSeq::shift(long newshift) +{ + long i; + long j; + long jstep; + long jstart; + long ibound; + char *p; + + if (!lowsieve_storage.built()) + start(); + + const char *lowsieve = lowsieve_storage->elts(); + + + if (newshift < 0) { + pshift = -1; + } + else if (newshift == 0) { + pshift = 0; + movesieve = lowsieve; + } + else if (newshift != pshift) { + if (movesieve_mem.length() == 0) { + movesieve_mem.SetLength(NTL_PRIME_BND); + } + + pshift = newshift; + movesieve = p = movesieve_mem.elts(); + for (i = 0; i < NTL_PRIME_BND; i++) + p[i] = 1; + + jstep = 3; + ibound = pshift + 2 * NTL_PRIME_BND + 1; + for (i = 0; jstep * jstep <= ibound; i++) { + if (lowsieve[i]) { + if (!((jstart = (pshift + 2) / jstep + 1) & 1)) + jstart++; + if (jstart <= jstep) + jstart = jstep; + jstart = (jstart * jstep - pshift - 3) / 2; + for (j = jstart; j < NTL_PRIME_BND; j += jstep) + p[j] = 0; + } + jstep += 2; + } + } + + pindex = -1; + exhausted = 0; +} + + +void PrimeSeq::start() +{ + long i; + long j; + long jstep; + long jstart; + long ibnd; + char *p; + + do { + Lazy< Vec >::Builder builder(lowsieve_storage); + if (!builder()) break; + + UniquePtr< Vec > ptr; + ptr.make(); + ptr->SetLength(NTL_PRIME_BND); + + p = ptr->elts(); + + for (i = 0; i < NTL_PRIME_BND; i++) + p[i] = 1; + + jstep = 1; + jstart = -1; + ibnd = (SqrRoot(2 * NTL_PRIME_BND + 1) - 3) / 2; + for (i = 0; i <= ibnd; i++) { + jstart += 2 * ((jstep += 2) - 1); + if (p[i]) + for (j = jstart; j < NTL_PRIME_BND; j += jstep) + p[j] = 0; + } + + builder.move(ptr); + } while (0); + +} + +void PrimeSeq::reset(long b) +{ + if (b > (2*NTL_PRIME_BND+1)*(2*NTL_PRIME_BND+1)) { + exhausted = 1; + return; + } + + if (b <= 2) { + shift(-1); + return; + } + + if ((b & 1) == 0) b++; + + shift(((b-3) / (2*NTL_PRIME_BND))* (2*NTL_PRIME_BND)); + pindex = (b - pshift - 3)/2 - 1; +} + +long Jacobi(const ZZ& aa, const ZZ& nn) +{ + ZZ a, n; + long t, k; + long d; + + a = aa; + n = nn; + t = 1; + + while (a != 0) { + k = MakeOdd(a); + d = trunc_long(n, 3); + if ((k & 1) && (d == 3 || d == 5)) t = -t; + + if (trunc_long(a, 2) == 3 && (d & 3) == 3) t = -t; + swap(a, n); + rem(a, a, n); + } + + if (n == 1) + return t; + else + return 0; +} + + +void SqrRootMod(ZZ& x, const ZZ& aa, const ZZ& nn) +{ + if (aa == 0 || aa == 1) { + x = aa; + return; + } + + // at this point, we must have nn >= 5 + + if (trunc_long(nn, 2) == 3) { // special case, n = 3 (mod 4) + ZZ n, a, e, z; + + n = nn; + a = aa; + + add(e, n, 1); + RightShift(e, e, 2); + + PowerMod(z, a, e, n); + x = z; + + return; + } + + ZZ n, m; + int h, nlen; + + n = nn; + nlen = NumBits(n); + + sub(m, n, 1); + h = MakeOdd(m); // h >= 2 + + + if (nlen > 50 && h < SqrRoot(nlen)) { + long i, j; + ZZ a, b, a_inv, c, r, m1, d; + + a = aa; + InvMod(a_inv, a, n); + + if (h == 2) + b = 2; + else { + do { + RandomBnd(b, n); + } while (Jacobi(b, n) != -1); + } + + + PowerMod(c, b, m, n); + + add(m1, m, 1); + RightShift(m1, m1, 1); + PowerMod(r, a, 
m1, n); + + for (i = h-2; i >= 0; i--) { + SqrMod(d, r, n); + MulMod(d, d, a_inv, n); + for (j = 0; j < i; j++) + SqrMod(d, d, n); + if (!IsOne(d)) + MulMod(r, r, c, n); + SqrMod(c, c, n); + } + + x = r; + return; + } + + + + + + long i, k; + ZZ ma, t, u, v, e; + ZZ t1, t2, t3, t4; + + n = nn; + NegateMod(ma, aa, n); + + // find t such that t^2 - 4*a is not a square + + MulMod(t1, ma, 4, n); + do { + RandomBnd(t, n); + SqrMod(t2, t, n); + AddMod(t2, t2, t1, n); + } while (Jacobi(t2, n) != -1); + + // compute u*X + v = X^{(n+1)/2} mod f, where f = X^2 - t*X + a + + add(e, n, 1); + RightShift(e, e, 1); + + u = 0; + v = 1; + + k = NumBits(e); + + for (i = k - 1; i >= 0; i--) { + add(t2, u, v); + sqr(t3, t2); // t3 = (u+v)^2 + sqr(t1, u); + sqr(t2, v); + sub(t3, t3, t1); + sub(t3, t3, t2); // t1 = u^2, t2 = v^2, t3 = 2*u*v + rem(t1, t1, n); + mul(t4, t1, t); + add(t4, t4, t3); + rem(u, t4, n); + + mul(t4, t1, ma); + add(t4, t4, t2); + rem(v, t4, n); + + if (bit(e, i)) { + MulMod(t1, u, t, n); + AddMod(t1, t1, v, n); + MulMod(v, u, ma, n); + u = t1; + } + + } + + x = v; +} + + + +// Chinese Remaindering. +// +// This version is new to v3.7, and is significantly +// simpler and faster than the previous version. +// +// This function takes as input g, a, G, p, +// such that a > 0, 0 <= G < p, and gcd(a, p) = 1. +// It computes a' = a*p and g' such that +// * g' = g (mod a); +// * g' = G (mod p); +// * -a'/2 < g' <= a'/2. +// It then sets g := g' and a := a', and returns 1 iff g has changed. +// +// Under normal use, the input value g satisfies -a/2 < g <= a/2; +// however, this was not documented or enforced in earlier versions, +// so to maintain backward compatibility, no restrictions are placed +// on g. This routine runs faster, though, if -a/2 < g <= a/2, +// and the first thing the routine does is to make this condition +// hold. +// +// Also, under normal use, both a and p are odd; however, the routine +// will still work even if this is not so. +// +// The routine is based on the following simple fact. +// +// Let -a/2 < g <= a/2, and let h satisfy +// * g + a h = G (mod p); +// * -p/2 < h <= p/2. +// Further, if p = 2*h and g > 0, set +// g' := g - a h; +// otherwise, set +// g' := g + a h. +// Then g' so defined satisfies the above requirements. +// +// It is trivial to see that g' satisfies the congruence conditions. +// The only thing is to check that the "balancing" condition +// -a'/2 < g' <= a'/2 also holds.
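+// Numeric sanity check of the fact above (illustration, not part of NTL):
+// take a = 15 with balanced residue g = 4, and a new prime p = 7 with
+// G = 2. Since 15 = 1 (mod 7), a^{-1} = 1 (mod 7), and
+//    h = (G - g)*a^{-1} = -2 (mod 7), already in the range (-p/2, p/2].
+// The update g' = g + a*h = 4 + 15*(-2) = -26 then satisfies all three
+// requirements: -26 = 4 (mod 15), -26 = 2 (mod 7), and, with
+// a' = a*p = 105, -105/2 < -26 <= 105/2.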
+ + +long CRT(ZZ& gg, ZZ& a, long G, long p) +{ + if (p >= NTL_SP_BOUND) { + ZZ GG, pp; + conv(GG, G); + conv(pp, p); + return CRT(gg, a, GG, pp); + } + + long modified = 0; + + NTL_ZZRegister(g); + + if (!CRTInRange(gg, a)) { + modified = 1; + ZZ a1; + rem(g, gg, a); + RightShift(a1, a, 1); + if (g > a1) sub(g, g, a); + } + else + g = gg; + + + long p1; + p1 = p >> 1; + + long a_inv; + a_inv = rem(a, p); + a_inv = InvMod(a_inv, p); + + long h; + h = rem(g, p); + h = SubMod(G, h, p); + h = MulMod(h, a_inv, p); + if (h > p1) + h = h - p; + + if (h != 0) { + modified = 1; + + if (!(p & 1) && g > 0 && (h == p1)) + MulSubFrom(g, a, h); + else + MulAddTo(g, a, h); + } + + mul(a, a, p); + gg = g; + + return modified; +} + +long CRT(ZZ& gg, ZZ& a, const ZZ& G, const ZZ& p) +{ + long modified = 0; + + ZZ g; + + if (!CRTInRange(gg, a)) { + modified = 1; + ZZ a1; + rem(g, gg, a); + RightShift(a1, a, 1); + if (g > a1) sub(g, g, a); + } + else + g = gg; + + + ZZ p1; + RightShift(p1, p, 1); + + ZZ a_inv; + rem(a_inv, a, p); + InvMod(a_inv, a_inv, p); + + ZZ h; + rem(h, g, p); + SubMod(h, G, h, p); + MulMod(h, h, a_inv, p); + if (h > p1) + sub(h, h, p); + + if (h != 0) { + modified = 1; + ZZ ah; + mul(ah, a, h); + + if (!IsOdd(p) && g > 0 && (h == p1)) + sub(g, g, ah); + else + add(g, g, ah); + } + + mul(a, a, p); + gg = g; + + return modified; +} + + + +void sub(ZZ& x, const ZZ& a, long b) +{ + NTL_ZZRegister(B); + conv(B, b); + sub(x, a, B); +} + +void sub(ZZ& x, long a, const ZZ& b) +{ + NTL_ZZRegister(A); + conv(A, a); + sub(x, A, b); +} + + +void power2(ZZ& x, long e) +{ + if (e < 0) ArithmeticError("power2: negative exponent"); + set(x); + LeftShift(x, x, e); +} + + +void conv(ZZ& x, const char *s) +{ + long c; + long cval; + long sign; + long ndigits; + long acc; + long i = 0; + + NTL_ZZRegister(a); + + if (!s) InputError("bad ZZ input"); + + if (!iodigits) InitZZIO(); + + a = 0; + + c = s[i]; + while (IsWhiteSpace(c)) { + i++; + c = s[i]; + } + + if (c == '-') { + sign = -1; + i++; + c = s[i]; + } + else + sign = 1; + + cval = CharToIntVal(c); + if (cval < 0 || cval > 9) InputError("bad ZZ input"); + + ndigits = 0; + acc = 0; + while (cval >= 0 && cval <= 9) { + acc = acc*10 + cval; + ndigits++; + + if (ndigits == iodigits) { + mul(a, a, ioradix); + add(a, a, acc); + ndigits = 0; + acc = 0; + } + + i++; + c = s[i]; + cval = CharToIntVal(c); + } + + if (ndigits != 0) { + long mpy = 1; + while (ndigits > 0) { + mpy = mpy * 10; + ndigits--; + } + + mul(a, a, mpy); + add(a, a, acc); + } + + if (sign == -1) + negate(a, a); + + x = a; +} + + + +void bit_and(ZZ& x, const ZZ& a, long b) +{ + NTL_ZZRegister(B); + conv(B, b); + bit_and(x, a, B); +} + +void bit_or(ZZ& x, const ZZ& a, long b) +{ + NTL_ZZRegister(B); + conv(B, b); + bit_or(x, a, B); +} + +void bit_xor(ZZ& x, const ZZ& a, long b) +{ + NTL_ZZRegister(B); + conv(B, b); + bit_xor(x, a, B); +} + + +long power_long(long a, long e) +{ + if (e < 0) ArithmeticError("power_long: negative exponent"); + + if (e == 0) return 1; + + if (a == 1) return 1; + if (a == -1) { + if (e & 1) + return -1; + else + return 1; + } + + // no overflow check --- result is computed correctly + // modulo word size + + unsigned long res = 1; + unsigned long aa = a; + long i; + + for (i = 0; i < e; i++) + res *= aa; + + return to_long(res); +} + + + +// ======================= new PRG stuff ====================== + + + + +#if (NTL_BITS_PER_INT32 == 32) +#define INT32MASK(x) (x) +#else +#define INT32MASK(x) ((x) & _ntl_uint32(0xffffffff)) +#endif + + + +// SHA256 code 
adapted from an implementation by Brad Conte. +// The following is from his original source files. +/********************************************************************* +* Filename: sha256.c +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Implementation of the SHA-256 hashing algorithm. + SHA-256 is one of the three algorithms in the SHA2 + specification. The others, SHA-384 and SHA-512, are not + offered in this implementation. + Algorithm specification can be found here: + * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf + This implementation uses little endian byte order. +*********************************************************************/ + + + + +#define SHA256_BLOCKSIZE (64) +#define SHA256_HASHSIZE (32) + +// DBL_INT_ADD treats two unsigned ints a and b as one 64-bit integer and adds c to it +static inline +void DBL_INT_ADD(_ntl_uint32& a, _ntl_uint32& b, _ntl_uint32 c) +{ + _ntl_uint32 aa = INT32MASK(a); + if (aa > INT32MASK(_ntl_uint32(0xffffffff) - c)) b++; + a = aa + c; +} + +#define ROTLEFT(a,b) (((a) << (b)) | (INT32MASK(a) >> (32-(b)))) +#define ROTRIGHT(a,b) ((INT32MASK(a) >> (b)) | ((a) << (32-(b)))) + +#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z))) +#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22)) +#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25)) +#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ (INT32MASK(x) >> 3)) +#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ (INT32MASK(x) >> 10)) + +struct SHA256_CTX { + unsigned char data[64]; + _ntl_uint32 datalen; + _ntl_uint32 bitlen[2]; + _ntl_uint32 state[8]; +}; + +static const _ntl_uint32 sha256_const[64] = { + 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, + 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, + 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, + 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, + 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, + 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070, + 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, + 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +}; + + +static +void sha256_transform(SHA256_CTX& ctx, unsigned char *data) +{ + _ntl_uint32 a,b,c,d,e,f,g,h,i,j,t1,t2,m[64]; + + for (i=0,j=0; i < 16; ++i, j += 4) + m[i] = (data[j] << 24) | (data[j+1] << 16) | (data[j+2] << 8) | (data[j+3]); + for ( ; i < 64; ++i) + m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16]; + + a = ctx.state[0]; + b = ctx.state[1]; + c = ctx.state[2]; + d = ctx.state[3]; + e = ctx.state[4]; + f = ctx.state[5]; + g = ctx.state[6]; + h = ctx.state[7]; + + for (i = 0; i < 64; ++i) { + t1 = h + EP1(e) + CH(e,f,g) + sha256_const[i] + m[i]; + t2 = EP0(a) + MAJ(a,b,c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + } + + ctx.state[0] += a; + ctx.state[1] += b; + ctx.state[2] += c; + ctx.state[3] += d; + ctx.state[4] += e; + ctx.state[5] += f; + ctx.state[6] += g; + ctx.state[7] += h; +} + +static +void sha256_init(SHA256_CTX& ctx) +{ + ctx.datalen = 0; + ctx.bitlen[0] = 0; + ctx.bitlen[1] = 0; + ctx.state[0] =
0x6a09e667; + ctx.state[1] = 0xbb67ae85; + ctx.state[2] = 0x3c6ef372; + ctx.state[3] = 0xa54ff53a; + ctx.state[4] = 0x510e527f; + ctx.state[5] = 0x9b05688c; + ctx.state[6] = 0x1f83d9ab; + ctx.state[7] = 0x5be0cd19; +} + +static +void sha256_update(SHA256_CTX& ctx, const unsigned char *data, _ntl_uint32 len) +{ + _ntl_uint32 i; + + for (i=0; i < len; ++i) { + ctx.data[ctx.datalen] = data[i]; + ctx.datalen++; + if (ctx.datalen == 64) { + sha256_transform(ctx,ctx.data); + DBL_INT_ADD(ctx.bitlen[0],ctx.bitlen[1],512); + ctx.datalen = 0; + } + } +} + +static +void sha256_final(SHA256_CTX& ctx, unsigned char *hash, + long hlen=SHA256_HASHSIZE) +{ + _ntl_uint32 i, j; + + i = ctx.datalen; + + // Pad whatever data is left in the buffer. + if (ctx.datalen < 56) { + ctx.data[i++] = 0x80; + while (i < 56) + ctx.data[i++] = 0x00; + } + else { + ctx.data[i++] = 0x80; + while (i < 64) + ctx.data[i++] = 0x00; + sha256_transform(ctx,ctx.data); + memset(ctx.data,0,56); + } + + // Append to the padding the total message's length in bits and transform. + DBL_INT_ADD(ctx.bitlen[0],ctx.bitlen[1],ctx.datalen * 8); + + ctx.data[63] = ctx.bitlen[0]; + ctx.data[62] = ctx.bitlen[0] >> 8; + ctx.data[61] = ctx.bitlen[0] >> 16; + ctx.data[60] = ctx.bitlen[0] >> 24; + ctx.data[59] = ctx.bitlen[1]; + ctx.data[58] = ctx.bitlen[1] >> 8; + ctx.data[57] = ctx.bitlen[1] >> 16; + ctx.data[56] = ctx.bitlen[1] >> 24; + sha256_transform(ctx,ctx.data); + + for (i = 0; i < 8; i++) { + _ntl_uint32 w = ctx.state[i]; + for (j = 0; j < 4; j++) { + if (hlen <= 0) break; + hash[4*i + j] = w >> (24-j*8); + hlen--; + } + } + +} + + + +static +void sha256(const unsigned char *data, long dlen, unsigned char *hash, + long hlen=SHA256_HASHSIZE) +{ + if (dlen < 0) dlen = 0; + if (hlen < 0) hlen = 0; + + SHA256_CTX ctx; + sha256_init(ctx); + + const long BLKSIZE = 4096; + + long i; + for (i = 0; i <= dlen-BLKSIZE; i += BLKSIZE) + sha256_update(ctx, data + i, BLKSIZE); + + if (i < dlen) + sha256_update(ctx, data + i, dlen - i); + + sha256_final(ctx, hash, hlen); +} + + +static +void hmac_sha256(const unsigned char *key, long klen, + const unsigned char *data, long dlen, + unsigned char *hash, long hlen=SHA256_HASHSIZE) +{ + if (klen < 0) klen = 0; + if (dlen < 0) dlen = 0; + if (hlen < 0) hlen = 0; + + unsigned char K[SHA256_BLOCKSIZE]; + unsigned char tmp[SHA256_HASHSIZE]; + + long i; + + if (klen <= SHA256_BLOCKSIZE) { + for (i = 0; i < klen; i++) + K[i] = key[i]; + for (i = klen; i < SHA256_BLOCKSIZE; i++) + K[i] = 0; + } + else { + sha256(key, klen, K, SHA256_BLOCKSIZE); + for (i = SHA256_HASHSIZE; i < SHA256_BLOCKSIZE; i++) + K[i] = 0; + } + + for (i = 0; i < SHA256_BLOCKSIZE; i++) + K[i] ^= 0x36; + + SHA256_CTX ctx; + sha256_init(ctx); + sha256_update(ctx, K, SHA256_BLOCKSIZE); + sha256_update(ctx, data, dlen); + sha256_final(ctx, tmp); + + for (i = 0; i < SHA256_BLOCKSIZE; i++) + K[i] ^= (0x36 ^ 0x5C); + + sha256_init(ctx); + sha256_update(ctx, K, SHA256_BLOCKSIZE); + sha256_update(ctx, tmp, SHA256_HASHSIZE); + sha256_final(ctx, hash, hlen); +} + + +// This key derivation uses HMAC with a zero key to derive +// an intermediate key K from the data, and then uses HMAC +// as a PRF in counter mode with key K to derive the final key + +void DeriveKey(unsigned char *key, long klen, + const unsigned char *data, long dlen) +{ + if (dlen < 0) LogicError("DeriveKey: bad args"); + if (klen < 0) LogicError("DeriveKey: bad args"); + + long i, j; + + + unsigned char K[SHA256_HASHSIZE]; + hmac_sha256(0, 0, data, dlen, K); + + // initialize 64-bit 
counter to zero + unsigned char counter[8]; + for (j = 0; j < 8; j++) counter[j] = 0; + + for (i = 0; i <= klen-SHA256_HASHSIZE; i += SHA256_HASHSIZE) { + hmac_sha256(K, SHA256_HASHSIZE, counter, 8, key+i); + + // increment counter + for (j = 0; j < 8; j++) { + counter[j]++; + if (counter[j] != 0) break; + } + } + + if (i < klen) + hmac_sha256(K, SHA256_HASHSIZE, counter, 8, key+i, klen-i); +} + + + + +// ******************** ChaCha20 stuff *********************** + +static const _ntl_uint32 chacha_const[4] = + { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 }; + + +#define LE(p) (((_ntl_uint32)((p)[0])) + ((_ntl_uint32)((p)[1]) << 8) + \ + ((_ntl_uint32)((p)[2]) << 16) + ((_ntl_uint32)((p)[3]) << 24)) + +#define FROMLE(p, x) (p)[0] = (x), (p)[1] = ((x) >> 8), \ + (p)[2] = ((x) >> 16), (p)[3] = ((x) >> 24) + + +#define QUARTERROUND(x, a, b, c, d) \ + x[a] += x[b], x[d] = ROTLEFT(x[d] ^ x[a], 16), \ + x[c] += x[d], x[b] = ROTLEFT(x[b] ^ x[c], 12), \ + x[a] += x[b], x[d] = ROTLEFT(x[d] ^ x[a], 8), \ + x[c] += x[d], x[b] = ROTLEFT(x[b] ^ x[c], 7) + + +static +void salsa20_core(_ntl_uint32* data) +{ + long i; + + for (i = 0; i < 10; i++) { + QUARTERROUND(data, 0, 4, 8, 12); + QUARTERROUND(data, 1, 5, 9, 13); + QUARTERROUND(data, 2, 6, 10, 14); + QUARTERROUND(data, 3, 7, 11, 15); + QUARTERROUND(data, 0, 5, 10, 15); + QUARTERROUND(data, 1, 6, 11, 12); + QUARTERROUND(data, 2, 7, 8, 13); + QUARTERROUND(data, 3, 4, 9, 14); + } +} + + +// key K must be exactly 32 bytes +static +void salsa20_init(_ntl_uint32 *state, const unsigned char *K) +{ + long i; + + for (i = 0; i < 4; i++) + state[i] = chacha_const[i]; + + for (i = 4; i < 12; i++) + state[i] = LE(K + 4*(i-4)); + + for (i = 12; i < 16; i++) + state[i] = 0; +} + + + +// state and data are of length 16 +static +void salsa20_apply(_ntl_uint32 *state, _ntl_uint32 *data) +{ + long i; + + for (i = 0; i < 16; i++) data[i] = state[i]; + + salsa20_core(data); + + for (i = 0; i < 16; i++) data[i] += state[i]; + + for (i = 12; i < 16; i++) { + state[i]++; + state[i] = INT32MASK(state[i]); + if (state[i] != 0) break; + } +} + + +#if 0 +// state is 16 words, data is 64 bytes +static +void salsa20_apply(_ntl_uint32 *state, unsigned char *data) +{ + _ntl_uint32 wdata[16]; + salsa20_apply(state, wdata); + + long i; + for (i = 0; i < 16; i++) + FROMLE(data + 4*i, wdata[i]); + + // FIXME: could use memcpy for above if everything + // is right +} +#endif + + + +RandomStream::RandomStream(const unsigned char *key) +{ + salsa20_init(state, key); + pos = 64; +} + + +void RandomStream::do_get(unsigned char *NTL_RESTRICT res, long n) +{ + if (n < 0) LogicError("RandomStream::get: bad args"); + + long i, j; + + if (n <= 64-pos) { + for (i = 0; i < n; i++) res[i] = buf[pos+i]; + pos += n; + return; + } + + // read remainder of buffer + for (i = 0; i < 64-pos; i++) res[i] = buf[pos+i]; + n -= 64-pos; + res += 64-pos; + pos = 64; + + _ntl_uint32 wdata[16]; + + // read 64-byte chunks + for (i = 0; i <= n-64; i += 64) { + salsa20_apply(state, wdata); + for (j = 0; j < 16; j++) + FROMLE(res + i + 4*j, wdata[j]); + } + + if (i < n) { + salsa20_apply(state, wdata); + + for (j = 0; j < 16; j++) + FROMLE(buf + 4*j, wdata[j]); + + pos = n-i; + for (j = 0; j < pos; j++) + res[i+j] = buf[j]; + } +} + + +NTL_TLS_GLOBAL_DECL(UniquePtr, CurrentRandomStream); + + +void SetSeed(const RandomStream& s) +{ + NTL_TLS_GLOBAL_ACCESS(CurrentRandomStream); + + if (!CurrentRandomStream) + CurrentRandomStream.make(s); + else + *CurrentRandomStream = s; +} + + +void SetSeed(const unsigned char 
*data, long dlen) +{ + if (dlen < 0) LogicError("SetSeed: bad args"); + + Vec key; + key.SetLength(NTL_PRG_KEYLEN); + DeriveKey(key.elts(), NTL_PRG_KEYLEN, data, dlen); + + SetSeed(RandomStream(key.elts())); +} + +void SetSeed(const ZZ& seed) +{ + long nb = NumBytes(seed); + + Vec buf; + buf.SetLength(nb); + + BytesFromZZ(buf.elts(), seed, nb); + + SetSeed(buf.elts(), nb); +} + + +static +void InitRandomStream() +{ + const string& id = UniqueID(); + SetSeed((const unsigned char *) id.c_str(), id.length()); +} + +static inline +RandomStream& LocalGetCurrentRandomStream() +{ + NTL_TLS_GLOBAL_ACCESS(CurrentRandomStream); + + if (!CurrentRandomStream) InitRandomStream(); + return *CurrentRandomStream; +} + +RandomStream& GetCurrentRandomStream() +{ + return LocalGetCurrentRandomStream(); +} + + + + + + + +static inline +unsigned long WordFromBytes(const unsigned char *buf, long n) +{ + unsigned long res = 0; + long i; + + for (i = n-1; i >= 0; i--) + res = (res << 8) | buf[i]; + + return res; +} + + +unsigned long RandomWord() +{ + RandomStream& stream = LocalGetCurrentRandomStream(); + unsigned char buf[NTL_BITS_PER_LONG/8]; + + stream.get(buf, NTL_BITS_PER_LONG/8); + return WordFromBytes(buf, NTL_BITS_PER_LONG/8); +} + +long RandomBits_long(long l) +{ + if (l <= 0) return 0; + if (l >= NTL_BITS_PER_LONG) + ResourceError("RandomBits: length too big"); + + RandomStream& stream = LocalGetCurrentRandomStream(); + unsigned char buf[NTL_BITS_PER_LONG/8]; + long nb = (l+7)/8; + stream.get(buf, nb); + + return long(WordFromBytes(buf, nb) & ((1UL << l)-1UL)); +} + +unsigned long RandomBits_ulong(long l) +{ + if (l <= 0) return 0; + if (l > NTL_BITS_PER_LONG) + ResourceError("RandomBits: length too big"); + + RandomStream& stream = LocalGetCurrentRandomStream(); + unsigned char buf[NTL_BITS_PER_LONG/8]; + long nb = (l+7)/8; + stream.get(buf, nb); + unsigned long res = WordFromBytes(buf, nb); + if (l < NTL_BITS_PER_LONG) + res = res & ((1UL << l)-1UL); + return res; +} + +long RandomLen_long(long l) +{ + if (l <= 0) return 0; + if (l == 1) return 1; + if (l >= NTL_BITS_PER_LONG) + ResourceError("RandomLen: length too big"); + + RandomStream& stream = LocalGetCurrentRandomStream(); + unsigned char buf[NTL_BITS_PER_LONG/8]; + long nb = ((l-1)+7)/8; + stream.get(buf, nb); + unsigned long res = WordFromBytes(buf, nb); + unsigned long mask = (1UL << (l-1)) - 1UL; + return long((res & mask) | (mask+1UL)); +} + + +long RandomBnd(long bnd) +{ + if (bnd <= 1) return 0; + + RandomStream& stream = LocalGetCurrentRandomStream(); + unsigned char buf[NTL_BITS_PER_LONG/8]; + long l = NumBits(bnd-1); + long nb = (l+7)/8; + + long tmp; + do { + stream.get(buf, nb); + tmp = long(WordFromBytes(buf, nb) & ((1UL << l)-1UL)); + } while (tmp >= bnd); + + return tmp; +} + + + +void RandomBits(ZZ& x, long l) +{ + if (l <= 0) { + x = 0; + return; + } + + if (NTL_OVERFLOW(l, 1, 0)) + ResourceError("RandomBits: length too big"); + + RandomStream& stream = LocalGetCurrentRandomStream(); + + long nb = (l+7)/8; + unsigned long mask = (1UL << (8 - nb*8 + l)) - 1UL; + + NTL_TLS_LOCAL(Vec, buf_mem); + Vec::Watcher watch_buf_mem(buf_mem); + + buf_mem.SetLength(nb); + unsigned char *buf = buf_mem.elts(); + + x.SetSize((l + NTL_ZZ_NBITS - 1)/NTL_ZZ_NBITS); + // pre-allocate to ensure strong ES + + stream.get(buf, nb); + buf[nb-1] &= mask; + + ZZFromBytes(x, buf, nb); +} + + +void RandomLen(ZZ& x, long l) +{ + if (l <= 0) { + x = 0; + return; + } + + if (l == 1) { + x = 1; + return; + } + + if (NTL_OVERFLOW(l, 1, 0)) + 
ResourceError("RandomLen: length too big"); + + RandomStream& stream = LocalGetCurrentRandomStream(); + + long nb = (l+7)/8; + unsigned long mask = (1UL << (8 - nb*8 + l)) - 1UL; + + NTL_TLS_LOCAL(Vec, buf_mem); + Vec::Watcher watch_buf_mem(buf_mem); + + buf_mem.SetLength(nb); + unsigned char *buf = buf_mem.elts(); + + x.SetSize((l + NTL_ZZ_NBITS - 1)/NTL_ZZ_NBITS); + // pre-allocate to ensure strong ES + + stream.get(buf, nb); + buf[nb-1] &= mask; + buf[nb-1] |= ((mask >> 1) + 1UL); + + ZZFromBytes(x, buf, nb); +} + + + + + +/********************************************************** + +The following implementation of RandomBnd is designed +for speed. It certainly is not resilient against a +timing side-channel attack (but then again, none of these +PRG routines are designed to be). + +The naive strategy generates random candidates of the right +bit length until the candidate < bnd. +The idea in this implementation is to generate the high +order two bytes of the candidate first, and compare this +to the high order two bytes of tmp. We can discard the +candidate if this is already too large. + +***********************************************************/ + +void RandomBnd(ZZ& x, const ZZ& bnd) +{ + if (bnd <= 1) { + x = 0; + return; + } + + RandomStream& stream = LocalGetCurrentRandomStream(); + + long l = NumBits(bnd); + long nb = (l+7)/8; + + if (nb <= 3) { + long lbnd = conv(bnd); + unsigned char lbuf[3]; + long ltmp; + + x.SetSize((l + NTL_ZZ_NBITS - 1)/NTL_ZZ_NBITS); + // pre-allocate to ensure strong ES + do { + stream.get(lbuf, nb); + ltmp = long(WordFromBytes(lbuf, nb) & ((1UL << l)-1UL)); + } while (ltmp >= lbnd); + + conv(x, ltmp); + return; + } + + // deal with possible alias + NTL_ZZRegister(tmp_store); + const ZZ& bnd_ref = ((&x == &bnd) ? (tmp_store = bnd) : bnd); + + + NTL_ZZRegister(hbnd); + RightShift(hbnd, bnd_ref, (nb-2)*8); + long lhbnd = conv(hbnd); + + unsigned long mask = (1UL << (16 - nb*8 + l)) - 1UL; + + NTL_TLS_LOCAL(Vec, buf_mem); + Vec::Watcher watch_buf_mem(buf_mem); + buf_mem.SetLength(nb); + unsigned char *buf = buf_mem.elts(); + + unsigned char hbuf[2]; + + x.SetSize((l + NTL_ZZ_NBITS - 1)/NTL_ZZ_NBITS); + // pre-allocate to ensure strong ES + for (;;) { + stream.get(hbuf, 2); + long hpart = long(WordFromBytes(hbuf, 2) & mask); + + if (hpart > lhbnd) continue; + + stream.get(buf, nb-2); + buf[nb-2] = ((unsigned long) hpart); + buf[nb-1] = ((unsigned long) hpart) >> 8; + + ZZFromBytes(x, buf, nb); + if (hpart < lhbnd || x < bnd_ref) break; + } +} + + + + +// More prime generation stuff... + +static +double Log2(double x) +{ + static const double log2 = log(2.0); // GLOBAL (relies on C++11 thread-safe init) + return log(x)/log2; +} + +// Define p(k,t) to be the conditional probability that a random, odd, k-bit +// number is composite, given that it passes t iterations of the +// Miller-Rabin test. +// This routine returns 0 or 1, and if it returns 1 then +// p(k,t) <= 2^{-n}. +// This basically encodes the estimates of Damgard, Landrock, and Pomerance; +// it uses floating point arithmetic, but is coded in such a way +// that its results should be correct, assuming that the log function +// is computed with reasonable precision. +// +// It is assumed that k >= 3 and t >= 1; if this does not hold, +// then 0 is returned. 
+ +static +long ErrBoundTest(long kk, long tt, long nn) + +{ + const double fudge = (1.0 + 1024.0/NTL_FDOUBLE_PRECISION); + const double log2_3 = Log2(3.0); + const double log2_7 = Log2(7.0); + const double log2_20 = Log2(20.0); + + double k = kk; + double t = tt; + double n = nn; + + if (k < 3 || t < 1) return 0; + if (n < 1) return 1; + + // the following test is largely academic + if (9*t > NTL_FDOUBLE_PRECISION) LogicError("ErrBoundTest: t too big"); + + double log2_k = Log2(k); + + if ((n + log2_k)*fudge <= 2*t) + return 1; + + if ((2*log2_k + 4.0 + n)*fudge <= 2*sqrt(k)) + return 2; + + if ((t == 2 && k >= 88) || (3 <= t && 9*t <= k && k >= 21)) { + if ((1.5*log2_k + t + 4.0 + n)*fudge <= 0.5*Log2(t) + 2*(sqrt(t*k))) + return 3; + } + + if (k <= 9*t && 4*t <= k && k >= 21) { + if ( ((log2_3 + log2_7 + log2_k + n)*fudge <= log2_20 + 5*t) && + ((log2_3 + (15.0/4.0)*log2_k + n)*fudge <= log2_7 + k/2 + 2*t) && + ((2*log2_3 + 2 + log2_k + n)*fudge <= k/4 + 3*t) ) + return 4; + } + + if (4*t >= k && k >= 21) { + if (((15.0/4.0)*log2_k + n)*fudge <= log2_7 + k/2 + 2*t) + return 5; + } + + return 0; +} + + +void GenPrime(ZZ& n, long k, long err) +{ + if (k <= 1) LogicError("GenPrime: bad length"); + + if (k > (1L << 20)) ResourceError("GenPrime: length too large"); + + if (err < 1) err = 1; + if (err > 512) err = 512; + + if (k == 2) { + if (RandomBnd(2)) + n = 3; + else + n = 2; + + return; + } + + + long t; + + t = 1; + while (!ErrBoundTest(k, t, err)) + t++; + + RandomPrime(n, k, t); +} + + +long GenPrime_long(long k, long err) +{ + if (k <= 1) LogicError("GenPrime: bad length"); + + if (k >= NTL_BITS_PER_LONG) ResourceError("GenPrime: length too large"); + + if (err < 1) err = 1; + if (err > 512) err = 512; + + if (k == 2) { + if (RandomBnd(2)) + return 3; + else + return 2; + } + + long t; + + t = 1; + while (!ErrBoundTest(k, t, err)) + t++; + + return RandomPrime_long(k, t); +} + + +void GenGermainPrime(ZZ& n, long k, long err) +{ + if (k <= 1) LogicError("GenGermainPrime: bad length"); + + if (k > (1L << 20)) ResourceError("GenGermainPrime: length too large"); + + if (err < 1) err = 1; + if (err > 512) err = 512; + + if (k == 2) { + if (RandomBnd(2)) + n = 3; + else + n = 2; + + return; + } + + + long prime_bnd = ComputePrimeBound(k); + + if (NumBits(prime_bnd) >= k/2) + prime_bnd = (1L << (k/2-1)); + + + ZZ two; + two = 2; + + ZZ n1; + + + PrimeSeq s; + + ZZ iter; + iter = 0; + + + for (;;) { + iter++; + + RandomLen(n, k); + if (!IsOdd(n)) add(n, n, 1); + + s.reset(3); + long p; + + long sieve_passed = 1; + + p = s.next(); + while (p && p < prime_bnd) { + long r = rem(n, p); + + if (r == 0) { + sieve_passed = 0; + break; + } + + // test if 2*r + 1 = 0 (mod p) + if (r == p-r-1) { + sieve_passed = 0; + break; + } + + p = s.next(); + } + + if (!sieve_passed) continue; + + + if (MillerWitness(n, two)) continue; + + // n1 = 2*n+1 + mul(n1, n, 2); + add(n1, n1, 1); + + + if (MillerWitness(n1, two)) continue; + + // now do t M-R iterations...just to make sure + + // First compute the appropriate number of M-R iterations, t + // The following computes t such that + // p(k,t)*8/k <= 2^{-err}/(5*iter^{1.25}) + // which suffices to get an overall error probability of 2^{-err}. + // Note that this method has the advantage of not requiring + // any assumptions on the density of Germain primes. 
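The witness loop that follows draws W uniformly from [1, n) via RandomBnd, whose rejection strategy was defined earlier. A minimal self-contained sketch of that rejection idea (illustrative only: sample_below and num_bits are hypothetical names, and std::mt19937_64 stands in for NTL's RandomStream):

#include <cstdint>
#include <iostream>
#include <random>

static std::mt19937_64 rng(12345);   // stand-in PRG, fixed seed for repeatability

static int num_bits(uint64_t v)      // like NTL's NumBits, for v >= 1
{
   int n = 0;
   while (v) { n++; v >>= 1; }
   return n;
}

// Uniform sample in [0, bnd), mirroring RandomBnd(long): draw
// num_bits(bnd-1) random bits, retry while the draw is >= bnd.
// Each retry succeeds with probability > 1/2, so this terminates fast.
static uint64_t sample_below(uint64_t bnd)
{
   if (bnd <= 1) return 0;
   int l = num_bits(bnd - 1);
   uint64_t mask = (l == 64) ? ~uint64_t(0) : ((uint64_t(1) << l) - 1);
   uint64_t x;
   do {
      x = rng() & mask;
   } while (x >= bnd);
   return x;
}

int main()
{
   for (int i = 0; i < 5; i++)
      std::cout << sample_below(1000) << "\n";
}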
+ + long err1 = max(1, err + 7 + (5*NumBits(iter) + 3)/4 - NumBits(k)); + long t; + t = 1; + while (!ErrBoundTest(k, t, err1)) + t++; + + ZZ W; + long MR_passed = 1; + + long i; + for (i = 1; i <= t; i++) { + do { + RandomBnd(W, n); + } while (W == 0); + // W == 0 is not a useful candidate witness! + + if (MillerWitness(n, W)) { + MR_passed = 0; + break; + } + } + + if (MR_passed) break; + } +} + +long GenGermainPrime_long(long k, long err) +{ + if (k >= NTL_BITS_PER_LONG-1) + ResourceError("GenGermainPrime_long: length too long"); + + ZZ n; + GenGermainPrime(n, k, err); + return to_long(n); +} + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZVec.c b/thirdparty/linux/ntl/src/ZZVec.c new file mode 100644 index 0000000000..db68adea8e --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZVec.c @@ -0,0 +1,85 @@ + +#include + +#include + +NTL_START_IMPL + +void ZZVec::SetSize(long n, long d) +{ + if (n < 0 || d <= 0) LogicError("bad args to ZZVec::SetSize()"); + + if (v) + LogicError("illegal ZZVec initialization"); + + if (n == 0) { + len = n; + bsize = d; + return; + } + + ZZVec tmp; + tmp.len = 0; + tmp.bsize = d; + + tmp.v = (ZZ*) NTL_MALLOC(n, sizeof(ZZ), 0); + if (!tmp.v) MemoryError(); + + long i = 0; + long m; + long j; + + while (i < n) { + m = ZZ_BlockConstructAlloc(tmp.v[i], d, n-i); + for (j = 1; j < m; j++) + ZZ_BlockConstructSet(tmp.v[i], tmp.v[i+j], j); + i += m; + tmp.len = i; + } + + tmp.swap(*this); +} + +void ZZVec::kill() +{ + long n = len; + long i = 0; + while (i < n) { + long m = ZZ_BlockDestroy(v[i]); + i += m; + } + + len = 0; + bsize = 0; + if (v) { + free(v); + v = 0; + } +} + + +ZZVec& ZZVec::operator=(const ZZVec& a) +{ + if (this == &a) return *this; + ZZVec tmp(a); + tmp.swap(*this); + return *this; +} + +ZZVec::ZZVec(const ZZVec& a) : v(0), len(0), bsize(0) +{ + SetSize(a.len, a.bsize); + + long i; + for (i = 0; i < a.len; i++) + v[i] = (a.v)[i]; +} + +void ZZVec::swap(ZZVec& x) +{ + _ntl_swap(v, x.v); + _ntl_swap(len, x.len); + _ntl_swap(bsize, x.bsize); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZX.c b/thirdparty/linux/ntl/src/ZZX.c new file mode 100644 index 0000000000..bc5bb1cb32 --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZX.c @@ -0,0 +1,972 @@ + +#include + +#include + +NTL_START_IMPL + + + +const ZZX& ZZX::zero() +{ + static const ZZX z; // GLOBAL (relies on C++11 thread-safe init) + return z; +} + + + +void conv(ZZ_pX& x, const ZZX& a) +{ + conv(x.rep, a.rep); + x.normalize(); +} + +void conv(ZZX& x, const ZZ_pX& a) +{ + conv(x.rep, a.rep); + x.normalize(); +} + + +istream& operator>>(istream& s, ZZX& x) +{ + NTL_INPUT_CHECK_RET(s, s >> x.rep); + x.normalize(); + return s; +} + +ostream& operator<<(ostream& s, const ZZX& a) +{ + return s << a.rep; +} + + +void ZZX::normalize() +{ + long n; + const ZZ* p; + + n = rep.length(); + if (n == 0) return; + p = rep.elts() + n; + while (n > 0 && IsZero(*--p)) { + n--; + } + rep.SetLength(n); +} + + +long IsZero(const ZZX& a) +{ + return a.rep.length() == 0; +} + + +long IsOne(const ZZX& a) +{ + return a.rep.length() == 1 && IsOne(a.rep[0]); +} + +long operator==(const ZZX& a, const ZZX& b) +{ + long i, n; + const ZZ *ap, *bp; + + n = a.rep.length(); + if (n != b.rep.length()) return 0; + + ap = a.rep.elts(); + bp = b.rep.elts(); + + for (i = 0; i < n; i++) + if (ap[i] != bp[i]) return 0; + + return 1; +} + + +long operator==(const ZZX& a, long b) +{ + if (b == 0) + return IsZero(a); + + if (deg(a) != 0) + return 0; + + return a.rep[0] == b; +} + +long operator==(const ZZX& a, const ZZ& b) +{ + 
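A short usage sketch (not from the NTL sources) for the ZZVec class implemented above: a fixed-length vector of ZZ whose entries are block-allocated with a fixed capacity of d ZZ-digits each — precisely the scratch-space pattern the Karatsuba routines in ZZX.c below rely on. Assumes the public <NTL/ZZVec.h> interface:

#include <NTL/ZZVec.h>
#include <iostream>

using namespace NTL;
using namespace std;

int main()
{
   ZZVec v;
   v.SetSize(4, 3);               // 4 entries, each pre-allocated to 3 ZZ-digits
   for (long i = 0; i < v.length(); i++)
      v[i] = power_ZZ(10, i);     // 1, 10, 100, 1000 all fit in the fixed size
   for (long i = 0; i < v.length(); i++)
      cout << v[i] << "\n";
   return 0;
}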
if (IsZero(b)) + return IsZero(a); + + if (deg(a) != 0) + return 0; + + return a.rep[0] == b; +} + + +void GetCoeff(ZZ& x, const ZZX& a, long i) +{ + if (i < 0 || i > deg(a)) + clear(x); + else + x = a.rep[i]; +} + +void SetCoeff(ZZX& x, long i, const ZZ& a) +{ + long j, m; + + if (i < 0) + LogicError("SetCoeff: negative index"); + + if (NTL_OVERFLOW(i, 1, 0)) + ResourceError("overflow in SetCoeff"); + + m = deg(x); + + if (i > m && IsZero(a)) return; + + if (i > m) { + /* careful: a may alias a coefficient of x */ + + long alloc = x.rep.allocated(); + + if (alloc > 0 && i >= alloc) { + ZZ aa = a; + x.rep.SetLength(i+1); + x.rep[i] = aa; + } + else { + x.rep.SetLength(i+1); + x.rep[i] = a; + } + + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + else + x.rep[i] = a; + + x.normalize(); +} + + +void SetCoeff(ZZX& x, long i) +{ + long j, m; + + if (i < 0) + LogicError("coefficient index out of range"); + + if (NTL_OVERFLOW(i, 1, 0)) + ResourceError("overflow in SetCoeff"); + + m = deg(x); + + if (i > m) { + x.rep.SetLength(i+1); + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + set(x.rep[i]); + x.normalize(); +} + + +void SetX(ZZX& x) +{ + clear(x); + SetCoeff(x, 1); +} + + +long IsX(const ZZX& a) +{ + return deg(a) == 1 && IsOne(LeadCoeff(a)) && IsZero(ConstTerm(a)); +} + + + +const ZZ& coeff(const ZZX& a, long i) +{ + if (i < 0 || i > deg(a)) + return ZZ::zero(); + else + return a.rep[i]; +} + + +const ZZ& LeadCoeff(const ZZX& a) +{ + if (IsZero(a)) + return ZZ::zero(); + else + return a.rep[deg(a)]; +} + +const ZZ& ConstTerm(const ZZX& a) +{ + if (IsZero(a)) + return ZZ::zero(); + else + return a.rep[0]; +} + + + +void conv(ZZX& x, const ZZ& a) +{ + if (IsZero(a)) + x.rep.SetLength(0); + else { + x.rep.SetLength(1); + x.rep[0] = a; + } +} + + +void conv(ZZX& x, long a) +{ + if (a == 0) + x.rep.SetLength(0); + else { + x.rep.SetLength(1); + conv(x.rep[0], a); + } +} + + +void conv(ZZX& x, const vec_ZZ& a) +{ + x.rep = a; + x.normalize(); +} + + +void add(ZZX& x, const ZZX& a, const ZZX& b) +{ + long da = deg(a); + long db = deg(b); + long minab = min(da, db); + long maxab = max(da, db); + x.rep.SetLength(maxab+1); + + long i; + const ZZ *ap, *bp; + ZZ* xp; + + for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts(); + i; i--, ap++, bp++, xp++) + add(*xp, (*ap), (*bp)); + + if (da > minab && &x != &a) + for (i = da-minab; i; i--, xp++, ap++) + *xp = *ap; + else if (db > minab && &x != &b) + for (i = db-minab; i; i--, xp++, bp++) + *xp = *bp; + else + x.normalize(); +} + +void add(ZZX& x, const ZZX& a, const ZZ& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + } + else if (&x == &a) { + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + ZZ *xp = x.rep.elts(); + add(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const ZZ *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + + +void add(ZZX& x, const ZZX& a, long b) +{ + if (a.rep.length() == 0) { + conv(x, b); + } + else { + if (&x != &a) x = a; + add(x.rep[0], x.rep[0], b); + x.normalize(); + } +} + + +void sub(ZZX& x, const ZZX& a, const ZZX& b) +{ + long da = deg(a); + long db = deg(b); + long minab = min(da, db); + long maxab = max(da, db); + x.rep.SetLength(maxab+1); + + long i; + const ZZ *ap, *bp; + ZZ* xp; + + for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts(); + i; 
i--, ap++, bp++, xp++) + sub(*xp, (*ap), (*bp)); + + if (da > minab && &x != &a) + for (i = da-minab; i; i--, xp++, ap++) + *xp = *ap; + else if (db > minab) + for (i = db-minab; i; i--, xp++, bp++) + negate(*xp, *bp); + else + x.normalize(); + +} + +void sub(ZZX& x, const ZZX& a, const ZZ& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + negate(x, x); + } + else if (&x == &a) { + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + ZZ *xp = x.rep.elts(); + sub(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const ZZ *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + +void sub(ZZX& x, const ZZX& a, long b) +{ + if (b == 0) { + x = a; + return; + } + + if (a.rep.length() == 0) { + x.rep.SetLength(1); + conv(x.rep[0], b); + negate(x.rep[0], x.rep[0]); + } + else { + if (&x != &a) x = a; + sub(x.rep[0], x.rep[0], b); + } + x.normalize(); +} + +void sub(ZZX& x, long a, const ZZX& b) +{ + negate(x, b); + add(x, x, a); +} + + +void sub(ZZX& x, const ZZ& b, const ZZX& a) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + } + else if (x.rep.MaxLength() == 0) { + negate(x, a); + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + ZZ *xp = x.rep.elts(); + sub(xp[0], b, a.rep[0]); + x.rep.SetLength(n); + xp = x.rep.elts(); + const ZZ *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + negate(xp[i], ap[i]); + x.normalize(); + } +} + + + +void negate(ZZX& x, const ZZX& a) +{ + long n = a.rep.length(); + x.rep.SetLength(n); + + const ZZ* ap = a.rep.elts(); + ZZ* xp = x.rep.elts(); + long i; + + for (i = n; i; i--, ap++, xp++) + negate((*xp), (*ap)); + +} + +long MaxBits(const ZZX& f) +{ + long i, m; + m = 0; + + for (i = 0; i <= deg(f); i++) { + m = max(m, NumBits(f.rep[i])); + } + + return m; +} + + +void PlainMul(ZZX& x, const ZZX& a, const ZZX& b) +{ + if (&a == &b) { + PlainSqr(x, a); + return; + } + + long da = deg(a); + long db = deg(b); + + if (da < 0 || db < 0) { + clear(x); + return; + } + + long d = da+db; + + + + const ZZ *ap, *bp; + ZZ *xp; + + ZZX la, lb; + + if (&x == &a) { + la = a; + ap = la.rep.elts(); + } + else + ap = a.rep.elts(); + + if (&x == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + x.rep.SetLength(d+1); + + xp = x.rep.elts(); + + long i, j, jmin, jmax; + ZZ t, accum; + + for (i = 0; i <= d; i++) { + jmin = max(0, i-db); + jmax = min(da, i); + clear(accum); + for (j = jmin; j <= jmax; j++) { + mul(t, ap[j], bp[i-j]); + add(accum, accum, t); + } + xp[i] = accum; + } + x.normalize(); +} + +void PlainSqr(ZZX& x, const ZZX& a) +{ + long da = deg(a); + + if (da < 0) { + clear(x); + return; + } + + long d = 2*da; + + const ZZ *ap; + ZZ *xp; + + ZZX la; + + if (&x == &a) { + la = a; + ap = la.rep.elts(); + } + else + ap = a.rep.elts(); + + + x.rep.SetLength(d+1); + + xp = x.rep.elts(); + + long i, j, jmin, jmax; + long m, m2; + ZZ t, accum; + + for (i = 0; i <= d; i++) { + jmin = max(0, i-da); + jmax = min(da, i); + m = jmax - jmin + 1; + m2 = m >> 1; + jmax = jmin + m2 - 1; + clear(accum); + for (j = jmin; j <= jmax; j++) { + mul(t, ap[j], ap[i-j]); + add(accum, accum, t); + } + add(accum, accum, accum); + if (m & 1) { + sqr(t, ap[jmax + 1]); + add(accum, accum, t); + } + + xp[i] = accum; + } + + x.normalize(); +} + + + +static +void PlainMul(ZZ *xp, const ZZ *ap, 
long sa, const ZZ *bp, long sb) +{ + if (sa == 0 || sb == 0) return; + + long sx = sa+sb-1; + + long i, j, jmin, jmax; + NTL_ZZRegister(t); + NTL_ZZRegister(accum); + + for (i = 0; i < sx; i++) { + jmin = max(0, i-sb+1); + jmax = min(sa-1, i); + clear(accum); + for (j = jmin; j <= jmax; j++) { + mul(t, ap[j], bp[i-j]); + add(accum, accum, t); + } + xp[i] = accum; + } +} + + + +static +void KarFold(ZZ *T, const ZZ *b, long sb, long hsa) +{ + long m = sb - hsa; + long i; + + for (i = 0; i < m; i++) + add(T[i], b[i], b[hsa+i]); + + for (i = m; i < hsa; i++) + T[i] = b[i]; +} + +static +void KarSub(ZZ *T, const ZZ *b, long sb) +{ + long i; + + for (i = 0; i < sb; i++) + sub(T[i], T[i], b[i]); +} + +static +void KarAdd(ZZ *T, const ZZ *b, long sb) +{ + long i; + + for (i = 0; i < sb; i++) + add(T[i], T[i], b[i]); +} + +static +void KarFix(ZZ *c, const ZZ *b, long sb, long hsa) +{ + long i; + + for (i = 0; i < hsa; i++) + c[i] = b[i]; + + for (i = hsa; i < sb; i++) + add(c[i], c[i], b[i]); +} + +static void PlainMul1(ZZ *xp, const ZZ *ap, long sa, const ZZ& b) +{ + long i; + + for (i = 0; i < sa; i++) + mul(xp[i], ap[i], b); +} + + + +static +void KarMul(ZZ *c, const ZZ *a, + long sa, const ZZ *b, long sb, ZZ *stk) +{ + if (sa < sb) { + { long t = sa; sa = sb; sb = t; } + { const ZZ *t = a; a = b; b = t; } + } + + if (sb == 1) { + if (sa == 1) + mul(*c, *a, *b); + else + PlainMul1(c, a, sa, *b); + + return; + } + + if (sb == 2 && sa == 2) { + mul(c[0], a[0], b[0]); + mul(c[2], a[1], b[1]); + add(stk[0], a[0], a[1]); + add(stk[1], b[0], b[1]); + mul(c[1], stk[0], stk[1]); + sub(c[1], c[1], c[0]); + sub(c[1], c[1], c[2]); + + return; + + } + + long hsa = (sa + 1) >> 1; + + if (hsa < sb) { + /* normal case */ + + long hsa2 = hsa << 1; + + ZZ *T1, *T2, *T3; + + T1 = stk; stk += hsa; + T2 = stk; stk += hsa; + T3 = stk; stk += hsa2 - 1; + + /* compute T1 = a_lo + a_hi */ + + KarFold(T1, a, sa, hsa); + + /* compute T2 = b_lo + b_hi */ + + KarFold(T2, b, sb, hsa); + + /* recursively compute T3 = T1 * T2 */ + + KarMul(T3, T1, hsa, T2, hsa, stk); + + /* recursively compute a_hi * b_hi into high part of c */ + /* and subtract from T3 */ + + KarMul(c + hsa2, a+hsa, sa-hsa, b+hsa, sb-hsa, stk); + KarSub(T3, c + hsa2, sa + sb - hsa2 - 1); + + + /* recursively compute a_lo*b_lo into low part of c */ + /* and subtract from T3 */ + + KarMul(c, a, hsa, b, hsa, stk); + KarSub(T3, c, hsa2 - 1); + + clear(c[hsa2 - 1]); + + /* finally, add T3 * X^{hsa} to c */ + + KarAdd(c+hsa, T3, hsa2-1); + } + else { + /* degenerate case */ + + ZZ *T; + + T = stk; stk += hsa + sb - 1; + + /* recursively compute b*a_hi into high part of c */ + + KarMul(c + hsa, a + hsa, sa - hsa, b, sb, stk); + + /* recursively compute b*a_lo into T */ + + KarMul(T, a, hsa, b, sb, stk); + + KarFix(c, T, hsa + sb - 1, hsa); + } +} + +void KarMul(ZZX& c, const ZZX& a, const ZZX& b) +{ + if (IsZero(a) || IsZero(b)) { + clear(c); + return; + } + + if (&a == &b) { + KarSqr(c, a); + return; + } + + vec_ZZ mem; + + const ZZ *ap, *bp; + ZZ *cp; + + long sa = a.rep.length(); + long sb = b.rep.length(); + + if (&a == &c) { + mem = a.rep; + ap = mem.elts(); + } + else + ap = a.rep.elts(); + + if (&b == &c) { + mem = b.rep; + bp = mem.elts(); + } + else + bp = b.rep.elts(); + + c.rep.SetLength(sa+sb-1); + cp = c.rep.elts(); + + long maxa, maxb, xover; + + maxa = MaxBits(a); + maxb = MaxBits(b); + xover = 2; + + if (sa < xover || sb < xover) + PlainMul(cp, ap, sa, bp, sb); + else { + /* karatsuba */ + + long n, hn, sp, depth; + + n = max(sa, sb); + sp = 0; + 
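KarMul above is the classical Karatsuba recursion: writing a = a0 + a1*X^h and b = b0 + b1*X^h, the product is a0*b0 + ((a0+a1)*(b0+b1) - a0*b0 - a1*b1)*X^h + a1*b1*X^{2h}, so three half-size multiplications replace four; KarFold forms the sums, and KarSub/KarAdd apply the corrections. A toy single-level sketch of the same identity on machine words (illustrative only; assumes a compiler with unsigned __int128):

#include <cassert>
#include <cstdint>

// One Karatsuba level on the 32-bit halves of 64-bit operands:
// three 32x32 products replace the four of the schoolbook method.
static unsigned __int128 kara64(uint64_t a, uint64_t b)
{
   uint64_t a0 = (uint32_t) a, a1 = a >> 32;
   uint64_t b0 = (uint32_t) b, b1 = b >> 32;
   unsigned __int128 lo  = (unsigned __int128) a0 * b0;
   unsigned __int128 hi  = (unsigned __int128) a1 * b1;
   unsigned __int128 mid = (unsigned __int128) (a0 + a1) * (b0 + b1) - lo - hi;
   return lo + (mid << 32) + (hi << 64);
}

int main()
{
   uint64_t a = 0x1234567890abcdefULL, b = 0xfedcba0987654321ULL;
   assert(kara64(a, b) == (unsigned __int128) a * b);
   return 0;
}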
depth = 0; + do { + hn = (n+1) >> 1; + sp += (hn << 2) - 1; + n = hn; + depth++; + } while (n >= xover); + + ZZVec stk; + stk.SetSize(sp, + ((maxa + maxb + NumBits(min(sa, sb)) + 2*depth + 10) + + NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS); + + KarMul(cp, ap, sa, bp, sb, stk.elts()); + } + + c.normalize(); +} + + + + + + +void PlainSqr(ZZ* xp, const ZZ* ap, long sa) +{ + if (sa == 0) return; + + long da = sa-1; + long d = 2*da; + + long i, j, jmin, jmax; + long m, m2; + NTL_ZZRegister(t); + NTL_ZZRegister(accum); + + for (i = 0; i <= d; i++) { + jmin = max(0, i-da); + jmax = min(da, i); + m = jmax - jmin + 1; + m2 = m >> 1; + jmax = jmin + m2 - 1; + clear(accum); + for (j = jmin; j <= jmax; j++) { + mul(t, ap[j], ap[i-j]); + add(accum, accum, t); + } + add(accum, accum, accum); + if (m & 1) { + sqr(t, ap[jmax + 1]); + add(accum, accum, t); + } + + xp[i] = accum; + } +} + + +static +void KarSqr(ZZ *c, const ZZ *a, long sa, ZZ *stk) +{ + if (sa == 1) { + sqr(*c, *a); + return; + } + + if (sa == 2) { + sqr(c[0], a[0]); + sqr(c[2], a[1]); + mul(c[1], a[0], a[1]); + add(c[1], c[1], c[1]); + + return; + } + + if (sa == 3) { + sqr(c[0], a[0]); + mul(c[1], a[0], a[1]); + add(c[1], c[1], c[1]); + sqr(stk[0], a[1]); + mul(c[2], a[0], a[2]); + add(c[2], c[2], c[2]); + add(c[2], c[2], stk[0]); + mul(c[3], a[1], a[2]); + add(c[3], c[3], c[3]); + sqr(c[4], a[2]); + + return; + + } + + long hsa = (sa + 1) >> 1; + long hsa2 = hsa << 1; + + ZZ *T1, *T2; + + T1 = stk; stk += hsa; + T2 = stk; stk += hsa2-1; + + KarFold(T1, a, sa, hsa); + KarSqr(T2, T1, hsa, stk); + + + KarSqr(c + hsa2, a+hsa, sa-hsa, stk); + KarSub(T2, c + hsa2, sa + sa - hsa2 - 1); + + + KarSqr(c, a, hsa, stk); + KarSub(T2, c, hsa2 - 1); + + clear(c[hsa2 - 1]); + + KarAdd(c+hsa, T2, hsa2-1); +} + + +void KarSqr(ZZX& c, const ZZX& a) +{ + if (IsZero(a)) { + clear(c); + return; + } + + vec_ZZ mem; + + const ZZ *ap; + ZZ *cp; + + long sa = a.rep.length(); + + if (&a == &c) { + mem = a.rep; + ap = mem.elts(); + } + else + ap = a.rep.elts(); + + c.rep.SetLength(sa+sa-1); + cp = c.rep.elts(); + + long maxa, xover; + + maxa = MaxBits(a); + + xover = 2; + + if (sa < xover) + PlainSqr(cp, ap, sa); + else { + /* karatsuba */ + + long n, hn, sp, depth; + + n = sa; + sp = 0; + depth = 0; + do { + hn = (n+1) >> 1; + sp += hn+hn+hn - 1; + n = hn; + depth++; + } while (n >= xover); + + ZZVec stk; + stk.SetSize(sp, + ((2*maxa + NumBits(sa) + 2*depth + 10) + + NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS); + + KarSqr(cp, ap, sa, stk.elts()); + } + + c.normalize(); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZX1.c b/thirdparty/linux/ntl/src/ZZX1.c new file mode 100644 index 0000000000..2c90226fbe --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZX1.c @@ -0,0 +1,2715 @@ + + +#include <NTL/ZZX.h> + +#include <NTL/new.h> + +NTL_START_IMPL + +/*****************************************************************/ + +// Fast CRT routines +// - we could perhaps move these out of here to make them +// - more widely accessible + + +struct FastCRTHelper { + + ZZ prod; + ZZ prod_half; + + long nprimes; + + long nlevels; + long veclen; + long nblocks; // number of nodes in the last level + long start_last_level; // index of first item in last level + + Vec<long> nprimes_vec; // length == veclen + Vec<long> first_vec; // length == nblocks + Vec<ZZ> prod_vec; // length == veclen + Vec<long> coeff_vec; // length == nprimes, coeff_vec[i] = (prod/p_i)^{-1} mod p_i + + + Vec<ZZ> tmp_vec; // length == nlevels + + + ZZ tmp1, tmp2, tmp3; + + FastCRTHelper(long bound, long thresh); + + void fill_nprimes_vec(long index); + void fill_prod_vec(long
index); + + void reduce_aux(const ZZ& value, ZZ **remainders, long index, long level); + void reconstruct_aux(ZZ& value, ZZ **remainders, long index, long level); + + void reduce(const ZZ& value, ZZ **remainders); + void reconstruct(ZZ& value, ZZ **remainders); + +}; + +void FastCRTHelper::fill_nprimes_vec(long index) +{ + long left, right; + left = 2*index + 1; + right = 2*index + 2; + if (left >= veclen) return; + + nprimes_vec[left] = nprimes_vec[index]/2; + nprimes_vec[right] = nprimes_vec[index] - nprimes_vec[left]; + fill_nprimes_vec(left); + fill_nprimes_vec(right); +} + +void FastCRTHelper::fill_prod_vec(long index) +{ + long left, right; + left = 2*index + 1; + right = 2*index + 2; + if (left >= veclen) return; + + fill_prod_vec(left); + fill_prod_vec(right); + mul(prod_vec[index], prod_vec[left], prod_vec[right]); +} + +FastCRTHelper::FastCRTHelper(long bound, long thresh) +{ + // assumes bound >= 1, thresh >= 1 + + prod = 1; + for (nprimes = 0; NumBits(prod) <= bound; nprimes++) { + UseFFTPrime(nprimes); + prod *= GetFFTPrime(nprimes); + } + + RightShift(prod_half, prod, 1); + + long sz = nprimes; + nlevels = 1; + while (sz > thresh) { + sz = sz/2; + nlevels++; + } + + veclen = (1L << nlevels) - 1; + nblocks = 1L << (nlevels-1); + start_last_level = (1L << (nlevels-1)) - 1; + + nprimes_vec.SetLength(veclen); + nprimes_vec[0] = nprimes; + + fill_nprimes_vec(0); + + long k, i; + first_vec.SetLength(nblocks+1); + + first_vec[0] = 0; + for (k = 1; k <= nblocks; k++) + first_vec[k] = first_vec[k-1] + nprimes_vec[start_last_level + k-1]; + + prod_vec.SetLength(veclen); + + // fill product leaves + for (k = 0; k < nblocks; k++) { + prod_vec[start_last_level + k] = 1; + for (i = first_vec[k]; i < first_vec[k+1]; i++) { + prod_vec[start_last_level + k] *= GetFFTPrime(i); + } + } + + // fill rest of product trees + fill_prod_vec(0); + + ZZ t1; + long tt; + + // fill coeff_vec: simple, quadratic time for now + // not really essential to speed this up + coeff_vec.SetLength(nprimes); + for (long i = 0; i < nprimes; i++) { + long p = GetFFTPrime(i); + div(t1, prod, p); + tt = rem(t1, p); + tt = InvMod(tt, p); + coeff_vec[i] = tt; + } + + tmp_vec.SetLength(nlevels); +} + +void FastCRTHelper::reduce_aux(const ZZ& value, ZZ **remainders, long index, long level) +{ + long left, right; + left = 2*index + 1; + right = 2*index + 2; + + ZZ *result = 0; + + if (left >= veclen) + result = remainders[index - start_last_level]; + else + result = &tmp_vec[level]; + + if (NumBits(value) <= NumBits(prod_vec[index])) + *result = value; + else { + rem(tmp1, value, prod_vec[index]); + sub(tmp2, tmp1, prod_vec[index]); + if (NumBits(tmp2) < NumBits(tmp1)) + *result = tmp2; + else + *result = tmp1; + } + + if (left < veclen) { + reduce_aux(*result, remainders, left, level+1); + reduce_aux(*result, remainders, right, level+1); + } + +} + +void FastCRTHelper::reduce(const ZZ& value, ZZ **remainders) +{ + reduce_aux(value, remainders, 0, 0); +} + +void FastCRTHelper::reconstruct_aux(ZZ& value, ZZ **remainders, long index, long level) +{ + long left, right; + left = 2*index + 1; + right = 2*index + 2; + + if (left >= veclen) { + value = *remainders[index - start_last_level]; + return; + } + + reconstruct_aux(tmp_vec[level], remainders, left, level+1); + reconstruct_aux(tmp1, remainders, right, level+1); + + mul(tmp2, tmp_vec[level], prod_vec[right]); + mul(tmp3, tmp1, prod_vec[left]); + add(value, tmp2, tmp3); +} + +void FastCRTHelper::reconstruct(ZZ& value, ZZ **remainders) +{ + reconstruct_aux(tmp1, remainders, 0, 
0); + rem(tmp1, tmp1, prod); + if (tmp1 > prod_half) + sub(tmp1, tmp1, prod); + + value = tmp1; +} + + + + + + + + + +/*****************************************************************/ + + +static +long MaxSize(const ZZX& a) +{ + long res = 0; + long n = a.rep.length(); + + long i; + for (i = 0; i < n; i++) { + long t = a.rep[i].size(); + if (t > res) + res = t; + } + + return res; +} + + + +void conv(zz_pX& x, const ZZX& a) +{ + conv(x.rep, a.rep); + x.normalize(); +} + + +void conv(ZZX& x, const zz_pX& a) +{ + conv(x.rep, a.rep); + x.normalize(); +} + + +long CRT(ZZX& gg, ZZ& a, const zz_pX& G) +{ + long n = gg.rep.length(); + + long p = zz_p::modulus(); + + ZZ new_a; + mul(new_a, a, p); + + long a_inv; + a_inv = rem(a, p); + a_inv = InvMod(a_inv, p); + + long p1; + p1 = p >> 1; + + ZZ a1; + RightShift(a1, a, 1); + + long p_odd = (p & 1); + + long modified = 0; + + long h; + + long m = G.rep.length(); + + long max_mn = max(m, n); + + gg.rep.SetLength(max_mn); + + ZZ g; + long i; + + for (i = 0; i < n; i++) { + if (!CRTInRange(gg.rep[i], a)) { + modified = 1; + rem(g, gg.rep[i], a); + if (g > a1) sub(g, g, a); + } + else + g = gg.rep[i]; + + h = rem(g, p); + + if (i < m) + h = SubMod(rep(G.rep[i]), h, p); + else + h = NegateMod(h, p); + + h = MulMod(h, a_inv, p); + if (h > p1) + h = h - p; + + if (h != 0) { + modified = 1; + + if (!p_odd && g > 0 && (h == p1)) + MulSubFrom(g, a, h); + else + MulAddTo(g, a, h); + } + + gg.rep[i] = g; + } + + + for (; i < m; i++) { + h = rep(G.rep[i]); + h = MulMod(h, a_inv, p); + if (h > p1) + h = h - p; + + modified = 1; + mul(g, a, h); + gg.rep[i] = g; + } + + gg.normalize(); + a = new_a; + + return modified; +} + +long CRT(ZZX& gg, ZZ& a, const ZZ_pX& G) +{ + long n = gg.rep.length(); + + const ZZ& p = ZZ_p::modulus(); + + ZZ new_a; + mul(new_a, a, p); + + ZZ a_inv; + rem(a_inv, a, p); + InvMod(a_inv, a_inv, p); + + ZZ p1; + RightShift(p1, p, 1); + + ZZ a1; + RightShift(a1, a, 1); + + long p_odd = IsOdd(p); + + long modified = 0; + + ZZ h; + ZZ ah; + + long m = G.rep.length(); + + long max_mn = max(m, n); + + gg.rep.SetLength(max_mn); + + ZZ g; + long i; + + for (i = 0; i < n; i++) { + if (!CRTInRange(gg.rep[i], a)) { + modified = 1; + rem(g, gg.rep[i], a); + if (g > a1) sub(g, g, a); + } + else + g = gg.rep[i]; + + rem(h, g, p); + + if (i < m) + SubMod(h, rep(G.rep[i]), h, p); + else + NegateMod(h, h, p); + + MulMod(h, h, a_inv, p); + if (h > p1) + sub(h, h, p); + + if (h != 0) { + modified = 1; + mul(ah, a, h); + + if (!p_odd && g > 0 && (h == p1)) + sub(g, g, ah); + else + add(g, g, ah); + } + + gg.rep[i] = g; + } + + + for (; i < m; i++) { + h = rep(G.rep[i]); + MulMod(h, h, a_inv, p); + if (h > p1) + sub(h, h, p); + + modified = 1; + mul(g, a, h); + gg.rep[i] = g; + } + + gg.normalize(); + a = new_a; + + return modified; +} + + + + +/* Compute a = b * 2^l mod p, where p = 2^n+1. 0<=l<=n and 0<=b<p. */ +static void LeftRotate(ZZ& a, const ZZ& b, long l, const ZZ& p, long n, ZZ& scratch) +{ + if (l == 0) { + if (&a != &b) { + a = b; + } + return; + } + + /* scratch := upper l bits of b */ + RightShift(scratch, b, n - l); + /* a := 2^l * lower n - l bits of b */ + trunc(a, b, n - l); + LeftShift(a, a, l); + /* a -= scratch */ + sub(a, a, scratch); + if (sign(a) < 0) { + add(a, a, p); + } +} + + +/* Compute a = b * 2^l mod p, where p = 2^n+1. 0<=b<p. */ +static void Rotate(ZZ& a, const ZZ& b, long l, const ZZ& p, long n, ZZ& scratch) +{ + if (IsZero(b)) { + clear(a); + return; + } + + /* l %= 2n */ + if (l >= 0) { + l %= (n << 1); + } else { + l = (n << 1) - 1 - (-(l + 1) % (n << 1)); + } + + /* a = b * 2^l mod p */ + if (l < n) { + LeftRotate(a, b, l, p, n, scratch); + } else { + LeftRotate(a, b, l - n, p, n, scratch); + SubPos(a, p, a); + } +} + + + +/* Fast Fourier Transform. a is a vector of length 2^l, 2^l divides 2n, + p = 2^n+1, w = 2^r mod p is a primitive (2^l)th root of + unity. Returns a(1),a(w),...,a(w^{2^l-1}) mod p in bit-reverse + order.
*/ +static void fft(ZZVec& a, long r, long l, const ZZ& p, long n) +{ + long round; + long off, i, j, e; + long halfsize; + ZZ tmp, tmp1; + ZZ scratch; + + for (round = 0; round < l; round++, r <<= 1) { + halfsize = 1L << (l - 1 - round); + for (i = (1L << round) - 1, off = 0; i >= 0; i--, off += halfsize) { + for (j = 0, e = 0; j < halfsize; j++, off++, e+=r) { + /* One butterfly : + ( a[off], a[off+halfsize] ) *= ( 1 w^{j2^round} ) + ( 1 -w^{j2^round} ) */ + /* tmp = a[off] - a[off + halfsize] mod p */ + sub(tmp, a[off], a[off + halfsize]); + if (sign(tmp) < 0) { + add(tmp, tmp, p); + } + /* a[off] += a[off + halfsize] mod p */ + add(a[off], a[off], a[off + halfsize]); + sub(tmp1, a[off], p); + if (sign(tmp1) >= 0) { + a[off] = tmp1; + } + /* a[off + halfsize] = tmp * w^{j2^round} mod p */ + Rotate(a[off + halfsize], tmp, e, p, n, scratch); + } + } + } +} + +/* Inverse FFT. r must be the same as in the call to FFT. Result is + by 2^l too large. */ +static void ifft(ZZVec& a, long r, long l, const ZZ& p, long n) +{ + long round; + long off, i, j, e; + long halfsize; + ZZ tmp, tmp1; + ZZ scratch; + + for (round = l - 1, r <<= l - 1; round >= 0; round--, r >>= 1) { + halfsize = 1L << (l - 1 - round); + for (i = (1L << round) - 1, off = 0; i >= 0; i--, off += halfsize) { + for (j = 0, e = 0; j < halfsize; j++, off++, e+=r) { + /* One inverse butterfly : + ( a[off], a[off+halfsize] ) *= ( 1 1 ) + ( w^{-j2^round} -w^{-j2^round} ) */ + /* a[off + halfsize] *= w^{-j2^round} mod p */ + Rotate(a[off + halfsize], a[off + halfsize], -e, p, n, scratch); + /* tmp = a[off] - a[off + halfsize] */ + sub(tmp, a[off], a[off + halfsize]); + + /* a[off] += a[off + halfsize] mod p */ + add(a[off], a[off], a[off + halfsize]); + sub(tmp1, a[off], p); + if (sign(tmp1) >= 0) { + a[off] = tmp1; + } + /* a[off+halfsize] = tmp mod p */ + if (sign(tmp) < 0) { + add(a[off+halfsize], tmp, p); + } else { + a[off+halfsize] = tmp; + } + } + } + } +} + + + +/* Multiplication a la Schoenhage & Strassen, modulo a "Fermat" number + p = 2^{mr}+1, where m is a power of two and r is odd. Then w = 2^r + is a primitive 2mth root of unity, i.e., polynomials whose product + has degree less than 2m can be multiplied, provided that the + coefficients of the product polynomial are at most 2^{mr-1} in + absolute value. The algorithm is not called recursively; + coefficient arithmetic is done directly.*/ + +void SSMul(ZZX& c, const ZZX& a, const ZZX& b) +{ + if (&a == &b) { + SSSqr(c, a); + return; + } + + long na = deg(a); + long nb = deg(b); + + if (na <= 0 || nb <= 0) { + PlainMul(c, a, b); + return; + } + + long n = na + nb; /* degree of the product */ + + + /* Choose m and r suitably */ + long l = NextPowerOfTwo(n + 1) - 1; /* 2^l <= n < 2^{l+1} */ + long m2 = 1L << (l + 1); /* m2 = 2m = 2^{l+1} */ + /* Bitlength of the product: if the coefficients of a are absolutely less + than 2^ka and the coefficients of b are absolutely less than 2^kb, then + the coefficients of ab are absolutely less than + (min(na,nb)+1)2^{ka+kb} <= 2^bound. 
*/ + long bound = 2 + NumBits(min(na, nb)) + MaxBits(a) + MaxBits(b); + /* Let r be minimal so that mr > bound */ + long r = (bound >> l) + 1; + long mr = r << l; + + /* p := 2^{mr}+1 */ + ZZ p; + set(p); + LeftShift(p, p, mr); + add(p, p, 1); + + /* Make coefficients of a and b positive */ + ZZVec aa, bb; + aa.SetSize(m2, p.size()); + bb.SetSize(m2, p.size()); + + long i; + for (i = 0; i <= deg(a); i++) { + if (sign(a.rep[i]) >= 0) { + aa[i] = a.rep[i]; + } else { + add(aa[i], a.rep[i], p); + } + } + + for (i = 0; i <= deg(b); i++) { + if (sign(b.rep[i]) >= 0) { + bb[i] = b.rep[i]; + } else { + add(bb[i], b.rep[i], p); + } + } + + + /* 2m-point FFT's mod p */ + fft(aa, r, l + 1, p, mr); + fft(bb, r, l + 1, p, mr); + + /* Pointwise multiplication aa := aa * bb mod p */ + ZZ tmp, ai; + for (i = 0; i < m2; i++) { + mul(ai, aa[i], bb[i]); + if (NumBits(ai) > mr) { + RightShift(tmp, ai, mr); + trunc(ai, ai, mr); + sub(ai, ai, tmp); + if (sign(ai) < 0) { + add(ai, ai, p); + } + } + aa[i] = ai; + } + + ifft(aa, r, l + 1, p, mr); + ZZ scratch; + + /* Retrieve c, dividing by 2m, and subtracting p where necessary */ + c.rep.SetLength(n + 1); + for (i = 0; i <= n; i++) { + ai = aa[i]; + ZZ& ci = c.rep[i]; + if (!IsZero(ai)) { + /* ci = -ai * 2^{mr-l-1} = ai * 2^{-l-1} = ai / 2m mod p */ + LeftRotate(ai, ai, mr - l - 1, p, mr, scratch); + sub(tmp, p, ai); + if (NumBits(tmp) >= mr) { /* ci >= (p-1)/2 */ + negate(ci, ai); /* ci = -ai = ci - p */ + } + else + ci = tmp; + } + else + clear(ci); + } +} + + +// SSRatio computes how much bigger the SS modulus must be +// to accomodate the necessary roots of unity. +// This is useful in determining algorithm crossover points. + +double SSRatio(long na, long maxa, long nb, long maxb) +{ + if (na <= 0 || nb <= 0) return 0; + + long n = na + nb; /* degree of the product */ + + + long l = NextPowerOfTwo(n + 1) - 1; /* 2^l <= n < 2^{l+1} */ + long bound = 2 + NumBits(min(na, nb)) + maxa + maxb; + long r = (bound >> l) + 1; + long mr = r << l; + + return double(mr + 1)/double(bound); +} + + + +static +void conv(vec_zz_p& x, const ZZVec& a) +{ + long i, n; + + n = a.length(); + x.SetLength(n); + + VectorConv(n, x.elts(), a.elts()); +} + + +void HomMul(ZZX& x, const ZZX& a, const ZZX& b) +{ + if (&a == &b) { + HomSqr(x, a); + return; + } + + + long da = deg(a); + long db = deg(b); + + if (da < 0 || db < 0) { + clear(x); + return; + } + + zz_pBak bak; + bak.save(); + + long bound = 2 + NumBits(min(da, db)+1) + MaxBits(a) + MaxBits(b); + + FastCRTHelper H(bound, 96); + + long i, j, k; + + Vec c, aa, bb; + + c.SetLength(H.nblocks); + aa.SetLength(H.nblocks); + bb.SetLength(H.nblocks); + + long sz_a = max(1, MaxSize(a)); + long sz_b = max(1, MaxSize(b)); + + for (k = 0; k < H.nblocks; k++) { + ZZ *prod_vec = &H.prod_vec[H.start_last_level]; + c[k].SetSize(da+db+1, prod_vec[k].size()+1); + aa[k].SetSize(da+1, min(sz_a, prod_vec[k].size())); + bb[k].SetSize(db+1, min(sz_b, prod_vec[k].size())); + } + + Vec ptr_vec; + ptr_vec.SetLength(H.nblocks); + + for (j = 0; j <= da; j++) { + for (k = 0; k < H.nblocks; k++) ptr_vec[k] = &aa[k][j]; + H.reduce(a.rep[j], ptr_vec.elts()); + } + + for (j = 0; j <= db; j++) { + for (k = 0; k < H.nblocks; k++) ptr_vec[k] = &bb[k][j]; + H.reduce(b.rep[j], ptr_vec.elts()); + } + + ZZ t1; + + for (k = 0; k < H.nblocks; k++) { + for (i = H.first_vec[k]; i < H.first_vec[k+1]; i++) { + zz_p::FFTInit(i); + + zz_pX A, B, C; + conv(A.rep, aa[k]); A.normalize(); + conv(B.rep, bb[k]); B.normalize(); + mul(C, A, B); + + long m = deg(C); + long p = 
zz_p::modulus(); + long tt = H.coeff_vec[i]; + mulmod_precon_t ttpinv = PrepMulModPrecon(tt, p); + div(t1, H.prod_vec[H.start_last_level+k], p); + for (j = 0; j <= m; j++) { + long tt1 = MulModPrecon(rep(C.rep[j]), tt, p, ttpinv); + MulAddTo(c[k][j], t1, tt1); + } + } + } + + x.rep.SetLength(da+db+1); + for (j = 0; j <= da+db; j++) { + for (k = 0; k < H.nblocks; k++) ptr_vec[k] = &c[k][j]; + H.reconstruct(x.rep[j], ptr_vec.elts()); + } + + x.normalize(); + bak.restore(); +} + + + + +void mul(ZZX& c, const ZZX& a, const ZZX& b) +{ + if (IsZero(a) || IsZero(b)) { + clear(c); + return; + } + + if (&a == &b) { + sqr(c, a); + return; + } + + long maxa = MaxSize(a); + long maxb = MaxSize(b); + + long k = min(maxa, maxb); + long s = min(deg(a), deg(b)) + 1; + + // FIXME: I should have a way of setting all these crossovers + // automatically + + if (s == 1 || (k == 1 && s < 40) || (k == 2 && s < 20) || + (k == 3 && s < 10)) { + + PlainMul(c, a, b); + return; + } + + if (s < 80 || (k < 30 && s < 150)) { + KarMul(c, a, b); + return; + } + + + double rat = SSRatio(deg(a), MaxBits(a), deg(b), MaxBits(b)); + long k1 = (maxa + maxb)/2; + + if ( + + (k1 >= 26 && rat < 1.40) || + (k1 >= 53 && rat < 1.60) || + (k1 >= 106 && rat < 1.80) || + (k1 >= 212 && rat < 2.00) + + ) { + SSMul(c, a, b); + } + else { + HomMul(c, a, b); + } +} + + +void SSSqr(ZZX& c, const ZZX& a) +{ + long na = deg(a); + if (na <= 0) { + PlainSqr(c, a); + return; + } + + long n = na + na; /* degree of the product */ + + + long l = NextPowerOfTwo(n + 1) - 1; /* 2^l <= n < 2^{l+1} */ + long m2 = 1L << (l + 1); /* m2 = 2m = 2^{l+1} */ + long bound = 2 + NumBits(na) + 2*MaxBits(a); + long r = (bound >> l) + 1; + long mr = r << l; + + /* p := 2^{mr}+1 */ + ZZ p; + set(p); + LeftShift(p, p, mr); + add(p, p, 1); + + ZZVec aa; + aa.SetSize(m2, p.size()); + + long i; + for (i = 0; i <= deg(a); i++) { + if (sign(a.rep[i]) >= 0) { + aa[i] = a.rep[i]; + } else { + add(aa[i], a.rep[i], p); + } + } + + + /* 2m-point FFT's mod p */ + fft(aa, r, l + 1, p, mr); + + /* Pointwise multiplication aa := aa * aa mod p */ + ZZ tmp, ai; + for (i = 0; i < m2; i++) { + sqr(ai, aa[i]); + if (NumBits(ai) > mr) { + RightShift(tmp, ai, mr); + trunc(ai, ai, mr); + sub(ai, ai, tmp); + if (sign(ai) < 0) { + add(ai, ai, p); + } + } + aa[i] = ai; + } + + ifft(aa, r, l + 1, p, mr); + + ZZ ci; + + /* Retrieve c, dividing by 2m, and subtracting p where necessary */ + c.rep.SetLength(n + 1); + + ZZ scratch; + + for (i = 0; i <= n; i++) { + ai = aa[i]; + ZZ& ci = c.rep[i]; + if (!IsZero(ai)) { + /* ci = -ai * 2^{mr-l-1} = ai * 2^{-l-1} = ai / 2m mod p */ + LeftRotate(ai, ai, mr - l - 1, p, mr, scratch); + sub(tmp, p, ai); + if (NumBits(tmp) >= mr) { /* ci >= (p-1)/2 */ + negate(ci, ai); /* ci = -ai = ci - p */ + } + else + ci = tmp; + } + else + clear(ci); + } +} + +void HomSqr(ZZX& x, const ZZX& a) +{ + long da = deg(a); + + if (da < 0) { + clear(x); + return; + } + + zz_pBak bak; + bak.save(); + + long bound = 2 + NumBits(da+1) + 2*MaxBits(a); + + FastCRTHelper H(bound, 96); + + long i, j, k; + + Vec c, aa; + + c.SetLength(H.nblocks); + aa.SetLength(H.nblocks); + + long sz_a = max(1, MaxSize(a)); + + for (k = 0; k < H.nblocks; k++) { + ZZ *prod_vec = &H.prod_vec[H.start_last_level]; + c[k].SetSize(da+da+1, prod_vec[k].size()+1); + aa[k].SetSize(da+1, min(sz_a, prod_vec[k].size())); + } + + Vec ptr_vec; + ptr_vec.SetLength(H.nblocks); + + for (j = 0; j <= da; j++) { + for (k = 0; k < H.nblocks; k++) ptr_vec[k] = &aa[k][j]; + H.reduce(a.rep[j], ptr_vec.elts()); + } + + ZZ t1; 
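The thresholds in mul above compare SSRatio against coefficient size: Schoenhage-Strassen pads the working modulus 2^{mr}+1 up to mr = r*2^l bits, and SSRatio reports how far that overshoots the bound actually needed, so a ratio near 1 favors SSMul while a large ratio favors the CRT-based HomMul. A standalone mirror of that computation (illustrative, plain C++):

#include <algorithm>
#include <cstdio>

static long num_bits(long n)    // bits in n, for n >= 1
{
   long b = 0;
   while (n) { b++; n >>= 1; }
   return b;
}

static long ceil_log2(long n)   // smallest l with 2^l >= n, like NextPowerOfTwo
{
   long l = 0;
   while ((1L << l) < n) l++;
   return l;
}

// Mirror of SSRatio: how much larger the Fermat modulus 2^{mr}+1 is
// than the coefficient bound the product actually requires.
static double ss_ratio(long na, long maxa, long nb, long maxb)
{
   if (na <= 0 || nb <= 0) return 0;
   long n = na + nb;                     // degree of the product
   long l = ceil_log2(n + 1) - 1;        // 2^l <= n < 2^{l+1}
   long bound = 2 + num_bits(std::min(na, nb)) + maxa + maxb;
   long r = (bound >> l) + 1;            // minimal r with r*2^l > bound
   long mr = r << l;
   return double(mr + 1)/double(bound);
}

int main()
{
   printf("%.3f\n", ss_ratio(1000, 1000, 1000, 1000)); // big coefficients: ~1.02
   printf("%.3f\n", ss_ratio(1000, 10, 1000, 10));     // tiny coefficients: ~32
   return 0;
}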
+ + for (k = 0; k < H.nblocks; k++) { + for (i = H.first_vec[k]; i < H.first_vec[k+1]; i++) { + zz_p::FFTInit(i); + + zz_pX A, C; + conv(A.rep, aa[k]); A.normalize(); + sqr(C, A); + + long m = deg(C); + long p = zz_p::modulus(); + long tt = H.coeff_vec[i]; + mulmod_precon_t ttpinv = PrepMulModPrecon(tt, p); + div(t1, H.prod_vec[H.start_last_level+k], p); + for (j = 0; j <= m; j++) { + long tt1 = MulModPrecon(rep(C.rep[j]), tt, p, ttpinv); + MulAddTo(c[k][j], t1, tt1); + } + } + } + + x.rep.SetLength(da+da+1); + for (j = 0; j <= da+da; j++) { + for (k = 0; k < H.nblocks; k++) ptr_vec[k] = &c[k][j]; + H.reconstruct(x.rep[j], ptr_vec.elts()); + } + + x.normalize(); + bak.restore(); +} + + + +void sqr(ZZX& c, const ZZX& a) +{ + if (IsZero(a)) { + clear(c); + return; + } + + long maxa = MaxSize(a); + + long k = maxa; + long s = deg(a) + 1; + + if (s == 1 || (k == 1 && s < 50) || (k == 2 && s < 25) || + (k == 3 && s < 25) || (k == 4 && s < 10)) { + + PlainSqr(c, a); + return; + } + + if (s < 80 || (k < 30 && s < 150)) { + KarSqr(c, a); + return; + } + + long mba = MaxBits(a); + double rat = SSRatio(deg(a), mba, deg(a), mba); + long k1 = maxa; + + if ( + + (k1 >= 26 && rat < 1.40) || + (k1 >= 53 && rat < 1.60) || + (k1 >= 106 && rat < 1.80) || + (k1 >= 212 && rat < 2.00) + + ) { + SSSqr(c, a); + } + else { + HomSqr(c, a); + } +} + + +void mul(ZZX& x, const ZZX& a, const ZZ& b) +{ + ZZ t; + long i, da; + + const ZZ *ap; + ZZ* xp; + + if (IsZero(b)) { + clear(x); + return; + } + + t = b; + da = deg(a); + x.rep.SetLength(da+1); + ap = a.rep.elts(); + xp = x.rep.elts(); + + for (i = 0; i <= da; i++) + mul(xp[i], ap[i], t); +} + +void mul(ZZX& x, const ZZX& a, long b) +{ + long i, da; + + const ZZ *ap; + ZZ* xp; + + if (b == 0) { + clear(x); + return; + } + + da = deg(a); + x.rep.SetLength(da+1); + ap = a.rep.elts(); + xp = x.rep.elts(); + + for (i = 0; i <= da; i++) + mul(xp[i], ap[i], b); +} + + + + +void diff(ZZX& x, const ZZX& a) +{ + long n = deg(a); + long i; + + if (n <= 0) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(n); + + for (i = 0; i <= n-1; i++) { + mul(x.rep[i], a.rep[i+1], i+1); + } + + if (&x == &a) + x.rep.SetLength(n); + + x.normalize(); +} + +void HomPseudoDivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b) +{ + if (IsZero(b)) ArithmeticError("division by zero"); + + long da = deg(a); + long db = deg(b); + + if (da < db) { + r = a; + clear(q); + return; + } + + ZZ LC; + LC = LeadCoeff(b); + + ZZ LC1; + + power(LC1, LC, da-db+1); + + long a_bound = NumBits(LC1) + MaxBits(a); + + LC1.kill(); + + long b_bound = MaxBits(b); + + zz_pBak bak; + bak.save(); + + ZZX qq, rr; + + ZZ prod, t; + set(prod); + + clear(qq); + clear(rr); + + long i; + long Qinstable, Rinstable; + + Qinstable = 1; + Rinstable = 1; + + for (i = 0; ; i++) { + zz_p::FFTInit(i); + long p = zz_p::modulus(); + + + if (divide(LC, p)) continue; + + zz_pX A, B, Q, R; + + conv(A, a); + conv(B, b); + + if (!IsOne(LC)) { + zz_p y; + conv(y, LC); + power(y, y, da-db+1); + mul(A, A, y); + } + + if (!Qinstable) { + conv(Q, qq); + mul(R, B, Q); + sub(R, A, R); + + if (deg(R) >= db) + Qinstable = 1; + else + Rinstable = CRT(rr, prod, R); + } + + if (Qinstable) { + DivRem(Q, R, A, B); + t = prod; + Qinstable = CRT(qq, t, Q); + Rinstable = CRT(rr, prod, R); + } + + if (!Qinstable && !Rinstable) { + // stabilized...check if prod is big enough + + long bound1 = b_bound + MaxBits(qq) + NumBits(min(db, da-db)+1); + long bound2 = MaxBits(rr); + long bound = max(bound1, bound2); + + if (a_bound > bound) + bound = a_bound; 
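The pseudo-division routines in this file (HomPseudoDivRem above, PlainPseudoDivRem below) compute q and r satisfying LeadCoeff(b)^{deg(a)-deg(b)+1} * a = q*b + r with deg(r) < deg(b), which keeps the whole computation over ZZ instead of the rationals. A quick end-to-end check of that invariant through NTL's public interface (assumes <NTL/ZZX.h>):

#include <NTL/ZZX.h>
#include <cassert>

using namespace NTL;

int main()
{
   ZZX a, b, q, r;
   SetCoeff(a, 5, 7); SetCoeff(a, 2, -3); SetCoeff(a, 0, 1); // a = 7x^5 - 3x^2 + 1
   SetCoeff(b, 2, 2); SetCoeff(b, 0, 5);                     // b = 2x^2 + 5

   PseudoDivRem(q, r, a, b);

   // invariant: LC(b)^{deg a - deg b + 1} * a == q*b + r, with deg r < deg b
   ZZ m;
   power(m, LeadCoeff(b), deg(a) - deg(b) + 1);

   ZZX lhs, rhs;
   mul(lhs, a, m);
   mul(rhs, q, b);
   add(rhs, rhs, r);

   assert(lhs == rhs);
   assert(deg(r) < deg(b));
   return 0;
}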
+ + bound += 4; + + if (NumBits(prod) > bound) + break; + } + } + + bak.restore(); + + q = qq; + r = rr; +} + + + + +void HomPseudoDiv(ZZX& q, const ZZX& a, const ZZX& b) +{ + ZZX r; + HomPseudoDivRem(q, r, a, b); +} + +void HomPseudoRem(ZZX& r, const ZZX& a, const ZZX& b) +{ + ZZX q; + HomPseudoDivRem(q, r, a, b); +} + +void PlainPseudoDivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b) +{ + long da, db, dq, i, j, LCIsOne; + const ZZ *bp; + ZZ *qp; + ZZ *xp; + + + ZZ s, t; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("ZZX: division by zero"); + + if (da < db) { + r = a; + clear(q); + return; + } + + ZZX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + ZZ LC = bp[db]; + LCIsOne = IsOne(LC); + + + vec_ZZ x; + + x = a.rep; + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + if (!LCIsOne) { + t = LC; + for (i = dq-1; i >= 0; i--) { + mul(xp[i], xp[i], t); + if (i > 0) mul(t, t, LC); + } + } + + for (i = dq; i >= 0; i--) { + t = xp[i+db]; + qp[i] = t; + + for (j = db-1; j >= 0; j--) { + mul(s, t, bp[j]); + if (!LCIsOne) mul(xp[i+j], xp[i+j], LC); + sub(xp[i+j], xp[i+j], s); + } + } + + if (!LCIsOne) { + t = LC; + for (i = 1; i <= dq; i++) { + mul(qp[i], qp[i], t); + if (i < dq) mul(t, t, LC); + } + } + + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + r.rep[i] = xp[i]; + r.normalize(); +} + + +void PlainPseudoDiv(ZZX& q, const ZZX& a, const ZZX& b) +{ + ZZX r; + PlainPseudoDivRem(q, r, a, b); +} + +void PlainPseudoRem(ZZX& r, const ZZX& a, const ZZX& b) +{ + ZZX q; + PlainPseudoDivRem(q, r, a, b); +} + +void div(ZZX& q, const ZZX& a, long b) +{ + if (b == 0) ArithmeticError("div: division by zero"); + + if (!divide(q, a, b)) ArithmeticError("DivRem: quotient undefined over ZZ"); +} + +void div(ZZX& q, const ZZX& a, const ZZ& b) +{ + if (b == 0) ArithmeticError("div: division by zero"); + + if (!divide(q, a, b)) ArithmeticError("DivRem: quotient undefined over ZZ"); +} + +static +void ConstDivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZ& b) +{ + if (b == 0) ArithmeticError("DivRem: division by zero"); + + if (!divide(q, a, b)) ArithmeticError("DivRem: quotient undefined over ZZ"); + + r = 0; +} + +static +void ConstRem(ZZX& r, const ZZX& a, const ZZ& b) +{ + if (b == 0) ArithmeticError("rem: division by zero"); + + r = 0; +} + + + +void DivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b) +{ + long da = deg(a); + long db = deg(b); + + if (db < 0) ArithmeticError("DivRem: division by zero"); + + if (da < db) { + r = a; + q = 0; + } + else if (db == 0) { + ConstDivRem(q, r, a, ConstTerm(b)); + } + else if (IsOne(LeadCoeff(b))) { + PseudoDivRem(q, r, a, b); + } + else if (LeadCoeff(b) == -1) { + ZZX b1; + negate(b1, b); + PseudoDivRem(q, r, a, b1); + negate(q, q); + } + else if (divide(q, a, b)) { + r = 0; + } + else { + ZZX q1, r1; + ZZ m; + PseudoDivRem(q1, r1, a, b); + power(m, LeadCoeff(b), da-db+1); + if (!divide(q, q1, m)) ArithmeticError("DivRem: quotient not defined over ZZ"); + if (!divide(r, r1, m)) ArithmeticError("DivRem: remainder not defined over ZZ"); + } +} + +void div(ZZX& q, const ZZX& a, const ZZX& b) +{ + long da = deg(a); + long db = deg(b); + + if (db < 0) ArithmeticError("div: division by zero"); + + if (da < db) { + q = 0; + } + else if (db == 0) { + div(q, a, ConstTerm(b)); + } + else if (IsOne(LeadCoeff(b))) { + PseudoDiv(q, a, b); + } + else if (LeadCoeff(b) == -1) { + ZZX b1; + negate(b1, b); + PseudoDiv(q, a, b1); + negate(q, q); + } + else if (divide(q, a, b)) { + + // 
nothing to do + + } + else { + ZZX q1; + ZZ m; + PseudoDiv(q1, a, b); + power(m, LeadCoeff(b), da-db+1); + if (!divide(q, q1, m)) ArithmeticError("div: quotient not defined over ZZ"); + } +} + +void rem(ZZX& r, const ZZX& a, const ZZX& b) +{ + long da = deg(a); + long db = deg(b); + + if (db < 0) ArithmeticError("rem: division by zero"); + + if (da < db) { + r = a; + } + else if (db == 0) { + ConstRem(r, a, ConstTerm(b)); + } + else if (IsOne(LeadCoeff(b))) { + PseudoRem(r, a, b); + } + else if (LeadCoeff(b) == -1) { + ZZX b1; + negate(b1, b); + PseudoRem(r, a, b1); + } + else if (divide(a, b)) { + r = 0; + } + else { + ZZX r1; + ZZ m; + PseudoRem(r1, a, b); + power(m, LeadCoeff(b), da-db+1); + if (!divide(r, r1, m)) ArithmeticError("rem: remainder not defined over ZZ"); + } +} + +long HomDivide(ZZX& q, const ZZX& a, const ZZX& b) +{ + if (IsZero(b)) { + if (IsZero(a)) { + clear(q); + return 1; + } + else + return 0; + } + + if (IsZero(a)) { + clear(q); + return 1; + } + + if (deg(b) == 0) { + return divide(q, a, ConstTerm(b)); + } + + if (deg(a) < deg(b)) return 0; + + ZZ ca, cb, cq; + + content(ca, a); + content(cb, b); + + if (!divide(cq, ca, cb)) return 0; + + ZZX aa, bb; + + divide(aa, a, ca); + divide(bb, b, cb); + + if (!divide(LeadCoeff(aa), LeadCoeff(bb))) + return 0; + + if (!divide(ConstTerm(aa), ConstTerm(bb))) + return 0; + + zz_pBak bak; + bak.save(); + + ZZX qq; + + ZZ prod; + set(prod); + + clear(qq); + long res = 1; + long Qinstable = 1; + + + long a_bound = MaxBits(aa); + long b_bound = MaxBits(bb); + + + long i; + for (i = 0; ; i++) { + zz_p::FFTInit(i); + long p = zz_p::modulus(); + + if (divide(LeadCoeff(bb), p)) continue; + + zz_pX A, B, Q, R; + + conv(A, aa); + conv(B, bb); + + if (!Qinstable) { + conv(Q, qq); + mul(R, B, Q); + sub(R, A, R); + + if (deg(R) >= deg(B)) + Qinstable = 1; + else if (!IsZero(R)) { + res = 0; + break; + } + else + mul(prod, prod, p); + } + + if (Qinstable) { + if (!divide(Q, A, B)) { + res = 0; + break; + } + + Qinstable = CRT(qq, prod, Q); + } + + if (!Qinstable) { + // stabilized...check if prod is big enough + + long bound = b_bound + MaxBits(qq) + + NumBits(min(deg(bb), deg(qq)) + 1); + + if (a_bound > bound) + bound = a_bound; + + bound += 3; + + if (NumBits(prod) > bound) + break; + } + } + + bak.restore(); + + if (res) mul(q, qq, cq); + return res; + +} + + +long HomDivide(const ZZX& a, const ZZX& b) +{ + if (deg(b) == 0) { + return divide(a, ConstTerm(b)); + } + else { + ZZX q; + return HomDivide(q, a, b); + } +} + +long PlainDivide(ZZX& qq, const ZZX& aa, const ZZX& bb) +{ + if (IsZero(bb)) { + if (IsZero(aa)) { + clear(qq); + return 1; + } + else + return 0; + } + + if (deg(bb) == 0) { + return divide(qq, aa, ConstTerm(bb)); + } + + long da, db, dq, i, j, LCIsOne; + const ZZ *bp; + ZZ *qp; + ZZ *xp; + + + ZZ s, t; + + da = deg(aa); + db = deg(bb); + + if (da < db) { + return 0; + } + + ZZ ca, cb, cq; + + content(ca, aa); + content(cb, bb); + + if (!divide(cq, ca, cb)) { + return 0; + } + + + ZZX a, b, q; + + divide(a, aa, ca); + divide(b, bb, cb); + + if (!divide(LeadCoeff(a), LeadCoeff(b))) + return 0; + + if (!divide(ConstTerm(a), ConstTerm(b))) + return 0; + + long coeff_bnd = MaxBits(a) + (NumBits(da+1)+1)/2 + (da-db); + + bp = b.rep.elts(); + + ZZ LC; + LC = bp[db]; + + LCIsOne = IsOne(LC); + + xp = a.rep.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + if (!LCIsOne) { + if (!divide(t, xp[i+db], LC)) + return 0; + } + else + t = xp[i+db]; + + if (NumBits(t) > coeff_bnd) 
return 0; + + qp[i] = t; + + for (j = db-1; j >= 0; j--) { + mul(s, t, bp[j]); + sub(xp[i+j], xp[i+j], s); + } + } + + for (i = 0; i < db; i++) + if (!IsZero(xp[i])) + return 0; + + mul(qq, q, cq); + return 1; +} + +long PlainDivide(const ZZX& a, const ZZX& b) +{ + if (deg(b) == 0) + return divide(a, ConstTerm(b)); + else { + ZZX q; + return PlainDivide(q, a, b); + } +} + + +long divide(ZZX& q, const ZZX& a, const ZZX& b) +{ + long da = deg(a); + long db = deg(b); + + if (db <= 8 || da-db <= 8) + return PlainDivide(q, a, b); + else + return HomDivide(q, a, b); +} + +long divide(const ZZX& a, const ZZX& b) +{ + long da = deg(a); + long db = deg(b); + + if (db <= 8 || da-db <= 8) + return PlainDivide(a, b); + else + return HomDivide(a, b); +} + + + + + + + +long divide(ZZX& q, const ZZX& a, const ZZ& b) +{ + if (IsZero(b)) { + if (IsZero(a)) { + clear(q); + return 1; + } + else + return 0; + } + + if (IsOne(b)) { + q = a; + return 1; + } + + if (b == -1) { + negate(q, a); + return 1; + } + + long n = a.rep.length(); + vec_ZZ res(INIT_SIZE, n); + long i; + + for (i = 0; i < n; i++) { + if (!divide(res[i], a.rep[i], b)) + return 0; + } + + q.rep = res; + return 1; +} + +long divide(const ZZX& a, const ZZ& b) +{ + if (IsZero(b)) return IsZero(a); + + if (IsOne(b) || b == -1) { + return 1; + } + + long n = a.rep.length(); + long i; + + for (i = 0; i < n; i++) { + if (!divide(a.rep[i], b)) + return 0; + } + + return 1; +} + +long divide(ZZX& q, const ZZX& a, long b) +{ + if (b == 0) { + if (IsZero(a)) { + clear(q); + return 1; + } + else + return 0; + } + + if (b == 1) { + q = a; + return 1; + } + + if (b == -1) { + negate(q, a); + return 1; + } + + long n = a.rep.length(); + vec_ZZ res(INIT_SIZE, n); + long i; + + for (i = 0; i < n; i++) { + if (!divide(res[i], a.rep[i], b)) + return 0; + } + + q.rep = res; + return 1; +} + +long divide(const ZZX& a, long b) +{ + if (b == 0) return IsZero(a); + if (b == 1 || b == -1) { + return 1; + } + + long n = a.rep.length(); + long i; + + for (i = 0; i < n; i++) { + if (!divide(a.rep[i], b)) + return 0; + } + + return 1; +} + + + +void content(ZZ& d, const ZZX& f) +{ + ZZ res; + long i; + + clear(res); + for (i = 0; i <= deg(f); i++) { + GCD(res, res, f.rep[i]); + if (IsOne(res)) break; + } + + if (sign(LeadCoeff(f)) < 0) negate(res, res); + d = res; +} + +void PrimitivePart(ZZX& pp, const ZZX& f) +{ + if (IsZero(f)) { + clear(pp); + return; + } + + ZZ d; + + content(d, f); + divide(pp, f, d); +} + + +static +void BalCopy(ZZX& g, const zz_pX& G) +{ + long p = zz_p::modulus(); + long p2 = p >> 1; + long n = G.rep.length(); + long i; + long t; + + g.rep.SetLength(n); + for (i = 0; i < n; i++) { + t = rep(G.rep[i]); + if (t > p2) t = t - p; + conv(g.rep[i], t); + } +} + + + +void GCD(ZZX& d, const ZZX& a, const ZZX& b) +{ + if (IsZero(a)) { + d = b; + if (sign(LeadCoeff(d)) < 0) negate(d, d); + return; + } + + if (IsZero(b)) { + d = a; + if (sign(LeadCoeff(d)) < 0) negate(d, d); + return; + } + + ZZ c1, c2, c; + ZZX f1, f2; + + content(c1, a); + divide(f1, a, c1); + + content(c2, b); + divide(f2, b, c2); + + GCD(c, c1, c2); + + ZZ ld; + GCD(ld, LeadCoeff(f1), LeadCoeff(f2)); + + ZZX g, h, res; + + ZZ prod; + set(prod); + + zz_pBak bak; + bak.save(); + + + long FirstTime = 1; + + long i; + for (i = 0; ;i++) { + zz_p::FFTInit(i); + long p = zz_p::modulus(); + + if (divide(LeadCoeff(f1), p) || divide(LeadCoeff(f2), p)) continue; + + zz_pX G, F1, F2; + zz_p LD; + + conv(F1, f1); + conv(F2, f2); + conv(LD, ld); + + GCD(G, F1, F2); + mul(G, G, LD); + + + if (deg(G) 
== 0) { + set(res); + break; + } + + if (FirstTime || deg(G) < deg(g)) { + FirstTime = 0; + conv(prod, p); + BalCopy(g, G); + } + else if (deg(G) > deg(g)) + continue; + else if (!CRT(g, prod, G)) { + PrimitivePart(res, g); + if (divide(f1, res) && divide(f2, res)) + break; + } + + } + + bak.restore(); + + mul(d, res, c); + if (sign(LeadCoeff(d)) < 0) negate(d, d); +} + +void trunc(ZZX& x, const ZZX& a, long m) + +// x = a % X^m, output may alias input + +{ + if (m < 0) LogicError("trunc: bad args"); + + if (&x == &a) { + if (x.rep.length() > m) { + x.rep.SetLength(m); + x.normalize(); + } + } + else { + long n; + long i; + ZZ* xp; + const ZZ* ap; + + n = min(a.rep.length(), m); + x.rep.SetLength(n); + + xp = x.rep.elts(); + ap = a.rep.elts(); + + for (i = 0; i < n; i++) xp[i] = ap[i]; + + x.normalize(); + } +} + + + +void LeftShift(ZZX& x, const ZZX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) + clear(x); + else + RightShift(x, a, -n); + return; + } + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in LeftShift"); + + long m = a.rep.length(); + + x.rep.SetLength(m+n); + + long i; + for (i = m-1; i >= 0; i--) + x.rep[i+n] = a.rep[i]; + + for (i = 0; i < n; i++) + clear(x.rep[i]); +} + + +void RightShift(ZZX& x, const ZZX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) ResourceError("overflow in RightShift"); + LeftShift(x, a, -n); + return; + } + + long da = deg(a); + long i; + + if (da < n) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(da-n+1); + + for (i = 0; i <= da-n; i++) + x.rep[i] = a.rep[i+n]; + + if (&x == &a) + x.rep.SetLength(da-n+1); + + x.normalize(); +} + + +void TraceVec(vec_ZZ& S, const ZZX& ff) +{ + if (!IsOne(LeadCoeff(ff))) + LogicError("TraceVec: bad args"); + + ZZX f; + f = ff; + + long n = deg(f); + + S.SetLength(n); + + if (n == 0) + return; + + long k, i; + ZZ acc, t; + + S[0] = n; + + for (k = 1; k < n; k++) { + mul(acc, f.rep[n-k], k); + + for (i = 1; i < k; i++) { + mul(t, f.rep[n-i], S[k-i]); + add(acc, acc, t); + } + + negate(S[k], acc); + } + +} + +static +void EuclLength(ZZ& l, const ZZX& a) +{ + long n = a.rep.length(); + long i; + + ZZ sum, t; + + clear(sum); + for (i = 0; i < n; i++) { + sqr(t, a.rep[i]); + add(sum, sum, t); + } + + if (sum > 1) { + SqrRoot(l, sum); + add(l, l, 1); + } + else + l = sum; +} + + + +static +long ResBound(const ZZX& a, const ZZX& b) +{ + if (IsZero(a) || IsZero(b)) + return 0; + + ZZ t1, t2, t; + EuclLength(t1, a); + EuclLength(t2, b); + power(t1, t1, deg(b)); + power(t2, t2, deg(a)); + mul(t, t1, t2); + return NumBits(t); +} + + + +void resultant(ZZ& rres, const ZZX& a, const ZZX& b, long deterministic) +{ + if (IsZero(a) || IsZero(b)) { + clear(rres); + return; + } + + zz_pBak zbak; + zbak.save(); + + ZZ_pBak Zbak; + Zbak.save(); + + long instable = 1; + + long bound = 2+ResBound(a, b); + + long gp_cnt = 0; + + ZZ res, prod; + + clear(res); + set(prod); + + + long i; + for (i = 0; ; i++) { + if (NumBits(prod) > bound) + break; + + if (!deterministic && + !instable && bound > 1000 && NumBits(prod) < 0.25*bound) { + + ZZ P; + + + long plen = 90 + NumBits(max(bound, NumBits(res))); + + do { + GenPrime(P, plen, 90 + 2*NumBits(gp_cnt++)); + } + while (divide(LeadCoeff(a), P) || divide(LeadCoeff(b), P)); + + ZZ_p::init(P); + + ZZ_pX A, B; + conv(A, a); + conv(B, b); + + ZZ_p t; + resultant(t, A, B); + + if (CRT(res, prod, rep(t), P)) + instable = 1; + else + break; + } + + + zz_p::FFTInit(i); + 
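// Each FFT prime p (not dividing either leading coefficient) contributes + // the resultant mod p; the images are glued together by CRT, and the + // loop terminates once the product of the primes used exceeds the + // a-priori bound, at which point res is the exact resultant. +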
long p = zz_p::modulus(); + if (divide(LeadCoeff(a), p) || divide(LeadCoeff(b), p)) + continue; + + zz_pX A, B; + conv(A, a); + conv(B, b); + + zz_p t; + resultant(t, A, B); + + instable = CRT(res, prod, rep(t), p); + } + + rres = res; + + zbak.restore(); + Zbak.restore(); +} + + + + +void MinPolyMod(ZZX& gg, const ZZX& a, const ZZX& f) + +{ + if (!IsOne(LeadCoeff(f)) || deg(f) < 1 || deg(a) >= deg(f)) + LogicError("MinPolyMod: bad args"); + + if (IsZero(a)) { + SetX(gg); + return; + } + + ZZ_pBak Zbak; + Zbak.save(); + zz_pBak zbak; + zbak.save(); + + long n = deg(f); + + long instable = 1; + + long gp_cnt = 0; + + ZZ prod; + ZZX g; + + clear(g); + set(prod); + + long bound = -1; + + long i; + for (i = 0; ; i++) { + if (deg(g) == n) { + if (bound < 0) + bound = 2+CharPolyBound(a, f); + + if (NumBits(prod) > bound) + break; + } + + if (!instable && + (deg(g) < n || + (deg(g) == n && bound > 1000 && NumBits(prod) < 0.75*bound))) { + + // guarantees 2^{-80} error probability + long plen = 90 + max( 2*NumBits(n) + NumBits(MaxBits(f)), + max( NumBits(n) + NumBits(MaxBits(a)), + NumBits(MaxBits(g)) )); + + ZZ P; + GenPrime(P, plen, 90 + 2*NumBits(gp_cnt++)); + ZZ_p::init(P); + + + ZZ_pX A, F, G; + conv(A, a); + conv(F, f); + conv(G, g); + + ZZ_pXModulus FF; + build(FF, F); + + ZZ_pX H; + CompMod(H, G, A, FF); + + if (IsZero(H)) + break; + + instable = 1; + } + + zz_p::FFTInit(i); + + zz_pX A, F; + conv(A, a); + conv(F, f); + + zz_pXModulus FF; + build(FF, F); + + zz_pX G; + MinPolyMod(G, A, FF); + + if (deg(G) < deg(g)) + continue; + + if (deg(G) > deg(g)) { + clear(g); + set(prod); + } + + instable = CRT(g, prod, G); + } + + gg = g; + + Zbak.restore(); + zbak.restore(); +} + + +void XGCD(ZZ& rr, ZZX& ss, ZZX& tt, const ZZX& a, const ZZX& b, + long deterministic) +{ + ZZ r; + + resultant(r, a, b, deterministic); + + if (IsZero(r)) { + clear(rr); + return; + } + + zz_pBak bak; + bak.save(); + + long i; + long instable = 1; + + ZZ tmp; + ZZ prod; + ZZX s, t; + + set(prod); + clear(s); + clear(t); + + for (i = 0; ; i++) { + zz_p::FFTInit(i); + long p = zz_p::modulus(); + + if (divide(LeadCoeff(a), p) || divide(LeadCoeff(b), p) || divide(r, p)) + continue; + + zz_p R; + conv(R, r); + + zz_pX D, S, T, A, B; + conv(A, a); + conv(B, b); + + if (!instable) { + conv(S, s); + conv(T, t); + zz_pX t1, t2; + mul(t1, A, S); + mul(t2, B, T); + add(t1, t1, t2); + + if (deg(t1) == 0 && ConstTerm(t1) == R) + mul(prod, prod, p); + else + instable = 1; + } + + if (instable) { + XGCD(D, S, T, A, B); + + mul(S, S, R); + mul(T, T, R); + + tmp = prod; + long Sinstable = CRT(s, tmp, S); + long Tinstable = CRT(t, prod, T); + + instable = Sinstable || Tinstable; + } + + if (!instable) { + long bound1 = NumBits(min(deg(a), deg(s)) + 1) + + MaxBits(a) + MaxBits(s); + long bound2 = NumBits(min(deg(b), deg(t)) + 1) + + MaxBits(b) + MaxBits(t); + + long bound = 4 + max(NumBits(r), max(bound1, bound2)); + + if (NumBits(prod) > bound) + break; + } + } + + rr = r; + ss = s; + tt = t; + + bak.restore(); +} + +void NormMod(ZZ& x, const ZZX& a, const ZZX& f, long deterministic) +{ + if (!IsOne(LeadCoeff(f)) || deg(a) >= deg(f) || deg(f) <= 0) + LogicError("norm: bad args"); + + if (IsZero(a)) { + clear(x); + return; + } + + resultant(x, f, a, deterministic); +} + +void TraceMod(ZZ& res, const ZZX& a, const ZZX& f) +{ + if (!IsOne(LeadCoeff(f)) || deg(a) >= deg(f) || deg(f) <= 0) + LogicError("trace: bad args"); + + vec_ZZ S; + + TraceVec(S, f); + + InnerProduct(res, S, a.rep); +} + + +void discriminant(ZZ& d, const ZZX& a, long 
deterministic) +{ + long m = deg(a); + + if (m < 0) { + clear(d); + return; + } + + ZZX a1; + ZZ res; + + diff(a1, a); + resultant(res, a, a1, deterministic); + if (!divide(res, res, LeadCoeff(a))) + LogicError("discriminant: inexact division"); + + m = m & 3; + if (m >= 2) + negate(res, res); + + d = res; +} + + +void MulMod(ZZX& x, const ZZX& a, const ZZX& b, const ZZX& f) +{ + if (deg(a) >= deg(f) || deg(b) >= deg(f) || deg(f) == 0 || + !IsOne(LeadCoeff(f))) + LogicError("MulMod: bad args"); + + ZZX t; + mul(t, a, b); + rem(x, t, f); +} + +void SqrMod(ZZX& x, const ZZX& a, const ZZX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0 || !IsOne(LeadCoeff(f))) + LogicError("SqrMod: bad args"); + + ZZX t; + sqr(t, a); + rem(x, t, f); +} + + + +static +void MulByXModAux(ZZX& h, const ZZX& a, const ZZX& f) +{ + long i, n, m; + ZZ* hh; + const ZZ *aa, *ff; + + ZZ t, z; + + + n = deg(f); + m = deg(a); + + if (m >= n || n == 0 || !IsOne(LeadCoeff(f))) + LogicError("MulByXMod: bad args"); + + if (m < 0) { + clear(h); + return; + } + + if (m < n-1) { + h.rep.SetLength(m+2); + hh = h.rep.elts(); + aa = a.rep.elts(); + for (i = m+1; i >= 1; i--) + hh[i] = aa[i-1]; + clear(hh[0]); + } + else { + h.rep.SetLength(n); + hh = h.rep.elts(); + aa = a.rep.elts(); + ff = f.rep.elts(); + negate(z, aa[n-1]); + for (i = n-1; i >= 1; i--) { + mul(t, z, ff[i]); + add(hh[i], aa[i-1], t); + } + mul(hh[0], z, ff[0]); + h.normalize(); + } +} + +void MulByXMod(ZZX& h, const ZZX& a, const ZZX& f) +{ + if (&h == &f) { + ZZX hh; + MulByXModAux(hh, a, f); + h = hh; + } + else + MulByXModAux(h, a, f); +} + +static +void EuclLength1(ZZ& l, const ZZX& a) +{ + long n = a.rep.length(); + long i; + + ZZ sum, t; + + clear(sum); + for (i = 0; i < n; i++) { + sqr(t, a.rep[i]); + add(sum, sum, t); + } + + abs(t, ConstTerm(a)); + mul(t, t, 2); + add(t, t, 1); + add(sum, sum, t); + + if (sum > 1) { + SqrRoot(l, sum); + add(l, l, 1); + } + else + l = sum; +} + + +long CharPolyBound(const ZZX& a, const ZZX& f) +// This computes a bound on the size of the +// coefficients of the characteristic polynomial. +// It uses the characterization of the char poly as +// resultant_y(f(y), x-a(y)), and then interpolates this +// through complex primitive (deg(f)+1)-roots of unity.
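+// Concretely, the value returned is the bit length of +// EuclLength1(a)^deg(f) * EuclLength(f)^deg(a), where EuclLength(g) +// upper-bounds the 2-norm of g's coefficient vector, and EuclLength1 +// additionally adds 2*|ConstTerm|+1 under the square root to account +// for the x - a(y) factor.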
+ +{ + if (IsZero(a) || IsZero(f)) + LogicError("CharPolyBound: bad args"); + + ZZ t1, t2, t; + EuclLength1(t1, a); + EuclLength(t2, f); + power(t1, t1, deg(f)); + power(t2, t2, deg(a)); + mul(t, t1, t2); + return NumBits(t); +} + + +void SetCoeff(ZZX& x, long i, long a) +{ + if (a == 1) + SetCoeff(x, i); + else { + NTL_ZZRegister(aa); + conv(aa, a); + SetCoeff(x, i, aa); + } +} + + +void CopyReverse(ZZX& x, const ZZX& a, long hi) + + // x[0..hi] = reverse(a[0..hi]), with zero fill + // input may not alias output + +{ + long i, j, n, m; + + n = hi+1; + m = a.rep.length(); + + x.rep.SetLength(n); + + const ZZ* ap = a.rep.elts(); + ZZ* xp = x.rep.elts(); + + for (i = 0; i < n; i++) { + j = hi-i; + if (j < 0 || j >= m) + clear(xp[i]); + else + xp[i] = ap[j]; + } + + x.normalize(); +} + +void reverse(ZZX& x, const ZZX& a, long hi) +{ + if (hi < 0) { clear(x); return; } + if (NTL_OVERFLOW(hi, 1, 0)) + ResourceError("overflow in reverse"); + + if (&x == &a) { + ZZX tmp; + CopyReverse(tmp, a, hi); + x = tmp; + } + else + CopyReverse(x, a, hi); +} + +void MulTrunc(ZZX& x, const ZZX& a, const ZZX& b, long n) +{ + ZZX t; + mul(t, a, b); + trunc(x, t, n); +} + +void SqrTrunc(ZZX& x, const ZZX& a, long n) +{ + ZZX t; + sqr(t, a); + trunc(x, t, n); +} + + +void NewtonInvTrunc(ZZX& c, const ZZX& a, long e) +{ + ZZ x; + + if (ConstTerm(a) == 1) + x = 1; + else if (ConstTerm(a) == -1) + x = -1; + else + ArithmeticError("InvTrunc: non-invertible constant term"); + + if (e == 1) { + conv(c, x); + return; + } + + vec_long E; + E.SetLength(0); + append(E, e); + while (e > 1) { + e = (e+1)/2; + append(E, e); + } + + long L = E.length(); + + ZZX g, g0, g1, g2; + + + g.rep.SetMaxLength(E[0]); + g0.rep.SetMaxLength(E[0]); + g1.rep.SetMaxLength((3*E[0]+1)/2); + g2.rep.SetMaxLength(E[0]); + + conv(g, x); + + long i; + + for (i = L-1; i > 0; i--) { + // lift from E[i] to E[i-1] + + long k = E[i]; + long l = E[i-1]-E[i]; + + trunc(g0, a, k+l); + + mul(g1, g0, g); + RightShift(g1, g1, k); + trunc(g1, g1, l); + + mul(g2, g1, g); + trunc(g2, g2, l); + LeftShift(g2, g2, k); + + sub(g, g, g2); + } + + c = g; +} + + +void InvTrunc(ZZX& c, const ZZX& a, long e) +{ + if (e < 0) LogicError("InvTrunc: bad args"); + + if (e == 0) { + clear(c); + return; + } + + if (NTL_OVERFLOW(e, 1, 0)) + ResourceError("overflow in InvTrunc"); + + NewtonInvTrunc(c, a, e); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZXCharPoly.c b/thirdparty/linux/ntl/src/ZZXCharPoly.c new file mode 100644 index 0000000000..6b0b7edcf4 --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZXCharPoly.c @@ -0,0 +1,79 @@ +#include + +#include + +NTL_START_IMPL + + +void CharPolyMod(ZZX& gg, const ZZX& a, const ZZX& f, long deterministic) +{ + if (!IsOne(LeadCoeff(f)) || deg(f) < 1 || deg(a) >= deg(f)) + LogicError("CharPolyMod: bad args"); + + + if (IsZero(a)) { + clear(gg); + SetCoeff(gg, deg(f)); + return; + } + + long bound = 2 + CharPolyBound(a, f); + + long gp_cnt = 0; + + zz_pBak bak; + bak.save(); + + ZZ_pBak bak1; + bak1.save(); + + ZZX g; + ZZ prod; + + clear(g); + set(prod); + + long i; + + long instable = 1; + + for (i = 0; ; i++) { + if (NumBits(prod) > bound) + break; + + if (!deterministic && + !instable && bound > 1000 && NumBits(prod) < 0.25*bound) { + long plen = 90 + NumBits(max(bound, MaxBits(g))); + + ZZ P; + + GenPrime(P, plen, 90 + 2*NumBits(gp_cnt++)); + + ZZ_p::init(P); + ZZ_pX G, A, F; + conv(A, a); + conv(F, f); + CharPolyMod(G, A, F); + + if (CRT(g, prod, G)) + instable = 1; + else + break; + } + + zz_p::FFTInit(i); + + zz_pX G, A, 
F; + conv(A, a); + conv(F, f); + CharPolyMod(G, A, F); + instable = CRT(g, prod, G); + } + + gg = g; + + bak.restore(); + bak1.restore(); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZXFacTest.c b/thirdparty/linux/ntl/src/ZZXFacTest.c new file mode 100644 index 0000000000..d1729237e7 --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZXFacTest.c @@ -0,0 +1,77 @@ +#include + +NTL_CLIENT + + +long compare(const ZZX& a, const ZZX& b) +{ + if (deg(a) < deg(b)) + return 0; + + if (deg(a) > deg(b)) + return 1; + + long n = a.rep.length(); + long i; + + for (i = 0; i < n; i++) { + if (a.rep[i] < b.rep[i]) return 0; + if (a.rep[i] > b.rep[i]) return 1; + } + + return 0; +} + + +void sort(vec_pair_ZZX_long& v) +{ + long n = v.length(); + long i, j; + + for (i = 0; i < n-1; i++) + for (j = 0; j < n-1-i; j++) + if (compare(v[j].a, v[j+1].a)) { + swap(v[j].a, v[j+1].a); + swap(v[j].b, v[j+1].b); + } +} + + + +int main(int argc, char **argv) +{ + ZZX f1, f; + + if (argc > 1) + ZZXFac_MaxPrune = atoi(argv[1]); + + cin >> f; + + vec_pair_ZZX_long factors; + ZZ c; + + double t; + + t = GetTime(); + factor(c, factors, f, 0); + t = GetTime()-t; + + cerr << "total time: " << t << "\n"; + + + mul(f1, factors); + mul(f1, f1, c); + + if (f != f1) + TerminalError("FACTORIZATION INCORRECT!!!"); + + + + sort(factors); + + cout << c << "\n"; + cout << factors << "\n"; + + return 0; +} + diff --git a/thirdparty/linux/ntl/src/ZZXFacTestIn b/thirdparty/linux/ntl/src/ZZXFacTestIn new file mode 100644 index 0000000000..54605a2be5 --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZXFacTestIn @@ -0,0 +1,159 @@ +[ +2757808487144838302895430769948248417729237108863869417509479459915767341323330697211864790593685466362642868685686280572196434680273380283012145961103760692626213505149801403142032630867205290294889677921852958863296316098679545758264555461523658710938789459422476508285713011640130252486568573874403279248369585169539213650618995126951019719642265868631105857663880266888615740687129117228288314207488263153874611799588864 +0 +0 +0 +0 +0 +-216322796837555313021432683645937044292921028613383407157804069297831574640998740656489092120685008467028608331849579265535298919026656996120638521170849310580349430742714282458612690837924997423939358531519250439003103809474194653867052412730011984853274391208927747745495001768576577005852099658803085546514828589578330785402902514368037827033046816681733696103002335485194130317912396254882191766550071245537280 +0 +0 +0 +0 +0 +-461037645433479417894020721333520318742787637796222824861773705209889273869538158383427629416696554307900636877857163108477972707536277062193762789904663727290361284224373517600871057249998734207159849953758589691456457289461969382313561313225919667846650484354465287838669574263259013740628682955721862333944942797610117714349319340424888849377188433375019685712170554798874566289464703683278915239936 +0 +0 +0 +0 +0 +-3006606488311441089896683017688393310181237526393515483185168180454254753097973363635722407434487318654611062408549655477030250215182281891825672566995224009864674440632939039163270907351880438387976862382953630127475285232095605983810246580601204355061953631934206101782233808526261885763004588113518320634896519093555011110157604966440212309535525594054654293518320014613131809110622208 +0 +0 +0 +0 +0 
+-21667646451621122746329554641562526144092633323312599411016990419323105385099476066414168972485002571354506235276444763858199439264635997569307854261698326829531703857487029929169587379456251270438809123403197975344780862412222180332148184540879408846734261457232407592147312924337495632438881306066500258566449544033936294138097409605834038606201765458164323353573757812736 +0 +0 +0 +0 +0 +-36008946342647299345277533676465042992542509358162091707911485884063199188678585181307031154064029716715217548592392000646414311357113983560970141370451913098200635822787067067854257570327208938999696955706264073506673179971102683350614122127866850451153062491787093182867397127538003924258331826086311209206145468869911581822662684689775342390081444166762496 +0 +0 +0 +0 +0 +64738333885402706154009400777843384948514403975131509466836736265603889387062685884838924226241494383359395885853573043385536386037715829201209541872597706855188799653603478660006394706968635342978467902539925660331269747297563569373972862258677268612185384308671129076702627081272278586910256050615727493523124564910427355844022903201664598016 +0 +0 +0 +0 +0 +-26515019326962174180142979909852192000598024025005741295940640978614198820508372895246811842900425977352727727321855043342564236181262669572054649107156675328942799346600624572167621155490882477365676272092450671794375283335841074839674104033802419109510303627230113525211458922561813524476132216574712854465367040031027145736192 +0 +0 +0 +0 +0 +7206103572192660713378206441635342218917489717304647992423853713271386261468915707872924405011212678144586363155710573280556474140830016874081726445858124315662916567493737199217110913142452405975457031642733926038866594546019867393529529573884911052196182095293632879922014565430281245217054716156649445034819584 +0 +0 +0 +0 +0 +-1781067445792627012203411699914789751917558964317719007317246424900608235475797721781459407143458070618250900401160717537304193257391576912113861254338589725654811524225989470856639467420913364918958248936121424237401680876071173527822994467656057208698530802400487646077493116544189557449970155520 +0 +0 +0 +0 +0 +257785868023252499546719038629025522238370895088838886132974669167861057091364847664488796999614147085642953811171182098166013025545859882309438475565596820726130902460185426222014793570631782686609398943162320798462220946658785090792728810983258963716578939056822038673472371032064 +0 +0 +0 +0 +0 +-11784090207202014162295386617819122580975749672866993427537075993392758066192465164322165415699997447946113816444451865902475946080059998736123233932142195687596120620775313704725043713670267057116323795251403918781211539279372041111665200731953898028103168618921984 +0 +0 +0 +0 +0 +-516024270178733628499623062311311909026630748206142586718438261050317424310047984113834321775600080026192071968298564681207899012445940514128285818475704941072871032892579017242172498256146144732370397366971600828731748910067376163611390633039626240 +0 +0 +0 +0 +0 +30082639005203703587384390914836066417375929216836277495255127429645641088564854026651642629932602536304239256338883055950342516352052967456427931536708936701164689433642079716822865825451517411212445927909524063734481575283370491904 +0 +0 +0 +0 +0 +-473470234117762253449114353073575227253051133950080887048220406201777923641087656057923908627860421347898111116275922195546977433176120962969045561381814949718893972897291028710748149921273892963880265171252944044032 +0 +0 +0 +0 +0 
+1716231160060069630599685050354626175015068955757203109707649882577572773165548338611188714799775273882912731234014004123562719620912869622981185312386637525543829470533595866145631607760885722906624 +0 +0 +0 +0 +0 +29097610765037817861202663659325678575900946766272112229562997159720733072202069362200879071525561150306287386997432893787696445551277644983772558101666007653602762940659581050683392 +0 +0 +0 +0 +0 +-359926316046483943753279829287876608460021803187136100943332906260813526077725113393005907399350257834664287535635450778981949557310768189387775714926393025942781952 +0 +0 +0 +0 +0 +1585164055408008634764541421847990628448201383564778254157208549989039915612818011950714697494640213070044956331618331201033480861222528629063286784 +0 +0 +0 +0 +0 +-2786147914453983786915822775172002754752542153742435684470657396691295332282129004399822009077517014397168296974400680697863864320 +0 +0 +0 +0 +0 +-38152084257499719867416104314045082135545017120806331924723488759543432766463641099223031913762482060169052160 +0 +0 +0 +0 +0 +6156765507729276548165940166182211314235431893392787456381452344509549569751736679901247307776 +0 +0 +0 +0 +0 +-6689833869884920066141475743520535508680559052015304499397212203049103130624 +0 +0 +0 +0 +0 +2000741892753026115892243757690184900091769801141945106432 +0 +0 +0 +0 +0 +-50111580155260460844584241578962989056 +0 +0 +0 +0 +0 +-28293184124737694080 +0 +0 +0 +0 +0 +1 +] diff --git a/thirdparty/linux/ntl/src/ZZXFacTestOut b/thirdparty/linux/ntl/src/ZZXFacTestOut new file mode 100644 index 0000000000..2d486adbeb --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZXFacTestOut @@ -0,0 +1,2 @@ +1 +[[[-446972 -1000 1] 1] [[-446972 1000 1] 1] [[-33692 -652 1] 1] [[-33692 652 1] 1] [[273892 -1076 1] 1] [[273892 1076 1] 1] [[680548 -1652 1] 1] [[680548 1652 1] 1] [[849892 -1844 1] 1] [[849892 1844 1] 1] [[2052868 -2920 1] 1] [[2052868 2920 1] 1] [[1135150864 -21967184 458796 652 1] 1] [[1135150864 21967184 458796 -652 1] 1] [[75016827664 -294707792 883884 -1076 1] 1] [[75016827664 294707792 883884 1076 1] 1] [[190713877264 0 -868516 0 1] 1] [[190713877264 0 354248 0 1] 1] [[190713877264 0 514268 0 1] 1] [[199783968784 -446972000 1446972 1000 1] 1] [[199783968784 446972000 1446972 -1000 1] 1] [[463145580304 -1124265296 2048556 -1652 1] 1] [[463145580304 1124265296 2048556 1652 1] 1] [[722316411664 -1567200848 2550444 -1844 1] 1] [[722316411664 1567200848 2550444 1844 1] 1] [[4214267025424 -5994374560 6473532 -2920 1] 1] [[4214267025424 5994374560 6473532 2920 1] 1] [[1509082248015679744 -4357134752332032 -98072348436992 295269334272 9137611056 68929344 311776 828 1] 1] [[1509082248015679744 0 208724941108480 0 2515466400 0 62032 0 1] 1] [[1509082248015679744 4357134752332032 -98072348436992 -295269334272 9137611056 -68929344 311776 -828 1] 1] [[8242842673465502834944 -15956454898822379520 -27757983946374272 73664993909760 11779157808 -523091712 100024 1248 1] 1] [[8242842673465502834944 0 86404395953830144 0 503486762592 0 1357456 0 1] 1] [[8242842673465502834944 15956454898822379520 -27757983946374272 -73664993909760 11779157808 523091712 100024 -1248 1] 1] [[24645776912953075323136 -32346413682678568704 122046949888364032 -141608259851520 543658138416 -2284518720 4370272 -3324 1] 1] [[24645776912953075323136 0 -201640765439499008 0 329745118368 0 2308432 0 1] 1] [[24645776912953075323136 32346413682678568704 122046949888364032 141608259851520 543658138416 2284518720 4370272 3324 1] 1]] diff --git a/thirdparty/linux/ntl/src/ZZXFactoring.c 
b/thirdparty/linux/ntl/src/ZZXFactoring.c new file mode 100644 index 0000000000..159822a63a --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZXFactoring.c @@ -0,0 +1,3817 @@ + +#include +#include +#include +#include +#include + +#include + +#include + +NTL_START_IMPL + +NTL_CHEAP_THREAD_LOCAL long ZZXFac_van_Hoeij = 1; + +static NTL_CHEAP_THREAD_LOCAL long ok_to_abandon = 0; + +struct LocalInfoT { + long n; + long NumPrimes; + long NumFactors; + vec_long p; + vec_vec_long pattern; + ZZ PossibleDegrees; + PrimeSeq s; +}; + + + +static +void mul(ZZ_pX& x, vec_ZZ_pX& a) +// this performs multiplications in close-to-optimal order, +// and kills a in the process +{ + long n = a.length(); + + // first, deal with some trivial cases + + if (n == 0) { + set(x); + a.kill(); + return; + } + else if (n == 1) { + x = a[0]; + a.kill(); + return; + } + + long i, j; + + // assume n > 1 and all a[i]'s are nonzero + + // sort into non-increasing degrees + + for (i = 1; i <= n - 1; i++) + for (j = 0; j <= n - i - 1; j++) + if (deg(a[j]) < deg(a[j+1])) + swap(a[j], a[j+1]); + + ZZ_pX g; + + while (n > 1) { + // replace smallest two poly's by their product + mul(g, a[n-2], a[n-1]); + a[n-2].kill(); + a[n-1].kill(); + swap(g, a[n-2]); + n--; + + // re-establish order + + i = n-1; + while (i > 0 && deg(a[i-1]) < deg(a[i])) { + swap(a[i-1], a[i]); + i--; + } + } + + x = a[0]; + + a[0].kill(); + a.SetLength(0); +} + + +void mul(ZZX& x, const vec_pair_ZZX_long& a) +{ + long l = a.length(); + ZZX res; + long i, j; + + set(res); + for (i = 0; i < l; i++) + for (j = 0; j < a[i].b; j++) + mul(res, res, a[i].a); + + x = res; +} + + +void SquareFreeDecomp(vec_pair_ZZX_long& u, const ZZX& ff) +// input is primitive +{ + ZZX f = ff; + + ZZX d, v, w, s, t1; + long i; + + u.SetLength(0); + + if (deg(f) <= 0) + return; + + diff(t1, f); + GCD(d, f, t1); + + if (deg(d) == 0) { + append(u, cons(f, 1L)); + return; + } + + divide(v, f, d); + divide(w, t1, d); + i = 0; + + for (;;) { + i = i + 1; + + diff(t1, v); + sub(s, w, t1); + + if (IsZero(s)) { + if (deg(v) != 0) append(u, cons(v, i)); + return; + } + + GCD(d, v, s); + divide(v, v, d); + divide(w, s, d); + + if (deg(d) != 0) append(u, cons(d, i)); + } +} + + + + +static +void HenselLift(ZZX& Gout, ZZX& Hout, ZZX& Aout, ZZX& Bout, + const ZZX& f, const ZZX& g, const ZZX& h, + const ZZX& a, const ZZX& b, const ZZ& p) +{ + ZZX c, g1, h1, G, H, A, B; + + mul(c, g, h); + sub(c, f, c); + + if (!divide(c, c, p)) + LogicError("inexact division"); + + ZZ_pX cc, gg, hh, aa, bb, tt, gg1, hh1; + + conv(cc, c); + conv(gg, g); + conv(hh, h); + conv(aa, a); + conv(bb, b); + + ZZ_pXModulus GG; + ZZ_pXModulus HH; + + build(GG, gg); + build(HH, hh); + + ZZ_pXMultiplier AA; + ZZ_pXMultiplier BB; + + build(AA, aa, HH); + build(BB, bb, GG); + + rem(gg1, cc, GG); + MulMod(gg1, gg1, BB, GG); + + rem(hh1, cc, HH); + MulMod(hh1, hh1, AA, HH); + + conv(g1, gg1); + mul(g1, g1, p); + add(G, g, g1); + + conv(h1, hh1); + mul(h1, h1, p); + add(H, h, h1); + + /* lift inverses */ + + ZZX t1, t2, r; + + mul(t1, a, G); + mul(t2, b, H); + add(t1, t1, t2); + add(t1, t1, -1); + negate(t1, t1); + + if (!divide(r, t1, p)) + LogicError("inexact division"); + + ZZ_pX rr, aa1, bb1; + + conv(rr, r); + + rem(aa1, rr, HH); + MulMod(aa1, aa1, AA, HH); + rem(bb1, rr, GG); + MulMod(bb1, bb1, BB, GG); + + ZZX a1, b1; + + conv(a1, aa1); + mul(a1, a1, p); + add(A, a, a1); + + conv(b1, bb1); + mul(b1, b1, p); + add(B, b, b1); + + Gout = G; + Hout = H; + Aout = A; + Bout = B; +} + +static +void HenselLift1(ZZX& Gout, ZZX& Hout, + 
const ZZX& f, const ZZX& g, const ZZX& h, + const ZZX& a, const ZZX& b, const ZZ& p) +{ + ZZX c, g1, h1, G, H; + + mul(c, g, h); + sub(c, f, c); + + if (!divide(c, c, p)) + LogicError("inexact division"); + + ZZ_pX cc, gg, hh, aa, bb, tt, gg1, hh1; + + conv(cc, c); + conv(gg, g); + conv(hh, h); + conv(aa, a); + conv(bb, b); + + ZZ_pXModulus GG; + ZZ_pXModulus HH; + + build(GG, gg); + build(HH, hh); + + rem(gg1, cc, GG); + MulMod(gg1, gg1, bb, GG); + + rem(hh1, cc, HH); + MulMod(hh1, hh1, aa, HH); + + conv(g1, gg1); + mul(g1, g1, p); + add(G, g, g1); + + conv(h1, hh1); + mul(h1, h1, p); + add(H, h, h1); + + Gout = G; + Hout = H; +} + +static +void BuildTree(vec_long& link, vec_ZZX& v, vec_ZZX& w, + const vec_zz_pX& a) +{ + long k = a.length(); + + if (k < 2) LogicError("bad arguments to BuildTree"); + + vec_zz_pX V, W; + + V.SetLength(2*k-2); + W.SetLength(2*k-2); + link.SetLength(2*k-2); + + long i, j, s; + long minp, mind; + + for (i = 0; i < k; i++) { + V[i] = a[i]; + link[i] = -(i+1); + } + + for (j = 0; j < 2*k-4; j += 2) { + minp = j; + mind = deg(V[j]); + + for (s = j+1; s < i; s++) + if (deg(V[s]) < mind) { + minp = s; + mind = deg(V[s]); + } + + swap(V[j], V[minp]); + swap(link[j], link[minp]); + + minp = j+1; + mind = deg(V[j+1]); + + for (s = j+2; s < i; s++) + if (deg(V[s]) < mind) { + minp = s; + mind = deg(V[s]); + } + + swap(V[j+1], V[minp]); + swap(link[j+1], link[minp]); + + mul(V[i], V[j], V[j+1]); + link[i] = j; + i++; + } + + zz_pX d; + + for (j = 0; j < 2*k-2; j += 2) { + XGCD(d, W[j], W[j+1], V[j], V[j+1]); + if (!IsOne(d)) + LogicError("relatively prime polynomials expected"); + } + + v.SetLength(2*k-2); + for (j = 0; j < 2*k-2; j++) + conv(v[j], V[j]); + + w.SetLength(2*k-2); + for (j = 0; j < 2*k-2; j++) + conv(w[j], W[j]); +} + +static +void RecTreeLift(const vec_long& link, vec_ZZX& v, vec_ZZX& w, + const ZZ& p, const ZZX& f, long j, long inv) +{ + if (j < 0) return; + + if (inv) + HenselLift(v[j], v[j+1], w[j], w[j+1], + f, v[j], v[j+1], w[j], w[j+1], p); + else + HenselLift1(v[j], v[j+1], f, v[j], v[j+1], w[j], w[j+1], p); + + RecTreeLift(link, v, w, p, v[j], link[j], inv); + RecTreeLift(link, v, w, p, v[j+1], link[j+1], inv); +} + +static +void TreeLift(const vec_long& link, vec_ZZX& v, vec_ZZX& w, + long e0, long e1, const ZZX& f, long inv) + +// lift from p^{e0} to p^{e1} + +{ + ZZ p0, p1; + + power(p0, zz_p::modulus(), e0); + power(p1, zz_p::modulus(), e1-e0); + + ZZ_pBak bak; + bak.save(); + ZZ_p::init(p1); + + RecTreeLift(link, v, w, p0, f, v.length()-2, inv); + + bak.restore(); +} + +void MultiLift(vec_ZZX& A, const vec_zz_pX& a, const ZZX& f, long e, + long verbose) + +{ + long k = a.length(); + long i; + + if (k < 2 || e < 1 || NTL_OVERFLOW(e, 1, 0)) LogicError("MultiLift: bad args"); + + if (!IsOne(LeadCoeff(f))) + LogicError("MultiLift: bad args"); + + for (i = 0; i < a.length(); i++) + if (!IsOne(LeadCoeff(a[i]))) + LogicError("MultiLift: bad args"); + + if (e == 1) { + A.SetLength(k); + for (i = 0; i < k; i++) + conv(A[i], a[i]); + return; + } + + vec_long E; + append(E, e); + while (e > 1) { + e = (e+1)/2; + append(E, e); + } + long l = E.length(); + + vec_ZZX v, w; + vec_long link; + + double t; + + if (verbose) { + cerr << "building tree..."; + t = GetTime(); + } + + BuildTree(link, v, w, a); + + if (verbose) cerr << (GetTime()-t) << "\n"; + + + for (i = l-1; i > 0; i--) { + if (verbose) { + cerr << "lifting to " << E[i-1] << "..."; + t = GetTime(); + } + + TreeLift(link, v, w, E[i], E[i-1], f, i != 1); + + if (verbose) cerr << (GetTime()-t) << 
"\n"; + } + + A.SetLength(k); + for (i = 0; i < 2*k-2; i++) { + long t = link[i]; + if (t < 0) + A[-(t+1)] = v[i]; + } +} + +static +void inplace_rev(ZZX& f) +{ + long n = deg(f); + long i, j; + + i = 0; + j = n; + while (i < j) { + swap(f.rep[i], f.rep[j]); + i++; + j--; + } + + f.normalize(); +} + +NTL_CHEAP_THREAD_LOCAL long ZZXFac_InitNumPrimes = 7; +NTL_CHEAP_THREAD_LOCAL long ZZXFac_MaxNumPrimes = 50; + +static +void RecordPattern(vec_long& pat, vec_pair_zz_pX_long& fac) +{ + long n = pat.length()-1; + long i; + + for (i = 0; i <= n; i++) + pat[i] = 0; + + long k = fac.length(); + + for (i = 0; i < k; i++) { + long d = fac[i].b; + long m = deg(fac[i].a)/d; + + pat[d] = m; + } +} + +static +long NumFactors(const vec_long& pat) +{ + long n = pat.length()-1; + + long i; + long res = 0; + + for (i = 0; i <= n; i++) + res += pat[i]; + + return res; +} + +static +void CalcPossibleDegrees(ZZ& pd, const vec_long& pat) +{ + long n = pat.length()-1; + set(pd); + + long d, j; + ZZ t1; + + for (d = 1; d <= n; d++) + for (j = 0; j < pat[d]; j++) { + LeftShift(t1, pd, d); + bit_or(pd, pd, t1); + } +} + +static +void CalcPossibleDegrees(vec_ZZ& S, const vec_ZZ_pX& fac, long k) + +// S[i] = possible degrees of the product of any subset of size k +// among fac[i...], encoded as a bit vector. + +{ + long r = fac.length(); + + S.SetLength(r); + + if (r == 0) + return; + + if (k < 1 || k > r) + LogicError("CalcPossibleDegrees: bad args"); + + long i, l; + ZZ old, t1; + + set(S[r-1]); + LeftShift(S[r-1], S[r-1], deg(fac[r-1])); + + for (i = r-2; i >= 0; i--) { + set(t1); + LeftShift(t1, t1, deg(fac[i])); + bit_or(S[i], t1, S[i+1]); + } + + for (l = 2; l <= k; l++) { + old = S[r-l]; + LeftShift(S[r-l], S[r-l+1], deg(fac[r-l])); + + for (i = r-l-1; i >= 0; i--) { + LeftShift(t1, old, deg(fac[i])); + old = S[i]; + bit_or(S[i], S[i+1], t1); + } + } +} + + + +static +vec_zz_pX * +SmallPrimeFactorization(LocalInfoT& LocalInfo, const ZZX& f, + long verbose) + +{ + long n = deg(f); + long i; + double t; + + LocalInfo.n = n; + long& NumPrimes = LocalInfo.NumPrimes; + NumPrimes = 0; + + LocalInfo.NumFactors = 0; + + // some sanity checking... 
+ + if (ZZXFac_InitNumPrimes < 1 || ZZXFac_InitNumPrimes > 10000) + LogicError("bad ZZXFac_InitNumPrimes"); + + if (ZZXFac_MaxNumPrimes < ZZXFac_InitNumPrimes || ZZXFac_MaxNumPrimes > 10000) + LogicError("bad ZZXFac_MaxNumPrimes"); + + LocalInfo.p.SetLength(ZZXFac_InitNumPrimes); + LocalInfo.pattern.SetLength(ZZXFac_InitNumPrimes); + + // set bits 0..n of LocalInfo.PossibleDegrees + SetBit(LocalInfo.PossibleDegrees, n+1); + add(LocalInfo.PossibleDegrees, LocalInfo.PossibleDegrees, -1); + + long minr = n+1; + long irred = 0; + + UniquePtr<vec_pair_zz_pX_long> bestfac; + UniquePtr<zz_pX> besth; + UniquePtr<vec_zz_pX> spfactors; + zz_pContext bestp; + long bestp_index; + + long maxroot = NextPowerOfTwo(deg(f))+1; + + for (; NumPrimes < ZZXFac_InitNumPrimes;) { + long p = LocalInfo.s.next(); + if (!p) ResourceError("out of small primes"); + if (divide(LeadCoeff(f), p)) { + if (verbose) cerr << "skipping " << p << "\n"; + continue; + } + zz_p::init(p, maxroot); + + zz_pX ff, ffp, d; + + conv(ff, f); + MakeMonic(ff); + diff(ffp, ff); + + GCD(d, ffp, ff); + if (!IsOne(d)) { + if (verbose) cerr << "skipping " << p << "\n"; + continue; + } + + + if (verbose) { + cerr << "factoring mod " << p << "..."; + t = GetTime(); + } + + vec_pair_zz_pX_long thisfac; + zz_pX thish; + + SFCanZass1(thisfac, thish, ff, 0); + + LocalInfo.p[NumPrimes] = p; + + vec_long& pattern = LocalInfo.pattern[NumPrimes]; + pattern.SetLength(n+1); + + RecordPattern(pattern, thisfac); + long r = NumFactors(pattern); + + if (verbose) { + cerr << (GetTime()-t) << "\n"; + cerr << "degree sequence: "; + for (i = 0; i <= n; i++) + if (pattern[i]) { + cerr << pattern[i] << "*" << i << " "; + } + cerr << "\n"; + } + + if (r == 1) { + irred = 1; + break; + } + + // update admissibility info + + ZZ pd; + + CalcPossibleDegrees(pd, pattern); + bit_and(LocalInfo.PossibleDegrees, LocalInfo.PossibleDegrees, pd); + + if (weight(LocalInfo.PossibleDegrees) == 2) { + irred = 1; + break; + } + + + if (r < minr) { + minr = r; + bestfac.make(thisfac); + besth.make(thish); + bestp.save(); + bestp_index = NumPrimes; + } + + NumPrimes++; + } + + if (!irred) { + // remove best prime from LocalInfo + swap(LocalInfo.pattern[bestp_index], LocalInfo.pattern[NumPrimes-1]); + LocalInfo.p[bestp_index] = LocalInfo.p[NumPrimes-1]; + NumPrimes--; + + bestp.restore(); + + spfactors.make(); + + if (verbose) { + cerr << "p = " << zz_p::modulus() << ", completing factorization..."; + t = GetTime(); + } + SFCanZass2(*spfactors, *bestfac, *besth, 0); + if (verbose) { + cerr << (GetTime()-t) << "\n"; + } + } + + return spfactors.release(); +} + + +static +long ConstTermTest(const vec_ZZ_pX& W, + const vec_long& I, + const ZZ& ct, + const ZZ_p& lc, + vec_ZZ_p& prod, + long& ProdLen) +{ + long k = I.length(); + ZZ_p t; + ZZ t1, t2; + long i; + + if (ProdLen == 0) { + mul(prod[0], lc, ConstTerm(W[I[0]])); + ProdLen++; + } + + for (i = ProdLen; i < k; i++) + mul(prod[i], prod[i-1], ConstTerm(W[I[i]])); + + ProdLen = k-1; + + // should make this a routine in ZZ_p + t1 = rep(prod[k-1]); + RightShift(t2, ZZ_p::modulus(), 1); + if (t1 > t2) + sub(t1, t1, ZZ_p::modulus()); + + return divide(ct, t1); +} + +static +void BalCopy(ZZX& g, const ZZ_pX& G) +{ + const ZZ& p = ZZ_p::modulus(); + ZZ p2, t; + RightShift(p2, p, 1); + + long n = G.rep.length(); + long i; + + g.rep.SetLength(n); + for (i = 0; i < n; i++) { + t = rep(G.rep[i]); + if (t > p2) sub(t, t, p); + g.rep[i] = t; + } +} + + + + +static +void mul(ZZ_pX& g, const vec_ZZ_pX& W, const vec_long& I) +{ + vec_ZZ_pX w; + long k = I.length(); + w.SetLength(k); + long
i; + + for (i = 0; i < k; i++) + w[i] = W[I[i]]; + + mul(g, w); +} + + + + +static +void InvMul(ZZ_pX& g, const vec_ZZ_pX& W, const vec_long& I) +{ + vec_ZZ_pX w; + long k = I.length(); + long r = W.length(); + w.SetLength(r-k); + long i, j; + + i = 0; + for (j = 0; j < r; j++) { + if (i < k && j == I[i]) + i++; + else + w[j-i] = W[j]; + } + + mul(g, w); +} + + + + +static +void RemoveFactors(vec_ZZ_pX& W, const vec_long& I) +{ + long k = I.length(); + long r = W.length(); + long i, j; + + i = 0; + for (j = 0; j < r; j++) { + if (i < k && j == I[i]) + i++; + else + swap(W[j-i], W[j]); + } + + W.SetLength(r-k); +} + +static +void unpack(vec_long& x, const ZZ& a, long n) +{ + x.SetLength(n+1); + long i; + + for (i = 0; i <= n; i++) + x[i] = bit(a, i); +} + +static +void SubPattern(vec_long& p1, const vec_long& p2) +{ + long l = p1.length(); + + if (p2.length() != l) + LogicError("SubPattern: bad args"); + + long i; + + for (i = 0; i < l; i++) { + p1[i] -= p2[i]; + if (p1[i] < 0) + LogicError("SubPattern: internal error"); + } +} + +static +void UpdateLocalInfo(LocalInfoT& LocalInfo, vec_ZZ& pdeg, + const vec_ZZ_pX& W, const vec_ZZX& factors, + const ZZX& f, long k, long verbose) +{ + static NTL_CHEAP_THREAD_LOCAL long cnt = 0; + + if (verbose) { + cnt = (cnt + 1) % 100; + if (!cnt) cerr << "#"; + } + + double t; + long i, j; + + if (LocalInfo.NumFactors < factors.length()) { + zz_pBak bak; + bak.save(); + + vec_long pattern; + pattern.SetLength(LocalInfo.n+1); + + ZZ pd; + + if (verbose) { + cerr << "updating local info..."; + t = GetTime(); + } + + for (i = 0; i < LocalInfo.NumPrimes; i++) { + zz_p::init(LocalInfo.p[i], NextPowerOfTwo(LocalInfo.n)+1); + + for (j = LocalInfo.NumFactors; j < factors.length(); j++) { + vec_pair_zz_pX_long thisfac; + zz_pX thish; + + zz_pX ff; + conv(ff, factors[j]); + MakeMonic(ff); + + SFCanZass1(thisfac, thish, ff, 0); + RecordPattern(pattern, thisfac); + SubPattern(LocalInfo.pattern[i], pattern); + } + + CalcPossibleDegrees(pd, LocalInfo.pattern[i]); + bit_and(LocalInfo.PossibleDegrees, LocalInfo.PossibleDegrees, pd); + + } + + bak.restore(); + LocalInfo.NumFactors = factors.length(); + + CalcPossibleDegrees(pdeg, W, k); + + if (verbose) cerr << (GetTime()-t) << "\n"; + } + + if (!ZZXFac_van_Hoeij && LocalInfo.NumPrimes + 1 < ZZXFac_MaxNumPrimes) { + if (verbose) + cerr << "adding a prime\n"; + + zz_pBak bak; + bak.save(); + + for (;;) { + long p = LocalInfo.s.next(); + if (!p) + ResourceError("UpdateLocalInfo: out of primes"); + + if (divide(LeadCoeff(f), p)) { + if (verbose) cerr << "skipping " << p << "\n"; + continue; + } + + zz_p::init(p, NextPowerOfTwo(deg(f))+1); + + zz_pX ff, ffp, d; + + conv(ff, f); + MakeMonic(ff); + diff(ffp, ff); + + GCD(d, ffp, ff); + if (!IsOne(d)) { + if (verbose) cerr << "skipping " << p << "\n"; + continue; + } + + vec_pair_zz_pX_long thisfac; + zz_pX thish; + + if (verbose) { + cerr << "factoring mod " << p << "..."; + t = GetTime(); + } + + SFCanZass1(thisfac, thish, ff, 0); + + LocalInfo.p.SetLength(LocalInfo.NumPrimes+1); + LocalInfo.pattern.SetLength(LocalInfo.NumPrimes+1); + + LocalInfo.p[LocalInfo.NumPrimes] = p; + vec_long& pattern = LocalInfo.pattern[LocalInfo.NumPrimes]; + + pattern.SetLength(LocalInfo.n+1); + RecordPattern(pattern, thisfac); + + if (verbose) { + cerr << (GetTime()-t) << "\n"; + cerr << "degree sequence: "; + for (i = 0; i <= LocalInfo.n; i++) + if (pattern[i]) { + cerr << pattern[i] << "*" << i << " "; + } + cerr << "\n"; + } + + ZZ pd; + CalcPossibleDegrees(pd, pattern); + 
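// intersect this prime's admissible-degree bit vector with the + // global set of possible factor degrees +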
bit_and(LocalInfo.PossibleDegrees, LocalInfo.PossibleDegrees, pd); + + LocalInfo.NumPrimes++; + + break; + } + + bak.restore(); + } +} + + + +const int ZZX_OVERLIFT = NTL_BITS_PER_LONG; + // number of bits by which we "overlift"....this enables, in particular, + // the "n-1" test. + // Must lie in the range 4..NTL_BITS_PER_LONG. + + +#define EXTRA_BITS (1) +// Any small number, like 1, 2 or 3, should be OK. + + +static +void CardinalitySearch(vec_ZZX& factors, ZZX& f, + vec_ZZ_pX& W, + LocalInfoT& LocalInfo, + long k, + long bnd, + long verbose) +{ + double start_time, end_time; + + if (verbose) { + start_time = GetTime(); + cerr << "\n************ "; + cerr << "start cardinality " << k << "\n"; + } + + vec_long I, D; + I.SetLength(k); + D.SetLength(k); + + long r = W.length(); + + vec_ZZ_p prod; + prod.SetLength(k); + long ProdLen; + + vec_ZZ pdeg; + CalcPossibleDegrees(pdeg, W, k); + + ZZ pd; + vec_long upd; + + long i, state; + + long cnt = 0; + + ZZ ct; + mul(ct, ConstTerm(f), LeadCoeff(f)); + + ZZ_p lc; + conv(lc, LeadCoeff(f)); + + ZZ_pX gg; + ZZX g, h; + + I[0] = 0; + + while (I[0] <= r-k) { + bit_and(pd, pdeg[I[0]], LocalInfo.PossibleDegrees); + + if (IsZero(pd)) { + if (verbose) cerr << "skipping\n"; + goto done; + } + + unpack(upd, pd, LocalInfo.n); + + D[0] = deg(W[I[0]]); + i = 1; + state = 0; + ProdLen = 0; + + for (;;) { + if (i < ProdLen) + ProdLen = i; + + if (i == k) { + // process indices I[0], ..., I[k-1] + + if (cnt > 2000000) { + cnt = 0; + UpdateLocalInfo(LocalInfo, pdeg, W, factors, f, k, verbose); + bit_and(pd, pdeg[I[0]], LocalInfo.PossibleDegrees); + if (IsZero(pd)) { + if (verbose) cerr << "skipping\n"; + goto done; + } + unpack(upd, pd, LocalInfo.n); + } + + state = 1; // default continuation state + + + if (!upd[D[k-1]]) { + i--; + cnt++; + continue; + } + + if (!ConstTermTest(W, I, ct, lc, prod, ProdLen)) { + i--; + cnt += 100; + continue; + } + + if (verbose) { + cerr << "+"; + } + + cnt += 1000; + + if (2*D[k-1] <= deg(f)) { + mul(gg, W, I); + mul(gg, gg, lc); + BalCopy(g, gg); + if(MaxBits(g) > bnd) { + i--; + continue; + } + if (verbose) { + cerr << "*"; + } + PrimitivePart(g, g); + if (!divide(h, f, g)) { + i--; + continue; + } + + // factor found! + append(factors, g); + if (verbose) { + cerr << "degree " << deg(g) << " factor found\n"; + } + f = h; + mul(ct, ConstTerm(f), LeadCoeff(f)); + conv(lc, LeadCoeff(f)); + } + else { + InvMul(gg, W, I); + mul(gg, gg, lc); + BalCopy(g, gg); + if(MaxBits(g) > bnd) { + i--; + continue; + } + if (verbose) { + cerr << "*"; + } + PrimitivePart(g, g); + if (!divide(h, f, g)) { + i--; + continue; + } + + // factor found! 
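+ // (g here is the complementary product, so the cofactor h = f/g is the + // factor being extracted; the search continues on the smaller f = g)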
+ + append(factors, h); + if (verbose) { + cerr << "degree " << deg(h) << " factor found\n"; + } + f = g; + mul(ct, ConstTerm(f), LeadCoeff(f)); + conv(lc, LeadCoeff(f)); + } + + RemoveFactors(W, I); + r = W.length(); + cnt = 0; + + if (2*k > r) + goto done; + else + break; + } + else if (state == 0) { + I[i] = I[i-1] + 1; + D[i] = D[i-1] + deg(W[I[i]]); + i++; + } + else { // state == 1 + I[i]++; + if (i == 0) break; + + if (I[i] > r-k+i) + i--; + else { + D[i] = D[i-1] + deg(W[I[i]]); + i++; + state = 0; + } + } + } + } + + + done: + + + if (verbose) { + end_time = GetTime(); + cerr << "\n************ "; + cerr << "end cardinality " << k << "\n"; + cerr << "time: " << (end_time-start_time) << "\n"; + } +} + + + +typedef unsigned long TBL_T; + +#if (NTL_BITS_PER_LONG >= 64) + +// for 64-bit machines + +#define TBL_MSK (63) +#define TBL_SHAMT (6) + +#else + +// for 32-bit machines + +#define TBL_MSK (31) +#define TBL_SHAMT (5) + +#endif + + +#if 0 + +// recursive version + +static +void RecInitTab(TBL_T ***lookup_tab, long i, const vec_ulong& ratio, + long r, long k, unsigned long thresh1, long **shamt_tab, + unsigned long sum, long card, long j) +{ + if (j >= i || card >= k-1) { + if (card > 1) { + long shamt = shamt_tab[i][card]; + unsigned long index1 = ((-sum) >> shamt); + lookup_tab[i][card][index1 >> TBL_SHAMT] |= (1UL << (index1 & TBL_MSK)); + unsigned long index2 = ((-sum+thresh1) >> shamt); + if (index1 != index2) + lookup_tab[i][card][index2 >> TBL_SHAMT] |= (1UL << (index2 & TBL_MSK)); + + } + + return; + } + + + RecInitTab(lookup_tab, i, ratio, r, k, thresh1, shamt_tab, sum, card, j+1); + RecInitTab(lookup_tab, i, ratio, r, k, thresh1, shamt_tab, + sum+ratio[r-1-j], card+1, j+1); +} + + +static +void DoInitTab(TBL_T ***lookup_tab, long i, const vec_ulong& ratio, + long r, long k, unsigned long thresh1, long **shamt_tab) +{ + RecInitTab(lookup_tab, i, ratio, r, k, thresh1, shamt_tab, 0, 0, 0); +} + +#else + +// iterative version + + + +typedef Vec< Vec< Vec< TBL_T > > > lookup_tab_t; +typedef Vec< Vec<long> > shamt_tab_t; + + +static +void DoInitTab(lookup_tab_t& lookup_tab, long i, const vec_ulong& ratio, + long r, long k, unsigned long thresh1, shamt_tab_t& shamt_tab) +{ + vec_long sum_vec, card_vec, location_vec; + sum_vec.SetLength(i+1); + card_vec.SetLength(i+1); + location_vec.SetLength(i+1); + + long j = 0; + sum_vec[0] = 0; + card_vec[0] = 0; + + unsigned long sum; + long card, location; + + location = 0; + + while (j >= 0) { + sum = sum_vec[j]; + card = card_vec[j]; + + switch (location) { + + case 0: + + if (j >= i || card >= k-1) { + if (card > 1) { + long shamt = shamt_tab[i][card]; + unsigned long index1 = ((-sum) >> shamt); + lookup_tab[i][card][index1 >> TBL_SHAMT] |= (1UL << (index1 & TBL_MSK)); + unsigned long index2 = ((-sum+thresh1) >> shamt); + if (index1 != index2) + lookup_tab[i][card][index2 >> TBL_SHAMT] |= (1UL << (index2 & TBL_MSK)); + + } + + location = location_vec[j]; + j--; + continue; + } + + + sum_vec[j+1] = sum; + card_vec[j+1] = card; + location_vec[j+1] = 1; + j++; + location = 0; + continue; + + case 1: + + sum_vec[j+1] = sum+ratio[r-1-j]; + card_vec[j+1] = card+1; + location_vec[j+1] = 2; + j++; + location = 0; + continue; + + case 2: + + location = location_vec[j]; + j--; + continue; + } + } +} + +#endif + + + +static +void InitTab(lookup_tab_t& lookup_tab, const vec_ulong& ratio, long r, long k, + unsigned long thresh1, shamt_tab_t& shamt_tab, long pruning) +{ + long i, j, t; + + if (pruning) { + for (i = 2; i <= pruning; i++) { + long len =
min(k-1, i); + for (j = 2; j <= len; j++) { + long ub = (((1L << (NTL_BITS_PER_LONG-shamt_tab[i][j])) + + TBL_MSK) >> TBL_SHAMT); + for (t = 0; t < ub; t++) + lookup_tab[i][j][t] = 0; + } + + DoInitTab(lookup_tab, i, ratio, r, k, thresh1, shamt_tab); + } + } +} + + +static +void RatioInit1(vec_ulong& ratio, const vec_ZZ_pX& W, const ZZ_p& lc, + long pruning, lookup_tab_t& lookup_tab, + vec_vec_ulong& pair_ratio, long k, unsigned long thresh1, + shamt_tab_t& shamt_tab) +{ + long r = W.length(); + long i, j; + + ZZ_p a; + + ZZ p; + p = ZZ_p::modulus(); + + ZZ aa; + + for (i = 0; i < r; i++) { + long m = deg(W[i]); + mul(a, W[i].rep[m-1], lc); + LeftShift(aa, rep(a), NTL_BITS_PER_LONG); + div(aa, aa, p); + ratio[i] = to_ulong(aa); + } + + InitTab(lookup_tab, ratio, r, k, thresh1, shamt_tab, pruning); + + for (i = 0; i < r; i++) + for (j = 0; j < i; j++) { + mul(a, W[i].rep[deg(W[i])-1], W[j].rep[deg(W[j])-1]); + mul(a, a, lc); + LeftShift(aa, rep(a), NTL_BITS_PER_LONG); + div(aa, aa, p); + pair_ratio[i][j] = to_ulong(aa); + } + + for (i = 0; i < r; i++) { + long m = deg(W[i]); + if (m >= 2) { + mul(a, W[i].rep[m-2], lc); + LeftShift(aa, rep(a), NTL_BITS_PER_LONG); + div(aa, aa, p); + pair_ratio[i][i] = to_ulong(aa); + } + else + pair_ratio[i][i] = 0; + } +} + +static +long SecondOrderTest(const vec_long& I_vec, const vec_vec_ulong& pair_ratio_vec, + vec_ulong& sum_stack_vec, long& SumLen) +{ + long k = I_vec.length(); + const long *I = I_vec.elts(); + unsigned long *sum_stack = sum_stack_vec.elts(); + + unsigned long sum, thresh1; + + if (SumLen == 0) { + unsigned long epsilon = (1UL << (NTL_BITS_PER_LONG-ZZX_OVERLIFT)); + unsigned long delta = (unsigned long) ((k*(k+1)) >> 1); + unsigned long thresh = epsilon + delta; + thresh1 = (epsilon << 1) + delta; + + sum = thresh; + sum_stack[k] = thresh1; + } + else { + sum = sum_stack[SumLen-1]; + thresh1 = sum_stack[k]; + } + + long i, j; + + for (i = SumLen; i < k; i++) { + const unsigned long *p = pair_ratio_vec[I[i]].elts(); + for (j = 0; j <= i; j++) { + sum += p[I[j]]; + } + + sum_stack[i] = sum; + } + + SumLen = k-1; + + return (sum <= thresh1); +} + + +static +ZZ choose_fn(long r, long k) +{ + ZZ a, b; + + a = 1; + b = 1; + + long i; + for (i = 0; i < k; i++) { + a *= r-i; + b *= k-i; + } + + return a/b; +} + +static +void PrintInfo(const char *s, const ZZ& a, const ZZ& b) +{ + cerr << s << a << " / " << b << " = "; + + double x = to_double(a)/to_double(b); + + if (x == 0) + cerr << "0"; + else { + int n; + double f; + + f = frexp(x, &n); + cerr << f << "*2^" << n; + } + + cerr << "\n"; +} + +static +void RemoveFactors1(vec_long& W, const vec_long& I, long r) +{ + long k = I.length(); + long i, j; + + i = 0; + for (j = 0; j < r; j++) { + if (i < k && j == I[i]) + i++; + else + swap(W[j-i], W[j]); + } +} + +static +void RemoveFactors1(vec_vec_long& W, const vec_long& I, long r) +{ + long k = I.length(); + long i, j; + + i = 0; + for (j = 0; j < r; j++) { + if (i < k && j == I[i]) + i++; + else + swap(W[j-i], W[j]); + } + + for (i = 0; i < r-k; i++) + RemoveFactors1(W[i], I, r); +} + + +static +void RemoveFactors1(vec_ulong& W, const vec_long& I, long r) +{ + long k = I.length(); + long i, j; + + i = 0; + for (j = 0; j < r; j++) { + if (i < k && j == I[i]) + i++; + else + _ntl_swap(W[j-i], W[j]); + } +} + +static +void RemoveFactors1(vec_vec_ulong& W, const vec_long& I, long r) +{ + long k = I.length(); + long i, j; + + i = 0; + for (j = 0; j < r; j++) { + if (i < k && j == I[i]) + i++; + else + swap(W[j-i], W[j]); + } + + for (i = 0; i < r-k; 
i++) + RemoveFactors1(W[i], I, r); +} + + +static +void RemoveFactors1(vec_ZZ_p& W, const vec_long& I, long r) +{ + long k = I.length(); + long i, j; + + i = 0; + for (j = 0; j < r; j++) { + if (i < k && j == I[i]) + i++; + else + swap(W[j-i], W[j]); + } +} + +static +void SumCoeffs(ZZ& sum, const ZZX& a) +{ + ZZ res; + res = 0; + long i; + long n = a.rep.length(); + for (i = 0; i < n; i++) + res += a.rep[i]; + + sum = res; +} + +static +void SumCoeffs(ZZ_p& sum, const ZZ_pX& a) +{ + ZZ_p res; + res = 0; + long i; + long n = a.rep.length(); + for (i = 0; i < n; i++) + res += a.rep[i]; + + sum = res; +} + + +static +long ConstTermTest(const vec_ZZ_p& W, + const vec_long& I, + const ZZ& ct, + const ZZ_p& lc, + vec_ZZ_p& prod, + long& ProdLen) +{ + long k = I.length(); + ZZ_p t; + ZZ t1, t2; + long i; + + if (ProdLen == 0) { + mul(prod[0], lc, W[I[0]]); + ProdLen++; + } + + for (i = ProdLen; i < k; i++) + mul(prod[i], prod[i-1], W[I[i]]); + + ProdLen = k-1; + + // should make this a routine in ZZ_p + t1 = rep(prod[k-1]); + RightShift(t2, ZZ_p::modulus(), 1); + if (t1 > t2) + sub(t1, t1, ZZ_p::modulus()); + + return divide(ct, t1); +} + + +NTL_CHEAP_THREAD_LOCAL long ZZXFac_MaxPrune = 10; + + + +static +long pruning_bnd(long r, long k) +{ + double x = 0; + + long i; + for (i = 0; i < k; i++) { + x += log(double(r-i)/double(k-i)); + } + + return long((x/log(2.0)) * 0.75); +} + +static +long shamt_tab_init(long pos, long card, long pruning, long thresh1_len) +{ + double x = 1; + long i; + + for (i = 0; i < card; i++) { + x *= double(pos-i)/double(card-i); + } + + x *= pruning; // this can be adjusted to control the density + if (pos <= 6) x *= 2; // a little boost that costs very little + + + long t = long(ceil(log(x)/log(2.0))); + + t = max(t, TBL_SHAMT); + + t = min(t, NTL_BITS_PER_LONG-thresh1_len); + + + return NTL_BITS_PER_LONG-t; +} + +// The following routine should only be called for k > 1, +// and is only worth calling for k > 2. 
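+// +// Compared to CardinalitySearch, CardinalitySearch1 screens each candidate +// k-subset with a sequence of cheap tests before any trial division: +// a degree test against LocalInfo.PossibleDegrees; an "n-1"-style test on +// the second coefficient of the would-be factor, done with the fixed-point +// ratios computed by RatioInit1 (and accelerated by the lookup-table +// pruning built in InitTab); the second-order "n-2" test of +// SecondOrderTest; and evaluation tests at 1 and 0 (via SumCoeffs and +// ConstTermTest). Only candidates surviving all of these are multiplied +// out and trial-divided.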
+ + +static +void CardinalitySearch1(vec_ZZX& factors, ZZX& f, + vec_ZZ_pX& W, + LocalInfoT& LocalInfo, + long k, + long bnd, + long verbose) +{ + double start_time, end_time; + + if (verbose) { + start_time = GetTime(); + cerr << "\n************ "; + cerr << "start cardinality " << k << "\n"; + } + + if (k <= 1) LogicError("internal error: call CardinalitySearch"); + + // This test is needed to ensure correctness of the "n-2" test + if (NumBits(k) > NTL_BITS_PER_LONG/2-2) + ResourceError("Cardinality Search: k too large..."); + + vec_ZZ pdeg; + CalcPossibleDegrees(pdeg, W, k); + ZZ pd; + + bit_and(pd, pdeg[0], LocalInfo.PossibleDegrees); + if (pd == 0) { + if (verbose) cerr << "skipping\n"; + return; + } + + vec_long I, D; + I.SetLength(k); + D.SetLength(k); + + long r = W.length(); + + long initial_r = r; + + vec_ulong ratio, ratio_sum; + ratio.SetLength(r); + ratio_sum.SetLength(k); + + unsigned long epsilon = (1UL << (NTL_BITS_PER_LONG-ZZX_OVERLIFT)); + unsigned long delta = (unsigned long) k; + unsigned long thresh = epsilon + delta; + unsigned long thresh1 = (epsilon << 1) + delta; + + long thresh1_len = NumBits(long(thresh1)); + + long pruning; + + pruning = min(r/2, ZZXFac_MaxPrune); + pruning = min(pruning, pruning_bnd(r, k)); + pruning = min(pruning, NTL_BITS_PER_LONG-EXTRA_BITS-thresh1_len); + + if (pruning <= 4) pruning = 0; + + long init_pruning = pruning; + + lookup_tab_t lookup_tab; + + shamt_tab_t shamt_tab; + + if (pruning) { + + long i, j; + + shamt_tab.SetLength(pruning+1); + + for (i = 2; i <= pruning; i++) { + long len = min(k-1, i); + shamt_tab[i].SetLength(len+1); + shamt_tab[i][0] = shamt_tab[i][1] = 0; + + for (j = 2; j <= len; j++) + shamt_tab[i][j] = shamt_tab_init(i, j, pruning, thresh1_len); + } + + lookup_tab.SetLength(pruning+1); + + for (i = 2; i <= pruning; i++) { + long len = min(k-1, i); + lookup_tab[i].SetLength(len+1); + + for (j = 2; j <= len; j++) { + lookup_tab[i][j].SetLength(((1L << (NTL_BITS_PER_LONG-shamt_tab[i][j]))+TBL_MSK) >> TBL_SHAMT); + } + } + } + + if (verbose) { + cerr << "pruning = " << pruning << "\n"; + } + + vec_ZZ_p prod; + prod.SetLength(k); + long ProdLen; + + vec_ZZ_p prod1; + prod1.SetLength(k); + long ProdLen1; + + vec_ulong sum_stack; + sum_stack.SetLength(k+1); + long SumLen; + + vec_long upd; + + long i, state; + + long cnt = 0; + + ZZ ct; + mul(ct, ConstTerm(f), LeadCoeff(f)); + + ZZ_p lc; + conv(lc, LeadCoeff(f)); + + vec_vec_ulong pair_ratio; + pair_ratio.SetLength(r); + for (i = 0; i < r; i++) + pair_ratio[i].SetLength(r); + + RatioInit1(ratio, W, lc, pruning, lookup_tab, pair_ratio, k, thresh1, shamt_tab); + + ZZ c1; + SumCoeffs(c1, f); + mul(c1, c1, LeadCoeff(f)); + + vec_ZZ_p sum_coeffs; + sum_coeffs.SetLength(r); + for (i = 0; i < r; i++) + SumCoeffs(sum_coeffs[i], W[i]); + + vec_long degv; + degv.SetLength(r); + + for (i = 0; i < r; i++) + degv[i] = deg(W[i]); + + ZZ_pX gg; + ZZX g, h; + + I[0] = 0; + + long loop_cnt = 0, degree_cnt = 0, n2_cnt = 0, sl_cnt = 0, ct_cnt = 0, + pl_cnt = 0, c1_cnt = 0, pl1_cnt = 0, td_cnt = 0; + + ZZ loop_total, degree_total, n2_total, sl_total, ct_total, + pl_total, c1_total, pl1_total, td_total; + + while (I[0] <= r-k) { + bit_and(pd, pdeg[I[0]], LocalInfo.PossibleDegrees); + + if (IsZero(pd)) { + if (verbose) cerr << "skipping\n"; + goto done; + } + + unpack(upd, pd, LocalInfo.n); + + D[0] = degv[I[0]]; + ratio_sum[0] = ratio[I[0]] + thresh; + i = 1; + state = 0; + ProdLen = 0; + ProdLen1 = 0; + SumLen = 0; + + for (;;) { + cnt++; + + if (cnt > 2000000) { + if (verbose) { + loop_total +=
loop_cnt; loop_cnt = 0; + degree_total += degree_cnt; degree_cnt = 0; + n2_total += n2_cnt; n2_cnt = 0; + sl_total += sl_cnt; sl_cnt = 0; + ct_total += ct_cnt; ct_cnt = 0; + pl_total += pl_cnt; pl_cnt = 0; + c1_total += c1_cnt; c1_cnt = 0; + pl1_total += pl1_cnt; pl1_cnt = 0; + td_total += td_cnt; td_cnt = 0; + } + + cnt = 0; + UpdateLocalInfo(LocalInfo, pdeg, W, factors, f, k, verbose); + bit_and(pd, pdeg[I[0]], LocalInfo.PossibleDegrees); + if (IsZero(pd)) { + if (verbose) cerr << "skipping\n"; + goto done; + } + unpack(upd, pd, LocalInfo.n); + } + + if (i == k-1) { + + unsigned long ratio_sum_last = ratio_sum[k-2]; + long I_last = I[k-2]; + + + { + long D_last = D[k-2]; + + unsigned long rs; + long I_this; + long D_this; + + for (I_this = I_last+1; I_this < r; I_this++) { + loop_cnt++; + + rs = ratio_sum_last + ratio[I_this]; + if (rs > thresh1) { + cnt++; + continue; + } + + degree_cnt++; + + D_this = D_last + degv[I_this]; + + if (!upd[D_this]) { + cnt++; + continue; + } + + n2_cnt++; + sl_cnt += (k-SumLen); + + I[k-1] = I_this; + + if (!SecondOrderTest(I, pair_ratio, sum_stack, SumLen)) { + cnt += 2; + continue; + } + + c1_cnt++; + pl1_cnt += (k-ProdLen1); + + if (!ConstTermTest(sum_coeffs, I, c1, lc, prod1, ProdLen1)) { + cnt += 100; + continue; + } + + ct_cnt++; + pl_cnt += (k-ProdLen); + + D[k-1] = D_this; + + if (!ConstTermTest(W, I, ct, lc, prod, ProdLen)) { + cnt += 100; + continue; + } + + td_cnt++; + + if (verbose) { + cerr << "+"; + } + + cnt += 1000; + + if (2*D[k-1] <= deg(f)) { + mul(gg, W, I); + mul(gg, gg, lc); + BalCopy(g, gg); + if(MaxBits(g) > bnd) { + continue; + } + if (verbose) { + cerr << "*"; + } + PrimitivePart(g, g); + if (!divide(h, f, g)) { + continue; + } + + // factor found! + append(factors, g); + if (verbose) { + cerr << "degree " << deg(g) << " factor found\n"; + } + f = h; + mul(ct, ConstTerm(f), LeadCoeff(f)); + conv(lc, LeadCoeff(f)); + } + else { + InvMul(gg, W, I); + mul(gg, gg, lc); + BalCopy(g, gg); + if(MaxBits(g) > bnd) { + continue; + } + if (verbose) { + cerr << "*"; + } + PrimitivePart(g, g); + if (!divide(h, f, g)) { + continue; + } + + // factor found! 
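+ // (as in CardinalitySearch: g is the complementary product, so the + // cofactor h = f/g is the factor being extracted)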
+ append(factors, h); + if (verbose) { + cerr << "degree " << deg(h) << " factor found\n"; + } + f = g; + mul(ct, ConstTerm(f), LeadCoeff(f)); + conv(lc, LeadCoeff(f)); + } + + RemoveFactors(W, I); + RemoveFactors1(degv, I, r); + RemoveFactors1(sum_coeffs, I, r); + RemoveFactors1(ratio, I, r); + RemoveFactors1(pair_ratio, I, r); + + r = W.length(); + cnt = 0; + + pruning = min(pruning, r/2); + if (pruning <= 4) pruning = 0; + + InitTab(lookup_tab, ratio, r, k, thresh1, shamt_tab, pruning); + + if (2*k > r) + goto done; + else + goto restart; + } /* end of inner for loop */ + + } + + i--; + state = 1; + } + else { + if (state == 0) { + long I_i = I[i-1] + 1; + I[i] = I_i; + + long pruned; + + if (pruning && r-I_i <= pruning) { + long pos = r-I_i; + unsigned long rs = ratio_sum[i-1]; + unsigned long index1 = (rs >> shamt_tab[pos][k-i]); + if (lookup_tab[pos][k-i][index1 >> TBL_SHAMT] & (1UL << (index1&TBL_MSK))) + pruned = 0; + else + pruned = 1; + } + else + pruned = 0; + + if (pruned) { + i--; + state = 1; + } + else { + D[i] = D[i-1] + degv[I_i]; + ratio_sum[i] = ratio_sum[i-1] + ratio[I_i]; + i++; + } + } + else { // state == 1 + + loop_cnt++; + + if (i < ProdLen) + ProdLen = i; + + if (i < ProdLen1) + ProdLen1 = i; + + if (i < SumLen) + SumLen = i; + + long I_i = (++I[i]); + + if (i == 0) break; + + if (I_i > r-k+i) { + i--; + } + else { + + long pruned; + + if (pruning && r-I_i <= pruning) { + long pos = r-I_i; + unsigned long rs = ratio_sum[i-1]; + unsigned long index1 = (rs >> shamt_tab[pos][k-i]); + if (lookup_tab[pos][k-i][index1 >> TBL_SHAMT] & (1UL << (index1&TBL_MSK))) + pruned = 0; + else + pruned = 1; + } + else + pruned = 0; + + + if (pruned) { + i--; + } + else { + D[i] = D[i-1] + degv[I_i]; + ratio_sum[i] = ratio_sum[i-1] + ratio[I_i]; + i++; + state = 0; + } + } + } + } + } + + restart: ; + } + + done: + + if (verbose) { + end_time = GetTime(); + cerr << "\n************ "; + cerr << "end cardinality " << k << "\n"; + cerr << "time: " << (end_time-start_time) << "\n"; + ZZ loops_max = choose_fn(initial_r+1, k); + ZZ tuples_max = choose_fn(initial_r, k); + + loop_total += loop_cnt; + degree_total += degree_cnt; + n2_total += n2_cnt; + sl_total += sl_cnt; + ct_total += ct_cnt; + pl_total += pl_cnt; + c1_total += c1_cnt; + pl1_total += pl1_cnt; + td_total += td_cnt; + + cerr << "\n"; + PrintInfo("loops: ", loop_total, loops_max); + PrintInfo("degree tests: ", degree_total, tuples_max); + + PrintInfo("n-2 tests: ", n2_total, tuples_max); + + cerr << "ave sum len: "; + if (n2_total == 0) + cerr << "--"; + else + cerr << (to_double(sl_total)/to_double(n2_total)); + cerr << "\n"; + + PrintInfo("f(1) tests: ", c1_total, tuples_max); + + cerr << "ave prod len: "; + if (c1_total == 0) + cerr << "--"; + else + cerr << (to_double(pl1_total)/to_double(c1_total)); + cerr << "\n"; + + PrintInfo("f(0) tests: ", ct_total, tuples_max); + + cerr << "ave prod len: "; + if (ct_total == 0) + cerr << "--"; + else + cerr << (to_double(pl_total)/to_double(ct_total)); + cerr << "\n"; + + PrintInfo("trial divs: ", td_total, tuples_max); + } +} + + + +static +void FindTrueFactors(vec_ZZX& factors, const ZZX& ff, + const vec_ZZX& w, const ZZ& P, + LocalInfoT& LocalInfo, + long verbose, + long bnd) +{ + ZZ_pBak bak; + bak.save(); + ZZ_p::init(P); + + long r = w.length(); + + vec_ZZ_pX W; + W.SetLength(r); + + long i; + for (i = 0; i < r; i++) + conv(W[i], w[i]); + + + ZZX f; + + f = ff; + + long k; + + k = 1; + factors.SetLength(0); + while (2*k <= W.length()) { + if (k <= 1) + 
CardinalitySearch(factors, f, W, LocalInfo, k, bnd, verbose); + else + CardinalitySearch1(factors, f, W, LocalInfo, k, bnd, verbose); + k++; + } + + append(factors, f); + + bak.restore(); +} + + + + + +/**********************************************************************\ + + van Hoeij's algorithm + +\**********************************************************************/ + + + +const long van_hoeij_size_thresh = 12; +// Use van Hoeij's algorithm if number of modular factors exceeds this bound. +// Must be >= 1. + +const long van_hoeij_card_thresh = 3; +// Switch to knapsack method if cardinality of candidate factors +// exceeds this bound. +// Must be >= 1. + + + + +// This routine assumes that the input f is a non-zero polynomial +// of degree n, and returns the value f(a). + +static +ZZ PolyEval(const ZZX& f, const ZZ& a) +{ + if (f == 0) LogicError("PolyEval: internal error"); + + long n = deg(f); + + ZZ acc, t1, t2; + long i; + + acc = f.rep[n]; + + for (i = n-1; i >= 0; i--) { + mul(t1, acc, a); + add(acc, t1, f.rep[i]); + } + + return acc; +} + + +// This routine assumes that the input f is a polynomial with non-zero constant +// term, of degree n, and with leading coefficient c; it returns +// an upper bound on the absolute value of the roots of the +// monic, integer polynomial g(X) = c^{n-1} f(X/c). + +static +ZZ RootBound(const ZZX& f) +{ + if (ConstTerm(f) == 0) LogicError("RootBound: internal error"); + + long n = deg(f); + + ZZX g; + long i; + + g = f; + + if (g.rep[n] < 0) negate(g.rep[n], g.rep[n]); + for (i = 0; i < n; i++) { + if (g.rep[i] > 0) negate(g.rep[i], g.rep[i]); + } + + ZZ lb, ub, mb; + + + lb = 0; + + ub = 1; + while (PolyEval(g, ub) < 0) { + ub = 2*ub; + } + + // lb < root <= ub + + while (ub - lb > 1) { + ZZ mb = (ub + lb)/2; + + if (PolyEval(g, mb) < 0) + lb = mb; + else + ub = mb; + } + + return ub*g.rep[n]; +} + + +// This routine takes as input an n x m integer matrix M, where the rows of M +// are assumed to be linearly independent. +// It is also required that both n and m are non-zero. +// It computes an integer d, along with an n x m matrix R, such that +// R*d^{-1} is the reduced row echelon form of M. +// The routine is probabilistic: the output is always correct, but the +// routine may abort the program with negligible probability +// (specifically, if GenPrime returns a composite, and the modular +// gauss routine can't invert a non-zero element). 
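+// For intuition: with the 1 x 3 matrix M = (2 0 4), the reduced row
+// echelon form is (1 0 2), so a valid output is d = 2 together with
+// R = (2 0 4), since R*d^{-1} = (1 0 2); returning the pair (d, R)
+// keeps everything in integer arithmetic.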
+ +static +void gauss(ZZ& d_out, mat_ZZ& R_out, const mat_ZZ& M) +{ + long n = M.NumRows(); + long m = M.NumCols(); + + if (n == 0 || m == 0) LogicError("gauss: internal error"); + + zz_pBak bak; + bak.save(); + + for (;;) { + long p = GenPrime_long(NTL_SP_NBITS); + zz_p::init(p); + + mat_zz_p MM; + conv(MM, M); + + long r = gauss(MM); + if (r < n) continue; + + // compute pos(1..n), so that pos(i) is the index + // of the i-th pivot column + + vec_long pos; + pos.SetLength(n); + + long i, j; + for (i = j = 1; i <= n; i++) { + while (MM(i, j) == 0) j++; + pos(i) = j; + j++; + } + + // compute the n x n sub-matrix consisting of the + // pivot columns of M + + mat_ZZ S; + S.SetDims(n, n); + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + S(i, j) = M(i, pos(j)); + + mat_ZZ S_inv; + ZZ d; + + inv(d, S_inv, S); + if (d == 0) continue; + + mat_ZZ R; + mul(R, S_inv, M); + + // now check that R is of the right form, which it will be + // if we were not unlucky + + long OK = 1; + + for (i = 1; i <= n && OK; i++) { + for (j = 1; j < pos(i) && OK; j++) + if (R(i, j) != 0) OK = 0; + + if (R(i, pos(i)) != d) OK = 0; + + for (j = 1; j < i && OK; j++) + if (R(j, pos(i)) != 0) OK = 0; + } + + if (!OK) continue; + + d_out = d; + R_out = R; + break; + } +} + + +// The input polynomial f should be monic, and deg(f) > 0. +// The input P should be > 1. +// Tr.length() >= d, and Tr(i), for i = 1..d-1, should be the +// Tr_i(f) mod P (in van Hoeij's notation). +// The quantity Tr_d(f) mod P is computed, and stored in Tr(d). + + +void ComputeTrace(vec_ZZ& Tr, const ZZX& f, long d, const ZZ& P) +{ + long n = deg(f); + + // check arguments + + if (n <= 0 || LeadCoeff(f) != 1) + LogicError("ComputeTrace: internal error (1)"); + + if (d <= 0) + LogicError("ComputeTrace: internal error (2)"); + + if (Tr.length() < d) + LogicError("ComputeTrace: internal error (3)"); + + if (P <= 1) + LogicError("ComputeTrace: internal error (4)"); + + // treat d > deg(f) separately + + if (d > n) { + ZZ t1, t2; + long i; + + t1 = 0; + + for (i = 1; i <= n; i++) { + mul(t2, Tr(i + d - n - 1), f.rep[i-1]); + add(t1, t1, t2); + } + + rem(t1, t1, P); + NegateMod(t1, t1, P); + Tr(d) = t1; + } + else { + ZZ t1, t2; + long i; + + mul(t1, f.rep[n-d], d); + + for (i = 1; i < d; i++) { + mul(t2, Tr(i), f.rep[n-d+i]); + add(t1, t1, t2); + } + + rem(t1, t1, P); + NegateMod(t1, t1, P); + Tr(d) = t1; + } +} + +// Tr(1..d) are traces as computed above. +// C and pb have length at least d. +// For i = 1..d, pb(i) = p^{a_i} for a_i > 0. +// pdelta = p^delta for delta > 0. +// P = p^a for some a >= max{ a_i : i=1..d }. + +// This routine computes C(1..d), where +// C(i) = C_{a_i}^{a_i + delta}( Tr(i)*lc^i ) for i = 1..d. 
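+// (Concretely, as implemented below, C_a^{a+delta}(x) is
+// round(x / p^a) reduced mod p^delta into the balanced interval
+// (-p^delta/2, p^delta/2], i.e. a window of delta p-adic digits of x
+// sitting just above the a low-order digits.)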
+ + +void ChopTraces(vec_ZZ& C, const vec_ZZ& Tr, long d, + const vec_ZZ& pb, const ZZ& pdelta, const ZZ& P, const ZZ& lc) +{ + if (d <= 0) LogicError("ChopTraces: internal error (1)"); + if (C.length() < d) LogicError("ChopTraces: internal error (2)"); + if (Tr.length() < d) LogicError("ChopTraces: internal error (3)"); + if (pb.length() < d) LogicError("ChopTraces: internal error (4)"); + if (P <= 1) LogicError("ChopTraces: internal error (5)"); + + ZZ lcpow, lcred; + lcpow = 1; + rem(lcred, lc, P); + + ZZ pdelta_2; + RightShift(pdelta_2, pdelta, 1); + + ZZ t1, t2; + + long i; + for (i = 1; i <= d; i++) { + MulMod(lcpow, lcpow, lcred, P); + MulMod(t1, lcpow, Tr(i), P); + + RightShift(t2, pb(i), 1); + add(t1, t1, t2); + div(t1, t1, pb(i)); + rem(t1, t1, pdelta); + if (t1 > pdelta_2) + sub(t1, t1, pdelta); + + C(i) = t1; + } +} + + +// Similar to above, but computes a linear combination of traces. + + +static +void DenseChopTraces(vec_ZZ& C, const vec_ZZ& Tr, long d, long d1, + const ZZ& pb_eff, const ZZ& pdelta, const ZZ& P, + const ZZ& lc, const mat_ZZ& A) +{ + + ZZ pdelta_2; + RightShift(pdelta_2, pdelta, 1); + + ZZ pb_eff_2; + RightShift(pb_eff_2, pb_eff, 1); + + ZZ acc, t1, t2; + + long i, j; + + ZZ lcpow, lcred; + rem(lcred, lc, P); + + for (i = 1; i <= d1; i++) { + lcpow = 1; + acc = 0; + + for (j = 1; j <= d; j++) { + MulMod(lcpow, lcpow, lcred, P); + MulMod(t1, lcpow, Tr(j), P); + rem(t2, A(i, j), P); + MulMod(t1, t1, t2, P); + AddMod(acc, acc, t1, P); + } + + t1 = acc; + add(t1, t1, pb_eff_2); + div(t1, t1, pb_eff); + rem(t1, t1, pdelta); + if (t1 > pdelta_2) + sub(t1, t1, pdelta); + + C(i) = t1; + } +} + + +static +void Compute_pb(vec_long& b,vec_ZZ& pb, long p, long d, + const ZZ& root_bound, long n) +{ + ZZ t1, t2; + long i; + + t1 = 2*power(root_bound, d)*n; + + if (d == 1) { + i = 0; + t2 = 1; + } + else { + i = b(d-1); + t2 = pb(d-1); + } + + while (t2 <= t1) { + i++; + t2 *= p; + } + + b.SetLength(d); + b(d) = i; + + pb.SetLength(d); + pb(d) = t2; +} + +static +void Compute_pdelta(long& delta, ZZ& pdelta, long p, long bit_delta) +{ + ZZ t1; + long i; + + i = delta; + t1 = pdelta; + + while (NumBits(t1) <= bit_delta) { + i++; + t1 *= p; + } + + delta = i; + pdelta = t1; +} + +static +void BuildReductionMatrix(mat_ZZ& M, long& C, long r, long d, const ZZ& pdelta, + const vec_vec_ZZ& chop_vec, + const mat_ZZ& B_L, long verbose) +{ + long s = B_L.NumRows(); + + C = long( sqrt(double(d) * double(r)) / 2.0 ) + 1; + + M.SetDims(s+d, r+d); + clear(M); + + + long i, j, k; + ZZ t1, t2; + + for (i = 1; i <= s; i++) + for (j = 1; j <= r; j++) + mul(M(i, j), B_L(i, j), C); + + ZZ pdelta_2; + + RightShift(pdelta_2, pdelta, 1); + + long maxbits = 0; + + for (i = 1; i <= s; i++) + for (j = 1; j <= d; j++) { + t1 = 0; + for (k = 1; k <= r; k++) { + mul(t2, B_L(i, k), chop_vec(k)(j)); + add(t1, t1, t2); + } + + rem(t1, t1, pdelta); + if (t1 > pdelta_2) + sub(t1, t1, pdelta); + + maxbits = max(maxbits, NumBits(t1)); + + M(i, j+r) = t1; + } + + + for (i = 1; i <= d; i++) + M(i+s, i+r) = pdelta; + + if (verbose) + cerr << "ratio = " << double(maxbits)/double(NumBits(pdelta)) + << "; "; +} + + +static +void CutAway(mat_ZZ& B1, vec_ZZ& D, mat_ZZ& M, + long C, long r, long d) +{ + long k = M.NumRows(); + ZZ bnd = 4*to_ZZ(C)*to_ZZ(C)*to_ZZ(r) + to_ZZ(d)*to_ZZ(r)*to_ZZ(r); + + while (k >= 1 && 4*D[k] > bnd*D[k-1]) k--; + + mat_ZZ B2; + + B2.SetDims(k, r); + long i, j; + + for (i = 1; i <= k; i++) + for (j = 1; j <= r; j++) + div(B2(i, j), M(i, j), C); + + M.kill(); // save space + D.kill(); + + ZZ 
det2; + long rnk; + + rnk = image(det2, B2); + + B1.SetDims(rnk, r); + for (i = 1; i <= rnk; i++) + for (j = 1; j <= r; j++) + B1(i, j) = B2(i + k - rnk, j); +} + + + + +static +long GotThem(vec_ZZX& factors, + const mat_ZZ& B_L, + const vec_ZZ_pX& W, + const ZZX& f, + long bnd, + long verbose) +{ + double tt0, tt1; + ZZ det; + mat_ZZ R; + long s, r; + long i, j, cnt; + + if (verbose) { + cerr << " checking A (s = " << B_L.NumRows() + << "): gauss..."; + } + + tt0 = GetTime(); + + gauss(det, R, B_L); + + tt1 = GetTime(); + + if (verbose) cerr << (tt1-tt0) << "; "; + + // check if condition A holds + + s = B_L.NumRows(); + r = B_L.NumCols(); + + for (j = 0; j < r; j++) { + cnt = 0; + for (i = 0; i < s; i++) { + if (R[i][j] == 0) continue; + if (R[i][j] != det) { + if (verbose) cerr << "failed.\n"; + return 0; + } + cnt++; + } + + if (cnt != 1) { + if (verbose) cerr << "failed.\n"; + return 0; + } + } + + if (verbose) { + cerr << "passed.\n"; + cerr << " checking B..."; + } + + // extract relevant information from R + + vec_vec_long I_vec; + I_vec.SetLength(s); + + vec_long deg_vec; + deg_vec.SetLength(s); + + for (i = 0; i < s; i++) { + long dg = 0; + + for (j = 0; j < r; j++) { + if (R[i][j] != 0) append(I_vec[i], j); + dg += deg(W[j]); + } + + deg_vec[i] = dg; + } + + R.kill(); // save space + + + // check if any candidate factor is the product of too few + // modular factors + + for (i = 0; i < s; i++) + if (I_vec[i].length() <= van_hoeij_card_thresh) { + if (verbose) cerr << "X\n"; + return 0; + } + + if (verbose) cerr << "1"; + + + // sort deg_vec, I_vec in order of increasing degree + + for (i = 0; i < s-1; i++) + for (j = 0; j < s-1-i; j++) + if (deg_vec[j] > deg_vec[j+1]) { + _ntl_swap(deg_vec[j], deg_vec[j+1]); + swap(I_vec[j], I_vec[j+1]); + } + + + // perform constant term tests + + ZZ ct; + mul(ct, LeadCoeff(f), ConstTerm(f)); + + ZZ half_P; + RightShift(half_P, ZZ_p::modulus(), 1); + + ZZ_p lc, prod; + conv(lc, LeadCoeff(f)); + + ZZ t1; + + for (i = 0; i < s; i++) { + vec_long& I = I_vec[i]; + prod = lc; + for (j = 0; j < I.length(); j++) + mul(prod, prod, ConstTerm(W[I[j]])); + + t1 = rep(prod); + if (t1 > half_P) + sub(t1, t1, ZZ_p::modulus()); + + if (!divide(ct, t1)) { + if (verbose) cerr << "X\n"; + return 0; + } + } + + if (verbose) cerr << "2"; + + + // multiply out polynomials and perform size tests + + vec_ZZX fac; + ZZ_pX gg; + ZZX g; + + for (i = 0; i < s-1; i++) { + vec_long& I = I_vec[i]; + mul(gg, W, I); + mul(gg, gg, lc); + BalCopy(g, gg); + if (MaxBits(g) > bnd) { + if (verbose) cerr << "X\n"; + return 0; + } + PrimitivePart(g, g); + append(fac, g); + } + + if (verbose) cerr << "3"; + + + // finally...trial division + + ZZX f1 = f; + ZZX h; + + for (i = 0; i < s-1; i++) { + if (!divide(h, f1, fac[i])) { + cerr << "X\n"; + return 0; + } + + f1 = h; + } + + // got them! 
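+   // at this point every candidate row of B_L has passed condition A,
+   // the cardinality check, and the constant-term, size, and
+   // trial-division tests, so the s-1 quotients in fac together with
+   // the final cofactor f1 constitute the desired factorization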
+ + if (verbose) cerr << "$\n"; + + append(factors, fac); + append(factors, f1); + + return 1; +} + + +void AdditionalLifting(ZZ& P1, + long& e1, + vec_ZZX& w1, + long p, + long new_bound, + const ZZX& f, + long doubling, + long verbose) +{ + long new_e1; + + if (doubling) + new_e1 = max(2*e1, new_bound); // at least double e1 + else + new_e1 = new_bound; + + if (verbose) { + cerr << ">>> additional Hensel lifting to " << new_e1 << "...\n"; + } + + ZZ new_P1; + + power(new_P1, p, new_e1); + + ZZX f1; + ZZ t1, t2; + long i; + long n = deg(f); + + if (LeadCoeff(f) == 1) + f1 = f; + else if (LeadCoeff(f) == -1) + negate(f1, f); + else { + rem(t1, LeadCoeff(f), new_P1); + InvMod(t1, t1, new_P1); + f1.rep.SetLength(n+1); + for (i = 0; i <= n; i++) { + mul(t2, f.rep[i], t1); + rem(f1.rep[i], t2, new_P1); + } + } + + zz_pBak bak; + bak.save(); + + zz_p::init(p, NextPowerOfTwo(n)+1); + + long r = w1.length(); + + vec_zz_pX ww1; + ww1.SetLength(r); + for (i = 0; i < r; i++) + conv(ww1[i], w1[i]); + + w1.kill(); + + double tt0, tt1; + + tt0 = GetTime(); + + MultiLift(w1, ww1, f1, new_e1, verbose); + + tt1 = GetTime(); + + if (verbose) { + cerr << "lifting time: " << (tt1-tt0) << "\n\n"; + } + + P1 = new_P1; + e1 = new_e1; + + bak.restore(); +} + +static +void Compute_pb_eff(long& b_eff, ZZ& pb_eff, long p, long d, + const ZZ& root_bound, + long n, long ran_bits) +{ + ZZ t1, t2; + long i; + + if (root_bound == 1) + t1 = (to_ZZ(d)*to_ZZ(n)) << (ran_bits + 1); + else + t1 = (power(root_bound, d)*n) << (ran_bits + 2); + + i = 0; + t2 = 1; + + while (t2 <= t1) { + i++; + t2 *= p; + } + + b_eff = i; + pb_eff = t2; +} + + + +static +long d1_val(long bit_delta, long r, long s) +{ + return long( 0.30*double(r)*double(s)/double(bit_delta) ) + 1; +} + + + + +// Next comes van Hoeij's algorithm itself. +// Some notation that differs from van Hoeij's paper: +// n = deg(f) +// r = # modular factors +// s = dim(B_L) (gets smaller over time) +// d = # traces used +// d1 = number of "compressed" traces +// +// The algorithm starts with a "sparse" version of van Hoeij, so that +// at first the traces d = 1, 2, ... are used in conjunction with +// a d x d identity matrix for van Hoeij's matrix A. +// The number of "excess" bits used for each trace, bit_delta, is initially +// 2*r. +// +// When d*bit_delta exceeds 0.25*r*s, we switch to +// a "dense" mode, where we use only about 0.25*r*s "compressed" traces. +// These bounds follow from van Hoeij's heuristic estimates. +// +// In sparse mode, d and bit_delta increase exponentially (but gently). +// In dense mode, d increases somewhat more aggressively, +// and bit_delta is increased more gently. 
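+// For example, with r = 20 modular factors and s = 20, bit_delta
+// starts out at 2*r = 40, and d1_val(40, 20, 20) = 0.30*20*20/40 + 1 = 4,
+// so the switch to dense mode happens as soon as more than 4 traces
+// are in use.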
+ + +static +void FindTrueFactors_vH(vec_ZZX& factors, const ZZX& ff, + const vec_ZZX& w, const ZZ& P, + long p, long e, + LocalInfoT& LocalInfo, + long verbose, + long bnd) +{ + const long SkipSparse = 0; + + ZZ_pBak bak; + bak.save(); + ZZ_p::init(P); + + long r = w.length(); + + vec_ZZ_pX W; + W.SetLength(r); + + long i, j; + + for (i = 0; i < r; i++) + conv(W[i], w[i]); + + + ZZX f; + + f = ff; + + long k; + + k = 1; + factors.SetLength(0); + while (2*k <= W.length() && + (k <= van_hoeij_card_thresh || W.length() <= van_hoeij_size_thresh)) { + + if (k <= 1) + CardinalitySearch(factors, f, W, LocalInfo, k, bnd, verbose); + else + CardinalitySearch1(factors, f, W, LocalInfo, k, bnd, verbose); + k++; + } + + if (2*k > W.length()) { + // rest is irreducible, so we're done + + append(factors, f); + } + else { + + // now we apply van Hoeij's algorithm proper to f + + double time_start, time_stop, lll_time, tt0, tt1; + + time_start = GetTime(); + lll_time = 0; + + if (verbose) { + cerr << "\n\n*** starting knapsack procedure\n"; + } + + ZZ P1 = P; + long e1 = e; // invariant: P1 = p^{e1} + + r = W.length(); + + vec_ZZX w1; + w1.SetLength(r); + for (i = 0; i < r; i++) + conv(w1[i], W[i]); + + long n = deg(f); + + mat_ZZ B_L; // van Hoeij's lattice + ident(B_L, r); + + long d = 0; // number of traces + + long bit_delta = 0; // number of "excess" bits + + vec_long b; + vec_ZZ pb; // pb(i) = p^{b(i)} + + long delta = 0; + ZZ pdelta = to_ZZ(1); // pdelta = p^delta + pdelta = 1; + + vec_vec_ZZ trace_vec; + trace_vec.SetLength(r); + + vec_vec_ZZ chop_vec; + chop_vec.SetLength(r); + + ZZ root_bound = RootBound(f); + + if (verbose) { + cerr << "NumBits(root_bound) = " << NumBits(root_bound) << "\n"; + } + + long dense = 0; + long ran_bits = 32; + + long loop_cnt = 0; + + + long s = r; + + for (;;) { + + loop_cnt++; + + // if we are using the power hack, then we do not try too hard... + // this is really a hack on a hack! 
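+      // (ok_to_abandon is set by the deflation driver in SFFactor
+      // below: when this call is working on a deflated f(X^m), giving
+      // up early just defers the work, since the abandoned factor is
+      // re-inflated and factored again with ok_to_abandon cleared)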
+ + if (ok_to_abandon && + ((d >= 2 && s > 128) || (d >= 3 && s > 32) || (d >= 4 && s > 8) || + d >= 5) ) { + if (verbose) cerr << " abandoning\n"; + append(factors, f); + break; + } + + long d_last, d_inc, d_index; + + d_last = d; + + // set d_inc: + + if (!dense) { + d_inc = 1 + d/8; + } + else { + d_inc = 1 + d/4; + } + + d_inc = min(d_inc, n-1-d); + + d += d_inc; + + // set bit_delta: + + if (bit_delta == 0) { + // set initial value...don't make it any smaller than 2*r + + bit_delta = 2*r; + } + else { + long extra_bits; + + if (!dense) { + extra_bits = 1 + bit_delta/8; + } + else if (d_inc != 0) { + if (d1_val(bit_delta, r, s) > 1) + extra_bits = 1 + bit_delta/16; + else + extra_bits = 0; + } + else + extra_bits = 1 + bit_delta/8; + + bit_delta += extra_bits; + } + + if (d > d1_val(bit_delta, r, s)) + dense = 1; + + Compute_pdelta(delta, pdelta, p, bit_delta); + + long d1; + long b_eff; + ZZ pb_eff; + + if (!dense) { + for (d_index = d_last + 1; d_index <= d; d_index++) + Compute_pb(b, pb, p, d_index, root_bound, n); + + d1 = d; + b_eff = b(d); + pb_eff = pb(d); + } + else { + d1 = d1_val(bit_delta, r, s); + Compute_pb_eff(b_eff, pb_eff, p, d, root_bound, n, ran_bits); + } + + if (verbose) { + cerr << "*** d = " << d + << "; s = " << s + << "; delta = " << delta + << "; b_eff = " << b_eff; + + if (dense) cerr << "; dense [" << d1 << "]"; + cerr << "\n"; + } + + if (b_eff + delta > e1) { + long doubling; + + doubling = 1; + + AdditionalLifting(P1, e1, w1, p, b_eff + delta, f, + doubling, verbose); + + if (verbose) { + cerr << ">>> recomputing traces..."; + } + + tt0 = GetTime(); + + trace_vec.kill(); + trace_vec.SetLength(r); + + for (i = 0; i < r; i++) { + trace_vec[i].SetLength(d_last); + + for (d_index = 1; d_index <= d_last; d_index++) { + ComputeTrace(trace_vec[i], w1[i], d_index, P1); + } + } + + tt1 = GetTime(); + if (verbose) cerr << (tt1-tt0) << "\n"; + } + + if (verbose) cerr << " trace..."; + + tt0 = GetTime(); + + mat_ZZ A; + + if (dense) { + A.SetDims(d1, d); + for (i = 1; i <= d1; i++) + for (j = 1; j <= d; j++) { + RandomBits(A(i, j), ran_bits); + if (RandomBnd(2)) negate(A(i, j), A(i, j)); + } + } + + + for (i = 0; i < r; i++) { + trace_vec[i].SetLength(d); + for (d_index = d_last + 1; d_index <= d; d_index++) + ComputeTrace(trace_vec[i], w1[i], d_index, P1); + + chop_vec[i].SetLength(d1); + + if (!dense) + ChopTraces(chop_vec[i], trace_vec[i], d, pb, pdelta, + P1, LeadCoeff(f)); + else + DenseChopTraces(chop_vec[i], trace_vec[i], d, d1, pb_eff, + pdelta, P1, LeadCoeff(f), A); + } + + A.kill(); + + tt1 = GetTime(); + + if (verbose) cerr << (tt1-tt0) << "\n"; + + mat_ZZ M; + long C; + + if (verbose) cerr << " building matrix..."; + + tt0 = GetTime(); + + BuildReductionMatrix(M, C, r, d1, pdelta, chop_vec, B_L, verbose); + + tt1 = GetTime(); + + if (verbose) cerr << (tt1-tt0) << "\n"; + + if (SkipSparse) { + if (!dense) { + if (verbose) cerr << "skipping LLL\n"; + continue; + } + } + + if (verbose) cerr << " LLL..."; + + tt0 = GetTime(); + + vec_ZZ D; + long rnk = LLL_plus(D, M); + + tt1 = GetTime(); + + lll_time += (tt1-tt0); + + if (verbose) cerr << (tt1-tt0) << "\n"; + + if (rnk != s + d1) { + LogicError("van Hoeij -- bad rank"); + } + + mat_ZZ B1; + + if (verbose) cerr << " CutAway..."; + + tt0 = GetTime(); + + CutAway(B1, D, M, C, r, d1); + + tt1 = GetTime(); + + if (verbose) cerr << (tt1-tt0) << "\n"; + + if (B1.NumRows() >= s) continue; + // no progress...try again + + // otherwise, update B_L and test if we are done + + swap(B1, B_L); + B1.kill(); + s = 
B_L.NumRows(); + + if (s == 0) + LogicError("oops! s == 0 should not happen!"); + + if (s == 1) { + if (verbose) cerr << " irreducible!\n"; + append(factors, f); + break; + } + + if (s > r / (van_hoeij_card_thresh + 1)) continue; + // dimension too high...we can't be done + + if (GotThem(factors, B_L, W, f, bnd, verbose)) break; + } + + time_stop = GetTime(); + + if (verbose) { + cerr << "*** knapsack finished: total time = " + << (time_stop - time_start) << "; LLL time = " + << lll_time << "\n"; + } + } + + bak.restore(); +} + + +static +void ll_SFFactor(vec_ZZX& factors, const ZZX& ff, + long verbose, + long bnd) + +// input is primitive and square-free, with positive leading +// coefficient +{ + if (deg(ff) <= 1) { + factors.SetLength(1); + factors[0] = ff; + if (verbose) { + cerr << "*** SFFactor, trivial case 1.\n"; + } + return; + } + + // remove a factor of X, if necessary + + ZZX f; + long xfac; + long rev; + + double t; + + if (IsZero(ConstTerm(ff))) { + RightShift(f, ff, 1); + xfac = 1; + } + else { + f = ff; + xfac = 0; + } + + // return a factor of X-1 if necessary + + long x1fac = 0; + + ZZ c1; + SumCoeffs(c1, f); + + if (c1 == 0) { + x1fac = 1; + div(f, f, ZZX(1,1) - 1); + } + + SumCoeffs(c1, f); + + if (deg(f) <= 1) { + long r = 0; + factors.SetLength(0); + if (deg(f) > 0) { + factors.SetLength(r+1); + factors[r] = f; + r++; + } + if (xfac) { + factors.SetLength(r+1); + SetX(factors[r]); + r++; + } + + if (x1fac) { + factors.SetLength(r+1); + factors[r] = ZZX(1,1) - 1; + r++; + } + + if (verbose) { + cerr << "*** SFFactor: trivial case 2.\n"; + } + + return; + } + + if (verbose) { + cerr << "*** start SFFactor.\n"; + } + + // reverse f if this makes lead coefficient smaller + + ZZ t1, t2; + + abs(t1, LeadCoeff(f)); + abs(t2, ConstTerm(f)); + + if (t1 > t2) { + inplace_rev(f); + rev = 1; + } + else + rev = 0; + + // obtain factorization modulo small primes + + if (verbose) { + cerr << "factorization modulo small primes...\n"; + t = GetTime(); + } + + LocalInfoT LocalInfo; + + zz_pBak bak; + bak.save(); + + UniquePtr spfactors( SmallPrimeFactorization(LocalInfo, f, verbose) ); + + if (!spfactors) { + // f was found to be irreducible + + bak.restore(); + + if (verbose) { + t = GetTime()-t; + cerr << "small prime time: " << t << ", irreducible.\n"; + } + + if (rev) + inplace_rev(f); + + long r = 0; + + factors.SetLength(r+1); + factors[r] = f; + r++; + + if (xfac) { + factors.SetLength(r+1); + SetX(factors[r]); + r++; + } + + if (x1fac) { + factors.SetLength(r+1); + factors[r] = ZZX(1,1) - 1; + r++; + } + + return; + } + + if (verbose) { + t = GetTime()-t; + cerr << "small prime time: "; + cerr << t << ", number of factors = " << spfactors->length() << "\n"; + } + + // prepare for Hensel lifting + + // first, calculate bit bound + + long bnd1; + long n = deg(f); + long i; + long e; + ZZ P; + long p; + + bnd1 = MaxBits(f) + (NumBits(n+1)+1)/2; + + if (!bnd || bnd1 < bnd) + bnd = bnd1; + + i = n/2; + while (!bit(LocalInfo.PossibleDegrees, i)) + i--; + + long lc_bnd = NumBits(LeadCoeff(f)); + + long coeff_bnd = bnd + lc_bnd + i; + + long lift_bnd; + + lift_bnd = coeff_bnd + 15; + // +15 helps avoid trial divisions...can be any number >= 0 + + lift_bnd = max(lift_bnd, bnd + lc_bnd + 2*NumBits(n) + ZZX_OVERLIFT); + // facilitates "n-1" and "n-2" tests + + lift_bnd = max(lift_bnd, lc_bnd + NumBits(c1)); + // facilitates f(1) test + + lift_bnd += 2; + // +2 needed to get inequalities right + + + p = zz_p::modulus(); + + e = long(double(lift_bnd)/(log(double(p))/log(double(2)))); + 
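+   // e is a first estimate of the lifting exponent: about
+   // lift_bnd/log2(p), so that p^e has roughly lift_bnd bits; the
+   // loop below then tops it up until NumBits(P) > lift_bnd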
power(P, p, e); + + while (NumBits(P) <= lift_bnd) { + mul(P, P, p); + e++; + } + + if (verbose) { + cerr << "lifting bound = " << lift_bnd << " bits.\n"; + cerr << "Hensel lifting to exponent " << e << "...\n"; + t = GetTime(); + } + + // third, compute f1 so that it is monic and equal to f mod P + + ZZX f1; + + if (LeadCoeff(f) == 1) + f1 = f; + else if (LeadCoeff(f) == -1) + negate(f1, f); + else { + rem(t1, LeadCoeff(f), P); + if (sign(P) < 0) + LogicError("whoops!!!"); + InvMod(t1, t1, P); + f1.rep.SetLength(n+1); + for (i = 0; i <= n; i++) { + mul(t2, f.rep[i], t1); + rem(f1.rep[i], t2, P); + } + } + + + // Do Hensel lift + + vec_ZZX w; + + MultiLift(w, *spfactors, f1, e, verbose); + + + if (verbose) { + t = GetTime()-t; + cerr << "\nlifting time: "; + cerr << t << "\n\n"; + } + + // We're done with zz_p...restore + + spfactors.reset(); + bak.restore(); + + // search for true factors + + if (verbose) { + cerr << "searching for true factors...\n"; + t = GetTime(); + } + + if (ZZXFac_van_Hoeij && w.length() > van_hoeij_size_thresh) + FindTrueFactors_vH(factors, f, w, P, p, e, + LocalInfo, verbose, coeff_bnd); + else + FindTrueFactors(factors, f, w, P, LocalInfo, verbose, coeff_bnd); + + if (verbose) { + t = GetTime()-t; + cerr << "factor search time " << t << "\n"; + } + + long r = factors.length(); + + if (rev) { + for (i = 0; i < r; i++) { + inplace_rev(factors[i]); + if (sign(LeadCoeff(factors[i])) < 0) + negate(factors[i], factors[i]); + } + } + + if (xfac) { + factors.SetLength(r+1); + SetX(factors[r]); + r++; + } + + if (x1fac) { + factors.SetLength(r+1); + factors[r] = ZZX(1,1)-1; + r++; + } + + // that's it!! + + if (verbose) { + cerr << "*** end SFFactor. degree sequence:\n"; + for (i = 0; i < r; i++) + cerr << deg(factors[i]) << " "; + cerr << "\n"; + } +} + + + +static +long DeflationFactor(const ZZX& f) +{ + long n = deg(f); + long m = 0; + long i; + + for (i = 1; i <= n && m != 1; i++) { + if (f.rep[i] != 0) + m = GCD(m, i); + } + + return m; +} + +static +void inflate(ZZX& g, const ZZX& f, long m) +// input may not alias output +{ + long n = deg(f); + long i; + + g = 0; + for (i = n; i >= 0; i--) + SetCoeff(g, i*m, f.rep[i]); +} + +static +void deflate(ZZX& g, const ZZX& f, long m) +// input may not alias output +{ + long n = deg(f); + long i; + + g = 0; + for (i = n; i >= 0; i -= m) + SetCoeff(g, i/m, f.rep[i]); +} + +static +void MakeFacList(vec_long& v, long m) +{ + if (m <= 0) LogicError("internal error: MakeFacList"); + + v.SetLength(0); + + long p = 2; + while (m > 1) { + while (m % p == 0) { + append(v, p); + m = m / p; + } + + p++; + } +} + +NTL_CHEAP_THREAD_LOCAL long ZZXFac_PowerHack = 1; + +void SFFactor(vec_ZZX& factors, const ZZX& ff, + long verbose, + long bnd) + +// input is primitive and square-free, with positive leading +// coefficient + +{ + if (ff == 0) + LogicError("SFFactor: bad args"); + + if (deg(ff) <= 0) { + factors.SetLength(0); + return; + } + + + if (!ZZXFac_PowerHack) { + ok_to_abandon = 0; + ll_SFFactor(factors, ff, verbose, bnd); + return; + } + + long m = DeflationFactor(ff); + + if (m == 1) { + if (verbose) { + cerr << "SFFactor -- no deflation\n"; + } + + ok_to_abandon = 0; + ll_SFFactor(factors, ff, verbose, bnd); + return; + } + + + vec_long v; + MakeFacList(v, m); + long l = v.length(); + + if (verbose) { + cerr << "SFFactor -- deflation: " << v << "\n"; + } + + vec_ZZX res; + res.SetLength(1); + deflate(res[0], ff, m); + + long done; + long j, k; + + done = 0; + k = l-1; + + while (!done) { + vec_ZZX res1; + res1.SetLength(0); + for 
(j = 0; j < res.length(); j++) { + vec_ZZX res2; + double t; + if (verbose) { + cerr << "begin - step " << k << ", " << j << "; deg = " + << deg(res[j]) << "\n"; + t = GetTime(); + } + + if (k < 0) + ok_to_abandon = 0; + else + ok_to_abandon = 1; + + ll_SFFactor(res2, res[j], verbose, k < 0 ? bnd : 0); + + if (verbose) { + t = GetTime()-t; + cerr << "end - step " << k << ", " << j << "; time = " + << t << "\n\n"; + } + + append(res1, res2); + } + + if (k < 0) { + done = 1; + swap(res, res1); + } + else { + vec_ZZX res2; + res2.SetLength(res1.length()); + for (j = 0; j < res1.length(); j++) + inflate(res2[j], res1[j], v[k]); + k--; + swap(res, res2); + } + } + + factors = res; +} + + + + + +void factor(ZZ& c, + vec_pair_ZZX_long& factors, + const ZZX& f, + long verbose, + long bnd) + +{ + ZZX ff = f; + + if (deg(ff) <= 0) { + c = ConstTerm(ff); + factors.SetLength(0); + return; + } + + content(c, ff); + divide(ff, ff, c); + + long bnd1 = MaxBits(ff) + (NumBits(deg(ff)+1)+1)/2; + if (!bnd || bnd > bnd1) + bnd = bnd1; + + vec_pair_ZZX_long sfd; + + double t; + + if (verbose) { cerr << "square-free decomposition..."; t = GetTime(); } + SquareFreeDecomp(sfd, ff); + if (verbose) cerr << (GetTime()-t) << "\n"; + + factors.SetLength(0); + + vec_ZZX x; + + long i, j; + + for (i = 0; i < sfd.length(); i++) { + if (verbose) { + cerr << "factoring multiplicity " << sfd[i].b + << ", deg = " << deg(sfd[i].a) << "\n"; + t = GetTime(); + } + + SFFactor(x, sfd[i].a, verbose, bnd); + + if (verbose) { + t = GetTime()-t; + cerr << "total time for multiplicity " + << sfd[i].b << ": " << t << "\n"; + } + + for (j = 0; j < x.length(); j++) + append(factors, cons(x[j], sfd[i].b)); + } +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZ_p.c b/thirdparty/linux/ntl/src/ZZ_p.c new file mode 100644 index 0000000000..8069ec34e6 --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZ_p.c @@ -0,0 +1,365 @@ + + +#include <NTL/ZZ_p.h> +#include <NTL/FFT.h> + +#include <NTL/new.h> + + +NTL_START_IMPL + + + +NTL_TLS_GLOBAL_DECL(SmartPtr<ZZ_pInfoT>, ZZ_pInfo_stg) +NTL_TLS_GLOBAL_DECL(SmartPtr<ZZ_pTmpSpaceT>, ZZ_pTmpSpace_stg) + +NTL_CHEAP_THREAD_LOCAL ZZ_pInfoT *ZZ_pInfo = 0; +NTL_CHEAP_THREAD_LOCAL ZZ_pTmpSpaceT *ZZ_pTmpSpace = 0; +NTL_CHEAP_THREAD_LOCAL bool ZZ_pInstalled = false; + + + +ZZ_pInfoT::ZZ_pInfoT(const ZZ& NewP) +{ + if (NewP <= 1) LogicError("ZZ_pContext: p must be > 1"); + + p = NewP; + size = p.size(); + + ExtendedModulusSize = 2*size + + (NTL_BITS_PER_LONG + NTL_ZZ_NBITS - 1)/NTL_ZZ_NBITS; + +} + + + +// we use a lazy strategy for initializing and installing +// FFTInfo and TmpSpace related to a ZZ_p modulus. +// The routines GetFFTInfo and GetTmpSpace make sure this process +// is complete. + +void ZZ_p::DoInstall() +{ + SmartPtr<ZZ_pTmpSpaceT> tmps; + + do { // NOTE: thread safe lazy init + Lazy<ZZ_pFFTInfoT>::Builder builder(ZZ_pInfo->FFTInfo); + if (!builder()) break; + + UniquePtr<ZZ_pFFTInfoT> FFTInfo; + FFTInfo.make(); + + ZZ B, M, M1, M2, M3; + long n, i; + long q, t; + mulmod_t qinv; + + sqr(B, ZZ_pInfo->p); + + LeftShift(B, B, NTL_FFTMaxRoot+NTL_FFTFudge); + + // FIXME: the following is quadratic time...would + // be nice to get a faster solution... + // One could estimate the # of primes by summing logs, + // then multiply using a tree-based multiply, then + // adjust up or down... + + // Assuming IEEE floating point, the worst case estimate + // for error guarantees a correct answer +/- 1 for + // numprimes up to 2^25...for sure we won't be + // using that many primes...we can certainly put in + // a sanity check, though. 
+ + // If I want a more accurate summation (using Kahan, + // which has some portability issues), I could represent + // numbers as x = a + f, where a is integer and f is the fractional + // part. Summing in this representation introduces an *absolute* + // error of 2 epsilon n, which is just as good as Kahan + // for this application. + + // same strategy could also be used in the ZZX HomMul routine, + // if we ever want to make that subquadratic + + set(M); + n = 0; + while (M <= B) { + UseFFTPrime(n); + q = GetFFTPrime(n); + n++; + mul(M, M, q); + } + + FFTInfo->NumPrimes = n; + FFTInfo->MaxRoot = CalcMaxRoot(q); + + + double fn = double(n); + + // NOTE: these next two range checks are somewhat academic, + // but various bits of code in the ZZ_pX implementation + // implicitly rely on them + + if (8.0*fn*(fn+48) > NTL_FDOUBLE_PRECISION) + ResourceError("modulus too big"); + + if (n >= NTL_SP_BOUND) + ResourceError("modulus too big"); + + + + FFTInfo->rem_struct.init(n, ZZ_pInfo->p, GetFFTPrime); + FFTInfo->crt_struct.init(n, ZZ_pInfo->p, GetFFTPrime); + + if (!FFTInfo->crt_struct.special()) { + FFTInfo->prime.SetLength(n); + FFTInfo->prime_recip.SetLength(n); + FFTInfo->u.SetLength(n); + FFTInfo->uqinv.SetLength(n); + + // montgomery + FFTInfo->reduce_struct.init(ZZ_pInfo->p, ZZ(n) << NTL_SP_NBITS); + + ZZ qq, rr; + + DivRem(qq, rr, M, ZZ_pInfo->p); + + NegateMod(FFTInfo->MinusMModP, rr, ZZ_pInfo->p); + + // montgomery + FFTInfo->reduce_struct.adjust(FFTInfo->MinusMModP); + + for (i = 0; i < n; i++) { + q = GetFFTPrime(i); + qinv = GetFFTPrimeInv(i); + + long tt = rem(qq, q); + + mul(M2, ZZ_pInfo->p, tt); + add(M2, M2, rr); + div(M2, M2, q); // = (M/q) rem p + + + div(M1, M, q); + t = rem(M1, q); + t = InvMod(t, q); + + // montgomery + FFTInfo->reduce_struct.adjust(M2); + + FFTInfo->crt_struct.insert(i, M2); + + FFTInfo->prime[i] = q; + FFTInfo->prime_recip[i] = 1/double(q); + FFTInfo->u[i] = t; + FFTInfo->uqinv[i] = PrepMulModPrecon(FFTInfo->u[i], q, qinv); + } + + } + + tmps = MakeSmart<ZZ_pTmpSpaceT>(); + tmps->crt_tmp_vec.fetch(FFTInfo->crt_struct); + tmps->rem_tmp_vec.fetch(FFTInfo->rem_struct); + + builder.move(FFTInfo); + } while (0); + + if (!tmps) { + const ZZ_pFFTInfoT *FFTInfo = ZZ_pInfo->FFTInfo.get(); + tmps = MakeSmart<ZZ_pTmpSpaceT>(); + tmps->crt_tmp_vec.fetch(FFTInfo->crt_struct); + tmps->rem_tmp_vec.fetch(FFTInfo->rem_struct); + } + + NTL_TLS_GLOBAL_ACCESS(ZZ_pTmpSpace_stg); + ZZ_pTmpSpace_stg = tmps; + ZZ_pTmpSpace = ZZ_pTmpSpace_stg.get(); +} + + + + +void ZZ_p::init(const ZZ& p) +{ + ZZ_pContext c(p); + c.restore(); +} + + +void ZZ_pContext::save() +{ + NTL_TLS_GLOBAL_ACCESS(ZZ_pInfo_stg); + ptr = ZZ_pInfo_stg; +} + + +void ZZ_pContext::restore() const +{ + if (ZZ_pInfo == ptr.get()) return; + // NOTE: this simple optimization could be useful in some situations, + // for example, a worker thread re-setting the current modulus + // in a multi-threaded build + + NTL_TLS_GLOBAL_ACCESS(ZZ_pInfo_stg); + ZZ_pInfo_stg = ptr; + ZZ_pInfo = ZZ_pInfo_stg.get(); + + NTL_TLS_GLOBAL_ACCESS(ZZ_pTmpSpace_stg); + ZZ_pTmpSpace_stg = 0; + ZZ_pTmpSpace = 0; + + ZZ_pInstalled = false; +} + + + +ZZ_pBak::~ZZ_pBak() +{ + if (MustRestore) c.restore(); +} + +void ZZ_pBak::save() +{ + c.save(); + MustRestore = true; +} + + +void ZZ_pBak::restore() +{ + c.restore(); + MustRestore = false; +} + + +const ZZ_p& ZZ_p::zero() +{ + static const ZZ_p z(INIT_NO_ALLOC); // GLOBAL (assumes C++11 thread-safe init) + return z; +} + +NTL_CHEAP_THREAD_LOCAL +ZZ_p::DivHandlerPtr ZZ_p::DivHandler = 0; + + + 
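+// A minimal usage sketch of the context machinery implemented above
+// (illustrative only -- the function and parameter names here are
+// hypothetical, not part of NTL): a ZZ_pBak saves the caller's
+// modulus and restores it on scope exit, so a routine can switch
+// moduli temporarily.
+
+static void ZZ_p_context_sketch(const ZZ& demo_p)
+{
+   ZZ_pBak bak;
+   bak.save();          // remember the current modulus, if any
+
+   ZZ_p::init(demo_p);  // all ZZ_p arithmetic below is mod demo_p
+
+   ZZ_p a;
+   conv(a, 2);          // a = 2 mod demo_p
+   mul(a, a, a);        // a = 4 mod demo_p
+
+   bak.restore();       // caller's modulus reinstated (the destructor
+                        // would restore it as well)
+}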
+ZZ_p::ZZ_p(INIT_VAL_TYPE, const ZZ& a) // NO_ALLOC +{ + conv(*this, a); +} + +ZZ_p::ZZ_p(INIT_VAL_TYPE, long a) // NO_ALLOC +{ + conv(*this, a); +} + + +void conv(ZZ_p& x, long a) +{ + if (a == 0) + clear(x); + else if (a == 1) + set(x); + else { + NTL_ZZRegister(y); + + conv(y, a); + conv(x, y); + } +} + +istream& operator>>(istream& s, ZZ_p& x) +{ + NTL_ZZRegister(y); + + NTL_INPUT_CHECK_RET(s, s >> y); + conv(x, y); + + return s; +} + +void div(ZZ_p& x, const ZZ_p& a, const ZZ_p& b) +{ + NTL_ZZ_pRegister(T); + + inv(T, b); + mul(x, a, T); +} + +void inv(ZZ_p& x, const ZZ_p& a) +{ + NTL_ZZRegister(T); + + if (InvModStatus(T, a._ZZ_p__rep, ZZ_p::modulus())) { + if (!IsZero(a._ZZ_p__rep) && ZZ_p::DivHandler) + (*ZZ_p::DivHandler)(a); + + InvModError("ZZ_p: division by non-invertible element", + a._ZZ_p__rep, ZZ_p::modulus()); + } + + x._ZZ_p__rep = T; +} + +long operator==(const ZZ_p& a, long b) +{ + if (b == 0) + return IsZero(a); + + if (b == 1) + return IsOne(a); + + NTL_ZZ_pRegister(T); + conv(T, b); + return a == T; +} + + + +void add(ZZ_p& x, const ZZ_p& a, long b) +{ + NTL_ZZ_pRegister(T); + conv(T, b); + add(x, a, T); +} + +void sub(ZZ_p& x, const ZZ_p& a, long b) +{ + NTL_ZZ_pRegister(T); + conv(T, b); + sub(x, a, T); +} + +void sub(ZZ_p& x, long a, const ZZ_p& b) +{ + NTL_ZZ_pRegister(T); + conv(T, a); + sub(x, T, b); +} + +void mul(ZZ_p& x, const ZZ_p& a, long b) +{ + NTL_ZZ_pRegister(T); + conv(T, b); + mul(x, a, T); +} + +void div(ZZ_p& x, const ZZ_p& a, long b) +{ + NTL_ZZ_pRegister(T); + conv(T, b); + div(x, a, T); +} + +void div(ZZ_p& x, long a, const ZZ_p& b) +{ + if (a == 1) { + inv(x, b); + } + else { + NTL_ZZ_pRegister(T); + conv(T, a); + div(x, T, b); + } +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZ_pE.c b/thirdparty/linux/ntl/src/ZZ_pE.c new file mode 100644 index 0000000000..986dc7bbd5 --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZ_pE.c @@ -0,0 +1,147 @@ + + +#include <NTL/ZZ_pE.h> + +#include <NTL/new.h> + +NTL_START_IMPL + + +NTL_TLS_GLOBAL_DECL(SmartPtr<ZZ_pEInfoT>, ZZ_pEInfo_stg) + +NTL_CHEAP_THREAD_LOCAL +ZZ_pEInfoT *ZZ_pEInfo = 0; + + +ZZ_pEInfoT::ZZ_pEInfoT(const ZZ_pX& NewP) +{ + build(p, NewP); + + _card_base = ZZ_p::modulus(); + _card_exp = deg(NewP); +} + +const ZZ& ZZ_pE::cardinality() +{ + if (!ZZ_pEInfo) LogicError("ZZ_pE::cardinality: undefined modulus"); + + do { // NOTE: thread safe lazy init + Lazy<ZZ>::Builder builder(ZZ_pEInfo->_card); + if (!builder()) break; + UniquePtr<ZZ> p; + p.make(); + power(*p, ZZ_pEInfo->_card_base, ZZ_pEInfo->_card_exp); + builder.move(p); + } while (0); + + return *ZZ_pEInfo->_card; +} + + + + +void ZZ_pE::init(const ZZ_pX& p) +{ + ZZ_pEContext c(p); + c.restore(); +} + + +void ZZ_pEContext::save() +{ + NTL_TLS_GLOBAL_ACCESS(ZZ_pEInfo_stg); + ptr = ZZ_pEInfo_stg; +} + +void ZZ_pEContext::restore() const +{ + NTL_TLS_GLOBAL_ACCESS(ZZ_pEInfo_stg); + ZZ_pEInfo_stg = ptr; + ZZ_pEInfo = ZZ_pEInfo_stg.get(); +} + + +ZZ_pEBak::~ZZ_pEBak() +{ + if (MustRestore) c.restore(); +} + +void ZZ_pEBak::save() +{ + c.save(); + MustRestore = true; +} + + +void ZZ_pEBak::restore() +{ + c.restore(); + MustRestore = false; +} + + +const ZZ_pE& ZZ_pE::zero() +{ + static const ZZ_pE z(INIT_NO_ALLOC); // GLOBAL (assumes C++11 thread-safe init) + return z; +} + + + + +istream& operator>>(istream& s, ZZ_pE& x) +{ + ZZ_pX y; + + NTL_INPUT_CHECK_RET(s, s >> y); + conv(x, y); + + return s; +} + +void div(ZZ_pE& x, const ZZ_pE& a, const ZZ_pE& b) +{ + ZZ_pE t; + + inv(t, b); + mul(x, a, t); +} + +void div(ZZ_pE& x, const ZZ_pE& a, long b) +{ + NTL_ZZ_pRegister(B); + B = b; + 
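+   // (a single ZZ_p inversion followed by a coefficientwise multiply;
+   // much cheaper than inverting b as a full ZZ_pE element via InvMod)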
inv(B, B); + mul(x, a, B); +} + +void div(ZZ_pE& x, const ZZ_pE& a, const ZZ_p& b) +{ + NTL_ZZ_pRegister(B); + B = b; + inv(B, B); + mul(x, a, B); +} + +void div(ZZ_pE& x, long a, const ZZ_pE& b) +{ + ZZ_pE t; + inv(t, b); + mul(x, a, t); +} + +void div(ZZ_pE& x, const ZZ_p& a, const ZZ_pE& b) +{ + ZZ_pE t; + inv(t, b); + mul(x, a, t); +} + + + +void inv(ZZ_pE& x, const ZZ_pE& a) +{ + InvMod(x._ZZ_pE__rep, a._ZZ_pE__rep, ZZ_pE::modulus()); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZ_pEX.c b/thirdparty/linux/ntl/src/ZZ_pEX.c new file mode 100644 index 0000000000..082dc971f8 --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZ_pEX.c @@ -0,0 +1,3444 @@ + + + +#include <NTL/ZZ_pEX.h> +#include <NTL/vec_vec_ZZ_p.h> +#include <NTL/BasicThreadPool.h> + +#include <NTL/new.h> + +NTL_START_IMPL + + +const ZZ_pEX& ZZ_pEX::zero() +{ + static const ZZ_pEX z; // GLOBAL (assumes C++11 thread-safe init) + return z; +} + + +istream& operator>>(istream& s, ZZ_pEX& x) +{ + NTL_INPUT_CHECK_RET(s, s >> x.rep); + x.normalize(); + return s; +} + +ostream& operator<<(ostream& s, const ZZ_pEX& a) +{ + return s << a.rep; +} + + +void ZZ_pEX::normalize() +{ + long n; + const ZZ_pE* p; + + n = rep.length(); + if (n == 0) return; + p = rep.elts() + n; + while (n > 0 && IsZero(*--p)) { + n--; + } + rep.SetLength(n); +} + + +long IsZero(const ZZ_pEX& a) +{ + return a.rep.length() == 0; +} + + +long IsOne(const ZZ_pEX& a) +{ + return a.rep.length() == 1 && IsOne(a.rep[0]); +} + +long operator==(const ZZ_pEX& a, long b) +{ + if (b == 0) + return IsZero(a); + + if (b == 1) + return IsOne(a); + + long da = deg(a); + + if (da > 0) return 0; + + NTL_ZZ_pRegister(bb); + bb = b; + + if (da < 0) + return IsZero(bb); + + return a.rep[0] == bb; +} + +long operator==(const ZZ_pEX& a, const ZZ_p& b) +{ + if (IsZero(b)) + return IsZero(a); + + long da = deg(a); + + if (da != 0) + return 0; + + return a.rep[0] == b; +} + +long operator==(const ZZ_pEX& a, const ZZ_pE& b) +{ + if (IsZero(b)) + return IsZero(a); + + long da = deg(a); + + if (da != 0) + return 0; + + return a.rep[0] == b; +} + + + + +void SetCoeff(ZZ_pEX& x, long i, const ZZ_pE& a) +{ + long j, m; + + if (i < 0) + LogicError("SetCoeff: negative index"); + + if (NTL_OVERFLOW(i, 1, 0)) + ResourceError("overflow in SetCoeff"); + + m = deg(x); + + + if (i > m && IsZero(a)) return; + + if (i > m) { + /* careful: a may alias a coefficient of x */ + + long alloc = x.rep.allocated(); + + if (alloc > 0 && i >= alloc) { + ZZ_pE aa = a; + x.rep.SetLength(i+1); + x.rep[i] = aa; + } + else { + x.rep.SetLength(i+1); + x.rep[i] = a; + } + + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + else + x.rep[i] = a; + + x.normalize(); +} + +void SetCoeff(ZZ_pEX& x, long i, const ZZ_p& aa) +{ + long j, m; + + if (i < 0) + LogicError("SetCoeff: negative index"); + + if (NTL_OVERFLOW(i, 1, 0)) + ResourceError("overflow in SetCoeff"); + + NTL_ZZ_pRegister(a); // watch out for aliases! 
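+   // (aa may refer to a ZZ_p living inside x's own representation,
+   // and x.rep may be resized below, so the value is copied into a
+   // scratch register first)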
+ a = aa; + + m = deg(x); + + if (i > m && IsZero(a)) return; + + if (i > m) { + x.rep.SetLength(i+1); + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + x.rep[i] = a; + x.normalize(); +} + +void SetCoeff(ZZ_pEX& x, long i, long a) +{ + if (a == 1) + SetCoeff(x, i); + else { + NTL_ZZ_pRegister(T); + T = a; + SetCoeff(x, i, T); + } +} + + + +void SetCoeff(ZZ_pEX& x, long i) +{ + long j, m; + + if (i < 0) + LogicError("coefficient index out of range"); + + if (NTL_OVERFLOW(i, 1, 0)) + ResourceError("overflow in SetCoeff"); + + m = deg(x); + + if (i > m) { + x.rep.SetLength(i+1); + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + set(x.rep[i]); + x.normalize(); +} + + +void SetX(ZZ_pEX& x) +{ + clear(x); + SetCoeff(x, 1); +} + + +long IsX(const ZZ_pEX& a) +{ + return deg(a) == 1 && IsOne(LeadCoeff(a)) && IsZero(ConstTerm(a)); +} + + + +const ZZ_pE& coeff(const ZZ_pEX& a, long i) +{ + if (i < 0 || i > deg(a)) + return ZZ_pE::zero(); + else + return a.rep[i]; +} + + +const ZZ_pE& LeadCoeff(const ZZ_pEX& a) +{ + if (IsZero(a)) + return ZZ_pE::zero(); + else + return a.rep[deg(a)]; +} + +const ZZ_pE& ConstTerm(const ZZ_pEX& a) +{ + if (IsZero(a)) + return ZZ_pE::zero(); + else + return a.rep[0]; +} + + + +void conv(ZZ_pEX& x, const ZZ_pE& a) +{ + if (IsZero(a)) + x.rep.SetLength(0); + else { + x.rep.SetLength(1); + x.rep[0] = a; + } +} + +void conv(ZZ_pEX& x, long a) +{ + if (a == 0) + clear(x); + else if (a == 1) + set(x); + else { + NTL_ZZ_pRegister(T); + T = a; + conv(x, T); + } +} + +void conv(ZZ_pEX& x, const ZZ& a) +{ + NTL_ZZ_pRegister(T); + conv(T, a); + conv(x, T); +} + +void conv(ZZ_pEX& x, const ZZ_p& a) +{ + if (IsZero(a)) + clear(x); + else if (IsOne(a)) + set(x); + else { + x.rep.SetLength(1); + conv(x.rep[0], a); + x.normalize(); + } +} + +void conv(ZZ_pEX& x, const ZZ_pX& aa) +{ + ZZ_pX a = aa; // in case a aliases the rep of a coefficient of x + + long n = deg(a)+1; + long i; + + x.rep.SetLength(n); + for (i = 0; i < n; i++) + conv(x.rep[i], coeff(a, i)); +} + + +void conv(ZZ_pEX& x, const vec_ZZ_pE& a) +{ + x.rep = a; + x.normalize(); +} + + + +/* additional legacy conversions for v6 conversion regime */ + +void conv(ZZ_pEX& x, const ZZX& a) +{ + long n = a.rep.length(); + long i; + + x.rep.SetLength(n); + for (i = 0; i < n; i++) + conv(x.rep[i], a.rep[i]); + + x.normalize(); +} + + +/* ------------------------------------- */ + + +void add(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b) +{ + long da = deg(a); + long db = deg(b); + long minab = min(da, db); + long maxab = max(da, db); + x.rep.SetLength(maxab+1); + + long i; + const ZZ_pE *ap, *bp; + ZZ_pE* xp; + + for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts(); + i; i--, ap++, bp++, xp++) + add(*xp, (*ap), (*bp)); + + if (da > minab && &x != &a) + for (i = da-minab; i; i--, xp++, ap++) + *xp = *ap; + else if (db > minab && &x != &b) + for (i = db-minab; i; i--, xp++, bp++) + *xp = *bp; + else + x.normalize(); +} + + +void add(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pE& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + } + else if (&x == &a) { + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + ZZ_pE *xp = x.rep.elts(); + add(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const ZZ_pE *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + +void add(ZZ_pEX& x, const ZZ_pEX& a, const 
ZZ_p& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + } + else if (&x == &a) { + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + ZZ_pE *xp = x.rep.elts(); + add(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const ZZ_pE *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + + +void add(ZZ_pEX& x, const ZZ_pEX& a, long b) +{ + if (a.rep.length() == 0) { + conv(x, b); + } + else { + if (&x != &a) x = a; + add(x.rep[0], x.rep[0], b); + x.normalize(); + } +} + + +void sub(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b) +{ + long da = deg(a); + long db = deg(b); + long minab = min(da, db); + long maxab = max(da, db); + x.rep.SetLength(maxab+1); + + long i; + const ZZ_pE *ap, *bp; + ZZ_pE* xp; + + for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts(); + i; i--, ap++, bp++, xp++) + sub(*xp, (*ap), (*bp)); + + if (da > minab && &x != &a) + for (i = da-minab; i; i--, xp++, ap++) + *xp = *ap; + else if (db > minab) + for (i = db-minab; i; i--, xp++, bp++) + negate(*xp, *bp); + else + x.normalize(); +} + + +void sub(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pE& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + negate(x, x); + } + else if (&x == &a) { + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + ZZ_pE *xp = x.rep.elts(); + sub(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const ZZ_pE *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + +void sub(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_p& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + negate(x, x); + } + else if (&x == &a) { + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + ZZ_pE *xp = x.rep.elts(); + sub(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const ZZ_pE *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + + +void sub(ZZ_pEX& x, const ZZ_pEX& a, long b) +{ + if (a.rep.length() == 0) { + conv(x, b); + negate(x, x); + } + else { + if (&x != &a) x = a; + sub(x.rep[0], x.rep[0], b); + x.normalize(); + } +} + +void sub(ZZ_pEX& x, const ZZ_pE& b, const ZZ_pEX& a) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + } + else if (x.rep.MaxLength() == 0) { + negate(x, a); + add(x.rep[0], x.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + ZZ_pE *xp = x.rep.elts(); + sub(xp[0], b, a.rep[0]); + x.rep.SetLength(n); + xp = x.rep.elts(); + const ZZ_pE *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + negate(xp[i], ap[i]); + x.normalize(); + } +} + + +void sub(ZZ_pEX& x, const ZZ_p& a, const ZZ_pEX& b) +{ + NTL_ZZ_pRegister(T); // avoids aliasing problems + T = a; + negate(x, b); + add(x, x, T); +} + +void sub(ZZ_pEX& x, long a, const ZZ_pEX& b) +{ + NTL_ZZ_pRegister(T); + T = a; + negate(x, b); + add(x, x, T); +} + +void mul(ZZ_pEX& c, const ZZ_pEX& a, const ZZ_pEX& b) +{ + if (&a == &b) { + sqr(c, a); + return; + } + + if (IsZero(a) || IsZero(b)) { + clear(c); + return; + } + + if (deg(a) == 0) { + mul(c, b, 
ConstTerm(a)); + return; + } + + if (deg(b) == 0) { + mul(c, a, ConstTerm(b)); + return; + } + + // general case...Kronecker subst + + ZZ_pX A, B, C; + + long da = deg(a); + long db = deg(b); + + long n = ZZ_pE::degree(); + long n2 = 2*n-1; + + if (NTL_OVERFLOW(da+db+1, n2, 0)) + ResourceError("overflow in ZZ_pEX mul"); + + long i, j; + + A.rep.SetLength((da+1)*n2); + + for (i = 0; i <= da; i++) { + const ZZ_pX& coeff = rep(a.rep[i]); + long dcoeff = deg(coeff); + for (j = 0; j <= dcoeff; j++) + A.rep[n2*i + j] = coeff.rep[j]; + } + + A.normalize(); + + B.rep.SetLength((db+1)*n2); + + for (i = 0; i <= db; i++) { + const ZZ_pX& coeff = rep(b.rep[i]); + long dcoeff = deg(coeff); + for (j = 0; j <= dcoeff; j++) + B.rep[n2*i + j] = coeff.rep[j]; + } + + B.normalize(); + + mul(C, A, B); + + long Clen = C.rep.length(); + long lc = (Clen + n2 - 1)/n2; + long dc = lc - 1; + + c.rep.SetLength(dc+1); + + ZZ_pX tmp; + + for (i = 0; i <= dc; i++) { + tmp.rep.SetLength(n2); + for (j = 0; j < n2 && n2*i + j < Clen; j++) + tmp.rep[j] = C.rep[n2*i + j]; + for (; j < n2; j++) + clear(tmp.rep[j]); + tmp.normalize(); + conv(c.rep[i], tmp); + } + + c.normalize(); +} + + +void mul(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pE& b) +{ + if (IsZero(b)) { + clear(x); + return; + } + + ZZ_pE t; + t = b; + + long i, da; + + const ZZ_pE *ap; + ZZ_pE* xp; + + da = deg(a); + x.rep.SetLength(da+1); + ap = a.rep.elts(); + xp = x.rep.elts(); + + for (i = 0; i <= da; i++) + mul(xp[i], ap[i], t); + + x.normalize(); +} + + + +void mul(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_p& b) +{ + if (IsZero(b)) { + clear(x); + return; + } + + NTL_ZZ_pRegister(t); + t = b; + + long i, da; + + const ZZ_pE *ap; + ZZ_pE* xp; + + da = deg(a); + x.rep.SetLength(da+1); + ap = a.rep.elts(); + xp = x.rep.elts(); + + for (i = 0; i <= da; i++) + mul(xp[i], ap[i], t); + + x.normalize(); +} + + +void mul(ZZ_pEX& x, const ZZ_pEX& a, long b) +{ + NTL_ZZ_pRegister(t); + t = b; + mul(x, a, t); +} + +void sqr(ZZ_pEX& c, const ZZ_pEX& a) +{ + if (IsZero(a)) { + clear(c); + return; + } + + if (deg(a) == 0) { + ZZ_pE res; + sqr(res, ConstTerm(a)); + conv(c, res); + return; + } + + // general case...Kronecker subst + + ZZ_pX A, C; + + long da = deg(a); + + long n = ZZ_pE::degree(); + long n2 = 2*n-1; + + if (NTL_OVERFLOW(2*da+1, n2, 0)) + ResourceError("overflow in ZZ_pEX sqr"); + + long i, j; + + A.rep.SetLength((da+1)*n2); + + for (i = 0; i <= da; i++) { + const ZZ_pX& coeff = rep(a.rep[i]); + long dcoeff = deg(coeff); + for (j = 0; j <= dcoeff; j++) + A.rep[n2*i + j] = coeff.rep[j]; + } + + A.normalize(); + + sqr(C, A); + + long Clen = C.rep.length(); + long lc = (Clen + n2 - 1)/n2; + long dc = lc - 1; + + c.rep.SetLength(dc+1); + + ZZ_pX tmp; + + for (i = 0; i <= dc; i++) { + tmp.rep.SetLength(n2); + for (j = 0; j < n2 && n2*i + j < Clen; j++) + tmp.rep[j] = C.rep[n2*i + j]; + for (; j < n2; j++) + clear(tmp.rep[j]); + tmp.normalize(); + conv(c.rep[i], tmp); + } + + + c.normalize(); +} + + +void MulTrunc(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b, long n) +{ + if (n < 0) LogicError("MulTrunc: bad args"); + + ZZ_pEX t; + mul(t, a, b); + trunc(x, t, n); +} + +void SqrTrunc(ZZ_pEX& x, const ZZ_pEX& a, long n) +{ + if (n < 0) LogicError("SqrTrunc: bad args"); + + ZZ_pEX t; + sqr(t, a); + trunc(x, t, n); +} + + +void CopyReverse(ZZ_pEX& x, const ZZ_pEX& a, long hi) + + // x[0..hi] = reverse(a[0..hi]), with zero fill + // input may not alias output + +{ + long i, j, n, m; + + n = hi+1; + m = a.rep.length(); + + x.rep.SetLength(n); + + const ZZ_pE* ap = a.rep.elts(); + 
ZZ_pE* xp = x.rep.elts(); + + for (i = 0; i < n; i++) { + j = hi-i; + if (j < 0 || j >= m) + clear(xp[i]); + else + xp[i] = ap[j]; + } + + x.normalize(); +} + + +void trunc(ZZ_pEX& x, const ZZ_pEX& a, long m) + +// x = a % X^m, output may alias input + +{ + if (m < 0) LogicError("trunc: bad args"); + + if (&x == &a) { + if (x.rep.length() > m) { + x.rep.SetLength(m); + x.normalize(); + } + } + else { + long n; + long i; + ZZ_pE* xp; + const ZZ_pE* ap; + + n = min(a.rep.length(), m); + x.rep.SetLength(n); + + xp = x.rep.elts(); + ap = a.rep.elts(); + + for (i = 0; i < n; i++) xp[i] = ap[i]; + + x.normalize(); + } +} + + +void random(ZZ_pEX& x, long n) +{ + long i; + + x.rep.SetLength(n); + + for (i = 0; i < n; i++) + random(x.rep[i]); + + x.normalize(); +} + +void negate(ZZ_pEX& x, const ZZ_pEX& a) +{ + long n = a.rep.length(); + x.rep.SetLength(n); + + const ZZ_pE* ap = a.rep.elts(); + ZZ_pE* xp = x.rep.elts(); + long i; + + for (i = n; i; i--, ap++, xp++) + negate((*xp), (*ap)); +} + + + +static +void MulByXModAux(ZZ_pEX& h, const ZZ_pEX& a, const ZZ_pEX& f) +{ + long i, n, m; + ZZ_pE* hh; + const ZZ_pE *aa, *ff; + + ZZ_pE t, z; + + n = deg(f); + m = deg(a); + + if (m >= n || n == 0) LogicError("MulByXMod: bad args"); + + if (m < 0) { + clear(h); + return; + } + + if (m < n-1) { + h.rep.SetLength(m+2); + hh = h.rep.elts(); + aa = a.rep.elts(); + for (i = m+1; i >= 1; i--) + hh[i] = aa[i-1]; + clear(hh[0]); + } + else { + h.rep.SetLength(n); + hh = h.rep.elts(); + aa = a.rep.elts(); + ff = f.rep.elts(); + negate(z, aa[n-1]); + if (!IsOne(ff[n])) + div(z, z, ff[n]); + for (i = n-1; i >= 1; i--) { + mul(t, z, ff[i]); + add(hh[i], aa[i-1], t); + } + mul(hh[0], z, ff[0]); + h.normalize(); + } +} + +void MulByXMod(ZZ_pEX& h, const ZZ_pEX& a, const ZZ_pEX& f) +{ + if (&h == &f) { + ZZ_pEX hh; + MulByXModAux(hh, a, f); + h = hh; + } + else + MulByXModAux(h, a, f); +} + + + +void PlainMul(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b) +{ + long da = deg(a); + long db = deg(b); + + if (da < 0 || db < 0) { + clear(x); + return; + } + + long d = da+db; + + + + const ZZ_pE *ap, *bp; + ZZ_pE *xp; + + ZZ_pEX la, lb; + + if (&x == &a) { + la = a; + ap = la.rep.elts(); + } + else + ap = a.rep.elts(); + + if (&x == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + x.rep.SetLength(d+1); + + xp = x.rep.elts(); + + long i, j, jmin, jmax; + ZZ_pX t, accum; + + for (i = 0; i <= d; i++) { + jmin = max(0, i-db); + jmax = min(da, i); + clear(accum); + for (j = jmin; j <= jmax; j++) { + mul(t, rep(ap[j]), rep(bp[i-j])); + add(accum, accum, t); + } + conv(xp[i], accum); + } + x.normalize(); +} + +void SetSize(vec_ZZ_pX& x, long n, long m) +{ + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + x[i].rep.SetMaxLength(m); +} + + + +void PlainDivRem(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b) +{ + long da, db, dq, i, j, LCIsOne; + const ZZ_pE *bp; + ZZ_pE *qp; + ZZ_pX *xp; + + + ZZ_pE LCInv, t; + ZZ_pX s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("ZZ_pEX: division by zero"); + + if (da < db) { + r = a; + clear(q); + return; + } + + ZZ_pEX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + vec_ZZ_pX x; + + SetSize(x, da+1, 2*ZZ_pE::degree()); + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if 
(!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainRem(ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b, vec_ZZ_pX& x) +{ + long da, db, dq, i, j, LCIsOne; + const ZZ_pE *bp; + ZZ_pX *xp; + + + ZZ_pE LCInv, t; + ZZ_pX s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("ZZ_pEX: division by zero"); + + if (da < db) { + r = a; + return; + } + + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainDivRem(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b, + vec_ZZ_pX& x) +{ + long da, db, dq, i, j, LCIsOne; + const ZZ_pE *bp; + ZZ_pE *qp; + ZZ_pX *xp; + + + ZZ_pE LCInv, t; + ZZ_pX s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("ZZ_pEX: division by zero"); + + if (da < db) { + r = a; + clear(q); + return; + } + + ZZ_pEX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainDiv(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEX& b) +{ + long da, db, dq, i, j, LCIsOne; + const ZZ_pE *bp; + ZZ_pE *qp; + ZZ_pX *xp; + + + ZZ_pE LCInv, t; + ZZ_pX s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("ZZ_pEX: division by zero"); + + if (da < db) { + clear(q); + return; + } + + ZZ_pEX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + vec_ZZ_pX x; + SetSize(x, da+1-db, 2*ZZ_pE::degree()); + + for (i = db; i <= da; i++) + x[i-db] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i]); + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + negate(t, t); + + long lastj = max(0, db-i); + + for (j = db-1; j >= lastj; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j-db], xp[i+j-db], s); + } + } +} + +void PlainRem(ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b) +{ + long da, db, dq, i, j, LCIsOne; + const ZZ_pE *bp; + ZZ_pX *xp; + + + ZZ_pE LCInv, t; + ZZ_pX s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("ZZ_pEX: division by zero"); + + if (da < db) { + r = a; + return; + } + + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + vec_ZZ_pX x; + SetSize(x, da + 1, 2*ZZ_pE::degree()); + + for (i = 0; i <= da; i++) + x[i] = 
rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + + +void RightShift(ZZ_pEX& x, const ZZ_pEX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) ResourceError("overflow in RightShift"); + LeftShift(x, a, -n); + return; + } + + long da = deg(a); + long i; + + if (da < n) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(da-n+1); + + for (i = 0; i <= da-n; i++) + x.rep[i] = a.rep[i+n]; + + if (&x == &a) + x.rep.SetLength(da-n+1); + + x.normalize(); +} + +void LeftShift(ZZ_pEX& x, const ZZ_pEX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) + clear(x); + else + RightShift(x, a, -n); + return; + } + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in LeftShift"); + + long m = a.rep.length(); + + x.rep.SetLength(m+n); + + long i; + for (i = m-1; i >= 0; i--) + x.rep[i+n] = a.rep[i]; + + for (i = 0; i < n; i++) + clear(x.rep[i]); +} + + + +void NewtonInv(ZZ_pEX& c, const ZZ_pEX& a, long e) +{ + ZZ_pE x; + + inv(x, ConstTerm(a)); + + if (e == 1) { + conv(c, x); + return; + } + + vec_long E; + E.SetLength(0); + append(E, e); + while (e > 1) { + e = (e+1)/2; + append(E, e); + } + + long L = E.length(); + + ZZ_pEX g, g0, g1, g2; + + + g.rep.SetMaxLength(E[0]); + g0.rep.SetMaxLength(E[0]); + g1.rep.SetMaxLength((3*E[0]+1)/2); + g2.rep.SetMaxLength(E[0]); + + conv(g, x); + + long i; + + for (i = L-1; i > 0; i--) { + // lift from E[i] to E[i-1] + + long k = E[i]; + long l = E[i-1]-E[i]; + + trunc(g0, a, k+l); + + mul(g1, g0, g); + RightShift(g1, g1, k); + trunc(g1, g1, l); + + mul(g2, g1, g); + trunc(g2, g2, l); + LeftShift(g2, g2, k); + + sub(g, g, g2); + } + + c = g; +} + +void InvTrunc(ZZ_pEX& c, const ZZ_pEX& a, long e) +{ + if (e < 0) LogicError("InvTrunc: bad args"); + if (e == 0) { + clear(c); + return; + } + + if (NTL_OVERFLOW(e, 1, 0)) + ResourceError("overflow in InvTrunc"); + + NewtonInv(c, a, e); +} + + + + +const long ZZ_pEX_MOD_PLAIN = 0; +const long ZZ_pEX_MOD_MUL = 1; + + +void build(ZZ_pEXModulus& F, const ZZ_pEX& f) +{ + long n = deg(f); + + if (n <= 0) LogicError("build(ZZ_pEXModulus,ZZ_pEX): deg(f) <= 0"); + + if (NTL_OVERFLOW(n, ZZ_pE::degree(), 0)) + ResourceError("build(ZZ_pEXModulus,ZZ_pEX): overflow"); + + + F.tracevec.make(); + + F.f = f; + F.n = n; + + if (F.n < ZZ_pE::ModCross()) { + F.method = ZZ_pEX_MOD_PLAIN; + } + else { + F.method = ZZ_pEX_MOD_MUL; + ZZ_pEX P1; + ZZ_pEX P2; + + CopyReverse(P1, f, n); + InvTrunc(P2, P1, n-1); + CopyReverse(P1, P2, n-2); + trunc(F.h0, P1, n-2); + trunc(F.f0, f, n); + F.hlc = ConstTerm(P2); + } +} + + + +ZZ_pEXModulus::ZZ_pEXModulus() +{ + n = -1; + method = ZZ_pEX_MOD_PLAIN; +} + + +ZZ_pEXModulus::~ZZ_pEXModulus() +{ +} + + + +ZZ_pEXModulus::ZZ_pEXModulus(const ZZ_pEX& ff) +{ + n = -1; + method = ZZ_pEX_MOD_PLAIN; + + build(*this, ff); +} + + +void UseMulRem21(ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEXModulus& F) +{ + ZZ_pEX P1; + ZZ_pEX P2; + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, P2, F.n-2); + if (!IsOne(F.hlc)) mul(P1, P1, F.hlc); + add(P2, P2, P1); + mul(P1, P2, F.f0); + trunc(P1, P1, F.n); + trunc(r, a, F.n); + sub(r, r, P1); +} + +void UseMulDivRem21(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& 
a, const ZZ_pEXModulus& F) +{ + ZZ_pEX P1; + ZZ_pEX P2; + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, P2, F.n-2); + if (!IsOne(F.hlc)) mul(P1, P1, F.hlc); + add(P2, P2, P1); + mul(P1, P2, F.f0); + trunc(P1, P1, F.n); + trunc(r, a, F.n); + sub(r, r, P1); + q = P2; +} + +void UseMulDiv21(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEXModulus& F) +{ + ZZ_pEX P1; + ZZ_pEX P2; + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, P2, F.n-2); + if (!IsOne(F.hlc)) mul(P1, P1, F.hlc); + add(P2, P2, P1); + q = P2; + +} + + +void rem(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEXModulus& F) +{ + if (F.method == ZZ_pEX_MOD_PLAIN) { + PlainRem(x, a, F.f); + return; + } + + long da = deg(a); + long n = F.n; + + if (da <= 2*n-2) { + UseMulRem21(x, a, F); + return; + } + + ZZ_pEX buf(INIT_SIZE, 2*n-1); + + long a_len = da+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + UseMulRem21(buf, buf, F); + + a_len -= amt; + } + + x = buf; +} + +void DivRem(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEXModulus& F) +{ + if (F.method == ZZ_pEX_MOD_PLAIN) { + PlainDivRem(q, r, a, F.f); + return; + } + + long da = deg(a); + long n = F.n; + + if (da <= 2*n-2) { + UseMulDivRem21(q, r, a, F); + return; + } + + ZZ_pEX buf(INIT_SIZE, 2*n-1); + ZZ_pEX qbuf(INIT_SIZE, n-1); + + ZZ_pEX qq; + qq.rep.SetLength(da-n+1); + + long a_len = da+1; + long q_hi = da-n+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + UseMulDivRem21(qbuf, buf, buf, F); + long dl = qbuf.rep.length(); + a_len = a_len - amt; + for(i = 0; i < dl; i++) + qq.rep[a_len+i] = qbuf.rep[i]; + for(i = dl+a_len; i < q_hi; i++) + clear(qq.rep[i]); + q_hi = a_len; + } + + r = buf; + + qq.normalize(); + q = qq; +} + +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEXModulus& F) +{ + if (F.method == ZZ_pEX_MOD_PLAIN) { + PlainDiv(q, a, F.f); + return; + } + + long da = deg(a); + long n = F.n; + + if (da <= 2*n-2) { + UseMulDiv21(q, a, F); + return; + } + + ZZ_pEX buf(INIT_SIZE, 2*n-1); + ZZ_pEX qbuf(INIT_SIZE, n-1); + + ZZ_pEX qq; + qq.rep.SetLength(da-n+1); + + long a_len = da+1; + long q_hi = da-n+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + a_len = a_len - amt; + if (a_len > 0) + UseMulDivRem21(qbuf, buf, buf, F); + else + UseMulDiv21(qbuf, buf, F); + + long dl = qbuf.rep.length(); + for(i = 0; i < dl; i++) + qq.rep[a_len+i] = qbuf.rep[i]; + for(i = dl+a_len; i < q_hi; i++) + clear(qq.rep[i]); + q_hi = a_len; + } + + qq.normalize(); + q = qq; +} + + + + +void MulMod(ZZ_pEX& c, const ZZ_pEX& a, const ZZ_pEX& b, const ZZ_pEXModulus& F) +{ + if (deg(a) >= F.n || deg(b) >= F.n) LogicError("MulMod: bad args"); + + ZZ_pEX t; + mul(t, a, b); + rem(c, t, F); +} + + +void SqrMod(ZZ_pEX& c, const ZZ_pEX& a, const 
ZZ_pEXModulus& F) +{ + if (deg(a) >= F.n) LogicError("MulMod: bad args"); + + ZZ_pEX t; + sqr(t, a); + rem(c, t, F); +} + + + +void UseMulRem(ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b) +{ + ZZ_pEX P1; + ZZ_pEX P2; + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + mul(P1, P2, b); + sub(P1, a, P1); + + r = P1; +} + +void UseMulDivRem(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b) +{ + ZZ_pEX P1; + ZZ_pEX P2; + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + mul(P1, P2, b); + sub(P1, a, P1); + + r = P1; + q = P2; +} + +void UseMulDiv(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEX& b) +{ + ZZ_pEX P1; + ZZ_pEX P2; + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + + q = P2; +} + + + +void DivRem(ZZ_pEX& q, ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b) +{ + long sa = a.rep.length(); + long sb = b.rep.length(); + + if (sb < ZZ_pE::DivCross() || sa-sb < ZZ_pE::DivCross()) + PlainDivRem(q, r, a, b); + else if (sa < 4*sb) + UseMulDivRem(q, r, a, b); + else { + ZZ_pEXModulus B; + build(B, b); + DivRem(q, r, a, B); + } +} + +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEX& b) +{ + long sa = a.rep.length(); + long sb = b.rep.length(); + + if (sb < ZZ_pE::DivCross() || sa-sb < ZZ_pE::DivCross()) + PlainDiv(q, a, b); + else if (sa < 4*sb) + UseMulDiv(q, a, b); + else { + ZZ_pEXModulus B; + build(B, b); + div(q, a, B); + } +} + +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pE& b) +{ + ZZ_pE T; + inv(T, b); + mul(q, a, T); +} + +void div(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_p& b) +{ + NTL_ZZ_pRegister(T); + inv(T, b); + mul(q, a, T); +} + +void div(ZZ_pEX& q, const ZZ_pEX& a, long b) +{ + NTL_ZZ_pRegister(T); + T = b; + inv(T, T); + mul(q, a, T); +} + +void rem(ZZ_pEX& r, const ZZ_pEX& a, const ZZ_pEX& b) +{ + long sa = a.rep.length(); + long sb = b.rep.length(); + + if (sb < ZZ_pE::DivCross() || sa-sb < ZZ_pE::DivCross()) + PlainRem(r, a, b); + else if (sa < 4*sb) + UseMulRem(r, a, b); + else { + ZZ_pEXModulus B; + build(B, b); + rem(r, a, B); + } +} + +void GCD(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b) +{ + ZZ_pE t; + + if (IsZero(b)) + x = a; + else if (IsZero(a)) + x = b; + else { + long n = max(deg(a),deg(b)) + 1; + ZZ_pEX u(INIT_SIZE, n), v(INIT_SIZE, n); + + vec_ZZ_pX tmp; + SetSize(tmp, n, 2*ZZ_pE::degree()); + + u = a; + v = b; + do { + PlainRem(u, u, v, tmp); + swap(u, v); + } while (!IsZero(v)); + + x = u; + } + + if (IsZero(x)) return; + if (IsOne(LeadCoeff(x))) return; + + /* make gcd monic */ + + + inv(t, LeadCoeff(x)); + mul(x, x, t); +} + + + + + +void XGCD(ZZ_pEX& d, ZZ_pEX& s, ZZ_pEX& t, const ZZ_pEX& a, const ZZ_pEX& b) +{ + ZZ_pE z; + + + if (IsZero(b)) { + set(s); + clear(t); + d = a; + } + else if (IsZero(a)) { + clear(s); + set(t); + d = b; + } + else { + long e = max(deg(a), deg(b)) + 1; + + ZZ_pEX temp(INIT_SIZE, e), u(INIT_SIZE, e), v(INIT_SIZE, e), + u0(INIT_SIZE, e), v0(INIT_SIZE, e), + u1(INIT_SIZE, e), v1(INIT_SIZE, e), + u2(INIT_SIZE, e), v2(INIT_SIZE, e), q(INIT_SIZE, e); + + + set(u1); clear(v1); + clear(u2); set(v2); + u = a; v = b; + + do { + DivRem(q, u, u, v); + swap(u, v); + u0 = u2; + v0 = v2; + 
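
The XGCD loop in progress here maintains the classical extended-Euclid invariants u = u1*a + v1*b and v = u2*a + v2*b, so when v reaches zero, u is the gcd and (u1, v1) are its Bezout coefficients (the tail of the function then scales everything to make the gcd monic). The same invariant over plain integers, as a self-contained toy sketch:

    #include <cstdio>

    int main() {
        long a = 240, b = 46;               // toy inputs
        long u = a, v = b;                  // u = u1*a + v1*b, v = u2*a + v2*b
        long u1 = 1, v1 = 0, u2 = 0, v2 = 1;
        while (v != 0) {
            long q = u / v;
            long r = u % v;                 // r = (u1 - q*u2)*a + (v1 - q*v2)*b
            u = v; v = r;
            long t;
            t = u1 - q * u2; u1 = u2; u2 = t;
            t = v1 - q * v2; v1 = v2; v2 = t;
        }
        printf("gcd = %ld = (%ld)*a + (%ld)*b\n", u, u1, v1); // 2 = (-9)*240 + 47*46
        return 0;
    }
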
mul(temp, q, u2); + sub(u2, u1, temp); + mul(temp, q, v2); + sub(v2, v1, temp); + u1 = u0; + v1 = v0; + } while (!IsZero(v)); + + d = u; + s = u1; + t = v1; + } + + if (IsZero(d)) return; + if (IsOne(LeadCoeff(d))) return; + + /* make gcd monic */ + + inv(z, LeadCoeff(d)); + mul(d, d, z); + mul(s, s, z); + mul(t, t, z); +} + +void IterBuild(ZZ_pE* a, long n) +{ + long i, k; + ZZ_pE b, t; + + if (n <= 0) return; + + negate(a[0], a[0]); + + for (k = 1; k <= n-1; k++) { + negate(b, a[k]); + add(a[k], b, a[k-1]); + for (i = k-1; i >= 1; i--) { + mul(t, a[i], b); + add(a[i], t, a[i-1]); + } + mul(a[0], a[0], b); + } +} + +void BuildFromRoots(ZZ_pEX& x, const vec_ZZ_pE& a) +{ + long n = a.length(); + + if (n == 0) { + set(x); + return; + } + + x.rep.SetMaxLength(n+1); + x.rep = a; + IterBuild(&x.rep[0], n); + x.rep.SetLength(n+1); + SetCoeff(x, n); +} + +void eval(ZZ_pE& b, const ZZ_pEX& f, const ZZ_pE& a) +// does a Horner evaluation +{ + ZZ_pE acc; + long i; + + clear(acc); + for (i = deg(f); i >= 0; i--) { + mul(acc, acc, a); + add(acc, acc, f.rep[i]); + } + + b = acc; +} + +void eval(vec_ZZ_pE& b, const ZZ_pEX& f, const vec_ZZ_pE& a) +// naive algorithm: repeats Horner +{ + if (&b == &f.rep) { + vec_ZZ_pE bb; + eval(bb, f, a); + b = bb; + return; + } + + long m = a.length(); + b.SetLength(m); + long i; + for (i = 0; i < m; i++) + eval(b[i], f, a[i]); +} + + +void interpolate(ZZ_pEX& f, const vec_ZZ_pE& a, const vec_ZZ_pE& b) +{ + long m = a.length(); + if (b.length() != m) LogicError("interpolate: vector length mismatch"); + + if (m == 0) { + clear(f); + return; + } + + vec_ZZ_pE prod; + prod = a; + + ZZ_pE t1, t2; + + long k, i; + + vec_ZZ_pE res; + res.SetLength(m); + + for (k = 0; k < m; k++) { + + const ZZ_pE& aa = a[k]; + + set(t1); + for (i = k-1; i >= 0; i--) { + mul(t1, t1, aa); + add(t1, t1, prod[i]); + } + + clear(t2); + for (i = k-1; i >= 0; i--) { + mul(t2, t2, aa); + add(t2, t2, res[i]); + } + + + inv(t1, t1); + sub(t2, b[k], t2); + mul(t1, t1, t2); + + for (i = 0; i < k; i++) { + mul(t2, prod[i], t1); + add(res[i], res[i], t2); + } + + res[k] = t1; + + if (k < m-1) { + if (k == 0) + negate(prod[0], prod[0]); + else { + negate(t1, a[k]); + add(prod[k], t1, prod[k-1]); + for (i = k-1; i >= 1; i--) { + mul(t2, prod[i], t1); + add(prod[i], t2, prod[i-1]); + } + mul(prod[0], prod[0], t1); + } + } + } + + while (m > 0 && IsZero(res[m-1])) m--; + res.SetLength(m); + f.rep = res; +} + +void InnerProduct(ZZ_pEX& x, const vec_ZZ_pE& v, long low, long high, + const vec_ZZ_pEX& H, long n, vec_ZZ_pX& t) +{ + ZZ_pX s; + long i, j; + + for (j = 0; j < n; j++) + clear(t[j]); + + high = min(high, v.length()-1); + for (i = low; i <= high; i++) { + const vec_ZZ_pE& h = H[i-low].rep; + long m = h.length(); + const ZZ_pX& w = rep(v[i]); + + for (j = 0; j < m; j++) { + mul(s, w, rep(h[j])); + add(t[j], t[j], s); + } + } + + x.rep.SetLength(n); + for (j = 0; j < n; j++) + conv(x.rep[j], t[j]); + x.normalize(); +} + + + +void CompMod(ZZ_pEX& x, const ZZ_pEX& g, const ZZ_pEXArgument& A, + const ZZ_pEXModulus& F) +{ + if (deg(g) <= 0) { + x = g; + return; + } + + + ZZ_pEX s, t; + vec_ZZ_pX scratch; + SetSize(scratch, deg(F), 2*ZZ_pE::degree()); + + long m = A.H.length() - 1; + long l = ((g.rep.length()+m-1)/m) - 1; + + const ZZ_pEX& M = A.H[m]; + + InnerProduct(t, g.rep, l*m, l*m + m - 1, A.H, F.n, scratch); + for (long i = l-1; i >= 0; i--) { + InnerProduct(s, g.rep, i*m, i*m + m - 1, A.H, F.n, scratch); + MulMod(t, t, M, F); + add(t, t, s); + } + + x = t; +} + + +void build(ZZ_pEXArgument& A, const 
ZZ_pEX& h, const ZZ_pEXModulus& F, long m) +{ + long i; + + if (m <= 0 || deg(h) >= F.n) + LogicError("build: bad args"); + + if (m > F.n) m = F.n; + + if (ZZ_pEXArgBound > 0) { + double sz = ZZ_p::storage(); + sz = sz*ZZ_pE::degree(); + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_ZZ_p); + sz = sz*F.n; + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_ZZ_pE); + sz = sz/1024; + m = min(m, long(ZZ_pEXArgBound/sz)); + m = max(m, 1); + } + + + A.H.SetLength(m+1); + + set(A.H[0]); + A.H[1] = h; + for (i = 2; i <= m; i++) + MulMod(A.H[i], A.H[i-1], h, F); +} + +NTL_CHEAP_THREAD_LOCAL long ZZ_pEXArgBound = 0; + + + + +void CompMod(ZZ_pEX& x, const ZZ_pEX& g, const ZZ_pEX& h, const ZZ_pEXModulus& F) + // x = g(h) mod f +{ + long m = SqrRoot(g.rep.length()); + + if (m == 0) { + clear(x); + return; + } + + ZZ_pEXArgument A; + + build(A, h, F, m); + + CompMod(x, g, A, F); +} + + + + +void Comp2Mod(ZZ_pEX& x1, ZZ_pEX& x2, const ZZ_pEX& g1, const ZZ_pEX& g2, + const ZZ_pEX& h, const ZZ_pEXModulus& F) + +{ + long m = SqrRoot(g1.rep.length() + g2.rep.length()); + + if (m == 0) { + clear(x1); + clear(x2); + return; + } + + ZZ_pEXArgument A; + + build(A, h, F, m); + + ZZ_pEX xx1, xx2; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + + x1 = xx1; + x2 = xx2; +} + +void Comp3Mod(ZZ_pEX& x1, ZZ_pEX& x2, ZZ_pEX& x3, + const ZZ_pEX& g1, const ZZ_pEX& g2, const ZZ_pEX& g3, + const ZZ_pEX& h, const ZZ_pEXModulus& F) + +{ + long m = SqrRoot(g1.rep.length() + g2.rep.length() + g3.rep.length()); + + if (m == 0) { + clear(x1); + clear(x2); + clear(x3); + return; + } + + ZZ_pEXArgument A; + + build(A, h, F, m); + + ZZ_pEX xx1, xx2, xx3; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + CompMod(xx3, g3, A, F); + + x1 = xx1; + x2 = xx2; + x3 = xx3; +} + +void build(ZZ_pEXTransMultiplier& B, const ZZ_pEX& b, const ZZ_pEXModulus& F) +{ + long db = deg(b); + + if (db >= F.n) LogicError("build TransMultiplier: bad args"); + + ZZ_pEX t; + + LeftShift(t, b, F.n-1); + div(t, t, F); + + // we optimize for low degree b + + long d; + + d = deg(t); + if (d < 0) + B.shamt_fbi = 0; + else + B.shamt_fbi = F.n-2 - d; + + CopyReverse(B.fbi, t, d); + + // The following code optimizes the case when + // f = X^n + low degree poly + + trunc(t, F.f, F.n); + d = deg(t); + if (d < 0) + B.shamt = 0; + else + B.shamt = d; + + CopyReverse(B.f0, t, d); + + if (db < 0) + B.shamt_b = 0; + else + B.shamt_b = db; + + CopyReverse(B.b, b, db); +} + +void TransMulMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEXTransMultiplier& B, + const ZZ_pEXModulus& F) +{ + if (deg(a) >= F.n) LogicError("TransMulMod: bad args"); + + ZZ_pEX t1, t2; + + mul(t1, a, B.b); + RightShift(t1, t1, B.shamt_b); + + mul(t2, a, B.f0); + RightShift(t2, t2, B.shamt); + trunc(t2, t2, F.n-1); + + mul(t2, t2, B.fbi); + if (B.shamt_fbi > 0) LeftShift(t2, t2, B.shamt_fbi); + trunc(t2, t2, F.n-1); + LeftShift(t2, t2, 1); + + sub(x, t1, t2); +} + + +void ShiftSub(ZZ_pEX& U, const ZZ_pEX& V, long n) +// assumes input does not alias output +{ + if (IsZero(V)) + return; + + long du = deg(U); + long dv = deg(V); + + long d = max(du, n+dv); + + U.rep.SetLength(d+1); + long i; + + for (i = du+1; i <= d; i++) + clear(U.rep[i]); + + for (i = 0; i <= dv; i++) + sub(U.rep[i+n], U.rep[i+n], V.rep[i]); + + U.normalize(); +} + + +void UpdateMap(vec_ZZ_pE& x, const vec_ZZ_pE& a, + const ZZ_pEXTransMultiplier& B, const ZZ_pEXModulus& F) +{ + ZZ_pEX xx; + TransMulMod(xx, to_ZZ_pEX(a), B, F); + x = xx.rep; +} + +static +void ProjectPowers(vec_ZZ_pE& x, const ZZ_pEX& a, long k, + const 
ZZ_pEXArgument& H, const ZZ_pEXModulus& F) +{ + if (k < 0 || deg(a) >= F.n) + LogicError("ProjectPowers: bad args"); + + if (NTL_OVERFLOW(k, 1, 0)) + ResourceError("ProjectPowers: excessive args"); + + + long m = H.H.length()-1; + long l = (k+m-1)/m - 1; + + ZZ_pEXTransMultiplier M; + build(M, H.H[m], F); + + ZZ_pEX s; + s = a; + + x.SetLength(k); + + long i; + + for (i = 0; i <= l; i++) { + long m1 = min(m, k-i*m); + for (long j = 0; j < m1; j++) + InnerProduct(x[i*m+j], H.H[j].rep, s.rep); + if (i < l) + TransMulMod(s, s, M, F); + } +} + +static +void ProjectPowers(vec_ZZ_pE& x, const ZZ_pEX& a, long k, const ZZ_pEX& h, + const ZZ_pEXModulus& F) +{ + if (k < 0 || deg(a) >= F.n || deg(h) >= F.n) + LogicError("ProjectPowers: bad args"); + + if (k == 0) { + x.SetLength(0);; + return; + } + + long m = SqrRoot(k); + + ZZ_pEXArgument H; + build(H, h, F, m); + + ProjectPowers(x, a, k, H, F); +} + +void ProjectPowers(vec_ZZ_pE& x, const vec_ZZ_pE& a, long k, + const ZZ_pEXArgument& H, const ZZ_pEXModulus& F) +{ + ProjectPowers(x, to_ZZ_pEX(a), k, H, F); +} + +void ProjectPowers(vec_ZZ_pE& x, const vec_ZZ_pE& a, long k, + const ZZ_pEX& h, const ZZ_pEXModulus& F) +{ + ProjectPowers(x, to_ZZ_pEX(a), k, h, F); +} + + + + +void BerlekampMassey(ZZ_pEX& h, const vec_ZZ_pE& a, long m) +{ + ZZ_pEX Lambda, Sigma, Temp; + long L; + ZZ_pE Delta, Delta1, t1; + long shamt; + + // cerr << "*** " << m << "\n"; + + Lambda.SetMaxLength(m+1); + Sigma.SetMaxLength(m+1); + Temp.SetMaxLength(m+1); + + L = 0; + set(Lambda); + clear(Sigma); + set(Delta); + shamt = 0; + + long i, r, dl; + + for (r = 1; r <= 2*m; r++) { + // cerr << r << "--"; + clear(Delta1); + dl = deg(Lambda); + for (i = 0; i <= dl; i++) { + mul(t1, Lambda.rep[i], a[r-i-1]); + add(Delta1, Delta1, t1); + } + + if (IsZero(Delta1)) { + shamt++; + // cerr << "case 1: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + else if (2*L < r) { + div(t1, Delta1, Delta); + mul(Temp, Sigma, t1); + Sigma = Lambda; + ShiftSub(Lambda, Temp, shamt+1); + shamt = 0; + L = r-L; + Delta = Delta1; + // cerr << "case 2: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + else { + shamt++; + div(t1, Delta1, Delta); + mul(Temp, Sigma, t1); + ShiftSub(Lambda, Temp, shamt); + // cerr << "case 3: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + } + + // cerr << "finished: " << L << " " << deg(Lambda) << "\n"; + + dl = deg(Lambda); + h.rep.SetLength(L + 1); + + for (i = 0; i < L - dl; i++) + clear(h.rep[i]); + + for (i = L - dl; i <= L; i++) + h.rep[i] = Lambda.rep[L - i]; +} + + + + +void MinPolySeq(ZZ_pEX& h, const vec_ZZ_pE& a, long m) +{ + if (m < 0 || NTL_OVERFLOW(m, 1, 0)) LogicError("MinPoly: bad args"); + if (a.length() < 2*m) LogicError("MinPoly: sequence too short"); + + BerlekampMassey(h, a, m); +} + + +void DoMinPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m, + const ZZ_pEX& R) +{ + vec_ZZ_pE x; + + ProjectPowers(x, R, 2*m, g, F); + MinPolySeq(h, x, m); +} + +void ProbMinPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m) +{ + long n = F.n; + if (m < 1 || m > n) LogicError("ProbMinPoly: bad args"); + + ZZ_pEX R; + random(R, n); + + DoMinPolyMod(h, g, F, m, R); +} + +void ProbMinPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F) +{ + ProbMinPolyMod(h, g, F, F.n); +} + +void MinPolyMod(ZZ_pEX& hh, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m) +{ + ZZ_pEX h, h1; + long n = F.n; + if (m < 1 || m > n) LogicError("MinPoly: bad args"); + + /* probabilistically compute 
min-poly */ + + ProbMinPolyMod(h, g, F, m); + if (deg(h) == m) { hh = h; return; } + CompMod(h1, h, g, F); + if (IsZero(h1)) { hh = h; return; } + + /* not completely successful...must iterate */ + + ZZ_pEX h2, h3; + ZZ_pEX R; + ZZ_pEXTransMultiplier H1; + + + for (;;) { + random(R, n); + build(H1, h1, F); + TransMulMod(R, R, H1, F); + DoMinPolyMod(h2, g, F, m-deg(h), R); + + mul(h, h, h2); + if (deg(h) == m) { hh = h; return; } + CompMod(h3, h2, g, F); + MulMod(h1, h3, h1, F); + if (IsZero(h1)) { hh = h; return; } + } +} + +void IrredPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m) +{ + if (m < 1 || m > F.n) LogicError("IrredPoly: bad args"); + + ZZ_pEX R; + set(R); + + DoMinPolyMod(h, g, F, m, R); +} + + + +void IrredPolyMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F) +{ + IrredPolyMod(h, g, F, F.n); +} + + + +void MinPolyMod(ZZ_pEX& hh, const ZZ_pEX& g, const ZZ_pEXModulus& F) +{ + MinPolyMod(hh, g, F, F.n); +} + +void diff(ZZ_pEX& x, const ZZ_pEX& a) +{ + long n = deg(a); + long i; + + if (n <= 0) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(n); + + for (i = 0; i <= n-1; i++) { + mul(x.rep[i], a.rep[i+1], i+1); + } + + if (&x == &a) + x.rep.SetLength(n); + + x.normalize(); +} + + + +void MakeMonic(ZZ_pEX& x) +{ + if (IsZero(x)) + return; + + if (IsOne(LeadCoeff(x))) + return; + + ZZ_pE t; + + inv(t, LeadCoeff(x)); + mul(x, x, t); +} + + +long divide(ZZ_pEX& q, const ZZ_pEX& a, const ZZ_pEX& b) +{ + if (IsZero(b)) { + if (IsZero(a)) { + clear(q); + return 1; + } + else + return 0; + } + + ZZ_pEX lq, r; + DivRem(lq, r, a, b); + if (!IsZero(r)) return 0; + q = lq; + return 1; +} + +long divide(const ZZ_pEX& a, const ZZ_pEX& b) +{ + if (IsZero(b)) return IsZero(a); + ZZ_pEX lq, r; + DivRem(lq, r, a, b); + if (!IsZero(r)) return 0; + return 1; +} + + + +static +long OptWinSize(long n) +// finds k that minimizes n/(k+1) + 2^{k-1} + +{ + long k; + double v, v_new; + + + v = n/2.0 + 1.0; + k = 1; + + for (;;) { + v_new = n/(double(k+2)) + double(1L << k); + if (v_new >= v) break; + v = v_new; + k++; + } + + return k; +} + + + +void PowerMod(ZZ_pEX& h, const ZZ_pEX& g, const ZZ& e, const ZZ_pEXModulus& F) +// h = g^e mod f using "sliding window" algorithm +{ + if (deg(g) >= F.n) LogicError("PowerMod: bad args"); + + if (e == 0) { + set(h); + return; + } + + if (e == 1) { + h = g; + return; + } + + if (e == -1) { + InvMod(h, g, F); + return; + } + + if (e == 2) { + SqrMod(h, g, F); + return; + } + + if (e == -2) { + SqrMod(h, g, F); + InvMod(h, h, F); + return; + } + + + long n = NumBits(e); + + ZZ_pEX res; + res.SetMaxLength(F.n); + set(res); + + long i; + + if (n < 16) { + // plain square-and-multiply algorithm + + for (i = n - 1; i >= 0; i--) { + SqrMod(res, res, F); + if (bit(e, i)) + MulMod(res, res, g, F); + } + + if (e < 0) InvMod(res, res, F); + + h = res; + return; + } + + long k = OptWinSize(n); + k = min(k, 3); + + vec_ZZ_pEX v; + + v.SetLength(1L << (k-1)); + + v[0] = g; + + if (k > 1) { + ZZ_pEX t; + SqrMod(t, g, F); + + for (i = 1; i < (1L << (k-1)); i++) + MulMod(v[i], v[i-1], t, F); + } + + + long val; + long cnt; + long m; + + val = 0; + for (i = n-1; i >= 0; i--) { + val = (val << 1) | bit(e, i); + if (val == 0) + SqrMod(res, res, F); + else if (val >= (1L << (k-1)) || i == 0) { + cnt = 0; + while ((val & 1) == 0) { + val = val >> 1; + cnt++; + } + + m = val; + while (m > 0) { + SqrMod(res, res, F); + m = m >> 1; + } + + MulMod(res, res, v[val >> 1], F); + + while (cnt > 0) { + SqrMod(res, res, F); + cnt--; + } + + val = 0; + } + } 
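
PowerMod above is a sliding-window ladder: the table v holds the odd powers g, g^3, g^5, ..., a run of exponent bits of width up to k costs one table multiplication, and OptWinSize picks k to balance roughly n/(k+1) multiplications against 2^(k-1) table entries. The plain square-and-multiply fallback taken when n < 16 has this shape over machine integers (a toy sketch, not NTL's API):

    #include <cstdio>

    // powmod: left-to-right binary exponentiation -- one squaring per bit,
    // plus a multiply on each set bit, mirroring the n < 16 branch above.
    long powmod(long g, long e, long m) {
        long r = 1 % m;
        for (int i = 62; i >= 0; i--) {
            r = (r * r) % m;              // SqrMod step
            if ((e >> i) & 1)
                r = (r * g) % m;          // MulMod step on a set bit
        }
        return r;
    }

    int main() {
        printf("3^1000 mod 1000003 = %ld\n", powmod(3, 1000, 1000003));
        return 0;
    }
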
+ + if (e < 0) InvMod(res, res, F); + + h = res; +} + +void InvMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("InvMod: bad args"); + + ZZ_pEX d, xx, t; + + XGCD(d, xx, t, a, f); + if (!IsOne(d)) + InvModError("ZZ_pEX InvMod: can't compute multiplicative inverse"); + + x = xx; +} + +long InvModStatus(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("InvModStatus: bad args"); + ZZ_pEX d, t; + + XGCD(d, x, t, a, f); + if (!IsOne(d)) { + x = d; + return 1; + } + else + return 0; +} + + +void MulMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& b, const ZZ_pEX& f) +{ + if (deg(a) >= deg(f) || deg(b) >= deg(f) || deg(f) == 0) + LogicError("MulMod: bad args"); + + ZZ_pEX t; + + mul(t, a, b); + rem(x, t, f); +} + +void SqrMod(ZZ_pEX& x, const ZZ_pEX& a, const ZZ_pEX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("SqrMod: bad args"); + + ZZ_pEX t; + + sqr(t, a); + rem(x, t, f); +} + + +void PowerXMod(ZZ_pEX& hh, const ZZ& e, const ZZ_pEXModulus& F) +{ + if (F.n < 0) LogicError("PowerXMod: uninitialized modulus"); + + if (IsZero(e)) { + set(hh); + return; + } + + long n = NumBits(e); + long i; + + ZZ_pEX h; + + h.SetMaxLength(F.n); + set(h); + + for (i = n - 1; i >= 0; i--) { + SqrMod(h, h, F); + if (bit(e, i)) + MulByXMod(h, h, F.f); + } + + if (e < 0) InvMod(h, h, F); + + hh = h; +} + + +void reverse(ZZ_pEX& x, const ZZ_pEX& a, long hi) +{ + if (hi < 0) { clear(x); return; } + if (NTL_OVERFLOW(hi, 1, 0)) + ResourceError("overflow in reverse"); + + if (&x == &a) { + ZZ_pEX tmp; + CopyReverse(tmp, a, hi); + x = tmp; + } + else + CopyReverse(x, a, hi); +} + + +void power(ZZ_pEX& x, const ZZ_pEX& a, long e) +{ + if (e < 0) { + ArithmeticError("power: negative exponent"); + } + + if (e == 0) { + x = 1; + return; + } + + if (a == 0 || a == 1) { + x = a; + return; + } + + long da = deg(a); + + if (da == 0) { + x = power(ConstTerm(a), e); + return; + } + + if (da > (NTL_MAX_LONG-1)/e) + ResourceError("overflow in power"); + + ZZ_pEX res; + res.SetMaxLength(da*e + 1); + res = 1; + + long k = NumBits(e); + long i; + + for (i = k - 1; i >= 0; i--) { + sqr(res, res); + if (bit(e, i)) + mul(res, res, a); + } + + x = res; +} + + + +static +void FastTraceVec(vec_ZZ_pE& S, const ZZ_pEXModulus& f) +{ + long n = deg(f); + + ZZ_pEX x = reverse(-LeftShift(reverse(diff(reverse(f)), n-1), n-1)/f, n-1); + + S.SetLength(n); + S[0] = n; + + long i; + for (i = 1; i < n; i++) + S[i] = coeff(x, i); +} + + +void PlainTraceVec(vec_ZZ_pE& S, const ZZ_pEX& ff) +{ + if (deg(ff) <= 0) + LogicError("TraceVec: bad args"); + + ZZ_pEX f; + f = ff; + + MakeMonic(f); + + long n = deg(f); + + S.SetLength(n); + + if (n == 0) + return; + + long k, i; + ZZ_pX acc, t; + ZZ_pE t1; + + S[0] = n; + + for (k = 1; k < n; k++) { + mul(acc, rep(f.rep[n-k]), k); + + for (i = 1; i < k; i++) { + mul(t, rep(f.rep[n-i]), rep(S[k-i])); + add(acc, acc, t); + } + + conv(t1, acc); + negate(S[k], t1); + } +} + +void TraceVec(vec_ZZ_pE& S, const ZZ_pEX& f) +{ + if (deg(f) < ZZ_pE::DivCross()) + PlainTraceVec(S, f); + else + FastTraceVec(S, f); +} + +static + +void ComputeTraceVec(vec_ZZ_pE& S, const ZZ_pEXModulus& F) +{ + if (F.method == ZZ_pEX_MOD_PLAIN) { + PlainTraceVec(S, F.f); + } + else { + FastTraceVec(S, F); + } +} + +void TraceMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEXModulus& F) +{ + long n = F.n; + + if (deg(a) >= n) + LogicError("trace: bad args"); + + do { // NOTE: thread safe lazy init + Lazy::Builder builder(F.tracevec.val()); + if 
(!builder()) break; + UniquePtr p; + p.make(); + ComputeTraceVec(*p, F); + builder.move(p); + } while (0); + + InnerProduct(x, a.rep, *F.tracevec.val()); +} + +void TraceMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEX& f) +{ + if (deg(a) >= deg(f) || deg(f) <= 0) + LogicError("trace: bad args"); + + project(x, TraceVec(f), a); +} + + +void PlainResultant(ZZ_pE& rres, const ZZ_pEX& a, const ZZ_pEX& b) +{ + ZZ_pE res; + + if (IsZero(a) || IsZero(b)) + clear(res); + else if (deg(a) == 0 && deg(b) == 0) + set(res); + else { + long d0, d1, d2; + ZZ_pE lc; + set(res); + + long n = max(deg(a),deg(b)) + 1; + ZZ_pEX u(INIT_SIZE, n), v(INIT_SIZE, n); + vec_ZZ_pX tmp; + SetSize(tmp, n, 2*ZZ_pE::degree()); + + u = a; + v = b; + + for (;;) { + d0 = deg(u); + d1 = deg(v); + lc = LeadCoeff(v); + + PlainRem(u, u, v, tmp); + swap(u, v); + + d2 = deg(v); + if (d2 >= 0) { + power(lc, lc, d0-d2); + mul(res, res, lc); + if (d0 & d1 & 1) negate(res, res); + } + else { + if (d1 == 0) { + power(lc, lc, d0); + mul(res, res, lc); + } + else + clear(res); + + break; + } + } + } + rres = res; +} + +void resultant(ZZ_pE& rres, const ZZ_pEX& a, const ZZ_pEX& b) +{ + PlainResultant(rres, a, b); +} + + +void NormMod(ZZ_pE& x, const ZZ_pEX& a, const ZZ_pEX& f) +{ + if (deg(f) <= 0 || deg(a) >= deg(f)) + LogicError("norm: bad args"); + + if (IsZero(a)) { + clear(x); + return; + } + + ZZ_pE t; + resultant(t, f, a); + if (!IsOne(LeadCoeff(f))) { + ZZ_pE t1; + power(t1, LeadCoeff(f), deg(a)); + inv(t1, t1); + mul(t, t, t1); + } + + x = t; +} + + + +// tower stuff... + + + +void InnerProduct(ZZ_pEX& x, const vec_ZZ_p& v, long low, long high, + const vec_ZZ_pEX& H, long n, vec_ZZ_pE& t) +{ + ZZ_pE s; + long i, j; + + for (j = 0; j < n; j++) + clear(t[j]); + + high = min(high, v.length()-1); + for (i = low; i <= high; i++) { + const vec_ZZ_pE& h = H[i-low].rep; + long m = h.length(); + const ZZ_p& w = v[i]; + + for (j = 0; j < m; j++) { + mul(s, h[j], w); + add(t[j], t[j], s); + } + } + + x.rep.SetLength(n); + for (j = 0; j < n; j++) + x.rep[j] = t[j]; + + x.normalize(); +} + + + +void CompTower(ZZ_pEX& x, const ZZ_pX& g, const ZZ_pEXArgument& A, + const ZZ_pEXModulus& F) +{ + if (deg(g) <= 0) { + conv(x, g); + return; + } + + + ZZ_pEX s, t; + vec_ZZ_pE scratch; + scratch.SetLength(deg(F)); + + long m = A.H.length() - 1; + long l = ((g.rep.length()+m-1)/m) - 1; + + const ZZ_pEX& M = A.H[m]; + + InnerProduct(t, g.rep, l*m, l*m + m - 1, A.H, F.n, scratch); + for (long i = l-1; i >= 0; i--) { + InnerProduct(s, g.rep, i*m, i*m + m - 1, A.H, F.n, scratch); + MulMod(t, t, M, F); + add(t, t, s); + } + x = t; +} + + +void CompTower(ZZ_pEX& x, const ZZ_pX& g, const ZZ_pEX& h, + const ZZ_pEXModulus& F) + // x = g(h) mod f +{ + long m = SqrRoot(g.rep.length()); + + if (m == 0) { + clear(x); + return; + } + + + ZZ_pEXArgument A; + + build(A, h, F, m); + + CompTower(x, g, A, F); +} + +void PrepareProjection(vec_vec_ZZ_p& tt, const vec_ZZ_pE& s, + const vec_ZZ_p& proj) +{ + long l = s.length(); + tt.SetLength(l); + + ZZ_pXMultiplier M; + long i; + + for (i = 0; i < l; i++) { + build(M, rep(s[i]), ZZ_pE::modulus()); + UpdateMap(tt[i], proj, M, ZZ_pE::modulus()); + } +} + +void ProjectedInnerProduct(ZZ_p& x, const vec_ZZ_pE& a, + const vec_vec_ZZ_p& b) +{ + long n = min(a.length(), b.length()); + + ZZ_p t, res; + + res = 0; + + long i; + for (i = 0; i < n; i++) { + project(t, b[i], rep(a[i])); + res += t; + } + + x = res; +} + + +void PrecomputeProj(vec_ZZ_p& proj, const ZZ_pX& f) +{ + long n = deg(f); + + if (n <= 0) LogicError("PrecomputeProj: 
bad args"); + + if (ConstTerm(f) != 0) { + proj.SetLength(1); + proj[0] = 1; + } + else { + proj.SetLength(n); + clear(proj); + proj[n-1] = 1; + } +} + + + +void ProjectPowersTower(vec_ZZ_p& x, const vec_ZZ_pE& a, long k, + const ZZ_pEXArgument& H, const ZZ_pEXModulus& F, + const vec_ZZ_p& proj) + +{ + long n = F.n; + + if (a.length() > n || k < 0) + LogicError("ProjectPowers: bad args"); + if (NTL_OVERFLOW(k, 1, 0)) + ResourceError("ProjectPowers: excessive args"); + + + long m = H.H.length()-1; + long l = (k+m-1)/m - 1; + + ZZ_pEXTransMultiplier M; + build(M, H.H[m], F); + + vec_ZZ_pE s(INIT_SIZE, n); + s = a; + + x.SetLength(k); + + vec_vec_ZZ_p tt; + + for (long i = 0; i <= l; i++) { + long m1 = min(m, k-i*m); + ZZ_p* w = &x[i*m]; + + PrepareProjection(tt, s, proj); + + for (long j = 0; j < m1; j++) + ProjectedInnerProduct(w[j], H.H[j].rep, tt); + if (i < l) + UpdateMap(s, s, M, F); + } +} + + + + +void ProjectPowersTower(vec_ZZ_p& x, const vec_ZZ_pE& a, long k, + const ZZ_pEX& h, const ZZ_pEXModulus& F, + const vec_ZZ_p& proj) + +{ + if (a.length() > F.n || k < 0) LogicError("ProjectPowers: bad args"); + + if (k == 0) { + x.SetLength(0); + return; + } + + long m = SqrRoot(k); + + ZZ_pEXArgument H; + + build(H, h, F, m); + ProjectPowersTower(x, a, k, H, F, proj); +} + + +void DoMinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m, + const vec_ZZ_pE& R, const vec_ZZ_p& proj) +{ + vec_ZZ_p x; + + ProjectPowersTower(x, R, 2*m, g, F, proj); + + MinPolySeq(h, x, m); +} + + +void ProbMinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, + long m) +{ + long n = F.n; + if (m < 1 || m > n*ZZ_pE::degree()) + LogicError("MinPoly: bad args"); + + vec_ZZ_pE R; + R.SetLength(n); + long i; + for (i = 0; i < n; i++) + random(R[i]); + + vec_ZZ_p proj; + PrecomputeProj(proj, ZZ_pE::modulus()); + + DoMinPolyTower(h, g, F, m, R, proj); +} + + +void ProbMinPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, + long m, const vec_ZZ_p& proj) +{ + long n = F.n; + if (m < 1 || m > n*ZZ_pE::degree()) + LogicError("MinPoly: bad args"); + + vec_ZZ_pE R; + R.SetLength(n); + long i; + for (i = 0; i < n; i++) + random(R[i]); + + DoMinPolyTower(h, g, F, m, R, proj); +} + +void MinPolyTower(ZZ_pX& hh, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m) +{ + ZZ_pX h; + ZZ_pEX h1; + long n = F.n; + + if (m < 1 || m > n*ZZ_pE::degree()) + LogicError("MinPoly: bad args"); + + vec_ZZ_p proj; + PrecomputeProj(proj, ZZ_pE::modulus()); + + /* probabilistically compute min-poly */ + + ProbMinPolyTower(h, g, F, m, proj); + if (deg(h) == m) { hh = h; return; } + CompTower(h1, h, g, F); + if (IsZero(h1)) { hh = h; return; } + + /* not completely successful...must iterate */ + + long i; + + ZZ_pX h2; + ZZ_pEX h3; + vec_ZZ_pE R; + ZZ_pEXTransMultiplier H1; + + + for (;;) { + R.SetLength(n); + for (i = 0; i < n; i++) random(R[i]); + build(H1, h1, F); + UpdateMap(R, R, H1, F); + DoMinPolyTower(h2, g, F, m-deg(h), R, proj); + + mul(h, h, h2); + if (deg(h) == m) { hh = h; return; } + CompTower(h3, h2, g, F); + MulMod(h1, h3, h1, F); + if (IsZero(h1)) { hh = h; return; } + } +} + +void IrredPolyTower(ZZ_pX& h, const ZZ_pEX& g, const ZZ_pEXModulus& F, long m) +{ + if (m < 1 || m > deg(F)*ZZ_pE::degree()) + LogicError("IrredPoly: bad args"); + + vec_ZZ_pE R; + R.SetLength(1); + R[0] = 1; + + vec_ZZ_p proj; + proj.SetLength(1); + proj[0] = 1; + + DoMinPolyTower(h, g, F, m, R, proj); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZ_pEXFactoring.c b/thirdparty/linux/ntl/src/ZZ_pEXFactoring.c new 
file mode 100644 index 0000000000..7b19e55b78 --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZ_pEXFactoring.c @@ -0,0 +1,1594 @@ + +#include +#include +#include +#include + + +NTL_START_IMPL + + + +static +void IterPower(ZZ_pE& c, const ZZ_pE& a, long n) +{ + ZZ_pE res; + + long i; + + res = a; + + for (i = 0; i < n; i++) + power(res, res, ZZ_p::modulus()); + + c = res; +} + + + +void SquareFreeDecomp(vec_pair_ZZ_pEX_long& u, const ZZ_pEX& ff) +{ + ZZ_pEX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SquareFreeDecomp: bad args"); + + ZZ_pEX r, t, v, tmp1; + long m, j, finished, done; + + u.SetLength(0); + + if (deg(f) == 0) + return; + + m = 1; + finished = 0; + + do { + j = 1; + diff(tmp1, f); + GCD(r, f, tmp1); + div(t, f, r); + + if (deg(t) > 0) { + done = 0; + do { + GCD(v, r, t); + div(tmp1, t, v); + if (deg(tmp1) > 0) append(u, cons(tmp1, j*m)); + if (deg(v) > 0) { + div(r, r, v); + t = v; + j++; + } + else + done = 1; + } while (!done); + if (deg(r) == 0) finished = 1; + } + + if (!finished) { + /* r is a p-th power */ + + long k, d; + long p = to_long(ZZ_p::modulus()); + + d = deg(r)/p; + f.rep.SetLength(d+1); + for (k = 0; k <= d; k++) + IterPower(f.rep[k], r.rep[k*p], ZZ_pE::degree()-1); + m = m*p; + } + } while (!finished); +} + + + +static +void AbsTraceMap(ZZ_pEX& h, const ZZ_pEX& a, const ZZ_pEXModulus& F) +{ + ZZ_pEX res, tmp; + + long k = NumBits(ZZ_pE::cardinality())-1; + + res = a; + tmp = a; + + long i; + for (i = 0; i < k-1; i++) { + SqrMod(tmp, tmp, F); + add(res, res, tmp); + } + + h = res; +} + +void FrobeniusMap(ZZ_pEX& h, const ZZ_pEXModulus& F) +{ + PowerXMod(h, ZZ_pE::cardinality(), F); +} + + +static +void RecFindRoots(vec_ZZ_pE& x, const ZZ_pEX& f) +{ + if (deg(f) == 0) return; + + if (deg(f) == 1) { + long k = x.length(); + x.SetLength(k+1); + negate(x[k], ConstTerm(f)); + return; + } + + ZZ_pEX h; + + ZZ_pEX r; + + + { + ZZ_pEXModulus F; + build(F, f); + + do { + random(r, deg(F)); + if (IsOdd(ZZ_pE::cardinality())) { + PowerMod(h, r, RightShift(ZZ_pE::cardinality(), 1), F); + sub(h, h, 1); + } + else { + AbsTraceMap(h, r, F); + } + GCD(h, h, f); + } while (deg(h) <= 0 || deg(h) == deg(f)); + } + + RecFindRoots(x, h); + div(h, f, h); + RecFindRoots(x, h); +} + +void FindRoots(vec_ZZ_pE& x, const ZZ_pEX& ff) +{ + ZZ_pEX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("FindRoots: bad args"); + + x.SetMaxLength(deg(f)); + x.SetLength(0); + RecFindRoots(x, f); +} + +void split(ZZ_pEX& f1, ZZ_pEX& g1, ZZ_pEX& f2, ZZ_pEX& g2, + const ZZ_pEX& f, const ZZ_pEX& g, + const vec_ZZ_pE& roots, long lo, long mid) +{ + long r = mid-lo+1; + + ZZ_pEXModulus F; + build(F, f); + + vec_ZZ_pE lroots(INIT_SIZE, r); + long i; + + for (i = 0; i < r; i++) + lroots[i] = roots[lo+i]; + + + ZZ_pEX h, a, d; + BuildFromRoots(h, lroots); + CompMod(a, h, g, F); + + + GCD(f1, a, f); + + div(f2, f, f1); + + rem(g1, g, f1); + rem(g2, g, f2); +} + +void RecFindFactors(vec_ZZ_pEX& factors, const ZZ_pEX& f, const ZZ_pEX& g, + const vec_ZZ_pE& roots, long lo, long hi) +{ + long r = hi-lo+1; + + if (r == 0) return; + + if (r == 1) { + append(factors, f); + return; + } + + ZZ_pEX f1, g1, f2, g2; + + long mid = (lo+hi)/2; + + split(f1, g1, f2, g2, f, g, roots, lo, mid); + + RecFindFactors(factors, f1, g1, roots, lo, mid); + RecFindFactors(factors, f2, g2, roots, mid+1, hi); +} + + +void FindFactors(vec_ZZ_pEX& factors, const ZZ_pEX& f, const ZZ_pEX& g, + const vec_ZZ_pE& roots) +{ + long r = roots.length(); + + factors.SetMaxLength(r); + factors.SetLength(0); + + RecFindFactors(factors, f, g, 
roots, 0, r-1); +} + +void IterFindFactors(vec_ZZ_pEX& factors, const ZZ_pEX& f, + const ZZ_pEX& g, const vec_ZZ_pE& roots) +{ + long r = roots.length(); + long i; + ZZ_pEX h; + + factors.SetLength(r); + + for (i = 0; i < r; i++) { + sub(h, g, roots[i]); + GCD(factors[i], f, h); + } +} + + +void TraceMap(ZZ_pEX& w, const ZZ_pEX& a, long d, const ZZ_pEXModulus& F, + const ZZ_pEX& b) + +{ + if (d < 0) LogicError("TraceMap: bad args"); + + ZZ_pEX y, z, t; + + z = b; + y = a; + clear(w); + + while (d) { + if (d == 1) { + if (IsZero(w)) + w = y; + else { + CompMod(w, w, z, F); + add(w, w, y); + } + } + else if ((d & 1) == 0) { + Comp2Mod(z, t, z, y, z, F); + add(y, t, y); + } + else if (IsZero(w)) { + w = y; + Comp2Mod(z, t, z, y, z, F); + add(y, t, y); + } + else { + Comp3Mod(z, t, w, z, y, w, z, F); + add(w, w, y); + add(y, t, y); + } + + d = d >> 1; + } +} + + +void PowerCompose(ZZ_pEX& y, const ZZ_pEX& h, long q, const ZZ_pEXModulus& F) +{ + if (q < 0) LogicError("PowerCompose: bad args"); + + ZZ_pEX z(INIT_SIZE, F.n); + long sw; + + z = h; + SetX(y); + + while (q) { + sw = 0; + + if (q > 1) sw = 2; + if (q & 1) { + if (IsX(y)) + y = z; + else + sw = sw | 1; + } + + switch (sw) { + case 0: + break; + + case 1: + CompMod(y, y, z, F); + break; + + case 2: + CompMod(z, z, z, F); + break; + + case 3: + Comp2Mod(y, z, y, z, z, F); + break; + } + + q = q >> 1; + } +} + + +long ProbIrredTest(const ZZ_pEX& f, long iter) +{ + long n = deg(f); + + if (n <= 0) return 0; + if (n == 1) return 1; + + ZZ_pEXModulus F; + + build(F, f); + + ZZ_pEX b, r, s; + + FrobeniusMap(b, F); + + long all_zero = 1; + + long i; + + for (i = 0; i < iter; i++) { + random(r, n); + TraceMap(s, r, n, F, b); + + all_zero = all_zero && IsZero(s); + + if (deg(s) > 0) return 0; + } + + if (!all_zero || (n & 1)) return 1; + + PowerCompose(s, b, n/2, F); + return !IsX(s); +} + + +NTL_CHEAP_THREAD_LOCAL long ZZ_pEX_BlockingFactor = 10; + + + + +void RootEDF(vec_ZZ_pEX& factors, const ZZ_pEX& f, long verbose) +{ + vec_ZZ_pE roots; + double t; + + if (verbose) { cerr << "finding roots..."; t = GetTime(); } + FindRoots(roots, f); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + long r = roots.length(); + factors.SetLength(r); + for (long j = 0; j < r; j++) { + SetX(factors[j]); + sub(factors[j], factors[j], roots[j]); + } +} + +void EDFSplit(vec_ZZ_pEX& v, const ZZ_pEX& f, const ZZ_pEX& b, long d) +{ + ZZ_pEX a, g, h; + ZZ_pEXModulus F; + vec_ZZ_pE roots; + + build(F, f); + long n = F.n; + long r = n/d; + random(a, n); + TraceMap(g, a, d, F, b); + MinPolyMod(h, g, F, r); + FindRoots(roots, h); + FindFactors(v, f, g, roots); +} + +void RecEDF(vec_ZZ_pEX& factors, const ZZ_pEX& f, const ZZ_pEX& b, long d, + long verbose) +{ + vec_ZZ_pEX v; + long i; + ZZ_pEX bb; + + if (verbose) cerr << "+"; + + EDFSplit(v, f, b, d); + for (i = 0; i < v.length(); i++) { + if (deg(v[i]) == d) { + append(factors, v[i]); + } + else { + ZZ_pEX bb; + rem(bb, b, v[i]); + RecEDF(factors, v[i], bb, d, verbose); + } + } +} + + +void EDF(vec_ZZ_pEX& factors, const ZZ_pEX& ff, const ZZ_pEX& bb, + long d, long verbose) + +{ + ZZ_pEX f = ff; + ZZ_pEX b = bb; + + if (!IsOne(LeadCoeff(f))) + LogicError("EDF: bad args"); + + long n = deg(f); + long r = n/d; + + if (r == 0) { + factors.SetLength(0); + return; + } + + if (r == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + if (d == 1) { + RootEDF(factors, f, verbose); + return; + } + + + double t; + if (verbose) { + cerr << "computing EDF(" << d << "," << r << ")..."; + t = GetTime(); + } + + 
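
EDF splits a monic product of r distinct irreducibles of common degree d by mixing a random trace-map value with root finding; the separation principle (the same one used by RecFindRoots above) is that a random element raised to the power (cardinality-1)/2 evaluates to +1 on about half the factors and to something else on the rest, so a gcd pulls out a proper piece. A toy demonstration of that residue/non-residue split over F_7 (hypothetical example values):

    #include <cstdio>

    int main() {
        const int p = 7;                    // toy prime field F_7
        // f = (x-1)(x-2)(x-3)(x-4): four distinct linear factors over F_7.
        // For each root a, a^((p-1)/2) is +1 for quadratic residues and
        // p-1 (i.e. -1) otherwise, so gcd(r^((p-1)/2) - 1, f) splits f.
        for (int a = 1; a <= 4; a++) {
            int s = 1;
            for (int i = 0; i < (p - 1) / 2; i++) s = (s * a) % p;
            printf("root %d: %d^((p-1)/2) = %d -> %s side of the split\n",
                   a, a, s, s == 1 ? "gcd" : "cofactor");
        }
        return 0;
    }
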
factors.SetLength(0); + + RecEDF(factors, f, b, d, verbose); + + if (verbose) cerr << (GetTime()-t) << "\n"; +} + + +void SFCanZass(vec_ZZ_pEX& factors, const ZZ_pEX& ff, long verbose) +{ + ZZ_pEX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SFCanZass: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + factors.SetLength(0); + + double t; + + + ZZ_pEXModulus F; + build(F, f); + + ZZ_pEX h; + + if (verbose) { cerr << "computing X^p..."; t = GetTime(); } + FrobeniusMap(h, F); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + vec_pair_ZZ_pEX_long u; + if (verbose) { cerr << "computing DDF..."; t = GetTime(); } + NewDDF(u, f, h, verbose); + if (verbose) { + t = GetTime()-t; + cerr << "DDF time: " << t << "\n"; + } + + ZZ_pEX hh; + vec_ZZ_pEX v; + + long i; + for (i = 0; i < u.length(); i++) { + const ZZ_pEX& g = u[i].a; + long d = u[i].b; + long r = deg(g)/d; + + if (r == 1) { + // g is already irreducible + + append(factors, g); + } + else { + // must perform EDF + + if (d == 1) { + // root finding + RootEDF(v, g, verbose); + append(factors, v); + } + else { + // general case + rem(hh, h, g); + EDF(v, g, hh, d, verbose); + append(factors, v); + } + } + } +} + +void CanZass(vec_pair_ZZ_pEX_long& factors, const ZZ_pEX& f, long verbose) +{ + if (!IsOne(LeadCoeff(f))) + LogicError("CanZass: bad args"); + + double t; + vec_pair_ZZ_pEX_long sfd; + vec_ZZ_pEX x; + + + if (verbose) { cerr << "square-free decomposition..."; t = GetTime(); } + SquareFreeDecomp(sfd, f); + if (verbose) cerr << (GetTime()-t) << "\n"; + + factors.SetLength(0); + + long i, j; + + for (i = 0; i < sfd.length(); i++) { + if (verbose) { + cerr << "factoring multiplicity " << sfd[i].b + << ", deg = " << deg(sfd[i].a) << "\n"; + } + + SFCanZass(x, sfd[i].a, verbose); + + for (j = 0; j < x.length(); j++) + append(factors, cons(x[j], sfd[i].b)); + } +} + +void mul(ZZ_pEX& f, const vec_pair_ZZ_pEX_long& v) +{ + long i, j, n; + + n = 0; + for (i = 0; i < v.length(); i++) + n += v[i].b*deg(v[i].a); + + ZZ_pEX g(INIT_SIZE, n+1); + + set(g); + for (i = 0; i < v.length(); i++) + for (j = 0; j < v[i].b; j++) { + mul(g, g, v[i].a); + } + + f = g; +} + + +long BaseCase(const ZZ_pEX& h, long q, long a, const ZZ_pEXModulus& F) +{ + long b, e; + ZZ_pEX lh(INIT_SIZE, F.n); + + lh = h; + b = 1; + e = 0; + while (e < a-1 && !IsX(lh)) { + e++; + b *= q; + PowerCompose(lh, lh, q, F); + } + + if (!IsX(lh)) b *= q; + + return b; +} + + + +void TandemPowerCompose(ZZ_pEX& y1, ZZ_pEX& y2, const ZZ_pEX& h, + long q1, long q2, const ZZ_pEXModulus& F) +{ + ZZ_pEX z(INIT_SIZE, F.n); + long sw; + + z = h; + SetX(y1); + SetX(y2); + + while (q1 || q2) { + sw = 0; + + if (q1 > 1 || q2 > 1) sw = 4; + + if (q1 & 1) { + if (IsX(y1)) + y1 = z; + else + sw = sw | 2; + } + + if (q2 & 1) { + if (IsX(y2)) + y2 = z; + else + sw = sw | 1; + } + + switch (sw) { + case 0: + break; + + case 1: + CompMod(y2, y2, z, F); + break; + + case 2: + CompMod(y1, y1, z, F); + break; + + case 3: + Comp2Mod(y1, y2, y1, y2, z, F); + break; + + case 4: + CompMod(z, z, z, F); + break; + + case 5: + Comp2Mod(z, y2, z, y2, z, F); + break; + + case 6: + Comp2Mod(z, y1, z, y1, z, F); + break; + + case 7: + Comp3Mod(z, y1, y2, z, y1, y2, z, F); + break; + } + + q1 = q1 >> 1; + q2 = q2 >> 1; + } +} + + +long RecComputeDegree(long u, const ZZ_pEX& h, const ZZ_pEXModulus& F, + FacVec& fvec) +{ + if (IsX(h)) return 1; + + if (fvec[u].link == -1) return BaseCase(h, fvec[u].q, fvec[u].a, 
F); + + ZZ_pEX h1, h2; + long q1, q2, r1, r2; + + q1 = fvec[fvec[u].link].val; + q2 = fvec[fvec[u].link+1].val; + + TandemPowerCompose(h1, h2, h, q1, q2, F); + r1 = RecComputeDegree(fvec[u].link, h2, F, fvec); + r2 = RecComputeDegree(fvec[u].link+1, h1, F, fvec); + return r1*r2; +} + + + + +long RecComputeDegree(const ZZ_pEX& h, const ZZ_pEXModulus& F) + // f = F.f is assumed to be an "equal degree" polynomial + // h = X^p mod f + // the common degree of the irreducible factors of f is computed +{ + if (F.n == 1 || IsX(h)) + return 1; + + FacVec fvec; + + FactorInt(fvec, F.n); + + return RecComputeDegree(fvec.length()-1, h, F, fvec); +} + + +void FindRoot(ZZ_pE& root, const ZZ_pEX& ff) +// finds a root of ff. +// assumes that ff is monic and splits into distinct linear factors + +{ + ZZ_pEXModulus F; + ZZ_pEX h, h1, f; + ZZ_pEX r; + + f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("FindRoot: bad args"); + + if (deg(f) == 0) + LogicError("FindRoot: bad args"); + + + while (deg(f) > 1) { + build(F, f); + random(r, deg(F)); + if (IsOdd(ZZ_pE::cardinality())) { + PowerMod(h, r, RightShift(ZZ_pE::cardinality(), 1), F); + sub(h, h, 1); + } + else { + AbsTraceMap(h, r, F); + } + GCD(h, h, f); + if (deg(h) > 0 && deg(h) < deg(f)) { + if (deg(h) > deg(f)/2) + div(f, f, h); + else + f = h; + } + } + + negate(root, ConstTerm(f)); +} + + +static +long power(long a, long e) +{ + long i, res; + + res = 1; + for (i = 1; i <= e; i++) + res = res * a; + + return res; +} + + +static +long IrredBaseCase(const ZZ_pEX& h, long q, long a, const ZZ_pEXModulus& F) +{ + long e; + ZZ_pEX X, s, d; + + e = power(q, a-1); + PowerCompose(s, h, e, F); + SetX(X); + sub(s, s, X); + GCD(d, F.f, s); + return IsOne(d); +} + + +static +long RecIrredTest(long u, const ZZ_pEX& h, const ZZ_pEXModulus& F, + const FacVec& fvec) +{ + long q1, q2; + ZZ_pEX h1, h2; + + if (IsX(h)) return 0; + + if (fvec[u].link == -1) { + return IrredBaseCase(h, fvec[u].q, fvec[u].a, F); + } + + + q1 = fvec[fvec[u].link].val; + q2 = fvec[fvec[u].link+1].val; + + TandemPowerCompose(h1, h2, h, q1, q2, F); + return RecIrredTest(fvec[u].link, h2, F, fvec) + && RecIrredTest(fvec[u].link+1, h1, F, fvec); +} + +long DetIrredTest(const ZZ_pEX& f) +{ + if (deg(f) <= 0) return 0; + if (deg(f) == 1) return 1; + + ZZ_pEXModulus F; + + build(F, f); + + ZZ_pEX h; + + FrobeniusMap(h, F); + + ZZ_pEX s; + PowerCompose(s, h, F.n, F); + if (!IsX(s)) return 0; + + FacVec fvec; + + FactorInt(fvec, F.n); + + return RecIrredTest(fvec.length()-1, h, F, fvec); +} + + + +long IterIrredTest(const ZZ_pEX& f) +{ + if (deg(f) <= 0) return 0; + if (deg(f) == 1) return 1; + + ZZ_pEXModulus F; + + build(F, f); + + ZZ_pEX h; + + FrobeniusMap(h, F); + + long CompTableSize = 2*SqrRoot(deg(f)); + + ZZ_pEXArgument H; + + build(H, h, F, CompTableSize); + + long i, d, limit, limit_sqr; + ZZ_pEX g, X, t, prod; + + + SetX(X); + + i = 0; + g = h; + d = 1; + limit = 2; + limit_sqr = limit*limit; + + set(prod); + + + while (2*d <= deg(f)) { + sub(t, g, X); + MulMod(prod, prod, t, F); + i++; + if (i == limit_sqr) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + + set(prod); + limit++; + limit_sqr = limit*limit; + i = 0; + } + + d = d + 1; + if (2*d <= deg(f)) { + CompMod(g, g, H, F); + } + } + + if (i > 0) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + } + + return 1; +} + +static +void MulByXPlusY(vec_ZZ_pEX& h, const ZZ_pEX& f, const ZZ_pEX& g) +// h represents the bivariate polynomial h[0] + h[1]*Y + ... 
+ h[n-1]*Y^k, +// where the h[i]'s are polynomials in X, each of degree < deg(f), +// and k < deg(g). +// h is replaced by the bivariate polynomial h*(X+Y) (mod f(X), g(Y)). + +{ + long n = deg(g); + long k = h.length()-1; + + if (k < 0) return; + + if (k < n-1) { + h.SetLength(k+2); + h[k+1] = h[k]; + for (long i = k; i >= 1; i--) { + MulByXMod(h[i], h[i], f); + add(h[i], h[i], h[i-1]); + } + MulByXMod(h[0], h[0], f); + } + else { + ZZ_pEX b, t; + + b = h[n-1]; + for (long i = n-1; i >= 1; i--) { + mul(t, b, g.rep[i]); + MulByXMod(h[i], h[i], f); + add(h[i], h[i], h[i-1]); + sub(h[i], h[i], t); + } + mul(t, b, g.rep[0]); + MulByXMod(h[0], h[0], f); + sub(h[0], h[0], t); + } + + // normalize + + k = h.length()-1; + while (k >= 0 && IsZero(h[k])) k--; + h.SetLength(k+1); +} + + +static +void IrredCombine(ZZ_pEX& x, const ZZ_pEX& f, const ZZ_pEX& g) +{ + if (deg(f) < deg(g)) { + IrredCombine(x, g, f); + return; + } + + // deg(f) >= deg(g)...not necessary, but maybe a little more + // time & space efficient + + long df = deg(f); + long dg = deg(g); + long m = df*dg; + + vec_ZZ_pEX h(INIT_SIZE, dg); + + long i; + for (i = 0; i < dg; i++) h[i].SetMaxLength(df); + + h.SetLength(1); + set(h[0]); + + vec_ZZ_pE a; + + a.SetLength(2*m); + + for (i = 0; i < 2*m; i++) { + a[i] = ConstTerm(h[0]); + if (i < 2*m-1) + MulByXPlusY(h, f, g); + } + + MinPolySeq(x, a, m); +} + + +static +void BuildPrimePowerIrred(ZZ_pEX& f, long q, long e) +{ + long n = power(q, e); + + do { + random(f, n); + SetCoeff(f, n); + } while (!IterIrredTest(f)); +} + +static +void RecBuildIrred(ZZ_pEX& f, long u, const FacVec& fvec) +{ + if (fvec[u].link == -1) + BuildPrimePowerIrred(f, fvec[u].q, fvec[u].a); + else { + ZZ_pEX g, h; + RecBuildIrred(g, fvec[u].link, fvec); + RecBuildIrred(h, fvec[u].link+1, fvec); + IrredCombine(f, g, h); + } +} + + +void BuildIrred(ZZ_pEX& f, long n) +{ + if (n <= 0) + LogicError("BuildIrred: n must be positive"); + + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in BuildIrred"); + + if (n == 1) { + SetX(f); + return; + } + + FacVec fvec; + + FactorInt(fvec, n); + + RecBuildIrred(f, fvec.length()-1, fvec); +} + + + +#if 0 +void BuildIrred(ZZ_pEX& f, long n) +{ + if (n <= 0) + LogicError("BuildIrred: n must be positive"); + + if (n == 1) { + SetX(f); + return; + } + + ZZ_pEX g; + + do { + random(g, n); + SetCoeff(g, n); + } while (!IterIrredTest(g)); + + f = g; + +} +#endif + + + +void BuildRandomIrred(ZZ_pEX& f, const ZZ_pEX& g) +{ + ZZ_pEXModulus G; + ZZ_pEX h, ff; + + build(G, g); + do { + random(h, deg(g)); + IrredPolyMod(ff, h, G); + } while (deg(ff) < deg(g)); + + f = ff; +} + + +/************* NEW DDF ****************/ + +NTL_CHEAP_THREAD_LOCAL long ZZ_pEX_GCDTableSize = 4; +NTL_CHEAP_THREAD_LOCAL double ZZ_pEXFileThresh = NTL_FILE_THRESH; +static NTL_CHEAP_THREAD_LOCAL vec_ZZ_pEX *BabyStepFile=0; +static NTL_CHEAP_THREAD_LOCAL vec_ZZ_pEX *GiantStepFile=0; +static NTL_CHEAP_THREAD_LOCAL long use_files; + + +static +double CalcTableSize(long n, long k) +{ + double sz = ZZ_p::storage(); + sz = sz*ZZ_pE::degree(); + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_ZZ_p); + sz = sz*n; + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_ZZ_pE); + sz = sz * k; + sz = sz/1024; + return sz; +} + + +static +void GenerateBabySteps(ZZ_pEX& h1, const ZZ_pEX& f, const ZZ_pEX& h, long k, + FileList& flist, long verbose) + +{ + double t; + + if (verbose) { cerr << "generating baby steps..."; t = GetTime(); } + + ZZ_pEXModulus F; + build(F, f); + + ZZ_pEXArgument H; + +#if 0 + double n2 = sqrt(double(F.n)); + 
double n4 = sqrt(n2); + double n34 = n2*n4; + long sz = long(ceil(n34/sqrt(sqrt(2.0)))); +#else + long sz = 2*SqrRoot(F.n); +#endif + + build(H, h, F, sz); + + + h1 = h; + + long i; + + if (!use_files) { + (*BabyStepFile).SetLength(k-1); + } + + for (i = 1; i <= k-1; i++) { + if (use_files) { + ofstream s; + OpenWrite(s, FileName("baby", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*BabyStepFile)(i) = h1; + + CompMod(h1, h1, H, F); + if (verbose) cerr << "+"; + } + + if (verbose) + cerr << (GetTime()-t) << "\n"; + +} + + +static +void GenerateGiantSteps(const ZZ_pEX& f, const ZZ_pEX& h, long l, + FileList& flist, long verbose) +{ + + double t; + + if (verbose) { cerr << "generating giant steps..."; t = GetTime(); } + + ZZ_pEXModulus F; + build(F, f); + + ZZ_pEXArgument H; + +#if 0 + double n2 = sqrt(double(F.n)); + double n4 = sqrt(n2); + double n34 = n2*n4; + long sz = long(ceil(n34/sqrt(sqrt(2.0)))); +#else + long sz = 2*SqrRoot(F.n); +#endif + + build(H, h, F, sz); + + ZZ_pEX h1; + + h1 = h; + + long i; + + if (!use_files) { + (*GiantStepFile).SetLength(l); + } + + for (i = 1; i <= l-1; i++) { + if (use_files) { + ofstream s; + OpenWrite(s, FileName("giant", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*GiantStepFile)(i) = h1; + + CompMod(h1, h1, H, F); + if (verbose) cerr << "+"; + } + + if (use_files) { + ofstream s; + OpenWrite(s, FileName("giant", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*GiantStepFile)(i) = h1; + + if (verbose) + cerr << (GetTime()-t) << "\n"; + +} + + +static +void NewAddFactor(vec_pair_ZZ_pEX_long& u, const ZZ_pEX& g, long m, long verbose) +{ + long len = u.length(); + + u.SetLength(len+1); + u[len].a = g; + u[len].b = m; + + if (verbose) { + cerr << "split " << m << " " << deg(g) << "\n"; + } +} + + + + +static +void NewProcessTable(vec_pair_ZZ_pEX_long& u, ZZ_pEX& f, const ZZ_pEXModulus& F, + vec_ZZ_pEX& buf, long size, long StartInterval, + long IntervalLength, long verbose) + +{ + if (size == 0) return; + + ZZ_pEX& g = buf[size-1]; + + long i; + + for (i = 0; i < size-1; i++) + MulMod(g, g, buf[i], F); + + GCD(g, f, g); + + if (deg(g) == 0) return; + + div(f, f, g); + + long d = (StartInterval-1)*IntervalLength + 1; + i = 0; + long interval = StartInterval; + + while (i < size-1 && 2*d <= deg(g)) { + GCD(buf[i], buf[i], g); + if (deg(buf[i]) > 0) { + NewAddFactor(u, buf[i], interval, verbose); + div(g, g, buf[i]); + } + + i++; + interval++; + d += IntervalLength; + } + + if (deg(g) > 0) { + if (i == size-1) + NewAddFactor(u, g, interval, verbose); + else + NewAddFactor(u, g, (deg(g)+IntervalLength-1)/IntervalLength, verbose); + } +} + + +static +void FetchGiantStep(ZZ_pEX& g, long gs, const ZZ_pEXModulus& F) +{ + if (use_files) { + ifstream s; + OpenRead(s, FileName("giant", gs)); + NTL_INPUT_CHECK_ERR(s >> g); + } + else + g = (*GiantStepFile)(gs); + + + rem(g, g, F); +} + + +static +void FetchBabySteps(vec_ZZ_pEX& v, long k) +{ + v.SetLength(k); + + SetX(v[0]); + + long i; + for (i = 1; i <= k-1; i++) { + if (use_files) { + ifstream s; + OpenRead(s, FileName("baby", i)); + NTL_INPUT_CHECK_ERR(s >> v[i]); + } + else + v[i] = (*BabyStepFile)(i); + } +} + + + +static +void GiantRefine(vec_pair_ZZ_pEX_long& u, const ZZ_pEX& ff, long k, long l, + long verbose) + +{ + double t; + + if (verbose) { + cerr << "giant refine..."; + t = GetTime(); + } + + u.SetLength(0); + + vec_ZZ_pEX BabyStep; + + FetchBabySteps(BabyStep, k); + + vec_ZZ_pEX buf(INIT_SIZE, ZZ_pEX_GCDTableSize); + + ZZ_pEX f; + f = ff; + + 
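
GiantRefine rests on the standard DDF fact that x^(q^k) - x is divisible by exactly those irreducible factors of f whose degree divides k; differencing a giant step against the baby steps and taking gcds therefore collects, one interval at a time, the factors whose degrees fall there. A self-contained toy check of the divisibility fact for f = x^2 + 1 over F_3 (irreducible, d = 2), where one q-power (Frobenius) application is just a sign flip:

    #include <cstdio>

    int main() {
        // Track x^(3^k) mod f, f = x^2 + 1 over F_3, as c0 + c1*x.
        // One Frobenius application cubes: (c0 + c1*x)^3 = c0 + c1*x^3
        //                                               = c0 - c1*x,
        // using x^2 = -1 and c^3 = c for c in F_3.
        int c0 = 0, c1 = 1;                 // start from x = x^(3^0)
        for (int k = 1; k <= 4; k++) {
            c1 = (3 - c1) % 3;              // negate c1 mod 3
            printf("x^(3^%d) = %d + %d*x%s\n", k, c0, c1,
                   (c0 == 0 && c1 == 1) ? "   (= x, so d divides k)" : "");
        }
        return 0;
    }
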
ZZ_pEXModulus F; + build(F, f); + + ZZ_pEX g; + ZZ_pEX h; + + long size = 0; + + long first_gs; + + long d = 1; + + while (2*d <= deg(f)) { + + long old_n = deg(f); + + long gs = (d+k-1)/k; + long bs = gs*k - d; + + if (bs == k-1) { + size++; + if (size == 1) first_gs = gs; + FetchGiantStep(g, gs, F); + sub(buf[size-1], g, BabyStep[bs]); + } + else { + sub(h, g, BabyStep[bs]); + MulMod(buf[size-1], buf[size-1], h, F); + } + + if (verbose && bs == 0) cerr << "+"; + + if (size == ZZ_pEX_GCDTableSize && bs == 0) { + NewProcessTable(u, f, F, buf, size, first_gs, k, verbose); + if (verbose) cerr << "*"; + size = 0; + } + + d++; + + if (2*d <= deg(f) && deg(f) < old_n) { + build(F, f); + + long i; + for (i = 1; i <= k-1; i++) + rem(BabyStep[i], BabyStep[i], F); + } + } + + if (size > 0) { + NewProcessTable(u, f, F, buf, size, first_gs, k, verbose); + if (verbose) cerr << "*"; + } + + if (deg(f) > 0) + NewAddFactor(u, f, 0, verbose); + + if (verbose) { + t = GetTime()-t; + cerr << "giant refine time: " << t << "\n"; + } +} + + +static +void IntervalRefine(vec_pair_ZZ_pEX_long& factors, const ZZ_pEX& ff, + long k, long gs, const vec_ZZ_pEX& BabyStep, long verbose) + +{ + vec_ZZ_pEX buf(INIT_SIZE, ZZ_pEX_GCDTableSize); + + ZZ_pEX f; + f = ff; + + ZZ_pEXModulus F; + build(F, f); + + ZZ_pEX g; + + FetchGiantStep(g, gs, F); + + long size = 0; + + long first_d; + + long d = (gs-1)*k + 1; + long bs = k-1; + + while (bs >= 0 && 2*d <= deg(f)) { + + long old_n = deg(f); + + if (size == 0) first_d = d; + rem(buf[size], BabyStep[bs], F); + sub(buf[size], buf[size], g); + size++; + + if (size == ZZ_pEX_GCDTableSize) { + NewProcessTable(factors, f, F, buf, size, first_d, 1, verbose); + size = 0; + } + + d++; + bs--; + + if (bs >= 0 && 2*d <= deg(f) && deg(f) < old_n) { + build(F, f); + rem(g, g, F); + } + } + + NewProcessTable(factors, f, F, buf, size, first_d, 1, verbose); + + if (deg(f) > 0) + NewAddFactor(factors, f, deg(f), verbose); +} + + + + +static +void BabyRefine(vec_pair_ZZ_pEX_long& factors, const vec_pair_ZZ_pEX_long& u, + long k, long l, long verbose) + +{ + double t; + + if (verbose) { + cerr << "baby refine..."; + t = GetTime(); + } + + factors.SetLength(0); + + vec_ZZ_pEX BabyStep; + + long i; + for (i = 0; i < u.length(); i++) { + const ZZ_pEX& g = u[i].a; + long gs = u[i].b; + + if (gs == 0 || 2*((gs-1)*k+1) > deg(g)) + NewAddFactor(factors, g, deg(g), verbose); + else { + if (BabyStep.length() == 0) + FetchBabySteps(BabyStep, k); + IntervalRefine(factors, g, k, gs, BabyStep, verbose); + } + } + + if (verbose) { + t = GetTime()-t; + cerr << "baby refine time: " << t << "\n"; + } +} + + + + + + +void NewDDF(vec_pair_ZZ_pEX_long& factors, + const ZZ_pEX& f, + const ZZ_pEX& h, + long verbose) + +{ + if (!IsOne(LeadCoeff(f))) + LogicError("NewDDF: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(0); + append(factors, cons(f, 1L)); + return; + } + + long B = deg(f)/2; + long k = SqrRoot(B); + long l = (B+k-1)/k; + + ZZ_pEX h1; + + if (CalcTableSize(deg(f), k + l - 1) > ZZ_pEXFileThresh) + use_files = 1; + else + use_files = 0; + + + FileList flist; + + vec_ZZ_pEX local_BabyStepFile; + vec_ZZ_pEX local_GiantStepFile; + + BabyStepFile = &local_BabyStepFile; + GiantStepFile = &local_GiantStepFile; + + + GenerateBabySteps(h1, f, h, k, flist, verbose); + + GenerateGiantSteps(f, h1, l, flist, verbose); + + vec_pair_ZZ_pEX_long u; + GiantRefine(u, f, k, l, verbose); + BabyRefine(factors, u, k, l, verbose); +} + +long 
IterComputeDegree(const ZZ_pEX& h, const ZZ_pEXModulus& F)
+{
+ long n = deg(F);
+
+ if (n == 1 || IsX(h)) return 1;
+
+ long B = n/2;
+ long k = SqrRoot(B);
+ long l = (B+k-1)/k;
+
+
+ ZZ_pEXArgument H;
+
+#if 0
+ double n2 = sqrt(double(n));
+ double n4 = sqrt(n2);
+ double n34 = n2*n4;
+ long sz = long(ceil(n34/sqrt(sqrt(2.0))));
+#else
+ long sz = 2*SqrRoot(F.n);
+#endif
+
+ build(H, h, F, sz);
+
+ ZZ_pEX h1;
+ h1 = h;
+
+ vec_ZZ_pEX baby;
+ baby.SetLength(k);
+
+ SetX(baby[0]);
+
+ long i;
+
+ for (i = 1; i <= k-1; i++) {
+ baby[i] = h1;
+ CompMod(h1, h1, H, F);
+ if (IsX(h1)) return i+1;
+ }
+
+ build(H, h1, F, sz);
+
+ long j;
+
+ for (j = 2; j <= l; j++) {
+ CompMod(h1, h1, H, F);
+
+ for (i = k-1; i >= 0; i--) {
+ if (h1 == baby[i])
+ return j*k-i;
+ }
+ }
+
+ return n;
+}
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/ZZ_pEXTest.c b/thirdparty/linux/ntl/src/ZZ_pEXTest.c
new file mode 100644
index 0000000000..964b861994
--- /dev/null
+++ b/thirdparty/linux/ntl/src/ZZ_pEXTest.c
@@ -0,0 +1,57 @@
+
+#include <NTL/ZZ_pEX.h>
+#include <NTL/ZZ_pEXFactoring.h>
+
+NTL_CLIENT
+
+int main()
+{
+ ZZ_p::init(to_ZZ(17));
+
+ ZZ_pX P;
+ BuildIrred(P, 10);
+
+ ZZ_pE::init(P);
+
+ ZZ_pEX f, g, h;
+
+ random(f, 20);
+ SetCoeff(f, 20);
+
+ random(h, 20);
+
+ g = MinPolyMod(h, f);
+
+ if (deg(g) < 0) TerminalError("bad ZZ_pEXTest (1)");
+ if (CompMod(g, h, f) != 0)
+ TerminalError("bad ZZ_pEXTest (2)");
+
+
+
+ vec_pair_ZZ_pEX_long v;
+
+ long j;
+ for (j = 0; j < 5; j++) {
+ long n = RandomBnd(40)+10;
+ cerr << n << " ";
+
+ random(f, n);
+ SetCoeff(f, n);
+
+ v = CanZass(f);
+
+ g = mul(v);
+ if (f != g) cerr << "oops1\n";
+
+ long i;
+ for (i = 0; i < v.length(); i++)
+ if (!DetIrredTest(v[i].a))
+ TerminalError("bad ZZ_pEXTest (3)");
+
+
+ }
+
+ cerr << "\n";
+
+ cerr << "ZZ_pEXTest OK\n";
+}
diff --git a/thirdparty/linux/ntl/src/ZZ_pX.c b/thirdparty/linux/ntl/src/ZZ_pX.c
new file mode 100644
index 0000000000..3e7c23cc91
--- /dev/null
+++ b/thirdparty/linux/ntl/src/ZZ_pX.c
@@ -0,0 +1,4036 @@
+#include <NTL/ZZ_pX.h>
+#include <NTL/BasicThreadPool.h>
+#include <NTL/new.h>
+
+
+// The mul & sqr routines use routines from ZZX,
+// which is faster for small degree polynomials.
+// Define this macro to revert to old strategy.
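+//
+// A hedged usage sketch (illustrative only, not part of NTL): callers do
+// not pick an algorithm; mul() below dispatches on the operand degrees
+// and on k = ZZ_p::ModulusSize(). For example:
+//
+//    ZZ_p::init(to_ZZ(17));   // single-word modulus, so k == 1
+//    ZZ_pX a, b, c;
+//    random(a, 1000);         // random polynomial of degree < 1000
+//    random(b, 1000);
+//    mul(c, a, b);            // s >= KARX with k == 1: multi-prime FFT
+//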
+
+
+#ifndef NTL_WIZARD_HACK
+
+#include <NTL/ZZX.h>
+
+#endif
+
+
+
+#if (defined(NTL_GMP_LIP))
+#define KARX 200
+#else
+#define KARX 80
+#endif
+
+
+
+NTL_START_IMPL
+
+
+
+
+const ZZ_pX& ZZ_pX::zero()
+{
+ static const ZZ_pX z; // GLOBAL (relies on C++11 thread-safe init)
+ return z;
+}
+
+
+ZZ_pX& ZZ_pX::operator=(long a)
+{
+ conv(*this, a);
+ return *this;
+}
+
+
+ZZ_pX& ZZ_pX::operator=(const ZZ_p& a)
+{
+ conv(*this, a);
+ return *this;
+}
+
+
+istream& operator>>(istream& s, ZZ_pX& x)
+{
+ NTL_INPUT_CHECK_RET(s, s >> x.rep);
+ x.normalize();
+ return s;
+}
+
+ostream& operator<<(ostream& s, const ZZ_pX& a)
+{
+ return s << a.rep;
+}
+
+
+void ZZ_pX::normalize()
+{
+ long n;
+ const ZZ_p* p;
+
+ n = rep.length();
+ if (n == 0) return;
+ p = rep.elts() + n;
+ while (n > 0 && IsZero(*--p)) {
+ n--;
+ }
+ rep.SetLength(n);
+}
+
+
+long IsZero(const ZZ_pX& a)
+{
+ return a.rep.length() == 0;
+}
+
+
+long IsOne(const ZZ_pX& a)
+{
+ return a.rep.length() == 1 && IsOne(a.rep[0]);
+}
+
+void GetCoeff(ZZ_p& x, const ZZ_pX& a, long i)
+{
+ if (i < 0 || i > deg(a))
+ clear(x);
+ else
+ x = a.rep[i];
+}
+
+void SetCoeff(ZZ_pX& x, long i, const ZZ_p& a)
+{
+ long j, m;
+
+ if (i < 0)
+ LogicError("SetCoeff: negative index");
+
+ if (NTL_OVERFLOW(i, 1, 0))
+ ResourceError("overflow in SetCoeff");
+
+ m = deg(x);
+
+ if (i > m && IsZero(a)) return;
+
+ if (i > m) {
+ /* careful: a may alias a coefficient of x */
+
+ long alloc = x.rep.allocated();
+
+ if (alloc > 0 && i >= alloc) {
+ NTL_ZZ_pRegister(aa);
+ aa = a;
+ x.rep.SetLength(i+1);
+ x.rep[i] = aa;
+ }
+ else {
+ x.rep.SetLength(i+1);
+ x.rep[i] = a;
+ }
+
+ for (j = m+1; j < i; j++)
+ clear(x.rep[j]);
+ }
+ else
+ x.rep[i] = a;
+
+ x.normalize();
+}
+
+void SetCoeff(ZZ_pX& x, long i, long a)
+{
+ if (a == 1)
+ SetCoeff(x, i);
+ else {
+ NTL_ZZ_pRegister(T);
+ conv(T, a);
+ SetCoeff(x, i, T);
+ }
+}
+
+void SetCoeff(ZZ_pX& x, long i)
+{
+ long j, m;
+
+ if (i < 0)
+ LogicError("coefficient index out of range");
+
+ if (NTL_OVERFLOW(i, 1, 0))
+ ResourceError("overflow in SetCoeff");
+
+ m = deg(x);
+
+ if (i > m) {
+ x.rep.SetLength(i+1);
+ for (j = m+1; j < i; j++)
+ clear(x.rep[j]);
+ }
+ set(x.rep[i]);
+ x.normalize();
+}
+
+
+void SetX(ZZ_pX& x)
+{
+ clear(x);
+ SetCoeff(x, 1);
+}
+
+
+long IsX(const ZZ_pX& a)
+{
+ return deg(a) == 1 && IsOne(LeadCoeff(a)) && IsZero(ConstTerm(a));
+}
+
+
+
+const ZZ_p& coeff(const ZZ_pX& a, long i)
+{
+ if (i < 0 || i > deg(a))
+ return ZZ_p::zero();
+ else
+ return a.rep[i];
+}
+
+
+const ZZ_p& LeadCoeff(const ZZ_pX& a)
+{
+ if (IsZero(a))
+ return ZZ_p::zero();
+ else
+ return a.rep[deg(a)];
+}
+
+const ZZ_p& ConstTerm(const ZZ_pX& a)
+{
+ if (IsZero(a))
+ return ZZ_p::zero();
+ else
+ return a.rep[0];
+}
+
+
+
+void conv(ZZ_pX& x, const ZZ_p& a)
+{
+ if (IsZero(a))
+ x.rep.SetLength(0);
+ else {
+ x.rep.SetLength(1);
+ x.rep[0] = a;
+
+ // note: if a aliases x.rep[i], i > 0, this code
+ // will still work, since it is assumed that
+ // SetLength(1) will not relocate or destroy x.rep[i]
+ }
+}
+
+void conv(ZZ_pX& x, long a)
+{
+ if (a == 0)
+ clear(x);
+ else if (a == 1)
+ set(x);
+ else {
+ NTL_ZZ_pRegister(T);
+ conv(T, a);
+ conv(x, T);
+ }
+}
+
+void conv(ZZ_pX& x, const ZZ& a)
+{
+ if (IsZero(a))
+ clear(x);
+ else {
+ NTL_ZZ_pRegister(T);
+ conv(T, a);
+ conv(x, T);
+ }
+}
+
+void conv(ZZ_pX& x, const vec_ZZ_p& a)
+{
+ x.rep = a;
+ x.normalize();
+}
+
+
+void add(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b)
+{
+ long da = deg(a);
+ long db = deg(b);
+ long minab = min(da, db);
+ long maxab = max(da, db);
+
x.rep.SetLength(maxab+1); + + long i; + const ZZ_p *ap, *bp; + ZZ_p* xp; + + for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts(); + i; i--, ap++, bp++, xp++) + add(*xp, (*ap), (*bp)); + + if (da > minab && &x != &a) + for (i = da-minab; i; i--, xp++, ap++) + *xp = *ap; + else if (db > minab && &x != &b) + for (i = db-minab; i; i--, xp++, bp++) + *xp = *bp; + else + x.normalize(); +} + + +void add(ZZ_pX& x, const ZZ_pX& a, const ZZ_p& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + } + else if (&x == &a) { + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + ZZ_p *xp = x.rep.elts(); + add(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const ZZ_p *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + +void add(ZZ_pX& x, const ZZ_pX& a, long b) +{ + if (a.rep.length() == 0) { + conv(x, b); + } + else { + if (&x != &a) x = a; + add(x.rep[0], x.rep[0], b); + x.normalize(); + } +} + +void sub(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b) +{ + long da = deg(a); + long db = deg(b); + long minab = min(da, db); + long maxab = max(da, db); + x.rep.SetLength(maxab+1); + + long i; + const ZZ_p *ap, *bp; + ZZ_p* xp; + + for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts(); + i; i--, ap++, bp++, xp++) + sub(*xp, (*ap), (*bp)); + + if (da > minab && &x != &a) + for (i = da-minab; i; i--, xp++, ap++) + *xp = *ap; + else if (db > minab) + for (i = db-minab; i; i--, xp++, bp++) + negate(*xp, *bp); + else + x.normalize(); + +} + +void sub(ZZ_pX& x, const ZZ_pX& a, const ZZ_p& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + negate(x, x); + } + else if (&x == &a) { + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + ZZ_p *xp = x.rep.elts(); + sub(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const ZZ_p *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + +void sub(ZZ_pX& x, const ZZ_pX& a, long b) +{ + if (b == 0) { + x = a; + return; + } + + if (a.rep.length() == 0) { + x.rep.SetLength(1); + x.rep[0] = b; + negate(x.rep[0], x.rep[0]); + } + else { + if (&x != &a) x = a; + sub(x.rep[0], x.rep[0], b); + } + x.normalize(); +} + +void sub(ZZ_pX& x, const ZZ_p& a, const ZZ_pX& b) +{ + NTL_ZZ_pRegister(T); + T = a; + + negate(x, b); + add(x, x, T); +} + +void sub(ZZ_pX& x, long a, const ZZ_pX& b) +{ + NTL_ZZ_pRegister(T); + T = a; + + negate(x, b); + add(x, x, T); +} + +void negate(ZZ_pX& x, const ZZ_pX& a) +{ + long n = a.rep.length(); + x.rep.SetLength(n); + + const ZZ_p* ap = a.rep.elts(); + ZZ_p* xp = x.rep.elts(); + long i; + + for (i = n; i; i--, ap++, xp++) + negate((*xp), (*ap)); + +} + + +#ifndef NTL_WIZARD_HACK + + +void mul(ZZ_pX& c, const ZZ_pX& a, const ZZ_pX& b) +{ + if (IsZero(a) || IsZero(b)) { + clear(c); + return; + } + + if (&a == &b) { + sqr(c, a); + return; + } + + long k = ZZ_p::ModulusSize(); + long s = min(deg(a), deg(b)) + 1; + + if (s == 1 || (k == 1 && s < 40) || (k == 2 && s < 20) || + (k == 3 && s < 12) || (k <= 5 && s < 8) || + (k <= 12 && s < 4) ) { + PlainMul(c, a, b); + } + else if (s < KARX) { + ZZX A, B, C; + conv(A, a); + conv(B, b); + KarMul(C, A, B); + conv(c, C); + } + else { + long mbits; 
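+ // Large-degree, large-modulus case: the code below chooses between
+ // Schoenhage-Strassen (SSMul) and the multi-prime FFT (FFTMul).
+ // SSRatio estimates how far the SS transform length overshoots the
+ // optimum; SS is taken only single-threaded (nt == 1) and when the
+ // modulus is large enough (k words) for the overshoot to pay off.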
+ mbits = NumBits(ZZ_p::modulus()); + + long nt = 1; + // FIXME: needs to be updated when I thread-enable the SS + // mul routine + +#ifdef NTL_THREAD_BOOST + BasicThreadPool *pool = GetThreadPool(); + if (pool && !pool->active()) nt = pool->NumThreads(); +#endif + + double rat = SSRatio(deg(a), mbits, deg(b), mbits); + + if ( nt == 1 && ( + + (k >= 53 && rat < 1.10) || + (k >= 106 && rat < 1.30) || + (k >= 212 && rat < 1.75) + + )) { + ZZX A, B, C; + conv(A, a); + conv(B, b); + SSMul(C, A, B); + conv(c, C); + } + else { + FFTMul(c, a, b); + } + } +} + +void sqr(ZZ_pX& c, const ZZ_pX& a) +{ + if (IsZero(a)) { + clear(c); + return; + } + + long k = ZZ_p::ModulusSize(); + long s = deg(a) + 1; + + if (s == 1 || (k == 1 && s < 50) || (k == 2 && s < 25) || + (k == 3 && s < 25) || (k <= 6 && s < 12) || + (k <= 8 && s < 8) || (k == 9 && s < 6) || + (k <= 30 && s < 4) ) { + + PlainSqr(c, a); + } + else if (s < 80) { + ZZX C, A; + conv(A, a); + KarSqr(C, A); + conv(c, C); + } + else { + long mbits; + mbits = NumBits(ZZ_p::modulus()); + + + long nt = 1; + // FIXME: needs to be updated when I thread-enable the SS + // mul routine + +#ifdef NTL_THREAD_BOOST + BasicThreadPool *pool = GetThreadPool(); + if (pool && !pool->active()) nt = pool->NumThreads(); +#endif + + double rat = SSRatio(deg(a), mbits, deg(a), mbits); + + if ( nt == 1 && ( + + (k >= 53 && rat < 1.10) || + (k >= 106 && rat < 1.30) || + (k >= 212 && rat < 1.75) + + )) { + ZZX A, C; + conv(A, a); + SSSqr(C, A); + conv(c, C); + } + else { + FFTSqr(c, a); + } + } +} + +#else + +void mul(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b) +{ + if (&a == &b) { + sqr(x, a); + return; + } + + if (deg(a) > NTL_ZZ_pX_FFT_CROSSOVER && deg(b) > NTL_ZZ_pX_FFT_CROSSOVER) + FFTMul(x, a, b); + else + PlainMul(x, a, b); +} + +void sqr(ZZ_pX& x, const ZZ_pX& a) +{ + if (deg(a) > NTL_ZZ_pX_FFT_CROSSOVER) + FFTSqr(x, a); + else + PlainSqr(x, a); +} + + +#endif + + +void PlainMul(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b) +{ + long da = deg(a); + long db = deg(b); + + if (da < 0 || db < 0) { + clear(x); + return; + } + + if (da == 0) { + mul(x, b, a.rep[0]); + return; + } + + if (db == 0) { + mul(x, a, b.rep[0]); + return; + } + + long d = da+db; + + + + const ZZ_p *ap, *bp; + ZZ_p *xp; + + ZZ_pX la, lb; + + if (&x == &a) { + la = a; + ap = la.rep.elts(); + } + else + ap = a.rep.elts(); + + if (&x == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + x.rep.SetLength(d+1); + + xp = x.rep.elts(); + + long i, j, jmin, jmax; + NTL_ZZRegister(t); + NTL_ZZRegister(accum); + + for (i = 0; i <= d; i++) { + jmin = max(0, i-db); + jmax = min(da, i); + clear(accum); + for (j = jmin; j <= jmax; j++) { + mul(t, rep(ap[j]), rep(bp[i-j])); + add(accum, accum, t); + } + conv(xp[i], accum); + } + x.normalize(); +} + +void PlainSqr(ZZ_pX& x, const ZZ_pX& a) +{ + long da = deg(a); + + if (da < 0) { + clear(x); + return; + } + + long d = 2*da; + + const ZZ_p *ap; + ZZ_p *xp; + + ZZ_pX la; + + if (&x == &a) { + la = a; + ap = la.rep.elts(); + } + else + ap = a.rep.elts(); + + + x.rep.SetLength(d+1); + + xp = x.rep.elts(); + + long i, j, jmin, jmax; + long m, m2; + NTL_ZZRegister(t); + NTL_ZZRegister(accum); + + for (i = 0; i <= d; i++) { + jmin = max(0, i-da); + jmax = min(da, i); + m = jmax - jmin + 1; + m2 = m >> 1; + jmax = jmin + m2 - 1; + clear(accum); + for (j = jmin; j <= jmax; j++) { + mul(t, rep(ap[j]), rep(ap[i-j])); + add(accum, accum, t); + } + add(accum, accum, accum); + if (m & 1) { + sqr(t, rep(ap[jmax + 1])); + add(accum, accum, t); + } + + 
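// accum now holds sum_{j+j'=i} a[j]*a[j']: twice the sum over j < j',
+// plus the middle square a[i/2]^2 when i is even (the m & 1 test above);
+// e.g. for i = 2 this is 2*a[0]*a[2] + a[1]^2. conv() reduces it mod p.
+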
conv(xp[i], accum); + } + + x.normalize(); +} + +void PlainDivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b) +{ + long da, db, dq, i, j, LCIsOne; + const ZZ_p *bp; + ZZ_p *qp; + ZZ *xp; + + + ZZ_p LCInv, t; + NTL_ZZRegister(s); + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("ZZ_pX: division by zero"); + + if (da < db) { + r = a; + clear(q); + return; + } + + ZZ_pX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + ZZVec x(da + 1, ZZ_p::ExtendedModulusSize()); + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainRem(ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b, ZZVec& x) +{ + long da, db, dq, i, j, LCIsOne; + const ZZ_p *bp; + ZZ *xp; + + + ZZ_p LCInv, t; + NTL_ZZRegister(s); + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("ZZ_pX: division by zero"); + + if (da < db) { + r = a; + return; + } + + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainDivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b, ZZVec& x) +{ + long da, db, dq, i, j, LCIsOne; + const ZZ_p *bp; + ZZ_p *qp; + ZZ *xp; + + + ZZ_p LCInv, t; + NTL_ZZRegister(s); + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("ZZ_pX: division by zero"); + + if (da < db) { + r = a; + clear(q); + return; + } + + ZZ_pX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainDiv(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b) +{ + long da, db, dq, i, j, LCIsOne; + const ZZ_p *bp; + ZZ_p *qp; + ZZ *xp; + + + ZZ_p LCInv, t; + NTL_ZZRegister(s); + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("ZZ_pX: division by zero"); + + if (da < db) { + clear(q); + return; + } + + ZZ_pX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + ZZVec x(da + 1 - db, ZZ_p::ExtendedModulusSize()); + + for (i = db; i <= da; i++) + x[i-db] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + 
qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i]); + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + negate(t, t); + + long lastj = max(0, db-i); + + for (j = db-1; j >= lastj; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j-db], xp[i+j-db], s); + } + } +} + +void PlainRem(ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b) +{ + long da, db, dq, i, j, LCIsOne; + const ZZ_p *bp; + ZZ *xp; + + + ZZ_p LCInv, t; + NTL_ZZRegister(s); + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("ZZ_pX: division by zero"); + + if (da < db) { + r = a; + return; + } + + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + ZZVec x(da + 1, ZZ_p::ExtendedModulusSize()); + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + + +NTL_TBDECL_static(MulAux)(ZZ_p* xp, const ZZ_p* ap, const ZZ_p& t, long n) +{ + for (long i = 0; i < n; i++) + mul(xp[i], ap[i], t); +} + +#ifdef NTL_THREAD_BOOST +static void MulAux(ZZ_p* xp, const ZZ_p* ap, const ZZ_p& t, long n) +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_MulAux(xp, ap, t, n); + return; + } + + ZZ_pContext local_context; + local_context.save(); + + pool->exec_range(n, + [xp, ap, &t, &local_context](long first, long last) { + local_context.restore(); + for (long i = first; i < last; i++) + mul(xp[i], ap[i], t); + } ); +} +#endif + + + +void mul(ZZ_pX& x, const ZZ_pX& a, const ZZ_p& b) +{ + if (IsZero(b)) { + clear(x); + return; + } + + if (IsOne(b)) { + x = a; + return; + } + + NTL_ZZ_pRegister(t); + + long da; + + const ZZ_p *ap; + ZZ_p* xp; + + t = b; + + da = deg(a); + x.rep.SetLength(da+1); + ap = a.rep.elts(); + xp = x.rep.elts(); + + MulAux(xp, ap, t, da+1); + + x.normalize(); +} + +void mul(ZZ_pX& x, const ZZ_pX& a, long b) +{ + NTL_ZZ_pRegister(T); + conv(T, b); + mul(x, a, T); +} + + +void PlainGCD(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b) +{ + ZZ_p t; + + if (IsZero(b)) + x = a; + else if (IsZero(a)) + x = b; + else { + long n = max(deg(a),deg(b)) + 1; + ZZ_pX u(INIT_SIZE, n), v(INIT_SIZE, n); + ZZVec tmp(n, ZZ_p::ExtendedModulusSize()); + + u = a; + v = b; + do { + PlainRem(u, u, v, tmp); + swap(u, v); + } while (!IsZero(v)); + + x = u; + } + + if (IsZero(x)) return; + if (IsOne(LeadCoeff(x))) return; + + /* make gcd monic */ + + + inv(t, LeadCoeff(x)); + mul(x, x, t); +} + + + + + +void PlainXGCD(ZZ_pX& d, ZZ_pX& s, ZZ_pX& t, const ZZ_pX& a, const ZZ_pX& b) +{ + ZZ_p z; + + + if (IsZero(b)) { + set(s); + clear(t); + d = a; + } + else if (IsZero(a)) { + clear(s); + set(t); + d = b; + } + else { + long e = max(deg(a), deg(b)) + 1; + + ZZ_pX temp(INIT_SIZE, e), u(INIT_SIZE, e), v(INIT_SIZE, e), + u0(INIT_SIZE, e), v0(INIT_SIZE, e), + u1(INIT_SIZE, e), v1(INIT_SIZE, e), + u2(INIT_SIZE, e), v2(INIT_SIZE, e), q(INIT_SIZE, e); + + + set(u1); clear(v1); + clear(u2); set(v2); + u = a; v = b; + + do { + DivRem(q, u, u, v); + swap(u, v); + u0 = u2; + v0 = v2; + mul(temp, q, u2); + sub(u2, u1, temp); + mul(temp, q, v2); + sub(v2, v1, temp); + u1 = u0; + v1 = v0; + } while (!IsZero(v)); + + d = u; + s = u1; + t = v1; + } + + if (IsZero(d)) return; + if (IsOne(LeadCoeff(d))) return; + 
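+ // Invariant of the loop above: s*a + t*b = d. The scaling below by
+ // 1/LeadCoeff(d) makes the gcd monic while preserving that identity.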
+ /* make gcd monic */ + + inv(z, LeadCoeff(d)); + mul(d, d, z); + mul(s, s, z); + mul(t, t, z); +} + + +void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, const ZZ_pX& f) +{ + if (deg(a) >= deg(f) || deg(b) >= deg(f) || deg(f) == 0) + LogicError("MulMod: bad args"); + + ZZ_pX t; + + mul(t, a, b); + rem(x, t, f); +} + +void SqrMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("SqrMod: bad args"); + + ZZ_pX t; + + sqr(t, a); + rem(x, t, f); +} + + +void InvMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("InvMod: bad args"); + + ZZ_pX d, t; + + XGCD(d, x, t, a, f); + if (!IsOne(d)) + InvModError("ZZ_pX InvMod: can't compute multiplicative inverse"); +} + +long InvModStatus(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("InvModStatus: bad args"); + ZZ_pX d, t; + + XGCD(d, x, t, a, f); + if (!IsOne(d)) { + x = d; + return 1; + } + else + return 0; +} + + +NTL_TBDECL_static(MulByXModAux1)(long n, ZZ_p *hh, const ZZ_p* aa, const ZZ_p *ff, const ZZ_p& z) +{ + NTL_ZZ_pRegister(t); + + for (long i = n-1; i >= 1; i--) { + // hh[i] = aa[i-1] + z*ff[i] + mul(t, z, ff[i]); + add(hh[i], aa[i-1], t); + } +} + +#ifdef NTL_THREAD_BOOST + +static void MulByXModAux1(long n, ZZ_p *hh, const ZZ_p* aa, const ZZ_p *ff, const ZZ_p& z) +{ + + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1 || hh == aa) { + // Careful! can't parallelize if hh == aa + basic_MulByXModAux1(n, hh, aa, ff, z); + return; + } + + ZZ_pContext local_context; + local_context.save(); + + pool->exec_range(n-1, + [n, hh, aa, ff, &z, &local_context] + (long first, long last) { + local_context.restore(); + NTL_ZZ_pRegister(t); + + for (long idx = first; idx < last; idx++) { + long i = n-1-idx; + // hh[i] = aa[i-1] + z*ff[i] + mul(t, z, ff[i]); + add(hh[i], aa[i-1], t); + } + } ); +} + + +#endif + + +static +void MulByXModAux(ZZ_pX& h, const ZZ_pX& a, const ZZ_pX& f) +{ + long i, n, m; + ZZ_p* hh; + const ZZ_p *aa, *ff; + + NTL_ZZ_pRegister(z); + + n = deg(f); + m = deg(a); + + if (m >= n || n == 0) LogicError("MulByXMod: bad args"); + + if (m < 0) { + clear(h); + return; + } + + if (m < n-1) { + h.rep.SetLength(m+2); + hh = h.rep.elts(); + aa = a.rep.elts(); + for (i = m+1; i >= 1; i--) + hh[i] = aa[i-1]; + clear(hh[0]); + } + else { + h.rep.SetLength(n); + hh = h.rep.elts(); + aa = a.rep.elts(); + ff = f.rep.elts(); + negate(z, aa[n-1]); + if (!IsOne(ff[n])) + div(z, z, ff[n]); + + MulByXModAux1(n, hh, aa, ff, z); + + mul(hh[0], z, ff[0]); + h.normalize(); + } +} + + +void MulByXMod(ZZ_pX& h, const ZZ_pX& a, const ZZ_pX& f) +{ + if (&h == &f) { + ZZ_pX hh; + MulByXModAux(hh, a, f); + h = hh; + } + else + MulByXModAux(h, a, f); +} + + + +void random(ZZ_pX& x, long n) +{ + long i; + + x.rep.SetLength(n); + + for (i = 0; i < n; i++) + random(x.rep[i]); + + x.normalize(); +} + + +void FFTRep::DoSetSize(long NewK, long NewNumPrimes) +{ + + if (NewK < -1) LogicError("bad arg to FFTRep::SetSize()"); + + if (NewK >= NTL_BITS_PER_LONG-1) + ResourceError("bad arg to FFTRep::SetSize()"); + + if (NewK == -1) { + k = -1; + return; + } + + if (NewNumPrimes == 0) { + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + NewNumPrimes = FFTInfo->NumPrimes; + } + + if (MaxK >= 0 && NumPrimes != NewNumPrimes) + LogicError("FFTRep: inconsistent use"); + + if (NewK <= MaxK) { + k = NewK; + return; + } + + tbl.SetDims(NewNumPrimes, 1L << NewK); + NumPrimes = 
NewNumPrimes; + k = MaxK = NewK; +} + +void FFTRep::SetSize(long NewK) +{ + DoSetSize(NewK, 0); +} + + +FFTRep& FFTRep::operator=(const FFTRep& R) +{ + if (this == &R) return *this; + + if (MaxK >= 0 && R.MaxK >= 0 && NumPrimes != R.NumPrimes) + LogicError("FFTRep: inconsistent use"); + + if (R.k < 0) { + k = -1; + return *this; + } + + DoSetSize(R.k, R.NumPrimes); + long i, j, n; + + n = 1L << k; + + for (i = 0; i < NumPrimes; i++) + for (j = 0; j < n; j++) + tbl[i][j] = R.tbl[i][j]; + + return *this; +} + + + +void ZZ_pXModRep::SetSize(long NewN) +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + + if (NewN < 0) + LogicError("bad arg to ZZ_pXModRep::SetSize()"); + + if (NewN <= MaxN) { + n = NewN; + return; + } + + tbl.SetDims(FFTInfo->NumPrimes, NewN); + n = MaxN = NewN; + NumPrimes = FFTInfo->NumPrimes; +} + + + +// FIXME: maybe I could put this is scratch space associated +// with the current modulus +static inline +vec_long& ModularRepBuf() +{ + NTL_TLS_LOCAL(vec_long, t); + return t; +} + + +void ToModularRep(vec_long& x, const ZZ_p& a, const ZZ_pFFTInfoT *FFTInfo, + ZZ_pTmpSpaceT *TmpSpace) +{ + FFTInfo->rem_struct.eval(&x[0], rep(a), TmpSpace->rem_tmp_vec); +} + + +void FromModularRep(ZZ_p& x, vec_long& avec, const ZZ_pFFTInfoT *FFTInfo, + ZZ_pTmpSpaceT *TmpSpace) +// NOTE: a gets destroyed + +{ + NTL_ZZRegister(t); + long * NTL_RESTRICT a = avec.elts(); + + if (FFTInfo->crt_struct.special()) { + FFTInfo->crt_struct.eval(t, a, TmpSpace->crt_tmp_vec); + x.LoopHole() = t; + return; + } + + long nprimes = FFTInfo->NumPrimes; + const long *u = FFTInfo->u.elts(); + const long *prime = FFTInfo->prime.elts(); + const mulmod_precon_t *uqinv = FFTInfo->uqinv.elts(); + const double *prime_recip = FFTInfo->prime_recip.elts(); + + double y = 0.0; + + for (long i = 0; i < nprimes; i++) { + long r = MulModPrecon(a[i], u[i], prime[i], uqinv[i]); + a[i] = r; + y += double(r)*prime_recip[i]; + } + + long q = long(y + 0.5); + + FFTInfo->crt_struct.eval(t, a, TmpSpace->crt_tmp_vec); + + MulAddTo(t, FFTInfo->MinusMModP, q); + // TODO: this MulAddTo could be folded into the above + // crt_struct.eval as just another product to accumulate... + // but, savings would be marginal and a number of interfaces + // would have to be modified... + + // montgomery + FFTInfo->reduce_struct.eval(x.LoopHole(), t); +} + + + + + +NTL_TBDECL(ToFFTRep)(FFTRep& y, const ZZ_pX& x, long k, long lo, long hi) +// computes an n = 2^k point convolution. +// if deg(x) >= 2^k, then x is first reduced modulo X^n-1. 
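+//
+// Each ZZ_p coefficient is split by ToModularRep into residues modulo
+// the small FFT primes; y.tbl[i] then holds the 2^k-point transform of
+// the i-th residue sequence, and FromModularRep later CRTs pointwise
+// products back into ZZ_p.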
+{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + + + long n, i, j, m, j1; + vec_long& t = ModularRepBuf(); + + + if (k > FFTInfo->MaxRoot) + ResourceError("Polynomial too big for FFT"); + + if (lo < 0) + LogicError("bad arg to ToFFTRep"); + + long nprimes = FFTInfo->NumPrimes; + t.SetLength(nprimes); + + hi = min(hi, deg(x)); + + y.SetSize(k); + + n = 1L << k; + + m = max(hi-lo + 1, 0); + + const ZZ_p *xx = x.rep.elts(); + + if (n >= m) { + for (j = 0; j < m; j++) { + ToModularRep(t, xx[j+lo], FFTInfo, TmpSpace); + for (i = 0; i < nprimes; i++) { + y.tbl[i][j] = t[i]; + } + } + + if (n > m) { + for (i = 0; i < nprimes; i++) { + long *yp = &y.tbl[i][0]; + for (j = m; j < n; j++) { + yp[j] = 0; + } + } + } + } + else { + NTL_ZZ_pRegister(accum); + for (j = 0; j < n; j++) { + accum = xx[j+lo]; + for (j1 = j + n; j1 < m; j1 += n) + add(accum, accum, xx[j1+lo]); + ToModularRep(t, accum, FFTInfo, TmpSpace); + for (i = 0; i < nprimes; i++) { + y.tbl[i][j] = t[i]; + } + } + } + + // FIXME: something to think about...part of the above logic + // is essentially a matrix transpose, which could lead to bad + // cache performance. I don't really know if that is an issue. + + for (i = 0; i < nprimes; i++) { + long *yp = &y.tbl[i][0]; + FFTFwd(yp, yp, k, i); + } +} + + +#ifdef NTL_THREAD_BOOST + +void ToFFTRep(FFTRep& y, const ZZ_pX& x, long k, long lo, long hi) +// computes an n = 2^k point convolution. +// if deg(x) >= 2^k, then x is first reduced modulo X^n-1. +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_ToFFTRep(y, x, k, lo, hi); + return; + } + + + + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long n, m; + + + if (k > FFTInfo->MaxRoot) + ResourceError("Polynomial too big for FFT"); + + if (lo < 0) + LogicError("bad arg to ToFFTRep"); + + long nprimes = FFTInfo->NumPrimes; + + hi = min(hi, deg(x)); + + y.SetSize(k); + + n = 1L << k; + + m = max(hi-lo + 1, 0); + + const ZZ_p *xx = x.rep.elts(); + + + ZZ_pContext local_context; + local_context.save(); + + if (n >= m) { + pool->exec_range(m, + [lo, xx, &y, nprimes, &local_context, FFTInfo] + (long first, long last) { + + local_context.restore(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + // TmpSpace is thread local! + + vec_long& t = ModularRepBuf(); + t.SetLength(nprimes); + + for (long j = first; j < last; j++) { + ToModularRep(t, xx[j+lo], FFTInfo, TmpSpace); + for (long i = 0; i < nprimes; i++) { + y.tbl[i][j] = t[i]; + } + } + } ); + } + else { + pool->exec_range(n, + [lo, m, n, xx, &y, nprimes, &local_context, FFTInfo] + (long first, long last) { + local_context.restore(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + // TmpSpace is thread local! + + vec_long& t = ModularRepBuf(); + t.SetLength(nprimes); + + NTL_ZZ_pRegister(accum); + for (long j = first; j < last; j++) { + accum = xx[j+lo]; + for (long j1 = j + n; j1 < m; j1 += n) + add(accum, accum, xx[j1+lo]); + ToModularRep(t, accum, FFTInfo, TmpSpace); + for (long i = 0; i < nprimes; i++) { + y.tbl[i][j] = t[i]; + } + } + } ); + } + + // FIXME: something to think about...part of the above logic + // is essentially a matrix transpose, which could lead to bad + // cache performance. I don't really know if that is an issue. 
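+ // (Thread-boost pattern: ZZ_p's modulus lives in thread-local storage,
+ // so each worker lambda above restores the caller's ZZ_pContext before
+ // touching ZZ_p values; TmpSpace and ModularRepBuf are per-thread too.)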
+ + pool->exec_range(nprimes, + [&y, m, n, k](long first, long last) { + for (long i = first; i < last; i++) { + long *yp = &y.tbl[i][0]; + for (long j = m; j < n; j++) yp[j] = 0; + FFTFwd(yp, yp, k, i); + } + } ); +} + +#endif + + + +NTL_TBDECL(RevToFFTRep)(FFTRep& y, const vec_ZZ_p& x, + long k, long lo, long hi, long offset) +// computes an n = 2^k point convolution of X^offset*x[lo..hi] mod X^n-1 +// using "inverted" evaluation points. + +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + + + long n, i, j, m, j1; + vec_long& t = ModularRepBuf(); + NTL_ZZ_pRegister(accum); + + if (k > FFTInfo->MaxRoot) + ResourceError("Polynomial too big for FFT"); + + if (lo < 0) + LogicError("bad arg to ToFFTRep"); + + long nprimes = FFTInfo->NumPrimes; + t.SetLength(nprimes); + + hi = min(hi, x.length()-1); + + y.SetSize(k); + + n = 1L << k; + + m = max(hi-lo + 1, 0); + + const ZZ_p *xx = x.elts(); + + offset = offset & (n-1); + + for (j = 0; j < n; j++) { + if (j >= m) { + for (i = 0; i < nprimes; i++) + y.tbl[i][offset] = 0; + } + else { + accum = xx[j+lo]; + for (j1 = j + n; j1 < m; j1 += n) + add(accum, accum, xx[j1+lo]); + ToModularRep(t, accum, FFTInfo, TmpSpace); + for (i = 0; i < nprimes; i++) { + y.tbl[i][offset] = t[i]; + + } + } + + offset = (offset + 1) & (n-1); + } + + + for (i = 0; i < nprimes; i++) { + long *yp = &y.tbl[i][0]; + FFTRev1(yp, yp, k, i); + } + +} + + + +#ifdef NTL_THREAD_BOOST + +void RevToFFTRep(FFTRep& y, const vec_ZZ_p& x, + long k, long lo, long hi, long offset) +// computes an n = 2^k point convolution of X^offset*x[lo..hi] mod X^n-1 +// using "inverted" evaluation points. + +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_RevToFFTRep(y, x, k, lo, hi, offset); + return; + } + + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long n, m; + + if (k > FFTInfo->MaxRoot) + ResourceError("Polynomial too big for FFT"); + + if (lo < 0) + LogicError("bad arg to ToFFTRep"); + + long nprimes = FFTInfo->NumPrimes; + + hi = min(hi, x.length()-1); + + y.SetSize(k); + + n = 1L << k; + + m = max(hi-lo + 1, 0); + + const ZZ_p *xx = x.elts(); + + offset = offset & (n-1); + + ZZ_pContext local_context; + local_context.save(); + + pool->exec_range(n, + [lo, m, n, offset, xx, &y, nprimes, &local_context, FFTInfo] + (long first, long last) { + + local_context.restore(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + // TmpSpace is thread local! 
+ + vec_long& t = ModularRepBuf(); + t.SetLength(nprimes); + + long local_offset = (offset + first) & (n-1); + + NTL_ZZ_pRegister(accum); + + for (long j = first; j < last; j++) { + if (j >= m) { + for (long i = 0; i < nprimes; i++) + y.tbl[i][local_offset] = 0; + } + else { + accum = xx[j+lo]; + for (long j1 = j + n; j1 < m; j1 += n) + add(accum, accum, xx[j1+lo]); + ToModularRep(t, accum, FFTInfo, TmpSpace); + for (long i = 0; i < nprimes; i++) { + y.tbl[i][local_offset] = t[i]; + + } + } + + local_offset = (local_offset + 1) & (n-1); + } + } ); + + pool->exec_range(nprimes, + [&y, k](long first, long last) { + for (long i = first; i < last; i++) { + long *yp = &y.tbl[i][0]; + FFTRev1(yp, yp, k, i); + } + } ); + +} + + +#endif + + + + + + +NTL_TBDECL(FromFFTRep)(ZZ_pX& x, FFTRep& y, long lo, long hi) + + // converts from FFT-representation to coefficient representation + // only the coefficients lo..hi are computed + + +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + + long k, n, i, j, l; + + vec_long& t = ModularRepBuf(); + + long nprimes = FFTInfo->NumPrimes; + t.SetLength(nprimes); + + k = y.k; + n = (1L << k); + + + for (i = 0; i < nprimes; i++) { + long *yp = &y.tbl[i][0]; + FFTRev1(yp, yp, k, i); + } + + hi = min(hi, n-1); + l = hi-lo+1; + l = max(l, 0); + x.rep.SetLength(l); + + for (j = 0; j < l; j++) { + for (i = 0; i < nprimes; i++) + t[i] = y.tbl[i][j+lo]; + + FromModularRep(x.rep[j], t, FFTInfo, TmpSpace); + } + + x.normalize(); +} + +#ifdef NTL_THREAD_BOOST + +void FromFFTRep(ZZ_pX& x, FFTRep& y, long lo, long hi) + + // converts from FFT-representation to coefficient representation + // only the coefficients lo..hi are computed + + +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_FromFFTRep(x, y, lo, hi); + return; + } + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long k, n, l; + + long nprimes = FFTInfo->NumPrimes; + + k = y.k; + n = (1L << k); + + + pool->exec_range(nprimes, + [&y, k](long first, long last) { + for (long i = first; i < last; i++) { + long *yp = &y.tbl[i][0]; + FFTRev1(yp, yp, k, i); + } + } ); + + hi = min(hi, n-1); + l = hi-lo+1; + l = max(l, 0); + x.rep.SetLength(l); + ZZ_p *xx = x.rep.elts(); + + ZZ_pContext local_context; + local_context.save(); + + pool->exec_range(l, + [lo, xx, &y, nprimes, &local_context, FFTInfo] + (long first, long last) { + + local_context.restore(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + // TmpSpace is thread local! + + vec_long& t = ModularRepBuf(); + t.SetLength(nprimes); + + for (long j = first; j < last; j++) { + for (long i = 0; i < nprimes; i++) + t[i] = y.tbl[i][j+lo]; + + FromModularRep(xx[j], t, FFTInfo, TmpSpace); + } + } ); + + x.normalize(); +} + + + +#endif + + + + + + +NTL_TBDECL(RevFromFFTRep)(vec_ZZ_p& x, FFTRep& y, long lo, long hi) + + // converts from FFT-representation to coefficient representation + // using "inverted" evaluation points. 
+ // only the coefficients lo..hi are computed + + +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + + + long k, n, i, j, l; + + vec_long& t = ModularRepBuf(); + + k = y.k; + n = (1L << k); + + long nprimes = FFTInfo->NumPrimes; + t.SetLength(nprimes); + + for (i = 0; i < nprimes; i++) { + long *yp = &y.tbl[i][0]; + FFTFwd(yp, yp, k, i); + } + + hi = min(hi, n-1); + l = hi-lo+1; + l = max(l, 0); + x.SetLength(l); + + for (j = 0; j < l; j++) { + for (i = 0; i < nprimes; i++) + t[i] = y.tbl[i][j+lo]; + + FromModularRep(x[j], t, FFTInfo, TmpSpace); + } +} + + +#ifdef NTL_THREAD_BOOST + +void RevFromFFTRep(vec_ZZ_p& x, FFTRep& y, long lo, long hi) +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_RevFromFFTRep(x, y, lo, hi); + return; + } + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long k, n, l; + + long nprimes = FFTInfo->NumPrimes; + + k = y.k; + n = (1L << k); + + + pool->exec_range(nprimes, + [&y, k](long first, long last) { + for (long i = first; i < last; i++) { + long *yp = &y.tbl[i][0]; + FFTFwd(yp, yp, k, i); + } + } ); + + hi = min(hi, n-1); + l = hi-lo+1; + l = max(l, 0); + x.SetLength(l); + ZZ_p *xx = x.elts(); + + ZZ_pContext local_context; + local_context.save(); + + pool->exec_range(l, + [lo, xx, &y, nprimes, &local_context, FFTInfo] + (long first, long last) { + + local_context.restore(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + // TmpSpace is thread local! + + vec_long& t = ModularRepBuf(); + t.SetLength(nprimes); + + for (long j = first; j < last; j++) { + for (long i = 0; i < nprimes; i++) + t[i] = y.tbl[i][j+lo]; + + FromModularRep(xx[j], t, FFTInfo, TmpSpace); + } + } ); + +} + + + + +#endif + + + + + + + +NTL_TBDECL(NDFromFFTRep)(ZZ_pX& x, const FFTRep& y, long lo, long hi, FFTRep& z) +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + + + long k, n, i, j, l; + + vec_long& t = ModularRepBuf(); + + long nprimes = FFTInfo->NumPrimes; + t.SetLength(nprimes); + k = y.k; + n = (1L << k); + + z.SetSize(k); + + for (i = 0; i < nprimes; i++) { + long *zp = &z.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + + FFTRev1(zp, yp, k, i); + } + + hi = min(hi, n-1); + l = hi-lo+1; + l = max(l, 0); + x.rep.SetLength(l); + + for (j = 0; j < l; j++) { + for (i = 0; i < nprimes; i++) + t[i] = z.tbl[i][j+lo]; + + FromModularRep(x.rep[j], t, FFTInfo, TmpSpace); + } + + x.normalize(); +} + +#ifdef NTL_THREAD_BOOST + +void NDFromFFTRep(ZZ_pX& x, const FFTRep& y, long lo, long hi, FFTRep& z) + + // converts from FFT-representation to coefficient representation + // only the coefficients lo..hi are computed + + +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_NDFromFFTRep(x, y, lo, hi, z); + return; + } + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long k, n, l; + + long nprimes = FFTInfo->NumPrimes; + + k = y.k; + n = (1L << k); + + z.SetSize(k); + + pool->exec_range(nprimes, + [&y, &z, k](long first, long last) { + for (long i = first; i < last; i++) { + long *zp = &z.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + FFTRev1(zp, yp, k, i); + } + } ); + + hi = min(hi, n-1); + l = hi-lo+1; + l = max(l, 0); + x.rep.SetLength(l); + ZZ_p *xx = x.rep.elts(); + + ZZ_pContext local_context; + local_context.save(); + + pool->exec_range(l, + [lo, xx, &z, nprimes, &local_context, FFTInfo] + (long first, long last) { + + 
local_context.restore(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + // TmpSpace is thread local! + + vec_long& t = ModularRepBuf(); + t.SetLength(nprimes); + + for (long j = first; j < last; j++) { + for (long i = 0; i < nprimes; i++) + t[i] = z.tbl[i][j+lo]; + + FromModularRep(xx[j], t, FFTInfo, TmpSpace); + } + } ); + + x.normalize(); +} + + + +#endif + +void NDFromFFTRep(ZZ_pX& x, FFTRep& y, long lo, long hi) +{ + FFTRep z; + NDFromFFTRep(x, y, lo, hi, z); +} + +NTL_TBDECL(FromFFTRep)(ZZ_p* x, FFTRep& y, long lo, long hi) + + // converts from FFT-representation to coefficient representation + // only the coefficients lo..hi are computed + + +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + + + long k, n, i, j; + + vec_long& t = ModularRepBuf(); + + k = y.k; + n = (1L << k); + + long nprimes = FFTInfo->NumPrimes; + t.SetLength(nprimes); + + for (i = 0; i < nprimes; i++) { + long *yp = &y.tbl[i][0]; + FFTRev1(yp, yp, k, i); + } + + for (j = lo; j <= hi; j++) { + if (j >= n) + clear(x[j-lo]); + else { + for (i = 0; i < nprimes; i++) + t[i] = y.tbl[i][j]; + + FromModularRep(x[j-lo], t, FFTInfo, TmpSpace); + } + } +} + + +#ifdef NTL_THREAD_BOOST + +void FromFFTRep(ZZ_p* x, FFTRep& y, long lo, long hi) + + // converts from FFT-representation to coefficient representation + // only the coefficients lo..hi are computed + + +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_FromFFTRep(x, y, lo, hi); + return; + } + + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + + long k, n, l; + + k = y.k; + n = (1L << k); + + long nprimes = FFTInfo->NumPrimes; + + + pool->exec_range(nprimes, + [&y, k](long first, long last) { + for (long i = first; i < last; i++) { + long *yp = &y.tbl[i][0]; + FFTRev1(yp, yp, k, i); + } + } ); + + + ZZ_pContext local_context; + local_context.save(); + + pool->exec_range(hi-lo+1, + [n, lo, x, &y, nprimes, &local_context, FFTInfo] + (long first, long last) { + + local_context.restore(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + // TmpSpace is thread local! 
+ + vec_long& t = ModularRepBuf(); + t.SetLength(nprimes); + + for (long idx = first; idx < last; idx++) { + long j = lo + idx; + + if (j >= n) + clear(x[j-lo]); + else { + for (long i = 0; i < nprimes; i++) + t[i] = y.tbl[i][j]; + + FromModularRep(x[j-lo], t, FFTInfo, TmpSpace); + } + } + } ); +} + +#endif + + +NTL_TBDECL(mul)(FFTRep& z, const FFTRep& x, const FFTRep& y) +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long k, n, i, j; + + if (x.k != y.k) LogicError("FFT rep mismatch"); + + k = x.k; + n = 1L << k; + + z.SetSize(k); + + long nprimes = FFTInfo->NumPrimes; + + for (i = 0; i < nprimes; i++) { + long *zp = &z.tbl[i][0]; + const long *xp = &x.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + long q = GetFFTPrime(i); + mulmod_t qinv = GetFFTPrimeInv(i); + + for (j = 0; j < n; j++) + zp[j] = NormalizedMulMod(xp[j], yp[j], q, qinv); + } + +} + + +#ifdef NTL_THREAD_BOOST + +void mul(FFTRep& z, const FFTRep& x, const FFTRep& y) +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_mul(z, x, y); + return; + } + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long k, n; + + if (x.k != y.k) LogicError("FFT rep mismatch"); + + k = x.k; + n = 1L << k; + + z.SetSize(k); + + long nprimes = FFTInfo->NumPrimes; + + pool->exec_range(nprimes, + [&x, &y, &z, n](long first, long last) { + for (long i = first; i < last; i++) { + long *zp = &z.tbl[i][0]; + const long *xp = &x.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + long q = GetFFTPrime(i); + mulmod_t qinv = GetFFTPrimeInv(i); + + for (long j = 0; j < n; j++) + zp[j] = NormalizedMulMod(xp[j], yp[j], q, qinv); + } + } ); + +} + +#endif + + + +NTL_TBDECL(sub)(FFTRep& z, const FFTRep& x, const FFTRep& y) +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long k, n, i, j; + + if (x.k != y.k) LogicError("FFT rep mismatch"); + + k = x.k; + n = 1L << k; + + z.SetSize(k); + + long nprimes = FFTInfo->NumPrimes; + + for (i = 0; i < nprimes; i++) { + long *zp = &z.tbl[i][0]; + const long *xp = &x.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + long q = GetFFTPrime(i); + + for (j = 0; j < n; j++) + zp[j] = SubMod(xp[j], yp[j], q); + } + +} + + +#ifdef NTL_THREAD_BOOST + +void sub(FFTRep& z, const FFTRep& x, const FFTRep& y) +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_sub(z, x, y); + return; + } + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long k, n; + + if (x.k != y.k) LogicError("FFT rep mismatch"); + + k = x.k; + n = 1L << k; + + z.SetSize(k); + + long nprimes = FFTInfo->NumPrimes; + + pool->exec_range(nprimes, + [&x, &y, &z, n](long first, long last) { + for (long i = first; i < last; i++) { + long *zp = &z.tbl[i][0]; + const long *xp = &x.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + long q = GetFFTPrime(i); + + for (long j = 0; j < n; j++) + zp[j] = SubMod(xp[j], yp[j], q); + } + } ); + +} + +#endif + + + +NTL_TBDECL(add)(FFTRep& z, const FFTRep& x, const FFTRep& y) +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long k, n, i, j; + + if (x.k != y.k) LogicError("FFT rep mismatch"); + + k = x.k; + n = 1L << k; + + z.SetSize(k); + + long nprimes = FFTInfo->NumPrimes; + + for (i = 0; i < nprimes; i++) { + long *zp = &z.tbl[i][0]; + const long *xp = &x.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + long q = GetFFTPrime(i); + + for (j = 0; j < n; j++) + zp[j] = AddMod(xp[j], yp[j], q); + } + +} + + +#ifdef NTL_THREAD_BOOST + +void add(FFTRep& z, const FFTRep& x, const FFTRep& y) +{ + 
BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_add(z, x, y); + return; + } + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long k, n; + + if (x.k != y.k) LogicError("FFT rep mismatch"); + + k = x.k; + n = 1L << k; + + z.SetSize(k); + + long nprimes = FFTInfo->NumPrimes; + + pool->exec_range(nprimes, + [&x, &y, &z, n](long first, long last) { + for (long i = first; i < last; i++) { + long *zp = &z.tbl[i][0]; + const long *xp = &x.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + long q = GetFFTPrime(i); + + for (long j = 0; j < n; j++) + zp[j] = AddMod(xp[j], yp[j], q); + } + } ); + +} + +#endif + + + + + + +NTL_TBDECL(reduce)(FFTRep& x, const FFTRep& a, long k) + // reduces a 2^l point FFT-rep to a 2^k point FFT-rep + // input may alias output +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long i, j, l, n; + long* xp; + const long* ap; + + l = a.k; + n = 1L << k; + + if (l < k) LogicError("reduce: bad operands"); + + x.SetSize(k); + + + long nprimes = FFTInfo->NumPrimes; + + for (i = 0; i < nprimes; i++) { + ap = &a.tbl[i][0]; + xp = &x.tbl[i][0]; + for (j = 0; j < n; j++) + xp[j] = ap[j << (l-k)]; + } +} + + +#ifdef NTL_THREAD_BOOST + +void reduce(FFTRep& x, const FFTRep& a, long k) + // reduces a 2^l point FFT-rep to a 2^k point FFT-rep + // input may alias output +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_reduce(x, a, k); + return; + } + + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long l, n; + + l = a.k; + n = 1L << k; + + if (l < k) LogicError("reduce: bad operands"); + + x.SetSize(k); + + + long nprimes = FFTInfo->NumPrimes; + + pool->exec_range(nprimes, + [&x, &a, n, l, k](long first, long last) { + for (long i = first; i < last; i++) { + const long *ap = &a.tbl[i][0]; + long *xp = &x.tbl[i][0]; + for (long j = 0; j < n; j++) + xp[j] = ap[j << (l-k)]; + } + } ); +} + +#endif + + + + +NTL_TBDECL(AddExpand)(FFTRep& x, const FFTRep& a) +// x = x + (an "expanded" version of a) +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long i, j, l, k, n; + + l = x.k; + k = a.k; + n = 1L << k; + + if (l < k) LogicError("AddExpand: bad args"); + + + long nprimes = FFTInfo->NumPrimes; + + for (i = 0; i < nprimes; i++) { + long q = GetFFTPrime(i); + const long *ap = &a.tbl[i][0]; + long *xp = &x.tbl[i][0]; + for (j = 0; j < n; j++) { + long j1 = j << (l-k); + xp[j1] = AddMod(xp[j1], ap[j], q); + } + } +} + +#ifdef NTL_THREAD_BOOST + +void AddExpand(FFTRep& x, const FFTRep& a) +// x = x + (an "expanded" version of a) +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_AddExpand(x, a); + return; + } + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long l, k, n; + + l = x.k; + k = a.k; + n = 1L << k; + + if (l < k) LogicError("AddExpand: bad args"); + + + long nprimes = FFTInfo->NumPrimes; + + pool->exec_range(nprimes, + [&x, &a, n, l, k](long first, long last) { + for (long i = first; i < last; i++) { + long q = GetFFTPrime(i); + const long *ap = &a.tbl[i][0]; + long *xp = &x.tbl[i][0]; + for (long j = 0; j < n; j++) { + long j1 = j << (l-k); + xp[j1] = AddMod(xp[j1], ap[j], q); + } + } + } ); +} + + +#endif + + + + + +NTL_TBDECL(ToZZ_pXModRep)(ZZ_pXModRep& y, const ZZ_pX& x, long lo, long hi) +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + + + long n, i, j; + vec_long& t = ModularRepBuf(); + + + long 
nprimes = FFTInfo->NumPrimes; + t.SetLength(FFTInfo->NumPrimes); + + if (lo < 0) + LogicError("bad arg to ToZZ_pXModRep"); + hi = min(hi, deg(x)); + n = max(hi-lo+1, 0); + + y.SetSize(n); + + const ZZ_p *xx = x.rep.elts(); + + for (j = 0; j < n; j++) { + ToModularRep(t, xx[j+lo], FFTInfo, TmpSpace); + for (i = 0; i < nprimes; i++) + y.tbl[i][j] = t[i]; + } +} + +#ifdef NTL_THREAD_BOOST +void ToZZ_pXModRep(ZZ_pXModRep& y, const ZZ_pX& x, long lo, long hi) +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_ToZZ_pXModRep(y, x, lo, hi); + return; + } + + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + + long n; + + long nprimes = FFTInfo->NumPrimes; + + if (lo < 0) + LogicError("bad arg to ToZZ_pXModRep"); + + hi = min(hi, deg(x)); + n = max(hi-lo+1, 0); + + y.SetSize(n); + + const ZZ_p *xx = x.rep.elts(); + + ZZ_pContext local_context; + local_context.save(); + + pool->exec_range(n, + [lo, xx, &y, nprimes, &local_context, FFTInfo](long first, long last) { + + local_context.restore(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + // TmpSpace is thread local! + + vec_long& t = ModularRepBuf(); + t.SetLength(nprimes); + + for (long j = first; j < last; j++) { + ToModularRep(t, xx[j+lo], FFTInfo, TmpSpace); + for (long i = 0; i < nprimes; i++) + y.tbl[i][j] = t[i]; + } + } ); +} +#endif + + + + + + + + + +NTL_TBDECL(ToFFTRep)(FFTRep& x, const ZZ_pXModRep& a, long k, long lo, long hi) +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long n, m, i, j; + + if (k < 0 || lo < 0) + LogicError("bad args to ToFFTRep"); + + if (hi > a.n-1) hi = a.n-1; + + n = 1L << k; + m = max(hi-lo+1, 0); + + if (m > n) + LogicError("bad args to ToFFTRep"); + + + x.SetSize(k); + + long nprimes = FFTInfo->NumPrimes; + + if (m == 0) { + for (i = 0; i < nprimes; i++) { + long *xp = &x.tbl[i][0]; + for (j = m; j < n; j++) + xp[j] = 0; + } + } + else { + for (i = 0; i < nprimes; i++) { + long *xp = &x.tbl[i][0]; + long *ap = &a.tbl[i][0]; + for (j = 0; j < m; j++) + xp[j] = ap[lo+j]; + for (j = m; j < n; j++) + xp[j] = 0; + + FFTFwd(xp, xp, k, i); + } + } +} + +#ifdef NTL_THREAD_BOOST +void ToFFTRep(FFTRep& x, const ZZ_pXModRep& a, long k, long lo, long hi) +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_ToFFTRep(x, a, k, lo, hi); + return; + } + + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + + long n, m; + + if (k < 0 || lo < 0) + LogicError("bad args to ToFFTRep"); + + if (hi > a.n-1) hi = a.n-1; + + n = 1L << k; + m = max(hi-lo+1, 0); + + if (m > n) + LogicError("bad args to ToFFTRep"); + + + x.SetSize(k); + + long nprimes = FFTInfo->NumPrimes; + + if (m == 0) { + for (long i = 0; i < nprimes; i++) { + long *xp = &x.tbl[i][0]; + for (long j = m; j < n; j++) + xp[j] = 0; + } + } + else { + + pool->exec_range(nprimes, + [&x, &a, lo, m, n, k](long first, long last) { + + for (long i = first; i < last; i++) { + long *xp = &x.tbl[i][0]; + long *ap = &a.tbl[i][0]; + for (long j = 0; j < m; j++) + xp[j] = ap[lo+j]; + for (long j = m; j < n; j++) + xp[j] = 0; + + FFTFwd(xp, xp, k, i); + } + } ); + + } +} +#endif + + + +void FromFFTRep(ZZ_pXModRep& x, const FFTRep& a) +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + long nprimes = FFTInfo->NumPrimes; + long k = a.k; + long n = 1L << k; + + x.SetSize(n); + for (long i = 0; i < nprimes; i++) { + long *xp = &x.tbl[i][0]; + long *ap = &a.tbl[i][0]; + FFTRev1(xp, ap, k, i); + } +} + +void FromZZ_pXModRep(ZZ_pX& x, const 
ZZ_pXModRep& a, long lo, long hi) +{ + const ZZ_pFFTInfoT *FFTInfo = ZZ_p::GetFFTInfo(); + ZZ_pTmpSpaceT *TmpSpace = ZZ_p::GetTmpSpace(); + + long n = a.n; + long nprimes = FFTInfo->NumPrimes; + + vec_long& t = ModularRepBuf(); + t.SetLength(nprimes); + + hi = min(hi, n-1); + long l = hi-lo+1; + l = max(l, 0); + x.rep.SetLength(l); + + for (long j = 0; j < l; j++) { + for (long i = 0; i < nprimes; i++) + t[i] = a.tbl[i][j+lo]; + + FromModularRep(x.rep[j], t, FFTInfo, TmpSpace); + } + + x.normalize(); +} + + + + + + + + +void FFTMul(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b) +{ + long k, d; + + if (IsZero(a) || IsZero(b)) { + clear(x); + return; + } + + d = deg(a) + deg(b); + k = NextPowerOfTwo(d+1); + + FFTRep R1(INIT_SIZE, k), R2(INIT_SIZE, k); + + ToFFTRep(R1, a, k); + ToFFTRep(R2, b, k); + mul(R1, R1, R2); + FromFFTRep(x, R1, 0, d); +} + +void FFTSqr(ZZ_pX& x, const ZZ_pX& a) +{ + long k, d; + + if (IsZero(a)) { + clear(x); + return; + } + + d = 2*deg(a); + k = NextPowerOfTwo(d+1); + + FFTRep R1(INIT_SIZE, k); + + ToFFTRep(R1, a, k); + mul(R1, R1, R1); + FromFFTRep(x, R1, 0, d); +} + + +void CopyReverse(ZZ_pX& x, const ZZ_pX& a, long lo, long hi) + + // x[0..hi-lo] = reverse(a[lo..hi]), with zero fill + // input may not alias output + +{ + long i, j, n, m; + + n = hi-lo+1; + m = a.rep.length(); + + x.rep.SetLength(n); + + const ZZ_p* ap = a.rep.elts(); + ZZ_p* xp = x.rep.elts(); + + for (i = 0; i < n; i++) { + j = hi-i; + if (j < 0 || j >= m) + clear(xp[i]); + else + xp[i] = ap[j]; + } + + x.normalize(); +} + +void copy(ZZ_pX& x, const ZZ_pX& a, long lo, long hi) + + // x[0..hi-lo] = a[lo..hi], with zero fill + // input may not alias output + +{ + long i, j, n, m; + + n = hi-lo+1; + m = a.rep.length(); + + x.rep.SetLength(n); + + const ZZ_p* ap = a.rep.elts(); + ZZ_p* xp = x.rep.elts(); + + for (i = 0; i < n; i++) { + j = lo + i; + if (j < 0 || j >= m) + clear(xp[i]); + else + xp[i] = ap[j]; + } + + x.normalize(); +} + + +void rem21(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F) +{ + long i, da, ds, n, kk; + + da = deg(a); + n = F.n; + + if (da > 2*n-2) + LogicError("bad args to rem(ZZ_pX,ZZ_pX,ZZ_pXModulus)"); + + + if (da < n) { + x = a; + return; + } + + if (!F.UseFFT || da - n <= NTL_ZZ_pX_FFT_CROSSOVER) { + PlainRem(x, a, F.f); + return; + } + + FFTRep R1(INIT_SIZE, F.l); + ZZ_pX P1(INIT_SIZE, n); + + ToFFTRep(R1, a, F.l, n, 2*(n-1)); + mul(R1, R1, F.HRep); + FromFFTRep(P1, R1, n-2, 2*n-4); + + ToFFTRep(R1, P1, F.k); + mul(R1, R1, F.FRep); + FromFFTRep(P1, R1, 0, n-1); + + ds = deg(P1); + + kk = 1L << F.k; + + x.rep.SetLength(n); + const ZZ_p* aa = a.rep.elts(); + const ZZ_p* ss = P1.rep.elts(); + ZZ_p* xx = x.rep.elts(); + + for (i = 0; i < n; i++) { + if (i <= ds) + sub(xx[i], aa[i], ss[i]); + else + xx[i] = aa[i]; + + if (i + kk <= da) + add(xx[i], xx[i], aa[i+kk]); + } + + x.normalize(); +} + +void DivRem21(ZZ_pX& q, ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F) +{ + long i, da, ds, n, kk; + + da = deg(a); + n = F.n; + + if (da > 2*n-2) + LogicError("bad args to rem(ZZ_pX,ZZ_pX,ZZ_pXModulus)"); + + + if (da < n) { + x = a; + clear(q); + return; + } + + if (!F.UseFFT || da - n <= NTL_ZZ_pX_FFT_CROSSOVER) { + PlainDivRem(q, x, a, F.f); + return; + } + + FFTRep R1(INIT_SIZE, F.l); + ZZ_pX P1(INIT_SIZE, n), qq; + + ToFFTRep(R1, a, F.l, n, 2*(n-1)); + mul(R1, R1, F.HRep); + FromFFTRep(P1, R1, n-2, 2*n-4); + qq = P1; + + ToFFTRep(R1, P1, F.k); + mul(R1, R1, F.FRep); + FromFFTRep(P1, R1, 0, n-1); + + ds = deg(P1); + + kk = 1L << F.k; + + x.rep.SetLength(n); + const ZZ_p* aa = 
a.rep.elts(); + const ZZ_p* ss = P1.rep.elts(); + ZZ_p* xx = x.rep.elts(); + + for (i = 0; i < n; i++) { + if (i <= ds) + sub(xx[i], aa[i], ss[i]); + else + xx[i] = aa[i]; + + if (i + kk <= da) + add(xx[i], xx[i], aa[i+kk]); + } + + x.normalize(); + q = qq; +} + +void div21(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F) +{ + long da, n; + + da = deg(a); + n = F.n; + + if (da > 2*n-2) + LogicError("bad args to rem(ZZ_pX,ZZ_pX,ZZ_pXModulus)"); + + + if (da < n) { + clear(x); + return; + } + + if (!F.UseFFT || da - n <= NTL_ZZ_pX_FFT_CROSSOVER) { + PlainDiv(x, a, F.f); + return; + } + + FFTRep R1(INIT_SIZE, F.l); + ZZ_pX P1(INIT_SIZE, n); + + ToFFTRep(R1, a, F.l, n, 2*(n-1)); + mul(R1, R1, F.HRep); + FromFFTRep(x, R1, n-2, 2*n-4); +} + + +void rem(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F) +{ + long da = deg(a); + long n = F.n; + + if (n < 0) LogicError("rem: uninitialized modulus"); + + if (da <= 2*n-2) { + rem21(x, a, F); + return; + } + else if (!F.UseFFT || da - n <= NTL_ZZ_pX_FFT_CROSSOVER) { + PlainRem(x, a, F.f); + return; + } + + ZZ_pX buf(INIT_SIZE, 2*n-1); + + long a_len = da+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + rem21(buf, buf, F); + + a_len -= amt; + } + + x = buf; +} + +void DivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pXModulus& F) +{ + long da = deg(a); + long n = F.n; + + if (n < 0) LogicError("uninitialized modulus"); + + if (da <= 2*n-2) { + DivRem21(q, r, a, F); + return; + } + else if (!F.UseFFT || da - n <= NTL_ZZ_pX_FFT_CROSSOVER) { + PlainDivRem(q, r, a, F.f); + return; + } + + ZZ_pX buf(INIT_SIZE, 2*n-1); + ZZ_pX qbuf(INIT_SIZE, n-1); + + ZZ_pX qq; + qq.rep.SetLength(da-n+1); + + long a_len = da+1; + long q_hi = da-n+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + DivRem21(qbuf, buf, buf, F); + long dl = qbuf.rep.length(); + a_len = a_len - amt; + for(i = 0; i < dl; i++) + qq.rep[a_len+i] = qbuf.rep[i]; + for(i = dl+a_len; i < q_hi; i++) + clear(qq.rep[i]); + q_hi = a_len; + } + + r = buf; + + qq.normalize(); + q = qq; +} + +void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_pXModulus& F) +{ + long da = deg(a); + long n = F.n; + + if (n < 0) LogicError("uninitialized modulus"); + + if (da <= 2*n-2) { + div21(q, a, F); + return; + } + else if (!F.UseFFT || da - n <= NTL_ZZ_pX_FFT_CROSSOVER) { + PlainDiv(q, a, F.f); + return; + } + + ZZ_pX buf(INIT_SIZE, 2*n-1); + ZZ_pX qbuf(INIT_SIZE, n-1); + + ZZ_pX qq; + qq.rep.SetLength(da-n+1); + + long a_len = da+1; + long q_hi = da-n+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + a_len = a_len - amt; + if (a_len > 0) + DivRem21(qbuf, buf, buf, F); + else + div21(qbuf, buf, F); + + long dl = qbuf.rep.length(); + for(i = 0; i < dl; i++) + qq.rep[a_len+i] = qbuf.rep[i]; + for(i 
= dl+a_len; i < q_hi; i++) + clear(qq.rep[i]); + q_hi = a_len; + } + + qq.normalize(); + q = qq; +} + + + +void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, const ZZ_pXModulus& F) +{ + long da, db, d, n, k; + + da = deg(a); + db = deg(b); + n = F.n; + + if (n < 0) LogicError("MulMod: uninitialized modulus"); + + if (da >= n || db >= n) + LogicError("bad args to MulMod(ZZ_pX,ZZ_pX,ZZ_pX,ZZ_pXModulus)"); + + if (da < 0 || db < 0) { + clear(x); + return; + } + + if (!F.UseFFT || da <= NTL_ZZ_pX_FFT_CROSSOVER || db <= NTL_ZZ_pX_FFT_CROSSOVER) { + ZZ_pX P1; + mul(P1, a, b); + rem(x, P1, F); + return; + } + + d = da + db + 1; + + k = NextPowerOfTwo(d); + k = max(k, F.k); + + FFTRep R1(INIT_SIZE, k), R2(INIT_SIZE, F.l); + ZZ_pX P1(INIT_SIZE, n); + + ToFFTRep(R1, a, k); + ToFFTRep(R2, b, k); + mul(R1, R1, R2); + NDFromFFTRep(P1, R1, n, d-1, R2); // save R1 for future use + + ToFFTRep(R2, P1, F.l); + mul(R2, R2, F.HRep); + FromFFTRep(P1, R2, n-2, 2*n-4); + + ToFFTRep(R2, P1, F.k); + mul(R2, R2, F.FRep); + reduce(R1, R1, F.k); + sub(R1, R1, R2); + FromFFTRep(x, R1, 0, n-1); +} + +void SqrMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXModulus& F) +{ + long da, d, n, k; + + da = deg(a); + n = F.n; + + if (n < 0) LogicError("SqrMod: uninitialized modulus"); + + if (da >= n) + LogicError("bad args to SqrMod(ZZ_pX,ZZ_pX,ZZ_pXModulus)"); + + if (!F.UseFFT || da <= NTL_ZZ_pX_FFT_CROSSOVER) { + ZZ_pX P1; + sqr(P1, a); + rem(x, P1, F); + return; + } + + + d = 2*da + 1; + + k = NextPowerOfTwo(d); + k = max(k, F.k); + + FFTRep R1(INIT_SIZE, k), R2(INIT_SIZE, F.l); + ZZ_pX P1(INIT_SIZE, n); + + ToFFTRep(R1, a, k); + mul(R1, R1, R1); + NDFromFFTRep(P1, R1, n, d-1, R2); // save R1 for future use + + ToFFTRep(R2, P1, F.l); + mul(R2, R2, F.HRep); + FromFFTRep(P1, R2, n-2, 2*n-4); + + ToFFTRep(R2, P1, F.k); + mul(R2, R2, F.FRep); + reduce(R1, R1, F.k); + sub(R1, R1, R2); + FromFFTRep(x, R1, 0, n-1); +} + +void PlainInvTrunc(ZZ_pX& x, const ZZ_pX& a, long m) + + /* x = (1/a) % X^m; input must not alias output; the constant term of a must be nonzero */ + +{ + long i, k, n, lb; + NTL_ZZRegister(v); + NTL_ZZRegister(t); + ZZ_p s; + const ZZ_p* ap; + ZZ_p* xp; + + + n = deg(a); + + if (n < 0) ArithmeticError("division by zero"); + + inv(s, ConstTerm(a)); + + if (n == 0) { + conv(x, s); + return; + } + + ap = a.rep.elts(); + x.rep.SetLength(m); + xp = x.rep.elts(); + + xp[0] = s; + + long is_one = IsOne(s); + + for (k = 1; k < m; k++) { + clear(v); + lb = max(k-n, 0); + for (i = lb; i <= k-1; i++) { + mul(t, rep(xp[i]), rep(ap[k-i])); + add(v, v, t); + } + conv(xp[k], v); + negate(xp[k], xp[k]); + if (!is_one) mul(xp[k], xp[k], s); + } + + x.normalize(); +} + + +void trunc(ZZ_pX& x, const ZZ_pX& a, long m) + +// x = a % X^m, output may alias input + +{ + if (m < 0) LogicError("trunc: bad args"); + + if (&x == &a) { + if (x.rep.length() > m) { + x.rep.SetLength(m); + x.normalize(); + } + } + else { + long n; + long i; + ZZ_p* xp; + const ZZ_p* ap; + + n = min(a.rep.length(), m); + x.rep.SetLength(n); + + xp = x.rep.elts(); + ap = a.rep.elts(); + + for (i = 0; i < n; i++) xp[i] = ap[i]; + + x.normalize(); + } +} + +void CyclicReduce(ZZ_pX& x, const ZZ_pX& a, long m) + +// computes x = a mod (X^m - 1) + +{ + long n = deg(a); + long i, j; + ZZ_p accum; + + if (n < m) { + x = a; + return; + } + + if (&x != &a) + x.rep.SetLength(m); + + for (i = 0; i < m; i++) { + accum = a.rep[i]; + for (j = i + m; j <= n; j += m) + add(accum, accum, a.rep[j]); + x.rep[i] = accum; + } + + if (&x == &a) + x.rep.SetLength(m); + + x.normalize(); +} + + + +void 
InvTrunc(ZZ_pX& x, const ZZ_pX& a, long m) +{ + if (m < 0) LogicError("InvTrunc: bad args"); + + if (m == 0) { + clear(x); + return; + } + + if (NTL_OVERFLOW(m, 1, 0)) + ResourceError("overflow in InvTrunc"); + + if (&x == &a) { + ZZ_pX la; + la = a; + if (m > NTL_ZZ_pX_NEWTON_CROSSOVER && deg(a) > 0) + NewtonInvTrunc(x, la, m); + else + PlainInvTrunc(x, la, m); + } + else { + if (m > NTL_ZZ_pX_NEWTON_CROSSOVER && deg(a) > 0) + NewtonInvTrunc(x, a, m); + else + PlainInvTrunc(x, a, m); + } +} + + + +void build(ZZ_pXModulus& x, const ZZ_pX& f) +{ + x.f = f; + x.n = deg(f); + + x.tracevec.make(); + + if (x.n <= 0) + LogicError("build: deg(f) must be at least 1"); + + if (x.n <= NTL_ZZ_pX_FFT_CROSSOVER + 1) { + x.UseFFT = 0; + return; + } + + x.UseFFT = 1; + + x.k = NextPowerOfTwo(x.n); + x.l = NextPowerOfTwo(2*x.n - 3); + ToFFTRep(x.FRep, f, x.k); + + ZZ_pX P1(INIT_SIZE, x.n+1), P2(INIT_SIZE, x.n); + + CopyReverse(P1, f, 0, x.n); + InvTrunc(P2, P1, x.n-1); + + CopyReverse(P1, P2, 0, x.n-2); + ToFFTRep(x.HRep, P1, x.l); +} + +ZZ_pXModulus::ZZ_pXModulus(const ZZ_pX& ff) +{ + build(*this, ff); +} + +ZZ_pXMultiplier::ZZ_pXMultiplier(const ZZ_pX& b, const ZZ_pXModulus& F) +{ + build(*this, b, F); +} + +void build(ZZ_pXMultiplier& x, const ZZ_pX& b, + const ZZ_pXModulus& F) +{ + long db; + long n = F.n; + + if (n < 0) LogicError("build ZZ_pXMultiplier: uninitialized modulus"); + + x.b = b; + db = deg(b); + + if (db >= n) LogicError("build ZZ_pXMultiplier: deg(b) >= deg(f)"); + + if (!F.UseFFT || db <= NTL_ZZ_pX_FFT_CROSSOVER) { + x.UseFFT = 0; + return; + } + + x.UseFFT = 1; + + FFTRep R1(INIT_SIZE, F.l); + ZZ_pX P1(INIT_SIZE, n); + + + ToFFTRep(R1, b, F.l); + reduce(x.B2, R1, F.k); + mul(R1, R1, F.HRep); + FromFFTRep(P1, R1, n-1, 2*n-3); + ToFFTRep(x.B1, P1, F.l); +} + + +void MulMod(ZZ_pX& x, const ZZ_pX& a, const ZZ_pXMultiplier& B, + const ZZ_pXModulus& F) +{ + + long n = F.n; + long da; + + da = deg(a); + + if (da >= n) + LogicError(" bad args to MulMod(ZZ_pX,ZZ_pX,ZZ_pXMultiplier,ZZ_pXModulus)"); + + if (da < 0) { + clear(x); + return; + } + + if (!B.UseFFT || !F.UseFFT || da <= NTL_ZZ_pX_FFT_CROSSOVER) { + ZZ_pX P1; + mul(P1, a, B.b); + rem(x, P1, F); + return; + } + + ZZ_pX P1(INIT_SIZE, n), P2(INIT_SIZE, n); + FFTRep R1(INIT_SIZE, F.l), R2(INIT_SIZE, F.l); + + ToFFTRep(R1, a, F.l); + mul(R2, R1, B.B1); + FromFFTRep(P1, R2, n-1, 2*n-3); + + reduce(R1, R1, F.k); + mul(R1, R1, B.B2); + ToFFTRep(R2, P1, F.k); + mul(R2, R2, F.FRep); + sub(R1, R1, R2); + + FromFFTRep(x, R1, 0, n-1); +} + + +void PowerXMod(ZZ_pX& hh, const ZZ& e, const ZZ_pXModulus& F) +{ + if (F.n < 0) LogicError("PowerXMod: uninitialized modulus"); + + if (IsZero(e)) { + set(hh); + return; + } + + long n = NumBits(e); + long i; + + ZZ_pX h, h1; + + h.SetMaxLength(F.n); + set(h); + + for (i = n - 1; i >= 0; i--) { + if (bit(e, i)) { + SqrMod(h1, h, F); + MulByXMod(h, h1, F); + // NOTE: MulByXMod gives much faster multicore performance + // when output does not alias input + } + else + SqrMod(h, h, F); + } + + if (e < 0) InvMod(h, h, F); + + hh = h; +} + + +void PowerXPlusAMod(ZZ_pX& hh, const ZZ_p& a, const ZZ& e, const ZZ_pXModulus& F) +{ + if (F.n < 0) LogicError("PowerXPlusAMod: uninitialized modulus"); + + if (IsZero(e)) { + set(hh); + return; + } + + ZZ_pX t1(INIT_SIZE, F.n), t2(INIT_SIZE, F.n); + long n = NumBits(e); + long i; + + ZZ_pX h; + + h.SetMaxLength(F.n); + set(h); + + for (i = n - 1; i >= 0; i--) { + SqrMod(h, h, F); + if (bit(e, i)) { + MulByXMod(t1, h, F); + mul(t2, h, a); + add(h, t1, t2); + } + } + + if (e < 
0) InvMod(h, h, F); + + hh = h; +} + + +void PowerMod(ZZ_pX& h, const ZZ_pX& g, const ZZ& e, const ZZ_pXModulus& F) +{ + if (deg(g) >= F.n) + LogicError("PowerMod: bad args"); + + if (IsZero(e)) { + set(h); + return; + } + + ZZ_pXMultiplier G; + + ZZ_pX res; + + long n = NumBits(e); + long i; + + build(G, g, F); + + res.SetMaxLength(F.n); + set(res); + + for (i = n - 1; i >= 0; i--) { + SqrMod(res, res, F); + if (bit(e, i)) + MulMod(res, res, G, F); + } + + if (e < 0) InvMod(res, res, F); + + h = res; +} + + +void NewtonInvTrunc(ZZ_pX& x, const ZZ_pX& a, long m) +{ + x.SetMaxLength(m); + long i, t, k; + + long log2_newton = NextPowerOfTwo(NTL_ZZ_pX_NEWTON_CROSSOVER)-1; + PlainInvTrunc(x, a, 1L << log2_newton); + + t = NextPowerOfTwo(m); + + FFTRep R1(INIT_SIZE, t), R2(INIT_SIZE, t); + ZZ_pX P1(INIT_SIZE, m/2); + + long a_len = min(m, a.rep.length()); + + ZZ_pXModRep a_rep; + ToZZ_pXModRep(a_rep, a, 0, a_len-1); + + k = 1L << log2_newton; + t = log2_newton; + + while (k < m) { + long l = min(2*k, m); + + ToFFTRep(R1, x, t+1); + ToFFTRep(R2, a_rep, t+1, 0, l-1); + mul(R2, R2, R1); + FromFFTRep(P1, R2, k, l-1); + + ToFFTRep(R2, P1, t+1); + mul(R2, R2, R1); + FromFFTRep(P1, R2, 0, l-k-1); + + x.rep.SetLength(l); + long y_len = P1.rep.length(); + for (i = k; i < l; i++) { + if (i-k >= y_len) + clear(x.rep[i]); + else + negate(x.rep[i], P1.rep[i-k]); + } + x.normalize(); + + t++; + k = l; + } +} + + + +void FFTDivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b) +{ + long n = deg(b); + long m = deg(a); + long k, l; + + if (m < n) { + clear(q); + r = a; + return; + } + + if (m >= 3*n) { + ZZ_pXModulus B; + build(B, b); + DivRem(q, r, a, B); + return; + } + + ZZ_pX P1, P2, P3; + + CopyReverse(P3, b, 0, n); + InvTrunc(P2, P3, m-n+1); + CopyReverse(P1, P2, 0, m-n); + + k = NextPowerOfTwo(2*(m-n)+1); + long k1 = NextPowerOfTwo(n); + long mx = max(k1, k); + + FFTRep R1(INIT_SIZE, mx), R2(INIT_SIZE, mx); + + ToFFTRep(R1, P1, k); + ToFFTRep(R2, a, k, n, m); + mul(R1, R1, R2); + FromFFTRep(P3, R1, m-n, 2*(m-n)); + + l = 1L << k1; + + + ToFFTRep(R1, b, k1); + ToFFTRep(R2, P3, k1); + mul(R1, R1, R2); + FromFFTRep(P1, R1, 0, n-1); + CyclicReduce(P2, a, l); + trunc(r, P2, n); + sub(r, r, P1); + q = P3; +} + + + + +void FFTDiv(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b) +{ + + long n = deg(b); + long m = deg(a); + long k; + + if (m < n) { + clear(q); + return; + } + + if (m >= 3*n) { + ZZ_pXModulus B; + build(B, b); + div(q, a, B); + return; + } + + ZZ_pX P1, P2, P3; + + CopyReverse(P3, b, 0, n); + InvTrunc(P2, P3, m-n+1); + CopyReverse(P1, P2, 0, m-n); + + k = NextPowerOfTwo(2*(m-n)+1); + + FFTRep R1(INIT_SIZE, k), R2(INIT_SIZE, k); + + ToFFTRep(R1, P1, k); + ToFFTRep(R2, a, k, n, m); + mul(R1, R1, R2); + FromFFTRep(q, R1, m-n, 2*(m-n)); +} + + + +void FFTRem(ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b) +{ + long n = deg(b); + long m = deg(a); + long k, l; + + if (m < n) { + r = a; + return; + } + + if (m >= 3*n) { + ZZ_pXModulus B; + build(B, b); + rem(r, a, B); + return; + } + + ZZ_pX P1, P2, P3; + + CopyReverse(P3, b, 0, n); + InvTrunc(P2, P3, m-n+1); + CopyReverse(P1, P2, 0, m-n); + + k = NextPowerOfTwo(2*(m-n)+1); + long k1 = NextPowerOfTwo(n); + long mx = max(k, k1); + + FFTRep R1(INIT_SIZE, mx), R2(INIT_SIZE, mx); + + ToFFTRep(R1, P1, k); + ToFFTRep(R2, a, k, n, m); + mul(R1, R1, R2); + FromFFTRep(P3, R1, m-n, 2*(m-n)); + + l = 1L << k1; + + ToFFTRep(R1, b, k1); + ToFFTRep(R2, P3, k1); + mul(R1, R1, R2); + FromFFTRep(P3, R1, 0, n-1); + CyclicReduce(P2, a, l); + trunc(r, P2, n); + sub(r, r, P3); +} + 
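+// A minimal usage sketch for the division wrappers defined below, assuming
+// only that a prime modulus has been installed first; the modulus value and
+// the degrees are illustrative, not prescribed by the library:
+//
+//    ZZ p;
+//    conv(p, "1000000007");    // decimal string -> ZZ
+//    ZZ_p::init(p);            // install the modulus for ZZ_p arithmetic
+//
+//    ZZ_pX a, b, q, r;
+//    random(a, 2000);          // random polynomial of degree < 2000
+//    random(b, 1000);          // random polynomial of degree < 1000
+//    DivRem(q, r, a, b);       // degrees above NTL_ZZ_pX_DIV_CROSSOVER
+//                              // dispatch to FFTDivRem
+//    // afterwards a == q*b + r with deg(r) < deg(b)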
+ +void DivRem(ZZ_pX& q, ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b) +{ + if (deg(b) > NTL_ZZ_pX_DIV_CROSSOVER && deg(a) - deg(b) > NTL_ZZ_pX_DIV_CROSSOVER) + FFTDivRem(q, r, a, b); + else + PlainDivRem(q, r, a, b); +} + +void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b) +{ + if (deg(b) > NTL_ZZ_pX_DIV_CROSSOVER && deg(a) - deg(b) > NTL_ZZ_pX_DIV_CROSSOVER) + FFTDiv(q, a, b); + else + PlainDiv(q, a, b); +} + +void div(ZZ_pX& q, const ZZ_pX& a, const ZZ_p& b) +{ + NTL_ZZ_pRegister(T); + + inv(T, b); + mul(q, a, T); +} + +void div(ZZ_pX& q, const ZZ_pX& a, long b) +{ + NTL_ZZ_pRegister(T); + + T = b; + inv(T, T); + mul(q, a, T); +} + + + +void rem(ZZ_pX& r, const ZZ_pX& a, const ZZ_pX& b) +{ + if (deg(b) > NTL_ZZ_pX_DIV_CROSSOVER && deg(a) - deg(b) > NTL_ZZ_pX_DIV_CROSSOVER) + FFTRem(r, a, b); + else + PlainRem(r, a, b); +} + + +long operator==(const ZZ_pX& a, long b) +{ + if (b == 0) + return IsZero(a); + + if (b == 1) + return IsOne(a); + + long da = deg(a); + + if (da > 0) + return 0; + + NTL_ZZ_pRegister(bb); + bb = b; + + if (da < 0) + return IsZero(bb); + + return a.rep[0] == bb; +} + +long operator==(const ZZ_pX& a, const ZZ_p& b) +{ + if (IsZero(b)) + return IsZero(a); + + long da = deg(a); + + if (da != 0) + return 0; + + return a.rep[0] == b; +} + +void power(ZZ_pX& x, const ZZ_pX& a, long e) +{ + if (e < 0) { + LogicError("power: negative exponent"); + } + + if (e == 0) { + x = 1; + return; + } + + if (a == 0 || a == 1) { + x = a; + return; + } + + long da = deg(a); + + if (da == 0) { + x = power(ConstTerm(a), e); + return; + } + + if (da > (NTL_MAX_LONG-1)/e) + ResourceError("overflow in power"); + + ZZ_pX res; + res.SetMaxLength(da*e + 1); + res = 1; + + long k = NumBits(e); + long i; + + for (i = k - 1; i >= 0; i--) { + sqr(res, res); + if (bit(e, i)) + mul(res, res, a); + } + + x = res; +} + +void reverse(ZZ_pX& x, const ZZ_pX& a, long hi) +{ + if (hi < 0) { clear(x); return; } + if (NTL_OVERFLOW(hi, 1, 0)) + ResourceError("overflow in reverse"); + + if (&x == &a) { + ZZ_pX tmp; + CopyReverse(tmp, a, 0, hi); + x = tmp; + } + else + CopyReverse(x, a, 0, hi); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZ_pX1.c b/thirdparty/linux/ntl/src/ZZ_pX1.c new file mode 100644 index 0000000000..b6cf25bc75 --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZ_pX1.c @@ -0,0 +1,2079 @@ + +#include +#include +#include + + + + + + +NTL_START_IMPL + + + + +long divide(ZZ_pX& q, const ZZ_pX& a, const ZZ_pX& b) +{ + if (IsZero(b)) { + if (IsZero(a)) { + clear(q); + return 1; + } + else + return 0; + } + + ZZ_pX lq, r; + DivRem(lq, r, a, b); + if (!IsZero(r)) return 0; + q = lq; + return 1; +} + +long divide(const ZZ_pX& a, const ZZ_pX& b) +{ + if (IsZero(b)) return IsZero(a); + ZZ_pX lq, r; + DivRem(lq, r, a, b); + if (!IsZero(r)) return 0; + return 1; +} + + + + +void ZZ_pXMatrix::operator=(const ZZ_pXMatrix& M) +{ + elts[0][0] = M.elts[0][0]; + elts[0][1] = M.elts[0][1]; + elts[1][0] = M.elts[1][0]; + elts[1][1] = M.elts[1][1]; +} + + +void RightShift(ZZ_pX& x, const ZZ_pX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) ResourceError("overflow in RightShift"); + LeftShift(x, a, -n); + return; + } + + long da = deg(a); + long i; + + if (da < n) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(da-n+1); + + for (i = 0; i <= da-n; i++) + x.rep[i] = a.rep[i+n]; + + if (&x == &a) + x.rep.SetLength(da-n+1); + + x.normalize(); +} + +void LeftShift(ZZ_pX& x, const ZZ_pX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } 
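+   // n < 0 requests a right shift; when n < -NTL_MAX_LONG the negation -n
+   // would overflow a long, and a shift that far clears the result anyway,
+   // so x is simply set to zero in that case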
+ + if (n < 0) { + if (n < -NTL_MAX_LONG) + clear(x); + else + RightShift(x, a, -n); + return; + } + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in LeftShift"); + + long m = a.rep.length(); + + x.rep.SetLength(m+n); + + long i; + for (i = m-1; i >= 0; i--) + x.rep[i+n] = a.rep[i]; + + for (i = 0; i < n; i++) + clear(x.rep[i]); +} + + +void ShiftAdd(ZZ_pX& U, const ZZ_pX& V, long n) +// assumes input does not alias output +{ + if (IsZero(V)) + return; + + long du = deg(U); + long dv = deg(V); + + long d = max(du, n+dv); + + U.rep.SetLength(d+1); + long i; + + for (i = du+1; i <= d; i++) + clear(U.rep[i]); + + for (i = 0; i <= dv; i++) + add(U.rep[i+n], U.rep[i+n], V.rep[i]); + + U.normalize(); +} + +void ShiftSub(ZZ_pX& U, const ZZ_pX& V, long n) +// assumes input does not alias output +{ + if (IsZero(V)) + return; + + long du = deg(U); + long dv = deg(V); + + long d = max(du, n+dv); + + U.rep.SetLength(d+1); + long i; + + for (i = du+1; i <= d; i++) + clear(U.rep[i]); + + for (i = 0; i <= dv; i++) + sub(U.rep[i+n], U.rep[i+n], V.rep[i]); + + U.normalize(); +} + + +void mul(ZZ_pX& U, ZZ_pX& V, const ZZ_pXMatrix& M) +// (U, V)^T = M*(U, V)^T +{ + long d = deg(U) - deg(M(1,1)); + long k = NextPowerOfTwo(d - 1); + + // When the GCD algorithm is run on polynomials of degree n, n-1, + // where n is a power of two, then d-1 is likely to be a power of two. + // It would be more natural to set k = NextPowerOfTwo(d+1), but this + // would be much less efficient in this case. + + // We optimize this case, as it does sometimes arise naturally + // in some situations. + + long n = (1L << k); + long xx; + ZZ_p a0, a1, b0, b1, c0, d0, u0, u1, v0, v1, nu0, nu1, nv0; + NTL_ZZRegister(t1); + NTL_ZZRegister(t2); + + if (n == d-1) + xx = 1; + else if (n == d) + xx = 2; + else + xx = 3; + + switch (xx) { + case 1: + GetCoeff(a0, M(0,0), 0); + GetCoeff(a1, M(0,0), 1); + GetCoeff(b0, M(0,1), 0); + GetCoeff(b1, M(0,1), 1); + GetCoeff(c0, M(1,0), 0); + GetCoeff(d0, M(1,1), 0); + + GetCoeff(u0, U, 0); + GetCoeff(u1, U, 1); + GetCoeff(v0, V, 0); + GetCoeff(v1, V, 1); + + mul(t1, rep(a0), rep(u0)); + mul(t2, rep(b0), rep(v0)); + add(t1, t1, t2); + conv(nu0, t1); + + mul(t1, rep(a1), rep(u0)); + mul(t2, rep(a0), rep(u1)); + add(t1, t1, t2); + mul(t2, rep(b1), rep(v0)); + add(t1, t1, t2); + mul(t2, rep(b0), rep(v1)); + add(t1, t1, t2); + conv(nu1, t1); + + mul(t1, rep(c0), rep(u0)); + mul(t2, rep(d0), rep(v0)); + add (t1, t1, t2); + conv(nv0, t1); + + break; + + case 2: + GetCoeff(a0, M(0,0), 0); + GetCoeff(b0, M(0,1), 0); + + GetCoeff(u0, U, 0); + GetCoeff(v0, V, 0); + + mul(t1, rep(a0), rep(u0)); + mul(t2, rep(b0), rep(v0)); + add(t1, t1, t2); + conv(nu0, t1); + + break; + + case 3: + break; + + } + + FFTRep RU(INIT_SIZE, k), RV(INIT_SIZE, k), R1(INIT_SIZE, k), + R2(INIT_SIZE, k); + + ToFFTRep(RU, U, k); + ToFFTRep(RV, V, k); + + ToFFTRep(R1, M(0,0), k); + mul(R1, R1, RU); + ToFFTRep(R2, M(0,1), k); + mul(R2, R2, RV); + add(R1, R1, R2); + FromFFTRep(U, R1, 0, d); + + ToFFTRep(R1, M(1,0), k); + mul(R1, R1, RU); + ToFFTRep(R2, M(1,1), k); + mul(R2, R2, RV); + add(R1, R1, R2); + FromFFTRep(V, R1, 0, d-1); + + // now fix-up results + + switch (xx) { + case 1: + GetCoeff(u0, U, 0); + sub(u0, u0, nu0); + SetCoeff(U, d-1, u0); + SetCoeff(U, 0, nu0); + + GetCoeff(u1, U, 1); + sub(u1, u1, nu1); + SetCoeff(U, d, u1); + SetCoeff(U, 1, nu1); + + GetCoeff(v0, V, 0); + sub(v0, v0, nv0); + SetCoeff(V, d-1, v0); + SetCoeff(V, 0, nv0); + + break; + + + case 2: + GetCoeff(u0, U, 0); + sub(u0, u0, nu0); + SetCoeff(U, d, u0); 
+ SetCoeff(U, 0, nu0); + + break; + + } +} + + +void mul(ZZ_pXMatrix& A, ZZ_pXMatrix& B, ZZ_pXMatrix& C) +// A = B*C, B and C are destroyed +{ + long db = deg(B(1,1)); + long dc = deg(C(1,1)); + long da = db + dc; + + long k = NextPowerOfTwo(da+1); + + FFTRep B00, B01, B10, B11, C0, C1, T1, T2; + + ToFFTRep(B00, B(0,0), k); B(0,0).kill(); + ToFFTRep(B01, B(0,1), k); B(0,1).kill(); + ToFFTRep(B10, B(1,0), k); B(1,0).kill(); + ToFFTRep(B11, B(1,1), k); B(1,1).kill(); + + ToFFTRep(C0, C(0,0), k); C(0,0).kill(); + ToFFTRep(C1, C(1,0), k); C(1,0).kill(); + + mul(T1, B00, C0); + mul(T2, B01, C1); + add(T1, T1, T2); + FromFFTRep(A(0,0), T1, 0, da); + + mul(T1, B10, C0); + mul(T2, B11, C1); + add(T1, T1, T2); + FromFFTRep(A(1,0), T1, 0, da); + + ToFFTRep(C0, C(0,1), k); C(0,1).kill(); + ToFFTRep(C1, C(1,1), k); C(1,1).kill(); + + mul(T1, B00, C0); + mul(T2, B01, C1); + add(T1, T1, T2); + FromFFTRep(A(0,1), T1, 0, da); + + mul(T1, B10, C0); + mul(T2, B11, C1); + add(T1, T1, T2); + FromFFTRep(A(1,1), T1, 0, da); +} + +void IterHalfGCD(ZZ_pXMatrix& M_out, ZZ_pX& U, ZZ_pX& V, long d_red) +{ + M_out(0,0).SetMaxLength(d_red); + M_out(0,1).SetMaxLength(d_red); + M_out(1,0).SetMaxLength(d_red); + M_out(1,1).SetMaxLength(d_red); + + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + long goal = deg(U) - d_red; + + if (deg(V) <= goal) + return; + + ZZVec tmp(deg(U)+1, ZZ_p::ExtendedModulusSize()); + ZZ_pX Q, t(INIT_SIZE, d_red); + + while (deg(V) > goal) { + PlainDivRem(Q, U, U, V, tmp); + swap(U, V); + + mul(t, Q, M_out(1,0)); + sub(t, M_out(0,0), t); + M_out(0,0) = M_out(1,0); + M_out(1,0) = t; + + mul(t, Q, M_out(1,1)); + sub(t, M_out(0,1), t); + M_out(0,1) = M_out(1,1); + M_out(1,1) = t; + } +} + + + +void HalfGCD(ZZ_pXMatrix& M_out, const ZZ_pX& U, const ZZ_pX& V, long d_red) +{ + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + return; + } + + + long n = deg(U) - 2*d_red + 2; + if (n < 0) n = 0; + + ZZ_pX U1, V1; + + RightShift(U1, U, n); + RightShift(V1, V, n); + + if (d_red <= NTL_ZZ_pX_HalfGCD_CROSSOVER) { + IterHalfGCD(M_out, U1, V1, d_red); + return; + } + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + ZZ_pXMatrix M1; + + HalfGCD(M1, U1, V1, d1); + mul(U1, V1, M1); + + long d2 = deg(V1) - deg(U) + n + d_red; + + if (IsZero(V1) || d2 <= 0) { + M_out = M1; + return; + } + + + ZZ_pX Q; + ZZ_pXMatrix M2; + + DivRem(Q, U1, U1, V1); + swap(U1, V1); + + HalfGCD(M2, U1, V1, d2); + + ZZ_pX t(INIT_SIZE, deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,0)); + sub(t, M1(0,0), t); + swap(M1(0,0), M1(1,0)); + swap(M1(1,0), t); + + t.kill(); + + t.SetMaxLength(deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,1)); + sub(t, M1(0,1), t); + swap(M1(0,1), M1(1,1)); + swap(M1(1,1), t); + + t.kill(); + + mul(M_out, M2, M1); +} + + + + +void XHalfGCD(ZZ_pXMatrix& M_out, ZZ_pX& U, ZZ_pX& V, long d_red) +{ + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + return; + } + + long du = deg(U); + + if (d_red <= NTL_ZZ_pX_HalfGCD_CROSSOVER) { + IterHalfGCD(M_out, U, V, d_red); + return; + } + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + ZZ_pXMatrix M1; + + HalfGCD(M1, U, V, d1); + mul(U, V, M1); + + long d2 = deg(V) - du + d_red; + + if (IsZero(V) || d2 <= 0) { + M_out = M1; + return; + } + + + ZZ_pX Q; + ZZ_pXMatrix M2; + + DivRem(Q, U, U, V); + swap(U, V); + + XHalfGCD(M2, U, V, 
d2); + + ZZ_pX t(INIT_SIZE, deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,0)); + sub(t, M1(0,0), t); + swap(M1(0,0), M1(1,0)); + swap(M1(1,0), t); + + t.kill(); + + t.SetMaxLength(deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,1)); + sub(t, M1(0,1), t); + swap(M1(0,1), M1(1,1)); + swap(M1(1,1), t); + + t.kill(); + + mul(M_out, M2, M1); +} + +void HalfGCD(ZZ_pX& U, ZZ_pX& V) +{ + long d_red = (deg(U)+1)/2; + + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + return; + } + + long du = deg(U); + + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + ZZ_pXMatrix M1; + + HalfGCD(M1, U, V, d1); + mul(U, V, M1); + + long d2 = deg(V) - du + d_red; + + if (IsZero(V) || d2 <= 0) { + return; + } + + M1(0,0).kill(); + M1(0,1).kill(); + M1(1,0).kill(); + M1(1,1).kill(); + + + ZZ_pX Q; + + DivRem(Q, U, U, V); + swap(U, V); + + HalfGCD(M1, U, V, d2); + + mul(U, V, M1); +} + + +void GCD(ZZ_pX& d, const ZZ_pX& u, const ZZ_pX& v) +{ + ZZ_pX u1, v1; + + u1 = u; + v1 = v; + + if (deg(u1) == deg(v1)) { + if (IsZero(u1)) { + clear(d); + return; + } + + rem(v1, v1, u1); + } + else if (deg(u1) < deg(v1)) { + swap(u1, v1); + } + + // deg(u1) > deg(v1) + + while (deg(u1) > NTL_ZZ_pX_GCD_CROSSOVER && !IsZero(v1)) { + HalfGCD(u1, v1); + + if (!IsZero(v1)) { + rem(u1, u1, v1); + swap(u1, v1); + } + } + + PlainGCD(d, u1, v1); +} + + + +void XGCD(ZZ_pX& d, ZZ_pX& s, ZZ_pX& t, const ZZ_pX& a, const ZZ_pX& b) +{ + ZZ_p w; + + if (IsZero(a) && IsZero(b)) { + clear(d); + set(s); + clear(t); + return; + } + + ZZ_pX U, V, Q; + + U = a; + V = b; + + long flag = 0; + + if (deg(U) == deg(V)) { + DivRem(Q, U, U, V); + swap(U, V); + flag = 1; + } + else if (deg(U) < deg(V)) { + swap(U, V); + flag = 2; + } + + ZZ_pXMatrix M; + + XHalfGCD(M, U, V, deg(U)+1); + + d = U; + + if (flag == 0) { + s = M(0,0); + t = M(0,1); + } + else if (flag == 1) { + s = M(0,1); + mul(t, Q, M(0,1)); + sub(t, M(0,0), t); + } + else { /* flag == 2 */ + s = M(0,1); + t = M(0,0); + } + + // normalize + + inv(w, LeadCoeff(d)); + mul(d, d, w); + mul(s, s, w); + mul(t, t, w); +} + + + + + + + +void IterBuild(ZZ_p* a, long n) +{ + long i, k; + ZZ_p b, t; + + if (n <= 0) return; + + negate(a[0], a[0]); + + for (k = 1; k <= n-1; k++) { + negate(b, a[k]); + add(a[k], b, a[k-1]); + for (i = k-1; i >= 1; i--) { + mul(t, a[i], b); + add(a[i], t, a[i-1]); + } + mul(a[0], a[0], b); + } +} + +void mul(ZZ_p* x, const ZZ_p* a, const ZZ_p* b, long n) +{ + NTL_ZZRegister(t); + NTL_ZZRegister(accum); + + long i, j, jmin, jmax; + + long d = 2*n-1; + + for (i = 0; i <= d; i++) { + jmin = max(0, i-(n-1)); + jmax = min(n-1, i); + clear(accum); + for (j = jmin; j <= jmax; j++) { + mul(t, rep(a[j]), rep(b[i-j])); + add(accum, accum, t); + } + if (i >= n) { + add(accum, accum, rep(a[i-n])); + add(accum, accum, rep(b[i-n])); + } + + conv(x[i], accum); + } +} + + +void BuildFromRoots(ZZ_pX& x, const vec_ZZ_p& a) +{ + long n = a.length(); + + if (n == 0) { + set(x); + return; + } + + long k0 = NextPowerOfTwo(NTL_ZZ_pX_FFT_CROSSOVER); + long crossover = 1L << k0; + + if (n <= crossover) { + x.rep.SetMaxLength(n+1); + x.rep = a; + IterBuild(&x.rep[0], n); + x.rep.SetLength(n+1); + SetCoeff(x, n); + return; + } + + long k = NextPowerOfTwo(n); + + long m = 1L << k; + long i, j; + long l, width; + + ZZ_pX b(INIT_SIZE, m+1); + + b.rep = a; + b.rep.SetLength(m+1); + for (i = n; i < m; i++) + clear(b.rep[i]); + + set(b.rep[m]); + + FFTRep R1(INIT_SIZE, k), R2(INIT_SIZE, k); + + + ZZ_p t1, one; + set(one); + + vec_ZZ_p G(INIT_SIZE, crossover), H(INIT_SIZE, crossover); 
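+   // G and H act as ping-pong scratch buffers for the bottom levels of the
+   // product tree (the levels below the FFT crossover); the pointers g and
+   // h below trade roles after each level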
+ ZZ_p *g = G.elts(); + ZZ_p *h = H.elts(); + ZZ_p *tmp; + + for (i = 0; i < m; i+= crossover) { + for (j = 0; j < crossover; j++) + negate(g[j], b.rep[i+j]); + + if (k0 > 0) { + for (j = 0; j < crossover; j+=2) { + mul(t1, g[j], g[j+1]); + add(g[j+1], g[j], g[j+1]); + g[j] = t1; + } + } + + for (l = 1; l < k0; l++) { + width = 1L << l; + + for (j = 0; j < crossover; j += 2*width) + mul(&h[j], &g[j], &g[j+width], width); + + tmp = g; g = h; h = tmp; + } + + for (j = 0; j < crossover; j++) + b.rep[i+j] = g[j]; + } + + for (l = k0; l < k; l++) { + width = 1L << l; + for (i = 0; i < m; i += 2*width) { + t1 = b.rep[i+width]; + set(b.rep[i+width]); + ToFFTRep(R1, b, l+1, i, i+width); + b.rep[i+width] = t1; + t1 = b.rep[i+2*width]; + set(b.rep[i+2*width]); + ToFFTRep(R2, b, l+1, i+width, i+2*width); + b.rep[i+2*width] = t1; + mul(R1, R1, R2); + FromFFTRep(&b.rep[i], R1, 0, 2*width-1); + sub(b.rep[i], b.rep[i], one); + } + } + + x.rep.SetLength(n+1); + long delta = m-n; + for (i = 0; i <= n; i++) + x.rep[i] = b.rep[i+delta]; + + // no need to normalize +} + + + +void eval(ZZ_p& b, const ZZ_pX& f, const ZZ_p& a) +// does a Horner evaluation +{ + ZZ_p acc; + long i; + + clear(acc); + for (i = deg(f); i >= 0; i--) { + mul(acc, acc, a); + add(acc, acc, f.rep[i]); + } + + b = acc; +} + + + +void eval(vec_ZZ_p& b, const ZZ_pX& f, const vec_ZZ_p& a) +// naive algorithm: repeats Horner +{ + if (&b == &f.rep) { + vec_ZZ_p bb; + eval(bb, f, a); + b = bb; + return; + } + + long m = a.length(); + b.SetLength(m); + long i; + for (i = 0; i < m; i++) + eval(b[i], f, a[i]); +} + + + + +void interpolate(ZZ_pX& f, const vec_ZZ_p& a, const vec_ZZ_p& b) +{ + long m = a.length(); + if (b.length() != m) LogicError("interpolate: vector length mismatch"); + + if (m == 0) { + clear(f); + return; + } + + vec_ZZ_p prod; + prod = a; + + ZZ_p t1, t2; + + long k, i; + + vec_ZZ_p res; + res.SetLength(m); + + for (k = 0; k < m; k++) { + + const ZZ_p& aa = a[k]; + + set(t1); + for (i = k-1; i >= 0; i--) { + mul(t1, t1, aa); + add(t1, t1, prod[i]); + } + + clear(t2); + for (i = k-1; i >= 0; i--) { + mul(t2, t2, aa); + add(t2, t2, res[i]); + } + + + inv(t1, t1); + sub(t2, b[k], t2); + mul(t1, t1, t2); + + for (i = 0; i < k; i++) { + mul(t2, prod[i], t1); + add(res[i], res[i], t2); + } + + res[k] = t1; + + if (k < m-1) { + if (k == 0) + negate(prod[0], prod[0]); + else { + negate(t1, a[k]); + add(prod[k], t1, prod[k-1]); + for (i = k-1; i >= 1; i--) { + mul(t2, prod[i], t1); + add(prod[i], t2, prod[i-1]); + } + mul(prod[0], prod[0], t1); + } + } + } + + while (m > 0 && IsZero(res[m-1])) m--; + res.SetLength(m); + f.rep = res; +} + + + + +NTL_TBDECL(InnerProduct)(ZZ_pX& x, const vec_ZZ_p& v, long low, long high, + const vec_ZZ_pX& H, long n, ZZVec& t) +{ + NTL_ZZRegister(s); + long i, j; + + for (j = 0; j < n; j++) + clear(t[j]); + + high = min(high, v.length()-1); + for (i = low; i <= high; i++) { + const vec_ZZ_p& h = H[i-low].rep; + long m = h.length(); + const ZZ& w = rep(v[i]); + + for (j = 0; j < m; j++) { + mul(s, w, rep(h[j])); + add(t[j], t[j], s); + } + } + + x.rep.SetLength(n); + for (j = 0; j < n; j++) + conv(x.rep[j], t[j]); + x.normalize(); +} + + +#ifdef NTL_THREAD_BOOST + +void InnerProduct(ZZ_pX& x, const vec_ZZ_p& v, long low, long high, + const vec_ZZ_pX& H, long n, ZZVec& t) +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_InnerProduct(x, v, low, high, H, n, t); + return; + } + + high = min(high, v.length()-1); + x.rep.SetLength(n); + + 
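+   // the ZZ_p modulus context is thread-local state, so it is saved here
+   // and restored inside each pool thread before any ZZ_p arithmetic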
ZZ_pContext local_context; + local_context.save(); + + pool->exec_range(n, + [low, high, &x, &t, &H, &v, &local_context](long first, long last) { + + local_context.restore(); + + NTL_ZZRegister(s); + + for (long j = first; j < last; j++) clear(t[j]); + + for (long i = low; i <= high; i++) { + const vec_ZZ_p& h = H[i-low].rep; + long m = min(h.length(), last); + const ZZ& w = rep(v[i]); + + for (long j = first; j < m; j++) { + mul(s, w, rep(h[j])); + add(t[j], t[j], s); + } + } + + for (long j = first; j < last; j++) conv(x.rep[j], t[j]); + } ); + + x.normalize(); +} + +#endif + + +void CompMod(ZZ_pX& x, const ZZ_pX& g, const ZZ_pXArgument& A, + const ZZ_pXModulus& F) +{ + if (deg(g) <= 0) { + x = g; + return; + } + + + ZZ_pX s, t; + ZZVec scratch(F.n, ZZ_p::ExtendedModulusSize()); + + long m = A.H.length() - 1; + long l = ((g.rep.length()+m-1)/m) - 1; + + ZZ_pXMultiplier M; + build(M, A.H[m], F); + + InnerProduct(t, g.rep, l*m, l*m + m - 1, A.H, F.n, scratch); + for (long i = l-1; i >= 0; i--) { + InnerProduct(s, g.rep, i*m, i*m + m - 1, A.H, F.n, scratch); + MulMod(t, t, M, F); + add(t, t, s); + } + + x = t; +} + + +void build(ZZ_pXArgument& A, const ZZ_pX& h, const ZZ_pXModulus& F, long m) +{ + if (m <= 0 || deg(h) >= F.n) LogicError("build: bad args"); + + if (m > F.n) m = F.n; + + long i; + + if (ZZ_pXArgBound > 0) { + double sz = ZZ_p::storage(); + sz = sz*F.n; + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_ZZ_p); + sz = sz/1024; + m = min(m, long(ZZ_pXArgBound/sz)); + m = max(m, 1); + } + + ZZ_pXMultiplier M; + + build(M, h, F); + + A.H.SetLength(m+1); + + set(A.H[0]); + A.H[1] = h; + for (i = 2; i <= m; i++) + MulMod(A.H[i], A.H[i-1], M, F); +} + + + + +NTL_CHEAP_THREAD_LOCAL long ZZ_pXArgBound = 0; + + +void CompMod(ZZ_pX& x, const ZZ_pX& g, const ZZ_pX& h, const ZZ_pXModulus& F) + // x = g(h) mod f +{ + long m = SqrRoot(g.rep.length()); + + if (m == 0) { + clear(x); + return; + } + + ZZ_pXArgument A; + + build(A, h, F, m); + + CompMod(x, g, A, F); +} + + + + +void Comp2Mod(ZZ_pX& x1, ZZ_pX& x2, const ZZ_pX& g1, const ZZ_pX& g2, + const ZZ_pX& h, const ZZ_pXModulus& F) + +{ + long m = SqrRoot(g1.rep.length() + g2.rep.length()); + + if (m == 0) { + clear(x1); + clear(x2); + return; + } + + ZZ_pXArgument A; + + build(A, h, F, m); + + ZZ_pX xx1, xx2; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + + x1 = xx1; + x2 = xx2; +} + +void Comp3Mod(ZZ_pX& x1, ZZ_pX& x2, ZZ_pX& x3, + const ZZ_pX& g1, const ZZ_pX& g2, const ZZ_pX& g3, + const ZZ_pX& h, const ZZ_pXModulus& F) + +{ + long m = SqrRoot(g1.rep.length() + g2.rep.length() + g3.rep.length()); + + if (m == 0) { + clear(x1); + clear(x2); + clear(x3); + return; + } + + ZZ_pXArgument A; + + build(A, h, F, m); + + ZZ_pX xx1, xx2, xx3; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + CompMod(xx3, g3, A, F); + + x1 = xx1; + x2 = xx2; + x3 = xx3; +} + + +static void StripZeroes(vec_ZZ_p& x) +{ + long n = x.length(); + while (n > 0 && IsZero(x[n-1])) + n--; + x.SetLength(n); +} + + +void PlainUpdateMap(vec_ZZ_p& xx, const vec_ZZ_p& a, + const ZZ_pX& b, const ZZ_pX& f) +{ + long n = deg(f); + long i, m; + + if (IsZero(b)) { + xx.SetLength(0); + return; + } + + m = n-1 - deg(b); + + vec_ZZ_p x(INIT_SIZE, n); + + for (i = 0; i <= m; i++) + InnerProduct(x[i], a, b.rep, i); + + if (deg(b) != 0) { + ZZ_pX c(INIT_SIZE, n); + LeftShift(c, b, m); + + for (i = m+1; i < n; i++) { + MulByXMod(c, c, f); + InnerProduct(x[i], a, c.rep); + } + } + + xx = x; +} + + +void UpdateMap(vec_ZZ_p& x, const vec_ZZ_p& aa, + const ZZ_pXMultiplier& B, 
const ZZ_pXModulus& F) +{ + long n = F.n; + long i; + + + vec_ZZ_p a; + a = aa; + StripZeroes(a); + + if (a.length() > n) LogicError("UpdateMap: bad args"); + + if (!B.UseFFT) { + PlainUpdateMap(x, a, B.b, F.f); + StripZeroes(x); + return; + } + + FFTRep R1(INIT_SIZE, F.k), R2(INIT_SIZE, F.l); + vec_ZZ_p V1(INIT_SIZE, n); + + + RevToFFTRep(R1, a, F.k, 0, a.length()-1, 0); + mul(R2, R1, F.FRep); + RevFromFFTRep(V1, R2, 0, n-2); + for (i = 0; i <= n-2; i++) negate(V1[i], V1[i]); + RevToFFTRep(R2, V1, F.l, 0, n-2, n-1); + mul(R2, R2, B.B1); + mul(R1, R1, B.B2); + + AddExpand(R2, R1); + RevFromFFTRep(x, R2, 0, n-1); + StripZeroes(x); +} + + + +NTL_TBDECL(ProjectPowers)(vec_ZZ_p& x, const vec_ZZ_p& a, long k, + const ZZ_pXArgument& H, const ZZ_pXModulus& F) + +{ + long n = F.n; + + if (a.length() > n || k < 0) + LogicError("ProjectPowers: bad args"); + if (NTL_OVERFLOW(k, 1, 0)) + ResourceError("ProjectPowers: excessive args"); + + + long m = H.H.length()-1; + long l = (k+m-1)/m - 1; + + ZZ_pXMultiplier M; + build(M, H.H[m], F); + + vec_ZZ_p s(INIT_SIZE, n); + s = a; + StripZeroes(s); + + x.SetLength(k); + + for (long i = 0; i <= l; i++) { + long m1 = min(m, k-i*m); + ZZ_p* w = &x[i*m]; + for (long j = 0; j < m1; j++) + InnerProduct(w[j], H.H[j].rep, s); + if (i < l) + UpdateMap(s, s, M, F); + } +} + + +#ifdef NTL_THREAD_BOOST + +void ProjectPowers(vec_ZZ_p& x, const vec_ZZ_p& a, long k, + const ZZ_pXArgument& H, const ZZ_pXModulus& F) + +{ + BasicThreadPool *pool = GetThreadPool(); + + if (!pool || pool->active() || pool->NumThreads() == 1) { + basic_ProjectPowers(x, a, k, H, F); + return; + } + + long n = F.n; + + if (a.length() > n || k < 0) + LogicError("ProjectPowers: bad args"); + if (NTL_OVERFLOW(k, 1, 0)) + ResourceError("ProjectPowers: excessive args"); + + + long m = H.H.length()-1; + long l = (k+m-1)/m - 1; + + ZZ_pXMultiplier M; + build(M, H.H[m], F); + + vec_ZZ_p s(INIT_SIZE, n); + s = a; + StripZeroes(s); + + x.SetLength(k); + + ZZ_pContext local_context; + local_context.save(); + + + for (long i = 0; i <= l; i++) { + long m1 = min(m, k-i*m); + ZZ_p* w = &x[i*m]; + + pool->exec_range(m1, + [w, &H, &s, &local_context](long first, long last) { + local_context.restore(); + for (long j = first; j < last; j++) + InnerProduct(w[j], H.H[j].rep, s); + } ); + + + if (i < l) + UpdateMap(s, s, M, F); + } +} + + +#endif + + +void ProjectPowers(vec_ZZ_p& x, const vec_ZZ_p& a, long k, + const ZZ_pX& h, const ZZ_pXModulus& F) + +{ + if (a.length() > F.n || k < 0) LogicError("ProjectPowers: bad args"); + + if (k == 0) { + x.SetLength(0); + return; + } + + long m = SqrRoot(k); + + ZZ_pXArgument H; + + build(H, h, F, m); + ProjectPowers(x, a, k, H, F); +} + + +void BerlekampMassey(ZZ_pX& h, const vec_ZZ_p& a, long m) +{ + ZZ_pX Lambda, Sigma, Temp; + long L; + ZZ_p Delta, Delta1, t1; + long shamt; + + // cerr << "*** " << m << "\n"; + + Lambda.SetMaxLength(m+1); + Sigma.SetMaxLength(m+1); + Temp.SetMaxLength(m+1); + + L = 0; + set(Lambda); + clear(Sigma); + set(Delta); + shamt = 0; + + long i, r, dl; + + for (r = 1; r <= 2*m; r++) { + // cerr << r << "--"; + clear(Delta1); + dl = deg(Lambda); + for (i = 0; i <= dl; i++) { + mul(t1, Lambda.rep[i], a[r-i-1]); + add(Delta1, Delta1, t1); + } + + if (IsZero(Delta1)) { + shamt++; + // cerr << "case 1: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + else if (2*L < r) { + div(t1, Delta1, Delta); + mul(Temp, Sigma, t1); + Sigma = Lambda; + ShiftSub(Lambda, Temp, shamt+1); + shamt = 0; + L = r-L; + Delta = Delta1; + // cerr << "case 2: " 
<< deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + else { + shamt++; + div(t1, Delta1, Delta); + mul(Temp, Sigma, t1); + ShiftSub(Lambda, Temp, shamt); + // cerr << "case 3: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + } + + // cerr << "finished: " << L << " " << deg(Lambda) << "\n"; + + dl = deg(Lambda); + h.rep.SetLength(L + 1); + + for (i = 0; i < L - dl; i++) + clear(h.rep[i]); + + for (i = L - dl; i <= L; i++) + h.rep[i] = Lambda.rep[L - i]; +} + + + + +void GCDMinPolySeq(ZZ_pX& h, const vec_ZZ_p& x, long m) +{ + long i; + ZZ_pX a, b; + ZZ_pXMatrix M; + ZZ_p t; + + a.rep.SetLength(2*m); + for (i = 0; i < 2*m; i++) a.rep[i] = x[2*m-1-i]; + a.normalize(); + + SetCoeff(b, 2*m); + + HalfGCD(M, b, a, m+1); + + /* make monic */ + + inv(t, LeadCoeff(M(1,1))); + mul(h, M(1,1), t); +} + + +void MinPolySeq(ZZ_pX& h, const vec_ZZ_p& a, long m) +{ + if (m < 0) LogicError("MinPoly: bad args"); + if (NTL_OVERFLOW(m, 1, 0)) LogicError("MinPoly: bad args"); + if (a.length() < 2*m) LogicError("MinPoly: sequence too short"); + + if (m > NTL_ZZ_pX_BERMASS_CROSSOVER) + GCDMinPolySeq(h, a, m); + else + BerlekampMassey(h, a, m); +} + + +void DoMinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m, + const vec_ZZ_p& R) +{ + vec_ZZ_p x; + + ProjectPowers(x, R, 2*m, g, F); + MinPolySeq(h, x, m); +} + + +void ProbMinPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m) +{ + long n = F.n; + if (m < 1 || m > n) LogicError("ProbMinPoly: bad args"); + + long i; + vec_ZZ_p R(INIT_SIZE, n); + + for (i = 0; i < n; i++) random(R[i]); + DoMinPolyMod(h, g, F, m, R); +} + +void MinPolyMod(ZZ_pX& hh, const ZZ_pX& g, const ZZ_pXModulus& F, long m) +{ + ZZ_pX h, h1; + long n = F.n; + if (m < 1 || m > n) LogicError("MinPoly: bad args"); + + /* probabilistically compute min-poly */ + + ProbMinPolyMod(h, g, F, m); + if (deg(h) == m) { hh = h; return; } + CompMod(h1, h, g, F); + if (IsZero(h1)) { hh = h; return; } + + /* not completely successful...must iterate */ + + long i; + + ZZ_pX h2, h3; + ZZ_pXMultiplier H1; + vec_ZZ_p R(INIT_SIZE, n); + + for (;;) { + R.SetLength(n); + for (i = 0; i < n; i++) random(R[i]); + build(H1, h1, F); + UpdateMap(R, R, H1, F); + DoMinPolyMod(h2, g, F, m-deg(h), R); + + mul(h, h, h2); + if (deg(h) == m) { hh = h; return; } + CompMod(h3, h2, g, F); + MulMod(h1, h3, H1, F); + if (IsZero(h1)) { hh = h; return; } + } +} + + + +void IrredPolyMod(ZZ_pX& h, const ZZ_pX& g, const ZZ_pXModulus& F, long m) +{ + vec_ZZ_p R(INIT_SIZE, 1); + if (m < 1 || m > F.n) LogicError("IrredPoly: bad args"); + + set(R[0]); + DoMinPolyMod(h, g, F, m, R); +} + + + +void diff(ZZ_pX& x, const ZZ_pX& a) +{ + long n = deg(a); + long i; + + if (n <= 0) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(n); + + for (i = 0; i <= n-1; i++) { + mul(x.rep[i], a.rep[i+1], i+1); + } + + if (&x == &a) + x.rep.SetLength(n); + + x.normalize(); +} + + +void MakeMonic(ZZ_pX& x) +{ + if (IsZero(x)) + return; + + if (IsOne(LeadCoeff(x))) + return; + + ZZ_p t; + + inv(t, LeadCoeff(x)); + mul(x, x, t); +} + + + +void PlainMulTrunc(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, long n) +{ + ZZ_pX y; + mul(y, a, b); + trunc(x, y, n); +} + + +void FFTMulTrunc(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, long n) +{ + if (IsZero(a) || IsZero(b)) { + clear(x); + return; + } + + long d = deg(a) + deg(b); + if (n > d + 1) + n = d + 1; + + long k = NextPowerOfTwo(d + 1); + FFTRep R1(INIT_SIZE, k), R2(INIT_SIZE, k); + + ToFFTRep(R1, a, k); + ToFFTRep(R2, b, k); + mul(R1, R1, R2); + 
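+   // the product itself has degree d; only coefficients 0..n-1 are
+   // converted back out of the FFT representation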
FromFFTRep(x, R1, 0, n-1); +} + +void MulTrunc(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, long n) +{ + if (n < 0) LogicError("MulTrunc: bad args"); + + if (deg(a) <= NTL_ZZ_pX_FFT_CROSSOVER || deg(b) <= NTL_ZZ_pX_FFT_CROSSOVER) + PlainMulTrunc(x, a, b, n); + else + FFTMulTrunc(x, a, b, n); +} + + +void PlainSqrTrunc(ZZ_pX& x, const ZZ_pX& a, long n) +{ + ZZ_pX y; + sqr(y, a); + trunc(x, y, n); +} + + +void FFTSqrTrunc(ZZ_pX& x, const ZZ_pX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + long d = 2*deg(a); + if (n > d + 1) + n = d + 1; + + long k = NextPowerOfTwo(d + 1); + FFTRep R1(INIT_SIZE, k); + + ToFFTRep(R1, a, k); + mul(R1, R1, R1); + FromFFTRep(x, R1, 0, n-1); +} + +void SqrTrunc(ZZ_pX& x, const ZZ_pX& a, long n) +{ + if (n < 0) LogicError("SqrTrunc: bad args"); + + if (deg(a) <= NTL_ZZ_pX_FFT_CROSSOVER) + PlainSqrTrunc(x, a, n); + else + FFTSqrTrunc(x, a, n); +} + + +void FastTraceVec(vec_ZZ_p& S, const ZZ_pX& f) +{ + long n = deg(f); + + if (n <= 0) + LogicError("FastTraceVec: bad args"); + + if (n == 0) { + S.SetLength(0); + return; + } + + if (n == 1) { + S.SetLength(1); + set(S[0]); + return; + } + + long i; + ZZ_pX f1; + + f1.rep.SetLength(n-1); + for (i = 0; i <= n-2; i++) + f1.rep[i] = f.rep[n-i]; + f1.normalize(); + + ZZ_pX f2; + f2.rep.SetLength(n-1); + for (i = 0; i <= n-2; i++) + mul(f2.rep[i], f.rep[n-1-i], i+1); + f2.normalize(); + + ZZ_pX f3; + InvTrunc(f3, f1, n-1); + MulTrunc(f3, f3, f2, n-1); + + S.SetLength(n); + + S[0] = n; + for (i = 1; i < n; i++) + negate(S[i], coeff(f3, i-1)); +} + + +void PlainTraceVec(vec_ZZ_p& S, const ZZ_pX& ff) +{ + if (deg(ff) <= 0) + LogicError("TraceVec: bad args"); + + ZZ_pX f; + f = ff; + + MakeMonic(f); + + long n = deg(f); + + S.SetLength(n); + + if (n == 0) + return; + + long k, i; + ZZ acc, t; + ZZ_p t1; + + S[0] = n; + + for (k = 1; k < n; k++) { + mul(acc, rep(f.rep[n-k]), k); + + for (i = 1; i < k; i++) { + mul(t, rep(f.rep[n-i]), rep(S[k-i])); + add(acc, acc, t); + } + + conv(t1, acc); + negate(S[k], t1); + } +} + +void TraceVec(vec_ZZ_p& S, const ZZ_pX& f) +{ + if (deg(f) <= NTL_ZZ_pX_TRACE_CROSSOVER) + PlainTraceVec(S, f); + else + FastTraceVec(S, f); +} + +static +void ComputeTraceVec(vec_ZZ_p& S, const ZZ_pXModulus& F) +{ + if (!F.UseFFT) { + PlainTraceVec(S, F.f); + return; + } + + long i; + long n = F.n; + + FFTRep R; + ZZ_pX P, g; + + g.rep.SetLength(n-1); + for (i = 1; i < n; i++) + mul(g.rep[n-i-1], F.f.rep[n-i], i); + g.normalize(); + + ToFFTRep(R, g, F.l); + mul(R, R, F.HRep); + FromFFTRep(P, R, n-2, 2*n-4); + + S.SetLength(n); + + S[0] = n; + for (i = 1; i < n; i++) + negate(S[i], coeff(P, n-1-i)); +} + +void TraceMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pXModulus& F) +{ + long n = F.n; + + if (deg(a) >= n) + LogicError("trace: bad args"); + + do { // NOTE: thread safe lazy init + Lazy::Builder builder(F.tracevec.val()); + if (!builder()) break; + UniquePtr p; + p.make(); + ComputeTraceVec(*p, F); + builder.move(p); + } while (0); + + InnerProduct(x, a.rep, *F.tracevec.val()); +} + +void TraceMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pX& f) +{ + if (deg(a) >= deg(f) || deg(f) <= 0) + LogicError("trace: bad args"); + + project(x, TraceVec(f), a); +} + +void PlainResultant(ZZ_p& rres, const ZZ_pX& a, const ZZ_pX& b) +{ + ZZ_p res; + + if (IsZero(a) || IsZero(b)) + clear(res); + else if (deg(a) == 0 && deg(b) == 0) + set(res); + else { + long d0, d1, d2; + ZZ_p lc; + set(res); + + long n = max(deg(a),deg(b)) + 1; + ZZ_pX u(INIT_SIZE, n), v(INIT_SIZE, n); + ZZVec tmp(n, ZZ_p::ExtendedModulusSize()); + + u = 
a; + v = b; + + for (;;) { + d0 = deg(u); + d1 = deg(v); + lc = LeadCoeff(v); + + PlainRem(u, u, v, tmp); + swap(u, v); + + d2 = deg(v); + if (d2 >= 0) { + power(lc, lc, d0-d2); + mul(res, res, lc); + if (d0 & d1 & 1) negate(res, res); + } + else { + if (d1 == 0) { + power(lc, lc, d0); + mul(res, res, lc); + } + else + clear(res); + + break; + } + } + } + + rres = res; +} + + +void ResIterHalfGCD(ZZ_pXMatrix& M_out, ZZ_pX& U, ZZ_pX& V, long d_red, + vec_ZZ_p& cvec, vec_long& dvec) +{ + M_out(0,0).SetMaxLength(d_red); + M_out(0,1).SetMaxLength(d_red); + M_out(1,0).SetMaxLength(d_red); + M_out(1,1).SetMaxLength(d_red); + + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + long goal = deg(U) - d_red; + + if (deg(V) <= goal) + return; + + ZZVec tmp(deg(U)+1, ZZ_p::ExtendedModulusSize()); + ZZ_pX Q, t(INIT_SIZE, d_red); + + + while (deg(V) > goal) { + append(cvec, LeadCoeff(V)); + append(dvec, dvec[dvec.length()-1]-deg(U)+deg(V)); + PlainDivRem(Q, U, U, V, tmp); + swap(U, V); + + mul(t, Q, M_out(1,0)); + sub(t, M_out(0,0), t); + M_out(0,0) = M_out(1,0); + M_out(1,0) = t; + + mul(t, Q, M_out(1,1)); + sub(t, M_out(0,1), t); + M_out(0,1) = M_out(1,1); + M_out(1,1) = t; + } +} + + + +void ResHalfGCD(ZZ_pXMatrix& M_out, const ZZ_pX& U, const ZZ_pX& V, long d_red, + vec_ZZ_p& cvec, vec_long& dvec) +{ + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + return; + } + + + long n = deg(U) - 2*d_red + 2; + if (n < 0) n = 0; + + ZZ_pX U1, V1; + + RightShift(U1, U, n); + RightShift(V1, V, n); + + if (d_red <= NTL_ZZ_pX_HalfGCD_CROSSOVER) { + ResIterHalfGCD(M_out, U1, V1, d_red, cvec, dvec); + return; + } + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + ZZ_pXMatrix M1; + + ResHalfGCD(M1, U1, V1, d1, cvec, dvec); + mul(U1, V1, M1); + + long d2 = deg(V1) - deg(U) + n + d_red; + + if (IsZero(V1) || d2 <= 0) { + M_out = M1; + return; + } + + + ZZ_pX Q; + ZZ_pXMatrix M2; + + append(cvec, LeadCoeff(V1)); + append(dvec, dvec[dvec.length()-1]-deg(U1)+deg(V1)); + DivRem(Q, U1, U1, V1); + swap(U1, V1); + + ResHalfGCD(M2, U1, V1, d2, cvec, dvec); + + ZZ_pX t(INIT_SIZE, deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,0)); + sub(t, M1(0,0), t); + swap(M1(0,0), M1(1,0)); + swap(M1(1,0), t); + + t.kill(); + + t.SetMaxLength(deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,1)); + sub(t, M1(0,1), t); + swap(M1(0,1), M1(1,1)); + swap(M1(1,1), t); + + t.kill(); + + mul(M_out, M2, M1); +} + +void ResHalfGCD(ZZ_pX& U, ZZ_pX& V, vec_ZZ_p& cvec, vec_long& dvec) +{ + long d_red = (deg(U)+1)/2; + + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + return; + } + + long du = deg(U); + + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + ZZ_pXMatrix M1; + + ResHalfGCD(M1, U, V, d1, cvec, dvec); + mul(U, V, M1); + + long d2 = deg(V) - du + d_red; + + if (IsZero(V) || d2 <= 0) { + return; + } + + M1(0,0).kill(); + M1(0,1).kill(); + M1(1,0).kill(); + M1(1,1).kill(); + + + ZZ_pX Q; + + append(cvec, LeadCoeff(V)); + append(dvec, dvec[dvec.length()-1]-deg(U)+deg(V)); + DivRem(Q, U, U, V); + swap(U, V); + + ResHalfGCD(M1, U, V, d2, cvec, dvec); + + mul(U, V, M1); +} + + +void resultant(ZZ_p& rres, const ZZ_pX& u, const ZZ_pX& v) +{ + if (deg(u) <= NTL_ZZ_pX_GCD_CROSSOVER || deg(v) <= NTL_ZZ_pX_GCD_CROSSOVER) { + PlainResultant(rres, u, v); + return; + } + + ZZ_pX u1, v1; + + u1 = u; + v1 = v; + + ZZ_p res, t; + set(res); + + if (deg(u1) == deg(v1)) { + rem(u1, u1, v1); + 
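+      // a single remainder step breaks the tie, so deg(u1) > deg(v1)
+      // (or v1 == 0) holds when the main loop is entered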
swap(u1, v1); + + if (IsZero(v1)) { + clear(rres); + return; + } + + power(t, LeadCoeff(u1), deg(u1) - deg(v1)); + mul(res, res, t); + if (deg(u1) & 1) + negate(res, res); + } + else if (deg(u1) < deg(v1)) { + swap(u1, v1); + if (deg(u1) & deg(v1) & 1) + negate(res, res); + } + + // deg(u1) > deg(v1) && v1 != 0 + + vec_ZZ_p cvec; + vec_long dvec; + + cvec.SetMaxLength(deg(v1)+2); + dvec.SetMaxLength(deg(v1)+2); + + append(cvec, LeadCoeff(u1)); + append(dvec, deg(u1)); + + + while (deg(u1) > NTL_ZZ_pX_GCD_CROSSOVER && !IsZero(v1)) { + ResHalfGCD(u1, v1, cvec, dvec); + + if (!IsZero(v1)) { + append(cvec, LeadCoeff(v1)); + append(dvec, deg(v1)); + rem(u1, u1, v1); + swap(u1, v1); + } + } + + if (IsZero(v1) && deg(u1) > 0) { + clear(rres); + return; + } + + long i, l; + l = dvec.length(); + + if (deg(u1) == 0) { + // we went all the way... + + for (i = 0; i <= l-3; i++) { + power(t, cvec[i+1], dvec[i]-dvec[i+2]); + mul(res, res, t); + if (dvec[i] & dvec[i+1] & 1) + negate(res, res); + } + + power(t, cvec[l-1], dvec[l-2]); + mul(res, res, t); + } + else { + for (i = 0; i <= l-3; i++) { + power(t, cvec[i+1], dvec[i]-dvec[i+2]); + mul(res, res, t); + if (dvec[i] & dvec[i+1] & 1) + negate(res, res); + } + + power(t, cvec[l-1], dvec[l-2]-deg(v1)); + mul(res, res, t); + if (dvec[l-2] & dvec[l-1] & 1) + negate(res, res); + + PlainResultant(t, u1, v1); + mul(res, res, t); + } + + rres = res; +} + +void NormMod(ZZ_p& x, const ZZ_pX& a, const ZZ_pX& f) +{ + if (deg(f) <= 0 || deg(a) >= deg(f)) + LogicError("norm: bad args"); + + if (IsZero(a)) { + clear(x); + return; + } + + ZZ_p t; + resultant(t, f, a); + if (!IsOne(LeadCoeff(f))) { + ZZ_p t1; + power(t1, LeadCoeff(f), deg(a)); + inv(t1, t1); + mul(t, t, t1); + } + + x = t; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZ_pXCharPoly.c b/thirdparty/linux/ntl/src/ZZ_pXCharPoly.c new file mode 100644 index 0000000000..e09591f5b7 --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZ_pXCharPoly.c @@ -0,0 +1,77 @@ +#include + +#include + +NTL_START_IMPL + +static +void HessCharPoly(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& f) +{ + long n = deg(f); + if (n <= 0 || deg(a) >= n) + LogicError("HessCharPoly: bad args"); + + mat_ZZ_p M; + M.SetDims(n, n); + + long i, j; + + ZZ_pX t; + t = a; + + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) + M[i][j] = coeff(t, j); + + if (i < n-1) + MulByXMod(t, t, f); + } + + CharPoly(g, M); +} + +void CharPolyMod(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& ff) +{ + ZZ_pX f = ff; + MakeMonic(f); + long n = deg(f); + + if (n <= 0 || deg(a) >= n) + LogicError("CharPoly: bad args"); + + if (IsZero(a)) { + clear(g); + SetCoeff(g, n); + return; + } + + if (n > 25) { + ZZ_pX h; + MinPolyMod(h, a, f); + if (deg(h) == n) { + g = h; + return; + } + } + + if (ZZ_p::modulus() < n+1) { + HessCharPoly(g, a, f); + return; + } + + vec_ZZ_p u(INIT_SIZE, n+1), v(INIT_SIZE, n+1); + + ZZ_pX h, h1; + negate(h, a); + long i; + + for (i = 0; i <= n; i++) { + u[i] = i; + add(h1, h, u[i]); + resultant(v[i], f, h1); + } + + interpolate(g, u, v); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/ZZ_pXFactoring.c b/thirdparty/linux/ntl/src/ZZ_pXFactoring.c new file mode 100644 index 0000000000..cf31c9f3c1 --- /dev/null +++ b/thirdparty/linux/ntl/src/ZZ_pXFactoring.c @@ -0,0 +1,1916 @@ + +#include +#include +#include +#include +#include + + +NTL_START_IMPL + + + + + +void SquareFreeDecomp(vec_pair_ZZ_pX_long& u, const ZZ_pX& ff) +{ + ZZ_pX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SquareFreeDecomp: bad args"); + + ZZ_pX r, t, v, tmp1; + 
long m, j, finished, done; + + u.SetLength(0); + + if (deg(f) == 0) + return; + + m = 1; + finished = 0; + + do { + j = 1; + diff(tmp1, f); + GCD(r, f, tmp1); + div(t, f, r); + + if (deg(t) > 0) { + done = 0; + do { + GCD(v, r, t); + div(tmp1, t, v); + if (deg(tmp1) > 0) append(u, cons(tmp1, j*m)); + if (deg(v) > 0) { + div(r, r, v); + t = v; + j++; + } + else + done = 1; + } while (!done); + if (deg(r) == 0) finished = 1; + } + + if (!finished) { + /* r is a p-th power */ + long p, k, d; + conv(p, ZZ_p::modulus()); + d = deg(r)/p; + f.rep.SetLength(d+1); + for (k = 0; k <= d; k++) + f.rep[k] = r.rep[k*p]; + m = m*p; + } + } while (!finished); +} + + + +static +void NullSpace(long& r, vec_long& D, vec_ZZVec& M, long verbose) +{ + long k, l, n; + long i, j; + long pos; + ZZ t1, t2; + ZZ *x, *y; + + const ZZ& p = ZZ_p::modulus(); + + n = M.length(); + + D.SetLength(n); + for (j = 0; j < n; j++) D[j] = -1; + + r = 0; + + l = 0; + for (k = 0; k < n; k++) { + + if (verbose && k % 10 == 0) cerr << "+"; + + pos = -1; + for (i = l; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) + pos = i; + } + + if (pos != -1) { + swap(M[pos], M[l]); + + // make M[l, k] == -1 mod p, and make row l reduced + + InvMod(t1, M[l][k], p); + NegateMod(t1, t1, p); + for (j = k+1; j < n; j++) { + rem(t2, M[l][j], p); + MulMod(M[l][j], t2, t1, p); + } + + for (i = l+1; i < n; i++) { + // M[i] = M[i] + M[l]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[l].elts() + (k+1); + + for (j = k+1; j < n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + + D[k] = l; // variable k is defined by row l + l++; + + } + else { + r++; + } + } +} + + + +static +void BuildMatrix(vec_ZZVec& M, long n, const ZZ_pX& g, const ZZ_pXModulus& F, + long verbose) +{ + long i, j, m; + ZZ_pXMultiplier G; + ZZ_pX h; + + ZZ t; + sqr(t, ZZ_p::modulus()); + mul(t, t, n); + + long size = t.size(); + + M.SetLength(n); + for (i = 0; i < n; i++) + M[i].SetSize(n, size); + + build(G, g, F); + + set(h); + for (j = 0; j < n; j++) { + if (verbose && j % 10 == 0) cerr << "+"; + + m = deg(h); + for (i = 0; i < n; i++) { + if (i <= m) + M[i][j] = rep(h.rep[i]); + else + clear(M[i][j]); + } + + if (j < n-1) + MulMod(h, h, G, F); + } + + for (i = 0; i < n; i++) + AddMod(M[i][i], M[i][i], -1, ZZ_p::modulus()); + +} + + + +static +void RecFindRoots(vec_ZZ_p& x, const ZZ_pX& f) +{ + if (deg(f) == 0) return; + + if (deg(f) == 1) { + long k = x.length(); + x.SetLength(k+1); + negate(x[k], ConstTerm(f)); + return; + } + + ZZ_pX h; + + ZZ_p r; + ZZ p1; + + + RightShift(p1, ZZ_p::modulus(), 1); + + { + ZZ_pXModulus F; + build(F, f); + + do { + random(r); + PowerXPlusAMod(h, r, p1, F); + add(h, h, -1); + GCD(h, h, f); + } while (deg(h) <= 0 || deg(h) == deg(f)); + } + + RecFindRoots(x, h); + div(h, f, h); + RecFindRoots(x, h); +} + +void FindRoots(vec_ZZ_p& x, const ZZ_pX& ff) +{ + ZZ_pX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("FindRoots: bad args"); + + x.SetMaxLength(deg(f)); + x.SetLength(0); + RecFindRoots(x, f); +} + + +static +void RandomBasisElt(ZZ_pX& g, const vec_long& D, const vec_ZZVec& M) +{ + ZZ t1, t2; + + long n = D.length(); + + long i, j, s; + + g.rep.SetLength(n); + + vec_ZZ_p& v = g.rep; + + for (j = n-1; j >= 0; j--) { + if (D[j] == -1) + random(v[j]); + else { + i = D[j]; + + // v[j] = sum_{s=j+1}^{n-1} v[s]*M[i,s] + + clear(t1); + + for (s = j+1; s < n; s++) { + mul(t2, rep(v[s]), M[i][s]); + add(t1, t1, t2); + } + + conv(v[j], 
t1); + } + } + + g.normalize(); +} + + + +static +void split(ZZ_pX& f1, ZZ_pX& g1, ZZ_pX& f2, ZZ_pX& g2, + const ZZ_pX& f, const ZZ_pX& g, + const vec_ZZ_p& roots, long lo, long mid) +{ + long r = mid-lo+1; + + ZZ_pXModulus F; + build(F, f); + + vec_ZZ_p lroots(INIT_SIZE, r); + long i; + + for (i = 0; i < r; i++) + lroots[i] = roots[lo+i]; + + + ZZ_pX h, a, d; + BuildFromRoots(h, lroots); + CompMod(a, h, g, F); + + + GCD(f1, a, f); + + div(f2, f, f1); + + rem(g1, g, f1); + rem(g2, g, f2); +} + +static +void RecFindFactors(vec_ZZ_pX& factors, const ZZ_pX& f, const ZZ_pX& g, + const vec_ZZ_p& roots, long lo, long hi) +{ + long r = hi-lo+1; + + if (r == 0) return; + + if (r == 1) { + append(factors, f); + return; + } + + ZZ_pX f1, g1, f2, g2; + + long mid = (lo+hi)/2; + + split(f1, g1, f2, g2, f, g, roots, lo, mid); + + RecFindFactors(factors, f1, g1, roots, lo, mid); + RecFindFactors(factors, f2, g2, roots, mid+1, hi); +} + + +static +void FindFactors(vec_ZZ_pX& factors, const ZZ_pX& f, const ZZ_pX& g, + const vec_ZZ_p& roots) +{ + long r = roots.length(); + + factors.SetMaxLength(r); + factors.SetLength(0); + + RecFindFactors(factors, f, g, roots, 0, r-1); +} + +#if 0 + +static +void IterFindFactors(vec_ZZ_pX& factors, const ZZ_pX& f, + const ZZ_pX& g, const vec_ZZ_p& roots) +{ + long r = roots.length(); + long i; + ZZ_pX h; + + factors.SetLength(r); + + for (i = 0; i < r; i++) { + sub(h, g, roots[i]); + GCD(factors[i], f, h); + } +} + +#endif + + + + +void SFBerlekamp(vec_ZZ_pX& factors, const ZZ_pX& ff, long verbose) +{ + ZZ_pX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SFBerlekamp: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + double t; + + const ZZ& p = ZZ_p::modulus(); + + long n = deg(f); + + ZZ_pXModulus F; + + build(F, f); + + ZZ_pX g, h; + + if (verbose) { cerr << "computing X^p..."; t = GetTime(); } + PowerXMod(g, p, F); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + vec_long D; + long r; + + vec_ZZVec M; + + if (verbose) { cerr << "building matrix..."; t = GetTime(); } + BuildMatrix(M, n, g, F, verbose); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + if (verbose) { cerr << "diagonalizing..."; t = GetTime(); } + NullSpace(r, D, M, verbose); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + + if (verbose) cerr << "number of factors = " << r << "\n"; + + if (r == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + if (verbose) { cerr << "factor extraction..."; t = GetTime(); } + + vec_ZZ_p roots; + + RandomBasisElt(g, D, M); + MinPolyMod(h, g, F, r); + if (deg(h) == r) M.kill(); + FindRoots(roots, h); + FindFactors(factors, f, g, roots); + + ZZ_pX g1; + vec_ZZ_pX S, S1; + long i; + + while (factors.length() < r) { + if (verbose) cerr << "+"; + RandomBasisElt(g, D, M); + S.kill(); + for (i = 0; i < factors.length(); i++) { + const ZZ_pX& f = factors[i]; + if (deg(f) == 1) { + append(S, f); + continue; + } + build(F, f); + rem(g1, g, F); + if (deg(g1) <= 0) { + append(S, f); + continue; + } + MinPolyMod(h, g1, F, min(deg(f), r-factors.length()+1)); + FindRoots(roots, h); + S1.kill(); + FindFactors(S1, f, g1, roots); + append(S, S1); + } + swap(factors, S); + } + + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + if (verbose) { + cerr << "degrees:"; + long i; + for (i = 0; i < factors.length(); i++) + cerr << " " << deg(factors[i]); + cerr << "\n"; + } +} + + +void berlekamp(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f, long verbose) 
+{ + double t; + vec_pair_ZZ_pX_long sfd; + vec_ZZ_pX x; + + if (!IsOne(LeadCoeff(f))) + LogicError("berlekamp: bad args"); + + + if (verbose) { cerr << "square-free decomposition..."; t = GetTime(); } + SquareFreeDecomp(sfd, f); + if (verbose) cerr << (GetTime()-t) << "\n"; + + factors.SetLength(0); + + long i, j; + + for (i = 0; i < sfd.length(); i++) { + if (verbose) { + cerr << "factoring multiplicity " << sfd[i].b + << ", deg = " << deg(sfd[i].a) << "\n"; + } + + SFBerlekamp(x, sfd[i].a, verbose); + + for (j = 0; j < x.length(); j++) + append(factors, cons(x[j], sfd[i].b)); + } +} + + + +static +void AddFactor(vec_pair_ZZ_pX_long& factors, const ZZ_pX& g, long d, long verbose) +{ + if (verbose) + cerr << "degree=" << d << ", number=" << deg(g)/d << "\n"; + append(factors, cons(g, d)); +} + +static +void ProcessTable(ZZ_pX& f, vec_pair_ZZ_pX_long& factors, + const ZZ_pXModulus& F, long limit, const vec_ZZ_pX& tbl, + long d, long verbose) + +{ + if (limit == 0) return; + + if (verbose) cerr << "+"; + + ZZ_pX t1; + + if (limit == 1) { + GCD(t1, f, tbl[0]); + if (deg(t1) > 0) { + AddFactor(factors, t1, d, verbose); + div(f, f, t1); + } + + return; + } + + long i; + + t1 = tbl[0]; + for (i = 1; i < limit; i++) + MulMod(t1, t1, tbl[i], F); + + GCD(t1, f, t1); + + if (deg(t1) == 0) return; + + div(f, f, t1); + + ZZ_pX t2; + + i = 0; + d = d - limit + 1; + + while (2*d <= deg(t1)) { + GCD(t2, tbl[i], t1); + if (deg(t2) > 0) { + AddFactor(factors, t2, d, verbose); + div(t1, t1, t2); + } + + i++; + d++; + } + + if (deg(t1) > 0) + AddFactor(factors, t1, deg(t1), verbose); +} + + +void TraceMap(ZZ_pX& w, const ZZ_pX& a, long d, const ZZ_pXModulus& F, + const ZZ_pX& b) + +{ + if (d < 0) LogicError("TraceMap: bad args"); + + ZZ_pX y, z, t; + + z = b; + y = a; + clear(w); + + while (d) { + if (d == 1) { + if (IsZero(w)) + w = y; + else { + CompMod(w, w, z, F); + add(w, w, y); + } + } + else if ((d & 1) == 0) { + Comp2Mod(z, t, z, y, z, F); + add(y, t, y); + } + else if (IsZero(w)) { + w = y; + Comp2Mod(z, t, z, y, z, F); + add(y, t, y); + } + else { + Comp3Mod(z, t, w, z, y, w, z, F); + add(w, w, y); + add(y, t, y); + } + + d = d >> 1; + } +} + + +void PowerCompose(ZZ_pX& y, const ZZ_pX& h, long q, const ZZ_pXModulus& F) +{ + if (q < 0) LogicError("PowerCompose: bad args"); + + ZZ_pX z(INIT_SIZE, F.n); + long sw; + + z = h; + SetX(y); + + while (q) { + sw = 0; + + if (q > 1) sw = 2; + if (q & 1) { + if (IsX(y)) + y = z; + else + sw = sw | 1; + } + + switch (sw) { + case 0: + break; + + case 1: + CompMod(y, y, z, F); + break; + + case 2: + CompMod(z, z, z, F); + break; + + case 3: + Comp2Mod(y, z, y, z, z, F); + break; + } + + q = q >> 1; + } +} + + +long ProbIrredTest(const ZZ_pX& f, long iter) +{ + long n = deg(f); + + if (n <= 0) return 0; + if (n == 1) return 1; + + const ZZ& p = ZZ_p::modulus(); + + ZZ_pXModulus F; + + build(F, f); + + ZZ_pX b, r, s; + + PowerXMod(b, p, F); + + long i; + + for (i = 0; i < iter; i++) { + random(r, n); + TraceMap(s, r, n, F, b); + + if (deg(s) > 0) return 0; + } + + if (p >= n) return 1; + + long pp; + + conv(pp, p); + + if (n % pp != 0) return 1; + + PowerCompose(s, b, n/pp, F); + return !IsX(s); +} + +NTL_CHEAP_THREAD_LOCAL long ZZ_pX_BlockingFactor = 10; + +void DDF(vec_pair_ZZ_pX_long& factors, const ZZ_pX& ff, const ZZ_pX& hh, + long verbose) +{ + ZZ_pX f = ff; + ZZ_pX h = hh; + + if (!IsOne(LeadCoeff(f))) + LogicError("DDF: bad args"); + + factors.SetLength(0); + + if (deg(f) == 0) + return; + + if (deg(f) == 1) { + AddFactor(factors, f, 1, verbose); + 
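+ // trivial case: f itself is the single (linear) factor; throughout
+ // DDF a pair (g, d) records the product g of all irreducible
+ // factors of degree d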
return; + } + + long CompTableSize = 2*SqrRoot(deg(f)); + + long GCDTableSize = ZZ_pX_BlockingFactor; + + ZZ_pXModulus F; + build(F, f); + + ZZ_pXArgument H; + + build(H, h, F, min(CompTableSize, deg(f))); + + long i, d, limit, old_n; + ZZ_pX g, X; + + + vec_ZZ_pX tbl(INIT_SIZE, GCDTableSize); + + SetX(X); + + i = 0; + g = h; + d = 1; + limit = GCDTableSize; + + + while (2*d <= deg(f)) { + + old_n = deg(f); + sub(tbl[i], g, X); + i++; + if (i == limit) { + ProcessTable(f, factors, F, i, tbl, d, verbose); + i = 0; + } + + d = d + 1; + if (2*d <= deg(f)) { + // we need to go further + + if (deg(f) < old_n) { + // f has changed + + build(F, f); + rem(h, h, f); + rem(g, g, f); + build(H, h, F, min(CompTableSize, deg(f))); + } + + CompMod(g, g, H, F); + } + } + + ProcessTable(f, factors, F, i, tbl, d-1, verbose); + + if (!IsOne(f)) AddFactor(factors, f, deg(f), verbose); +} + + + +void RootEDF(vec_ZZ_pX& factors, const ZZ_pX& f, long verbose) +{ + vec_ZZ_p roots; + double t; + + if (verbose) { cerr << "finding roots..."; t = GetTime(); } + FindRoots(roots, f); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + long r = roots.length(); + factors.SetLength(r); + for (long j = 0; j < r; j++) { + SetX(factors[j]); + sub(factors[j], factors[j], roots[j]); + } +} + +static +void EDFSplit(vec_ZZ_pX& v, const ZZ_pX& f, const ZZ_pX& b, long d) +{ + ZZ_pX a, g, h; + ZZ_pXModulus F; + vec_ZZ_p roots; + + build(F, f); + long n = F.n; + long r = n/d; + random(a, n); + TraceMap(g, a, d, F, b); + MinPolyMod(h, g, F, r); + FindRoots(roots, h); + FindFactors(v, f, g, roots); +} + +static +void RecEDF(vec_ZZ_pX& factors, const ZZ_pX& f, const ZZ_pX& b, long d, + long verbose) +{ + vec_ZZ_pX v; + long i; + ZZ_pX bb; + + if (verbose) cerr << "+"; + + EDFSplit(v, f, b, d); + for (i = 0; i < v.length(); i++) { + if (deg(v[i]) == d) { + append(factors, v[i]); + } + else { + ZZ_pX bb; + rem(bb, b, v[i]); + RecEDF(factors, v[i], bb, d, verbose); + } + } +} + + +void EDF(vec_ZZ_pX& factors, const ZZ_pX& ff, const ZZ_pX& bb, + long d, long verbose) + +{ + ZZ_pX f = ff; + ZZ_pX b = bb; + + if (!IsOne(LeadCoeff(f))) + LogicError("EDF: bad args"); + + long n = deg(f); + long r = n/d; + + if (r == 0) { + factors.SetLength(0); + return; + } + + if (r == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + if (d == 1) { + RootEDF(factors, f, verbose); + return; + } + + + double t; + if (verbose) { + cerr << "computing EDF(" << d << "," << r << ")..."; + t = GetTime(); + } + + factors.SetLength(0); + + RecEDF(factors, f, b, d, verbose); + + if (verbose) cerr << (GetTime()-t) << "\n"; +} + + +void SFCanZass(vec_ZZ_pX& factors, const ZZ_pX& ff, long verbose) +{ + ZZ_pX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SFCanZass: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + factors.SetLength(0); + + double t; + + const ZZ& p = ZZ_p::modulus(); + + + ZZ_pXModulus F; + build(F, f); + + ZZ_pX h; + + if (verbose) { cerr << "computing X^p..."; t = GetTime(); } + PowerXMod(h, p, F); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + vec_pair_ZZ_pX_long u; + if (verbose) { cerr << "computing DDF..."; t = GetTime(); } + NewDDF(u, f, h, verbose); + if (verbose) { + t = GetTime()-t; + cerr << "DDF time: " << t << "\n"; + } + + ZZ_pX hh; + vec_ZZ_pX v; + + long i; + for (i = 0; i < u.length(); i++) { + const ZZ_pX& g = u[i].a; + long d = u[i].b; + long r = deg(g)/d; + + if (r == 1) { + // g is already irreducible 
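+ // (editor's note: r = deg(g)/d == 1 means g consists of exactly one
+ // irreducible factor, of degree d, so no EDF step is needed)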
+ + append(factors, g); + } + else { + // must perform EDF + + if (d == 1) { + // root finding + RootEDF(v, g, verbose); + append(factors, v); + } + else { + // general case + rem(hh, h, g); + EDF(v, g, hh, d, verbose); + append(factors, v); + } + } + } +} + +void CanZass(vec_pair_ZZ_pX_long& factors, const ZZ_pX& f, long verbose) +{ + if (!IsOne(LeadCoeff(f))) + LogicError("CanZass: bad args"); + + double t; + vec_pair_ZZ_pX_long sfd; + vec_ZZ_pX x; + + + if (verbose) { cerr << "square-free decomposition..."; t = GetTime(); } + SquareFreeDecomp(sfd, f); + if (verbose) cerr << (GetTime()-t) << "\n"; + + factors.SetLength(0); + + long i, j; + + for (i = 0; i < sfd.length(); i++) { + if (verbose) { + cerr << "factoring multiplicity " << sfd[i].b + << ", deg = " << deg(sfd[i].a) << "\n"; + } + + SFCanZass(x, sfd[i].a, verbose); + + for (j = 0; j < x.length(); j++) + append(factors, cons(x[j], sfd[i].b)); + } +} + +void mul(ZZ_pX& f, const vec_pair_ZZ_pX_long& v) +{ + long i, j, n; + + n = 0; + for (i = 0; i < v.length(); i++) + n += v[i].b*deg(v[i].a); + + ZZ_pX g(INIT_SIZE, n+1); + + set(g); + for (i = 0; i < v.length(); i++) + for (j = 0; j < v[i].b; j++) { + mul(g, g, v[i].a); + } + + f = g; +} + + + + +static +long BaseCase(const ZZ_pX& h, long q, long a, const ZZ_pXModulus& F) +{ + long b, e; + ZZ_pX lh(INIT_SIZE, F.n); + + lh = h; + b = 1; + e = 0; + while (e < a-1 && !IsX(lh)) { + e++; + b *= q; + PowerCompose(lh, lh, q, F); + } + + if (!IsX(lh)) b *= q; + + return b; +} + + + +static +void TandemPowerCompose(ZZ_pX& y1, ZZ_pX& y2, const ZZ_pX& h, + long q1, long q2, const ZZ_pXModulus& F) +{ + ZZ_pX z(INIT_SIZE, F.n); + long sw; + + z = h; + SetX(y1); + SetX(y2); + + while (q1 || q2) { + sw = 0; + + if (q1 > 1 || q2 > 1) sw = 4; + + if (q1 & 1) { + if (IsX(y1)) + y1 = z; + else + sw = sw | 2; + } + + if (q2 & 1) { + if (IsX(y2)) + y2 = z; + else + sw = sw | 1; + } + + switch (sw) { + case 0: + break; + + case 1: + CompMod(y2, y2, z, F); + break; + + case 2: + CompMod(y1, y1, z, F); + break; + + case 3: + Comp2Mod(y1, y2, y1, y2, z, F); + break; + + case 4: + CompMod(z, z, z, F); + break; + + case 5: + Comp2Mod(z, y2, z, y2, z, F); + break; + + case 6: + Comp2Mod(z, y1, z, y1, z, F); + break; + + case 7: + Comp3Mod(z, y1, y2, z, y1, y2, z, F); + break; + } + + q1 = q1 >> 1; + q2 = q2 >> 1; + } +} + + + +static +long RecComputeDegree(long u, const ZZ_pX& h, const ZZ_pXModulus& F, + FacVec& fvec) +{ + if (IsX(h)) return 1; + + if (fvec[u].link == -1) return BaseCase(h, fvec[u].q, fvec[u].a, F); + + ZZ_pX h1, h2; + long q1, q2, r1, r2; + + q1 = fvec[fvec[u].link].val; + q2 = fvec[fvec[u].link+1].val; + + TandemPowerCompose(h1, h2, h, q1, q2, F); + r1 = RecComputeDegree(fvec[u].link, h2, F, fvec); + r2 = RecComputeDegree(fvec[u].link+1, h1, F, fvec); + return r1*r2; +} + + + + +long ComputeDegree(const ZZ_pX& h, const ZZ_pXModulus& F) + // f = F.f is assumed to be an "equal degree" polynomial + // h = X^p mod f + // the common degree of the irreducible factors of f is computed +{ + if (F.n == 1 || IsX(h)) return 1; + + FacVec fvec; + + FactorInt(fvec, F.n); + + return RecComputeDegree(fvec.length()-1, h, F, fvec); +} + +long ProbComputeDegree(const ZZ_pX& h, const ZZ_pXModulus& F) +{ + if (F.n == 1 || IsX(h)) + return 1; + + long n = F.n; + + ZZ_pX P1, P2, P3; + + random(P1, n); + TraceMap(P2, P1, n, F, h); + ProbMinPolyMod(P3, P2, F, n/2); + + long r = deg(P3); + + if (r <= 0 || n % r != 0) + return 0; + else + return n/r; +} + + + +void FindRoot(ZZ_p& root, const ZZ_pX& ff) +// finds a 
root of ff. +// assumes that ff is monic and splits into distinct linear factors + +{ + ZZ_pXModulus F; + ZZ_pX h, h1, f; + ZZ_p r; + ZZ p1; + + f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("FindRoot: bad args"); + + if (deg(f) == 0) + LogicError("FindRoot: bad args"); + + RightShift(p1, ZZ_p::modulus(), 1); + h1 = 1; + + while (deg(f) > 1) { + build(F, f); + random(r); + PowerXPlusAMod(h, r, p1, F); + sub(h, h, h1); + GCD(h, h, f); + if (deg(h) > 0 && deg(h) < deg(f)) { + if (deg(h) > deg(f)/2) + div(f, f, h); + else + f = h; + } + } + + negate(root, ConstTerm(f)); +} + + +static +long power(long a, long e) +{ + long i, res; + + res = 1; + for (i = 1; i <= e; i++) + res = res * a; + + return res; +} + + +static +long IrredBaseCase(const ZZ_pX& h, long q, long a, const ZZ_pXModulus& F) +{ + long e; + ZZ_pX X, s, d; + + e = power(q, a-1); + PowerCompose(s, h, e, F); + SetX(X); + sub(s, s, X); + GCD(d, F.f, s); + return IsOne(d); +} + + +static +long RecIrredTest(long u, const ZZ_pX& h, const ZZ_pXModulus& F, + const FacVec& fvec) +{ + long q1, q2; + ZZ_pX h1, h2; + + if (IsX(h)) return 0; + + if (fvec[u].link == -1) { + return IrredBaseCase(h, fvec[u].q, fvec[u].a, F); + } + + + q1 = fvec[fvec[u].link].val; + q2 = fvec[fvec[u].link+1].val; + + TandemPowerCompose(h1, h2, h, q1, q2, F); + return RecIrredTest(fvec[u].link, h2, F, fvec) + && RecIrredTest(fvec[u].link+1, h1, F, fvec); +} + +long DetIrredTest(const ZZ_pX& f) +{ + if (deg(f) <= 0) return 0; + if (deg(f) == 1) return 1; + + ZZ_pXModulus F; + + build(F, f); + + ZZ_pX h; + + PowerXMod(h, ZZ_p::modulus(), F); + + ZZ_pX s; + PowerCompose(s, h, F.n, F); + if (!IsX(s)) return 0; + + FacVec fvec; + + FactorInt(fvec, F.n); + + return RecIrredTest(fvec.length()-1, h, F, fvec); +} + + + +long IterIrredTest(const ZZ_pX& f) +{ + if (deg(f) <= 0) return 0; + if (deg(f) == 1) return 1; + + ZZ_pXModulus F; + + build(F, f); + + ZZ_pX h; + + PowerXMod(h, ZZ_p::modulus(), F); + + long CompTableSize = 2*SqrRoot(deg(f)); + + ZZ_pXArgument H; + + build(H, h, F, CompTableSize); + + long i, d, limit, limit_sqr; + ZZ_pX g, X, t, prod; + + + SetX(X); + + i = 0; + g = h; + d = 1; + limit = 2; + limit_sqr = limit*limit; + + set(prod); + + + while (2*d <= deg(f)) { + sub(t, g, X); + MulMod(prod, prod, t, F); + i++; + if (i == limit_sqr) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + + set(prod); + limit++; + limit_sqr = limit*limit; + i = 0; + } + + d = d + 1; + if (2*d <= deg(f)) { + CompMod(g, g, H, F); + } + } + + if (i > 0) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + } + + return 1; +} + + +static +void MulByXPlusY(vec_ZZ_pX& h, const ZZ_pX& f, const ZZ_pX& g) +// h represents the bivariate polynomial h[0] + h[1]*Y + ... + h[n-1]*Y^k, +// where the h[i]'s are polynomials in X, each of degree < deg(f), +// and k < deg(g). +// h is replaced by the bivariate polynomial h*(X+Y) (mod f(X), g(Y)). 
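+//
+// Editor's sketch (not in the original source): writing
+// h(X,Y) = sum_i h[i](X)*Y^i, one multiplication by (X+Y) gives
+//
+//     h*(X+Y) = sum_i X*h[i](X)*Y^i  +  sum_i h[i](X)*Y^(i+1),
+//
+// i.e. slot-wise h[i] <- X*h[i] + h[i-1] (mod f(X)); when the top slot
+// would reach Y^deg(g) it is folded back via g(Y) = 0, which is the
+// subtraction of b*g.rep[i] in the else-branch below.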
+ +{ + long n = deg(g); + long k = h.length()-1; + + if (k < 0) return; + + if (k < n-1) { + h.SetLength(k+2); + h[k+1] = h[k]; + for (long i = k; i >= 1; i--) { + MulByXMod(h[i], h[i], f); + add(h[i], h[i], h[i-1]); + } + MulByXMod(h[0], h[0], f); + } + else { + ZZ_pX b, t; + + b = h[n-1]; + for (long i = n-1; i >= 1; i--) { + mul(t, b, g.rep[i]); + MulByXMod(h[i], h[i], f); + add(h[i], h[i], h[i-1]); + sub(h[i], h[i], t); + } + mul(t, b, g.rep[0]); + MulByXMod(h[0], h[0], f); + sub(h[0], h[0], t); + } + + // normalize + + k = h.length()-1; + while (k >= 0 && IsZero(h[k])) k--; + h.SetLength(k+1); +} + + +static +void IrredCombine(ZZ_pX& x, const ZZ_pX& f, const ZZ_pX& g) +{ + if (deg(f) < deg(g)) { + IrredCombine(x, g, f); + return; + } + + // deg(f) >= deg(g)...not necessary, but maybe a little more + // time & space efficient + + long df = deg(f); + long dg = deg(g); + long m = df*dg; + + vec_ZZ_pX h(INIT_SIZE, dg); + + long i; + for (i = 0; i < dg; i++) h[i].SetMaxLength(df); + + h.SetLength(1); + set(h[0]); + + vec_ZZ_p a; + + a.SetLength(2*m); + + for (i = 0; i < 2*m; i++) { + a[i] = ConstTerm(h[0]); + if (i < 2*m-1) + MulByXPlusY(h, f, g); + } + + MinPolySeq(x, a, m); +} + + +static +void BuildPrimePowerIrred(ZZ_pX& f, long q, long e) +{ + long n = power(q, e); + + do { + random(f, n); + SetCoeff(f, n); + } while (!IterIrredTest(f)); +} + +static +void RecBuildIrred(ZZ_pX& f, long u, const FacVec& fvec) +{ + if (fvec[u].link == -1) + BuildPrimePowerIrred(f, fvec[u].q, fvec[u].a); + else { + ZZ_pX g, h; + RecBuildIrred(g, fvec[u].link, fvec); + RecBuildIrred(h, fvec[u].link+1, fvec); + IrredCombine(f, g, h); + } +} + + +void BuildIrred(ZZ_pX& f, long n) +{ + if (n <= 0) + LogicError("BuildIrred: n must be positive"); + + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in BuildIrred"); + + if (n == 1) { + SetX(f); + return; + } + + FacVec fvec; + + FactorInt(fvec, n); + + RecBuildIrred(f, fvec.length()-1, fvec); +} + + + +void BuildRandomIrred(ZZ_pX& f, const ZZ_pX& g) +{ + ZZ_pXModulus G; + ZZ_pX h, ff; + + build(G, g); + do { + random(h, deg(g)); + IrredPolyMod(ff, h, G); + } while (deg(ff) < deg(g)); + + f = ff; +} + + +/************* NEW DDF ****************/ + +NTL_CHEAP_THREAD_LOCAL long ZZ_pX_GCDTableSize = 4; +NTL_CHEAP_THREAD_LOCAL double ZZ_pXFileThresh = NTL_FILE_THRESH; +static NTL_CHEAP_THREAD_LOCAL vec_ZZ_pX *BabyStepFile = 0; +static NTL_CHEAP_THREAD_LOCAL vec_ZZ_pX *GiantStepFile = 0; +static NTL_CHEAP_THREAD_LOCAL long use_files; + + +static +double CalcTableSize(long n, long k) +{ + double sz = ZZ_p::storage(); + sz = sz * n; + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_ZZ_p); + sz = sz * k; + sz = sz/1024; + return sz; +} + + +static +void GenerateBabySteps(ZZ_pX& h1, const ZZ_pX& f, const ZZ_pX& h, long k, + FileList& flist, long verbose) + +{ + double t; + + if (verbose) { cerr << "generating baby steps..."; t = GetTime(); } + + ZZ_pXModulus F; + build(F, f); + + ZZ_pXArgument H; + build(H, h, F, 2*SqrRoot(F.n)); + + + h1 = h; + + long i; + + if (!use_files) { + (*BabyStepFile).SetLength(k-1); + } + + for (i = 1; i <= k-1; i++) { + if (use_files) { + ofstream s; + OpenWrite(s, FileName("baby", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*BabyStepFile)(i) = h1; + + CompMod(h1, h1, H, F); + if (verbose) cerr << "+"; + } + + if (verbose) + cerr << (GetTime()-t) << "\n"; +} + + +static +void GenerateGiantSteps(const ZZ_pX& f, const ZZ_pX& h, long l, + FileList& flist, long verbose) +{ + + double t; + + if (verbose) { cerr << "generating giant 
steps..."; t = GetTime(); } + + ZZ_pXModulus F; + build(F, f); + + ZZ_pXArgument H; + build(H, h, F, 2*SqrRoot(F.n)); + + ZZ_pX h1; + + h1 = h; + + long i; + + if (!use_files) { + (*GiantStepFile).SetLength(l); + } + + for (i = 1; i <= l-1; i++) { + if (use_files) { + ofstream s; + OpenWrite(s, FileName("giant", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*GiantStepFile)(i) = h1; + + CompMod(h1, h1, H, F); + if (verbose) cerr << "+"; + } + + if (use_files) { + ofstream s; + OpenWrite(s, FileName("giant", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*GiantStepFile)(i) = h1; + + if (verbose) + cerr << (GetTime()-t) << "\n"; +} + + + +static +void NewAddFactor(vec_pair_ZZ_pX_long& u, const ZZ_pX& g, long m, long verbose) +{ + long len = u.length(); + + u.SetLength(len+1); + u[len].a = g; + u[len].b = m; + + if (verbose) { + cerr << "split " << m << " " << deg(g) << "\n"; + } +} + + + + +static +void NewProcessTable(vec_pair_ZZ_pX_long& u, ZZ_pX& f, const ZZ_pXModulus& F, + vec_ZZ_pX& buf, long size, long StartInterval, + long IntervalLength, long verbose) + +{ + if (size == 0) return; + + ZZ_pX& g = buf[size-1]; + + long i; + + for (i = 0; i < size-1; i++) + MulMod(g, g, buf[i], F); + + GCD(g, f, g); + + if (deg(g) == 0) return; + + div(f, f, g); + + long d = (StartInterval-1)*IntervalLength + 1; + i = 0; + long interval = StartInterval; + + while (i < size-1 && 2*d <= deg(g)) { + GCD(buf[i], buf[i], g); + if (deg(buf[i]) > 0) { + NewAddFactor(u, buf[i], interval, verbose); + div(g, g, buf[i]); + } + + i++; + interval++; + d += IntervalLength; + } + + if (deg(g) > 0) { + if (i == size-1) + NewAddFactor(u, g, interval, verbose); + else + NewAddFactor(u, g, (deg(g)+IntervalLength-1)/IntervalLength, verbose); + } +} + + +static +void FetchGiantStep(ZZ_pX& g, long gs, const ZZ_pXModulus& F) +{ + if (use_files) { + ifstream s; + OpenRead(s, FileName("giant", gs)); + NTL_INPUT_CHECK_ERR(s >> g); + } + else + g = (*GiantStepFile)(gs); + + rem(g, g, F); +} + + +static +void FetchBabySteps(vec_ZZ_pX& v, long k) +{ + v.SetLength(k); + + SetX(v[0]); + + long i; + for (i = 1; i <= k-1; i++) { + if (use_files) { + ifstream s; + OpenRead(s, FileName("baby", i)); + NTL_INPUT_CHECK_ERR(s >> v[i]); + } + else + v[i] = (*BabyStepFile)(i); + } +} + + + +static +void GiantRefine(vec_pair_ZZ_pX_long& u, const ZZ_pX& ff, long k, long l, + long verbose) + +{ + double t; + + if (verbose) { + cerr << "giant refine..."; + t = GetTime(); + } + + u.SetLength(0); + + vec_ZZ_pX BabyStep; + + FetchBabySteps(BabyStep, k); + + vec_ZZ_pX buf(INIT_SIZE, ZZ_pX_GCDTableSize); + + ZZ_pX f; + f = ff; + + ZZ_pXModulus F; + build(F, f); + + ZZ_pX g; + ZZ_pX h; + + long size = 0; + + long first_gs; + + long d = 1; + + while (2*d <= deg(f)) { + + long old_n = deg(f); + + long gs = (d+k-1)/k; + long bs = gs*k - d; + + if (bs == k-1) { + size++; + if (size == 1) first_gs = gs; + FetchGiantStep(g, gs, F); + sub(buf[size-1], g, BabyStep[bs]); + } + else { + sub(h, g, BabyStep[bs]); + MulMod(buf[size-1], buf[size-1], h, F); + } + + if (verbose && bs == 0) cerr << "+"; + + if (size == ZZ_pX_GCDTableSize && bs == 0) { + NewProcessTable(u, f, F, buf, size, first_gs, k, verbose); + if (verbose) cerr << "*"; + size = 0; + } + + d++; + + if (2*d <= deg(f) && deg(f) < old_n) { + build(F, f); + + long i; + for (i = 1; i <= k-1; i++) + rem(BabyStep[i], BabyStep[i], F); + } + } + + if (size > 0) { + NewProcessTable(u, f, F, buf, size, first_gs, k, verbose); + if (verbose) cerr << "*"; + } + + if (deg(f) > 0) + 
NewAddFactor(u, f, 0, verbose); + + if (verbose) { + t = GetTime()-t; + cerr << "giant refine time: " << t << "\n"; + } +} + + +static +void IntervalRefine(vec_pair_ZZ_pX_long& factors, const ZZ_pX& ff, + long k, long gs, const vec_ZZ_pX& BabyStep, long verbose) + +{ + vec_ZZ_pX buf(INIT_SIZE, ZZ_pX_GCDTableSize); + + ZZ_pX f; + f = ff; + + ZZ_pXModulus F; + build(F, f); + + ZZ_pX g; + + FetchGiantStep(g, gs, F); + + long size = 0; + + long first_d; + + long d = (gs-1)*k + 1; + long bs = k-1; + + while (bs >= 0 && 2*d <= deg(f)) { + + long old_n = deg(f); + + if (size == 0) first_d = d; + rem(buf[size], BabyStep[bs], F); + sub(buf[size], buf[size], g); + size++; + + if (size == ZZ_pX_GCDTableSize) { + NewProcessTable(factors, f, F, buf, size, first_d, 1, verbose); + size = 0; + } + + d++; + bs--; + + if (bs >= 0 && 2*d <= deg(f) && deg(f) < old_n) { + build(F, f); + rem(g, g, F); + } + } + + NewProcessTable(factors, f, F, buf, size, first_d, 1, verbose); + + if (deg(f) > 0) + NewAddFactor(factors, f, deg(f), verbose); +} + + + + +static +void BabyRefine(vec_pair_ZZ_pX_long& factors, const vec_pair_ZZ_pX_long& u, + long k, long l, long verbose) + +{ + double t; + + if (verbose) { + cerr << "baby refine..."; + t = GetTime(); + } + + factors.SetLength(0); + + vec_ZZ_pX BabyStep; + + long i; + for (i = 0; i < u.length(); i++) { + const ZZ_pX& g = u[i].a; + long gs = u[i].b; + + if (gs == 0 || 2*((gs-1)*k+1) > deg(g)) + NewAddFactor(factors, g, deg(g), verbose); + else { + if (BabyStep.length() == 0) + FetchBabySteps(BabyStep, k); + IntervalRefine(factors, g, k, gs, BabyStep, verbose); + } + } + + if (verbose) { + t = GetTime()-t; + cerr << "baby refine time: " << t << "\n"; + } +} + + + +void NewDDF(vec_pair_ZZ_pX_long& factors, + const ZZ_pX& f, + const ZZ_pX& h, + long verbose) + +{ + if (!IsOne(LeadCoeff(f))) + LogicError("NewDDF: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(0); + append(factors, cons(f, 1L)); + return; + } + + long B = deg(f)/2; + long k = SqrRoot(B); + long l = (B+k-1)/k; + + ZZ_pX h1; + + if (CalcTableSize(deg(f), k + l - 1) > ZZ_pXFileThresh) + use_files = 1; + else + use_files = 0; + + + FileList flist; + + vec_ZZ_pX local_BabyStepFile; + vec_ZZ_pX local_GiantStepFile; + + BabyStepFile = &local_BabyStepFile; + GiantStepFile = &local_GiantStepFile; + + GenerateBabySteps(h1, f, h, k, flist, verbose); + + GenerateGiantSteps(f, h1, l, flist, verbose); + + + vec_pair_ZZ_pX_long u; + GiantRefine(u, f, k, l, verbose); + BabyRefine(factors, u, k, l, verbose); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/all b/thirdparty/linux/ntl/src/all new file mode 100644 index 0000000000..e69de29bb2 diff --git a/thirdparty/linux/ntl/src/c_lip_impl.h b/thirdparty/linux/ntl/src/c_lip_impl.h new file mode 100644 index 0000000000..1794bc3272 --- /dev/null +++ b/thirdparty/linux/ntl/src/c_lip_impl.h @@ -0,0 +1,6774 @@ + +#include +#include +#include +#include + +NTL_CLIENT + +#ifdef NTL_THREADS +#error "NTL_THREADS does not work with classic LIP: use GMP instead" +#endif + + +#define MustAlloc(c, len) (!(c) || ((c)[-1] >> 1) < (len)) + /* Fast test to determine if allocation is necessary */ + + +class _ntl_verylong_watcher { +public: + _ntl_verylong *watched; + + explicit + _ntl_verylong_watcher(_ntl_verylong *_watched) : watched(_watched) {} + + ~_ntl_verylong_watcher() + { + if (*watched && ((*watched)[-1] >> 1) > NTL_RELEASE_THRESH) + _ntl_zfree(watched); + } +}; + + +class _ntl_verylong_deleter { +public: + 
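+ // releases the bignum's storage; apply() is presumably what the
+ // WrappedPtr wrapper below invokes on destruction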
static void apply(_ntl_verylong& p) { _ntl_zfree(&p); } +}; + +typedef WrappedPtr _ntl_verylong_wrapped; + +#define CRegister(x) NTL_THREAD_LOCAL static _ntl_verylong_wrapped x; _ntl_verylong_watcher _WATCHER__ ## x(&x) + +// #define CRegister(x) NTL_THREAD_LOCAL static _ntl_verylong x = 0; _ntl_verylong_watcher _WATCHER__ ## x(&x) + + + + +#define MIN_SETL (4) + /* _ntl_zsetlength allocates a multiple of MIN_SETL digits */ + + + +#define MulLo(rres,a,b) rres = S(U(a)*U(b)) + +#define S cast_signed +#define U cast_unsigned + +/* + * definitions of zaddmulp, zxmulp, zaddmulpsq for the various + * long integer arithmentic implementation options. + */ + +#if (defined(NTL_LONG_LONG)) + + +#if (!defined(NTL_CLEAN_INT)) + + +/* + * One might get slightly better code with this version. + */ + +// NOTE: this zaddmulp may get called with negative d + +#define zaddmulp(a, b, d, t) { \ + NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (d)) + ((t)+(a)); \ + (a) = S(((unsigned long)(_pp)) & U(NTL_RADIXM)); \ + (t) = S((unsigned long) (((NTL_ULL_TYPE)_pp) >> NTL_NBITS)); \ +} + + +#define zxmulp(a, b, d, t) { \ + NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (d)) + (t); \ + (a) = S(((unsigned long)(_pp)) & U(NTL_RADIXM)); \ + (t) = S((unsigned long) (((NTL_ULL_TYPE)_pp) >> NTL_NBITS)); \ +} + +#define zaddmulpsq(a,b,t) { \ + NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (b)) + (a); \ + (a) = ((long)(_pp)) & NTL_RADIXM; \ + (t) = (long) (_pp >> NTL_NBITS); \ +} + +#else + +/* + * This version conforms to the language standard when d is non-negative. + * Some compilers may emit sub-optimal code, though. + */ + + + +#define zaddmulp(a, b, d, t) { \ + NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (d)) + ((t)+(a)); \ + (a) = (long) (_pp & NTL_RADIXM); \ + (t) = (long) (_pp >> NTL_NBITS); \ +} + + +#define zxmulp(a, b, d, t) { \ + NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (d)) + (t); \ + (a) = (long) (_pp & NTL_RADIXM); \ + (t) = (long) (_pp >> NTL_NBITS); \ +} + +#define zaddmulpsq(a,b,t) { \ + NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (b)) + (a); \ + (a) = (long) (_pp & NTL_RADIXM); \ + (t) = (long) (_pp >> NTL_NBITS); \ +} + + +#endif + + +#elif (defined(NTL_AVOID_FLOAT)) + + +#define zaddmulp( a, b, d, t) { \ + unsigned long _b1 = b & NTL_RADIXROOTM; \ + unsigned long _d1 = d & NTL_RADIXROOTM; \ + unsigned long _bd,_b1d1,_m,_aa= (a) + (t); \ + unsigned long _ld = (d>>NTL_NBITSH); \ + unsigned long _lb = (b>>NTL_NBITSH); \ + \ + _bd=_lb*_ld; \ + _b1d1=_b1*_d1; \ + _m=(_lb+_b1)*(_ld+_d1) - _bd - _b1d1; \ + _aa += ( _b1d1+ ((_m&NTL_RADIXROOTM)<> NTL_NBITS) + _bd + (_m>>NTL_NBITSH); \ + (a) = _aa & NTL_RADIXM; \ +} + + + + + +#define zxmulp( a, b, d, t) { \ + unsigned long _b1 = b & NTL_RADIXROOTM; \ + unsigned long _d1 = d & NTL_RADIXROOTM; \ + unsigned long _bd,_b1d1,_m,_aa= (t); \ + unsigned long _ld = (d>>NTL_NBITSH); \ + unsigned long _lb = (b>>NTL_NBITSH); \ + \ + _bd=_lb*_ld; \ + _b1d1=_b1*_d1; \ + _m=(_lb+_b1)*(_ld+_d1) - _bd - _b1d1; \ + _aa += ( _b1d1+ ((_m&NTL_RADIXROOTM)<> NTL_NBITS) + _bd + (_m>>NTL_NBITSH); \ + (a) = _aa & NTL_RADIXM; \ +} + + +#define zaddmulpsq(_a, _b, _t) \ +{ \ + long _lb = (_b); \ + long _b1 = (_b) & NTL_RADIXROOTM; \ + long _aa = (_a) + _b1 * _b1; \ + \ + _b1 = (_b1 * (_lb >>= NTL_NBITSH) << 1) + (_aa >> NTL_NBITSH); \ + _aa = (_aa & NTL_RADIXROOTM) + ((_b1 & NTL_RADIXROOTM) << NTL_NBITSH); \ + (_t) = _lb * _lb + (_b1 >> NTL_NBITSH) + (_aa >> NTL_NBITS); \ + (_a) = (_aa & NTL_RADIXM); \ +} + + + +#else + +/* 
default long integer arithemtic */ +/* various "software pipelining" routines are also defined */ + + +/* + * The macros CARRY_TYPE and CARRY_CONV are only used in the submul + * logic. + */ + + +#if (defined(NTL_CLEAN_INT)) + +#define CARRY_TYPE unsigned long +#define CARRY_CONV(x) (-((long)(-x))) + +#else + +#define CARRY_TYPE long +#define CARRY_CONV(x) (x) + +#endif + + +#if (NTL_BITS_PER_LONG <= NTL_NBITS + 2) + +#if (NTL_ARITH_RIGHT_SHIFT && !defined(NTL_CLEAN_INT)) +/* value right-shifted is -1..1 */ +#define zaddmulp(a, b, d, t) \ +{ \ + long _a = (a), _b = (b), _d = (d), _t = (t); \ + long _t1 = S(U(_b)*U(_d)); \ + long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ); \ + _t2 = S(U(_t2) + U(S(U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS)); \ + _t1 = S((U(_t1) & U(NTL_RADIXM)) + U(_a) + U(_t)); \ + (t) = S(U(_t2) + (U(_t1) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + + +#define zxmulp(a, b, d, t) \ +{ \ + long _b = (b), _d = (d), _t = (t); \ + long _t1 = S(U(_b)*U(_d) + U(_t)); \ + long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \ + (t) = S(U(_t2) + ((U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + +/* value shifted is -1..1 */ +#define zaddmulpsq(a, b, t) \ +{ \ + long _a = (a), _b = (b); \ + long _t1 = S(U(_b)*U(_b)); \ + long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ); \ + _t2 = S(U(_t2) + U(S(U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS)); \ + _t1 = S((U(_t1) & U(NTL_RADIXM)) + U(_a)); \ + (t) = S(U(_t2) + (U(_t1) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + + +/* + * In the following definition of zam_init, the value _ds is computed so that + * it is slightly bigger than s*NTL_RADIX_INV. This has the consequence that + * the value _hi is equal to floor(_b*_s/NTL_RADIX) or + * floor(_b*_s/NTL_RADIX) + 1, assuming only that (1) conversion of "small" + * integer to doubles is exact, (2) multiplication by powers of 2 is exact, and + * (3) multiplication of two general doubles yields a result with relative + * error 1/2^{NTL_DOUBLE_PRECISION-1}. These assumptions are very + * conservative, and in fact, the IEEE floating point standard would guarantee + * this result *without* making _ds slightly bigger. 
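+ *
+ * (Editor's illustration, assuming for concreteness NTL_NBITS = 30,
+ * so NTL_RADIX = 2^30 and NTL_FRADIX_INV = 2^-30: then
+ * _ds = (2*s+1)*2^-31, exceeding s*2^-30 by exactly 2^-31, and for
+ * b < 2^30 the excess b*2^-31 plus the multiplication round-off is
+ * still below 1 -- hence the floor(_b*_s/NTL_RADIX) or
+ * floor(_b*_s/NTL_RADIX)+1 guarantee stated above.)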
+ */ + +#define zam_decl double _ds; long _hi, _lo, _s; + + +#define zam_init(b,s) \ +{ \ + long _b = (b); \ + _s = (s); \ + _ds = ((_s << 1)+1)*(NTL_FRADIX_INV/2.0); \ + _lo = S(U(_b)*U(_s)); \ + _hi = (long) (((double) _b)*_ds); \ +} + +/* value shifted is 0..3 */ +#define zam_loop(a,t,nb) \ +{ \ + long _a = (a), _t = (t), _nb = (nb); \ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _lo = S(U(_lo) + U(_a) + U(_t)); \ + _hi--; \ + (t) = S(U(_hi) + ((U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* shift is -1..+1 */ +#define zsx_loop(a,t,nb) \ +{ \ + long _t = (t), _nb = (nb); \ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _lo = S(U(_lo) + U(_t)); \ + (t) = S(U(_hi) + U(S(U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + + +/* value shifted is -2..+1 */ +#define zam_subloop(a,t,nb) \ +{ \ + long _a = (a), _t = (t), _nb = (nb); \ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _lo = S(U(_a) + U(_t) - U(_lo)); \ + (t) = S(U(S(U(_lo) + (U(_hi)<> NTL_NBITS) - U(_hi)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + + +/* value shifted is 0..3 */ +#define zam_finish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _lo = S(U(_lo) + U(_a) + U(_t)); \ + _hi--; \ + (t) = S(U(_hi) + ((U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} + +/* value shifted is -1..+1 */ +#define zsx_finish(a,t) \ +{ \ + long _t = (t); \ + _lo = S(U(_lo) + U(_t)); \ + (t) = S(U(_hi) + U(S(U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} + +/* value shifted is -2..+1 */ +#define zam_subfinish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _lo = S(U(_a) + U(_t) - U(_lo)); \ + (t) = S(U(S(U(_lo) + (U(_hi)<> NTL_NBITS) - U(_hi)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} + + +#elif (!defined(NTL_CLEAN_INT)) + + +/* right shift is not arithmetic */ + +/* value right-shifted is 0..2 */ +#define zaddmulp(a, b, d, t) \ +{ \ + long _a = (a), _b = (b), _d = (d), _t = (t); \ + long _t1 = S(U(_b)*U(_d)); \ + long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \ + _t2 = U(_t2) + ((U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS); \ + _t1 = S((U(_t1) & U(NTL_RADIXM)) + U(_a) + U(_t)); \ + (t) = S(U(_t2) + (U(_t1) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + + +#define zxmulp(a, b, d, t) \ +{ \ + long _b = (b), _d = (d), _t = (t); \ + long _t1 = S(U(_b)*U(_d) + U(_t)); \ + long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \ + (t) = S(U(_t2) + ((U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + +/* value shifted is 0..2 */ +#define zaddmulpsq(a, b, t) \ +{ \ + long _a = (a), _b = (b); \ + long _t1 = S(U(_b)*U(_b)); \ + long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ) - 1; \ + _t2 = S(U(_t2) + ((U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS)); \ + _t1 = S((U(_t1) & U(NTL_RADIXM)) + U(_a)); \ + (t) = S(U(_t2) + (U(_t1) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + + +#define zam_decl double _ds; long _hi, _lo, _s; + +#define zam_init(b,s) \ +{ \ + long _b = (b); \ + _s = (s); \ + _ds = ((_s << 1)+1)*(NTL_FRADIX_INV/2.0); \ + _lo = S(U(_b)*U(_s)); \ + _hi = (long) (((double) _b)*_ds); \ +} + +/* value shifted is 0..3 */ +#define zam_loop(a,t,nb) \ +{ \ + long _a = (a), _t = (t), _nb = (nb); 
\ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _lo = S(U(_lo) + U(_a) + U(_t)); \ + _hi--; \ + (t) = S( U(_hi) + ((U(_lo) - (U(_hi)<> NTL_NBITS) ); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* value shifted is 0..2 */ +#define zsx_loop(a,t,nb) \ +{ \ + long _t = (t), _nb = (nb); \ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _lo = S(U(_lo) + U(_t)); \ + _hi--; \ + (t) = S(U(_hi) + ((U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* value shifted is 0..3 */ +#define zam_subloop(a,t,nb) \ +{ \ + long _a = (a), _t = (t), _nb = (nb); \ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _hi += 2; \ + _lo = S(U(_a) + U(_t) - U(_lo)); \ + (t) = S(((U(_lo) + (U(_hi)<> NTL_NBITS) - U(_hi)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* value shifted is 0..3 */ +#define zam_finish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _lo = S(U(_lo) + U(_a) + U(_t)); \ + _hi--; \ + (t) = S(U(_hi) + (((U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} + +/* value shifted is 0..2 */ +#define zsx_finish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _lo = S(U(_lo) + U(_t)); \ + _hi--; \ + (t) = S(U(_hi) + (((U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} + +/* value shifted is 0..3 */ +#define zam_subfinish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _hi += 2; \ + _lo = S(U(_a) + U(_t) - U(_lo)); \ + (t) = S(((U(_lo) + (U(_hi)<> NTL_NBITS) - U(_hi)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} + +#else +/* clean int version */ + + +/* value right-shifted is 0..2 */ +#define zaddmulp(a, b, d, t) \ +{ \ + long _a = (a), _b = (b), _d = (d), _t = (t); \ + unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _d); \ + unsigned long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \ + _t2 = _t2 + ( (_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS ); \ + _t1 = (_t1 & NTL_RADIXM) + ((unsigned long) _a) + ((unsigned long) _t); \ + (t) = (long) (_t2 + (_t1 >> NTL_NBITS)); \ + (a) = (long) (_t1 & NTL_RADIXM); \ +} + + +#define zxmulp(a, b, d, t) \ +{ \ + long _b = (b), _d = (d), _t = (t); \ + unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _d) + ((unsigned long) _t); \ + unsigned long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \ + (t) = (long) (_t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = (long) (_t1 & NTL_RADIXM); \ +} + +/* value shifted is 0..2 */ +#define zaddmulpsq(a, b, t) \ +{ \ + long _a = (a), _b = (b); \ + unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _b); \ + unsigned long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ) - 1; \ + _t2 = _t2 + ( (_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS ); \ + _t1 = (_t1 & NTL_RADIXM) + ((unsigned long) _a); \ + (t) = (long) (_t2 + (_t1 >> NTL_NBITS)); \ + (a) = (long) (_t1 & NTL_RADIXM); \ +} + +#define zam_decl double _ds; long _s; unsigned long _hi, _lo; + +#define zam_init(b,s) \ +{ \ + long _b = (b); \ + _s = (s); \ + _ds = ((_s << 1)+1)*(NTL_FRADIX_INV/2.0); \ + _lo = ((unsigned long) _b)*((unsigned long) _s); \ + _hi = (long) (((double) _b)*_ds); \ +} + +/* value shifted is 0..3 */ +#define zam_loop(a,t,nb) \ +{ \ + long _a = (a), _t = (t), _nb = (nb); \ + unsigned long _vv; \ + double _yy; \ + _vv = ((unsigned long) _nb)*((unsigned long)_s); \ + _yy = ((double) _nb)*_ds; \ + _lo = 
_lo + ((unsigned long) _a) + ((unsigned long) _t); \ + _hi--; \ + (t) = (long) (_hi + ((_lo - (_hi<> NTL_NBITS)); \ + (a) = (long) (_lo & NTL_RADIXM); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* value shifted is 0..2 */ +#define zsx_loop(a,t,nb) \ +{ \ + long _t = (t), _nb = (nb); \ + unsigned long _vv; \ + double _yy; \ + _vv = ((unsigned long) _nb)*((unsigned long) _s); \ + _yy = ((double) _nb)*_ds; \ + _lo = _lo + ((unsigned long) _t); \ + _hi--; \ + (t) = (long) (_hi + ((_lo - (_hi<> NTL_NBITS)); \ + (a) = (long) (_lo & NTL_RADIXM); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* value shifted is 0..3 */ +#define zam_subloop(a,t,nb) \ +{ \ + long _a = (a); unsigned long _t = (t); long _nb = (nb); \ + unsigned long _vv; \ + double _yy; \ + _vv = ((unsigned long) _nb)*((unsigned long) _s); \ + _yy = ((double) _nb)*_ds; \ + _hi += 2; \ + _lo = ((unsigned long) _a) + _t - _lo; \ + (t) = ((_lo + (_hi<> NTL_NBITS) - _hi; \ + (a) = (long) (_lo & NTL_RADIXM); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* value shifted is 0..3 */ +#define zam_finish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _lo = _lo + ((unsigned long) _a) + ((unsigned long) _t); \ + _hi--; \ + (t) = (long) (_hi + ((_lo - (_hi<> NTL_NBITS)); \ + (a) = (long) (_lo & NTL_RADIXM); \ +} + +/* value shifted is 0..2 */ +#define zsx_finish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _lo = _lo + ((unsigned long) _t); \ + _hi--; \ + (t) = (long) (_hi + ((_lo - (_hi<> NTL_NBITS)); \ + (a) = (long) (_lo & NTL_RADIXM); \ +} + +/* value shifted is 0..3 */ +#define zam_subfinish(a,t) \ +{ \ + long _a = (a); unsigned long _t = (t); \ + _hi += 2; \ + _lo = ((unsigned long) _a) + _t - _lo; \ + (t) = ((_lo + (_hi<> NTL_NBITS) - _hi; \ + (a) = (long) (_lo & NTL_RADIXM); \ +} + +#endif +/* end of arithmemtic-right-shift if-then else */ + +#else +/* NTL_BITS_PER_LONG > NTL_NBITS + 2, and certain optimizations can be + made. Useful on 64-bit machines. 
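+
+   Editor's note: concretely, the zaddmulp below accumulates
+   U(_b)*U(_d) + U(_a) + U(_t) in one unsigned word before any radix
+   reduction, whereas the narrow-word branch above had to mask the low
+   product with NTL_RADIXM first; the extra headroom of
+   NTL_BITS_PER_LONG - NTL_NBITS > 2 bits is what makes this safe.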
*/ + +#if (NTL_ARITH_RIGHT_SHIFT && !defined(NTL_CLEAN_INT)) + + + +/* shift is -1..+3 */ +#define zaddmulp(a, b, d, t) \ +{ \ + long _a = (a), _b = (b), _d = (d), _t = (t); \ + long _t1 = S(U(_b)*U(_d) + U(_a) + U(_t)); \ + long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ); \ + (t) = S(U(_t2) + U(S(U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + +#define zxmulp(a, b, d, t) \ +{ \ + long _b = (b), _d = (d), _t = (t); \ + long _t1 = S(U(_b)*U(_d) + U(_t)); \ + long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ); \ + (t) = S(U(_t2) + U(S(U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + +/* shift is -1..+2 */ +#define zaddmulpsq(a, b, t) \ +{ \ + long _a = (a), _b = (b), _t = (t); \ + long _t1 = S(U(_b)*U(_b) + U(_a)); \ + long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ); \ + (t) = S(U(_t2) + U(S(U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + +#define zam_decl double _ds; long _hi, _lo, _s; + +#define zam_init(b,s) \ +{ \ + long _b = (b); \ + _s = (s); \ + _ds = _s*NTL_FRADIX_INV; \ + _lo = S(U(_b)*U(_s)); \ + _hi = (long) (((double) _b)*_ds); \ +} + +/* shift is -1..+3 */ +#define zam_loop(a,t,nb) \ +{ \ + long _a = (a), _t = (t), _nb = (nb); \ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _lo = S(U(_lo) + U(_a) + U(_t)); \ + (t) = S(U(_hi) + U(S(U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* shift is -1..+2 */ +#define zsx_loop(a,t,nb) \ +{ \ + long _t = (t), _nb = (nb); \ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _lo = S(U(_lo) + U(_t)); \ + (t) = S(U(_hi) + U(S(U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* shift is -3..+1 */ +#define zam_subloop(a,t,nb) \ +{ \ + long _a = (a), _t = (t), _nb = (nb); \ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _lo = S(U(_a) + U(_t) - U(_lo)); \ + (t) = S(U(S(U(_lo) + (U(_hi)<> NTL_NBITS) - U(_hi)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* shift is -1..+3 */ +#define zam_finish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _lo = S(U(_lo) + U(_a) + U(_t)); \ + (t) = S(U(_hi) + U(S(U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} + +/* shift is -1..+2 */ +#define zsx_finish(a,t) \ +{ \ + long _t = (t); \ + _lo = S(U(_lo) + U(_t)); \ + (t) = S(U(_hi) + U(S(U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} + +/* shift is -3..+1 */ +#define zam_subfinish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _lo = S(U(_a) + U(_t) - U(_lo)); \ + (t) = S(U(S(U(_lo) + (U(_hi)<> NTL_NBITS) - U(_hi)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} + +#elif (!defined(NTL_CLEAN_INT)) +/* right shift is not arithmetic */ + + +/* shift is 0..4 */ +#define zaddmulp(a, b, d, t) \ +{ \ + long _a = (a), _b = (b), _d = (d), _t = (t); \ + long _t1 = S(U(_b)*U(_d) + U(_a) + U(_t)); \ + long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \ + (t) = S(U(_t2) + ((U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + +#define zxmulp(a, b, d, t) \ +{ \ + long _b = (b), _d = (d), _t = (t); \ + long _t1 = S(U(_b)*U(_d) + U(_t)); \ + long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \ + 
(t) = S(U(_t2) + ((U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + +/* shift is 0..3 */ +#define zaddmulpsq(a, b, t) \ +{ \ + long _a = (a), _b = (b), _t = (t); \ + long _t1 = S(U(_b)*U(_b) + U(_a)); \ + long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ) - 1; \ + (t) = S(U(_t2) + ((U(_t1) - (U(_t2) << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = S(U(_t1) & U(NTL_RADIXM)); \ +} + +#define zam_decl double _ds; long _hi, _lo, _s; + +#define zam_init(b,s) \ +{ \ + long _b = (b); \ + _s = (s); \ + _ds = _s*NTL_FRADIX_INV; \ + _lo = S(U(_b)*U(_s)); \ + _hi = (long) (((double) _b)*_ds); \ +} + +/* shift is 0..4 */ +#define zam_loop(a,t,nb) \ +{ \ + long _a = (a), _t = (t), _nb = (nb); \ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _hi--; \ + _lo = S(U(_lo) + U(_a) + U(_t)); \ + (t) = S(U(_hi) + ((U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* shift is 0..3 */ +#define zsx_loop(a,t,nb) \ +{ \ + long _t = (t), _nb = (nb); \ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _hi--; \ + _lo = S(U(_lo) + U(_t)); \ + (t) = S(U(_hi) + ((U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* shift is 0..4 */ +#define zam_subloop(a,t,nb) \ +{ \ + long _a = (a), _t = (t), _nb = (nb); \ + long _vv; \ + double _yy; \ + _vv = S(U(_nb)*U(_s)); \ + _yy = ((double) _nb)*_ds; \ + _hi += 3; \ + _lo = S(U(_a) + U(_t) - U(_lo)); \ + (t) = S(((U(_lo) + (U(_hi)<> NTL_NBITS) - U(_hi)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* shift is 0..4 */ +#define zam_finish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _lo = S(U(_lo) + U(_a) + U(_t)); \ + _hi--; \ + (t) = S(U(_hi) + ((U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} + +/* shift is 0..3 */ +#define zsx_finish(a,t) \ +{ \ + long _t = (t); \ + _lo = S(U(_lo) + U(_t)); \ + _hi--; \ + (t) = S(U(_hi) + ((U(_lo) - (U(_hi)<> NTL_NBITS)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} + +/* shift is 0..4 */ +#define zam_subfinish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _hi += 3; \ + _lo = S(U(_a) + U(_t) - U(_lo)); \ + (t) = S(((U(_lo) + (U(_hi)<> NTL_NBITS) - U(_hi)); \ + (a) = S(U(_lo) & U(NTL_RADIXM)); \ +} +#else + +/* clean int version */ + +/* shift is 0..4 */ +#define zaddmulp(a, b, d, t) \ +{ \ + long _a = (a), _b = (b), _d = (d), _t = (t); \ + unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _d) + ((unsigned long) _a) + ((unsigned long) _t); \ + unsigned long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \ + (t) = (long) (_t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = (long) (_t1 & NTL_RADIXM); \ +} + +#define zxmulp(a, b, d, t) \ +{ \ + long _b = (b), _d = (d), _t = (t); \ + unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _d) + ((unsigned long) _t); \ + unsigned long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \ + (t) = (long) (_t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = (long) (_t1 & NTL_RADIXM); \ +} + +/* shift is 0..3 */ +#define zaddmulpsq(a, b, t) \ +{ \ + long _a = (a), _b = (b), _t = (t); \ + unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _b) + ((unsigned long) _a); \ + unsigned long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ) - 1; \ + (t) = (long) (_t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS)); \ + (a) = 
(long) (_t1 & NTL_RADIXM); \ +} + +#define zam_decl double _ds; long _s; unsigned long _hi, _lo; + +#define zam_init(b,s) \ +{ \ + long _b = (b); \ + _s = (s); \ + _ds = _s*NTL_FRADIX_INV; \ + _lo = ((unsigned long) _b)*((unsigned long) _s); \ + _hi = (long) (((double) _b)*_ds); \ +} + +/* shift is 0..4 */ +#define zam_loop(a,t,nb) \ +{ \ + long _a = (a), _t = (t), _nb = (nb); \ + unsigned long _vv; \ + double _yy; \ + _vv = ((unsigned long) _nb)*((unsigned long) _s); \ + _yy = ((double) _nb)*_ds; \ + _hi--; \ + _lo = _lo + ((unsigned long) _a) + ((unsigned long) _t); \ + (t) = (long) (_hi + ((_lo - (_hi<> NTL_NBITS)); \ + (a) = (long) (_lo & NTL_RADIXM); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* shift is 0..3 */ +#define zsx_loop(a,t,nb) \ +{ \ + long _t = (t), _nb = (nb); \ + unsigned long _vv; \ + double _yy; \ + _vv = ((unsigned long) _nb)*((unsigned long) _s); \ + _yy = ((double) _nb)*_ds; \ + _hi--; \ + _lo = _lo + ((unsigned long) _t); \ + (t) = (long) (_hi + ((_lo - (_hi<> NTL_NBITS)); \ + (a) = (long) (_lo & NTL_RADIXM); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* shift is 0..4 */ +#define zam_subloop(a,t,nb) \ +{ \ + long _a = (a); unsigned long _t = (t); long _nb = (nb); \ + unsigned long _vv; \ + double _yy; \ + _vv = ((unsigned long) _nb)*((unsigned long) _s); \ + _yy = ((double) _nb)*_ds; \ + _hi += 3; \ + _lo = ((unsigned long) _a) + _t - _lo; \ + (t) = ((_lo + (_hi<> NTL_NBITS) - _hi; \ + (a) = (long) (_lo & NTL_RADIXM); \ + _lo = _vv; \ + _hi = (long) _yy; \ +} + +/* shift is 0..4 */ +#define zam_finish(a,t) \ +{ \ + long _a = (a), _t = (t); \ + _lo = _lo + ((unsigned long) _a) + ((unsigned long) _t); \ + _hi--; \ + (t) = (long) (_hi + ((_lo - (_hi<> NTL_NBITS)); \ + (a) = _lo & NTL_RADIXM; \ +} + +/* shift is 0..3 */ +#define zsx_finish(a,t) \ +{ \ + long _t = (t); \ + _lo = _lo + ((unsigned long) _t); \ + _hi--; \ + (t) = (long) (_hi + ((_lo - (_hi<> NTL_NBITS)); \ + (a) = (long) (_lo & NTL_RADIXM); \ +} + +/* shift is 0..4 */ +#define zam_subfinish(a,t) \ +{ \ + long _a = (a); unsigned long _t = (t); \ + _hi += 3; \ + _lo = ((unsigned long) _a) + _t - _lo; \ + (t) = ((_lo + (_hi<> NTL_NBITS) - _hi; \ + (a) = (long) (_lo & NTL_RADIXM); \ +} + +#endif +/* end of arithmetic-right-shift if-then-else */ + +#endif +/* end of "NTL_BITS_PER_LONG <= NTL_NBITS + 2" if-then-else */ + +#endif +/* end of long-integer-implementation if-then-else */ + + + + + + +static +void zaddmulone(long *lama, long *lamb) +{ + long lami; + long lams = 0; + + lams = 0; + for (lami = (*lamb++); lami > 0; lami--) { + lams += (*lama + *lamb++); + *lama++ = lams & NTL_RADIXM; + lams >>= NTL_NBITS; + } + *lama += lams; +} + +#if (NTL_ARITH_RIGHT_SHIFT && !defined(NTL_CLEAN_INT)) + +static +void zsubmulone(long *lama, long *lamb) +{ + long lami; + long lams = 0; + + lams = 0; + for (lami = (*lamb++); lami > 0; lami--) { + lams += (*lama - *lamb++); + *lama++ = lams & NTL_RADIXM; + lams >>= NTL_NBITS; + } + *lama += lams; +} + + +#else + + +static +void zsubmulone(long *lama, long *lamb) +{ + long lami; + long lams = 0; + + lams = 0; + for (lami = (*lamb++); lami > 0; lami--) { + lams = *lama - *lamb++ - lams; + *lama++ = lams & NTL_RADIXM; + lams = (lams < 0); + } + *lama -= lams; +} + +#endif + + +/* + * definitions of zaddmul, zsxmul, zaddmulsq for the various + * long integer implementation options. 
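+ *
+ * (Editor's note on the calling convention, readable off the loops
+ * below: the vector operand is length-prefixed -- b[0] holds the
+ * digit count and b[1..b[0]] the radix-2^NTL_NBITS digits, least
+ * significant first.  zaddmul(s, a, b) performs a += s*b with the
+ * final carry added into the word past a's last digit;
+ * zsxmul(s, a, b) performs a = s*b; zaddmulsq is the corresponding
+ * inner step of squaring.)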
+ */ + + +#if (defined(NTL_AVOID_FLOAT) || defined(NTL_LONG_LONG)) + +static +void zaddmul(long lams, long *lama, long *lamb) +{ + long lami; + long lamcarry = 0; + + for (lami = (*lamb++); lami > 0; lami--) + { + zaddmulp(*lama, *lamb, lams, lamcarry); + lama++; + lamb++; + } + *lama += lamcarry; +} + +static +void zsxmul(long lams, long *lama, long *lamb) +{ + long lami; + long lamcarry = 0; + + for (lami = (*lamb++); lami > 0; lami--) + { + zxmulp(*lama, *lamb, lams, lamcarry); + lama++; + lamb++; + } + *lama = lamcarry; +} + +static +void zaddmulsq(long lsqi, long *lsqa, long *lsqb) +{ + long lsqs = *(lsqb); + long lsqcarry = 0; + + lsqb++; + for (; lsqi > 0; lsqi--) + { + zaddmulp(*lsqa, *lsqb, lsqs, lsqcarry); + lsqa++; + lsqb++; + } + *lsqa += lsqcarry; +} + + +#else +/* default long integer arithmetic */ + +static +void zaddmul(long lams, long *lama, long *lamb) +{ + long lami = (*lamb++)-1; + long lamcarry = 0; + zam_decl; + + zam_init(*lamb, lams); + lamb++; + + + for (; lami > 0; lami--) { + zam_loop(*lama, lamcarry, *lamb); + lama++; + lamb++; + } + zam_finish(*lama, lamcarry); + lama++; + *lama += lamcarry; +} + + + +static +void zsxmul(long lams, long *lama, long *lamb) +{ + long lami = (*lamb++)-1; + long lamcarry = 0; + zam_decl; + + zam_init(*lamb, lams); + lamb++; + + + for (; lami > 0; lami--) { + zsx_loop(*lama, lamcarry, *lamb); + lama++; + lamb++; + } + zsx_finish(*lama, lamcarry); + lama++; + *lama = lamcarry; +} + + + +static +void zaddmulsq(long lsqi, long *lsqa, long *lsqb) +{ + long lsqs; + long lsqcarry; + zam_decl + + if (lsqi <= 0) return; + + lsqs = *lsqb; + lsqcarry = 0; + + lsqb++; + zam_init(*lsqb, lsqs); + lsqb++; + lsqi--; + for (; lsqi > 0; lsqi--) { + zam_loop(*lsqa, lsqcarry, *lsqb); + lsqa++; + lsqb++; + } + zam_finish(*lsqa, lsqcarry); + lsqa++; + *lsqa += lsqcarry; +} + + +#endif + + + + + + + +/* + * definition of zsubmul for the various long integer implementation options. + * Note that zsubmul is only called with a positive first argument. + */ + + + +#if (defined(NTL_AVOID_FLOAT) || (defined(NTL_LONG_LONG) && defined(NTL_CLEAN_INT))) + +static void +zsubmul( + long r, + _ntl_verylong a, + _ntl_verylong b + ) +{ + long rd = NTL_RADIX - r; + long i; + long carry = NTL_RADIX; + + for (i = (*b++); i > 0; i--) + { + zaddmulp(*a, *b, rd, carry); + a++; + carry += NTL_RADIXM - (*b++); + } + *a += carry - NTL_RADIX; /* unnormalized */ +} + +#elif (defined(NTL_LONG_LONG)) + +/* + * NOTE: the implementation of zaddmulp for the NTL_LONG_LONG option + * will work on most machines even when the single-precision + * multiplicand is negative; however, the C language standard does + * not guarantee correct behaviour in this case, which is why the above + * implementation is used when NTL_CLEAN_INT is set. 
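+ *
+ * (Editor's note: the zsubmul below exploits exactly that behaviour --
+ * it negates the multiplier and calls zaddmulp with a negative value,
+ * relying on two's-complement wrap-around in the product.)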
+ */ + +static +void zsubmul(long lams, long *lama, long *lamb) +{ + long lami; + long lamcarry = 0; + + lams = -lams; + + for (lami = (*lamb++); lami > 0; lami--) + { + zaddmulp(*lama, *lamb, lams, lamcarry); + lama++; + lamb++; + } + *lama += lamcarry; +} + + +#else +/* default long integer arithmetic */ + +static +void zsubmul(long lams, long *lama, long *lamb) +{ + long lami = (*lamb++)-1; + CARRY_TYPE lamcarry = 0; + zam_decl; + + zam_init(*lamb, lams); + lamb++; + + + for (; lami > 0; lami--) { + zam_subloop(*lama, lamcarry, *lamb); + lama++; + lamb++; + } + zam_subfinish(*lama, lamcarry); + lama++; + *lama += CARRY_CONV(lamcarry); +} + +#endif + + + + + +/* + * + * zdiv21 returns quot, numhigh so + * + * quot = (numhigh*NTL_RADIX + numlow)/denom; + * numhigh = (numhigh*NTL_RADIX + numlow)%denom; + * Assumes 0 <= numhigh < denom < NTL_RADIX and 0 <= numlow < NTL_RADIX. + */ + + +#if (defined(NTL_CLEAN_INT)) + +/* + * This "clean" version relies on the guaranteed semantics of + * unsigned integer arithmetic. + */ + +#define zdiv21(numhigh, numlow, denom, deninv, quot) \ +{ \ + unsigned long udenom = denom; \ + unsigned long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) + \ + (double) (numlow)) * (deninv)); \ + unsigned long lr21 = (((unsigned long) numhigh) << NTL_NBITS) + \ + ((unsigned long) numlow) - udenom*lq21 ; \ + \ + if (lr21 >> (NTL_BITS_PER_LONG-1)) { \ + lq21--; \ + lr21 += udenom; \ + } \ + else if (lr21 >= udenom) { \ + lr21 -= udenom; \ + lq21++; \ + } \ + quot = (long) lq21; \ + numhigh = (long) lr21; \ +} + + +#else + +/* + * This "less clean" version relies on wrap-around semantics for + * signed integer arithmetic. + */ + + +#define zdiv21(numhigh, numlow, denom, deninv, quot) \ +{ \ + long lr21; \ + long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) \ + + (double) (numlow)) * (deninv)); \ + long lp21; \ + MulLo(lp21, lq21, denom); \ + lr21 = S((U(numhigh) << NTL_NBITS) + U(numlow) - U(lp21)); \ + if (lr21 < 0) { \ + lq21--; \ + lr21 += denom; \ + } \ + else if (lr21 >= denom) { \ + lr21 -= denom; \ + lq21++; \ + } \ + quot = lq21; \ + numhigh = lr21; \ +} + +#endif + + + + +/* + * zrem21 behaves just like zdiv21, except the only the remainder is computed. 
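+ * (That is: only the remainder is wanted, so the quotient variable
+ * and its final adjustments are dropped.)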
+ */ + +#if (defined(NTL_CLEAN_INT) || (defined(NTL_AVOID_BRANCHING) && !NTL_ARITH_RIGHT_SHIFT)) +#define NTL_CLEAN_SPMM +#endif + +#if (defined(NTL_CLEAN_SPMM) && !defined(NTL_AVOID_BRANCHING)) + +#define zrem21(numhigh, numlow, denom, deninv) \ +{ \ + unsigned long udenom = denom; \ + unsigned long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) + \ + (double) (numlow)) * (deninv)); \ + unsigned long lr21 = (((unsigned long) numhigh) << NTL_NBITS) + \ + ((unsigned long) numlow) - udenom*lq21 ; \ + \ + if (lr21 >> (NTL_BITS_PER_LONG-1)) { \ + lr21 += udenom; \ + } \ + else if (lr21 >= udenom) { \ + lr21 -= udenom; \ + } \ + numhigh = (long) lr21; \ +} + +#elif (defined(NTL_CLEAN_SPMM) && defined(NTL_AVOID_BRANCHING)) + + +#define zrem21(numhigh, numlow, denom, deninv) \ +{ \ + unsigned long udenom = denom; \ + unsigned long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) + \ + (double) (numlow)) * (deninv)); \ + unsigned long lr21 = (((unsigned long) numhigh) << NTL_NBITS) + \ + ((unsigned long) numlow) - udenom*lq21 ; \ + lr21 += (-(lr21 >> (NTL_BITS_PER_LONG-1))) & udenom; \ + lr21 -= udenom; \ + lr21 += (-(lr21 >> (NTL_BITS_PER_LONG-1))) & udenom; \ + numhigh = (long) lr21; \ +} + + +#elif (NTL_ARITH_RIGHT_SHIFT && defined(NTL_AVOID_BRANCHING)) + + +#define zrem21(numhigh, numlow, denom, deninv) \ +{ \ + long lr21; \ + long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) \ + + (double) (numlow)) * (deninv)); \ + long lp21; \ + MulLo(lp21, lq21, denom); \ + lr21 = S((U(numhigh) << NTL_NBITS) + U(numlow) - U(lp21)); \ + lr21 += S(U(lr21 >> (NTL_BITS_PER_LONG-1)) & U(denom)); \ + lr21 -= denom; \ + lr21 += S(U(lr21 >> (NTL_BITS_PER_LONG-1)) & U(denom)); \ + numhigh = lr21; \ +} + +#else + + +#define zrem21(numhigh, numlow, denom, deninv) \ +{ \ + long lr21; \ + long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) \ + + (double) (numlow)) * (deninv)); \ + long lp21; \ + MulLo(lp21, lq21, denom); \ + lr21 = S((U(numhigh) << NTL_NBITS) + U(numlow) - U(lp21)); \ + if (lr21 < 0) lr21 += denom; \ + else if (lr21 >= denom) lr21 -= denom; \ + numhigh = lr21; \ +} + +#endif + + +long _ntl_zmaxalloc(_ntl_verylong x) +{ + if (!x) + return 0; + else + return (x[-1] >> 1); +} + + +void _ntl_zsetlength(_ntl_verylong *v, long len) +{ + _ntl_verylong x = *v; + + if (len < 0) + LogicError("negative size allocation in _ntl_zsetlength"); + + if (NTL_OVERFLOW(len, NTL_NBITS, 0)) + ResourceError("size too big in _ntl_zsetlength"); + + if (x) { + long oldlen = x[-1]; + long fixed = oldlen & 1; + + oldlen = oldlen >> 1; + + if (fixed) { + if (len > oldlen) + LogicError("can't grow this _ntl_verylong"); + else + return; + } + + if (len <= oldlen) return; + + len++; /* always allocate at least one more than requested */ + + oldlen = (long) (oldlen * 1.2); /* always increase by at least 20% */ + if (len < oldlen) + len = oldlen; + + /* round up to multiple of MIN_SETL */ + len = ((len+(MIN_SETL-1))/MIN_SETL)*MIN_SETL; + + /* test len again */ + if (NTL_OVERFLOW(len, NTL_NBITS, 0)) + ResourceError("size too big in _ntl_zsetlength"); + + x--; + if (!(x = (_ntl_verylong)NTL_REALLOC(x, + len, sizeof(long), 2*sizeof(long)))) { + MemoryError(); + } + x[0] = len << 1; + } + else { + len++; /* as above, always allocate one more than requested */ + len = ((len+(MIN_SETL-1))/MIN_SETL)*MIN_SETL; + + /* test len again */ + if (NTL_OVERFLOW(len, NTL_NBITS, 0)) + ResourceError("size too big in _ntl_zsetlength"); + + + if (!(x = (_ntl_verylong)NTL_MALLOC(len, + sizeof(long), 2*sizeof(long)))) { + MemoryError(); + } + 
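+ /* editor's note on the layout: x[0] caches (allocated length << 1),
+ bit 0 being the "fixed size" flag tested earlier; x[1] = 1 and
+ x[2] = 0 initialize the value to zero; and since *v = x+1 below,
+ callers reach this header as v[-1] -- exactly what the MustAlloc()
+ macro inspects */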
x[0] = len << 1; + x[1] = 1; + x[2] = 0; + } + + *v = x+1; +} + +void _ntl_zfree(_ntl_verylong *x) +{ + _ntl_verylong y; + + if (!(*x)) + return; + + if ((*x)[-1] & 1) + LogicError("Internal error: can't free this _ntl_verylong"); + + y = (*x - 1); + free((void*)y); + *x = 0; +} + + + + + + +long _ntl_zround_correction(_ntl_verylong a, long k, long residual) +{ + long direction; + long p; + long sgn; + long bl; + long wh; + long i; + + if (a[0] > 0) + sgn = 1; + else + sgn = -1; + + p = k - 1; + bl = (p/NTL_NBITS); + wh = 1L << (p - NTL_NBITS*bl); + bl++; + + if (a[bl] & wh) { + /* bit is 1...we have to see if lower bits are all 0 + in order to implement "round to even" */ + + if (a[bl] & (wh - 1)) + direction = 1; + else { + i = bl - 1; + while (i > 0 && a[i] == 0) i--; + if (i > 0) + direction = 1; + else + direction = 0; + } + + /* use residual to break ties */ + + if (direction == 0 && residual != 0) { + if (residual == sgn) + direction = 1; + else + direction = -1; + } + + if (direction == 0) { + /* round to even */ + + wh = wh << 1; + if (wh == NTL_RADIX) { + wh = 1; + bl++; + } + + if (a[bl] & wh) + direction = 1; + else + direction = -1; + } + } + else + direction = -1; + + if (direction == 1) + return sgn; + + return 0; +} + + + +double _ntl_zdoub_aux(_ntl_verylong n) +{ + double res; + long i; + + if (!n) + return ((double) 0); + if ((i = n[0]) < 0) + i = -i; + res = (double) (n[i--]); + for (; i; i--) + res = res * NTL_FRADIX + (double) (n[i]); + if (n[0] > 0) + return (res); + return (-res); +} + + + +double _ntl_zdoub(_ntl_verylong n) +{ + CRegister(tmp); + + long s; + long shamt; + long correction; + double x; + + s = _ntl_z2log(n); + shamt = s - NTL_DOUBLE_PRECISION; + + if (shamt <= 0) + return _ntl_zdoub_aux(n); + + _ntl_zrshift(n, shamt, &tmp); + + correction = _ntl_zround_correction(n, shamt, 0); + + if (correction) _ntl_zsadd(tmp, correction, &tmp); + + x = _ntl_zdoub_aux(tmp); + + x = _ntl_ldexp(x, shamt); + + return x; +} + + +double _ntl_zlog(_ntl_verylong n) +{ + CRegister(tmp); + + NTL_THREAD_LOCAL static double log_2; + NTL_THREAD_LOCAL static long init = 0; + + long s; + long shamt; + long correction; + double x; + + if (!init) { + log_2 = log(2.0); + init = 1; + } + + if (_ntl_zsign(n) <= 0) + ArithmeticError("log argument <= 0"); + + s = _ntl_z2log(n); + shamt = s - NTL_DOUBLE_PRECISION; + + if (shamt <= 0) + return log(_ntl_zdoub_aux(n)); + + _ntl_zrshift(n, shamt, &tmp); + + correction = _ntl_zround_correction(n, shamt, 0); + + if (correction) _ntl_zsadd(tmp, correction, &tmp); + + x = _ntl_zdoub_aux(tmp); + + return log(x) + shamt*log_2; +} + + + +void _ntl_zdoubtoz(double a, _ntl_verylong *xx) +{ + _ntl_verylong x; + long neg, i, t, sz; + + + a = floor(a); + + if (!_ntl_IsFinite(&a)) + ArithmeticError("_ntl_zdoubtoz: attempt to convert non-finite value"); + + + if (a < 0) { + a = -a; + neg = 1; + } + else + neg = 0; + + if (a == 0) { + _ntl_zzero(xx); + return; + } + + sz = 1; + a = a*NTL_FRADIX_INV; + + while (a >= 1) { + a = a*NTL_FRADIX_INV; + sz++; + } + + x = *xx; + if (MustAlloc(x, sz)) { + _ntl_zsetlength(&x, sz); + *xx = x; + } + + for (i = sz; i > 0; i--) { + a = a*NTL_FRADIX; + t = (long) a; + x[i] = t; + a = a - t; + } + + x[0] = (neg ? 
-sz : sz); +} + +void _ntl_zzero(_ntl_verylong *aa) +{ + if (!(*aa)) _ntl_zsetlength(aa, 1); + (*aa)[0] = 1; + (*aa)[1] = 0; +} + +/* same as _ntl_zzero, except does not unnecessarily allocate space */ + +void _ntl_zzero1(_ntl_verylong *aa) +{ + if (!(*aa)) return; + (*aa)[0] = 1; + (*aa)[1] = 0; +} + +void _ntl_zone(_ntl_verylong *aa) +{ + if (!(*aa)) _ntl_zsetlength(aa, 1); + (*aa)[0] = 1; + (*aa)[1] = 1; +} + +void _ntl_zcopy(_ntl_verylong a, _ntl_verylong *bb) +{ + long i; + _ntl_verylong b = *bb; + + if (!a) { + _ntl_zzero(bb); + return; + } + if (a != b) { + if ((i = *a) < 0) + i = (-i); + if (MustAlloc(b, i)) { + _ntl_zsetlength(&b, i); + *bb = b; + } + for (; i >= 0; i--) + *b++ = *a++; + } +} + +/* same as _ntl_zcopy, but does not unnecessarily allocate space */ + +void _ntl_zcopy1(_ntl_verylong a, _ntl_verylong *bb) +{ + long i; + _ntl_verylong b = *bb; + + if (!a) { + _ntl_zzero1(bb); + return; + } + if (a != b) { + if ((i = *a) < 0) + i = (-i); + if (MustAlloc(b, i)) { + _ntl_zsetlength(&b, i); + *bb = b; + } + for (; i >= 0; i--) + *b++ = *a++; + } +} + +void _ntl_zintoz(long d, _ntl_verylong *aa) +{ + long i; + long anegative; + unsigned long d1, d2; + _ntl_verylong a = *aa; + + anegative = 0; + if (d < 0) { + anegative = 1; + d1 = - ((unsigned long) d); /* careful: avoid overflow */ + } + else + d1 = d; + + i = 0; + d2 = d1; + do { + d2 >>= NTL_NBITS; + i++; + } + while (d2 > 0); + + if (MustAlloc(a, i)) { + _ntl_zsetlength(&a, i); + *aa = a; + } + + i = 0; + a[1] = 0; + while (d1 > 0) { + a[++i] = d1 & NTL_RADIXM; + d1 >>= NTL_NBITS; + } + if (i > 0) + a[0] = i; + else + a[0] = 1; + + if (anegative) + a[0] = (-a[0]); +} + + +/* same as _ntl_zintoz, but does not unnecessarily allocate space */ + +void _ntl_zintoz1(long d, _ntl_verylong *aa) +{ + long i; + long anegative; + unsigned long d1, d2; + _ntl_verylong a = *aa; + + if (!d && !a) return; + + anegative = 0; + if (d < 0) { + anegative = 1; + d1 = - ((unsigned long) d); /* careful: avoid overlow */ + } + else + d1 = d; + + i = 0; + d2 = d1; + do { + d2 >>= NTL_NBITS; + i++; + } + while (d2 > 0); + + if (MustAlloc(a, i)) { + _ntl_zsetlength(&a, i); + *aa = a; + } + + i = 0; + a[1] = 0; + while (d1 > 0) { + a[++i] = d1 & NTL_RADIXM; + d1 >>= NTL_NBITS; + } + if (i > 0) + a[0] = i; + else + a[0] = 1; + + if (anegative) + a[0] = (-a[0]); +} + + +void _ntl_zuintoz(unsigned long d, _ntl_verylong *aa) +{ + long i; + unsigned long d1, d2; + _ntl_verylong a = *aa; + + d1 = d; + i = 0; + d2 = d1; + do { + d2 >>= NTL_NBITS; + i++; + } + while (d2 > 0); + + if (MustAlloc(a, i)) { + _ntl_zsetlength(&a, i); + *aa = a; + } + + i = 0; + a[1] = 0; + while (d1 > 0) { + a[++i] = d1 & NTL_RADIXM; + d1 >>= NTL_NBITS; + } + if (i > 0) + a[0] = i; + else + a[0] = 1; +} + + +unsigned long _ntl_ztouint(_ntl_verylong a) +{ + unsigned long d; + long sa; + + if (!a) + return (0); + + if ((sa = *a) < 0) + sa = -sa; + + d = (unsigned long) (*(a += sa)); + while (--sa) { + d <<= NTL_NBITS; + d += (unsigned long) (*(--a)); + } + + if ((*(--a)) < 0) + return (-d); + return (d); +} + + +long _ntl_ztoint(_ntl_verylong a) +{ + unsigned long res = _ntl_ztouint(a); + return NTL_ULONG_TO_LONG(res); +} + + + +long _ntl_zcompare(_ntl_verylong a, _ntl_verylong b) +{ + long sa; + long sb; + + if (!a) { + if (!b) + return (0); + if (b[0] < 0) + return (1); + if (b[0] > 1) + return (-1); + if (b[1]) + return (-1); + return (0); + } + if (!b) { + if (a[0] < 0) + return (-1); + if (a[0] > 1) + return (1); + if (a[1]) + return (1); + return (0); + } + + if ((sa = *a) 
> (sb = *b)) + return (1); + if (sa < sb) + return (-1); + if (sa < 0) + sa = (-sa); + a += sa; + b += sa; + for (; sa; sa--) { + long diff = *a - *b; + + if (diff > 0) { + if (sb < 0) + return (-1); + return (1); + } + if (diff < 0) { + if (sb < 0) + return (1); + return (-1); + } + + a--; + b--; + } + return (0); +} + +void _ntl_znegate(_ntl_verylong *aa) +{ + _ntl_verylong a = *aa; + + if (!a) + return; + if (a[1] || a[0] != 1) + a[0] = (-a[0]); +} + +void _ntl_zsadd(_ntl_verylong a, long d, _ntl_verylong *b) +{ + CRegister(x); + + _ntl_zintoz(d, &x); + _ntl_zadd(a, x, b); +} + + +void +_ntl_zadd(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *cc) +{ + long sa; + long sb; + long anegative; + _ntl_verylong c; + long a_alias, b_alias; + + if (!a) { + if (b) + _ntl_zcopy(b, cc); + else + _ntl_zzero(cc); + return; + } + + if (!b) { + _ntl_zcopy(a, cc); + return; + } + + c = *cc; + a_alias = (a == c); + b_alias = (b == c); + + if ((anegative = ((sa = a[0]) < 0)) == ((sb = b[0]) < 0)) { + /* signs a and b are the same */ + _ntl_verylong pc; + long carry; + long i; + long maxab; + + if (anegative) { + sa = -sa; + sb = -sb; + } + + if (sa < sb) { + i = sa; + maxab = sb; + } + else { + i = sb; + maxab = sa; + } + + if (MustAlloc(c, maxab+1)) { + _ntl_zsetlength(&c, maxab + 1); + *cc = c; + if (a_alias) a = c; + if (b_alias) b = c; + } + + pc = c; + carry = 0; + + do { + long t = (*(++a)) + (*(++b)) + carry; + carry = t >> NTL_NBITS; + *(++pc) = t & NTL_RADIXM; + i--; + } while (i); + + i = sa-sb; + if (!i) goto i_exit; + + if (i < 0) { + i = -i; + a = b; + } + + if (!carry) goto carry_exit; + + for (;;) { + long t = (*(++a)) + 1; + carry = t >> NTL_NBITS; + *(++pc) = t & NTL_RADIXM; + i--; + if (!i) goto i_exit; + if (!carry) goto carry_exit; + } + + i_exit: + if (carry) { + *(++pc) = 1; + maxab++; + } + *c = anegative ? -maxab : maxab; + return; + + carry_exit: + if (pc != a) { + do { + *(++pc) = *(++a); + i--; + } while (i); + } + *c = anegative ? 
-maxab : maxab; + } + else { + /* signs a and b are different...use _ntl_zsub */ + + if (anegative) { + // UNSAFE + // FIXME: this is too ugly + a[0] = -sa; + NTL_SCOPE(guard) { if (!a_alias) a[0] = sa; }; + + _ntl_zsub(b, a, cc); + + if (!a_alias) a[0] = sa; + guard.relax(); + } + else { + // UNSAFE + // FIXME: this is too ugly + b[0] = -sb; + NTL_SCOPE(guard) { if (!b_alias) b[0] = sb; }; + + _ntl_zsub(a, b, cc); + + if (!b_alias) b[0] = sb; + guard.relax(); + } + } +} + +void +_ntl_zsub(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *cc) +{ + long sa; + long sb; + long anegative; + long a_alias, b_alias; + _ntl_verylong c; + + if (!b) { + if (a) + _ntl_zcopy(a, cc); + else + _ntl_zzero(cc); + return; + } + + if (!a) { + _ntl_zcopy(b, cc); + _ntl_znegate(cc); + return; + } + + c = *cc; + a_alias = (a == c); + b_alias = (b == c); + + if ((anegative = ((sa = a[0]) < 0)) == ((sb = b[0]) < 0)) { + /* signs agree */ + + long i, carry, *pc; + + if (anegative) { + sa = -sa; + sb = -sb; + } + + carry = sa - sb; + if (!carry) { + long *aa = a + sa; + long *bb = b + sa; + + i = sa; + while (i && !(carry = (*aa - *bb))) { + aa--; bb--; i--; + } + } + + if (!carry) { + _ntl_zzero(cc); + return; + } + + if (carry < 0) { + { long t = sa; sa = sb; sb = t; } + { long t = a_alias; a_alias = b_alias; b_alias = t; } + { long *t = a; a = b; b = t; } + anegative = !anegative; + } + + if (MustAlloc(c, sa)) { + _ntl_zsetlength(&c, sa); + *cc = c; + /* must have !a_alias */ + if (b_alias) b = c; + } + + i = sb; + carry = 0; + pc = c; + + do { +#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT)) + long t = (*(++a)) - (*(++b)) - carry; + carry = (t < 0); +#else + long t = (*(++a)) - (*(++b)) + carry; + carry = t >> NTL_NBITS; +#endif + *(++pc) = t & NTL_RADIXM; + i--; + } while (i); + + i = sa-sb; + while (carry) { + long t = (*(++a)) - 1; +#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT)) + carry = (t < 0); +#else + carry = t >> NTL_NBITS; +#endif + *(++pc) = t & NTL_RADIXM; + i--; + } + + if (i) { + if (pc != a) { + do { + *(++pc) = *(++a); + i--; + } while (i); + } + } + else { + while (sa > 1 && *pc == 0) { sa--; pc--; } + } + + if (anegative) sa = -sa; + *c = sa; + } + else { + /* signs of a and b are different...use _ntl_zadd */ + + if (anegative) { + // UNSAFE + // FIXME: this is too ugly + a[0] = -sa; + NTL_SCOPE(guard) { if (!a_alias) a[0] = sa; }; + + _ntl_zadd(a, b, cc); + + if (!a_alias) a[0] = sa; + guard.relax(); + + c = *cc; + c[0] = -c[0]; + } + else { + // UNSAFE + // FIXME: this is too ugly + b[0] = -sb; + NTL_SCOPE(guard) { if (!b_alias) b[0] = sb; }; + + _ntl_zadd(a, b, cc); + + if (!b_alias) b[0] = sb; + guard.relax(); + } + } +} + + +void +_ntl_zsmul(_ntl_verylong a, long d, _ntl_verylong *bb) +{ + long sa; + long anegative, bnegative; + _ntl_verylong b; + long a_alias; + + + if (d == 2) { + _ntl_z2mul(a, bb); + return; + } + + + if ((d >= NTL_RADIX) || (d <= -NTL_RADIX)) { + CRegister(x); + _ntl_zintoz(d,&x); + _ntl_zmul(a, x, bb); + return; + } + + if (!a || (a[0] == 1 && a[1] == 0)) { + _ntl_zzero(bb); + return; + } + + if (!d) { + _ntl_zzero(bb); + return; + } + + /* both inputs non-zero */ + + anegative = 0; + bnegative = 0; + + if ((sa = a[0]) < 0) { + anegative = 1; + sa = (-sa); + if (d < 0) + d = (-d); + else + bnegative = 1; + } + else if (bnegative = (d < 0)) + d = (-d); + + b = *bb; + a_alias = (a == b); + + if (MustAlloc(b, sa + 1)) { + _ntl_zsetlength(&b, sa + 1); + if (a_alias) a = b; + *bb = b; + } + + // EXCEPTIONS: delay assignment to a[0] until after memory 
allocation, + // the remaining code is exception free + + // UNSAFE + + a[0] = sa; + + zsxmul(d, b+1, a); + + sa++; + while ((sa > 1) && (!(b[sa]))) + sa--; + b[0] = sa; + + if (bnegative) + b[0] = (-b[0]); + + if (anegative && !a_alias) + a[0] = -a[0]; +} + +void _ntl_zsubpos(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *cc) +{ + long sa; + long sb; + + long *c, *pc; + long i, carry; + + long b_alias; + + if (!b) { + if (a) + _ntl_zcopy(a, cc); + else + _ntl_zzero(cc); + return; + } + + if (!a) { + _ntl_zzero(cc); + return; + } + + sa = a[0]; + sb = b[0]; + + c = *cc; + b_alias = (b == c); + + if (MustAlloc(c, sa)) { + _ntl_zsetlength(&c, sa); + *cc = c; + if (b_alias) b = c; + } + + i = sb; + carry = 0; + pc = c; + + while (i) { +#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT)) + long t = (*(++a)) - (*(++b)) - carry; + carry = (t < 0); +#else + long t = (*(++a)) - (*(++b)) + carry; + carry = t >> NTL_NBITS; +#endif + *(++pc) = t & NTL_RADIXM; + i--; + } + + i = sa-sb; + while (carry) { + long t = (*(++a)) - 1; +#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT)) + carry = (t < 0); +#else + carry = t >> NTL_NBITS; +#endif + *(++pc) = t & NTL_RADIXM; + i--; + } + + if (i) { + if (pc != a) { + do { + *(++pc) = *(++a); + i--; + } while (i); + } + } + else { + while (sa > 1 && *pc == 0) { sa--; pc--; } + } + + *c = sa; +} + + + + +NTL_THREAD_LOCAL static Vec kmem; +/* storage for Karatsuba */ + + +/* These cross-over points were estimated using + a Sparc-10, a Sparc-20, and a Pentium-90. */ + +#define KARX (16) + +/* Auxilliary routines for Karatsuba */ + + +static +void kar_fold(long *T, long *b, long hsa) +{ + long sb, *p2, *p3, i, carry; + + sb = *b; + p2 = b + hsa; + p3 = T; + carry = 0; + + for (i = sb-hsa; i>0; i--) { + long t = (*(++b)) + (*(++p2)) + carry; + carry = t >> NTL_NBITS; + *(++p3) = t & NTL_RADIXM; + } + + for (i = (hsa << 1) - sb; i>0; i--) { + long t = (*(++b)) + carry; + carry = t >> NTL_NBITS; + *(++p3) = t & NTL_RADIXM; + } + + if (carry) { + *(++p3) = carry; + *T = hsa + 1; + } + else + *T = hsa; +} + +static +void kar_sub(long *T, long *c) +{ + long i, carry; + + i = *c; + carry = 0; + + while (i>0) { +#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT)) + long t = (*(++T)) - (*(++c)) - carry; + carry = (t < 0); +#else + long t = (*(++T)) - (*(++c)) + carry; + carry = t >> NTL_NBITS; +#endif + *T = t & NTL_RADIXM; + i--; + } + + while (carry) { + long t = (*(++T)) - 1; +#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT)) + carry = (t < 0); +#else + carry = t >> NTL_NBITS; +#endif + *T = t & NTL_RADIXM; + } +} + + +static +void kar_add(long *c, long *T, long hsa) +{ + long i, carry; + + c += hsa; + i = *T; + while (T[i] == 0 && i > 0) i--; + carry = 0; + + while (i>0) { + long t = (*(++c)) + (*(++T)) + carry; + carry = t >> NTL_NBITS; + *c = t & NTL_RADIXM; + i--; + } + + while (carry) { + long t = (*(++c)) + 1; + carry = t >> NTL_NBITS; + *c = t & NTL_RADIXM; + } +} + +static +void kar_fix(long *c, long *T, long hsa) +{ + long i, carry, s; + + s = *T; + + i = hsa; + while (i>0) { + *(++c) = *(++T); + i--; + } + + + i = s - hsa; + carry = 0; + + while (i > 0) { + long t = (*(++c)) + (*(++T)) + carry; + carry = t >> NTL_NBITS; + *c = t & NTL_RADIXM; + i--; + } + + while (carry) { + long t = (*(++c)) + 1; + carry = t >> NTL_NBITS; + *c = t & NTL_RADIXM; + } +} + + + +static +void kar_mul(long *c, long *a, long *b, long *stk) +{ + long sa, sb, sc; + + if (*a < *b) { long *t = a; a = b; b = t; } + + sa = *a; + sb = *b; + sc = sa + sb; + + if (sb < KARX) { 
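+ /* below the KARX cross-over, the O(sa*sb) loop that follows is
+ cheaper than Karatsuba's recursion and temporary-buffer overhead */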
+ /* classic algorithm */ + + long *pc, i, *pb; + + pc = c; + for (i = sc; i; i--) { + pc++; + *pc = 0; + } + + pc = c; + pb = b; + for (i = sb; i; i--) { + pb++; + pc++; + zaddmul(*pb, pc, a); + } + } + else { + long hsa = (sa + 1) >> 1; + + if (hsa < sb) { + /* normal case */ + + long *T1, *T2, *T3; + + /* allocate space */ + + T1 = stk; stk += hsa + 2; + T2 = stk; stk += hsa + 2; + T3 = stk; stk += (hsa << 1) + 3; + + if (stk-kmem.elts() > kmem.length()) + TerminalError("internal error: kmem overflow"); + + /* compute T1 = a_lo + a_hi */ + + kar_fold(T1, a, hsa); + + /* compute T2 = b_lo + b_hi */ + + kar_fold(T2, b, hsa); + + /* recursively compute T3 = T1 * T2 */ + + kar_mul(T3, T1, T2, stk); + + /* recursively compute a_hi * b_hi into high part of c */ + /* and subtract from T3 */ + + { + // UNSAFE + + long olda, oldb; + + olda = a[hsa]; a[hsa] = sa-hsa; + oldb = b[hsa]; b[hsa] = sb-hsa; + + kar_mul(c + (hsa << 1), a + hsa, b + hsa, stk); + kar_sub(T3, c + (hsa << 1)); + + a[hsa] = olda; + b[hsa] = oldb; + } + + /* recursively compute a_lo*b_lo into low part of c */ + /* and subtract from T3 */ + + // UNSAFE + + *a = hsa; + *b = hsa; + + kar_mul(c, a, b, stk); + kar_sub(T3, c); + + *a = sa; + *b = sb; + + /* finally, add T3 * NTL_RADIX^{hsa} to c */ + + kar_add(c, T3, hsa); + } + else { + /* degenerate case */ + + long *T; + + T = stk; stk += sb + hsa + 1; + + if (stk-kmem.elts() > kmem.length()) + TerminalError("internal error: kmem overflow"); + + /* recursively compute b*a_hi into high part of c */ + { + // UNSAFE + + long olda; + + olda = a[hsa]; a[hsa] = sa-hsa; + kar_mul(c + hsa, a + hsa, b, stk); + a[hsa] = olda; + } + + /* recursively compute b*a_lo into T */ + + // UNSAFE + + *a = hsa; + kar_mul(T, a, b, stk); + *a = sa; + + /* fix-up result */ + + kar_fix(c, T, hsa); + } + } + + /* normalize result */ + + while (c[sc] == 0 && sc > 1) sc--; + *c = sc; +} + + + +#define KARSX (32) + +static +void kar_sq(long *c, long *a, long *stk) +{ + long sa, sc; + + sa = *a; + sc = sa << 1; + + if (sa < KARSX) { + /* classic algorithm */ + + long carry, i, j, *pc; + + pc = c; + for (i = sc; i; i--) { + pc++; + *pc = 0; + } + + carry = 0; + i = 0; + for (j = 1; j <= sa; j++) { + unsigned long uncar; + long t; + + i += 2; + uncar = ((unsigned long) carry) + (((unsigned long) c[i-1]) << 1); + t = uncar & NTL_RADIXM; + zaddmulpsq(t, a[j], carry); + c[i-1] = t; + zaddmulsq(sa-j, c+i, a+j); + uncar = (uncar >> NTL_NBITS) + (((unsigned long) c[i]) << 1); + uncar += ((unsigned long) carry); + carry = uncar >> NTL_NBITS; + c[i] = uncar & NTL_RADIXM; + } + } + else { + long hsa = (sa + 1) >> 1; + long *T1, *T2, olda; + + T1 = stk; stk += hsa + 2; + T2 = stk; stk += (hsa << 1) + 3; + + if (stk-kmem.elts() > kmem.length()) + TerminalError("internal error: kmem overflow"); + + kar_fold(T1, a, hsa); + kar_sq(T2, T1, stk); + + // UNSAFE + + olda = a[hsa]; a[hsa] = sa - hsa; + kar_sq(c + (hsa << 1), a + hsa, stk); + kar_sub(T2, c + (hsa << 1)); + a[hsa] = olda; + + // UNSAFE + + *a = hsa; + kar_sq(c, a, stk); + kar_sub(T2, c); + *a = sa; + + kar_add(c, T2, hsa); + } + + while (c[sc] == 0 && sc > 1) sc--; + *c = sc; +} + + +void _ntl_zmul(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *cc) +{ + CRegister(mem); + _ntl_verylong c = *cc; + + if (!a || (a[0] == 1 && a[1] == 0) || !b || (b[0] == 1 && b[1] == 0)) { + _ntl_zzero(cc); + return; + } + + if (a == b) { + if (a == c) { + _ntl_zcopy(a, &mem); + a = mem; + } + + _ntl_zsq(a, cc); + } + else { + long aneg, bneg, sa, sb, sc; + + if (a == c) { + 
_ntl_zcopy(a, &mem); + a = mem; + } + else if (b == c) { + _ntl_zcopy(b, &mem); + b = mem; + } + + + + // UNSAFE + + sa = *a; + if (sa < 0) { + *a = sa = -sa; + aneg = 1; + } + else + aneg = 0; + + sb = *b; + if (*b < 0) { + *b = sb = -sb; + bneg = 1; + } + else + bneg = 0; + + // FIXME: this is really ugly + NTL_SCOPE(guard) { + if (aneg) *a = - *a; + if (bneg) *b = - *b; + }; + + sc = sa + sb; + if (MustAlloc(c, sc)) { + _ntl_zsetlength(&c, sc); + *cc = c; + } + + /* we optimize for *very* small numbers, + * avoiding all function calls and loops */ + + if (sa <= 3 && sb <= 3) { + long carry, d; + + switch (sa) { + case 1: + switch (sb) { + case 1: + carry = 0; + zxmulp(c[1], a[1], b[1], carry); + c[2] = carry; + break; + case 2: + carry = 0; + d = a[1]; + zxmulp(c[1], b[1], d, carry); + zxmulp(c[2], b[2], d, carry); + c[3] = carry; + break; + case 3: + carry = 0; + d = a[1]; + zxmulp(c[1], b[1], d, carry); + zxmulp(c[2], b[2], d, carry); + zxmulp(c[3], b[3], d, carry); + c[4] = carry; + break; + } + break; + + case 2: + switch (sb) { + case 1: + carry = 0; + d = b[1]; + zxmulp(c[1], a[1], d, carry); + zxmulp(c[2], a[2], d, carry); + c[3] = carry; + break; + case 2: + carry = 0; + d = b[1]; + zxmulp(c[1], a[1], d, carry); + zxmulp(c[2], a[2], d, carry); + c[3] = carry; + carry = 0; + d = b[2]; + zaddmulp(c[2], a[1], d, carry); + zaddmulp(c[3], a[2], d, carry); + c[4] = carry; + break; + case 3: + carry = 0; + d = a[1]; + zxmulp(c[1], b[1], d, carry); + zxmulp(c[2], b[2], d, carry); + zxmulp(c[3], b[3], d, carry); + c[4] = carry; + carry = 0; + d = a[2]; + zaddmulp(c[2], b[1], d, carry); + zaddmulp(c[3], b[2], d, carry); + zaddmulp(c[4], b[3], d, carry); + c[5] = carry; + break; + } + break; + + case 3: + switch (sb) { + case 1: + carry = 0; + d = b[1]; + zxmulp(c[1], a[1], d, carry); + zxmulp(c[2], a[2], d, carry); + zxmulp(c[3], a[3], d, carry); + c[4] = carry; + break; + case 2: + carry = 0; + d = b[1]; + zxmulp(c[1], a[1], d, carry); + zxmulp(c[2], a[2], d, carry); + zxmulp(c[3], a[3], d, carry); + c[4] = carry; + carry = 0; + d = b[2]; + zaddmulp(c[2], a[1], d, carry); + zaddmulp(c[3], a[2], d, carry); + zaddmulp(c[4], a[3], d, carry); + c[5] = carry; + break; + case 3: + carry = 0; + d = b[1]; + zxmulp(c[1], a[1], d, carry); + zxmulp(c[2], a[2], d, carry); + zxmulp(c[3], a[3], d, carry); + c[4] = carry; + carry = 0; + d = b[2]; + zaddmulp(c[2], a[1], d, carry); + zaddmulp(c[3], a[2], d, carry); + zaddmulp(c[4], a[3], d, carry); + c[5] = carry; + carry = 0; + d = b[3]; + zaddmulp(c[3], a[1], d, carry); + zaddmulp(c[4], a[2], d, carry); + zaddmulp(c[5], a[3], d, carry); + c[6] = carry; + break; + } + } + + if (c[sc] == 0) sc--; + if (aneg != bneg) sc = -sc; + *c = sc; + + if (aneg) *a = -sa; + if (bneg) *b = -sb; + } + else if (*a < KARX || *b < KARX) { + /* classic algorithm */ + + long i, *pc; + + pc = c; + for (i = sc; i; i--) { + pc++; + *pc = 0; + } + + pc = c; + + if (*a >= *b) { + long *pb = b; + for (i = *pb; i; i--) { + pb++; + pc++; + zaddmul(*pb, pc, a); + } + } + else { + long *pa = a; + for (i = *pa; i; i--) { + pa++; + pc++; + zaddmul(*pa, pc, b); + } + } + + while (c[sc] == 0 && sc > 1) sc--; + if (aneg != bneg) sc = -sc; + c[0] = sc; + if (aneg) *a = - *a; + if (bneg) *b = - *b; + } + else { + /* karatsuba */ + + long n, hn, sp; + + if (*a < *b) + n = *b; + else + n = *a; + + sp = 0; + do { + hn = (n + 1) >> 1; + sp += (hn << 2) + 7; + n = hn+1; + } while (n >= KARX); + + kmem.SetLength(sp); + kar_mul(c, a, b, kmem.elts()); + if (aneg != bneg) *c = - *c; + + if (aneg) 
*a = - *a; + if (bneg) *b = - *b; + } + + guard.relax(); + } +} + + +void _ntl_zsq(_ntl_verylong a, _ntl_verylong *cc) +{ + CRegister(mem); + _ntl_verylong c = *cc; + long sa, aneg, sc; + + if (!a || (a[0] == 1 && a[1] == 0)) { + _ntl_zzero(cc); + return; + } + + if (a == c) { + _ntl_zcopy(a, &mem); + a = mem; + } + + + // UNSAFE + + sa = *a; + + if (*a < 0) { + *a = sa = -sa; + aneg = 1; + } + else + aneg = 0; + + + // FIXME: this is really ugly + NTL_SCOPE(guard) { if (aneg) *a = - *a; }; + + sc = (sa) << 1; + if (MustAlloc(c, sc)) { + _ntl_zsetlength(&c, sc); + *cc = c; + } + + if (sa <= 3) { + long carry, d; + + switch (sa) { + case 1: + carry = 0; + zxmulp(c[1], a[1], a[1], carry); + c[2] = carry; + break; + + case 2: + carry = 0; + d = a[1]; + zxmulp(c[1], a[1], d, carry); + zxmulp(c[2], a[2], d, carry); + c[3] = carry; + carry = 0; + d = a[2]; + zaddmulp(c[2], a[1], d, carry); + zaddmulp(c[3], a[2], d, carry); + c[4] = carry; + break; + + case 3: + carry = 0; + d = a[1]; + zxmulp(c[1], a[1], d, carry); + zxmulp(c[2], a[2], d, carry); + zxmulp(c[3], a[3], d, carry); + c[4] = carry; + carry = 0; + d = a[2]; + zaddmulp(c[2], a[1], d, carry); + zaddmulp(c[3], a[2], d, carry); + zaddmulp(c[4], a[3], d, carry); + c[5] = carry; + carry = 0; + d = a[3]; + zaddmulp(c[3], a[1], d, carry); + zaddmulp(c[4], a[2], d, carry); + zaddmulp(c[5], a[3], d, carry); + c[6] = carry; + break; + } + + if (c[sc] == 0) sc--; + *c = sc; + if (aneg) *a = -sa; + } + else if (sa < KARSX) { + /* classic algorithm */ + + long carry, i, j, *pc; + + pc = c; + for (i = sc; i; i--) { + pc++; + *pc = 0; + } + + carry = 0; + i = 0; + for (j = 1; j <= sa; j++) { + unsigned long uncar; + long t; + + i += 2; + uncar = ((unsigned long) carry) + (((unsigned long) c[i-1]) << 1); + t = uncar & NTL_RADIXM; + zaddmulpsq(t, a[j], carry); + c[i-1] = t; + zaddmulsq(sa-j, c+i, a+j); + uncar = (uncar >> NTL_NBITS) + (((unsigned long) c[i]) << 1); + uncar += ((unsigned long) carry); + carry = uncar >> NTL_NBITS; + c[i] = uncar & NTL_RADIXM; + } + + + while (c[sc] == 0 && sc > 1) sc--; + c[0] = sc; + if (aneg) *a = - *a; + } + else { + /* karatsuba */ + + long n, hn, sp; + + n = *a; + + sp = 0; + do { + hn = (n + 1) >> 1; + sp += hn + hn + hn + 5; + n = hn+1; + } while (n >= KARSX); + + kmem.SetLength(sp); + kar_sq(c, a, kmem.elts()); + + if (aneg) *a = - *a; + } + + guard.relax(); +} + + + + +long _ntl_zsdiv(_ntl_verylong a, long d, _ntl_verylong *bb) +{ + long sa; + _ntl_verylong b = *bb; + + if (!d) { + ArithmeticError("division by zero in _ntl_zsdiv"); + } + + if (!a) { + _ntl_zzero(bb); + return (0); + } + + + if (d == 2) { + long is_odd = a[1] & 1; + long fix = (a[0] < 0) & is_odd; + _ntl_zrshift(a, 1, bb); + if (fix) _ntl_zsadd(*bb, -1, bb); + return is_odd; + } + + + if ((sa = a[0]) < 0) + sa = (-sa); + + /* if b aliases a, then b won't move */ + _ntl_zsetlength(&b, sa); + *bb = b; + + if ((d >= NTL_RADIX) || (d <= -NTL_RADIX)) { + CRegister(zd); + CRegister(zb); + + _ntl_zintoz(d, &zb); + _ntl_zdiv(a, zb, &b, &zd); + *bb = b; + return (_ntl_ztoint(zd)); + } + else { + long den = d; + double deninv; + long carry = 0; + long i; + long flag = (*a < 0 ? 2 : 0) | (den < 0 ? 
1 : 0);
+
+ if (den < 0)
+ den = -den;
+ deninv = 1.0 / ((double) den);
+
+ if (a[sa] < den && sa > 1)
+ carry = a[sa--];
+
+ for (i = sa; i; i--) {
+ zdiv21(carry, a[i], den, deninv, b[i]);
+ }
+
+ while ((sa > 1) && (!(b[sa])))
+ sa--;
+ b[0] = sa;
+
+ if (flag) {
+ if (flag <= 2) {
+ if (!carry)
+ _ntl_znegate(&b);
+ else {
+ _ntl_zsadd(b, 1, &b);
+ b[0] = -b[0];
+ if (flag == 1)
+ carry = carry - den;
+ else
+ carry = den - carry;
+ *bb = b;
+ }
+ }
+ else
+ carry = -carry;
+ }
+
+ return (carry);
+ }
+}
+
+long _ntl_zsmod(_ntl_verylong a, long d)
+{
+ long sa;
+
+ if (!a) {
+ return (0);
+ }
+
+ if (d == 2) return (a[1] & 1);
+
+ if (!d) {
+ ArithmeticError("division by zero in _ntl_zsmod");
+ }
+
+ if ((sa = a[0]) < 0)
+ sa = (-sa);
+
+ if ((d >= NTL_RADIX) || (d <= -NTL_RADIX)) {
+ CRegister(zd);
+ CRegister(zb);
+
+ _ntl_zintoz(d, &zb);
+ _ntl_zmod(a, zb, &zd);
+ return (_ntl_ztoint(zd));
+ }
+ else {
+ long den = d;
+ double deninv;
+ long carry = 0;
+ long i;
+ long flag = (*a < 0 ? 2 : 0) | (den < 0 ? 1 : 0);
+
+ if (den < 0)
+ den = -den;
+ deninv = 1.0 / ((double) den);
+
+ if (a[sa] < den && sa > 1)
+ carry = a[sa--];
+
+ for (i = sa; i; i--) {
+ zrem21(carry, a[i], den, deninv);
+ }
+
+ if (flag) {
+ if (flag <= 2) {
+ if (carry) {
+ if (flag == 1)
+ carry = carry - den;
+ else
+ carry = den - carry;
+ }
+ }
+ else
+ carry = -carry;
+ }
+
+ return (carry);
+ }
+}
+
+void _ntl_zmultirem(_ntl_verylong a, long n, long* dd, long *rr)
+{
+ long j;
+ long sa;
+
+ if (!a || (a[0] == 1 && a[1] == 0)) {
+ for (j = 0; j < n; j++) rr[j] = 0;
+ return;
+ }
+
+ sa = a[0];
+
+ for (j = 0; j < n; j++) {
+ long den = dd[j];
+ double deninv;
+ long carry = 0;
+ long i;
+ long lsa = sa;
+
+ deninv = 1.0 / ((double) den);
+
+ if (a[lsa] < den && lsa > 1)
+ carry = a[lsa--];
+
+ for (i = lsa; i; i--) {
+ zrem21(carry, a[i], den, deninv);
+ }
+
+ rr[j] = carry;
+ }
+}
+
+
+
+#if (defined(NTL_TBL_REM_LL))
+
+/* This version uses the double-word long type directly.
+ * It's a little faster than the other one.
+ * It accumulates up to 8 double-word terms (the running low part
+ * plus 7 new products) before stepping a higher-level accumulator.
+ */
+
+// I noticed that this can be significantly faster than the other
+// one, even if we are not using NTL_LONG_LONG. So we introduce
+// another flag.
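+
+/*
+ * Illustrative sketch (not part of NTL): the core accumulation pattern
+ * used by multirem3 below, in isolation. Each digit product fits in
+ * 2*NTL_NBITS bits, so the double-word accumulator can absorb seven
+ * products per step before its overflow bits must be flushed into a
+ * separate single-word accumulator:
+ *
+ * NTL_LL_TYPE acc = a[1]; // running double-word sum
+ * long ac2 = 0; // collects the flushed high bits
+ * for (j = 0; j < k; j += 7) {
+ * for (t = 0; t < 7; t++) // seven products per step
+ * acc += ((NTL_LL_TYPE) ap[j+t]) * ((NTL_LL_TYPE) tp[j+t]);
+ * ac2 += (long) (acc >> (2*NTL_NBITS)); // flush overflow bits
+ * acc &= (((NTL_LL_TYPE) 1) << (2*NTL_NBITS)) - 1; // keep low bits
+ * }
+ *
+ * The masked-down acc plus seven new products stays below
+ * 8 * 2^(2*NTL_NBITS), which is the headroom NTL_LL_TYPE must provide
+ * beyond 2*NTL_NBITS bits.
+ */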
+ +static +void multirem3(_ntl_verylong a, long n, long* dd, + long **ttbl, long *rr) +{ + long sa, i, j, d, *tbl, ac0, ac1, ac2, *ap, *tp, k, carry; + double dinv; + NTL_LL_TYPE acc; + + if (!a || a[0] < 8 || a[0] >= NTL_RADIX) { + _ntl_zmultirem(a, n, dd, rr); + return; + } + + sa = a[0]; + + for (i = 0; i < n; i++) { + d = dd[i]; + tbl = ttbl[i]; + acc = a[1]; + ac2 = 0; + ap = &a[2]; + tp = &tbl[1]; + + k = sa - 7; + + for (j = 0; j < k; j += 7) { + acc += ((NTL_LL_TYPE) ap[j+0]) * ((NTL_LL_TYPE) tp[j+0]); + acc += ((NTL_LL_TYPE) ap[j+1]) * ((NTL_LL_TYPE) tp[j+1]); + acc += ((NTL_LL_TYPE) ap[j+2]) * ((NTL_LL_TYPE) tp[j+2]); + acc += ((NTL_LL_TYPE) ap[j+3]) * ((NTL_LL_TYPE) tp[j+3]); + acc += ((NTL_LL_TYPE) ap[j+4]) * ((NTL_LL_TYPE) tp[j+4]); + acc += ((NTL_LL_TYPE) ap[j+5]) * ((NTL_LL_TYPE) tp[j+5]); + acc += ((NTL_LL_TYPE) ap[j+6]) * ((NTL_LL_TYPE) tp[j+6]); + ac2 += (long) (acc >> (2*NTL_NBITS)); + acc &= (((NTL_LL_TYPE) 1) << (2*NTL_NBITS)) - ((NTL_LL_TYPE) 1); + } + + k = sa - 1; + + for (; j < k; j++) + acc += ((NTL_LL_TYPE) ap[j+0]) * ((NTL_LL_TYPE) tp[j+0]); + + ac2 += (long) (acc >> (2*NTL_NBITS)); + acc &= (((NTL_LL_TYPE) 1) << (2*NTL_NBITS)) - ((NTL_LL_TYPE) 1); + + ac0 = (long) (acc & ( (((NTL_LL_TYPE) 1) << (NTL_NBITS)) - ((NTL_LL_TYPE) 1) )); + ac1 = (long) (acc >> NTL_NBITS); + + + carry = 0; + dinv = ((double) 1)/((double) d); + if (ac2 >= d) { + zrem21(carry, ac2, d, dinv); + } + else + carry = ac2; + + zrem21(carry, ac1, d, dinv); + zrem21(carry, ac0, d, dinv); + + rr[i] = carry; + } +} + +#endif + + +#if (defined(NTL_TBL_REM)) + +static +void multirem3(_ntl_verylong a, long n, long* dd, + long **ttbl, long *rr) +{ + long sa, i, d, *tbl, ac0, ac1, ac2, *ap, *tp, k, t, carry; + double dinv; + + if (!a || a[0] < 8 || a[0] >= NTL_RADIX) { + _ntl_zmultirem(a, n, dd, rr); + return; + } + + sa = a[0]; + + for (i = 0; i < n; i++) { + d = dd[i]; + tbl = ttbl[i]; + ac0 = a[1]; + ac1 = 0; + ac2 = 0; + ap = &a[2]; + tp = &tbl[1]; + k = sa-1; + + while (k) { + zxmulp(t, *ap, *tp, ac0); + ac1 += ac0; + ac2 += ac1 >> NTL_NBITS; + ac1 &= NTL_RADIXM; + ac0 = t; + k--; + ap++; + tp++; + } + + carry = 0; + dinv = ((double) 1)/((double) d); + if (ac2 >= d) { + zrem21(carry, ac2, d, dinv); + } + else + carry = ac2; + + zrem21(carry, ac1, d, dinv); + zrem21(carry, ac0, d, dinv); + + rr[i] = carry; + } +} + +#endif + + + + +long _ntl_zsfastrem(_ntl_verylong a, long d) +/* assumes a >= 0, and 0 < d < NTL_RADIX */ +/* computes a % d */ + +{ + long sa; + + if (!a || (a[0] == 1 && a[1] == 0)) { + return 0; + } + + sa = a[0]; + + { + long den = d; + double deninv = ((double)1)/((double)den); + long carry = 0; + long i; + long lsa = sa; + + if (a[lsa] < den && lsa > 1) + carry = a[lsa--]; + for (i = lsa; i; i--) { + zrem21(carry, a[i], den, deninv); + } + return carry; + } +} + + + +void _ntl_zdiv(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *qq, _ntl_verylong *rr) +{ + long sa, sb, sq, i; + long sign; + long q1; + long *rp; + double btopinv, aux; + CRegister(q); + CRegister(r); + + if (!b || (((sb=b[0]) == 1) && (!b[1]))) { + ArithmeticError("division by zero in _ntl_zdiv"); + } + + if (!a || (((sa=a[0]) == 1) && (!a[1]))) { + _ntl_zzero(qq); + if (rr) _ntl_zzero(rr); + return; + } + + + if (sb == 1) { + long t1 = _ntl_zsdiv(a, b[1], qq); + if (rr) _ntl_zintoz(t1, rr); + return; + } + + if (sb == -1) { + long t1 = _ntl_zsdiv(a, -b[1], qq); + if (rr) _ntl_zintoz(t1, rr); + return; + } + + // UNSAFE + + sign = 0; + if (sa < 0) { + a[0] = sa = -sa; + sign = 2; + } + + if (sb < 0) { + b[0] = sb = 
-sb; + sign |= 1; + } + + + // FIXME: this is really ugly + NTL_SCOPE(guard) { + if (sign & 2) + a[0] = -sa; + + if (sign & 1) + b[0] = -sb; + }; + + + + sq = sa-sb+1; + + if (sq <= 0) { + _ntl_zcopy(a, &r); + _ntl_zzero(&q); + goto done; + } + + + _ntl_zsetlength(&q, sq); + _ntl_zsetlength(&r, sa+1); + + _ntl_zcopy(a, &r); + rp = &r[sa+1]; + *rp = 0; + + r[0] = 0; /* this streamlines the last evaluation of aux */ + + btopinv = b[sb]*NTL_FRADIX + b[sb-1]; + if (sb > 2) + btopinv = NTL_FRADIX / (btopinv*NTL_FRADIX + b[sb-2]); + else + btopinv = 1.0 / btopinv; + + + aux = btopinv*(rp[-1]*NTL_FRADIX + rp[-2]); + if (aux >= NTL_FRADIX) + aux = NTL_FRADIX-1; + + for (i = sq; i >= 1; i--, rp--) { + q1 = (long) aux; + if (q1) { + zsubmul(q1, &r[i], b); + } + + while (rp[0] < 0) { + zaddmulone(&r[i], b); + q1--; + } + + while (rp[0] > 0) { + zsubmulone(&r[i], b); + q1++; + } + + aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]); + while (aux > NTL_FRADIX - 16) { + /* q1 might be too small */ + if (aux >= NTL_FRADIX) + aux = NTL_FRADIX-1; + + + zsubmulone(&r[i], b); + if (rp[0] < 0) { + /* oops...false alarm! */ + zaddmulone(&r[i], b); + break; + } + else { + q1++; + aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]); + } + } + + q[i] = q1; + } + + while (sq > 1 && q[sq] == 0) sq--; + q[0] = sq; + + i = sb; + while (i > 1 && r[i] == 0) i--; + r[0] = i; + +done: + if (sign) + { + if (sign <= 2) + { + if (!(r[1]) && (r[0] == 1)) + _ntl_znegate(&q); + else + { + _ntl_zsadd(q, 1, &q); + _ntl_znegate(&q); + if (sign == 1) + _ntl_zsub(r, b, &r); + else + _ntl_zsub(b, r, &r); + } + } + else + _ntl_znegate(&r); + + if (sign & 2) + a[0] = -sa; + + if (sign & 1) + b[0] = -sb; + } + + guard.relax(); + + _ntl_zcopy(q, qq); + if (rr) _ntl_zcopy(r, rr); + + +} + +void +_ntl_zmod(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *rr) +{ + long sa, sb, sq, i; + long sign; + long q1; + long *rp; + double btopinv, aux; + CRegister(r); + + if (!b || (((sb=b[0]) == 1) && (!b[1]))) { + ArithmeticError("division by zero in _ntl_zdiv"); + } + + if (!a || (((sa=a[0]) == 1) && (!a[1]))) { + _ntl_zzero(rr); + return; + } + + + if (sb == 1) { + _ntl_zintoz(_ntl_zsmod(a, b[1]), rr); + return; + } + + if (sb == -1) { + _ntl_zintoz(_ntl_zsmod(a, -b[1]), rr); + return; + } + + // UNSAFE + + sign = 0; + if (sa < 0) { + a[0] = sa = -sa; + sign = 2; + } + + if (sb < 0) { + b[0] = sb = -sb; + sign |= 1; + } + + + NTL_SCOPE(guard) { + if (sign & 2) + a[0] = -sa; + + if (sign & 1) + b[0] = -sb; + }; + + + sq = sa-sb+1; + + if (sq <= 0) { + _ntl_zcopy(a, &r); + goto done; + } + + + _ntl_zsetlength(&r, sa+1); + + _ntl_zcopy(a, &r); + rp = &r[sa+1]; + *rp = 0; + + r[0] = 0; /* this streamlines the last evaluation of aux */ + + btopinv = b[sb]*NTL_FRADIX + b[sb-1]; + if (sb > 2) + btopinv = NTL_FRADIX / (btopinv*NTL_FRADIX + b[sb-2]); + else + btopinv = 1.0 / btopinv; + + + aux = btopinv*(rp[-1]*NTL_FRADIX + rp[-2]); + if (aux >= NTL_FRADIX) + aux = NTL_FRADIX-1; + + for (i = sq; i >= 1; i--, rp--) { + q1 = (long) aux; + if (q1) { + zsubmul(q1, &r[i], b); + } + + while (rp[0] < 0) { + zaddmulone(&r[i], b); + } + + while (rp[0] > 0) { + zsubmulone(&r[i], b); + } + + aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]); + while (aux > NTL_FRADIX - 16) { + /* q1 might be too small */ + if (aux >= NTL_FRADIX) + aux = NTL_FRADIX-1; + + + zsubmulone(&r[i], b); + if (rp[0] < 0) { + /* oops...false alarm! 
*/ + zaddmulone(&r[i], b); + break; + } + else { + aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]); + } + } + } + + i = sb; + while (i > 1 && r[i] == 0) i--; + r[0] = i; + +done: + if (sign) + { + if (sign <= 2) + { + if (!(r[1]) && (r[0] == 1)) + /* no op */; + else + { + if (sign == 1) + _ntl_zsub(r, b, &r); + else + _ntl_zsub(b, r, &r); + } + } + else + _ntl_znegate(&r); + + if (sign & 2) + a[0] = -sa; + + if (sign & 1) + b[0] = -sb; + + } + + guard.relax(); + + _ntl_zcopy(r, rr); +} + +void +_ntl_zquickmod(_ntl_verylong *rr, _ntl_verylong b) +{ + long sa, sb, sq, i; + long q1; + long *rp; + double btopinv, aux; + _ntl_verylong r; + + sb = b[0]; + + r = *rr; + + if (!r || (((sa=r[0]) == 1) && (!r[1]))) { + _ntl_zzero(rr); + return; + } + + + if (sb == 1) { + _ntl_zintoz(_ntl_zsmod(r, b[1]), rr); + return; + } + + sq = sa-sb+1; + + if (sq <= 0) { + return; + } + + + _ntl_zsetlength(rr, sa+1); + r = *rr; + + rp = &r[sa+1]; + *rp = 0; + + r[0] = 0; /* this streamlines the last evaluation of aux */ + + btopinv = b[sb]*NTL_FRADIX + b[sb-1]; + if (sb > 2) + btopinv = NTL_FRADIX / (btopinv*NTL_FRADIX + b[sb-2]); + else + btopinv = 1.0 / btopinv; + + + aux = btopinv*(rp[-1]*NTL_FRADIX + rp[-2]); + if (aux >= NTL_FRADIX) + aux = NTL_FRADIX-1; + + for (i = sq; i >= 1; i--, rp--) { + q1 = (long) aux; + if (q1) { + zsubmul(q1, &r[i], b); + } + + while (rp[0] < 0) { + zaddmulone(&r[i], b); + } + + while (rp[0] > 0) { + zsubmulone(&r[i], b); + } + + aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]); + while (aux > NTL_FRADIX - 16) { + /* q1 might be too small */ + if (aux >= NTL_FRADIX) + aux = NTL_FRADIX-1; + + + zsubmulone(&r[i], b); + if (rp[0] < 0) { + /* oops...false alarm! */ + zaddmulone(&r[i], b); + break; + } + else { + aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]); + } + } + } + + i = sb; + while (i > 1 && r[i] == 0) i--; + r[0] = i; +} + + +void +_ntl_zaddmod( + _ntl_verylong a, + _ntl_verylong b, + _ntl_verylong n, + _ntl_verylong *c + ) +{ + if (*c != n) { + _ntl_zadd(a, b, c); + if (_ntl_zcompare(*c, n) >= 0) + _ntl_zsubpos(*c, n, c); + } + else { + CRegister(mem); + + _ntl_zadd(a, b, &mem); + if (_ntl_zcompare(mem, n) >= 0) + _ntl_zsubpos(mem, n, c); + else + _ntl_zcopy(mem, c); + } +} + +void +_ntl_zsubmod( + _ntl_verylong a, + _ntl_verylong b, + _ntl_verylong n, + _ntl_verylong *c + ) +{ + CRegister(mem); + long cmp; + + if ((cmp=_ntl_zcompare(a, b)) < 0) { + _ntl_zadd(n, a, &mem); + _ntl_zsubpos(mem, b, c); + } else if (!cmp) + _ntl_zzero(c); + else + _ntl_zsubpos(a, b, c); +} + +void +_ntl_zsmulmod( + _ntl_verylong a, + long d, + _ntl_verylong n, + _ntl_verylong *c + ) +{ + CRegister(mem); + + _ntl_zsmul(a, d, &mem); + _ntl_zquickmod(&mem, n); + _ntl_zcopy(mem, c); +} + + + + +void +_ntl_zmulmod( + _ntl_verylong a, + _ntl_verylong b, + _ntl_verylong n, + _ntl_verylong *c + ) +{ + CRegister(mem); + + _ntl_zmul(a, b, &mem); + _ntl_zquickmod(&mem, n); + _ntl_zcopy(mem, c); +} + +void +_ntl_zsqmod( + _ntl_verylong a, + _ntl_verylong n, + _ntl_verylong *c + ) +{ + CRegister(mem); + + _ntl_zsq(a, &mem); + _ntl_zquickmod(&mem, n); + _ntl_zcopy(mem, c); +} + + +void +_ntl_zinvmod( + _ntl_verylong a, + _ntl_verylong n, + _ntl_verylong *c + ) +{ + if (_ntl_zinv(a, n, c)) + ArithmeticError("undefined inverse in _ntl_zinvmod"); +} + + +static long +zxxeucl( + _ntl_verylong ain, + _ntl_verylong nin, + _ntl_verylong *invv, + _ntl_verylong *uu + ) +{ + CRegister(a); + CRegister(n); + CRegister(q); + CRegister(w); + CRegister(x); + CRegister(y); + 
CRegister(z); + _ntl_verylong inv = *invv; + _ntl_verylong u = *uu; + long diff; + long ilo; + long sa; + long sn; + long temp; + long e; + long fast; + long parity; + long gotthem; + _ntl_verylong pin; + _ntl_verylong p; + long i; + long try11; + long try12; + long try21; + long try22; + long got11; + long got12; + long got21; + long got22; + double hi; + double lo; + double dt; + double fhi, fhi1; + double flo, flo1; + double num; + double den; + double dirt; + + + _ntl_zsetlength(&a, (e = (ain[0] > nin[0] ? ain[0] : nin[0]))); + _ntl_zsetlength(&n, e); + _ntl_zsetlength(&q, e); + _ntl_zsetlength(&w, e); + _ntl_zsetlength(&x, e); + _ntl_zsetlength(&y, e); + _ntl_zsetlength(&z, e); + _ntl_zsetlength(&inv, e); + *invv = inv; + _ntl_zsetlength(&u, e); + *uu = u; + + fhi1 = 1.0 + ((double) 32.0)/NTL_FDOUBLE_PRECISION; + flo1 = 1.0 - ((double) 32.0)/NTL_FDOUBLE_PRECISION; + + fhi = 1.0 + ((double) 8.0)/NTL_FDOUBLE_PRECISION; + flo = 1.0 - ((double) 8.0)/NTL_FDOUBLE_PRECISION; + + pin = &ain[0]; + p = &a[0]; + for (i = (*pin); i >= 0; i--) + *p++ = *pin++; + pin = &nin[0]; + p = &n[0]; + for (i = (*pin); i >= 0; i--) + *p++ = *pin++; + inv[0] = 1; + inv[1] = 1; + w[0] = 1; + w[1] = 0; + while (n[0] > 1 || n[1] > 0) + { + gotthem = 0; + sa = a[0]; + sn = n[0]; + diff = sa - sn; + if (!diff || diff == 1) + { + sa = a[0]; + p = &a[sa]; + num = ((double) (*p)) * NTL_FRADIX; + if (sa > 1) + num += (*(--p)); + num *= NTL_FRADIX; + if (sa > 2) + num += (*(p - 1)); + sn = n[0]; + p = &n[sn]; + den = (double) (*p) * NTL_FRADIX; + if (sn > 1) + den += (*(--p)); + den *= NTL_FRADIX; + if (sn > 2) + den += (*(p - 1)); + hi = fhi1 * (num + 1.0) / den; + lo = flo1 * num / (den + 1.0); + if (diff > 0) + { + hi *= NTL_FRADIX; + lo *= NTL_FRADIX; + } + try11 = 1; + try12 = 0; + try21 = 0; + try22 = 1; + parity = 1; + fast = 1; + while (fast > 0) + { + parity = 1 - parity; + if (hi >= NTL_FRADIX) + fast = 0; + else + { + ilo = (long)lo; + dirt = hi - ilo; + if (dirt < 1.0/NTL_FDOUBLE_PRECISION || !ilo || ilo < (long)hi) + fast = 0; + else + { + dt = lo-ilo; + lo = flo / dirt; + if (dt > 1.0/NTL_FDOUBLE_PRECISION) + hi = fhi / dt; + else + hi = NTL_FRADIX; + temp = try11; + try11 = try21; + if ((NTL_RADIX - temp) / ilo < try21) + fast = 0; + else + try21 = temp + ilo * try21; + temp = try12; + try12 = try22; + if ((NTL_RADIX - temp) / ilo < try22) + fast = 0; + else + try22 = temp + ilo * try22; + if ((fast > 0) && (parity > 0)) + { + gotthem = 1; + got11 = try11; + got12 = try12; + got21 = try21; + got22 = try22; + } + } + } + } + } + if (gotthem) + { + _ntl_zsmul(inv, got11, &x); + _ntl_zsmul(w, got12, &y); + _ntl_zsmul(inv, got21, &z); + _ntl_zsmul(w, got22, &w); + _ntl_zadd(x, y, &inv); + _ntl_zadd(z, w, &w); + _ntl_zsmul(a, got11, &x); + _ntl_zsmul(n, got12, &y); + _ntl_zsmul(a, got21, &z); + _ntl_zsmul(n, got22, &n); + _ntl_zsub(x, y, &a); + _ntl_zsub(n, z, &n); + } + else + { + _ntl_zdiv(a, n, &q, &a); + _ntl_zmul(q, w, &x); + _ntl_zadd(inv, x, &inv); + if (a[0] > 1 || a[1] > 0) + { + _ntl_zdiv(n, a, &q, &n); + _ntl_zmul(q, inv, &x); + _ntl_zadd(w, x, &w); + } + else + { + p = &a[0]; + pin = &n[0]; + for (i = (*pin); i >= 0; i--) + *p++ = *pin++; + n[0] = 1; + n[1] = 0; + _ntl_zcopy(w, &inv); + _ntl_znegate(&inv); + } + } + } + + if ((a[0] == 1) && (a[1] == 1)) + e = 0; + else + e = 1; + + p = &u[0]; + pin = &a[0]; + for (i = (*pin); i >= 0; i--) + *p++ = *pin++; + *invv = inv; + *uu = u; + + return (e); +} + +long +_ntl_zinv( + _ntl_verylong ain, + _ntl_verylong nin, + _ntl_verylong *invv + ) +{ + 
CRegister(u); + CRegister(v); + long sgn; + + + if (_ntl_zscompare(nin, 1) <= 0) { + LogicError("InvMod: second input <= 1"); + } + + sgn = _ntl_zsign(ain); + if (sgn < 0) { + LogicError("InvMod: first input negative"); + } + + if (_ntl_zcompare(ain, nin) >= 0) { + LogicError("InvMod: first input too big"); + } + + + if (sgn == 0) { + _ntl_zcopy(nin, invv); + return 1; + } + + + if (!(zxxeucl(ain, nin, &v, &u))) { + if (_ntl_zsign(v) < 0) _ntl_zadd(v, nin, &v); + _ntl_zcopy(v, invv); + return 0; + } + + _ntl_zcopy(u, invv); + return 1; +} + +void +_ntl_zexteucl( + _ntl_verylong aa, + _ntl_verylong *xa, + _ntl_verylong bb, + _ntl_verylong *xb, + _ntl_verylong *d + ) +{ + CRegister(modcon); + CRegister(a); + CRegister(b); + long anegative = 0; + long bnegative = 0; + + _ntl_zcopy(aa, &a); + _ntl_zcopy(bb, &b); + + if (anegative = (a[0] < 0)) + a[0] = -a[0]; + if (bnegative = (b[0] < 0)) + b[0] = -b[0]; + + if (!b[1] && (b[0] == 1)) + { + _ntl_zone(xa); + _ntl_zzero(xb); + _ntl_zcopy(a, d); + goto done; + } + + if (!a[1] && (a[0] == 1)) + { + _ntl_zzero(xa); + _ntl_zone(xb); + _ntl_zcopy(b, d); + goto done; + } + + zxxeucl(a, b, xa, d); + _ntl_zmul(a, *xa, xb); + _ntl_zsub(*d, *xb, xb); + _ntl_zdiv(*xb, b, xb, &modcon); + + if ((modcon[1]) || (modcon[0] != 1)) + { + LogicError("non-zero remainder in _ntl_zexteucl BUG"); + } +done: + if (anegative) + { + _ntl_znegate(xa); + } + if (bnegative) + { + _ntl_znegate(xb); + } +} + + + +/* I've adapted LIP's extended euclidean algorithm to + * do rational reconstruction. -- VJS. + */ + + +long +_ntl_zxxratrecon( + _ntl_verylong ain, + _ntl_verylong nin, + _ntl_verylong num_bound, + _ntl_verylong den_bound, + _ntl_verylong *num_out, + _ntl_verylong *den_out + ) +{ + CRegister(a); + CRegister(n); + CRegister(q); + CRegister(w); + CRegister(x); + CRegister(y); + CRegister(z); + CRegister(inv); + CRegister(u); + CRegister(a_bak); + CRegister(n_bak); + CRegister(inv_bak); + CRegister(w_bak); + + _ntl_verylong p; + + long diff; + long ilo; + long sa; + long sn; + long snum; + long sden; + long e; + long fast; + long temp; + long parity; + long gotthem; + long try11; + long try12; + long try21; + long try22; + long got11; + long got12; + long got21; + long got22; + + double hi; + double lo; + double dt; + double fhi, fhi1; + double flo, flo1; + double num; + double den; + double dirt; + + if (_ntl_zsign(num_bound) < 0) + LogicError("rational reconstruction: bad numerator bound"); + + if (!num_bound) + snum = 1; + else + snum = num_bound[0]; + + if (_ntl_zsign(den_bound) <= 0) + LogicError("rational reconstruction: bad denominator bound"); + + sden = den_bound[0]; + + if (_ntl_zsign(nin) <= 0) + LogicError("rational reconstruction: bad modulus"); + + if (_ntl_zsign(ain) < 0 || _ntl_zcompare(ain, nin) >= 0) + LogicError("rational reconstruction: bad residue"); + + + e = nin[0]; + _ntl_zsetlength(&a, e); + _ntl_zsetlength(&n, e); + _ntl_zsetlength(&q, e); + _ntl_zsetlength(&w, e); + _ntl_zsetlength(&x, e); + _ntl_zsetlength(&y, e); + _ntl_zsetlength(&z, e); + _ntl_zsetlength(&inv, e); + _ntl_zsetlength(&u, e); + _ntl_zsetlength(&a_bak, e); + _ntl_zsetlength(&n_bak, e); + _ntl_zsetlength(&inv_bak, e); + _ntl_zsetlength(&w_bak, e); + + fhi1 = 1.0 + ((double) 32.0)/NTL_FDOUBLE_PRECISION; + flo1 = 1.0 - ((double) 32.0)/NTL_FDOUBLE_PRECISION; + + fhi = 1.0 + ((double) 8.0)/NTL_FDOUBLE_PRECISION; + flo = 1.0 - ((double) 8.0)/NTL_FDOUBLE_PRECISION; + + _ntl_zcopy(ain, &a); + _ntl_zcopy(nin, &n); + + _ntl_zone(&inv); + _ntl_zzero(&w); + + while (1) + { + if (w[0] >= 
sden && _ntl_zcompare(w, den_bound) > 0) break; + if (n[0] <= snum && _ntl_zcompare(n, num_bound) <= 0) break; + + _ntl_zcopy(a, &a_bak); + _ntl_zcopy(n, &n_bak); + _ntl_zcopy(w, &w_bak); + _ntl_zcopy(inv, &inv_bak); + + gotthem = 0; + sa = a[0]; + sn = n[0]; + diff = sa - sn; + if (!diff || diff == 1) + { + sa = a[0]; + p = &a[sa]; + num = (double) (*p) * NTL_FRADIX; + if (sa > 1) + num += (*(--p)); + num *= NTL_FRADIX; + if (sa > 2) + num += (*(p - 1)); + sn = n[0]; + p = &n[sn]; + den = (double) (*p) * NTL_FRADIX; + if (sn > 1) + den += (*(--p)); + den *= NTL_FRADIX; + if (sn > 2) + den += (*(p - 1)); + hi = fhi1 * (num + 1.0) / den; + lo = flo1 * num / (den + 1.0); + if (diff > 0) + { + hi *= NTL_FRADIX; + lo *= NTL_FRADIX; + } + try11 = 1; + try12 = 0; + try21 = 0; + try22 = 1; + parity = 1; + fast = 1; + while (fast > 0) + { + parity = 1 - parity; + if (hi >= NTL_FRADIX) + fast = 0; + else + { + ilo = (long)lo; + dirt = hi - ilo; + if (dirt < 1.0/NTL_FDOUBLE_PRECISION || !ilo || ilo < (long)hi) + fast = 0; + else + { + dt = lo-ilo; + lo = flo / dirt; + if (dt > 1.0/NTL_FDOUBLE_PRECISION) + hi = fhi / dt; + else + hi = NTL_FRADIX; + temp = try11; + try11 = try21; + if ((NTL_RADIX - temp) / ilo < try21) + fast = 0; + else + try21 = temp + ilo * try21; + temp = try12; + try12 = try22; + if ((NTL_RADIX - temp) / ilo < try22) + fast = 0; + else + try22 = temp + ilo * try22; + if ((fast > 0) && (parity > 0)) + { + gotthem = 1; + got11 = try11; + got12 = try12; + got21 = try21; + got22 = try22; + } + } + } + } + } + if (gotthem) + { + _ntl_zsmul(inv, got11, &x); + _ntl_zsmul(w, got12, &y); + _ntl_zsmul(inv, got21, &z); + _ntl_zsmul(w, got22, &w); + _ntl_zadd(x, y, &inv); + _ntl_zadd(z, w, &w); + _ntl_zsmul(a, got11, &x); + _ntl_zsmul(n, got12, &y); + _ntl_zsmul(a, got21, &z); + _ntl_zsmul(n, got22, &n); + _ntl_zsub(x, y, &a); + _ntl_zsub(n, z, &n); + } + else + { + _ntl_zdiv(a, n, &q, &a); + _ntl_zmul(q, w, &x); + _ntl_zadd(inv, x, &inv); + if (a[0] > 1 || a[1] > 0) + { + _ntl_zdiv(n, a, &q, &n); + _ntl_zmul(q, inv, &x); + _ntl_zadd(w, x, &w); + } + else + { + break; + } + } + } + + _ntl_zcopy(a_bak, &a); + _ntl_zcopy(n_bak, &n); + _ntl_zcopy(w_bak, &w); + _ntl_zcopy(inv_bak, &inv); + + _ntl_znegate(&w); + + while (1) + { + sa = w[0]; + if (sa < 0) w[0] = -sa; + if (w[0] >= sden && _ntl_zcompare(w, den_bound) > 0) return 0; + w[0] = sa; + + if (n[0] <= snum && _ntl_zcompare(n, num_bound) <= 0) break; + + fast = 0; + sa = a[0]; + sn = n[0]; + diff = sa - sn; + if (!diff || diff == 1) + { + sa = a[0]; + p = &a[sa]; + num = (double) (*p) * NTL_FRADIX; + if (sa > 1) + num += (*(--p)); + num *= NTL_FRADIX; + if (sa > 2) + num += (*(p - 1)); + sn = n[0]; + p = &n[sn]; + den = (double) (*p) * NTL_FRADIX; + if (sn > 1) + den += (*(--p)); + den *= NTL_FRADIX; + if (sn > 2) + den += (*(p - 1)); + hi = fhi1 * (num + 1.0) / den; + lo = flo1 * num / (den + 1.0); + if (diff > 0) + { + hi *= NTL_FRADIX; + lo *= NTL_FRADIX; + } + if (hi < NTL_FRADIX) + { + ilo = (long)lo; + if (ilo == (long)hi) + fast = 1; + } + } + + if (fast) + { + if (ilo != 0) { + if (ilo == 1) { + _ntl_zsub(inv, w, &inv); + _ntl_zsubpos(a, n, &a); + } + else if (ilo == 2) { + _ntl_z2mul(w, &x); + _ntl_zsub(inv, x, &inv); + _ntl_z2mul(n, &x); + _ntl_zsubpos(a, x, &a); + } + else if (ilo ==3) { + _ntl_z2mul(w, &x); + _ntl_zadd(w, x, &x); + _ntl_zsub(inv, x, &inv); + _ntl_z2mul(n, &x); + _ntl_zadd(n, x, &x); + _ntl_zsubpos(a, x, &a); + } + else if (ilo == 4) { + _ntl_zlshift(w, 2, &x); + _ntl_zsub(inv, x, &inv); + _ntl_zlshift(n, 2, &x); 
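+ /* a <- a - 4*n via a two-bit shift, mirroring the inv update above */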
+ _ntl_zsubpos(a, x, &a); + } + else { + _ntl_zsmul(w, ilo, &x); + _ntl_zsub(inv, x, &inv); + _ntl_zsmul(n, ilo, &x); + _ntl_zsubpos(a, x, &a); + } + } + } + else { + _ntl_zdiv(a, n, &q, &a); + _ntl_zmul(q, w, &x); + _ntl_zsub(inv, x, &inv); + } + + _ntl_zswap(&a, &n); + _ntl_zswap(&inv, &w); + } + + if (_ntl_zsign(w) < 0) { + _ntl_znegate(&w); + _ntl_znegate(&n); + } + + _ntl_zcopy(n, num_out); + _ntl_zcopy(w, den_out); + + return 1; +} + + + +static +long OptWinSize(long n) +/* finds k that minimizes n/(k+1) + 2^{k-1} */ + +{ + long k; + double v, v_new; + + + v = n/2.0 + 1.0; + k = 1; + + for (;;) { + v_new = n/((double)(k+2)) + ((double)(1L << k)); + if (v_new >= v) break; + v = v_new; + k++; + } + + return k; +} + + + +static +void _ntl_zsppowermod(long a, _ntl_verylong e, _ntl_verylong n, + _ntl_verylong *x) +{ + _ntl_verylong_wrapped res; + long i, k; + + if (_ntl_ziszero(e)) { + _ntl_zone(x); + return; + } + + res = 0; + _ntl_zsetlength(&res, n[0]); + _ntl_zone(&res); + + k = _ntl_z2log(e); + + for (i = k - 1; i >= 0; i--) { + _ntl_zsqmod(res, n, &res); + if (_ntl_zbit(e, i)) + _ntl_zsmulmod(res, a, n, &res); + } + + if (_ntl_zsign(e) < 0) _ntl_zinvmod(res, n, &res); + + _ntl_zcopy(res, x); +} + + + +static +void _ntl_ztwopowermod( _ntl_verylong e, _ntl_verylong n, + _ntl_verylong *x) +{ + _ntl_verylong_wrapped res; + long i, k; + + if (_ntl_ziszero(e)) { + _ntl_zone(x); + return; + } + + res = 0; + _ntl_zsetlength(&res, n[0]); + _ntl_zone(&res); + + k = _ntl_z2log(e); + + for (i = k - 1; i >= 0; i--) { + _ntl_zsqmod(res, n, &res); + if (_ntl_zbit(e, i)) + _ntl_zaddmod(res, res, n, &res); + } + + if (_ntl_zsign(e) < 0) _ntl_zinvmod(res, n, &res); + + _ntl_zcopy(res, x); +} + + +void _ntl_zpowermod(_ntl_verylong g, _ntl_verylong e, _ntl_verylong F, + _ntl_verylong *h) + +/* h = g^e mod f using "sliding window" algorithm + + remark: the notation (h, g, e, F) is strange, because I + copied the code from BB.c. 
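+
+ window scan: the bits of e are consumed most-significant first, and a
+ window always closes on an odd value val < 2^k, so only the odd powers
+ g^1, g^3, ..., g^(2^k - 1) need to be precomputed in v[].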
+*/ + +{ + _ntl_verylong_wrapped res, t; + Vec<_ntl_verylong_wrapped> v; + + long n, i, k, val, cnt, m; + + if (_ntl_zsign(g) < 0 || _ntl_zcompare(g, F) >= 0 || + _ntl_zscompare(F, 1) <= 0) + LogicError("PowerMod: bad args"); + + + if (!g || g[0] == 1 || g[0] == -1) { + long gg = _ntl_ztoint(g); + if (gg == 2) + _ntl_ztwopowermod(e, F, h); + else + _ntl_zsppowermod(gg, e, F, h); + return; + } + + if (_ntl_zscompare(e, 0) == 0) { + _ntl_zone(h); + return; + } + + if (_ntl_zscompare(e, 1) == 0) { + _ntl_zcopy(g, h); + return; + } + + if (_ntl_zscompare(e, -1) == 0) { + _ntl_zinvmod(g, F, h); + return; + } + + if (_ntl_zscompare(e, 2) == 0) { + _ntl_zsqmod(g, F, h); + return; + } + + if (_ntl_zscompare(e, -2) == 0) { + res = 0; + _ntl_zsqmod(g, F, &res); + _ntl_zinvmod(res, F, h); + return; + } + + n = _ntl_z2log(e); + + res = 0; + _ntl_zone(&res); + + if (n < 16) { + /* plain square-and-multiply algorithm */ + + for (i = n - 1; i >= 0; i--) { + _ntl_zsqmod(res, F, &res); + if (_ntl_zbit(e, i)) + _ntl_zmulmod(res, g, F, &res); + } + + if (_ntl_zsign(e) < 0) _ntl_zinvmod(res, F, &res); + + _ntl_zcopy(res, h); + return; + } + + k = OptWinSize(n); + + if (k > 5) k = 5; + + v.SetLength(1L << (k-1)); + + _ntl_zcopy(g, &v[0]); + + if (k > 1) { + t = 0; + _ntl_zsqmod(g, F, &t); + + for (i = 1; i < (1L << (k-1)); i++) + _ntl_zmulmod(v[i-1], t, F, &v[i]); + + } + + + val = 0; + for (i = n-1; i >= 0; i--) { + val = (val << 1) | _ntl_zbit(e, i); + if (val == 0) + _ntl_zsqmod(res, F, &res); + else if (val >= (1L << (k-1)) || i == 0) { + cnt = 0; + while ((val & 1) == 0) { + val = val >> 1; + cnt++; + } + + m = val; + while (m > 0) { + _ntl_zsqmod(res, F, &res); + m = m >> 1; + } + + _ntl_zmulmod(res, v[val >> 1], F, &res); + + while (cnt > 0) { + _ntl_zsqmod(res, F, &res); + cnt--; + } + + val = 0; + } + } + + if (_ntl_zsign(e) < 0) _ntl_zinvmod(res, F, &res); + + _ntl_zcopy(res, h); +} + + + + + + +void +_ntl_zexp( + _ntl_verylong a, + long e, + _ntl_verylong *bb + ) +{ + long k; + long len_a; + long sa; + CRegister(res); + + if (!a) + sa = 0; + else { + sa = a[0]; + if (sa < 0) sa = -sa; + } + + if (sa <= 1) { + _ntl_zexps(_ntl_ztoint(a), e, bb); + return; + } + + + if (!e) + { + _ntl_zone(bb); + return; + } + + if (e < 0) + ArithmeticError("negative exponent in _ntl_zexp"); + + if (_ntl_ziszero(a)) + { + _ntl_zzero(bb); + return; + } + + len_a = _ntl_z2log(a); + if (len_a > (NTL_MAX_LONG-(NTL_NBITS-1))/e) + ResourceError("overflow in _ntl_zexp"); + + _ntl_zsetlength(&res, (len_a*e+NTL_NBITS-1)/NTL_NBITS); + + _ntl_zcopy(a, &res); + k = 1; + while ((k << 1) <= e) + k <<= 1; + while (k >>= 1) { + _ntl_zsq(res, &res); + if (e & k) + _ntl_zmul(a, res, &res); + } + + _ntl_zcopy(res, bb); +} + +void +_ntl_zexps( + long a, + long e, + _ntl_verylong *bb + ) +{ + long k; + long len_a; + CRegister(res); + + if (!e) + { + _ntl_zone(bb); + return; + } + + if (e < 0) + ArithmeticError("negative exponent in _ntl_zexps"); + + if (!a) + { + _ntl_zzero(bb); + return; + } + + if (a >= NTL_RADIX || a <= -NTL_RADIX) { + _ntl_zintoz(a, &res); + _ntl_zexp(res, e, &res); + return; + } + + len_a = _ntl_z2logs(a); + if (len_a > (NTL_MAX_LONG-(NTL_NBITS-1))/e) + ResourceError("overflow in _ntl_zexps"); + + _ntl_zsetlength(&res, (len_a*e+NTL_NBITS-1)/NTL_NBITS); + + _ntl_zintoz(a, &res); + k = 1; + while ((k << 1) <= e) + k <<= 1; + while (k >>= 1) { + _ntl_zsq(res, &res); + if (e & k) + _ntl_zsmul(res, a, &res); + } + + _ntl_zcopy(res, bb); +} + + +void +_ntl_z2mul( + _ntl_verylong n, + _ntl_verylong *rres + ) +{ + long sn; + 
long i; + long n_alias; + long carry; + _ntl_verylong res; + + if (!n) + { + _ntl_zzero(rres); + return; + } + + + if ((!n[1]) && (n[0] == 1)) + { + _ntl_zzero(rres); + return; + } + + if ((sn = n[0]) < 0) + sn = -sn; + + res = *rres; + n_alias = (n == res); + + _ntl_zsetlength(&res, sn + 1); + *rres = res; + if (n_alias) n = res; + + carry = 0; + + for (i = 1; i <= sn; i++) + { + if ((res[i] = (n[i] << 1) + carry) >= NTL_RADIX) + { + res[i] -= NTL_RADIX; + carry = 1; + } + else + carry = 0; + } + + if (carry) + res[++sn] = 1; + + if (n[0] < 0) + res[0] = -sn; + else + res[0] = sn; +} + + +long +_ntl_z2div( + _ntl_verylong n, + _ntl_verylong *rres + ) +{ + long sn; + long i; + long result; + _ntl_verylong res = *rres; + + if ((!n) || ((!n[1]) && (n[0] == 1))) + { + _ntl_zzero(rres); + return (0); + } + + if ((sn = n[0]) < 0) + sn = -sn; + + /* n won't move if res aliases n */ + _ntl_zsetlength(&res, sn); + *rres = res; + + result = n[1] & 1; + for (i = 1; i < sn; i++) + { + res[i] = (n[i] >> 1); + if (n[i + 1] & 1) + res[i] += (NTL_RADIX >> 1); + } + + if (res[sn] = (n[sn] >> 1)) + res[0] = n[0]; + else if (sn == 1) + { + res[0] = 1; + } + else if (n[0] < 0) + res[0] = -sn + 1; + else + res[0] = sn - 1; + + return (result); +} + + +void +_ntl_zlshift( + _ntl_verylong n, + long k, + _ntl_verylong *rres + ) +{ + long big; + long small; + long sn; + long i; + long cosmall; + long n_alias; + _ntl_verylong res; + + + if (!n) + { + _ntl_zzero(rres); + return; + } + + if ((!n[1]) && (n[0] == 1)) + { + _ntl_zzero(rres); + return; + } + + res = *rres; + n_alias = (n == res); + + + if (!k) + { + if (!n_alias) + _ntl_zcopy(n, rres); + return; + } + + if (k < 0) + { + if (k < -NTL_MAX_LONG) + _ntl_zzero(rres); + else + _ntl_zrshift(n, -k, rres); + return; + } + if (k == 1) + { + _ntl_z2mul(n, rres); + return; + } + + if ((sn = n[0]) < 0) + sn = -sn; + + i = sn + (big = k / NTL_NBITS); + if (small = k - big * NTL_NBITS) + { + _ntl_zsetlength(&res, i + 1); + *rres = res; + if (n_alias) n = res; + + res[i + 1] = n[sn] >> (cosmall = NTL_NBITS - small); + for (i = sn; i > 1; i--) + res[i + big] = ((((unsigned long) n[i]) << small) & NTL_RADIXM) + (n[i - 1] >> cosmall); + res[big + 1] = (((unsigned long) n[1]) << small) & NTL_RADIXM; + for (i = big; i; i--) + res[i] = 0; + if (res[sn + big + 1]) + big++; + } + else + { + _ntl_zsetlength(&res, i); + *rres = res; + if (n_alias) n = res; + + for (i = sn; i; i--) + res[i + big] = n[i]; + for (i = big; i; i--) + res[i] = 0; + } + if (n[0] > 0) + res[0] = n[0] + big; + else + res[0] = n[0] - big; +} + + +void +_ntl_zrshift( + _ntl_verylong n, + long k, + _ntl_verylong *rres + ) +{ + long big; + long small; + long sn; + long i; + long cosmall; + _ntl_verylong res; + + if (!n) + { + _ntl_zzero(rres); + return; + } + + if ((!n[1]) && (n[0] == 1)) + { + _ntl_zzero(rres); + return; + } + + res = *rres; + + if (!k) + { + if (n != res) + _ntl_zcopy(n, rres); + return; + } + + if (k < 0) + { + if (k < -NTL_MAX_LONG) ResourceError("overflow in _ntl_zrshift"); + _ntl_zlshift(n, -k, rres); + return; + } + + if (k == 1) + { + _ntl_z2div(n, rres); + return; + } + + big = k / NTL_NBITS; + small = k - big * NTL_NBITS; + + if ((sn = n[0]) < 0) + sn = -sn; + + if ((big >= sn) || + ((big == sn - 1) && small && (!(n[sn] >> small)))) + /* The microsoft optimizer generates bad code without + the above test for small != 0 */ + { + _ntl_zzero(rres); + return; + } + + sn -= big; + + /* n won't move if res aliases n */ + _ntl_zsetlength(&res, sn); + *rres = res; + + if (small) + { + 
cosmall = NTL_NBITS - small; + for (i = 1; i < sn; i++) + res[i] = (n[i + big] >> small) + + ((((unsigned long) n[i + big + 1]) << cosmall) & NTL_RADIXM); + if (!(res[sn] = (n[sn + big] >> small))) + sn--; + } + else + for (i = 1; i <= sn; i++) + res[i] = n[i + big]; + if (n[0] > 0) + res[0] = sn; + else + res[0] = -sn; +} + + +long +_ntl_zmakeodd( + _ntl_verylong *nn + ) +{ + _ntl_verylong n = *nn; + long i; + long shift = 1; + + if (!n || (!n[1] && (n[0] == 1))) + return (0); + while (!(n[shift])) + shift++; + i = n[shift]; + shift = NTL_NBITS * (shift - 1); + while (!(i & 1)) + { + shift++; + i >>= 1; + } + _ntl_zrshift(n, shift, &n); + return (shift); +} + + +long +_ntl_znumtwos( + _ntl_verylong n + ) +{ + long i; + long shift = 1; + + if (!n || (!n[1] && (n[0] == 1))) + return (0); + while (!(n[shift])) + shift++; + i = n[shift]; + shift = NTL_NBITS * (shift - 1); + while (!(i & 1)) + { + shift++; + i >>= 1; + } + return (shift); +} + + +long +_ntl_zsqrts( + long n + ) +{ + long a; + long ndiva; + long newa; + CRegister(ln); + CRegister(rr); + + if (n < 0) + ArithmeticError("_ntl_zsqrts: negative argument"); + + if (n <= 0) + return (0); + if (n <= 3) + return (1); + if (n <= 8) + return (2); + if (n >= NTL_RADIX) + { + _ntl_zintoz(n,&ln); + _ntl_zsqrt(ln,&rr); + return(_ntl_ztoint(rr)); + } + newa = 3L << (2 * (NTL_NBITSH - 1)); + a = 1L << NTL_NBITSH; + while (!(n & newa)) + { + newa >>= 2; + a >>= 1; + } + while (1) + { + newa = ((ndiva = n / a) + a) / 2; + if (newa - ndiva <= 1) + { + if (newa * newa <= n) + return (newa); + else + return (ndiva); + } + a = newa; + } +} + + +void _ntl_zsqrt(_ntl_verylong n, _ntl_verylong *rr) +{ + CRegister(a); + CRegister(ndiva); + CRegister(diff); + CRegister(r); + long i; + + if (!n) { + _ntl_zzero(rr); + return; + } + + if ((i = n[0]) < 0) + ArithmeticError("negative argument in _ntl_zsqrt"); + + if (i == 1) { + _ntl_zintoz(_ntl_zsqrts(n[1]), rr); + return; + } + + + _ntl_zsetlength(&a, i); + _ntl_zsetlength(&ndiva, i); + _ntl_zsetlength(&diff, i); + + a[(a[0] = (i + 1) / 2)] = _ntl_zsqrts(n[i]) + 1; + if (!(i & 1)) + a[a[0]] <<= NTL_NBITSH; + + if (a[a[0]] & NTL_RADIX) { + a[a[0]] = 0; + a[0]++; + a[a[0]] = 1; + } + + for (i = a[0] - 1; i; i--) + a[i] = 0; + + while (1) { + _ntl_zdiv(n, a, &ndiva, &r); + _ntl_zadd(a, ndiva, &r); + _ntl_zrshift(r, 1, &r); + if (_ntl_zcompare(r, ndiva) <= 0) + goto done; + + _ntl_zsubpos(r, ndiva, &diff); + if ((diff[0] == 1) && (diff[1] <= 1)) { + _ntl_zsq(r, &diff); + if (_ntl_zcompare(diff, n) > 0) + _ntl_zcopy(ndiva, &r); + + goto done; + } + _ntl_zcopy(r, &a); + } +done: + _ntl_zcopy(r, rr); +} + + + +void +_ntl_zgcd( + _ntl_verylong mm1, + _ntl_verylong mm2, + _ntl_verylong *rres + ) +{ + long agrb; + long shibl; + CRegister(aa); + CRegister(bb); + CRegister(cc); + _ntl_verylong a; + _ntl_verylong b; + _ntl_verylong c; + _ntl_verylong d; + long m1negative; + long m2negative; + + /* _ntl_ziszero is necessary here and below to fix an + an aliasing bug in LIP */ + + if (_ntl_ziszero(mm1)) + { + if (mm2 != *rres) + _ntl_zcopy(mm2,rres); + _ntl_zabs(rres); + return; + } + + if (_ntl_ziszero(mm2)) + { + if (mm1 != *rres) + _ntl_zcopy(mm1,rres); + _ntl_zabs(rres); + return; + } + + if (mm1 == mm2) + { + if (mm1 != *rres) + _ntl_zcopy(mm1, rres); + _ntl_zabs(rres); + return; + } + + // UNSAFE + + if (m1negative = (mm1[0] < 0)) + mm1[0] = -mm1[0]; + if (m2negative = (mm2[0] < 0)) + mm2[0] = -mm2[0]; + + // FIXME: this is really ugly + NTL_SCOPE(guard) { + if (m1negative) + mm1[0] = -mm1[0]; + if (m2negative) + 
mm2[0] = -mm2[0]; + }; + + if ((agrb = mm1[0]) < mm2[0]) + agrb = mm2[0]; + + _ntl_zsetlength(&aa, agrb+1); + _ntl_zsetlength(&bb, agrb+1); + _ntl_zsetlength(&cc, agrb+1); + + if (mm1[0] != mm2[0]) + { + if (mm1[0] > mm2[0]) + { + _ntl_zcopy(mm2, &aa); + _ntl_zmod(mm1, aa, &bb); + } + else + { + _ntl_zcopy(mm1, &aa); + _ntl_zmod(mm2, aa, &bb); + } + if (!(bb[1]) && (bb[0] == 1)) + { + a = aa; + goto done; + } + } + else + { + _ntl_zcopy(mm1, &aa); + _ntl_zcopy(mm2, &bb); + } + if ((agrb = _ntl_zmakeodd(&aa)) < (shibl = _ntl_zmakeodd(&bb))) + shibl = agrb; + if (!(agrb = _ntl_zcompare(aa, bb))) + { + a = aa; + goto endshift; + } + else if (agrb < 0) + { + a = bb; + b = aa; + } + else + { + a = aa; + b = bb; + } + c = cc; + _ntl_zsubpos(a, b, &c); + do + { + _ntl_zmakeodd(&c); + if (!(agrb = _ntl_zcompare(b, c))) + { + a = b; + goto endshift; + } + else if (agrb > 0) + { + a = b; + b = c; + c = a; + } + else + { + d = a; + a = c; + c = d; + } + _ntl_zsubpos(a, b, &c); + } while (c[1] || c[0] != 1); +endshift: + _ntl_zlshift(a, shibl, &a); +done: + if (m1negative) + mm1[0] = -mm1[0]; + if (m2negative) + mm2[0] = -mm2[0]; + + guard.relax(); + + _ntl_zcopy(a, rres); +} + + +long _ntl_zsign(_ntl_verylong a) +{ + if (!a) + { + return (0); + } + if (a[0] < 0) + return (-1); + if (a[0] > 1) + return (1); + if (a[1]) + return (1); + return (0); +} + +void _ntl_zabs(_ntl_verylong *pa) +{ + _ntl_verylong a = *pa; + + if (!a) + return; + if (a[0] < 0) + a[0] = (-a[0]); +} + +long +_ntl_z2logs( + long aa + ) +{ + long i = 0; + unsigned long a; + + if (aa < 0) + a = - ((unsigned long) aa); + else + a = aa; + + while (a>=256) + i += 8, a >>= 8; + if (a >=16) + i += 4, a >>= 4; + if (a >= 4) + i += 2, a >>= 2; + if (a >= 2) + i += 2; + else if (a >= 1) + i++; + return (i); +} + +long +_ntl_z2log( + _ntl_verylong a + ) +{ + long la; + + if (!a) + return (0); + la = (a[0] > 0 ? a[0] : -a[0]); + return ( NTL_NBITS * (la - 1) + _ntl_z2logs(a[la]) ); +} + + + +long +_ntl_zscompare( + _ntl_verylong a, + long b + ) +{ + if (!b) + return _ntl_zsign(a); + else { + CRegister(c); + _ntl_zintoz(b, &c); + return (_ntl_zcompare(a, c)); + } +} + +void +_ntl_zswap( + _ntl_verylong *a, + _ntl_verylong *b + ) +{ + _ntl_verylong c; + + if ((*a && ((*a)[-1] & 1)) || (*b && ((*b)[-1] & 1))) { + // one of the inputs points to an bigint that is + // "pinned down" in memory, so we have to swap the data, + // not just the pointers + + CRegister(t); + long sz_a, sz_b, sz; + + sz_a = _ntl_zsize(*a); + sz_b = _ntl_zsize(*b); + sz = (sz_a > sz_b) ? 
sz_a : sz_b;
+
+      _ntl_zsetlength(a, sz);
+      _ntl_zsetlength(b, sz);
+
+      // EXCEPTIONS: all of the above ensures that swap provides strong ES
+
+
+      _ntl_zcopy(*a, &t);
+      _ntl_zcopy(*b, a);
+      _ntl_zcopy(t, b);
+      return;
+   }
+
+   _ntl_swap(*a, *b);
+}
+
+long
+_ntl_ziszero(
+   _ntl_verylong a
+   )
+{
+   if (!a) return (1);
+   if (a[1]) return (0);
+   if (a[0]==1) return (1);
+   return (0);
+}
+
+long
+_ntl_zodd(
+   _ntl_verylong a
+   )
+{
+   if (!a) return (0);
+   return (a[1]&1);
+}
+
+
+long
+_ntl_zbit(
+   _ntl_verylong a,
+   long p
+   )
+{
+   long bl;
+   long wh;
+   long sa;
+
+   if (p < 0 || !a) return 0;
+   bl = (p/NTL_NBITS);
+   wh = 1L << (p - NTL_NBITS*bl);
+   bl ++;
+   sa = a[0];
+   if (sa < 0) sa = -sa;
+   if (sa < bl) return (0);
+   if (a[bl] & wh) return (1);
+   return (0);
+}
+
+
+void
+_ntl_zlowbits(
+   _ntl_verylong a,
+   long b,
+   _ntl_verylong *cc
+   )
+{
+   _ntl_verylong c;
+   long bl;
+   long wh;
+   long sa;
+
+   if (_ntl_ziszero(a) || (b<=0)) {
+      _ntl_zzero(cc);
+      return;
+   }
+
+
+   bl = b/NTL_NBITS;
+   wh = b - NTL_NBITS*bl;
+   if (wh != 0)
+      bl++;
+   else
+      wh = NTL_NBITS;
+
+   sa = a[0];
+   if (sa < 0) sa = -sa;
+
+   if (sa < bl) {
+      _ntl_zcopy(a,cc);
+      _ntl_zabs(cc);
+      return;
+   }
+
+   c = *cc;
+
+   /* a won't move if c aliases a */
+   _ntl_zsetlength(&c, bl);
+   *cc = c;
+
+   for (sa=1; sa<bl; sa++)
+      c[sa] = a[sa];
+
+   c[bl] = a[bl] & ((1L<<wh)-1);
+
+   while ((bl>1) && (!c[bl]))
+      bl --;
+   c[0] = bl;
+}
+
+
+long _ntl_zslowbits(_ntl_verylong a, long p)
+{
+   CRegister(x);
+
+   if (p > NTL_BITS_PER_LONG)
+      p = NTL_BITS_PER_LONG;
+
+   _ntl_zlowbits(a, p, &x);
+
+   return _ntl_ztoint(x);
+}
+
+
+
+
+long
+_ntl_zweights(
+   long aa
+   )
+{
+   unsigned long a;
+   long res = 0;
+
+   if (aa < 0)
+      a = - ((unsigned long) aa);
+   else
+      a = aa;
+
+   while (a) {
+      if (a & 1) res ++;
+      a >>= 1;
+   }
+   return (res);
+}
+
+long
+_ntl_zweight(
+   _ntl_verylong a
+   )
+{
+   long i;
+   long res = 0;
+
+   if (!a) return (0);
+   i = a[0];
+   if (i<0) i = -i;
+   for (;i;i--)
+      res += _ntl_zweights(a[i]);
+   return (res);
+}
+
+
+
+void
+_ntl_zand(
+   _ntl_verylong a,
+   _ntl_verylong b,
+   _ntl_verylong *cc
+   )
+
+{
+   _ntl_verylong c;
+   long sa;
+   long sb;
+   long sm;
+   long a_alias;
+   long b_alias;
+
+   if (_ntl_ziszero(a) || _ntl_ziszero(b)) {
+      _ntl_zzero(cc);
+      return;
+   }
+
+   c = *cc;
+   a_alias = (a == c);
+   b_alias = (b == c);
+
+   sa = a[0];
+   if (sa < 0) sa = -sa;
+
+   sb = b[0];
+   if (sb < 0) sb = -sb;
+
+   sm = (sa > sb ?
sb : sa ); + + _ntl_zsetlength(&c, sm); + *cc = c; + if (a_alias) a = c; + if (b_alias) b = c; + + for (sa = 1; sa <= sm; sa ++) + c[sa] = a[sa] & b[sa]; + + while ((sm > 1) && (!(c[sm]))) + sm --; + c[0] = sm; +} + +void +_ntl_zxor( + _ntl_verylong a, + _ntl_verylong b, + _ntl_verylong *cc + ) +{ + _ntl_verylong c; + long sa; + long sb; + long sm; + long la; + long i; + long a_alias; + long b_alias; + + if (_ntl_ziszero(a)) { + _ntl_zcopy(b,cc); + _ntl_zabs(cc); + return; + } + + if (_ntl_ziszero(b)) { + _ntl_zcopy(a,cc); + _ntl_zabs(cc); + return; + } + + c = *cc; + a_alias = (a == c); + b_alias = (b == c); + + sa = a[0]; + if (sa < 0) sa = -sa; + + sb = b[0]; + if (sb < 0) sb = -sb; + + if (sa > sb) { + la = sa; + sm = sb; + } else { + la = sb; + sm = sa; + } + + _ntl_zsetlength(&c, la); + *cc = c; + if (a_alias) a = c; + if (b_alias) b = c; + + for (i = 1; i <= sm; i ++) + c[i] = a[i] ^ b[i]; + + if (sa > sb) + for (;i <= la; i++) c[i] = a[i]; + else + for (;i <= la; i++) c[i] = b[i]; + + while ((la > 1) && (!(c[la]))) + la --; + c[0] = la; +} + +void +_ntl_zor( + _ntl_verylong a, + _ntl_verylong b, + _ntl_verylong *cc + ) +{ + _ntl_verylong c; + long sa; + long sb; + long sm; + long la; + long i; + long a_alias; + long b_alias; + + if (_ntl_ziszero(a)) { + _ntl_zcopy(b,cc); + _ntl_zabs(cc); + return; + } + + if (_ntl_ziszero(b)) { + _ntl_zcopy(a,cc); + _ntl_zabs(cc); + return; + } + + c = *cc; + a_alias = (a == c); + b_alias = (b == c); + + sa = a[0]; + if (sa < 0) sa = -sa; + + sb = b[0]; + if (sb < 0) sb = -sb; + + if (sa > sb) { + la = sa; + sm = sb; + } else { + la = sb; + sm = sa; + } + + _ntl_zsetlength(&c, la); + *cc = c; + if (a_alias) a = c; + if (b_alias) b = c; + + for (i = 1; i <= sm; i ++) + c[i] = a[i] | b[i]; + + if (sa > sb) + for (;i <= la; i++) c[i] = a[i]; + else + for (;i <= la; i++) c[i] = b[i]; + + c[0] = la; +} + +long +_ntl_zsetbit( + _ntl_verylong *a, + long b + ) +{ + long bl; + long wh; + long sa; + + if (b<0) LogicError("_ntl_zsetbit: negative index"); + + if (_ntl_ziszero(*a)) { + _ntl_zintoz(1,a); + _ntl_zlshift(*a,b,a); + return (0); + } + + bl = (b/NTL_NBITS); + wh = 1L << (b - NTL_NBITS*bl); + bl ++; + sa = (*a)[0]; + if (sa<0) sa = -sa; + if (sa >= bl) { + sa = (*a)[bl] & wh; + (*a)[bl] |= wh; + if (sa) return (1); + return (0); + } else { + _ntl_zsetlength(a,bl); + sa ++; + for (;sa<=bl;sa++) (*a)[sa]=0; + if ((*a)[0] < 0) + (*a)[0] = -bl; + else (*a)[0] = bl; + (*a)[bl] |= wh; + return (0); + } +} + +long +_ntl_zswitchbit( + _ntl_verylong *a, + long p + ) +{ + long bl; + long wh; + long sa; + + if (p < 0) LogicError("_ntl_zswitchbit: negative index"); + + if (_ntl_ziszero(*a)) { + _ntl_zintoz(1,a); + _ntl_zlshift(*a,p,a); + return (0); + } + + bl = (p/NTL_NBITS); + wh = 1L << (p - NTL_NBITS*bl); + bl ++; + sa = (*a)[0]; + if (sa < 0) sa = -sa; + if ((sa < bl) || (!((*a)[bl] & wh))) { + _ntl_zsetbit(a,p); + return (0); + } + (*a)[bl] ^= wh; + while ((sa>1) && (!(*a)[sa])) + sa --; + if ((*a)[0] > 0) (*a)[0] = sa; + else (*a)[0] = -sa; + return (1); +} + + +long _ntl_zsize(_ntl_verylong rep) +{ + if (!rep || (rep[0] == 1 && rep[1] == 0)) + return 0; + else if (rep[0] < 0) + return -rep[0]; + else + return rep[0]; +} + + +long _ntl_zdigit(_ntl_verylong rep, long i) +{ + long sa; + if (i < 0 || !rep) return 0; + + sa = rep[0]; + if (sa < 0) sa = -sa; + if (i >= sa) return 0; + return rep[i+1]; +} + +long _ntl_zisone(_ntl_verylong rep) +{ + return rep != 0 && rep[0] == 1 && rep[1] == 1; +} + +long _ntl_zsptest(_ntl_verylong rep) +{ + return !rep || 
rep[0] == 1 || rep[0] == -1; +} + +long _ntl_zwsptest(_ntl_verylong rep) +{ + return !rep || rep[0] == 1 || rep[0] == -1; +} + +long _ntl_zcrtinrange(_ntl_verylong g, _ntl_verylong a) +{ + long sa, sg, carry, i, diff; + + if (!a || a[0] < 0 || (a[0] == 1 && a[1] == 0)) return 0; + + sa = a[0]; + + if (!g) return 1; + + sg = g[0]; + + if (sg == 1 && g[1] == 0) return 1; + + if (sg < 0) sg = -sg; + + if (sa-sg > 1) return 1; + + if (sa-sg < 0) return 0; + + carry=0; + + if (sa-sg == 1) { + if (a[sa] > 1) return 1; + carry = 1; + } + + i = sg; + diff = 0; + while (i > 0 && diff == 0) { + diff = (carry << (NTL_NBITS-1)) + (a[i] >> 1) - g[i]; + carry = (a[i] & 1); + i--; + } + + if (diff == 0) { + if (carry) return 1; + return (g[0] > 0); + } + else + return (diff > 0); +} + + + +void _ntl_zfrombytes(_ntl_verylong *x, const unsigned char *p, long n) +{ + long sz; + long i; + _ntl_verylong a; + long bitpos, wordpos, bitoffset, diff; + long nbits; + unsigned long carry, tmp; + + while (n > 0 && p[n-1] == 0) n--; + + if (n <= 0) { + _ntl_zzero(x); + return; + } + + + if (n > (NTL_MAX_LONG-(NTL_NBITS-1))/8) + ResourceError("ZZFromBytes: excessive length"); + + nbits = 0; + tmp = p[n-1]; + while (tmp) { + tmp >>= 1; + nbits++; + } + + sz = ((n-1)*8 + nbits + NTL_NBITS-1)/NTL_NBITS; + + _ntl_zsetlength(x, sz); + + a = *x; + + for (i = 1; i <= sz; i++) + a[i] = 0; + + carry = 0; + for (i = 0; i < n; i++) { + bitpos = i*8; + wordpos = bitpos/NTL_NBITS; + bitoffset = bitpos - wordpos*NTL_NBITS; + diff = NTL_NBITS-bitoffset; + + a[wordpos+1] |= carry | + ((( ((unsigned long)(p[i])) & 255UL ) << bitoffset) & NTL_RADIXM); + + carry = ( ((unsigned long)(p[i])) & 255UL ) >> diff; + } + + a[sz] |= carry; + a[0] = sz; +} + + +void _ntl_zbytesfromz(unsigned char *p, _ntl_verylong a, long nn) +{ + long k = _ntl_z2log(a); + long n = (k+7)/8; + long sz = _ntl_zsize(a); + long min_n = ((n < nn) ? 
n : nn); + + long i; + + for (i = 0; i < min_n; i++) { + long bitpos = i*8; + long wordpos = bitpos/NTL_NBITS; + long bitoffset = bitpos - wordpos*NTL_NBITS; + long diff; + + p[i] = (a[wordpos+1] >> bitoffset) & 255; + + diff = NTL_NBITS - bitoffset; + + if (diff < 8 && wordpos < sz-1) { + long msk = (1L << (8-diff))-1; + p[i] |= ((a[wordpos+2] & msk) << diff); + } + } + + for (i = min_n; i < nn; i++) + p[i] = 0; +} + + +long _ntl_zblock_construct_alloc(_ntl_verylong *x, long d, long n) +{ + long nwords, nbytes, AllocAmt, m, *p, *q, j; + + + /* check n value */ + + if (n <= 0) + LogicError("block construct: n must be positive"); + + /* check d value */ + + if (d <= 0) + LogicError("block construct: d must be positive"); + + if (NTL_OVERFLOW(d, NTL_NBITS, NTL_NBITS) || + NTL_OVERFLOW(d, sizeof(long), 3*sizeof(long))) + ResourceError("block construct: d too large"); + + nwords = d + 3; + nbytes = nwords*sizeof(long); + + AllocAmt = (NTL_MAX_ALLOC_BLOCK - sizeof(long)) / nbytes; + if (AllocAmt == 0) AllocAmt = 1; + + if (AllocAmt < n) + m = AllocAmt; + else + m = n; + + p = (long *) NTL_MALLOC(m, nbytes, sizeof(long)); + if (!p) MemoryError(); + + *p = m; + + q = p+2; + *x = q; + + for (j = 0; j < m; j++) { + q[-1] = ((d+1) << 1) | 1; + q[0] = 1; + q[1] = 0; + q += nwords; + } + + return m; +} + +void _ntl_zblock_construct_set(_ntl_verylong x, _ntl_verylong *y, long i) +{ + long d, size; + + d = (x[-1] >> 1) - 1; + size = d + 3; + + *y = x + i*size; +} + + +long _ntl_zblock_destroy(_ntl_verylong x) +{ + long m, *p; + + p = x - 2; + m = *p; + free(p); + return m; +} + + +long _ntl_zblock_storage(long d) +{ + long size = d+3; + return size * (sizeof (long)) + sizeof(_ntl_verylong); +} + + + + +/* The following routines provide special support for ZZ_pX + * arithmetic. */ + + + +/* this is a generic single-precision mul mod that will work + * on any platform */ + + +#define SP_MUL_MOD(r, a, b, n) \ +{ \ + long l__a = (a); \ + long l__b = (b); \ + long l__n = (n); \ + long l__q; \ + unsigned long l__res; \ + \ + l__q = (long) ((((double) l__a) * ((double) l__b)) / ((double) l__n)); \ + l__res = ((unsigned long) l__a)*((unsigned long) l__b) - \ + ((unsigned long) l__q)*((unsigned long) l__n); \ + if (l__res >> (NTL_BITS_PER_LONG-1)) \ + l__res += l__n; \ + else if (((long) l__res) >= l__n) \ + l__res -= l__n; \ + \ + r = (long) l__res; \ +} + + + + +static +void sp_ext_eucl(long *dd, long *ss, long *tt, long a, long b) +{ + long u, v, u0, v0, u1, v1, u2, v2, q, r; + + long aneg = 0, bneg = 0; + + if (a < 0) { + if (a < -NTL_MAX_LONG) ResourceError("integer overflow"); + a = -a; + aneg = 1; + } + + if (b < 0) { + if (b < -NTL_MAX_LONG) ResourceError("integer overflow"); + b = -b; + bneg = 1; + } + + u1=1; v1=0; + u2=0; v2=1; + u = a; v = b; + + while (v != 0) { + q = u / v; + r = u % v; + u = v; + v = r; + u0 = u2; + v0 = v2; + u2 = u1 - q*u2; + v2 = v1- q*v2; + u1 = u0; + v1 = v0; + } + + if (aneg) + u1 = -u1; + + if (bneg) + v1 = -v1; + + *dd = u; + *ss = u1; + *tt = v1; +} + + +static +long sp_inv_mod(long a, long n) +{ + long d, s, t; + + sp_ext_eucl(&d, &s, &t, a, n); + if (d != 1) ArithmeticError("inverse undefined"); + if (s < 0) + return s + n; + else + return s; +} + + +/* Data structures and algorithms for fast Chinese Remaindering */ + +/* these first few functions are just placeholders to make + * the interface consistent with the GMP interface. 
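+ * In this classic-LIP build there is no precomputed data to manage,
+ * so extract() and fetch() below simply return null temp vectors.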
+ */
+
+
+class _ntl_crt_struct_impl : public _ntl_crt_struct {
+public:
+   Vec<_ntl_verylong_wrapped> v;
+   long sbuf;
+   long n;
+
+   bool special();
+   void insert(long i, NTL_verylong m);
+   _ntl_tmp_vec *extract();
+   _ntl_tmp_vec *fetch();
+   void eval(NTL_verylong *x, const long *b,
+             _ntl_tmp_vec *tmp_vec);
+};
+
+
+
+_ntl_crt_struct *
+_ntl_crt_struct_build(long n, _ntl_verylong p, long (*primes)(long))
+{
+   UniquePtr<_ntl_crt_struct_impl> res;
+   res.make();
+   res->v.SetLength(n);
+   res->sbuf = p[0]+3;
+   res->n = n;
+
+   return res.release();
+}
+
+bool _ntl_crt_struct_impl::special() { return false; }
+
+void _ntl_crt_struct_impl::insert(long i, _ntl_verylong m)
+{
+   _ntl_zcopy(m, &v[i]);
+}
+
+
+
+void _ntl_crt_struct_impl::eval(_ntl_verylong *x, const long *b,
+                                _ntl_tmp_vec *tmp_vec)
+{
+   _ntl_verylong xx, yy;
+   long i, sx;
+
+   sx = sbuf;
+
+   _ntl_zsetlength(x, sx);
+   xx = *x;
+
+
+   for (i = 1; i <= sx; i++)
+      xx[i] = 0;
+
+   xx++;
+
+   for (i = 0; i < n; i++) {
+      yy = v[i];
+
+      if (!yy || !b[i]) continue;
+
+      zaddmul(b[i], xx, yy);
+      yy = xx + yy[0];
+
+      if ((*yy) >= NTL_RADIX) {
+         (*yy) -= NTL_RADIX;
+         yy++;
+         while ((*yy) == NTL_RADIX-1) {
+            *yy = 0;
+            yy++;
+         }
+         (*yy)++;
+      }
+   }
+
+   xx--;
+   while (sx > 1 && xx[sx] == 0) sx--;
+   xx[0] = sx;
+}
+
+_ntl_tmp_vec *_ntl_crt_struct_impl::extract() { return 0; }
+_ntl_tmp_vec *_ntl_crt_struct_impl::fetch() { return 0; }
+
+
+
+/* Data structures and algorithms for multi-modulus remaindering */
+
+
+
+class _ntl_rem_struct_impl_basic : public _ntl_rem_struct {
+public:
+   long n;
+   Vec<long> primes;
+
+   void eval(long *x, _ntl_verylong a, _ntl_tmp_vec *tmp_vec);
+   _ntl_tmp_vec *fetch();
+};
+
+
+
+
+
+
+
+#if (defined(NTL_TBL_REM) || defined(NTL_TBL_REM_LL))
+
+
+class _ntl_rem_struct_impl_tbl : public _ntl_rem_struct {
+public:
+   long n;
+   Vec<long> primes;
+   Unique2DArray<long> tbl;
+
+   void eval(long *x, _ntl_verylong a, _ntl_tmp_vec *tmp_vec);
+   _ntl_tmp_vec *fetch();
+};
+
+#endif
+
+
+
+_ntl_rem_struct *
+_ntl_rem_struct_build(long n, _ntl_verylong modulus, long (*p)(long))
+{
+#if (defined(NTL_TBL_REM) || defined(NTL_TBL_REM_LL))
+
+   /* we should not use this for extremely large moduli,
+      as the space is quadratic.  On a 64-bit machine,
+      the bound of 1000 limits table size to about 4MB,
+      and allows moduli of up to about 25,000 bits.
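+      (To sanity-check that arithmetic, assuming 8-byte longs and
+      NTL_NBITS around 50: a 25,000-bit modulus spans about 500 limbs,
+      and 1000 primes x 500 entries x 8 bytes comes to about 4MB.)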
*/ + + if (n < 1000) { + UniquePtr<_ntl_rem_struct_impl_tbl> res; + res.make(); + + + long i; + long t, t1, j, q; + + long sz = modulus[0]; + res->n = n; + res->primes.SetLength(n); + + for (i = 0; i < n; i++) + res->primes[i] = p(i); + + res->tbl.SetDims(n, sz); + + for (i = 0; i < n; i++) { + q = res->primes[i]; + t = (((long)1) << NTL_NBITS) % q; + t1 = 1; + res->tbl[i][0] = 1; + for (j = 1; j < sz; j++) { + SP_MUL_MOD(t1, t1, t, q); + res->tbl[i][j] = t1; + } + } + + return res.release(); + } + +#endif + + + { + UniquePtr<_ntl_rem_struct_impl_basic> res; + res.make(); + + long i; + + res->n = n; + res->primes.SetLength(n); + for (i = 0; i < n; i++) + res->primes[i] = p(i); + + return res.release(); + } +} + + + + +void _ntl_rem_struct_impl_basic::eval(long *x, _ntl_verylong a, _ntl_tmp_vec *tmp_vec) +{ + _ntl_zmultirem(a, n, &primes[0], x); +} + +_ntl_tmp_vec *_ntl_rem_struct_impl_basic::fetch() { return 0; } + + + + +#if (defined(NTL_TBL_REM) || defined(NTL_TBL_REM_LL)) +void _ntl_rem_struct_impl_tbl::eval(long *x, _ntl_verylong a, _ntl_tmp_vec *tmp_vec) +{ + multirem3(a, n, &primes[0], tbl.get(), x); +} + +_ntl_tmp_vec *_ntl_rem_struct_impl_tbl::fetch() { return 0; } +#endif + + + +void +_ntl_zaorsmul_1(_ntl_verylong x, long y, long sub, _ntl_verylong *ww) +{ + CRegister(tmp); + + if (y == 0) return; + + if (y == 1) { + if (sub) + _ntl_zsub(*ww, x, ww); + else + _ntl_zadd(*ww, x, ww); + return; + } + + if (y == -1) { + if (!sub) + _ntl_zsub(*ww, x, ww); + else + _ntl_zadd(*ww, x, ww); + return; + } + + _ntl_zsmul(x, y, &tmp); + if (sub) + _ntl_zsub(*ww, tmp, ww); + else + _ntl_zadd(*ww, tmp, ww); +} + + +void +_ntl_zsaddmul(_ntl_verylong x, long y, _ntl_verylong *ww) +{ + _ntl_zaorsmul_1(x, y, 0, ww); +} + +void +_ntl_zssubmul(_ntl_verylong x, long y, _ntl_verylong *ww) +{ + _ntl_zaorsmul_1(x, y, 1, ww); +} + + + + + + +void +_ntl_zaorsmul(_ntl_verylong x, _ntl_verylong y, long sub, _ntl_verylong *ww) +{ + CRegister(tmp); + + _ntl_zmul(x, y, &tmp); + if (sub) + _ntl_zsub(*ww, tmp, ww); + else + _ntl_zadd(*ww, tmp, ww); +} + + +void +_ntl_zaddmul(_ntl_verylong x, _ntl_verylong y, _ntl_verylong *ww) +{ + _ntl_zaorsmul(x, y, 0, ww); +} + +void +_ntl_zsubmul(_ntl_verylong x, _ntl_verylong y, _ntl_verylong *ww) +{ + _ntl_zaorsmul(x, y, 1, ww); +} + + +// boilerplate to provide compatible interface +class _ntl_reduce_struct_plain : public _ntl_reduce_struct { +public: + _ntl_verylong_wrapped N; + + void eval(_ntl_verylong *rres, _ntl_verylong *TT) + { + _ntl_zmod(*TT, N, rres); + } + + void adjust(_ntl_verylong *x) { } +}; + +_ntl_reduce_struct * +_ntl_reduce_struct_build(_ntl_verylong modulus, _ntl_verylong excess) +{ + UniquePtr<_ntl_reduce_struct_plain> C; + C.make(); + + _ntl_zcopy(modulus, &C->N); + + return C.release(); +} + + + + +// general preconditioned remainder + +struct _ntl_general_rem_one_struct { }; + +_ntl_general_rem_one_struct * +_ntl_general_rem_one_struct_build(long p) +{ + return 0; +} + +long +_ntl_general_rem_one_struct_apply(NTL_verylong a, long p, _ntl_general_rem_one_struct *pinfo) +{ + return _ntl_zsmod(a, p); +} + +void +_ntl_general_rem_one_struct_delete(_ntl_general_rem_one_struct *pinfo) +{ +} + + diff --git a/thirdparty/linux/ntl/src/cfile b/thirdparty/linux/ntl/src/cfile new file mode 100644 index 0000000000..bca8338894 --- /dev/null +++ b/thirdparty/linux/ntl/src/cfile @@ -0,0 +1,632 @@ + +#ifndef NTL_config__H +#define NTL_config__H + +/************************************************************************* + + NTL Configuration File + 
---------------------- + +This file may be modified prior to building NTL so as to specify +some basic configuration options, and to customize +how code is generated so as to improve performance. + +The Basic Configuration Options must be set by hand. If you use the +configuration wizard, then these flags should be set before +the installation process begins; there values will be retained +by the wizard. + +The Performance Options can be set either by hand, by editing this +file, or (on most Unix platforms) can be set automatically using +the configuration wizard which runs when NTL is installed. + +All NTL header files include this file. +By setting these flags here, instead of on the compiler command line, +it is easier to guarantee that NTL library and client code use +consistent settings. + + + How to do it + ------------ + +To set a flag, just replace the pre-processor directive +'if 0' by 'if 1' for that flag, which causes the appropriate macro +to be defined. Of course, to unset a flag, just replace the +'if 1' by an 'if 0'. + +You can also do this more conveniently via the command line +using the configure script. + + + *************************************************************************/ + + + +/************************************************************************* + * + * Basic Configuration Options + * + *************************************************************************/ + + + /* None of these flags are set by the configuration wizard; + * they must be set by hand, before installation begins. + */ + + +#if @{NTL_LEGACY_NO_NAMESPACE} +#define NTL_LEGACY_NO_NAMESPACE + +/* + * By default, NTL components are declared inside the namespace NTL. + * Set this flag if you want to instead have these components + * declared in the global namespace. This is for backward + * compatibility only -- not recommended. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if @{NTL_LEGACY_INPUT_ERROR} +#define NTL_LEGACY_INPUT_ERROR + +/* + * Also for backward compatibility. Set if you want input + * operations to abort on error, instead of just setting the + * "fail bit" of the input stream. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + + +#endif + +#if @{NTL_DISABLE_TLS_HACK} +#define NTL_DISABLE_TLS_HACK + +/* Set if you want to compile NTL without "TLS hack" + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if @{NTL_ENABLE_TLS_HACK} +#define NTL_ENABLE_TLS_HACK + +/* Set if you want to compile NTL with "TLS hack" + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if @{NTL_THREADS} +#define NTL_THREADS + +/* Set if you want to compile NTL as a thread-safe library. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if @{NTL_EXCEPTIONS} +#define NTL_EXCEPTIONS + +/* Set if you want to compile NTL with exceptions enabled + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if @{NTL_THREAD_BOOST} +#define NTL_THREAD_BOOST + +/* Set if you want to compile NTL to exploit threads internally. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif +# + +#if @{NTL_GMP_LIP} +#define NTL_GMP_LIP + +/* + * Use this flag if you want to use GMP as the long integer package. + * This can result in significantly faster code on some platforms. + * It requires that the GMP package (version >= 3.1) has already been + * installed. 
You will also have to set the variables GMP_OPT_INCDIR, + * GMP_OPT_LIBDIR, GMP_OPT_LIB in the makefile (these are set automatically + * by the confiuration script when you pass the flag NTL_GMP_LIP=on + * to that script. + * + * Beware that setting this flag can break some very old NTL codes. + * + * To re-build after changing this flag: + * rm *.o; make setup3; make ntl.a + * You may also have to edit the makefile to modify the variables + * GMP_OPT_INCDIR, GMP_OPT_LIBDIR, and GMP_OPT_LIB. + */ + +#endif + +#if @{NTL_GF2X_LIB} +#define NTL_GF2X_LIB + +/* + * Use this flag if you want to use the gf2x library for + * faster GF2X arithmetic. + * This can result in significantly faster code, especially + * when working with polynomials of huge degree. + * You will also have to set the variables GF2X_OPT_INCDIR, + * GF2X_OPT_LIBDIR, GF2X_OPT_LIB in the makefile (these are set automatically + * by the confiuration script when you pass the flag NTL_GF2X_LIB=on + * to that script. + * + * To re-build after changing this flag: + * rm GF2X.o; GF2X1.o; make ntl.a + * You may also have to edit the makefile to modify the variables + * GF2X_OPT_INCDIR, GF2X_OPT_LIBDIR, and GF2X_OPT_LIB. + */ + +#endif + + +#if @{FLAG_LONG_LONG_TYPE} +#define NTL_LONG_LONG_TYPE @{NTL_LONG_LONG_TYPE} + +/* + * If you set the flag NTL_LONG_LONG, then the value of + * NTL_LONG_LONG_TYPE will be used + * to declare 'double word' signed integer types. + * Irrelevant when NTL_GMP_LIP is set. + * If left undefined, some "ifdef magic" will attempt + * to find the best choice for your platform, depending + * on the compiler and wordsize. On 32-bit machines, + * this is usually 'long long'. + * + * To re-build after changing this flag: rm lip.o; make ntl.a + */ + +#endif + + +#if @{FLAG_UNSIGNED_LONG_LONG_TYPE} +#define NTL_UNSIGNED_LONG_LONG_TYPE @{NTL_UNSIGNED_LONG_LONG_TYPE} + +/* + * If you set the flag NTL_SPMM_ULL, then the value of + * NTL_UNSIGNED_LONG_LONG_TYPE will be used + * to declare 'double word' unsigned integer types. + * If left undefined, some "ifdef magic" will attempt + * to find the best choice for your platform, depending + * on the compiler and wordsize. On 32-bit machines, + * this is usually 'unsigned long long'. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if @{NTL_CLEAN_INT} +#define NTL_CLEAN_INT + +/* + * This will disallow the use of some non-standard integer arithmetic + * that may improve performance somewhat. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if @{NTL_CLEAN_PTR} +#define NTL_CLEAN_PTR + +/* + * This will disallow the use of some non-standard pointer arithmetic + * that may improve performance somewhat. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if @{NTL_RANGE_CHECK} +#define NTL_RANGE_CHECK + +/* + * This will generate vector subscript range-check code. + * Useful for debugging, but it slows things down of course. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + + + + +#if @{NTL_NO_INIT_TRANS} +#define NTL_NO_INIT_TRANS + +/* + * Without this flag, NTL uses a special code sequence to avoid + * copying large objects in return statements. However, if your + * compiler optimizes away the return of a *named* local object, + * this is not necessary, and setting this flag will result + * in *slightly* more compact and efficient code. 
Although
+ * the emerging C++ standard allows compilers to perform
+ * this optimization, I know of none that currently do.
+ * Most will avoid copying *temporary* objects in return statements,
+ * and NTL's default code sequence exploits this fact.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+
+#if @{NTL_X86_FIX}
+#define NTL_X86_FIX
+
+/*
+ * Forces the "x86 floating point fix", overriding the default behavior.
+ * By default, NTL will apply the "fix" if it looks like it is
+ * necessary, and if it knows how to fix it.
+ * The problem addressed here is that x86 processors sometimes
+ * run in a mode where FP registers have more precision than doubles.
+ * This will cause code in quad_float.c some trouble.
+ * NTL can normally correctly detect the problem, and fix it,
+ * so you shouldn't need to worry about this or the next flag.
+
+ * To re-build after changing this flag: rm quad_float.o; make ntl.a
+ *
+ */
+
+#elif @{NTL_NO_X86_FIX}
+#define NTL_NO_X86_FIX
+/*
+ * Forces no "x86 floating point fix", overriding the default behavior.
+
+ * To re-build after changing this flag: rm quad_float.o; make ntl.a
+ */
+
+#endif
+
+
+
+#if @{NTL_LEGACY_SP_MULMOD}
+#define NTL_LEGACY_SP_MULMOD
+
+/* Forces legacy single-precision MulMod implementation.
+ */
+
+#endif
+
+
+#if @{NTL_DISABLE_LONGDOUBLE}
+#define NTL_DISABLE_LONGDOUBLE
+
+/* Explicitly disables use of long double arithmetic
+ */
+
+#endif
+
+
+#if @{NTL_DISABLE_LONGLONG}
+#define NTL_DISABLE_LONGLONG
+
+/* Explicitly disables use of long long arithmetic
+ */
+
+#endif
+
+#if @{NTL_DISABLE_LL_ASM}
+#define NTL_DISABLE_LL_ASM
+
+/* Explicitly disables use of inline assembly as a replacement
+ * for long long arithmetic.
+ */
+
+#endif
+
+
+#if @{NTL_MAXIMIZE_SP_NBITS}
+#define NTL_MAXIMIZE_SP_NBITS
+
+/* Allows for 62-bit single-precision moduli on 64-bit platforms.
+ * By default, such moduli are restricted to 60 bits, which
+ * usually gives slightly better performance across a range
+ * of parameters.
+ */
+
+#endif
+
+/*************************************************************************
+ *
+ * Performance Options
+ *
+ *************************************************************************/
+
+
+/* One can choose one of three different strategies for long integer
+ * arithmetic: the default, NTL_LONG_LONG, or NTL_AVOID_FLOAT.
+ * The configuration wizard will choose among them.
+ *
+ */
+
+#if @{NTL_LONG_LONG}
+#define NTL_LONG_LONG
+
+/*
+ *
+ * For platforms that support it, this flag can be set to cause
+ * the low-level multiplication code to use the type "long long",
+ * which may yield a significant performance gain,
+ * but on others, it can yield no improvement and can even
+ * slow things down.
+ *
+ *
+ * See below (NTL_LONG_LONG_TYPE) for how to use a type name
+ * other than "long long".
+ *
+ * If you set NTL_LONG_LONG, you might also want to set
+ * the flag NTL_TBL_REM (see below).
+ *
+ * To re-build after changing this flag: rm lip.o; make ntl.a
+ */
+
+#elif @{NTL_AVOID_FLOAT}
+#define NTL_AVOID_FLOAT
+
+/*
+ *
+ * On machines with slow floating point or---more commonly---slow int/float
+ * conversions, this flag can lead to faster code.
+ *
+ * If you set NTL_AVOID_FLOAT, you should probably also
+ * set NTL_TBL_REM (see below).
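+ * (For example, on such a machine one might run the configure
+ * script as:  ./configure NTL_AVOID_FLOAT=on NTL_TBL_REM=on)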
+ *
+ * To re-build after changing this flag: rm lip.o; make ntl.a
+ */
+
+#endif
+
+
+/* There are three strategies to implement single-precision
+ * modular multiplication with preconditioning (see the MulModPrecon
+ * function in the ZZ module): the default, NTL_SPMM_ULL,
+ * and NTL_SPMM_ASM.
+ * This plays a crucial role in the "small prime FFT" used to
+ * implement polynomial arithmetic, and in other CRT-based methods
+ * (such as linear algebra over ZZ), as well as polynomial and matrix
+ * arithmetic over zz_p.
+ */
+
+
+
+#if @{NTL_SPMM_ULL}
+#define NTL_SPMM_ULL
+
+/* This also causes an "all integer"
+ * implementation of MulModPrecon to be used.
+ * It is usually a faster implementation,
+ * but it is not entirely portable.
+ * It relies on double-word unsigned multiplication
+ * (see NTL_UNSIGNED_LONG_LONG_TYPE above).
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#elif @{NTL_SPMM_ASM}
+#define NTL_SPMM_ASM
+
+/* Like the previous flag, this also causes an "all integer"
+ * implementation of MulModPrecon to be used.
+ * It relies on assembler code to do double-word unsigned multiplication.
+ * This is only supported on select machines under GCC.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+
+#endif
+
+
+
+/*
+ * The following two flags provide additional control for how the
+ * FFT modulo single-precision primes is implemented.
+ */
+
+#if @{NTL_FFT_BIGTAB}
+#define NTL_FFT_BIGTAB
+
+/*
+ * Precomputed tables are used to store all the roots of unity
+ * used in FFT computations.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+
+#endif
+
+
+#if @{NTL_FFT_LAZYMUL}
+#define NTL_FFT_LAZYMUL
+
+/*
+ * This flag only has an effect when combined with
+ * either the NTL_SPMM_ULL or NTL_SPMM_ASM flags.
+ * When set, a "lazy multiplication" strategy due to David Harvey is used:
+ * see his paper "FASTER ARITHMETIC FOR NUMBER-THEORETIC TRANSFORMS".
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+
+#endif
+
+
+
+
+
+/* The next six flags NTL_AVOID_BRANCHING, NTL_TBL_REM, NTL_TBL_REM_LL,
+ * NTL_GF2X_ALTCODE, NTL_GF2X_ALTCODE1, and NTL_GF2X_NOINLINE
+ * are also set by the configuration wizard.
+ */
+
+
+
+#if @{NTL_AVOID_BRANCHING}
+#define NTL_AVOID_BRANCHING
+
+/*
+ * With this option, branches are replaced at several
+ * key points with equivalent code using shifts and masks.
+ * It may speed things up on machines with
+ * deep pipelines and high branch penalties.
+ * This flag mainly affects the implementation of the
+ * single-precision modular arithmetic routines.
+ *
+ * To re-build after changing this flag: rm *.o; make ntl.a
+ */
+
+#endif
+
+
+
+#if @{NTL_TBL_REM}
+#define NTL_TBL_REM
+
+/*
+ *
+ * With this flag, some divisions are avoided in the
+ * ZZ_pX multiplication routines.  If you use the NTL_AVOID_FLOAT
+ * or NTL_LONG_LONG flags, then you should probably use this one too.
+ *
+ * To re-build after changing this flag:
+ *   rm lip.o; make ntl.a
+ */
+
+#endif
+
+
+#if @{NTL_TBL_REM_LL}
+#define NTL_TBL_REM_LL
+
+/*
+ *
+ * This forces the LONG_LONG version of TBL_REM.
+ *
+ * Irrelevant when NTL_GMP_LIP is set.
+ *
+ * To re-build after changing this flag:
+ *   rm lip.o; make ntl.a
+ */
+
+#endif
+
+
+#if @{NTL_CRT_ALTCODE}
+#define NTL_CRT_ALTCODE
+
+/*
+ * Employs an alternative CRT strategy.
+ * Only relevant with GMP.
+ * Seems to be marginally faster on some x86_64 platforms.
+ *
+ * To re-build after changing this flag:
+ *   rm lip.o; make ntl.a
+ */
+
+#endif
+
+#if @{NTL_CRT_ALTCODE_SMALL}
+#define NTL_CRT_ALTCODE_SMALL
+
+/*
+ * Employs an alternative CRT strategy for small moduli.
+ * Only relevant with GMP.
+ * Seems to be marginally faster on some x86_64 platforms.
+ *
+ * To re-build after changing this flag:
+ *   rm lip.o; make ntl.a
+ */
+
+#endif
+
+
+#if @{NTL_GF2X_ALTCODE}
+#define NTL_GF2X_ALTCODE
+
+/*
+ * With this option, the default strategy for implementing low-level
+ * GF2X multiplication is replaced with an alternative strategy.
+ * This alternative strategy seems to work better on RISC machines
+ * with deep pipelines and high branch penalties (like a powerpc),
+ * but does no better (or even worse) on x86s.
+ *
+ * To re-build after changing this flag: rm GF2X.o; make ntl.a
+ */
+
+#elif @{NTL_GF2X_ALTCODE1}
+#define NTL_GF2X_ALTCODE1
+
+
+/*
+ * Yet another alternative strategy for implementing GF2X
+ * multiplication.
+ *
+ * To re-build after changing this flag: rm GF2X.o; make ntl.a
+ */
+
+
+#endif
+
+#if @{NTL_GF2X_NOINLINE}
+#define NTL_GF2X_NOINLINE
+
+/*
+ * By default, the low-level GF2X multiplication routine is inlined.
+ * This can potentially lead to some trouble on some platforms,
+ * and you can override the default by setting this flag.
+ *
+ * To re-build after changing this flag: rm GF2X.o; make ntl.a
+ */
+
+#endif
+
+
+#if @{NTL_PCLMUL}
+#define NTL_PCLMUL
+
+/*
+ * Use this flag for faster GF2X arithmetic.
+ * This enables the use of the PCLMUL instruction on x86-64
+ * machines.
+ *
+ * To re-build after changing this flag:
+ *   rm GF2X.o; make ntl.a
+ */
+
+#endif
+
+
+
+@{WIZARD_HACK}
+
+
+#endif
diff --git a/thirdparty/linux/ntl/src/cfileout b/thirdparty/linux/ntl/src/cfileout
new file mode 100644
index 0000000000..aaf48aa5a6
--- /dev/null
+++ b/thirdparty/linux/ntl/src/cfileout
@@ -0,0 +1,632 @@
+
+#ifndef NTL_config__H
+#define NTL_config__H
+
+/*************************************************************************
+
+   NTL Configuration File
+   ----------------------
+
+This file may be modified prior to building NTL so as to specify
+some basic configuration options, and to customize
+how code is generated so as to improve performance.
+
+The Basic Configuration Options must be set by hand.  If you use the
+configuration wizard, then these flags should be set before
+the installation process begins; their values will be retained
+by the wizard.
+
+The Performance Options can be set either by hand, by editing this
+file, or (on most Unix platforms) can be set automatically using
+the configuration wizard which runs when NTL is installed.
+
+All NTL header files include this file.
+By setting these flags here, instead of on the compiler command line,
+it is easier to guarantee that NTL library and client code use
+consistent settings.
+
+
+  How to do it
+  ------------
+
+To set a flag, just replace the pre-processor directive
+'if 0' by 'if 1' for that flag, which causes the appropriate macro
+to be defined.  Of course, to unset a flag, just replace the
+'if 1' by an 'if 0'.
+
+You can also do this more conveniently via the command line
+using the configure script.
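+(For instance, a thread-safe GMP-based build could be configured with
+a command line like:  ./configure NTL_THREADS=on NTL_GMP_LIP=on)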
+ + + *************************************************************************/ + + + +/************************************************************************* + * + * Basic Configuration Options + * + *************************************************************************/ + + + /* None of these flags are set by the configuration wizard; + * they must be set by hand, before installation begins. + */ + + +#if 0 +#define NTL_LEGACY_NO_NAMESPACE + +/* + * By default, NTL components are declared inside the namespace NTL. + * Set this flag if you want to instead have these components + * declared in the global namespace. This is for backward + * compatibility only -- not recommended. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_LEGACY_INPUT_ERROR + +/* + * Also for backward compatibility. Set if you want input + * operations to abort on error, instead of just setting the + * "fail bit" of the input stream. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + + +#endif + +#if 0 +#define NTL_DISABLE_TLS_HACK + +/* Set if you want to compile NTL without "TLS hack" + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if 0 +#define NTL_ENABLE_TLS_HACK + +/* Set if you want to compile NTL with "TLS hack" + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if 0 +#define NTL_THREADS + +/* Set if you want to compile NTL as a thread-safe library. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_EXCEPTIONS + +/* Set if you want to compile NTL with exceptions enabled + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if 0 +#define NTL_THREAD_BOOST + +/* Set if you want to compile NTL to exploit threads internally. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif +# + +#if 1 +#define NTL_GMP_LIP + +/* + * Use this flag if you want to use GMP as the long integer package. + * This can result in significantly faster code on some platforms. + * It requires that the GMP package (version >= 3.1) has already been + * installed. You will also have to set the variables GMP_OPT_INCDIR, + * GMP_OPT_LIBDIR, GMP_OPT_LIB in the makefile (these are set automatically + * by the confiuration script when you pass the flag NTL_GMP_LIP=on + * to that script. + * + * Beware that setting this flag can break some very old NTL codes. + * + * To re-build after changing this flag: + * rm *.o; make setup3; make ntl.a + * You may also have to edit the makefile to modify the variables + * GMP_OPT_INCDIR, GMP_OPT_LIBDIR, and GMP_OPT_LIB. + */ + +#endif + +#if 0 +#define NTL_GF2X_LIB + +/* + * Use this flag if you want to use the gf2x library for + * faster GF2X arithmetic. + * This can result in significantly faster code, especially + * when working with polynomials of huge degree. + * You will also have to set the variables GF2X_OPT_INCDIR, + * GF2X_OPT_LIBDIR, GF2X_OPT_LIB in the makefile (these are set automatically + * by the confiuration script when you pass the flag NTL_GF2X_LIB=on + * to that script. + * + * To re-build after changing this flag: + * rm GF2X.o; GF2X1.o; make ntl.a + * You may also have to edit the makefile to modify the variables + * GF2X_OPT_INCDIR, GF2X_OPT_LIBDIR, and GF2X_OPT_LIB. 
+ */ + +#endif + + +#if 0 +#define NTL_LONG_LONG_TYPE long long + +/* + * If you set the flag NTL_LONG_LONG, then the value of + * NTL_LONG_LONG_TYPE will be used + * to declare 'double word' signed integer types. + * Irrelevant when NTL_GMP_LIP is set. + * If left undefined, some "ifdef magic" will attempt + * to find the best choice for your platform, depending + * on the compiler and wordsize. On 32-bit machines, + * this is usually 'long long'. + * + * To re-build after changing this flag: rm lip.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_UNSIGNED_LONG_LONG_TYPE unsigned long long + +/* + * If you set the flag NTL_SPMM_ULL, then the value of + * NTL_UNSIGNED_LONG_LONG_TYPE will be used + * to declare 'double word' unsigned integer types. + * If left undefined, some "ifdef magic" will attempt + * to find the best choice for your platform, depending + * on the compiler and wordsize. On 32-bit machines, + * this is usually 'unsigned long long'. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_CLEAN_INT + +/* + * This will disallow the use of some non-standard integer arithmetic + * that may improve performance somewhat. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + +#if 0 +#define NTL_CLEAN_PTR + +/* + * This will disallow the use of some non-standard pointer arithmetic + * that may improve performance somewhat. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_RANGE_CHECK + +/* + * This will generate vector subscript range-check code. + * Useful for debugging, but it slows things down of course. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + + + + +#if 0 +#define NTL_NO_INIT_TRANS + +/* + * Without this flag, NTL uses a special code sequence to avoid + * copying large objects in return statements. However, if your + * compiler optimizes away the return of a *named* local object, + * this is not necessary, and setting this flag will result + * in *slightly* more compact and efficient code. Although + * the emeriging C++ standard allows compilers to perform + * this optimization, I know of none that currently do. + * Most will avoid copying *temporary* objects in return statements, + * and NTL's default code sequence exploits this fact. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_X86_FIX + +/* + * Forces the "x86 floating point fix", overriding the default behavior. + * By default, NTL will apply the "fix" if it looks like it is + * necessary, and if knows how to fix it. + * The problem addressed here is that x86 processors sometimes + * run in a mode where FP registers have more precision than doubles. + * This will cause code in quad_float.c some trouble. + * NTL can normally correctly detect the problem, and fix it, + * so you shouldn't need to worry about this or the next flag. + + * To re-build after changing this flag: rm quad_float.o; make ntl.a + * + */ + +#elif 0 +#define NTL_NO_X86_FIX +/* + * Forces no "x86 floating point fix", overriding the default behavior. + + * To re-build after changing this flag: rm quad_float.o; make ntl.a + */ + +#endif + + + +#if 0 +#define NTL_LEGACY_SP_MULMOD + +/* Forces legacy single-precision MulMod implementation. 
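+ * (Provided for compatibility with older client code that depends
+ * on the previous MulMod behavior.)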
+ */ + +#endif + + +#if 0 +#define NTL_DISABLE_LONGDOUBLE + +/* Explicitly disables us of long double arithmetic + */ + +#endif + + +#if 0 +#define NTL_DISABLE_LONGLONG + +/* Explicitly disables us of long long arithmetic + */ + +#endif + +#if 0 +#define NTL_DISABLE_LL_ASM + +/* Explicitly disables us of inline assembly as a replacement + * for long lobg arithmetic. + */ + +#endif + + +#if 0 +#define NTL_MAXIMIZE_SP_NBITS + +/* Allows for 62-bit single-precision moduli on 64-bit platforms. + * By default, such moduli are restricted to 60 bits, which + * usually gives slightly better performance across a range of + * of parameters. + */ + +#endif + +/************************************************************************* + * + * Performance Options + * + *************************************************************************/ + + +/* One can choose one of three different stragtegies for long integer + * arithmetic: the default, NTL_LONG_LONG, or NTL_AVOID_FLOAT. + * The configuration wizard will choose among them. + * + */ + +#if 0 +#define NTL_LONG_LONG + +/* + * + * For platforms that support it, this flag can be set to cause + * the low-level multiplication code to use the type "long long", + * which may yield a significant performance gain, + * but on others, it can yield no improvement and can even + * slow things down. + * + * + * See below (NTL_LONG_LONG_TYPE) for how to use a type name + * other than "long long". + * + * If you set NTL_LONG_LONG, you might also want to set + * the flag NTL_TBL_REM (see below). + * + * To re-build after changing this flag: rm lip.o; make ntl.a + */ + +#elif 0 +#define NTL_AVOID_FLOAT + +/* + * + * On machines with slow floating point or---more comminly---slow int/float + * conversions, this flag can lead to faster code. + * + * If you set NTL_AVOID_FLOAT, you should probably also + * set NTL_TBL_REM (see below). + * + * To re-build after changing this flag: rm lip.o; make ntl.a + */ + +#endif + + +/* There are three strategies to implmement single-precision + * modular multiplication with precondinition (see the MulModPrecon + * function in the ZZ module): the default, and NTL_SPMM_ULL, + * and NTL_SPMM_ASM. + * This plays a crucial role in the "small prime FFT" used to + * implement polynomial arithmetic, and in other CRT-based methods + * (such as linear algebra over ZZ), as well as polynomial and matrix + * arithmetic over zz_p. + */ + + + +#if 0 +#define NTL_SPMM_ULL + +/* This also causes an "all integer" + * implementation of MulModPrecon to be used. + * It us usually a faster implementation, + * but it is not enturely portable. + * It relies on double-word unsigned multiplication + * (see NTL_UNSIGNED_LONG_LONG_TYPE above). + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#elif 0 +#define NTL_SPMM_ASM + +/* Like this previous flag, this also causes an "all integer" + * implementation of MulModPrecon to be used. + * It relies assembler code to do double-word unsigned multiplication. + * This is only supported on a select mechines under GCC. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + + +#endif + + + +/* + * The following two flags provide additional control for how the + * FFT modulo single-precision primes is implemented. + */ + +#if 0 +#define NTL_FFT_BIGTAB + +/* + * Precomputed tables are used to store all the roots of unity + * used in FFT computations. 
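+ * (This trades memory for speed: the tables grow with the largest
+ * FFT sizes the program actually uses.)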
+ * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + + +#endif + + +#if 0 +#define NTL_FFT_LAZYMUL + +/* + * This flag only has an effect when combined with + * either the NTL_SPMM_ULL or NTL_SPMM_ASM flags. + * When set, a "lazy multiplication" strategy due to David Harvey: + * see his paper "FASTER ARITHMETIC FOR NUMBER-THEORETIC TRANSFORMS". + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + + +#endif + + + + + +/* The next six flags NTL_AVOID_BRANCHING, NTL_TBL_REM, NTL_TBL_REM_LL, + * NTL_GF2X_ALTCODE, NTL_GF2X_ALTCODE1, and NTL_GF2X_NOINLINE + * are also set by the configuration wizard. + */ + + + +#if 0 +#define NTL_AVOID_BRANCHING + +/* + * With this option, branches are replaced at several + * key points with equivalent code using shifts and masks. + * It may speed things up on machines with + * deep pipelines and high branch penalities. + * This flag mainly affects the implementation of the + * single-precision modular arithmetic routines. + * + * To re-build after changing this flag: rm *.o; make ntl.a + */ + +#endif + + + +#if 0 +#define NTL_TBL_REM + +/* + * + * With this flag, some divisions are avoided in the + * ZZ_pX multiplication routines. If you use the NTL_AVOID_FLOAT + * or NTL_LONG_LONG flags, then you should probably use this one too. + * + * To re-build after changing this flag: + * rm lip.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_TBL_REM_LL + +/* + * + * This forces the LONG_LONG version if TBL_REM + * + * Irrelevent when NTL_GMP_LIP is set. + * + * To re-build after changing this flag: + * rm lip.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_CRT_ALTCODE + +/* + * Employs an alternative CRT strategy. + * Only relevant with GMP. + * Seems to be marginally faster on some x86_64 platforms. + * + * To re-build after changing this flag: + * rm lip.o; make ntl.a + */ + +#endif + +#if 0 +#define NTL_CRT_ALTCODE_SMALL + +/* + * Employs an alternative CRT strategy for small moduli. + * Only relevant with GMP. + * Seems to be marginally faster on some x86_64 platforms. + * + * To re-build after changing this flag: + * rm lip.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_GF2X_ALTCODE + +/* + * With this option, the default strategy for implmenting low-level + * GF2X multiplication is replaced with an alternative strategy. + * This alternative strategy seems to work better on RISC machines + * with deep pipelines and high branch penalties (like a powerpc), + * but does no better (or even worse) on x86s. + * + * To re-build after changing this flag: rm GF2X.o; make ntl.a + */ + +#elif 0 +#define NTL_GF2X_ALTCODE1 + + +/* + * Yest another alternative strategy for implementing GF2X + * multiplication. + * + * To re-build after changing this flag: rm GF2X.o; make ntl.a + */ + + +#endif + +#if 0 +#define NTL_GF2X_NOINLINE + +/* + * By default, the low-level GF2X multiplication routine in inlined. + * This can potentially lead to some trouble on some platforms, + * and you can override the default by setting this flag. + * + * To re-build after changing this flag: rm GF2X.o; make ntl.a + */ + +#endif + + +#if 0 +#define NTL_PCLMUL + +/* + * Use this flag for faster GF2X arithmetc. + * This enables the use of the PCLMUL instruction on x86-64 + * machines. 
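+ * (PCLMUL computes a carry-less product, which is exactly
+ * multiplication in GF(2)[X].)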
+ *
+ * To re-build after changing this flag:
+ *   rm GF2X.o; make ntl.a
+ */
+
+#endif
+
+
+
+
+
+
+#endif
diff --git a/thirdparty/linux/ntl/src/configure b/thirdparty/linux/ntl/src/configure
new file mode 100644
index 0000000000..4edfc6ff37
--- /dev/null
+++ b/thirdparty/linux/ntl/src/configure
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+# This is just a shell script that calls perl.
+# Since perl may be located in a weird place, this
+# should be more portable than using a direct "shebang".
+
+# Also, some shells do not handle "$@" correctly when
+# no options are supplied, so this is handled as a special case.
+
+
+
+
+if test $# -ne 0
+then
+   perl DoConfig "$@"
+else
+   perl DoConfig
+fi
+
diff --git a/thirdparty/linux/ntl/src/ctools.c b/thirdparty/linux/ntl/src/ctools.c
new file mode 100644
index 0000000000..d3626d64dd
--- /dev/null
+++ b/thirdparty/linux/ntl/src/ctools.c
@@ -0,0 +1,92 @@
+
+#include <NTL/ctools.h>
+
+
+#include <cstdlib>
+#include <cmath>
+
+using namespace std;
+
+
+/*
+ * An IEEE double x is finite if and only if x - x == 0.
+ * The function _ntl_IsFinite implements this logic; however,
+ * it does not completely trust that an optimizing compiler
+ * really implements this correctly, and so it goes out of its way to
+ * confuse the compiler.  For a good compiler that respects IEEE floating
+ * point arithmetic, this may not be necessary, but it is better
+ * to be a bit paranoid.
+ *
+ * Like the routine _ntl_ForceToMem below, this routine has the
+ * side effect of forcing its argument into memory.
+ */
+
+NTL_CHEAP_THREAD_LOCAL volatile double _ntl_IsFinite__local = 0;
+
+long _ntl_IsFinite(double *p)
+{
+   _ntl_IsFinite__local = *p;
+   double x1 = _ntl_IsFinite__local;
+   double x2 = _ntl_IsFinite__local;
+   double x3 = x1-x2;
+   if (x3 != 0.0) return 0;
+   return 1;
+}
+
+
+/*
+ * On machines with wide floating point registers, the routine _ntl_ForceToMem
+ * is used to force a floating point double to a memory location.
+ * This relies on the "separate compilation" model, so that optimizing
+ * compilers cannot "optimize away" the whole thing.
+ */
+
+
+#if (NTL_EXT_DOUBLE)
+
+void _ntl_ForceToMem(double *p)
+{
+   _ntl_IsFinite__local = *p;
+   *p = _ntl_IsFinite__local;
+}
+
+
+#else
+
+void _ntl_ForceToMem(double *p)
+{ }
+
+#endif
+
+
+
+/*
+ * The routine _ntl_ldexp(x, e) is like the standard ldexp(x, e) routine,
+ * except that it takes a long exponent e, rather than an int exponent.
+ * Some care is taken to ensure reasonable overflow/underflow behavior.
+ * If the value of e does not fit into an int, then the result
+ * is x*infinity or x*0, as appropriate.
+ * Of course, this can only happen on platforms where long is wider
+ * than int (e.g., most 64-bit platforms).
+ *
+ * We go out of our way to hide the fact that we are multiplying/dividing
+ * by zero, so as to avoid unnecessary warnings, and to prevent
+ * overly-aggressive optimizing compilers from screwing things up.
+ */
+
+NTL_CHEAP_THREAD_LOCAL volatile double _ntl_ldexp_zero = 0.0;
+
+double _ntl_ldexp(double x, long e)
+{
+   if (e > NTL_MAX_INT)
+      return x/_ntl_ldexp_zero;
+   else if (e < NTL_MIN_INT)
+      return x*_ntl_ldexp_zero;
+   else
+      return ldexp(x, ((int) e));
+}
+
+
+
+
+
diff --git a/thirdparty/linux/ntl/src/def_makefile b/thirdparty/linux/ntl/src/def_makefile
new file mode 100644
index 0000000000..9412b05455
--- /dev/null
+++ b/thirdparty/linux/ntl/src/def_makefile
@@ -0,0 +1,575 @@
+###############################################################
+#
+# First, choose a C++ compiler, and set compiler flags.
+# This is done by setting the variables CXX and CXXFLAGS.
+#
+###############################################################
+
+
+
+CXX=g++
+# A C++ compiler, e.g., g++, CC, xlC
+
+
+CXXFLAGS=-g -O2
+# Flags for the C++ compiler
+
+CXXAUTOFLAGS=
+# Flags for the C++ compiler, automatically generated by configuration script
+
+
+AR=ar
+# command to make a library
+
+ARFLAGS=ruv
+# arguments for AR
+
+RANLIB=ranlib
+# set to echo if you want to disable it completely
+
+LDFLAGS=
+# flags for linking C++ programs
+
+LDLIBS=-lm
+# libraries for linking C++ programs
+
+CPPFLAGS=
+# arguments for the C preprocessor
+
+LIBTOOL=libtool
+# libtool command
+
+DEF_PREFIX=/usr/local
+
+PREFIX=$(DEF_PREFIX)
+LIBDIR=$(PREFIX)/lib
+INCLUDEDIR=$(PREFIX)/include
+DOCDIR=$(PREFIX)/share/doc
+# where to install NTL
+
+###############################################################
+#
+# Second, if you want to use GMP (the GNU Multi-Precision library),
+# define the variables GMP_OPT_INCDIR, GMP_OPT_LIBDIR, GMP_OPT_LIB below.
+# You also will have to set either NTL_GMP_LIP or NTL_GMP_HACK
+# in the config.h file.
+#
+# Using GMP can lead to significant performance gains on some
+# platforms. You can obtain GMP from http://www.swox.com/gmp.
+# Once you unpack it into a directory, just execute
+#    ./configure; make
+# in that directory.
+#
+###############################################################
+
+
+GMP_PREFIX=$(DEF_PREFIX)
+
+GMP_INCDIR=$(GMP_PREFIX)/include
+# directory containing gmp.h if using GMP
+
+GMP_LIBDIR=$(GMP_PREFIX)/lib
+# directory containing libgmp.a if using GMP
+
+GMP_OPT_INCDIR=# -I$(GMP_INCDIR) # GMPI
+GMP_OPT_LIBDIR=# -L$(GMP_LIBDIR) # GMPL
+GMP_OPT_LIB=-lgmp # GMP
+# uncomment these if using GMP
+
+
+###############################################################
+#
+# Third, if you want to use gf2x (a library for fast
+# multiplication over GF(2)[X]), you need to
+# define the variables GF2X_OPT_INCDIR, GF2X_OPT_LIBDIR, GF2X_OPT_LIB below.
+# You also will have to set NTL_GF2X_LIB
+# in the config.h file.
+#
+###############################################################
+
+GF2X_PREFIX=$(DEF_PREFIX)
+
+GF2X_INCDIR=$(GF2X_PREFIX)/include
+# directory containing gf2x.h if using gf2x
+
+GF2X_LIBDIR=$(GF2X_PREFIX)/lib
+# directory containing libgf2x.a
+
+GF2X_OPT_INCDIR=# -I$(GF2X_INCDIR) # GF2X
+GF2X_OPT_LIBDIR=# -L$(GF2X_LIBDIR) # GF2X
+GF2X_OPT_LIB=# -lgf2x # GF2X
+# uncomment these if using gf2x
+
+
+###############################################################
+#
+# Fourth, if you do not want to run the wizard that automagically
+# sets some performance related flags in config.h, set the flag below.
+#
+###############################################################
+
+
+WIZARD=on
+# Set to off if you want to bypass the wizard; otherwise, set to on.
+
+
+#################################################################
+#
+# That's it! You can ignore everything else in this file!
+# +################################################################# + + +# object files + +O01=FFT.o FacVec.o GF2.o GF2E.o GF2EX.o GF2EXFactoring.o GF2X.o GF2X1.o +O02=$(O01) GF2XFactoring.o GF2XVec.o GetTime.o GetPID.o HNF.o ctools.o LLL.o +O03=$(O02) LLL_FP.o LLL_QP.o LLL_RR.o LLL_XD.o RR.o WordVector.o ZZ.o ZZVec.o +O04=$(O03) ZZX.o ZZX1.o ZZXCharPoly.o ZZXFactoring.o ZZ_p.o ZZ_pE.o ZZ_pEX.o +O05=$(O04) ZZ_pEXFactoring.o ZZ_pX.o ZZ_pX1.o ZZ_pXCharPoly.o ZZ_pXFactoring.o +O06=$(O05) fileio.o lip.o lzz_p.o lzz_pE.o lzz_pEX.o lzz_pEXFactoring.o +O07=$(O06) lzz_pX.o lzz_pX1.o lzz_pXCharPoly.o lzz_pXFactoring.o +O08=$(O07) mat_GF2.o mat_GF2E.o mat_RR.o mat_ZZ.o mat_ZZ_p.o +O09=$(O08) mat_ZZ_pE.o mat_lzz_p.o mat_lzz_pE.o mat_poly_ZZ.o +O10=$(O09) mat_poly_ZZ_p.o mat_poly_lzz_p.o +O11=$(O10) +O12=$(O11) +O13=$(O12) quad_float.o tools.o vec_GF2.o vec_GF2E.o +O14=$(O13) vec_RR.o vec_ZZ.o vec_ZZ_p.o vec_ZZ_pE.o +O15=$(O14) vec_lzz_p.o vec_lzz_pE.o +O16=$(O15) +O17=$(O16) +O18=$(O17) xdouble.o +O19=$(O18) G_LLL_FP.o G_LLL_QP.o G_LLL_XD.o G_LLL_RR.o thread.o BasicThreadPool.o + +OBJ=$(O19) + +# library source files + + +S01=FFT.c FacVec.c GF2.c GF2E.c GF2EX.c GF2EXFactoring.c GF2X.c GF2X1.c +S02=$(S01) GF2XFactoring.c GF2XVec.c HNF.c ctools.c LLL.c LLL_FP.c LLL_QP.c +S03=$(S02) LLL_RR.c LLL_XD.c RR.c WordVector.c ZZ.c ZZVec.c ZZX.c ZZX1.c +S04=$(S03) ZZXCharPoly.c ZZXFactoring.c ZZ_p.c ZZ_pE.c ZZ_pEX.c +S05=$(S04) ZZ_pEXFactoring.c ZZ_pX.c ZZ_pX1.c ZZ_pXCharPoly.c +S06=$(S05) ZZ_pXFactoring.c fileio.c lip.c lzz_p.c lzz_pE.c lzz_pEX.c +S07=$(S06) lzz_pEXFactoring.c lzz_pX.c lzz_pX1.c +S08=$(S07) lzz_pXCharPoly.c lzz_pXFactoring.c mat_GF2.c mat_GF2E.c +S09=$(S08) mat_RR.c mat_ZZ.c mat_ZZ_p.c mat_ZZ_pE.c mat_lzz_p.c mat_lzz_pE.c +S10=$(S09) mat_poly_ZZ.c mat_poly_ZZ_p.c mat_poly_lzz_p.c +S11=$(S10) +S12=$(S11) +S13=$(S12) quad_float.c tools.c vec_GF2.c vec_GF2E.c vec_RR.c +S14=$(S13) vec_ZZ.c vec_ZZ_p.c vec_ZZ_pE.c +S15=$(S14) vec_lzz_p.c vec_lzz_pE.c +S16=$(S15) +S17=$(S16) +S18=$(S17) xdouble.c +S19=$(S18) G_LLL_FP.c G_LLL_QP.c G_LLL_XD.c G_LLL_RR.c thread.c BasicThreadPool.c + +SRC = $(S19) + +# library source files that are header files + +SINC = c_lip_impl.h g_lip_impl.h + + + + + +# library header files + +IN01= FFT.h FacVec.h GF2.h GF2E.h GF2EX.h GF2EXFactoring.h GF2X.h +IN02=$(IN01) GF2XFactoring.h GF2XVec.h HNF.h ctools.h LLL.h +IN03=$(IN02) RR.h SPMM_ASM.h WordVector.h ZZ.h sp_arith.h ZZVec.h ZZX.h ZZXFactoring.h +IN04=$(IN03) ZZ_p.h ZZ_pE.h ZZ_pEX.h ZZ_pEXFactoring.h ZZ_pX.h ZZ_pXFactoring.h +IN05=$(IN04) fileio.h lip.h lzz_p.h lzz_pE.h lzz_pEX.h lzz_pEXFactoring.h +IN06=$(IN05) lzz_pX.h lzz_pXFactoring.h mat_GF2.h mat_GF2E.h mat_RR.h +IN07=$(IN06) mat_ZZ.h mat_ZZ_p.h mat_ZZ_pE.h mat_lzz_p.h mat_lzz_pE.h +IN08=$(IN07) mat_poly_ZZ.h mat_poly_ZZ_p.h mat_poly_lzz_p.h matrix.h +IN09=$(IN08) pair.h vector.h pair_GF2EX_long.h pair_GF2X_long.h +IN10=$(IN09) pair_ZZX_long.h pair_ZZ_pEX_long.h pair_ZZ_pX_long.h +IN11=$(IN10) pair_lzz_pEX_long.h pair_lzz_pX_long.h quad_float.h +IN12=$(IN11) tools.h vec_GF2.h vec_GF2E.h vec_GF2XVec.h vec_RR.h +IN13=$(IN12) vec_ZZ.h vec_ZZVec.h vec_ZZ_p.h vec_ZZ_pE.h vec_double.h +IN14=$(IN13) vec_long.h vec_lzz_p.h vec_lzz_pE.h vec_quad_float.h +IN15=$(IN14) vec_vec_GF2.h vec_vec_GF2E.h vec_vec_RR.h vec_vec_ZZ.h +IN16=$(IN15) vec_vec_ZZ_p.h vec_vec_ZZ_pE.h vec_vec_long.h vec_vec_lzz_p.h +IN17=$(IN16) vec_vec_lzz_pE.h vec_xdouble.h xdouble.h config.h version.h +IN18=$(IN17) def_config.h new.h vec_ulong.h vec_vec_ulong.h c_lip.h g_lip.h +IN19=$(IN18) SmartPtr.h Lazy.h 
LazyTable.h thread.h BasicThreadPool.h
+INCL=$(IN19)
+
+
+
+# test data
+
+TD1=BerlekampTestIn BerlekampTestOut CanZassTestIn CanZassTestOut
+TD2=$(TD1) ZZXFacTestIn ZZXFacTestOut MoreFacTestIn LLLTestIn LLLTestOut RRTestIn RRTestOut
+TD3=$(TD2) MatrixTestIn MatrixTestOut CharPolyTestIn
+TD4=$(TD3) CharPolyTestOut QuadTestIn QuadTestOut
+
+TD = $(TD4)
+
+# test source files
+
+TS1=QuickTest.c BerlekampTest.c CanZassTest.c ZZXFacTest.c MoreFacTest.c LLLTest.c
+TS2=$(TS1) subset.c MatrixTest.c mat_lzz_pTest.c CharPolyTest.c RRTest.c QuadTest.c
+TS3=$(TS2) GF2XTest.c GF2EXTest.c BitMatTest.c ZZ_pEXTest.c lzz_pEXTest.c Timing.c
+TS4=$(TS3) ThreadTest.c ExceptionTest.c
+TS = $(TS4)
+
+# scripts
+
+SCRIPTS1=MakeGetTime MakeGetPID MakeCheckFeature ResetFeatures CopyFeatures TestScript dosify unixify RemoveProg
+SCRIPTS2=$(SCRIPTS1) configure DoConfig mfile cfile ppscript
+
+SCRIPTS=$(SCRIPTS2)
+
+# auxiliary source
+
+MD=MakeDesc.c MakeDescAux.c newnames.c gen_gmp_aux.c
+GT=GetTime0.c GetTime1.c GetTime2.c GetTime3.c GetTime4.c GetTime5.c TestGetTime.c
+GP=GetPID1.c GetPID2.c TestGetPID.c
+CH=CheckCLZL.c CheckCLZLAux.c CheckLL.c CheckLLAux.c CheckAVX.c CheckFMA.c CheckCompile.c
+
+AUXPROGS = TestGetTime TestGetPID CheckFeature CheckCompile
+
+
+
+# documentation
+
+
+D01=copying.txt BasicThreadPool.txt GF2.txt GF2E.txt GF2EX.txt GF2EXFactoring.txt GF2X.txt
+D02=$(D01) GF2XFactoring.txt GF2XVec.txt HNF.txt Lazy.txt LazyTable.txt LLL.txt RR.txt SmartPtr.txt
+D03=$(D02) ZZ.txt ZZVec.txt ZZX.txt ZZXFactoring.txt ZZ_p.txt ZZ_pE.txt
+D04=$(D03) ZZ_pEX.txt ZZ_pEXFactoring.txt ZZ_pX.txt ZZ_pXFactoring.txt
+D05=$(D04) conversions.txt flags.txt lzz_p.txt lzz_pE.txt lzz_pEX.txt
+D06=$(D05) lzz_pEXFactoring.txt lzz_pX.txt lzz_pXFactoring.txt mat_GF2.txt
+D07=$(D06) mat_GF2E.txt mat_RR.txt mat_ZZ.txt mat_ZZ_p.txt mat_ZZ_pE.txt
+D08=$(D07) mat_lzz_p.txt mat_lzz_pE.txt mat_poly_ZZ.txt mat_poly_ZZ_p.txt
+D09=$(D08) mat_poly_lzz_p.txt matrix.txt pair.txt vector.txt
+D10=$(D09) quad_float.txt sedscript.txt tools.txt vec_GF2.txt
+D11=$(D10) vec_GF2E.txt vec_RR.txt vec_ZZ.txt vec_ZZ_p.txt vec_ZZ_pE.txt
+D12=$(D11) vec_lzz_p.txt vec_lzz_pE.txt xdouble.txt names.txt
+D13=$(D12) tour-ack.html tour-intro.html tour-time.html tour-changes.html
+D14=$(D13) tour-modules.html tour-unix.html tour-examples.html
+D15=$(D14) tour-roadmap.html tour-win.html tour-impl.html tour-struct.html
+D16=$(D15) tour.html tour-ex1.html tour-ex2.html tour-ex3.html tour-ex4.html
+D17=$(D16) tour-ex5.html tour-ex6.html tour-ex7.html arrow1.gif arrow2.gif arrow3.gif
+D18=$(D17) tour-gmp.html tour-gf2x.html tour-tips.html config.txt version.txt
+
+TX01=GF2.txt GF2E.txt GF2EX.txt GF2EXFactoring.txt GF2X.txt GF2XFactoring.txt
+TX02=GF2XVec.txt HNF.txt Lazy.txt LazyTable.txt LLL.txt RR.txt SmartPtr.txt ZZ.txt ZZVec.txt ZZX.txt ZZXFactoring.txt
+TX03=ZZ_p.txt ZZ_pE.txt ZZ_pEX.txt ZZ_pEXFactoring.txt ZZ_pX.txt ZZ_pXFactoring.txt
+TX04=lzz_p.txt lzz_pE.txt lzz_pEX.txt lzz_pEXFactoring.txt lzz_pX.txt
+TX05=lzz_pXFactoring.txt mat_GF2.txt mat_GF2E.txt mat_RR.txt mat_ZZ.txt mat_ZZ_p.txt
+TX06=mat_ZZ_pE.txt mat_lzz_p.txt mat_lzz_pE.txt mat_poly_ZZ.txt mat_poly_ZZ_p.txt
+TX07=mat_poly_lzz_p.txt matrix.txt pair.txt quad_float.txt tools.txt vec_GF2.txt
+TX08=vec_GF2E.txt vec_RR.txt vec_ZZ.txt vec_ZZ_p.txt vec_ZZ_pE.txt vec_lzz_p.txt
+TX09=vec_lzz_pE.txt vector.txt version.txt xdouble.txt BasicThreadPool.txt
+
+TXFILES=$(TX01) $(TX02) $(TX03) $(TX04) $(TX05) $(TX06) $(TX07) $(TX08) $(TX09)
+
+HT01=GF2.cpp.html GF2E.cpp.html GF2EX.cpp.html GF2EXFactoring.cpp.html GF2X.cpp.html GF2XFactoring.cpp.html
+HT02=GF2XVec.cpp.html HNF.cpp.html Lazy.cpp.html LazyTable.cpp.html LLL.cpp.html RR.cpp.html SmartPtr.cpp.html ZZ.cpp.html ZZVec.cpp.html ZZX.cpp.html ZZXFactoring.cpp.html
+HT03=ZZ_p.cpp.html ZZ_pE.cpp.html ZZ_pEX.cpp.html ZZ_pEXFactoring.cpp.html ZZ_pX.cpp.html ZZ_pXFactoring.cpp.html
+HT04=lzz_p.cpp.html lzz_pE.cpp.html lzz_pEX.cpp.html lzz_pEXFactoring.cpp.html lzz_pX.cpp.html
+HT05=lzz_pXFactoring.cpp.html mat_GF2.cpp.html mat_GF2E.cpp.html mat_RR.cpp.html mat_ZZ.cpp.html mat_ZZ_p.cpp.html
+HT06=mat_ZZ_pE.cpp.html mat_lzz_p.cpp.html mat_lzz_pE.cpp.html mat_poly_ZZ.cpp.html mat_poly_ZZ_p.cpp.html
+HT07=mat_poly_lzz_p.cpp.html matrix.cpp.html pair.cpp.html quad_float.cpp.html tools.cpp.html vec_GF2.cpp.html
+HT08=vec_GF2E.cpp.html vec_RR.cpp.html vec_ZZ.cpp.html vec_ZZ_p.cpp.html vec_ZZ_pE.cpp.html vec_lzz_p.cpp.html
+HT09=vec_lzz_pE.cpp.html vector.cpp.html version.cpp.html xdouble.cpp.html BasicThreadPool.cpp.html
+
+HTFILES=$(HT01) $(HT02) $(HT03) $(HT04) $(HT05) $(HT06) $(HT07) $(HT08) $(HT09)
+
+
+DOC = $(D18) $(HTFILES)
+
+
+
+# test program executables
+
+PROG1=QuickTest BerlekampTest CanZassTest ZZXFacTest MoreFacTest LLLTest BitMatTest
+PROG2=$(PROG1) MatrixTest mat_lzz_pTest CharPolyTest RRTest QuadTest
+PROG3=$(PROG2) GF2XTest GF2EXTest subset ZZ_pEXTest lzz_pEXTest Timing ThreadTest
+PROGS = $(PROG3)
+
+# things to save to a tar file
+
+SFI1=makefile $(SRC) $(SINC) $(SCRIPTS) $(MD) $(GT) $(GP) $(CH) $(TS) $(TD) mach_desc.win
+SFI2=$(SFI1) MulTimeTest.c Poly1TimeTest.c Poly2TimeTest.c Poly3TimeTest.c GF2XTimeTest.c
+SFI3=$(SFI2) InitSettings.c DispSettings.c WizardAux Wizard def_makefile
+SFILES=$(SFI3)
+
+
+#################################################################
+#
+# Rules for compiling the library
+#
+#################################################################
+
+
+NTL_INCLUDE = -I../include -I.
+# NTL needs this to find its include files
+
+COMPILE = $(CXX) $(NTL_INCLUDE) $(CPPFLAGS) $(CXXAUTOFLAGS) $(CXXFLAGS) -c
+
+LINK = $(CXX) $(NTL_INCLUDE) $(CPPFLAGS) $(CXXAUTOFLAGS) $(CXXFLAGS) $(LDFLAGS)
+
+
+
+# 'make all' does a complete make, including all setup.
+# It also creates the file 'all', which means you should
+# run 'make clobber' before running 'make' or 'make all'
+# again.
+
+all:
+	make setup1
+	make setup2
+	make setup3
+	make setup4
+	make ntl.a
+	touch all
+
+
+# setup1 generates the file ../include/NTL/mach_desc.h
+
+setup1:
+	$(COMPILE) MakeDescAux.c
+	$(LINK) -o MakeDesc MakeDesc.c MakeDescAux.o $(LDLIBS)
+	./MakeDesc
+	mv mach_desc.h ../include/NTL/mach_desc.h
+
+
+# setup2 does some dynamic checks for GetTime, GetPID, __builtin_clzl, and LL types
+
+setup2:
+	echo "*** CheckFeature log ***" > CheckFeature.log
+	sh MakeGetTime "$(LINK)" "$(LDLIBS)"
+	sh MakeGetPID "$(LINK)" "$(LDLIBS)"
+	sh MakeCheckFeature BUILTIN_CLZL "CheckCLZL.c CheckCLZLAux.c" "$(LINK)" "$(LDLIBS)"
+	sh MakeCheckFeature LL_TYPE "CheckLL.c CheckLLAux.c" "$(LINK)" "$(LDLIBS)"
+	sh MakeCheckFeature AVX "CheckAVX.c" "$(LINK)" "$(LDLIBS)"
+	sh MakeCheckFeature FMA "CheckFMA.c" "$(LINK)" "$(LDLIBS)"
+
+# setup3 generates the file ../include/NTL/gmp_aux.h
+# The file ../include/NTL/gmp_aux.h is included in ../include/NTL/lip.h
+# when NTL_GMP_LIP is set.
+# When this flag is not set, an empty file is produced.
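+#
+# For illustration only: with the default settings above (CXX=g++,
+# LDLIBS=-lm, GMP_OPT_LIB=-lgmp, and the GMP_OPT_*DIR lines left
+# commented out), the setup3 recipe below expands to roughly
+#    g++ -I../include -I. -o gen_gmp_aux gen_gmp_aux.c -lgmp -lm
+#    ./gen_gmp_aux > ../include/NTL/gmp_aux.h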
+ +setup3: + $(LINK) $(GMP_OPT_INCDIR) -o gen_gmp_aux gen_gmp_aux.c $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + ./gen_gmp_aux > ../include/NTL/gmp_aux.h + +# setup4 runs the wizard + +setup4: + sh Wizard $(WIZARD) + + +ntl.a: $(OBJ) + $(AR) $(ARFLAGS) ntl.a $(OBJ) #LSTAT + - $(RANLIB) ntl.a #LSTAT +# $(LIBTOOL) --tag=CXX --mode=link $(LINK) -o libntl.la $(OBJ:.o=.lo) $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(GF2X_OPT_LIBDIR) $(GF2X_OPT_LIB) $(LDLIBS) -rpath $(LIBDIR) -version-info `cat VERSION_INFO` #LSHAR + +LCOMP= #LSTAT +# LCOMP=$(LIBTOOL) --tag=CXX --mode=compile #LSHAR + +lip.o: lip.c g_lip_impl.h c_lip_impl.h + $(LCOMP) $(COMPILE) $(GMP_OPT_INCDIR) lip.c + +ctools.o: ctools.c + $(LCOMP) $(COMPILE) ctools.c + + +GetTime.o: GetTime.c + $(LCOMP) $(COMPILE) GetTime.c + +GetPID.o: GetPID.c + $(LCOMP) $(COMPILE) GetPID.c + +CheckCompile: CheckCompile.c + $(LINK) -o CheckCompile CheckCompile.c $(LDLIBS) + + +.c.o: + $(LCOMP) $(COMPILE) $(GF2X_OPT_INCDIR) $< + +.c: + $(LINK) -o $@ $< ntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(GF2X_OPT_LIBDIR) $(GF2X_OPT_LIB) $(LDLIBS) #LSTAT +# $(LIBTOOL) --tag=CXX --mode=link $(LINK) -o $@ $< libntl.la #LSHAR + +################################################################# +# +# Rule for running tests +# make check runs a series of tests +# +################################################################# + +check: + sh RemoveProg $(PROGS) + make QuickTest + ./QuickTest + sh RemoveProg QuickTest + sh TestScript + +################################################################# +# +# Rule for installing +# make install just does a simple copy of the include file +# and library. The -p option is used to preserve file attributes. +# This helps avoid some problems (especially when copying ntl.a). +# Also, an attempt is made to make everything that is +# installed readable by everyone. +# +# make uninstall removes these files +# +################################################################# + + + + +install: + mkdir -p -m 755 $(INCLUDEDIR) + rm -rf $(INCLUDEDIR)/NTL + mkdir -m 755 $(INCLUDEDIR)/NTL + cp -p ../include/NTL/*.h $(INCLUDEDIR)/NTL + - chmod -R a+r $(INCLUDEDIR)/NTL + mkdir -p -m 755 $(DOCDIR) + rm -rf $(DOCDIR)/NTL + mkdir -m 755 $(DOCDIR)/NTL + cp -p ../doc/*.txt $(DOCDIR)/NTL + cp -p ../doc/*.html $(DOCDIR)/NTL + cp -p ../doc/*.gif $(DOCDIR)/NTL + - chmod -R a+r $(DOCDIR)/NTL + mkdir -p -m 755 $(LIBDIR) + cp -p ntl.a $(LIBDIR)/libntl.a #LSTAT + - chmod a+r $(LIBDIR)/libntl.a #LSTAT +# $(LIBTOOL) --mode=install cp -p libntl.la $(LIBDIR) #LSHAR + + +uninstall: + rm -f $(LIBDIR)/libntl.a #LSTAT +# $(LIBTOOL) --mode=uninstall rm -f $(LIBDIR)/libntl.la #LSHAR + rm -rf $(INCLUDEDIR)/NTL + rm -rf $(DOCDIR)/NTL + +################################################################# +# +# Rules for cleaning up +# +# make clobber removes *everything* created by make, +# but it does not restore config.h to its default. +# +# make clean tidies up a bit +# +################################################################# + +clobber: + rm -f ntl.a mach_desc.h ../include/NTL/mach_desc.h GetTime.c GetPID.c + sh ResetFeatures '..' 
+ rm -f ../include/NTL/gmp_aux.h + sh RemoveProg $(PROGS) MakeDesc $(AUXPROGS) gen_gmp_aux + rm -f *.o + rm -rf small + rm -f cfileout mfileout + rm -rf .libs *.lo libntl.la + rm -f all + +clean: + sh RemoveProg $(PROGS) MakeDesc $(AUXPROGS) gen_gmp_aux + rm -f *.o + rm -rf small +# - $(LIBTOOL) --mode=clean rm -f libntl.la *.lo #LSHAR + +################################################################# +# +# Rules for making tar and zip files +# +# make ppdoc creates pretty-printed versions of some documentation +# - run before make package or make winpack +# +# make package creates a tar.gz file suitable for Unix +# +# make winpack creates a zip file suitable for Windows +# +################################################################# + +ppdoc: + sh ppscript "$(TXFILES)" + +ppclean: + rm -f ../doc/*.cpp + + +package: + ./configure --nowrite + cp mfileout def_makefile + cp cfileout ../include/NTL/def_config.h + sh unixify "$(SFILES) DIRNAME WINDIR VERSION_INFO NOTES" "$(INCL)" "$(DOC)" + rm -rf `cat DIRNAME` + rm -f `cat DIRNAME`.tar + rm -f `cat DIRNAME`.tar.gz + mv unix `cat DIRNAME` + chmod -R a+rX `cat DIRNAME` + tar -cvf `cat DIRNAME`.tar `cat DIRNAME` + gzip `cat DIRNAME`.tar + rm -rf `cat DIRNAME` + +winpack: + ./configure --nowrite NTL_GMP_LIP=off + cp mfileout def_makefile + cp cfileout ../include/NTL/def_config.h + sh dosify "$(SRC)" "$(INCL)" "$(DOC)" "$(TS)" "$(TD)" "$(SINC)" + rm -rf `cat WINDIR` + rm -f `cat WINDIR`.zip + mv dos `cat WINDIR` + chmod -R a+rX `cat WINDIR` + find ./`cat WINDIR` '!' -name '*.gif' -print | zip -l `cat WINDIR` -@ + find ./`cat WINDIR` -name '*.gif' -print | zip -u `cat WINDIR` -@ + rm -rf `cat WINDIR` + + +###################################################################### +# +# config wizard related stuff +# +###################################################################### + +WO1 = FFT.o GetTime.o GetPID.o ctools.o ZZ.o ZZVec.o ZZ_p.o ZZ_pX.o +WO2 = $(WO1) ZZ_pX1.o lip.o tools.o vec_ZZ.o vec_ZZ_p.o +WO3 = $(WO2) GF2.o WordVector.o vec_GF2.o GF2X.o GF2X1.o thread.o BasicThreadPool.o fileio.o + +WOBJ = $(WO3) + +# wntl.a: LCOMP= #LSHAR +wntl.a: $(WOBJ) + $(AR) $(ARFLAGS) wntl.a $(WOBJ) + - $(RANLIB) wntl.a + +MulTimeTest: + $(LINK) -o MulTimeTest MulTimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + + +Poly1TimeTest: + $(LINK) -o Poly1TimeTest Poly1TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) +Poly2TimeTest: + $(LINK) -o Poly2TimeTest Poly2TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) +Poly3TimeTest: + $(LINK) -o Poly3TimeTest Poly3TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + + +GF2XTimeTest: + $(LINK) -o GF2XTimeTest GF2XTimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + +InitSettings: + $(LINK) -o InitSettings InitSettings.c $(LDLIBS) + + +DispSettings: + $(LINK) -o DispSettings DispSettings.c $(LDLIBS) + + + + diff --git a/thirdparty/linux/ntl/src/dosify b/thirdparty/linux/ntl/src/dosify new file mode 100644 index 0000000000..1be81fa5d4 --- /dev/null +++ b/thirdparty/linux/ntl/src/dosify @@ -0,0 +1,67 @@ + +rm -r dos + +mkdir dos +mkdir dos/src +mkdir dos/include +mkdir dos/include/NTL +mkdir dos/doc +mkdir dos/GetTime +mkdir dos/GetPID +mkdir dos/MakeDesc +mkdir dos/misc +mkdir dos/tests + +cp ../README dos/README.txt + +cp GetTime4.c dos/src/GetTime.cpp + +cp GetPID2.c dos/src/GetPID.cpp + +cp mach_desc.win dos/include/NTL/mach_desc.h + + +cp GetTime0.c dos/GetTime/GetTime0.cpp +cp GetTime1.c dos/GetTime/GetTime1.cpp +cp GetTime2.c dos/GetTime/GetTime2.cpp 
+cp GetTime3.c dos/GetTime/GetTime3.cpp
+cp GetTime4.c dos/GetTime/GetTime4.cpp
+cp GetTime5.c dos/GetTime/GetTime5.cpp
+
+cp GetPID1.c dos/GetPID/GetPID1.cpp
+cp GetPID2.c dos/GetPID/GetPID2.cpp
+
+cp MakeDesc.c dos/MakeDesc/MakeDesc.cpp
+cp MakeDescAux.c dos/MakeDesc/MakeDescAux.cpp
+
+cp newnames.c dos/misc/newnames.cpp
+cp gen_gmp_aux.c dos/misc/gen_gmp_aux.cpp
+
+for i in $1
+do
+   cp $i dos/src/`basename $i .c`.cpp
+done
+
+for i in $2
+do
+   cp ../include/NTL/$i dos/include/NTL/$i
+done
+
+for i in $3
+do
+   cp ../doc/$i dos/doc/$i
+done
+
+for i in $4
+do
+   cp $i dos/tests/`basename $i .c`.cpp
+done
+
+cp $5 dos/tests
+
+cp $6 dos/src
+
+cp ../include/NTL/def_config.h dos/include/NTL/config.h
+sh ResetFeatures dos
+
+
diff --git a/thirdparty/linux/ntl/src/fileio.c b/thirdparty/linux/ntl/src/fileio.c
new file mode 100644
index 0000000000..6dc8bc59d4
--- /dev/null
+++ b/thirdparty/linux/ntl/src/fileio.c
@@ -0,0 +1,134 @@
+
+#include <NTL/fileio.h>
+#include <NTL/thread.h>
+
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <cstring>
+#include <ctime>
+
+
+
+NTL_START_IMPL
+
+
+void OpenWrite(ofstream& s, const char *name)
+{
+   s.open(name, ios::out);
+
+   if (!s) {
+      FileError("write open failed");
+   }
+}
+
+void OpenWrite(ofstream& s, const char *name, FileList& flist)
+{
+   // post condition: file is successfully opened iff
+   // name is added to flist (even if exception is thrown).
+   // We do the AddFile first, since that can conceivably fail.
+
+   flist.AddFile(name);
+   s.open(name, ios::out);
+
+   if (!s) {
+      flist.RemoveLast();
+      FileError("write open failed");
+   }
+}
+
+
+void OpenRead(ifstream& s, const char *name)
+{
+   s.open(name, ios::in);
+   if (!s) {
+      FileError("read open failed");
+   }
+}
+
+void CloseWrite(ofstream& s)
+{
+   s.close();
+   if (s.fail()) FileError("close failed");
+}
+
+
+void FileList::AddFile(const char *name)
+{
+   Vec<char> item;
+   item.SetLength(strlen(name)+1);
+   strcpy(item.elts(), name);
+
+   data.append(item);
+}
+
+void FileList::RemoveLast()
+{
+   data.SetLength(data.length()-1);
+}
+
+
+FileList::~FileList()
+{
+   long i, n;
+
+   n = data.length();
+   for (i = 0; i < n; i++)
+      remove(data[i].elts());
+}
+
+
+
+
+const char *FileName(const char* stem, long d)
+{
+   NTL_TLS_LOCAL(string, sbuf);
+
+   stringstream ss;
+   ss << "tmp-ntl-" << stem;
+   ss << "-" << setfill('0') << setw(5) << d << "-";
+   sbuf = ss.str() + UniqueID();
+   return sbuf.c_str();
+}
+
+// UniqueID:
+//
+// builds a string of the form cnt-time-clock-pid-tid, where
+//   - cnt is a global counter
+//   - time is the value returned by time(0)
+//   - clock is the value returned by clock()
+//   - pid is the value returned by getpid() (or "0" if getpid()
+//     is not available)
+//   - tid is the value returned by this_thread::get_id()
+//     (or "0" if not using threads)
+// each thread should have its own unique ID, which is guaranteed
+// to be unique across all threads in a process, and which
+// is hopefully unique across the entire system (but this
+// is harder to guarantee)
+
+
+const string& UniqueID()
+{
+   static AtomicCounter cnt; // a GLOBAL counter
+
+
+   NTL_TLS_LOCAL(string, ID);
+
+   NTL_TLS_LOCAL_INIT(bool, initialized, (false));
+   NTL_TLS_LOCAL_INIT(unsigned long, local_cnt, (cnt.inc()));
+   NTL_TLS_LOCAL_INIT(unsigned long, local_time, (time(0)));
+   NTL_TLS_LOCAL_INIT(unsigned long, local_clock, (clock()));
+
+   if (!initialized) {
+      stringstream ss;
+      ss << local_cnt << "-" << local_time << "-"
+         << local_clock << "-" << GetPID() << "-" << CurrentThreadID();
+      ID = ss.str();
+      initialized = true;
+   }
+
+   return ID;
+}
+
+
+NTL_END_IMPL
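+
+// Illustrative note (an example, not part of the original source): a call
+// such as FileName("sort", 3) produces a name of the form
+//    tmp-ntl-sort-00003-<cnt>-<time>-<clock>-<pid>-<tid>
+// where the trailing component is the cached thread-local UniqueID(),
+// so repeated calls from the same thread reuse the same suffix.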
diff --git a/thirdparty/linux/ntl/src/g_lip_impl.h b/thirdparty/linux/ntl/src/g_lip_impl.h
new file mode 100644
index 0000000000..44b87d4c96
--- /dev/null
+++ b/thirdparty/linux/ntl/src/g_lip_impl.h
@@ -0,0 +1,7057 @@
+
+/*
+ * This is a "wrapper" layer that builds on top of the "mpn" layer of gmp.
+ * This layer provides much of the same functionality of the "mpz"
+ * layer of gmp, but the interface it provides is much more like
+ * the interface provided by lip.
+ *
+ * This layer was written under the following assumptions about gmp:
+ *  1) mp_limb_t is an unsigned integral type
+ *  2) sizeof(mp_limb_t) == sizeof(long) or sizeof(mp_limb_t) == 2*sizeof(long)
+ *  3) the number of bits of an mp_limb_t is equal to that of a long,
+ *     or twice that of a long
+ *  4) the number of bits of a gmp radix is equal to the number of bits
+ *     of an mp_limb_t
+ *
+ * Except for assumption (1), these assumptions are verified in the
+ * installation script, and they should be universally satisfied in practice,
+ * except when gmp is built using the proposed, new "nail" feature
+ * (in which some bits of an mp_limb_t are unused).
+ * The code here will not work properly with the "nail" feature;
+ * however, I have (attempted to) identify all such problem spots,
+ * and any other places where assumptions (2-4) are made,
+ * with a comment labeled "DIRT".
+ */
+
+
+
+#include <NTL/lip.h>
+
+#include <NTL/ctools.h>
+#include <NTL/vector.h>
+#include <NTL/SmartPtr.h>
+
+#include <NTL/sp_arith.h>
+
+
+//#include
+//#include
+#include <gmp.h>
+
+#include <NTL/new.h>
+
+NTL_CLIENT
+
+typedef mp_limb_t *_ntl_limb_t_ptr;
+
+
+#if (__GNU_MP_VERSION < 3)
+
+#error "You have to use GMP version >= 3.1"
+
+#endif
+
+#if ((__GNU_MP_VERSION == 3) && (__GNU_MP_VERSION_MINOR < 1))
+
+#error "You have to use GMP version >= 3.1"
+
+#endif
+
+
+
+/* GMP v3.1 is supposed to have mpn_tdiv_qr defined, but it doesn't.
+   Here's a workaround */
+
+#if ((__GNU_MP_VERSION == 3) && (__GNU_MP_VERSION_MINOR == 1) && (__GNU_MP_VERSION_PATCHLEVEL == 0))
+
+#define mpn_tdiv_qr __MPN(tdiv_qr)
+
+
+extern "C"
+void mpn_tdiv_qr(mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t,
+                 mp_srcptr, mp_size_t);
+
+#endif
+
+
+
+union gbigint_header {
+
+   long info[2];
+   mp_limb_t alignment;
+
+};
+
+/* A bigint is represented as two long's, ALLOC and SIZE, followed by a
+ * vector DATA of mp_limb_t's.
+ *
+ * ALLOC is of the form
+ *    (alloc << 2) | continue_flag | frozen_flag
+ * where
+ *    - alloc is the number of allocated mp_limb_t's,
+ *    - continue_flag is either 2 or 0,
+ *    - frozen_flag is either 1 or 0.
+ * If frozen_flag is set, then the space for this bigint is *not*
+ * managed by the _ntl_gsetlength and _ntl_gfree routines,
+ * but is instead managed by the vec_ZZ_p and ZZVec routines.
+ * The continue_flag is only set when the frozen_flag is set.
+ *
+ * SIZE is the number of mp_limb_t's actually
+ * used by the bigint, with the sign of SIZE having
+ * the sign of the bigint.
+ * Note that the zero bigint is represented as SIZE=0.
+ *
+ * Bigint's are accessed through a handle, which is a pointer to void.
+ * A null handle logically represents the bigint zero.
+ * This is done so that the interface presented to higher level
+ * routines is essentially the same as that of NTL's traditional
+ * long integer package.
+ *
+ * The components ALLOC, SIZE, and DATA are all accessed through
+ * macros using pointer casts. While all of this may seem a bit dirty,
+ * it should be quite portable: objects are never referenced
+ * through pointers of different types, and no alignment
+ * problems should arise.
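+ *
+ * (An illustrative example of this layout: a bigint holding the value -1
+ * in a block with 4 limbs allocated and not frozen has
+ * ALLOC = (4 << 2) | 0 | 0 = 16, SIZE = -1, and DATA[0] = 1.)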
+ * + * DIRT: This rule is broken in the file g_lip.h: the inline definition + * of _ntl_gmaxalloc in that file has the definition of ALLOC pasted in. + * + * Actually, mp_limb_t is usually the type unsigned long. + * However, on some 64-bit platforms, the type long is only 32 bits, + * and gmp makes mp_limb_t unsigned long long in this case. + * This is fairly rare, as the industry standard for Unix is to + * have 64-bit longs on 64-bit machines. + */ + +#if 0 + +#define ALLOC(p) (((long *) (p))[0]) +#define SIZE(p) (((long *) (p))[1]) +#define DATA(p) ((mp_limb_t *) (((long *) (p)) + 2)) + +#define STORAGE(len) ((long)(2*sizeof(long) + (len)*sizeof(mp_limb_t))) + +/* DIRT: STORAGE computes the number of bytes to allocate for a bigint + * of maximal SIZE len. This should be computed so that one + * can store several such bigints in a contiguous array + * of memory without breaking any alignment requirements. + * Currently, it is assumed (and explicitly checked in the NTL installation + * script) that sizeof(mp_limb_t) is either sizeof(long) or + * 2*sizeof(long), and therfore, nothing special needs to + * be done to enfoce alignment requirements. If this assumption + * should change, then the storage layout for bigints must be + * re-designed. + */ + +#define MustAlloc(c, len) (!(c) || (ALLOC(c) >> 2) < (len)) + + + +#define GET_SIZE_NEG(sz, neg, p) \ +do \ +{ \ + long _s; \ + _s = SIZE(p); \ + if (_s < 0) { \ + sz = -_s; \ + neg = 1; \ + } \ + else { \ + sz = _s; \ + neg = 0; \ + } \ +} \ +while (0) + +#define STRIP(sz, p) \ +do \ +{ \ + long _i; \ + _i = sz - 1; \ + while (_i >= 0 && p[_i] == 0) _i--; \ + sz = _i + 1; \ +} \ +while (0) + +#define ZEROP(p) (!p || !SIZE(p)) + +#define ONEP(p) (p && SIZE(p) == 1 && DATA(p)[0] == 1) + +#define SWAP_BIGINT(a, b) \ +do \ +{ \ + _ntl_gbigint _t; \ + _t = a; \ + a = b; \ + b = _t; \ +} \ +while (0) + +#define SWAP_LONG(a, b) \ +do \ +{ \ + long _t; \ + _t = a; \ + a = b; \ + b = _t; \ +} \ +while (0) + +#define SWAP_LIMB_PTR(a, b) \ +do \ +{ \ + _ntl_limb_t_ptr _t; \ + _t = a; \ + a = b; \ + b = _t; \ +} \ +while (0) + +#define COUNT_BITS(cnt, a) \ +do \ +{ \ + long _i = 0; \ + mp_limb_t _a = (a); \ + \ + while (_a>=256) \ + _i += 8, _a >>= 8; \ + if (_a >=16) \ + _i += 4, _a >>= 4; \ + if (_a >= 4) \ + _i += 2, _a >>= 2; \ + if (_a >= 2) \ + _i += 2; \ + else if (_a >= 1) \ + _i++; \ + \ + cnt = _i; \ +} \ +while (0) + +#else + +/* These are C++ inline functions that are equivalent to the above + * macros. They are mainly intended as a debugging aid. 
+ */
+
+
+static
+inline long& ALLOC(_ntl_gbigint p)
+   { return (((long *) p)[0]); }
+
+static
+inline long& SIZE(_ntl_gbigint p)
+   { return (((long *) p)[1]); }
+
+static
+inline mp_limb_t * DATA(_ntl_gbigint p)
+   { return ((mp_limb_t *) (((long *) (p)) + 2)); }
+
+static
+inline long STORAGE(long len)
+   { return ((long)(2*sizeof(long) + (len)*sizeof(mp_limb_t))); }
+
+static
+inline long MustAlloc(_ntl_gbigint c, long len)
+   { return (!(c) || (ALLOC(c) >> 2) < (len)); }
+
+
+static
+inline void GET_SIZE_NEG(long& sz, long& neg, _ntl_gbigint p)
+{
+   long s;
+   s = SIZE(p);
+   if (s < 0) {
+      sz = -s;
+      neg = 1;
+   }
+   else {
+      sz = s;
+      neg = 0;
+   }
+}
+
+static
+inline void STRIP(long& sz, mp_limb_t *p)
+{
+   long i;
+   i = sz - 1;
+   while (i >= 0 && p[i] == 0) i--;
+   sz = i + 1;
+}
+
+static
+inline long ZEROP(_ntl_gbigint p)
+{
+   return !p || !SIZE(p);
+}
+
+static
+inline long ONEP(_ntl_gbigint p)
+{
+   return p && SIZE(p) == 1 && DATA(p)[0] == 1;
+}
+
+static
+inline void SWAP_BIGINT(_ntl_gbigint& a, _ntl_gbigint& b)
+{
+   _ntl_gbigint t;
+   t = a;
+   a = b;
+   b = t;
+}
+
+static
+inline void SWAP_LONG(long& a, long& b)
+{
+   long t;
+   t = a;
+   a = b;
+   b = t;
+}
+
+static
+inline void SWAP_LIMB_PTR(_ntl_limb_t_ptr& a, _ntl_limb_t_ptr& b)
+{
+   _ntl_limb_t_ptr t;
+   t = a;
+   a = b;
+   b = t;
+}
+
+
+static
+inline void COUNT_BITS(long& cnt, mp_limb_t a)
+{
+   long i = 0;
+
+   while (a>=256)
+      i += 8, a >>= 8;
+   if (a >=16)
+      i += 4, a >>= 4;
+   if (a >= 4)
+      i += 2, a >>= 2;
+   if (a >= 2)
+      i += 2;
+   else if (a >= 1)
+      i++;
+
+   cnt = i;
+}
+
+#endif
+
+
+
+#if (defined(NTL_HAVE_LL_TYPE) && NTL_ZZ_NBITS == NTL_BITS_PER_LONG)
+#define NTL_VIABLE_LL
+#endif
+
+#if (defined(NTL_CRT_ALTCODE) || defined(NTL_CRT_ALTCODE_SMALL))
+#define NTL_TBL_CRT
+#endif
+
+
+
+class _ntl_gbigint_watcher {
+public:
+   _ntl_gbigint *watched;
+
+   explicit
+   _ntl_gbigint_watcher(_ntl_gbigint *_watched) : watched(_watched) {}
+
+   ~_ntl_gbigint_watcher()
+   {
+      if (*watched && (ALLOC(*watched) >> 2) > NTL_RELEASE_THRESH)
+         _ntl_gfree(watched);
+   }
+};
+
+
+
+class _ntl_gbigint_deleter {
+public:
+   static void apply(_ntl_gbigint& p) { _ntl_gfree(&p); }
+};
+
+typedef WrappedPtr<_ntl_gbigint_body, _ntl_gbigint_deleter> _ntl_gbigint_wrapped;
+
+
+
+// GRegisters are used for local "scratch" variables.
+
+// NOTE: the first implementation of GRegister below wraps a bigint in a class
+// whose destructor ensures that its space is reclaimed at program/thread termination.
+// It really only is necessary in a multi-threading environment, but it doesn't
+// seem to incur significant cost.
+
+// The second implementation does not do this wrapping, and so should not be
+// used in a multi-threading environment.
+
+// Both versions use a local "watcher" variable, which does the following:
+// when the local scope closes (e.g., the function returns), the space
+// for the bigint is freed *unless* it is fairly small. This balanced
+// approach leads to significantly faster performance, while not holding
+// on to too many resources.
+
+// The third version releases local memory every time. It can be significantly
+// slower.
+
+// The fourth version --- which was the original strategy --- never releases
+// memory. It can be faster, but can become a memory hog.
+
+// All of this code is overly complicated, due to the fact that I'm "retrofitting"
+// this logic onto what was originally pure-C code.
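+
+// For illustration, a sketch of the intended usage pattern (the routine
+// _ntl_gfoo is hypothetical, not code from this file):
+//
+//    void _ntl_gfoo(_ntl_gbigint a, _ntl_gbigint *r)
+//    {
+//       GRegister(t);        // thread-local scratch bigint
+//       _ntl_gcopy(a, &t);   // compute into t ...
+//       _ntl_gcopy(t, r);    // ... then deliver the result
+//    }                       // watcher frees t here if it has grown large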
+ + +#define GRegister(x) NTL_TLS_LOCAL(_ntl_gbigint_wrapped, x); _ntl_gbigint_watcher _WATCHER__ ## x(&x) + +// #define GRegister(x) NTL_THREAD_LOCAL static _ntl_gbigint x(0); _ntl_gbigint_watcher _WATCHER__ ## x(&x) + +// #define GRegister(x) _ntl_gbigint_wrapper x(0); + +// #define GRegister(x) static _ntl_gbigint x = 0 + + + + + +#define STORAGE_OVF(len) NTL_OVERFLOW(len, sizeof(mp_limb_t), 2*sizeof(long)) + + +/* ForceNormal ensures a normalized bigint */ + +static +void ForceNormal(_ntl_gbigint x) +{ + long sx, xneg; + mp_limb_t *xdata; + + if (!x) return; + GET_SIZE_NEG(sx, xneg, x); + xdata = DATA(x); + STRIP(sx, xdata); + if (xneg) sx = -sx; + SIZE(x) = sx; +} + + +#define MIN_SETL (4) + /* _ntl_gsetlength allocates a multiple of MIN_SETL digits */ + + + +void _ntl_gsetlength(_ntl_gbigint *v, long len) +{ + _ntl_gbigint x = *v; + + if (len < 0) + LogicError("negative size allocation in _ntl_zgetlength"); + + if (NTL_OVERFLOW(len, NTL_ZZ_NBITS, 0)) + ResourceError("size too big in _ntl_gsetlength"); + +#ifdef NTL_SMALL_MP_SIZE_T + /* this makes sure that numbers don't get too big for GMP */ + if (len >= (1L << (NTL_BITS_PER_INT-4))) + ResourceError("size too big for GMP"); +#endif + + + if (x) { + long oldlen = ALLOC(x); + long fixed = oldlen & 1; + oldlen = oldlen >> 2; + + if (fixed) { + if (len > oldlen) + LogicError("internal error: can't grow this _ntl_gbigint"); + else + return; + } + + if (len <= oldlen) return; + + len++; /* always allocate at least one more than requested */ + + oldlen = (long) (oldlen * 1.2); /* always increase by at least 20% */ + if (len < oldlen) + len = oldlen; + + /* round up to multiple of MIN_SETL */ + len = ((len+(MIN_SETL-1))/MIN_SETL)*MIN_SETL; + + /* test len again */ + if (NTL_OVERFLOW(len, NTL_ZZ_NBITS, 0)) + ResourceError("size too big in _ntl_gsetlength"); + + if (STORAGE_OVF(len)) + ResourceError("reallocation failed in _ntl_gsetlength"); + + if (!(x = (_ntl_gbigint)NTL_REALLOC((void *) x, 1, STORAGE(len), 0))) { + MemoryError(); + } + ALLOC(x) = len << 2; + } + else { + len++; /* as above, always allocate one more than explicitly reqested */ + len = ((len+(MIN_SETL-1))/MIN_SETL)*MIN_SETL; + + /* test len again */ + if (NTL_OVERFLOW(len, NTL_ZZ_NBITS, 0)) + ResourceError("size too big in _ntl_gsetlength"); + + if (STORAGE_OVF(len)) + ResourceError("reallocation failed in _ntl_gsetlength"); + + if (!(x = (_ntl_gbigint)NTL_MALLOC(1, STORAGE(len), 0))) { + MemoryError(); + } + ALLOC(x) = len << 2; + SIZE(x) = 0; + } + + *v = x; +} + +void _ntl_gfree(_ntl_gbigint *xx) +{ + _ntl_gbigint x = *xx; + + + if (!x) + return; + + if (ALLOC(x) & 1) + LogicError("Internal error: can't free this _ntl_gbigint"); + + free((void*) x); + *xx = 0; + return; +} + +void +_ntl_gswap(_ntl_gbigint *a, _ntl_gbigint *b) +{ + if ((*a && (ALLOC(*a) & 1)) || (*b && (ALLOC(*b) & 1))) { + // one of the inputs points to an bigint that is + // "pinned down" in memory, so we have to swap the data, + // not just the pointers + + GRegister(t); + long sz_a, sz_b, sz; + + sz_a = _ntl_gsize(*a); + sz_b = _ntl_gsize(*b); + sz = (sz_a > sz_b) ? 
sz_a : sz_b; + + _ntl_gsetlength(a, sz); + _ntl_gsetlength(b, sz); + + // EXCEPTIONS: all of the above ensures that swap provides strong ES + + _ntl_gcopy(*a, &t); + _ntl_gcopy(*b, a); + _ntl_gcopy(t, b); + return; + } + + SWAP_BIGINT(*a, *b); +} + + +void _ntl_gcopy(_ntl_gbigint a, _ntl_gbigint *bb) +{ + _ntl_gbigint b; + long sa, abs_sa, i; + mp_limb_t *adata, *bdata; + + b = *bb; + + if (!a || (sa = SIZE(a)) == 0) { + if (b) SIZE(b) = 0; + } + else { + if (a != b) { + if (sa >= 0) + abs_sa = sa; + else + abs_sa = -sa; + + if (MustAlloc(b, abs_sa)) { + _ntl_gsetlength(&b, abs_sa); + *bb = b; + } + + adata = DATA(a); + bdata = DATA(b); + + for (i = 0; i < abs_sa; i++) + bdata[i] = adata[i]; + + SIZE(b) = sa; + } + } +} + + +void _ntl_gzero(_ntl_gbigint *aa) +{ + _ntl_gbigint a = *aa; + + if (a) SIZE(a) = 0; +} + +void _ntl_gone(_ntl_gbigint *aa) +{ + _ntl_gbigint a = *aa; + if (!a) { + _ntl_gsetlength(&a, 1); + *aa = a; + } + + SIZE(a) = 1; + DATA(a)[0] = 1; +} + +long _ntl_giszero(_ntl_gbigint a) +{ + return ZEROP(a); +} + +long _ntl_godd(_ntl_gbigint a) +{ + if (ZEROP(a)) + return 0; + else + return DATA(a)[0]&1; +} + +long _ntl_gbit(_ntl_gbigint a, long p) +{ + long bl; + long sa; + mp_limb_t wh; + + if (p < 0 || !a) return 0; + + bl = p/NTL_ZZ_NBITS; + wh = ((mp_limb_t) 1) << (p - NTL_ZZ_NBITS*bl); + + sa = SIZE(a); + if (sa < 0) sa = -sa; + + if (sa <= bl) return 0; + if (DATA(a)[bl] & wh) return 1; + return 0; +} + +void _ntl_glowbits(_ntl_gbigint a, long b, _ntl_gbigint *cc) +{ + _ntl_gbigint c; + + long bl; + long wh; + long sa; + long i; + mp_limb_t *adata, *cdata; + + if (ZEROP(a) || (b<=0)) { + _ntl_gzero(cc); + return; + } + + bl = b/NTL_ZZ_NBITS; + wh = b - NTL_ZZ_NBITS*bl; + if (wh != 0) + bl++; + else + wh = NTL_ZZ_NBITS; + + sa = SIZE(a); + if (sa < 0) sa = -sa; + + if (sa < bl) { + _ntl_gcopy(a,cc); + _ntl_gabs(cc); + return; + } + + c = *cc; + + /* a won't move if c aliases a */ + _ntl_gsetlength(&c, bl); + *cc = c; + + adata = DATA(a); + cdata = DATA(c); + + for (i = 0; i < bl-1; i++) + cdata[i] = adata[i]; + + if (wh == NTL_ZZ_NBITS) + cdata[bl-1] = adata[bl-1]; + else + cdata[bl-1] = adata[bl-1] & ((((mp_limb_t) 1) << wh) - ((mp_limb_t) 1)); + + STRIP(bl, cdata); + SIZE(c) = bl; +} + +long _ntl_gslowbits(_ntl_gbigint a, long p) +{ + GRegister(x); + + if (p > NTL_BITS_PER_LONG) + p = NTL_BITS_PER_LONG; + + _ntl_glowbits(a, p, &x); + + return _ntl_gtoint(x); +} + +long _ntl_gsetbit(_ntl_gbigint *a, long b) +{ + long bl; + long sa, aneg; + long i; + mp_limb_t wh, *adata, tmp; + + if (b<0) LogicError("_ntl_gsetbit: negative index"); + + if (ZEROP(*a)) { + _ntl_gintoz(1, a); + _ntl_glshift(*a, b, a); + return 0; + } + + bl = (b/NTL_ZZ_NBITS); + wh = ((mp_limb_t) 1) << (b - NTL_ZZ_NBITS*bl); + + GET_SIZE_NEG(sa, aneg, *a); + + if (sa > bl) { + adata = DATA(*a); + tmp = adata[bl] & wh; + adata[bl] |= wh; + if (tmp) return 1; + return 0; + } + else { + _ntl_gsetlength(a, bl+1); + adata = DATA(*a); + for (i = sa; i < bl; i++) + adata[i] = 0; + adata[bl] = wh; + + sa = bl+1; + if (aneg) sa = -sa; + SIZE(*a) = sa; + return 0; + } +} + +long _ntl_gswitchbit(_ntl_gbigint *a, long b) +{ + long bl; + long sa, aneg; + long i; + mp_limb_t wh, *adata, tmp; + + if (b<0) LogicError("_ntl_gswitchbit: negative index"); + + + if (ZEROP(*a)) { + _ntl_gintoz(1, a); + _ntl_glshift(*a, b, a); + return 0; + } + + bl = (b/NTL_ZZ_NBITS); + wh = ((mp_limb_t) 1) << (b - NTL_ZZ_NBITS*bl); + + GET_SIZE_NEG(sa, aneg, *a); + + if (sa > bl) { + adata = DATA(*a); + tmp = adata[bl] & wh; + adata[bl] ^= wh; 
+ + if (bl == sa-1) { + STRIP(sa, adata); + if (aneg) sa = -sa; + SIZE(*a) = sa; + } + + if (tmp) return 1; + return 0; + } + else { + _ntl_gsetlength(a, bl+1); + adata = DATA(*a); + for (i = sa; i < bl; i++) + adata[i] = 0; + adata[bl] = wh; + + sa = bl+1; + if (aneg) sa = -sa; + SIZE(*a) = sa; + return 0; + } +} + +long +_ntl_gweights( + long aa + ) +{ + unsigned long a; + long res = 0; + if (aa < 0) + a = -((unsigned long) aa); + else + a = aa; + + while (a) { + if (a & 1) res ++; + a >>= 1; + } + return (res); +} + +static long +gweights_mp_limb( + mp_limb_t a + ) +{ + long res = 0; + + while (a) { + if (a & 1) res ++; + a >>= 1; + } + return (res); +} + +long +_ntl_gweight( + _ntl_gbigint a + ) +{ + long i; + long sa; + mp_limb_t *adata; + long res; + + if (!a) return (0); + + sa = SIZE(a); + if (sa < 0) sa = -sa; + adata = DATA(a); + + res = 0; + for (i = 0; i < sa; i++) + res += gweights_mp_limb(adata[i]); + + return (res); +} + +long +_ntl_g2logs( + long aa + ) +{ + long i = 0; + unsigned long a; + + if (aa < 0) + a = - ((unsigned long) aa); + else + a = aa; + + while (a>=256) + i += 8, a >>= 8; + if (a >=16) + i += 4, a >>= 4; + if (a >= 4) + i += 2, a >>= 2; + if (a >= 2) + i += 2; + else if (a >= 1) + i++; + return (i); +} + +long _ntl_g2log(_ntl_gbigint a) +{ + long la; + long t; + + if (!a) return 0; + la = SIZE(a); + if (la == 0) return 0; + if (la < 0) la = -la; + COUNT_BITS(t, DATA(a)[la-1]); + return NTL_ZZ_NBITS*(la - 1) + t; +} + + + +long _ntl_gmakeodd(_ntl_gbigint *nn) +{ + _ntl_gbigint n = *nn; + long shift; + mp_limb_t *ndata; + mp_limb_t i; + + if (ZEROP(n)) + return (0); + + shift = 0; + ndata = DATA(n); + + while (ndata[shift] == 0) + shift++; + + i = ndata[shift]; + + shift = NTL_ZZ_NBITS * shift; + + while ((i & 1) == 0) { + shift++; + i >>= 1; + } + _ntl_grshift(n, shift, &n); + return shift; +} + + +long _ntl_gnumtwos(_ntl_gbigint n) +{ + long shift; + mp_limb_t *ndata; + mp_limb_t i; + + if (ZEROP(n)) + return (0); + + shift = 0; + ndata = DATA(n); + + while (ndata[shift] == 0) + shift++; + + i = ndata[shift]; + + shift = NTL_ZZ_NBITS * shift; + + while ((i & 1) == 0) { + shift++; + i >>= 1; + } + + return shift; +} + + +void _ntl_gand(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) +{ + _ntl_gbigint c; + long sa; + long sb; + long sm; + long i; + long a_alias, b_alias; + mp_limb_t *adata, *bdata, *cdata; + + if (ZEROP(a) || ZEROP(b)) { + _ntl_gzero(cc); + return; + } + + c = *cc; + a_alias = (a == c); + b_alias = (b == c); + + sa = SIZE(a); + if (sa < 0) sa = -sa; + + sb = SIZE(b); + if (sb < 0) sb = -sb; + + sm = (sa > sb ? 
sb : sa); + + _ntl_gsetlength(&c, sm); + if (a_alias) a = c; + if (b_alias) b = c; + *cc = c; + + adata = DATA(a); + bdata = DATA(b); + cdata = DATA(c); + + for (i = 0; i < sm; i++) + cdata[i] = adata[i] & bdata[i]; + + STRIP(sm, cdata); + SIZE(c) = sm; +} + + +void _ntl_gxor(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) +{ + _ntl_gbigint c; + long sa; + long sb; + long sm; + long la; + long i; + long a_alias, b_alias; + mp_limb_t *adata, *bdata, *cdata; + + if (ZEROP(a)) { + _ntl_gcopy(b,cc); + _ntl_gabs(cc); + return; + } + + if (ZEROP(b)) { + _ntl_gcopy(a,cc); + _ntl_gabs(cc); + return; + } + + c = *cc; + a_alias = (a == c); + b_alias = (b == c); + + sa = SIZE(a); + if (sa < 0) sa = -sa; + + sb = SIZE(b); + if (sb < 0) sb = -sb; + + if (sa > sb) { + la = sa; + sm = sb; + } + else { + la = sb; + sm = sa; + } + + _ntl_gsetlength(&c, la); + if (a_alias) a = c; + if (b_alias) b = c; + *cc = c; + + adata = DATA(a); + bdata = DATA(b); + cdata = DATA(c); + + for (i = 0; i < sm; i ++) + cdata[i] = adata[i] ^ bdata[i]; + + if (sa > sb) + for (;i < la; i++) cdata[i] = adata[i]; + else + for (;i < la; i++) cdata[i] = bdata[i]; + + STRIP(la, cdata); + SIZE(c) = la; +} + + +void _ntl_gor(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) +{ + _ntl_gbigint c; + long sa; + long sb; + long sm; + long la; + long i; + long a_alias, b_alias; + mp_limb_t *adata, *bdata, *cdata; + + if (ZEROP(a)) { + _ntl_gcopy(b,cc); + _ntl_gabs(cc); + return; + } + + if (ZEROP(b)) { + _ntl_gcopy(a,cc); + _ntl_gabs(cc); + return; + } + + c = *cc; + a_alias = (a == c); + b_alias = (b == c); + + sa = SIZE(a); + if (sa < 0) sa = -sa; + + sb = SIZE(b); + if (sb < 0) sb = -sb; + + if (sa > sb) { + la = sa; + sm = sb; + } + else { + la = sb; + sm = sa; + } + + _ntl_gsetlength(&c, la); + if (a_alias) a = c; + if (b_alias) b = c; + *cc = c; + + adata = DATA(a); + bdata = DATA(b); + cdata = DATA(c); + + for (i = 0; i < sm; i ++) + cdata[i] = adata[i] | bdata[i]; + + if (sa > sb) + for (;i < la; i++) cdata[i] = adata[i]; + else + for (;i < la; i++) cdata[i] = bdata[i]; + + STRIP(la, cdata); + SIZE(c) = la; +} + + +void _ntl_gnegate(_ntl_gbigint *aa) +{ + _ntl_gbigint a = *aa; + if (a) SIZE(a) = -SIZE(a); +} + + +/* + * DIRT: this implementation of _ntl_gintoz relies crucially + * on the assumption that the number of bits per limb_t is at least + * equal to the number of bits per long. + */ + +void _ntl_gintoz(long d, _ntl_gbigint *aa) +{ + _ntl_gbigint a = *aa; + + if (d == 0) { + if (a) SIZE(a) = 0; + } + else if (d > 0) { + if (!a) { + _ntl_gsetlength(&a, 1); + *aa = a; + } + + SIZE(a) = 1; + DATA(a)[0] = d; + } + else { + if (!a) { + _ntl_gsetlength(&a, 1); + *aa = a; + } + + SIZE(a) = -1; + DATA(a)[0] = -((mp_limb_t) d); /* careful! */ + } +} + + +/* + * DIRT: this implementation of _ntl_guintoz relies crucially + * on the assumption that the number of bits per limb_t is at least + * equal to the number of bits per long. + */ + +void _ntl_guintoz(unsigned long d, _ntl_gbigint *aa) +{ + _ntl_gbigint a = *aa; + + if (d == 0) { + if (a) SIZE(a) = 0; + } + else { + if (!a) { + _ntl_gsetlength(&a, 1); + *aa = a; + } + + SIZE(a) = 1; + DATA(a)[0] = d; + } +} + + +long _ntl_gtoint(_ntl_gbigint a) +{ + unsigned long res = _ntl_gtouint(a); + return NTL_ULONG_TO_LONG(res); +} + +/* + * DIRT: this implementation of _ntl_gtouint relies crucially + * on the assumption that the number of bits per limb_t is at least + * equal to the number of bits per long. 
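+ * (An illustrative consequence of this assumption: applying _ntl_gtouint
+ * to the bigint -1 returns -DATA(a)[0], i.e. ULONG_MAX.)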
+ */ + +unsigned long _ntl_gtouint(_ntl_gbigint a) +{ + if (ZEROP(a)) + return 0; + + if (SIZE(a) > 0) + return DATA(a)[0]; + + return -DATA(a)[0]; +} + + +long _ntl_gcompare(_ntl_gbigint a, _ntl_gbigint b) +{ + long sa, sb, cmp; + mp_limb_t *adata, *bdata; + + if (!a) + sa = 0; + else + sa = SIZE(a); + + if (!b) + sb = 0; + else + sb = SIZE(b); + + if (sa != sb) { + if (sa > sb) + return 1; + else + return -1; + } + + if (sa == 0) + return 0; + + adata = DATA(a); + bdata = DATA(b); + + if (sa > 0) { + cmp = mpn_cmp(adata, bdata, sa); + + if (cmp > 0) + return 1; + else if (cmp < 0) + return -1; + else + return 0; + } + else { + cmp = mpn_cmp(adata, bdata, -sa); + + if (cmp > 0) + return -1; + else if (cmp < 0) + return 1; + else + return 0; + } +} + + +long _ntl_gsign(_ntl_gbigint a) +{ + long sa; + + if (!a) return 0; + + sa = SIZE(a); + if (sa > 0) return 1; + if (sa == 0) return 0; + return -1; +} + +void _ntl_gabs(_ntl_gbigint *pa) +{ + _ntl_gbigint a = *pa; + + if (!a) return; + if (SIZE(a) < 0) SIZE(a) = -SIZE(a); +} + +long _ntl_gscompare(_ntl_gbigint a, long b) +{ + if (b == 0) { + long sa; + if (!a) return 0; + sa = SIZE(a); + if (sa > 0) return 1; + if (sa == 0) return 0; + return -1; + } + else { + GRegister(B); + _ntl_gintoz(b, &B); + return _ntl_gcompare(a, B); + } +} + + +void _ntl_glshift(_ntl_gbigint n, long k, _ntl_gbigint *rres) +{ + _ntl_gbigint res; + mp_limb_t *ndata, *resdata, *resdata1; + long limb_cnt, i, sn, nneg, sres; + long n_alias; + + if (ZEROP(n)) { + _ntl_gzero(rres); + return; + } + + res = *rres; + n_alias = (n == res); + + if (!k) { + if (!n_alias) + _ntl_gcopy(n, rres); + return; + } + + if (k < 0) { + if (k < -NTL_MAX_LONG) + _ntl_gzero(rres); + else + _ntl_grshift(n, -k, rres); + return; + } + + GET_SIZE_NEG(sn, nneg, n); + + limb_cnt = ((unsigned long) k) / NTL_ZZ_NBITS; + k = ((unsigned long) k) % NTL_ZZ_NBITS; + sres = sn + limb_cnt; + if (k != 0) sres++; + + if (MustAlloc(res, sres)) { + _ntl_gsetlength(&res, sres); + if (n_alias) n = res; + *rres = res; + } + + ndata = DATA(n); + resdata = DATA(res); + resdata1 = resdata + limb_cnt; + + if (k != 0) { + mp_limb_t t = mpn_lshift(resdata1, ndata, sn, k); + if (t != 0) + resdata[sres-1] = t; + else + sres--; + } + else { + for (i = sn-1; i >= 0; i--) + resdata1[i] = ndata[i]; + } + + for (i = 0; i < limb_cnt; i++) + resdata[i] = 0; + + if (nneg) sres = -sres; + SIZE(res) = sres; +} + +void _ntl_grshift(_ntl_gbigint n, long k, _ntl_gbigint *rres) +{ + _ntl_gbigint res; + mp_limb_t *ndata, *resdata, *ndata1; + long limb_cnt, i, sn, nneg, sres; + + if (ZEROP(n)) { + _ntl_gzero(rres); + return; + } + + if (!k) { + if (n != *rres) + _ntl_gcopy(n, rres); + return; + } + + if (k < 0) { + if (k < -NTL_MAX_LONG) ResourceError("overflow in _ntl_glshift"); + _ntl_glshift(n, -k, rres); + return; + } + + GET_SIZE_NEG(sn, nneg, n); + + limb_cnt = ((unsigned long) k) / NTL_ZZ_NBITS; + + sres = sn - limb_cnt; + + if (sres <= 0) { + _ntl_gzero(rres); + return; + } + + res = *rres; + if (MustAlloc(res, sres)) { + /* n won't move if res aliases n */ + _ntl_gsetlength(&res, sres); + *rres = res; + } + + ndata = DATA(n); + resdata = DATA(res); + ndata1 = ndata + limb_cnt; + k = ((unsigned long) k) % NTL_ZZ_NBITS; + + if (k != 0) { + mpn_rshift(resdata, ndata1, sres, k); + if (resdata[sres-1] == 0) + sres--; + } + else { + for (i = 0; i < sres; i++) + resdata[i] = ndata1[i]; + } + + if (nneg) sres = -sres; + SIZE(res) = sres; +} + + + +void +_ntl_gadd(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) +{ + long sa, aneg, 
sb, bneg, sc, cmp; + mp_limb_t *adata, *bdata, *cdata, carry; + _ntl_gbigint c; + long a_alias, b_alias; + + if (ZEROP(a)) { + _ntl_gcopy(b, cc); + return; + } + + if (ZEROP(b)) { + _ntl_gcopy(a, cc); + return; + } + + GET_SIZE_NEG(sa, aneg, a); + GET_SIZE_NEG(sb, bneg, b); + + if (sa < sb) { + SWAP_BIGINT(a, b); + SWAP_LONG(sa, sb); + SWAP_LONG(aneg, bneg); + } + + /* sa >= sb */ + + c = *cc; + a_alias = (a == c); + b_alias = (b == c); + + if (aneg == bneg) { + /* same sign => addition */ + + sc = sa + 1; + if (MustAlloc(c, sc)) { + _ntl_gsetlength(&c, sc); + if (a_alias) a = c; + if (b_alias) b = c; + *cc = c; + } + + adata = DATA(a); + bdata = DATA(b); + cdata = DATA(c); + + carry = mpn_add(cdata, adata, sa, bdata, sb); + if (carry) + cdata[sc-1] = carry; + else + sc--; + + if (aneg) sc = -sc; + SIZE(c) = sc; + } + else { + /* opposite sign => subtraction */ + + sc = sa; + if (MustAlloc(c, sc)) { + _ntl_gsetlength(&c, sc); + if (a_alias) a = c; + if (b_alias) b = c; + *cc = c; + } + + adata = DATA(a); + bdata = DATA(b); + cdata = DATA(c); + + if (sa > sb) + cmp = 1; + else + cmp = mpn_cmp(adata, bdata, sa); + + if (cmp == 0) { + SIZE(c) = 0; + } + else { + if (cmp < 0) cmp = 0; + if (cmp > 0) cmp = 1; + /* abs(a) != abs(b) && (abs(a) > abs(b) <=> cmp) */ + + if (cmp) + mpn_sub(cdata, adata, sa, bdata, sb); + else + mpn_sub(cdata, bdata, sb, adata, sa); /* sa == sb */ + + STRIP(sc, cdata); + if (aneg == cmp) sc = -sc; + SIZE(c) = sc; + } + } +} + +void +_ntl_gsadd(_ntl_gbigint a, long b, _ntl_gbigint *cc) +{ + // FIXME: this is really inefficient...too much overhead + GRegister(B); + _ntl_gintoz(b, &B); + _ntl_gadd(a, B, cc); +} + +void +_ntl_gsub(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) +{ + long sa, aneg, sb, bneg, sc, cmp, rev; + mp_limb_t *adata, *bdata, *cdata, carry; + _ntl_gbigint c; + long a_alias, b_alias; + + if (ZEROP(a)) { + _ntl_gcopy(b, cc); + c = *cc; + if (c) SIZE(c) = -SIZE(c); + return; + } + + if (ZEROP(b)) { + _ntl_gcopy(a, cc); + return; + } + + GET_SIZE_NEG(sa, aneg, a); + GET_SIZE_NEG(sb, bneg, b); + + if (sa < sb) { + SWAP_BIGINT(a, b); + SWAP_LONG(sa, sb); + SWAP_LONG(aneg, bneg); + rev = 1; + } + else + rev = 0; + + /* sa >= sb */ + + c = *cc; + a_alias = (a == c); + b_alias = (b == c); + + if (aneg != bneg) { + /* opposite sign => addition */ + + sc = sa + 1; + if (MustAlloc(c, sc)) { + _ntl_gsetlength(&c, sc); + if (a_alias) a = c; + if (b_alias) b = c; + *cc = c; + } + + adata = DATA(a); + bdata = DATA(b); + cdata = DATA(c); + + carry = mpn_add(cdata, adata, sa, bdata, sb); + if (carry) + cdata[sc-1] = carry; + else + sc--; + + if (aneg ^ rev) sc = -sc; + SIZE(c) = sc; + } + else { + /* same sign => subtraction */ + + sc = sa; + if (MustAlloc(c, sc)) { + _ntl_gsetlength(&c, sc); + if (a_alias) a = c; + if (b_alias) b = c; + *cc = c; + } + + adata = DATA(a); + bdata = DATA(b); + cdata = DATA(c); + + if (sa > sb) + cmp = 1; + else + cmp = mpn_cmp(adata, bdata, sa); + + if (cmp == 0) { + SIZE(c) = 0; + } + else { + if (cmp < 0) cmp = 0; + if (cmp > 0) cmp = 1; + /* abs(a) != abs(b) && (abs(a) > abs(b) <=> cmp) */ + + if (cmp) + mpn_sub(cdata, adata, sa, bdata, sb); + else + mpn_sub(cdata, bdata, sb, adata, sa); /* sa == sb */ + + STRIP(sc, cdata); + if ((aneg == cmp) ^ rev) sc = -sc; + SIZE(c) = sc; + } + } +} + +void +_ntl_gsubpos(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) +{ + long sa, sb, sc; + mp_limb_t *adata, *bdata, *cdata; + _ntl_gbigint c; + long a_alias, b_alias; + + if (ZEROP(a)) { + _ntl_gzero(cc); + return; + } + + if (ZEROP(b)) { + 
_ntl_gcopy(a, cc); + return; + } + + sa = SIZE(a); + sb = SIZE(b); + + c = *cc; + a_alias = (a == c); + b_alias = (b == c); + + sc = sa; + if (MustAlloc(c, sc)) { + _ntl_gsetlength(&c, sc); + if (a_alias) a = c; + if (b_alias) b = c; + *cc = c; + } + + adata = DATA(a); + bdata = DATA(b); + cdata = DATA(c); + + mpn_sub(cdata, adata, sa, bdata, sb); + + STRIP(sc, cdata); + SIZE(c) = sc; +} + +void _ntl_gmul(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) +{ + GRegister(mem); + + long sa, aneg, sb, bneg, alias, sc; + mp_limb_t *adata, *bdata, *cdata, msl; + _ntl_gbigint c; + + if (ZEROP(a) || ZEROP(b)) { + _ntl_gzero(cc); + return; + } + + GET_SIZE_NEG(sa, aneg, a); + GET_SIZE_NEG(sb, bneg, b); + + if (a == *cc || b == *cc) { + c = mem; + alias = 1; + } + else { + c = *cc; + alias = 0; + } + + sc = sa + sb; + if (MustAlloc(c, sc)) + _ntl_gsetlength(&c, sc); + + if (alias) + mem = c; + else + *cc = c; + + adata = DATA(a); + bdata = DATA(b); + cdata = DATA(c); + + if (sa >= sb) + msl = mpn_mul(cdata, adata, sa, bdata, sb); + else + msl = mpn_mul(cdata, bdata, sb, adata, sa); + + if (!msl) sc--; + if (aneg != bneg) sc = -sc; + SIZE(c) = sc; + + if (alias) _ntl_gcopy(mem, cc); +} + +void _ntl_gsq(_ntl_gbigint a, _ntl_gbigint *cc) +{ + _ntl_gmul(a, a, cc); + /* this is good enough...eventually, mpn_sqr_n will be called */ +} + + +/* + * DIRT: this implementation of _ntl_gsmul relies crucially + * on the assumption that the number of bits per limb_t is at least + * equal to the number of bits per long. + */ + +void +_ntl_gsmul(_ntl_gbigint a, long d, _ntl_gbigint *bb) +{ + long sa, sb; + long anegative, bnegative; + _ntl_gbigint b; + mp_limb_t *adata, *bdata; + mp_limb_t dd, carry; + long a_alias; + + if (ZEROP(a) || !d) { + _ntl_gzero(bb); + return; + } + + GET_SIZE_NEG(sa, anegative, a); + + if (d < 0) { + dd = - ((mp_limb_t) d); /* careful ! */ + bnegative = 1-anegative; + } + else { + dd = (mp_limb_t) d; + bnegative = anegative; + } + + sb = sa + 1; + + b = *bb; + a_alias = (a == b); + + if (MustAlloc(b, sb)) { + _ntl_gsetlength(&b, sb); + if (a_alias) a = b; + *bb = b; + } + + adata = DATA(a); + bdata = DATA(b); + + if (dd == 2) + carry = mpn_lshift(bdata, adata, sa, 1); + else + carry = mpn_mul_1(bdata, adata, sa, dd); + + if (carry) + bdata[sa] = carry; + else + sb--; + + if (bnegative) sb = -sb; + SIZE(b) = sb; +} + +/* + * DIRT: this implementation of _ntl_gsdiv relies crucially + * on the assumption that the number of bits per limb_t is at least + * equal to the number of bits per long. + */ + +long _ntl_gsdiv(_ntl_gbigint a, long d, _ntl_gbigint *bb) +{ + long sa, aneg, sb, dneg; + _ntl_gbigint b; + mp_limb_t dd, *adata, *bdata; + long r; + + if (!d) { + ArithmeticError("division by zero in _ntl_gsdiv"); + } + + if (ZEROP(a)) { + _ntl_gzero(bb); + return (0); + } + + GET_SIZE_NEG(sa, aneg, a); + + if (d < 0) { + dd = - ((mp_limb_t) d); /* careful ! 
*/ + dneg = 1; + } + else { + dd = (mp_limb_t) d; + dneg = 0; + } + + sb = sa; + b = *bb; + if (MustAlloc(b, sb)) { + /* if b aliases a, then b won't move */ + _ntl_gsetlength(&b, sb); + *bb = b; + } + + adata = DATA(a); + bdata = DATA(b); + + if (dd == 2) + r = mpn_rshift(bdata, adata, sa, 1) >> (NTL_ZZ_NBITS - 1); + else + r = mpn_divmod_1(bdata, adata, sa, dd); + + if (bdata[sb-1] == 0) + sb--; + + SIZE(b) = sb; + + if (aneg || dneg) { + if (aneg != dneg) { + if (!r) { + SIZE(b) = -SIZE(b); + } + else { + _ntl_gsadd(b, 1, &b); + SIZE(b) = -SIZE(b); + if (dneg) + r = r + d; + else + r = d - r; + *bb = b; + } + } + else + r = -r; + } + + return r; +} + +/* + * DIRT: this implementation of _ntl_gsmod relies crucially + * on the assumption that the number of bits per limb_t is at least + * equal to the number of bits per long. + */ + +long _ntl_gsmod(_ntl_gbigint a, long d) +{ + long sa, aneg, dneg; + mp_limb_t dd, *adata; + long r; + + if (!d) { + ArithmeticError("division by zero in _ntl_gsmod"); + } + + if (ZEROP(a)) { + return (0); + } + + GET_SIZE_NEG(sa, aneg, a); + + if (d < 0) { + dd = - ((mp_limb_t) d); /* careful ! */ + dneg = 1; + } + else { + dd = (mp_limb_t) d; + dneg = 0; + } + + adata = DATA(a); + + if (dd == 2) + r = adata[0] & 1; + else + r = mpn_mod_1(adata, sa, dd); + + if (aneg || dneg) { + if (aneg != dneg) { + if (r) { + if (dneg) + r = r + d; + else + r = d - r; + } + } + else + r = -r; + } + + return r; +} + + +void _ntl_gdiv(_ntl_gbigint a, _ntl_gbigint d, + _ntl_gbigint *bb, _ntl_gbigint *rr) +{ + GRegister(b); + GRegister(rmem); + + _ntl_gbigint *rp; + + long sa, aneg, sb, sd, dneg, sr, in_place; + mp_limb_t *adata, *ddata, *bdata, *rdata; + + if (ZEROP(d)) { + ArithmeticError("division by zero in _ntl_gdiv"); + } + + if (ZEROP(a)) { + if (bb) _ntl_gzero(bb); + if (rr) _ntl_gzero(rr); + return; + } + + GET_SIZE_NEG(sa, aneg, a); + GET_SIZE_NEG(sd, dneg, d); + + if (!aneg && !dneg && rr && *rr != a && *rr != d) { + in_place = 1; + rp = rr; + } + else { + in_place = 0; + rp = &rmem; + } + + + if (sa < sd) { + _ntl_gzero(&b); + _ntl_gcopy(a, rp); + if (aneg) SIZE(*rp) = -SIZE(*rp); + goto done; + } + + sb = sa-sd+1; + if (MustAlloc(b, sb)) + _ntl_gsetlength(&b, sb); + + sr = sd; + if (MustAlloc(*rp, sr)) + _ntl_gsetlength(rp, sr); + + + adata = DATA(a); + ddata = DATA(d); + bdata = DATA(b); + rdata = DATA(*rp); + + mpn_tdiv_qr(bdata, rdata, 0, adata, sa, ddata, sd); + + if (bdata[sb-1] == 0) + sb--; + SIZE(b) = sb; + + STRIP(sr, rdata); + SIZE(*rp) = sr; + +done: + + if (aneg || dneg) { + if (aneg != dneg) { + if (ZEROP(*rp)) { + SIZE(b) = -SIZE(b); + } + else { + if (bb) { + _ntl_gsadd(b, 1, &b); + SIZE(b) = -SIZE(b); + } + if (rr) { + if (dneg) + _ntl_gadd(*rp, d, rp); + else + _ntl_gsub(d, *rp, rp); + } + } + } + else + SIZE(*rp) = -SIZE(*rp); + } + + if (bb) _ntl_gcopy(b, bb); + + if (rr && !in_place) + _ntl_gcopy(*rp, rr); +} + + +/* a simplified mod operation: assumes a >= 0, d > 0 are non-negative, + * that space for the result has already been allocated, + * and that inputs do not alias output. 
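+ * (Illustrative note: a caller can meet the allocation assumption by
+ * first invoking _ntl_gsetlength(rr, SIZE(d)) before the call.)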
*/ + +static +void gmod_simple(_ntl_gbigint a, _ntl_gbigint d, _ntl_gbigint *rr) +{ + GRegister(b); + + long sa, sb, sd, sr; + mp_limb_t *adata, *ddata, *bdata, *rdata; + _ntl_gbigint r; + + if (ZEROP(a)) { + _ntl_gzero(rr); + return; + } + + sa = SIZE(a); + sd = SIZE(d); + + if (sa < sd) { + _ntl_gcopy(a, rr); + return; + } + + sb = sa-sd+1; + if (MustAlloc(b, sb)) + _ntl_gsetlength(&b, sb); + + sr = sd; + r = *rr; + + adata = DATA(a); + ddata = DATA(d); + bdata = DATA(b); + rdata = DATA(r); + + mpn_tdiv_qr(bdata, rdata, 0, adata, sa, ddata, sd); + + STRIP(sr, rdata); + SIZE(r) = sr; +} + + +void _ntl_gmod(_ntl_gbigint a, _ntl_gbigint d, _ntl_gbigint *rr) +{ + _ntl_gdiv(a, d, 0, rr); +} + +void _ntl_gquickmod(_ntl_gbigint *rr, _ntl_gbigint d) +{ + _ntl_gdiv(*rr, d, 0, rr); +} + +void _ntl_gsqrt(_ntl_gbigint n, _ntl_gbigint *rr) +{ + GRegister(r); + + long sn, sr; + mp_limb_t *ndata, *rdata; + + if (ZEROP(n)) { + _ntl_gzero(rr); + return; + } + + sn = SIZE(n); + if (sn < 0) ArithmeticError("negative argument to _ntl_sqrt"); + + sr = (sn+1)/2; + _ntl_gsetlength(&r, sr); + + ndata = DATA(n); + rdata = DATA(r); + + mpn_sqrtrem(rdata, 0, ndata, sn); + + STRIP(sr, rdata); + SIZE(r) = sr; + + _ntl_gcopy(r, rr); +} + +/* + * DIRT: this implementation of _ntl_gsqrts relies crucially + * on the assumption that the number of bits per limb_t is at least + * equal to the number of bits per long. + */ + +long _ntl_gsqrts(long n) +{ + mp_limb_t ndata, rdata; + + if (n == 0) { + return 0; + } + + if (n < 0) ArithmeticError("negative argument to _ntl_sqrts"); + + ndata = n; + + mpn_sqrtrem(&rdata, 0, &ndata, 1); + + return rdata; +} + + +void _ntl_ggcd(_ntl_gbigint m1, _ntl_gbigint m2, _ntl_gbigint *r) +{ + GRegister(s1); + GRegister(s2); + GRegister(res); + + long k1, k2, k_min, l1, l2, ss1, ss2, sres; + + _ntl_gcopy(m1, &s1); + _ntl_gabs(&s1); + + _ntl_gcopy(m2, &s2); + _ntl_gabs(&s2); + + if (ZEROP(s1)) { + _ntl_gcopy(s2, r); + return; + } + + if (ZEROP(s2)) { + _ntl_gcopy(s1, r); + return; + } + + k1 = _ntl_gmakeodd(&s1); + k2 = _ntl_gmakeodd(&s2); + + if (k1 <= k2) + k_min = k1; + else + k_min = k2; + + l1 = _ntl_g2log(s1); + l2 = _ntl_g2log(s2); + + ss1 = SIZE(s1); + ss2 = SIZE(s2); + + if (ss1 >= ss2) + sres = ss1; + else + sres = ss2; + + /* set to max: gmp documentation is unclear on this point */ + + _ntl_gsetlength(&res, sres); + + if (l1 >= l2) + SIZE(res) = mpn_gcd(DATA(res), DATA(s1), ss1, DATA(s2), ss2); + else + SIZE(res) = mpn_gcd(DATA(res), DATA(s2), ss2, DATA(s1), ss1); + + _ntl_glshift(res, k_min, &res); + + _ntl_gcopy(res, r); +} + +static long +gxxeucl( + _ntl_gbigint ain, + _ntl_gbigint nin, + _ntl_gbigint *invv, + _ntl_gbigint *uu + ) +{ + GRegister(a); + GRegister(n); + GRegister(q); + GRegister(w); + GRegister(x); + GRegister(y); + GRegister(z); + + _ntl_gbigint inv = *invv; + _ntl_gbigint u = *uu; + long diff; + long ilo; + long sa; + long sn; + long temp; + long e; + long fast; + long parity; + long gotthem; + mp_limb_t *p; + long try11; + long try12; + long try21; + long try22; + long got11; + long got12; + long got21; + long got22; + double hi; + double lo; + double dt; + double fhi, fhi1; + double flo, flo1; + double num; + double den; + double dirt; + + _ntl_gsetlength(&a, (e = (SIZE(ain) > SIZE(nin) ? 
SIZE(ain) : SIZE(nin)))); + _ntl_gsetlength(&n, e); + _ntl_gsetlength(&q, e); + _ntl_gsetlength(&w, e); + _ntl_gsetlength(&x, e); + _ntl_gsetlength(&y, e); + _ntl_gsetlength(&z, e); + _ntl_gsetlength(&inv, e); + *invv = inv; + _ntl_gsetlength(&u, e); + *uu = u; + + fhi1 = double(1L) + double(32L)/NTL_FDOUBLE_PRECISION; + flo1 = double(1L) - double(32L)/NTL_FDOUBLE_PRECISION; + + fhi = double(1L) + double(8L)/NTL_FDOUBLE_PRECISION; + flo = double(1L) - double(8L)/NTL_FDOUBLE_PRECISION; + + _ntl_gcopy(ain, &a); + _ntl_gcopy(nin, &n); + + _ntl_gone(&inv); + _ntl_gzero(&w); + + while (SIZE(n) > 0) + { + gotthem = 0; + sa = SIZE(a); + sn = SIZE(n); + diff = sa - sn; + if (!diff || diff == 1) + { + sa = SIZE(a); + p = DATA(a) + (sa-1); + num = double(*p) * NTL_ZZ_FRADIX; + if (sa > 1) + num += double(*(--p)); + num *= NTL_ZZ_FRADIX; + if (sa > 2) + num += double(*(p - 1)); + + sn = SIZE(n); + p = DATA(n) + (sn-1); + den = double(*p) * NTL_ZZ_FRADIX; + if (sn > 1) + den += double(*(--p)); + den *= NTL_ZZ_FRADIX; + if (sn > 2) + den += double(*(p - 1)); + + hi = fhi1 * (num + double(1L)) / den; + lo = flo1 * num / (den + double(1L)); + if (diff > 0) + { + hi *= NTL_ZZ_FRADIX; + lo *= NTL_ZZ_FRADIX; + } + try11 = 1; + try12 = 0; + try21 = 0; + try22 = 1; + parity = 1; + fast = 1; + while (fast > 0) + { + parity = 1 - parity; + if (hi >= NTL_NSP_BOUND) + fast = 0; + else + { + ilo = (long)lo; + dirt = hi - double(ilo); + if (dirt < 1.0/NTL_FDOUBLE_PRECISION || !ilo || ilo < (long)hi) + fast = 0; + else + { + dt = lo-double(ilo); + lo = flo / dirt; + if (dt > 1.0/NTL_FDOUBLE_PRECISION) + hi = fhi / dt; + else + hi = double(NTL_NSP_BOUND); + temp = try11; + try11 = try21; + if ((NTL_WSP_BOUND - temp) / ilo < try21) + fast = 0; + else + try21 = temp + ilo * try21; + temp = try12; + try12 = try22; + if ((NTL_WSP_BOUND - temp) / ilo < try22) + fast = 0; + else + try22 = temp + ilo * try22; + if ((fast > 0) && (parity > 0)) + { + gotthem = 1; + got11 = try11; + got12 = try12; + got21 = try21; + got22 = try22; + } + } + } + } + } + if (gotthem) + { + _ntl_gsmul(inv, got11, &x); + _ntl_gsmul(w, got12, &y); + _ntl_gsmul(inv, got21, &z); + _ntl_gsmul(w, got22, &w); + _ntl_gadd(x, y, &inv); + _ntl_gadd(z, w, &w); + _ntl_gsmul(a, got11, &x); + _ntl_gsmul(n, got12, &y); + _ntl_gsmul(a, got21, &z); + _ntl_gsmul(n, got22, &n); + _ntl_gsub(x, y, &a); + _ntl_gsub(n, z, &n); + } + else + { + _ntl_gdiv(a, n, &q, &a); + _ntl_gmul(q, w, &x); + _ntl_gadd(inv, x, &inv); + if (!ZEROP(a)) + { + _ntl_gdiv(n, a, &q, &n); + _ntl_gmul(q, inv, &x); + _ntl_gadd(w, x, &w); + } + else + { + _ntl_gcopy(n, &a); + _ntl_gzero(&n); + _ntl_gcopy(w, &inv); + _ntl_gnegate(&inv); + } + } + } + + if (_ntl_gscompare(a, 1) == 0) + e = 0; + else + e = 1; + + _ntl_gcopy(a, &u); + + *invv = inv; + *uu = u; + + return (e); +} + +#if 0 +void +_ntl_gexteucl( + _ntl_gbigint aa, + _ntl_gbigint *xa, + _ntl_gbigint bb, + _ntl_gbigint *xb, + _ntl_gbigint *d + ) +{ + GRegister(modcon); + GRegister(a); + GRegister(b); + + long anegative = 0; + long bnegative = 0; + + _ntl_gcopy(aa, &a); + _ntl_gcopy(bb, &b); + + if (a && SIZE(a) < 0) { + anegative = 1; + SIZE(a) = -SIZE(a); + } + else + anegative = 0; + + if (b && SIZE(b) < 0) { + bnegative = 1; + SIZE(b) = -SIZE(b); + } + else + bnegative = 0; + + + if (ZEROP(b)) + { + _ntl_gone(xa); + _ntl_gzero(xb); + _ntl_gcopy(a, d); + goto done; + } + + if (ZEROP(a)) + { + _ntl_gzero(xa); + _ntl_gone(xb); + _ntl_gcopy(b, d); + goto done; + } + + gxxeucl(a, b, xa, d); + _ntl_gmul(a, *xa, xb); + _ntl_gsub(*d, *xb, xb); 
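+ /* back-substitution: gxxeucl computed xa with a*xa = d (mod b), so + xb = (d - a*xa)/b; the division below must therefore be exact */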
+ _ntl_gdiv(*xb, b, xb, &modcon); + + if (!ZEROP(modcon)) + { + ghalt("non-zero remainder in _ntl_gexteucl BUG"); + } + + +done: + if (anegative) + { + _ntl_gnegate(xa); + } + if (bnegative) + { + _ntl_gnegate(xb); + } +} +#endif + +void +_ntl_gexteucl( + _ntl_gbigint ain, + _ntl_gbigint *xap, + _ntl_gbigint bin, + _ntl_gbigint *xbp, + _ntl_gbigint *dp + ) +{ + if (ZEROP(bin)) { + long asign = _ntl_gsign(ain); + + _ntl_gcopy(ain, dp); + _ntl_gabs(dp); + _ntl_gintoz( (asign >= 0 ? 1 : -1), xap); + _ntl_gzero(xbp); + } + else if (ZEROP(ain)) { + long bsign = _ntl_gsign(bin); + + _ntl_gcopy(bin, dp); + _ntl_gabs(dp); + _ntl_gzero(xap); + _ntl_gintoz(bsign, xbp); + } + else { + GRegister(a); + GRegister(b); + GRegister(xa); + GRegister(xb); + GRegister(d); + GRegister(tmp); + + long sa, aneg, sb, bneg, rev; + mp_limb_t *adata, *bdata, *ddata, *xadata; + mp_size_t sxa, sd; + + GET_SIZE_NEG(sa, aneg, ain); + GET_SIZE_NEG(sb, bneg, bin); + + _ntl_gsetlength(&a, sa+1); /* +1 because mpn_gcdext may need it */ + _ntl_gcopy(ain, &a); + + _ntl_gsetlength(&b, sb+1); /* +1 because mpn_gcdext may need it */ + _ntl_gcopy(bin, &b); + + + adata = DATA(a); + bdata = DATA(b); + + if (sa < sb || (sa == sb && mpn_cmp(adata, bdata, sa) < 0)) { + SWAP_BIGINT(ain, bin); + SWAP_LONG(sa, sb); + SWAP_LONG(aneg, bneg); + SWAP_LIMB_PTR(adata, bdata); + rev = 1; + } + else + rev = 0; + + _ntl_gsetlength(&d, sa+1); /* +1 because mpn_gcdext may need it... + documentation is unclear, but this is + what is done in mpz_gcdext */ + _ntl_gsetlength(&xa, sa+1); /* ditto */ + + ddata = DATA(d); + xadata = DATA(xa); + + sd = mpn_gcdext(ddata, xadata, &sxa, adata, sa, bdata, sb); + + SIZE(d) = sd; + SIZE(xa) = sxa; + + /* These two ForceNormal's are work-arounds for GMP bugs + in GMP 4.3.0 */ + ForceNormal(d); + ForceNormal(xa); + + /* now we normalize xa, so that xa is in ( -b/2d, b/2d ], + which makes the output agree with Euclid's algorithm, + regardless of what mpn_gcdext does */ + + if (!ZEROP(xa)) { + _ntl_gcopy(bin, &b); + SIZE(b) = sb; + if (!ONEP(d)) { + _ntl_gdiv(b, d, &b, &tmp); + if (!ZEROP(tmp)) TerminalError("internal bug in _ntl_gexteucl"); + } + + if (SIZE(xa) > 0) { /* xa positive */ + if (_ntl_gcompare(xa, b) > 0) { + _ntl_gmod(xa, b, &xa); + } + _ntl_glshift(xa, 1, &tmp); + if (_ntl_gcompare(tmp, b) > 0) { + _ntl_gsub(xa, b, &xa); + } + } + else { /* xa negative */ + SIZE(xa) = -SIZE(xa); + if (_ntl_gcompare(xa, b) > 0) { + SIZE(xa) = -SIZE(xa); + _ntl_gmod(xa, b, &xa); + _ntl_gsub(xa, b, &xa); + } + else { + SIZE(xa) = -SIZE(xa); + } + _ntl_glshift(xa, 1, &tmp); + SIZE(tmp) = -SIZE(tmp); + if (_ntl_gcompare(tmp, b) >= 0) { + _ntl_gadd(xa, b, &xa); + } + } + } + + /* end normalize */ + + + if (aneg) _ntl_gnegate(&xa); + + _ntl_gmul(ain, xa, &tmp); + _ntl_gsub(d, tmp, &tmp); + _ntl_gdiv(tmp, bin, &xb, &tmp); + + if (!ZEROP(tmp)) TerminalError("internal bug in _ntl_gexteucl"); + + if (rev) SWAP_BIGINT(xa, xb); + + _ntl_gcopy(xa, xap); + _ntl_gcopy(xb, xbp); + _ntl_gcopy(d, dp); + } +} + + +long _ntl_ginv(_ntl_gbigint ain, _ntl_gbigint nin, _ntl_gbigint *invv) +{ + GRegister(u); + GRegister(d); + GRegister(a); + GRegister(n); + + long sz; + long sd; + mp_size_t su; + + if (_ntl_gscompare(nin, 1) <= 0) { + LogicError("InvMod: second input <= 1"); + } + + if (_ntl_gsign(ain) < 0) { + LogicError("InvMod: first input negative"); + } + + if (_ntl_gcompare(ain, nin) >= 0) { + LogicError("InvMod: first input too big"); + } + + sz = SIZE(nin) + 2; + + if (MustAlloc(a, sz)) + _ntl_gsetlength(&a, sz); + + + if 
(MustAlloc(n, sz)) + _ntl_gsetlength(&n, sz); + + + if (MustAlloc(d, sz)) + _ntl_gsetlength(&d, sz); + + if (MustAlloc(u, sz)) + _ntl_gsetlength(&u, sz); + + _ntl_gadd(ain, nin, &a); + _ntl_gcopy(nin, &n); + + /* We apply mpn_gcdext to (a, n) = (ain+nin, nin), because that function + * only computes the co-factor of the larger input. This way, we avoid + * a multiplication and a division. + */ + + sd = mpn_gcdext(DATA(d), DATA(u), &su, DATA(a), SIZE(a), DATA(n), SIZE(n)); + + SIZE(d) = sd; + SIZE(u) = su; + + /* These two ForceNormal's are work-arounds for GMP bugs + in GMP 4.3.0 */ + ForceNormal(d); + ForceNormal(u); + + + if (ONEP(d)) { + + /* + * We make sure that u is in range 0..n-1, just in case + * GMP is sloppy. + */ + + + if (_ntl_gsign(u) < 0) { + _ntl_gadd(u, nin, &u); + if (_ntl_gsign(u) < 0) { + _ntl_gmod(u, nin, &u); + } + } + else if (_ntl_gcompare(u, nin) >= 0) { + _ntl_gsub(u, nin, &u); + if (_ntl_gcompare(u, nin) >= 0) { + _ntl_gmod(u, nin, &u); + } + } + + _ntl_gcopy(u, invv); + return 0; + } + else { + _ntl_gcopy(d, invv); + return 1; + } +} + + +void +_ntl_ginvmod( + _ntl_gbigint a, + _ntl_gbigint n, + _ntl_gbigint *c + ) +{ + if (_ntl_ginv(a, n, c)) + ArithmeticError("undefined inverse in _ntl_ginvmod"); +} + + +void +_ntl_gaddmod( + _ntl_gbigint a, + _ntl_gbigint b, + _ntl_gbigint n, + _ntl_gbigint *c + ) +{ + if (*c != n) { + _ntl_gadd(a, b, c); + if (_ntl_gcompare(*c, n) >= 0) + _ntl_gsubpos(*c, n, c); + } + else { + GRegister(mem); + + _ntl_gadd(a, b, &mem); + if (_ntl_gcompare(mem, n) >= 0) + _ntl_gsubpos(mem, n, c); + else + _ntl_gcopy(mem, c); + } +} + + +void +_ntl_gsubmod( + _ntl_gbigint a, + _ntl_gbigint b, + _ntl_gbigint n, + _ntl_gbigint *c + ) +{ + GRegister(mem); + long cmp; + + if ((cmp=_ntl_gcompare(a, b)) < 0) { + _ntl_gadd(n, a, &mem); + _ntl_gsubpos(mem, b, c); + } else if (!cmp) + _ntl_gzero(c); + else + _ntl_gsubpos(a, b, c); +} + +void +_ntl_gsmulmod( + _ntl_gbigint a, + long d, + _ntl_gbigint n, + _ntl_gbigint *c + ) +{ + GRegister(mem); + + _ntl_gsmul(a, d, &mem); + _ntl_gmod(mem, n, c); +} + + + +void +_ntl_gmulmod( + _ntl_gbigint a, + _ntl_gbigint b, + _ntl_gbigint n, + _ntl_gbigint *c + ) +{ + GRegister(mem); + + _ntl_gmul(a, b, &mem); + _ntl_gmod(mem, n, c); +} + +void +_ntl_gsqmod( + _ntl_gbigint a, + _ntl_gbigint n, + _ntl_gbigint *c + ) +{ + _ntl_gmulmod(a, a, n, c); +} + + +double _ntl_gdoub_aux(_ntl_gbigint n) +{ + double res; + mp_limb_t *ndata; + long i, sn, nneg; + + if (!n) + return ((double) 0); + + GET_SIZE_NEG(sn, nneg, n); + + ndata = DATA(n); + + res = 0; + for (i = sn-1; i >= 0; i--) + res = res * NTL_ZZ_FRADIX + ((double) ndata[i]); + + if (nneg) res = -res; + + return res; +} + +long _ntl_ground_correction(_ntl_gbigint a, long k, long residual) +{ + long direction; + long p; + long sgn; + long bl; + mp_limb_t wh; + long i; + mp_limb_t *adata; + + if (SIZE(a) > 0) + sgn = 1; + else + sgn = -1; + + adata = DATA(a); + + p = k - 1; + bl = (p/NTL_ZZ_NBITS); + wh = ((mp_limb_t) 1) << (p - NTL_ZZ_NBITS*bl); + + if (adata[bl] & wh) { + /* bit is 1...we have to see if lower bits are all 0 + in order to implement "round to even" */ + + if (adata[bl] & (wh - ((mp_limb_t) 1))) + direction = 1; + else { + i = bl - 1; + while (i >= 0 && adata[i] == 0) i--; + if (i >= 0) + direction = 1; + else + direction = 0; + } + + /* use residual to break ties */ + + if (direction == 0 && residual != 0) { + if (residual == sgn) + direction = 1; + else + direction = -1; + } + + if (direction == 0) { + /* round to even */ + + wh = wh << 1; + + /* + * 
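(a "nail" is GMP's build-time option that reserves some high-order bits + * of each limb; NTL assumes limbs are full machine words) + *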
DIRT: if GMP has non-empty "nails", this won't work. + */ + + if (wh == 0) { + wh = 1; + bl++; + } + + if (adata[bl] & wh) + direction = 1; + else + direction = -1; + } + } + else + direction = -1; + + if (direction == 1) + return sgn; + + return 0; +} + + + + +double _ntl_gdoub(_ntl_gbigint n) +{ + GRegister(tmp); + + long s; + long shamt; + long correction; + double x; + + s = _ntl_g2log(n); + shamt = s - NTL_DOUBLE_PRECISION; + + if (shamt <= 0) + return _ntl_gdoub_aux(n); + + _ntl_grshift(n, shamt, &tmp); + + correction = _ntl_ground_correction(n, shamt, 0); + + if (correction) _ntl_gsadd(tmp, correction, &tmp); + + x = _ntl_gdoub_aux(tmp); + + x = _ntl_ldexp(x, shamt); + + return x; +} + + +double _ntl_glog(_ntl_gbigint n) +{ + GRegister(tmp); + + static const double log_2 = log(2.0); // GLOBAL (assumes C++11 thread-safe init) + + long s; + long shamt; + long correction; + double x; + + if (_ntl_gsign(n) <= 0) + ArithmeticError("log argument <= 0"); + + s = _ntl_g2log(n); + shamt = s - NTL_DOUBLE_PRECISION; + + if (shamt <= 0) + return log(_ntl_gdoub_aux(n)); + + _ntl_grshift(n, shamt, &tmp); + + correction = _ntl_ground_correction(n, shamt, 0); + + if (correction) _ntl_gsadd(tmp, correction, &tmp); + + x = _ntl_gdoub_aux(tmp); + + return log(x) + shamt*log_2; +} + + + + + +/* To implement _ntl_gdoubtoz, I've implemented essentially the + * same algorithm as in LIP, processing in blocks of + * NTL_NSP_NBITS bits, rather than NTL_ZZ_NBITS. + * This conversion is rather delicate, and I don't want to + * make any new assumptions about the underlying arithmetic. + * This implementation should be quite portable. */ + +void _ntl_gdoubtoz(double a, _ntl_gbigint *xx) +{ + GRegister(x); + + long neg, i, t, sz; + + a = floor(a); + + if (!_ntl_IsFinite(&a)) + ArithmeticError("_ntl_gdoubtoz: attempt to convert non-finite value"); + + if (a < 0) { + a = -a; + neg = 1; + } + else + neg = 0; + + if (a == 0) { + _ntl_gzero(xx); + return; + } + + sz = 0; + while (a >= 1) { + a = a*(1.0/double(NTL_NSP_BOUND)); + sz++; + } + + i = 0; + _ntl_gzero(&x); + + while (a != 0) { + i++; + a = a*double(NTL_NSP_BOUND); + t = (long) a; + a = a - t; + + if (i == 1) { + _ntl_gintoz(t, &x); + } + else { + _ntl_glshift(x, NTL_NSP_NBITS, &x); + _ntl_gsadd(x, t, &x); + } + } + + if (i > sz) TerminalError("bug in _ntl_gdoubtoz"); + + _ntl_glshift(x, (sz-i)*NTL_NSP_NBITS, xx); + if (neg) _ntl_gnegate(xx); +} + + + +/* I've adapted LIP's extended euclidean algorithm to + * do rational reconstruction. -- VJS. 
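+ * Given a residue a mod n and bounds num_bound and den_bound, it tries to + * find num and den with a*den = num (mod n), |num| <= num_bound, and + * 0 < den <= den_bound; it returns 1 on success and 0 otherwise.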
+ */ + + +long +_ntl_gxxratrecon( + _ntl_gbigint ain, + _ntl_gbigint nin, + _ntl_gbigint num_bound, + _ntl_gbigint den_bound, + _ntl_gbigint *num_out, + _ntl_gbigint *den_out + ) +{ + GRegister(a); + GRegister(n); + GRegister(q); + GRegister(w); + GRegister(x); + GRegister(y); + GRegister(z); + GRegister(inv); + GRegister(u); + GRegister(a_bak); + GRegister(n_bak); + GRegister(inv_bak); + GRegister(w_bak); + + mp_limb_t *p; + + long diff; + long ilo; + long sa; + long sn; + long snum; + long sden; + long e; + long fast; + long temp; + long parity; + long gotthem; + long try11; + long try12; + long try21; + long try22; + long got11; + long got12; + long got21; + long got22; + + double hi; + double lo; + double dt; + double fhi, fhi1; + double flo, flo1; + double num; + double den; + double dirt; + + if (_ntl_gsign(num_bound) < 0) + LogicError("rational reconstruction: bad numerator bound"); + + if (!num_bound) + snum = 0; + else + snum = SIZE(num_bound); + + if (_ntl_gsign(den_bound) <= 0) + LogicError("rational reconstruction: bad denominator bound"); + + sden = SIZE(den_bound); + + if (_ntl_gsign(nin) <= 0) + LogicError("rational reconstruction: bad modulus"); + + if (_ntl_gsign(ain) < 0 || _ntl_gcompare(ain, nin) >= 0) + LogicError("rational reconstruction: bad residue"); + + + e = SIZE(nin); + + _ntl_gsetlength(&a, e); + _ntl_gsetlength(&n, e); + _ntl_gsetlength(&q, e); + _ntl_gsetlength(&w, e); + _ntl_gsetlength(&x, e); + _ntl_gsetlength(&y, e); + _ntl_gsetlength(&z, e); + _ntl_gsetlength(&inv, e); + _ntl_gsetlength(&u, e); + _ntl_gsetlength(&a_bak, e); + _ntl_gsetlength(&n_bak, e); + _ntl_gsetlength(&inv_bak, e); + _ntl_gsetlength(&w_bak, e); + + fhi1 = double(1L) + double(32L)/NTL_FDOUBLE_PRECISION; + flo1 = double(1L) - double(32L)/NTL_FDOUBLE_PRECISION; + + fhi = double(1L) + double(8L)/NTL_FDOUBLE_PRECISION; + flo = double(1L) - double(8L)/NTL_FDOUBLE_PRECISION; + + _ntl_gcopy(ain, &a); + _ntl_gcopy(nin, &n); + + _ntl_gone(&inv); + _ntl_gzero(&w); + + while (1) + { + if (SIZE(w) >= sden && _ntl_gcompare(w, den_bound) > 0) break; + if (SIZE(n) <= snum && _ntl_gcompare(n, num_bound) <= 0) break; + + _ntl_gcopy(a, &a_bak); + _ntl_gcopy(n, &n_bak); + _ntl_gcopy(w, &w_bak); + _ntl_gcopy(inv, &inv_bak); + + gotthem = 0; + sa = SIZE(a); + sn = SIZE(n); + diff = sa - sn; + if (!diff || diff == 1) + { + sa = SIZE(a); + p = DATA(a) + (sa-1); + num = double(*p) * NTL_ZZ_FRADIX; + if (sa > 1) + num += double(*(--p)); + num *= NTL_ZZ_FRADIX; + if (sa > 2) + num += double(*(p - 1)); + + sn = SIZE(n); + p = DATA(n) + (sn-1); + den = double(*p) * NTL_ZZ_FRADIX; + if (sn > 1) + den += double(*(--p)); + den *= NTL_ZZ_FRADIX; + if (sn > 2) + den += double(*(p - 1)); + + hi = fhi1 * (num + double(1L)) / den; + lo = flo1 * num / (den + double(1L)); + if (diff > 0) + { + hi *= NTL_ZZ_FRADIX; + lo *= NTL_ZZ_FRADIX; + } + + try11 = 1; + try12 = 0; + try21 = 0; + try22 = 1; + parity = 1; + fast = 1; + while (fast > 0) + { + parity = 1 - parity; + if (hi >= NTL_NSP_BOUND) + fast = 0; + else + { + ilo = (long)lo; + dirt = hi - double(ilo); + if (dirt < 1.0/NTL_FDOUBLE_PRECISION || !ilo || ilo < (long)hi) + fast = 0; + else + { + dt = lo-double(ilo); + lo = flo / dirt; + if (dt > 1.0/NTL_FDOUBLE_PRECISION) + hi = fhi / dt; + else + hi = double(NTL_NSP_BOUND); + temp = try11; + try11 = try21; + if ((NTL_WSP_BOUND - temp) / ilo < try21) + fast = 0; + else + try21 = temp + ilo * try21; + temp = try12; + try12 = try22; + if ((NTL_WSP_BOUND - temp) / ilo < try22) + fast = 0; + else + try22 = temp + ilo * try22; + 
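+ /* try11..try22 accumulate a Lehmer-style 2x2 quotient matrix in single + precision; the overflow tests above end the fast phase, and the matrix + is only committed to got11..got22 after an even number of quotient + steps (parity), so the signs of the entries are known when it is + applied below */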
if ((fast > 0) && (parity > 0)) + { + gotthem = 1; + got11 = try11; + got12 = try12; + got21 = try21; + got22 = try22; + } + } + } + } + } + if (gotthem) + { + _ntl_gsmul(inv, got11, &x); + _ntl_gsmul(w, got12, &y); + _ntl_gsmul(inv, got21, &z); + _ntl_gsmul(w, got22, &w); + _ntl_gadd(x, y, &inv); + _ntl_gadd(z, w, &w); + _ntl_gsmul(a, got11, &x); + _ntl_gsmul(n, got12, &y); + _ntl_gsmul(a, got21, &z); + _ntl_gsmul(n, got22, &n); + _ntl_gsub(x, y, &a); + _ntl_gsub(n, z, &n); + } + else + { + _ntl_gdiv(a, n, &q, &a); + _ntl_gmul(q, w, &x); + _ntl_gadd(inv, x, &inv); + if (!ZEROP(a)) + { + _ntl_gdiv(n, a, &q, &n); + _ntl_gmul(q, inv, &x); + _ntl_gadd(w, x, &w); + } + else + { + break; + } + } + } + + _ntl_gcopy(a_bak, &a); + _ntl_gcopy(n_bak, &n); + _ntl_gcopy(w_bak, &w); + _ntl_gcopy(inv_bak, &inv); + + _ntl_gnegate(&w); + + while (1) + { + sa = SIZE(w); + if (sa < 0) SIZE(w) = -sa; + if (SIZE(w) >= sden && _ntl_gcompare(w, den_bound) > 0) return 0; + SIZE(w) = sa; + + if (SIZE(n) <= snum && _ntl_gcompare(n, num_bound) <= 0) break; + + fast = 0; + sa = SIZE(a); + sn = SIZE(n); + diff = sa - sn; + if (!diff || diff == 1) + { + sa = SIZE(a); + p = DATA(a) + (sa-1); + num = double(*p) * NTL_ZZ_FRADIX; + if (sa > 1) + num += double(*(--p)); + num *= NTL_ZZ_FRADIX; + if (sa > 2) + num += double(*(p - 1)); + + sn = SIZE(n); + p = DATA(n) + (sn-1); + den = double(*p) * NTL_ZZ_FRADIX; + if (sn > 1) + den += double(*(--p)); + den *= NTL_ZZ_FRADIX; + if (sn > 2) + den += double(*(p - 1)); + + hi = fhi1 * (num + double(1L)) / den; + lo = flo1 * num / (den + double(1L)); + if (diff > 0) + { + hi *= NTL_ZZ_FRADIX; + lo *= NTL_ZZ_FRADIX; + } + + if (hi < NTL_NSP_BOUND) + { + ilo = (long)lo; + if (ilo == (long)hi) + fast = 1; + } + } + + if (fast) + { + if (ilo != 0) { + if (ilo == 1) { + _ntl_gsub(inv, w, &inv); + _ntl_gsubpos(a, n, &a); + } + else { + _ntl_gsmul(w, ilo, &x); + _ntl_gsub(inv, x, &inv); + _ntl_gsmul(n, ilo, &x); + _ntl_gsubpos(a, x, &a); + } + } + } + else { + _ntl_gdiv(a, n, &q, &a); + _ntl_gmul(q, w, &x); + _ntl_gsub(inv, x, &inv); + } + + _ntl_gswap(&a, &n); + _ntl_gswap(&inv, &w); + } + + if (_ntl_gsign(w) < 0) { + _ntl_gnegate(&w); + _ntl_gnegate(&n); + } + + _ntl_gcopy(n, num_out); + _ntl_gcopy(w, den_out); + + return 1; +} + + +void +_ntl_gexp( + _ntl_gbigint a, + long e, + _ntl_gbigint *bb + ) +{ + long k; + long len_a; + GRegister(res); + + if (!e) + { + _ntl_gone(bb); + return; + } + + if (e < 0) + ArithmeticError("negative exponent in _ntl_gexp"); + + if (_ntl_giszero(a)) + { + _ntl_gzero(bb); + return; + } + + len_a = _ntl_g2log(a); + if (len_a > (NTL_MAX_LONG-(NTL_ZZ_NBITS-1))/e) + ResourceError("overflow in _ntl_gexp"); + + _ntl_gsetlength(&res, (len_a*e+NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS); + + _ntl_gcopy(a, &res); + k = 1; + while ((k << 1) <= e) + k <<= 1; + while (k >>= 1) { + _ntl_gsq(res, &res); + if (e & k) + _ntl_gmul(a, res, &res); + } + + _ntl_gcopy(res, bb); +} + +void +_ntl_gexps( + long a, + long e, + _ntl_gbigint *bb + ) +{ + long k; + long len_a; + GRegister(res); + + if (!e) + { + _ntl_gone(bb); + return; + } + + if (e < 0) + ArithmeticError("negative exponent in _ntl_zexps"); + + if (!a) + { + _ntl_gzero(bb); + return; + } + + len_a = _ntl_g2logs(a); + if (len_a > (NTL_MAX_LONG-(NTL_ZZ_NBITS-1))/e) + ResourceError("overflow in _ntl_gexps"); + + _ntl_gsetlength(&res, (len_a*e+NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS); + + _ntl_gintoz(a, &res); + k = 1; + while ((k << 1) <= e) + k <<= 1; + while (k >>= 1) { + _ntl_gsq(res, &res); + if (e & k) + _ntl_gsmul(res, a, &res); + } + + 
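+ /* the loop above consumed the bits of e from the most significant bit + downward (left-to-right binary exponentiation) */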
_ntl_gcopy(res, bb); +} + + +static +long OptWinSize(long n) +/* finds k that minimizes n/(k+1) + 2^{k-1} */ + +{ + long k; + double v, v_new; + + + v = n/2.0 + 1.0; + k = 1; + + for (;;) { + v_new = n/((double)(k+2)) + ((double)(1L << k)); + if (v_new >= v) break; + v = v_new; + k++; + } + + return k; +} + + + +/* DIRT: will not work with non-empty "nails" */ + +static +mp_limb_t neg_inv_mod_limb(mp_limb_t m0) +{ + mp_limb_t x; + long k; + + x = 1; + k = 1; + while (k < NTL_ZZ_NBITS) { + x += x * (1 - x * m0); + k <<= 1; + } + + + return - x; +} + + +/* Montgomery reduction: + * This computes res = T/b^m mod N, where b = 2^{NTL_ZZ_NBITS}. + * It is assumed that N has n limbs, and that T has at most n + m limbs. + * Also, inv should be set to -N^{-1} mod b. + * Finally, it is assumed that T has space allocated for n + m limbs, + * and that res has space allocated for n limbs. + * Note: res should not overlap any inputs, and T is destroyed. + * Note: res will have at most n limbs, but may not be fully reduced + * mod N. In general, we will have res < T/b^m + N. + */ + +/* DIRT: this routine may not work with non-empty "nails" */ + +static +void redc(_ntl_gbigint T, _ntl_gbigint N, long m, mp_limb_t inv, + _ntl_gbigint res) +{ + long n, sT, i; + mp_limb_t *Ndata, *Tdata, *resdata, q, d, t, c; + + n = SIZE(N); + Ndata = DATA(N); + sT = SIZE(T); + Tdata = DATA(T); + resdata = DATA(res); + + for (i = sT; i < m+n; i++) + Tdata[i] = 0; + + c = 0; + for (i = 0; i < m; i++) { + q = Tdata[i]*inv; + d = mpn_addmul_1(Tdata+i, Ndata, n, q); + + // (c, Tdata[i+n]) = c + d + Tdata[i+n] + t = Tdata[i+n] + d; + Tdata[i+n] = t + c; + if (t < d || (c == 1 && t + c == 0)) + c = 1; + else + c = 0; + } + + if (c) { + mpn_sub_n(resdata, Tdata + m, Ndata, n); + } + else { + for (i = 0; i < n; i++) + resdata[i] = Tdata[m + i]; + } + + i = n; + STRIP(i, resdata); + + SIZE(res) = i; + SIZE(T) = 0; +} + + +// This montgomery code is for external consumption... +// This is currently used in the CRT reconstruction step +// for ZZ_pX arithmetic. 
It gives a nontrivial speedup +// for smallish p (up to a few hundred bits) + +class _ntl_reduce_struct_montgomery : public _ntl_reduce_struct { +public: + long m; + mp_limb_t inv; + _ntl_gbigint_wrapped N; + + void eval(_ntl_gbigint *rres, _ntl_gbigint *TT); + void adjust(_ntl_gbigint *x); +}; + + + +// DIRT: may not work with non-empty "nails" + +void _ntl_reduce_struct_montgomery::eval(_ntl_gbigint *rres, _ntl_gbigint *TT) +{ + long n, sT, i; + mp_limb_t *Ndata, *Tdata, *resdata, q, d, t, c; + _ntl_gbigint res, T; + + + T = *TT; + + // quick zero test, in case of sparse polynomials + if (ZEROP(T)) { + _ntl_gzero(rres); + return; + } + + n = SIZE(N); + Ndata = DATA(N); + + if (MustAlloc(T, m+n)) { + _ntl_gsetlength(&T, m+n); + *TT = T; + } + + res = *rres; + if (MustAlloc(res, n)) { + _ntl_gsetlength(&res, n); + *rres = res; + } + + sT = SIZE(T); + Tdata = DATA(T); + resdata = DATA(res); + + for (i = sT; i < m+n; i++) + Tdata[i] = 0; + + c = 0; + for (i = 0; i < m; i++) { + q = Tdata[i]*inv; + d = mpn_addmul_1(Tdata+i, Ndata, n, q); + + // (c, Tdata[i+n]) = c + d + Tdata[i+n] + t = Tdata[i+n] + d; + Tdata[i+n] = t + c; + if (t < d || (c == 1 && t + c == 0)) + c = 1; + else + c = 0; + } + + if (c || mpn_cmp(Tdata + m, Ndata, n) >= 0) { + mpn_sub_n(resdata, Tdata + m, Ndata, n); + } + else { + for (i = 0; i < n; i++) + resdata[i] = Tdata[m + i]; + } + + i = n; + STRIP(i, resdata); + + SIZE(res) = i; + SIZE(T) = 0; +} + +// this will adjust the given number by multiplying by the +// montgomery scaling factor + +void _ntl_reduce_struct_montgomery::adjust(_ntl_gbigint *x) +{ + GRegister(tmp); + _ntl_glshift(*x, m*NTL_ZZ_NBITS, &tmp); + _ntl_gmod(tmp, N, x); +} + + + + +class _ntl_reduce_struct_plain : public _ntl_reduce_struct { +public: + _ntl_gbigint_wrapped N; + + void eval(_ntl_gbigint *rres, _ntl_gbigint *TT) + { + _ntl_gmod(*TT, N, rres); + } + + void adjust(_ntl_gbigint *x) { } +}; + +// assumption: all values passed to eval for montgomery reduction +// are in [0, modulus*excess] + +_ntl_reduce_struct * +_ntl_reduce_struct_build(_ntl_gbigint modulus, _ntl_gbigint excess) +{ + if (_ntl_godd(modulus)) { + UniquePtr<_ntl_reduce_struct_montgomery> C; + C.make(); + + C->m = _ntl_gsize(excess); + C->inv = neg_inv_mod_limb(DATA(modulus)[0]); + _ntl_gcopy(modulus, &C->N); + + return C.release(); + } + else { + UniquePtr<_ntl_reduce_struct_plain> C; + C.make(); + + _ntl_gcopy(modulus, &C->N); + + return C.release(); + } +} + + + +#define REDC_CROSS (32) + +void _ntl_gpowermod(_ntl_gbigint g, _ntl_gbigint e, _ntl_gbigint F, + _ntl_gbigint *h) + +/* h = g^e mod f using "sliding window" algorithm + + remark: the notation (h, g, e, F) is strange, because I + copied the code from BB.c. 
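+ The sliding-window method precomputes the odd powers g^1, g^3, ..., + g^(2^k-1) (the table v[] below) and consumes up to k exponent bits at + a time, saving multiplications compared with plain square-and-multiply.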
+*/ + +{ + _ntl_gbigint_wrapped res, gg, t; + Vec<_ntl_gbigint_wrapped> v; + + long n, i, k, val, cnt, m; + long use_redc, sF; + mp_limb_t inv; + + if (_ntl_gsign(g) < 0 || _ntl_gcompare(g, F) >= 0 || + _ntl_gscompare(F, 1) <= 0) + LogicError("PowerMod: bad args"); + + if (_ntl_gscompare(e, 0) == 0) { + _ntl_gone(h); + return; + } + + if (_ntl_gscompare(e, 1) == 0) { + _ntl_gcopy(g, h); + return; + } + + if (_ntl_gscompare(e, -1) == 0) { + _ntl_ginvmod(g, F, h); + return; + } + + if (_ntl_gscompare(e, 2) == 0) { + _ntl_gsqmod(g, F, h); + return; + } + + if (_ntl_gscompare(e, -2) == 0) { + res = 0; + _ntl_gsqmod(g, F, &res); + _ntl_ginvmod(res, F, h); + return; + } + + n = _ntl_g2log(e); + + sF = SIZE(F); + + res = 0; + _ntl_gsetlength(&res, sF*2); + + t = 0; + _ntl_gsetlength(&t, sF*2); + + use_redc = (DATA(F)[0] & 1) && sF < REDC_CROSS; + + gg = 0; + + if (use_redc) { + _ntl_glshift(g, sF*NTL_ZZ_NBITS, &res); + _ntl_gmod(res, F, &gg); + + inv = neg_inv_mod_limb(DATA(F)[0]); + } + else + _ntl_gcopy(g, &gg); + + + if (_ntl_gscompare(g, 2) == 0) { + /* plain square-and-multiply algorithm, optimized for g == 2 */ + + _ntl_gbigint_wrapped F1; + + if (use_redc) { + long shamt; + + COUNT_BITS(shamt, DATA(F)[sF-1]); + shamt = NTL_ZZ_NBITS - shamt; + _ntl_glshift(F, shamt, &F1); + } + + _ntl_gcopy(gg, &res); + + for (i = n - 2; i >= 0; i--) { + _ntl_gsq(res, &t); + if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); + + if (_ntl_gbit(e, i)) { + _ntl_gadd(res, res, &res); + + if (use_redc) { + while (SIZE(res) > sF) { + _ntl_gsubpos(res, F1, &res); + } + } + else { + if (_ntl_gcompare(res, F) >= 0) + _ntl_gsubpos(res, F, &res); + } + } + } + + + if (use_redc) { + _ntl_gcopy(res, &t); + redc(t, F, sF, inv, res); + if (_ntl_gcompare(res, F) >= 0) { + _ntl_gsub(res, F, &res); + } + } + + if (_ntl_gsign(e) < 0) _ntl_ginvmod(res, F, &res); + + _ntl_gcopy(res, h); + return; + } + + + if (n < 16) { + /* plain square-and-multiply algorithm */ + + _ntl_gcopy(gg, &res); + + for (i = n - 2; i >= 0; i--) { + _ntl_gsq(res, &t); + if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); + + if (_ntl_gbit(e, i)) { + _ntl_gmul(res, gg, &t); + if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); + } + } + + + if (use_redc) { + _ntl_gcopy(res, &t); + redc(t, F, sF, inv, res); + if (_ntl_gcompare(res, F) >= 0) { + _ntl_gsub(res, F, &res); + } + } + + if (_ntl_gsign(e) < 0) _ntl_ginvmod(res, F, &res); + + _ntl_gcopy(res, h); + return; + } + + k = OptWinSize(n); + + if (k > 5) k = 5; + + v.SetLength(1L << (k-1)); + for (i = 0; i < (1L << (k-1)); i++) { + v[i] = 0; + _ntl_gsetlength(&v[i], sF); + } + + _ntl_gcopy(gg, &v[0]); + + if (k > 1) { + _ntl_gsq(gg, &t); + if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); + + for (i = 1; i < (1L << (k-1)); i++) { + _ntl_gmul(v[i-1], res, &t); + if (use_redc) redc(t, F, sF, inv, v[i]); else _ntl_gmod(t, F, &v[i]); + } + } + + _ntl_gcopy(gg, &res); + + val = 0; + for (i = n-2; i >= 0; i--) { + val = (val << 1) | _ntl_gbit(e, i); + if (val == 0) { + _ntl_gsq(res, &t); + if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); + } + else if (val >= (1L << (k-1)) || i == 0) { + cnt = 0; + while ((val & 1) == 0) { + val = val >> 1; + cnt++; + } + + m = val; + while (m > 0) { + _ntl_gsq(res, &t); + if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); + m = m >> 1; + } + + _ntl_gmul(res, v[val >> 1], &t); + if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); + + while (cnt > 0) { + 
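+ /* square once for each trailing zero bit stripped from the window */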
_ntl_gsq(res, &t); + if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); + cnt--; + } + + val = 0; + } + } + + if (use_redc) { + _ntl_gcopy(res, &t); + redc(t, F, sF, inv, res); + if (_ntl_gcompare(res, F) >= 0) { + _ntl_gsub(res, F, &res); + } + } + + if (_ntl_gsign(e) < 0) _ntl_ginvmod(res, F, &res); + + _ntl_gcopy(res, h); +} + +long _ntl_gsize(_ntl_gbigint rep) +{ + if (!rep) + return 0; + else if (SIZE(rep) < 0) + return -SIZE(rep); + else + return SIZE(rep); +} + +long _ntl_gisone(_ntl_gbigint rep) +{ + return rep != 0 && SIZE(rep) == 1 && DATA(rep)[0] == 1; +} + +long _ntl_gsptest(_ntl_gbigint rep) +{ + return !rep || SIZE(rep) == 0 || + ((SIZE(rep) == 1 || SIZE(rep) == -1) && + DATA(rep)[0] < ((mp_limb_t) NTL_SP_BOUND)); +} + +long _ntl_gwsptest(_ntl_gbigint rep) +{ + return !rep || SIZE(rep) == 0 || + ((SIZE(rep) == 1 || SIZE(rep) == -1) && + DATA(rep)[0] < ((mp_limb_t) NTL_WSP_BOUND)); +} + + + +long _ntl_gcrtinrange(_ntl_gbigint g, _ntl_gbigint a) +{ + long sa, sg, i; + mp_limb_t carry, u, v; + mp_limb_t *adata, *gdata; + + if (!a || SIZE(a) <= 0) return 0; + + sa = SIZE(a); + + if (!g) return 1; + + sg = SIZE(g); + + if (sg == 0) return 1; + + if (sg < 0) sg = -sg; + + if (sa-sg > 1) return 1; + + if (sa-sg < 0) return 0; + + adata = DATA(a); + gdata = DATA(g); + + carry=0; + + if (sa-sg == 1) { + if (adata[sa-1] > ((mp_limb_t) 1)) return 1; + carry = 1; + } + + i = sg-1; + u = 0; + v = 0; + while (i >= 0 && u == v) { + u = (carry << (NTL_ZZ_NBITS-1)) + (adata[i] >> 1); + v = gdata[i]; + carry = (adata[i] & 1); + i--; + } + + if (u == v) { + if (carry) return 1; + return (SIZE(g) > 0); + } + else + return (u > v); +} + + + +/* DIRT: this routine will not work with non-empty "nails" */ + +void _ntl_gfrombytes(_ntl_gbigint *x, const unsigned char *p, long n) +{ + long lw, r, i, j; + mp_limb_t *xp, t; + + while (n > 0 && p[n-1] == 0) n--; + + if (n <= 0) { + _ntl_gzero(x); + return; + } + + const long BytesPerLimb = NTL_ZZ_NBITS/8; + + + lw = n/BytesPerLimb; + r = n - lw*BytesPerLimb; + + if (r != 0) + lw++; + else + r = BytesPerLimb; + + _ntl_gsetlength(x, lw); + xp = DATA(*x); + + for (i = 0; i < lw-1; i++) { + t = 0; + for (j = 0; j < BytesPerLimb; j++) { + t >>= 8; + t += (((mp_limb_t)(*p)) & ((mp_limb_t) 255)) << ((BytesPerLimb-1)*8); + p++; + } + xp[i] = t; + } + + t = 0; + for (j = 0; j < r; j++) { + t >>= 8; + t += (((mp_limb_t)(*p)) & ((mp_limb_t) 255)) << ((BytesPerLimb-1)*8); + p++; + } + + t >>= (BytesPerLimb-r)*8; + xp[lw-1] = t; + + // strip not necessary here + // STRIP(lw, xp); + SIZE(*x) = lw; +} + + + +/* DIRT: this routine will not work with non-empty "nails" */ + +void _ntl_gbytesfromz(unsigned char *p, _ntl_gbigint a, long n) +{ + long lbits, lbytes, min_bytes, min_words, r; + long i, j; + mp_limb_t *ap, t; + + if (n < 0) n = 0; + + const long BytesPerLimb = NTL_ZZ_NBITS/8; + + lbits = _ntl_g2log(a); + lbytes = (lbits+7)/8; + + min_bytes = (lbytes < n) ? 
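+ /* only the low min(lbytes, n) bytes of |a| are emitted; any remaining + requested bytes are zero-filled at the end */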
lbytes : n; + + min_words = min_bytes/BytesPerLimb; + + r = min_bytes - min_words*BytesPerLimb; + if (r != 0) + min_words++; + else + r = BytesPerLimb; + + if (a) + ap = DATA(a); + else + ap = 0; + + + for (i = 0; i < min_words-1; i++) { + t = ap[i]; + for (j = 0; j < BytesPerLimb; j++) { + *p = t & ((mp_limb_t) 255); + t >>= 8; + p++; + } + } + + if (min_words > 0) { + t = ap[min_words-1]; + for (j = 0; j < r; j++) { + *p = t & ((mp_limb_t) 255); + t >>= 8; + p++; + } + } + + for (j = min_bytes; j < n; j++) { + *p = 0; + p++; + } +} + + + + +long _ntl_gblock_construct_alloc(_ntl_gbigint *x, long d, long n) +{ + long d1, sz, AllocAmt, m, j, alloc; + char *p; + _ntl_gbigint t; + + + /* check n value */ + + if (n <= 0) + LogicError("block construct: n must be positive"); + + + + /* check d value */ + + if (d <= 0) + LogicError("block construct: d must be positive"); + + if (NTL_OVERFLOW(d, NTL_ZZ_NBITS, NTL_ZZ_NBITS)) + ResourceError("block construct: d too large"); + + d1 = d + 1; + +#ifdef NTL_SMALL_MP_SIZE_T + /* this makes sure that numbers don't get too big for GMP */ + if (d1 >= (1L << (NTL_BITS_PER_INT-4))) + ResourceError("size too big for GMP"); +#endif + + + if (STORAGE_OVF(d1)) + ResourceError("block construct: d too large"); + + + + sz = STORAGE(d1); + + AllocAmt = NTL_MAX_ALLOC_BLOCK/sz; + if (AllocAmt == 0) AllocAmt = 1; + + if (AllocAmt < n) + m = AllocAmt; + else + m = n; + + p = (char *) NTL_MALLOC(m, sz, 0); + if (!p) MemoryError(); + + *x = (_ntl_gbigint) p; + + for (j = 0; j < m; j++) { + t = (_ntl_gbigint) p; + alloc = (d1 << 2) | 1; + if (j < m-1) alloc |= 2; + ALLOC(t) = alloc; + SIZE(t) = 0; + p += sz; + } + + return m; +} + + +void _ntl_gblock_construct_set(_ntl_gbigint x, _ntl_gbigint *y, long i) +{ + long d1, sz; + + + d1 = ALLOC(x) >> 2; + sz = STORAGE(d1); + + *y = (_ntl_gbigint) (((char *) x) + i*sz); +} + + +long _ntl_gblock_destroy(_ntl_gbigint x) +{ + long d1, sz, alloc, m; + char *p; + _ntl_gbigint t; + + + d1 = ALLOC(x) >> 2; + sz = STORAGE(d1); + + p = (char *) x; + + m = 1; + + for (;;) { + t = (_ntl_gbigint) p; + alloc = ALLOC(t); + + // NOTE: this must not throw + if ((alloc & 1) == 0) + TerminalError("corrupted memory detected in _ntl_gblock_destroy"); + + if ((alloc & 2) == 0) break; + m++; + p += sz; + } + + free(x); + return m; +} + + +long _ntl_gblock_storage(long d) +{ + long d1, sz; + + d1 = d + 1; + sz = STORAGE(d1) + sizeof(_ntl_gbigint); + + return sz; +} + + + +static +long SpecialPower(long e, long p) +{ + long a; + long x, y; + + a = (long) ((((mp_limb_t) 1) << (NTL_ZZ_NBITS-2)) % ((mp_limb_t) p)); + a = MulMod(a, 2, p); + a = MulMod(a, 2, p); + + x = 1; + y = a; + while (e) { + if (e & 1) x = MulMod(x, y, p); + y = MulMod(y, y, p); + e = e >> 1; + } + + return x; +} + + +static +void sp_ext_eucl(long *dd, long *ss, long *tt, long a, long b) +{ + long u, v, u0, v0, u1, v1, u2, v2, q, r; + + long aneg = 0, bneg = 0; + + if (a < 0) { + if (a < -NTL_MAX_LONG) ResourceError("integer overflow"); + a = -a; + aneg = 1; + } + + if (b < 0) { + if (b < -NTL_MAX_LONG) ResourceError("integer overflow"); + b = -b; + bneg = 1; + } + + u1=1; v1=0; + u2=0; v2=1; + u = a; v = b; + + while (v != 0) { + q = u / v; + r = u % v; + u = v; + v = r; + u0 = u2; + v0 = v2; + u2 = u1 - q*u2; + v2 = v1- q*v2; + u1 = u0; + v1 = v0; + } + + if (aneg) + u1 = -u1; + + if (bneg) + v1 = -v1; + + *dd = u; + *ss = u1; + *tt = v1; +} + +static +long sp_inv_mod(long a, long n) +{ + long d, s, t; + + sp_ext_eucl(&d, &s, &t, a, n); + if (d != 1) ArithmeticError("inverse 
undefined"); + if (s < 0) + return s + n; + else + return s; +} + + + + +class _ntl_tmp_vec_crt_fast : public _ntl_tmp_vec { +public: + UniqueArray<_ntl_gbigint_wrapped> rem_vec; + UniqueArray<_ntl_gbigint_wrapped> temps; + UniqueArray val_vec; + +}; + + +class _ntl_crt_struct_basic : public _ntl_crt_struct { +public: + UniqueArray<_ntl_gbigint_wrapped> v; + long sbuf; + long n; + + bool special(); + void insert(long i, _ntl_gbigint m); + _ntl_tmp_vec *extract(); + _ntl_tmp_vec *fetch(); + void eval(_ntl_gbigint *x, const long *b, _ntl_tmp_vec *tmp_vec); +}; + + +#if (defined(NTL_VIABLE_LL) && defined(NTL_TBL_CRT)) + +class _ntl_crt_struct_tbl : public _ntl_crt_struct { +public: + Unique2DArray v; + long n; + long sz; + + bool special(); + void insert(long i, _ntl_gbigint m); + _ntl_tmp_vec *extract(); + _ntl_tmp_vec *fetch(); + void eval(_ntl_gbigint *x, const long *b, _ntl_tmp_vec *tmp_vec); + +}; + +#endif + + + + +class _ntl_crt_struct_fast : public _ntl_crt_struct { +public: + long n; + long levels; + UniqueArray primes; + UniqueArray inv_vec; + UniqueArray index_vec; + UniqueArray<_ntl_gbigint_wrapped> prod_vec; + UniqueArray<_ntl_gbigint_wrapped> coeff_vec; + _ntl_gbigint_wrapped modulus; + UniquePtr<_ntl_tmp_vec_crt_fast> stored_tmp_vec; + + bool special(); + void insert(long i, _ntl_gbigint m); + _ntl_tmp_vec *extract(); + _ntl_tmp_vec *fetch(); + void eval(_ntl_gbigint *x, const long *b, _ntl_tmp_vec *tmp_vec); +}; + + + + +#define GCRT_TMPS (2) + + +_ntl_crt_struct * +_ntl_crt_struct_build(long n, _ntl_gbigint p, long (*primes)(long)) +{ + if (n >= 600) { + UniqueArray q; + UniqueArray inv_vec; + UniqueArray index_vec; + UniqueArray<_ntl_gbigint_wrapped> prod_vec, rem_vec, coeff_vec; + UniqueArray<_ntl_gbigint_wrapped> temps; + + long i, j; + long levels, vec_len; + + levels = 0; + while ((n >> levels) >= 16) levels++; + vec_len = (1L << levels) - 1; + + temps.SetLength(GCRT_TMPS); + rem_vec.SetLength(vec_len); + + q.SetLength(n); + for (i = 0; i < n; i++) + q[i] = primes(i); + + inv_vec.SetLength(n); + + + index_vec.SetLength(vec_len+1); + prod_vec.SetLength(vec_len); + coeff_vec.SetLength(n); + + index_vec[0] = 0; + index_vec[1] = n; + + for (i = 0; i <= levels-2; i++) { + long start = (1L << i) - 1; + long finish = (1L << (i+1)) - 2; + for (j = finish; j >= start; j--) { + index_vec[2*j+2] = index_vec[j] + (index_vec[j+1] - index_vec[j])/2; + index_vec[2*j+1] = index_vec[j]; + } + index_vec[2*finish+3] = n; + } + + for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { + /* multiply primes index_vec[i]..index_vec[i+1]-1 into + * prod_vec[i] + */ + + _ntl_gone(&prod_vec[i]); + for (j = index_vec[i]; j < index_vec[i+1]; j++) + _ntl_gsmul(prod_vec[i], q[j], &prod_vec[i]); + } + + for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { + for (j = index_vec[i]; j < index_vec[i+1]; j++) + _ntl_gsdiv(prod_vec[i], q[j], &coeff_vec[j]); + } + + for (i = (1L << (levels-1)) - 2; i >= 0; i--) + _ntl_gmul(prod_vec[2*i+1], prod_vec[2*i+2], &prod_vec[i]); + + /*** new asymptotically fast code to compute inv_vec ***/ + + _ntl_gone(&rem_vec[0]); + for (i = 0; i < (1L << (levels-1)) - 1; i++) { + _ntl_gmod(rem_vec[i], prod_vec[2*i+1], &temps[0]); + _ntl_gmul(temps[0], prod_vec[2*i+2], &temps[1]); + _ntl_gmod(temps[1], prod_vec[2*i+1], &rem_vec[2*i+1]); + + _ntl_gmod(rem_vec[i], prod_vec[2*i+2], &temps[0]); + _ntl_gmul(temps[0], prod_vec[2*i+1], &temps[1]); + _ntl_gmod(temps[1], prod_vec[2*i+2], &rem_vec[2*i+2]); + } + + for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { + for (j = index_vec[i]; j 
< index_vec[i+1]; j++) { + long tt, tt1, tt2; + _ntl_gsdiv(prod_vec[i], q[j], &temps[0]); + tt = _ntl_gsmod(temps[0], q[j]); + tt1 = _ntl_gsmod(rem_vec[i], q[j]); + tt2 = MulMod(tt, tt1, q[j]); + inv_vec[j] = sp_inv_mod(tt2, q[j]); + } + } + + + UniquePtr<_ntl_crt_struct_fast> C; + C.make(); + + C->n = n; + C->primes.move(q); + C->inv_vec.move(inv_vec); + C->levels = levels; + C->index_vec.move(index_vec); + C->prod_vec.move(prod_vec); + C->coeff_vec.move(coeff_vec); + + _ntl_gcopy(p, &C->modulus); + + C->stored_tmp_vec.make(); + C->stored_tmp_vec->rem_vec.move(rem_vec); + C->stored_tmp_vec->temps.move(temps); + C->stored_tmp_vec->val_vec.SetLength(n); + + return C.release(); + } + + +#if (defined(NTL_VIABLE_LL)) + +// alternative CRT code is viable + +#if (defined(NTL_CRT_ALTCODE)) +// unconditionally use the alternative code, +// as the tuning wizard says it's preferable for larger moduli + + { + UniquePtr<_ntl_crt_struct_tbl> C; + C.make(); + C->n = n; + C->sz = SIZE(p); + C->v.SetDims(C->sz, C->n); + + return C.release(); + } +#elif (defined(NTL_CRT_ALTCODE_SMALL)) +// use the alternative code on "smaller" moduli... +// For now, this triggers when n <= 16. +// Unless the "long long" compiler support is really bad, +// this should be a marginal win, as it avoids some +// procedure call overhead. + + if (n <= 16) { + UniquePtr<_ntl_crt_struct_tbl> C; + C.make(); + C->n = n; + C->sz = SIZE(p); + C->v.SetDims(C->sz, C->n); + + return C.release(); + } + else { + UniquePtr<_ntl_crt_struct_basic> C; + C.make(); + + long i; + + C->n = n; + C->v.SetLength(n); + C->sbuf = SIZE(p)+2; + + return C.release(); + } +#else + { + UniquePtr<_ntl_crt_struct_basic> C; + C.make(); + + long i; + + C->n = n; + C->v.SetLength(n); + C->sbuf = SIZE(p)+2; + + return C.release(); + } +#endif + +#else + { + UniquePtr<_ntl_crt_struct_basic> C; + C.make(); + + long i; + + C->n = n; + C->v.SetLength(n); + C->sbuf = SIZE(p)+2; + + return C.release(); + } +#endif + +} + +/* extracts existing tmp_vec, if possible -- read/write operation */ + +_ntl_tmp_vec *_ntl_crt_struct_basic::extract() +{ + return 0; +} + +#if (defined(NTL_VIABLE_LL) && defined(NTL_TBL_CRT)) +_ntl_tmp_vec *_ntl_crt_struct_tbl::extract() +{ + return 0; +} +#endif + +_ntl_tmp_vec *_ntl_crt_struct_fast::extract() +{ + if (stored_tmp_vec) + return stored_tmp_vec.release(); + else + return fetch(); +} + + +/* read only operation */ + +_ntl_tmp_vec *_ntl_crt_struct_basic::fetch() +{ + return 0; +} + +#if (defined(NTL_VIABLE_LL) && defined(NTL_TBL_CRT)) +_ntl_tmp_vec *_ntl_crt_struct_tbl::fetch() +{ + return 0; +} +#endif + +_ntl_tmp_vec *_ntl_crt_struct_fast::fetch() +{ + long vec_len = (1L << levels) - 1; + + UniquePtr<_ntl_tmp_vec_crt_fast> res; + res.make(); + res->temps.SetLength(GCRT_TMPS); + res->rem_vec.SetLength(vec_len); + res->val_vec.SetLength(n); + + return res.release(); +} + + +void _ntl_crt_struct_basic::insert(long i, _ntl_gbigint m) +{ + _ntl_gcopy(m, &v[i]); +} + +#if (defined(NTL_VIABLE_LL) && defined(NTL_TBL_CRT)) +void _ntl_crt_struct_tbl::insert(long i, _ntl_gbigint m) +{ + if (i < 0 || i >= n) LogicError("insert: bad args"); + + if (!m) + for (long j = 0; j < sz; j++) v[j][i] = 0; + else { + long sm = SIZE(m); + if (sm < 0 || sm > sz) LogicError("insert: bad args"); + const mp_limb_t *mdata = DATA(m); + for (long j = 0; j < sm; j++) + v[j][i] = mdata[j]; + for (long j = sm; j < sz; j++) + v[j][i] = 0; + } +} +#endif + +void _ntl_crt_struct_fast::insert(long i, _ntl_gbigint m) +{ + LogicError("insert called improperly"); +} + + +static 
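+/* computes *res = sum of a[i]*b[i] over i = 0..n-1; the result is + accumulated in sz + 2 limbs, each limb-vector product being added in + via mpn_addmul_1 with the final carry propagated by hand */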
+void gadd_mul_many(_ntl_gbigint *res, _ntl_gbigint *a, long *b, + long n, long sz) + +{ + mp_limb_t *xx, *yy; + long i, sx; + long sy; + mp_limb_t carry; + + sx = sz + 2; + if (MustAlloc(*res, sx)) + _ntl_gsetlength(res, sx); + + xx = DATA(*res); + + for (i = 0; i < sx; i++) + xx[i] = 0; + + for (i = 0; i < n; i++) { + if (!a[i]) continue; + + yy = DATA(a[i]); + sy = SIZE(a[i]); + + if (!sy || !b[i]) continue; + + carry = mpn_addmul_1(xx, yy, sy, b[i]); + yy = xx + sy; + *yy += carry; + + if (*yy < carry) { /* unsigned comparison! */ + do { + yy++; + *yy += 1; + } while (*yy == 0); + } + } + + while (sx > 0 && xx[sx-1] == 0) sx--; + SIZE(*res) = sx; +} + +void _ntl_crt_struct_basic::eval(_ntl_gbigint *x, const long *b, _ntl_tmp_vec *generic_tmp_vec) +{ + mp_limb_t *xx, *yy; + _ntl_gbigint *a; + _ntl_gbigint x1; + long i, sx; + long sy; + mp_limb_t carry; + + sx = sbuf; + _ntl_gsetlength(x, sx); + x1 = *x; + xx = DATA(x1); + + for (i = 0; i < sx; i++) + xx[i] = 0; + + for (i = 0; i < n; i++) { + if (!v[i]) continue; + + yy = DATA(v[i]); + sy = SIZE(v[i]); + + if (!sy || !b[i]) continue; + + carry = mpn_addmul_1(xx, yy, sy, b[i]); + yy = xx + sy; + *yy += carry; + + if (*yy < carry) { /* unsigned comparison! */ + do { + yy++; + *yy += 1; + } while (*yy == 0); + } + } + + while (sx > 0 && xx[sx-1] == 0) sx--; + SIZE(x1) = sx; +} + + +#if (defined(NTL_VIABLE_LL) && defined(NTL_TBL_CRT)) + +#define CRT_ALTCODE_UNROLL (1) + +void _ntl_crt_struct_tbl::eval(_ntl_gbigint *x, const long *b, _ntl_tmp_vec *generic_tmp_vec) +{ + long sx; + _ntl_gbigint x1; + long i, j; + + // quick test for zero vector + // most likely, they are either all zero (if we are working + // with some sparse polynomials) or none of them are zero, + // so in the general case, this should go fast + if (!b[0]) { + i = 1; + while (i < n && !b[i]) i++; + if (i >= n) { + _ntl_gzero(x); + return; + } + } + + sx = sz + 2; + _ntl_gsetlength(x, sx); + x1 = *x; + mp_limb_t * NTL_RESTRICT xx = DATA(x1); + + + const long Bnd = 1L << (NTL_BITS_PER_LONG-NTL_SP_NBITS); + + if (n <= Bnd) { + mp_limb_t carry=0; + + for (i = 0; i < sz; i++) { + const mp_limb_t *row = v[i]; + + ll_type acc; + ll_mul(acc, row[0], b[0]); + +#if (CRT_ALTCODE_UNROLL && NTL_BITS_PER_LONG-NTL_SP_NBITS == 4) + switch (n) { + case 16: ll_mul_add(acc, row[16-1], b[16-1]); + case 15: ll_mul_add(acc, row[15-1], b[15-1]); + case 14: ll_mul_add(acc, row[14-1], b[14-1]); + case 13: ll_mul_add(acc, row[13-1], b[13-1]); + case 12: ll_mul_add(acc, row[12-1], b[12-1]); + case 11: ll_mul_add(acc, row[11-1], b[11-1]); + case 10: ll_mul_add(acc, row[10-1], b[10-1]); + case 9: ll_mul_add(acc, row[9-1], b[9-1]); + case 8: ll_mul_add(acc, row[8-1], b[8-1]); + case 7: ll_mul_add(acc, row[7-1], b[7-1]); + case 6: ll_mul_add(acc, row[6-1], b[6-1]); + case 5: ll_mul_add(acc, row[5-1], b[5-1]); + case 4: ll_mul_add(acc, row[4-1], b[4-1]); + case 3: ll_mul_add(acc, row[3-1], b[3-1]); + case 2: ll_mul_add(acc, row[2-1], b[2-1]); + } +#else + for (j = 1; j < n; j++) + ll_mul_add(acc, row[j], b[j]); +#endif + + ll_add(acc, carry); + xx[i] = ll_get_lo(acc); + carry = ll_get_hi(acc); + } + + xx[sz] = carry; + xx[sz+1] = 0; + } + else { + ll_type carry; + ll_init(carry, 0); + + for (i = 0; i < sz; i++) { + const mp_limb_t *row = v[i]; + + ll_type acc21; + mp_limb_t acc0; + + { + ll_type sum; + ll_mul(sum, row[0], b[0]); + +#if (CRT_ALTCODE_UNROLL && NTL_BITS_PER_LONG-NTL_SP_NBITS == 4) + ll_mul_add(sum, row[1], b[1]); + ll_mul_add(sum, row[2], b[2]); + ll_mul_add(sum, row[3], b[3]); + 
ll_mul_add(sum, row[4], b[4]); + ll_mul_add(sum, row[5], b[5]); + ll_mul_add(sum, row[6], b[6]); + ll_mul_add(sum, row[7], b[7]); + ll_mul_add(sum, row[8], b[8]); + ll_mul_add(sum, row[9], b[9]); + ll_mul_add(sum, row[10], b[10]); + ll_mul_add(sum, row[11], b[11]); + ll_mul_add(sum, row[12], b[12]); + ll_mul_add(sum, row[13], b[13]); + ll_mul_add(sum, row[14], b[14]); + ll_mul_add(sum, row[15], b[15]); +#elif (CRT_ALTCODE_UNROLL && NTL_BITS_PER_LONG-NTL_SP_NBITS == 2) + ll_mul_add(sum, row[1], b[1]); + ll_mul_add(sum, row[2], b[2]); + ll_mul_add(sum, row[3], b[3]); +#else + for (j = 1; j < Bnd; j++) + ll_mul_add(sum, row[j], b[j]); +#endif + + + ll_init(acc21, ll_get_hi(sum)); + acc0 = ll_get_lo(sum); + } + + const mp_limb_t *ap = row; + const long *tp = b; + +#if (CRT_ALTCODE_UNROLL && NTL_BITS_PER_LONG-NTL_SP_NBITS == 2) + long m = n - 4; + ap += 4; + tp += 4; + + for (; m >= 8; m -= 8, ap += 8, tp += 8) { + { + ll_type sum; + ll_mul(sum, ap[0], tp[0]); + ll_mul_add(sum, ap[1], tp[1]); + ll_mul_add(sum, ap[2], tp[2]); + ll_mul_add(sum, ap[3], tp[3]); + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + { + ll_type sum; + ll_mul(sum, ap[4+0], tp[4+0]); + ll_mul_add(sum, ap[4+1], tp[4+1]); + ll_mul_add(sum, ap[4+2], tp[4+2]); + ll_mul_add(sum, ap[4+3], tp[4+3]); + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + } + + for (; m >= 4; m -= 4, ap += 4, tp += 4) { + ll_type sum; + ll_mul(sum, ap[0], tp[0]); + ll_mul_add(sum, ap[1], tp[1]); + ll_mul_add(sum, ap[2], tp[2]); + ll_mul_add(sum, ap[3], tp[3]); + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + + +#else + long m; + for (m = n-Bnd, ap += Bnd, tp += Bnd; m >= Bnd; m -= Bnd, ap += Bnd, tp += Bnd) { + + ll_type sum; + ll_mul(sum, ap[0], tp[0]); + +#if (CRT_ALTCODE_UNROLL && NTL_BITS_PER_LONG-NTL_SP_NBITS == 4) + ll_mul_add(sum, ap[1], tp[1]); + ll_mul_add(sum, ap[2], tp[2]); + ll_mul_add(sum, ap[3], tp[3]); + ll_mul_add(sum, ap[4], tp[4]); + ll_mul_add(sum, ap[5], tp[5]); + ll_mul_add(sum, ap[6], tp[6]); + ll_mul_add(sum, ap[7], tp[7]); + ll_mul_add(sum, ap[8], tp[8]); + ll_mul_add(sum, ap[9], tp[9]); + ll_mul_add(sum, ap[10], tp[10]); + ll_mul_add(sum, ap[11], tp[11]); + ll_mul_add(sum, ap[12], tp[12]); + ll_mul_add(sum, ap[13], tp[13]); + ll_mul_add(sum, ap[14], tp[14]); + ll_mul_add(sum, ap[15], tp[15]); +#else + for (long j = 1; j < Bnd; j++) + ll_mul_add(sum, ap[j], tp[j]); +#endif + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } +#endif + + if (m > 0) { + ll_type sum; + ll_mul(sum, ap[0], tp[0]); + +#if (CRT_ALTCODE_UNROLL && NTL_BITS_PER_LONG-NTL_SP_NBITS == 4) + switch (m) { + case 15: ll_mul_add(sum, ap[15-1], tp[15-1]); + case 14: ll_mul_add(sum, ap[14-1], tp[14-1]); + case 13: ll_mul_add(sum, ap[13-1], tp[13-1]); + case 12: ll_mul_add(sum, ap[12-1], tp[12-1]); + case 11: ll_mul_add(sum, ap[11-1], tp[11-1]); + case 10: ll_mul_add(sum, ap[10-1], tp[10-1]); + case 9: ll_mul_add(sum, ap[9-1], tp[9-1]); + case 8: ll_mul_add(sum, ap[8-1], tp[8-1]); + case 7: ll_mul_add(sum, ap[7-1], tp[7-1]); + case 6: ll_mul_add(sum, ap[6-1], tp[6-1]); + case 5: ll_mul_add(sum, ap[5-1], tp[5-1]); + case 4: ll_mul_add(sum, ap[4-1], tp[4-1]); + case 3: ll_mul_add(sum, ap[3-1], tp[3-1]); + case 2: ll_mul_add(sum, ap[2-1], tp[2-1]); + } +#else + for (m--, ap++, tp++; m > 0; m--, ap++, tp++) + ll_mul_add(sum, ap[0], tp[0]); +#endif + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + + 
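+ /* acc21:acc0 is a redundant three-limb accumulator: each double-limb + partial sum is folded into the low limb acc0, with its high part + collected in acc21 */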
} + + ll_add(carry, acc0); + xx[i] = ll_get_lo(carry); + ll_add(acc21, ll_get_hi(carry)); + carry = acc21; + } + + xx[sz] = ll_get_lo(carry); + xx[sz+1] = ll_get_hi(carry); + } + + + while (sx > 0 && xx[sx-1] == 0) sx--; + SIZE(x1) = sx; +} +#endif + +void _ntl_crt_struct_fast::eval(_ntl_gbigint *x, const long *b, _ntl_tmp_vec *generic_tmp_vec) +{ + _ntl_tmp_vec_crt_fast *tmp_vec = static_cast<_ntl_tmp_vec_crt_fast*> (generic_tmp_vec); + + long *val_vec = tmp_vec->val_vec.get(); + _ntl_gbigint_wrapped *temps = tmp_vec->temps.get(); + _ntl_gbigint_wrapped *rem_vec = tmp_vec->rem_vec.get(); + + long vec_len = (1L << levels) - 1; + + long i; + + for (i = 0; i < n; i++) { + val_vec[i] = MulMod(b[i], inv_vec[i], primes[i]); + } + + for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { + long j1 = index_vec[i]; + long j2 = index_vec[i+1]; + gadd_mul_many(&rem_vec[i], &coeff_vec[j1], &val_vec[j1], j2-j1, + SIZE(prod_vec[i])); + } + + for (i = (1L << (levels-1)) - 2; i >= 0; i--) { + _ntl_gmul(prod_vec[2*i+1], rem_vec[2*i+2], &temps[0]); + _ntl_gmul(rem_vec[2*i+1], prod_vec[2*i+2], &temps[1]); + _ntl_gadd(temps[0], temps[1], &rem_vec[i]); + } + + /* temps[0] = rem_vec[0] mod prod_vec[0] (least absolute residue) */ + _ntl_gmod(rem_vec[0], prod_vec[0], &temps[0]); + _ntl_gsub(temps[0], prod_vec[0], &temps[1]); + _ntl_gnegate(&temps[1]); + if (_ntl_gcompare(temps[0], temps[1]) > 0) { + _ntl_gnegate(&temps[1]); + _ntl_gcopy(temps[1], &temps[0]); + } + + _ntl_gmod(temps[0], modulus, &temps[1]); + _ntl_gcopy(temps[1], x); +} + + +bool _ntl_crt_struct_basic::special() { return false; } + +#if (defined(NTL_VIABLE_LL) && defined(NTL_TBL_CRT)) +bool _ntl_crt_struct_tbl::special() { return false; } +#endif + + +bool _ntl_crt_struct_fast::special() { return true; } + + + +/* end crt code */ + + + +class _ntl_tmp_vec_rem_impl : public _ntl_tmp_vec { +public: + UniqueArray<_ntl_gbigint_wrapped> rem_vec; +}; + + + + + + +class _ntl_rem_struct_basic : public _ntl_rem_struct { +public: + long n; + UniqueArray<long> primes; + + void eval(long *x, _ntl_gbigint a, _ntl_tmp_vec *tmp_vec); + _ntl_tmp_vec *fetch(); +}; + + +class _ntl_rem_struct_fast : public _ntl_rem_struct { +public: + long n; + long levels; + UniqueArray<long> primes; + UniqueArray<long> index_vec; + UniqueArray<_ntl_gbigint_wrapped> prod_vec; + long modulus_size; + + void eval(long *x, _ntl_gbigint a, _ntl_tmp_vec *tmp_vec); + _ntl_tmp_vec *fetch(); +}; + + +class _ntl_rem_struct_medium : public _ntl_rem_struct { +public: + long n; + long levels; + UniqueArray<long> primes; + UniqueArray<long> index_vec; + UniqueArray<long> len_vec; + UniqueArray<mp_limb_t> inv_vec; + UniqueArray<long> corr_vec; + UniqueArray<mulmod_precon_t> corraux_vec; + UniqueArray<_ntl_gbigint_wrapped> prod_vec; + + void eval(long *x, _ntl_gbigint a, _ntl_tmp_vec *tmp_vec); + _ntl_tmp_vec *fetch(); +}; + + + +#if (defined(NTL_VIABLE_LL) && defined(NTL_TBL_REM)) + +class _ntl_rem_struct_tbl : public _ntl_rem_struct { +public: + long n; + UniqueArray<long> primes; + UniqueArray<mp_limb_t> inv_primes; + Unique2DArray<mp_limb_t> tbl; + + void eval(long *x, _ntl_gbigint a, _ntl_tmp_vec *tmp_vec); + _ntl_tmp_vec *fetch(); + +}; + +#endif + + + +_ntl_rem_struct *_ntl_rem_struct_build(long n, _ntl_gbigint modulus, long (*p)(long)) +{ + +#if (defined(NTL_VIABLE_LL) && defined(NTL_TBL_REM)) + if (n <= 800) { + UniqueArray<long> q; + UniqueArray<mp_limb_t> inv_primes; + Unique2DArray<mp_limb_t> tbl; + long i, j; + long qq, t, t1; + long sz = SIZE(modulus); + + q.SetLength(n); + for (i = 0; i < n; i++) + q[i] = p(i); + + inv_primes.SetLength(n); + for (i = 0; i < n; i++) + inv_primes[i] = (unsigned long) ( 
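+ /* a scaled reciprocal, floor((2^(NTL_SP_NBITS+NTL_BITS_PER_LONG) - 1)/q[i]), + used by tbl_red_21 below for what is in effect a Barrett-style + reduction */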
((((NTL_ULL_TYPE) 1) << (NTL_SP_NBITS+NTL_BITS_PER_LONG))-1UL) / ((NTL_ULL_TYPE) q[i]) ); + + + tbl.SetDims(n, sz); + + for (i = 0; i < n; i++) { + qq = q[i]; + t = 1; + for (j = 0; j < NTL_ZZ_NBITS; j++) { + t += t; + if (t >= qq) t -= qq; + } + t1 = 1; + tbl[i][0] = 1; + for (j = 1; j < sz; j++) { + t1 = MulMod(t1, t, qq); + tbl[i][j] = t1; + } + } + + UniquePtr<_ntl_rem_struct_tbl> R; + R.make(); + + R->n = n; + R->primes.move(q); + R->inv_primes.move(inv_primes); + R->tbl.move(tbl); + + return R.release(); + } +#endif + + if (0) { + // this no longer seems to be useful + + UniqueArray<long> q; + long i, j; + long levels, vec_len; + UniqueArray<long> index_vec; + UniqueArray<long> len_vec, corr_vec; + UniqueArray<mulmod_precon_t> corraux_vec; + UniqueArray<mp_limb_t> inv_vec; + UniqueArray<_ntl_gbigint_wrapped> prod_vec; + + + q.SetLength(n); + for (i = 0; i < n; i++) + q[i] = p(i); + + levels = 0; + while ((n >> levels) >= 4) levels++; + + vec_len = (1L << levels) - 1; + + index_vec.SetLength(vec_len+1); + len_vec.SetLength(vec_len); + inv_vec.SetLength(vec_len); + + corr_vec.SetLength(n); + corraux_vec.SetLength(n); + + prod_vec.SetLength(vec_len); + + index_vec[0] = 0; + index_vec[1] = n; + + for (i = 0; i <= levels-2; i++) { + long start = (1L << i) - 1; + long finish = (1L << (i+1)) - 2; + for (j = finish; j >= start; j--) { + index_vec[2*j+2] = index_vec[j] + (index_vec[j+1] - index_vec[j])/2; + index_vec[2*j+1] = index_vec[j]; + } + index_vec[2*finish+3] = n; + } + + for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { + /* multiply primes index_vec[i]..index_vec[i+1]-1 into + * prod_vec[i] + */ + + _ntl_gone(&prod_vec[i]); + for (j = index_vec[i]; j < index_vec[i+1]; j++) + _ntl_gsmul(prod_vec[i], q[j], &prod_vec[i]); + } + + for (i = (1L << (levels-1)) - 2; i >= 3; i--) + _ntl_gmul(prod_vec[2*i+1], prod_vec[2*i+2], &prod_vec[i]); + + + for (i = 3; i < vec_len; i++) + len_vec[i] = _ntl_gsize(prod_vec[i]); + + /* Set len_vec[1] = len_vec[2] = + * max(_ntl_gsize(modulus), len_vec[3..6]). + * This is a bit paranoid, but it makes the code + * more robust. 
*/ + + j = _ntl_gsize(modulus); + for (i = 3; i <= 6; i++) + if (len_vec[i] > j) j = len_vec[i]; + + len_vec[1] = len_vec[2] = j; + + for (i = 3; i < vec_len; i++) + inv_vec[i] = neg_inv_mod_limb(DATA(prod_vec[i])[0]); + + + for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { + for (j = index_vec[i]; j < index_vec[i+1]; j++) { + corr_vec[j] = SpecialPower(len_vec[1] - len_vec[i], q[j]); + corraux_vec[j] = PrepMulModPrecon(corr_vec[j], q[j]); + } + } + + + + UniquePtr<_ntl_rem_struct_medium> R; + R.make(); + + R->n = n; + R->levels = levels; + R->primes.move(q); + R->index_vec.move(index_vec); + R->len_vec.move(len_vec); + R->inv_vec.move(inv_vec); + R->corr_vec.move(corr_vec); + R->corraux_vec.move(corraux_vec); + R->prod_vec.move(prod_vec); + + return R.release(); + } + + + if (n > 800) { + UniqueArray q; + long i, j; + long levels, vec_len; + UniqueArray index_vec; + UniqueArray<_ntl_gbigint_wrapped> prod_vec; + + q.SetLength(n); + for (i = 0; i < n; i++) + q[i] = p(i); + + levels = 0; + while ((n >> levels) >= 4) levels++; + + vec_len = (1L << levels) - 1; + + index_vec.SetLength(vec_len+1); + prod_vec.SetLength(vec_len); + + index_vec[0] = 0; + index_vec[1] = n; + + for (i = 0; i <= levels-2; i++) { + long start = (1L << i) - 1; + long finish = (1L << (i+1)) - 2; + for (j = finish; j >= start; j--) { + index_vec[2*j+2] = index_vec[j] + (index_vec[j+1] - index_vec[j])/2; + index_vec[2*j+1] = index_vec[j]; + } + index_vec[2*finish+3] = n; + } + + for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { + /* multiply primes index_vec[i]..index_vec[i+1]-1 into + * prod_vec[i] + */ + + _ntl_gone(&prod_vec[i]); + for (j = index_vec[i]; j < index_vec[i+1]; j++) + _ntl_gsmul(prod_vec[i], q[j], &prod_vec[i]); + } + + for (i = (1L << (levels-1)) - 2; i >= 3; i--) + _ntl_gmul(prod_vec[2*i+1], prod_vec[2*i+2], &prod_vec[i]); + + + + UniquePtr<_ntl_rem_struct_fast> R; + R.make(); + + R->n = n; + R->levels = levels; + R->primes.move(q); + R->index_vec.move(index_vec); + R->prod_vec.move(prod_vec); + R->modulus_size = _ntl_gsize(modulus); + + return R.release(); + } + + { + // basic case + + UniqueArray q; + long i; + + UniquePtr<_ntl_rem_struct_basic> R; + R.make(); + + R->n = n; + R->primes.SetLength(n); + for (i = 0; i < n; i++) + R->primes[i] = p(i); + + return R.release(); + } +} + +_ntl_tmp_vec *_ntl_rem_struct_basic::fetch() +{ + return 0; +} + + +#if (defined(NTL_VIABLE_LL) && defined(NTL_TBL_REM)) + +_ntl_tmp_vec *_ntl_rem_struct_tbl::fetch() +{ + return 0; +} + +#endif + +_ntl_tmp_vec *_ntl_rem_struct_fast::fetch() +{ + long vec_len = (1L << levels) - 1; + UniquePtr<_ntl_tmp_vec_rem_impl> res; + res.make(); + res->rem_vec.SetLength(vec_len); + _ntl_gbigint_wrapped *rem_vec = res->rem_vec.get(); + + long i; + + /* allocate length in advance to streamline eval code */ + + _ntl_gsetlength(&rem_vec[1], modulus_size); + _ntl_gsetlength(&rem_vec[2], modulus_size); + + for (i = 1; i < (1L << (levels-1)) - 1; i++) { + _ntl_gsetlength(&rem_vec[2*i+1], _ntl_gsize(prod_vec[2*i+1])); + _ntl_gsetlength(&rem_vec[2*i+2], _ntl_gsize(prod_vec[2*i+2])); + } + + return res.release(); +} + +_ntl_tmp_vec *_ntl_rem_struct_medium::fetch() +{ + long vec_len = (1L << levels) - 1; + UniquePtr<_ntl_tmp_vec_rem_impl> res; + res.make(); + res->rem_vec.SetLength(vec_len); + _ntl_gbigint_wrapped *rem_vec = res->rem_vec.get(); + + long i; + + /* allocate length in advance to streamline eval code */ + + _ntl_gsetlength(&rem_vec[0], len_vec[1]); /* a special temp */ + + for (i = 1; i < vec_len; i++) + 
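+      /* give every tree node the limb count recorded for it at build time */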
_ntl_gsetlength(&rem_vec[i], len_vec[i]); + + return res.release(); +} + + + + + +#if (defined(NTL_VIABLE_LL) && defined(NTL_TBL_REM)) + +static inline +mp_limb_t tbl_red_21(mp_limb_t hi, mp_limb_t lo, long d, mp_limb_t dinv) +{ + unsigned long H = (hi << (NTL_BITS_PER_LONG-NTL_SP_NBITS)) | (lo >> NTL_SP_NBITS); + unsigned long Q = MulHiUL(H, dinv) + H; + unsigned long rr = lo - Q*cast_unsigned(d); // rr in [0..4*d) + long r = sp_CorrectExcess(rr, 2*d); // r in [0..2*d) + r = sp_CorrectExcess(r, d); + return r; +} + +static inline +mp_limb_t tbl_red_n1(const mp_limb_t *x, long n, long d, mp_limb_t dinv) +{ + mp_limb_t carry = 0; + long i; + for (i = n-1; i >= 0; i--) + carry = tbl_red_21(carry, x[i], d, dinv); + return carry; +} + +// NOTE: tbl_red_n1 playes the same role as mpn_mod_1. +// It assumes that the modulus is d is normalized, i.e., +// has exactly NTL_SP_NBITS bits. This will be the case for +// the FFT primes that are used. + +static inline +mp_limb_t tbl_red_31(mp_limb_t x2, mp_limb_t x1, mp_limb_t x0, + long d, mp_limb_t dinv) +{ + mp_limb_t carry = tbl_red_21(x2, x1, d, dinv); + return tbl_red_21(carry, x0, d, dinv); +} + +// NOTE: tbl_red_31 assumes x2 < d + + +#if (NTL_SP_NBITS == NTL_BITS_PER_LONG-2) + +// special case, some loop unrolling: slightly faster + + +// DIRT: won't work if GMP has nails +void _ntl_rem_struct_tbl::eval(long *x, _ntl_gbigint a, + _ntl_tmp_vec *generic_tmp_vec) +{ + if (ZEROP(a)) { + long i; + for (i = 0; i < n; i++) x[i] = 0; + return; + } + + long sa = SIZE(a); + mp_limb_t *adata = DATA(a); + + if (sa <= 4) { + long i; + for (i = 0; i < n; i++) { + mp_limb_t *tp = tbl[i]; + ll_type acc; + ll_init(acc, adata[0]); + long j; + for (j = 1; j < sa; j++) + ll_mul_add(acc, adata[j], tp[j]); + + mp_limb_t accvec[2]; + x[i] = tbl_red_31(0, ll_get_hi(acc), ll_get_lo(acc), primes[i], inv_primes[i]); + } + } + else { + long i; + for (i = 0; i < n; i++) { + mp_limb_t *ap = adata; + mp_limb_t *tp = tbl[i]; + + ll_type acc21; + mp_limb_t acc0; + + { + ll_type sum; + ll_init(sum, ap[0]); + + ll_mul_add(sum, ap[1], tp[1]); + ll_mul_add(sum, ap[2], tp[2]); + ll_mul_add(sum, ap[3], tp[3]); + + ll_init(acc21, ll_get_hi(sum)); + acc0 = ll_get_lo(sum); + } + + long m=sa-4; + ap += 4; + tp += 4; + + for (; m >= 8; m -= 8, ap += 8, tp += 8) { + { + ll_type sum; + ll_mul(sum, ap[0], tp[0]); + ll_mul_add(sum, ap[1], tp[1]); + ll_mul_add(sum, ap[2], tp[2]); + ll_mul_add(sum, ap[3], tp[3]); + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + { + + ll_type sum; + ll_mul(sum, ap[4+0], tp[4+0]); + ll_mul_add(sum, ap[4+1], tp[4+1]); + ll_mul_add(sum, ap[4+2], tp[4+2]); + ll_mul_add(sum, ap[4+3], tp[4+3]); + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + } + + for (; m >= 4; m -= 4, ap += 4, tp += 4) { + ll_type sum; + ll_mul(sum, ap[0], tp[0]); + ll_mul_add(sum, ap[1], tp[1]); + ll_mul_add(sum, ap[2], tp[2]); + ll_mul_add(sum, ap[3], tp[3]); + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + + if (m > 0) { + ll_type sum; + ll_mul(sum, ap[0], tp[0]); + for (m--, ap++, tp++; m > 0; m--, ap++, tp++) + ll_mul_add(sum, ap[0], tp[0]); + + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + + x[i] = tbl_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, primes[i], inv_primes[i]); + } + } +} + +#else + +// General case: some loop unrolling (also using "Duff's Device") +// for the case where BPL-SPNBITS == 4: this is the common +// case on 64-bit 
machines. The loop unrolling and Duff seems +// to shave off 5-10% + +#define TBL_UNROLL (1) + +// DIRT: won't work if GMP has nails +void _ntl_rem_struct_tbl::eval(long *x, _ntl_gbigint a, + _ntl_tmp_vec *generic_tmp_vec) +{ + if (ZEROP(a)) { + long i; + for (i = 0; i < n; i++) x[i] = 0; + return; + } + + long sa = SIZE(a); + mp_limb_t *adata = DATA(a); + + const long Bnd = 1L << (NTL_BITS_PER_LONG-NTL_SP_NBITS); + + if (sa <= Bnd) { + long i; + for (i = 0; i < n; i++) { + mp_limb_t *tp = tbl[i]; + + + ll_type acc; + ll_init(acc, adata[0]); + +#if (TBL_UNROLL && NTL_BITS_PER_LONG-NTL_SP_NBITS == 4) + switch (sa) { + case 16: ll_mul_add(acc, adata[16-1], tp[16-1]); + case 15: ll_mul_add(acc, adata[15-1], tp[15-1]); + case 14: ll_mul_add(acc, adata[14-1], tp[14-1]); + case 13: ll_mul_add(acc, adata[13-1], tp[13-1]); + case 12: ll_mul_add(acc, adata[12-1], tp[12-1]); + case 11: ll_mul_add(acc, adata[11-1], tp[11-1]); + case 10: ll_mul_add(acc, adata[10-1], tp[10-1]); + case 9: ll_mul_add(acc, adata[9-1], tp[9-1]); + case 8: ll_mul_add(acc, adata[8-1], tp[8-1]); + case 7: ll_mul_add(acc, adata[7-1], tp[7-1]); + case 6: ll_mul_add(acc, adata[6-1], tp[6-1]); + case 5: ll_mul_add(acc, adata[5-1], tp[5-1]); + case 4: ll_mul_add(acc, adata[4-1], tp[4-1]); + case 3: ll_mul_add(acc, adata[3-1], tp[3-1]); + case 2: ll_mul_add(acc, adata[2-1], tp[2-1]); + } + +#else + long j; + for (j = 1; j < sa; j++) + ll_mul_add(acc, adata[j], tp[j]); +#endif + + x[i] = tbl_red_31(0, ll_get_hi(acc), ll_get_lo(acc), primes[i], inv_primes[i]); + } + } + else { + long i; + for (i = 0; i < n; i++) { + mp_limb_t *ap = adata; + mp_limb_t *tp = tbl[i]; + + ll_type acc21; + mp_limb_t acc0; + + { + ll_type sum; + ll_init(sum, ap[0]); + +#if (TBL_UNROLL && NTL_BITS_PER_LONG-NTL_SP_NBITS == 4) + ll_mul_add(sum, ap[1], tp[1]); + ll_mul_add(sum, ap[2], tp[2]); + ll_mul_add(sum, ap[3], tp[3]); + ll_mul_add(sum, ap[4], tp[4]); + ll_mul_add(sum, ap[5], tp[5]); + ll_mul_add(sum, ap[6], tp[6]); + ll_mul_add(sum, ap[7], tp[7]); + ll_mul_add(sum, ap[8], tp[8]); + ll_mul_add(sum, ap[9], tp[9]); + ll_mul_add(sum, ap[10], tp[10]); + ll_mul_add(sum, ap[11], tp[11]); + ll_mul_add(sum, ap[12], tp[12]); + ll_mul_add(sum, ap[13], tp[13]); + ll_mul_add(sum, ap[14], tp[14]); + ll_mul_add(sum, ap[15], tp[15]); +#else + for (long j = 1; j < Bnd; j++) + ll_mul_add(sum, ap[j], tp[j]); +#endif + + ll_init(acc21, ll_get_hi(sum)); + acc0 = ll_get_lo(sum); + } + + long m; + for (m = sa-Bnd, ap += Bnd, tp += Bnd; m >= Bnd; m -= Bnd, ap += Bnd, tp += Bnd) { + + ll_type sum; + ll_mul(sum, ap[0], tp[0]); + +#if (TBL_UNROLL && NTL_BITS_PER_LONG-NTL_SP_NBITS == 4) + ll_mul_add(sum, ap[1], tp[1]); + ll_mul_add(sum, ap[2], tp[2]); + ll_mul_add(sum, ap[3], tp[3]); + ll_mul_add(sum, ap[4], tp[4]); + ll_mul_add(sum, ap[5], tp[5]); + ll_mul_add(sum, ap[6], tp[6]); + ll_mul_add(sum, ap[7], tp[7]); + ll_mul_add(sum, ap[8], tp[8]); + ll_mul_add(sum, ap[9], tp[9]); + ll_mul_add(sum, ap[10], tp[10]); + ll_mul_add(sum, ap[11], tp[11]); + ll_mul_add(sum, ap[12], tp[12]); + ll_mul_add(sum, ap[13], tp[13]); + ll_mul_add(sum, ap[14], tp[14]); + ll_mul_add(sum, ap[15], tp[15]); +#else + for (long j = 1; j < Bnd; j++) + ll_mul_add(sum, ap[j], tp[j]); +#endif + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + + if (m > 0) { + ll_type sum; + ll_mul(sum, ap[0], tp[0]); + +#if (TBL_UNROLL && NTL_BITS_PER_LONG-NTL_SP_NBITS == 4) + switch (m) { + case 15: ll_mul_add(sum, ap[15-1], tp[15-1]); + case 14: ll_mul_add(sum, ap[14-1], tp[14-1]); + case 
13: ll_mul_add(sum, ap[13-1], tp[13-1]);
+         case 12: ll_mul_add(sum, ap[12-1], tp[12-1]);
+         case 11: ll_mul_add(sum, ap[11-1], tp[11-1]);
+         case 10: ll_mul_add(sum, ap[10-1], tp[10-1]);
+         case 9: ll_mul_add(sum, ap[9-1], tp[9-1]);
+         case 8: ll_mul_add(sum, ap[8-1], tp[8-1]);
+         case 7: ll_mul_add(sum, ap[7-1], tp[7-1]);
+         case 6: ll_mul_add(sum, ap[6-1], tp[6-1]);
+         case 5: ll_mul_add(sum, ap[5-1], tp[5-1]);
+         case 4: ll_mul_add(sum, ap[4-1], tp[4-1]);
+         case 3: ll_mul_add(sum, ap[3-1], tp[3-1]);
+         case 2: ll_mul_add(sum, ap[2-1], tp[2-1]);
+         }
+#else
+         for (m--, ap++, tp++; m > 0; m--, ap++, tp++)
+            ll_mul_add(sum, ap[0], tp[0]);
+#endif
+         ll_add(sum, acc0);
+         acc0 = ll_get_lo(sum);
+         ll_add(acc21, ll_get_hi(sum));
+      }
+
+      x[i] = tbl_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0,
+                        primes[i], inv_primes[i]);
+      }
+   }
+}
+
+#endif
+
+
+#endif
+
+
+void _ntl_rem_struct_basic::eval(long *x, _ntl_gbigint a,
+                                 _ntl_tmp_vec *generic_tmp_vec)
+{
+   long *q = primes.get();
+
+   long j;
+   mp_limb_t *adata;
+   long sa;
+
+   if (!a)
+      sa = 0;
+   else
+      sa = SIZE(a);
+
+   if (sa == 0) {
+      for (j = 0; j < n; j++)
+         x[j] = 0;
+
+      return;
+   }
+
+   adata = DATA(a);
+
+   for (j = 0; j < n; j++)
+      x[j] = mpn_mod_1(adata, sa, q[j]);
+
+}
+
+void _ntl_rem_struct_fast::eval(long *x, _ntl_gbigint a,
+                                _ntl_tmp_vec *generic_tmp_vec)
+{
+   long *q = primes.get();
+   _ntl_gbigint_wrapped *rem_vec =
+      (static_cast<_ntl_tmp_vec_rem_impl *> (generic_tmp_vec))->rem_vec.get();
+   long vec_len = (1L << levels) - 1;
+
+   long i, j;
+
+   if (ZEROP(a)) {
+      for (j = 0; j < n; j++)
+         x[j] = 0;
+
+      return;
+   }
+
+   _ntl_gcopy(a, &rem_vec[1]);
+   _ntl_gcopy(a, &rem_vec[2]);
+
+   for (i = 1; i < (1L << (levels-1)) - 1; i++) {
+      gmod_simple(rem_vec[i], prod_vec[2*i+1], &rem_vec[2*i+1]);
+      gmod_simple(rem_vec[i], prod_vec[2*i+2], &rem_vec[2*i+2]);
+   }
+
+   for (i = (1L << (levels-1)) - 1; i < vec_len; i++) {
+      long lo = index_vec[i];
+      long hi = index_vec[i+1];
+      mp_limb_t *s1p = DATA(rem_vec[i]);
+      long s1size = SIZE(rem_vec[i]);
+      if (s1size == 0) {
+         for (j = lo; j < hi; j++)
+            x[j] = 0;
+      }
+      else {
+         for (j = lo; j < hi; j++)
+            x[j] = mpn_mod_1(s1p, s1size, q[j]);
+      }
+   }
+}
+
+void _ntl_rem_struct_medium::eval(long *x, _ntl_gbigint a,
+                                  _ntl_tmp_vec *generic_tmp_vec)
+{
+   long *q = primes.get();
+   _ntl_gbigint_wrapped *rem_vec =
+      (static_cast<_ntl_tmp_vec_rem_impl *> (generic_tmp_vec))->rem_vec.get();
+   long vec_len = (1L << levels) - 1;
+
+   long i, j;
+
+   if (ZEROP(a)) {
+      for (j = 0; j < n; j++)
+         x[j] = 0;
+
+      return;
+   }
+
+   _ntl_gcopy(a, &rem_vec[1]);
+   _ntl_gcopy(a, &rem_vec[2]);
+
+   for (i = 1; i < (1L << (levels-1)) - 1; i++) {
+      _ntl_gcopy(rem_vec[i], &rem_vec[0]);
+      redc(rem_vec[0], prod_vec[2*i+1], len_vec[i]-len_vec[2*i+1],
+           inv_vec[2*i+1], rem_vec[2*i+1]);
+      redc(rem_vec[i], prod_vec[2*i+2], len_vec[i]-len_vec[2*i+2],
+           inv_vec[2*i+2], rem_vec[2*i+2]);
+   }
+
+   for (i = (1L << (levels-1)) - 1; i < vec_len; i++) {
+      long lo = index_vec[i];
+      long hi = index_vec[i+1];
+      mp_limb_t *s1p = DATA(rem_vec[i]);
+      long s1size = SIZE(rem_vec[i]);
+      if (s1size == 0) {
+         for (j = lo; j < hi; j++)
+            x[j] = 0;
+      }
+      else {
+         for (j = lo; j < hi; j++) {
+            long t = mpn_mod_1(s1p, s1size, q[j]);
+            x[j] = MulModPrecon(t, corr_vec[j], q[j], corraux_vec[j]);
+         }
+      }
+   }
+}
+
+
+#define _ntl_g_inc(p, n)  \
+   do {  \
+      mp_limb_t * __p = (p);  \
+      long __n = (n);  \
+      while (__n > 0) {  \
+         (*__p)++;  \
+         if (*__p != 0) break;  \
+         __p++;  \
+         __n--;  \
+      }  \
+   } while (0);
+
+#define _ntl_g_inc_carry(c, p, n)  \
+   do {  \
+      mp_limb_t * __p = (p);  \
+      long __n = (n);  \
+      long __addc = 1;  \
+      while (__n > 0) {  \
+         (*__p)++;  \
+         if (*__p != 0) { __addc = 0; break; }  \
+         __p++;  \
+         __n--;  \
+      }  \
+      c += __addc;  \
+   } while (0);
+
+#define _ntl_g_dec(p, n)  \
+   do {  \
+      mp_limb_t * __p = (p);  \
+      mp_limb_t __tmp;  \
+      long __n = (n);  \
+      while (__n > 0) {  \
+         __tmp = *__p;  \
+         (*__p)--;  \
+         if (__tmp != 0) break;  \
+         __p++;  \
+         __n--;  \
+      }  \
+   } while (0);
+
+
+
+/* sub==0 means an addmul w += x*y, sub==1 means a submul w -= x*y.
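+
+   Signs are handled by toggling sub: a negative x, a negative yy, and a
+   negative sign on w each flip it once, so the mpn_ routines below always
+   operate on absolute values, and the proper sign is written back into
+   SIZE(w) only at the very end.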
*/ +void +_ntl_gaorsmul_1(_ntl_gbigint x, long yy, long sub, _ntl_gbigint *ww) +{ + long xsize, wsize, wsize_signed, new_wsize, min_size, dsize; + _ntl_gbigint w; + mp_limb_t *xp; + mp_limb_t *wp; + mp_limb_t cy; + mp_limb_t y; + + if (ZEROP(x) || yy == 0) + return; + + if (ZEROP(*ww)) { + _ntl_gsmul(x, yy, ww); + if (sub) SIZE(*ww) = -SIZE(*ww); + return; + } + + if (yy == 1) { + if (sub) + _ntl_gsub(*ww, x, ww); + else + _ntl_gadd(*ww, x, ww); + return; + } + + if (yy == -1) { + if (sub) + _ntl_gadd(*ww, x, ww); + else + _ntl_gsub(*ww, x, ww); + return; + } + + if (*ww == x) { + GRegister(tmp); + _ntl_gsmul(x, yy, &tmp); + if (sub) + _ntl_gsub(*ww, tmp, ww); + else + _ntl_gadd(*ww, tmp, ww); + return; + } + + xsize = SIZE(x); + if (xsize < 0) { + xsize = -xsize; + sub = 1-sub; + } + + if (yy < 0) { + y = - ((mp_limb_t) yy); /* careful! */ + sub = 1-sub; + } + else { + y = (mp_limb_t) yy; + } + + + w = *ww; + + wsize_signed = SIZE(w); + if (wsize_signed < 0) { + sub = 1-sub; + wsize = -wsize_signed; + } + else { + wsize = wsize_signed; + } + + + if (wsize > xsize) { + new_wsize = wsize; + min_size = xsize; + } + else { + new_wsize = xsize; + min_size = wsize; + } + + if (MustAlloc(w, new_wsize+1)) { + _ntl_gsetlength(&w, new_wsize+1); + *ww = w; + } + + wp = DATA(w); + xp = DATA(x); + + if (sub == 0) + { + /* addmul of absolute values */ + + cy = mpn_addmul_1 (wp, xp, min_size, y); + wp += min_size; + xp += min_size; + + dsize = xsize - wsize; + if (dsize != 0) + { + mp_limb_t cy2; + if (dsize > 0) { + cy2 = mpn_mul_1 (wp, xp, dsize, y); + } + else + { + dsize = -dsize; + cy2 = 0; + } + cy = cy2 + mpn_add_1 (wp, wp, dsize, cy); + } + + wp[dsize] = cy; + new_wsize += (cy != 0); + } + else + { + /* submul of absolute values */ + + cy = mpn_submul_1 (wp, xp, min_size, y); + if (wsize >= xsize) + { + /* if w bigger than x, then propagate borrow through it */ + if (wsize != xsize) { + cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy); + } + + if (cy != 0) + { + /* Borrow out of w, take twos complement negative to get + absolute value, flip sign of w. */ + wp[new_wsize] = ~-cy; /* extra limb is 0-cy */ + _ntl_mpn_com_n (wp, wp, new_wsize); + new_wsize++; + _ntl_g_inc(wp, new_wsize); + wsize_signed = -wsize_signed; + } + } + else /* wsize < xsize */ + { + /* x bigger than w, so want x*y-w. Submul has given w-x*y, so + take twos complement and use an mpn_mul_1 for the rest. */ + + mp_limb_t cy2; + + /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */ + _ntl_mpn_com_n (wp, wp, wsize); + _ntl_g_inc_carry(cy, wp, wsize); + cy -= 1; + + /* If cy-1 == -1 then hold that -1 for latter. mpn_submul_1 never + returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */ + cy2 = (cy == ((mp_limb_t) -1)); + cy += cy2; + _ntl_MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy); + wp[new_wsize] = cy; + new_wsize += (cy != 0); + + /* Apply any -1 from above. The value at wp+wsize is non-zero + because y!=0 and the high limb of x will be non-zero. */ + if (cy2) { + _ntl_g_dec(wp+wsize, new_wsize-wsize); + } + + wsize_signed = -wsize_signed; + } + + /* submul can produce high zero limbs due to cancellation, both when w + has more limbs or x has more */ + STRIP(new_wsize, wp); + } + + SIZE(w) = (wsize_signed >= 0 ? 
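+          /* bigints are sign-magnitude: the sign rides on the SIZE field */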
new_wsize : -new_wsize); +} + + +void +_ntl_gsaddmul(_ntl_gbigint x, long yy, _ntl_gbigint *ww) +{ + _ntl_gaorsmul_1(x, yy, 0, ww); +} + +void +_ntl_gssubmul(_ntl_gbigint x, long yy, _ntl_gbigint *ww) +{ + _ntl_gaorsmul_1(x, yy, 1, ww); +} + + +void +_ntl_gaorsmul(_ntl_gbigint x, _ntl_gbigint y, long sub, _ntl_gbigint *ww) +{ + GRegister(tmp); + + _ntl_gmul(x, y, &tmp); + if (sub) + _ntl_gsub(*ww, tmp, ww); + else + _ntl_gadd(*ww, tmp, ww); +} + + +void +_ntl_gaddmul(_ntl_gbigint x, _ntl_gbigint y, _ntl_gbigint *ww) +{ + _ntl_gaorsmul(x, y, 0, ww); +} + +void +_ntl_gsubmul(_ntl_gbigint x, _ntl_gbigint y, _ntl_gbigint *ww) +{ + _ntl_gaorsmul(x, y, 1, ww); +} + + +// general preconditioned remainder + + + +#ifndef NTL_VIABLE_LL + + +struct _ntl_general_rem_one_struct { }; + +_ntl_general_rem_one_struct * +_ntl_general_rem_one_struct_build(long p) +{ + return 0; +} + +long +_ntl_general_rem_one_struct_apply(NTL_verylong a, long p, _ntl_general_rem_one_struct *pinfo) +{ + return _ntl_gsmod(a, p); +} + +void +_ntl_general_rem_one_struct_delete(_ntl_general_rem_one_struct *pinfo) +{ +} + + +#else + + +#define REM_ONE_SZ (128) + +struct _ntl_general_rem_one_struct { + sp_ll_reduce_struct red_struct; + long Bnd; + UniqueArray tbl; +}; + + + + + +#if 0 + +_ntl_general_rem_one_struct * +_ntl_general_rem_one_struct_build(long p) +{ + if (p < 2 || p >= NTL_SP_BOUND) + LogicError("_ntl_general_rem_one_struct_build: bad args (p)"); + + UniquePtr<_ntl_general_rem_one_struct> pinfo; + pinfo.make(); + + pinfo->red_struct = make_sp_ll_reduce_struct(p); + + pinfo->Bnd = 1L << (NTL_BITS_PER_LONG-_ntl_g2logs(p)); + + pinfo->tbl.SetLength(REM_ONE_SZ); + + long t = 1; + for (long j = 0; j < NTL_ZZ_NBITS; j++) { + t += t; + if (t >= p) t -= p; + } + + long t1 = 1; + pinfo->tbl[0] = 1; + for (long j = 1; j < REM_ONE_SZ; j++) { + t1 = MulMod(t1, t, p); + pinfo->tbl[j] = t1; + } + + return pinfo.release(); +} + + + + +long +_ntl_general_rem_one_struct_apply(NTL_verylong a, long p, _ntl_general_rem_one_struct *pinfo) +{ + if (ZEROP(a)) return 0; + + if (!pinfo) { + return _ntl_gsmod(a, p); + } + + + sp_ll_reduce_struct red_struct = pinfo->red_struct; + long Bnd = pinfo->Bnd; + mp_limb_t *tbl = pinfo->tbl.elts(); + + long a_sz, a_neg; + mp_limb_t *a_data; + GET_SIZE_NEG(a_sz, a_neg, a); + a_data = DATA(a); + + if (a_sz > REM_ONE_SZ) { + long res = mpn_mod_1(a_data, a_sz, p); + if (a_neg) res = NegateMod(res, p); + return res; + } + else if (a_sz <= Bnd) { + ll_type acc; + ll_init(acc, a_data[0]); + + { + long j = 1; + + for (; j <= a_sz-16; j += 16) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + ll_mul_add(acc, a_data[j+8], tbl[j+8]); + ll_mul_add(acc, a_data[j+9], tbl[j+9]); + ll_mul_add(acc, a_data[j+10], tbl[j+10]); + ll_mul_add(acc, a_data[j+11], tbl[j+11]); + ll_mul_add(acc, a_data[j+12], tbl[j+12]); + ll_mul_add(acc, a_data[j+13], tbl[j+13]); + ll_mul_add(acc, a_data[j+14], tbl[j+14]); + ll_mul_add(acc, a_data[j+15], tbl[j+15]); + } + + for (; j <= a_sz-4; j += 4) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + } + + for (; j < a_sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + } + + + long res = 
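+      /* fold the two-limb accumulator into a single residue in [0, p) */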
sp_ll_red_31(0, ll_get_hi(acc), ll_get_lo(acc), p, red_struct); + if (a_neg) res = NegateMod(res, p); + return res; + } + else if (Bnd > 16) { + ll_type acc21; + ll_init(acc21, 0); + mp_limb_t acc0 = 0; + + long jj = 0; + for (; jj <= a_sz-Bnd; jj += Bnd) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j <= jj+Bnd-16; j += 16) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + ll_mul_add(acc, a_data[j+8], tbl[j+8]); + ll_mul_add(acc, a_data[j+9], tbl[j+9]); + ll_mul_add(acc, a_data[j+10], tbl[j+10]); + ll_mul_add(acc, a_data[j+11], tbl[j+11]); + ll_mul_add(acc, a_data[j+12], tbl[j+12]); + ll_mul_add(acc, a_data[j+13], tbl[j+13]); + ll_mul_add(acc, a_data[j+14], tbl[j+14]); + ll_mul_add(acc, a_data[j+15], tbl[j+15]); + } + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < a_sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j <= a_sz-4; j += 4) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + } + + for (; j < a_sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + long res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); + if (a_neg) res = NegateMod(res, p); + return res; + } + else if (Bnd == 16) { + ll_type acc21; + ll_init(acc21, 0); + mp_limb_t acc0 = 0; + + long jj = 0; + for (; jj <= a_sz-16; jj += 16) { + ll_type acc; + + long j = jj; + + ll_mul(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + ll_mul_add(acc, a_data[j+8], tbl[j+8]); + ll_mul_add(acc, a_data[j+9], tbl[j+9]); + ll_mul_add(acc, a_data[j+10], tbl[j+10]); + ll_mul_add(acc, a_data[j+11], tbl[j+11]); + ll_mul_add(acc, a_data[j+12], tbl[j+12]); + ll_mul_add(acc, a_data[j+13], tbl[j+13]); + ll_mul_add(acc, a_data[j+14], tbl[j+14]); + ll_mul_add(acc, a_data[j+15], tbl[j+15]); + + ll_add(acc, acc0); + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < a_sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j <= a_sz-4; j += 4) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + } + + for (; j < a_sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + long res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); + if (a_neg) res = NegateMod(res, p); + return res; + } + else if (Bnd == 8) { + ll_type acc21; + ll_init(acc21, 0); + mp_limb_t acc0 = 0; + + long jj = 0; + for (; jj <= a_sz-8; jj += 8) { + ll_type acc; + + long j = jj; + + ll_mul(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], 
tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + + ll_add(acc, acc0); + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < a_sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j < a_sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + long res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); + if (a_neg) res = NegateMod(res, p); + return res; + } + else /* Bnd == 4 */ { + ll_type acc21; + ll_init(acc21, 0); + mp_limb_t acc0 = 0; + + long jj = 0; + for (; jj <= a_sz-4; jj += 4) { + ll_type acc; + + long j = jj; + + ll_mul(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + + + ll_add(acc, acc0); + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < a_sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j < a_sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + long res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); + if (a_neg) res = NegateMod(res, p); + return res; + } +} + +void +_ntl_general_rem_one_struct_delete(_ntl_general_rem_one_struct *pinfo) +{ + delete pinfo; +} + +#else + +// EXPERIMENTAL VERSION + +_ntl_general_rem_one_struct * +_ntl_general_rem_one_struct_build(long p) +{ + if (p < 2 || p >= NTL_SP_BOUND) + LogicError("_ntl_general_rem_one_struct_build: bad args (p)"); + + UniquePtr<_ntl_general_rem_one_struct> pinfo; + pinfo.make(); + + pinfo->red_struct = make_sp_ll_reduce_struct(p); + + pinfo->Bnd = 1L << (NTL_BITS_PER_LONG-_ntl_g2logs(p)); + + pinfo->tbl.SetLength(REM_ONE_SZ+3); + + long t = 1; + for (long j = 0; j < NTL_ZZ_NBITS; j++) { + t += t; + if (t >= p) t -= p; + } + + long t1 = 1; + pinfo->tbl[0] = 1; + for (long j = 1; j < REM_ONE_SZ+3; j++) { + t1 = MulMod(t1, t, p); + pinfo->tbl[j] = t1; + } + + return pinfo.release(); +} + + + + +long +_ntl_general_rem_one_struct_apply1(mp_limb_t *a_data, long a_sz, long a_neg, long p, + _ntl_general_rem_one_struct *pinfo) +{ + sp_ll_reduce_struct red_struct = pinfo->red_struct; + long Bnd = pinfo->Bnd; + mp_limb_t *tbl = pinfo->tbl.elts(); + + long idx = ((cast_unsigned(a_sz+REM_ONE_SZ-1)/REM_ONE_SZ)-1)*REM_ONE_SZ; + ll_type leftover; + long sz = a_sz-idx; + a_data += idx; + + for ( ; ; sz = REM_ONE_SZ, a_data -= REM_ONE_SZ, idx -= REM_ONE_SZ) { + if (sz <= Bnd) { + ll_type acc; + ll_init(acc, 0); + + { + long j = 0; + + for (; j <= sz-16; j += 16) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + ll_mul_add(acc, a_data[j+8], tbl[j+8]); + ll_mul_add(acc, a_data[j+9], tbl[j+9]); + ll_mul_add(acc, a_data[j+10], tbl[j+10]); + ll_mul_add(acc, a_data[j+11], tbl[j+11]); + ll_mul_add(acc, a_data[j+12], tbl[j+12]); + ll_mul_add(acc, a_data[j+13], tbl[j+13]); + ll_mul_add(acc, a_data[j+14], tbl[j+14]); + ll_mul_add(acc, a_data[j+15], tbl[j+15]); + } + + for (; j <= sz-4; j += 4) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], 
tbl[j+3]); + } + + for (; j < sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + } + + if (idx + REM_ONE_SZ >= a_sz) { // first time + if (idx == 0) { // last time + long res = sp_ll_red_31(0, ll_get_hi(acc), ll_get_lo(acc), p, red_struct); + if (a_neg) res = NegateMod(res, p); + return res; + } + else { + ll_mul(leftover, ll_get_lo(acc), tbl[REM_ONE_SZ]); + ll_mul_add(leftover, ll_get_hi(acc), tbl[REM_ONE_SZ+1]); + } + } + else { + ll_type acc21; + mp_limb_t acc0; + + ll_add(leftover, ll_get_lo(acc)); + acc0 = ll_get_lo(leftover); + ll_init(acc21, ll_get_hi(leftover)); + ll_add(acc21, ll_get_hi(acc)); + + if (idx == 0) { // last time + long res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); + if (a_neg) res = NegateMod(res, p); + return res; + } + else { + ll_mul(leftover, acc0, tbl[REM_ONE_SZ]); + ll_mul_add(leftover, ll_get_lo(acc21), tbl[REM_ONE_SZ+1]); + ll_mul_add(leftover, ll_get_hi(acc21), tbl[REM_ONE_SZ+2]); + } + } + } + else { + ll_type acc21; + ll_init(acc21, 0); + mp_limb_t acc0 = 0; + + if (Bnd > 16) { + long jj = 0; + for (; jj <= sz-Bnd; jj += Bnd) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j <= jj+Bnd-16; j += 16) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + ll_mul_add(acc, a_data[j+8], tbl[j+8]); + ll_mul_add(acc, a_data[j+9], tbl[j+9]); + ll_mul_add(acc, a_data[j+10], tbl[j+10]); + ll_mul_add(acc, a_data[j+11], tbl[j+11]); + ll_mul_add(acc, a_data[j+12], tbl[j+12]); + ll_mul_add(acc, a_data[j+13], tbl[j+13]); + ll_mul_add(acc, a_data[j+14], tbl[j+14]); + ll_mul_add(acc, a_data[j+15], tbl[j+15]); + } + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j <= sz-4; j += 4) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + } + + for (; j < sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + } + else if (Bnd == 16) { + + long jj = 0; + for (; jj <= sz-16; jj += 16) { + ll_type acc; + + long j = jj; + + ll_mul(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + ll_mul_add(acc, a_data[j+8], tbl[j+8]); + ll_mul_add(acc, a_data[j+9], tbl[j+9]); + ll_mul_add(acc, a_data[j+10], tbl[j+10]); + ll_mul_add(acc, a_data[j+11], tbl[j+11]); + ll_mul_add(acc, a_data[j+12], tbl[j+12]); + ll_mul_add(acc, a_data[j+13], tbl[j+13]); + ll_mul_add(acc, a_data[j+14], tbl[j+14]); + ll_mul_add(acc, a_data[j+15], tbl[j+15]); + + ll_add(acc, acc0); + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j <= sz-4; j += 4) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + } + + for (; j < sz; j++) + 
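+               /* scalar tail: fewer than four limbs remain in this block */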
ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + } + else if (Bnd == 8) { + long jj = 0; + for (; jj <= sz-8; jj += 8) { + ll_type acc; + + long j = jj; + + ll_mul(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + + ll_add(acc, acc0); + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j < sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + } + else /* Bnd == 4 */ { + long jj = 0; + for (; jj <= sz-4; jj += 4) { + ll_type acc; + + long j = jj; + + ll_mul(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + + + ll_add(acc, acc0); + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j < sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + } + + if (idx + REM_ONE_SZ < a_sz) { // not first time + ll_add(leftover, acc0); + acc0 = ll_get_lo(leftover); + ll_add(acc21, ll_get_hi(leftover)); + } + + if (idx == 0) { // last time + long res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); + if (a_neg) res = NegateMod(res, p); + return res; + } + else { + ll_mul(leftover, acc0, tbl[REM_ONE_SZ]); + ll_mul_add(leftover, ll_get_lo(acc21), tbl[REM_ONE_SZ+1]); + ll_mul_add(leftover, ll_get_hi(acc21), tbl[REM_ONE_SZ+2]); + } + } + } +} + + +long +_ntl_general_rem_one_struct_apply(NTL_verylong a, long p, _ntl_general_rem_one_struct *pinfo) +{ + if (ZEROP(a)) return 0; + + if (!pinfo) { + return _ntl_gsmod(a, p); + } + + sp_ll_reduce_struct red_struct = pinfo->red_struct; + long Bnd = pinfo->Bnd; + mp_limb_t *tbl = pinfo->tbl.elts(); + + long a_sz, a_neg; + mp_limb_t *a_data; + GET_SIZE_NEG(a_sz, a_neg, a); + a_data = DATA(a); + + if (a_sz > REM_ONE_SZ) { + return _ntl_general_rem_one_struct_apply1(a_data, a_sz, a_neg, p, pinfo); + } + + if (a_sz <= Bnd) { + ll_type acc; + ll_init(acc, 0); + + { + long j = 0; + + for (; j <= a_sz-16; j += 16) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + ll_mul_add(acc, a_data[j+8], tbl[j+8]); + ll_mul_add(acc, a_data[j+9], tbl[j+9]); + ll_mul_add(acc, a_data[j+10], tbl[j+10]); + ll_mul_add(acc, a_data[j+11], tbl[j+11]); + ll_mul_add(acc, a_data[j+12], tbl[j+12]); + ll_mul_add(acc, a_data[j+13], tbl[j+13]); + ll_mul_add(acc, a_data[j+14], tbl[j+14]); + ll_mul_add(acc, a_data[j+15], tbl[j+15]); + } + + for (; j <= a_sz-4; j += 4) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + } + + for (; j < a_sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + } + + + long res = sp_ll_red_31(0, ll_get_hi(acc), 
ll_get_lo(acc), p, red_struct); + if (a_neg) res = NegateMod(res, p); + return res; + } + else if (Bnd > 16) { + ll_type acc21; + ll_init(acc21, 0); + mp_limb_t acc0 = 0; + + long jj = 0; + for (; jj <= a_sz-Bnd; jj += Bnd) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j <= jj+Bnd-16; j += 16) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + ll_mul_add(acc, a_data[j+8], tbl[j+8]); + ll_mul_add(acc, a_data[j+9], tbl[j+9]); + ll_mul_add(acc, a_data[j+10], tbl[j+10]); + ll_mul_add(acc, a_data[j+11], tbl[j+11]); + ll_mul_add(acc, a_data[j+12], tbl[j+12]); + ll_mul_add(acc, a_data[j+13], tbl[j+13]); + ll_mul_add(acc, a_data[j+14], tbl[j+14]); + ll_mul_add(acc, a_data[j+15], tbl[j+15]); + } + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < a_sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j <= a_sz-4; j += 4) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + } + + for (; j < a_sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + long res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); + if (a_neg) res = NegateMod(res, p); + return res; + } + else if (Bnd == 16) { + ll_type acc21; + ll_init(acc21, 0); + mp_limb_t acc0 = 0; + + long jj = 0; + for (; jj <= a_sz-16; jj += 16) { + ll_type acc; + + long j = jj; + + ll_mul(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + ll_mul_add(acc, a_data[j+8], tbl[j+8]); + ll_mul_add(acc, a_data[j+9], tbl[j+9]); + ll_mul_add(acc, a_data[j+10], tbl[j+10]); + ll_mul_add(acc, a_data[j+11], tbl[j+11]); + ll_mul_add(acc, a_data[j+12], tbl[j+12]); + ll_mul_add(acc, a_data[j+13], tbl[j+13]); + ll_mul_add(acc, a_data[j+14], tbl[j+14]); + ll_mul_add(acc, a_data[j+15], tbl[j+15]); + + ll_add(acc, acc0); + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < a_sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j <= a_sz-4; j += 4) { + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + } + + for (; j < a_sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + +#if (NTL_BITS_PER_LONG-NTL_SP_NBITS==4) + long res = sp_ll_red_31_normalized(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); +#else + long res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); +#endif + if (a_neg) res = NegateMod(res, p); + return res; + } + else if (Bnd == 8) { + ll_type acc21; + ll_init(acc21, 0); + mp_limb_t acc0 = 0; + + long jj = 0; + for (; jj <= a_sz-8; jj += 8) { + ll_type acc; + + long j = jj; + + ll_mul(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], 
tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + ll_mul_add(acc, a_data[j+4], tbl[j+4]); + ll_mul_add(acc, a_data[j+5], tbl[j+5]); + ll_mul_add(acc, a_data[j+6], tbl[j+6]); + ll_mul_add(acc, a_data[j+7], tbl[j+7]); + + ll_add(acc, acc0); + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < a_sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j < a_sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + long res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); + if (a_neg) res = NegateMod(res, p); + return res; + } + else /* Bnd == 4 */ { + ll_type acc21; + ll_init(acc21, 0); + mp_limb_t acc0 = 0; + + long jj = 0; + for (; jj <= a_sz-4; jj += 4) { + ll_type acc; + + long j = jj; + + ll_mul(acc, a_data[j+0], tbl[j+0]); + ll_mul_add(acc, a_data[j+1], tbl[j+1]); + ll_mul_add(acc, a_data[j+2], tbl[j+2]); + ll_mul_add(acc, a_data[j+3], tbl[j+3]); + + + ll_add(acc, acc0); + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + + if (jj < a_sz) { + ll_type acc; + ll_init(acc, acc0); + + long j = jj; + + for (; j < a_sz; j++) + ll_mul_add(acc, a_data[j+0], tbl[j+0]); + + + acc0 = ll_get_lo(acc); + ll_add(acc21, ll_get_hi(acc)); + } + +#if (NTL_BITS_PER_LONG-NTL_SP_NBITS==2) + long res = sp_ll_red_31_normalized(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); +#else + long res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, red_struct); +#endif + if (a_neg) res = NegateMod(res, p); + return res; + } +} + +void +_ntl_general_rem_one_struct_delete(_ntl_general_rem_one_struct *pinfo) +{ + delete pinfo; +} + +#endif + +#endif + + + + + + + diff --git a/thirdparty/linux/ntl/src/gen_gmp_aux.c b/thirdparty/linux/ntl/src/gen_gmp_aux.c new file mode 100644 index 0000000000..3b509706ac --- /dev/null +++ b/thirdparty/linux/ntl/src/gen_gmp_aux.c @@ -0,0 +1,121 @@ + +#include +#include +#include + +#include + +using namespace std; + + +#ifndef NTL_GMP_LIP + + +int main() +{ + fprintf(stderr, "NTL_GMP_LIP flag not set\n"); + + return 0; +} + + + +#else + + +#include +#include + +void print2k(FILE *f, long k, long bpl) +{ + long m, l; + long first; + + if (k <= 0) { + fprintf(f, "((double) 1.0)"); + return; + } + + m = bpl - 2; + first = 1; + + fprintf(f, "("); + + while (k > 0) { + if (k > m) + l = m; + else + l = k; + + k = k - l; + + + if (first) + first = 0; + else + fprintf(f, "*"); + + fprintf(f, "((double)(1L<<%ld))", l); + } + + fprintf(f, ")"); +} + + + +void Error(const char *s) +{ + fprintf(stderr, "%s\n", s); + abort(); +} + + +int main() +{ + long bpl; + long ntl_zz_nbits; + + fprintf(stderr, "NTL_GMP_LIP flag set\n"); + + bpl = NTL_BITS_PER_LONG; + + + /* + * We require that the number of bits per limb quantity correspond to the + * number of bits of a long, or possibly a "long long" that is twice as wide + * as a long. These restrictions will almost certainly be satisfied, unless + * GMP is installed using the newly proposed "nail" option. 
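+ * For example, on a typical x86_64 GNU/Linux build both mp_limb_t and long
+ * are 64 bits wide, so ntl_zz_nbits below comes out equal to
+ * NTL_BITS_PER_LONG.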
+ */ + + ntl_zz_nbits = 0; + + if (sizeof(mp_limb_t) == sizeof(long) && mp_bits_per_limb == bpl) + ntl_zz_nbits = bpl; + else if (sizeof(mp_limb_t) == 2*sizeof(long) && mp_bits_per_limb == 2*bpl) + ntl_zz_nbits = 2*bpl; + else + Error("sorry...this is a funny gmp"); + + if (sizeof(mp_size_t) != sizeof(long) && + sizeof(mp_size_t) != sizeof(int)) + + Error("sorry...this is a funny gmp"); + + + if (sizeof(mp_size_t) < sizeof(long)) { + printf("#define NTL_SMALL_MP_SIZE_T\n"); + fprintf(stderr, "setting NTL_SMALL_MP_SIZE_T\n"); + } + + fprintf(stderr, "NTL_ZZ_NBITS = %ld\n", ntl_zz_nbits); + + printf("#define NTL_ZZ_NBITS (%ld)\n", ntl_zz_nbits); + + printf("#define NTL_ZZ_FRADIX "); + print2k(stdout, ntl_zz_nbits, bpl); + printf("\n"); + + return 0; +} + +#endif diff --git a/thirdparty/linux/ntl/src/hfileout b/thirdparty/linux/ntl/src/hfileout new file mode 100644 index 0000000000..bd6e22409c --- /dev/null +++ b/thirdparty/linux/ntl/src/hfileout @@ -0,0 +1,2 @@ +// generated by ./configure +// CXXAUTOFLAGS=" -march=native" diff --git a/thirdparty/linux/ntl/src/lip.c b/thirdparty/linux/ntl/src/lip.c new file mode 100644 index 0000000000..01fa638da2 --- /dev/null +++ b/thirdparty/linux/ntl/src/lip.c @@ -0,0 +1,12 @@ + +#include + +#ifdef NTL_GMP_LIP + +#include "g_lip_impl.h" + +#else + +#include "c_lip_impl.h" + +#endif diff --git a/thirdparty/linux/ntl/src/lzz_p.c b/thirdparty/linux/ntl/src/lzz_p.c new file mode 100644 index 0000000000..6d0bb83e89 --- /dev/null +++ b/thirdparty/linux/ntl/src/lzz_p.c @@ -0,0 +1,402 @@ + +#include + +#include + +NTL_START_IMPL + + +NTL_TLS_GLOBAL_DECL(SmartPtr, zz_pInfo_stg) + +NTL_CHEAP_THREAD_LOCAL zz_pInfoT *zz_pInfo = 0; + + + +SmartPtr Build_zz_pInfo(FFTPrimeInfo *info) +{ + return MakeSmart(INIT_FFT, info); +} + + +zz_pInfoT::zz_pInfoT(long NewP, long maxroot) +{ + if (maxroot < 0) LogicError("zz_pContext: maxroot may not be negative"); + + if (NewP <= 1) LogicError("zz_pContext: p must be > 1"); + if (NumBits(NewP) > NTL_SP_NBITS) ResourceError("zz_pContext: modulus too big"); + + ZZ P, B, M, M1, MinusM; + long n, i; + long q, t; + mulmod_t qinv; + + p = NewP; + pinv = PrepMulMod(p); + red_struct = sp_PrepRem(p); + ll_red_struct = make_sp_ll_reduce_struct(p); + ZZ_red_struct.build(p); + + p_info = 0; + + conv(P, p); + + sqr(B, P); + LeftShift(B, B, maxroot+NTL_FFTFudge); + + set(M); + n = 0; + while (M <= B) { + UseFFTPrime(n); + q = GetFFTPrime(n); + n++; + mul(M, M, q); + } + + if (n > 4) LogicError("zz_pInit: too many primes"); + + NumPrimes = n; + PrimeCnt = n; + MaxRoot = CalcMaxRoot(q); + + if (maxroot < MaxRoot) + MaxRoot = maxroot; + + negate(MinusM, M); + MinusMModP = rem(MinusM, p); + MinusMModPpinv = PrepMulModPrecon(MinusMModP, p, pinv); + + CoeffModP.SetLength(n); + CoeffModPpinv.SetLength(n); + x.SetLength(n); + u.SetLength(n); + uqinv.SetLength(n); + + for (i = 0; i < n; i++) { + q = GetFFTPrime(i); + qinv = GetFFTPrimeInv(i); + + div(M1, M, q); + t = rem(M1, q); + t = InvMod(t, q); + CoeffModP[i] = rem(M1, p); + CoeffModPpinv[i] = PrepMulModPrecon(CoeffModP[i], p, pinv); + x[i] = ((double) t)/((double) q); + u[i] = t; + uqinv[i] = PrepMulModPrecon(t, q, qinv); + } +} + +zz_pInfoT::zz_pInfoT(INIT_FFT_TYPE, FFTPrimeInfo *info) +{ + p = info->q; + pinv = info->qinv; + red_struct = sp_PrepRem(p); + ll_red_struct = make_sp_ll_reduce_struct(p); + ZZ_red_struct.build(p); + + + p_info = info; + + NumPrimes = 1; + PrimeCnt = 0; + + MaxRoot = CalcMaxRoot(p); +} + +// FIXME: we could make bigtab an optional argument + 
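+// A minimal usage sketch (not part of the library): how the three zz_pInfoT
+// constructors in this file are reached through the public zz_p interface
+// defined further below. The moduli are arbitrary examples; 786433 = 3*2^18+1
+// is assumed to satisfy IsFFTPrime.
+#if 0
+#include <NTL/lzz_p.h>
+using namespace NTL;
+
+void context_demo()
+{
+   zz_p::init(17);            // general modulus: zz_pInfoT(long, long)
+   zz_p::FFTInit(0);          // NTL-managed FFT prime: the INIT_FFT path
+   zz_p::UserFFTInit(786433); // user-supplied FFT prime: the INIT_USER_FFT path
+}
+#endif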
+zz_pInfoT::zz_pInfoT(INIT_USER_FFT_TYPE, long q) +{ + long w; + if (!IsFFTPrime(q, w)) LogicError("invalid user supplied prime"); + + p = q; + pinv = PrepMulMod(p); + red_struct = sp_PrepRem(p); + ll_red_struct = make_sp_ll_reduce_struct(p); + ZZ_red_struct.build(p); + + + p_info_owner.make(); + p_info = p_info_owner.get(); + + bool bigtab = false; +#ifdef NTL_FFT_BIGTAB + bigtab = true; +#endif + InitFFTPrimeInfo(*p_info, q, w, bigtab); + + NumPrimes = 1; + PrimeCnt = 0; + + MaxRoot = CalcMaxRoot(p); +} + + +void zz_p::init(long p, long maxroot) +{ + zz_pContext c(p, maxroot); + c.restore(); + +} + +void zz_p::FFTInit(long index) +{ + zz_pContext c(INIT_FFT, index); + c.restore(); +} + +void zz_p::UserFFTInit(long q) +{ + zz_pContext c(INIT_USER_FFT, q); + c.restore(); +} + +zz_pContext::zz_pContext(long p, long maxroot) : + ptr(MakeSmart(p, maxroot)) +{ } + +zz_pContext::zz_pContext(INIT_FFT_TYPE, long index) +{ + if (index < 0) + LogicError("bad FFT prime index"); + + UseFFTPrime(index); + + ptr = FFTTables[index]->zz_p_context; +} + +zz_pContext::zz_pContext(INIT_USER_FFT_TYPE, long q) : + ptr(MakeSmart(INIT_USER_FFT, q)) +{ } + + +void zz_pContext::save() +{ + NTL_TLS_GLOBAL_ACCESS(zz_pInfo_stg); + ptr = zz_pInfo_stg; +} + +void zz_pContext::restore() const +{ + NTL_TLS_GLOBAL_ACCESS(zz_pInfo_stg); + zz_pInfo_stg = ptr; + zz_pInfo = zz_pInfo_stg.get(); +} + + + +zz_pBak::~zz_pBak() +{ + if (MustRestore) c.restore(); +} + +void zz_pBak::save() +{ + c.save(); + MustRestore = true; +} + + +void zz_pBak::restore() +{ + c.restore(); + MustRestore = false; +} + + + + + + + +istream& operator>>(istream& s, zz_p& x) +{ + NTL_ZZRegister(y); + NTL_INPUT_CHECK_RET(s, s >> y); + conv(x, y); + + return s; +} + +ostream& operator<<(ostream& s, zz_p a) +{ + NTL_ZZRegister(y); + y = rep(a); + s << y; + + return s; +} + + + +// *********************************************************************** + + +#ifdef NTL_HAVE_LL_TYPE + + +// NOTE: the following code sequence will generate imulq +// instructions on x86_64 machines, which empirically is faster +// than using the mulq instruction or even the mulxq instruction, +// (tested on a Haswell machine). 
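+// For reference, a naive rendering (not part of the library) of what the
+// InnerProd_LL routines below compute: sum_j ap[j]*rep(bp[j]) mod d, assuming
+// every ap[j] already lies in [0, d). The real code instead accumulates
+// unreduced double-word products in a three-limb accumulator and performs a
+// single sp_ll_red_31 reduction per call.
+#if 0
+static long InnerProdNaive(const long *ap, const zz_p *bp, long n, long d)
+{
+   long acc = 0;
+   for (long j = 0; j < n; j++)
+      acc = AddMod(acc, MulMod(ap[j], rep(bp[j]), d), d);
+   return acc;
+}
+#endif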
+ +long +InnerProd_LL(const long *ap, const zz_p *bp, long n, long d, + sp_ll_reduce_struct dinv) +{ + const long BLKSIZE = (1L << min(20, 2*(NTL_BITS_PER_LONG-NTL_SP_NBITS))); + + unsigned long acc0 = 0; + ll_type acc21; + ll_init(acc21, 0); + + long i; + for (i = 0; i <= n-BLKSIZE; i += BLKSIZE, ap += BLKSIZE, bp += BLKSIZE) { + // sum ap[j]*rep(bp[j]) for j in [0..BLKSIZE) + + ll_type sum; + ll_init(sum, 0); + for (long j = 0; j < BLKSIZE; j += 4) { + ll_imul_add(sum, ap[j+0], rep(bp[j+0])); + ll_imul_add(sum, ap[j+1], rep(bp[j+1])); + ll_imul_add(sum, ap[j+2], rep(bp[j+2])); + ll_imul_add(sum, ap[j+3], rep(bp[j+3])); + } + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + + if (i < n) { + // sum ap[i]*rep(bp[j]) for j in [0..n-i) + + ll_type sum; + ll_init(sum, 0); + long j = 0; + for (; j <= n-i-4; j += 4) { + ll_imul_add(sum, ap[j+0], rep(bp[j+0])); + ll_imul_add(sum, ap[j+1], rep(bp[j+1])); + ll_imul_add(sum, ap[j+2], rep(bp[j+2])); + ll_imul_add(sum, ap[j+3], rep(bp[j+3])); + } + + for (; j < n-i; j++) + ll_imul_add(sum, ap[j], rep(bp[j])); + + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + + if (dinv.nbits == NTL_SP_NBITS) + return sp_ll_red_31_normalized(ll_get_hi(acc21), ll_get_lo(acc21), acc0, d, dinv); + else + return sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, d, dinv); +} + + +long +InnerProd_LL(const zz_p *ap, const zz_p *bp, long n, long d, + sp_ll_reduce_struct dinv) +{ + const long BLKSIZE = (1L << min(20, 2*(NTL_BITS_PER_LONG-NTL_SP_NBITS))); + + unsigned long acc0 = 0; + ll_type acc21; + ll_init(acc21, 0); + + long i; + for (i = 0; i <= n-BLKSIZE; i += BLKSIZE, ap += BLKSIZE, bp += BLKSIZE) { + // sum ap[j]*rep(bp[j]) for j in [0..BLKSIZE) + + ll_type sum; + ll_init(sum, 0); + for (long j = 0; j < BLKSIZE; j += 4) { + ll_imul_add(sum, rep(ap[j+0]), rep(bp[j+0])); + ll_imul_add(sum, rep(ap[j+1]), rep(bp[j+1])); + ll_imul_add(sum, rep(ap[j+2]), rep(bp[j+2])); + ll_imul_add(sum, rep(ap[j+3]), rep(bp[j+3])); + } + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + + if (i < n) { + // sum ap[i]*rep(bp[j]) for j in [0..n-i) + + ll_type sum; + ll_init(sum, 0); + long j = 0; + for (; j <= n-i-4; j += 4) { + ll_imul_add(sum, rep(ap[j+0]), rep(bp[j+0])); + ll_imul_add(sum, rep(ap[j+1]), rep(bp[j+1])); + ll_imul_add(sum, rep(ap[j+2]), rep(bp[j+2])); + ll_imul_add(sum, rep(ap[j+3]), rep(bp[j+3])); + } + + for (; j < n-i; j++) + ll_imul_add(sum, rep(ap[j]), rep(bp[j])); + + + ll_add(sum, acc0); + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + } + + if (dinv.nbits == NTL_SP_NBITS) + return sp_ll_red_31_normalized(ll_get_hi(acc21), ll_get_lo(acc21), acc0, d, dinv); + else + return sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, d, dinv); +} + + +long +InnerProd_L(const long *ap, const zz_p *bp, long n, long d, + sp_reduce_struct dinv) +{ + unsigned long sum = 0; + long j = 0; + + for (; j <= n-4; j += 4) { + sum += (ap[j+0]) * (rep(bp[j+0])); + sum += (ap[j+1]) * (rep(bp[j+1])); + sum += (ap[j+2]) * (rep(bp[j+2])); + sum += (ap[j+3]) * (rep(bp[j+3])); + } + + for (; j < n; j++) + sum += (ap[j]) * (rep(bp[j])); + + return rem(sum, d, dinv); +} + +long +InnerProd_L(const zz_p *ap, const zz_p *bp, long n, long d, + sp_reduce_struct dinv) +{ + unsigned long sum = 0; + long j = 0; + + for (; j <= n-4; j += 4) { + sum += (rep(ap[j+0])) * (rep(bp[j+0])); + sum += (rep(ap[j+1])) * (rep(bp[j+1])); + sum += (rep(ap[j+2])) * (rep(bp[j+2])); + sum += 
(rep(ap[j+3])) * (rep(bp[j+3])); + } + + for (; j < n; j++) + sum += (rep(ap[j])) * (rep(bp[j])); + + return rem(sum, d, dinv); +} + +#endif + + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/lzz_pE.c b/thirdparty/linux/ntl/src/lzz_pE.c new file mode 100644 index 0000000000..44c33c7f47 --- /dev/null +++ b/thirdparty/linux/ntl/src/lzz_pE.c @@ -0,0 +1,149 @@ + + +#include + +#include + +NTL_START_IMPL + + +NTL_TLS_GLOBAL_DECL(SmartPtr, zz_pEInfo_stg) + +NTL_CHEAP_THREAD_LOCAL zz_pEInfoT *zz_pEInfo = 0; + + +zz_pEInfoT::zz_pEInfoT(const zz_pX& NewP) +{ + build(p, NewP); + + _card_base = zz_p::modulus(); + _card_exp = deg(NewP); +} + +const ZZ& zz_pE::cardinality() +{ + if (!zz_pEInfo) LogicError("zz_pE::cardinality: undefined modulus"); + + + do { // NOTE: thread safe lazy init + Lazy::Builder builder(zz_pEInfo->_card); + if (!builder()) break; + UniquePtr p; + p.make(); + power(*p, zz_pEInfo->_card_base, zz_pEInfo->_card_exp); + builder.move(p); + } while (0); + + return *zz_pEInfo->_card; +} + + + + + +void zz_pE::init(const zz_pX& p) +{ + zz_pEContext c(p); + c.restore(); +} + + +void zz_pEContext::save() +{ + NTL_TLS_GLOBAL_ACCESS(zz_pEInfo_stg); + ptr = zz_pEInfo_stg; +} + +void zz_pEContext::restore() const +{ + NTL_TLS_GLOBAL_ACCESS(zz_pEInfo_stg); + zz_pEInfo_stg = ptr; + zz_pEInfo = zz_pEInfo_stg.get(); +} + + +zz_pEBak::~zz_pEBak() +{ + if (MustRestore) c.restore(); +} + +void zz_pEBak::save() +{ + c.save(); + MustRestore = true; +} + + +void zz_pEBak::restore() +{ + c.restore(); + MustRestore = false; +} + + + +const zz_pE& zz_pE::zero() +{ + static const zz_pE z(INIT_NO_ALLOC); // GLOBAL (assumes C++11 thread-safe init) + return z; +} + + + + +istream& operator>>(istream& s, zz_pE& x) +{ + zz_pX y; + + NTL_INPUT_CHECK_RET(s, s >> y); + conv(x, y); + + return s; +} + +void div(zz_pE& x, const zz_pE& a, const zz_pE& b) +{ + zz_pE t; + + inv(t, b); + mul(x, a, t); +} + +void div(zz_pE& x, const zz_pE& a, long b) +{ + NTL_zz_pRegister(B); + B = b; + inv(B, B); + mul(x, a, B); +} + +void div(zz_pE& x, const zz_pE& a, const zz_p& b) +{ + NTL_zz_pRegister(B); + B = b; + inv(B, B); + mul(x, a, B); +} + +void div(zz_pE& x, long a, const zz_pE& b) +{ + zz_pE t; + inv(t, b); + mul(x, a, t); +} + +void div(zz_pE& x, const zz_p& a, const zz_pE& b) +{ + zz_pE t; + inv(t, b); + mul(x, a, t); +} + + + +void inv(zz_pE& x, const zz_pE& a) +{ + InvMod(x._zz_pE__rep, a._zz_pE__rep, zz_pE::modulus()); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/lzz_pEX.c b/thirdparty/linux/ntl/src/lzz_pEX.c new file mode 100644 index 0000000000..a33467d7aa --- /dev/null +++ b/thirdparty/linux/ntl/src/lzz_pEX.c @@ -0,0 +1,3445 @@ + + + + +#include +#include +#include + +#include + +NTL_START_IMPL + + +const zz_pEX& zz_pEX::zero() +{ + static const zz_pEX z; // GLOBAL (assumes C++11 thread-safe init) + return z; +} + + +istream& operator>>(istream& s, zz_pEX& x) +{ + NTL_INPUT_CHECK_RET(s, s >> x.rep); + x.normalize(); + return s; +} + +ostream& operator<<(ostream& s, const zz_pEX& a) +{ + return s << a.rep; +} + + +void zz_pEX::normalize() +{ + long n; + const zz_pE* p; + + n = rep.length(); + if (n == 0) return; + p = rep.elts() + n; + while (n > 0 && IsZero(*--p)) { + n--; + } + rep.SetLength(n); +} + + +long IsZero(const zz_pEX& a) +{ + return a.rep.length() == 0; +} + + +long IsOne(const zz_pEX& a) +{ + return a.rep.length() == 1 && IsOne(a.rep[0]); +} + +long operator==(const zz_pEX& a, long b) +{ + if (b == 0) + return IsZero(a); + + if (b == 1) + return IsOne(a); + + long da = deg(a); + + if 
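+   /* deg() returns -1 for the zero polynomial, hence the separate da < 0 case below */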
(da > 0) return 0; + + NTL_zz_pRegister(bb); + bb = b; + + if (da < 0) + return IsZero(bb); + + return a.rep[0] == bb; +} + +long operator==(const zz_pEX& a, const zz_p& b) +{ + if (IsZero(b)) + return IsZero(a); + + long da = deg(a); + + if (da != 0) + return 0; + + return a.rep[0] == b; +} + +long operator==(const zz_pEX& a, const zz_pE& b) +{ + if (IsZero(b)) + return IsZero(a); + + long da = deg(a); + + if (da != 0) + return 0; + + return a.rep[0] == b; +} + + + + + +void SetCoeff(zz_pEX& x, long i, const zz_pE& a) +{ + long j, m; + + if (i < 0) + LogicError("SetCoeff: negative index"); + + if (NTL_OVERFLOW(i, 1, 0)) + ResourceError("overflow in SetCoeff"); + + m = deg(x); + + if (i > m && IsZero(a)) return; + + if (i > m) { + /* careful: a may alias a coefficient of x */ + + long alloc = x.rep.allocated(); + + if (alloc > 0 && i >= alloc) { + zz_pE aa = a; + x.rep.SetLength(i+1); + x.rep[i] = aa; + } + else { + x.rep.SetLength(i+1); + x.rep[i] = a; + } + + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + else + x.rep[i] = a; + + x.normalize(); +} + + +void SetCoeff(zz_pEX& x, long i, const zz_p& aa) +{ + long j, m; + + if (i < 0) + LogicError("SetCoeff: negative index"); + + if (NTL_OVERFLOW(i, 1, 0)) + ResourceError("overflow in SetCoeff"); + + NTL_zz_pRegister(a); // watch out for aliases! + a = aa; + + m = deg(x); + + if (i > m && IsZero(a)) return; + + if (i > m) { + x.rep.SetLength(i+1); + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + x.rep[i] = a; + x.normalize(); +} + +void SetCoeff(zz_pEX& x, long i, long a) +{ + if (a == 1) + SetCoeff(x, i); + else { + NTL_zz_pRegister(T); + T = a; + SetCoeff(x, i, T); + } +} + + + +void SetCoeff(zz_pEX& x, long i) +{ + long j, m; + + if (i < 0) + LogicError("coefficient index out of range"); + + if (NTL_OVERFLOW(i, 1, 0)) + ResourceError("overflow in SetCoeff"); + + m = deg(x); + + if (i > m) { + x.rep.SetLength(i+1); + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + set(x.rep[i]); + x.normalize(); +} + + +void SetX(zz_pEX& x) +{ + clear(x); + SetCoeff(x, 1); +} + + +long IsX(const zz_pEX& a) +{ + return deg(a) == 1 && IsOne(LeadCoeff(a)) && IsZero(ConstTerm(a)); +} + + + +const zz_pE& coeff(const zz_pEX& a, long i) +{ + if (i < 0 || i > deg(a)) + return zz_pE::zero(); + else + return a.rep[i]; +} + + +const zz_pE& LeadCoeff(const zz_pEX& a) +{ + if (IsZero(a)) + return zz_pE::zero(); + else + return a.rep[deg(a)]; +} + +const zz_pE& ConstTerm(const zz_pEX& a) +{ + if (IsZero(a)) + return zz_pE::zero(); + else + return a.rep[0]; +} + + + +void conv(zz_pEX& x, const zz_pE& a) +{ + if (IsZero(a)) + x.rep.SetLength(0); + else { + x.rep.SetLength(1); + x.rep[0] = a; + } +} + +void conv(zz_pEX& x, long a) +{ + if (a == 0) + clear(x); + else if (a == 1) + set(x); + else { + NTL_zz_pRegister(T); + T = a; + conv(x, T); + } +} + +void conv(zz_pEX& x, const ZZ& a) +{ + NTL_zz_pRegister(T); + conv(T, a); + conv(x, T); +} + +void conv(zz_pEX& x, const zz_p& a) +{ + if (IsZero(a)) + clear(x); + else if (IsOne(a)) + set(x); + else { + x.rep.SetLength(1); + conv(x.rep[0], a); + x.normalize(); + } +} + +void conv(zz_pEX& x, const zz_pX& aa) +{ + zz_pX a = aa; // in case a aliases the rep of a coefficient of x + + long n = deg(a)+1; + long i; + + x.rep.SetLength(n); + for (i = 0; i < n; i++) + conv(x.rep[i], coeff(a, i)); +} + + +void conv(zz_pEX& x, const vec_zz_pE& a) +{ + x.rep = a; + x.normalize(); +} + + + + +/* additional legacy conversions for v6 conversion regime */ + +void conv(zz_pEX& x, const ZZX& a) +{ + long n = a.rep.length(); + long 
i; + + x.rep.SetLength(n); + for (i = 0; i < n; i++) + conv(x.rep[i], a.rep[i]); + + x.normalize(); +} + + +/* ------------------------------------- */ + + + +void add(zz_pEX& x, const zz_pEX& a, const zz_pEX& b) +{ + long da = deg(a); + long db = deg(b); + long minab = min(da, db); + long maxab = max(da, db); + x.rep.SetLength(maxab+1); + + long i; + const zz_pE *ap, *bp; + zz_pE* xp; + + for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts(); + i; i--, ap++, bp++, xp++) + add(*xp, (*ap), (*bp)); + + if (da > minab && &x != &a) + for (i = da-minab; i; i--, xp++, ap++) + *xp = *ap; + else if (db > minab && &x != &b) + for (i = db-minab; i; i--, xp++, bp++) + *xp = *bp; + else + x.normalize(); +} + + +void add(zz_pEX& x, const zz_pEX& a, const zz_pE& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + } + else if (&x == &a) { + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + zz_pE *xp = x.rep.elts(); + add(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const zz_pE *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + +void add(zz_pEX& x, const zz_pEX& a, const zz_p& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + } + else if (&x == &a) { + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + add(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + zz_pE *xp = x.rep.elts(); + add(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const zz_pE *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + + +void add(zz_pEX& x, const zz_pEX& a, long b) +{ + if (a.rep.length() == 0) { + conv(x, b); + } + else { + if (&x != &a) x = a; + add(x.rep[0], x.rep[0], b); + x.normalize(); + } +} + + +void sub(zz_pEX& x, const zz_pEX& a, const zz_pEX& b) +{ + long da = deg(a); + long db = deg(b); + long minab = min(da, db); + long maxab = max(da, db); + x.rep.SetLength(maxab+1); + + long i; + const zz_pE *ap, *bp; + zz_pE* xp; + + for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts(); + i; i--, ap++, bp++, xp++) + sub(*xp, (*ap), (*bp)); + + if (da > minab && &x != &a) + for (i = da-minab; i; i--, xp++, ap++) + *xp = *ap; + else if (db > minab) + for (i = db-minab; i; i--, xp++, bp++) + negate(*xp, *bp); + else + x.normalize(); +} + + +void sub(zz_pEX& x, const zz_pEX& a, const zz_pE& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + negate(x, x); + } + else if (&x == &a) { + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + zz_pE *xp = x.rep.elts(); + sub(xp[0], a.rep[0], b); + x.rep.SetLength(n); + xp = x.rep.elts(); + const zz_pE *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + +void sub(zz_pEX& x, const zz_pEX& a, const zz_p& b) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + negate(x, x); + } + else if (&x == &a) { + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else if (x.rep.MaxLength() == 0) { + x = a; + sub(x.rep[0], a.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + zz_pE *xp = x.rep.elts(); + sub(xp[0], a.rep[0], b); 
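+      // note: the constant term is combined while b is still valid;
+      // the SetLength below may reallocate x.rep, which would
+      // invalidate b if b aliases a coefficient of x.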
+ x.rep.SetLength(n); + xp = x.rep.elts(); + const zz_pE *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + xp[i] = ap[i]; + x.normalize(); + } +} + + +void sub(zz_pEX& x, const zz_pEX& a, long b) +{ + if (a.rep.length() == 0) { + conv(x, b); + negate(x, x); + } + else { + if (&x != &a) x = a; + sub(x.rep[0], x.rep[0], b); + x.normalize(); + } +} + +void sub(zz_pEX& x, const zz_pE& b, const zz_pEX& a) +{ + long n = a.rep.length(); + if (n == 0) { + conv(x, b); + } + else if (x.rep.MaxLength() == 0) { + negate(x, a); + add(x.rep[0], x.rep[0], b); + x.normalize(); + } + else { + // ugly...b could alias a coeff of x + + zz_pE *xp = x.rep.elts(); + sub(xp[0], b, a.rep[0]); + x.rep.SetLength(n); + xp = x.rep.elts(); + const zz_pE *ap = a.rep.elts(); + long i; + for (i = 1; i < n; i++) + negate(xp[i], ap[i]); + x.normalize(); + } +} + + +void sub(zz_pEX& x, const zz_p& a, const zz_pEX& b) +{ + NTL_zz_pRegister(T); // avoids aliasing problems + T = a; + negate(x, b); + add(x, x, T); +} + +void sub(zz_pEX& x, long a, const zz_pEX& b) +{ + NTL_zz_pRegister(T); + T = a; + negate(x, b); + add(x, x, T); +} + +void mul(zz_pEX& c, const zz_pEX& a, const zz_pEX& b) +{ + if (&a == &b) { + sqr(c, a); + return; + } + + if (IsZero(a) || IsZero(b)) { + clear(c); + return; + } + + if (deg(a) == 0) { + mul(c, b, ConstTerm(a)); + return; + } + + if (deg(b) == 0) { + mul(c, a, ConstTerm(b)); + return; + } + + // general case...Kronecker subst + + zz_pX A, B, C; + + long da = deg(a); + long db = deg(b); + + long n = zz_pE::degree(); + long n2 = 2*n-1; + + if (NTL_OVERFLOW(da+db+1, n2, 0)) + ResourceError("overflow in zz_pEX mul"); + + + long i, j; + + A.rep.SetLength((da+1)*n2); + + for (i = 0; i <= da; i++) { + const zz_pX& coeff = rep(a.rep[i]); + long dcoeff = deg(coeff); + for (j = 0; j <= dcoeff; j++) + A.rep[n2*i + j] = coeff.rep[j]; + } + + A.normalize(); + + B.rep.SetLength((db+1)*n2); + + for (i = 0; i <= db; i++) { + const zz_pX& coeff = rep(b.rep[i]); + long dcoeff = deg(coeff); + for (j = 0; j <= dcoeff; j++) + B.rep[n2*i + j] = coeff.rep[j]; + } + + B.normalize(); + + mul(C, A, B); + + long Clen = C.rep.length(); + long lc = (Clen + n2 - 1)/n2; + long dc = lc - 1; + + c.rep.SetLength(dc+1); + + zz_pX tmp; + + for (i = 0; i <= dc; i++) { + tmp.rep.SetLength(n2); + for (j = 0; j < n2 && n2*i + j < Clen; j++) + tmp.rep[j] = C.rep[n2*i + j]; + for (; j < n2; j++) + clear(tmp.rep[j]); + tmp.normalize(); + conv(c.rep[i], tmp); + } + + c.normalize(); +} + + +void mul(zz_pEX& x, const zz_pEX& a, const zz_pE& b) +{ + if (IsZero(b)) { + clear(x); + return; + } + + zz_pE t; + t = b; + + long i, da; + + const zz_pE *ap; + zz_pE* xp; + + da = deg(a); + x.rep.SetLength(da+1); + ap = a.rep.elts(); + xp = x.rep.elts(); + + for (i = 0; i <= da; i++) + mul(xp[i], ap[i], t); + + x.normalize(); +} + + + +void mul(zz_pEX& x, const zz_pEX& a, const zz_p& b) +{ + if (IsZero(b)) { + clear(x); + return; + } + + NTL_zz_pRegister(t); + t = b; + + long i, da; + + const zz_pE *ap; + zz_pE* xp; + + da = deg(a); + x.rep.SetLength(da+1); + ap = a.rep.elts(); + xp = x.rep.elts(); + + for (i = 0; i <= da; i++) + mul(xp[i], ap[i], t); + + x.normalize(); +} + + +void mul(zz_pEX& x, const zz_pEX& a, long b) +{ + NTL_zz_pRegister(t); + t = b; + mul(x, a, t); +} + +void sqr(zz_pEX& c, const zz_pEX& a) +{ + if (IsZero(a)) { + clear(c); + return; + } + + if (deg(a) == 0) { + zz_pE res; + sqr(res, ConstTerm(a)); + conv(c, res); + return; + } + + // general case...Kronecker subst + + zz_pX A, C; + + long da = deg(a); + + long n = 
zz_pE::degree(); + long n2 = 2*n-1; + + if (NTL_OVERFLOW(2*da+1, n2, 0)) + ResourceError("overflow in zz_pEX sqr"); + + long i, j; + + A.rep.SetLength((da+1)*n2); + + for (i = 0; i <= da; i++) { + const zz_pX& coeff = rep(a.rep[i]); + long dcoeff = deg(coeff); + for (j = 0; j <= dcoeff; j++) + A.rep[n2*i + j] = coeff.rep[j]; + } + + A.normalize(); + + sqr(C, A); + + long Clen = C.rep.length(); + long lc = (Clen + n2 - 1)/n2; + long dc = lc - 1; + + c.rep.SetLength(dc+1); + + zz_pX tmp; + + for (i = 0; i <= dc; i++) { + tmp.rep.SetLength(n2); + for (j = 0; j < n2 && n2*i + j < Clen; j++) + tmp.rep[j] = C.rep[n2*i + j]; + for (; j < n2; j++) + clear(tmp.rep[j]); + tmp.normalize(); + conv(c.rep[i], tmp); + } + + + c.normalize(); +} + + +void MulTrunc(zz_pEX& x, const zz_pEX& a, const zz_pEX& b, long n) +{ + if (n < 0) LogicError("MulTrunc: bad args"); + + zz_pEX t; + mul(t, a, b); + trunc(x, t, n); +} + +void SqrTrunc(zz_pEX& x, const zz_pEX& a, long n) +{ + if (n < 0) LogicError("SqrTrunc: bad args"); + + zz_pEX t; + sqr(t, a); + trunc(x, t, n); +} + + +void CopyReverse(zz_pEX& x, const zz_pEX& a, long hi) + + // x[0..hi] = reverse(a[0..hi]), with zero fill + // input may not alias output + +{ + long i, j, n, m; + + n = hi+1; + m = a.rep.length(); + + x.rep.SetLength(n); + + const zz_pE* ap = a.rep.elts(); + zz_pE* xp = x.rep.elts(); + + for (i = 0; i < n; i++) { + j = hi-i; + if (j < 0 || j >= m) + clear(xp[i]); + else + xp[i] = ap[j]; + } + + x.normalize(); +} + + +void trunc(zz_pEX& x, const zz_pEX& a, long m) + +// x = a % X^m, output may alias input + +{ + if (m < 0) LogicError("trunc: bad args"); + + if (&x == &a) { + if (x.rep.length() > m) { + x.rep.SetLength(m); + x.normalize(); + } + } + else { + long n; + long i; + zz_pE* xp; + const zz_pE* ap; + + n = min(a.rep.length(), m); + x.rep.SetLength(n); + + xp = x.rep.elts(); + ap = a.rep.elts(); + + for (i = 0; i < n; i++) xp[i] = ap[i]; + + x.normalize(); + } +} + + +void random(zz_pEX& x, long n) +{ + long i; + + x.rep.SetLength(n); + + for (i = 0; i < n; i++) + random(x.rep[i]); + + x.normalize(); +} + +void negate(zz_pEX& x, const zz_pEX& a) +{ + long n = a.rep.length(); + x.rep.SetLength(n); + + const zz_pE* ap = a.rep.elts(); + zz_pE* xp = x.rep.elts(); + long i; + + for (i = n; i; i--, ap++, xp++) + negate((*xp), (*ap)); +} + + + +static +void MulByXModAux(zz_pEX& h, const zz_pEX& a, const zz_pEX& f) +{ + long i, n, m; + zz_pE* hh; + const zz_pE *aa, *ff; + + zz_pE t, z; + + n = deg(f); + m = deg(a); + + if (m >= n || n == 0) LogicError("MulByXMod: bad args"); + + if (m < 0) { + clear(h); + return; + } + + if (m < n-1) { + h.rep.SetLength(m+2); + hh = h.rep.elts(); + aa = a.rep.elts(); + for (i = m+1; i >= 1; i--) + hh[i] = aa[i-1]; + clear(hh[0]); + } + else { + h.rep.SetLength(n); + hh = h.rep.elts(); + aa = a.rep.elts(); + ff = f.rep.elts(); + negate(z, aa[n-1]); + if (!IsOne(ff[n])) + div(z, z, ff[n]); + for (i = n-1; i >= 1; i--) { + mul(t, z, ff[i]); + add(hh[i], aa[i-1], t); + } + mul(hh[0], z, ff[0]); + h.normalize(); + } +} + +void MulByXMod(zz_pEX& h, const zz_pEX& a, const zz_pEX& f) +{ + if (&h == &f) { + zz_pEX hh; + MulByXModAux(hh, a, f); + h = hh; + } + else + MulByXModAux(h, a, f); +} + + + +void PlainMul(zz_pEX& x, const zz_pEX& a, const zz_pEX& b) +{ + long da = deg(a); + long db = deg(b); + + if (da < 0 || db < 0) { + clear(x); + return; + } + + long d = da+db; + + + + const zz_pE *ap, *bp; + zz_pE *xp; + + zz_pEX la, lb; + + if (&x == &a) { + la = a; + ap = la.rep.elts(); + } + else + ap = a.rep.elts(); + + 
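+   // b gets the same aliasing guard as a below: if the output aliases
+   // b, coefficients are read from a private copy (lb) instead.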
if (&x == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + x.rep.SetLength(d+1); + + xp = x.rep.elts(); + + long i, j, jmin, jmax; + zz_pX t, accum; + + for (i = 0; i <= d; i++) { + jmin = max(0, i-db); + jmax = min(da, i); + clear(accum); + for (j = jmin; j <= jmax; j++) { + mul(t, rep(ap[j]), rep(bp[i-j])); + add(accum, accum, t); + } + conv(xp[i], accum); + } + x.normalize(); +} + +void SetSize(vec_zz_pX& x, long n, long m) +{ + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + x[i].rep.SetMaxLength(m); +} + + + +void PlainDivRem(zz_pEX& q, zz_pEX& r, const zz_pEX& a, const zz_pEX& b) +{ + long da, db, dq, i, j, LCIsOne; + const zz_pE *bp; + zz_pE *qp; + zz_pX *xp; + + + zz_pE LCInv, t; + zz_pX s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("zz_pEX: division by zero"); + + if (da < db) { + r = a; + clear(q); + return; + } + + zz_pEX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + vec_zz_pX x; + + SetSize(x, da+1, 2*zz_pE::degree()); + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainRem(zz_pEX& r, const zz_pEX& a, const zz_pEX& b, vec_zz_pX& x) +{ + long da, db, dq, i, j, LCIsOne; + const zz_pE *bp; + zz_pX *xp; + + + zz_pE LCInv, t; + zz_pX s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("zz_pEX: division by zero"); + + if (da < db) { + r = a; + return; + } + + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainDivRem(zz_pEX& q, zz_pEX& r, const zz_pEX& a, const zz_pEX& b, + vec_zz_pX& x) +{ + long da, db, dq, i, j, LCIsOne; + const zz_pE *bp; + zz_pE *qp; + zz_pX *xp; + + + zz_pE LCInv, t; + zz_pX s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("zz_pEX: division by zero"); + + if (da < db) { + r = a; + clear(q); + return; + } + + zz_pEX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + +void PlainDiv(zz_pEX& q, const zz_pEX& a, const zz_pEX& b) +{ + long da, db, dq, i, j, LCIsOne; + const zz_pE *bp; + zz_pE *qp; + zz_pX *xp; + + + zz_pE 
LCInv, t; + zz_pX s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("zz_pEX: division by zero"); + + if (da < db) { + clear(q); + return; + } + + zz_pEX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + vec_zz_pX x; + SetSize(x, da+1-db, 2*zz_pE::degree()); + + for (i = db; i <= da; i++) + x[i-db] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + for (i = dq; i >= 0; i--) { + conv(t, xp[i]); + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + negate(t, t); + + long lastj = max(0, db-i); + + for (j = db-1; j >= lastj; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j-db], xp[i+j-db], s); + } + } +} + +void PlainRem(zz_pEX& r, const zz_pEX& a, const zz_pEX& b) +{ + long da, db, dq, i, j, LCIsOne; + const zz_pE *bp; + zz_pX *xp; + + + zz_pE LCInv, t; + zz_pX s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("zz_pEX: division by zero"); + + if (da < db) { + r = a; + return; + } + + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + vec_zz_pX x; + SetSize(x, da + 1, 2*zz_pE::degree()); + + for (i = 0; i <= da; i++) + x[i] = rep(a.rep[i]); + + xp = x.elts(); + + dq = da - db; + + for (i = dq; i >= 0; i--) { + conv(t, xp[i+db]); + if (!LCIsOne) + mul(t, t, LCInv); + negate(t, t); + + for (j = db-1; j >= 0; j--) { + mul(s, rep(t), rep(bp[j])); + add(xp[i+j], xp[i+j], s); + } + } + + r.rep.SetLength(db); + for (i = 0; i < db; i++) + conv(r.rep[i], xp[i]); + r.normalize(); +} + + + +void RightShift(zz_pEX& x, const zz_pEX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) ResourceError("overflow in RightShift"); + LeftShift(x, a, -n); + return; + } + + long da = deg(a); + long i; + + if (da < n) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(da-n+1); + + for (i = 0; i <= da-n; i++) + x.rep[i] = a.rep[i+n]; + + if (&x == &a) + x.rep.SetLength(da-n+1); + + x.normalize(); +} + +void LeftShift(zz_pEX& x, const zz_pEX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) + clear(x); + else + RightShift(x, a, -n); + return; + } + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in LeftShift"); + + long m = a.rep.length(); + + x.rep.SetLength(m+n); + + long i; + for (i = m-1; i >= 0; i--) + x.rep[i+n] = a.rep[i]; + + for (i = 0; i < n; i++) + clear(x.rep[i]); +} + + + +void NewtonInv(zz_pEX& c, const zz_pEX& a, long e) +{ + zz_pE x; + + inv(x, ConstTerm(a)); + + if (e == 1) { + conv(c, x); + return; + } + + vec_long E; + E.SetLength(0); + append(E, e); + while (e > 1) { + e = (e+1)/2; + append(E, e); + } + + long L = E.length(); + + zz_pEX g, g0, g1, g2; + + + g.rep.SetMaxLength(E[0]); + g0.rep.SetMaxLength(E[0]); + g1.rep.SetMaxLength((3*E[0]+1)/2); + g2.rep.SetMaxLength(E[0]); + + conv(g, x); + + long i; + + for (i = L-1; i > 0; i--) { + // lift from E[i] to E[i-1] + + long k = E[i]; + long l = E[i-1]-E[i]; + + trunc(g0, a, k+l); + + mul(g1, g0, g); + RightShift(g1, g1, k); + trunc(g1, g1, l); + + mul(g2, g1, g); + trunc(g2, g2, l); + LeftShift(g2, g2, k); + + sub(g, g, g2); + } + + c = g; +} + +void InvTrunc(zz_pEX& c, const zz_pEX& a, long e) +{ + if (e < 0) LogicError("InvTrunc: bad args"); + if (e == 0) { + clear(c); + return; + } + + if (NTL_OVERFLOW(e, 1, 0)) + ResourceError("overflow in InvTrunc"); + + 
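+   // Newton iteration: each lift step roughly doubles the precision
+   // of the truncated inverse, so only O(log e) steps are performed.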
NewtonInv(c, a, e); +} + + + + +const long zz_pEX_MOD_PLAIN = 0; +const long zz_pEX_MOD_MUL = 1; + + +void build(zz_pEXModulus& F, const zz_pEX& f) +{ + long n = deg(f); + + if (n <= 0) LogicError("build(zz_pEXModulus,zz_pEX): deg(f) <= 0"); + + if (NTL_OVERFLOW(n, zz_pE::degree(), 0)) + ResourceError("build(zz_pEXModulus,zz_pEX): overflow"); + + F.tracevec.make(); + + F.f = f; + F.n = n; + + if (F.n < zz_pE::ModCross()) { + F.method = zz_pEX_MOD_PLAIN; + } + else { + F.method = zz_pEX_MOD_MUL; + zz_pEX P1; + zz_pEX P2; + + CopyReverse(P1, f, n); + InvTrunc(P2, P1, n-1); + CopyReverse(P1, P2, n-2); + trunc(F.h0, P1, n-2); + trunc(F.f0, f, n); + F.hlc = ConstTerm(P2); + } +} + + + +zz_pEXModulus::zz_pEXModulus() +{ + n = -1; + method = zz_pEX_MOD_PLAIN; +} + + +zz_pEXModulus::~zz_pEXModulus() +{ +} + + + +zz_pEXModulus::zz_pEXModulus(const zz_pEX& ff) +{ + n = -1; + method = zz_pEX_MOD_PLAIN; + + build(*this, ff); +} + + +void UseMulRem21(zz_pEX& r, const zz_pEX& a, const zz_pEXModulus& F) +{ + zz_pEX P1; + zz_pEX P2; + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, P2, F.n-2); + if (!IsOne(F.hlc)) mul(P1, P1, F.hlc); + add(P2, P2, P1); + mul(P1, P2, F.f0); + trunc(P1, P1, F.n); + trunc(r, a, F.n); + sub(r, r, P1); +} + +void UseMulDivRem21(zz_pEX& q, zz_pEX& r, const zz_pEX& a, const zz_pEXModulus& F) +{ + zz_pEX P1; + zz_pEX P2; + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, P2, F.n-2); + if (!IsOne(F.hlc)) mul(P1, P1, F.hlc); + add(P2, P2, P1); + mul(P1, P2, F.f0); + trunc(P1, P1, F.n); + trunc(r, a, F.n); + sub(r, r, P1); + q = P2; +} + +void UseMulDiv21(zz_pEX& q, const zz_pEX& a, const zz_pEXModulus& F) +{ + zz_pEX P1; + zz_pEX P2; + + RightShift(P1, a, F.n); + mul(P2, P1, F.h0); + RightShift(P2, P2, F.n-2); + if (!IsOne(F.hlc)) mul(P1, P1, F.hlc); + add(P2, P2, P1); + q = P2; + +} + + +void rem(zz_pEX& x, const zz_pEX& a, const zz_pEXModulus& F) +{ + if (F.method == zz_pEX_MOD_PLAIN) { + PlainRem(x, a, F.f); + return; + } + + long da = deg(a); + long n = F.n; + + if (da <= 2*n-2) { + UseMulRem21(x, a, F); + return; + } + + zz_pEX buf(INIT_SIZE, 2*n-1); + + long a_len = da+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + UseMulRem21(buf, buf, F); + + a_len -= amt; + } + + x = buf; +} + +void DivRem(zz_pEX& q, zz_pEX& r, const zz_pEX& a, const zz_pEXModulus& F) +{ + if (F.method == zz_pEX_MOD_PLAIN) { + PlainDivRem(q, r, a, F.f); + return; + } + + long da = deg(a); + long n = F.n; + + if (da <= 2*n-2) { + UseMulDivRem21(q, r, a, F); + return; + } + + zz_pEX buf(INIT_SIZE, 2*n-1); + zz_pEX qbuf(INIT_SIZE, n-1); + + zz_pEX qq; + qq.rep.SetLength(da-n+1); + + long a_len = da+1; + long q_hi = da-n+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + UseMulDivRem21(qbuf, buf, buf, F); + long dl = qbuf.rep.length(); + a_len = a_len - amt; + for(i = 0; i < dl; i++) + qq.rep[a_len+i] = qbuf.rep[i]; + for(i = dl+a_len; i < q_hi; i++) + clear(qq.rep[i]); + q_hi = a_len; + } + + r = buf; + + qq.normalize(); 
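+   // the quotient was produced in blocks, high-order coefficients
+   // first, as the (2n-1)-coefficient window slid down the dividend.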
+ q = qq; +} + +void div(zz_pEX& q, const zz_pEX& a, const zz_pEXModulus& F) +{ + if (F.method == zz_pEX_MOD_PLAIN) { + PlainDiv(q, a, F.f); + return; + } + + long da = deg(a); + long n = F.n; + + if (da <= 2*n-2) { + UseMulDiv21(q, a, F); + return; + } + + zz_pEX buf(INIT_SIZE, 2*n-1); + zz_pEX qbuf(INIT_SIZE, n-1); + + zz_pEX qq; + qq.rep.SetLength(da-n+1); + + long a_len = da+1; + long q_hi = da-n+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + a_len = a_len - amt; + if (a_len > 0) + UseMulDivRem21(qbuf, buf, buf, F); + else + UseMulDiv21(qbuf, buf, F); + + long dl = qbuf.rep.length(); + for(i = 0; i < dl; i++) + qq.rep[a_len+i] = qbuf.rep[i]; + for(i = dl+a_len; i < q_hi; i++) + clear(qq.rep[i]); + q_hi = a_len; + } + + qq.normalize(); + q = qq; +} + + + + +void MulMod(zz_pEX& c, const zz_pEX& a, const zz_pEX& b, const zz_pEXModulus& F) +{ + if (deg(a) >= F.n || deg(b) >= F.n) LogicError("MulMod: bad args"); + + zz_pEX t; + mul(t, a, b); + rem(c, t, F); +} + + +void SqrMod(zz_pEX& c, const zz_pEX& a, const zz_pEXModulus& F) +{ + if (deg(a) >= F.n) LogicError("MulMod: bad args"); + + zz_pEX t; + sqr(t, a); + rem(c, t, F); +} + + + +void UseMulRem(zz_pEX& r, const zz_pEX& a, const zz_pEX& b) +{ + zz_pEX P1; + zz_pEX P2; + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + mul(P1, P2, b); + sub(P1, a, P1); + + r = P1; +} + +void UseMulDivRem(zz_pEX& q, zz_pEX& r, const zz_pEX& a, const zz_pEX& b) +{ + zz_pEX P1; + zz_pEX P2; + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + mul(P1, P2, b); + sub(P1, a, P1); + + r = P1; + q = P2; +} + +void UseMulDiv(zz_pEX& q, const zz_pEX& a, const zz_pEX& b) +{ + zz_pEX P1; + zz_pEX P2; + + long da = deg(a); + long db = deg(b); + + CopyReverse(P1, b, db); + InvTrunc(P2, P1, da-db+1); + CopyReverse(P1, P2, da-db); + + RightShift(P2, a, db); + mul(P2, P1, P2); + RightShift(P2, P2, da-db); + + q = P2; +} + + + +void DivRem(zz_pEX& q, zz_pEX& r, const zz_pEX& a, const zz_pEX& b) +{ + long sa = a.rep.length(); + long sb = b.rep.length(); + + if (sb < zz_pE::DivCross() || sa-sb < zz_pE::DivCross()) + PlainDivRem(q, r, a, b); + else if (sa < 4*sb) + UseMulDivRem(q, r, a, b); + else { + zz_pEXModulus B; + build(B, b); + DivRem(q, r, a, B); + } +} + +void div(zz_pEX& q, const zz_pEX& a, const zz_pEX& b) +{ + long sa = a.rep.length(); + long sb = b.rep.length(); + + if (sb < zz_pE::DivCross() || sa-sb < zz_pE::DivCross()) + PlainDiv(q, a, b); + else if (sa < 4*sb) + UseMulDiv(q, a, b); + else { + zz_pEXModulus B; + build(B, b); + div(q, a, B); + } +} + +void div(zz_pEX& q, const zz_pEX& a, const zz_pE& b) +{ + zz_pE T; + inv(T, b); + mul(q, a, T); +} + +void div(zz_pEX& q, const zz_pEX& a, const zz_p& b) +{ + NTL_zz_pRegister(T); + inv(T, b); + mul(q, a, T); +} + +void div(zz_pEX& q, const zz_pEX& a, long b) +{ + NTL_zz_pRegister(T); + T = b; + inv(T, T); + mul(q, a, T); +} + +void rem(zz_pEX& r, const zz_pEX& a, const zz_pEX& b) +{ + long sa = a.rep.length(); + long sb = 
b.rep.length(); + + if (sb < zz_pE::DivCross() || sa-sb < zz_pE::DivCross()) + PlainRem(r, a, b); + else if (sa < 4*sb) + UseMulRem(r, a, b); + else { + zz_pEXModulus B; + build(B, b); + rem(r, a, B); + } +} + +void GCD(zz_pEX& x, const zz_pEX& a, const zz_pEX& b) +{ + zz_pE t; + + if (IsZero(b)) + x = a; + else if (IsZero(a)) + x = b; + else { + long n = max(deg(a),deg(b)) + 1; + zz_pEX u(INIT_SIZE, n), v(INIT_SIZE, n); + + vec_zz_pX tmp; + SetSize(tmp, n, 2*zz_pE::degree()); + + u = a; + v = b; + do { + PlainRem(u, u, v, tmp); + swap(u, v); + } while (!IsZero(v)); + + x = u; + } + + if (IsZero(x)) return; + if (IsOne(LeadCoeff(x))) return; + + /* make gcd monic */ + + + inv(t, LeadCoeff(x)); + mul(x, x, t); +} + + + + + +void XGCD(zz_pEX& d, zz_pEX& s, zz_pEX& t, const zz_pEX& a, const zz_pEX& b) +{ + zz_pE z; + + + if (IsZero(b)) { + set(s); + clear(t); + d = a; + } + else if (IsZero(a)) { + clear(s); + set(t); + d = b; + } + else { + long e = max(deg(a), deg(b)) + 1; + + zz_pEX temp(INIT_SIZE, e), u(INIT_SIZE, e), v(INIT_SIZE, e), + u0(INIT_SIZE, e), v0(INIT_SIZE, e), + u1(INIT_SIZE, e), v1(INIT_SIZE, e), + u2(INIT_SIZE, e), v2(INIT_SIZE, e), q(INIT_SIZE, e); + + + set(u1); clear(v1); + clear(u2); set(v2); + u = a; v = b; + + do { + DivRem(q, u, u, v); + swap(u, v); + u0 = u2; + v0 = v2; + mul(temp, q, u2); + sub(u2, u1, temp); + mul(temp, q, v2); + sub(v2, v1, temp); + u1 = u0; + v1 = v0; + } while (!IsZero(v)); + + d = u; + s = u1; + t = v1; + } + + if (IsZero(d)) return; + if (IsOne(LeadCoeff(d))) return; + + /* make gcd monic */ + + inv(z, LeadCoeff(d)); + mul(d, d, z); + mul(s, s, z); + mul(t, t, z); +} + + +void IterBuild(zz_pE* a, long n) +{ + long i, k; + zz_pE b, t; + + if (n <= 0) return; + + negate(a[0], a[0]); + + for (k = 1; k <= n-1; k++) { + negate(b, a[k]); + add(a[k], b, a[k-1]); + for (i = k-1; i >= 1; i--) { + mul(t, a[i], b); + add(a[i], t, a[i-1]); + } + mul(a[0], a[0], b); + } +} + +void BuildFromRoots(zz_pEX& x, const vec_zz_pE& a) +{ + long n = a.length(); + + if (n == 0) { + set(x); + return; + } + + x.rep.SetMaxLength(n+1); + x.rep = a; + IterBuild(&x.rep[0], n); + x.rep.SetLength(n+1); + SetCoeff(x, n); +} + +void eval(zz_pE& b, const zz_pEX& f, const zz_pE& a) +// does a Horner evaluation +{ + zz_pE acc; + long i; + + clear(acc); + for (i = deg(f); i >= 0; i--) { + mul(acc, acc, a); + add(acc, acc, f.rep[i]); + } + + b = acc; +} + +void eval(vec_zz_pE& b, const zz_pEX& f, const vec_zz_pE& a) +// naive algorithm: repeats Horner +{ + if (&b == &f.rep) { + vec_zz_pE bb; + eval(bb, f, a); + b = bb; + return; + } + + long m = a.length(); + b.SetLength(m); + long i; + for (i = 0; i < m; i++) + eval(b[i], f, a[i]); +} + + +void interpolate(zz_pEX& f, const vec_zz_pE& a, const vec_zz_pE& b) +{ + long m = a.length(); + if (b.length() != m) LogicError("interpolate: vector length mismatch"); + + if (m == 0) { + clear(f); + return; + } + + vec_zz_pE prod; + prod = a; + + zz_pE t1, t2; + + long k, i; + + vec_zz_pE res; + res.SetLength(m); + + for (k = 0; k < m; k++) { + + const zz_pE& aa = a[k]; + + set(t1); + for (i = k-1; i >= 0; i--) { + mul(t1, t1, aa); + add(t1, t1, prod[i]); + } + + clear(t2); + for (i = k-1; i >= 0; i--) { + mul(t2, t2, aa); + add(t2, t2, res[i]); + } + + + inv(t1, t1); + sub(t2, b[k], t2); + mul(t1, t1, t2); + + for (i = 0; i < k; i++) { + mul(t2, prod[i], t1); + add(res[i], res[i], t2); + } + + res[k] = t1; + + if (k < m-1) { + if (k == 0) + negate(prod[0], prod[0]); + else { + negate(t1, a[k]); + add(prod[k], t1, prod[k-1]); + for (i = k-1; i 
>= 1; i--) { + mul(t2, prod[i], t1); + add(prod[i], t2, prod[i-1]); + } + mul(prod[0], prod[0], t1); + } + } + } + + while (m > 0 && IsZero(res[m-1])) m--; + res.SetLength(m); + f.rep = res; +} + +void InnerProduct(zz_pEX& x, const vec_zz_pE& v, long low, long high, + const vec_zz_pEX& H, long n, vec_zz_pX& t) +{ + zz_pX s; + long i, j; + + for (j = 0; j < n; j++) + clear(t[j]); + + high = min(high, v.length()-1); + for (i = low; i <= high; i++) { + const vec_zz_pE& h = H[i-low].rep; + long m = h.length(); + const zz_pX& w = rep(v[i]); + + for (j = 0; j < m; j++) { + mul(s, w, rep(h[j])); + add(t[j], t[j], s); + } + } + + x.rep.SetLength(n); + for (j = 0; j < n; j++) + conv(x.rep[j], t[j]); + x.normalize(); +} + + + +void CompMod(zz_pEX& x, const zz_pEX& g, const zz_pEXArgument& A, + const zz_pEXModulus& F) +{ + if (deg(g) <= 0) { + x = g; + return; + } + + + zz_pEX s, t; + vec_zz_pX scratch; + SetSize(scratch, deg(F), 2*zz_pE::degree()); + + long m = A.H.length() - 1; + long l = ((g.rep.length()+m-1)/m) - 1; + + const zz_pEX& M = A.H[m]; + + InnerProduct(t, g.rep, l*m, l*m + m - 1, A.H, F.n, scratch); + for (long i = l-1; i >= 0; i--) { + InnerProduct(s, g.rep, i*m, i*m + m - 1, A.H, F.n, scratch); + MulMod(t, t, M, F); + add(t, t, s); + } + + x = t; +} + + +void build(zz_pEXArgument& A, const zz_pEX& h, const zz_pEXModulus& F, long m) +{ + long i; + + if (m <= 0 || deg(h) >= F.n) + LogicError("build: bad args"); + + if (m > F.n) m = F.n; + + if (zz_pEXArgBound > 0) { + double sz = zz_p::storage(); + sz = sz*zz_pE::degree(); + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_zz_p); + sz = sz*F.n; + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_zz_pE); + sz = sz/1024; + m = min(m, long(zz_pEXArgBound/sz)); + m = max(m, 1); + } + + + + A.H.SetLength(m+1); + + set(A.H[0]); + A.H[1] = h; + for (i = 2; i <= m; i++) + MulMod(A.H[i], A.H[i-1], h, F); +} + +NTL_CHEAP_THREAD_LOCAL long zz_pEXArgBound = 0; + + + + +void CompMod(zz_pEX& x, const zz_pEX& g, const zz_pEX& h, const zz_pEXModulus& F) + // x = g(h) mod f +{ + long m = SqrRoot(g.rep.length()); + + if (m == 0) { + clear(x); + return; + } + + zz_pEXArgument A; + + build(A, h, F, m); + + CompMod(x, g, A, F); +} + + + + +void Comp2Mod(zz_pEX& x1, zz_pEX& x2, const zz_pEX& g1, const zz_pEX& g2, + const zz_pEX& h, const zz_pEXModulus& F) + +{ + long m = SqrRoot(g1.rep.length() + g2.rep.length()); + + if (m == 0) { + clear(x1); + clear(x2); + return; + } + + zz_pEXArgument A; + + build(A, h, F, m); + + zz_pEX xx1, xx2; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + + x1 = xx1; + x2 = xx2; +} + +void Comp3Mod(zz_pEX& x1, zz_pEX& x2, zz_pEX& x3, + const zz_pEX& g1, const zz_pEX& g2, const zz_pEX& g3, + const zz_pEX& h, const zz_pEXModulus& F) + +{ + long m = SqrRoot(g1.rep.length() + g2.rep.length() + g3.rep.length()); + + if (m == 0) { + clear(x1); + clear(x2); + clear(x3); + return; + } + + zz_pEXArgument A; + + build(A, h, F, m); + + zz_pEX xx1, xx2, xx3; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + CompMod(xx3, g3, A, F); + + x1 = xx1; + x2 = xx2; + x3 = xx3; +} + +void build(zz_pEXTransMultiplier& B, const zz_pEX& b, const zz_pEXModulus& F) +{ + long db = deg(b); + + if (db >= F.n) LogicError("build TransMultiplier: bad args"); + + zz_pEX t; + + LeftShift(t, b, F.n-1); + div(t, t, F); + + // we optimize for low degree b + + long d; + + d = deg(t); + if (d < 0) + B.shamt_fbi = 0; + else + B.shamt_fbi = F.n-2 - d; + + CopyReverse(B.fbi, t, d); + + // The following code optimizes the case when + // f = X^n + low degree poly + + 
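+   // in that case t = f mod X^n has low degree, and the shift amounts
+   // recorded below keep the transposed multiplications in
+   // TransMulMod correspondingly short.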
trunc(t, F.f, F.n); + d = deg(t); + if (d < 0) + B.shamt = 0; + else + B.shamt = d; + + CopyReverse(B.f0, t, d); + + if (db < 0) + B.shamt_b = 0; + else + B.shamt_b = db; + + CopyReverse(B.b, b, db); +} + +void TransMulMod(zz_pEX& x, const zz_pEX& a, const zz_pEXTransMultiplier& B, + const zz_pEXModulus& F) +{ + if (deg(a) >= F.n) LogicError("TransMulMod: bad args"); + + zz_pEX t1, t2; + + mul(t1, a, B.b); + RightShift(t1, t1, B.shamt_b); + + mul(t2, a, B.f0); + RightShift(t2, t2, B.shamt); + trunc(t2, t2, F.n-1); + + mul(t2, t2, B.fbi); + if (B.shamt_fbi > 0) LeftShift(t2, t2, B.shamt_fbi); + trunc(t2, t2, F.n-1); + LeftShift(t2, t2, 1); + + sub(x, t1, t2); +} + + +void ShiftSub(zz_pEX& U, const zz_pEX& V, long n) +// assumes input does not alias output +{ + if (IsZero(V)) + return; + + long du = deg(U); + long dv = deg(V); + + long d = max(du, n+dv); + + U.rep.SetLength(d+1); + long i; + + for (i = du+1; i <= d; i++) + clear(U.rep[i]); + + for (i = 0; i <= dv; i++) + sub(U.rep[i+n], U.rep[i+n], V.rep[i]); + + U.normalize(); +} + + +void UpdateMap(vec_zz_pE& x, const vec_zz_pE& a, + const zz_pEXTransMultiplier& B, const zz_pEXModulus& F) +{ + zz_pEX xx; + TransMulMod(xx, to_zz_pEX(a), B, F); + x = xx.rep; +} + +static +void ProjectPowers(vec_zz_pE& x, const zz_pEX& a, long k, + const zz_pEXArgument& H, const zz_pEXModulus& F) +{ + if (k < 0 || deg(a) >= F.n) + LogicError("ProjectPowers: bad args"); + + if (NTL_OVERFLOW(k, 1, 0)) + ResourceError("ProjectPowers: excessive args"); + + + long m = H.H.length()-1; + long l = (k+m-1)/m - 1; + + zz_pEXTransMultiplier M; + build(M, H.H[m], F); + + zz_pEX s; + s = a; + + x.SetLength(k); + + long i; + + for (i = 0; i <= l; i++) { + long m1 = min(m, k-i*m); + for (long j = 0; j < m1; j++) + InnerProduct(x[i*m+j], H.H[j].rep, s.rep); + if (i < l) + TransMulMod(s, s, M, F); + } +} + +static +void ProjectPowers(vec_zz_pE& x, const zz_pEX& a, long k, const zz_pEX& h, + const zz_pEXModulus& F) +{ + if (k < 0 || deg(a) >= F.n || deg(h) >= F.n) + LogicError("ProjectPowers: bad args"); + + if (k == 0) { + x.SetLength(0);; + return; + } + + long m = SqrRoot(k); + + zz_pEXArgument H; + build(H, h, F, m); + + ProjectPowers(x, a, k, H, F); +} + +void ProjectPowers(vec_zz_pE& x, const vec_zz_pE& a, long k, + const zz_pEXArgument& H, const zz_pEXModulus& F) +{ + ProjectPowers(x, to_zz_pEX(a), k, H, F); +} + +void ProjectPowers(vec_zz_pE& x, const vec_zz_pE& a, long k, + const zz_pEX& h, const zz_pEXModulus& F) +{ + ProjectPowers(x, to_zz_pEX(a), k, h, F); +} + + + + +void BerlekampMassey(zz_pEX& h, const vec_zz_pE& a, long m) +{ + zz_pEX Lambda, Sigma, Temp; + long L; + zz_pE Delta, Delta1, t1; + long shamt; + + // cerr << "*** " << m << "\n"; + + Lambda.SetMaxLength(m+1); + Sigma.SetMaxLength(m+1); + Temp.SetMaxLength(m+1); + + L = 0; + set(Lambda); + clear(Sigma); + set(Delta); + shamt = 0; + + long i, r, dl; + + for (r = 1; r <= 2*m; r++) { + // cerr << r << "--"; + clear(Delta1); + dl = deg(Lambda); + for (i = 0; i <= dl; i++) { + mul(t1, Lambda.rep[i], a[r-i-1]); + add(Delta1, Delta1, t1); + } + + if (IsZero(Delta1)) { + shamt++; + // cerr << "case 1: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + else if (2*L < r) { + div(t1, Delta1, Delta); + mul(Temp, Sigma, t1); + Sigma = Lambda; + ShiftSub(Lambda, Temp, shamt+1); + shamt = 0; + L = r-L; + Delta = Delta1; + // cerr << "case 2: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + else { + shamt++; + div(t1, Delta1, Delta); + mul(Temp, Sigma, t1); + ShiftSub(Lambda, 
Temp, shamt); + // cerr << "case 3: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + } + + // cerr << "finished: " << L << " " << deg(Lambda) << "\n"; + + dl = deg(Lambda); + h.rep.SetLength(L + 1); + + for (i = 0; i < L - dl; i++) + clear(h.rep[i]); + + for (i = L - dl; i <= L; i++) + h.rep[i] = Lambda.rep[L - i]; +} + + + + +void MinPolySeq(zz_pEX& h, const vec_zz_pE& a, long m) +{ + if (m < 0 || NTL_OVERFLOW(m, 1, 0)) LogicError("MinPoly: bad args"); + if (a.length() < 2*m) LogicError("MinPoly: sequence too short"); + + BerlekampMassey(h, a, m); +} + + +void DoMinPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F, long m, + const zz_pEX& R) +{ + vec_zz_pE x; + + ProjectPowers(x, R, 2*m, g, F); + MinPolySeq(h, x, m); +} + +void ProbMinPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F, long m) +{ + long n = F.n; + if (m < 1 || m > n) LogicError("ProbMinPoly: bad args"); + + zz_pEX R; + random(R, n); + + DoMinPolyMod(h, g, F, m, R); +} + +void ProbMinPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F) +{ + ProbMinPolyMod(h, g, F, F.n); +} + +void MinPolyMod(zz_pEX& hh, const zz_pEX& g, const zz_pEXModulus& F, long m) +{ + zz_pEX h, h1; + long n = F.n; + if (m < 1 || m > n) LogicError("MinPoly: bad args"); + + /* probabilistically compute min-poly */ + + ProbMinPolyMod(h, g, F, m); + if (deg(h) == m) { hh = h; return; } + CompMod(h1, h, g, F); + if (IsZero(h1)) { hh = h; return; } + + /* not completely successful...must iterate */ + + zz_pEX h2, h3; + zz_pEX R; + zz_pEXTransMultiplier H1; + + + for (;;) { + random(R, n); + build(H1, h1, F); + TransMulMod(R, R, H1, F); + DoMinPolyMod(h2, g, F, m-deg(h), R); + + mul(h, h, h2); + if (deg(h) == m) { hh = h; return; } + CompMod(h3, h2, g, F); + MulMod(h1, h3, h1, F); + if (IsZero(h1)) { hh = h; return; } + } +} + +void IrredPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F, long m) +{ + if (m < 1 || m > F.n) LogicError("IrredPoly: bad args"); + + zz_pEX R; + set(R); + + DoMinPolyMod(h, g, F, m, R); +} + + + +void IrredPolyMod(zz_pEX& h, const zz_pEX& g, const zz_pEXModulus& F) +{ + IrredPolyMod(h, g, F, F.n); +} + + + +void MinPolyMod(zz_pEX& hh, const zz_pEX& g, const zz_pEXModulus& F) +{ + MinPolyMod(hh, g, F, F.n); +} + +void diff(zz_pEX& x, const zz_pEX& a) +{ + long n = deg(a); + long i; + + if (n <= 0) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(n); + + for (i = 0; i <= n-1; i++) { + mul(x.rep[i], a.rep[i+1], i+1); + } + + if (&x == &a) + x.rep.SetLength(n); + + x.normalize(); +} + + + +void MakeMonic(zz_pEX& x) +{ + if (IsZero(x)) + return; + + if (IsOne(LeadCoeff(x))) + return; + + zz_pE t; + + inv(t, LeadCoeff(x)); + mul(x, x, t); +} + + +long divide(zz_pEX& q, const zz_pEX& a, const zz_pEX& b) +{ + if (IsZero(b)) { + if (IsZero(a)) { + clear(q); + return 1; + } + else + return 0; + } + + zz_pEX lq, r; + DivRem(lq, r, a, b); + if (!IsZero(r)) return 0; + q = lq; + return 1; +} + +long divide(const zz_pEX& a, const zz_pEX& b) +{ + if (IsZero(b)) return IsZero(a); + zz_pEX lq, r; + DivRem(lq, r, a, b); + if (!IsZero(r)) return 0; + return 1; +} + + + +static +long OptWinSize(long n) +// finds k that minimizes n/(k+1) + 2^{k-1} + +{ + long k; + double v, v_new; + + + v = n/2.0 + 1.0; + k = 1; + + for (;;) { + v_new = n/(double(k+2)) + double(1L << k); + if (v_new >= v) break; + v = v_new; + k++; + } + + return k; +} + + + +void PowerMod(zz_pEX& h, const zz_pEX& g, const ZZ& e, const zz_pEXModulus& F) +// h = g^e mod f using "sliding window" algorithm +{ + if (deg(g) 
>= F.n) LogicError("PowerMod: bad args"); + + if (e == 0) { + set(h); + return; + } + + if (e == 1) { + h = g; + return; + } + + if (e == -1) { + InvMod(h, g, F); + return; + } + + if (e == 2) { + SqrMod(h, g, F); + return; + } + + if (e == -2) { + SqrMod(h, g, F); + InvMod(h, h, F); + return; + } + + + long n = NumBits(e); + + zz_pEX res; + res.SetMaxLength(F.n); + set(res); + + long i; + + if (n < 16) { + // plain square-and-multiply algorithm + + for (i = n - 1; i >= 0; i--) { + SqrMod(res, res, F); + if (bit(e, i)) + MulMod(res, res, g, F); + } + + if (e < 0) InvMod(res, res, F); + + h = res; + return; + } + + long k = OptWinSize(n); + k = min(k, 3); + + vec_zz_pEX v; + + v.SetLength(1L << (k-1)); + + v[0] = g; + + if (k > 1) { + zz_pEX t; + SqrMod(t, g, F); + + for (i = 1; i < (1L << (k-1)); i++) + MulMod(v[i], v[i-1], t, F); + } + + + long val; + long cnt; + long m; + + val = 0; + for (i = n-1; i >= 0; i--) { + val = (val << 1) | bit(e, i); + if (val == 0) + SqrMod(res, res, F); + else if (val >= (1L << (k-1)) || i == 0) { + cnt = 0; + while ((val & 1) == 0) { + val = val >> 1; + cnt++; + } + + m = val; + while (m > 0) { + SqrMod(res, res, F); + m = m >> 1; + } + + MulMod(res, res, v[val >> 1], F); + + while (cnt > 0) { + SqrMod(res, res, F); + cnt--; + } + + val = 0; + } + } + + if (e < 0) InvMod(res, res, F); + + h = res; +} + +void InvMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("InvMod: bad args"); + + zz_pEX d, xx, t; + + XGCD(d, xx, t, a, f); + if (!IsOne(d)) + InvModError("zz_pEX InvMod: can't compute multiplicative inverse"); + + x = xx; +} + +long InvModStatus(zz_pEX& x, const zz_pEX& a, const zz_pEX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("InvModStatus: bad args"); + zz_pEX d, t; + + XGCD(d, x, t, a, f); + if (!IsOne(d)) { + x = d; + return 1; + } + else + return 0; +} + + +void MulMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& b, const zz_pEX& f) +{ + if (deg(a) >= deg(f) || deg(b) >= deg(f) || deg(f) == 0) + LogicError("MulMod: bad args"); + + zz_pEX t; + + mul(t, a, b); + rem(x, t, f); +} + +void SqrMod(zz_pEX& x, const zz_pEX& a, const zz_pEX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("SqrMod: bad args"); + + zz_pEX t; + + sqr(t, a); + rem(x, t, f); +} + + +void PowerXMod(zz_pEX& hh, const ZZ& e, const zz_pEXModulus& F) +{ + if (F.n < 0) LogicError("PowerXMod: uninitialized modulus"); + + if (IsZero(e)) { + set(hh); + return; + } + + long n = NumBits(e); + long i; + + zz_pEX h; + + h.SetMaxLength(F.n); + set(h); + + for (i = n - 1; i >= 0; i--) { + SqrMod(h, h, F); + if (bit(e, i)) + MulByXMod(h, h, F.f); + } + + if (e < 0) InvMod(h, h, F); + + hh = h; +} + + +void reverse(zz_pEX& x, const zz_pEX& a, long hi) +{ + if (hi < 0) { clear(x); return; } + if (NTL_OVERFLOW(hi, 1, 0)) + ResourceError("overflow in reverse"); + + if (&x == &a) { + zz_pEX tmp; + CopyReverse(tmp, a, hi); + x = tmp; + } + else + CopyReverse(x, a, hi); +} + + +void power(zz_pEX& x, const zz_pEX& a, long e) +{ + if (e < 0) { + ArithmeticError("power: negative exponent"); + } + + if (e == 0) { + x = 1; + return; + } + + if (a == 0 || a == 1) { + x = a; + return; + } + + long da = deg(a); + + if (da == 0) { + x = power(ConstTerm(a), e); + return; + } + + if (da > (NTL_MAX_LONG-1)/e) + ResourceError("overflow in power"); + + zz_pEX res; + res.SetMaxLength(da*e + 1); + res = 1; + + long k = NumBits(e); + long i; + + for (i = k - 1; i >= 0; i--) { + sqr(res, res); + if (bit(e, i)) + mul(res, res, a); + } + + x = res; +} 
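+// A minimal usage sketch (not part of NTL) exercising SetCoeff and
+// power from above; it assumes zz_p::init and zz_pE::init have already
+// been called by the application:
+//
+//    zz_pEX a, b;
+//    SetCoeff(a, 1);       // a = X
+//    SetCoeff(a, 0, 1);    // a = X + 1
+//    power(b, a, 5);       // b = (X + 1)^5 by square-and-multiply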
+ + + +static +void FastTraceVec(vec_zz_pE& S, const zz_pEXModulus& f) +{ + long n = deg(f); + + zz_pEX x = reverse(-LeftShift(reverse(diff(reverse(f)), n-1), n-1)/f, n-1); + + S.SetLength(n); + S[0] = n; + + long i; + for (i = 1; i < n; i++) + S[i] = coeff(x, i); +} + + +void PlainTraceVec(vec_zz_pE& S, const zz_pEX& ff) +{ + if (deg(ff) <= 0) + LogicError("TraceVec: bad args"); + + zz_pEX f; + f = ff; + + MakeMonic(f); + + long n = deg(f); + + S.SetLength(n); + + if (n == 0) + return; + + long k, i; + zz_pX acc, t; + zz_pE t1; + + S[0] = n; + + for (k = 1; k < n; k++) { + mul(acc, rep(f.rep[n-k]), k); + + for (i = 1; i < k; i++) { + mul(t, rep(f.rep[n-i]), rep(S[k-i])); + add(acc, acc, t); + } + + conv(t1, acc); + negate(S[k], t1); + } +} + +void TraceVec(vec_zz_pE& S, const zz_pEX& f) +{ + if (deg(f) < zz_pE::DivCross()) + PlainTraceVec(S, f); + else + FastTraceVec(S, f); +} + +static +void ComputeTraceVec(vec_zz_pE& S, const zz_pEXModulus& F) +{ + if (F.method == zz_pEX_MOD_PLAIN) { + PlainTraceVec(S, F.f); + } + else { + FastTraceVec(S, F); + } +} + +void TraceMod(zz_pE& x, const zz_pEX& a, const zz_pEXModulus& F) +{ + long n = F.n; + + if (deg(a) >= n) + LogicError("trace: bad args"); + + do { // NOTE: thread safe lazy init + Lazy::Builder builder(F.tracevec.val()); + if (!builder()) break; + UniquePtr p; + p.make(); + ComputeTraceVec(*p, F); + builder.move(p); + } while (0); + + InnerProduct(x, a.rep, *F.tracevec.val()); +} + +void TraceMod(zz_pE& x, const zz_pEX& a, const zz_pEX& f) +{ + if (deg(a) >= deg(f) || deg(f) <= 0) + LogicError("trace: bad args"); + + project(x, TraceVec(f), a); +} + + +void PlainResultant(zz_pE& rres, const zz_pEX& a, const zz_pEX& b) +{ + zz_pE res; + + if (IsZero(a) || IsZero(b)) + clear(res); + else if (deg(a) == 0 && deg(b) == 0) + set(res); + else { + long d0, d1, d2; + zz_pE lc; + set(res); + + long n = max(deg(a),deg(b)) + 1; + zz_pEX u(INIT_SIZE, n), v(INIT_SIZE, n); + vec_zz_pX tmp; + SetSize(tmp, n, 2*zz_pE::degree()); + + u = a; + v = b; + + for (;;) { + d0 = deg(u); + d1 = deg(v); + lc = LeadCoeff(v); + + PlainRem(u, u, v, tmp); + swap(u, v); + + d2 = deg(v); + if (d2 >= 0) { + power(lc, lc, d0-d2); + mul(res, res, lc); + if (d0 & d1 & 1) negate(res, res); + } + else { + if (d1 == 0) { + power(lc, lc, d0); + mul(res, res, lc); + } + else + clear(res); + + break; + } + } + } + rres = res; +} + +void resultant(zz_pE& rres, const zz_pEX& a, const zz_pEX& b) +{ + PlainResultant(rres, a, b); +} + + +void NormMod(zz_pE& x, const zz_pEX& a, const zz_pEX& f) +{ + if (deg(f) <= 0 || deg(a) >= deg(f)) + LogicError("norm: bad args"); + + if (IsZero(a)) { + clear(x); + return; + } + + zz_pE t; + resultant(t, f, a); + if (!IsOne(LeadCoeff(f))) { + zz_pE t1; + power(t1, LeadCoeff(f), deg(a)); + inv(t1, t1); + mul(t, t, t1); + } + + x = t; +} + + + +// tower stuff... 
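+// The routines below view zz_pE[X]/(f) as one big extension of zz_p of
+// degree deg(f)*zz_pE::degree(); e.g. MinPolyTower computes a zz_pX,
+// the minimal polynomial over zz_p rather than over zz_pE.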
+ + + +void InnerProduct(zz_pEX& x, const vec_zz_p& v, long low, long high, + const vec_zz_pEX& H, long n, vec_zz_pE& t) +{ + zz_pE s; + long i, j; + + for (j = 0; j < n; j++) + clear(t[j]); + + high = min(high, v.length()-1); + for (i = low; i <= high; i++) { + const vec_zz_pE& h = H[i-low].rep; + long m = h.length(); + const zz_p& w = v[i]; + + for (j = 0; j < m; j++) { + mul(s, h[j], w); + add(t[j], t[j], s); + } + } + + x.rep.SetLength(n); + for (j = 0; j < n; j++) + x.rep[j] = t[j]; + + x.normalize(); +} + + + +void CompTower(zz_pEX& x, const zz_pX& g, const zz_pEXArgument& A, + const zz_pEXModulus& F) +{ + if (deg(g) <= 0) { + conv(x, g); + return; + } + + + zz_pEX s, t; + vec_zz_pE scratch; + scratch.SetLength(deg(F)); + + long m = A.H.length() - 1; + long l = ((g.rep.length()+m-1)/m) - 1; + + const zz_pEX& M = A.H[m]; + + InnerProduct(t, g.rep, l*m, l*m + m - 1, A.H, F.n, scratch); + for (long i = l-1; i >= 0; i--) { + InnerProduct(s, g.rep, i*m, i*m + m - 1, A.H, F.n, scratch); + MulMod(t, t, M, F); + add(t, t, s); + } + x = t; +} + + +void CompTower(zz_pEX& x, const zz_pX& g, const zz_pEX& h, + const zz_pEXModulus& F) + // x = g(h) mod f +{ + long m = SqrRoot(g.rep.length()); + + if (m == 0) { + clear(x); + return; + } + + + zz_pEXArgument A; + + build(A, h, F, m); + + CompTower(x, g, A, F); +} + +void PrepareProjection(vec_vec_zz_p& tt, const vec_zz_pE& s, + const vec_zz_p& proj) +{ + long l = s.length(); + tt.SetLength(l); + + zz_pXMultiplier M; + long i; + + for (i = 0; i < l; i++) { + build(M, rep(s[i]), zz_pE::modulus()); + UpdateMap(tt[i], proj, M, zz_pE::modulus()); + } +} + +void ProjectedInnerProduct(zz_p& x, const vec_zz_pE& a, + const vec_vec_zz_p& b) +{ + long n = min(a.length(), b.length()); + + zz_p t, res; + + res = 0; + + long i; + for (i = 0; i < n; i++) { + project(t, b[i], rep(a[i])); + res += t; + } + + x = res; +} + + + +void PrecomputeProj(vec_zz_p& proj, const zz_pX& f) +{ + long n = deg(f); + + if (n <= 0) LogicError("PrecomputeProj: bad args"); + + if (ConstTerm(f) != 0) { + proj.SetLength(1); + proj[0] = 1; + } + else { + proj.SetLength(n); + clear(proj); + proj[n-1] = 1; + } +} + + +void ProjectPowersTower(vec_zz_p& x, const vec_zz_pE& a, long k, + const zz_pEXArgument& H, const zz_pEXModulus& F, + const vec_zz_p& proj) + +{ + long n = F.n; + + if (a.length() > n || k < 0) + LogicError("ProjectPowers: bad args"); + if (NTL_OVERFLOW(k, 1, 0)) + ResourceError("ProjectPowers: excessive args"); + + long m = H.H.length()-1; + long l = (k+m-1)/m - 1; + + zz_pEXTransMultiplier M; + build(M, H.H[m], F); + + vec_zz_pE s(INIT_SIZE, n); + s = a; + + x.SetLength(k); + + vec_vec_zz_p tt; + + for (long i = 0; i <= l; i++) { + long m1 = min(m, k-i*m); + zz_p* w = &x[i*m]; + + PrepareProjection(tt, s, proj); + + for (long j = 0; j < m1; j++) + ProjectedInnerProduct(w[j], H.H[j].rep, tt); + if (i < l) + UpdateMap(s, s, M, F); + } +} + + + + +void ProjectPowersTower(vec_zz_p& x, const vec_zz_pE& a, long k, + const zz_pEX& h, const zz_pEXModulus& F, + const vec_zz_p& proj) + +{ + if (a.length() > F.n || k < 0) LogicError("ProjectPowers: bad args"); + + if (k == 0) { + x.SetLength(0); + return; + } + + long m = SqrRoot(k); + + zz_pEXArgument H; + + build(H, h, F, m); + ProjectPowersTower(x, a, k, H, F, proj); +} + + +void DoMinPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F, long m, + const vec_zz_pE& R, const vec_zz_p& proj) +{ + vec_zz_p x; + + ProjectPowersTower(x, R, 2*m, g, F, proj); + + MinPolySeq(h, x, m); +} + + +void ProbMinPolyTower(zz_pX& h, const 
zz_pEX& g, const zz_pEXModulus& F, + long m) +{ + long n = F.n; + if (m < 1 || m > n*zz_pE::degree()) LogicError("ProbMinPoly: bad args"); + + vec_zz_pE R; + R.SetLength(n); + long i; + for (i = 0; i < n; i++) + random(R[i]); + + vec_zz_p proj; + PrecomputeProj(proj, zz_pE::modulus()); + + DoMinPolyTower(h, g, F, m, R, proj); +} + + +void ProbMinPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F, + long m, const vec_zz_p& proj) +{ + long n = F.n; + if (m < 1 || m > n*zz_pE::degree()) LogicError("ProbMinPoly: bad args"); + + vec_zz_pE R; + R.SetLength(n); + long i; + for (i = 0; i < n; i++) + random(R[i]); + + DoMinPolyTower(h, g, F, m, R, proj); +} + +void MinPolyTower(zz_pX& hh, const zz_pEX& g, const zz_pEXModulus& F, long m) +{ + zz_pX h; + zz_pEX h1; + long n = F.n; + if (m < 1 || m > n*zz_pE::degree()) { + LogicError("MinPoly: bad args"); + } + + vec_zz_p proj; + PrecomputeProj(proj, zz_pE::modulus()); + + /* probabilistically compute min-poly */ + + ProbMinPolyTower(h, g, F, m, proj); + if (deg(h) == m) { hh = h; return; } + CompTower(h1, h, g, F); + if (IsZero(h1)) { hh = h; return; } + + /* not completely successful...must iterate */ + + long i; + + zz_pX h2; + zz_pEX h3; + vec_zz_pE R; + zz_pEXTransMultiplier H1; + + + for (;;) { + R.SetLength(n); + for (i = 0; i < n; i++) random(R[i]); + build(H1, h1, F); + UpdateMap(R, R, H1, F); + DoMinPolyTower(h2, g, F, m-deg(h), R, proj); + + mul(h, h, h2); + if (deg(h) == m) { hh = h; return; } + CompTower(h3, h2, g, F); + MulMod(h1, h3, h1, F); + if (IsZero(h1)) { hh = h; return; } + } +} + +void IrredPolyTower(zz_pX& h, const zz_pEX& g, const zz_pEXModulus& F, long m) +{ + if (m < 1 || m > deg(F)*zz_pE::degree()) LogicError("IrredPoly: bad args"); + + vec_zz_pE R; + R.SetLength(1); + R[0] = 1; + + vec_zz_p proj; + proj.SetLength(1); + proj[0] = 1; + + DoMinPolyTower(h, g, F, m, R, proj); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/lzz_pEXFactoring.c b/thirdparty/linux/ntl/src/lzz_pEXFactoring.c new file mode 100644 index 0000000000..72b41aeb04 --- /dev/null +++ b/thirdparty/linux/ntl/src/lzz_pEXFactoring.c @@ -0,0 +1,1594 @@ + +#include +#include +#include +#include + +NTL_START_IMPL + + + +static +void IterPower(zz_pE& c, const zz_pE& a, long n) +{ + zz_pE res; + + long i; + + res = a; + + for (i = 0; i < n; i++) + power(res, res, zz_p::modulus()); + + c = res; +} + + + +void SquareFreeDecomp(vec_pair_zz_pEX_long& u, const zz_pEX& ff) +{ + zz_pEX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SquareFreeDecomp: bad args"); + + zz_pEX r, t, v, tmp1; + long m, j, finished, done; + + u.SetLength(0); + + if (deg(f) == 0) + return; + + m = 1; + finished = 0; + + do { + j = 1; + diff(tmp1, f); + GCD(r, f, tmp1); + div(t, f, r); + + if (deg(t) > 0) { + done = 0; + do { + GCD(v, r, t); + div(tmp1, t, v); + if (deg(tmp1) > 0) append(u, cons(tmp1, j*m)); + if (deg(v) > 0) { + div(r, r, v); + t = v; + j++; + } + else + done = 1; + } while (!done); + if (deg(r) == 0) finished = 1; + } + + if (!finished) { + /* r is a p-th power */ + + long k, d; + long p = to_long(zz_p::modulus()); + + d = deg(r)/p; + f.rep.SetLength(d+1); + for (k = 0; k <= d; k++) + IterPower(f.rep[k], r.rep[k*p], zz_pE::degree()-1); + m = m*p; + } + } while (!finished); +} + + + +static +void AbsTraceMap(zz_pEX& h, const zz_pEX& a, const zz_pEXModulus& F) +{ + zz_pEX res, tmp; + + long k = NumBits(zz_pE::cardinality())-1; + + res = a; + tmp = a; + + long i; + for (i = 0; i < k-1; i++) { + SqrMod(tmp, tmp, F); + add(res, res, tmp); + } + + h = res; +} + 
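+// AbsTraceMap is the even-characteristic substitute for the (q-1)/2
+// power test: with zz_pE::cardinality() = 2^k it computes
+// a + a^2 + a^4 + ... + a^(2^(k-1)) mod F, the absolute trace to GF(2),
+// whose fibers give the random splittings used in root finding below.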
+void FrobeniusMap(zz_pEX& h, const zz_pEXModulus& F) +{ + PowerXMod(h, zz_pE::cardinality(), F); +} + + +static +void RecFindRoots(vec_zz_pE& x, const zz_pEX& f) +{ + if (deg(f) == 0) return; + + if (deg(f) == 1) { + long k = x.length(); + x.SetLength(k+1); + negate(x[k], ConstTerm(f)); + return; + } + + zz_pEX h; + + zz_pEX r; + + + { + zz_pEXModulus F; + build(F, f); + + do { + random(r, deg(F)); + if (IsOdd(zz_pE::cardinality())) { + PowerMod(h, r, RightShift(zz_pE::cardinality(), 1), F); + sub(h, h, 1); + } + else { + AbsTraceMap(h, r, F); + } + GCD(h, h, f); + } while (deg(h) <= 0 || deg(h) == deg(f)); + } + + RecFindRoots(x, h); + div(h, f, h); + RecFindRoots(x, h); +} + +void FindRoots(vec_zz_pE& x, const zz_pEX& ff) +{ + zz_pEX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("FindRoots: bad args"); + + x.SetMaxLength(deg(f)); + x.SetLength(0); + RecFindRoots(x, f); +} + +void split(zz_pEX& f1, zz_pEX& g1, zz_pEX& f2, zz_pEX& g2, + const zz_pEX& f, const zz_pEX& g, + const vec_zz_pE& roots, long lo, long mid) +{ + long r = mid-lo+1; + + zz_pEXModulus F; + build(F, f); + + vec_zz_pE lroots(INIT_SIZE, r); + long i; + + for (i = 0; i < r; i++) + lroots[i] = roots[lo+i]; + + + zz_pEX h, a, d; + BuildFromRoots(h, lroots); + CompMod(a, h, g, F); + + + GCD(f1, a, f); + + div(f2, f, f1); + + rem(g1, g, f1); + rem(g2, g, f2); +} + +void RecFindFactors(vec_zz_pEX& factors, const zz_pEX& f, const zz_pEX& g, + const vec_zz_pE& roots, long lo, long hi) +{ + long r = hi-lo+1; + + if (r == 0) return; + + if (r == 1) { + append(factors, f); + return; + } + + zz_pEX f1, g1, f2, g2; + + long mid = (lo+hi)/2; + + split(f1, g1, f2, g2, f, g, roots, lo, mid); + + RecFindFactors(factors, f1, g1, roots, lo, mid); + RecFindFactors(factors, f2, g2, roots, mid+1, hi); +} + + +void FindFactors(vec_zz_pEX& factors, const zz_pEX& f, const zz_pEX& g, + const vec_zz_pE& roots) +{ + long r = roots.length(); + + factors.SetMaxLength(r); + factors.SetLength(0); + + RecFindFactors(factors, f, g, roots, 0, r-1); +} + +void IterFindFactors(vec_zz_pEX& factors, const zz_pEX& f, + const zz_pEX& g, const vec_zz_pE& roots) +{ + long r = roots.length(); + long i; + zz_pEX h; + + factors.SetLength(r); + + for (i = 0; i < r; i++) { + sub(h, g, roots[i]); + GCD(factors[i], f, h); + } +} + + +void TraceMap(zz_pEX& w, const zz_pEX& a, long d, const zz_pEXModulus& F, + const zz_pEX& b) + +{ + if (d < 0) LogicError("TraceMap: bad args"); + + zz_pEX y, z, t; + + z = b; + y = a; + clear(w); + + while (d) { + if (d == 1) { + if (IsZero(w)) + w = y; + else { + CompMod(w, w, z, F); + add(w, w, y); + } + } + else if ((d & 1) == 0) { + Comp2Mod(z, t, z, y, z, F); + add(y, t, y); + } + else if (IsZero(w)) { + w = y; + Comp2Mod(z, t, z, y, z, F); + add(y, t, y); + } + else { + Comp3Mod(z, t, w, z, y, w, z, F); + add(w, w, y); + add(y, t, y); + } + + d = d >> 1; + } +} + + +void PowerCompose(zz_pEX& y, const zz_pEX& h, long q, const zz_pEXModulus& F) +{ + if (q < 0) LogicError("PowerCompose: bad args"); + + zz_pEX z(INIT_SIZE, F.n); + long sw; + + z = h; + SetX(y); + + while (q) { + sw = 0; + + if (q > 1) sw = 2; + if (q & 1) { + if (IsX(y)) + y = z; + else + sw = sw | 1; + } + + switch (sw) { + case 0: + break; + + case 1: + CompMod(y, y, z, F); + break; + + case 2: + CompMod(z, z, z, F); + break; + + case 3: + Comp2Mod(y, z, y, z, z, F); + break; + } + + q = q >> 1; + } +} + + +long ProbIrredTest(const zz_pEX& f, long iter) +{ + long n = deg(f); + + if (n <= 0) return 0; + if (n == 1) return 1; + + zz_pEXModulus F; + + build(F, f); + 
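+   // for irreducible f of degree n, the trace map sends any r to a
+   // constant polynomial (an element of zz_pE), so a nonconstant
+   // trace below certifies that f is reducible.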
+ zz_pEX b, r, s; + + FrobeniusMap(b, F); + + long all_zero = 1; + + long i; + + for (i = 0; i < iter; i++) { + random(r, n); + TraceMap(s, r, n, F, b); + + all_zero = all_zero && IsZero(s); + + if (deg(s) > 0) return 0; + } + + if (!all_zero || (n & 1)) return 1; + + PowerCompose(s, b, n/2, F); + return !IsX(s); +} + + +NTL_CHEAP_THREAD_LOCAL long zz_pEX_BlockingFactor = 10; + + + + +void RootEDF(vec_zz_pEX& factors, const zz_pEX& f, long verbose) +{ + vec_zz_pE roots; + double t; + + if (verbose) { cerr << "finding roots..."; t = GetTime(); } + FindRoots(roots, f); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + long r = roots.length(); + factors.SetLength(r); + for (long j = 0; j < r; j++) { + SetX(factors[j]); + sub(factors[j], factors[j], roots[j]); + } +} + +void EDFSplit(vec_zz_pEX& v, const zz_pEX& f, const zz_pEX& b, long d) +{ + zz_pEX a, g, h; + zz_pEXModulus F; + vec_zz_pE roots; + + build(F, f); + long n = F.n; + long r = n/d; + random(a, n); + TraceMap(g, a, d, F, b); + MinPolyMod(h, g, F, r); + FindRoots(roots, h); + FindFactors(v, f, g, roots); +} + +void RecEDF(vec_zz_pEX& factors, const zz_pEX& f, const zz_pEX& b, long d, + long verbose) +{ + vec_zz_pEX v; + long i; + zz_pEX bb; + + if (verbose) cerr << "+"; + + EDFSplit(v, f, b, d); + for (i = 0; i < v.length(); i++) { + if (deg(v[i]) == d) { + append(factors, v[i]); + } + else { + zz_pEX bb; + rem(bb, b, v[i]); + RecEDF(factors, v[i], bb, d, verbose); + } + } +} + + +void EDF(vec_zz_pEX& factors, const zz_pEX& ff, const zz_pEX& bb, + long d, long verbose) + +{ + zz_pEX f = ff; + zz_pEX b = bb; + + if (!IsOne(LeadCoeff(f))) + LogicError("EDF: bad args"); + + long n = deg(f); + long r = n/d; + + if (r == 0) { + factors.SetLength(0); + return; + } + + if (r == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + if (d == 1) { + RootEDF(factors, f, verbose); + return; + } + + + double t; + if (verbose) { + cerr << "computing EDF(" << d << "," << r << ")..."; + t = GetTime(); + } + + factors.SetLength(0); + + RecEDF(factors, f, b, d, verbose); + + if (verbose) cerr << (GetTime()-t) << "\n"; +} + + +void SFCanZass(vec_zz_pEX& factors, const zz_pEX& ff, long verbose) +{ + zz_pEX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SFCanZass: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + factors.SetLength(0); + + double t; + + + zz_pEXModulus F; + build(F, f); + + zz_pEX h; + + if (verbose) { cerr << "computing X^p..."; t = GetTime(); } + FrobeniusMap(h, F); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + vec_pair_zz_pEX_long u; + if (verbose) { cerr << "computing DDF..."; t = GetTime(); } + NewDDF(u, f, h, verbose); + if (verbose) { + t = GetTime()-t; + cerr << "DDF time: " << t << "\n"; + } + + zz_pEX hh; + vec_zz_pEX v; + + long i; + for (i = 0; i < u.length(); i++) { + const zz_pEX& g = u[i].a; + long d = u[i].b; + long r = deg(g)/d; + + if (r == 1) { + // g is already irreducible + + append(factors, g); + } + else { + // must perform EDF + + if (d == 1) { + // root finding + RootEDF(v, g, verbose); + append(factors, v); + } + else { + // general case + rem(hh, h, g); + EDF(v, g, hh, d, verbose); + append(factors, v); + } + } + } +} + +void CanZass(vec_pair_zz_pEX_long& factors, const zz_pEX& f, long verbose) +{ + if (!IsOne(LeadCoeff(f))) + LogicError("CanZass: bad args"); + + double t; + vec_pair_zz_pEX_long sfd; + vec_zz_pEX x; + + + if (verbose) { cerr << "square-free decomposition..."; t 
= GetTime(); } + SquareFreeDecomp(sfd, f); + if (verbose) cerr << (GetTime()-t) << "\n"; + + factors.SetLength(0); + + long i, j; + + for (i = 0; i < sfd.length(); i++) { + if (verbose) { + cerr << "factoring multiplicity " << sfd[i].b + << ", deg = " << deg(sfd[i].a) << "\n"; + } + + SFCanZass(x, sfd[i].a, verbose); + + for (j = 0; j < x.length(); j++) + append(factors, cons(x[j], sfd[i].b)); + } +} + +void mul(zz_pEX& f, const vec_pair_zz_pEX_long& v) +{ + long i, j, n; + + n = 0; + for (i = 0; i < v.length(); i++) + n += v[i].b*deg(v[i].a); + + zz_pEX g(INIT_SIZE, n+1); + + set(g); + for (i = 0; i < v.length(); i++) + for (j = 0; j < v[i].b; j++) { + mul(g, g, v[i].a); + } + + f = g; +} + + +long BaseCase(const zz_pEX& h, long q, long a, const zz_pEXModulus& F) +{ + long b, e; + zz_pEX lh(INIT_SIZE, F.n); + + lh = h; + b = 1; + e = 0; + while (e < a-1 && !IsX(lh)) { + e++; + b *= q; + PowerCompose(lh, lh, q, F); + } + + if (!IsX(lh)) b *= q; + + return b; +} + + + +void TandemPowerCompose(zz_pEX& y1, zz_pEX& y2, const zz_pEX& h, + long q1, long q2, const zz_pEXModulus& F) +{ + zz_pEX z(INIT_SIZE, F.n); + long sw; + + z = h; + SetX(y1); + SetX(y2); + + while (q1 || q2) { + sw = 0; + + if (q1 > 1 || q2 > 1) sw = 4; + + if (q1 & 1) { + if (IsX(y1)) + y1 = z; + else + sw = sw | 2; + } + + if (q2 & 1) { + if (IsX(y2)) + y2 = z; + else + sw = sw | 1; + } + + switch (sw) { + case 0: + break; + + case 1: + CompMod(y2, y2, z, F); + break; + + case 2: + CompMod(y1, y1, z, F); + break; + + case 3: + Comp2Mod(y1, y2, y1, y2, z, F); + break; + + case 4: + CompMod(z, z, z, F); + break; + + case 5: + Comp2Mod(z, y2, z, y2, z, F); + break; + + case 6: + Comp2Mod(z, y1, z, y1, z, F); + break; + + case 7: + Comp3Mod(z, y1, y2, z, y1, y2, z, F); + break; + } + + q1 = q1 >> 1; + q2 = q2 >> 1; + } +} + + +long RecComputeDegree(long u, const zz_pEX& h, const zz_pEXModulus& F, + FacVec& fvec) +{ + if (IsX(h)) return 1; + + if (fvec[u].link == -1) return BaseCase(h, fvec[u].q, fvec[u].a, F); + + zz_pEX h1, h2; + long q1, q2, r1, r2; + + q1 = fvec[fvec[u].link].val; + q2 = fvec[fvec[u].link+1].val; + + TandemPowerCompose(h1, h2, h, q1, q2, F); + r1 = RecComputeDegree(fvec[u].link, h2, F, fvec); + r2 = RecComputeDegree(fvec[u].link+1, h1, F, fvec); + return r1*r2; +} + + + + +long RecComputeDegree(const zz_pEX& h, const zz_pEXModulus& F) + // f = F.f is assumed to be an "equal degree" polynomial + // h = X^p mod f + // the common degree of the irreducible factors of f is computed +{ + if (F.n == 1 || IsX(h)) + return 1; + + FacVec fvec; + + FactorInt(fvec, F.n); + + return RecComputeDegree(fvec.length()-1, h, F, fvec); +} + + +void FindRoot(zz_pE& root, const zz_pEX& ff) +// finds a root of ff. 
+// assumes that ff is monic and splits into distinct linear factors + +{ + zz_pEXModulus F; + zz_pEX h, h1, f; + zz_pEX r; + + f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("FindRoot: bad args"); + + if (deg(f) == 0) + LogicError("FindRoot: bad args"); + + + while (deg(f) > 1) { + build(F, f); + random(r, deg(F)); + if (IsOdd(zz_pE::cardinality())) { + PowerMod(h, r, RightShift(zz_pE::cardinality(), 1), F); + sub(h, h, 1); + } + else { + AbsTraceMap(h, r, F); + } + GCD(h, h, f); + if (deg(h) > 0 && deg(h) < deg(f)) { + if (deg(h) > deg(f)/2) + div(f, f, h); + else + f = h; + } + } + + negate(root, ConstTerm(f)); +} + + +static +long power(long a, long e) +{ + long i, res; + + res = 1; + for (i = 1; i <= e; i++) + res = res * a; + + return res; +} + + +static +long IrredBaseCase(const zz_pEX& h, long q, long a, const zz_pEXModulus& F) +{ + long e; + zz_pEX X, s, d; + + e = power(q, a-1); + PowerCompose(s, h, e, F); + SetX(X); + sub(s, s, X); + GCD(d, F.f, s); + return IsOne(d); +} + + +static +long RecIrredTest(long u, const zz_pEX& h, const zz_pEXModulus& F, + const FacVec& fvec) +{ + long q1, q2; + zz_pEX h1, h2; + + if (IsX(h)) return 0; + + if (fvec[u].link == -1) { + return IrredBaseCase(h, fvec[u].q, fvec[u].a, F); + } + + + q1 = fvec[fvec[u].link].val; + q2 = fvec[fvec[u].link+1].val; + + TandemPowerCompose(h1, h2, h, q1, q2, F); + return RecIrredTest(fvec[u].link, h2, F, fvec) + && RecIrredTest(fvec[u].link+1, h1, F, fvec); +} + +long DetIrredTest(const zz_pEX& f) +{ + if (deg(f) <= 0) return 0; + if (deg(f) == 1) return 1; + + zz_pEXModulus F; + + build(F, f); + + zz_pEX h; + + FrobeniusMap(h, F); + + zz_pEX s; + PowerCompose(s, h, F.n, F); + if (!IsX(s)) return 0; + + FacVec fvec; + + FactorInt(fvec, F.n); + + return RecIrredTest(fvec.length()-1, h, F, fvec); +} + + + +long IterIrredTest(const zz_pEX& f) +{ + if (deg(f) <= 0) return 0; + if (deg(f) == 1) return 1; + + zz_pEXModulus F; + + build(F, f); + + zz_pEX h; + + FrobeniusMap(h, F); + + long CompTableSize = 2*SqrRoot(deg(f)); + + zz_pEXArgument H; + + build(H, h, F, CompTableSize); + + long i, d, limit, limit_sqr; + zz_pEX g, X, t, prod; + + + SetX(X); + + i = 0; + g = h; + d = 1; + limit = 2; + limit_sqr = limit*limit; + + set(prod); + + + while (2*d <= deg(f)) { + sub(t, g, X); + MulMod(prod, prod, t, F); + i++; + if (i == limit_sqr) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + + set(prod); + limit++; + limit_sqr = limit*limit; + i = 0; + } + + d = d + 1; + if (2*d <= deg(f)) { + CompMod(g, g, H, F); + } + } + + if (i > 0) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + } + + return 1; +} + +static +void MulByXPlusY(vec_zz_pEX& h, const zz_pEX& f, const zz_pEX& g) +// h represents the bivariate polynomial h[0] + h[1]*Y + ... + h[n-1]*Y^k, +// where the h[i]'s are polynomials in X, each of degree < deg(f), +// and k < deg(g). +// h is replaced by the bivariate polynomial h*(X+Y) (mod f(X), g(Y)). 
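+//
+// Worked illustration (editorial sketch, not from the NTL sources): with
+// k = 0, h = [h0] represents h0(X); multiplying by (X+Y) gives
+// X*h0 + h0*Y, so the first branch below yields h = [X*h0 mod f, h0].
+// Once k = deg(g)-1, the new Y^deg(g) term, with coefficient
+// b = h[deg(g)-1], is folded back using the monic relation
+// Y^deg(g) = -(g[0] + g[1]*Y + ... + g[deg(g)-1]*Y^(deg(g)-1)) (mod g(Y)),
+// which is exactly what the second branch computes via g.rep[i].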
+ +{ + long n = deg(g); + long k = h.length()-1; + + if (k < 0) return; + + if (k < n-1) { + h.SetLength(k+2); + h[k+1] = h[k]; + for (long i = k; i >= 1; i--) { + MulByXMod(h[i], h[i], f); + add(h[i], h[i], h[i-1]); + } + MulByXMod(h[0], h[0], f); + } + else { + zz_pEX b, t; + + b = h[n-1]; + for (long i = n-1; i >= 1; i--) { + mul(t, b, g.rep[i]); + MulByXMod(h[i], h[i], f); + add(h[i], h[i], h[i-1]); + sub(h[i], h[i], t); + } + mul(t, b, g.rep[0]); + MulByXMod(h[0], h[0], f); + sub(h[0], h[0], t); + } + + // normalize + + k = h.length()-1; + while (k >= 0 && IsZero(h[k])) k--; + h.SetLength(k+1); +} + + +static +void IrredCombine(zz_pEX& x, const zz_pEX& f, const zz_pEX& g) +{ + if (deg(f) < deg(g)) { + IrredCombine(x, g, f); + return; + } + + // deg(f) >= deg(g)...not necessary, but maybe a little more + // time & space efficient + + long df = deg(f); + long dg = deg(g); + long m = df*dg; + + vec_zz_pEX h(INIT_SIZE, dg); + + long i; + for (i = 0; i < dg; i++) h[i].SetMaxLength(df); + + h.SetLength(1); + set(h[0]); + + vec_zz_pE a; + + a.SetLength(2*m); + + for (i = 0; i < 2*m; i++) { + a[i] = ConstTerm(h[0]); + if (i < 2*m-1) + MulByXPlusY(h, f, g); + } + + MinPolySeq(x, a, m); +} + + +static +void BuildPrimePowerIrred(zz_pEX& f, long q, long e) +{ + long n = power(q, e); + + do { + random(f, n); + SetCoeff(f, n); + } while (!IterIrredTest(f)); +} + +static +void RecBuildIrred(zz_pEX& f, long u, const FacVec& fvec) +{ + if (fvec[u].link == -1) + BuildPrimePowerIrred(f, fvec[u].q, fvec[u].a); + else { + zz_pEX g, h; + RecBuildIrred(g, fvec[u].link, fvec); + RecBuildIrred(h, fvec[u].link+1, fvec); + IrredCombine(f, g, h); + } +} + + +void BuildIrred(zz_pEX& f, long n) +{ + if (n <= 0) + LogicError("BuildIrred: n must be positive"); + + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in BuildIrred"); + + if (n == 1) { + SetX(f); + return; + } + + FacVec fvec; + + FactorInt(fvec, n); + + RecBuildIrred(f, fvec.length()-1, fvec); +} + + + +#if 0 +void BuildIrred(zz_pEX& f, long n) +{ + if (n <= 0) + LogicError("BuildIrred: n must be positive"); + + if (n == 1) { + SetX(f); + return; + } + + zz_pEX g; + + do { + random(g, n); + SetCoeff(g, n); + } while (!IterIrredTest(g)); + + f = g; + +} +#endif + + + +void BuildRandomIrred(zz_pEX& f, const zz_pEX& g) +{ + zz_pEXModulus G; + zz_pEX h, ff; + + build(G, g); + do { + random(h, deg(g)); + IrredPolyMod(ff, h, G); + } while (deg(ff) < deg(g)); + + f = ff; +} + + +/************* NEW DDF ****************/ + +NTL_CHEAP_THREAD_LOCAL long zz_pEX_GCDTableSize = 4; +NTL_CHEAP_THREAD_LOCAL double zz_pEXFileThresh = NTL_FILE_THRESH; +static NTL_CHEAP_THREAD_LOCAL vec_zz_pEX *BabyStepFile=0; +static NTL_CHEAP_THREAD_LOCAL vec_zz_pEX *GiantStepFile=0; +static NTL_CHEAP_THREAD_LOCAL long use_files; + + +static +double CalcTableSize(long n, long k) +{ + double sz = zz_p::storage(); + sz = sz*zz_pE::degree(); + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_zz_p); + sz = sz*n; + sz = sz + NTL_VECTOR_HEADER_SIZE + sizeof(vec_zz_pE); + sz = sz * k; + sz = sz/1024; + return sz; +} + + + +static +void GenerateBabySteps(zz_pEX& h1, const zz_pEX& f, const zz_pEX& h, long k, + FileList& flist, long verbose) + +{ + double t; + + if (verbose) { cerr << "generating baby steps..."; t = GetTime(); } + + zz_pEXModulus F; + build(F, f); + + zz_pEXArgument H; + +#if 0 + double n2 = sqrt(double(F.n)); + double n4 = sqrt(n2); + double n34 = n2*n4; + long sz = long(ceil(n34/sqrt(sqrt(2.0)))); +#else + long sz = 2*SqrRoot(F.n); +#endif + + build(H, h, F, sz); + + + h1 = h; + + 
long i; + + if (!use_files) { + (*BabyStepFile).SetLength(k-1); + } + + for (i = 1; i <= k-1; i++) { + if (use_files) { + ofstream s; + OpenWrite(s, FileName("baby", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*BabyStepFile)(i) = h1; + + CompMod(h1, h1, H, F); + if (verbose) cerr << "+"; + } + + if (verbose) + cerr << (GetTime()-t) << "\n"; + +} + + +static +void GenerateGiantSteps(const zz_pEX& f, const zz_pEX& h, long l, + FileList& flist, long verbose) +{ + + double t; + + if (verbose) { cerr << "generating giant steps..."; t = GetTime(); } + + zz_pEXModulus F; + build(F, f); + + zz_pEXArgument H; + +#if 0 + double n2 = sqrt(double(F.n)); + double n4 = sqrt(n2); + double n34 = n2*n4; + long sz = long(ceil(n34/sqrt(sqrt(2.0)))); +#else + long sz = 2*SqrRoot(F.n); +#endif + + build(H, h, F, sz); + + zz_pEX h1; + + h1 = h; + + long i; + + if (!use_files) { + (*GiantStepFile).SetLength(l); + } + + for (i = 1; i <= l-1; i++) { + if (use_files) { + ofstream s; + OpenWrite(s, FileName("giant", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*GiantStepFile)(i) = h1; + + CompMod(h1, h1, H, F); + if (verbose) cerr << "+"; + } + + if (use_files) { + ofstream s; + OpenWrite(s, FileName("giant", i), flist); + s << h1 << "\n"; + CloseWrite(s); + } + else + (*GiantStepFile)(i) = h1; + + if (verbose) + cerr << (GetTime()-t) << "\n"; + +} + + +static +void NewAddFactor(vec_pair_zz_pEX_long& u, const zz_pEX& g, long m, long verbose) +{ + long len = u.length(); + + u.SetLength(len+1); + u[len].a = g; + u[len].b = m; + + if (verbose) { + cerr << "split " << m << " " << deg(g) << "\n"; + } +} + + + + +static +void NewProcessTable(vec_pair_zz_pEX_long& u, zz_pEX& f, const zz_pEXModulus& F, + vec_zz_pEX& buf, long size, long StartInterval, + long IntervalLength, long verbose) + +{ + if (size == 0) return; + + zz_pEX& g = buf[size-1]; + + long i; + + for (i = 0; i < size-1; i++) + MulMod(g, g, buf[i], F); + + GCD(g, f, g); + + if (deg(g) == 0) return; + + div(f, f, g); + + long d = (StartInterval-1)*IntervalLength + 1; + i = 0; + long interval = StartInterval; + + while (i < size-1 && 2*d <= deg(g)) { + GCD(buf[i], buf[i], g); + if (deg(buf[i]) > 0) { + NewAddFactor(u, buf[i], interval, verbose); + div(g, g, buf[i]); + } + + i++; + interval++; + d += IntervalLength; + } + + if (deg(g) > 0) { + if (i == size-1) + NewAddFactor(u, g, interval, verbose); + else + NewAddFactor(u, g, (deg(g)+IntervalLength-1)/IntervalLength, verbose); + } +} + + + +static +void FetchGiantStep(zz_pEX& g, long gs, const zz_pEXModulus& F) +{ + if (use_files) { + ifstream s; + OpenRead(s, FileName("giant", gs)); + NTL_INPUT_CHECK_ERR(s >> g); + } + else + g = (*GiantStepFile)(gs); + + + rem(g, g, F); +} + + +static +void FetchBabySteps(vec_zz_pEX& v, long k) +{ + v.SetLength(k); + + SetX(v[0]); + + long i; + for (i = 1; i <= k-1; i++) { + if (use_files) { + ifstream s; + OpenRead(s, FileName("baby", i)); + NTL_INPUT_CHECK_ERR(s >> v[i]); + } + else + v[i] = (*BabyStepFile)(i); + } +} + + + +static +void GiantRefine(vec_pair_zz_pEX_long& u, const zz_pEX& ff, long k, long l, + long verbose) + +{ + double t; + + if (verbose) { + cerr << "giant refine..."; + t = GetTime(); + } + + u.SetLength(0); + + vec_zz_pEX BabyStep; + + FetchBabySteps(BabyStep, k); + + vec_zz_pEX buf(INIT_SIZE, zz_pEX_GCDTableSize); + + zz_pEX f; + f = ff; + + zz_pEXModulus F; + build(F, f); + + zz_pEX g; + zz_pEX h; + + long size = 0; + + long first_gs; + + long d = 1; + + while (2*d <= deg(f)) { + + long old_n = deg(f); + + long gs = 
(d+k-1)/k; + long bs = gs*k - d; + + if (bs == k-1) { + size++; + if (size == 1) first_gs = gs; + FetchGiantStep(g, gs, F); + sub(buf[size-1], g, BabyStep[bs]); + } + else { + sub(h, g, BabyStep[bs]); + MulMod(buf[size-1], buf[size-1], h, F); + } + + if (verbose && bs == 0) cerr << "+"; + + if (size == zz_pEX_GCDTableSize && bs == 0) { + NewProcessTable(u, f, F, buf, size, first_gs, k, verbose); + if (verbose) cerr << "*"; + size = 0; + } + + d++; + + if (2*d <= deg(f) && deg(f) < old_n) { + build(F, f); + + long i; + for (i = 1; i <= k-1; i++) + rem(BabyStep[i], BabyStep[i], F); + } + } + + if (size > 0) { + NewProcessTable(u, f, F, buf, size, first_gs, k, verbose); + if (verbose) cerr << "*"; + } + + if (deg(f) > 0) + NewAddFactor(u, f, 0, verbose); + + if (verbose) { + t = GetTime()-t; + cerr << "giant refine time: " << t << "\n"; + } +} + + +static +void IntervalRefine(vec_pair_zz_pEX_long& factors, const zz_pEX& ff, + long k, long gs, const vec_zz_pEX& BabyStep, long verbose) + +{ + vec_zz_pEX buf(INIT_SIZE, zz_pEX_GCDTableSize); + + zz_pEX f; + f = ff; + + zz_pEXModulus F; + build(F, f); + + zz_pEX g; + + FetchGiantStep(g, gs, F); + + long size = 0; + + long first_d; + + long d = (gs-1)*k + 1; + long bs = k-1; + + while (bs >= 0 && 2*d <= deg(f)) { + + long old_n = deg(f); + + if (size == 0) first_d = d; + rem(buf[size], BabyStep[bs], F); + sub(buf[size], buf[size], g); + size++; + + if (size == zz_pEX_GCDTableSize) { + NewProcessTable(factors, f, F, buf, size, first_d, 1, verbose); + size = 0; + } + + d++; + bs--; + + if (bs >= 0 && 2*d <= deg(f) && deg(f) < old_n) { + build(F, f); + rem(g, g, F); + } + } + + NewProcessTable(factors, f, F, buf, size, first_d, 1, verbose); + + if (deg(f) > 0) + NewAddFactor(factors, f, deg(f), verbose); +} + + + + +static +void BabyRefine(vec_pair_zz_pEX_long& factors, const vec_pair_zz_pEX_long& u, + long k, long l, long verbose) + +{ + double t; + + if (verbose) { + cerr << "baby refine..."; + t = GetTime(); + } + + factors.SetLength(0); + + vec_zz_pEX BabyStep; + + long i; + for (i = 0; i < u.length(); i++) { + const zz_pEX& g = u[i].a; + long gs = u[i].b; + + if (gs == 0 || 2*((gs-1)*k+1) > deg(g)) + NewAddFactor(factors, g, deg(g), verbose); + else { + if (BabyStep.length() == 0) + FetchBabySteps(BabyStep, k); + IntervalRefine(factors, g, k, gs, BabyStep, verbose); + } + } + + if (verbose) { + t = GetTime()-t; + cerr << "baby refine time: " << t << "\n"; + } +} + + + + + + +void NewDDF(vec_pair_zz_pEX_long& factors, + const zz_pEX& f, + const zz_pEX& h, + long verbose) + +{ + if (!IsOne(LeadCoeff(f))) + LogicError("NewDDF: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(0); + append(factors, cons(f, 1L)); + return; + } + + long B = deg(f)/2; + long k = SqrRoot(B); + long l = (B+k-1)/k; + + zz_pEX h1; + + if (CalcTableSize(deg(f), k + l - 1) > zz_pEXFileThresh) + use_files = 1; + else + use_files = 0; + + + FileList flist; + + vec_zz_pEX local_BabyStepFile; + vec_zz_pEX local_GiantStepFile; + + BabyStepFile = &local_BabyStepFile; + GiantStepFile = &local_GiantStepFile; + + GenerateBabySteps(h1, f, h, k, flist, verbose); + + GenerateGiantSteps(f, h1, l, flist, verbose); + + vec_pair_zz_pEX_long u; + GiantRefine(u, f, k, l, verbose); + BabyRefine(factors, u, k, l, verbose); +} + +long IterComputeDegree(const zz_pEX& h, const zz_pEXModulus& F) +{ + long n = deg(F); + + if (n == 1 || IsX(h)) return 1; + + long B = n/2; + long k = SqrRoot(B); + long l = (B+k-1)/k; + + + zz_pEXArgument 
H; + +#if 0 + double n2 = sqrt(double(n)); + double n4 = sqrt(n2); + double n34 = n2*n4; + long sz = long(ceil(n34/sqrt(sqrt(2.0)))); +#else + long sz = 2*SqrRoot(F.n); +#endif + + build(H, h, F, sz); + + zz_pEX h1; + h1 = h; + + vec_zz_pEX baby; + baby.SetLength(k); + + SetX(baby[0]); + + long i; + + for (i = 1; i <= k-1; i++) { + baby[i] = h1; + CompMod(h1, h1, H, F); + if (IsX(h1)) return i+1; + } + + build(H, h1, F, sz); + + long j; + + for (j = 2; j <= l; j++) { + CompMod(h1, h1, H, F); + + for (i = k-1; i >= 0; i--) { + if (h1 == baby[i]) + return j*k-i; + } + } + + return n; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/lzz_pEXTest.c b/thirdparty/linux/ntl/src/lzz_pEXTest.c new file mode 100644 index 0000000000..6120a564bb --- /dev/null +++ b/thirdparty/linux/ntl/src/lzz_pEXTest.c @@ -0,0 +1,57 @@ + +#include <NTL/lzz_pEXFactoring.h> +#include <NTL/lzz_pXFactoring.h> + +NTL_CLIENT + +int main() +{ + zz_p::init(17); + + zz_pX P; + BuildIrred(P, 10); + + zz_pE::init(P); + + zz_pEX f, g, h; + + random(f, 20); + SetCoeff(f, 20); + + random(h, 20); + + g = MinPolyMod(h, f); + + if (deg(g) < 0) TerminalError("bad zz_pEXTest (1)"); + if (CompMod(g, h, f) != 0) + TerminalError("bad zz_pEXTest (2)"); + + + + vec_pair_zz_pEX_long v; + + long j; + for (j = 0; j < 5; j++) { + long n = RandomBnd(40)+10; + cerr << n << " "; + + random(f, n); + SetCoeff(f, n); + + v = CanZass(f); + + g = mul(v); + if (f != g) cerr << "oops1\n"; + + long i; + for (i = 0; i < v.length(); i++) + if (!DetIrredTest(v[i].a)) + TerminalError("bad zz_pEXTest (3)"); + + + } + + cerr << "\n"; + + cerr << "zz_pEXTest OK\n"; +} diff --git a/thirdparty/linux/ntl/src/lzz_pX.c b/thirdparty/linux/ntl/src/lzz_pX.c new file mode 100644 index 0000000000..f62acf8800 --- /dev/null +++ b/thirdparty/linux/ntl/src/lzz_pX.c @@ -0,0 +1,3330 @@ + +#include <NTL/lzz_pX.h> + +#include <NTL/new.h> + +NTL_START_IMPL + + +// NOTE: these are declared extern in lzz_pX.h + +const long zz_pX_mod_crossover[5] = {45, 45, 90, 180, 180}; +const long zz_pX_mul_crossover[5] = {150, 150, 300, 500, 500}; +const long zz_pX_newton_crossover[5] = {150, 150, 300, 700, 700}; +const long zz_pX_div_crossover[5] = {180, 180, 350, 750, 750}; +const long zz_pX_halfgcd_crossover[5] = {90, 90, 180, 350, 350}; +const long zz_pX_gcd_crossover[5] = {400, 400, 800, 1400, 1400}; +const long zz_pX_bermass_crossover[5] = {400, 480, 900, 1600, 1600}; +const long zz_pX_trace_crossover[5] = {200, 350, 450, 800, 800}; + + + + +const zz_pX& zz_pX::zero() +{ + static const zz_pX z; // GLOBAL (assumes C++11 thread-safe init) + return z; +} + + + +istream& operator>>(istream& s, zz_pX& x) +{ + NTL_INPUT_CHECK_RET(s, s >> x.rep); + x.normalize(); + return s; +} + +ostream& operator<<(ostream& s, const zz_pX& a) +{ + return s << a.rep; +} + + +void zz_pX::normalize() +{ + long n; + const zz_p* p; + + n = rep.length(); + if (n == 0) return; + p = rep.elts() + n; + while (n > 0 && IsZero(*--p)) { + n--; + } + rep.SetLength(n); +} + + +long IsZero(const zz_pX& a) +{ + return a.rep.length() == 0; +} + + +long IsOne(const zz_pX& a) +{ + return a.rep.length() == 1 && IsOne(a.rep[0]); +} + +void GetCoeff(zz_p& x, const zz_pX& a, long i) +{ + if (i < 0 || i > deg(a)) + clear(x); + else + x = a.rep[i]; +} + +void SetCoeff(zz_pX& x, long i, zz_p a) +{ + long j, m; + + if (i < 0) + LogicError("SetCoeff: negative index"); + + if (NTL_OVERFLOW(i, 1, 0)) + ResourceError("overflow in SetCoeff"); + + m = deg(x); + + if (i > m && IsZero(a)) return; + + if (i > m) { + x.rep.SetLength(i+1); + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + x.rep[i] = a; +
x.normalize(); +} + +void SetCoeff(zz_pX& x, long i, long a) +{ + if (a == 1) + SetCoeff(x, i); + else + SetCoeff(x, i, to_zz_p(a)); +} + +void SetCoeff(zz_pX& x, long i) +{ + long j, m; + + if (i < 0) + LogicError("coefficient index out of range"); + + if (NTL_OVERFLOW(i, 1, 0)) + ResourceError("overflow in SetCoeff"); + + m = deg(x); + + if (i > m) { + x.rep.SetLength(i+1); + for (j = m+1; j < i; j++) + clear(x.rep[j]); + } + set(x.rep[i]); + x.normalize(); +} + + +void SetX(zz_pX& x) +{ + clear(x); + SetCoeff(x, 1); +} + + +long IsX(const zz_pX& a) +{ + return deg(a) == 1 && IsOne(LeadCoeff(a)) && IsZero(ConstTerm(a)); +} + + + +const zz_p coeff(const zz_pX& a, long i) +{ + if (i < 0 || i > deg(a)) + return zz_p::zero(); + else + return a.rep[i]; +} + + +const zz_p LeadCoeff(const zz_pX& a) +{ + if (IsZero(a)) + return zz_p::zero(); + else + return a.rep[deg(a)]; +} + +const zz_p ConstTerm(const zz_pX& a) +{ + if (IsZero(a)) + return zz_p::zero(); + else + return a.rep[0]; +} + + + +void conv(zz_pX& x, zz_p a) +{ + if (IsZero(a)) + x.rep.SetLength(0); + else { + x.rep.SetLength(1); + x.rep[0] = a; + } +} + +void conv(zz_pX& x, long a) +{ + if (a == 0) { + x.rep.SetLength(0); + return; + } + + zz_p t; + + conv(t, a); + conv(x, t); +} + +void conv(zz_pX& x, const ZZ& a) +{ + if (a == 0) { + x.rep.SetLength(0); + return; + } + + zz_p t; + + conv(t, a); + conv(x, t); +} + + +void conv(zz_pX& x, const vec_zz_p& a) +{ + x.rep = a; + x.normalize(); +} + + +void add(zz_pX& x, const zz_pX& a, const zz_pX& b) +{ + long da = deg(a); + long db = deg(b); + long minab = min(da, db); + long maxab = max(da, db); + x.rep.SetLength(maxab+1); + + long i; + const zz_p *ap, *bp; + zz_p* xp; + long p = zz_p::modulus(); + + for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts(); + i; i--, ap++, bp++, xp++) + xp->LoopHole() = AddMod(rep(*ap), rep(*bp), p); + + if (da > minab && &x != &a) + for (i = da-minab; i; i--, xp++, ap++) + *xp = *ap; + else if (db > minab && &x != &b) + for (i = db-minab; i; i--, xp++, bp++) + *xp = *bp; + else + x.normalize(); +} + +void add(zz_pX& x, const zz_pX& a, zz_p b) +{ + if (a.rep.length() == 0) { + conv(x, b); + } + else { + if (&x != &a) x = a; + add(x.rep[0], x.rep[0], b); + x.normalize(); + } +} + + +void sub(zz_pX& x, const zz_pX& a, const zz_pX& b) +{ + long da = deg(a); + long db = deg(b); + long minab = min(da, db); + long maxab = max(da, db); + x.rep.SetLength(maxab+1); + + long i; + const zz_p *ap, *bp; + zz_p* xp; + long p = zz_p::modulus(); + + for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts(); + i; i--, ap++, bp++, xp++) + xp->LoopHole() = SubMod(rep(*ap), rep(*bp), p); + + if (da > minab && &x != &a) + for (i = da-minab; i; i--, xp++, ap++) + *xp = *ap; + else if (db > minab) + for (i = db-minab; i; i--, xp++, bp++) + xp->LoopHole() = NegateMod(rep(*bp), p); + else + x.normalize(); + +} + +void sub(zz_pX& x, const zz_pX& a, zz_p b) +{ + if (a.rep.length() == 0) { + x.rep.SetLength(1); + negate(x.rep[0], b); + } + else { + if (&x != &a) x = a; + sub(x.rep[0], x.rep[0], b); + } + x.normalize(); +} + +void sub(zz_pX& x, zz_p a, const zz_pX& b) +{ + negate(x, b); + add(x, x, a); +} + +void negate(zz_pX& x, const zz_pX& a) +{ + long n = a.rep.length(); + x.rep.SetLength(n); + + const zz_p* ap = a.rep.elts(); + zz_p* xp = x.rep.elts(); + long i; + long p = zz_p::modulus(); + + for (i = n; i; i--, ap++, xp++) + xp->LoopHole() = NegateMod(rep(*ap), p); +} + +void mul(zz_pX& x, const zz_pX& a, const zz_pX& b) +{ + if (&a == &b) { 
+ sqr(x, a); + return; + } + + if (deg(a) > NTL_zz_pX_MUL_CROSSOVER && deg(b) > NTL_zz_pX_MUL_CROSSOVER) + FFTMul(x, a, b); + else + PlainMul(x, a, b); +} + +void sqr(zz_pX& x, const zz_pX& a) +{ + if (deg(a) > NTL_zz_pX_MUL_CROSSOVER) + FFTSqr(x, a); + else + PlainSqr(x, a); +} + +/* "plain" multiplication and squaring actually incorporates Karatsuba */ + +void PlainMul(zz_p *xp, const zz_p *ap, long sa, const zz_p *bp, long sb) +{ + if (sa == 0 || sb == 0) return; + + long sx = sa+sb-1; + + + if (sa < sb) { + { long t = sa; sa = sb; sb = t; } + { const zz_p *t = ap; ap = bp; bp = t; } + } + + long i, j; + + for (i = 0; i < sx; i++) + clear(xp[i]); + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + for (i = 0; i < sb; i++) { + long t1 = rep(bp[i]); + mulmod_precon_t bpinv = PrepMulModPrecon(t1, p, pinv); + zz_p *xp1 = xp+i; + for (j = 0; j < sa; j++) { + long t2; + t2 = MulModPrecon(rep(ap[j]), t1, p, bpinv); + xp1[j].LoopHole() = AddMod(t2, rep(xp1[j]), p); + } + } +} + +static inline +void reduce(zz_p& r, long a, long p, mulmod_t pinv) +{ + // DIRT: uses undocumented MulMod feature (see sp_arith.h) + r.LoopHole() = MulMod(a, 1L, p, pinv); +} + +void PlainMul_long(zz_p *xp, const zz_p *ap, long sa, const zz_p *bp, long sb) +{ + if (sa == 0 || sb == 0) return; + + long d = sa+sb-2; + + long i, j, jmin, jmax; + + long accum; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + for (i = 0; i <= d; i++) { + jmin = max(0, i-(sb-1)); + jmax = min((sa-1), i); + accum = 0; + for (j = jmin; j <= jmax; j++) { + accum += rep(ap[j])*rep(bp[i-j]); + } + reduce(xp[i], accum, p, pinv); + } +} + +#define KARX (16) + +void KarFold(zz_p *T, const zz_p *b, long sb, long hsa) +{ + long m = sb - hsa; + long i; + long p = zz_p::modulus(); + + for (i = 0; i < m; i++) + T[i].LoopHole() = AddMod(rep(b[i]), rep(b[hsa+i]), p); + + for (i = m; i < hsa; i++) + T[i] = b[i]; +} + +void KarSub(zz_p *T, const zz_p *b, long sb) +{ + long i; + long p = zz_p::modulus(); + + for (i = 0; i < sb; i++) + T[i].LoopHole() = SubMod(rep(T[i]), rep(b[i]), p); +} + +void KarAdd(zz_p *T, const zz_p *b, long sb) +{ + long i; + long p = zz_p::modulus(); + + for (i = 0; i < sb; i++) + T[i].LoopHole() = AddMod(rep(T[i]), rep(b[i]), p); +} + +void KarFix(zz_p *c, const zz_p *b, long sb, long hsa) +{ + long i; + long p = zz_p::modulus(); + + for (i = 0; i < hsa; i++) + c[i] = b[i]; + + for (i = hsa; i < sb; i++) + c[i].LoopHole() = AddMod(rep(c[i]), rep(b[i]), p); +} + + +void KarMul(zz_p *c, const zz_p *a, long sa, const zz_p *b, long sb, zz_p *stk) +{ + if (sa < sb) { + { long t = sa; sa = sb; sb = t; } + { const zz_p *t = a; a = b; b = t; } + } + + if (sb < KARX) { + PlainMul(c, a, sa, b, sb); + return; + } + + long hsa = (sa + 1) >> 1; + + if (hsa < sb) { + /* normal case */ + + long hsa2 = hsa << 1; + + zz_p *T1, *T2, *T3; + + T1 = stk; stk += hsa; + T2 = stk; stk += hsa; + T3 = stk; stk += hsa2 - 1; + + /* compute T1 = a_lo + a_hi */ + + KarFold(T1, a, sa, hsa); + + /* compute T2 = b_lo + b_hi */ + + KarFold(T2, b, sb, hsa); + + /* recursively compute T3 = T1 * T2 */ + + KarMul(T3, T1, hsa, T2, hsa, stk); + + /* recursively compute a_hi * b_hi into high part of c */ + /* and subtract from T3 */ + + KarMul(c + hsa2, a+hsa, sa-hsa, b+hsa, sb-hsa, stk); + KarSub(T3, c + hsa2, sa + sb - hsa2 - 1); + + + /* recursively compute a_lo*b_lo into low part of c */ + /* and subtract from T3 */ + + KarMul(c, a, hsa, b, hsa, stk); + KarSub(T3, c, hsa2 - 1); + + clear(c[hsa2 - 1]); + + /* finally, add 
T3 * X^{hsa} to c */ + + KarAdd(c+hsa, T3, hsa2-1); + } + else { + /* degenerate case */ + + zz_p *T; + + T = stk; stk += hsa + sb - 1; + + /* recursively compute b*a_hi into high part of c */ + + KarMul(c + hsa, a + hsa, sa - hsa, b, sb, stk); + + /* recursively compute b*a_lo into T */ + + KarMul(T, a, hsa, b, sb, stk); + + KarFix(c, T, hsa + sb - 1, hsa); + } +} + +void KarMul_long(zz_p *c, const zz_p *a, long sa, const zz_p *b, long sb, zz_p *stk) +{ + if (sa < sb) { + { long t = sa; sa = sb; sb = t; } + { const zz_p *t = a; a = b; b = t; } + } + + if (sb < KARX) { + PlainMul_long(c, a, sa, b, sb); + return; + } + + long hsa = (sa + 1) >> 1; + + if (hsa < sb) { + /* normal case */ + + long hsa2 = hsa << 1; + + zz_p *T1, *T2, *T3; + + T1 = stk; stk += hsa; + T2 = stk; stk += hsa; + T3 = stk; stk += hsa2 - 1; + + /* compute T1 = a_lo + a_hi */ + + KarFold(T1, a, sa, hsa); + + /* compute T2 = b_lo + b_hi */ + + KarFold(T2, b, sb, hsa); + + /* recursively compute T3 = T1 * T2 */ + + KarMul_long(T3, T1, hsa, T2, hsa, stk); + + /* recursively compute a_hi * b_hi into high part of c */ + /* and subtract from T3 */ + + KarMul_long(c + hsa2, a+hsa, sa-hsa, b+hsa, sb-hsa, stk); + KarSub(T3, c + hsa2, sa + sb - hsa2 - 1); + + + /* recursively compute a_lo*b_lo into low part of c */ + /* and subtract from T3 */ + + KarMul_long(c, a, hsa, b, hsa, stk); + KarSub(T3, c, hsa2 - 1); + + clear(c[hsa2 - 1]); + + /* finally, add T3 * X^{hsa} to c */ + + KarAdd(c+hsa, T3, hsa2-1); + } + else { + /* degenerate case */ + + zz_p *T; + + T = stk; stk += hsa + sb - 1; + + /* recursively compute b*a_hi into high part of c */ + + KarMul_long(c + hsa, a + hsa, sa - hsa, b, sb, stk); + + /* recursively compute b*a_lo into T */ + + KarMul_long(T, a, hsa, b, sb, stk); + + KarFix(c, T, hsa + sb - 1, hsa); + } +} + + +void PlainMul(zz_pX& c, const zz_pX& a, const zz_pX& b) +{ + long sa = a.rep.length(); + long sb = b.rep.length(); + + if (sa == 0 || sb == 0) { + clear(c); + return; + } + + if (sa == 1) { + mul(c, b, a.rep[0]); + return; + } + + if (sb == 1) { + mul(c, a, b.rep[0]); + return; + } + + if (&a == &b) { + PlainSqr(c, a); + return; + } + + vec_zz_p mem; + + const zz_p *ap, *bp; + zz_p *cp; + + if (&a == &c) { + mem = a.rep; + ap = mem.elts(); + } + else + ap = a.rep.elts(); + + if (&b == &c) { + mem = b.rep; + bp = mem.elts(); + } + else + bp = b.rep.elts(); + + c.rep.SetLength(sa+sb-1); + cp = c.rep.elts(); + + long p = zz_p::modulus(); + long use_long = (p < NTL_SP_BOUND/KARX && p*KARX < NTL_SP_BOUND/p); + + if (sa < KARX || sb < KARX) { + if (use_long) + PlainMul_long(cp, ap, sa, bp, sb); + else + PlainMul(cp, ap, sa, bp, sb); + } + else { + /* karatsuba */ + + long n, hn, sp; + + n = max(sa, sb); + sp = 0; + do { + hn = (n+1) >> 1; + sp += (hn << 2) - 1; + n = hn; + } while (n >= KARX); + + vec_zz_p stk; + stk.SetLength(sp); + + if (use_long) + KarMul_long(cp, ap, sa, bp, sb, stk.elts()); + else + KarMul(cp, ap, sa, bp, sb, stk.elts()); + } + + c.normalize(); +} + +void PlainSqr_long(zz_p *xp, const zz_p *ap, long sa) +{ + if (sa == 0) return; + + long da = sa-1; + long d = 2*da; + + long i, j, jmin, jmax, m, m2; + + long accum; + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + for (i = 0; i <= d; i++) { + jmin = max(0, i-da); + jmax = min(da, i); + m = jmax - jmin + 1; + m2 = m >> 1; + jmax = jmin + m2 - 1; + accum = 0; + for (j = jmin; j <= jmax; j++) { + accum += rep(ap[j])*rep(ap[i-j]); + } + accum += accum; + if (m & 1) { + accum += rep(ap[jmax + 1])*rep(ap[jmax + 1]); + } + 
+ reduce(xp[i], accum, p, pinv); + } +} + + +void PlainSqr(zz_p *xp, const zz_p *ap, long sa) +{ + if (sa == 0) return; + + long i, j, k, cnt; + + cnt = 2*sa-1; + for (i = 0; i < cnt; i++) + clear(xp[i]); + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + long t1, t2; + + i = -1; + for (j = 0; j <= sa-2; j++) { + i += 2; + + t1 = MulMod(rep(ap[j]), rep(ap[j]), p, pinv); + t2 = rep(xp[i-1]); + t2 = AddMod(t2, t2, p); + t2 = AddMod(t2, t1, p); + xp[i-1].LoopHole() = t2; + + cnt = sa - 1 - j; + const zz_p *ap1 = ap+(j+1); + zz_p *xp1 = xp+i; + t1 = rep(ap[j]); + mulmod_precon_t tpinv = PrepMulModPrecon(t1, p, pinv); + + for (k = 0; k < cnt; k++) { + t2 = MulModPrecon(rep(ap1[k]), t1, p, tpinv); + t2 = AddMod(t2, rep(xp1[k]), p); + xp1[k].LoopHole() = t2; + } + t2 = rep(*xp1); + t2 = AddMod(t2, t2, p); + (*xp1).LoopHole() = t2; + } + + + t1 = rep(ap[sa-1]); + t1 = MulMod(t1, t1, p, pinv); + xp[2*sa-2].LoopHole() = t1; +} + +#define KARSX (30) + +void KarSqr(zz_p *c, const zz_p *a, long sa, zz_p *stk) +{ + if (sa < KARSX) { + PlainSqr(c, a, sa); + return; + } + + long hsa = (sa + 1) >> 1; + long hsa2 = hsa << 1; + + zz_p *T1, *T2; + + T1 = stk; stk += hsa; + T2 = stk; stk += hsa2-1; + + KarFold(T1, a, sa, hsa); + KarSqr(T2, T1, hsa, stk); + + + KarSqr(c + hsa2, a+hsa, sa-hsa, stk); + KarSub(T2, c + hsa2, sa + sa - hsa2 - 1); + + + KarSqr(c, a, hsa, stk); + KarSub(T2, c, hsa2 - 1); + + clear(c[hsa2 - 1]); + + KarAdd(c+hsa, T2, hsa2-1); +} + +void KarSqr_long(zz_p *c, const zz_p *a, long sa, zz_p *stk) +{ + if (sa < KARSX) { + PlainSqr_long(c, a, sa); + return; + } + + long hsa = (sa + 1) >> 1; + long hsa2 = hsa << 1; + + zz_p *T1, *T2; + + T1 = stk; stk += hsa; + T2 = stk; stk += hsa2-1; + + KarFold(T1, a, sa, hsa); + KarSqr_long(T2, T1, hsa, stk); + + + KarSqr_long(c + hsa2, a+hsa, sa-hsa, stk); + KarSub(T2, c + hsa2, sa + sa - hsa2 - 1); + + + KarSqr_long(c, a, hsa, stk); + KarSub(T2, c, hsa2 - 1); + + clear(c[hsa2 - 1]); + + KarAdd(c+hsa, T2, hsa2-1); +} + +void PlainSqr(zz_pX& c, const zz_pX& a) +{ + if (IsZero(a)) { + clear(c); + return; + } + + vec_zz_p mem; + + const zz_p *ap; + zz_p *cp; + + long sa = a.rep.length(); + + if (&a == &c) { + mem = a.rep; + ap = mem.elts(); + } + else + ap = a.rep.elts(); + + c.rep.SetLength(2*sa-1); + cp = c.rep.elts(); + + long p = zz_p::modulus(); + long use_long = (p < NTL_SP_BOUND/KARSX && p*KARSX < NTL_SP_BOUND/p); + + if (sa < KARSX) { + if (use_long) + PlainSqr_long(cp, ap, sa); + else + PlainSqr(cp, ap, sa); + } + else { + /* karatsuba */ + + long n, hn, sp; + + n = sa; + sp = 0; + do { + hn = (n+1) >> 1; + sp += hn+hn+hn - 1; + n = hn; + } while (n >= KARSX); + + vec_zz_p stk; + stk.SetLength(sp); + + if (use_long) + KarSqr_long(cp, ap, sa, stk.elts()); + else + KarSqr(cp, ap, sa, stk.elts()); + } + + c.normalize(); +} + + +void PlainDivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pX& b) +{ + long da, db, dq, i, j, LCIsOne; + const zz_p *bp; + zz_p *qp; + zz_p *xp; + + + zz_p LCInv, t; + zz_p s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("zz_pX: division by zero"); + + if (da < db) { + r = a; + clear(q); + return; + } + + zz_pX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + vec_zz_p x; + if (&r == &a) + xp = r.rep.elts(); + else { + x = a.rep; + xp = x.elts(); + } + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + long p = zz_p::modulus(); + mulmod_t 
pinv = zz_p::ModulusInverse(); + + for (i = dq; i >= 0; i--) { + t = xp[i+db]; + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + negate(t, t); + + long T = rep(t); + mulmod_precon_t Tpinv = PrepMulModPrecon(T, p, pinv); + + for (j = db-1; j >= 0; j--) { + long S = MulModPrecon(rep(bp[j]), T, p, Tpinv); + S = AddMod(S, rep(xp[i+j]), p); + xp[i+j].LoopHole() = S; + } + } + + r.rep.SetLength(db); + if (&r != &a) { + for (i = 0; i < db; i++) + r.rep[i] = xp[i]; + } + r.normalize(); +} + +void PlainDiv(zz_pX& q, const zz_pX& a, const zz_pX& b) +{ + long da, db, dq, i, j, LCIsOne; + const zz_p *bp; + zz_p *qp; + zz_p *xp; + + + zz_p LCInv, t; + zz_p s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("zz_pX: division by zero"); + + if (da < db) { + clear(q); + return; + } + + zz_pX lb; + + if (&q == &b) { + lb = b; + bp = lb.rep.elts(); + } + else + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + vec_zz_p x; + x.SetLength(da+1-db); + for (i = db; i <= da; i++) + x[i-db] = a.rep[i]; + + xp = x.elts(); + + + + dq = da - db; + q.rep.SetLength(dq+1); + qp = q.rep.elts(); + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + for (i = dq; i >= 0; i--) { + t = xp[i]; + if (!LCIsOne) + mul(t, t, LCInv); + qp[i] = t; + negate(t, t); + + long T = rep(t); + mulmod_precon_t Tpinv = PrepMulModPrecon(T, p, pinv); + + long lastj = max(0, db-i); + + for (j = db-1; j >= lastj; j--) { + long S = MulModPrecon(rep(bp[j]), T, p, Tpinv); + S = AddMod(S, rep(xp[i+j-db]), p); + xp[i+j-db].LoopHole() = S; + } + } +} + + +void PlainRem(zz_pX& r, const zz_pX& a, const zz_pX& b) +{ + long da, db, dq, i, j, LCIsOne; + const zz_p *bp; + zz_p *xp; + + + zz_p LCInv, t; + zz_p s; + + da = deg(a); + db = deg(b); + + if (db < 0) ArithmeticError("zz_pX: division by zero"); + + if (da < db) { + r = a; + return; + } + + bp = b.rep.elts(); + + if (IsOne(bp[db])) + LCIsOne = 1; + else { + LCIsOne = 0; + inv(LCInv, bp[db]); + } + + vec_zz_p x; + + if (&r == &a) + xp = r.rep.elts(); + else { + x = a.rep; + xp = x.elts(); + } + + dq = da - db; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + for (i = dq; i >= 0; i--) { + t = xp[i+db]; + if (!LCIsOne) + mul(t, t, LCInv); + negate(t, t); + + long T = rep(t); + mulmod_precon_t Tpinv = PrepMulModPrecon(T, p, pinv); + + for (j = db-1; j >= 0; j--) { + long S = MulModPrecon(rep(bp[j]), T, p, Tpinv); + S = AddMod(S, rep(xp[i+j]), p); + xp[i+j].LoopHole() = S; + } + } + + r.rep.SetLength(db); + if (&r != &a) { + for (i = 0; i < db; i++) + r.rep[i] = xp[i]; + } + r.normalize(); +} + + +void mul(zz_pX& x, const zz_pX& a, zz_p b) +{ + if (IsZero(b)) { + clear(x); + return; + } + + if (IsOne(b)) { + x = a; + return; + } + + long i, da; + + const zz_p *ap; + zz_p* xp; + + long t; + t = rep(b); + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + mulmod_precon_t bpinv = PrepMulModPrecon(t, p, pinv); + + da = deg(a); + x.rep.SetLength(da+1); + ap = a.rep.elts(); + xp = x.rep.elts(); + + for (i = 0; i <= da; i++) + xp[i].LoopHole() = MulModPrecon(rep(ap[i]), t, p, bpinv); + + x.normalize(); +} + + + +void PlainGCD(zz_pX& x, const zz_pX& a, const zz_pX& b) +{ + zz_p t; + + if (IsZero(b)) + x = a; + else if (IsZero(a)) + x = b; + else { + long n = max(deg(a),deg(b)) + 1; + zz_pX u(INIT_SIZE, n), v(INIT_SIZE, n); + + u = a; + v = b; + do { + PlainRem(u, u, v); + swap(u, v); + } while (!IsZero(v)); + + x = u; + } + + if (IsZero(x)) return; + if 
(IsOne(LeadCoeff(x))) return; + + /* make gcd monic */ + + + inv(t, LeadCoeff(x)); + mul(x, x, t); +} + + + + + +void PlainXGCD(zz_pX& d, zz_pX& s, zz_pX& t, const zz_pX& a, const zz_pX& b) +{ + zz_p z; + + + if (IsZero(b)) { + set(s); + clear(t); + d = a; + } + else if (IsZero(a)) { + clear(s); + set(t); + d = b; + } + else { + long e = max(deg(a), deg(b)) + 1; + + zz_pX temp(INIT_SIZE, e), u(INIT_SIZE, e), v(INIT_SIZE, e), u0(INIT_SIZE, e), v0(INIT_SIZE, e), + u1(INIT_SIZE, e), v1(INIT_SIZE, e), u2(INIT_SIZE, e), v2(INIT_SIZE, e), q(INIT_SIZE, e); + + + set(u1); clear(v1); + clear(u2); set(v2); + u = a; v = b; + + do { + DivRem(q, u, u, v); + swap(u, v); + u0 = u2; + v0 = v2; + mul(temp, q, u2); + sub(u2, u1, temp); + mul(temp, q, v2); + sub(v2, v1, temp); + u1 = u0; + v1 = v0; + } while (!IsZero(v)); + + d = u; + s = u1; + t = v1; + } + + if (IsZero(d)) return; + if (IsOne(LeadCoeff(d))) return; + + /* make gcd monic */ + + inv(z, LeadCoeff(d)); + mul(d, d, z); + mul(s, s, z); + mul(t, t, z); +} + + +void MulMod(zz_pX& x, const zz_pX& a, const zz_pX& b, const zz_pX& f) +{ + if (deg(a) >= deg(f) || deg(b) >= deg(f) || deg(f) == 0) + LogicError("MulMod: bad args"); + + zz_pX t; + + mul(t, a, b); + rem(x, t, f); +} + +void SqrMod(zz_pX& x, const zz_pX& a, const zz_pX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("SqrMod: bad args"); + + zz_pX t; + + sqr(t, a); + rem(x, t, f); +} + + +void InvMod(zz_pX& x, const zz_pX& a, const zz_pX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("InvMod: bad args"); + + zz_pX d, xx, t; + + XGCD(d, xx, t, a, f); + if (!IsOne(d)) + InvModError("zz_pX InvMod: can't compute multiplicative inverse"); + + x = xx; +} + +long InvModStatus(zz_pX& x, const zz_pX& a, const zz_pX& f) +{ + if (deg(a) >= deg(f) || deg(f) == 0) LogicError("InvModStatus: bad args"); + + zz_pX d, t; + + XGCD(d, x, t, a, f); + if (!IsOne(d)) { + x = d; + return 1; + } + else + return 0; +} + + + + +static +void MulByXModAux(zz_pX& h, const zz_pX& a, const zz_pX& f) +{ + long i, n, m; + zz_p* hh; + const zz_p *aa, *ff; + + zz_p t, z; + + n = deg(f); + m = deg(a); + + if (m >= n || n == 0) LogicError("MulByXMod: bad args"); + + if (m < 0) { + clear(h); + return; + } + + if (m < n-1) { + h.rep.SetLength(m+2); + hh = h.rep.elts(); + aa = a.rep.elts(); + for (i = m+1; i >= 1; i--) + hh[i] = aa[i-1]; + clear(hh[0]); + } + else { + h.rep.SetLength(n); + hh = h.rep.elts(); + aa = a.rep.elts(); + ff = f.rep.elts(); + negate(z, aa[n-1]); + if (!IsOne(ff[n])) + div(z, z, ff[n]); + for (i = n-1; i >= 1; i--) { + mul(t, z, ff[i]); + add(hh[i], aa[i-1], t); + } + mul(hh[0], z, ff[0]); + h.normalize(); + } +} + +void MulByXMod(zz_pX& h, const zz_pX& a, const zz_pX& f) +{ + if (&h == &f) { + zz_pX hh; + MulByXModAux(hh, a, f); + h = hh; + } + else + MulByXModAux(h, a, f); +} + + +void random(zz_pX& x, long n) +{ + long i; + + x.rep.SetLength(n); + VectorRandom(n, x.rep.elts()); + x.normalize(); +} + + + + + +void fftRep::DoSetSize(long NewK, long NewNumPrimes) +{ + if (NewK < -1) LogicError("bad arg to fftRep::SetSize()"); + + if (NewK >= NTL_BITS_PER_LONG-1) + ResourceError("bad arg to fftRep::SetSize()"); + + if (NewK == -1) { + k = -1; + return; + } + + if (NewNumPrimes == 0) + NewNumPrimes = zz_pInfo->NumPrimes; + + if (MaxK >= 0 && NumPrimes != NewNumPrimes) + LogicError("fftRep: inconsistent use"); + + if (NewK <= MaxK) { + k = NewK; + return; + } + + UniqueArray<long> new_tbl[4]; + long i; + + for (i = 0; i < NewNumPrimes; i++) + new_tbl[i].SetLength(1L << NewK); + + for (i = 0; i < 
NewNumPrimes; i++) + tbl[i].move(new_tbl[i]); + + NumPrimes = NewNumPrimes; + k = MaxK = NewK; +} + +void fftRep::SetSize(long NewK) +{ + DoSetSize(NewK, 0); +} + + +fftRep& fftRep::operator=(const fftRep& R) +{ + if (this == &R) return *this; + + if (MaxK >= 0 && R.MaxK >= 0 && NumPrimes != R.NumPrimes) + LogicError("fftRep: inconsistent use"); + + if (R.k < 0) { + k = -1; + return *this; + } + + DoSetSize(R.k, R.NumPrimes); + long i, j, n; + + n = 1L << k; + + for (i = 0; i < NumPrimes; i++) + for (j = 0; j < n; j++) + tbl[i][j] = R.tbl[i][j]; + + return *this; +} + + + +static inline +void FromModularRep(zz_p& res, long *a, zz_pInfoT* info) +{ + long n = info->NumPrimes; + long p = info->p; + mulmod_t pinv = info->pinv; + long *CoeffModP = info->CoeffModP.elts(); + double *x = info->x.elts(); + long *u = info->u.elts(); + mulmod_precon_t *uqinv = info->uqinv.elts(); + long MinusMModP = info->MinusMModP; + mulmod_precon_t MinusMModPpinv = info->MinusMModPpinv; + mulmod_precon_t *CoeffModPpinv = info->CoeffModPpinv.elts(); + + long q, s, t; + long i; + double y; + + y = double(0L); + t = 0; + + for (i = 0; i < n; i++) { + s = MulModPrecon(a[i], u[i], GetFFTPrime(i), uqinv[i]); + y = y + double(s)*GetFFTPrimeRecip(i); + + + // DIRT: uses undocumented MulMod feature (see sp_arith.h) + // input s is not reduced mod p + s = MulModPrecon(s, CoeffModP[i], p, CoeffModPpinv[i]); + + t = AddMod(t, s, p); + } + + q = (long) (y + 0.5); + + // DIRT: uses undocumented MulMod feature (see sp_arith.h) + // input q may not be reduced mod p + s = MulModPrecon(q, MinusMModP, p, MinusMModPpinv); + + t = AddMod(t, s, p); + res.LoopHole() = t; + +} + + +#if 0 +// converts entries lo..lo+cnt-1 in R and stores results into res +static +void FromModularRep(zz_p* res, const fftRep& R, long lo, long cnt, + zz_pInfoT* info) +{ + if (cnt <= 0) return; + + long nprimes = info->NumPrimes; + long p = info->p; + mulmod_t pinv = info->pinv; + long *CoeffModP = info->CoeffModP.elts(); + double *x = info->x.elts(); + long *u = info->u.elts(); + mulmod_precon_t *uqinv = info->uqinv.elts(); + long MinusMModP = info->MinusMModP; + mulmod_precon_t MinusMModPpinv = info->MinusMModPpinv; + mulmod_precon_t *CoeffModPpinv = info->CoeffModPpinv.elts(); + + long primes[4]; + double prime_recip[4]; + long *tbl[4]; + + long q, s, t; + long i, j; + double y; + + for (i = 0; i < nprimes; i++) { + primes[i] = GetFFTPrime(i); + prime_recip[i] = GetFFTPrimeRecip(i); + tbl[i] = R.tbl[i].get(); + } + + for (j = 0; j < cnt; j++) { + y = double(0L); + t = 0; + + for (i = 0; i < nprimes; i++) { + s = MulModPrecon(tbl[i][j+lo], u[i], primes[i], uqinv[i]); + y = y + double(s)*prime_recip[i]; + + + // DIRT: uses undocumented MulMod feature (see sp_arith.h) + // input s is not reduced mod p + s = MulModPrecon(s, CoeffModP[i], p, CoeffModPpinv[i]); + + t = AddMod(t, s, p); + } + + q = (long) (y + 0.5); + + // DIRT: uses undocumented MulMod feature (see sp_arith.h) + // input q may not be reduced mod p + s = MulModPrecon(q, MinusMModP, p, MinusMModPpinv); + + t = AddMod(t, s, p); + res[j].LoopHole() = t; + } + +} +#else + +#define NTL_FMR_LOOP_BODY(i) \ + s = MulModPrecon(tbl[i][j+lo], u[i], primes[i], uqinv[i]);\ + y = y + double(s)*prime_recip[i];\ +\ +\ + /* DIRT: uses undocumented MulMod feature (see sp_arith.h) */\ + /* input s is not reduced mod p */\ + s = MulModPrecon(s, CoeffModP[i], p, CoeffModPpinv[i]);\ +\ + t = AddMod(t, s, p);\ + + +#define NTL_FMP_OUTER_LOOP(XXX) \ + for (j = 0; j < cnt; j++) {\ + y = double(0L);\ + t = 0;\ + XXX \ + q 
= (long) (y + 0.5);\ + /* DIRT: uses undocumented MulMod feature (see sp_arith.h) */\ + /* input q may not be reduced mod p */\ + s = MulModPrecon(q, MinusMModP, p, MinusMModPpinv);\ + t = AddMod(t, s, p);\ + res[j].LoopHole() = t;\ + }\ + + + +// converts entries lo..lo+cnt-1 in R and stores results into res +static +void FromModularRep(zz_p* res, const fftRep& R, long lo, long cnt, + zz_pInfoT* info) +{ + if (cnt <= 0) return; + + long nprimes = info->NumPrimes; + long p = info->p; + mulmod_t pinv = info->pinv; + long *CoeffModP = info->CoeffModP.elts(); + double *x = info->x.elts(); + long *u = info->u.elts(); + mulmod_precon_t *uqinv = info->uqinv.elts(); + long MinusMModP = info->MinusMModP; + mulmod_precon_t MinusMModPpinv = info->MinusMModPpinv; + mulmod_precon_t *CoeffModPpinv = info->CoeffModPpinv.elts(); + + long primes[4]; + double prime_recip[4]; + long *tbl[4]; + + long q, s, t; + long i, j; + double y; + + for (i = 0; i < nprimes; i++) { + primes[i] = GetFFTPrime(i); + prime_recip[i] = GetFFTPrimeRecip(i); + tbl[i] = R.tbl[i].get(); + } + + if (nprimes == 1) { + long *tbl_0 = tbl[0]; + mulmod_precon_t CoeffModPpinv_0 = CoeffModPpinv[0]; + long primes_0 = primes[0]; + long hp0 = primes_0 >> 1; + + for (j = 0; j < cnt; j++) { + s = tbl_0[j+lo]; + + // DIRT: uses undocumented MulMod feature (see sp_arith.h) + // input s is not reduced mod p + t = MulModPrecon(s, 1, p, CoeffModPpinv_0); + + res[j].LoopHole() = AddMod(t, sp_SignMask(hp0-s) & MinusMModP, p); + } + } + else if (nprimes == 2) { + NTL_FMP_OUTER_LOOP( NTL_FMR_LOOP_BODY(0) NTL_FMR_LOOP_BODY(1) ) + } + else if (nprimes == 3) { + NTL_FMP_OUTER_LOOP( NTL_FMR_LOOP_BODY(0) NTL_FMR_LOOP_BODY(1) NTL_FMR_LOOP_BODY(2) ) + } + else { // nprimes == 4 + NTL_FMP_OUTER_LOOP( NTL_FMR_LOOP_BODY(0) NTL_FMR_LOOP_BODY(1) NTL_FMR_LOOP_BODY(2) NTL_FMR_LOOP_BODY(3) ) + } +} + + + + +#endif + + + + +void TofftRep(fftRep& y, const zz_pX& x, long k, long lo, long hi) +// computes an n = 2^k point convolution. +// if deg(x) >= 2^k, then x is first reduced modulo X^n-1. 
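+//
+// One typical calling pattern (this is essentially the body of FFTMul,
+// defined later in this file):
+//
+//    long d = deg(a) + deg(b);
+//    long k = NextPowerOfTwo(d+1);   // ensure 2^k > deg(a*b)
+//    fftRep R1(INIT_SIZE, k), R2(INIT_SIZE, k);
+//    TofftRep(R1, a, k);             // forward transform of a
+//    TofftRep(R2, b, k);             // forward transform of b
+//    mul(R1, R1, R2);                // pointwise multiplication
+//    FromfftRep(x, R1, 0, d);        // recover coefficients 0..d of a*b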
+{ + zz_pInfoT *info = zz_pInfo; + long p = info->p; + + long n, i, j, m, j1; + long accum; + long nprimes = info->NumPrimes; + + + if (k > info->MaxRoot) + ResourceError("Polynomial too big for FFT"); + + if (lo < 0) + LogicError("bad arg to TofftRep"); + + hi = min(hi, deg(x)); + + y.SetSize(k); + + n = 1L << k; + + m = max(hi-lo + 1, 0); + + const zz_p *xx = x.rep.elts(); + + FFTPrimeInfo *p_info = info->p_info; + + if (p_info) { + if (n >= m) { + long *yp = &y.tbl[0][0]; + for (j = 0; j < m; j++) { + yp[j] = rep(xx[j+lo]); + } + for (j = m; j < n; j++) { + yp[j] = 0; + } + } + else { + for (j = 0; j < n; j++) { + accum = rep(xx[j+lo]); + for (j1 = j + n; j1 < m; j1 += n) + accum = AddMod(accum, rep(xx[j1+lo]), p); + y.tbl[0][j] = accum; + } + } + } + else { + if (n >= m) { + for (i = 0; i < nprimes; i++) { + long q = GetFFTPrime(i); + long *yp = &y.tbl[i][0]; + for (j = 0; j < m; j++) { + long t = rep(xx[j+lo]); + t = sp_CorrectExcess(t, q); + yp[j] = t; + } + for (j = m; j < n; j++) { + yp[j] = 0; + } + } + } + else { + for (j = 0; j < n; j++) { + accum = rep(xx[j+lo]); + for (j1 = j + n; j1 < m; j1 += n) + accum = AddMod(accum, rep(xx[j1+lo]), p); + for (i = 0; i < nprimes; i++) { + long q = GetFFTPrime(i); + long t = accum; + t = sp_CorrectExcess(t, q); + y.tbl[i][j] = t; + } + } + } + } + + + if (p_info) { + long *yp = &y.tbl[0][0]; + FFTFwd(yp, yp, k, *p_info); + } + else { + for (i = 0; i < nprimes; i++) { + long *yp = &y.tbl[i][0]; + FFTFwd(yp, yp, k, i); + } + } +} + + + +void RevTofftRep(fftRep& y, const vec_zz_p& x, + long k, long lo, long hi, long offset) +// computes an n = 2^k point convolution of X^offset*x[lo..hi] mod X^n-1 +// using "inverted" evaluation points. + +{ + zz_pInfoT *info = zz_pInfo; + long p = info->p; + + long n, i, j, m, j1; + long accum; + long NumPrimes = info->NumPrimes; + + if (k > info->MaxRoot) + ResourceError("Polynomial too big for FFT"); + + if (lo < 0) + LogicError("bad arg to TofftRep"); + + hi = min(hi, x.length()-1); + + y.SetSize(k); + + n = 1L << k; + + m = max(hi-lo + 1, 0); + + const zz_p *xx = x.elts(); + + FFTPrimeInfo *p_info = info->p_info; + + offset = offset & (n-1); + + if (p_info) { + for (j = 0; j < n; j++) { + if (j >= m) { + y.tbl[0][offset] = 0; + } + else { + accum = rep(xx[j+lo]); + for (j1 = j + n; j1 < m; j1 += n) + accum = AddMod(accum, rep(xx[j1+lo]), p); + y.tbl[0][offset] = accum; + } + offset = (offset + 1) & (n-1); + } + } + else { + for (j = 0; j < n; j++) { + if (j >= m) { + for (i = 0; i < NumPrimes; i++) + y.tbl[i][offset] = 0; + } + else { + accum = rep(xx[j+lo]); + for (j1 = j + n; j1 < m; j1 += n) + accum = AddMod(accum, rep(xx[j1+lo]), p); + for (i = 0; i < NumPrimes; i++) { + long q = GetFFTPrime(i); + long t = accum; + t = sp_CorrectExcess(t, q); + y.tbl[i][offset] = t; + } + } + offset = (offset + 1) & (n-1); + } + } + + + if (p_info) { + long *yp = &y.tbl[0][0]; + FFTRev1(yp, yp, k, *p_info); + } + else { + for (i = 0; i < info->NumPrimes; i++) { + long *yp = &y.tbl[i][0]; + FFTRev1(yp, yp, k, i); + } + } +} + +void FromfftRep(zz_pX& x, fftRep& y, long lo, long hi) + + // converts from FFT-representation to coefficient representation + // only the coefficients lo..hi are computed + + +{ + zz_pInfoT *info = zz_pInfo; + + long k, n, i, j, l; + long NumPrimes = info->NumPrimes; + + + k = y.k; + n = (1L << k); + + FFTPrimeInfo *p_info = info->p_info; + + if (p_info) { + long *yp = &y.tbl[0][0]; + FFTRev1(yp, yp, k, *p_info); + } + else { + for (i = 0; i < NumPrimes; i++) { + long *yp = &y.tbl[i][0]; + 
FFTRev1(yp, yp, k, i); + } + } + + hi = min(hi, n-1); + l = hi-lo+1; + l = max(l, 0); + x.rep.SetLength(l); + + if (p_info) { + zz_p *xp = x.rep.elts(); + long *yp = &y.tbl[0][0]; + for (j = 0; j < l; j++) + xp[j].LoopHole() = yp[j+lo]; + } + else { + FromModularRep(x.rep.elts(), y, lo, l, info); + } + + x.normalize(); +} + +void RevFromfftRep(vec_zz_p& x, fftRep& y, long lo, long hi) + + // converts from FFT-representation to coefficient representation + // using "inverted" evaluation points. + // only the coefficients lo..hi are computed + + +{ + zz_pInfoT *info = zz_pInfo; + + long k, n, i, j, l; + long NumPrimes = info->NumPrimes; + + + k = y.k; + n = (1L << k); + + FFTPrimeInfo *p_info = info->p_info; + + if (p_info) { + long *yp = &y.tbl[0][0]; + FFTFwd(yp, yp, k, *p_info); + } + else { + for (i = 0; i < NumPrimes; i++) { + long *yp = &y.tbl[i][0]; + FFTFwd(yp, yp, k, i); + } + } + + hi = min(hi, n-1); + l = hi-lo+1; + l = max(l, 0); + x.SetLength(l); + + if (p_info) { + zz_p *xp = x.elts(); + long *yp = &y.tbl[0][0]; + for (j = 0; j < l; j++) + xp[j].LoopHole() = yp[j+lo]; + } + else { + FromModularRep(x.elts(), y, lo, l, info); + } +} + +void NDFromfftRep(zz_pX& x, const fftRep& y, long lo, long hi, fftRep& z) +{ + zz_pInfoT *info = zz_pInfo; + + long k, n, i, j, l; + long NumPrimes = info->NumPrimes; + + + k = y.k; + n = (1L << k); + + z.SetSize(k); + + FFTPrimeInfo *p_info = info->p_info; + + if (p_info) { + long *zp = &z.tbl[0][0]; + const long *yp = &y.tbl[0][0]; + FFTRev1(zp, yp, k, *p_info); + } + else { + for (i = 0; i < NumPrimes; i++) { + long *zp = &z.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + FFTRev1(zp, yp, k, i); + } + } + + hi = min(hi, n-1); + l = hi-lo+1; + l = max(l, 0); + x.rep.SetLength(l); + + if (p_info) { + zz_p *xp = x.rep.elts(); + long *zp = &z.tbl[0][0]; + for (j = 0; j < l; j++) + xp[j].LoopHole() = zp[j+lo]; + } + else { + FromModularRep(x.rep.elts(), z, lo, l, info); + } + + x.normalize(); +} + +void NDFromfftRep(zz_pX& x, fftRep& y, long lo, long hi) +{ + fftRep z; + NDFromfftRep(x, y, lo, hi, z); +} + +void FromfftRep(zz_p* x, fftRep& y, long lo, long hi) + + // converts from FFT-representation to coefficient representation + // only the coefficients lo..hi are computed + + +{ + zz_pInfoT *info = zz_pInfo; + + long k, n, i, j; + long NumPrimes = info->NumPrimes; + + + k = y.k; + n = (1L << k); + + FFTPrimeInfo *p_info = info->p_info; + + if (p_info) { + long *yp = &y.tbl[0][0]; + FFTRev1(yp, yp, k, *p_info); + + for (j = lo; j <= hi; j++) { + if (j >= n) + clear(x[j-lo]); + else { + x[j-lo].LoopHole() = y.tbl[0][j]; + } + } + } + else { + for (i = 0; i < NumPrimes; i++) { + long *yp = &y.tbl[i][0]; + FFTRev1(yp, yp, k, i); + } + + // take coefficients lo..min(hi, n-1) from y + // zero out coefficients max(n, lo)..hi + + long l = min(hi, n-1) - lo + 1; + l = max(l, 0); + FromModularRep(x, y, lo, l, info); + for (j = max(n, lo); j <= hi; j++) clear(x[j-lo]); + } +} + + +void mul(fftRep& z, const fftRep& x, const fftRep& y) +{ + zz_pInfoT *info = zz_pInfo; + + long k, n, i, j; + + if (x.k != y.k) LogicError("FFT rep mismatch"); + + k = x.k; + n = 1L << k; + + z.SetSize(k); + + FFTPrimeInfo *p_info = info->p_info; + + if (p_info) { + long *zp = &z.tbl[0][0]; + const long *xp = &x.tbl[0][0]; + const long *yp = &y.tbl[0][0]; + long q = p_info->q; + mulmod_t qinv = p_info->qinv; + + if (NormalizedModulus(qinv)) { + for (j = 0; j < n; j++) + zp[j] = NormalizedMulMod(xp[j], yp[j], q, qinv); + } + else { + for (j = 0; j < n; j++) + zp[j] = MulMod(xp[j], 
yp[j], q, qinv); + } + } + else { + for (i = 0; i < info->NumPrimes; i++) { + long *zp = &z.tbl[i][0]; + const long *xp = &x.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + long q = GetFFTPrime(i); + mulmod_t qinv = GetFFTPrimeInv(i); + + for (j = 0; j < n; j++) + zp[j] = NormalizedMulMod(xp[j], yp[j], q, qinv); + } + } +} + +void sub(fftRep& z, const fftRep& x, const fftRep& y) +{ + zz_pInfoT *info = zz_pInfo; + + long k, n, i, j; + + if (x.k != y.k) LogicError("FFT rep mismatch"); + + k = x.k; + n = 1L << k; + + z.SetSize(k); + + FFTPrimeInfo *p_info = info->p_info; + + if (p_info) { + long *zp = &z.tbl[0][0]; + const long *xp = &x.tbl[0][0]; + const long *yp = &y.tbl[0][0]; + long q = p_info->q; + + for (j = 0; j < n; j++) + zp[j] = SubMod(xp[j], yp[j], q); + } + else { + for (i = 0; i < info->NumPrimes; i++) { + long *zp = &z.tbl[i][0]; + const long *xp = &x.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + long q = GetFFTPrime(i); + + for (j = 0; j < n; j++) + zp[j] = SubMod(xp[j], yp[j], q); + } + } +} + +void add(fftRep& z, const fftRep& x, const fftRep& y) +{ + zz_pInfoT *info = zz_pInfo; + + long k, n, i, j; + + if (x.k != y.k) LogicError("FFT rep mismatch"); + + k = x.k; + n = 1L << k; + + z.SetSize(k); + + FFTPrimeInfo *p_info = info->p_info; + + if (p_info) { + long *zp = &z.tbl[0][0]; + const long *xp = &x.tbl[0][0]; + const long *yp = &y.tbl[0][0]; + long q = p_info->q; + + for (j = 0; j < n; j++) + zp[j] = AddMod(xp[j], yp[j], q); + } + else { + for (i = 0; i < info->NumPrimes; i++) { + long *zp = &z.tbl[i][0]; + const long *xp = &x.tbl[i][0]; + const long *yp = &y.tbl[i][0]; + long q = GetFFTPrime(i); + + for (j = 0; j < n; j++) + zp[j] = AddMod(xp[j], yp[j], q); + } + } +} + + +void reduce(fftRep& x, const fftRep& a, long k) + // reduces a 2^l point FFT-rep to a 2^k point FFT-rep + // input may alias output +{ + zz_pInfoT *info = zz_pInfo; + + long i, j, l, n; + long* xp; + const long* ap; + + l = a.k; + n = 1L << k; + + if (l < k) LogicError("reduce: bad operands"); + + x.SetSize(k); + + for (i = 0; i < info->NumPrimes; i++) { + ap = &a.tbl[i][0]; + xp = &x.tbl[i][0]; + for (j = 0; j < n; j++) + xp[j] = ap[j << (l-k)]; + } +} + +void AddExpand(fftRep& x, const fftRep& a) +// x = x + (an "expanded" version of a) +{ + zz_pInfoT *info = zz_pInfo; + + long i, j, l, k, n; + + l = x.k; + k = a.k; + n = 1L << k; + + if (l < k) LogicError("AddExpand: bad args"); + + FFTPrimeInfo *p_info = info->p_info; + + if (p_info) { + long q = p_info->q; + const long *ap = &a.tbl[0][0]; + long *xp = &x.tbl[0][0]; + for (j = 0; j < n; j++) { + long j1 = j << (l-k); + xp[j1] = AddMod(xp[j1], ap[j], q); + } + } + else { + for (i = 0; i < info->NumPrimes; i++) { + long q = GetFFTPrime(i); + const long *ap = &a.tbl[i][0]; + long *xp = &x.tbl[i][0]; + for (j = 0; j < n; j++) { + long j1 = j << (l-k); + xp[j1] = AddMod(xp[j1], ap[j], q); + } + } + } +} + + + +void FFTMul(zz_pX& x, const zz_pX& a, const zz_pX& b) +{ + long k, d; + + if (IsZero(a) || IsZero(b)) { + clear(x); + return; + } + + d = deg(a) + deg(b); + k = NextPowerOfTwo(d+1); + + fftRep R1(INIT_SIZE, k), R2(INIT_SIZE, k); + + TofftRep(R1, a, k); + TofftRep(R2, b, k); + mul(R1, R1, R2); + FromfftRep(x, R1, 0, d); +} + +void FFTSqr(zz_pX& x, const zz_pX& a) +{ + long k, d; + + if (IsZero(a)) { + clear(x); + return; + } + + d = 2*deg(a); + k = NextPowerOfTwo(d+1); + + fftRep R1(INIT_SIZE, k); + + TofftRep(R1, a, k); + mul(R1, R1, R1); + FromfftRep(x, R1, 0, d); +} + + +void CopyReverse(zz_pX& x, const zz_pX& a, long lo, long hi) + + // 
x[0..hi-lo] = reverse(a[lo..hi]), with zero fill + // input may not alias output + +{ + long i, j, n, m; + + n = hi-lo+1; + m = a.rep.length(); + + x.rep.SetLength(n); + + const zz_p* ap = a.rep.elts(); + zz_p* xp = x.rep.elts(); + + for (i = 0; i < n; i++) { + j = hi-i; + if (j < 0 || j >= m) + clear(xp[i]); + else + xp[i] = ap[j]; + } + + x.normalize(); +} + +void copy(zz_pX& x, const zz_pX& a, long lo, long hi) + + // x[0..hi-lo] = a[lo..hi], with zero fill + // input may not alias output + +{ + long i, j, n, m; + + n = hi-lo+1; + m = a.rep.length(); + + x.rep.SetLength(n); + + const zz_p* ap = a.rep.elts(); + zz_p* xp = x.rep.elts(); + + for (i = 0; i < n; i++) { + j = lo + i; + if (j < 0 || j >= m) + clear(xp[i]); + else + xp[i] = ap[j]; + } + + x.normalize(); +} + + +void rem21(zz_pX& x, const zz_pX& a, const zz_pXModulus& F) +{ + long i, da, ds, n, kk; + + da = deg(a); + n = F.n; + + if (da > 2*n-2) + LogicError("bad args to rem(zz_pX,zz_pX,zz_pXModulus)"); + + + if (da < n) { + x = a; + return; + } + + if (!F.UseFFT || da - n <= NTL_zz_pX_MOD_CROSSOVER) { + PlainRem(x, a, F.f); + return; + } + + fftRep R1(INIT_SIZE, F.l); + zz_pX P1(INIT_SIZE, n); + + TofftRep(R1, a, F.l, n, 2*(n-1)); + mul(R1, R1, F.HRep); + FromfftRep(P1, R1, n-2, 2*n-4); + + TofftRep(R1, P1, F.k); + mul(R1, R1, F.FRep); + FromfftRep(P1, R1, 0, n-1); + + ds = deg(P1); + + kk = 1L << F.k; + + x.rep.SetLength(n); + const zz_p* aa = a.rep.elts(); + const zz_p* ss = P1.rep.elts(); + zz_p* xx = x.rep.elts(); + + for (i = 0; i < n; i++) { + if (i <= ds) + sub(xx[i], aa[i], ss[i]); + else + xx[i] = aa[i]; + + if (i + kk <= da) + add(xx[i], xx[i], aa[i+kk]); + } + + x.normalize(); +} + + +void DivRem21(zz_pX& q, zz_pX& x, const zz_pX& a, const zz_pXModulus& F) +{ + long i, da, ds, n, kk; + + da = deg(a); + n = F.n; + + if (da > 2*n-2) + LogicError("bad args to rem(zz_pX,zz_pX,zz_pXModulus)"); + + + if (da < n) { + x = a; + clear(q); + return; + } + + if (!F.UseFFT || da - n <= NTL_zz_pX_MOD_CROSSOVER) { + PlainDivRem(q, x, a, F.f); + return; + } + + fftRep R1(INIT_SIZE, F.l); + zz_pX P1(INIT_SIZE, n), qq; + + TofftRep(R1, a, F.l, n, 2*(n-1)); + mul(R1, R1, F.HRep); + FromfftRep(P1, R1, n-2, 2*n-4); + qq = P1; + + TofftRep(R1, P1, F.k); + mul(R1, R1, F.FRep); + FromfftRep(P1, R1, 0, n-1); + + ds = deg(P1); + + kk = 1L << F.k; + + x.rep.SetLength(n); + const zz_p* aa = a.rep.elts(); + const zz_p* ss = P1.rep.elts(); + zz_p* xx = x.rep.elts(); + + for (i = 0; i < n; i++) { + if (i <= ds) + sub(xx[i], aa[i], ss[i]); + else + xx[i] = aa[i]; + + if (i + kk <= da) + add(xx[i], xx[i], aa[i+kk]); + } + + x.normalize(); + q = qq; +} + +void div21(zz_pX& x, const zz_pX& a, const zz_pXModulus& F) +{ + long da, n; + + da = deg(a); + n = F.n; + + if (da > 2*n-2) + LogicError("bad args to rem(zz_pX,zz_pX,zz_pXModulus)"); + + + if (da < n) { + clear(x); + return; + } + + if (!F.UseFFT || da - n <= NTL_zz_pX_MOD_CROSSOVER) { + PlainDiv(x, a, F.f); + return; + } + + fftRep R1(INIT_SIZE, F.l); + zz_pX P1(INIT_SIZE, n); + + TofftRep(R1, a, F.l, n, 2*(n-1)); + mul(R1, R1, F.HRep); + FromfftRep(x, R1, n-2, 2*n-4); +} + + +void rem(zz_pX& x, const zz_pX& a, const zz_pXModulus& F) +{ + long da = deg(a); + long n = F.n; + + if (n < 0) LogicError("rem: uninitialized modulus"); + + if (da <= 2*n-2) { + rem21(x, a, F); + return; + } + else if (!F.UseFFT || da-n <= NTL_zz_pX_MOD_CROSSOVER) { + PlainRem(x, a, F.f); + return; + } + + zz_pX buf(INIT_SIZE, 2*n-1); + + long a_len = da+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + 
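// Horner-style block reduction: shift the residue held in buf up by amt + // positions (multiply by x^amt) and pull in the next amt coefficients of a, + // working from the top of a down; rem21 below then reduces buf, whose + // degree at that point is at most 2*n-2. +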
long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + rem21(buf, buf, F); + + a_len -= amt; + } + + x = buf; +} + +void DivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pXModulus& F) +{ + long da = deg(a); + long n = F.n; + + if (n < 0) LogicError("DivRem: uninitialized modulus"); + + if (da <= 2*n-2) { + DivRem21(q, r, a, F); + return; + } + else if (!F.UseFFT || da-n <= NTL_zz_pX_MOD_CROSSOVER) { + PlainDivRem(q, r, a, F.f); + return; + } + + zz_pX buf(INIT_SIZE, 2*n-1); + zz_pX qbuf(INIT_SIZE, n-1); + + zz_pX qq; + qq.rep.SetLength(da-n+1); + + long a_len = da+1; + long q_hi = da-n+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + DivRem21(qbuf, buf, buf, F); + long dl = qbuf.rep.length(); + a_len = a_len - amt; + for(i = 0; i < dl; i++) + qq.rep[a_len+i] = qbuf.rep[i]; + for(i = dl+a_len; i < q_hi; i++) + clear(qq.rep[i]); + q_hi = a_len; + } + + r = buf; + + qq.normalize(); + q = qq; +} + +void div(zz_pX& q, const zz_pX& a, const zz_pXModulus& F) +{ + long da = deg(a); + long n = F.n; + + if (n < 0) LogicError("div: uninitialized modulus"); + + if (da <= 2*n-2) { + div21(q, a, F); + return; + } + else if (!F.UseFFT || da-n <= NTL_zz_pX_MOD_CROSSOVER) { + PlainDiv(q, a, F.f); + return; + } + + zz_pX buf(INIT_SIZE, 2*n-1); + zz_pX qbuf(INIT_SIZE, n-1); + + zz_pX qq; + qq.rep.SetLength(da-n+1); + + long a_len = da+1; + long q_hi = da-n+1; + + while (a_len > 0) { + long old_buf_len = buf.rep.length(); + long amt = min(2*n-1-old_buf_len, a_len); + + buf.rep.SetLength(old_buf_len+amt); + + long i; + + for (i = old_buf_len+amt-1; i >= amt; i--) + buf.rep[i] = buf.rep[i-amt]; + + for (i = amt-1; i >= 0; i--) + buf.rep[i] = a.rep[a_len-amt+i]; + + buf.normalize(); + + a_len = a_len - amt; + if (a_len > 0) + DivRem21(qbuf, buf, buf, F); + else + div21(qbuf, buf, F); + + long dl = qbuf.rep.length(); + for(i = 0; i < dl; i++) + qq.rep[a_len+i] = qbuf.rep[i]; + for(i = dl+a_len; i < q_hi; i++) + clear(qq.rep[i]); + q_hi = a_len; + } + + qq.normalize(); + q = qq; +} + + +void MulMod(zz_pX& x, const zz_pX& a, const zz_pX& b, const zz_pXModulus& F) +{ + long da, db, d, n, k; + + da = deg(a); + db = deg(b); + n = F.n; + + if (n < 0) LogicError("MulMod: uninitialized modulus"); + + if (da >= n || db >= n) + LogicError("bad args to MulMod(zz_pX,zz_pX,zz_pX,zz_pXModulus)"); + + if (da < 0 || db < 0) { + clear(x); + return; + } + + if (!F.UseFFT || da <= NTL_zz_pX_MUL_CROSSOVER || db <= NTL_zz_pX_MUL_CROSSOVER) { + zz_pX P1; + mul(P1, a, b); + rem(x, P1, F); + return; + } + + d = da + db + 1; + + k = NextPowerOfTwo(d); + k = max(k, F.k); + + fftRep R1(INIT_SIZE, k), R2(INIT_SIZE, F.l); + zz_pX P1(INIT_SIZE, n); + + TofftRep(R1, a, k); + TofftRep(R2, b, k); + + mul(R1, R1, R2); + + NDFromfftRep(P1, R1, n, d-1, R2); // save R1 for future use + + TofftRep(R2, P1, F.l); + mul(R2, R2, F.HRep); + FromfftRep(P1, R2, n-2, 2*n-4); + + TofftRep(R2, P1, F.k); + mul(R2, R2, F.FRep); + reduce(R1, R1, F.k); + sub(R1, R1, R2); + FromfftRep(x, R1, 0, n-1); +} + +void SqrMod(zz_pX& x, const zz_pX& a, const zz_pXModulus& F) +{ + long 
da, d, n, k; + + da = deg(a); + n = F.n; + + if (n < 0) LogicError("SqrMod: uninitialized modulus"); + + if (da >= n) + LogicError("bad args to SqrMod(zz_pX,zz_pX,zz_pXModulus)"); + + if (!F.UseFFT || da <= NTL_zz_pX_MUL_CROSSOVER) { + zz_pX P1; + sqr(P1, a); + rem(x, P1, F); + return; + } + + + d = 2*da + 1; + + k = NextPowerOfTwo(d); + k = max(k, F.k); + + fftRep R1(INIT_SIZE, k), R2(INIT_SIZE, F.l); + zz_pX P1(INIT_SIZE, n); + + TofftRep(R1, a, k); + mul(R1, R1, R1); + NDFromfftRep(P1, R1, n, d-1, R2); // save R1 for future use + + TofftRep(R2, P1, F.l); + mul(R2, R2, F.HRep); + FromfftRep(P1, R2, n-2, 2*n-4); + + TofftRep(R2, P1, F.k); + mul(R2, R2, F.FRep); + reduce(R1, R1, F.k); + sub(R1, R1, R2); + FromfftRep(x, R1, 0, n-1); +} + +void PlainInvTrunc(zz_pX& x, const zz_pX& a, long m) + + /* x = (1/a) % X^m, input not output, constant term a is nonzero */ + +{ + long i, k, n, lb; + zz_p v, t; + zz_p s; + const zz_p* ap; + zz_p* xp; + + + n = deg(a); + + if (n < 0) ArithmeticError("division by zero"); + + inv(s, ConstTerm(a)); + + if (n == 0) { + conv(x, s); + return; + } + + ap = a.rep.elts(); + x.rep.SetLength(m); + xp = x.rep.elts(); + + xp[0] = s; + + long is_one = IsOne(s); + + for (k = 1; k < m; k++) { + clear(v); + lb = max(k-n, 0); + for (i = lb; i <= k-1; i++) { + mul(t, xp[i], ap[k-i]); + add(v, v, t); + } + xp[k] = v; + negate(xp[k], xp[k]); + if (!is_one) mul(xp[k], xp[k], s); + } + + x.normalize(); +} + + +void trunc(zz_pX& x, const zz_pX& a, long m) + +// x = a % X^m, output may alias input + +{ + if (m < 0) LogicError("trunc: bad args"); + + if (&x == &a) { + if (x.rep.length() > m) { + x.rep.SetLength(m); + x.normalize(); + } + } + else { + long n; + long i; + zz_p* xp; + const zz_p* ap; + + n = min(a.rep.length(), m); + x.rep.SetLength(n); + + xp = x.rep.elts(); + ap = a.rep.elts(); + + for (i = 0; i < n; i++) xp[i] = ap[i]; + + x.normalize(); + } +} + +void CyclicReduce(zz_pX& x, const zz_pX& a, long m) + +// computes x = a mod X^m-1 + +{ + long n = deg(a); + long i, j; + long accum; + long p = zz_p::modulus(); + + if (n < m) { + x = a; + return; + } + + if (&x != &a) + x.rep.SetLength(m); + + for (i = 0; i < m; i++) { + accum = rep(a.rep[i]); + for (j = i + m; j <= n; j += m) + accum = AddMod(accum, rep(a.rep[j]), p); + x.rep[i].LoopHole() = accum; + } + + if (&x == &a) + x.rep.SetLength(m); + + x.normalize(); +} + + + +void InvTrunc(zz_pX& x, const zz_pX& a, long m) +{ + if (m < 0) LogicError("InvTrunc: bad args"); + if (m == 0) { + clear(x); + return; + } + + if (NTL_OVERFLOW(m, 1, 0)) + ResourceError("overflow in InvTrunc"); + + if (&x == &a) { + zz_pX la; + la = a; + if (m > NTL_zz_pX_NEWTON_CROSSOVER && deg(a) > 0) + NewtonInvTrunc(x, la, m); + else + PlainInvTrunc(x, la, m); + } + else { + if (m > NTL_zz_pX_NEWTON_CROSSOVER && deg(a) > 0) + NewtonInvTrunc(x, a, m); + else + PlainInvTrunc(x, a, m); + } +} + + + +void build(zz_pXModulus& x, const zz_pX& f) +{ + x.f = f; + x.n = deg(f); + + x.tracevec.make(); + + if (x.n <= 0) + LogicError("build: deg(f) must be at least 1"); + + if (x.n <= NTL_zz_pX_MOD_CROSSOVER + 1) { + x.UseFFT = 0; + return; + } + + x.UseFFT = 1; + + x.k = NextPowerOfTwo(x.n); + x.l = NextPowerOfTwo(2*x.n - 3); + TofftRep(x.FRep, f, x.k); + + zz_pX P1(INIT_SIZE, x.n+1), P2(INIT_SIZE, x.n); + + CopyReverse(P1, f, 0, x.n); + InvTrunc(P2, P1, x.n-1); + + CopyReverse(P1, P2, 0, x.n-2); + TofftRep(x.HRep, P1, x.l); +} + +zz_pXModulus::zz_pXModulus(const zz_pX& ff) +{ + build(*this, ff); +} + +zz_pXMultiplier::zz_pXMultiplier(const zz_pX& b, const 
zz_pXModulus& F) +{ + build(*this, b, F); +} + + + +void build(zz_pXMultiplier& x, const zz_pX& b, + const zz_pXModulus& F) +{ + long db; + long n = F.n; + + if (n < 0) LogicError("build zz_pXMultiplier: uninitialized modulus"); + + x.b = b; + db = deg(b); + + if (db >= n) LogicError("build zz_pXMultiplier: deg(b) >= deg(f)"); + + if (!F.UseFFT || db <= NTL_zz_pX_MOD_CROSSOVER) { + x.UseFFT = 0; + return; + } + + x.UseFFT = 1; + + fftRep R1(INIT_SIZE, F.l); + zz_pX P1(INIT_SIZE, n); + + + TofftRep(R1, b, F.l); + reduce(x.B2, R1, F.k); + mul(R1, R1, F.HRep); + FromfftRep(P1, R1, n-1, 2*n-3); + TofftRep(x.B1, P1, F.l); +} + + +void MulMod(zz_pX& x, const zz_pX& a, const zz_pXMultiplier& B, + const zz_pXModulus& F) +{ + + long n = F.n; + long da; + + da = deg(a); + + if (da >= n) + LogicError(" bad args to MulMod(zz_pX,zz_pX,zz_pXMultiplier,zz_pXModulus)"); + + if (da < 0) { + clear(x); + return; + } + + if (!B.UseFFT || !F.UseFFT || da <= NTL_zz_pX_MOD_CROSSOVER) { + zz_pX P1; + mul(P1, a, B.b); + rem(x, P1, F); + return; + } + + zz_pX P1(INIT_SIZE, n), P2(INIT_SIZE, n); + fftRep R1(INIT_SIZE, F.l), R2(INIT_SIZE, F.l); + + TofftRep(R1, a, F.l); + mul(R2, R1, B.B1); + FromfftRep(P1, R2, n-1, 2*n-3); + + reduce(R1, R1, F.k); + mul(R1, R1, B.B2); + TofftRep(R2, P1, F.k); + mul(R2, R2, F.FRep); + sub(R1, R1, R2); + + FromfftRep(x, R1, 0, n-1); +} + + +void PowerXMod(zz_pX& hh, const ZZ& e, const zz_pXModulus& F) +{ + if (F.n < 0) LogicError("PowerXMod: uninitialized modulus"); + + if (IsZero(e)) { + set(hh); + return; + } + + long n = NumBits(e); + long i; + + zz_pX h; + + h.SetMaxLength(F.n); + set(h); + + for (i = n - 1; i >= 0; i--) { + SqrMod(h, h, F); + if (bit(e, i)) + MulByXMod(h, h, F.f); + } + + if (e < 0) InvMod(h, h, F); + + hh = h; +} + + + +void PowerXPlusAMod(zz_pX& hh, zz_p a, const ZZ& e, const zz_pXModulus& F) +{ + if (F.n < 0) LogicError("PowerXPlusAMod: uninitialized modulus"); + + if (IsZero(e)) { + set(hh); + return; + } + + zz_pX t1(INIT_SIZE, F.n), t2(INIT_SIZE, F.n); + long n = NumBits(e); + long i; + + zz_pX h; + + h.SetMaxLength(F.n); + set(h); + + for (i = n - 1; i >= 0; i--) { + SqrMod(h, h, F); + if (bit(e, i)) { + MulByXMod(t1, h, F.f); + mul(t2, h, a); + add(h, t1, t2); + } + } + + if (e < 0) InvMod(h, h, F); + + hh = h; +} + + + +void PowerMod(zz_pX& h, const zz_pX& g, const ZZ& e, const zz_pXModulus& F) +{ + if (deg(g) >= F.n) LogicError("PowerMod: bad args"); + + if (IsZero(e)) { + set(h); + return; + } + + zz_pXMultiplier G; + + zz_pX res; + + long n = NumBits(e); + long i; + + build(G, g, F); + + res.SetMaxLength(F.n); + set(res); + + for (i = n - 1; i >= 0; i--) { + SqrMod(res, res, F); + if (bit(e, i)) + MulMod(res, res, G, F); + } + + if (e < 0) InvMod(res, res, F); + + h = res; +} + + +void NewtonInvTrunc(zz_pX& x, const zz_pX& a, long m) +{ + x.SetMaxLength(m); + + long i; + long t; + + + t = NextPowerOfTwo(2*m-1); + + fftRep R1(INIT_SIZE, t), R2(INIT_SIZE, t); + zz_pX P1(INIT_SIZE, m); + + long log2_newton = NextPowerOfTwo(NTL_zz_pX_NEWTON_CROSSOVER)-1; + + PlainInvTrunc(x, a, 1L << log2_newton); + long k = 1L << log2_newton; + long a_len = min(m, a.rep.length()); + + while (k < m) { + long l = min(2*k, m); + + t = NextPowerOfTwo(2*k); + TofftRep(R1, x, t); + mul(R1, R1, R1); + FromfftRep(P1, R1, 0, l-1); + + t = NextPowerOfTwo(deg(P1) + min(l, a_len)); + TofftRep(R1, P1, t); + TofftRep(R2, a, t, 0, min(l, a_len)-1); + mul(R1, R1, R2); + FromfftRep(P1, R1, k, l-1); + + x.rep.SetLength(l); + long y_len = P1.rep.length(); + for (i = k; i < l; i++) { + if 
(i-k >= y_len) + clear(x.rep[i]); + else + negate(x.rep[i], P1.rep[i-k]); + } + x.normalize(); + + k = l; + } +} + + + + +void FFTDivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pX& b) +{ + long n = deg(b); + long m = deg(a); + long k, l; + + if (m < n) { + clear(q); + r = a; + return; + } + + if (m >= 3*n) { + zz_pXModulus B; + build(B, b); + DivRem(q, r, a, B); + return; + } + + zz_pX P1, P2, P3; + + CopyReverse(P3, b, 0, n); + InvTrunc(P2, P3, m-n+1); + CopyReverse(P1, P2, 0, m-n); + + k = NextPowerOfTwo(2*(m-n)+1); + long k1 = NextPowerOfTwo(n); + long mx = max(k1, k); + + fftRep R1(INIT_SIZE, mx), R2(INIT_SIZE, mx); + + TofftRep(R1, P1, k); + TofftRep(R2, a, k, n, m); + mul(R1, R1, R2); + FromfftRep(P3, R1, m-n, 2*(m-n)); + + l = 1L << k1; + + + TofftRep(R1, b, k1); + TofftRep(R2, P3, k1); + mul(R1, R1, R2); + FromfftRep(P1, R1, 0, n-1); + CyclicReduce(P2, a, l); + trunc(r, P2, n); + sub(r, r, P1); + q = P3; +} + + + + +void FFTDiv(zz_pX& q, const zz_pX& a, const zz_pX& b) +{ + + long n = deg(b); + long m = deg(a); + long k; + + if (m < n) { + clear(q); + return; + } + + if (m >= 3*n) { + zz_pXModulus B; + build(B, b); + div(q, a, B); + return; + } + + zz_pX P1, P2, P3; + + CopyReverse(P3, b, 0, n); + InvTrunc(P2, P3, m-n+1); + CopyReverse(P1, P2, 0, m-n); + + k = NextPowerOfTwo(2*(m-n)+1); + + fftRep R1(INIT_SIZE, k), R2(INIT_SIZE, k); + + TofftRep(R1, P1, k); + TofftRep(R2, a, k, n, m); + mul(R1, R1, R2); + FromfftRep(q, R1, m-n, 2*(m-n)); +} + + + +void FFTRem(zz_pX& r, const zz_pX& a, const zz_pX& b) +{ + long n = deg(b); + long m = deg(a); + long k, l; + + if (m < n) { + r = a; + return; + } + + if (m >= 3*n) { + zz_pXModulus B; + build(B, b); + rem(r, a, B); + return; + } + + zz_pX P1, P2, P3; + + CopyReverse(P3, b, 0, n); + InvTrunc(P2, P3, m-n+1); + CopyReverse(P1, P2, 0, m-n); + + k = NextPowerOfTwo(2*(m-n)+1); + long k1 = NextPowerOfTwo(n); + long mx = max(k, k1); + + fftRep R1(INIT_SIZE, mx), R2(INIT_SIZE, mx); + + TofftRep(R1, P1, k); + TofftRep(R2, a, k, n, m); + mul(R1, R1, R2); + FromfftRep(P3, R1, m-n, 2*(m-n)); + + l = 1L << k1; + + + TofftRep(R1, b, k1); + TofftRep(R2, P3, k1); + mul(R1, R1, R2); + FromfftRep(P3, R1, 0, n-1); + CyclicReduce(P2, a, l); + trunc(r, P2, n); + sub(r, r, P3); +} + + +void DivRem(zz_pX& q, zz_pX& r, const zz_pX& a, const zz_pX& b) +{ + if (deg(b) > NTL_zz_pX_DIV_CROSSOVER && deg(a) - deg(b) > NTL_zz_pX_DIV_CROSSOVER) + FFTDivRem(q, r, a, b); + else + PlainDivRem(q, r, a, b); +} + +void div(zz_pX& q, const zz_pX& a, const zz_pX& b) +{ + if (deg(b) > NTL_zz_pX_DIV_CROSSOVER && deg(a) - deg(b) > NTL_zz_pX_DIV_CROSSOVER) + FFTDiv(q, a, b); + else + PlainDiv(q, a, b); +} + +void div(zz_pX& q, const zz_pX& a, zz_p b) +{ + zz_p t; + inv(t, b); + mul(q, a, t); +} + + +void rem(zz_pX& r, const zz_pX& a, const zz_pX& b) +{ + if (deg(b) > NTL_zz_pX_DIV_CROSSOVER && deg(a) - deg(b) > NTL_zz_pX_DIV_CROSSOVER) + FFTRem(r, a, b); + else + PlainRem(r, a, b); +} + + + +long operator==(const zz_pX& a, long b) +{ + if (b == 0) + return IsZero(a); + + if (b == 1) + return IsOne(a); + + long da = deg(a); + + if (da > 0) + return 0; + + zz_p bb; + bb = b; + + if (da < 0) + return IsZero(bb); + + return a.rep[0] == bb; +} + +long operator==(const zz_pX& a, zz_p b) +{ + if (IsZero(b)) + return IsZero(a); + + long da = deg(a); + + if (da != 0) + return 0; + + return a.rep[0] == b; +} + +void power(zz_pX& x, const zz_pX& a, long e) +{ + if (e < 0) { + ArithmeticError("power: negative exponent"); + } + + if (e == 0) { + x = 1; + return; + } + + if (a == 0 || a == 
1) { + x = a; + return; + } + + long da = deg(a); + + if (da == 0) { + x = power(ConstTerm(a), e); + return; + } + + if (da > (NTL_MAX_LONG-1)/e) + ResourceError("overflow in power"); + + zz_pX res; + res.SetMaxLength(da*e + 1); + res = 1; + + long k = NumBits(e); + long i; + + for (i = k - 1; i >= 0; i--) { + sqr(res, res); + if (bit(e, i)) + mul(res, res, a); + } + + x = res; +} + +void reverse(zz_pX& x, const zz_pX& a, long hi) +{ + if (hi < 0) { clear(x); return; } + if (NTL_OVERFLOW(hi, 1, 0)) + ResourceError("overflow in reverse"); + + if (&x == &a) { + zz_pX tmp; + CopyReverse(tmp, a, 0, hi); + x = tmp; + } + else + CopyReverse(x, a, 0, hi); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/lzz_pX1.c b/thirdparty/linux/ntl/src/lzz_pX1.c new file mode 100644 index 0000000000..26b6628f56 --- /dev/null +++ b/thirdparty/linux/ntl/src/lzz_pX1.c @@ -0,0 +1,2485 @@ + +#include <NTL/lzz_pX.h> +#include <NTL/vec_vec_lzz_p.h> + +#ifdef NTL_HAVE_AVX +#include <immintrin.h> +#endif + + +NTL_START_IMPL + + + +long divide(zz_pX& q, const zz_pX& a, const zz_pX& b) +{ + if (IsZero(b)) { + if (IsZero(a)) { + clear(q); + return 1; + } + else + return 0; + } + + zz_pX lq, r; + DivRem(lq, r, a, b); + if (!IsZero(r)) return 0; + q = lq; + return 1; +} + +long divide(const zz_pX& a, const zz_pX& b) +{ + if (IsZero(b)) return IsZero(a); + zz_pX lq, r; + DivRem(lq, r, a, b); + if (!IsZero(r)) return 0; + return 1; +} + + + +void zz_pXMatrix::operator=(const zz_pXMatrix& M) +{ + elts[0][0] = M.elts[0][0]; + elts[0][1] = M.elts[0][1]; + elts[1][0] = M.elts[1][0]; + elts[1][1] = M.elts[1][1]; +} + + +void RightShift(zz_pX& x, const zz_pX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) ResourceError("overflow in RightShift"); + LeftShift(x, a, -n); + return; + } + + long da = deg(a); + long i; + + if (da < n) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(da-n+1); + + for (i = 0; i <= da-n; i++) + x.rep[i] = a.rep[i+n]; + + if (&x == &a) + x.rep.SetLength(da-n+1); + + x.normalize(); +} + +void LeftShift(zz_pX& x, const zz_pX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + if (n < 0) { + if (n < -NTL_MAX_LONG) + clear(x); + else + RightShift(x, a, -n); + return; + } + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("overflow in LeftShift"); + + long m = a.rep.length(); + + x.rep.SetLength(m+n); + + long i; + for (i = m-1; i >= 0; i--) + x.rep[i+n] = a.rep[i]; + + for (i = 0; i < n; i++) + clear(x.rep[i]); +} + + +void ShiftAdd(zz_pX& U, const zz_pX& V, long n) +// assumes input does not alias output +{ + if (IsZero(V)) + return; + + long du = deg(U); + long dv = deg(V); + + long d = max(du, n+dv); + + U.rep.SetLength(d+1); + long i; + + for (i = du+1; i <= d; i++) + clear(U.rep[i]); + + for (i = 0; i <= dv; i++) + add(U.rep[i+n], U.rep[i+n], V.rep[i]); + + U.normalize(); +} + +void ShiftSub(zz_pX& U, const zz_pX& V, long n) +// assumes input does not alias output +{ + if (IsZero(V)) + return; + + long du = deg(U); + long dv = deg(V); + + long d = max(du, n+dv); + + U.rep.SetLength(d+1); + long i; + + for (i = du+1; i <= d; i++) + clear(U.rep[i]); + + for (i = 0; i <= dv; i++) + sub(U.rep[i+n], U.rep[i+n], V.rep[i]); + + U.normalize(); +} + +void mul(zz_pX& U, zz_pX& V, const zz_pXMatrix& M) +// (U, V)^T = M*(U, V)^T +{ + long d = deg(U) - deg(M(1,1)); + long k = NextPowerOfTwo(d - 1); + + // When the GCD algorithm is run on polynomials of degree n, n-1, + // where n is a power of two, then d-1 is likely to be a power of two.
+ // It would be more natural to set k = NextPowerOfTwo(d+1), but this + // would be much less efficient in this case. + + long n = (1L << k); + long xx; + zz_p a0, a1, b0, b1, c0, d0, u0, u1, v0, v1, nu0, nu1, nv0; + zz_p t1, t2; + + if (n == d-1) + xx = 1; + else if (n == d) + xx = 2; + else + xx = 3; + + switch (xx) { + case 1: + GetCoeff(a0, M(0,0), 0); + GetCoeff(a1, M(0,0), 1); + GetCoeff(b0, M(0,1), 0); + GetCoeff(b1, M(0,1), 1); + GetCoeff(c0, M(1,0), 0); + GetCoeff(d0, M(1,1), 0); + + GetCoeff(u0, U, 0); + GetCoeff(u1, U, 1); + GetCoeff(v0, V, 0); + GetCoeff(v1, V, 1); + + mul(t1, (a0), (u0)); + mul(t2, (b0), (v0)); + add(t1, t1, t2); + nu0 = t1; + + mul(t1, (a1), (u0)); + mul(t2, (a0), (u1)); + add(t1, t1, t2); + mul(t2, (b1), (v0)); + add(t1, t1, t2); + mul(t2, (b0), (v1)); + add(t1, t1, t2); + nu1 = t1; + + mul(t1, (c0), (u0)); + mul(t2, (d0), (v0)); + add (t1, t1, t2); + nv0 = t1; + + break; + + case 2: + GetCoeff(a0, M(0,0), 0); + GetCoeff(b0, M(0,1), 0); + + GetCoeff(u0, U, 0); + GetCoeff(v0, V, 0); + + mul(t1, (a0), (u0)); + mul(t2, (b0), (v0)); + add(t1, t1, t2); + nu0 = t1; + + break; + + case 3: + break; + + } + + fftRep RU(INIT_SIZE, k), RV(INIT_SIZE, k), R1(INIT_SIZE, k), + R2(INIT_SIZE, k); + + TofftRep(RU, U, k); + TofftRep(RV, V, k); + + TofftRep(R1, M(0,0), k); + mul(R1, R1, RU); + TofftRep(R2, M(0,1), k); + mul(R2, R2, RV); + add(R1, R1, R2); + FromfftRep(U, R1, 0, d); + + TofftRep(R1, M(1,0), k); + mul(R1, R1, RU); + TofftRep(R2, M(1,1), k); + mul(R2, R2, RV); + add(R1, R1, R2); + FromfftRep(V, R1, 0, d-1); + + // now fix-up results + + switch (xx) { + case 1: + GetCoeff(u0, U, 0); + sub(u0, u0, nu0); + SetCoeff(U, d-1, u0); + SetCoeff(U, 0, nu0); + + GetCoeff(u1, U, 1); + sub(u1, u1, nu1); + SetCoeff(U, d, u1); + SetCoeff(U, 1, nu1); + + GetCoeff(v0, V, 0); + sub(v0, v0, nv0); + SetCoeff(V, d-1, v0); + SetCoeff(V, 0, nv0); + + break; + + + case 2: + GetCoeff(u0, U, 0); + sub(u0, u0, nu0); + SetCoeff(U, d, u0); + SetCoeff(U, 0, nu0); + + break; + + } +} + + +void mul(zz_pXMatrix& A, zz_pXMatrix& B, zz_pXMatrix& C) +// A = B*C, B and C are destroyed +{ + long db = deg(B(1,1)); + long dc = deg(C(1,1)); + long da = db + dc; + + long k = NextPowerOfTwo(da+1); + + fftRep B00, B01, B10, B11, C0, C1, T1, T2; + + TofftRep(B00, B(0,0), k); B(0,0).kill(); + TofftRep(B01, B(0,1), k); B(0,1).kill(); + TofftRep(B10, B(1,0), k); B(1,0).kill(); + TofftRep(B11, B(1,1), k); B(1,1).kill(); + + TofftRep(C0, C(0,0), k); C(0,0).kill(); + TofftRep(C1, C(1,0), k); C(1,0).kill(); + + mul(T1, B00, C0); + mul(T2, B01, C1); + add(T1, T1, T2); + FromfftRep(A(0,0), T1, 0, da); + + mul(T1, B10, C0); + mul(T2, B11, C1); + add(T1, T1, T2); + FromfftRep(A(1,0), T1, 0, da); + + TofftRep(C0, C(0,1), k); C(0,1).kill(); + TofftRep(C1, C(1,1), k); C(1,1).kill(); + + mul(T1, B00, C0); + mul(T2, B01, C1); + add(T1, T1, T2); + FromfftRep(A(0,1), T1, 0, da); + + mul(T1, B10, C0); + mul(T2, B11, C1); + add(T1, T1, T2); + FromfftRep(A(1,1), T1, 0, da); +} + +void IterHalfGCD(zz_pXMatrix& M_out, zz_pX& U, zz_pX& V, long d_red) +{ + M_out(0,0).SetMaxLength(d_red); + M_out(0,1).SetMaxLength(d_red); + M_out(1,0).SetMaxLength(d_red); + M_out(1,1).SetMaxLength(d_red); + + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + long goal = deg(U) - d_red; + + if (deg(V) <= goal) + return; + + zz_pX Q, t(INIT_SIZE, d_red); + + while (deg(V) > goal) { + PlainDivRem(Q, U, U, V); + swap(U, V); + + mul(t, Q, M_out(1,0)); + sub(t, M_out(0,0), t); + M_out(0,0) = M_out(1,0); + M_out(1,0) = t; + + 
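// apply the same row operation to the second column of M_out: on the + // left, M_out is multiplied by the matrix ((0, 1), (1, -Q)) +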
mul(t, Q, M_out(1,1)); + sub(t, M_out(0,1), t); + M_out(0,1) = M_out(1,1); + M_out(1,1) = t; + } +} + + + +void HalfGCD(zz_pXMatrix& M_out, const zz_pX& U, const zz_pX& V, long d_red) +{ + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + return; + } + + + long n = deg(U) - 2*d_red + 2; + if (n < 0) n = 0; + + zz_pX U1, V1; + + RightShift(U1, U, n); + RightShift(V1, V, n); + + if (d_red <= NTL_zz_pX_HalfGCD_CROSSOVER) { + IterHalfGCD(M_out, U1, V1, d_red); + return; + } + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + zz_pXMatrix M1; + + HalfGCD(M1, U1, V1, d1); + mul(U1, V1, M1); + + long d2 = deg(V1) - deg(U) + n + d_red; + + if (IsZero(V1) || d2 <= 0) { + M_out = M1; + return; + } + + + zz_pX Q; + zz_pXMatrix M2; + + DivRem(Q, U1, U1, V1); + swap(U1, V1); + + HalfGCD(M2, U1, V1, d2); + + zz_pX t(INIT_SIZE, deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,0)); + sub(t, M1(0,0), t); + swap(M1(0,0), M1(1,0)); + swap(M1(1,0), t); + + t.kill(); + + t.SetMaxLength(deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,1)); + sub(t, M1(0,1), t); + swap(M1(0,1), M1(1,1)); + swap(M1(1,1), t); + + t.kill(); + + mul(M_out, M2, M1); +} + + + + +void XHalfGCD(zz_pXMatrix& M_out, zz_pX& U, zz_pX& V, long d_red) +{ + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + return; + } + + long du = deg(U); + + if (d_red <= NTL_zz_pX_HalfGCD_CROSSOVER) { + IterHalfGCD(M_out, U, V, d_red); + return; + } + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + zz_pXMatrix M1; + + HalfGCD(M1, U, V, d1); + mul(U, V, M1); + + long d2 = deg(V) - du + d_red; + + if (IsZero(V) || d2 <= 0) { + M_out = M1; + return; + } + + + zz_pX Q; + zz_pXMatrix M2; + + DivRem(Q, U, U, V); + swap(U, V); + + XHalfGCD(M2, U, V, d2); + + zz_pX t(INIT_SIZE, deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,0)); + sub(t, M1(0,0), t); + swap(M1(0,0), M1(1,0)); + swap(M1(1,0), t); + + t.kill(); + + t.SetMaxLength(deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,1)); + sub(t, M1(0,1), t); + swap(M1(0,1), M1(1,1)); + swap(M1(1,1), t); + + t.kill(); + + mul(M_out, M2, M1); +} + +void HalfGCD(zz_pX& U, zz_pX& V) +{ + long d_red = (deg(U)+1)/2; + + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + return; + } + + long du = deg(U); + + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + zz_pXMatrix M1; + + HalfGCD(M1, U, V, d1); + mul(U, V, M1); + + long d2 = deg(V) - du + d_red; + + if (IsZero(V) || d2 <= 0) { + return; + } + + M1(0,0).kill(); + M1(0,1).kill(); + M1(1,0).kill(); + M1(1,1).kill(); + + + zz_pX Q; + + DivRem(Q, U, U, V); + swap(U, V); + + HalfGCD(M1, U, V, d2); + + mul(U, V, M1); +} + + +void GCD(zz_pX& d, const zz_pX& u, const zz_pX& v) +{ + zz_pX u1, v1; + + u1 = u; + v1 = v; + + if (deg(u1) == deg(v1)) { + if (IsZero(u1)) { + clear(d); + return; + } + + rem(v1, v1, u1); + } + else if (deg(u1) < deg(v1)) { + swap(u1, v1); + } + + // deg(u1) > deg(v1) + + while (deg(u1) > NTL_zz_pX_GCD_CROSSOVER && !IsZero(v1)) { + HalfGCD(u1, v1); + + if (!IsZero(v1)) { + rem(u1, u1, v1); + swap(u1, v1); + } + } + + PlainGCD(d, u1, v1); +} + + + +void XGCD(zz_pX& d, zz_pX& s, zz_pX& t, const zz_pX& a, const zz_pX& b) +{ + zz_p w; + + if (IsZero(a) && IsZero(b)) { + clear(d); + set(s); + clear(t); + return; + } + + zz_pX U, V, Q; + + U = a; + V = b; + + long flag = 0; + + if (deg(U) == deg(V)) { + DivRem(Q, U, U, V); + 
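// one division step up front, so that deg(U) > deg(V) holds below; flag + // records this so s and t can be recovered from M after XHalfGCD +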
swap(U, V); + flag = 1; + } + else if (deg(U) < deg(V)) { + swap(U, V); + flag = 2; + } + + zz_pXMatrix M; + + XHalfGCD(M, U, V, deg(U)+1); + + d = U; + + if (flag == 0) { + s = M(0,0); + t = M(0,1); + } + else if (flag == 1) { + s = M(0,1); + mul(t, Q, M(0,1)); + sub(t, M(0,0), t); + } + else { /* flag == 2 */ + s = M(0,1); + t = M(0,0); + } + + // normalize + + inv(w, LeadCoeff(d)); + mul(d, d, w); + mul(s, s, w); + mul(t, t, w); +} + + + + + + + +void IterBuild(zz_p* a, long n) +{ + long i, k; + zz_p b, t; + + if (n <= 0) return; + + negate(a[0], a[0]); + + for (k = 1; k <= n-1; k++) { + negate(b, a[k]); + add(a[k], b, a[k-1]); + for (i = k-1; i >= 1; i--) { + mul(t, a[i], b); + add(a[i], t, a[i-1]); + } + mul(a[0], a[0], b); + } +} + +void mul(zz_p* x, const zz_p* a, const zz_p* b, long n) +{ + zz_p t, accum; + + long i, j, jmin, jmax; + + long d = 2*n-1; + + for (i = 0; i <= d; i++) { + jmin = max(0, i-(n-1)); + jmax = min(n-1, i); + clear(accum); + for (j = jmin; j <= jmax; j++) { + mul(t, (a[j]), (b[i-j])); + add(accum, accum, t); + } + if (i >= n) { + add(accum, accum, (a[i-n])); + add(accum, accum, (b[i-n])); + } + + x[i] = accum; + } +} + + +void BuildFromRoots(zz_pX& x, const vec_zz_p& a) +{ + long n = a.length(); + + if (n == 0) { + set(x); + return; + } + + long k0 = NextPowerOfTwo(NTL_zz_pX_MUL_CROSSOVER)-1; + long crossover = 1L << k0; + + if (n <= NTL_zz_pX_MUL_CROSSOVER) { + x.rep.SetMaxLength(n+1); + x.rep = a; + IterBuild(&x.rep[0], n); + x.rep.SetLength(n+1); + SetCoeff(x, n); + return; + } + + long k = NextPowerOfTwo(n); + + long m = 1L << k; + long i, j; + long l, width; + + zz_pX b(INIT_SIZE, m+1); + + b.rep = a; + b.rep.SetLength(m+1); + for (i = n; i < m; i++) + clear(b.rep[i]); + + set(b.rep[m]); + + fftRep R1(INIT_SIZE, k), R2(INIT_SIZE, k); + + + zz_p t1, one; + set(one); + + vec_zz_p G(INIT_SIZE, crossover), H(INIT_SIZE, crossover); + zz_p *g = G.elts(); + zz_p *h = H.elts(); + zz_p *tmp; + + for (i = 0; i < m; i+= crossover) { + for (j = 0; j < crossover; j++) + negate(g[j], b.rep[i+j]); + + if (k0 > 0) { + for (j = 0; j < crossover; j+=2) { + mul(t1, g[j], g[j+1]); + add(g[j+1], g[j], g[j+1]); + g[j] = t1; + } + } + + for (l = 1; l < k0; l++) { + width = 1L << l; + + for (j = 0; j < crossover; j += 2*width) + mul(&h[j], &g[j], &g[j+width], width); + + tmp = g; g = h; h = tmp; + } + + for (j = 0; j < crossover; j++) + b.rep[i+j] = g[j]; + } + + for (l = k0; l < k; l++) { + width = 1L << l; + for (i = 0; i < m; i += 2*width) { + t1 = b.rep[i+width]; + set(b.rep[i+width]); + TofftRep(R1, b, l+1, i, i+width); + b.rep[i+width] = t1; + t1 = b.rep[i+2*width]; + set(b.rep[i+2*width]); + TofftRep(R2, b, l+1, i+width, i+2*width); + b.rep[i+2*width] = t1; + mul(R1, R1, R2); + FromfftRep(&b.rep[i], R1, 0, 2*width-1); + sub(b.rep[i], b.rep[i], one); + } + } + + x.rep.SetLength(n+1); + long delta = m-n; + for (i = 0; i <= n; i++) + x.rep[i] = b.rep[i+delta]; + + // no need to normalize +} + + + +void eval(zz_p& b, const zz_pX& f, zz_p a) +// does a Horner evaluation +{ + zz_p acc; + long i; + + clear(acc); + for (i = deg(f); i >= 0; i--) { + mul(acc, acc, a); + add(acc, acc, f.rep[i]); + } + + b = acc; +} + + + +void eval(vec_zz_p& b, const zz_pX& f, const vec_zz_p& a) +// naive algorithm: repeats Horner +{ + if (&b == &f.rep) { + vec_zz_p bb; + eval(bb, f, a); + b = bb; + return; + } + + long m = a.length(); + b.SetLength(m); + long i; + for (i = 0; i < m; i++) + eval(b[i], f, a[i]); +} + + + + +void interpolate(zz_pX& f, const vec_zz_p& a, const vec_zz_p& b) +{ + long m 
= a.length(); + if (b.length() != m) LogicError("interpolate: vector length mismatch"); + + if (m == 0) { + clear(f); + return; + } + + vec_zz_p prod; + prod = a; + + zz_p t1, t2; + + long k, i; + + vec_zz_p res; + res.SetLength(m); + + for (k = 0; k < m; k++) { + + const zz_p& aa = a[k]; + + set(t1); + for (i = k-1; i >= 0; i--) { + mul(t1, t1, aa); + add(t1, t1, prod[i]); + } + + clear(t2); + for (i = k-1; i >= 0; i--) { + mul(t2, t2, aa); + add(t2, t2, res[i]); + } + + + inv(t1, t1); + sub(t2, b[k], t2); + mul(t1, t1, t2); + + for (i = 0; i < k; i++) { + mul(t2, prod[i], t1); + add(res[i], res[i], t2); + } + + res[k] = t1; + + if (k < m-1) { + if (k == 0) + negate(prod[0], prod[0]); + else { + negate(t1, a[k]); + add(prod[k], t1, prod[k-1]); + for (i = k-1; i >= 1; i--) { + mul(t2, prod[i], t1); + add(prod[i], t2, prod[i-1]); + } + mul(prod[0], prod[0], t1); + } + } + } + + while (m > 0 && IsZero(res[m-1])) m--; + res.SetLength(m); + f.rep = res; +} + + + +void InnerProduct(zz_pX& x, const vec_zz_p& v, long low, long high, + const vec_zz_pX& H, long n, vec_zz_p& t) +{ + zz_p s; + long i, j; + + zz_p *tp = t.elts(); + + for (j = 0; j < n; j++) + clear(tp[j]); + + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + high = min(high, v.length()-1); + for (i = low; i <= high; i++) { + const vec_zz_p& h = H[i-low].rep; + long m = h.length(); + zz_p w = (v[i]); + + long W = rep(w); + mulmod_precon_t Wpinv = PrepMulModPrecon(W, p, pinv); + const zz_p *hp = h.elts(); + + for (j = 0; j < m; j++) { + long S = MulModPrecon(rep(hp[j]), W, p, Wpinv); + S = AddMod(S, rep(tp[j]), p); + tp[j].LoopHole() = S; + } + } + + x.rep = t; + x.normalize(); +} + + +void CompMod(zz_pX& x, const zz_pX& g, const zz_pXArgument& A, + const zz_pXModulus& F) +{ + if (deg(g) <= 0) { + x = g; + return; + } + + + zz_pX s, t; + vec_zz_p scratch(INIT_SIZE, F.n); + + long m = A.H.length() - 1; + long l = ((g.rep.length()+m-1)/m) - 1; + + zz_pXMultiplier M; + build(M, A.H[m], F); + + InnerProduct(t, g.rep, l*m, l*m + m - 1, A.H, F.n, scratch); + for (long i = l-1; i >= 0; i--) { + InnerProduct(s, g.rep, i*m, i*m + m - 1, A.H, F.n, scratch); + MulMod(t, t, M, F); + add(t, t, s); + } + + x = t; +} + + +void build(zz_pXArgument& A, const zz_pX& h, const zz_pXModulus& F, long m) +{ + if (m <= 0 || deg(h) >= F.n) LogicError("build: bad args"); + + if (m > F.n) m = F.n; + + long i; + + if (zz_pXArgBound > 0) { + double sz = 1; + sz = sz*F.n; + sz = sz+6; + sz = sz*(sizeof (long)); + sz = sz/1024; + m = min(m, long(zz_pXArgBound/sz)); + m = max(m, 1); + } + + zz_pXMultiplier M; + + build(M, h, F); + + A.H.SetLength(m+1); + + set(A.H[0]); + A.H[1] = h; + for (i = 2; i <= m; i++) + MulMod(A.H[i], A.H[i-1], M, F); +} + + + + +NTL_CHEAP_THREAD_LOCAL long zz_pXArgBound = 0; + + +void CompMod(zz_pX& x, const zz_pX& g, const zz_pX& h, const zz_pXModulus& F) + // x = g(h) mod f +{ + long m = SqrRoot(g.rep.length()); + + if (m == 0) { + clear(x); + return; + } + + zz_pXArgument A; + + build(A, h, F, m); + + CompMod(x, g, A, F); +} + + + + +void Comp2Mod(zz_pX& x1, zz_pX& x2, const zz_pX& g1, const zz_pX& g2, + const zz_pX& h, const zz_pXModulus& F) + +{ + long m = SqrRoot(g1.rep.length() + g2.rep.length()); + + if (m == 0) { + clear(x1); + clear(x2); + return; + } + + zz_pXArgument A; + + build(A, h, F, m); + + zz_pX xx1, xx2; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + + x1 = xx1; + x2 = xx2; +} + +void Comp3Mod(zz_pX& x1, zz_pX& x2, zz_pX& x3, + const zz_pX& g1, const zz_pX& g2, const zz_pX& g3, + const 
zz_pX& h, const zz_pXModulus& F) + +{ + long m = SqrRoot(g1.rep.length() + g2.rep.length() + g3.rep.length()); + + if (m == 0) { + clear(x1); + clear(x2); + clear(x3); + return; + } + + zz_pXArgument A; + + build(A, h, F, m); + + zz_pX xx1, xx2, xx3; + + CompMod(xx1, g1, A, F); + CompMod(xx2, g2, A, F); + CompMod(xx3, g3, A, F); + + x1 = xx1; + x2 = xx2; + x3 = xx3; +} + + +// BEGIN zz_pXAltArgument variation + + + + +void build(zz_pXAltArgument& altH, const zz_pXArgument& H, const zz_pXModulus& F) +{ + altH.orig = &H; + + +#ifdef NTL_HAVE_LL_TYPE + altH.mem.kill(); + altH.row.kill(); + +#ifdef NTL_HAVE_AVX + altH.dmem.kill(); + altH.drow.kill(); +#endif + + if (H.H.length() < 10 || F.n < 50) { altH.strategy = 0; return; } + + altH.n = F.n; + altH.m = H.H.length()-1; + + long p = zz_p::modulus(); + long n = altH.n; + long m = altH.m; + + +#ifdef NTL_HAVE_AVX + if (n >= 128 && m <= ((1L << NTL_DOUBLE_PRECISION)-1)/(p-1) && + m*(p-1) <= ((1L << NTL_DOUBLE_PRECISION)-1)/(p-1)) { + altH.strategy = 3; + altH.pinv_L = sp_PrepRem(p); + } + else +#endif + if (cast_unsigned(m) <= (~(0UL))/cast_unsigned(p-1) && + cast_unsigned(m)*cast_unsigned(p-1) <= (~(0UL))/cast_unsigned(p-1)) { + altH.strategy = 1; + altH.pinv_L = sp_PrepRem(p); + } + else { + altH.strategy = 2; + altH.pinv_LL = make_sp_ll_reduce_struct(p); + } + + + if (altH.strategy == 1 || altH.strategy == 2) { + + altH.row.SetLength(n); + long **row = altH.row.elts(); + + const long AllocAmt = 1L << 18; + + long BlockSize = (AllocAmt + m - 1)/m; + long NumBlocks = (n + BlockSize - 1)/BlockSize; + + altH.mem.SetLength(NumBlocks); + + for (long i = 0; i < NumBlocks; i++) { + long first = i*BlockSize; + long last = min(n, first + BlockSize); + altH.mem[i].SetLength((last-first)*m); + for (long j = first; j < last; j++) { + row[j] = altH.mem[i].elts() + (j-first)*m; + } + } + + for (long i = 0; i < m; i++) { + const zz_p* ptr = H.H[i].rep.elts(); + long len = H.H[i].rep.length(); + for (long j = 0; j < len; j++) + row[j][i] = rep(ptr[j]); + for (long j = len; j < n; j++) + row[j][i] = 0; + } + } +#ifdef NTL_HAVE_AVX + else { + + // sanity check + if (m >= (1L << (NTL_BITS_PER_LONG-8))) ResourceError("zz_pXAltArgument: overflow"); + + long npanels = (n+15)/16; + long panel_size = 16*m; + + const long AllocAmt = 1L << 18; + + long BlockSize = (AllocAmt + panel_size - 1)/panel_size; + long NumBlocks = (npanels + BlockSize - 1)/BlockSize; + + altH.dmem.SetLength(NumBlocks); + altH.drow.SetLength(npanels); + double **drow = altH.drow.elts(); + + for (long i = 0; i < NumBlocks; i++) { + long first = i*BlockSize; + long last = min(npanels, first + BlockSize); + altH.dmem[i].SetLength((last-first)*panel_size); + + double *ptr = altH.dmem[i].get(); + + for (long j = first; j < last; j++) + drow[j] = ptr + (j-first)*panel_size; + } + + for (long i = 0; i < m; i++) { + const zz_p *ptr = H.H[i].rep.elts(); + long len = H.H[i].rep.length(); + for (long j = 0; j < len; j++) + drow[j/16][(i*16) + (j%16)] = rep(ptr[j]); + for (long j = len; j < npanels*16; j++) + drow[j/16][(i*16) + (j%16)] = 0; + } + } + +#endif + + +#endif +} + + +#ifdef NTL_HAVE_LL_TYPE + + +#ifdef NTL_HAVE_AVX +static +void mul16rowsD(double *x, const double *a, const double *b, long n) +{ + __m256d avec0, avec1, avec2, avec3; + + __m256d acc0 = _mm256_setzero_pd(); + __m256d acc1 = _mm256_setzero_pd(); + __m256d acc2 = _mm256_setzero_pd(); + __m256d acc3 = _mm256_setzero_pd(); + + __m256d bvec; + + for (long i = 0; i < n; i++) { + bvec = _mm256_broadcast_sd(&b[i]); + + avec0 = 
_mm256_load_pd(a); a += 4; + avec1 = _mm256_load_pd(a); a += 4; + avec2 = _mm256_load_pd(a); a += 4; + avec3 = _mm256_load_pd(a); a += 4; + +#ifdef NTL_HAVE_FMA + + acc0 = _mm256_fmadd_pd(avec0, bvec, acc0); + acc1 = _mm256_fmadd_pd(avec1, bvec, acc1); + acc2 = _mm256_fmadd_pd(avec2, bvec, acc2); + acc3 = _mm256_fmadd_pd(avec3, bvec, acc3); + +#else + + acc0 = _mm256_add_pd(_mm256_mul_pd(avec0, bvec), acc0); + acc1 = _mm256_add_pd(_mm256_mul_pd(avec1, bvec), acc1); + acc2 = _mm256_add_pd(_mm256_mul_pd(avec2, bvec), acc2); + acc3 = _mm256_add_pd(_mm256_mul_pd(avec3, bvec), acc3); + +#endif + + } + + _mm256_store_pd(x + 0*4, acc0); + _mm256_store_pd(x + 1*4, acc1); + _mm256_store_pd(x + 2*4, acc2); + _mm256_store_pd(x + 3*4, acc3); +} + +static +void mul16rows2D(double *x, double *x_, const double *a, const double *b, const double *b_, long n) +{ + __m256d avec0, avec1, avec2, avec3; + + __m256d acc0 = _mm256_setzero_pd(); + __m256d acc1 = _mm256_setzero_pd(); + __m256d acc2 = _mm256_setzero_pd(); + __m256d acc3 = _mm256_setzero_pd(); + + __m256d acc0_ = _mm256_setzero_pd(); + __m256d acc1_ = _mm256_setzero_pd(); + __m256d acc2_ = _mm256_setzero_pd(); + __m256d acc3_ = _mm256_setzero_pd(); + + + __m256d bvec; + __m256d bvec_; + + for (long i = 0; i < n; i++) { + bvec = _mm256_broadcast_sd(&b[i]); + bvec_ = _mm256_broadcast_sd(&b_[i]); + + avec0 = _mm256_load_pd(a); a += 4; + avec1 = _mm256_load_pd(a); a += 4; + avec2 = _mm256_load_pd(a); a += 4; + avec3 = _mm256_load_pd(a); a += 4; + +#ifdef NTL_HAVE_FMA + + acc0 = _mm256_fmadd_pd(avec0, bvec, acc0); + acc1 = _mm256_fmadd_pd(avec1, bvec, acc1); + acc2 = _mm256_fmadd_pd(avec2, bvec, acc2); + acc3 = _mm256_fmadd_pd(avec3, bvec, acc3); + + acc0_ = _mm256_fmadd_pd(avec0, bvec_, acc0_); + acc1_ = _mm256_fmadd_pd(avec1, bvec_, acc1_); + acc2_ = _mm256_fmadd_pd(avec2, bvec_, acc2_); + acc3_ = _mm256_fmadd_pd(avec3, bvec_, acc3_); + +#else + acc0 = _mm256_add_pd(_mm256_mul_pd(avec0, bvec), acc0); + acc1 = _mm256_add_pd(_mm256_mul_pd(avec1, bvec), acc1); + acc2 = _mm256_add_pd(_mm256_mul_pd(avec2, bvec), acc2); + acc3 = _mm256_add_pd(_mm256_mul_pd(avec3, bvec), acc3); + + acc0_ = _mm256_add_pd(_mm256_mul_pd(avec0, bvec_), acc0_); + acc1_ = _mm256_add_pd(_mm256_mul_pd(avec1, bvec_), acc1_); + acc2_ = _mm256_add_pd(_mm256_mul_pd(avec2, bvec_), acc2_); + acc3_ = _mm256_add_pd(_mm256_mul_pd(avec3, bvec_), acc3_); + +#endif + + } + + _mm256_store_pd(x + 0*4, acc0); + _mm256_store_pd(x + 1*4, acc1); + _mm256_store_pd(x + 2*4, acc2); + _mm256_store_pd(x + 3*4, acc3); + + _mm256_store_pd(x_ + 0*4, acc0_); + _mm256_store_pd(x_ + 1*4, acc1_); + _mm256_store_pd(x_ + 2*4, acc2_); + _mm256_store_pd(x_ + 3*4, acc3_); +} + + +#endif + + + +static +void InnerProduct_LL(zz_pX& x, const vec_zz_p& v, long low, long high, + const zz_pXAltArgument& H, long n) +{ + high = min(high, v.length()-1); + long len = high-low+1; + if (len <= 0) { + clear(x); + return; + } + + x.rep.SetLength(n); + zz_p *xp = x.rep.elts(); + + long p = zz_p::modulus(); + sp_ll_reduce_struct pinv = H.pinv_LL; + + const zz_p *vp = v.elts() + low; + + for (long i = 0; i < n; i++) + xp[i].LoopHole() = InnerProd_LL(H.row[i], vp, len, p, pinv); + + x.normalize(); +} + +static +void CompMod_LL(zz_pX& x, const zz_pX& g, const zz_pXAltArgument& A, + const zz_pXModulus& F) +{ + if (deg(g) <= 0) { + x = g; + return; + } + + + zz_pX s, t; + + long m = A.m; + long l = ((g.rep.length()+m-1)/m) - 1; + + zz_pXMultiplier M; + build(M, A.orig->H[m], F); + + InnerProduct_LL(t, g.rep, l*m, l*m + m - 1, A, F.n); + for 
(long i = l-1; i >= 0; i--) { + InnerProduct_LL(s, g.rep, i*m, i*m + m - 1, A, F.n); + MulMod(t, t, M, F); + add(t, t, s); + } + + x = t; +} + +static +void InnerProduct_L(zz_pX& x, const vec_zz_p& v, long low, long high, + const zz_pXAltArgument& H, long n) +{ + high = min(high, v.length()-1); + long len = high-low+1; + if (len <= 0) { + clear(x); + return; + } + + x.rep.SetLength(n); + zz_p *xp = x.rep.elts(); + + long p = zz_p::modulus(); + sp_reduce_struct pinv = H.pinv_L; + + + const zz_p *vp = v.elts() + low; + + for (long i = 0; i < n; i++) + xp[i].LoopHole() = InnerProd_L(H.row[i], vp, len, p, pinv); + + x.normalize(); +} + +static +void CompMod_L(zz_pX& x, const zz_pX& g, const zz_pXAltArgument& A, + const zz_pXModulus& F) +{ + if (deg(g) <= 0) { + x = g; + return; + } + + + zz_pX s, t; + + long m = A.m; + long l = ((g.rep.length()+m-1)/m) - 1; + + zz_pXMultiplier M; + build(M, A.orig->H[m], F); + + InnerProduct_L(t, g.rep, l*m, l*m + m - 1, A, F.n); + for (long i = l-1; i >= 0; i--) { + InnerProduct_L(s, g.rep, i*m, i*m + m - 1, A, F.n); + MulMod(t, t, M, F); + add(t, t, s); + } + + x = t; +} + + +#ifdef NTL_HAVE_AVX + +static +void InnerProduct_AVX(zz_pX& x, const Vec<double>& v, long low, long high, + const zz_pXAltArgument& H, long n) +{ + high = min(high, v.length()-1); + long len = high-low+1; + if (len <= 0) { + clear(x); + return; + } + + x.rep.SetLength(n); + zz_p *xp = x.rep.elts(); + + long p = zz_p::modulus(); + sp_reduce_struct pinv = H.pinv_L; + + + const double *vp = v.elts() + low; + + NTL_AVX_LOCAL_ARRAY(res, double, 16); + + long npanels = H.drow.length(); + + for (long i = 0, first = 0; i < npanels; i++, first += 16) { + mul16rowsD(res, H.drow[i], vp, len); + long last = min(n, first + 16); + for (long ii = first; ii < last; ii++) + xp[ii].LoopHole() = rem((unsigned long) (long) res[ii-first], p, pinv); + } + + x.normalize(); +} + +static +void InnerProduct2_AVX(zz_pX& x, zz_pX& x_, const Vec<double>& v, long low, long low_, long len, + const zz_pXAltArgument& H, long n) +{ + x.rep.SetLength(n); + zz_p *xp = x.rep.elts(); + + x_.rep.SetLength(n); + zz_p *xp_ = x_.rep.elts(); + + long p = zz_p::modulus(); + sp_reduce_struct pinv = H.pinv_L; + + + const double *vp = v.elts() + low; + const double *vp_ = v.elts() + low_; + + NTL_AVX_LOCAL_ARRAY(res, double, 16); + NTL_AVX_LOCAL_ARRAY(res_, double, 16); + + long npanels = H.drow.length(); + + for (long i = 0, first = 0; i < npanels; i++, first += 16) { + mul16rows2D(res, res_, H.drow[i], vp, vp_, len); + long last = min(n, first + 16); + for (long ii = first; ii < last; ii++) { + xp[ii].LoopHole() = rem((unsigned long) (long) res[ii-first], p, pinv); + xp_[ii].LoopHole() = rem((unsigned long) (long) res_[ii-first], p, pinv); + } + } + + x.normalize(); + x_.normalize(); +} + +static +void CompMod_AVX(zz_pX& x, const zz_pX& g, const zz_pXAltArgument& A, + const zz_pXModulus& F) +{ + if (deg(g) <= 0) { + x = g; + return; + } + + + zz_pX s, s_, t; + + long m = A.m; + long l = ((g.rep.length()+m-1)/m) - 1; + + zz_pXMultiplier M; + build(M, A.orig->H[m], F); + + long len = g.rep.length(); + Vec<double> gg; + gg.SetLength(len); + for (long i = 0; i < len; i++) gg[i] = rep(g.rep[i]); + + InnerProduct_AVX(t, gg, l*m, l*m + m - 1, A, F.n); + long i = l-1; + for (; i >= 1; i -= 2) { + InnerProduct2_AVX(s, s_, gg, i*m, (i-1)*m, m, A, F.n); + MulMod(t, t, M, F); + add(t, t, s); + MulMod(t, t, M, F); + add(t, t, s_); + } + + if (i >= 0) { + InnerProduct_AVX(s, gg, i*m, i*m + m - 1, A, F.n); + MulMod(t, t, M, F); + add(t, t, s); + } + + x = t; +} +#endif
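+ +// Strategy dispatch for CompMod with a zz_pXAltArgument (chosen in build above): +// 0 -- small cases: fall back to the generic zz_pXArgument code; +// 1 -- inner products fit in a single unsigned long (InnerProd_L); +// 2 -- otherwise, double-word accumulation (InnerProd_LL); +// 3 -- AVX: 16-row double-precision panels via mul16rowsD/mul16rows2D.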
+ + + +#endif + + + +void CompMod(zz_pX& x, const zz_pX& g, const zz_pXAltArgument& A, + const zz_pXModulus& F) +{ + if (!A.orig) LogicError("CompMod: uninitialized arg"); + +#ifndef NTL_HAVE_LL_TYPE + CompMod(x, g, *A.orig, F); +#else + + switch (A.strategy) { + case 0: + CompMod(x, g, *A.orig, F); + break; + + case 1: + CompMod_L(x, g, A, F); + break; + + case 2: + CompMod_LL(x, g, A, F); + break; + +#ifdef NTL_HAVE_AVX + case 3: + CompMod_AVX(x, g, A, F); + break; + +#endif + + default: + LogicError("CompMod: bad strategy"); + } +#endif + +} + + + +// END zz_pXAltArgument variation + + + + +static void StripZeroes(vec_zz_p& x) +{ + long n = x.length(); + while (n > 0 && IsZero(x[n-1])) + n--; + x.SetLength(n); +} + + +void PlainUpdateMap(vec_zz_p& xx, const vec_zz_p& a, + const zz_pX& b, const zz_pX& f) +{ + long n = deg(f); + long i, m; + + if (IsZero(b)) { + xx.SetLength(0); + return; + } + + m = n-1 - deg(b); + + vec_zz_p x(INIT_SIZE, n); + + for (i = 0; i <= m; i++) + InnerProduct(x[i], a, b.rep, i); + + if (deg(b) != 0) { + zz_pX c(INIT_SIZE, n); + LeftShift(c, b, m); + + for (i = m+1; i < n; i++) { + MulByXMod(c, c, f); + InnerProduct(x[i], a, c.rep); + } + } + + xx = x; +} + + + + +void UpdateMap(vec_zz_p& x, const vec_zz_p& aa, + const zz_pXMultiplier& B, const zz_pXModulus& F) +{ + long n = F.n; + + vec_zz_p a; + a = aa; + StripZeroes(a); + + if (a.length() > n) LogicError("UpdateMap: bad args"); + long i; + + if (!B.UseFFT) { + PlainUpdateMap(x, a, B.b, F.f); + StripZeroes(x); + return; + } + + fftRep R1(INIT_SIZE, F.k), R2(INIT_SIZE, F.l); + vec_zz_p V1(INIT_SIZE, n); + + + RevTofftRep(R1, a, F.k, 0, a.length()-1, 0); + mul(R2, R1, F.FRep); + RevFromfftRep(V1, R2, 0, n-2); + for (i = 0; i <= n-2; i++) negate(V1[i], V1[i]); + RevTofftRep(R2, V1, F.l, 0, n-2, n-1); + mul(R2, R2, B.B1); + mul(R1, R1, B.B2); + + AddExpand(R2, R1); + RevFromfftRep(x, R2, 0, n-1); + StripZeroes(x); +} + + + +void ProjectPowers(vec_zz_p& x, const vec_zz_p& a, long k, + const zz_pXArgument& H, const zz_pXModulus& F) + +{ + long n = F.n; + + if (a.length() > n || k < 0) + LogicError("ProjectPowers: bad args"); + if (NTL_OVERFLOW(k, 1, 0)) + ResourceError("ProjectPowers: excessive args"); + + long m = H.H.length()-1; + long l = (k+m-1)/m - 1; + + zz_pXMultiplier M; + build(M, H.H[m], F); + + vec_zz_p s(INIT_SIZE, n); + s = a; + StripZeroes(s); + + x.SetLength(k); + + for (long i = 0; i <= l; i++) { + long m1 = min(m, k-i*m); + zz_p* w = &x[i*m]; + for (long j = 0; j < m1; j++) + InnerProduct(w[j], H.H[j].rep, s); + if (i < l) + UpdateMap(s, s, M, F); + } +} + + + +void ProjectPowers(vec_zz_p& x, const vec_zz_p& a, long k, + const zz_pX& h, const zz_pXModulus& F) + +{ + if (a.length() > F.n || k < 0) LogicError("ProjectPowers: bad args"); + + if (k == 0) { + x.SetLength(0); + return; + } + + long m = SqrRoot(k); + + zz_pXArgument H; + + build(H, h, F, m); + ProjectPowers(x, a, k, H, F); +} + + +void BerlekampMassey(zz_pX& h, const vec_zz_p& a, long m) +{ + zz_pX Lambda, Sigma, Temp; + long L; + zz_p Delta, Delta1, t1; + long shamt; + + // cerr << "*** " << m << "\n"; + + Lambda.SetMaxLength(m+1); + Sigma.SetMaxLength(m+1); + Temp.SetMaxLength(m+1); + + L = 0; + set(Lambda); + clear(Sigma); + set(Delta); + shamt = 0; + + long i, r, dl; + + for (r = 1; r <= 2*m; r++) { + // cerr << r << "--"; + clear(Delta1); + dl = deg(Lambda); + for (i = 0; i <= dl; i++) { + mul(t1, Lambda.rep[i], a[r-i-1]); + add(Delta1, Delta1, t1); + } + + if (IsZero(Delta1)) { + shamt++; + // cerr << "case 1: " << deg(Lambda) << " " 
<< deg(Sigma) << " " << shamt << "\n"; + } + else if (2*L < r) { + div(t1, Delta1, Delta); + mul(Temp, Sigma, t1); + Sigma = Lambda; + ShiftSub(Lambda, Temp, shamt+1); + shamt = 0; + L = r-L; + Delta = Delta1; + // cerr << "case 2: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + else { + shamt++; + div(t1, Delta1, Delta); + mul(Temp, Sigma, t1); + ShiftSub(Lambda, Temp, shamt); + // cerr << "case 3: " << deg(Lambda) << " " << deg(Sigma) << " " << shamt << "\n"; + } + } + + // cerr << "finished: " << L << " " << deg(Lambda) << "\n"; + + dl = deg(Lambda); + h.rep.SetLength(L + 1); + + for (i = 0; i < L - dl; i++) + clear(h.rep[i]); + + for (i = L - dl; i <= L; i++) + h.rep[i] = Lambda.rep[L - i]; +} + + +void GCDMinPolySeq(zz_pX& h, const vec_zz_p& x, long m) +{ + long i; + zz_pX a, b; + zz_pXMatrix M; + zz_p t; + + a.rep.SetLength(2*m); + for (i = 0; i < 2*m; i++) a.rep[i] = x[2*m-1-i]; + a.normalize(); + + SetCoeff(b, 2*m); + + HalfGCD(M, b, a, m+1); + + /* make monic */ + + inv(t, LeadCoeff(M(1,1))); + mul(h, M(1,1), t); +} + + +void MinPolySeq(zz_pX& h, const vec_zz_p& a, long m) +{ + if (m < 0 || NTL_OVERFLOW(m, 1, 0)) LogicError("MinPoly: bad args"); + if (a.length() < 2*m) LogicError("MinPoly: sequence too short"); + + if (m > NTL_zz_pX_BERMASS_CROSSOVER) + GCDMinPolySeq(h, a, m); + else + BerlekampMassey(h, a, m); +} + + +void DoMinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m, + const vec_zz_p& R) +{ + vec_zz_p x; + + ProjectPowers(x, R, 2*m, g, F); + MinPolySeq(h, x, m); +} + + +void ProbMinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m) +{ + long n = F.n; + if (m < 1 || m > n) LogicError("ProbMinPoly: bad args"); + + long i; + vec_zz_p R(INIT_SIZE, n); + + for (i = 0; i < n; i++) random(R[i]); + DoMinPolyMod(h, g, F, m, R); +} + +void MinPolyMod(zz_pX& hh, const zz_pX& g, const zz_pXModulus& F, long m) +{ + zz_pX h, h1; + long n = F.n; + if (m < 1 || m > n) LogicError("MinPoly: bad args"); + + /* probabilistically compute min-poly */ + + ProbMinPolyMod(h, g, F, m); + if (deg(h) == m) { hh = h; return; } + CompMod(h1, h, g, F); + if (IsZero(h1)) { hh = h; return; } + + /* not completely successful...must iterate */ + + long i; + + zz_pX h2, h3; + zz_pXMultiplier H1; + vec_zz_p R(INIT_SIZE, n); + + for (;;) { + R.SetLength(n); + for (i = 0; i < n; i++) random(R[i]); + build(H1, h1, F); + UpdateMap(R, R, H1, F); + DoMinPolyMod(h2, g, F, m-deg(h), R); + + mul(h, h, h2); + if (deg(h) == m) { hh = h; return; } + CompMod(h3, h2, g, F); + MulMod(h1, h3, H1, F); + if (IsZero(h1)) { hh = h; return; } + } +} + +void IrredPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m) +{ + vec_zz_p R(INIT_SIZE, 1); + if (m < 1 || m > F.n) LogicError("IrredPoly: bad args"); + + set(R[0]); + DoMinPolyMod(h, g, F, m, R); +} + + + +void diff(zz_pX& x, const zz_pX& a) +{ + long n = deg(a); + long i; + + if (n <= 0) { + clear(x); + return; + } + + if (&x != &a) + x.rep.SetLength(n); + + for (i = 0; i <= n-1; i++) { + mul(x.rep[i], a.rep[i+1], i+1); + } + + if (&x == &a) + x.rep.SetLength(n); + + x.normalize(); +} + +void MakeMonic(zz_pX& x) +{ + if (IsZero(x)) + return; + + if (IsOne(LeadCoeff(x))) + return; + + zz_p t; + + inv(t, LeadCoeff(x)); + mul(x, x, t); +} + + + + + +void PlainMulTrunc(zz_pX& x, const zz_pX& a, const zz_pX& b, long n) +{ + zz_pX y; + mul(y, a, b); + trunc(x, y, n); +} + + +void FFTMulTrunc(zz_pX& x, const zz_pX& a, const zz_pX& b, long n) +{ + if (IsZero(a) || IsZero(b)) { + clear(x); + return; + } + + long d = deg(a) 
+ deg(b); + if (n > d + 1) + n = d + 1; + + long k = NextPowerOfTwo(d + 1); + fftRep R1(INIT_SIZE, k), R2(INIT_SIZE, k); + + TofftRep(R1, a, k); + TofftRep(R2, b, k); + mul(R1, R1, R2); + FromfftRep(x, R1, 0, n-1); +} + +void MulTrunc(zz_pX& x, const zz_pX& a, const zz_pX& b, long n) +{ + if (n < 0) LogicError("MulTrunc: bad args"); + + if (deg(a) <= NTL_zz_pX_MUL_CROSSOVER || deg(b) <= NTL_zz_pX_MUL_CROSSOVER) + PlainMulTrunc(x, a, b, n); + else + FFTMulTrunc(x, a, b, n); +} + +void PlainSqrTrunc(zz_pX& x, const zz_pX& a, long n) +{ + zz_pX y; + sqr(y, a); + trunc(x, y, n); +} + + +void FFTSqrTrunc(zz_pX& x, const zz_pX& a, long n) +{ + if (IsZero(a)) { + clear(x); + return; + } + + long d = 2*deg(a); + if (n > d + 1) + n = d + 1; + + long k = NextPowerOfTwo(d + 1); + fftRep R1(INIT_SIZE, k); + + TofftRep(R1, a, k); + mul(R1, R1, R1); + FromfftRep(x, R1, 0, n-1); +} + +void SqrTrunc(zz_pX& x, const zz_pX& a, long n) +{ + if (n < 0) LogicError("SqrTrunc: bad args"); + + if (deg(a) <= NTL_zz_pX_MUL_CROSSOVER) + PlainSqrTrunc(x, a, n); + else + FFTSqrTrunc(x, a, n); +} + + + +void FastTraceVec(vec_zz_p& S, const zz_pX& f) +{ + long n = deg(f); + + if (n <= 0) + LogicError("FastTraceVec: bad args"); + + if (n == 0) { + S.SetLength(0); + return; + } + + if (n == 1) { + S.SetLength(1); + set(S[0]); + return; + } + + long i; + zz_pX f1; + + f1.rep.SetLength(n-1); + for (i = 0; i <= n-2; i++) + f1.rep[i] = f.rep[n-i]; + f1.normalize(); + + zz_pX f2; + f2.rep.SetLength(n-1); + for (i = 0; i <= n-2; i++) + mul(f2.rep[i], f.rep[n-1-i], i+1); + f2.normalize(); + + zz_pX f3; + InvTrunc(f3, f1, n-1); + MulTrunc(f3, f3, f2, n-1); + + S.SetLength(n); + + S[0] = n; + for (i = 1; i < n; i++) + negate(S[i], coeff(f3, i-1)); +} + + +void PlainTraceVec(vec_zz_p& S, const zz_pX& ff) +{ + if (deg(ff) <= 0) + LogicError("TraceVec: bad args"); + + zz_pX f; + f = ff; + + MakeMonic(f); + + long n = deg(f); + + S.SetLength(n); + + if (n == 0) + return; + + long k, i; + zz_p acc, t; + + const zz_p *fp = f.rep.elts(); + zz_p *sp = S.elts(); + + sp[0] = n; + + for (k = 1; k < n; k++) { + mul(acc, fp[n-k], k); + + for (i = 1; i < k; i++) { + mul(t, fp[n-i], rep(sp[k-i])); + add(acc, acc, t); + } + + negate(sp[k], acc); + } +} + +void TraceVec(vec_zz_p& S, const zz_pX& f) +{ + if (deg(f) <= NTL_zz_pX_TRACE_CROSSOVER) + PlainTraceVec(S, f); + else + FastTraceVec(S, f); +} + +void ComputeTraceVec(vec_zz_p& S, const zz_pXModulus& F) +{ + if (!F.UseFFT) { + PlainTraceVec(S, F.f); + return; + } + + long i; + long n = F.n; + + fftRep R; + zz_pX P, g; + + g.rep.SetLength(n-1); + for (i = 1; i < n; i++) + mul(g.rep[n-i-1], F.f.rep[n-i], i); + g.normalize(); + + TofftRep(R, g, F.l); + mul(R, R, F.HRep); + FromfftRep(P, R, n-2, 2*n-4); + + S.SetLength(n); + + S[0] = n; + for (i = 1; i < n; i++) + negate(S[i], coeff(P, n-1-i)); +} + +void TraceMod(zz_p& x, const zz_pX& a, const zz_pXModulus& F) +{ + long n = F.n; + + if (deg(a) >= n) + LogicError("trace: bad args"); + + do { // NOTE: thread safe lazy init + Lazy<vec_zz_p>::Builder builder(F.tracevec.val()); + if (!builder()) break; + UniquePtr<vec_zz_p> p; + p.make(); + ComputeTraceVec(*p, F); + builder.move(p); + } while (0); + + InnerProduct(x, a.rep, *F.tracevec.val()); +} + + +void TraceMod(zz_p& x, const zz_pX& a, const zz_pX& f) +{ + if (deg(a) >= deg(f) || deg(f) <= 0) + LogicError("trace: bad args"); + + project(x, TraceVec(f), a); +} + + +void PlainResultant(zz_p& rres, const zz_pX& a, const zz_pX& b) +{ + zz_p res; + + if (IsZero(a) || IsZero(b)) + clear(res); + else if (deg(a) == 0 &&
deg(b) == 0) + set(res); + else { + long d0, d1, d2; + zz_p lc; + set(res); + + long n = max(deg(a),deg(b)) + 1; + zz_pX u(INIT_SIZE, n), v(INIT_SIZE, n); + + u = a; + v = b; + + for (;;) { + d0 = deg(u); + d1 = deg(v); + lc = LeadCoeff(v); + + PlainRem(u, u, v); + swap(u, v); + + d2 = deg(v); + if (d2 >= 0) { + power(lc, lc, d0-d2); + mul(res, res, lc); + if (d0 & d1 & 1) negate(res, res); + } + else { + if (d1 == 0) { + power(lc, lc, d0); + mul(res, res, lc); + } + else + clear(res); + + break; + } + } + } + + rres = res; +} + + +void ResIterHalfGCD(zz_pXMatrix& M_out, zz_pX& U, zz_pX& V, long d_red, + vec_zz_p& cvec, vec_long& dvec) +{ + M_out(0,0).SetMaxLength(d_red); + M_out(0,1).SetMaxLength(d_red); + M_out(1,0).SetMaxLength(d_red); + M_out(1,1).SetMaxLength(d_red); + + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + long goal = deg(U) - d_red; + + if (deg(V) <= goal) + return; + + zz_pX Q, t(INIT_SIZE, d_red); + + + while (deg(V) > goal) { + append(cvec, LeadCoeff(V)); + append(dvec, dvec[dvec.length()-1]-deg(U)+deg(V)); + PlainDivRem(Q, U, U, V); + swap(U, V); + + mul(t, Q, M_out(1,0)); + sub(t, M_out(0,0), t); + M_out(0,0) = M_out(1,0); + M_out(1,0) = t; + + mul(t, Q, M_out(1,1)); + sub(t, M_out(0,1), t); + M_out(0,1) = M_out(1,1); + M_out(1,1) = t; + } +} + + + +void ResHalfGCD(zz_pXMatrix& M_out, const zz_pX& U, const zz_pX& V, long d_red, + vec_zz_p& cvec, vec_long& dvec) +{ + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + set(M_out(0,0)); clear(M_out(0,1)); + clear(M_out(1,0)); set(M_out(1,1)); + + return; + } + + + long n = deg(U) - 2*d_red + 2; + if (n < 0) n = 0; + + zz_pX U1, V1; + + RightShift(U1, U, n); + RightShift(V1, V, n); + + if (d_red <= NTL_zz_pX_HalfGCD_CROSSOVER) { + ResIterHalfGCD(M_out, U1, V1, d_red, cvec, dvec); + return; + } + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + zz_pXMatrix M1; + + ResHalfGCD(M1, U1, V1, d1, cvec, dvec); + mul(U1, V1, M1); + + long d2 = deg(V1) - deg(U) + n + d_red; + + if (IsZero(V1) || d2 <= 0) { + M_out = M1; + return; + } + + + zz_pX Q; + zz_pXMatrix M2; + + append(cvec, LeadCoeff(V1)); + append(dvec, dvec[dvec.length()-1]-deg(U1)+deg(V1)); + DivRem(Q, U1, U1, V1); + swap(U1, V1); + + ResHalfGCD(M2, U1, V1, d2, cvec, dvec); + + zz_pX t(INIT_SIZE, deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,0)); + sub(t, M1(0,0), t); + swap(M1(0,0), M1(1,0)); + swap(M1(1,0), t); + + t.kill(); + + t.SetMaxLength(deg(M1(1,1))+deg(Q)+1); + + mul(t, Q, M1(1,1)); + sub(t, M1(0,1), t); + swap(M1(0,1), M1(1,1)); + swap(M1(1,1), t); + + t.kill(); + + mul(M_out, M2, M1); +} + +void ResHalfGCD(zz_pX& U, zz_pX& V, vec_zz_p& cvec, vec_long& dvec) +{ + long d_red = (deg(U)+1)/2; + + if (IsZero(V) || deg(V) <= deg(U) - d_red) { + return; + } + + long du = deg(U); + + + long d1 = (d_red + 1)/2; + if (d1 < 1) d1 = 1; + if (d1 >= d_red) d1 = d_red - 1; + + zz_pXMatrix M1; + + ResHalfGCD(M1, U, V, d1, cvec, dvec); + mul(U, V, M1); + + long d2 = deg(V) - du + d_red; + + if (IsZero(V) || d2 <= 0) { + return; + } + + M1(0,0).kill(); + M1(0,1).kill(); + M1(1,0).kill(); + M1(1,1).kill(); + + + zz_pX Q; + + append(cvec, LeadCoeff(V)); + append(dvec, dvec[dvec.length()-1]-deg(U)+deg(V)); + DivRem(Q, U, U, V); + swap(U, V); + + ResHalfGCD(M1, U, V, d2, cvec, dvec); + + mul(U, V, M1); +} + + +void resultant(zz_p& rres, const zz_pX& u, const zz_pX& v) +{ + if (deg(u) <= NTL_zz_pX_GCD_CROSSOVER || deg(v) <= NTL_zz_pX_GCD_CROSSOVER) { + PlainResultant(rres, u, v); + return; + } + + zz_pX u1, v1; + + u1 = 
u;
+   v1 = v;
+
+   zz_p res, t;
+   set(res);
+
+   if (deg(u1) == deg(v1)) {
+      rem(u1, u1, v1);
+      swap(u1, v1);
+
+      if (IsZero(v1)) {
+         clear(rres);
+         return;
+      }
+
+      power(t, LeadCoeff(u1), deg(u1) - deg(v1));
+      mul(res, res, t);
+      if (deg(u1) & 1)
+         negate(res, res);
+   }
+   else if (deg(u1) < deg(v1)) {
+      swap(u1, v1);
+      if (deg(u1) & deg(v1) & 1)
+         negate(res, res);
+   }
+
+   // deg(u1) > deg(v1) && v1 != 0
+
+   vec_zz_p cvec;
+   vec_long dvec;
+
+   cvec.SetMaxLength(deg(v1)+2);
+   dvec.SetMaxLength(deg(v1)+2);
+
+   append(cvec, LeadCoeff(u1));
+   append(dvec, deg(u1));
+
+
+   while (deg(u1) > NTL_zz_pX_GCD_CROSSOVER && !IsZero(v1)) {
+      ResHalfGCD(u1, v1, cvec, dvec);
+
+      if (!IsZero(v1)) {
+         append(cvec, LeadCoeff(v1));
+         append(dvec, deg(v1));
+         rem(u1, u1, v1);
+         swap(u1, v1);
+      }
+   }
+
+   if (IsZero(v1) && deg(u1) > 0) {
+      clear(rres);
+      return;
+   }
+
+   long i, l;
+   l = dvec.length();
+
+   if (deg(u1) == 0) {
+      // we went all the way...
+
+      for (i = 0; i <= l-3; i++) {
+         power(t, cvec[i+1], dvec[i]-dvec[i+2]);
+         mul(res, res, t);
+         if (dvec[i] & dvec[i+1] & 1)
+            negate(res, res);
+      }
+
+      power(t, cvec[l-1], dvec[l-2]);
+      mul(res, res, t);
+   }
+   else {
+      for (i = 0; i <= l-3; i++) {
+         power(t, cvec[i+1], dvec[i]-dvec[i+2]);
+         mul(res, res, t);
+         if (dvec[i] & dvec[i+1] & 1)
+            negate(res, res);
+      }
+
+      power(t, cvec[l-1], dvec[l-2]-deg(v1));
+      mul(res, res, t);
+      if (dvec[l-2] & dvec[l-1] & 1)
+         negate(res, res);
+
+      PlainResultant(t, u1, v1);
+      mul(res, res, t);
+   }
+
+   rres = res;
+}
+
+void NormMod(zz_p& x, const zz_pX& a, const zz_pX& f)
+{
+   if (deg(f) <= 0 || deg(a) >= deg(f))
+      LogicError("norm: bad args");
+
+   if (IsZero(a)) {
+      clear(x);
+      return;
+   }
+
+   zz_p t;
+   resultant(t, f, a);
+   if (!IsOne(LeadCoeff(f))) {
+      zz_p t1;
+      power(t1, LeadCoeff(f), deg(a));
+      inv(t1, t1);
+      mul(t, t, t1);
+   }
+
+   x = t;
+}
+
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/lzz_pXCharPoly.c b/thirdparty/linux/ntl/src/lzz_pXCharPoly.c
new file mode 100644
index 0000000000..c8d0c6bafc
--- /dev/null
+++ b/thirdparty/linux/ntl/src/lzz_pXCharPoly.c
@@ -0,0 +1,77 @@
+#include <NTL/lzz_pX.h>
+
+#include <NTL/mat_lzz_p.h>
+
+NTL_START_IMPL
+
+static
+void HessCharPoly(zz_pX& g, const zz_pX& a, const zz_pX& f)
+{
+   long n = deg(f);
+   if (n <= 0 || deg(a) >= n)
+      LogicError("HessCharPoly: bad args");
+
+   mat_zz_p M;
+   M.SetDims(n, n);
+
+   long i, j;
+
+   zz_pX t;
+   t = a;
+
+   // row i of M holds the coefficient vector of a*X^i mod f, so M is the
+   // matrix of multiplication by a on zz_p[X]/(f) in the power basis
+   for (i = 0; i < n; i++) {
+      for (j = 0; j < n; j++)
+         M[i][j] = coeff(t, j);
+
+      if (i < n-1)
+         MulByXMod(t, t, f);
+   }
+
+   CharPoly(g, M);
+}
+
+void CharPolyMod(zz_pX& g, const zz_pX& a, const zz_pX& ff)
+{
+   zz_pX f = ff;
+   MakeMonic(f);
+   long n = deg(f);
+
+   if (n <= 0 || deg(a) >= n)
+      LogicError("CharPoly: bad args");
+
+   if (IsZero(a)) {
+      clear(g);
+      SetCoeff(g, n);
+      return;
+   }
+
+   if (n > 90 || (zz_p::PrimeCnt() <= 1 && n > 45)) {
+      zz_pX h;
+      MinPolyMod(h, a, f);
+      if (deg(h) == n) {
+         g = h;
+         return;
+      }
+   }
+
+   if (zz_p::modulus() < n+1) {
+      HessCharPoly(g, a, f);
+      return;
+   }
+
+   // the characteristic polynomial of a mod f is g(x) = resultant_Y(f(Y), x - a(Y));
+   // since f is monic, evaluate at the n+1 points x = 0, ..., n and interpolate
+   vec_zz_p u(INIT_SIZE, n+1), v(INIT_SIZE, n+1);
+
+   zz_pX h, h1;
+   negate(h, a);
+   long i;
+
+   for (i = 0; i <= n; i++) {
+      u[i] = i;
+      add(h1, h, u[i]);
+      resultant(v[i], f, h1);
+   }
+
+   interpolate(g, u, v);
+}
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/lzz_pXFactoring.c b/thirdparty/linux/ntl/src/lzz_pXFactoring.c
new file mode 100644
index 0000000000..91b01eca39
--- /dev/null
+++ b/thirdparty/linux/ntl/src/lzz_pXFactoring.c
@@ -0,0 +1,1925 @@
+
+#include <NTL/lzz_pXFactoring.h>
+#include <NTL/mat_lzz_p.h>
+#include <NTL/FacVec.h>
+
+#include <NTL/new.h>
+
+NTL_START_IMPL
+
+
+
+void SquareFreeDecomp(vec_pair_zz_pX_long& u, const zz_pX&
ff) +{ + zz_pX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SquareFreeDecomp: bad args"); + + zz_pX r, t, v, tmp1; + long m, j, finished, done; + + u.SetLength(0); + + if (deg(f) == 0) + return; + + m = 1; + finished = 0; + + do { + j = 1; + diff(tmp1, f); + GCD(r, f, tmp1); + div(t, f, r); + + if (deg(t) > 0) { + done = 0; + do { + GCD(v, r, t); + div(tmp1, t, v); + if (deg(tmp1) > 0) append(u, cons(tmp1, j*m)); + if (deg(v) > 0) { + div(r, r, v); + t = v; + j++; + } + else + done = 1; + } while (!done); + if (deg(r) == 0) finished = 1; + } + + if (!finished) { + /* r is a p-th power */ + long p, k, d; + p = long(zz_p::modulus()); + d = deg(r)/p; + f.rep.SetLength(d+1); + for (k = 0; k <= d; k++) + f.rep[k] = r.rep[k*p]; + m = m*p; + } + } while (!finished); +} + + + +static +void NullSpace(long& r, vec_long& D, vec_vec_zz_p& M, long verbose) +{ + long k, l, n; + long i, j; + long pos; + zz_p t1, t2; + zz_p *x, *y; + + n = M.length(); + + D.SetLength(n); + for (j = 0; j < n; j++) D[j] = -1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + long T1, T2; + mulmod_precon_t T1pinv; + + r = 0; + + l = 0; + for (k = 0; k < n; k++) { + + if (verbose && k % 10 == 0) cerr << "+"; + + pos = -1; + for (i = l; i < n; i++) { + if (!IsZero(M[i][k])) { + pos = i; + break; + } + } + + if (pos != -1) { + swap(M[pos], M[l]); + + // make M[l, k] == -1 mod p + + inv(t1, M[l][k]); + negate(t1, t1); + for (j = k+1; j < n; j++) { + mul(M[l][j], M[l][j], t1); + } + + for (i = l+1; i < n; i++) { + // M[i] = M[i] + M[l]*M[i,k] + + t1 = M[i][k]; + + T1 = rep(t1); + T1pinv = PrepMulModPrecon(T1, p, pinv); + + x = M[i].elts() + (k+1); + y = M[l].elts() + (k+1); + + for (j = k+1; j < n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + T2 = MulModPrecon(rep(*y), T1, p, T1pinv); + T2 = AddMod(T2, rep(*x), p); + (*x).LoopHole() = T2; + } + } + + D[k] = l; // variable k is defined by row l + l++; + + } + else { + r++; + } + } +} + + + +static +void BuildMatrix(mat_zz_p& M, + long n, const zz_pX& g, const zz_pXModulus& F, long verbose) +{ + zz_pXMultiplier G; + zz_pX h; + + M.SetDims(n, n); + + build(G, g, F); + + set(h); + for (long i = 0; i < n; i++) { + if (verbose && i % 10 == 0) cerr << "+"; + + VectorCopy(M[i], h, n); + + if (i < n-1) + MulMod(h, h, G, F); + } + + for (long i = 0; i < n; i++) + add(M[i][i], M[i][i], -1); + +} + + + +static +void RecFindRoots(vec_zz_p& x, const zz_pX& f) +{ + if (deg(f) == 0) return; + + if (deg(f) == 1) { + long k = x.length(); + x.SetLength(k+1); + negate(x[k], ConstTerm(f)); + return; + } + + zz_pX h; + + zz_p r; + + long p1 = zz_p::modulus() >> 1; + + { + zz_pXModulus F; + build(F, f); + + do { + random(r); + PowerXPlusAMod(h, r, p1, F); + add(h, h, -1); + GCD(h, h, f); + } while (deg(h) <= 0 || deg(h) == deg(f)); + } + + RecFindRoots(x, h); + div(h, f, h); + RecFindRoots(x, h); +} + +void FindRoots(vec_zz_p& x, const zz_pX& ff) +{ + zz_pX f = ff; + + x.SetMaxLength(deg(f)); + x.SetLength(0); + RecFindRoots(x, f); +} + + +static +void RandomBasisElt(zz_pX& g, mat_zz_p& ker) +{ + long r = ker.NumRows(); + long n = ker.NumCols(); + + vec_zz_p v; + v.SetLength(r); + for (long i = 0; i < r; i++) random(v[i]); + + mul(g.rep, v, ker); + g.normalize(); +} + + + +static +void split(zz_pX& f1, zz_pX& g1, zz_pX& f2, zz_pX& g2, + const zz_pX& f, const zz_pX& g, + const vec_zz_p& roots, long lo, long mid) +{ + long r = mid-lo+1; + + zz_pXModulus F; + build(F, f); + + vec_zz_p lroots(INIT_SIZE, r); + long i; + + for (i = 0; i < r; i++) + lroots[i] = roots[lo+i]; + + + 
zz_pX h, a, d; + BuildFromRoots(h, lroots); + CompMod(a, h, g, F); + + + GCD(f1, a, f); + + div(f2, f, f1); + + rem(g1, g, f1); + rem(g2, g, f2); +} + +static +void RecFindFactors(vec_zz_pX& factors, const zz_pX& f, const zz_pX& g, + const vec_zz_p& roots, long lo, long hi) +{ + long r = hi-lo+1; + + if (r == 0) return; + + if (r == 1) { + append(factors, f); + return; + } + + zz_pX f1, g1, f2, g2; + + long mid = (lo+hi)/2; + + split(f1, g1, f2, g2, f, g, roots, lo, mid); + + RecFindFactors(factors, f1, g1, roots, lo, mid); + RecFindFactors(factors, f2, g2, roots, mid+1, hi); +} + + +static +void FindFactors(vec_zz_pX& factors, const zz_pX& f, const zz_pX& g, + const vec_zz_p& roots) +{ + long r = roots.length(); + + factors.SetMaxLength(r); + factors.SetLength(0); + + RecFindFactors(factors, f, g, roots, 0, r-1); +} + +#if 0 + +static +void IterFindFactors(vec_zz_pX& factors, const zz_pX& f, + const zz_pX& g, const vec_zz_p& roots) +{ + long r = roots.length(); + long i; + zz_pX h; + + factors.SetLength(r); + + for (i = 0; i < r; i++) { + sub(h, g, roots[i]); + GCD(factors[i], f, h); + } +} + +#endif + + + +void SFBerlekamp(vec_zz_pX& factors, const zz_pX& ff, long verbose) +{ + zz_pX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SFBerlekamp: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + double t; + + long p; + + p = zz_p::modulus(); + + long n = deg(f); + + zz_pXModulus F; + + build(F, f); + + zz_pX g, h; + + if (verbose) { cerr << "computing X^p..."; t = GetTime(); } + PowerXMod(g, p, F); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + mat_zz_p M, ker; + + if (verbose) { cerr << "building matrix..."; t = GetTime(); } + BuildMatrix(M, n, g, F, verbose); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + if (verbose) { cerr << "diagonalizing..."; t = GetTime(); } + kernel(ker, M); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + + M.kill(); + + long r = ker.NumRows(); + + if (verbose) cerr << "number of factors = " << r << "\n"; + + if (r == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + if (verbose) { cerr << "factor extraction..."; t = GetTime(); } + + vec_zz_p roots; + + RandomBasisElt(g, ker); + MinPolyMod(h, g, F, r); + FindRoots(roots, h); + FindFactors(factors, f, g, roots); + + zz_pX g1; + vec_zz_pX S, S1; + long i; + + while (factors.length() < r) { + if (verbose) cerr << "+"; + RandomBasisElt(g, ker); + S.kill(); + for (i = 0; i < factors.length(); i++) { + const zz_pX& f = factors[i]; + if (deg(f) == 1) { + append(S, f); + continue; + } + build(F, f); + rem(g1, g, F); + if (deg(g1) <= 0) { + append(S, f); + continue; + } + MinPolyMod(h, g1, F, min(deg(f), r-factors.length()+1)); + FindRoots(roots, h); + S1.kill(); + FindFactors(S1, f, g1, roots); + append(S, S1); + } + swap(factors, S); + } + + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + if (verbose) { + cerr << "degrees:"; + long i; + for (i = 0; i < factors.length(); i++) + cerr << " " << deg(factors[i]); + cerr << "\n"; + } +} + + +void berlekamp(vec_pair_zz_pX_long& factors, const zz_pX& f, long verbose) +{ + double t; + vec_pair_zz_pX_long sfd; + vec_zz_pX x; + + if (!IsOne(LeadCoeff(f))) + LogicError("berlekamp: bad args"); + + if (verbose) { cerr << "square-free decomposition..."; t = GetTime(); } + SquareFreeDecomp(sfd, f); + if (verbose) cerr << (GetTime()-t) << "\n"; + + factors.SetLength(0); + + long i, j; + + for (i = 0; i < sfd.length(); i++) { + if (verbose) { 
+ cerr << "factoring multiplicity " << sfd[i].b + << ", deg = " << deg(sfd[i].a) << "\n"; + } + + SFBerlekamp(x, sfd[i].a, verbose); + + for (j = 0; j < x.length(); j++) + append(factors, cons(x[j], sfd[i].b)); + } +} + + + +static +void AddFactor(vec_pair_zz_pX_long& factors, const zz_pX& g, long d, long verbose) +{ + if (verbose) + cerr << "degree=" << d << ", number=" << deg(g)/d << "\n"; + append(factors, cons(g, d)); +} + +static +void ProcessTable(zz_pX& f, vec_pair_zz_pX_long& factors, + const zz_pXModulus& F, long limit, const vec_zz_pX& tbl, + long d, long verbose) + +{ + if (limit == 0) return; + + if (verbose) cerr << "+"; + + zz_pX t1; + + if (limit == 1) { + GCD(t1, f, tbl[0]); + if (deg(t1) > 0) { + AddFactor(factors, t1, d, verbose); + div(f, f, t1); + } + + return; + } + + long i; + + t1 = tbl[0]; + for (i = 1; i < limit; i++) + MulMod(t1, t1, tbl[i], F); + + GCD(t1, f, t1); + + if (deg(t1) == 0) return; + + div(f, f, t1); + + zz_pX t2; + + i = 0; + d = d - limit + 1; + + while (2*d <= deg(t1)) { + GCD(t2, tbl[i], t1); + if (deg(t2) > 0) { + AddFactor(factors, t2, d, verbose); + div(t1, t1, t2); + } + + i++; + d++; + } + + if (deg(t1) > 0) + AddFactor(factors, t1, deg(t1), verbose); +} + + +void TraceMap(zz_pX& w, const zz_pX& a, long d, const zz_pXModulus& F, + const zz_pX& b) + +{ + if (d < 0) LogicError("TraceMap: bad args"); + + zz_pX y, z, t; + + z = b; + y = a; + clear(w); + + while (d) { + if (d == 1) { + if (IsZero(w)) + w = y; + else { + CompMod(w, w, z, F); + add(w, w, y); + } + } + else if ((d & 1) == 0) { + Comp2Mod(z, t, z, y, z, F); + add(y, t, y); + } + else if (IsZero(w)) { + w = y; + Comp2Mod(z, t, z, y, z, F); + add(y, t, y); + } + else { + Comp3Mod(z, t, w, z, y, w, z, F); + add(w, w, y); + add(y, t, y); + } + + d = d >> 1; + } +} + + +void PowerCompose(zz_pX& y, const zz_pX& h, long q, const zz_pXModulus& F) +{ + if (q < 0) LogicError("PowerCompose: bad args"); + + zz_pX z(INIT_SIZE, F.n); + long sw; + + z = h; + SetX(y); + + while (q) { + sw = 0; + + if (q > 1) sw = 2; + if (q & 1) { + if (IsX(y)) + y = z; + else + sw = sw | 1; + } + + switch (sw) { + case 0: + break; + + case 1: + CompMod(y, y, z, F); + break; + + case 2: + CompMod(z, z, z, F); + break; + + case 3: + Comp2Mod(y, z, y, z, z, F); + break; + } + + q = q >> 1; + } +} + + +long ProbIrredTest(const zz_pX& f, long iter) +{ + long n = deg(f); + + if (n <= 0) return 0; + if (n == 1) return 1; + + long p; + p = zz_p::modulus(); + + zz_pXModulus F; + + build(F, f); + + zz_pX b, r, s; + + PowerXMod(b, p, F); + + long i; + + for (i = 0; i < iter; i++) { + random(r, n); + TraceMap(s, r, n, F, b); + + if (deg(s) > 0) return 0; + } + + if (p >= n) return 1; + + if (n % p != 0) return 1; + + PowerCompose(s, b, n/p, F); + return !IsX(s); +} + +NTL_CHEAP_THREAD_LOCAL long zz_pX_BlockingFactor = 10; + +void DDF(vec_pair_zz_pX_long& factors, const zz_pX& ff, const zz_pX& hh, + long verbose) +{ + zz_pX f = ff; + zz_pX h = hh; + + if (!IsOne(LeadCoeff(f))) + LogicError("DDF: bad args"); + + factors.SetLength(0); + + if (deg(f) == 0) + return; + + if (deg(f) == 1) { + AddFactor(factors, f, 1, verbose); + return; + } + + long CompTableSize = 2*SqrRoot(deg(f)); + + long GCDTableSize = zz_pX_BlockingFactor; + + zz_pXModulus F; + build(F, f); + + zz_pXArgument H; + + build(H, h, F, min(CompTableSize, deg(f))); + + long i, d, limit, old_n; + zz_pX g, X; + + + vec_zz_pX tbl(INIT_SIZE, GCDTableSize); + + SetX(X); + + i = 0; + g = h; + d = 1; + limit = GCDTableSize; + + + while (2*d <= deg(f)) { + + old_n = deg(f); + 
sub(tbl[i], g, X); + i++; + if (i == limit) { + ProcessTable(f, factors, F, i, tbl, d, verbose); + i = 0; + } + + d = d + 1; + if (2*d <= deg(f)) { + // we need to go further + + if (deg(f) < old_n) { + // f has changed + + build(F, f); + rem(h, h, f); + rem(g, g, f); + build(H, h, F, min(CompTableSize, deg(f))); + } + + CompMod(g, g, H, F); + } + } + + ProcessTable(f, factors, F, i, tbl, d-1, verbose); + + if (!IsOne(f)) AddFactor(factors, f, deg(f), verbose); +} + + + +void RootEDF(vec_zz_pX& factors, const zz_pX& f, long verbose) +{ + vec_zz_p roots; + double t; + + if (verbose) { cerr << "finding roots..."; t = GetTime(); } + FindRoots(roots, f); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + long r = roots.length(); + factors.SetLength(r); + for (long j = 0; j < r; j++) { + SetX(factors[j]); + sub(factors[j], factors[j], roots[j]); + } +} + +static +void EDFSplit(vec_zz_pX& v, const zz_pX& f, const zz_pX& b, long d) +{ + zz_pX a, g, h; + zz_pXModulus F; + vec_zz_p roots; + + build(F, f); + long n = F.n; + long r = n/d; + random(a, n); + TraceMap(g, a, d, F, b); + MinPolyMod(h, g, F, r); + FindRoots(roots, h); + FindFactors(v, f, g, roots); +} + +static +void RecEDF(vec_zz_pX& factors, const zz_pX& f, const zz_pX& b, long d, + long verbose) +{ + vec_zz_pX v; + long i; + + if (verbose) cerr << "+"; + + EDFSplit(v, f, b, d); + for (i = 0; i < v.length(); i++) { + if (deg(v[i]) == d) { + append(factors, v[i]); + } + else { + zz_pX bb; + rem(bb, b, v[i]); + RecEDF(factors, v[i], bb, d, verbose); + } + } +} + + +void EDF(vec_zz_pX& factors, const zz_pX& ff, const zz_pX& bb, + long d, long verbose) + +{ + zz_pX f = ff; + zz_pX b = bb; + + if (!IsOne(LeadCoeff(f))) + LogicError("EDF: bad args"); + + long n = deg(f); + long r = n/d; + + if (r == 0) { + factors.SetLength(0); + return; + } + + if (r == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + if (d == 1) { + RootEDF(factors, f, verbose); + return; + } + + + double t; + if (verbose) { + cerr << "computing EDF(" << d << "," << r << ")..."; + t = GetTime(); + } + + factors.SetLength(0); + + RecEDF(factors, f, b, d, verbose); + + if (verbose) cerr << (GetTime()-t) << "\n"; +} + + +void SFCanZass1(vec_pair_zz_pX_long& u, zz_pX& h, const zz_pX& f, long verbose) +{ + if (!IsOne(LeadCoeff(f)) || deg(f) == 0) + LogicError("SFCanZass1: bad args"); + + double t; + + long p = zz_p::modulus(); + + + zz_pXModulus F; + build(F, f); + + + if (verbose) { cerr << "computing X^p..."; t = GetTime(); } + PowerXMod(h, p, F); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + if (verbose) { cerr << "computing DDF..."; t = GetTime(); } + NewDDF(u, f, h, verbose); + if (verbose) { + t = GetTime()-t; + cerr << "DDF time: " << t << "\n"; + } +} + +void SFCanZass2(vec_zz_pX& factors, const vec_pair_zz_pX_long& u, + const zz_pX& h, long verbose) +{ + zz_pX hh; + vec_zz_pX v; + + factors.SetLength(0); + + long i; + for (i = 0; i < u.length(); i++) { + const zz_pX& g = u[i].a; + long d = u[i].b; + long r = deg(g)/d; + + if (r == 1) { + // g is already irreducible + + append(factors, g); + } + else { + // must perform EDF + + if (d == 1) { + // root finding + RootEDF(v, g, verbose); + append(factors, v); + } + else { + // general case + rem(hh, h, g); + EDF(v, g, hh, d, verbose); + append(factors, v); + } + } + } +} + + +void SFCanZass(vec_zz_pX& factors, const zz_pX& ff, long verbose) +{ + zz_pX f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("SFCanZass: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if 
(deg(f) == 1) { + factors.SetLength(1); + factors[0] = f; + return; + } + + factors.SetLength(0); + + double t; + + long p = zz_p::modulus(); + + + zz_pXModulus F; + build(F, f); + + zz_pX h; + + if (verbose) { cerr << "computing X^p..."; t = GetTime(); } + PowerXMod(h, p, F); + if (verbose) { cerr << (GetTime()-t) << "\n"; } + + vec_pair_zz_pX_long u; + if (verbose) { cerr << "computing DDF..."; t = GetTime(); } + NewDDF(u, f, h, verbose); + if (verbose) { + t = GetTime()-t; + cerr << "DDF time: " << t << "\n"; + } + + zz_pX hh; + vec_zz_pX v; + + long i; + for (i = 0; i < u.length(); i++) { + const zz_pX& g = u[i].a; + long d = u[i].b; + long r = deg(g)/d; + + if (r == 1) { + // g is already irreducible + + append(factors, g); + } + else { + // must perform EDF + + if (d == 1) { + // root finding + RootEDF(v, g, verbose); + append(factors, v); + } + else { + // general case + rem(hh, h, g); + EDF(v, g, hh, d, verbose); + append(factors, v); + } + } + } +} + +void CanZass(vec_pair_zz_pX_long& factors, const zz_pX& f, long verbose) +{ + if (!IsOne(LeadCoeff(f))) + LogicError("CanZass: bad args"); + + double t; + vec_pair_zz_pX_long sfd; + vec_zz_pX x; + + if (verbose) { cerr << "square-free decomposition..."; t = GetTime(); } + SquareFreeDecomp(sfd, f); + if (verbose) cerr << (GetTime()-t) << "\n"; + + factors.SetLength(0); + + long i, j; + + for (i = 0; i < sfd.length(); i++) { + if (verbose) { + cerr << "factoring multiplicity " << sfd[i].b + << ", deg = " << deg(sfd[i].a) << "\n"; + } + + SFCanZass(x, sfd[i].a, verbose); + + for (j = 0; j < x.length(); j++) + append(factors, cons(x[j], sfd[i].b)); + } +} + +void mul(zz_pX& f, const vec_pair_zz_pX_long& v) +{ + long i, j, n; + + n = 0; + for (i = 0; i < v.length(); i++) + n += v[i].b*deg(v[i].a); + + zz_pX g(INIT_SIZE, n+1); + + set(g); + for (i = 0; i < v.length(); i++) + for (j = 0; j < v[i].b; j++) { + mul(g, g, v[i].a); + } + + f = g; +} + + + + +static +long BaseCase(const zz_pX& h, long q, long a, const zz_pXModulus& F) +{ + long b, e; + zz_pX lh(INIT_SIZE, F.n); + + lh = h; + b = 1; + e = 0; + while (e < a-1 && !IsX(lh)) { + e++; + b *= q; + PowerCompose(lh, lh, q, F); + } + + if (!IsX(lh)) b *= q; + + return b; +} + + + +void TandemPowerCompose(zz_pX& y1, zz_pX& y2, const zz_pX& h, + long q1, long q2, const zz_pXModulus& F) +{ + zz_pX z(INIT_SIZE, F.n); + long sw; + + z = h; + SetX(y1); + SetX(y2); + + while (q1 || q2) { + sw = 0; + + if (q1 > 1 || q2 > 1) sw = 4; + + if (q1 & 1) { + if (IsX(y1)) + y1 = z; + else + sw = sw | 2; + } + + if (q2 & 1) { + if (IsX(y2)) + y2 = z; + else + sw = sw | 1; + } + + switch (sw) { + case 0: + break; + + case 1: + CompMod(y2, y2, z, F); + break; + + case 2: + CompMod(y1, y1, z, F); + break; + + case 3: + Comp2Mod(y1, y2, y1, y2, z, F); + break; + + case 4: + CompMod(z, z, z, F); + break; + + case 5: + Comp2Mod(z, y2, z, y2, z, F); + break; + + case 6: + Comp2Mod(z, y1, z, y1, z, F); + break; + + case 7: + Comp3Mod(z, y1, y2, z, y1, y2, z, F); + break; + } + + q1 = q1 >> 1; + q2 = q2 >> 1; + } +} + + + +long RecComputeDegree(long u, const zz_pX& h, const zz_pXModulus& F, + FacVec& fvec) +{ + if (IsX(h)) return 1; + + if (fvec[u].link == -1) return BaseCase(h, fvec[u].q, fvec[u].a, F); + + zz_pX h1, h2; + long q1, q2, r1, r2; + + q1 = fvec[fvec[u].link].val; + q2 = fvec[fvec[u].link+1].val; + + TandemPowerCompose(h1, h2, h, q1, q2, F); + r1 = RecComputeDegree(fvec[u].link, h2, F, fvec); + r2 = RecComputeDegree(fvec[u].link+1, h1, F, fvec); + return r1*r2; +} + + + + +long ComputeDegree(const 
zz_pX& h, const zz_pXModulus& F) + // f = F.f is assumed to be an "equal degree" polynomial + // h = X^p mod f + // the common degree of the irreducible factors of f is computed +{ + if (F.n == 1 || IsX(h)) + return 1; + + FacVec fvec; + + FactorInt(fvec, F.n); + + return RecComputeDegree(fvec.length()-1, h, F, fvec); +} + +long ProbComputeDegree(const zz_pX& h, const zz_pXModulus& F) +{ + if (F.n == 1 || IsX(h)) + return 1; + + long n = F.n; + + zz_pX P1, P2, P3; + + random(P1, n); + TraceMap(P2, P1, n, F, h); + ProbMinPolyMod(P3, P2, F, n/2); + + long r = deg(P3); + + if (r <= 0 || n % r != 0) + return 0; + else + return n/r; +} + + + +void FindRoot(zz_p& root, const zz_pX& ff) +// finds a root of ff. +// assumes that ff is monic and splits into distinct linear factors + +{ + zz_pXModulus F; + zz_pX h, h1, f; + zz_p r; + long p1; + + + f = ff; + + if (!IsOne(LeadCoeff(f))) + LogicError("FindRoot: bad args"); + + if (deg(f) == 0) + LogicError("FindRoot: bad args"); + + p1 = zz_p::modulus() >> 1; + h1 = 1; + + while (deg(f) > 1) { + build(F, f); + random(r); + PowerXPlusAMod(h, r, p1, F); + sub(h, h, h1); + GCD(h, h, f); + if (deg(h) > 0 && deg(h) < deg(f)) { + if (deg(h) > deg(f)/2) + div(f, f, h); + else + f = h; + } + } + + negate(root, ConstTerm(f)); +} + + +static +long power(long a, long e) +{ + long i, res; + + res = 1; + for (i = 1; i <= e; i++) + res = res * a; + + return res; +} + + +static +long IrredBaseCase(const zz_pX& h, long q, long a, const zz_pXModulus& F) +{ + long e; + zz_pX X, s, d; + + e = power(q, a-1); + PowerCompose(s, h, e, F); + SetX(X); + sub(s, s, X); + GCD(d, F.f, s); + return IsOne(d); +} + + +static +long RecIrredTest(long u, const zz_pX& h, const zz_pXModulus& F, + const FacVec& fvec) +{ + long q1, q2; + zz_pX h1, h2; + + if (IsX(h)) return 0; + + if (fvec[u].link == -1) { + return IrredBaseCase(h, fvec[u].q, fvec[u].a, F); + } + + + q1 = fvec[fvec[u].link].val; + q2 = fvec[fvec[u].link+1].val; + + TandemPowerCompose(h1, h2, h, q1, q2, F); + return RecIrredTest(fvec[u].link, h2, F, fvec) + && RecIrredTest(fvec[u].link+1, h1, F, fvec); +} + +long DetIrredTest(const zz_pX& f) +{ + if (deg(f) <= 0) return 0; + if (deg(f) == 1) return 1; + + zz_pXModulus F; + + build(F, f); + + zz_pX h; + + PowerXMod(h, zz_p::modulus(), F); + + zz_pX s; + PowerCompose(s, h, F.n, F); + if (!IsX(s)) return 0; + + FacVec fvec; + + FactorInt(fvec, F.n); + + return RecIrredTest(fvec.length()-1, h, F, fvec); +} + + + +long IterIrredTest(const zz_pX& f) +{ + if (deg(f) <= 0) return 0; + if (deg(f) == 1) return 1; + + zz_pXModulus F; + + build(F, f); + + zz_pX h; + + PowerXMod(h, zz_p::modulus(), F); + + long rootn = SqrRoot(deg(f)); + + long CompTableSize = 2*rootn; + + zz_pXArgument H; + + long UseModComp = 1; + + if (NumBits(zz_p::modulus()) < rootn/2) + UseModComp = 0; + + if (UseModComp) build(H, h, F, CompTableSize); + + long i, d, limit, limit_sqr; + zz_pX g, X, t, prod; + + + SetX(X); + + i = 0; + g = h; + d = 1; + limit = 2; + limit_sqr = limit*limit; + + set(prod); + + + while (2*d <= deg(f)) { + sub(t, g, X); + MulMod(prod, prod, t, F); + i++; + if (i == limit_sqr) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + + set(prod); + limit++; + limit_sqr = limit*limit; + i = 0; + } + + d = d + 1; + if (2*d <= deg(f)) { + if (UseModComp) + CompMod(g, g, H, F); + else + PowerMod(g, g, zz_p::modulus(), F); + } + } + + if (i > 0) { + GCD(t, f, prod); + if (!IsOne(t)) return 0; + } + + return 1; +} + + +static +void MulByXPlusY(vec_zz_pX& h, const zz_pX& f, const zz_pX& g) +// h 
represents the bivariate polynomial h[0] + h[1]*Y + ... + h[n-1]*Y^k, +// where the h[i]'s are polynomials in X, each of degree < deg(f), +// and k < deg(g). +// h is replaced by the bivariate polynomial h*(X+Y) (mod f(X), g(Y)). + +{ + long n = deg(g); + long k = h.length()-1; + + if (k < 0) return; + + if (k < n-1) { + h.SetLength(k+2); + h[k+1] = h[k]; + for (long i = k; i >= 1; i--) { + MulByXMod(h[i], h[i], f); + add(h[i], h[i], h[i-1]); + } + MulByXMod(h[0], h[0], f); + } + else { + zz_pX b, t; + + b = h[n-1]; + for (long i = n-1; i >= 1; i--) { + mul(t, b, g.rep[i]); + MulByXMod(h[i], h[i], f); + add(h[i], h[i], h[i-1]); + sub(h[i], h[i], t); + } + mul(t, b, g.rep[0]); + MulByXMod(h[0], h[0], f); + sub(h[0], h[0], t); + } + + // normalize + + k = h.length()-1; + while (k >= 0 && IsZero(h[k])) k--; + h.SetLength(k+1); +} + + + +static +void IrredCombine(zz_pX& x, const zz_pX& f, const zz_pX& g) +{ + if (deg(f) < deg(g)) { + IrredCombine(x, g, f); + return; + } + + // deg(f) >= deg(g)...not necessary, but maybe a little more + // time & space efficient + + long df = deg(f); + long dg = deg(g); + long m = df*dg; + + vec_zz_pX h(INIT_SIZE, dg); + + long i; + for (i = 0; i < dg; i++) h[i].SetMaxLength(df); + + h.SetLength(1); + set(h[0]); + + vec_zz_p a; + + a.SetLength(2*m); + + for (i = 0; i < 2*m; i++) { + a[i] = ConstTerm(h[0]); + if (i < 2*m-1) + MulByXPlusY(h, f, g); + } + + MinPolySeq(x, a, m); +} + +static +void BuildPrimePowerIrred(zz_pX& f, long q, long e) +{ + long n = power(q, e); + + do { + random(f, n); + SetCoeff(f, n); + } while (!IterIrredTest(f)); +} + +static +void RecBuildIrred(zz_pX& f, long u, const FacVec& fvec) +{ + if (fvec[u].link == -1) + BuildPrimePowerIrred(f, fvec[u].q, fvec[u].a); + else { + zz_pX g, h; + RecBuildIrred(g, fvec[u].link, fvec); + RecBuildIrred(h, fvec[u].link+1, fvec); + IrredCombine(f, g, h); + } +} + + +void BuildIrred(zz_pX& f, long n) +{ + if (n <= 0) + LogicError("BuildIrred: n must be positive"); + + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in BuildIrred"); + + if (n == 1) { + SetX(f); + return; + } + + FacVec fvec; + + FactorInt(fvec, n); + + RecBuildIrred(f, fvec.length()-1, fvec); +} + + + +void BuildRandomIrred(zz_pX& f, const zz_pX& g) +{ + zz_pXModulus G; + zz_pX h, ff; + + build(G, g); + do { + random(h, deg(g)); + IrredPolyMod(ff, h, G); + } while (deg(ff) < deg(g)); + + f = ff; +} + + +/************* NEW DDF ****************/ + +NTL_CHEAP_THREAD_LOCAL long zz_pX_GCDTableSize = 4; +static NTL_CHEAP_THREAD_LOCAL vec_zz_pX *BabyStepFile = 0; +static NTL_CHEAP_THREAD_LOCAL vec_zz_pX *GiantStepFile = 0; +static NTL_CHEAP_THREAD_LOCAL zz_pXArgument *HHH = 0; +static NTL_CHEAP_THREAD_LOCAL zz_pXAltArgument *HHH1 = 0; +static NTL_CHEAP_THREAD_LOCAL long OldN = 0; + + +static +void GenerateBabySteps(zz_pX& h1, const zz_pX& f, const zz_pX& h, long k, + long verbose) +{ + double t; + + if (verbose) { cerr << "generating baby steps..."; t = GetTime(); } + + zz_pXModulus F; + build(F, f); + + + (*BabyStepFile).SetLength(k-1); + + h1 = h; + + long i; + + long rootn = SqrRoot(F.n); + + if (NumBits(zz_p::modulus()) < rootn/2) { + for (i = 1; i <= k-1; i++) { + (*BabyStepFile)(i) = h1; + + PowerMod(h1, h1, zz_p::modulus(), F); + if (verbose) cerr << "+"; + } + } + else { + zz_pXArgument H; + build(H, h, F, 2*rootn); + + zz_pXAltArgument H1; + build(H1, H, F); + + + for (i = 1; i <= k-1; i++) { + (*BabyStepFile)(i) = h1; + + CompMod(h1, h1, H1, F); + if (verbose) cerr << "."; + } + } + + if (verbose) + cerr << (GetTime()-t) << "\n"; +} 
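+
+// Editor's usage sketch, not part of NTL: the baby-step/giant-step DDF
+// machinery below is normally reached through NewDDF(), or at the top
+// level through CanZass() defined earlier in this file.  A minimal caller
+// (the prime 17 and the degree 10 are arbitrary illustrative choices;
+// all names are standard NTL API) would look like this:
+//
+//    zz_p::init(17);                   // coefficient field GF(17)
+//    zz_pX f;
+//    random(f, 10);                    // random polynomial of degree < 10
+//    SetCoeff(f, 10);                  // make it monic of degree 10
+//    vec_pair_zz_pX_long factors;
+//    CanZass(factors, f);              // factors[i].a has multiplicity factors[i].b
+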
+ + + +static +void GenerateGiantSteps(const zz_pX& f, const zz_pX& h, long l, long verbose) +{ + zz_pXModulus F; + + build(F, f); + + build(*HHH, h, F, 2*SqrRoot(F.n)); + build(*HHH1, *HHH, F); + + OldN = F.n; + + (*GiantStepFile).SetLength(1); + (*GiantStepFile)(1) = h; +} + + +static +void NewAddFactor(vec_pair_zz_pX_long& u, const zz_pX& g, long m, long verbose) +{ + long len = u.length(); + + u.SetLength(len+1); + u[len].a = g; + u[len].b = m; + + if (verbose) { + cerr << "split " << m << " " << deg(g) << "\n"; + } +} + + + + +static +void NewProcessTable(vec_pair_zz_pX_long& u, zz_pX& f, const zz_pXModulus& F, + vec_zz_pX& buf, long size, long StartInterval, + long IntervalLength, long verbose) + +{ + if (size == 0) return; + + zz_pX& g = buf[size-1]; + + long i; + + for (i = 0; i < size-1; i++) + MulMod(g, g, buf[i], F); + + GCD(g, f, g); + + if (deg(g) == 0) return; + + div(f, f, g); + + long d = (StartInterval-1)*IntervalLength + 1; + i = 0; + long interval = StartInterval; + + while (i < size-1 && 2*d <= deg(g)) { + GCD(buf[i], buf[i], g); + if (deg(buf[i]) > 0) { + NewAddFactor(u, buf[i], interval, verbose); + div(g, g, buf[i]); + } + + i++; + interval++; + d += IntervalLength; + } + + if (deg(g) > 0) { + if (i == size-1) + NewAddFactor(u, g, interval, verbose); + else + NewAddFactor(u, g, (deg(g)+IntervalLength-1)/IntervalLength, verbose); + } +} + + +static +void FetchGiantStep(zz_pX& g, long gs, const zz_pXModulus& F) +{ + long l = (*GiantStepFile).length(); + zz_pX last; + + if (gs > l+1) + LogicError("bad arg to FetchGiantStep"); + + if (gs == l+1) { + last = (*GiantStepFile)(l); + if (F.n < OldN) { + rem(last, last, F); + for (long i = 0; i < (*HHH).H.length(); i++) + rem((*HHH).H[i], (*HHH).H[i], F); + build(*HHH1, *HHH, F); + OldN = F.n; + } + + (*GiantStepFile).SetLength(l+1); + CompMod((*GiantStepFile)(l+1), last, *HHH1, F); + g = (*GiantStepFile)(l+1); + } + else if (deg((*GiantStepFile)(gs)) >= F.n) + rem(g, (*GiantStepFile)(gs), F); + else + g = (*GiantStepFile)(gs); +} + + +static +void FetchBabySteps(vec_zz_pX& v, long k) +{ + v.SetLength(k); + + SetX(v[0]); + + long i; + for (i = 1; i <= k-1; i++) { + v[i] = (*BabyStepFile)(i); + } +} + + + +static +void GiantRefine(vec_pair_zz_pX_long& u, const zz_pX& ff, long k, long l, + long verbose) + +{ + double t; + + if (verbose) { + cerr << "giant refine..."; + t = GetTime(); + } + + u.SetLength(0); + + vec_zz_pX BabyStep; + + FetchBabySteps(BabyStep, k); + + vec_zz_pX buf(INIT_SIZE, zz_pX_GCDTableSize); + + zz_pX f; + f = ff; + + zz_pXModulus F; + build(F, f); + + zz_pX g; + zz_pX h; + + long size = 0; + + long first_gs; + + long d = 1; + + while (2*d <= deg(f)) { + + long old_n = deg(f); + + long gs = (d+k-1)/k; + long bs = gs*k - d; + + if (bs == k-1) { + size++; + if (size == 1) first_gs = gs; + FetchGiantStep(g, gs, F); + sub(buf[size-1], g, BabyStep[bs]); + } + else { + sub(h, g, BabyStep[bs]); + MulMod(buf[size-1], buf[size-1], h, F); + } + + if (verbose && bs == 0) cerr << "+"; + + if (size == zz_pX_GCDTableSize && bs == 0) { + NewProcessTable(u, f, F, buf, size, first_gs, k, verbose); + if (verbose) cerr << "*"; + size = 0; + } + + d++; + + if (2*d <= deg(f) && deg(f) < old_n) { + build(F, f); + + long i; + for (i = 1; i <= k-1; i++) + rem(BabyStep[i], BabyStep[i], F); + } + } + + if (size > 0) { + NewProcessTable(u, f, F, buf, size, first_gs, k, verbose); + if (verbose) cerr << "*"; + } + + if (deg(f) > 0) + NewAddFactor(u, f, 0, verbose); + + if (verbose) { + t = GetTime()-t; + cerr << "giant refine time: " << 
t << "\n"; + } +} + + +static +void IntervalRefine(vec_pair_zz_pX_long& factors, const zz_pX& ff, + long k, long gs, const vec_zz_pX& BabyStep, long verbose) + +{ + vec_zz_pX buf(INIT_SIZE, zz_pX_GCDTableSize); + + zz_pX f; + f = ff; + + zz_pXModulus F; + build(F, f); + + zz_pX g; + + FetchGiantStep(g, gs, F); + + long size = 0; + + long first_d; + + long d = (gs-1)*k + 1; + long bs = k-1; + + while (bs >= 0 && 2*d <= deg(f)) { + + long old_n = deg(f); + + if (size == 0) first_d = d; + rem(buf[size], BabyStep[bs], F); + sub(buf[size], buf[size], g); + size++; + + if (size == zz_pX_GCDTableSize) { + NewProcessTable(factors, f, F, buf, size, first_d, 1, verbose); + size = 0; + } + + d++; + bs--; + + if (bs >= 0 && 2*d <= deg(f) && deg(f) < old_n) { + build(F, f); + rem(g, g, F); + } + } + + NewProcessTable(factors, f, F, buf, size, first_d, 1, verbose); + + if (deg(f) > 0) + NewAddFactor(factors, f, deg(f), verbose); +} + + + + +static +void BabyRefine(vec_pair_zz_pX_long& factors, const vec_pair_zz_pX_long& u, + long k, long l, long verbose) + +{ + double t; + + if (verbose) { + cerr << "baby refine..."; + t = GetTime(); + } + + factors.SetLength(0); + + vec_zz_pX BabyStep; + + long i; + for (i = 0; i < u.length(); i++) { + const zz_pX& g = u[i].a; + long gs = u[i].b; + + if (gs == 0 || 2*((gs-1)*k+1) > deg(g)) + NewAddFactor(factors, g, deg(g), verbose); + else { + if (BabyStep.length() == 0) + FetchBabySteps(BabyStep, k); + IntervalRefine(factors, g, k, gs, BabyStep, verbose); + } + } + + if (verbose) { + t = GetTime()-t; + cerr << "baby refine time: " << t << "\n"; + } +} + + + + + +void NewDDF(vec_pair_zz_pX_long& factors, + const zz_pX& f, + const zz_pX& h, + long verbose) + +{ + if (!IsOne(LeadCoeff(f))) + LogicError("NewDDF: bad args"); + + if (deg(f) == 0) { + factors.SetLength(0); + return; + } + + if (deg(f) == 1) { + factors.SetLength(0); + append(factors, cons(f, 1L)); + return; + } + + long B = deg(f)/2; + + long k = SqrRoot(B); + + // we double the number of baby steps if it seems like + // baby steps are significantly cheaper than giant steps. 
+ // The calculations below are closely tied to a test in GenerateBabySteps: + // if nbm >= sdf/2, then scale should be 1 (baby steps and giant steps balanced) + if (B >= 500) { + long sdf = SqrRoot(deg(f)); + long nbm = NumBits(zz_p::modulus()); + double scale = 0.25*double(sdf)/double(nbm); + if (scale < 1) scale = 1; + if (scale > 2) scale = 2; + k = long(scale*k); + } + + long l = (B+k-1)/k; + + vec_zz_pX local_BabyStepFile; + vec_zz_pX local_GiantStepFile; + zz_pXArgument local_HHH; + zz_pXAltArgument local_HHH1; + + BabyStepFile = &local_BabyStepFile; + GiantStepFile = &local_GiantStepFile; + HHH = &local_HHH; + HHH1 = &local_HHH1; + + zz_pX h1; + GenerateBabySteps(h1, f, h, k, verbose); + GenerateGiantSteps(f, h1, l, verbose); + + vec_pair_zz_pX_long u; + GiantRefine(u, f, k, l, verbose); + BabyRefine(factors, u, k, l, verbose); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mach_desc.win b/thirdparty/linux/ntl/src/mach_desc.win new file mode 100644 index 0000000000..4aaed81968 --- /dev/null +++ b/thirdparty/linux/ntl/src/mach_desc.win @@ -0,0 +1,631 @@ +#ifndef NTL_mach_desc__H +#define NTL_mach_desc__H + + +#define NTL_BITS_PER_LONG (32) +#define NTL_MAX_LONG (2147483647L) +#define NTL_MAX_INT (2147483647) +#define NTL_BITS_PER_INT (32) +#define NTL_BITS_PER_SIZE_T (32) +#define NTL_ARITH_RIGHT_SHIFT (1) +#define NTL_NBITS_MAX (30) +#define NTL_WNBITS_MAX (30) +#define NTL_DOUBLE_PRECISION (53) +#define NTL_FDOUBLE_PRECISION (((double)(1L<<30))*((double)(1L<<22))) +#define NTL_LONGDOUBLE_OK (0) +#define NTL_WIDE_DOUBLE_DP ((wide_double(1L<<52))) +#define NTL_QUAD_FLOAT_SPLIT ((((double)(1L<<27)))+1.0) +#define NTL_EXT_DOUBLE (0) +#define NTL_FMA_DETECTED (1) +#define NTL_BIG_POINTERS (1) +#define NTL_MIN_LONG (-NTL_MAX_LONG - 1L) +#define NTL_MIN_INT (-NTL_MAX_INT - 1) + + + +#define NTL_BB_MUL_CODE0 \ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[8];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + lo = A[b & 7]; t = A[(b >> 3) & 7]; hi = t >> 29; lo ^= t << 3;\ + t = A[(b >> 6) & 7]; hi ^= t >> 26; lo ^= t << 6;\ + t = A[(b >> 9) & 7]; hi ^= t >> 23; lo ^= t << 9;\ + t = A[(b >> 12) & 7]; hi ^= t >> 20; lo ^= t << 12;\ + t = A[(b >> 15) & 7]; hi ^= t >> 17; lo ^= t << 15;\ + t = A[(b >> 18) & 7]; hi ^= t >> 14; lo ^= t << 18;\ + t = A[(b >> 21) & 7]; hi ^= t >> 11; lo ^= t << 21;\ + t = A[(b >> 24) & 7]; hi ^= t >> 8; lo ^= t << 24;\ + t = A[(b >> 27) & 7]; hi ^= t >> 5; lo ^= t << 27;\ + t = A[b >> 30]; hi ^= t >> 2; lo ^= t << 30;\ + if (a >> 31) hi ^= ((b & 0xb6db6db6UL) >> 1);\ + if ((a >> 30) & 1) hi ^= ((b & 0x24924924UL) >> 2);\ + c[0] = lo; c[1] = hi;\ + + + + + +#define NTL_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 28; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 24; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 20; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 16; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 12; lo ^= t << 20;\ + t = A[(b >> 24) & 
15]; hi ^= t >> 8; lo ^= t << 24;\ + t = A[b >> 28]; hi ^= t >> 4; lo ^= t << 28;\ + if (a >> 31) hi ^= ((b & 0xeeeeeeeeUL) >> 1);\ + if ((a >> 30) & 1) hi ^= ((b & 0xccccccccUL) >> 2);\ + if ((a >> 29) & 1) hi ^= ((b & 0x88888888UL) >> 3);\ + cp[i] = carry ^ lo; carry = hi;\ + }\ + cp[sb] = carry;\ + + + + + +#define NTL_BB_MUL_CODE2 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 28; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 24; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 20; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 16; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 12; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 8; lo ^= t << 24;\ + t = A[b >> 28]; hi ^= t >> 4; lo ^= t << 28;\ + if (a >> 31) hi ^= ((b & 0xeeeeeeeeUL) >> 1);\ + if ((a >> 30) & 1) hi ^= ((b & 0xccccccccUL) >> 2);\ + if ((a >> 29) & 1) hi ^= ((b & 0x88888888UL) >> 3);\ + cp[i] ^= (carry ^ lo); carry = hi;\ + }\ + cp[sb] ^= carry;\ + + + + + +#define NTL_SHORT_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 28; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 24; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 20; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 16; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 12; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 8; lo ^= t << 24;\ + t = A[b >> 28]; hi ^= t >> 4; lo ^= t << 28;\ + cp[i] = carry ^ lo; carry = hi;\ + }\ + cp[sb] = carry;\ + + + + + +#define NTL_HALF_BB_MUL_CODE0 \ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[4];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + lo = A[b & 3]; t = A[(b >> 2) & 3]; hi = t >> 30; lo ^= t << 2;\ + t = A[(b >> 4) & 3]; hi ^= t >> 28; lo ^= t << 4;\ + t = A[(b >> 6) & 3]; hi ^= t >> 26; lo ^= t << 6;\ + t = A[(b >> 8) & 3]; hi ^= t >> 24; lo ^= t << 8;\ + t = A[(b >> 10) & 3]; hi ^= t >> 22; lo ^= t << 10;\ + t = A[(b >> 12) & 3]; hi ^= t >> 20; lo ^= t << 12;\ + t = A[b >> 14]; hi ^= t >> 18; lo ^= t << 14;\ + if (a >> 31) hi ^= ((b & 0xaaaaUL) >> 1);\ + c[0] = lo; c[1] = hi;\ + + + + + +#define NTL_ALT_BB_MUL_CODE0 \ + _ntl_ulong A[8];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + const _ntl_ulong t3 = A[(b >> 3) & 7]; \ + const _ntl_ulong t6 = A[(b >> 6) & 7]; \ + const _ntl_ulong t9 = A[(b >> 9) & 7]; \ + const _ntl_ulong t12 = A[(b >> 12) & 7]; \ + const _ntl_ulong t15 = A[(b >> 15) & 7]; \ + const _ntl_ulong t18 = A[(b >> 18) & 7]; \ + const _ntl_ulong t21 = A[(b >> 21) & 7]; \ + const 
_ntl_ulong t24 = A[(b >> 24) & 7]; \ + const _ntl_ulong t27 = A[(b >> 27) & 7]; \ + const _ntl_ulong t30 = A[b >> 30]; \ + const _ntl_ulong lo = A[b & 7] \ + ^ (t3 << 3)\ + ^ (t6 << 6)\ + ^ (t9 << 9)\ + ^ (t12 << 12)\ + ^ (t15 << 15)\ + ^ (t18 << 18)\ + ^ (t21 << 21)\ + ^ (t24 << 24)\ + ^ (t27 << 27)\ + ^ (t30 << 30);\ + const _ntl_ulong hi = (t3 >> 29)\ + ^ (t6 >> 26)\ + ^ (t9 >> 23)\ + ^ (t12 >> 20)\ + ^ (t15 >> 17)\ + ^ (t18 >> 14)\ + ^ (t21 >> 11)\ + ^ (t24 >> 8)\ + ^ (t27 >> 5)\ + ^ (t30 >> 2)\ + ^ (((b & 0xb6db6db6UL) >> 1) & (-(a >> 31)))\ + ^ (((b & 0x24924924UL) >> 2) & (-((a >> 30) & 1UL)));\ + c[0] = lo; c[1] = hi;\ + + + + + +#define NTL_ALT_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + const _ntl_ulong b = bp[i];\ + const _ntl_ulong t4 = A[(b >> 4) & 15]; \ + const _ntl_ulong t8 = A[(b >> 8) & 15]; \ + const _ntl_ulong t12 = A[(b >> 12) & 15]; \ + const _ntl_ulong t16 = A[(b >> 16) & 15]; \ + const _ntl_ulong t20 = A[(b >> 20) & 15]; \ + const _ntl_ulong t24 = A[(b >> 24) & 15]; \ + const _ntl_ulong t28 = A[b >> 28]; \ + const _ntl_ulong lo = A[b & 15] \ + ^ (t4 << 4)\ + ^ (t8 << 8)\ + ^ (t12 << 12)\ + ^ (t16 << 16)\ + ^ (t20 << 20)\ + ^ (t24 << 24)\ + ^ (t28 << 28);\ + const _ntl_ulong hi = (t4 >> 28)\ + ^ (t8 >> 24)\ + ^ (t12 >> 20)\ + ^ (t16 >> 16)\ + ^ (t20 >> 12)\ + ^ (t24 >> 8)\ + ^ (t28 >> 4)\ + ^ (((b & 0xeeeeeeeeUL) >> 1) & (-(a >> 31)))\ + ^ (((b & 0xccccccccUL) >> 2) & (-((a >> 30) & 1UL)))\ + ^ (((b & 0x88888888UL) >> 3) & (-((a >> 29) & 1UL)));\ + cp[i] = carry ^ lo; carry = hi;\ + }\ + cp[sb] = carry;\ + + + + + +#define NTL_ALT_BB_MUL_CODE2 \ + long i;\ + _ntl_ulong carry = 0;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + const _ntl_ulong b = bp[i];\ + const _ntl_ulong t4 = A[(b >> 4) & 15]; \ + const _ntl_ulong t8 = A[(b >> 8) & 15]; \ + const _ntl_ulong t12 = A[(b >> 12) & 15]; \ + const _ntl_ulong t16 = A[(b >> 16) & 15]; \ + const _ntl_ulong t20 = A[(b >> 20) & 15]; \ + const _ntl_ulong t24 = A[(b >> 24) & 15]; \ + const _ntl_ulong t28 = A[b >> 28]; \ + const _ntl_ulong lo = A[b & 15] \ + ^ (t4 << 4)\ + ^ (t8 << 8)\ + ^ (t12 << 12)\ + ^ (t16 << 16)\ + ^ (t20 << 20)\ + ^ (t24 << 24)\ + ^ (t28 << 28);\ + const _ntl_ulong hi = (t4 >> 28)\ + ^ (t8 >> 24)\ + ^ (t12 >> 20)\ + ^ (t16 >> 16)\ + ^ (t20 >> 12)\ + ^ (t24 >> 8)\ + ^ (t28 >> 4)\ + ^ (((b & 0xeeeeeeeeUL) >> 1) & (-(a >> 31)))\ + ^ (((b & 0xccccccccUL) >> 2) & (-((a >> 30) & 1UL)))\ + ^ (((b & 0x88888888UL) >> 3) & (-((a >> 29) & 1UL)));\ + cp[i] ^= (carry ^ lo); carry = hi;\ + }\ + cp[sb] ^= carry;\ + + + + + +#define NTL_ALT_SHORT_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = 
A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + const _ntl_ulong b = bp[i];\ + const _ntl_ulong t4 = A[(b >> 4) & 15]; \ + const _ntl_ulong t8 = A[(b >> 8) & 15]; \ + const _ntl_ulong t12 = A[(b >> 12) & 15]; \ + const _ntl_ulong t16 = A[(b >> 16) & 15]; \ + const _ntl_ulong t20 = A[(b >> 20) & 15]; \ + const _ntl_ulong t24 = A[(b >> 24) & 15]; \ + const _ntl_ulong t28 = A[b >> 28]; \ + const _ntl_ulong lo = A[b & 15] \ + ^ (t4 << 4)\ + ^ (t8 << 8)\ + ^ (t12 << 12)\ + ^ (t16 << 16)\ + ^ (t20 << 20)\ + ^ (t24 << 24)\ + ^ (t28 << 28);\ + const _ntl_ulong hi = (t4 >> 28)\ + ^ (t8 >> 24)\ + ^ (t12 >> 20)\ + ^ (t16 >> 16)\ + ^ (t20 >> 12)\ + ^ (t24 >> 8)\ + ^ (t28 >> 4);\ + cp[i] = carry ^ lo; carry = hi;\ + }\ + cp[sb] = carry;\ + + + + + +#define NTL_ALT_HALF_BB_MUL_CODE0 \ + _ntl_ulong A[4];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + const _ntl_ulong t2 = A[(b >> 2) & 3]; \ + const _ntl_ulong t4 = A[(b >> 4) & 3]; \ + const _ntl_ulong t6 = A[(b >> 6) & 3]; \ + const _ntl_ulong t8 = A[(b >> 8) & 3]; \ + const _ntl_ulong t10 = A[(b >> 10) & 3]; \ + const _ntl_ulong t12 = A[(b >> 12) & 3]; \ + const _ntl_ulong t14 = A[b >> 14]; \ + const _ntl_ulong lo = A[b & 3] \ + ^ (t2 << 2)\ + ^ (t4 << 4)\ + ^ (t6 << 6)\ + ^ (t8 << 8)\ + ^ (t10 << 10)\ + ^ (t12 << 12)\ + ^ (t14 << 14);\ + const _ntl_ulong hi = (t2 >> 30)\ + ^ (t4 >> 28)\ + ^ (t6 >> 26)\ + ^ (t8 >> 24)\ + ^ (t10 >> 22)\ + ^ (t12 >> 20)\ + ^ (t14 >> 18)\ + ^ (((b & 0xaaaaUL) >> 1) & (-(a >> 31)));\ + c[0] = lo; c[1] = hi;\ + + + + + +#define NTL_ALT1_BB_MUL_CODE0 \ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[8];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + lo = A[b & 7]; t = A[(b >> 3) & 7]; hi = t >> 29; lo ^= t << 3;\ + t = A[(b >> 6) & 7]; hi ^= t >> 26; lo ^= t << 6;\ + t = A[(b >> 9) & 7]; hi ^= t >> 23; lo ^= t << 9;\ + t = A[(b >> 12) & 7]; hi ^= t >> 20; lo ^= t << 12;\ + t = A[(b >> 15) & 7]; hi ^= t >> 17; lo ^= t << 15;\ + t = A[(b >> 18) & 7]; hi ^= t >> 14; lo ^= t << 18;\ + t = A[(b >> 21) & 7]; hi ^= t >> 11; lo ^= t << 21;\ + t = A[(b >> 24) & 7]; hi ^= t >> 8; lo ^= t << 24;\ + t = A[(b >> 27) & 7]; hi ^= t >> 5; lo ^= t << 27;\ + t = A[b >> 30]; hi ^= t >> 2; lo ^= t << 30;\ + hi ^= (((b & 0xb6db6db6UL) >> 1) & (-(a >> 31)))\ + ^ (((b & 0x24924924UL) >> 2) & (-((a >> 30) & 1UL)));\ + c[0] = lo; c[1] = hi;\ + + + + + +#define NTL_ALT1_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 28; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 24; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 20; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 16; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 12; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 8; lo ^= t << 24;\ + t = A[b >> 28]; hi ^= t >> 4; lo ^= t << 28;\ + hi ^= (((b & 0xeeeeeeeeUL) >> 
1) & (-(a >> 31)))\ + ^ (((b & 0xccccccccUL) >> 2) & (-((a >> 30) & 1UL)))\ + ^ (((b & 0x88888888UL) >> 3) & (-((a >> 29) & 1UL)));\ + cp[i] = carry ^ lo; carry = hi;\ + }\ + cp[sb] = carry;\ + + + + + +#define NTL_ALT1_BB_MUL_CODE2 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 28; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 24; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 20; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 16; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 12; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 8; lo ^= t << 24;\ + t = A[b >> 28]; hi ^= t >> 4; lo ^= t << 28;\ + hi ^= (((b & 0xeeeeeeeeUL) >> 1) & (-(a >> 31)))\ + ^ (((b & 0xccccccccUL) >> 2) & (-((a >> 30) & 1UL)))\ + ^ (((b & 0x88888888UL) >> 3) & (-((a >> 29) & 1UL)));\ + cp[i] ^= (carry ^ lo); carry = hi;\ + }\ + cp[sb] ^= carry;\ + + + + + +#define NTL_ALT1_SHORT_BB_MUL_CODE1 \ + long i;\ + _ntl_ulong carry = 0, b;\ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[16];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + A[4] = A[2] << 1;\ + A[5] = A[4] ^ A[1];\ + A[6] = A[3] << 1;\ + A[7] = A[6] ^ A[1];\ + A[8] = A[4] << 1;\ + A[9] = A[8] ^ A[1];\ + A[10] = A[5] << 1;\ + A[11] = A[10] ^ A[1];\ + A[12] = A[6] << 1;\ + A[13] = A[12] ^ A[1];\ + A[14] = A[7] << 1;\ + A[15] = A[14] ^ A[1];\ + for (i = 0; i < sb; i++) {\ + b = bp[i];\ + lo = A[b & 15]; t = A[(b >> 4) & 15]; hi = t >> 28; lo ^= t << 4;\ + t = A[(b >> 8) & 15]; hi ^= t >> 24; lo ^= t << 8;\ + t = A[(b >> 12) & 15]; hi ^= t >> 20; lo ^= t << 12;\ + t = A[(b >> 16) & 15]; hi ^= t >> 16; lo ^= t << 16;\ + t = A[(b >> 20) & 15]; hi ^= t >> 12; lo ^= t << 20;\ + t = A[(b >> 24) & 15]; hi ^= t >> 8; lo ^= t << 24;\ + t = A[b >> 28]; hi ^= t >> 4; lo ^= t << 28;\ + cp[i] = carry ^ lo; carry = hi;\ + }\ + cp[sb] = carry;\ + + + + + +#define NTL_ALT1_HALF_BB_MUL_CODE0 \ + _ntl_ulong hi, lo, t;\ + _ntl_ulong A[4];\ + A[0] = 0;\ + A[1] = a;\ + A[2] = A[1] << 1;\ + A[3] = A[2] ^ A[1];\ + lo = A[b & 3]; t = A[(b >> 2) & 3]; hi = t >> 30; lo ^= t << 2;\ + t = A[(b >> 4) & 3]; hi ^= t >> 28; lo ^= t << 4;\ + t = A[(b >> 6) & 3]; hi ^= t >> 26; lo ^= t << 6;\ + t = A[(b >> 8) & 3]; hi ^= t >> 24; lo ^= t << 8;\ + t = A[(b >> 10) & 3]; hi ^= t >> 22; lo ^= t << 10;\ + t = A[(b >> 12) & 3]; hi ^= t >> 20; lo ^= t << 12;\ + t = A[b >> 14]; hi ^= t >> 18; lo ^= t << 14;\ + hi ^= (((b & 0xaaaaUL) >> 1) & (-(a >> 31)));\ + c[0] = lo; c[1] = hi;\ + + + +#define NTL_BB_MUL1_BITS (4) + + + + + +#define NTL_BB_SQR_CODE \ +lo=sqrtab[a&255];\ +lo=lo|(sqrtab[(a>>8)&255]<<16);\ +hi=sqrtab[(a>>16)&255];\ +hi=hi|(sqrtab[(a>>24)&255]<<16);\ + + + + +#define NTL_BB_REV_CODE (revtab[(a>>0)&255]<<24)\ +|(revtab[(a>>8)&255]<<16)\ +|(revtab[(a>>16)&255]<<8)\ +|(revtab[(a>>24)&255]<<0) + +#endif + diff --git a/thirdparty/linux/ntl/src/makefile b/thirdparty/linux/ntl/src/makefile new file mode 100644 index 0000000000..c3180d12a9 --- /dev/null +++ b/thirdparty/linux/ntl/src/makefile @@ -0,0 +1,575 @@ +############################################################### +# +# First, 
choose a C++ compiler, and set compiler flags.
+# This is done by setting the variables CXX and CXXFLAGS.
+#
+###############################################################
+
+
+
+CXX=g++
+# A C++ compiler, e.g., g++, CC, xlC
+
+
+CXXFLAGS=-g -O2
+# Flags for the C++ compiler
+
+CXXAUTOFLAGS= -march=native
+# Flags for the C++ compiler, automatically generated by configuration script
+
+
+AR=ar
+# command to make a library
+
+ARFLAGS=ruv
+# arguments for AR
+
+RANLIB=ranlib
+# set to echo if you want to disable it completely
+
+LDFLAGS=
+# flags for linking C++ programs
+
+LDLIBS=-lm
+# libraries for linking C++ programs
+
+CPPFLAGS=
+# arguments for the C preprocessor
+
+LIBTOOL=libtool
+# libtool command
+
+DEF_PREFIX=/usr/local
+
+PREFIX=$(DEF_PREFIX)
+LIBDIR=$(PREFIX)/lib
+INCLUDEDIR=$(PREFIX)/include
+DOCDIR=$(PREFIX)/share/doc
+# where to install NTL
+
+###############################################################
+#
+# Second, if you want to use GMP (the GNU Multi-Precision library),
+# define the variables GMP_OPT_INCDIR, GMP_OPT_LIBDIR, GMP_OPT_LIB below.
+# You will also have to set either NTL_GMP_LIP or NTL_GMP_HACK
+# in the config.h file.
+#
+# Using GMP can lead to significant performance gains on some
+# platforms.  You can obtain GMP from http://www.swox.com/gmp.
+# Once you unpack it into a directory, just execute
+#    ./configure; make
+# in that directory.
+#
+###############################################################
+
+
+GMP_PREFIX=$(DEF_PREFIX)
+
+GMP_INCDIR=$(GMP_PREFIX)/include
+# directory containing gmp.h if using GMP
+
+GMP_LIBDIR=$(GMP_PREFIX)/lib
+# directory containing libgmp.a if using GMP
+
+GMP_OPT_INCDIR=# -I$(GMP_INCDIR) # GMPI
+GMP_OPT_LIBDIR=# -L$(GMP_LIBDIR) # GMPL
+GMP_OPT_LIB=-lgmp # GMP
+# uncomment these if using GMP
+
+
+###############################################################
+#
+# Third, if you want to use gf2x (a library for fast
+# multiplication over GF(2)[X]), you need to
+# define the variables GF2X_OPT_INCDIR, GF2X_OPT_LIBDIR, GF2X_OPT_LIB below.
+# You will also have to set NTL_GF2X_LIB
+# in the config.h file.
+#
+###############################################################
+
+GF2X_PREFIX=$(DEF_PREFIX)
+
+GF2X_INCDIR=$(GF2X_PREFIX)/include
+# directory containing gf2x.h if using gf2x
+
+GF2X_LIBDIR=$(GF2X_PREFIX)/lib
+# directory containing libgf2x.a
+
+GF2X_OPT_INCDIR=# -I$(GF2X_INCDIR) # GF2X
+GF2X_OPT_LIBDIR=# -L$(GF2X_LIBDIR) # GF2X
+GF2X_OPT_LIB=# -lgf2x # GF2X
+# uncomment these if using gf2x
+
+
+###############################################################
+#
+# Fourth, if you do not want to run the wizard that automagically
+# sets some performance-related flags in config.h, set the flag below.
+#
+###############################################################
+
+
+WIZARD=on
+# Set to off if you want to bypass the wizard; otherwise, set to on.
+
+
+#################################################################
+#
+# That's it!  You can ignore everything else in this file!
+# +################################################################# + + +# object files + +O01=FFT.o FacVec.o GF2.o GF2E.o GF2EX.o GF2EXFactoring.o GF2X.o GF2X1.o +O02=$(O01) GF2XFactoring.o GF2XVec.o GetTime.o GetPID.o HNF.o ctools.o LLL.o +O03=$(O02) LLL_FP.o LLL_QP.o LLL_RR.o LLL_XD.o RR.o WordVector.o ZZ.o ZZVec.o +O04=$(O03) ZZX.o ZZX1.o ZZXCharPoly.o ZZXFactoring.o ZZ_p.o ZZ_pE.o ZZ_pEX.o +O05=$(O04) ZZ_pEXFactoring.o ZZ_pX.o ZZ_pX1.o ZZ_pXCharPoly.o ZZ_pXFactoring.o +O06=$(O05) fileio.o lip.o lzz_p.o lzz_pE.o lzz_pEX.o lzz_pEXFactoring.o +O07=$(O06) lzz_pX.o lzz_pX1.o lzz_pXCharPoly.o lzz_pXFactoring.o +O08=$(O07) mat_GF2.o mat_GF2E.o mat_RR.o mat_ZZ.o mat_ZZ_p.o +O09=$(O08) mat_ZZ_pE.o mat_lzz_p.o mat_lzz_pE.o mat_poly_ZZ.o +O10=$(O09) mat_poly_ZZ_p.o mat_poly_lzz_p.o +O11=$(O10) +O12=$(O11) +O13=$(O12) quad_float.o tools.o vec_GF2.o vec_GF2E.o +O14=$(O13) vec_RR.o vec_ZZ.o vec_ZZ_p.o vec_ZZ_pE.o +O15=$(O14) vec_lzz_p.o vec_lzz_pE.o +O16=$(O15) +O17=$(O16) +O18=$(O17) xdouble.o +O19=$(O18) G_LLL_FP.o G_LLL_QP.o G_LLL_XD.o G_LLL_RR.o thread.o BasicThreadPool.o + +OBJ=$(O19) + +# library source files + + +S01=FFT.c FacVec.c GF2.c GF2E.c GF2EX.c GF2EXFactoring.c GF2X.c GF2X1.c +S02=$(S01) GF2XFactoring.c GF2XVec.c HNF.c ctools.c LLL.c LLL_FP.c LLL_QP.c +S03=$(S02) LLL_RR.c LLL_XD.c RR.c WordVector.c ZZ.c ZZVec.c ZZX.c ZZX1.c +S04=$(S03) ZZXCharPoly.c ZZXFactoring.c ZZ_p.c ZZ_pE.c ZZ_pEX.c +S05=$(S04) ZZ_pEXFactoring.c ZZ_pX.c ZZ_pX1.c ZZ_pXCharPoly.c +S06=$(S05) ZZ_pXFactoring.c fileio.c lip.c lzz_p.c lzz_pE.c lzz_pEX.c +S07=$(S06) lzz_pEXFactoring.c lzz_pX.c lzz_pX1.c +S08=$(S07) lzz_pXCharPoly.c lzz_pXFactoring.c mat_GF2.c mat_GF2E.c +S09=$(S08) mat_RR.c mat_ZZ.c mat_ZZ_p.c mat_ZZ_pE.c mat_lzz_p.c mat_lzz_pE.c +S10=$(S09) mat_poly_ZZ.c mat_poly_ZZ_p.c mat_poly_lzz_p.c +S11=$(S10) +S12=$(S11) +S13=$(S12) quad_float.c tools.c vec_GF2.c vec_GF2E.c vec_RR.c +S14=$(S13) vec_ZZ.c vec_ZZ_p.c vec_ZZ_pE.c +S15=$(S14) vec_lzz_p.c vec_lzz_pE.c +S16=$(S15) +S17=$(S16) +S18=$(S17) xdouble.c +S19=$(S18) G_LLL_FP.c G_LLL_QP.c G_LLL_XD.c G_LLL_RR.c thread.c BasicThreadPool.c + +SRC = $(S19) + +# library source files that are header files + +SINC = c_lip_impl.h g_lip_impl.h + + + + + +# library header files + +IN01= FFT.h FacVec.h GF2.h GF2E.h GF2EX.h GF2EXFactoring.h GF2X.h +IN02=$(IN01) GF2XFactoring.h GF2XVec.h HNF.h ctools.h LLL.h +IN03=$(IN02) RR.h SPMM_ASM.h WordVector.h ZZ.h sp_arith.h ZZVec.h ZZX.h ZZXFactoring.h +IN04=$(IN03) ZZ_p.h ZZ_pE.h ZZ_pEX.h ZZ_pEXFactoring.h ZZ_pX.h ZZ_pXFactoring.h +IN05=$(IN04) fileio.h lip.h lzz_p.h lzz_pE.h lzz_pEX.h lzz_pEXFactoring.h +IN06=$(IN05) lzz_pX.h lzz_pXFactoring.h mat_GF2.h mat_GF2E.h mat_RR.h +IN07=$(IN06) mat_ZZ.h mat_ZZ_p.h mat_ZZ_pE.h mat_lzz_p.h mat_lzz_pE.h +IN08=$(IN07) mat_poly_ZZ.h mat_poly_ZZ_p.h mat_poly_lzz_p.h matrix.h +IN09=$(IN08) pair.h vector.h pair_GF2EX_long.h pair_GF2X_long.h +IN10=$(IN09) pair_ZZX_long.h pair_ZZ_pEX_long.h pair_ZZ_pX_long.h +IN11=$(IN10) pair_lzz_pEX_long.h pair_lzz_pX_long.h quad_float.h +IN12=$(IN11) tools.h vec_GF2.h vec_GF2E.h vec_GF2XVec.h vec_RR.h +IN13=$(IN12) vec_ZZ.h vec_ZZVec.h vec_ZZ_p.h vec_ZZ_pE.h vec_double.h +IN14=$(IN13) vec_long.h vec_lzz_p.h vec_lzz_pE.h vec_quad_float.h +IN15=$(IN14) vec_vec_GF2.h vec_vec_GF2E.h vec_vec_RR.h vec_vec_ZZ.h +IN16=$(IN15) vec_vec_ZZ_p.h vec_vec_ZZ_pE.h vec_vec_long.h vec_vec_lzz_p.h +IN17=$(IN16) vec_vec_lzz_pE.h vec_xdouble.h xdouble.h config.h version.h +IN18=$(IN17) def_config.h new.h vec_ulong.h vec_vec_ulong.h c_lip.h g_lip.h +IN19=$(IN18) SmartPtr.h Lazy.h 
LazyTable.h thread.h BasicThreadPool.h
+INCL=$(IN19)
+
+
+
+# test data
+
+TD1=BerlekampTestIn BerlekampTestOut CanZassTestIn CanZassTestOut
+TD2=$(TD1) ZZXFacTestIn ZZXFacTestOut MoreFacTestIn LLLTestIn LLLTestOut RRTestIn RRTestOut
+TD3=$(TD2) MatrixTestIn MatrixTestOut CharPolyTestIn
+TD4=$(TD3) CharPolyTestOut QuadTestIn QuadTestOut
+
+TD = $(TD4)
+
+# test source files
+
+TS1=QuickTest.c BerlekampTest.c CanZassTest.c ZZXFacTest.c MoreFacTest.c LLLTest.c
+TS2=$(TS1) subset.c MatrixTest.c mat_lzz_pTest.c CharPolyTest.c RRTest.c QuadTest.c
+TS3=$(TS2) GF2XTest.c GF2EXTest.c BitMatTest.c ZZ_pEXTest.c lzz_pEXTest.c Timing.c
+TS4=$(TS3) ThreadTest.c ExceptionTest.c
+TS = $(TS4)
+
+# scripts
+
+SCRIPTS1=MakeGetTime MakeGetPID MakeCheckFeature ResetFeatures CopyFeatures TestScript dosify unixify RemoveProg
+SCRIPTS2=$(SCRIPTS1) configure DoConfig mfile cfile ppscript
+
+SCRIPTS=$(SCRIPTS2)
+
+# auxiliary source
+
+MD=MakeDesc.c MakeDescAux.c newnames.c gen_gmp_aux.c
+GT=GetTime0.c GetTime1.c GetTime2.c GetTime3.c GetTime4.c GetTime5.c TestGetTime.c
+GP=GetPID1.c GetPID2.c TestGetPID.c
+CH=CheckCLZL.c CheckCLZLAux.c CheckLL.c CheckLLAux.c CheckAVX.c CheckFMA.c CheckCompile.c
+
+AUXPROGS = TestGetTime TestGetPID CheckFeature CheckCompile
+
+
+
+# documentation
+
+
+D01=copying.txt BasicThreadPool.txt GF2.txt GF2E.txt GF2EX.txt GF2EXFactoring.txt GF2X.txt
+D02=$(D01) GF2XFactoring.txt GF2XVec.txt HNF.txt Lazy.txt LazyTable.txt LLL.txt RR.txt SmartPtr.txt
+D03=$(D02) ZZ.txt ZZVec.txt ZZX.txt ZZXFactoring.txt ZZ_p.txt ZZ_pE.txt
+D04=$(D03) ZZ_pEX.txt ZZ_pEXFactoring.txt ZZ_pX.txt ZZ_pXFactoring.txt
+D05=$(D04) conversions.txt flags.txt lzz_p.txt lzz_pE.txt lzz_pEX.txt
+D06=$(D05) lzz_pEXFactoring.txt lzz_pX.txt lzz_pXFactoring.txt mat_GF2.txt
+D07=$(D06) mat_GF2E.txt mat_RR.txt mat_ZZ.txt mat_ZZ_p.txt mat_ZZ_pE.txt
+D08=$(D07) mat_lzz_p.txt mat_lzz_pE.txt mat_poly_ZZ.txt mat_poly_ZZ_p.txt
+D09=$(D08) mat_poly_lzz_p.txt matrix.txt pair.txt vector.txt
+D10=$(D09) quad_float.txt sedscript.txt tools.txt vec_GF2.txt
+D11=$(D10) vec_GF2E.txt vec_RR.txt vec_ZZ.txt vec_ZZ_p.txt vec_ZZ_pE.txt
+D12=$(D11) vec_lzz_p.txt vec_lzz_pE.txt xdouble.txt names.txt
+D13=$(D12) tour-ack.html tour-intro.html tour-time.html tour-changes.html
+D14=$(D13) tour-modules.html tour-unix.html tour-examples.html
+D15=$(D14) tour-roadmap.html tour-win.html tour-impl.html tour-struct.html
+D16=$(D15) tour.html tour-ex1.html tour-ex2.html tour-ex3.html tour-ex4.html
+D17=$(D16) tour-ex5.html tour-ex6.html tour-ex7.html arrow1.gif arrow2.gif arrow3.gif
+D18=$(D17) tour-gmp.html tour-gf2x.html tour-tips.html config.txt version.txt
+
+TX01=GF2.txt GF2E.txt GF2EX.txt GF2EXFactoring.txt GF2X.txt GF2XFactoring.txt
+TX02=GF2XVec.txt HNF.txt Lazy.txt LazyTable.txt LLL.txt RR.txt SmartPtr.txt ZZ.txt ZZVec.txt ZZX.txt ZZXFactoring.txt
+TX03=ZZ_p.txt ZZ_pE.txt ZZ_pEX.txt ZZ_pEXFactoring.txt ZZ_pX.txt ZZ_pXFactoring.txt
+TX04=lzz_p.txt lzz_pE.txt lzz_pEX.txt lzz_pEXFactoring.txt lzz_pX.txt
+TX05=lzz_pXFactoring.txt mat_GF2.txt mat_GF2E.txt mat_RR.txt mat_ZZ.txt mat_ZZ_p.txt
+TX06=mat_ZZ_pE.txt mat_lzz_p.txt mat_lzz_pE.txt mat_poly_ZZ.txt mat_poly_ZZ_p.txt
+TX07=mat_poly_lzz_p.txt matrix.txt pair.txt quad_float.txt tools.txt vec_GF2.txt
+TX08=vec_GF2E.txt vec_RR.txt vec_ZZ.txt vec_ZZ_p.txt vec_ZZ_pE.txt vec_lzz_p.txt
+TX09=vec_lzz_pE.txt vector.txt version.txt xdouble.txt BasicThreadPool.txt
+
+TXFILES=$(TX01) $(TX02) $(TX03) $(TX04) $(TX05) $(TX06) $(TX07) $(TX08) $(TX09)
+
+HT01=GF2.cpp.html GF2E.cpp.html GF2EX.cpp.html
GF2EXFactoring.cpp.html GF2X.cpp.html GF2XFactoring.cpp.html
+HT02=GF2XVec.cpp.html HNF.cpp.html Lazy.cpp.html LazyTable.cpp.html LLL.cpp.html RR.cpp.html SmartPtr.cpp.html ZZ.cpp.html ZZVec.cpp.html ZZX.cpp.html ZZXFactoring.cpp.html
+HT03=ZZ_p.cpp.html ZZ_pE.cpp.html ZZ_pEX.cpp.html ZZ_pEXFactoring.cpp.html ZZ_pX.cpp.html ZZ_pXFactoring.cpp.html
+HT04=lzz_p.cpp.html lzz_pE.cpp.html lzz_pEX.cpp.html lzz_pEXFactoring.cpp.html lzz_pX.cpp.html
+HT05=lzz_pXFactoring.cpp.html mat_GF2.cpp.html mat_GF2E.cpp.html mat_RR.cpp.html mat_ZZ.cpp.html mat_ZZ_p.cpp.html
+HT06=mat_ZZ_pE.cpp.html mat_lzz_p.cpp.html mat_lzz_pE.cpp.html mat_poly_ZZ.cpp.html mat_poly_ZZ_p.cpp.html
+HT07=mat_poly_lzz_p.cpp.html matrix.cpp.html pair.cpp.html quad_float.cpp.html tools.cpp.html vec_GF2.cpp.html
+HT08=vec_GF2E.cpp.html vec_RR.cpp.html vec_ZZ.cpp.html vec_ZZ_p.cpp.html vec_ZZ_pE.cpp.html vec_lzz_p.cpp.html
+HT09=vec_lzz_pE.cpp.html vector.cpp.html version.cpp.html xdouble.cpp.html BasicThreadPool.cpp.html
+
+HTFILES=$(HT01) $(HT02) $(HT03) $(HT04) $(HT05) $(HT06) $(HT07) $(HT08) $(HT09)
+
+
+DOC = $(D18) $(HTFILES)
+
+
+
+# test program executables
+
+PROG1=QuickTest BerlekampTest CanZassTest ZZXFacTest MoreFacTest LLLTest BitMatTest
+PROG2=$(PROG1) MatrixTest mat_lzz_pTest CharPolyTest RRTest QuadTest
+PROG3=$(PROG2) GF2XTest GF2EXTest subset ZZ_pEXTest lzz_pEXTest Timing ThreadTest
+PROGS = $(PROG3)
+
+# things to save to a tar file
+
+SFI1=makefile $(SRC) $(SINC) $(SCRIPTS) $(MD) $(GT) $(GP) $(CH) $(TS) $(TD) mach_desc.win
+SFI2=$(SFI1) MulTimeTest.c Poly1TimeTest.c Poly2TimeTest.c Poly3TimeTest.c GF2XTimeTest.c
+SFI3=$(SFI2) InitSettings.c DispSettings.c WizardAux Wizard def_makefile
+SFILES=$(SFI3)
+
+
+#################################################################
+#
+# Rules for compiling the library
+#
+#################################################################
+
+
+NTL_INCLUDE = -I../include -I.
+# NTL needs this to find its include files
+
+COMPILE = $(CXX) $(NTL_INCLUDE) $(CPPFLAGS) $(CXXAUTOFLAGS) $(CXXFLAGS) -c
+
+LINK = $(CXX) $(NTL_INCLUDE) $(CPPFLAGS) $(CXXAUTOFLAGS) $(CXXFLAGS) $(LDFLAGS)
+
+
+
+# 'make all' does a complete make, including all setup.
+# It also creates the file 'all', which means you should
+# run 'make clobber' before running 'make' or 'make all'
+# again.
+
+all:
+	make setup1
+	make setup2
+	make setup3
+	make setup4
+	make ntl.a
+	touch all
+
+
+# setup1 generates the file ../include/NTL/mach_desc.h
+
+setup1:
+	$(COMPILE) MakeDescAux.c
+	$(LINK) -o MakeDesc MakeDesc.c MakeDescAux.o $(LDLIBS)
+	./MakeDesc
+	mv mach_desc.h ../include/NTL/mach_desc.h
+
+
+# setup2 does some dynamic checks for GetTime, GetPID, __builtin_clzl, and LL types
+
+setup2:
+	echo "*** CheckFeature log ***" > CheckFeature.log
+	sh MakeGetTime "$(LINK)" "$(LDLIBS)"
+	sh MakeGetPID "$(LINK)" "$(LDLIBS)"
+	sh MakeCheckFeature BUILTIN_CLZL "CheckCLZL.c CheckCLZLAux.c" "$(LINK)" "$(LDLIBS)"
+	sh MakeCheckFeature LL_TYPE "CheckLL.c CheckLLAux.c" "$(LINK)" "$(LDLIBS)"
+	sh MakeCheckFeature AVX "CheckAVX.c" "$(LINK)" "$(LDLIBS)"
+	sh MakeCheckFeature FMA "CheckFMA.c" "$(LINK)" "$(LDLIBS)"
+
+# setup3 generates the file ../include/NTL/gmp_aux.h
+# The file ../include/NTL/gmp_aux.h is included in ../include/NTL/lip.h
+# when NTL_GMP_LIP is set.
+# When this flag is not set, an empty file is produced.
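+#
+# For illustration only (editorial sketch, not part of the original
+# makefile): under a typical configuration where GMP is installed in a
+# standard system location -- so GMP_OPT_INCDIR and GMP_OPT_LIBDIR are
+# empty, GMP_OPT_LIB is -lgmp, and LDLIBS is -lm -- the setup3 rule
+# below expands to roughly:
+#
+#	g++ -I../include -I. -o gen_gmp_aux gen_gmp_aux.c -lgmp -lm
+#	./gen_gmp_aux > ../include/NTL/gmp_aux.h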
+ +setup3: + $(LINK) $(GMP_OPT_INCDIR) -o gen_gmp_aux gen_gmp_aux.c $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + ./gen_gmp_aux > ../include/NTL/gmp_aux.h + +# setup4 runs the wizard + +setup4: + sh Wizard $(WIZARD) + + +ntl.a: $(OBJ) + $(AR) $(ARFLAGS) ntl.a $(OBJ) #LSTAT + - $(RANLIB) ntl.a #LSTAT +# $(LIBTOOL) --tag=CXX --mode=link $(LINK) -o libntl.la $(OBJ:.o=.lo) $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(GF2X_OPT_LIBDIR) $(GF2X_OPT_LIB) $(LDLIBS) -rpath $(LIBDIR) -version-info `cat VERSION_INFO` #LSHAR + +LCOMP= #LSTAT +# LCOMP=$(LIBTOOL) --tag=CXX --mode=compile #LSHAR + +lip.o: lip.c g_lip_impl.h c_lip_impl.h + $(LCOMP) $(COMPILE) $(GMP_OPT_INCDIR) lip.c + +ctools.o: ctools.c + $(LCOMP) $(COMPILE) ctools.c + + +GetTime.o: GetTime.c + $(LCOMP) $(COMPILE) GetTime.c + +GetPID.o: GetPID.c + $(LCOMP) $(COMPILE) GetPID.c + +CheckCompile: CheckCompile.c + $(LINK) -o CheckCompile CheckCompile.c $(LDLIBS) + + +.c.o: + $(LCOMP) $(COMPILE) $(GF2X_OPT_INCDIR) $< + +.c: + $(LINK) -o $@ $< ntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(GF2X_OPT_LIBDIR) $(GF2X_OPT_LIB) $(LDLIBS) #LSTAT +# $(LIBTOOL) --tag=CXX --mode=link $(LINK) -o $@ $< libntl.la #LSHAR + +################################################################# +# +# Rule for running tests +# make check runs a series of tests +# +################################################################# + +check: + sh RemoveProg $(PROGS) + make QuickTest + ./QuickTest + sh RemoveProg QuickTest + sh TestScript + +################################################################# +# +# Rule for installing +# make install just does a simple copy of the include file +# and library. The -p option is used to preserve file attributes. +# This helps avoid some problems (especially when copying ntl.a). +# Also, an attempt is made to make everything that is +# installed readable by everyone. +# +# make uninstall removes these files +# +################################################################# + + + + +install: + mkdir -p -m 755 $(INCLUDEDIR) + rm -rf $(INCLUDEDIR)/NTL + mkdir -m 755 $(INCLUDEDIR)/NTL + cp -p ../include/NTL/*.h $(INCLUDEDIR)/NTL + - chmod -R a+r $(INCLUDEDIR)/NTL + mkdir -p -m 755 $(DOCDIR) + rm -rf $(DOCDIR)/NTL + mkdir -m 755 $(DOCDIR)/NTL + cp -p ../doc/*.txt $(DOCDIR)/NTL + cp -p ../doc/*.html $(DOCDIR)/NTL + cp -p ../doc/*.gif $(DOCDIR)/NTL + - chmod -R a+r $(DOCDIR)/NTL + mkdir -p -m 755 $(LIBDIR) + cp -p ntl.a $(LIBDIR)/libntl.a #LSTAT + - chmod a+r $(LIBDIR)/libntl.a #LSTAT +# $(LIBTOOL) --mode=install cp -p libntl.la $(LIBDIR) #LSHAR + + +uninstall: + rm -f $(LIBDIR)/libntl.a #LSTAT +# $(LIBTOOL) --mode=uninstall rm -f $(LIBDIR)/libntl.la #LSHAR + rm -rf $(INCLUDEDIR)/NTL + rm -rf $(DOCDIR)/NTL + +################################################################# +# +# Rules for cleaning up +# +# make clobber removes *everything* created by make, +# but it does not restore config.h to its default. +# +# make clean tidies up a bit +# +################################################################# + +clobber: + rm -f ntl.a mach_desc.h ../include/NTL/mach_desc.h GetTime.c GetPID.c + sh ResetFeatures '..' 
+ rm -f ../include/NTL/gmp_aux.h + sh RemoveProg $(PROGS) MakeDesc $(AUXPROGS) gen_gmp_aux + rm -f *.o + rm -rf small + rm -f cfileout mfileout + rm -rf .libs *.lo libntl.la + rm -f all + +clean: + sh RemoveProg $(PROGS) MakeDesc $(AUXPROGS) gen_gmp_aux + rm -f *.o + rm -rf small +# - $(LIBTOOL) --mode=clean rm -f libntl.la *.lo #LSHAR + +################################################################# +# +# Rules for making tar and zip files +# +# make ppdoc creates pretty-printed versions of some documentation +# - run before make package or make winpack +# +# make package creates a tar.gz file suitable for Unix +# +# make winpack creates a zip file suitable for Windows +# +################################################################# + +ppdoc: + sh ppscript "$(TXFILES)" + +ppclean: + rm -f ../doc/*.cpp + + +package: + ./configure --nowrite + cp mfileout def_makefile + cp cfileout ../include/NTL/def_config.h + sh unixify "$(SFILES) DIRNAME WINDIR VERSION_INFO NOTES" "$(INCL)" "$(DOC)" + rm -rf `cat DIRNAME` + rm -f `cat DIRNAME`.tar + rm -f `cat DIRNAME`.tar.gz + mv unix `cat DIRNAME` + chmod -R a+rX `cat DIRNAME` + tar -cvf `cat DIRNAME`.tar `cat DIRNAME` + gzip `cat DIRNAME`.tar + rm -rf `cat DIRNAME` + +winpack: + ./configure --nowrite NTL_GMP_LIP=off + cp mfileout def_makefile + cp cfileout ../include/NTL/def_config.h + sh dosify "$(SRC)" "$(INCL)" "$(DOC)" "$(TS)" "$(TD)" "$(SINC)" + rm -rf `cat WINDIR` + rm -f `cat WINDIR`.zip + mv dos `cat WINDIR` + chmod -R a+rX `cat WINDIR` + find ./`cat WINDIR` '!' -name '*.gif' -print | zip -l `cat WINDIR` -@ + find ./`cat WINDIR` -name '*.gif' -print | zip -u `cat WINDIR` -@ + rm -rf `cat WINDIR` + + +###################################################################### +# +# config wizard related stuff +# +###################################################################### + +WO1 = FFT.o GetTime.o GetPID.o ctools.o ZZ.o ZZVec.o ZZ_p.o ZZ_pX.o +WO2 = $(WO1) ZZ_pX1.o lip.o tools.o vec_ZZ.o vec_ZZ_p.o +WO3 = $(WO2) GF2.o WordVector.o vec_GF2.o GF2X.o GF2X1.o thread.o BasicThreadPool.o fileio.o + +WOBJ = $(WO3) + +# wntl.a: LCOMP= #LSHAR +wntl.a: $(WOBJ) + $(AR) $(ARFLAGS) wntl.a $(WOBJ) + - $(RANLIB) wntl.a + +MulTimeTest: + $(LINK) -o MulTimeTest MulTimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + + +Poly1TimeTest: + $(LINK) -o Poly1TimeTest Poly1TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) +Poly2TimeTest: + $(LINK) -o Poly2TimeTest Poly2TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) +Poly3TimeTest: + $(LINK) -o Poly3TimeTest Poly3TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + + +GF2XTimeTest: + $(LINK) -o GF2XTimeTest GF2XTimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + +InitSettings: + $(LINK) -o InitSettings InitSettings.c $(LDLIBS) + + +DispSettings: + $(LINK) -o DispSettings DispSettings.c $(LDLIBS) + + + + diff --git a/thirdparty/linux/ntl/src/mat_GF2.c b/thirdparty/linux/ntl/src/mat_GF2.c new file mode 100644 index 0000000000..1542641faa --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_GF2.c @@ -0,0 +1,755 @@ + + +#include +#include + +#include + +NTL_START_IMPL + + +void add(mat_GF2& X, const mat_GF2& A, const mat_GF2& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix add: dimension mismatch"); + + X.SetDims(n, m); + + long mw = (m + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG; + + long i; + for (i = 0; i < n; i++) { + _ntl_ulong *xp = X[i].rep.elts(); + const _ntl_ulong *ap = 
A[i].rep.elts(); + const _ntl_ulong *bp = B[i].rep.elts(); + long j; + for (j = 0; j < mw; j++) + xp[j] = ap[j] ^ bp[j]; + } +} + +static +void mul_aux(vec_GF2& x, const mat_GF2& A, const vec_GF2& b) +{ + long n = A.NumRows(); + long l = A.NumCols(); + + if (l != b.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(n); + + long i; + + for (i = 0; i < n; i++) { + x.put(i, A[i] * b); + } +} + + +void mul(vec_GF2& x, const mat_GF2& A, const vec_GF2& b) +{ + if (&b == &x || A.position1(x) != -1) { + vec_GF2 tmp; + mul_aux(tmp, A, b); + x = tmp; + } + else + mul_aux(x, A, b); +} + +static +void mul_aux(vec_GF2& x, const vec_GF2& a, const mat_GF2& B) +{ + long n = B.NumRows(); + long l = B.NumCols(); + + if (n != a.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(l); + clear(x); + + const _ntl_ulong *ap = a.rep.elts(); + _ntl_ulong a_mask = 1; + + _ntl_ulong *xp = x.rep.elts(); + + long lw = (l + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG; + + long i; + for (i = 0; i < n; i++) { + if (*ap & a_mask) { + const _ntl_ulong *bp = B[i].rep.elts(); + long j; + for (j = 0; j < lw; j++) + xp[j] ^= bp[j]; + } + + a_mask <<= 1; + if (!a_mask) { + a_mask = 1; + ap++; + } + } +} + +void mul(vec_GF2& x, const vec_GF2& a, const mat_GF2& B) +{ + if (&a == &x || B.position1(x) != -1) { + vec_GF2 tmp; + mul_aux(tmp, a, B); + x = tmp; + } + else + mul_aux(x, a, B); +} + +void mul_aux(mat_GF2& X, const mat_GF2& A, const mat_GF2& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + if (l != B.NumRows()) + LogicError("matrix mul: dimension mismatch"); + + X.SetDims(n, m); + + long i; + + for (i = 1; i <= n; i++) { + mul_aux(X(i), A(i), B); + } +} + + +void mul(mat_GF2& X, const mat_GF2& A, const mat_GF2& B) +{ + if (&X == &A || &X == &B) { + mat_GF2 tmp; + mul_aux(tmp, A, B); + X = tmp; + } + else + mul_aux(X, A, B); +} + + + + +void ident(mat_GF2& X, long n) +{ + X.SetDims(n, n); + clear(X); + long i; + + for (i = 0; i < n; i++) + X.put(i, i, to_GF2(1)); +} + + +void determinant(ref_GF2 d, const mat_GF2& M_in) +{ + long k, n; + long i, j; + long pos; + + n = M_in.NumRows(); + + if (M_in.NumCols() != n) + LogicError("determinant: nonsquare matrix"); + + if (n == 0) { + set(d); + return; + } + + mat_GF2 M; + + M = M_in; + + long wn = (n + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG; + + for (k = 0; k < n; k++) { + long wk = k/NTL_BITS_PER_LONG; + long bk = k - wk*NTL_BITS_PER_LONG; + _ntl_ulong k_mask = 1UL << bk; + + pos = -1; + for (i = k; i < n; i++) { + if (M[i].rep.elts()[wk] & k_mask) { + pos = i; + break; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + } + + + _ntl_ulong *y = M[k].rep.elts(); + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + if (M[i].rep.elts()[wk] & k_mask) { + _ntl_ulong *x = M[i].rep.elts(); + + for (j = wk; j < wn; j++) + x[j] ^= y[j]; + } + + } + } + else { + clear(d); + return; + } + } + + set(d); + return; +} + +static +long IsUnitVector(const vec_GF2& a, long i) +{ + long wi = i/NTL_BITS_PER_LONG; + long bi = i - wi*NTL_BITS_PER_LONG; + + const _ntl_ulong *p = a.rep.elts(); + long wdlen = a.rep.length(); + + long j; + + for (j = 0; j < wi; j++) + if (p[j] != 0) return 0; + + if (p[wi] != (1UL << bi)) + return 0; + + for (j = wi+1; j < wdlen; j++) + if (p[j] != 0) return 0; + + return 1; +} + + +long IsIdent(const mat_GF2& A, long n) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + if (n == 0) return 1; + + long i; + + for (i = 0; i < n; i++) + if 
(!IsUnitVector(A[i], i)) + return 0; + + return 1; +} + +void AddToCol(mat_GF2& x, long j, const vec_GF2& a) +// add a to column j of x +// ALIAS RESTRICTION: a should not alias any row of x +{ + long n = x.NumRows(); + long m = x.NumCols(); + + if (a.length() != n || j < 0 || j >= m) + LogicError("AddToCol: bad args"); + + long wj = j/NTL_BITS_PER_LONG; + long bj = j - wj*NTL_BITS_PER_LONG; + _ntl_ulong j_mask = 1UL << bj; + + const _ntl_ulong *ap = a.rep.elts(); + _ntl_ulong a_mask = 1; + + long i; + for (i = 0; i < n; i++) { + if (*ap & a_mask) + x[i].rep.elts()[wj] ^= j_mask; + + a_mask <<= 1; + if (!a_mask) { + a_mask = 1; + ap++; + } + } +} + + +void transpose_aux(mat_GF2& X, const mat_GF2& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(m, n); + clear(X); + + long i; + for (i = 0; i < n; i++) + AddToCol(X, i, A[i]); +} + + +void transpose(mat_GF2& X, const mat_GF2& A) +{ + if (&X == &A) { + mat_GF2 tmp; + transpose_aux(tmp, A); + X = tmp; + } + else + transpose_aux(X, A); +} + + + +static +void solve_impl(ref_GF2 d, vec_GF2& X, const mat_GF2& A, const vec_GF2& b, bool trans) + +{ + long n = A.NumRows(); + if (A.NumCols() != n) + LogicError("solve: nonsquare matrix"); + + if (b.length() != n) + LogicError("solve: dimension mismatch"); + + if (n == 0) { + X.SetLength(0); + set(d); + return; + } + + long i, j, k, pos; + + mat_GF2 M; + M.SetDims(n, n+1); + + if (trans) { + for (i = 0; i < n; i++) { + AddToCol(M, i, A[i]); + } + } + else { + for (i = 0; i < n; i++) { + VectorCopy(M[i], A[i], n+1); + } + } + + AddToCol(M, n, b); + + long wn = ((n+1) + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG; + + for (k = 0; k < n; k++) { + long wk = k/NTL_BITS_PER_LONG; + long bk = k - wk*NTL_BITS_PER_LONG; + _ntl_ulong k_mask = 1UL << bk; + + pos = -1; + for (i = k; i < n; i++) { + if (M[i].rep.elts()[wk] & k_mask) { + pos = i; + break; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + } + + _ntl_ulong *y = M[k].rep.elts(); + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + if (M[i].rep.elts()[wk] & k_mask) { + _ntl_ulong *x = M[i].rep.elts(); + + for (j = wk; j < wn; j++) + x[j] ^= y[j]; + } + + + } + } + else { + clear(d); + return; + } + } + + vec_GF2 XX; + XX.SetLength(n+1); + XX.put(n, 1); + + for (i = n-1; i >= 0; i--) { + XX.put(i, XX*M[i]); + } + + XX.SetLength(n); + X = XX; + + set(d); + return; +} + +void solve(ref_GF2 d, vec_GF2& x, const mat_GF2& A, const vec_GF2& b) +{ + solve_impl(d, x, A, b, true); +} + +void solve(ref_GF2 d, const mat_GF2& A, vec_GF2& x, const vec_GF2& b) +{ + solve_impl(d, x, A, b, false); +} + + +void inv(ref_GF2 d, mat_GF2& X, const mat_GF2& A) +{ + long n = A.NumRows(); + if (A.NumCols() != n) + LogicError("solve: nonsquare matrix"); + + if (n == 0) { + X.SetDims(0, 0); + set(d); + } + + long i, j, k, pos; + + mat_GF2 M; + M.SetDims(n, 2*n); + + vec_GF2 aa; + aa.SetLength(2*n); + + + for (i = 0; i < n; i++) { + aa = A[i]; + aa.SetLength(2*n); + aa.put(n+i, 1); + M[i] = aa; + } + + long wn = ((2*n) + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG; + + for (k = 0; k < n; k++) { + long wk = k/NTL_BITS_PER_LONG; + long bk = k - wk*NTL_BITS_PER_LONG; + _ntl_ulong k_mask = 1UL << bk; + + pos = -1; + for (i = k; i < n; i++) { + if (M[i].rep.elts()[wk] & k_mask) { + pos = i; + break; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + } + + _ntl_ulong *y = M[k].rep.elts(); + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + if (M[i].rep.elts()[wk] & k_mask) { + _ntl_ulong *x = 
M[i].rep.elts(); + + for (j = wk; j < wn; j++) + x[j] ^= y[j]; + } + + + } + } + else { + clear(d); + return; + } + } + + vec_GF2 XX; + XX.SetLength(2*n); + + X.SetDims(n, n); + clear(X); + + for (j = 0; j < n; j++) { + XX.SetLength(n+j+1); + clear(XX); + XX.put(n+j, to_GF2(1)); + + for (i = n-1; i >= 0; i--) { + XX.put(i, XX*M[i]); + } + + XX.SetLength(n); + AddToCol(X, j, XX); + } + + set(d); + return; +} + + + + + +long gauss(mat_GF2& M, long w) +{ + long k, l; + long i, j; + long pos; + + long n = M.NumRows(); + long m = M.NumCols(); + + if (w < 0 || w > m) + LogicError("gauss: bad args"); + + long wm = (m + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG; + + l = 0; + for (k = 0; k < w && l < n; k++) { + long wk = k/NTL_BITS_PER_LONG; + long bk = k - wk*NTL_BITS_PER_LONG; + _ntl_ulong k_mask = 1UL << bk; + + + pos = -1; + for (i = l; i < n; i++) { + if (M[i].rep.elts()[wk] & k_mask) { + pos = i; + break; + } + } + + if (pos != -1) { + if (l != pos) + swap(M[pos], M[l]); + + _ntl_ulong *y = M[l].rep.elts(); + + for (i = l+1; i < n; i++) { + // M[i] = M[i] + M[l]*M[i,k] + + if (M[i].rep.elts()[wk] & k_mask) { + _ntl_ulong *x = M[i].rep.elts(); + + for (j = wk; j < wm; j++) + x[j] ^= y[j]; + } + } + + l++; + } + } + + return l; +} + +long gauss(mat_GF2& M) +{ + return gauss(M, M.NumCols()); +} + + +void image(mat_GF2& X, const mat_GF2& A) +{ + mat_GF2 M; + M = A; + long r = gauss(M); + M.SetDims(r, M.NumCols()); + X = M; +} + +void kernel(mat_GF2& X, const mat_GF2& A) +{ + long m = A.NumRows(); + long n = A.NumCols(); + + mat_GF2 M; + long r; + + transpose(M, A); + r = gauss(M); + + X.SetDims(m-r, m); + clear(X); + + long i, j, k; + + vec_long D; + D.SetLength(m); + for (j = 0; j < m; j++) D[j] = -1; + + j = -1; + for (i = 0; i < r; i++) { + do { + j++; + } while (M.get(i, j) == 0); + + D[j] = i; + } + + for (k = 0; k < m-r; k++) { + vec_GF2& v = X[k]; + long pos = 0; + for (j = m-1; j >= 0; j--) { + if (D[j] == -1) { + if (pos == k) { + v[j] = 1; + // v.put(j, to_GF2(1)); + } + pos++; + } + else { + v[j] = v*M[D[j]]; + // v.put(j, v*M[D[j]]); + } + } + } +} + + +void mul(mat_GF2& X, const mat_GF2& A, GF2 b) +{ + X = A; + if (b == 0) + clear(X); +} + +void diag(mat_GF2& X, long n, GF2 d) +{ + if (d == 1) + ident(X, n); + else { + X.SetDims(n, n); + clear(X); + } +} + +long IsDiag(const mat_GF2& A, long n, GF2 d) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + if (d == 1) + return IsIdent(A, n); + else + return IsZero(A); +} + + +long IsZero(const mat_GF2& a) +{ + long n = a.NumRows(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +void clear(mat_GF2& x) +{ + long n = x.NumRows(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + + +mat_GF2 operator+(const mat_GF2& a, const mat_GF2& b) +{ + mat_GF2 res; + add(res, a, b); + NTL_OPT_RETURN(mat_GF2, res); +} + +mat_GF2 operator*(const mat_GF2& a, const mat_GF2& b) +{ + mat_GF2 res; + mul_aux(res, a, b); + NTL_OPT_RETURN(mat_GF2, res); +} + +mat_GF2 operator-(const mat_GF2& a, const mat_GF2& b) +{ + mat_GF2 res; + add(res, a, b); + NTL_OPT_RETURN(mat_GF2, res); +} + + +vec_GF2 operator*(const mat_GF2& a, const vec_GF2& b) +{ + vec_GF2 res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_GF2, res); +} + +vec_GF2 operator*(const vec_GF2& a, const mat_GF2& b) +{ + vec_GF2 res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_GF2, res); +} + + +void inv(mat_GF2& X, const mat_GF2& A) +{ + GF2 d; + inv(d, X, A); + if (d == 0) ArithmeticError("inv: non-invertible matrix"); +} + +void power(mat_GF2& X, 
const mat_GF2& A, const ZZ& e) +{ + if (A.NumRows() != A.NumCols()) LogicError("power: non-square matrix"); + + if (e == 0) { + ident(X, A.NumRows()); + return; + } + + mat_GF2 T1, T2; + long i, k; + + k = NumBits(e); + T1 = A; + + for (i = k-2; i >= 0; i--) { + sqr(T2, T1); + if (bit(e, i)) + mul(T1, T2, A); + else + T1 = T2; + } + + if (e < 0) + inv(X, T1); + else + X = T1; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mat_GF2E.c b/thirdparty/linux/ntl/src/mat_GF2E.c new file mode 100644 index 0000000000..b437f2149c --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_GF2E.c @@ -0,0 +1,808 @@ + +#include +#include +#include + +#include + +NTL_START_IMPL + + +void add(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix add: dimension mismatch"); + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + add(X(i,j), A(i,j), B(i,j)); +} + +void mul_aux(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + if (l != B.NumRows()) + LogicError("matrix mul: dimension mismatch"); + + X.SetDims(n, m); + + long i, j, k; + GF2X acc, tmp; + + for (i = 1; i <= n; i++) { + for (j = 1; j <= m; j++) { + clear(acc); + for(k = 1; k <= l; k++) { + mul(tmp, rep(A(i,k)), rep(B(k,j))); + add(acc, acc, tmp); + } + conv(X(i,j), acc); + } + } +} + + +void mul(mat_GF2E& X, const mat_GF2E& A, const mat_GF2E& B) +{ + if (&X == &A || &X == &B) { + mat_GF2E tmp; + mul_aux(tmp, A, B); + X = tmp; + } + else + mul_aux(X, A, B); +} + + +static +void mul_aux(vec_GF2E& x, const mat_GF2E& A, const vec_GF2E& b) +{ + long n = A.NumRows(); + long l = A.NumCols(); + + if (l != b.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(n); + + long i, k; + GF2X acc, tmp; + + for (i = 1; i <= n; i++) { + clear(acc); + for (k = 1; k <= l; k++) { + mul(tmp, rep(A(i,k)), rep(b(k))); + add(acc, acc, tmp); + } + conv(x(i), acc); + } +} + + +void mul(vec_GF2E& x, const mat_GF2E& A, const vec_GF2E& b) +{ + if (&b == &x || A.position1(x) != -1) { + vec_GF2E tmp; + mul_aux(tmp, A, b); + x = tmp; + } + else + mul_aux(x, A, b); +} + +static +void mul_aux(vec_GF2E& x, const vec_GF2E& a, const mat_GF2E& B) +{ + long n = B.NumRows(); + long l = B.NumCols(); + + if (n != a.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(l); + + long i, k; + GF2X acc, tmp; + + for (i = 1; i <= l; i++) { + clear(acc); + for (k = 1; k <= n; k++) { + mul(tmp, rep(a(k)), rep(B(k,i))); + add(acc, acc, tmp); + } + conv(x(i), acc); + } +} + +void mul(vec_GF2E& x, const vec_GF2E& a, const mat_GF2E& B) +{ + if (&a == &x) { + vec_GF2E tmp; + mul_aux(tmp, a, B); + x = tmp; + } + else + mul_aux(x, a, B); +} + + + +void ident(mat_GF2E& X, long n) +{ + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + set(X(i, j)); + else + clear(X(i, j)); +} + + +void determinant(GF2E& d, const mat_GF2E& M_in) +{ + long k, n; + long i, j; + long pos; + GF2X t1, t2; + GF2X *x, *y; + + const GF2XModulus& p = GF2E::modulus(); + + n = M_in.NumRows(); + + if (M_in.NumCols() != n) + LogicError("determinant: nonsquare matrix"); + + if (n == 0) { + set(d); + return; + } + + vec_GF2XVec M; + + M.SetLength(n); + for (i = 0; i < n; i++) { + M[i].SetSize(n, 2*GF2E::WordLength()); + for (j = 0; j < n; j++) + M[i][j] = rep(M_in[i][j]); + } + + GF2X det; + set(det); + + for (k = 0; k < 
n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) + pos = i; + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + for (j = k+1; j < n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j < n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + conv(d, det); +} + +long IsIdent(const mat_GF2E& A, long n) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (!IsOne(A(i, j))) return 0; + } + + return 1; +} + + +void transpose(mat_GF2E& X, const mat_GF2E& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + long i, j; + + if (&X == & A) { + if (n == m) + for (i = 1; i <= n; i++) + for (j = i+1; j <= n; j++) + swap(X(i, j), X(j, i)); + else { + mat_GF2E tmp; + tmp.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + tmp(j, i) = A(i, j); + X.kill(); + X = tmp; + } + } + else { + X.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + X(j, i) = A(i, j); + } +} + + +static +void solve_impl(GF2E& d, vec_GF2E& X, const mat_GF2E& A, const vec_GF2E& b, bool trans) + +{ + long n = A.NumRows(); + if (A.NumCols() != n) + LogicError("solve: nonsquare matrix"); + + if (b.length() != n) + LogicError("solve: dimension mismatch"); + + if (n == 0) { + set(d); + X.SetLength(0); + return; + } + + long i, j, k, pos; + GF2X t1, t2; + GF2X *x, *y; + + const GF2XModulus& p = GF2E::modulus(); + + vec_GF2XVec M; + + M.SetLength(n); + + for (i = 0; i < n; i++) { + M[i].SetSize(n+1, 2*GF2E::WordLength()); + + if (trans) + for (j = 0; j < n; j++) M[i][j] = rep(A[j][i]); + else + for (j = 0; j < n; j++) M[i][j] = rep(A[i][j]); + + M[i][n] = rep(b[i]); + } + + GF2X det; + set(det); + + for (k = 0; k < n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + for (j = k+1; j <= n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j <= n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + X.SetLength(n); + for (i = n-1; i >= 0; i--) { + clear(t1); + for (j = i+1; j < n; j++) { + mul(t2, rep(X[j]), M[i][j]); + add(t1, t1, t2); + } + add(t1, t1, M[i][n]); + conv(X[i], t1); + } + + conv(d, det); +} + +void solve(GF2E& d, vec_GF2E& x, const mat_GF2E& A, const vec_GF2E& b) +{ + solve_impl(d, x, A, b, true); +} + +void solve(GF2E& d, const mat_GF2E& A, vec_GF2E& x, const vec_GF2E& b) +{ + solve_impl(d, x, A, b, false); +} + +void inv(GF2E& d, mat_GF2E& X, const mat_GF2E& A) +{ + long n = A.NumRows(); + if (A.NumCols() != n) + 
LogicError("inv: nonsquare matrix"); + + if (n == 0) { + set(d); + X.SetDims(0, 0); + return; + } + + long i, j, k, pos; + GF2X t1, t2; + GF2X *x, *y; + + const GF2XModulus& p = GF2E::modulus(); + + vec_GF2XVec M; + + M.SetLength(n); + + for (i = 0; i < n; i++) { + M[i].SetSize(2*n, 2*GF2E::WordLength()); + for (j = 0; j < n; j++) { + M[i][j] = rep(A[i][j]); + clear(M[i][n+j]); + } + set(M[i][n+i]); + } + + GF2X det; + set(det); + + for (k = 0; k < n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + for (j = k+1; j < 2*n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j < 2*n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + X.SetDims(n, n); + for (k = 0; k < n; k++) { + for (i = n-1; i >= 0; i--) { + clear(t1); + for (j = i+1; j < n; j++) { + mul(t2, rep(X[j][k]), M[i][j]); + add(t1, t1, t2); + } + add(t1, t1, M[i][n+k]); + conv(X[i][k], t1); + } + } + + conv(d, det); +} + + + +long gauss(mat_GF2E& M_in, long w) +{ + long k, l; + long i, j; + long pos; + GF2X t1, t2, t3; + GF2X *x, *y; + + long n = M_in.NumRows(); + long m = M_in.NumCols(); + + if (w < 0 || w > m) + LogicError("gauss: bad args"); + + const GF2XModulus& p = GF2E::modulus(); + + vec_GF2XVec M; + + M.SetLength(n); + for (i = 0; i < n; i++) { + M[i].SetSize(m, 2*GF2E::WordLength()); + for (j = 0; j < m; j++) { + M[i][j] = rep(M_in[i][j]); + } + } + + l = 0; + for (k = 0; k < w && l < n; k++) { + + pos = -1; + for (i = l; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + swap(M[pos], M[l]); + + InvMod(t3, M[l][k], p); + + for (j = k+1; j < m; j++) { + rem(M[l][j], M[l][j], p); + } + + for (i = l+1; i < n; i++) { + // M[i] = M[i] + M[l]*M[i,k]*t3 + + MulMod(t1, M[i][k], t3, p); + + clear(M[i][k]); + + x = M[i].elts() + (k+1); + y = M[l].elts() + (k+1); + + for (j = k+1; j < m; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(t2, t2, *x); + *x = t2; + } + } + + l++; + } + } + + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + conv(M_in[i][j], M[i][j]); + + return l; +} + +long gauss(mat_GF2E& M) +{ + return gauss(M, M.NumCols()); +} + +void image(mat_GF2E& X, const mat_GF2E& A) +{ + mat_GF2E M; + M = A; + long r = gauss(M); + M.SetDims(r, M.NumCols()); + X = M; +} + +void kernel(mat_GF2E& X, const mat_GF2E& A) +{ + long m = A.NumRows(); + long n = A.NumCols(); + + mat_GF2E M; + long r; + + transpose(M, A); + r = gauss(M); + + X.SetDims(m-r, m); + + long i, j, k, s; + GF2X t1, t2; + + GF2E T3; + + vec_long D; + D.SetLength(m); + for (j = 0; j < m; j++) D[j] = -1; + + vec_GF2E inverses; + inverses.SetLength(m); + + j = -1; + for (i = 0; i < r; i++) { + do { + j++; + } while (IsZero(M[i][j])); + + D[j] = i; + inv(inverses[j], M[i][j]); + } + + for (k = 0; k < m-r; k++) { + vec_GF2E& v = X[k]; + long pos = 0; + for (j = m-1; j >= 0; j--) { + if (D[j] == -1) { + if (pos == k) + set(v[j]); + else + clear(v[j]); + pos++; + } + else { + i = D[j]; + + clear(t1); + + for (s = j+1; s < m; 
s++) { + mul(t2, rep(v[s]), rep(M[i][s])); + add(t1, t1, t2); + } + + conv(T3, t1); + mul(T3, T3, inverses[j]); + v[j] = T3; + } + } + } +} + +void mul(mat_GF2E& X, const mat_GF2E& A, const GF2E& b_in) +{ + GF2E b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + +void mul(mat_GF2E& X, const mat_GF2E& A, GF2 b) +{ + X = A; + if (b == 0) + clear(X); +} + +void diag(mat_GF2E& X, long n, const GF2E& d_in) +{ + GF2E d = d_in; + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + X(i, j) = d; + else + clear(X(i, j)); +} + +long IsDiag(const mat_GF2E& A, long n, const GF2E& d) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (A(i, j) != d) return 0; + } + + return 1; +} + + +long IsZero(const mat_GF2E& a) +{ + long n = a.NumRows(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +void clear(mat_GF2E& x) +{ + long n = x.NumRows(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + + +mat_GF2E operator+(const mat_GF2E& a, const mat_GF2E& b) +{ + mat_GF2E res; + add(res, a, b); + NTL_OPT_RETURN(mat_GF2E, res); +} + +mat_GF2E operator*(const mat_GF2E& a, const mat_GF2E& b) +{ + mat_GF2E res; + mul_aux(res, a, b); + NTL_OPT_RETURN(mat_GF2E, res); +} + +mat_GF2E operator-(const mat_GF2E& a, const mat_GF2E& b) +{ + mat_GF2E res; + sub(res, a, b); + NTL_OPT_RETURN(mat_GF2E, res); +} + + +mat_GF2E operator-(const mat_GF2E& a) +{ + mat_GF2E res; + negate(res, a); + NTL_OPT_RETURN(mat_GF2E, res); +} + + +vec_GF2E operator*(const mat_GF2E& a, const vec_GF2E& b) +{ + vec_GF2E res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_GF2E, res); +} + +vec_GF2E operator*(const vec_GF2E& a, const mat_GF2E& b) +{ + vec_GF2E res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_GF2E, res); +} + + +void inv(mat_GF2E& X, const mat_GF2E& A) +{ + GF2E d; + inv(d, X, A); + if (d == 0) ArithmeticError("inv: non-invertible matrix"); +} + +void power(mat_GF2E& X, const mat_GF2E& A, const ZZ& e) +{ + if (A.NumRows() != A.NumCols()) LogicError("power: non-square matrix"); + + if (e == 0) { + ident(X, A.NumRows()); + return; + } + + mat_GF2E T1, T2; + long i, k; + + k = NumBits(e); + T1 = A; + + for (i = k-2; i >= 0; i--) { + sqr(T2, T1); + if (bit(e, i)) + mul(T1, T2, A); + else + T1 = T2; + } + + if (e < 0) + inv(X, T1); + else + X = T1; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mat_RR.c b/thirdparty/linux/ntl/src/mat_RR.c new file mode 100644 index 0000000000..5b661a0f58 --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_RR.c @@ -0,0 +1,680 @@ + +#include + +#include + +NTL_START_IMPL + + +void add(mat_RR& X, const mat_RR& A, const mat_RR& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix add: dimension mismatch"); + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + add(X(i,j), A(i,j), B(i,j)); +} + +void sub(mat_RR& X, const mat_RR& A, const mat_RR& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix sub: dimension mismatch"); + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + sub(X(i,j), A(i,j), B(i,j)); +} + +void mul_aux(mat_RR& X, const 
mat_RR& A, const mat_RR& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + if (l != B.NumRows()) + LogicError("matrix mul: dimension mismatch"); + + X.SetDims(n, m); + + long i, j, k; + RR acc, tmp; + + for (i = 1; i <= n; i++) { + for (j = 1; j <= m; j++) { + clear(acc); + for(k = 1; k <= l; k++) { + mul(tmp, A(i,k), B(k,j)); + add(acc, acc, tmp); + } + X(i,j) = acc; + } + } +} + + +void mul(mat_RR& X, const mat_RR& A, const mat_RR& B) +{ + if (&X == &A || &X == &B) { + mat_RR tmp; + mul_aux(tmp, A, B); + X = tmp; + } + else + mul_aux(X, A, B); +} + + +static +void mul_aux(vec_RR& x, const mat_RR& A, const vec_RR& b) +{ + long n = A.NumRows(); + long l = A.NumCols(); + + if (l != b.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(n); + + long i, k; + RR acc, tmp; + + for (i = 1; i <= n; i++) { + clear(acc); + for (k = 1; k <= l; k++) { + mul(tmp, A(i,k), b(k)); + add(acc, acc, tmp); + } + x(i) = acc; + } +} + + +void mul(vec_RR& x, const mat_RR& A, const vec_RR& b) +{ + if (&b == &x || A.position1(x) != -1) { + vec_RR tmp; + mul_aux(tmp, A, b); + x = tmp; + } + else + mul_aux(x, A, b); +} + +static +void mul_aux(vec_RR& x, const vec_RR& a, const mat_RR& B) +{ + long n = B.NumRows(); + long l = B.NumCols(); + + if (n != a.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(l); + + long i, k; + RR acc, tmp; + + for (i = 1; i <= l; i++) { + clear(acc); + for (k = 1; k <= n; k++) { + mul(tmp, a(k), B(k,i)); + add(acc, acc, tmp); + } + x(i) = acc; + } +} + +void mul(vec_RR& x, const vec_RR& a, const mat_RR& B) +{ + if (&a == &x) { + vec_RR tmp; + mul_aux(tmp, a, B); + x = tmp; + } + else + mul_aux(x, a, B); +} + + + +void ident(mat_RR& X, long n) +{ + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + set(X(i, j)); + else + clear(X(i, j)); +} + + +void determinant(RR& d, const mat_RR& M_in) +{ + long k, n; + long i, j; + long pos; + RR t1, t2; + RR *x, *y; + + n = M_in.NumRows(); + + if (M_in.NumCols() != n) + LogicError("determinant: nonsquare matrix"); + + if (n == 0) { + set(d); + return; + } + + mat_RR M; + + M = M_in; + + + RR det; + set(det); + + RR maxval; + + + for (k = 0; k < n; k++) { + pos = -1; + clear(maxval); + for (i = k; i < n; i++) { + abs(t1, M[i][k]); + if (t1 > maxval) { + pos = i; + maxval = t1; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + negate(det, det); + } + + mul(det, det, M[k][k]); + + // make M[k, k] == -1 + + inv(t1, M[k][k]); + negate(t1, t1); + for (j = k+1; j < n; j++) { + mul(M[k][j], M[k][j], t1); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j < n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + d = det; +} + +RR determinant(const mat_RR& a) + { RR x; determinant(x, a); NTL_OPT_RETURN(RR, x); } + + +long IsIdent(const mat_RR& A, long n) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (!IsOne(A(i, j))) return 0; + } + + return 1; +} + + +void transpose(mat_RR& X, const mat_RR& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + long i, j; + + if (&X == & A) { + if (n == m) + for (i = 1; i <= n; i++) + for (j = i+1; j <= n; j++) + swap(X(i, j), X(j, i)); + 
else { + mat_RR tmp; + tmp.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + tmp(j, i) = A(i, j); + X.kill(); + X = tmp; + } + } + else { + X.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + X(j, i) = A(i, j); + } +} + + +void solve(RR& d, vec_RR& X, + const mat_RR& A, const vec_RR& b) + +{ + long n = A.NumRows(); + if (A.NumCols() != n) + LogicError("solve: nonsquare matrix"); + + if (b.length() != n) + LogicError("solve: dimension mismatch"); + + if (n == 0) { + set(d); + X.SetLength(0); + return; + } + + long i, j, k, pos; + RR t1, t2; + RR *x, *y; + + mat_RR M; + M.SetDims(n, n+1); + + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) + M[i][j] = A[j][i]; + M[i][n] = b[i]; + } + + RR det; + set(det); + + RR maxval; + + for (k = 0; k < n; k++) { + pos = -1; + clear(maxval); + for (i = k; i < n; i++) { + abs(t1, M[i][k]); + if (t1 > maxval) { + pos = i; + maxval = t1; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + negate(det, det); + } + + mul(det, det, M[k][k]); + + // make M[k, k] == -1 + + inv(t1, M[k][k]); + negate(t1, t1); + for (j = k+1; j <= n; j++) { + mul(M[k][j], M[k][j], t1); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j <= n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + X.SetLength(n); + for (i = n-1; i >= 0; i--) { + clear(t1); + for (j = i+1; j < n; j++) { + mul(t2, X[j], M[i][j]); + add(t1, t1, t2); + } + sub(t1, t1, M[i][n]); + X[i] = t1; + } + + d = det; +} + +void inv(RR& d, mat_RR& X, const mat_RR& A) +{ + long n = A.NumRows(); + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + + if (n == 0) { + set(d); + X.SetDims(0, 0); + return; + } + + long i, j, k, pos; + RR t1, t2; + RR *x, *y; + + + mat_RR M; + M.SetDims(n, 2*n); + + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + M[i][j] = A[i][j]; + clear(M[i][n+j]); + } + set(M[i][n+i]); + } + + RR det; + set(det); + + RR maxval; + + for (k = 0; k < n; k++) { + pos = -1; + clear(maxval); + for (i = k; i < n; i++) { + abs(t1, M[i][k]); + if (t1 > maxval) { + pos = i; + maxval = t1; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + negate(det, det); + } + + mul(det, det, M[k][k]); + + // make M[k, k] == -1 + + inv(t1, M[k][k]); + negate(t1, t1); + for (j = k+1; j < 2*n; j++) { + mul(M[k][j], M[k][j], t1); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j < 2*n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + X.SetDims(n, n); + for (k = 0; k < n; k++) { + for (i = n-1; i >= 0; i--) { + clear(t1); + for (j = i+1; j < n; j++) { + mul(t2, X[j][k], M[i][j]); + add(t1, t1, t2); + } + sub(t1, t1, M[i][n+k]); + X[i][k] = t1; + } + } + + d = det; +} + + + +void mul(mat_RR& X, const mat_RR& A, const RR& b_in) +{ + RR b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + + +void mul(mat_RR& X, const mat_RR& A, double b_in) +{ + RR b; + b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + 
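+// --- Editorial sketch, not part of the NTL sources ---
+// The elimination routines in this file (determinant, solve, inv) all
+// select as pivot the remaining entry of largest absolute value in the
+// current column, which keeps the RR floating-point elimination
+// numerically stable, and they negate the running determinant on each
+// row swap. A hypothetical caller-side use of the public interface
+// declared in mat_RR.h:
+//
+//    RR::SetPrecision(200);           // compute with 200-bit mantissas
+//    mat_RR A;  vec_RR b, x;  RR d;
+//    A.SetDims(2, 2);
+//    A[0][0] = 4;  A[0][1] = 3;
+//    A[1][0] = 6;  A[1][1] = 3;
+//    b.SetLength(2);  b[0] = 10;  b[1] = 12;
+//    solve(d, x, A, b);               // d = det(A); if d != 0, x*A == b
+//
+// On a singular A, solve() returns with d == 0 and leaves x untouched
+// (see the early return in the elimination loop above).
+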
+void diag(mat_RR& X, long n, const RR& d_in) +{ + RR d = d_in; + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + X(i, j) = d; + else + clear(X(i, j)); +} + +long IsDiag(const mat_RR& A, long n, const RR& d) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (A(i, j) != d) return 0; + } + + return 1; +} + +void negate(mat_RR& X, const mat_RR& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + negate(X(i,j), A(i,j)); +} + +long IsZero(const mat_RR& a) +{ + long n = a.NumRows(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +void clear(mat_RR& x) +{ + long n = x.NumRows(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + + +mat_RR operator+(const mat_RR& a, const mat_RR& b) +{ + mat_RR res; + add(res, a, b); + NTL_OPT_RETURN(mat_RR, res); +} + +mat_RR operator*(const mat_RR& a, const mat_RR& b) +{ + mat_RR res; + mul_aux(res, a, b); + NTL_OPT_RETURN(mat_RR, res); +} + +mat_RR operator-(const mat_RR& a, const mat_RR& b) +{ + mat_RR res; + sub(res, a, b); + NTL_OPT_RETURN(mat_RR, res); +} + + +mat_RR operator-(const mat_RR& a) +{ + mat_RR res; + negate(res, a); + NTL_OPT_RETURN(mat_RR, res); +} + + +vec_RR operator*(const mat_RR& a, const vec_RR& b) +{ + vec_RR res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_RR, res); +} + +vec_RR operator*(const vec_RR& a, const mat_RR& b) +{ + vec_RR res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_RR, res); +} + + +void inv(mat_RR& X, const mat_RR& A) +{ + RR d; + inv(d, X, A); + if (d == 0) ArithmeticError("inv: non-invertible matrix"); +} + +void power(mat_RR& X, const mat_RR& A, const ZZ& e) +{ + if (A.NumRows() != A.NumCols()) LogicError("power: non-square matrix"); + + if (e == 0) { + ident(X, A.NumRows()); + return; + } + + mat_RR T1, T2; + long i, k; + + k = NumBits(e); + T1 = A; + + for (i = k-2; i >= 0; i--) { + sqr(T2, T1); + if (bit(e, i)) + mul(T1, T2, A); + else + T1 = T2; + } + + if (e < 0) + inv(X, T1); + else + X = T1; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mat_ZZ.c b/thirdparty/linux/ntl/src/mat_ZZ.c new file mode 100644 index 0000000000..766ce2cb26 --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_ZZ.c @@ -0,0 +1,1337 @@ + +#include + +#include + +NTL_START_IMPL + + +void add(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix add: dimension mismatch"); + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + add(X(i,j), A(i,j), B(i,j)); +} + +void sub(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix sub: dimension mismatch"); + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + sub(X(i,j), A(i,j), B(i,j)); +} + +void mul_aux(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + if (l != B.NumRows()) + LogicError("matrix mul: dimension mismatch"); + + X.SetDims(n, m); + + long i, j, k; + ZZ acc, tmp; + + for (i = 1; i <= n; i++) { + for (j = 1; j <= m; j++) { + clear(acc); + for(k = 1; k <= l; k++) { + mul(tmp, A(i,k), 
B(k,j)); + add(acc, acc, tmp); + } + X(i,j) = acc; + } + } +} + + +void mul(mat_ZZ& X, const mat_ZZ& A, const mat_ZZ& B) +{ + if (&X == &A || &X == &B) { + mat_ZZ tmp; + mul_aux(tmp, A, B); + X = tmp; + } + else + mul_aux(X, A, B); +} + + +static +void mul_aux(vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& b) +{ + long n = A.NumRows(); + long l = A.NumCols(); + + if (l != b.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(n); + + long i, k; + ZZ acc, tmp; + + for (i = 1; i <= n; i++) { + clear(acc); + for (k = 1; k <= l; k++) { + mul(tmp, A(i,k), b(k)); + add(acc, acc, tmp); + } + x(i) = acc; + } +} + + +void mul(vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& b) +{ + if (&b == &x || A.position1(x) != -1) { + vec_ZZ tmp; + mul_aux(tmp, A, b); + x = tmp; + } + else + mul_aux(x, A, b); +} + +static +void mul_aux(vec_ZZ& x, const vec_ZZ& a, const mat_ZZ& B) +{ + long n = B.NumRows(); + long l = B.NumCols(); + + if (n != a.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(l); + + long i, k; + ZZ acc, tmp; + + for (i = 1; i <= l; i++) { + clear(acc); + for (k = 1; k <= n; k++) { + mul(tmp, a(k), B(k,i)); + add(acc, acc, tmp); + } + x(i) = acc; + } +} + +void mul(vec_ZZ& x, const vec_ZZ& a, const mat_ZZ& B) +{ + if (&a == &x) { + vec_ZZ tmp; + mul_aux(tmp, a, B); + x = tmp; + } + else + mul_aux(x, a, B); +} + + + +void ident(mat_ZZ& X, long n) +{ + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + set(X(i, j)); + else + clear(X(i, j)); +} + +static +long DetBound(const mat_ZZ& a) +{ + long n = a.NumRows(); + long i; + ZZ res, t1; + + set(res); + + for (i = 0; i < n; i++) { + InnerProduct(t1, a[i], a[i]); + if (t1 > 1) { + SqrRoot(t1, t1); + add(t1, t1, 1); + } + mul(res, res, t1); + } + + return NumBits(res); +} + + + + + +void determinant(ZZ& rres, const mat_ZZ& a, long deterministic) +{ + long n = a.NumRows(); + if (a.NumCols() != n) + LogicError("determinant: nonsquare matrix"); + + if (n == 0) { + set(rres); + return; + } + + zz_pBak zbak; + zbak.save(); + + ZZ_pBak Zbak; + Zbak.save(); + + long instable = 1; + + long gp_cnt = 0; + + long bound = 2+DetBound(a); + + ZZ res, prod; + + clear(res); + set(prod); + + + long i; + for (i = 0; ; i++) { + if (NumBits(prod) > bound) + break; + + if (!deterministic && + !instable && bound > 1000 && NumBits(prod) < 0.25*bound) { + ZZ P; + + + long plen = 90 + NumBits(max(bound, NumBits(res))); + GenPrime(P, plen, 90 + 2*NumBits(gp_cnt++)); + + ZZ_p::init(P); + + mat_ZZ_p A; + conv(A, a); + + ZZ_p t; + determinant(t, A); + + if (CRT(res, prod, rep(t), P)) + instable = 1; + else + break; + } + + + zz_p::FFTInit(i); + long p = zz_p::modulus(); + + mat_zz_p A; + conv(A, a); + + zz_p t; + determinant(t, A); + + instable = CRT(res, prod, rep(t), p); + } + + rres = res; + + zbak.restore(); + Zbak.restore(); +} + + + + +void conv(mat_zz_p& x, const mat_ZZ& a) +{ + long n = a.NumRows(); + long m = a.NumCols(); + long i; + + x.SetDims(n, m); + for (i = 0; i < n; i++) + conv(x[i], a[i]); +} + +void conv(mat_ZZ_p& x, const mat_ZZ& a) +{ + long n = a.NumRows(); + long m = a.NumCols(); + long i; + + x.SetDims(n, m); + for (i = 0; i < n; i++) + conv(x[i], a[i]); +} + +long IsIdent(const mat_ZZ& A, long n) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (!IsOne(A(i, j))) return 0; + } + + return 1; +} + + +void transpose(mat_ZZ& X, 
const mat_ZZ& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + long i, j; + + if (&X == & A) { + if (n == m) + for (i = 1; i <= n; i++) + for (j = i+1; j <= n; j++) + swap(X(i, j), X(j, i)); + else { + mat_ZZ tmp; + tmp.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + tmp(j, i) = A(i, j); + X.kill(); + X = tmp; + } + } + else { + X.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + X(j, i) = A(i, j); + } +} + +long CRT(mat_ZZ& gg, ZZ& a, const mat_zz_p& G) +{ + long n = gg.NumRows(); + long m = gg.NumCols(); + + if (G.NumRows() != n || G.NumCols() != m) + LogicError("CRT: dimension mismatch"); + + long p = zz_p::modulus(); + + ZZ new_a; + mul(new_a, a, p); + + long a_inv; + a_inv = rem(a, p); + a_inv = InvMod(a_inv, p); + + long p1; + p1 = p >> 1; + + ZZ a1; + RightShift(a1, a, 1); + + long p_odd = (p & 1); + + long modified = 0; + + long h; + + ZZ g; + long i, j; + + for (i = 0; i < n; i++) { + for (j = 0; j < m; j++) { + if (!CRTInRange(gg[i][j], a)) { + modified = 1; + rem(g, gg[i][j], a); + if (g > a1) sub(g, g, a); + } + else + g = gg[i][j]; + + h = rem(g, p); + h = SubMod(rep(G[i][j]), h, p); + h = MulMod(h, a_inv, p); + if (h > p1) + h = h - p; + + if (h != 0) { + modified = 1; + + if (!p_odd && g > 0 && (h == p1)) + MulSubFrom(g, a, h); + else + MulAddTo(g, a, h); + + } + + gg[i][j] = g; + } + } + + a = new_a; + + return modified; + +} + + +void mul(mat_ZZ& X, const mat_ZZ& A, const ZZ& b_in) +{ + ZZ b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + +void mul(mat_ZZ& X, const mat_ZZ& A, long b) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + + +static +void ExactDiv(vec_ZZ& x, const ZZ& d) +{ + long n = x.length(); + long i; + + for (i = 0; i < n; i++) + if (!divide(x[i], x[i], d)) + ArithmeticError("inexact division"); +} + +static +void ExactDiv(mat_ZZ& x, const ZZ& d) +{ + long n = x.NumRows(); + long m = x.NumCols(); + + long i, j; + + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + if (!divide(x[i][j], x[i][j], d)) + ArithmeticError("inexact division"); +} + +void diag(mat_ZZ& X, long n, const ZZ& d_in) +{ + ZZ d = d_in; + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + X(i, j) = d; + else + clear(X(i, j)); +} + +long IsDiag(const mat_ZZ& A, long n, const ZZ& d) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (A(i, j) != d) return 0; + } + + return 1; +} + + + + +void solve(ZZ& d_out, vec_ZZ& x_out, + const mat_ZZ& A, const vec_ZZ& b, + long deterministic) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("solve: nonsquare matrix"); + + if (b.length() != n) + LogicError("solve: dimension mismatch"); + + if (n == 0) { + set(d_out); + x_out.SetLength(0); + return; + } + + zz_pBak zbak; + zbak.save(); + + ZZ_pBak Zbak; + Zbak.save(); + + vec_ZZ x(INIT_SIZE, n); + ZZ d, d1; + + ZZ d_prod, x_prod; + set(d_prod); + set(x_prod); + + long d_instable = 1; + long x_instable = 1; + + long check = 0; + + long gp_cnt = 0; + + vec_ZZ y, b1; + + long i; + long bound = 2+DetBound(A); + + for (i = 0; ; i++) { + if ((check || IsZero(d)) && !d_instable) { + if (NumBits(d_prod) 
> bound) { + break; + } + else if (!deterministic && + bound > 1000 && NumBits(d_prod) < 0.25*bound) { + + ZZ P; + + long plen = 90 + NumBits(max(bound, NumBits(d))); + GenPrime(P, plen, 90 + 2*NumBits(gp_cnt++)); + + ZZ_p::init(P); + + mat_ZZ_p AA; + conv(AA, A); + + ZZ_p dd; + determinant(dd, AA); + + if (CRT(d, d_prod, rep(dd), P)) + d_instable = 1; + else + break; + } + } + + + zz_p::FFTInit(i); + long p = zz_p::modulus(); + + mat_zz_p AA; + conv(AA, A); + + if (!check) { + vec_zz_p bb, xx; + conv(bb, b); + + zz_p dd; + + solve(dd, xx, AA, bb); + + d_instable = CRT(d, d_prod, rep(dd), p); + if (!IsZero(dd)) { + mul(xx, xx, dd); + x_instable = CRT(x, x_prod, xx); + } + else + x_instable = 1; + + if (!d_instable && !x_instable) { + mul(y, x, A); + mul(b1, b, d); + if (y == b1) { + d1 = d; + check = 1; + } + } + } + else { + zz_p dd; + determinant(dd, AA); + d_instable = CRT(d, d_prod, rep(dd), p); + } + } + + if (check && d1 != d) { + mul(x, x, d); + ExactDiv(x, d1); + } + + d_out = d; + if (check) x_out = x; + + zbak.restore(); + Zbak.restore(); +} + +void inv(ZZ& d_out, mat_ZZ& x_out, const mat_ZZ& A, long deterministic) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("solve: nonsquare matrix"); + + if (n == 0) { + set(d_out); + x_out.SetDims(0, 0); + return; + } + + zz_pBak zbak; + zbak.save(); + + ZZ_pBak Zbak; + Zbak.save(); + + mat_ZZ x(INIT_SIZE, n, n); + ZZ d, d1; + + ZZ d_prod, x_prod; + set(d_prod); + set(x_prod); + + long d_instable = 1; + long x_instable = 1; + + long gp_cnt = 0; + + long check = 0; + + + mat_ZZ y; + + long i; + long bound = 2+DetBound(A); + + for (i = 0; ; i++) { + if ((check || IsZero(d)) && !d_instable) { + if (NumBits(d_prod) > bound) { + break; + } + else if (!deterministic && + bound > 1000 && NumBits(d_prod) < 0.25*bound) { + + ZZ P; + + long plen = 90 + NumBits(max(bound, NumBits(d))); + GenPrime(P, plen, 90 + 2*NumBits(gp_cnt++)); + + ZZ_p::init(P); + + mat_ZZ_p AA; + conv(AA, A); + + ZZ_p dd; + determinant(dd, AA); + + if (CRT(d, d_prod, rep(dd), P)) + d_instable = 1; + else + break; + } + } + + + zz_p::FFTInit(i); + long p = zz_p::modulus(); + + mat_zz_p AA; + conv(AA, A); + + if (!check) { + mat_zz_p xx; + + zz_p dd; + + inv(dd, xx, AA); + + d_instable = CRT(d, d_prod, rep(dd), p); + if (!IsZero(dd)) { + mul(xx, xx, dd); + x_instable = CRT(x, x_prod, xx); + } + else + x_instable = 1; + + if (!d_instable && !x_instable) { + mul(y, x, A); + if (IsDiag(y, n, d)) { + d1 = d; + check = 1; + } + } + } + else { + zz_p dd; + determinant(dd, AA); + d_instable = CRT(d, d_prod, rep(dd), p); + } + } + + if (check && d1 != d) { + mul(x, x, d); + ExactDiv(x, d1); + } + + d_out = d; + if (check) x_out = x; + + zbak.restore(); + Zbak.restore(); +} + +void negate(mat_ZZ& X, const mat_ZZ& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + negate(X(i,j), A(i,j)); +} + + + +long IsZero(const mat_ZZ& a) +{ + long n = a.NumRows(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +void clear(mat_ZZ& x) +{ + long n = x.NumRows(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + + +mat_ZZ operator+(const mat_ZZ& a, const mat_ZZ& b) +{ + mat_ZZ res; + add(res, a, b); + NTL_OPT_RETURN(mat_ZZ, res); +} + +mat_ZZ operator*(const mat_ZZ& a, const mat_ZZ& b) +{ + mat_ZZ res; + mul_aux(res, a, b); + NTL_OPT_RETURN(mat_ZZ, res); +} + +mat_ZZ operator-(const mat_ZZ& a, const mat_ZZ& b) +{ + mat_ZZ res; + sub(res, a, b); 
+ NTL_OPT_RETURN(mat_ZZ, res); +} + + +mat_ZZ operator-(const mat_ZZ& a) +{ + mat_ZZ res; + negate(res, a); + NTL_OPT_RETURN(mat_ZZ, res); +} + +vec_ZZ operator*(const mat_ZZ& a, const vec_ZZ& b) +{ + vec_ZZ res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_ZZ, res); +} + +vec_ZZ operator*(const vec_ZZ& a, const mat_ZZ& b) +{ + vec_ZZ res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_ZZ, res); +} + + + + +void inv(mat_ZZ& X, const mat_ZZ& A) +{ + ZZ d; + inv(d, X, A); + if (d == -1) + negate(X, X); + else if (d != 1) + ArithmeticError("inv: non-invertible matrix"); +} + +void power(mat_ZZ& X, const mat_ZZ& A, const ZZ& e) +{ + if (A.NumRows() != A.NumCols()) LogicError("power: non-square matrix"); + + if (e == 0) { + ident(X, A.NumRows()); + return; + } + + mat_ZZ T1, T2; + long i, k; + + k = NumBits(e); + T1 = A; + + for (i = k-2; i >= 0; i--) { + sqr(T2, T1); + if (bit(e, i)) + mul(T1, T2, A); + else + T1 = T2; + } + + if (e < 0) + inv(X, T1); + else + X = T1; +} + + + +/*********************************************************** + + routines for solving a linear system via Hensel lifting + +************************************************************/ + + +static +long MaxBits(const mat_ZZ& A) +{ + long m = 0; + long i, j; + for (i = 0; i < A.NumRows(); i++) + for (j = 0; j < A.NumCols(); j++) + m = max(m, NumBits(A[i][j])); + + return m; +} + + + + +// Computes an upper bound on the numerators and denominators +// to the solution x*A = b using Hadamard's bound and Cramer's rule. +// If A contains a zero row, then sets both bounds to zero. + +static +void hadamard(ZZ& num_bound, ZZ& den_bound, + const mat_ZZ& A, const vec_ZZ& b) +{ + long n = A.NumRows(); + + if (n == 0) LogicError("internal error: hadamard with n = 0"); + + ZZ b_len, min_A_len, prod, t1; + + InnerProduct(min_A_len, A[0], A[0]); + + prod = min_A_len; + + long i; + for (i = 1; i < n; i++) { + InnerProduct(t1, A[i], A[i]); + if (t1 < min_A_len) + min_A_len = t1; + mul(prod, prod, t1); + } + + if (min_A_len == 0) { + num_bound = 0; + den_bound = 0; + return; + } + + InnerProduct(b_len, b, b); + + div(t1, prod, min_A_len); + mul(t1, t1, b_len); + + SqrRoot(num_bound, t1); + SqrRoot(den_bound, prod); +} + + +static +void MixedMul(vec_ZZ& x, const vec_zz_p& a, const mat_ZZ& B) +{ + long n = B.NumRows(); + long l = B.NumCols(); + + if (n != a.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(l); + + long i, k; + ZZ acc, tmp; + + for (i = 1; i <= l; i++) { + clear(acc); + for (k = 1; k <= n; k++) { + mul(tmp, B(k, i), rep(a(k))); + add(acc, acc, tmp); + } + x(i) = acc; + } +} + +static +void SubDiv(vec_ZZ& e, const vec_ZZ& t, long p) +{ + long n = e.length(); + if (t.length() != n) LogicError("SubDiv: dimension mismatch"); + + ZZ s; + long i; + + for (i = 0; i < n; i++) { + sub(s, e[i], t[i]); + div(e[i], s, p); + } +} + +static +void MulAdd(vec_ZZ& x, const ZZ& prod, const vec_zz_p& h) +{ + long n = x.length(); + if (h.length() != n) LogicError("MulAdd: dimension mismatch"); + + ZZ t; + long i; + + for (i = 0; i < n; i++) { + mul(t, prod, rep(h[i])); + add(x[i], x[i], t); + } +} + + +static +void double_MixedMul1(vec_ZZ& x, double *a, double **B, long n) +{ + long i, k; + double acc; + + for (i = 0; i < n; i++) { + double *bp = B[i]; + acc = 0; + for (k = 0; k < n; k++) { + acc += bp[k] * a[k]; + } + conv(x[i], acc); + } +} + + +static +void double_MixedMul2(vec_ZZ& x, double *a, double **B, long n, long limit) +{ + long i, k; + double acc; + ZZ acc1, t; + long j; + + for (i = 0; i < n; i++) { + double *bp 
= B[i];
+
+      clear(acc1);
+      acc = 0;
+      j = 0;
+
+      for (k = 0; k < n; k++) {
+         acc += bp[k] * a[k];
+         j++;
+         if (j == limit) {
+            conv(t, acc);
+            add(acc1, acc1, t);
+            acc = 0;
+            j = 0;
+         }
+      }
+
+      if (j > 0) {
+         conv(t, acc);
+         add(acc1, acc1, t);
+      }
+
+      x[i] = acc1;
+   }
+}
+
+
+static
+void long_MixedMul1(vec_ZZ& x, long *a, long **B, long n)
+{
+   long i, k;
+   long acc;
+
+   for (i = 0; i < n; i++) {
+      long *bp = B[i];
+      acc = 0;
+      for (k = 0; k < n; k++) {
+         acc += bp[k] * a[k];
+      }
+      conv(x[i], acc);
+   }
+}
+
+
+static
+void long_MixedMul2(vec_ZZ& x, long *a, long **B, long n, long limit)
+{
+   long i, k;
+   long acc;
+   ZZ acc1, t;
+   long j;
+
+   for (i = 0; i < n; i++) {
+      long *bp = B[i];
+
+      clear(acc1);
+      acc = 0;
+      j = 0;
+
+      for (k = 0; k < n; k++) {
+         acc += bp[k] * a[k];
+         j++;
+         if (j == limit) {
+            conv(t, acc);
+            add(acc1, acc1, t);
+            acc = 0;
+            j = 0;
+         }
+      }
+
+      if (j > 0) {
+         conv(t, acc);
+         add(acc1, acc1, t);
+      }
+
+      x[i] = acc1;
+   }
+}
+
+
+void solve1(ZZ& d_out, vec_ZZ& x_out, const mat_ZZ& A, const vec_ZZ& b)
+{
+   long n = A.NumRows();
+
+   if (A.NumCols() != n)
+      LogicError("solve1: nonsquare matrix");
+
+   if (b.length() != n)
+      LogicError("solve1: dimension mismatch");
+
+   if (n == 0) {
+      set(d_out);
+      x_out.SetLength(0);
+      return;
+   }
+
+   ZZ num_bound, den_bound;
+
+   hadamard(num_bound, den_bound, A, b);
+
+   if (den_bound == 0) {
+      clear(d_out);
+      return;
+   }
+
+   zz_pBak zbak;
+   zbak.save();
+
+   long i;
+   long j;
+
+   ZZ prod;
+   prod = 1;
+
+   mat_zz_p B;
+
+
+   for (i = 0; ; i++) {
+      zz_p::FFTInit(i);
+
+      mat_zz_p AA, BB;
+      zz_p dd;
+
+      conv(AA, A);
+      inv(dd, BB, AA);
+
+      if (dd != 0) {
+         transpose(B, BB);
+         break;
+      }
+
+      mul(prod, prod, zz_p::modulus());
+
+      if (prod > den_bound) {
+         d_out = 0;
+         return;
+      }
+   }
+
+   long max_A_len = MaxBits(A);
+
+   long use_double_mul1 = 0;
+   long use_double_mul2 = 0;
+   long double_limit = 0;
+
+   if (max_A_len + NTL_SP_NBITS + NumBits(n) <= NTL_DOUBLE_PRECISION-1)
+      use_double_mul1 = 1;
+
+   if (!use_double_mul1 && max_A_len+NTL_SP_NBITS+2 <= NTL_DOUBLE_PRECISION-1) {
+      use_double_mul2 = 1;
+      double_limit = (1L << (NTL_DOUBLE_PRECISION-1-max_A_len-NTL_SP_NBITS));
+   }
+
+   long use_long_mul1 = 0;
+   long use_long_mul2 = 0;
+   long long_limit = 0;
+
+   if (max_A_len + NTL_SP_NBITS + NumBits(n) <= NTL_BITS_PER_LONG-1)
+      use_long_mul1 = 1;
+
+   if (!use_long_mul1 && max_A_len+NTL_SP_NBITS+2 <= NTL_BITS_PER_LONG-1) {
+      use_long_mul2 = 1;
+      long_limit = (1L << (NTL_BITS_PER_LONG-1-max_A_len-NTL_SP_NBITS));
+   }
+
+
+
+   if (use_double_mul1 && use_long_mul1)
+      use_long_mul1 = 0;
+   else if (use_double_mul1 && use_long_mul2)
+      use_long_mul2 = 0;
+   else if (use_double_mul2 && use_long_mul1)
+      use_double_mul2 = 0;
+   else if (use_double_mul2 && use_long_mul2) {
+      if (long_limit > double_limit)
+         use_double_mul2 = 0;
+      else
+         use_long_mul2 = 0;
+   }
+
+
+   double **double_A=0;
+   double *double_h=0;
+
+   Unique2DArray<double> double_A_store;
+   UniqueArray<double> double_h_store;
+
+
+   if (use_double_mul1 || use_double_mul2) {
+      double_h_store.SetLength(n);
+      double_h = double_h_store.get();
+
+      double_A_store.SetDims(n, n);
+      double_A = double_A_store.get();
+
+      for (i = 0; i < n; i++)
+         for (j = 0; j < n; j++)
+            double_A[j][i] = to_double(A[i][j]);
+   }
+
+   long **long_A=0;
+   long *long_h=0;
+
+   Unique2DArray<long> long_A_store;
+   UniqueArray<long> long_h_store;
+
+
+   if (use_long_mul1 || use_long_mul2) {
+      long_h_store.SetLength(n);
+      long_h = long_h_store.get();
+
+      long_A_store.SetDims(n, n);
+      long_A = long_A_store.get();
+
+      for (i = 0; i < n; i++)
+         for (j = 0; j < n; j++)
+
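+// Aside on solve1 (a sketch of the reasoning, not authoritative):
+//
+//  * hadamard() instantiates Cramer's rule: every denominator of the
+//    solution divides det(A), and det(A)^2 <= prod_i |A_i|^2 by
+//    Hadamard's inequality, hence den_bound = sqrt(prod); a numerator
+//    is det(A) with one row replaced by b, and the worst case replaces
+//    the shortest row, hence num_bound = sqrt(|b|^2 * prod / min|A_i|^2).
+//
+//  * The use_*_mul flags above pick the cheapest exact inner product:
+//    *_MixedMul1 accumulates a whole row in one double (resp. long)
+//    when max_A_len + NTL_SP_NBITS + NumBits(n) bits fit without
+//    overflow; *_MixedMul2 flushes the accumulator into a ZZ every
+//    `limit` terms; if two variants apply, the larger window wins.
+//
+//  * The loop below is Dixon-style p-adic lifting: with B = A^{-1} mod p
+//    it maintains the invariant x*A == b - prod*e (prod = p^k) via
+//        h = e*B mod p,   e = (e - h*A)/p,   x = x + prod*h,
+//    then recovers the rational solution with ReconstructRational once
+//    prod exceeds 2*num_bound*den_bound.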
long_A[j][i] = to_long(A[i][j]); + } + + + vec_ZZ x; + x.SetLength(n); + + vec_zz_p h; + h.SetLength(n); + + vec_ZZ e; + e = b; + + vec_zz_p ee; + + vec_ZZ t; + t.SetLength(n); + + prod = 1; + + ZZ bound1; + mul(bound1, num_bound, den_bound); + mul(bound1, bound1, 2); + + while (prod <= bound1) { + conv(ee, e); + + mul(h, B, ee); + + if (use_double_mul1) { + for (i = 0; i < n; i++) + double_h[i] = to_double(rep(h[i])); + + double_MixedMul1(t, double_h, double_A, n); + } + else if (use_double_mul2) { + for (i = 0; i < n; i++) + double_h[i] = to_double(rep(h[i])); + + double_MixedMul2(t, double_h, double_A, n, double_limit); + } + else if (use_long_mul1) { + for (i = 0; i < n; i++) + long_h[i] = to_long(rep(h[i])); + + long_MixedMul1(t, long_h, long_A, n); + } + else if (use_long_mul2) { + for (i = 0; i < n; i++) + long_h[i] = to_long(rep(h[i])); + + long_MixedMul2(t, long_h, long_A, n, long_limit); + } + else + MixedMul(t, h, A); // t = h*A + + SubDiv(e, t, zz_p::modulus()); // e = (e-t)/p + MulAdd(x, prod, h); // x = x + prod*h + + mul(prod, prod, zz_p::modulus()); + } + + vec_ZZ num, denom; + ZZ d, d_mod_prod, tmp1; + + num.SetLength(n); + denom.SetLength(n); + + d = 1; + d_mod_prod = 1; + + for (i = 0; i < n; i++) { + rem(x[i], x[i], prod); + MulMod(x[i], x[i], d_mod_prod, prod); + + if (!ReconstructRational(num[i], denom[i], x[i], prod, + num_bound, den_bound)) + LogicError("solve1 internal error: rat recon failed!"); + + mul(d, d, denom[i]); + + if (i != n-1) { + if (denom[i] != 1) { + div(den_bound, den_bound, denom[i]); + mul(bound1, num_bound, den_bound); + mul(bound1, bound1, 2); + + div(tmp1, prod, zz_p::modulus()); + while (tmp1 > bound1) { + prod = tmp1; + div(tmp1, prod, zz_p::modulus()); + } + + rem(tmp1, denom[i], prod); + rem(d_mod_prod, d_mod_prod, prod); + MulMod(d_mod_prod, d_mod_prod, tmp1, prod); + } + } + } + + tmp1 = 1; + for (i = n-1; i >= 0; i--) { + mul(num[i], num[i], tmp1); + mul(tmp1, tmp1, denom[i]); + } + + x_out.SetLength(n); + + for (i = 0; i < n; i++) { + x_out[i] = num[i]; + } + + d_out = d; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mat_ZZ_p.c b/thirdparty/linux/ntl/src/mat_ZZ_p.c new file mode 100644 index 0000000000..2df4c14428 --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_ZZ_p.c @@ -0,0 +1,859 @@ + +#include +#include +#include + +NTL_START_IMPL + + +void add(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix add: dimension mismatch"); + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + add(X(i,j), A(i,j), B(i,j)); +} + +void sub(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix sub: dimension mismatch"); + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + sub(X(i,j), A(i,j), B(i,j)); +} + +void negate(mat_ZZ_p& X, const mat_ZZ_p& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + negate(X(i,j), A(i,j)); +} + +void mul_aux(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + if (l != B.NumRows()) + LogicError("matrix mul: dimension mismatch"); + + X.SetDims(n, m); + + long i, j, k; + ZZ acc, tmp; + + for (i = 1; i <= n; i++) { + for (j = 1; j <= m; j++) 
{ + clear(acc); + for(k = 1; k <= l; k++) { + mul(tmp, rep(A(i,k)), rep(B(k,j))); + add(acc, acc, tmp); + } + conv(X(i,j), acc); + } + } +} + + +void mul(mat_ZZ_p& X, const mat_ZZ_p& A, const mat_ZZ_p& B) +{ + if (&X == &A || &X == &B) { + mat_ZZ_p tmp; + mul_aux(tmp, A, B); + X = tmp; + } + else + mul_aux(X, A, B); +} + + +static +void mul_aux(vec_ZZ_p& x, const mat_ZZ_p& A, const vec_ZZ_p& b) +{ + long n = A.NumRows(); + long l = A.NumCols(); + + if (l != b.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(n); + + long i, k; + ZZ acc, tmp; + + for (i = 1; i <= n; i++) { + clear(acc); + for (k = 1; k <= l; k++) { + mul(tmp, rep(A(i,k)), rep(b(k))); + add(acc, acc, tmp); + } + conv(x(i), acc); + } +} + + +void mul(vec_ZZ_p& x, const mat_ZZ_p& A, const vec_ZZ_p& b) +{ + if (&b == &x || A.position1(x) != -1) { + vec_ZZ_p tmp; + mul_aux(tmp, A, b); + x = tmp; + } + else + mul_aux(x, A, b); +} + +static +void mul_aux(vec_ZZ_p& x, const vec_ZZ_p& a, const mat_ZZ_p& B) +{ + long n = B.NumRows(); + long l = B.NumCols(); + + if (n != a.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(l); + + long i, k; + ZZ acc, tmp; + + for (i = 1; i <= l; i++) { + clear(acc); + for (k = 1; k <= n; k++) { + mul(tmp, rep(a(k)), rep(B(k,i))); + add(acc, acc, tmp); + } + conv(x(i), acc); + } +} + +void mul(vec_ZZ_p& x, const vec_ZZ_p& a, const mat_ZZ_p& B) +{ + if (&a == &x) { + vec_ZZ_p tmp; + mul_aux(tmp, a, B); + x = tmp; + } + else + mul_aux(x, a, B); +} + + + +void ident(mat_ZZ_p& X, long n) +{ + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + set(X(i, j)); + else + clear(X(i, j)); +} + + +void determinant(ZZ_p& d, const mat_ZZ_p& M_in) +{ + long k, n; + long i, j; + long pos; + ZZ t1, t2; + ZZ *x, *y; + + const ZZ& p = ZZ_p::modulus(); + + n = M_in.NumRows(); + + if (M_in.NumCols() != n) + LogicError("determinant: nonsquare matrix"); + + if (n == 0) { + set(d); + return; + } + + vec_ZZVec M; + sqr(t1, p); + mul(t1, t1, n); + + M.SetLength(n); + for (i = 0; i < n; i++) { + M[i].SetSize(n, t1.size()); + for (j = 0; j < n; j++) + M[i][j] = rep(M_in[i][j]); + } + + ZZ det; + set(det); + + for (k = 0; k < n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) + pos = i; + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + NegateMod(det, det, p); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + NegateMod(t1, t1, p); + for (j = k+1; j < n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j < n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + conv(d, det); +} + +long IsIdent(const mat_ZZ_p& A, long n) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (!IsOne(A(i, j))) return 0; + } + + return 1; +} + + +void transpose(mat_ZZ_p& X, const mat_ZZ_p& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + long i, j; + + if (&X == & A) { + if (n == m) + for (i = 1; i <= n; i++) + for (j = i+1; j <= n; j++) + swap(X(i, j), X(j, i)); + else { + 
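+// Aside on determinant() above: Gaussian elimination mod p with lazy
+// reduction.  Rows are stored as unreduced ZZs of capacity n*p^2 (the
+// SetSize call), so the inner update *x += (*y)*t1 never reduces; rem()
+// is applied only when a column is scanned for a pivot.  Scaling the
+// pivot row so that M[k][k] == -1 (mod p) turns each elimination step
+// into a pure multiply-accumulate.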
mat_ZZ_p tmp; + tmp.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + tmp(j, i) = A(i, j); + X.kill(); + X = tmp; + } + } + else { + X.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + X(j, i) = A(i, j); + } +} + + +static +void solve_impl(ZZ_p& d, vec_ZZ_p& X, const mat_ZZ_p& A, const vec_ZZ_p& b, bool trans) + +{ + long n = A.NumRows(); + if (A.NumCols() != n) + LogicError("solve: nonsquare matrix"); + + if (b.length() != n) + LogicError("solve: dimension mismatch"); + + if (n == 0) { + set(d); + X.SetLength(0); + return; + } + + long i, j, k, pos; + ZZ t1, t2; + ZZ *x, *y; + + const ZZ& p = ZZ_p::modulus(); + + vec_ZZVec M; + sqr(t1, p); + mul(t1, t1, n); + + M.SetLength(n); + + for (i = 0; i < n; i++) { + M[i].SetSize(n+1, t1.size()); + + if (trans) + for (j = 0; j < n; j++) M[i][j] = rep(A[j][i]); + else + for (j = 0; j < n; j++) M[i][j] = rep(A[i][j]); + + M[i][n] = rep(b[i]); + } + + ZZ det; + set(det); + + for (k = 0; k < n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + NegateMod(det, det, p); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + NegateMod(t1, t1, p); + for (j = k+1; j <= n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j <= n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + X.SetLength(n); + for (i = n-1; i >= 0; i--) { + clear(t1); + for (j = i+1; j < n; j++) { + mul(t2, rep(X[j]), M[i][j]); + add(t1, t1, t2); + } + sub(t1, t1, M[i][n]); + conv(X[i], t1); + } + + conv(d, det); +} + +void solve(ZZ_p& d, vec_ZZ_p& x, const mat_ZZ_p& A, const vec_ZZ_p& b) +{ + solve_impl(d, x, A, b, true); +} + +void solve(ZZ_p& d, const mat_ZZ_p& A, vec_ZZ_p& x, const vec_ZZ_p& b) +{ + solve_impl(d, x, A, b, false); +} + +void inv(ZZ_p& d, mat_ZZ_p& X, const mat_ZZ_p& A) +{ + long n = A.NumRows(); + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + + if (n == 0) { + set(d); + X.SetDims(0, 0); + return; + } + + long i, j, k, pos; + ZZ t1, t2; + ZZ *x, *y; + + const ZZ& p = ZZ_p::modulus(); + + vec_ZZVec M; + sqr(t1, p); + mul(t1, t1, n); + + M.SetLength(n); + + for (i = 0; i < n; i++) { + M[i].SetSize(2*n, t1.size()); + for (j = 0; j < n; j++) { + M[i][j] = rep(A[i][j]); + clear(M[i][n+j]); + } + set(M[i][n+i]); + } + + ZZ det; + set(det); + + for (k = 0; k < n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + NegateMod(det, det, p); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + NegateMod(t1, t1, p); + for (j = k+1; j < 2*n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j < 2*n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } 
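+// Aside on solve_impl() above: since every pivot row was scaled to
+// M[k][k] == -1 (mod p), back-substitution needs no divisions --
+//     X[i] = (sum over j > i of X[j]*M[i][j]) - M[i][n]
+// is already correct mod p.  The two public solve() overloads differ
+// only in `trans`: the classic one solves x*A == b, the other A*x == b.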
+ } + + X.SetDims(n, n); + for (k = 0; k < n; k++) { + for (i = n-1; i >= 0; i--) { + clear(t1); + for (j = i+1; j < n; j++) { + mul(t2, rep(X[j][k]), M[i][j]); + add(t1, t1, t2); + } + sub(t1, t1, M[i][n+k]); + conv(X[i][k], t1); + } + } + + conv(d, det); +} + + + +long gauss(mat_ZZ_p& M_in, long w) +{ + long k, l; + long i, j; + long pos; + ZZ t1, t2, t3; + ZZ *x, *y; + + long n = M_in.NumRows(); + long m = M_in.NumCols(); + + if (w < 0 || w > m) + LogicError("gauss: bad args"); + + const ZZ& p = ZZ_p::modulus(); + + vec_ZZVec M; + sqr(t1, p); + mul(t1, t1, n); + + M.SetLength(n); + for (i = 0; i < n; i++) { + M[i].SetSize(m, t1.size()); + for (j = 0; j < m; j++) { + M[i][j] = rep(M_in[i][j]); + } + } + + l = 0; + for (k = 0; k < w && l < n; k++) { + + pos = -1; + for (i = l; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + swap(M[pos], M[l]); + + InvMod(t3, M[l][k], p); + NegateMod(t3, t3, p); + + for (j = k+1; j < m; j++) { + rem(M[l][j], M[l][j], p); + } + + for (i = l+1; i < n; i++) { + // M[i] = M[i] + M[l]*M[i,k]*t3 + + MulMod(t1, M[i][k], t3, p); + + clear(M[i][k]); + + x = M[i].elts() + (k+1); + y = M[l].elts() + (k+1); + + for (j = k+1; j < m; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(t2, t2, *x); + *x = t2; + } + } + + l++; + } + } + + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + conv(M_in[i][j], M[i][j]); + + return l; +} + +long gauss(mat_ZZ_p& M) +{ + return gauss(M, M.NumCols()); +} + +void image(mat_ZZ_p& X, const mat_ZZ_p& A) +{ + mat_ZZ_p M; + M = A; + long r = gauss(M); + M.SetDims(r, M.NumCols()); + X = M; +} + +void kernel(mat_ZZ_p& X, const mat_ZZ_p& A) +{ + long m = A.NumRows(); + long n = A.NumCols(); + + mat_ZZ_p M; + long r; + + transpose(M, A); + r = gauss(M); + + X.SetDims(m-r, m); + + long i, j, k, s; + ZZ t1, t2; + + ZZ_p T3; + + vec_long D; + D.SetLength(m); + for (j = 0; j < m; j++) D[j] = -1; + + vec_ZZ_p inverses; + inverses.SetLength(m); + + j = -1; + for (i = 0; i < r; i++) { + do { + j++; + } while (IsZero(M[i][j])); + + D[j] = i; + inv(inverses[j], M[i][j]); + } + + for (k = 0; k < m-r; k++) { + vec_ZZ_p& v = X[k]; + long pos = 0; + for (j = m-1; j >= 0; j--) { + if (D[j] == -1) { + if (pos == k) + set(v[j]); + else + clear(v[j]); + pos++; + } + else { + i = D[j]; + + clear(t1); + + for (s = j+1; s < m; s++) { + mul(t2, rep(v[s]), rep(M[i][s])); + add(t1, t1, t2); + } + + conv(T3, t1); + mul(T3, T3, inverses[j]); + negate(v[j], T3); + } + } + } +} + +void mul(mat_ZZ_p& X, const mat_ZZ_p& A, const ZZ_p& b_in) +{ + NTL_ZZ_pRegister(b); + b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + +void mul(mat_ZZ_p& X, const mat_ZZ_p& A, long b_in) +{ + NTL_ZZ_pRegister(b); + b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + +void diag(mat_ZZ_p& X, long n, const ZZ_p& d_in) +{ + ZZ_p d = d_in; + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + X(i, j) = d; + else + clear(X(i, j)); +} + +long IsDiag(const mat_ZZ_p& A, long n, const ZZ_p& d) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (A(i, j) != d) 
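+// Aside: gauss() above returns the rank r and leaves the matrix in row
+// echelon form, so image() is simply "run gauss, keep the first r
+// rows", and kernel() builds a basis of { v : v*A == 0 } of dimension
+// m - r, one vector per non-pivot column of the transposed echelon form.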
return 0; + } + + return 1; +} + + +long IsZero(const mat_ZZ_p& a) +{ + long n = a.NumRows(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +void clear(mat_ZZ_p& x) +{ + long n = x.NumRows(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + + +mat_ZZ_p operator+(const mat_ZZ_p& a, const mat_ZZ_p& b) +{ + mat_ZZ_p res; + add(res, a, b); + NTL_OPT_RETURN(mat_ZZ_p, res); +} + +mat_ZZ_p operator*(const mat_ZZ_p& a, const mat_ZZ_p& b) +{ + mat_ZZ_p res; + mul_aux(res, a, b); + NTL_OPT_RETURN(mat_ZZ_p, res); +} + +mat_ZZ_p operator-(const mat_ZZ_p& a, const mat_ZZ_p& b) +{ + mat_ZZ_p res; + sub(res, a, b); + NTL_OPT_RETURN(mat_ZZ_p, res); +} + + +mat_ZZ_p operator-(const mat_ZZ_p& a) +{ + mat_ZZ_p res; + negate(res, a); + NTL_OPT_RETURN(mat_ZZ_p, res); +} + + +vec_ZZ_p operator*(const mat_ZZ_p& a, const vec_ZZ_p& b) +{ + vec_ZZ_p res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_ZZ_p, res); +} + +vec_ZZ_p operator*(const vec_ZZ_p& a, const mat_ZZ_p& b) +{ + vec_ZZ_p res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_ZZ_p, res); +} + +void inv(mat_ZZ_p& X, const mat_ZZ_p& A) +{ + ZZ_p d; + inv(d, X, A); + if (d == 0) ArithmeticError("inv: non-invertible matrix"); +} + +void power(mat_ZZ_p& X, const mat_ZZ_p& A, const ZZ& e) +{ + if (A.NumRows() != A.NumCols()) LogicError("power: non-square matrix"); + + if (e == 0) { + ident(X, A.NumRows()); + return; + } + + mat_ZZ_p T1, T2; + long i, k; + + k = NumBits(e); + T1 = A; + + for (i = k-2; i >= 0; i--) { + sqr(T2, T1); + if (bit(e, i)) + mul(T1, T2, A); + else + T1 = T2; + } + + if (e < 0) + inv(X, T1); + else + X = T1; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mat_ZZ_pE.c b/thirdparty/linux/ntl/src/mat_ZZ_pE.c new file mode 100644 index 0000000000..fa80de5db0 --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_ZZ_pE.c @@ -0,0 +1,882 @@ + +#include + +#include + +NTL_START_IMPL + + +void add(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix add: dimension mismatch"); + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + add(X(i,j), A(i,j), B(i,j)); +} + +void sub(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix sub: dimension mismatch"); + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + sub(X(i,j), A(i,j), B(i,j)); +} + +void negate(mat_ZZ_pE& X, const mat_ZZ_pE& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + negate(X(i,j), A(i,j)); +} + +void mul_aux(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + if (l != B.NumRows()) + LogicError("matrix mul: dimension mismatch"); + + X.SetDims(n, m); + + long i, j, k; + ZZ_pX acc, tmp; + + for (i = 1; i <= n; i++) { + for (j = 1; j <= m; j++) { + clear(acc); + for(k = 1; k <= l; k++) { + mul(tmp, rep(A(i,k)), rep(B(k,j))); + add(acc, acc, tmp); + } + conv(X(i,j), acc); + } + } +} + + +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const mat_ZZ_pE& B) +{ + if (&X == &A || &X == &B) { + mat_ZZ_pE tmp; + mul_aux(tmp, A, B); + X = tmp; + } + else + mul_aux(X, A, B); +} + + +static +void mul_aux(vec_ZZ_pE& x, const mat_ZZ_pE& A, const vec_ZZ_pE& b) +{ + long n = 
A.NumRows(); + long l = A.NumCols(); + + if (l != b.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(n); + + long i, k; + ZZ_pX acc, tmp; + + for (i = 1; i <= n; i++) { + clear(acc); + for (k = 1; k <= l; k++) { + mul(tmp, rep(A(i,k)), rep(b(k))); + add(acc, acc, tmp); + } + conv(x(i), acc); + } +} + + +void mul(vec_ZZ_pE& x, const mat_ZZ_pE& A, const vec_ZZ_pE& b) +{ + if (&b == &x || A.position1(x) != -1) { + vec_ZZ_pE tmp; + mul_aux(tmp, A, b); + x = tmp; + } + else + mul_aux(x, A, b); +} + +static +void mul_aux(vec_ZZ_pE& x, const vec_ZZ_pE& a, const mat_ZZ_pE& B) +{ + long n = B.NumRows(); + long l = B.NumCols(); + + if (n != a.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(l); + + long i, k; + ZZ_pX acc, tmp; + + for (i = 1; i <= l; i++) { + clear(acc); + for (k = 1; k <= n; k++) { + mul(tmp, rep(a(k)), rep(B(k,i))); + add(acc, acc, tmp); + } + conv(x(i), acc); + } +} + +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const mat_ZZ_pE& B) +{ + if (&a == &x) { + vec_ZZ_pE tmp; + mul_aux(tmp, a, B); + x = tmp; + } + else + mul_aux(x, a, B); + +} + + + +void ident(mat_ZZ_pE& X, long n) +{ + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + set(X(i, j)); + else + clear(X(i, j)); +} + + +void determinant(ZZ_pE& d, const mat_ZZ_pE& M_in) +{ + long k, n; + long i, j; + long pos; + ZZ_pX t1, t2; + ZZ_pX *x, *y; + + const ZZ_pXModulus& p = ZZ_pE::modulus(); + + n = M_in.NumRows(); + + if (M_in.NumCols() != n) + LogicError("determinant: nonsquare matrix"); + + if (n == 0) { + set(d); + return; + } + + UniqueArray M_store; + M_store.SetLength(n); + vec_ZZ_pX *M = M_store.get(); + + for (i = 0; i < n; i++) { + M[i].SetLength(n); + for (j = 0; j < n; j++) { + M[i][j].rep.SetMaxLength(2*deg(p)-1); + M[i][j] = rep(M_in[i][j]); + } + } + + ZZ_pX det; + set(det); + + for (k = 0; k < n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) + pos = i; + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + negate(det, det); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + negate(t1, t1); + for (j = k+1; j < n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j < n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + conv(d, det); +} + +long IsIdent(const mat_ZZ_pE& A, long n) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (!IsOne(A(i, j))) return 0; + } + + return 1; +} + + +void transpose(mat_ZZ_pE& X, const mat_ZZ_pE& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + long i, j; + + if (&X == & A) { + if (n == m) + for (i = 1; i <= n; i++) + for (j = i+1; j <= n; j++) + swap(X(i, j), X(j, i)); + else { + mat_ZZ_pE tmp; + tmp.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + tmp(j, i) = A(i, j); + X.kill(); + X = tmp; + } + } + else { + X.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + X(j, i) = A(i, j); + } +} + + +static +void solve_impl(ZZ_pE& d, vec_ZZ_pE& X, const mat_ZZ_pE& A, 
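+// Aside: the mat_ZZ_pE routines mirror the mat_ZZ_p ones, but an entry
+// is now a polynomial modulo (f, p), so the laziness moves to the
+// degree: each slot reserves room for degree 2*deg(f)-2 (the
+// rep.SetMaxLength(2*deg(p)-1) calls), products and sums accumulate
+// unreduced, and rem() against the ZZ_pXModulus brings an entry back
+// below deg(f) only at pivot scans.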
const vec_ZZ_pE& b, bool trans) + +{ + long n = A.NumRows(); + if (A.NumCols() != n) + LogicError("solve: nonsquare matrix"); + + if (b.length() != n) + LogicError("solve: dimension mismatch"); + + if (n == 0) { + set(d); + X.SetLength(0); + return; + } + + long i, j, k, pos; + ZZ_pX t1, t2; + ZZ_pX *x, *y; + + const ZZ_pXModulus& p = ZZ_pE::modulus(); + + UniqueArray M_store; + M_store.SetLength(n); + vec_ZZ_pX *M = M_store.get(); + + for (i = 0; i < n; i++) { + M[i].SetLength(n+1); + if (trans) { + for (j = 0; j < n; j++) { + M[i][j].rep.SetMaxLength(2*deg(p)-1); + M[i][j] = rep(A[j][i]); + } + } + else { + for (j = 0; j < n; j++) { + M[i][j].rep.SetMaxLength(2*deg(p)-1); + M[i][j] = rep(A[i][j]); + } + } + M[i][n].rep.SetMaxLength(2*deg(p)-1); + M[i][n] = rep(b[i]); + } + + ZZ_pX det; + set(det); + + for (k = 0; k < n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + negate(det, det); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + negate(t1, t1); + for (j = k+1; j <= n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j <= n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + X.SetLength(n); + for (i = n-1; i >= 0; i--) { + clear(t1); + for (j = i+1; j < n; j++) { + mul(t2, rep(X[j]), M[i][j]); + add(t1, t1, t2); + } + sub(t1, t1, M[i][n]); + conv(X[i], t1); + } + + conv(d, det); +} + +void solve(ZZ_pE& d, vec_ZZ_pE& x, const mat_ZZ_pE& A, const vec_ZZ_pE& b) +{ + solve_impl(d, x, A, b, true); +} + +void solve(ZZ_pE& d, const mat_ZZ_pE& A, vec_ZZ_pE& x, const vec_ZZ_pE& b) +{ + solve_impl(d, x, A, b, false); +} + +void inv(ZZ_pE& d, mat_ZZ_pE& X, const mat_ZZ_pE& A) +{ + long n = A.NumRows(); + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + + if (n == 0) { + set(d); + X.SetDims(0, 0); + return; + } + + long i, j, k, pos; + ZZ_pX t1, t2; + ZZ_pX *x, *y; + + const ZZ_pXModulus& p = ZZ_pE::modulus(); + + + UniqueArray M_store; + M_store.SetLength(n); + vec_ZZ_pX *M = M_store.get(); + + for (i = 0; i < n; i++) { + M[i].SetLength(2*n); + for (j = 0; j < n; j++) { + M[i][j].rep.SetMaxLength(2*deg(p)-1); + M[i][j] = rep(A[i][j]); + M[i][n+j].rep.SetMaxLength(2*deg(p)-1); + clear(M[i][n+j]); + } + set(M[i][n+i]); + } + + ZZ_pX det; + set(det); + + for (k = 0; k < n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + negate(det, det); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + negate(t1, t1); + for (j = k+1; j < 2*n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j < 2*n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + X.SetDims(n, n); + for (k = 0; k < n; k++) { + for 
(i = n-1; i >= 0; i--) { + clear(t1); + for (j = i+1; j < n; j++) { + mul(t2, rep(X[j][k]), M[i][j]); + add(t1, t1, t2); + } + sub(t1, t1, M[i][n+k]); + conv(X[i][k], t1); + } + } + + conv(d, det); +} + + + +long gauss(mat_ZZ_pE& M_in, long w) +{ + long k, l; + long i, j; + long pos; + ZZ_pX t1, t2, t3; + ZZ_pX *x, *y; + + long n = M_in.NumRows(); + long m = M_in.NumCols(); + + if (w < 0 || w > m) + LogicError("gauss: bad args"); + + const ZZ_pXModulus& p = ZZ_pE::modulus(); + + + UniqueArray M_store; + M_store.SetLength(n); + vec_ZZ_pX *M = M_store.get(); + + for (i = 0; i < n; i++) { + M[i].SetLength(m); + for (j = 0; j < m; j++) { + M[i][j].rep.SetMaxLength(2*deg(p)-1); + M[i][j] = rep(M_in[i][j]); + } + } + + l = 0; + for (k = 0; k < w && l < n; k++) { + + pos = -1; + for (i = l; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + swap(M[pos], M[l]); + + InvMod(t3, M[l][k], p); + negate(t3, t3); + + for (j = k+1; j < m; j++) { + rem(M[l][j], M[l][j], p); + } + + for (i = l+1; i < n; i++) { + // M[i] = M[i] + M[l]*M[i,k]*t3 + + MulMod(t1, M[i][k], t3, p); + + clear(M[i][k]); + + x = M[i].elts() + (k+1); + y = M[l].elts() + (k+1); + + for (j = k+1; j < m; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(t2, t2, *x); + *x = t2; + } + } + + l++; + } + } + + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + conv(M_in[i][j], M[i][j]); + + return l; +} + +long gauss(mat_ZZ_pE& M) +{ + return gauss(M, M.NumCols()); +} + +void image(mat_ZZ_pE& X, const mat_ZZ_pE& A) +{ + mat_ZZ_pE M; + M = A; + long r = gauss(M); + M.SetDims(r, M.NumCols()); + X = M; +} + +void kernel(mat_ZZ_pE& X, const mat_ZZ_pE& A) +{ + long m = A.NumRows(); + long n = A.NumCols(); + + mat_ZZ_pE M; + long r; + + transpose(M, A); + r = gauss(M); + + X.SetDims(m-r, m); + + long i, j, k, s; + ZZ_pX t1, t2; + + ZZ_pE T3; + + vec_long D; + D.SetLength(m); + for (j = 0; j < m; j++) D[j] = -1; + + vec_ZZ_pE inverses; + inverses.SetLength(m); + + j = -1; + for (i = 0; i < r; i++) { + do { + j++; + } while (IsZero(M[i][j])); + + D[j] = i; + inv(inverses[j], M[i][j]); + } + + for (k = 0; k < m-r; k++) { + vec_ZZ_pE& v = X[k]; + long pos = 0; + for (j = m-1; j >= 0; j--) { + if (D[j] == -1) { + if (pos == k) + set(v[j]); + else + clear(v[j]); + pos++; + } + else { + i = D[j]; + + clear(t1); + + for (s = j+1; s < m; s++) { + mul(t2, rep(v[s]), rep(M[i][s])); + add(t1, t1, t2); + } + + conv(T3, t1); + mul(T3, T3, inverses[j]); + negate(v[j], T3); + } + } + } +} + +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ_pE& b_in) +{ + ZZ_pE b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ_p& b_in) +{ + NTL_ZZ_pRegister(b); + b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + +void mul(mat_ZZ_pE& X, const mat_ZZ_pE& A, long b_in) +{ + NTL_ZZ_pRegister(b); + b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + +void diag(mat_ZZ_pE& X, long n, const ZZ_pE& d_in) +{ + ZZ_pE d = d_in; + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + X(i, j) = d; + else + clear(X(i, j)); 
+} + +long IsDiag(const mat_ZZ_pE& A, long n, const ZZ_pE& d) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (A(i, j) != d) return 0; + } + + return 1; +} + + +long IsZero(const mat_ZZ_pE& a) +{ + long n = a.NumRows(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +void clear(mat_ZZ_pE& x) +{ + long n = x.NumRows(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + + +mat_ZZ_pE operator+(const mat_ZZ_pE& a, const mat_ZZ_pE& b) +{ + mat_ZZ_pE res; + add(res, a, b); + NTL_OPT_RETURN(mat_ZZ_pE, res); +} + +mat_ZZ_pE operator*(const mat_ZZ_pE& a, const mat_ZZ_pE& b) +{ + mat_ZZ_pE res; + mul_aux(res, a, b); + NTL_OPT_RETURN(mat_ZZ_pE, res); +} + +mat_ZZ_pE operator-(const mat_ZZ_pE& a, const mat_ZZ_pE& b) +{ + mat_ZZ_pE res; + sub(res, a, b); + NTL_OPT_RETURN(mat_ZZ_pE, res); +} + + +mat_ZZ_pE operator-(const mat_ZZ_pE& a) +{ + mat_ZZ_pE res; + negate(res, a); + NTL_OPT_RETURN(mat_ZZ_pE, res); +} + + +vec_ZZ_pE operator*(const mat_ZZ_pE& a, const vec_ZZ_pE& b) +{ + vec_ZZ_pE res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_ZZ_pE, res); +} + +vec_ZZ_pE operator*(const vec_ZZ_pE& a, const mat_ZZ_pE& b) +{ + vec_ZZ_pE res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_ZZ_pE, res); +} + +void inv(mat_ZZ_pE& X, const mat_ZZ_pE& A) +{ + ZZ_pE d; + inv(d, X, A); + if (d == 0) ArithmeticError("inv: non-invertible matrix"); +} + +void power(mat_ZZ_pE& X, const mat_ZZ_pE& A, const ZZ& e) +{ + if (A.NumRows() != A.NumCols()) LogicError("power: non-square matrix"); + + if (e == 0) { + ident(X, A.NumRows()); + return; + } + + mat_ZZ_pE T1, T2; + long i, k; + + k = NumBits(e); + T1 = A; + + for (i = k-2; i >= 0; i--) { + sqr(T2, T1); + if (bit(e, i)) + mul(T1, T2, A); + else + T1 = T2; + } + + if (e < 0) + inv(X, T1); + else + X = T1; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mat_lzz_p.c b/thirdparty/linux/ntl/src/mat_lzz_p.c new file mode 100644 index 0000000000..898fab18e2 --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_lzz_p.c @@ -0,0 +1,7191 @@ + +#include +#include +#include + + +#include + + + +#ifdef NTL_HAVE_AVX +#include +#endif + +NTL_START_IMPL + + +#define PAR_THRESH_SQ (200) +#define PAR_THRESH (40000) + + +// ******************************************************* +// +// Matrix Window data structure: perhaps some day this +// will be made public. 
+// +// ******************************************************* + +struct mat_window_zz_p { + mat_zz_p &A; + long r_offset; + long c_offset; + long nrows; + long ncols; + + mat_window_zz_p(mat_zz_p& _A) : + A(_A), r_offset(0), c_offset(0), nrows(A.NumRows()), ncols(A.NumCols()) { } + + mat_window_zz_p(const mat_window_zz_p& w, long r1, long c1, long r2, long c2) : + A(w.A) + { + if (r1 < 0 || c1 < 0 || r2 < r1 || c2 < c1 || r2-r1 > w.nrows || c2-c1 > w.ncols) + LogicError("mat_window_zz_p: bad args"); + + r_offset = w.r_offset + r1; + c_offset = w.c_offset + c1; + nrows = r2-r1; + ncols = c2-c1; + } + + zz_p * operator[](long i) const { return A[i+r_offset].elts() + c_offset; } + + long NumRows() const { return nrows; } + long NumCols() const { return ncols; } + +}; + + +struct const_mat_window_zz_p { + const mat_zz_p &A; + long r_offset; + long c_offset; + long nrows; + long ncols; + + const_mat_window_zz_p(const mat_zz_p& _A) : + A(_A), r_offset(0), c_offset(0), nrows(A.NumRows()), ncols(A.NumCols()) { } + + const_mat_window_zz_p(const mat_window_zz_p& w) : + A(w.A), r_offset(w.r_offset), c_offset(w.c_offset), nrows(w.nrows), ncols(w.ncols) { } + + const_mat_window_zz_p(const const_mat_window_zz_p& w, long r1, long c1, long r2, long c2) : + A(w.A) + { + if (r1 < 0 || c1 < 0 || r2 < r1 || c2 < c1 || r2-r1 > w.nrows || c2-c1 > w.ncols) + LogicError("const_mat_window_zz_p: bad args"); + + r_offset = w.r_offset + r1; + c_offset = w.c_offset + c1; + nrows = r2-r1; + ncols = c2-c1; + } + + const zz_p * operator[](long i) const { return A[i+r_offset].elts() + c_offset; } + + long NumRows() const { return nrows; } + long NumCols() const { return ncols; } + +}; + +void add(const mat_window_zz_p& X, + const const_mat_window_zz_p& A, const const_mat_window_zz_p& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix add: dimension mismatch"); + + if (X.NumRows() != n || X.NumCols() != m) + LogicError("matrix add: dimension mismatch"); + + long p = zz_p::modulus(); + + for (long i = 0; i < n; i++) { + zz_p *x = X[i]; + const zz_p *a = A[i]; + const zz_p *b = B[i]; + for (long j = 0; j < m; j++) { + x[j].LoopHole() = AddMod(rep(a[j]), rep(b[j]), p); + } + } +} + +void sub(const mat_window_zz_p& X, + const const_mat_window_zz_p& A, const const_mat_window_zz_p& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix sub: dimension mismatch"); + + if (X.NumRows() != n || X.NumCols() != m) + LogicError("matrix sub: dimension mismatch"); + + long p = zz_p::modulus(); + + for (long i = 0; i < n; i++) { + zz_p *x = X[i]; + const zz_p *a = A[i]; + const zz_p *b = B[i]; + for (long j = 0; j < m; j++) { + x[j].LoopHole() = SubMod(rep(a[j]), rep(b[j]), p); + } + } +} + + +void clear(const mat_window_zz_p& X) +{ + long n = X.NumRows(); + long m = X.NumCols(); + + for (long i = 0; i < n; i++) + for (long j = 0; j < m; j++) + clear(X[i][j]); +} + + + +// *********************************************************** + + + + + + +void add(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix add: dimension mismatch"); + + X.SetDims(n, m); + + long p = zz_p::modulus(); + + for (long i = 0; i < n; i++) { + zz_p *x = X[i].elts(); + const zz_p *a = A[i].elts(); + const zz_p *b = B[i].elts(); + for (long j = 0; j < m; j++) { + x[j].LoopHole() = AddMod(rep(a[j]), rep(b[j]), p); + } 
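+// Aside: a mat_window_zz_p is a cheap view, not a copy -- it holds a
+// reference to the parent matrix plus row/column offsets, and its
+// operator[] returns a pointer into the parent's storage.  (Similarly,
+// LoopHole() here exposes the raw long representative of a zz_p so an
+// already-reduced AddMod/SubMod result can be stored directly.)
+//
+// Hypothetical usage sketch (driver code, not part of this file;
+// assumes zz_p::init was called):
+//
+//    mat_zz_p A;  A.SetDims(8, 8);
+//    mat_window_zz_p W(A);               // view of the whole matrix
+//    mat_window_zz_p TL(W, 0, 0, 4, 4);  // top-left 4x4 block
+//    clear(TL);                          // zeroes only that block of A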
+ } +} + +void sub(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix sub: dimension mismatch"); + + X.SetDims(n, m); + + long p = zz_p::modulus(); + + for (long i = 0; i < n; i++) { + zz_p *x = X[i].elts(); + const zz_p *a = A[i].elts(); + const zz_p *b = B[i].elts(); + for (long j = 0; j < m; j++) { + x[j].LoopHole() = SubMod(rep(a[j]), rep(b[j]), p); + } + } + +} + + + + + +void diag(mat_zz_p& X, long n, zz_p d) +{ + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + X(i, j) = d; + else + clear(X(i, j)); +} + +long IsDiag(const mat_zz_p& A, long n, zz_p d) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (A(i, j) != d) return 0; + } + + return 1; +} + +void negate(mat_zz_p& X, const mat_zz_p& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + + X.SetDims(n, m); + + long p = zz_p::modulus(); + + for (long i = 0; i < n; i++) { + zz_p *x = X[i].elts(); + const zz_p *a = A[i].elts(); + for (long j = 0; j < m; j++) { + x[j].LoopHole() = NegateMod(rep(a[j]), p); + } + } +} + +long IsZero(const mat_zz_p& a) +{ + long n = a.NumRows(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +void clear(mat_zz_p& x) +{ + long n = x.NumRows(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + + +void ident(mat_zz_p& X, long n) +{ + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + set(X(i, j)); + else + clear(X(i, j)); +} + + +long IsIdent(const mat_zz_p& A, long n) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (!IsOne(A(i, j))) return 0; + } + + return 1; +} + + +void transpose(mat_zz_p& X, const mat_zz_p& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + long i, j; + + if (&X == & A) { + if (n == m) + for (i = 1; i <= n; i++) + for (j = i+1; j <= n; j++) + swap(X(i, j), X(j, i)); + else { + mat_zz_p tmp; + tmp.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + tmp(j, i) = A(i, j); + X.kill(); + X = tmp; + } + } + else { + X.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + X(j, i) = A(i, j); + } +} + + + + +void relaxed_power(mat_zz_p& X, const mat_zz_p& A, const ZZ& e, bool relax) +{ + if (A.NumRows() != A.NumCols()) LogicError("power: non-square matrix"); + + if (e == 0) { + ident(X, A.NumRows()); + return; + } + + mat_zz_p T1, T2; + long i, k; + + k = NumBits(e); + T1 = A; + + for (i = k-2; i >= 0; i--) { + sqr(T2, T1); + if (bit(e, i)) + mul(T1, T2, A); + else + T1 = T2; + } + + if (e < 0) + relaxed_inv(X, T1, relax); + else + X = T1; +} + + + +// ****************************************************************** +// +// matrix-vector multiplication code +// +// ****************************************************************** + + + + + + +void mul(vec_zz_p& x, const vec_zz_p& a, const mat_zz_p& B) +{ + long l = a.length(); + long m = B.NumCols(); + + if (l != B.NumRows()) + LogicError("matrix mul: dimension mismatch"); + + if (m == 0) { + + x.SetLength(0); + + } + else if (m == 1) { + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + long acc, tmp; + long k; + 
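+// Aside: relaxed_power() above is left-to-right binary exponentiation:
+// after the iteration for bit i, T1 == A^(e >> i) -- each step squares
+// T1 and multiplies by A exactly when bit i of e is set, so NumBits(e)-1
+// squarings suffice; a negative e inverts the final result.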
+ acc = 0; + for(k = 1; k <= l; k++) { + tmp = MulMod(rep(a(k)), rep(B(k,1)), p, pinv); + acc = AddMod(acc, tmp, p); + } + + x.SetLength(1); + x(1).LoopHole() = acc; + + } + else { // m > 1. precondition and EXEC_RANGE + + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + NTL_TLS_LOCAL(vec_long, mul_aux_vec); + vec_long::Watcher watch_mul_aux_vec(mul_aux_vec); + mul_aux_vec.SetLength(m); + long *acc = mul_aux_vec.elts(); + + const zz_p* ap = a.elts(); + + for (long j = 0; j < m; j++) acc[j] = 0; + + const bool seq = double(l)*double(m) < PAR_THRESH; + + NTL_GEXEC_RANGE(seq, m, first, last) { + + for (long k = 0; k < l; k++) { + long aa = rep(ap[k]); + if (aa != 0) { + const zz_p* bp = B[k].elts(); + long T1; + mulmod_precon_t aapinv = PrepMulModPrecon(aa, p, pinv); + + for (long j = first; j < last; j++) { + T1 = MulModPrecon(rep(bp[j]), aa, p, aapinv); + acc[j] = AddMod(acc[j], T1, p); + } + } + } + + } NTL_GEXEC_RANGE_END + + x.SetLength(m); + zz_p *xp = x.elts(); + for (long j = 0; j < m; j++) + xp[j].LoopHole() = acc[j]; + } +} + + +void mul_aux(vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b) +{ + long n = A.NumRows(); + long l = A.NumCols(); + + if (l != b.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(n); + zz_p* xp = x.elts(); + + long p = zz_p::modulus(); + const zz_p* bp = b.elts(); + + const bool seq = double(n)*double(l) < PAR_THRESH; + + +#ifdef NTL_HAVE_LL_TYPE + + if (cast_unsigned(l) <= (~(0UL))/cast_unsigned(p-1) && + cast_unsigned(l)*cast_unsigned(p-1) <= (~(0UL))/cast_unsigned(p-1)) { + + sp_reduce_struct red_struct = zz_p::red_struct(); + + NTL_GEXEC_RANGE(seq, n, first, last) { + + for (long i = first; i < last; i++) { + xp[i].LoopHole() = InnerProd_L(A[i].elts(), bp, l, p, red_struct); + } + + } NTL_GEXEC_RANGE_END + } + else { + sp_ll_reduce_struct ll_red_struct = zz_p::ll_red_struct(); + + NTL_GEXEC_RANGE(seq, n, first, last) { + + for (long i = first; i < last; i++) { + xp[i].LoopHole() = InnerProd_LL(A[i].elts(), bp, l, p, ll_red_struct); + } + + } NTL_GEXEC_RANGE_END + + } + +#else + + mulmod_t pinv = zz_p::ModulusInverse(); + + if (n <= 1) { + + for (long i = 0; i < n; i++) { + long acc = 0; + const zz_p* ap = A[i].elts(); + + for (long k = 0; k < l; k++) { + long tmp = MulMod(rep(ap[k]), rep(bp[k]), p, pinv); + acc = AddMod(acc, tmp, p); + } + + xp[i].LoopHole() = acc; + } + + } + else { + + NTL_TLS_LOCAL(Vec, precon_vec); + Vec::Watcher watch_precon_vec(precon_vec); + precon_vec.SetLength(l); + mulmod_precon_t *bpinv = precon_vec.elts(); + + for (long k = 0; k < l; k++) + bpinv[k] = PrepMulModPrecon(rep(bp[k]), p, pinv); + + + NTL_GEXEC_RANGE(seq, n, first, last) { + for (long i = first; i < last; i++) { + long acc = 0; + const zz_p* ap = A[i].elts(); + + for (long k = 0; k < l; k++) { + long tmp = MulModPrecon(rep(ap[k]), rep(bp[k]), p, bpinv[k]); + acc = AddMod(acc, tmp, p); + } + + xp[i].LoopHole() = acc; + } + } NTL_GEXEC_RANGE_END + + } + +#endif +} + +void mul(vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b) +{ + if (&b == &x || A.position1(x) != -1) { + vec_zz_p tmp; + mul_aux(tmp, A, b); + x = tmp; + } + else + mul_aux(x, A, b); + +} + + +void mul(mat_zz_p& X, const mat_zz_p& A, zz_p b) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + + if (n == 0 || m == 0 || (n == 1 && m == 1)) { + long i, j; + + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); + + } + else { + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + long bb = 
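+// Aside on mul_aux() above: with NTL_HAVE_LL_TYPE it chooses between
+// two inner-product kernels -- InnerProd_L accumulates in one unsigned
+// long as long as l*(p-1)^2 cannot overflow (the cast_unsigned tests),
+// and InnerProd_LL falls back to a double-word accumulator; rows are
+// split across threads by NTL_GEXEC_RANGE once n*l reaches PAR_THRESH.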
rep(b); + mulmod_precon_t bpinv = PrepMulModPrecon(bb, p, pinv); + + const bool seq = double(n)*double(m) < PAR_THRESH; + + NTL_GEXEC_RANGE(seq, n, first, last) + long i, j; + for (i = first; i < last; i++) { + const zz_p *ap = A[i].elts(); + zz_p *xp = X[i].elts(); + + for (j = 0; j < m; j++) + xp[j].LoopHole() = MulModPrecon(rep(ap[j]), bb, p, bpinv); + } + NTL_GEXEC_RANGE_END + + + } +} + +void mul(mat_zz_p& X, const mat_zz_p& A, long b_in) +{ + zz_p b; + b = b_in; + mul(X, A, b); +} + + +// ****************************************************************** +// +// Code shared by block-matrix code +// +// ****************************************************************** + +#define MAT_BLK_SZ (32) + + +#ifdef NTL_HAVE_LL_TYPE + +#ifdef NTL_HAVE_AVX + +#define MAX_DBL_INT ((1L << NTL_DOUBLE_PRECISION)-1) +// max int representable exactly as a double +// this assumes NTL_DBL_PRECISION <= NTL_BITS_PER_LONG-2, which is +// checked in the code that tests for HAVE_AVX, but we check it here as +// well + +#if (NTL_DBL_PRECISION > NTL_BITS_PER_LONG-2) +#error "NTL_DBL_PRECISION > NTL_BITS_PER_LONG-2" +#endif + + +// MUL_ADD(a, b, c): a += b*c +#ifdef NTL_HAVE_FMA +#define MUL_ADD(a, b, c) a = _mm256_fmadd_pd(b, c, a) +#else +#define MUL_ADD(a, b, c) a = _mm256_add_pd(a, _mm256_mul_pd(b, c)) +#endif + +#if 0 +static +void muladd1_by_32(double *x, const double *a, const double *b, long n) +{ + __m256d avec, bvec; + + + __m256d acc0=_mm256_load_pd(x + 0*4); + __m256d acc1=_mm256_load_pd(x + 1*4); + __m256d acc2=_mm256_load_pd(x + 2*4); + __m256d acc3=_mm256_load_pd(x + 3*4); + __m256d acc4=_mm256_load_pd(x + 4*4); + __m256d acc5=_mm256_load_pd(x + 5*4); + __m256d acc6=_mm256_load_pd(x + 6*4); + __m256d acc7=_mm256_load_pd(x + 7*4); + + + for (long i = 0; i < n; i++) { + avec = _mm256_broadcast_sd(a); a++; + + + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc0, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc1, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc2, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc3, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc4, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc5, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc6, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc7, avec, bvec); + } + + + _mm256_store_pd(x + 0*4, acc0); + _mm256_store_pd(x + 1*4, acc1); + _mm256_store_pd(x + 2*4, acc2); + _mm256_store_pd(x + 3*4, acc3); + _mm256_store_pd(x + 4*4, acc4); + _mm256_store_pd(x + 5*4, acc5); + _mm256_store_pd(x + 6*4, acc6); + _mm256_store_pd(x + 7*4, acc7); +} + +#else + +static +void muladd1_by_32(double *x, const double *a, const double *b, long n) +{ + __m256d acc0=_mm256_load_pd(x + 0*4); + __m256d acc1=_mm256_load_pd(x + 1*4); + __m256d acc2=_mm256_load_pd(x + 2*4); + __m256d acc3=_mm256_load_pd(x + 3*4); + __m256d acc4=_mm256_load_pd(x + 4*4); + __m256d acc5=_mm256_load_pd(x + 5*4); + __m256d acc6=_mm256_load_pd(x + 6*4); + __m256d acc7=_mm256_load_pd(x + 7*4); + + long i = 0; + for (; i <= n-4; i +=4) { + + // the following code sequences are a bit faster than + // just doing 4 _mm256_broadcast_sd's + // it requires a to point to aligned storage, however + +#if 1 + // this one seems slightly faster + __m256d a0101 = _mm256_broadcast_pd((const __m128d*)(a+0)); + __m256d a2323 = _mm256_broadcast_pd((const __m128d*)(a+2)); +#else + __m256d avec = _mm256_load_pd(a); + __m256d a0101 = _mm256_permute2f128_pd(avec, avec, 0); + __m256d a2323 = _mm256_permute2f128_pd(avec, avec, 
0x11); + +#endif + + __m256d avec0 = _mm256_permute_pd(a0101, 0); + __m256d avec1 = _mm256_permute_pd(a0101, 0xf); + __m256d avec2 = _mm256_permute_pd(a2323, 0); + __m256d avec3 = _mm256_permute_pd(a2323, 0xf); + + a += 4; + + __m256d bvec; + + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc0, avec0, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc1, avec0, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc2, avec0, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc3, avec0, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc4, avec0, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc5, avec0, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc6, avec0, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc7, avec0, bvec); + + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc0, avec1, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc1, avec1, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc2, avec1, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc3, avec1, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc4, avec1, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc5, avec1, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc6, avec1, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc7, avec1, bvec); + + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc0, avec2, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc1, avec2, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc2, avec2, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc3, avec2, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc4, avec2, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc5, avec2, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc6, avec2, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc7, avec2, bvec); + + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc0, avec3, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc1, avec3, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc2, avec3, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc3, avec3, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc4, avec3, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc5, avec3, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc6, avec3, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc7, avec3, bvec); + } + + for (; i < n; i++) { + __m256d avec = _mm256_broadcast_sd(a); a++; + __m256d bvec; + + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc0, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc1, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc2, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc3, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc4, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc5, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc6, avec, bvec); + bvec = _mm256_load_pd(b); b += 4; MUL_ADD(acc7, avec, bvec); + } + + + _mm256_store_pd(x + 0*4, acc0); + _mm256_store_pd(x + 1*4, acc1); + _mm256_store_pd(x + 2*4, acc2); + _mm256_store_pd(x + 3*4, acc3); + _mm256_store_pd(x + 4*4, acc4); + _mm256_store_pd(x + 5*4, acc5); + _mm256_store_pd(x + 6*4, acc6); + _mm256_store_pd(x + 7*4, acc7); +} + +#endif + +// experiment: process two rows at a time +#if 1 +static +void muladd2_by_32(double *x, const double *a, const double *b, long n) +{ + __m256d avec0, avec1, bvec; + __m256d acc00, acc01, acc02, acc03; + __m256d acc10, acc11, acc12, acc13; + + + // round 0 + + acc00=_mm256_load_pd(x + 0*4 + 0*MAT_BLK_SZ); + acc01=_mm256_load_pd(x + 1*4 + 0*MAT_BLK_SZ); + 
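+// Aside: the muladd*_by_32 kernels compute x += a*b on MAT_BLK_SZ-wide
+// panels of doubles.  Exactness rests on MAX_DBL_INT: entries are small
+// reduced residues, so a 32-term dot product stays below 2^53 and every
+// AVX add/FMA here is exact integer arithmetic.  muladd1_by_32 keeps one
+// 32-double output row in eight __m256d accumulators; muladd2_by_32
+// reuses each loaded b vector for two output rows, halving b traffic.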
acc02=_mm256_load_pd(x + 2*4 + 0*MAT_BLK_SZ); + acc03=_mm256_load_pd(x + 3*4 + 0*MAT_BLK_SZ); + + acc10=_mm256_load_pd(x + 0*4 + 1*MAT_BLK_SZ); + acc11=_mm256_load_pd(x + 1*4 + 1*MAT_BLK_SZ); + acc12=_mm256_load_pd(x + 2*4 + 1*MAT_BLK_SZ); + acc13=_mm256_load_pd(x + 3*4 + 1*MAT_BLK_SZ); + + for (long i = 0; i < n; i++) { + avec0 = _mm256_broadcast_sd(&a[i]); + avec1 = _mm256_broadcast_sd(&a[i+MAT_BLK_SZ]); + + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+0*4]); MUL_ADD(acc00, avec0, bvec); MUL_ADD(acc10, avec1, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+1*4]); MUL_ADD(acc01, avec0, bvec); MUL_ADD(acc11, avec1, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+2*4]); MUL_ADD(acc02, avec0, bvec); MUL_ADD(acc12, avec1, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+3*4]); MUL_ADD(acc03, avec0, bvec); MUL_ADD(acc13, avec1, bvec); + } + + + _mm256_store_pd(x + 0*4 + 0*MAT_BLK_SZ, acc00); + _mm256_store_pd(x + 1*4 + 0*MAT_BLK_SZ, acc01); + _mm256_store_pd(x + 2*4 + 0*MAT_BLK_SZ, acc02); + _mm256_store_pd(x + 3*4 + 0*MAT_BLK_SZ, acc03); + + _mm256_store_pd(x + 0*4 + 1*MAT_BLK_SZ, acc10); + _mm256_store_pd(x + 1*4 + 1*MAT_BLK_SZ, acc11); + _mm256_store_pd(x + 2*4 + 1*MAT_BLK_SZ, acc12); + _mm256_store_pd(x + 3*4 + 1*MAT_BLK_SZ, acc13); + + // round 1 + + acc00=_mm256_load_pd(x + 4*4 + 0*MAT_BLK_SZ); + acc01=_mm256_load_pd(x + 5*4 + 0*MAT_BLK_SZ); + acc02=_mm256_load_pd(x + 6*4 + 0*MAT_BLK_SZ); + acc03=_mm256_load_pd(x + 7*4 + 0*MAT_BLK_SZ); + + acc10=_mm256_load_pd(x + 4*4 + 1*MAT_BLK_SZ); + acc11=_mm256_load_pd(x + 5*4 + 1*MAT_BLK_SZ); + acc12=_mm256_load_pd(x + 6*4 + 1*MAT_BLK_SZ); + acc13=_mm256_load_pd(x + 7*4 + 1*MAT_BLK_SZ); + + for (long i = 0; i < n; i++) { + avec0 = _mm256_broadcast_sd(&a[i]); + avec1 = _mm256_broadcast_sd(&a[i+MAT_BLK_SZ]); + + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+0*4+MAT_BLK_SZ/2]); MUL_ADD(acc00, avec0, bvec); MUL_ADD(acc10, avec1, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+1*4+MAT_BLK_SZ/2]); MUL_ADD(acc01, avec0, bvec); MUL_ADD(acc11, avec1, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+2*4+MAT_BLK_SZ/2]); MUL_ADD(acc02, avec0, bvec); MUL_ADD(acc12, avec1, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+3*4+MAT_BLK_SZ/2]); MUL_ADD(acc03, avec0, bvec); MUL_ADD(acc13, avec1, bvec); + } + + + _mm256_store_pd(x + 4*4 + 0*MAT_BLK_SZ, acc00); + _mm256_store_pd(x + 5*4 + 0*MAT_BLK_SZ, acc01); + _mm256_store_pd(x + 6*4 + 0*MAT_BLK_SZ, acc02); + _mm256_store_pd(x + 7*4 + 0*MAT_BLK_SZ, acc03); + + _mm256_store_pd(x + 4*4 + 1*MAT_BLK_SZ, acc10); + _mm256_store_pd(x + 5*4 + 1*MAT_BLK_SZ, acc11); + _mm256_store_pd(x + 6*4 + 1*MAT_BLK_SZ, acc12); + _mm256_store_pd(x + 7*4 + 1*MAT_BLK_SZ, acc13); + +} + +#else + +static +void muladd2_by_32(double *x, const double *a, const double *b, long n) +{ + long i, j; + __m256d bvec; + __m256d acc00, acc01, acc02, acc03; + __m256d acc10, acc11, acc12, acc13; + + + for (j = 0; j < 2; j++) { + + acc00=_mm256_load_pd(x + 0*4 + 0*MAT_BLK_SZ + j*(MAT_BLK_SZ/2)); + acc01=_mm256_load_pd(x + 1*4 + 0*MAT_BLK_SZ + j*(MAT_BLK_SZ/2)); + acc02=_mm256_load_pd(x + 2*4 + 0*MAT_BLK_SZ + j*(MAT_BLK_SZ/2)); + acc03=_mm256_load_pd(x + 3*4 + 0*MAT_BLK_SZ + j*(MAT_BLK_SZ/2)); + + acc10=_mm256_load_pd(x + 0*4 + 1*MAT_BLK_SZ + j*(MAT_BLK_SZ/2)); + acc11=_mm256_load_pd(x + 1*4 + 1*MAT_BLK_SZ + j*(MAT_BLK_SZ/2)); + acc12=_mm256_load_pd(x + 2*4 + 1*MAT_BLK_SZ + j*(MAT_BLK_SZ/2)); + acc13=_mm256_load_pd(x + 3*4 + 1*MAT_BLK_SZ + j*(MAT_BLK_SZ/2)); + + for (i = 0; i <= n-4; i+=4) { + __m256d a0_0101 = _mm256_broadcast_pd((const __m128d*)(a+i+0)); + __m256d a0_2323 = 
_mm256_broadcast_pd((const __m128d*)(a+i+2)); + __m256d avec00 = _mm256_permute_pd(a0_0101, 0); + __m256d avec01 = _mm256_permute_pd(a0_0101, 0xf); + __m256d avec02 = _mm256_permute_pd(a0_2323, 0); + __m256d avec03 = _mm256_permute_pd(a0_2323, 0xf); + + __m256d a1_0101 = _mm256_broadcast_pd((const __m128d*)(a+i+0+MAT_BLK_SZ)); + __m256d a1_2323 = _mm256_broadcast_pd((const __m128d*)(a+i+2+MAT_BLK_SZ)); + __m256d avec10 = _mm256_permute_pd(a1_0101, 0); + __m256d avec11 = _mm256_permute_pd(a1_0101, 0xf); + __m256d avec12 = _mm256_permute_pd(a1_2323, 0); + __m256d avec13 = _mm256_permute_pd(a1_2323, 0xf); + + bvec = _mm256_load_pd(&b[(i+0)*MAT_BLK_SZ+0*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc00, avec00, bvec); MUL_ADD(acc10, avec10, bvec); + bvec = _mm256_load_pd(&b[(i+0)*MAT_BLK_SZ+1*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc01, avec00, bvec); MUL_ADD(acc11, avec10, bvec); + bvec = _mm256_load_pd(&b[(i+0)*MAT_BLK_SZ+2*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc02, avec00, bvec); MUL_ADD(acc12, avec10, bvec); + bvec = _mm256_load_pd(&b[(i+0)*MAT_BLK_SZ+3*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc03, avec00, bvec); MUL_ADD(acc13, avec10, bvec); + + bvec = _mm256_load_pd(&b[(i+1)*MAT_BLK_SZ+0*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc00, avec01, bvec); MUL_ADD(acc10, avec11, bvec); + bvec = _mm256_load_pd(&b[(i+1)*MAT_BLK_SZ+1*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc01, avec01, bvec); MUL_ADD(acc11, avec11, bvec); + bvec = _mm256_load_pd(&b[(i+1)*MAT_BLK_SZ+2*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc02, avec01, bvec); MUL_ADD(acc12, avec11, bvec); + bvec = _mm256_load_pd(&b[(i+1)*MAT_BLK_SZ+3*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc03, avec01, bvec); MUL_ADD(acc13, avec11, bvec); + + bvec = _mm256_load_pd(&b[(i+2)*MAT_BLK_SZ+0*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc00, avec02, bvec); MUL_ADD(acc10, avec12, bvec); + bvec = _mm256_load_pd(&b[(i+2)*MAT_BLK_SZ+1*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc01, avec02, bvec); MUL_ADD(acc11, avec12, bvec); + bvec = _mm256_load_pd(&b[(i+2)*MAT_BLK_SZ+2*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc02, avec02, bvec); MUL_ADD(acc12, avec12, bvec); + bvec = _mm256_load_pd(&b[(i+2)*MAT_BLK_SZ+3*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc03, avec02, bvec); MUL_ADD(acc13, avec12, bvec); + + bvec = _mm256_load_pd(&b[(i+3)*MAT_BLK_SZ+0*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc00, avec03, bvec); MUL_ADD(acc10, avec13, bvec); + bvec = _mm256_load_pd(&b[(i+3)*MAT_BLK_SZ+1*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc01, avec03, bvec); MUL_ADD(acc11, avec13, bvec); + bvec = _mm256_load_pd(&b[(i+3)*MAT_BLK_SZ+2*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc02, avec03, bvec); MUL_ADD(acc12, avec13, bvec); + bvec = _mm256_load_pd(&b[(i+3)*MAT_BLK_SZ+3*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc03, avec03, bvec); MUL_ADD(acc13, avec13, bvec); + } + + for (; i < n; i++) { + __m256d avec0 = _mm256_broadcast_sd(&a[i]); + __m256d avec1 = _mm256_broadcast_sd(&a[i+MAT_BLK_SZ]); + + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+0*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc00, avec0, bvec); MUL_ADD(acc10, avec1, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+1*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc01, avec0, bvec); MUL_ADD(acc11, avec1, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+2*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc02, avec0, bvec); MUL_ADD(acc12, avec1, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+3*4+j*(MAT_BLK_SZ/2)]); MUL_ADD(acc03, avec0, bvec); MUL_ADD(acc13, avec1, bvec); + } + + + _mm256_store_pd(x + 0*4 + 0*MAT_BLK_SZ + j*(MAT_BLK_SZ/2), acc00); + _mm256_store_pd(x + 1*4 + 0*MAT_BLK_SZ + j*(MAT_BLK_SZ/2), acc01); + _mm256_store_pd(x + 2*4 + 0*MAT_BLK_SZ + j*(MAT_BLK_SZ/2), acc02); + _mm256_store_pd(x + 3*4 + 0*MAT_BLK_SZ + 
j*(MAT_BLK_SZ/2), acc03); + + _mm256_store_pd(x + 0*4 + 1*MAT_BLK_SZ + j*(MAT_BLK_SZ/2), acc10); + _mm256_store_pd(x + 1*4 + 1*MAT_BLK_SZ + j*(MAT_BLK_SZ/2), acc11); + _mm256_store_pd(x + 2*4 + 1*MAT_BLK_SZ + j*(MAT_BLK_SZ/2), acc12); + _mm256_store_pd(x + 3*4 + 1*MAT_BLK_SZ + j*(MAT_BLK_SZ/2), acc13); + + } +} +#endif + + + +// experiment: process three rows at a time +// NOTE: this makes things slower on an AVX1 platform --- not enough registers +// it could be faster on AVX2/FMA, where there should be enough registers + +static +void muladd3_by_32(double *x, const double *a, const double *b, long n) +{ + __m256d avec0, avec1, avec2, bvec; + __m256d acc00, acc01, acc02, acc03; + __m256d acc10, acc11, acc12, acc13; + __m256d acc20, acc21, acc22, acc23; + + + // round 0 + + acc00=_mm256_load_pd(x + 0*4 + 0*MAT_BLK_SZ); + acc01=_mm256_load_pd(x + 1*4 + 0*MAT_BLK_SZ); + acc02=_mm256_load_pd(x + 2*4 + 0*MAT_BLK_SZ); + acc03=_mm256_load_pd(x + 3*4 + 0*MAT_BLK_SZ); + + acc10=_mm256_load_pd(x + 0*4 + 1*MAT_BLK_SZ); + acc11=_mm256_load_pd(x + 1*4 + 1*MAT_BLK_SZ); + acc12=_mm256_load_pd(x + 2*4 + 1*MAT_BLK_SZ); + acc13=_mm256_load_pd(x + 3*4 + 1*MAT_BLK_SZ); + + acc20=_mm256_load_pd(x + 0*4 + 2*MAT_BLK_SZ); + acc21=_mm256_load_pd(x + 1*4 + 2*MAT_BLK_SZ); + acc22=_mm256_load_pd(x + 2*4 + 2*MAT_BLK_SZ); + acc23=_mm256_load_pd(x + 3*4 + 2*MAT_BLK_SZ); + + for (long i = 0; i < n; i++) { + avec0 = _mm256_broadcast_sd(&a[i]); + avec1 = _mm256_broadcast_sd(&a[i+MAT_BLK_SZ]); + avec2 = _mm256_broadcast_sd(&a[i+2*MAT_BLK_SZ]); + + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+0*4]); MUL_ADD(acc00, avec0, bvec); MUL_ADD(acc10, avec1, bvec); MUL_ADD(acc20, avec2, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+1*4]); MUL_ADD(acc01, avec0, bvec); MUL_ADD(acc11, avec1, bvec); MUL_ADD(acc21, avec2, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+2*4]); MUL_ADD(acc02, avec0, bvec); MUL_ADD(acc12, avec1, bvec); MUL_ADD(acc22, avec2, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+3*4]); MUL_ADD(acc03, avec0, bvec); MUL_ADD(acc13, avec1, bvec); MUL_ADD(acc23, avec2, bvec); + } + + + _mm256_store_pd(x + 0*4 + 0*MAT_BLK_SZ, acc00); + _mm256_store_pd(x + 1*4 + 0*MAT_BLK_SZ, acc01); + _mm256_store_pd(x + 2*4 + 0*MAT_BLK_SZ, acc02); + _mm256_store_pd(x + 3*4 + 0*MAT_BLK_SZ, acc03); + + _mm256_store_pd(x + 0*4 + 1*MAT_BLK_SZ, acc10); + _mm256_store_pd(x + 1*4 + 1*MAT_BLK_SZ, acc11); + _mm256_store_pd(x + 2*4 + 1*MAT_BLK_SZ, acc12); + _mm256_store_pd(x + 3*4 + 1*MAT_BLK_SZ, acc13); + + _mm256_store_pd(x + 0*4 + 2*MAT_BLK_SZ, acc20); + _mm256_store_pd(x + 1*4 + 2*MAT_BLK_SZ, acc21); + _mm256_store_pd(x + 2*4 + 2*MAT_BLK_SZ, acc22); + _mm256_store_pd(x + 3*4 + 2*MAT_BLK_SZ, acc23); + + // round 1 + + acc00=_mm256_load_pd(x + 4*4 + 0*MAT_BLK_SZ); + acc01=_mm256_load_pd(x + 5*4 + 0*MAT_BLK_SZ); + acc02=_mm256_load_pd(x + 6*4 + 0*MAT_BLK_SZ); + acc03=_mm256_load_pd(x + 7*4 + 0*MAT_BLK_SZ); + + acc10=_mm256_load_pd(x + 4*4 + 1*MAT_BLK_SZ); + acc11=_mm256_load_pd(x + 5*4 + 1*MAT_BLK_SZ); + acc12=_mm256_load_pd(x + 6*4 + 1*MAT_BLK_SZ); + acc13=_mm256_load_pd(x + 7*4 + 1*MAT_BLK_SZ); + + acc20=_mm256_load_pd(x + 4*4 + 2*MAT_BLK_SZ); + acc21=_mm256_load_pd(x + 5*4 + 2*MAT_BLK_SZ); + acc22=_mm256_load_pd(x + 6*4 + 2*MAT_BLK_SZ); + acc23=_mm256_load_pd(x + 7*4 + 2*MAT_BLK_SZ); + + for (long i = 0; i < n; i++) { + avec0 = _mm256_broadcast_sd(&a[i]); + avec1 = _mm256_broadcast_sd(&a[i+MAT_BLK_SZ]); + avec2 = _mm256_broadcast_sd(&a[i+2*MAT_BLK_SZ]); + + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+0*4+MAT_BLK_SZ/2]); MUL_ADD(acc00, avec0, bvec); 
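+      // (this bvec is reused by the acc1* and acc2* updates that
+      // follow: amortizing each b load across three rows is the
+      // whole point of the 3-row kernel)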
MUL_ADD(acc10, avec1, bvec); MUL_ADD(acc20, avec2, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+1*4+MAT_BLK_SZ/2]); MUL_ADD(acc01, avec0, bvec); MUL_ADD(acc11, avec1, bvec); MUL_ADD(acc21, avec2, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+2*4+MAT_BLK_SZ/2]); MUL_ADD(acc02, avec0, bvec); MUL_ADD(acc12, avec1, bvec); MUL_ADD(acc22, avec2, bvec); + bvec = _mm256_load_pd(&b[i*MAT_BLK_SZ+3*4+MAT_BLK_SZ/2]); MUL_ADD(acc03, avec0, bvec); MUL_ADD(acc13, avec1, bvec); MUL_ADD(acc23, avec2, bvec); + } + + + _mm256_store_pd(x + 4*4 + 0*MAT_BLK_SZ, acc00); + _mm256_store_pd(x + 5*4 + 0*MAT_BLK_SZ, acc01); + _mm256_store_pd(x + 6*4 + 0*MAT_BLK_SZ, acc02); + _mm256_store_pd(x + 7*4 + 0*MAT_BLK_SZ, acc03); + + _mm256_store_pd(x + 4*4 + 1*MAT_BLK_SZ, acc10); + _mm256_store_pd(x + 5*4 + 1*MAT_BLK_SZ, acc11); + _mm256_store_pd(x + 6*4 + 1*MAT_BLK_SZ, acc12); + _mm256_store_pd(x + 7*4 + 1*MAT_BLK_SZ, acc13); + + _mm256_store_pd(x + 4*4 + 2*MAT_BLK_SZ, acc20); + _mm256_store_pd(x + 5*4 + 2*MAT_BLK_SZ, acc21); + _mm256_store_pd(x + 6*4 + 2*MAT_BLK_SZ, acc22); + _mm256_store_pd(x + 7*4 + 2*MAT_BLK_SZ, acc23); + +} + +static inline +void muladd_all_by_32(long first, long last, double *x, const double *a, const double *b, long n) +{ + long i = first; +#ifdef NTL_HAVE_FMA + // processing three rows at a time is faster + for (; i <= last-3; i+=3) + muladd3_by_32(x + i*MAT_BLK_SZ, a + i*MAT_BLK_SZ, b, n); + for (; i < last; i++) + muladd1_by_32(x + i*MAT_BLK_SZ, a + i*MAT_BLK_SZ, b, n); +#else + // process only two rows at a time: not enough registers :-( + for (; i <= last-2; i+=2) + muladd2_by_32(x + i*MAT_BLK_SZ, a + i*MAT_BLK_SZ, b, n); + for (; i < last; i++) + muladd1_by_32(x + i*MAT_BLK_SZ, a + i*MAT_BLK_SZ, b, n); +#endif +} + + +// this assumes n is a multiple of 16 +static inline +void muladd_interval(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n) +{ + __m256d xvec0, xvec1, xvec2, xvec3; + __m256d yvec0, yvec1, yvec2, yvec3; + + __m256d cvec = _mm256_broadcast_sd(&c); + + for (long i = 0; i < n; i += 16, x += 16, y += 16) { + xvec0 = _mm256_load_pd(x+0*4); + xvec1 = _mm256_load_pd(x+1*4); + xvec2 = _mm256_load_pd(x+2*4); + xvec3 = _mm256_load_pd(x+3*4); + + yvec0 = _mm256_load_pd(y+0*4); + yvec1 = _mm256_load_pd(y+1*4); + yvec2 = _mm256_load_pd(y+2*4); + yvec3 = _mm256_load_pd(y+3*4); + + MUL_ADD(xvec0, yvec0, cvec); + MUL_ADD(xvec1, yvec1, cvec); + MUL_ADD(xvec2, yvec2, cvec); + MUL_ADD(xvec3, yvec3, cvec); + + _mm256_store_pd(x + 0*4, xvec0); + _mm256_store_pd(x + 1*4, xvec1); + _mm256_store_pd(x + 2*4, xvec2); + _mm256_store_pd(x + 3*4, xvec3); + } +} + +// this one is more general: does not assume that n is a +// multiple of 16 +static inline +void muladd_interval1(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n) +{ + + __m256d xvec0, xvec1, xvec2, xvec3; + __m256d yvec0, yvec1, yvec2, yvec3; + __m256d cvec; + + if (n >= 4) + cvec = _mm256_broadcast_sd(&c); + + long i=0; + for (; i <= n-16; i += 16, x += 16, y += 16) { + xvec0 = _mm256_load_pd(x+0*4); + xvec1 = _mm256_load_pd(x+1*4); + xvec2 = _mm256_load_pd(x+2*4); + xvec3 = _mm256_load_pd(x+3*4); + + yvec0 = _mm256_load_pd(y+0*4); + yvec1 = _mm256_load_pd(y+1*4); + yvec2 = _mm256_load_pd(y+2*4); + yvec3 = _mm256_load_pd(y+3*4); + + MUL_ADD(xvec0, yvec0, cvec); + MUL_ADD(xvec1, yvec1, cvec); + MUL_ADD(xvec2, yvec2, cvec); + MUL_ADD(xvec3, yvec3, cvec); + + _mm256_store_pd(x + 0*4, xvec0); + _mm256_store_pd(x + 1*4, xvec1); + _mm256_store_pd(x + 2*4, xvec2); + _mm256_store_pd(x + 3*4, xvec3); + } + + for (; 
i <= n-4; i += 4, x += 4, y += 4) { + xvec0 = _mm256_load_pd(x+0*4); + yvec0 = _mm256_load_pd(y+0*4); + MUL_ADD(xvec0, yvec0, cvec); + _mm256_store_pd(x + 0*4, xvec0); + } + + for (; i < n; i++, x++, y++) { + *x += (*y)*c; + } +} + +#define AVX_PD_SZ (4) + +// experimental: assumes n is a multiple of 4 in the range [0..32] +#if 1 +static inline +void muladd_interval2(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n) +{ + n /= 4; + if (n <= 0 || n > 8) return; + + x += n*4; + y += n*4; + + // n in [1..8] + + __m256d xvec, yvec, cvec; + + cvec = _mm256_broadcast_sd(&c); + + switch (n) { + case 8: xvec = _mm256_load_pd(x-8*4); yvec = _mm256_load_pd(y-8*4); MUL_ADD(xvec, yvec, cvec); _mm256_store_pd(x-8*4, xvec); + case 7: xvec = _mm256_load_pd(x-7*4); yvec = _mm256_load_pd(y-7*4); MUL_ADD(xvec, yvec, cvec); _mm256_store_pd(x-7*4, xvec); + case 6: xvec = _mm256_load_pd(x-6*4); yvec = _mm256_load_pd(y-6*4); MUL_ADD(xvec, yvec, cvec); _mm256_store_pd(x-6*4, xvec); + case 5: xvec = _mm256_load_pd(x-5*4); yvec = _mm256_load_pd(y-5*4); MUL_ADD(xvec, yvec, cvec); _mm256_store_pd(x-5*4, xvec); + case 4: xvec = _mm256_load_pd(x-4*4); yvec = _mm256_load_pd(y-4*4); MUL_ADD(xvec, yvec, cvec); _mm256_store_pd(x-4*4, xvec); + case 3: xvec = _mm256_load_pd(x-3*4); yvec = _mm256_load_pd(y-3*4); MUL_ADD(xvec, yvec, cvec); _mm256_store_pd(x-3*4, xvec); + case 2: xvec = _mm256_load_pd(x-2*4); yvec = _mm256_load_pd(y-2*4); MUL_ADD(xvec, yvec, cvec); _mm256_store_pd(x-2*4, xvec); + case 1: xvec = _mm256_load_pd(x-1*4); yvec = _mm256_load_pd(y-1*4); MUL_ADD(xvec, yvec, cvec); _mm256_store_pd(x-1*4, xvec); + } + +} +#else +static inline +void muladd_interval2(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n) +{ + for (long i = 0; i < n; i++) + x[i] += y[i]*c; +} +#endif + +#endif + + +#define DO_MUL(a, b) ((unsigned long) (long(a)*long(b))) +//#define DO_MUL(a, b) ((a)*(b)) + +static +inline void muladd_interval(unsigned long * NTL_RESTRICT x, unsigned long * NTL_RESTRICT y, + unsigned long c, long n) +{ + for (long i = 0; i < n; i++) + x[i] += DO_MUL(y[i], c); +} + +static +void muladd1_by_32(unsigned long *x, const unsigned long *a, const unsigned long *b, + long n) +{ + for (long j = 0; j < MAT_BLK_SZ; j++) { + unsigned long sum = x[j]; + long i = 0; + + for (; i <= n-4; i += 4) { + sum += DO_MUL(a[i+0], b[i+0]); + sum += DO_MUL(a[i+1], b[i+1]); + sum += DO_MUL(a[i+2], b[i+2]); + sum += DO_MUL(a[i+3], b[i+3]); + } + + for (; i < n; i++) + sum += DO_MUL(a[i], b[i]); + + x[j] = sum; + b += MAT_BLK_SZ; + } +} + +// experiment with shorter int's +static +void muladd1_by_32(unsigned long *x, const unsigned int *a, const unsigned int *b, + long n) +{ + for (long j = 0; j < MAT_BLK_SZ; j++) { + unsigned long sum = x[j]; + long i = 0; + + for (; i <= n-4; i += 4) { + sum += DO_MUL(a[i+0], b[i+0]); + sum += DO_MUL(a[i+1], b[i+1]); + sum += DO_MUL(a[i+2], b[i+2]); + sum += DO_MUL(a[i+3], b[i+3]); + } + + for (; i < n; i++) + sum += DO_MUL(a[i], b[i]); + + x[j] = sum; + b += MAT_BLK_SZ; + } +} + +#if 0 +static +void muladd1_by_32_full(unsigned long *x, const unsigned long *a, const unsigned long *b) +{ + for (long j = 0; j < MAT_BLK_SZ; j++) { + unsigned long sum = x[j]; + long i = 0; + + sum += DO_MUL(a[i+0], b[i+0]); + sum += DO_MUL(a[i+1], b[i+1]); + sum += DO_MUL(a[i+2], b[i+2]); + sum += DO_MUL(a[i+3], b[i+3]); + sum += DO_MUL(a[i+4], b[i+4]); + sum += DO_MUL(a[i+5], b[i+5]); + sum += DO_MUL(a[i+6], b[i+6]); + sum += DO_MUL(a[i+7], b[i+7]); + sum += DO_MUL(a[i+8], b[i+8]); + sum += 
DO_MUL(a[i+9], b[i+9]); + sum += DO_MUL(a[i+10], b[i+10]); + sum += DO_MUL(a[i+11], b[i+11]); + sum += DO_MUL(a[i+12], b[i+12]); + sum += DO_MUL(a[i+13], b[i+13]); + sum += DO_MUL(a[i+14], b[i+14]); + sum += DO_MUL(a[i+15], b[i+15]); + sum += DO_MUL(a[i+16], b[i+16]); + sum += DO_MUL(a[i+17], b[i+17]); + sum += DO_MUL(a[i+18], b[i+18]); + sum += DO_MUL(a[i+19], b[i+19]); + sum += DO_MUL(a[i+20], b[i+20]); + sum += DO_MUL(a[i+21], b[i+21]); + sum += DO_MUL(a[i+22], b[i+22]); + sum += DO_MUL(a[i+23], b[i+23]); + sum += DO_MUL(a[i+24], b[i+24]); + sum += DO_MUL(a[i+25], b[i+25]); + sum += DO_MUL(a[i+26], b[i+26]); + sum += DO_MUL(a[i+27], b[i+27]); + sum += DO_MUL(a[i+28], b[i+28]); + sum += DO_MUL(a[i+29], b[i+29]); + sum += DO_MUL(a[i+30], b[i+30]); + sum += DO_MUL(a[i+31], b[i+31]); + + x[j] = sum; + b += MAT_BLK_SZ; + } +} +#else + +// this version is faster (by about 25%) on a Sandybridge machine + +#define ONE_STEP_L(i) \ + sum += DO_MUL(a[i],b[i]);\ + sum_1 += DO_MUL(a[i],b_1[i]);\ + sum_2 += DO_MUL(a[i],b_2[i]);\ + sum_3 += DO_MUL(a[i],b_3[i])\ + + +static +void muladd1_by_32_full(unsigned long *x, const unsigned long *a, const unsigned long *b) +{ + for (long j = 0; j < MAT_BLK_SZ; j+=4) { + + unsigned long sum = x[j]; + unsigned long sum_1 = x[j+1]; + unsigned long sum_2 = x[j+2]; + unsigned long sum_3 = x[j+3]; + + const unsigned long *b_1 = b+MAT_BLK_SZ; + const unsigned long *b_2 = b+2*MAT_BLK_SZ; + const unsigned long *b_3 = b+3*MAT_BLK_SZ; + + ONE_STEP_L(0); + ONE_STEP_L(1); + ONE_STEP_L(2); + ONE_STEP_L(3); + ONE_STEP_L(4); + ONE_STEP_L(5); + ONE_STEP_L(6); + ONE_STEP_L(7); + ONE_STEP_L(8); + ONE_STEP_L(9); + ONE_STEP_L(10); + ONE_STEP_L(11); + ONE_STEP_L(12); + ONE_STEP_L(13); + ONE_STEP_L(14); + ONE_STEP_L(15); + ONE_STEP_L(16); + ONE_STEP_L(17); + ONE_STEP_L(18); + ONE_STEP_L(19); + ONE_STEP_L(20); + ONE_STEP_L(21); + ONE_STEP_L(22); + ONE_STEP_L(23); + ONE_STEP_L(24); + ONE_STEP_L(25); + ONE_STEP_L(26); + ONE_STEP_L(27); + ONE_STEP_L(28); + ONE_STEP_L(29); + ONE_STEP_L(30); + ONE_STEP_L(31); + + x[j] = sum; + x[j+1] = sum_1; + x[j+2] = sum_2; + x[j+3] = sum_3; + + b += 4*MAT_BLK_SZ; + } +} + +// experiment with shorter int's +static +void muladd1_by_32_full(unsigned long *x, const unsigned int *a, const unsigned int *b) +{ + for (long j = 0; j < MAT_BLK_SZ; j+=4) { + + unsigned long sum = x[j]; + unsigned long sum_1 = x[j+1]; + unsigned long sum_2 = x[j+2]; + unsigned long sum_3 = x[j+3]; + + const unsigned int *b_1 = b+MAT_BLK_SZ; + const unsigned int *b_2 = b+2*MAT_BLK_SZ; + const unsigned int *b_3 = b+3*MAT_BLK_SZ; + + ONE_STEP_L(0); + ONE_STEP_L(1); + ONE_STEP_L(2); + ONE_STEP_L(3); + ONE_STEP_L(4); + ONE_STEP_L(5); + ONE_STEP_L(6); + ONE_STEP_L(7); + ONE_STEP_L(8); + ONE_STEP_L(9); + ONE_STEP_L(10); + ONE_STEP_L(11); + ONE_STEP_L(12); + ONE_STEP_L(13); + ONE_STEP_L(14); + ONE_STEP_L(15); + ONE_STEP_L(16); + ONE_STEP_L(17); + ONE_STEP_L(18); + ONE_STEP_L(19); + ONE_STEP_L(20); + ONE_STEP_L(21); + ONE_STEP_L(22); + ONE_STEP_L(23); + ONE_STEP_L(24); + ONE_STEP_L(25); + ONE_STEP_L(26); + ONE_STEP_L(27); + ONE_STEP_L(28); + ONE_STEP_L(29); + ONE_STEP_L(30); + ONE_STEP_L(31); + + x[j] = sum; + x[j+1] = sum_1; + x[j+2] = sum_2; + x[j+3] = sum_3; + + b += 4*MAT_BLK_SZ; + } +} + +#endif + +static inline +void muladd_all_by_32(long first, long last, unsigned long *x, const unsigned int *a, const unsigned int *b, long n) +{ + if (n == MAT_BLK_SZ) { + for (long i = first; i < last; i++) + muladd1_by_32_full(x + i*MAT_BLK_SZ, a + i*MAT_BLK_SZ, b); + } + else { + for (long i = 
first; i < last; i++) + muladd1_by_32(x + i*MAT_BLK_SZ, a + i*MAT_BLK_SZ, b, n); + } +} + +static inline +void muladd_all_by_32(long first, long last, unsigned long *x, const unsigned long *a, const unsigned long *b, long n) +{ + if (n == MAT_BLK_SZ) { + for (long i = first; i < last; i++) + muladd1_by_32_full(x + i*MAT_BLK_SZ, a + i*MAT_BLK_SZ, b); + } + else { + for (long i = first; i < last; i++) + muladd1_by_32(x + i*MAT_BLK_SZ, a + i*MAT_BLK_SZ, b, n); + } +} + +#if (NTL_BITS_PER_INT >= NTL_BITS_PER_LONG/2) + +typedef unsigned int uhlong; + +#else + +typedef unsigned long uhlong; + +#endif + + + + +// NOTE: the following code is hardcoded for MAT_BLK_SZ == 32. +// Also, we special case NTL_BITS_PER_LONG-NTL_SP_NBITS > 2, which +// allows us to accumulate all 32 products without additional carries. + +#if (NTL_BITS_PER_LONG-NTL_SP_NBITS > 2) + +static +void muladd1_by_32(long *x, const long *a, const long *b, + long n, long p, sp_ll_reduce_struct ll_red_struct) +{ + for (long j = 0; j < MAT_BLK_SZ; j++) { + + ll_type sum; + ll_init(sum, x[j]); +#if 0 + for (long i = 0; i < n; i++) + ll_imul_add(sum, a[i], b[i]); +#else + long i=0; + for(; i <= n-8; i+= 8) { + ll_imul_add(sum, a[i+0], b[i+0]); + ll_imul_add(sum, a[i+1], b[i+1]); + ll_imul_add(sum, a[i+2], b[i+2]); + ll_imul_add(sum, a[i+3], b[i+3]); + + ll_imul_add(sum, a[i+4], b[i+4]); + ll_imul_add(sum, a[i+5], b[i+5]); + ll_imul_add(sum, a[i+6], b[i+6]); + ll_imul_add(sum, a[i+7], b[i+7]); + } + + for (; i < n; i++) + ll_imul_add(sum, a[i], b[i]); + +#endif + + unsigned long sum0 = ll_get_lo(sum); + unsigned long sum1 = ll_get_hi(sum); + + long res; + + if (ll_red_struct.nbits == NTL_SP_NBITS) + res = sp_ll_red_31_normalized(0, sum1, sum0, p, ll_red_struct); + else + res = sp_ll_red_31(0, sum1, sum0, p, ll_red_struct); + + + x[j] = res; + b += MAT_BLK_SZ; + } +} + +#if 0 +static +void muladd1_by_32_full(long *x, const long *a, const long *b, + long p, sp_ll_reduce_struct ll_red_struct) +{ + for (long j = 0; j < MAT_BLK_SZ; j++) { + + ll_type sum; + ll_init(sum, x[j]); + + ll_imul_add(sum, a[0], b[0]); + ll_imul_add(sum, a[1], b[1]); + ll_imul_add(sum, a[2], b[2]); + ll_imul_add(sum, a[3], b[3]); + ll_imul_add(sum, a[4], b[4]); + ll_imul_add(sum, a[5], b[5]); + ll_imul_add(sum, a[6], b[6]); + ll_imul_add(sum, a[7], b[7]); + ll_imul_add(sum, a[8], b[8]); + ll_imul_add(sum, a[9], b[9]); + ll_imul_add(sum, a[10], b[10]); + ll_imul_add(sum, a[11], b[11]); + ll_imul_add(sum, a[12], b[12]); + ll_imul_add(sum, a[13], b[13]); + ll_imul_add(sum, a[14], b[14]); + ll_imul_add(sum, a[15], b[15]); + ll_imul_add(sum, a[16], b[16]); + ll_imul_add(sum, a[17], b[17]); + ll_imul_add(sum, a[18], b[18]); + ll_imul_add(sum, a[19], b[19]); + ll_imul_add(sum, a[20], b[20]); + ll_imul_add(sum, a[21], b[21]); + ll_imul_add(sum, a[22], b[22]); + ll_imul_add(sum, a[23], b[23]); + ll_imul_add(sum, a[24], b[24]); + ll_imul_add(sum, a[25], b[25]); + ll_imul_add(sum, a[26], b[26]); + ll_imul_add(sum, a[27], b[27]); + ll_imul_add(sum, a[28], b[28]); + ll_imul_add(sum, a[29], b[29]); + ll_imul_add(sum, a[30], b[30]); + ll_imul_add(sum, a[31], b[31]); + + unsigned long sum0 = ll_get_lo(sum); + unsigned long sum1 = ll_get_hi(sum); + + long res; + + if (ll_red_struct.nbits == NTL_SP_NBITS) + res = sp_ll_red_31_normalized(0, sum1, sum0, p, ll_red_struct); + else + res = sp_ll_red_31(0, sum1, sum0, p, ll_red_struct); + + + x[j] = res; + b += MAT_BLK_SZ; + } +} + +#elif 1 +// This version is consistently fastest on tests on Sandybridge and Haswell + + + +#define 
ONE_STEP(i) \ + ll_imul_add(sum, a[i], b[i]);\ + ll_imul_add(sum_1, a[i], b_1[i]);\ + ll_imul_add(sum_2, a[i], b_2[i]);\ + ll_imul_add(sum_3, a[i], b_3[i]);\ + + +void muladd1_by_32_full(long *x, const long *a, const long *b, + long p, sp_ll_reduce_struct ll_red_struct) +{ + for (long j = 0; j < MAT_BLK_SZ; j+=4) { + + ll_type sum, sum_1, sum_2, sum_3; + ll_init(sum, x[j]); + ll_init(sum_1, x[j+1]); + ll_init(sum_2, x[j+2]); + ll_init(sum_3, x[j+3]); + + const long *b_1 = b+MAT_BLK_SZ; + const long *b_2 = b+2*MAT_BLK_SZ; + const long *b_3 = b+3*MAT_BLK_SZ; + + ONE_STEP(0); + ONE_STEP(1); + ONE_STEP(2); + ONE_STEP(3); + ONE_STEP(4); + ONE_STEP(5); + ONE_STEP(6); + ONE_STEP(7); + ONE_STEP(8); + ONE_STEP(9); + ONE_STEP(10); + ONE_STEP(11); + ONE_STEP(12); + ONE_STEP(13); + ONE_STEP(14); + ONE_STEP(15); + ONE_STEP(16); + ONE_STEP(17); + ONE_STEP(18); + ONE_STEP(19); + ONE_STEP(20); + ONE_STEP(21); + ONE_STEP(22); + ONE_STEP(23); + ONE_STEP(24); + ONE_STEP(25); + ONE_STEP(26); + ONE_STEP(27); + ONE_STEP(28); + ONE_STEP(29); + ONE_STEP(30); + ONE_STEP(31); + + unsigned long sum0 = ll_get_lo(sum); + unsigned long sum1 = ll_get_hi(sum); + + unsigned long sum0_1 = ll_get_lo(sum_1); + unsigned long sum1_1 = ll_get_hi(sum_1); + + unsigned long sum0_2 = ll_get_lo(sum_2); + unsigned long sum1_2 = ll_get_hi(sum_2); + + unsigned long sum0_3 = ll_get_lo(sum_3); + unsigned long sum1_3 = ll_get_hi(sum_3); + + if (ll_red_struct.nbits == NTL_SP_NBITS) { + x[j] = sp_ll_red_31_normalized(0, sum1, sum0, p, ll_red_struct); + x[j+1] = sp_ll_red_31_normalized(0, sum1_1, sum0_1, p, ll_red_struct); + x[j+2] = sp_ll_red_31_normalized(0, sum1_2, sum0_2, p, ll_red_struct); + x[j+3] = sp_ll_red_31_normalized(0, sum1_3, sum0_3, p, ll_red_struct); + } + else { + x[j] = sp_ll_red_31(0, sum1, sum0, p, ll_red_struct); + x[j+1] = sp_ll_red_31(0, sum1_1, sum0_1, p, ll_red_struct); + x[j+2] = sp_ll_red_31(0, sum1_2, sum0_2, p, ll_red_struct); + x[j+3] = sp_ll_red_31(0, sum1_3, sum0_3, p, ll_red_struct); + } + + + b += 4*MAT_BLK_SZ; + } +} + + +#endif + +#else + + +static +void muladd1_by_32(long *x, const long *a, const long *b, + long n, long p, sp_ll_reduce_struct ll_red_struct) +{ + for (long j = 0; j < MAT_BLK_SZ; j++) { + + ll_type sum; + ll_init(sum, x[j]); + + long i = 0; + for (; i < n-16; i++) + ll_imul_add(sum, a[i], b[i]); + + ll_type acc21; + ll_init(acc21, ll_get_hi(sum)); + unsigned long acc0 = ll_get_lo(sum); + ll_init(sum, acc0); + + for (; i < n; i++) + ll_imul_add(sum, a[i], b[i]); + + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + + long res; + + if (ll_red_struct.nbits == NTL_SP_NBITS) + res = sp_ll_red_31_normalized(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, ll_red_struct); + else + res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, ll_red_struct); + + x[j] = res; + b += MAT_BLK_SZ; + } +} + +static +void muladd1_by_32_full(long *x, const long *a, const long *b, + long p, sp_ll_reduce_struct ll_red_struct) +{ + for (long j = 0; j < MAT_BLK_SZ; j++) { + + ll_type sum; + ll_init(sum, x[j]); + + ll_imul_add(sum, a[0], b[0]); + ll_imul_add(sum, a[1], b[1]); + ll_imul_add(sum, a[2], b[2]); + ll_imul_add(sum, a[3], b[3]); + ll_imul_add(sum, a[4], b[4]); + ll_imul_add(sum, a[5], b[5]); + ll_imul_add(sum, a[6], b[6]); + ll_imul_add(sum, a[7], b[7]); + ll_imul_add(sum, a[8], b[8]); + ll_imul_add(sum, a[9], b[9]); + ll_imul_add(sum, a[10], b[10]); + ll_imul_add(sum, a[11], b[11]); + ll_imul_add(sum, a[12], b[12]); + ll_imul_add(sum, a[13], b[13]); + ll_imul_add(sum, a[14], b[14]); + 
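+      // With at most two spare bits above NTL_SP_NBITS, only 16
+      // products fit in the double-word accumulator without risking
+      // carry overflow; after the 16th product (a[15]*b[15], just
+      // below) the high word is spilled into acc21 and accumulation
+      // restarts from the low word.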
ll_imul_add(sum, a[15], b[15]); + + ll_type acc21; + ll_init(acc21, ll_get_hi(sum)); + unsigned long acc0 = ll_get_lo(sum); + ll_init(sum, acc0); + + ll_imul_add(sum, a[16], b[16]); + ll_imul_add(sum, a[17], b[17]); + ll_imul_add(sum, a[18], b[18]); + ll_imul_add(sum, a[19], b[19]); + ll_imul_add(sum, a[20], b[20]); + ll_imul_add(sum, a[21], b[21]); + ll_imul_add(sum, a[22], b[22]); + ll_imul_add(sum, a[23], b[23]); + ll_imul_add(sum, a[24], b[24]); + ll_imul_add(sum, a[25], b[25]); + ll_imul_add(sum, a[26], b[26]); + ll_imul_add(sum, a[27], b[27]); + ll_imul_add(sum, a[28], b[28]); + ll_imul_add(sum, a[29], b[29]); + ll_imul_add(sum, a[30], b[30]); + ll_imul_add(sum, a[31], b[31]); + + acc0 = ll_get_lo(sum); + ll_add(acc21, ll_get_hi(sum)); + + long res; + + if (ll_red_struct.nbits == NTL_SP_NBITS) + res = sp_ll_red_31_normalized(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, ll_red_struct); + else + res = sp_ll_red_31(ll_get_hi(acc21), ll_get_lo(acc21), acc0, p, ll_red_struct); + + x[j] = res; + b += MAT_BLK_SZ; + } +} + + + +#endif + + +static +void muladd1_by_32_half2(long *x, const long *a, const long *b, + long n, long p, sp_ll_reduce_struct ll_red_struct) +{ + for (long j = 0; j < MAT_BLK_SZ; j++) { + + unsigned long sum[2]; + sum[0] = x[j]; + sum[1] = 0; + + long k=0; + long i=0; + for(; i <= n-16; i+= 16) { + unsigned long lsum = a[i+0]*b[i+0]; + lsum += a[i+1]*b[i+1]; + lsum += a[i+2]*b[i+2]; + lsum += a[i+3]*b[i+3]; + lsum += a[i+4]*b[i+4]; + lsum += a[i+5]*b[i+5]; + lsum += a[i+6]*b[i+6]; + lsum += a[i+7]*b[i+7]; + lsum += a[i+8]*b[i+8]; + lsum += a[i+9]*b[i+9]; + lsum += a[i+10]*b[i+10]; + lsum += a[i+11]*b[i+11]; + lsum += a[i+12]*b[i+12]; + lsum += a[i+13]*b[i+13]; + lsum += a[i+14]*b[i+14]; + lsum += a[i+15]*b[i+15]; + sum[k++] += lsum; + } + + if (i < n) { + unsigned long lsum = a[i]*b[i]; + for (i++; i < n; i++) + lsum += a[i]*b[i]; + sum[k++] += lsum; + } + + + long t0 = sp_ll_red_21(0, sum[0], p, ll_red_struct); + long t1 = sp_ll_red_21(0, sum[1], p, ll_red_struct); + x[j] = AddMod(t0, t1, p); + + b += MAT_BLK_SZ; + } +} + + + +// NOTE: oddly, this is slightly faster than the half2 routine, which +// I would have thought would be faster +// DIRT: this assumes MAT_BLK_SZ < (1L << NTL_BITS_PER_LONG/2), +// which will hold unconditionally for MAT_BLK_SZ < 2^16. 
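+// Why the batching in half1 is safe (w = NTL_BITS_PER_LONG): this
+// routine is only selected, in muladd_all_by_32 below, when
+// p-1 < 2^(w/2-1).  Then each product satisfies
+//
+//    a[i]*b[i] <= (p-1)^2 < 2^(w-2),
+//
+// so a batch of 4 products sums to < 2^w in a single unsigned long
+// with no carries, and only one double-word ll_add is paid per batch.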
+static +void muladd1_by_32_half1(long *x, const long *a, const long *b, + long n, long p, sp_ll_reduce_struct ll_red_struct) +{ + for (long j = 0; j < MAT_BLK_SZ; j++) { + + ll_type sum; + ll_init(sum, x[j]); + + long i=0; + for(; i <= n-4; i+= 4) { + unsigned long lsum = a[i+0]*b[i+0]; + lsum += a[i+1]*b[i+1]; + lsum += a[i+2]*b[i+2]; + lsum += a[i+3]*b[i+3]; + ll_add(sum, lsum); + } + + if (i < n) { + unsigned long lsum = a[i]*b[i]; + for (i++; i < n; i++) + lsum += a[i]*b[i]; + ll_add(sum, lsum); + } + + unsigned long sum0 = ll_get_lo(sum); + unsigned long sum1 = ll_get_hi(sum); + x[j] = sp_ll_red_21(sum1, sum0, p, ll_red_struct); + + b += MAT_BLK_SZ; + } +} + + +static inline +void muladd_all_by_32(long first, long last, long *x, const long *a, const long *b, long n, + long p, sp_ll_reduce_struct ll_red_struct) +{ + if ((p-1) >= (1L << ((NTL_BITS_PER_LONG/2)-1))) { + if (n == MAT_BLK_SZ) { + for (long i = first; i < last; i++) + muladd1_by_32_full(x + i*MAT_BLK_SZ, a + i*MAT_BLK_SZ, b, p, ll_red_struct); + } + else { + for (long i = first; i < last; i++) + muladd1_by_32(x + i*MAT_BLK_SZ, a + i*MAT_BLK_SZ, b, n, p, ll_red_struct); + } + } + else { + for (long i = first; i < last; i++) + muladd1_by_32_half1(x + i*MAT_BLK_SZ, a + i*MAT_BLK_SZ, b, n, p, ll_red_struct); + } +} + + + +#endif + + + +static +inline void muladd_interval(long * NTL_RESTRICT x, long * NTL_RESTRICT y, + long c, long n, long p, mulmod_t pinv) +{ + mulmod_precon_t cpinv = PrepMulModPrecon(c, p, pinv); + for (long i = 0; i < n; i++) { + long t = MulModPrecon(y[i], c, p, cpinv); + x[i] = AddMod(x[i], t, p); + } +} + + +// ****************************************************************** +// +// General matrix multiplication code +// +// ****************************************************************** + + + + + +static +void basic_mul(const mat_window_zz_p& X, + const const_mat_window_zz_p& A, const const_mat_window_zz_p& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + const bool seq = double(n)*double(l)*double(m) < PAR_THRESH; + + NTL_GEXEC_RANGE(seq, n, first, last) { + + for (long i = first; i < last; i++) { + long j, k; + const zz_p* ap = &A[i][0]; + + zz_p *xp = &X[i][0]; + for (j = 0; j < m; j++) xp[j].LoopHole() = 0; + + for (k = 0; k < l; k++) { + long aa = rep(ap[k]); + if (aa != 0) { + const zz_p* bp = &B[k][0]; + long T1; + mulmod_precon_t aapinv = PrepMulModPrecon(aa, p, pinv); + + for (j = 0; j < m; j++) { + T1 = MulModPrecon(rep(bp[j]), aa, p, aapinv); + xp[j].LoopHole() = AddMod(rep(xp[j]), T1, p); + } + } + } + } + + } NTL_GEXEC_RANGE_END +} + + + + +#ifdef NTL_HAVE_LL_TYPE + +static +void alt_mul_L(const mat_window_zz_p& X, + const const_mat_window_zz_p& A, const const_mat_window_zz_p& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + long p = zz_p::modulus(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + const bool seq = double(n)*double(l)*double(m) < PAR_THRESH; + + NTL_GEXEC_RANGE(seq, m, first, last) { + + Vec B_col; + B_col.SetLength(l); + long *bp = B_col.elts(); + + long i, j, k; + + for (j = first; j < last; j++) { + for (k = 0; k < l; k++) bp[k] = rep(B[k][j]); + + for (i = 0; i < n; i++) { + const zz_p *ap = &A[i][0]; + X[i][j].LoopHole() = InnerProd_L(bp, ap, l, p, red_struct); + } + } + + } NTL_GEXEC_RANGE_END +} + + +static +void alt_mul_LL(const mat_window_zz_p& X, + const const_mat_window_zz_p& A, const const_mat_window_zz_p& B) +{ + 
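+   // Same column-caching strategy as alt_mul_L above: column j of B is
+   // gathered into the contiguous buffer B_col, so each entry X[i][j]
+   // is one cache-friendly inner product.  The _LL variant differs only
+   // in using the two-word InnerProd_LL reduction, for parameters where
+   // l*(p-1)^2 would overflow the single-word bound assumed by
+   // InnerProd_L (see the dispatch in mul_base below).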
long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + long p = zz_p::modulus(); + sp_ll_reduce_struct ll_red_struct = zz_p::ll_red_struct(); + + const bool seq = double(n)*double(l)*double(m) < PAR_THRESH; + + NTL_GEXEC_RANGE(seq, m, first, last) { + + Vec B_col; + B_col.SetLength(l); + long *bp = B_col.elts(); + + long i, j, k; + + for (j = first; j < last; j++) { + for (k = 0; k < l; k++) bp[k] = rep(B[k][j]); + + for (i = 0; i < n; i++) { + const zz_p *ap = &A[i][0]; + X[i][j].LoopHole() = InnerProd_LL(bp, ap, l, p, ll_red_struct); + } + } + + } NTL_GEXEC_RANGE_END +} + + +#ifdef NTL_HAVE_AVX + +static +void blk_mul_DD(const mat_window_zz_p& X, + const const_mat_window_zz_p& A, const const_mat_window_zz_p& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + long p = zz_p::modulus(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + UniqueArray< AlignedArray > A_buf; + long npanels = (l+MAT_BLK_SZ-1)/MAT_BLK_SZ; + A_buf.SetLength(npanels); + + for (long kk = 0, panel = 0; kk < l; kk += MAT_BLK_SZ, panel++) { + long k_max = min(kk+MAT_BLK_SZ, l); + + A_buf[panel].SetLength(n * MAT_BLK_SZ); + double *abp = &A_buf[panel][0]; + + for (long i = 0; i < n; i++, abp += MAT_BLK_SZ) { + const zz_p *ap1 = &A[i][0]; + for (long k = kk; k < k_max; k++) { + abp[k-kk] = rep(ap1[k]); + } + for (long k = k_max; k < kk+MAT_BLK_SZ; k++) { + abp[k-kk] = 0; + } + } + } + + long nxpanels = (m+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + const bool seq = double(n)*double(l)*double(m) < PAR_THRESH; + + NTL_GEXEC_RANGE(seq, nxpanels, first, last) + NTL_IMPORT(n) + NTL_IMPORT(l) + NTL_IMPORT(m) + NTL_IMPORT(p) + NTL_IMPORT(red_struct) + + AlignedArray B_rec; + B_rec.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + double *brec = B_rec.get(); + + AlignedArray X_buf; + X_buf.SetLength(n*MAT_BLK_SZ); + double *xbp = X_buf.get(); + + long jj, kk; + long i, j, k; + long panel; + long xpanel; + + for (xpanel = first, jj = first*MAT_BLK_SZ; xpanel < last; + xpanel++, jj += MAT_BLK_SZ) { + + long j_max = min(jj+MAT_BLK_SZ, m); + + for (i = 0; i < n*MAT_BLK_SZ; i++) xbp[i] = 0; + + long red_trigger = (MAX_DBL_INT-(p-1))/((p-1)*(p-1)); + long red_count = red_trigger; + + for (kk = 0, panel = 0; kk < l; kk += MAT_BLK_SZ, panel++) { + long k_max = min(kk+MAT_BLK_SZ, l); + + for (k = kk; k < k_max; k++) { + const zz_p *bp = &B[k][0]; + for (j = jj; j < j_max; j++) + brec[(k-kk)*MAT_BLK_SZ+(j-jj)] = rep(bp[j]); + for (j = j_max; j < jj+MAT_BLK_SZ; j++) + brec[(k-kk)*MAT_BLK_SZ+(j-jj)] = 0; + } + + + if (red_count-MAT_BLK_SZ < 0) { + red_count = red_trigger; + for (i = 0; i < n*MAT_BLK_SZ; i++) + xbp[i] = rem((unsigned long)(long)xbp[i], p, red_struct); + } + + red_count = red_count-MAT_BLK_SZ; + + const double *abp = &A_buf[panel][0]; + + muladd_all_by_32(0, n, xbp, abp, brec, k_max-kk); + } + + + for (i = 0; i < n; i++) { + zz_p *xp = &X[i][0]; + for (j = jj; j < j_max; j++) + xp[j].LoopHole() = + rem((unsigned long)(long)xbp[i*MAT_BLK_SZ + (j-jj)], p, red_struct); + } + } + + NTL_GEXEC_RANGE_END +} + +#endif + + +static +void blk_mul_LL(const mat_window_zz_p& X, + const const_mat_window_zz_p& A, const const_mat_window_zz_p& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + long p = zz_p::modulus(); + sp_ll_reduce_struct ll_red_struct = zz_p::ll_red_struct(); + + Vec< Vec > A_buf; + Vec abufp; + long npanels = (l+MAT_BLK_SZ-1)/MAT_BLK_SZ; + A_buf.SetLength(npanels); + abufp.SetLength(npanels); + + for (long kk = 0, panel = 0; kk < l; kk += MAT_BLK_SZ, panel++) { + 
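+      // pack panel `panel' of A (columns kk..kk+MAT_BLK_SZ-1) into a
+      // contiguous n x MAT_BLK_SZ buffer, zero-padding past column l,
+      // so the muladd kernels can assume a fixed row stride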
long k_max = min(kk+MAT_BLK_SZ, l); + + A_buf[panel].SetLength(n * MAT_BLK_SZ); + long *abp = A_buf[panel].elts(); + abufp[panel] = abp; + + for (long i = 0; i < n; i++, abp += MAT_BLK_SZ) { + const zz_p *ap1 = &A[i][0]; + for (long k = kk; k < k_max; k++) { + abp[k-kk] = rep(ap1[k]); + } + for (long k = k_max; k < kk+MAT_BLK_SZ; k++) { + abp[k-kk] = 0; + } + } + } + + long nxpanels = (m+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + const bool seq = double(n)*double(l)*double(m) < PAR_THRESH; + + NTL_GEXEC_RANGE(seq, nxpanels, first, last) + NTL_IMPORT(n) + NTL_IMPORT(l) + NTL_IMPORT(m) + NTL_IMPORT(p) + NTL_IMPORT(ll_red_struct) + + UniqueArray B_rec; + B_rec.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + long *brec = B_rec.get(); + + UniqueArray X_buf; + X_buf.SetLength(n*MAT_BLK_SZ); + long *xbp = X_buf.get(); + + long jj, kk; + long i, j, k; + long panel; + long xpanel; + + for (xpanel = first, jj = first*MAT_BLK_SZ; xpanel < last; + xpanel++, jj += MAT_BLK_SZ) { + + long j_max = min(jj+MAT_BLK_SZ, m); + + for (i = 0; i < n*MAT_BLK_SZ; i++) xbp[i] = 0; + + for (kk = 0, panel = 0; kk < l; kk += MAT_BLK_SZ, panel++) { + long k_max = min(kk+MAT_BLK_SZ, l); + + // fill brec, transposed + + for (k = kk; k < k_max; k++) { + const zz_p *bp = &B[k][0]; + for (j = jj; j < j_max; j++) + brec[(k-kk)+(j-jj)*MAT_BLK_SZ] = rep(bp[j]); + for (j = j_max; j < jj+MAT_BLK_SZ; j++) + brec[(k-kk)+(j-jj)*MAT_BLK_SZ] = 0; + } + + const long *abp = abufp[panel]; + muladd_all_by_32(0, n, xbp, abp, brec, k_max-kk, p, ll_red_struct); + } + + + for (i = 0; i < n; i++) { + zz_p *xp = &X[i][0]; + for (j = jj; j < j_max; j++) + xp[j].LoopHole() = xbp[i*MAT_BLK_SZ + (j-jj)]; + } + } + + NTL_GEXEC_RANGE_END +} + + +static +void blk_mul_L(const mat_window_zz_p& X, + const const_mat_window_zz_p& A, const const_mat_window_zz_p& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + long p = zz_p::modulus(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + Vec< Vec > A_buf; + Vec abufp; + long npanels = (l+MAT_BLK_SZ-1)/MAT_BLK_SZ; + A_buf.SetLength(npanels); + abufp.SetLength(npanels); + + for (long kk = 0, panel = 0; kk < l; kk += MAT_BLK_SZ, panel++) { + long k_max = min(kk+MAT_BLK_SZ, l); + + A_buf[panel].SetLength(n * MAT_BLK_SZ); + uhlong *abp = A_buf[panel].elts(); + abufp[panel] = abp; + + for (long i = 0; i < n; i++, abp += MAT_BLK_SZ) { + const zz_p *ap1 = &A[i][0]; + for (long k = kk; k < k_max; k++) { + abp[k-kk] = rep(ap1[k]); + } + for (long k = k_max; k < kk+MAT_BLK_SZ; k++) { + abp[k-kk] = 0; + } + } + } + + long nxpanels = (m+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + const bool seq = double(n)*double(l)*double(m) < PAR_THRESH; + + NTL_GEXEC_RANGE(seq, nxpanels, first, last) + NTL_IMPORT(n) + NTL_IMPORT(l) + NTL_IMPORT(m) + NTL_IMPORT(p) + NTL_IMPORT(red_struct) + + UniqueArray B_rec; + B_rec.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + uhlong *brec = B_rec.get(); + + UniqueArray X_buf; + X_buf.SetLength(n*MAT_BLK_SZ); + unsigned long *xbp = X_buf.get(); + + long jj, kk; + long i, j, k; + long panel; + long xpanel; + + for (xpanel = first, jj = first*MAT_BLK_SZ; xpanel < last; + xpanel++, jj += MAT_BLK_SZ) { + + long j_max = min(jj+MAT_BLK_SZ, m); + + for (i = 0; i < n*MAT_BLK_SZ; i++) xbp[i] = 0; + + unsigned long ured_trigger = + (~(0UL)-cast_unsigned(p-1))/(cast_unsigned(p-1)*cast_unsigned(p-1)); + // NOTE: corner case at p == 2: need unsigned long to prevent overflow + + long red_trigger = min(cast_unsigned(NTL_MAX_LONG), ured_trigger); + + long red_count = red_trigger; + + for (kk = 0, panel = 0; kk < l; kk += 
MAT_BLK_SZ, panel++) { + long k_max = min(kk+MAT_BLK_SZ, l); + + // fill brec, transposed + + for (k = kk; k < k_max; k++) { + const zz_p *bp = &B[k][0]; + for (j = jj; j < j_max; j++) + brec[(k-kk)+(j-jj)*MAT_BLK_SZ] = rep(bp[j]); + for (j = j_max; j < jj+MAT_BLK_SZ; j++) + brec[(k-kk)+(j-jj)*MAT_BLK_SZ] = 0; + } + + if (red_count-MAT_BLK_SZ < 0) { + red_count = red_trigger; + for (i = 0; i < n*MAT_BLK_SZ; i++) + xbp[i] = rem(xbp[i], p, red_struct); + } + + red_count = red_count-MAT_BLK_SZ; + + const uhlong *abp = abufp[panel]; + + muladd_all_by_32(0, n, xbp, abp, brec, k_max-kk); + } + + + for (i = 0; i < n; i++) { + zz_p *xp = &X[i][0]; + for (j = jj; j < j_max; j++) + xp[j].LoopHole() = + rem(xbp[i*MAT_BLK_SZ + (j-jj)], p, red_struct); + } + } + + NTL_GEXEC_RANGE_END +} + + +#endif + + + + +static +void mul_base (const mat_window_zz_p& X, + const const_mat_window_zz_p& A, const const_mat_window_zz_p& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + if (n == 0 || l == 0 || m == 0) { + clear(X); + return; + } + + +#ifndef NTL_HAVE_LL_TYPE + + basic_mul(X, A, B); + +#else + + if (l < 32) { + //cerr << "basic_mul\n"; + basic_mul(X, A, B); + return; + } + + long p = zz_p::modulus(); + + if (n/MAT_BLK_SZ < 4 || l/MAT_BLK_SZ < 4 || m/MAT_BLK_SZ < 4) { + if (cast_unsigned(l) <= (~(0UL))/cast_unsigned(p-1) && + cast_unsigned(l)*cast_unsigned(p-1) <= (~(0UL))/cast_unsigned(p-1)) { + //cerr << "alt_mul_L\n"; + alt_mul_L(X, A, B); + } + else { + //cerr << "alt_mul_LL\n"; + alt_mul_LL(X, A, B); + } + + return; + } + + { + if (NTL_OVERFLOW(n, MAT_BLK_SZ, 0)) ResourceError("number too big"); + if (NTL_OVERFLOW(l, MAT_BLK_SZ, 0)) ResourceError("number too big"); + if (NTL_OVERFLOW(m, MAT_BLK_SZ, 0)) ResourceError("number too big"); + + long V = MAT_BLK_SZ*4; + +#ifdef NTL_HAVE_AVX + if (p-1 <= MAX_DBL_INT && + V <= (MAX_DBL_INT-(p-1))/(p-1) && + V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { + + // cerr << "block_mul_DD\n"; + blk_mul_DD(X, A, B); + } + else +#endif + if (cast_unsigned(V) <= (~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1) && + cast_unsigned(V)*cast_unsigned(p-1) <= (~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1)) { + + //cerr << "blk_mul_L\n"; + blk_mul_L(X, A, B); + + } + else { + + //cerr << "blk_mul_LL\n"; + blk_mul_LL(X, A, B); + } + } + +#endif + + +} + +// The following implementation of Strassen is derived directly +// from the implementation in FLINT v2.5.2 (see http://www.flintlib.org), +// although a number of details have changed. +// I include the original copyright notice from the file nmod_mat/mul_strassen.c +// in the FLINT distribution. + +/*============================================================================= + + This file is part of FLINT. + + FLINT is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + FLINT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with FLINT; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2008, Martin Albrecht + Copyright (C) 2008, 2009 William Hart. + Copyright (C) 2010, Fredrik Johansson + +******************************************************************************/ + + + + +void mul_strassen(const mat_window_zz_p& C, + const const_mat_window_zz_p& A, const const_mat_window_zz_p& B) +{ + long a, b, c; + long anr, anc, bnr, bnc; + + + a = A.NumRows(); + b = A.NumCols(); + c = B.NumCols(); + + + bool use_DD = false; + // this code determines if mul_base triggers blk_mul_DD, + // in which case a higher crossover is used + +#if (defined(NTL_HAVE_LL_TYPE) && defined(NTL_HAVE_AVX)) + { + long V = MAT_BLK_SZ*4; + long p = zz_p::modulus(); + + if (p-1 <= MAX_DBL_INT && + V <= (MAX_DBL_INT-(p-1))/(p-1) && + V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) + { + use_DD = true; + } + } +#endif + + long nt = AvailableThreads(); + + long xover; + // now we set the crossover -- it is kind of a heauristic + // mess based on nt and use_DD...I've run some tests to + // make sure these settings are reasonable, but a more + // rational approach would be preferable + + if (nt > 1) { + if (use_DD || nt > 8192/(2*MAT_BLK_SZ)) + xover = 8192; + else + xover = max(800, nt*2*MAT_BLK_SZ); + } + else { + if (use_DD) + xover = 800; + else + xover = 448; + } + + if (a <= xover || b <= xover || c <= xover) + { + mul_base(C, A, B); + return; + } + + anr = a / 2; + anc = b / 2; + bnr = anc; + bnc = c / 2; + + const_mat_window_zz_p A11(A, 0, 0, anr, anc); + const_mat_window_zz_p A12(A, 0, anc, anr, 2*anc); + const_mat_window_zz_p A21(A, anr, 0, 2*anr, anc); + const_mat_window_zz_p A22(A, anr, anc, 2*anr, 2*anc); + + const_mat_window_zz_p B11(B, 0, 0, bnr, bnc); + const_mat_window_zz_p B12(B, 0, bnc, bnr, 2*bnc); + const_mat_window_zz_p B21(B, bnr, 0, 2*bnr, bnc); + const_mat_window_zz_p B22(B, bnr, bnc, 2*bnr, 2*bnc); + + mat_window_zz_p C11(C, 0, 0, anr, bnc); + mat_window_zz_p C12(C, 0, bnc, anr, 2*bnc); + mat_window_zz_p C21(C, anr, 0, 2*anr, bnc); + mat_window_zz_p C22(C, anr, bnc, 2*anr, 2*bnc); + + mat_zz_p X1_store; + X1_store.SetDims(anr, max(bnc, anc)); + + mat_window_zz_p X1a(X1_store, 0, 0, anr, anc); + mat_window_zz_p X1b(X1_store, 0, 0, anr, bnc); + + mat_zz_p X2; + X2.SetDims(anc, bnc); + + /* + See Jean-Guillaume Dumas, Clement Pernet, Wei Zhou; "Memory + efficient scheduling of Strassen-Winograd's matrix multiplication + algorithm"; http://arxiv.org/pdf/0707.2347v3 for reference on the + used operation scheduling. 
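+
+   With this schedule, the seven recursive products and fifteen
+   additions/subtractions of Strassen-Winograd reuse just the two
+   scratch matrices X1 and X2 together with the four quadrants of C,
+   so the extra storage per recursion level is roughly
+   anr*max(bnc,anc) + anc*bnc words.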
+ */ + + sub(X1a, A11, A21); + sub(X2, B22, B12); + mul_strassen(C21, X1a, X2); + + add(X1a, A21, A22); + sub(X2, B12, B11); + mul_strassen(C22, X1a, X2); + + sub(X1a, X1a, A11); + sub(X2, B22, X2); + mul_strassen(C12, X1a, X2); + + sub(X1a, A12, X1a); + mul_strassen(C11, X1a, B22); + + + mul_strassen(X1b, A11, B11); + + add(C12, X1b, C12); + add(C21, C12, C21); + add(C12, C12, C22); + add(C22, C21, C22); + add(C12, C12, C11); + sub(X2, X2, B21); + mul_strassen(C11, A22, X2); + + X2.kill(); + + sub(C21, C21, C11); + mul_strassen(C11, A12, B21); + + add(C11, X1b, C11); + + X1_store.kill(); + + if (c > 2*bnc) /* A by last col of B -> last col of C */ + { + const_mat_window_zz_p Bc(B, 0, 2*bnc, b, c); + mat_window_zz_p Cc(C, 0, 2*bnc, a, c); + + mul_strassen(Cc, A, Bc); + } + + if (a > 2*anr) /* last row of A by B -> last row of C */ + { + const_mat_window_zz_p Ar(A, 2*anr, 0, a, b); + mat_window_zz_p Cr(C, 2*anr, 0, a, c); + mul_strassen(Cr, Ar, B); + } + + if (b > 2*anc) /* last col of A by last row of B -> C */ + { + const_mat_window_zz_p Ac(A, 0, 2*anc, 2*anr, b); + const_mat_window_zz_p Br(B, 2*bnr, 0, b, 2*bnc); + mat_window_zz_p Cb(C, 0, 0, 2*anr, 2*bnc); + + // Cb += Ac*Br + mat_zz_p tmp; + tmp.SetDims(Cb.NumRows(), Cb.NumCols()); + mul_strassen(tmp, Ac, Br); + add(Cb, Cb, tmp); + } +} + + + + + + + +static +void mul_aux(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + if (l != B.NumRows()) + LogicError("matrix mul: dimension mismatch"); + + X.SetDims(n, m); + + if (n == 0 || l == 0 || m == 0) { + clear(X); + return; + } + + mul_strassen(X, A, B); +} + + +void mul(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B) +{ + if (&X == &A || &X == &B) { + mat_zz_p tmp; + mul_aux(tmp, A, B); + X = tmp; + } + else + mul_aux(X, A, B); +} + + +// ****************************************************************** +// +// Matrix inversion code +// +// ****************************************************************** + +static +long relaxed_InvModStatus(long& x, long a, long n, bool relax) +{ + if (relax) { + return InvModStatus(x, a, n); + } + else { + x = InvMod(a, n); + return 0; + } +} + +static +void basic_inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + + if (n == 0) { + set(d); + X.SetDims(0, 0); + return; + } + + + Mat M; + conv(M, A); + // scratch space + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + bool seq = n < PAR_THRESH_SQ; + + bool pivoting = false; + + for (long k = 0; k < n; k++) { + long pos = -1; + long pivot_inv; + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + long pivot = M[i][k]; + if (pivot != 0 && !relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + } + + det = MulMod(det, M[k][k], p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); + long * NTL_RESTRICT y = &M[k][0]; + for (long j = 0; j < n; j++) + y[j] = MulModPrecon(y[j], t1, p, t1pinv); + + y[k] = pivot_inv; + } + + + + NTL_GEXEC_RANGE(seq, n, first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + 
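+   // (NTL_IMPORT gives each thread a local copy of an outer variable;
+   // rows first..last-1 are eliminated against pivot row k
+   // independently, so the threads need no synchronization)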
NTL_IMPORT(k) + long * NTL_RESTRICT y = &M[k][0]; + for (long i = first; i < last; i++) { + if (i == k) continue; // skip row k + + long * NTL_RESTRICT x = &M[i][0]; + long t1 = x[k]; + t1 = NegateMod(t1, p); + x[k] = 0; + if (t1 == 0) continue; + + // add t1 * row k to row i + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); + + for (long j = 0; j < n; j++) { + long t2 = MulModPrecon(y[j], t1, p, t1pinv); + x[j] = AddMod(x[j], t2, p); + } + } + NTL_GEXEC_RANGE_END + } + else { + clear(d); + return; + } + } + + if (pivoting) { + // pivot colums, using reverse swap sequence + + for (long i = 0; i < n; i++) { + long * NTL_RESTRICT x = &M[i][0]; + + for (long k = n-1; k >= 0; k--) { + long pos = P[k]; + if (pos != k) _ntl_swap(x[pos], x[k]); + } + } + } + + X.SetDims(n, n); + for (long i = 0; i < n; i++) + for (long j = 0; j < n; j++) + X[i][j].LoopHole() = M[i][j]; + + d.LoopHole() = det; +} + + + +#ifdef NTL_HAVE_LL_TYPE + + + +static +void alt_inv_L(zz_p& d, mat_zz_p& X, const mat_zz_p& A, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + + if (n == 0) { + set(d); + X.SetDims(0, 0); + return; + } + + + Mat M; + conv(M, A); + // scractch space + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + + + bool seq = n < PAR_THRESH_SQ; + + bool pivoting = false; + + unsigned long ured_trigger = + (~(0UL)-cast_unsigned(p-1))/(cast_unsigned(p-1)*cast_unsigned(p-1)); + // NOTE: corner case at p == 2: need unsigned long to prevent overflow + + long red_trigger = min(cast_unsigned(NTL_MAX_LONG), ured_trigger); + + long red_count = red_trigger; + + + for (long k = 0; k < n; k++) { + bool cleanup = false; + + if (red_count-1 < 0) { + red_count = red_trigger; + cleanup = true; + } + + red_count = red_count-1; + + long pos = -1; + long pivot; + long pivot_inv; + + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + pivot = rem(M[i][k], p, red_struct); + if (pivot != 0 && !relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + } + + det = MulMod(det, pivot, p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); // t1*pinv; + unsigned long * NTL_RESTRICT y = &M[k][0]; + for (long j = 0; j < n; j++) { + long t2 = rem(y[j], p, red_struct); + y[j] = MulModPrecon(t2, t1, p, t1pinv); + } + + y[k] = pivot_inv; + } + + + NTL_GEXEC_RANGE(seq, n, first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(k) + NTL_IMPORT(red_struct) + unsigned long * NTL_RESTRICT y = &M[k][0]; + if (cleanup) { + for (long i = first; i < last; i++) { + if (i == k) continue; + // skip row k: the data won't change, but it + // technically is a race condition in a multi-theaded + // execution, and it would violate the "restrict" + // contract + + unsigned long * NTL_RESTRICT x = &M[i][0]; + for (long j = 0; j < n; j++) { + x[j] = rem(x[j], p, red_struct); + } + } + } + + + for (long i = first; i < last; i++) { + if (i == k) continue; // skip row k + + unsigned long * NTL_RESTRICT x = &M[i][0]; + long t1 = rem(x[k], p, red_struct); + t1 = NegateMod(t1, p); + x[k] = 0; + if (t1 == 0) continue; + + 
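+         // Lazy reduction (w = NTL_BITS_PER_LONG): the update below adds
+         // y[j]*t1 <= (p-1)^2 to each unreduced entry without a rem().
+         // Starting from a cleaned-up value <= p-1, after c passes an
+         // entry is at most (p-1) + c*(p-1)^2 < 2^w as long as
+         // c <= red_trigger = (2^w-1 - (p-1)) / (p-1)^2, which is the
+         // invariant maintained by red_count/cleanup above.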
// add t1 * row k to row i + unsigned long ut1 = t1; + long j; + for (j = 0; j <= n-4; j+=4) { + unsigned long xj0 = x[j+0] + DO_MUL(y[j+0], ut1); + unsigned long xj1 = x[j+1] + DO_MUL(y[j+1], ut1); + unsigned long xj2 = x[j+2] + DO_MUL(y[j+2], ut1); + unsigned long xj3 = x[j+3] + DO_MUL(y[j+3], ut1); + x[j+0] = xj0; + x[j+1] = xj1; + x[j+2] = xj2; + x[j+3] = xj3; + } + for (; j < n; j++) { + x[j] += DO_MUL(y[j], ut1); + } + } + NTL_GEXEC_RANGE_END + } + else { + clear(d); + return; + } + } + + if (pivoting) { + // pivot colums, using reverse swap sequence + + for (long i = 0; i < n; i++) { + unsigned long * NTL_RESTRICT x = &M[i][0]; + + for (long k = n-1; k >= 0; k--) { + long pos = P[k]; + if (pos != k) _ntl_swap(x[pos], x[k]); + } + } + } + + X.SetDims(n, n); + for (long i = 0; i < n; i++) + for (long j = 0; j < n; j++) + X[i][j].LoopHole() = rem(M[i][j], p, red_struct); + + d.LoopHole() = det; +} + + + + + +#ifdef NTL_HAVE_AVX + +static +void alt_inv_DD(zz_p& d, mat_zz_p& X, const mat_zz_p& A, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + + if (n == 0) { + set(d); + X.SetDims(0, 0); + return; + } + + Vec< AlignedArray > M; + M.SetLength(n); + for (long i = 0; i < n; i++) M[i].SetLength(n); + + for (long i = 0; i < n; i++) { + for (long j = 0; j < n; j++) + M[i][j] = rep(A[i][j]); + } + + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + + + bool seq = n < PAR_THRESH_SQ; + + bool pivoting = false; + + long red_trigger = (MAX_DBL_INT-(p-1))/((p-1)*(p-1)); + long red_count = red_trigger; + + for (long k = 0; k < n; k++) { + bool cleanup = false; + + if (red_count-1 < 0) { + red_count = red_trigger; + cleanup = true; + } + + red_count = red_count-1; + + long pos = -1; + long pivot; + long pivot_inv; + + + + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + pivot = rem((unsigned long)(long)M[i][k], p, red_struct); + if (pivot != 0 && !relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + } + + det = MulMod(det, pivot, p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); // t1*pinv; + double * NTL_RESTRICT y = &M[k][0]; + for (long j = 0; j < n; j++) { + long t2 = rem((unsigned long)(long)y[j], p, red_struct); + y[j] = MulModPrecon(t2, t1, p, t1pinv); + } + + y[k] = pivot_inv; + } + + + NTL_GEXEC_RANGE(seq, n, first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(k) + NTL_IMPORT(red_struct) + double * NTL_RESTRICT y = &M[k][0]; + if (cleanup) { + for (long i = first; i < last; i++) { + if (i == k) continue; + // skip row k: the data won't change, but it + // technically is a race condition in a multi-theaded + // execution, and it would violate the "restrict" + // contract + + double * NTL_RESTRICT x = &M[i][0]; + for (long j = 0; j < n; j++) { + x[j] = rem((unsigned long)(long)x[j], p, red_struct); + } + } + } + + + for (long i = first; i < last; i++) { + if (i == k) continue; // skip row k + + double * NTL_RESTRICT x = &M[i][0]; + long t1 = rem((unsigned long)(long)x[k], p, red_struct); + t1 = NegateMod(t1, p); + x[k] = 0; + if (t1 == 0) 
continue; + + // add t1 * row k to row i + double ut1 = t1; + muladd_interval1(x, y, ut1, n); + } + NTL_GEXEC_RANGE_END + } + else { + clear(d); + return; + } + } + + + if (pivoting) { + // pivot colums, using reverse swap sequence + + for (long i = 0; i < n; i++) { + double * NTL_RESTRICT x = &M[i][0]; + + for (long k = n-1; k >= 0; k--) { + long pos = P[k]; + if (pos != k) _ntl_swap(x[pos], x[k]); + } + } + } + + + X.SetDims(n, n); + for (long i = 0; i < n; i++) + for (long j = 0; j < n; j++) + X[i][j].LoopHole() = rem((unsigned long)(long)M[i][j], p, red_struct); + + d.LoopHole() = det; +} + +#endif + + + + + +#ifdef NTL_HAVE_AVX + +static +void blk_inv_DD(zz_p& d, mat_zz_p& X, const mat_zz_p& A, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + + if (n == 0) { + set(d); + X.SetDims(0, 0); + return; + } + + if (NTL_OVERFLOW(n, MAT_BLK_SZ, 0)) ResourceError("dimension too large"); + + long npanels = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + + Vec< AlignedArray > M; + M.SetLength(npanels); + for (long panel = 0; panel < npanels; panel++) { + M[panel].SetLength(n*MAT_BLK_SZ); + double *panelp = &M[panel][0]; + + for (long r = 0; r < n*MAT_BLK_SZ; r++) panelp[r] = 0; + } + + // copy A into panels + for (long jj = 0, panel = 0; jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + double *panelp = &M[panel][0]; + + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + const zz_p *ap = A[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + panelp[j-jj] = rep(ap[j-jj]); + } + } + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + + bool seq = double(n)*double(n)*double(MAT_BLK_SZ) < PAR_THRESH; + + bool pivoting = false; + + long red_trigger = (MAX_DBL_INT-(p-1))/((p-1)*(p-1)); + long red_count = red_trigger; + + for (long kk = 0, kpanel = 0; kk < n; kk += MAT_BLK_SZ, kpanel++) { + long k_max = min(kk+MAT_BLK_SZ, n); + + bool cleanup = false; + + if (red_count-MAT_BLK_SZ < 0) { + red_count = red_trigger; + cleanup = true; + } + + red_count = red_count-MAT_BLK_SZ; + double * NTL_RESTRICT kpanelp = &M[kpanel][0]; + + if (cleanup) { + for (long r = 0; r < n*MAT_BLK_SZ; r++) + kpanelp[r] = rem((unsigned long)(long)kpanelp[r], p, red_struct); + } + + for (long k = kk; k < k_max; k++) { + + long pos = -1; + long pivot; + long pivot_inv; + + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + pivot = rem((unsigned long)(long)kpanelp[i*MAT_BLK_SZ+(k-kk)], p, red_struct); + if (pivot != 0 && !relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos == -1) { + clear(d); + return; + } + + double * NTL_RESTRICT y = &kpanelp[k*MAT_BLK_SZ]; + if (k != pos) { + // swap rows pos and k + double * NTL_RESTRICT x = &kpanelp[pos*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) _ntl_swap(x[j], y[j]); + + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + } + + det = MulMod(det, pivot, p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); + for (long j = 0; j < MAT_BLK_SZ; j++) { + long t2 = rem((unsigned long)(long)y[j], p, red_struct); + y[j] = MulModPrecon(t2, t1, p, t1pinv); + } + + y[k-kk] = pivot_inv; + } + + for (long i = 0; i < n; i++) { + if (i == 
k) continue; // skip row k + + double * NTL_RESTRICT x = &kpanelp[i*MAT_BLK_SZ]; + long t1 = rem((unsigned long)(long)x[k-kk], p, red_struct); + t1 = NegateMod(t1, p); + x[k-kk] = 0; + if (t1 == 0) continue; + + // add t1 * row k to row i + double ut1 = t1; + muladd_interval(x, y, ut1, MAT_BLK_SZ); + } + } + + + // finished processing current kpanel + // next, reduce and apply to all other kpanels + + for (long r = 0; r < n*MAT_BLK_SZ; r++) + kpanelp[r] = rem((unsigned long)(long)kpanelp[r], p, red_struct); + + // special processing: subtract 1 off of the diagonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] = SubMod((long)kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + + NTL_GEXEC_RANGE(seq, npanels, first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(red_struct) + NTL_IMPORT(kpanel) + NTL_IMPORT(kpanelp) + NTL_IMPORT(kk) + NTL_IMPORT(k_max) + + + AlignedArray<double> buf_store; + buf_store.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + double *buf = &buf_store[0]; + + for (long jpanel = first; jpanel < last; jpanel++) { + if (jpanel == kpanel) continue; + + double * NTL_RESTRICT jpanelp = &M[jpanel][0]; + + if (cleanup) { + for (long r = 0; r < n*MAT_BLK_SZ; r++) + jpanelp[r] = rem((unsigned long)(long)jpanelp[r], p, red_struct); + } + + // perform swaps + for (long k = kk; k < k_max; k++) { + long pos = P[k]; + if (pos != k) { + // swap rows pos and k + double * NTL_RESTRICT pos_p = &jpanelp[pos*MAT_BLK_SZ]; + double * NTL_RESTRICT k_p = &jpanelp[k*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) + _ntl_swap(pos_p[j], k_p[j]); + } + } + + // copy block number kpanel (the one on the diagonal) into buf + + for (long i = 0; i < (k_max-kk)*MAT_BLK_SZ; i++) + buf[i] = rem((unsigned long)(long)jpanelp[kk*MAT_BLK_SZ+i], p, red_struct); + + // jpanel += kpanel*buf + + muladd_all_by_32(0, n, jpanelp, kpanelp, buf, k_max-kk); + } + + NTL_GEXEC_RANGE_END + + // special processing: add 1 back to the diagonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] = AddMod((long)kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + } + + if (pivoting) { + // pivot columns, using reverse swap sequence + + for (long k = n-1; k >= 0; k--) { + long pos = P[k]; + if (pos != k) { + // swap columns pos and k + + double * NTL_RESTRICT x = &M[pos / MAT_BLK_SZ][pos % MAT_BLK_SZ]; + double * NTL_RESTRICT y = &M[k / MAT_BLK_SZ][k % MAT_BLK_SZ]; + for (long i = 0; i < n; i++) { + _ntl_swap(x[i*MAT_BLK_SZ], y[i*MAT_BLK_SZ]); + } + } + } + } + + + // copy panels into X + X.SetDims(n, n); + for (long jj = 0, panel = 0; jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + double *panelp = &M[panel][0]; + + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + zz_p *xp = X[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + xp[j-jj].LoopHole() = rem((unsigned long)(long)panelp[j-jj], p, red_struct); + } + } + + d.LoopHole() = det; + +} + +#endif + + + +static +void blk_inv_L(zz_p& d, mat_zz_p& X, const mat_zz_p& A, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + + if (n == 0) { + set(d); + X.SetDims(0, 0); + return; + } + + if (NTL_OVERFLOW(n, MAT_BLK_SZ, 0)) ResourceError("dimension too large"); + + long npanels = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + Vec< UniqueArray<unsigned long> > M; + M.SetLength(npanels); + for (long panel = 0; panel < npanels; panel++) { + M[panel].SetLength(n*MAT_BLK_SZ); + unsigned long *panelp = &M[panel][0]; + + for (long r = 0; r < n*MAT_BLK_SZ; r++) panelp[r] = 0; + } + + // copy A into panels + for (long jj = 0, 
panel = 0; jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + unsigned long *panelp = &M[panel][0]; + + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + const zz_p *ap = A[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + panelp[j-jj] = rep(ap[j-jj]); + } + } + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + + bool seq = double(n)*double(n)*double(MAT_BLK_SZ) < PAR_THRESH; + + bool pivoting = false; + + unsigned long ured_trigger = + (~(0UL)-cast_unsigned(p-1))/(cast_unsigned(p-1)*cast_unsigned(p-1)); + // NOTE: corner case at p == 2: need unsigned long to prevent overflow + + long red_trigger = min(cast_unsigned(NTL_MAX_LONG), ured_trigger); + + long red_count = red_trigger; + + for (long kk = 0, kpanel = 0; kk < n; kk += MAT_BLK_SZ, kpanel++) { + long k_max = min(kk+MAT_BLK_SZ, n); + + bool cleanup = false; + + if (red_count-MAT_BLK_SZ < 0) { + red_count = red_trigger; + cleanup = true; + } + + red_count = red_count-MAT_BLK_SZ; + unsigned long * NTL_RESTRICT kpanelp = &M[kpanel][0]; + + if (cleanup) { + for (long r = 0; r < n*MAT_BLK_SZ; r++) + kpanelp[r] = rem(kpanelp[r], p, red_struct); + } + + for (long k = kk; k < k_max; k++) { + + long pos = -1; + long pivot; + long pivot_inv; + + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + pivot = rem(kpanelp[i*MAT_BLK_SZ+(k-kk)], p, red_struct); + if (pivot != 0 && !relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos == -1) { + clear(d); + return; + } + + unsigned long * NTL_RESTRICT y = &kpanelp[k*MAT_BLK_SZ]; + if (k != pos) { + // swap rows pos and k + unsigned long * NTL_RESTRICT x = &kpanelp[pos*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) _ntl_swap(x[j], y[j]); + + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + } + + det = MulMod(det, pivot, p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); + for (long j = 0; j < MAT_BLK_SZ; j++) { + long t2 = rem(y[j], p, red_struct); + y[j] = MulModPrecon(t2, t1, p, t1pinv); + } + + y[k-kk] = pivot_inv; + } + + for (long i = 0; i < n; i++) { + if (i == k) continue; // skip row k + + unsigned long * NTL_RESTRICT x = &kpanelp[i*MAT_BLK_SZ]; + long t1 = rem(x[k-kk], p, red_struct); + t1 = NegateMod(t1, p); + x[k-kk] = 0; + if (t1 == 0) continue; + + // add t1 * row k to row i + unsigned long ut1 = t1; + muladd_interval(x, y, ut1, MAT_BLK_SZ); + } + } + + + // finished processing current kpanel + // next, reduce and apply to all other kpanels + + for (long r = 0; r < n*MAT_BLK_SZ; r++) + kpanelp[r] = rem(kpanelp[r], p, red_struct); + + // special processing: subtract 1 off of diangonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] = SubMod((long)kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + + NTL_GEXEC_RANGE(seq, npanels, first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(red_struct) + NTL_IMPORT(kpanel) + NTL_IMPORT(kpanelp) + NTL_IMPORT(kk) + NTL_IMPORT(k_max) + + + UniqueArray buf_store; + buf_store.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + unsigned long *buf = &buf_store[0]; + + for (long jpanel = first; jpanel < last; jpanel++) { + if (jpanel == kpanel) continue; + + unsigned long * NTL_RESTRICT jpanelp = &M[jpanel][0]; + + if 
(cleanup) { + for (long r = 0; r < n*MAT_BLK_SZ; r++) + jpanelp[r] = rem(jpanelp[r], p, red_struct); + } + + // perform swaps + for (long k = kk; k < k_max; k++) { + long pos = P[k]; + if (pos != k) { + // swap rows pos and k + unsigned long * NTL_RESTRICT pos_p = &jpanelp[pos*MAT_BLK_SZ]; + unsigned long * NTL_RESTRICT k_p = &jpanelp[k*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) + _ntl_swap(pos_p[j], k_p[j]); + } + } + + // copy block number kpanel (the one on the diagonal) into buf + // here, we transpose it + + for (long k = kk; k < k_max; k++) + for (long j = 0; j < MAT_BLK_SZ; j++) + buf[j*MAT_BLK_SZ + (k-kk)] = + rem(jpanelp[k*MAT_BLK_SZ+j], p, red_struct); + + // jpanel += kpanel*buf + + muladd_all_by_32(0, n, jpanelp, kpanelp, buf, k_max-kk); + } + + NTL_GEXEC_RANGE_END + + // special processing: add 1 back to the diangonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] = AddMod((long)kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + } + + if (pivoting) { + // pivot colums, using reverse swap sequence + + for (long k = n-1; k >= 0; k--) { + long pos = P[k]; + if (pos != k) { + // swap columns pos and k + + unsigned long * NTL_RESTRICT x = &M[pos / MAT_BLK_SZ][pos % MAT_BLK_SZ]; + unsigned long * NTL_RESTRICT y = &M[k / MAT_BLK_SZ][k % MAT_BLK_SZ]; + for (long i = 0; i < n; i++) { + _ntl_swap(x[i*MAT_BLK_SZ], y[i*MAT_BLK_SZ]); + } + } + } + } + + // copy panels into X + X.SetDims(n, n); + for (long jj = 0, panel = 0; jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + unsigned long *panelp = &M[panel][0]; + + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + zz_p *xp = X[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + xp[j-jj].LoopHole() = rem(panelp[j-jj], p, red_struct); + } + } + + d.LoopHole() = det; + +} + + + + + + + + +static +void blk_inv_LL(zz_p& d, mat_zz_p& X, const mat_zz_p& A, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + + if (n == 0) { + set(d); + X.SetDims(0, 0); + return; + } + + if (NTL_OVERFLOW(n, MAT_BLK_SZ, 0)) ResourceError("dimension too big"); + + long npanels = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + Vec< UniqueArray > M; + M.SetLength(npanels); + for (long panel = 0; panel < npanels; panel++) { + M[panel].SetLength(n*MAT_BLK_SZ); + long *panelp = &M[panel][0]; + + for (long r = 0; r < n*MAT_BLK_SZ; r++) panelp[r] = 0; + } + + + // copy A into panels + for (long jj = 0, panel = 0; jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + long *panelp = &M[panel][0]; + + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + const zz_p *ap = A[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + panelp[j-jj] = rep(ap[j-jj]); + } + } + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_ll_reduce_struct ll_red_struct = zz_p::ll_red_struct(); + + + bool seq = double(n)*double(n)*double(MAT_BLK_SZ) < PAR_THRESH; + + bool pivoting = false; + + for (long kk = 0, kpanel = 0; kk < n; kk += MAT_BLK_SZ, kpanel++) { + long k_max = min(kk+MAT_BLK_SZ, n); + + long * NTL_RESTRICT kpanelp = &M[kpanel][0]; + + + for (long k = kk; k < k_max; k++) { + + long pos = -1; + long pivot; + long pivot_inv; + + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + pivot = kpanelp[i*MAT_BLK_SZ+(k-kk)]; + if (pivot != 0 && 
!relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos == -1) { + clear(d); + return; + } + + long * NTL_RESTRICT y = &kpanelp[k*MAT_BLK_SZ]; + if (k != pos) { + // swap rows pos and k + long * NTL_RESTRICT x = &kpanelp[pos*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) _ntl_swap(x[j], y[j]); + + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + } + + det = MulMod(det, pivot, p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); + for (long j = 0; j < MAT_BLK_SZ; j++) { + y[j] = MulModPrecon(y[j], t1, p, t1pinv); + } + + y[k-kk] = pivot_inv; + } + + for (long i = 0; i < n; i++) { + if (i == k) continue; // skip row k + + long * NTL_RESTRICT x = &kpanelp[i*MAT_BLK_SZ]; + long t1 = x[k-kk]; + t1 = NegateMod(t1, p); + x[k-kk] = 0; + if (t1 == 0) continue; + + // add t1 * row k to row i + long ut1 = t1; + muladd_interval(x, y, ut1, MAT_BLK_SZ, p, pinv); + } + } + + + // finished processing current kpanel + // next, reduce and apply to all other kpanels + + // special processing: subtract 1 off of the diagonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] = SubMod(kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + + NTL_GEXEC_RANGE(seq, npanels, first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(ll_red_struct) + NTL_IMPORT(kpanel) + NTL_IMPORT(kpanelp) + NTL_IMPORT(kk) + NTL_IMPORT(k_max) + + + UniqueArray<long> buf_store; + buf_store.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + long *buf = &buf_store[0]; + + for (long jpanel = first; jpanel < last; jpanel++) { + if (jpanel == kpanel) continue; + + long * NTL_RESTRICT jpanelp = &M[jpanel][0]; + + // perform swaps + for (long k = kk; k < k_max; k++) { + long pos = P[k]; + if (pos != k) { + // swap rows pos and k + long * NTL_RESTRICT pos_p = &jpanelp[pos*MAT_BLK_SZ]; + long * NTL_RESTRICT k_p = &jpanelp[k*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) + _ntl_swap(pos_p[j], k_p[j]); + } + } + + // copy block number kpanel (the one on the diagonal) into buf + // here, we transpose it + + for (long k = kk; k < k_max; k++) + for (long j = 0; j < MAT_BLK_SZ; j++) + buf[j*MAT_BLK_SZ + (k-kk)] = + jpanelp[k*MAT_BLK_SZ+j]; + + + // jpanel += kpanel*buf + + muladd_all_by_32(0, n, jpanelp, kpanelp, buf, k_max-kk, p, ll_red_struct); + } + + NTL_GEXEC_RANGE_END + + // special processing: add 1 back to the diagonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] = AddMod(kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + } + + if (pivoting) { + // pivot columns, using reverse swap sequence + + for (long k = n-1; k >= 0; k--) { + long pos = P[k]; + if (pos != k) { + // swap columns pos and k + + long * NTL_RESTRICT x = &M[pos / MAT_BLK_SZ][pos % MAT_BLK_SZ]; + long * NTL_RESTRICT y = &M[k / MAT_BLK_SZ][k % MAT_BLK_SZ]; + for (long i = 0; i < n; i++) { + _ntl_swap(x[i*MAT_BLK_SZ], y[i*MAT_BLK_SZ]); + } + } + } + } + + // copy panels into X + X.SetDims(n, n); + for (long jj = 0, panel = 0; jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + long *panelp = &M[panel][0]; + + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + zz_p *xp = X[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + xp[j-jj].LoopHole() = panelp[j-jj]; + } + } + + d.LoopHole() = det; + +} + + + +#endif + + + +void relaxed_inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + +#ifndef NTL_HAVE_LL_TYPE + + basic_inv(d, X, A, 
relax); + +#else + + long p = zz_p::modulus(); + + if (n < 16) { + //cerr << "basic_inv\n"; + basic_inv(d, X, A, relax); + } + else if (n/MAT_BLK_SZ < 4) { + long V = 64; + +#ifdef NTL_HAVE_AVX + if (p-1 <= MAX_DBL_INT && + V <= (MAX_DBL_INT-(p-1))/(p-1) && + V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { + + //cerr << "alt_inv_DD\n"; + alt_inv_DD(d, X, A, relax); + } + else +#endif + if (cast_unsigned(V) <= (~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1) && + cast_unsigned(V)*cast_unsigned(p-1) <= (~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1)) { + + //cerr << "alt_inv_L\n"; + alt_inv_L(d, X, A, relax); + + } + else { + + //cerr << "basic_inv\n"; + basic_inv(d, X, A, relax); + } + } + else { + long V = 4*MAT_BLK_SZ; + +#ifdef NTL_HAVE_AVX + if (p-1 <= MAX_DBL_INT && + V <= (MAX_DBL_INT-(p-1))/(p-1) && + V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { + + //cerr << "blk_inv_DD\n"; + blk_inv_DD(d, X, A, relax); + } + else +#endif + if (cast_unsigned(V) <= (~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1) && + cast_unsigned(V)*cast_unsigned(p-1) <= (~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1)) { + + //cerr << "blk_inv_L\n"; + blk_inv_L(d, X, A, relax); + + } + else { + + //cerr << "blk_inv_LL\n"; + blk_inv_LL(d, X, A, relax); + } + + } + +#endif + + + +} + + + +// ****************************************************************** +// +// Triangularizing square matrices, with applications +// to solving linear systems and computing determinants. +// Should be about 3x faster than the matrix inverse +// algorithms. +// +// ****************************************************************** + + +static +void basic_tri(zz_p& d, const mat_zz_p& A, const vec_zz_p *bp, + vec_zz_p *xp, bool trans, bool relax) +{ + long n = A.NumRows(); + + // adjust + if (A.NumCols() != n) + LogicError("tri: nonsquare matrix"); + + // adjust + if (bp && bp->length() != n) + LogicError("tri: dimension mismatch"); + + // adjust + if (bp && !xp) + LogicError("tri: bad args"); + + if (n == 0) { + set(d); + // adjust + if (xp) xp->SetLength(0); + return; + } + + // adjust (several lines) + // scratch space + Mat M; + if (!trans) { + conv(M, A); + } + else { + M.SetDims(n, n); + for (long i = 0; i < n; i++) + for (long j = 0; j < n; j++) + M[i][j] = rep(A[j][i]); + } + + Vec bv; + if (bp) conv(bv, *bp); + // end adjust + + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + + bool pivoting = false; + + for (long k = 0; k < n; k++) { + long pos = -1; + long pivot_inv; + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + long pivot = M[i][k]; + if (pivot != 0 && !relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + + // adjust + if (bp) _ntl_swap(bv[pos], bv[k]); + } + + det = MulMod(det, M[k][k], p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); + long * NTL_RESTRICT y = &M[k][0]; + // adjust + for (long j = k+1; j < n; j++) + y[j] = MulModPrecon(y[j], t1, p, t1pinv); + + // adjust // y[k] = pivot_inv; + + // adjust + if (bp) bv[k] = MulModPrecon(bv[k], t1, p, t1pinv); + } + + + + // adjust + bool seq = n-(k+1) < PAR_THRESH_SQ; + NTL_GEXEC_RANGE(seq, n-(k+1), first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) 
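+ // NOTE: NTL_GEXEC_RANGE(seq, cnt, first, last) runs the enclosed body
+ // sequentially when seq is true, and otherwise splits [0..cnt) across the
+ // thread pool, each worker seeing its own subrange [first..last); the
+ // surrounding NTL_IMPORT(v) lines make worker-local copies of captured
+ // variables so they can be treated as loop-invariant. A rough
+ // single-worker equivalent of this region:
+ //
+ //    long first = 0, last = n-(k+1);
+ //    for (long ii = first; ii < last; ii++) { /* elimination body below */ }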
+ NTL_IMPORT(k) + long * NTL_RESTRICT y = &M[k][0]; + + // adjust + for (long ii = first; ii < last; ii++) { + long i = ii + k+1; + + long * NTL_RESTRICT x = &M[i][0]; + long t1 = x[k]; + t1 = NegateMod(t1, p); + // adjust // x[k] = 0; + if (t1 == 0) continue; + + // add t1 * row k to row i + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); + + // adjust + for (long j = k+1; j < n; j++) { + long t2 = MulModPrecon(y[j], t1, p, t1pinv); + x[j] = AddMod(x[j], t2, p); + } + + // adjust + if (bp) + { + long t2 = MulModPrecon(bv[k], t1, p, t1pinv); + bv[i] = AddMod(bv[i], t2, p); + } + } + NTL_GEXEC_RANGE_END + } + else { + clear(d); + return; + } + } + + + // adjust + if (bp) { + xp->SetLength(n); + zz_p *X = xp->elts(); + + for (long i = n-1; i >= 0; i--) { + long t1 = 0; + for (long j = i+1; j < n; j++) { + long t2 = MulMod(rep(X[j]), M[i][j], p); + t1 = AddMod(t1, t2, p); + } + X[i].LoopHole() = SubMod(bv[i], t1, p); + } + } + + d.LoopHole() = det; +} + + + + +#ifdef NTL_HAVE_LL_TYPE + + + +static +void alt_tri_L(zz_p& d, const mat_zz_p& A, const vec_zz_p *bp, + vec_zz_p *xp, bool trans, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("tri: nonsquare matrix"); + + // adjust + if (bp && bp->length() != n) + LogicError("tri: dimension mismatch"); + + // adjust + if (bp && !xp) + LogicError("tri: bad args"); + + if (n == 0) { + set(d); + if (xp) xp->SetLength(0); + return; + } + + + // scratch space + Mat M; + if (!trans) { + conv(M, A); + } + else { + M.SetDims(n, n); + for (long i = 0; i < n; i++) + for (long j = 0; j < n; j++) + M[i][j] = rep(A[j][i]); + } + + Vec bv; + if (bp) conv(bv, *bp); + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + + + bool pivoting = false; + + unsigned long ured_trigger = + (~(0UL)-cast_unsigned(p-1))/(cast_unsigned(p-1)*cast_unsigned(p-1)); + // NOTE: corner case at p == 2: need unsigned long to prevent overflow + + long red_trigger = min(cast_unsigned(NTL_MAX_LONG), ured_trigger); + + long red_count = red_trigger; + + + for (long k = 0; k < n; k++) { + bool cleanup = false; + + if (red_count-1 < 0) { + red_count = red_trigger; + cleanup = true; + } + + red_count = red_count-1; + + long pos = -1; + long pivot; + long pivot_inv; + + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + pivot = rem(M[i][k], p, red_struct); + if (pivot != 0 && !relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + + if (bp) _ntl_swap(bv[pos], bv[k]); + } + + det = MulMod(det, pivot, p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); // t1*pinv; + unsigned long * NTL_RESTRICT y = &M[k][0]; + for (long j = k+1; j < n; j++) { + long t2 = rem(y[j], p, red_struct); + y[j] = MulModPrecon(t2, t1, p, t1pinv); + } + + if (bp) bv[k] = MulModPrecon(bv[k], t1, p, t1pinv); + } + + + + bool seq = n-(k+1) < PAR_THRESH_SQ; + NTL_GEXEC_RANGE(seq, n-(k+1), first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(k) + NTL_IMPORT(red_struct) + unsigned long * NTL_RESTRICT y = &M[k][0]; + if (cleanup) { + for (long ii = first; ii < last; ii++) { + long i = ii + k+1; + + 
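+ // NOTE: this cleanup pass is the lazy-reduction step: rows accumulate
+ // unreduced sums of products in unsigned longs, and only when red_count
+ // runs out (roughly every red_trigger pivot steps) is each entry reduced
+ // mod p with rem(), keeping the accumulators from wrapping around on the
+ // next x[j] += y[j]*ut1 update.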
unsigned long * NTL_RESTRICT x = &M[i][0]; + for (long j = k+1; j < n; j++) { + x[j] = rem(x[j], p, red_struct); + } + } + } + + + for (long ii = first; ii < last; ii++) { + long i = ii + k+1; + + unsigned long * NTL_RESTRICT x = &M[i][0]; + long t1 = rem(x[k], p, red_struct); + t1 = NegateMod(t1, p); + if (t1 == 0) continue; + + // add t1 * row k to row i + unsigned long ut1 = t1; + long j; + for (j = k+1; j <= n-4; j+=4) { + unsigned long xj0 = x[j+0] + DO_MUL(y[j+0], ut1); + unsigned long xj1 = x[j+1] + DO_MUL(y[j+1], ut1); + unsigned long xj2 = x[j+2] + DO_MUL(y[j+2], ut1); + unsigned long xj3 = x[j+3] + DO_MUL(y[j+3], ut1); + x[j+0] = xj0; + x[j+1] = xj1; + x[j+2] = xj2; + x[j+3] = xj3; + } + for (; j < n; j++) { + x[j] += DO_MUL(y[j], ut1); + } + + if (bp) + { + long t2 = MulMod(bv[k], t1, p); + bv[i] = AddMod(bv[i], t2, p); + } + } + NTL_GEXEC_RANGE_END + } + else { + clear(d); + return; + } + } + + + + if (bp) { + xp->SetLength(n); + zz_p *X = xp->elts(); + + for (long i = n-1; i >= 0; i--) { + long t1 = 0; + for (long j = i+1; j < n; j++) { + long t0 = rem(M[i][j], p, red_struct); + long t2 = MulMod(rep(X[j]), t0, p); + t1 = AddMod(t1, t2, p); + } + X[i].LoopHole() = SubMod(bv[i], t1, p); + } + } + + d.LoopHole() = det; +} + + + + +#ifdef NTL_HAVE_AVX + +static +void alt_tri_DD(zz_p& d, const mat_zz_p& A, const vec_zz_p *bp, + vec_zz_p *xp, bool trans, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("tri: nonsquare matrix"); + + // adjust + if (bp && bp->length() != n) + LogicError("tri: dimension mismatch"); + + // adjust + if (bp && !xp) + LogicError("tri: bad args"); + + if (n == 0) { + set(d); + if (xp) xp->SetLength(0); + return; + } + + + // scratch space + + Vec< AlignedArray > M; + M.SetLength(n); + for (long i = 0; i < n; i++) M[i].SetLength(n); + if (!trans) { + for (long i = 0; i < n; i++) + for (long j = 0; j < n; j++) + M[i][j] = rep(A[i][j]); + } + else { + for (long i = 0; i < n; i++) + for (long j = 0; j < n; j++) + M[i][j] = rep(A[j][i]); + } + + Vec bv; + if (bp) conv(bv, *bp); + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + + + bool pivoting = false; + + long red_trigger = (MAX_DBL_INT-(p-1))/((p-1)*(p-1)); + long red_count = red_trigger; + + for (long k = 0; k < n; k++) { + bool cleanup = false; + + if (red_count-1 < 0) { + red_count = red_trigger; + cleanup = true; + } + + red_count = red_count-1; + + long pos = -1; + long pivot; + long pivot_inv; + + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + pivot = rem((unsigned long)(long)M[i][k], p, red_struct); + if (pivot != 0 && !relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + + if (bp) _ntl_swap(bv[pos], bv[k]); + } + + det = MulMod(det, pivot, p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); // t1*pinv; + double * NTL_RESTRICT y = &M[k][0]; + for (long j = k+1; j < n; j++) { + long t2 = rem((unsigned long)(long)y[j], p, red_struct); + y[j] = MulModPrecon(t2, t1, p, t1pinv); + } + + if (bp) bv[k] = MulModPrecon(bv[k], t1, p, t1pinv); + } + + + + bool seq = n-(k+1) < PAR_THRESH_SQ; 
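+ // NOTE: in this AVX variant the accumulators are doubles, which are exact
+ // only up to MAX_DBL_INT (2^53 - 1 with IEEE doubles), so red_trigger
+ // above bounds how many multiply-accumulate steps are safe; roughly:
+ //
+ //    // an entry < p-1 plus t updates each <= (p-1)^2 must stay exact:
+ //    //    (p-1) + t*(p-1)^2 <= MAX_DBL_INT
+ //    //    => t <= (MAX_DBL_INT - (p-1)) / (p-1)^2 == red_trigger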
+ NTL_GEXEC_RANGE(seq, n-(k+1), first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(k) + NTL_IMPORT(red_struct) + double * NTL_RESTRICT y = &M[k][0]; + if (cleanup) { + for (long ii = first; ii < last; ii++) { + long i = ii + k+1; + + double * NTL_RESTRICT x = &M[i][0]; + for (long j = k+1; j < n; j++) { + x[j] = rem((unsigned long)(long)x[j], p, red_struct); + } + } + } + + long align_boundary = + min((((k+1)+(NTL_AVX_DBL_ALIGN-1))/NTL_AVX_DBL_ALIGN)*NTL_AVX_DBL_ALIGN, n); + + + for (long ii = first; ii < last; ii++) { + long i = ii + k+1; + + double * NTL_RESTRICT x = &M[i][0]; + long t1 = rem((unsigned long)(long)x[k], p, red_struct); + t1 = NegateMod(t1, p); + if (t1 == 0) continue; + + // add t1 * row k to row i + double ut1 = t1; + for (long j = k+1; j < align_boundary; j++) x[j] += y[j]*ut1; + muladd_interval1(x+align_boundary, y+align_boundary, ut1, n-align_boundary); + + if (bp) + { + long t2 = MulMod(bv[k], t1, p); + bv[i] = AddMod(bv[i], t2, p); + } + } + NTL_GEXEC_RANGE_END + } + else { + clear(d); + return; + } + } + + + + if (bp) { + xp->SetLength(n); + zz_p *X = xp->elts(); + + for (long i = n-1; i >= 0; i--) { + long t1 = 0; + for (long j = i+1; j < n; j++) { + long t0 = rem((unsigned long)(long)M[i][j], p, red_struct); + long t2 = MulMod(rep(X[j]), t0, p); + t1 = AddMod(t1, t2, p); + } + X[i].LoopHole() = SubMod(bv[i], t1, p); + } + } + + d.LoopHole() = det; +} + + +#endif + + + + +#ifdef NTL_HAVE_AVX + +static +void blk_tri_DD(zz_p& d, const mat_zz_p& A, const vec_zz_p *bp, + vec_zz_p *xp, bool trans, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("tri: nonsquare matrix"); + + if (bp && bp->length() != n) + LogicError("tri: dimension mismatch"); + + if (bp && !xp) + LogicError("tri: bad args"); + + if (n == 0) { + set(d); + if (xp) xp->SetLength(0); + return; + } + + if (NTL_OVERFLOW(n, MAT_BLK_SZ, 0)) ResourceError("dimension too large"); + + long npanels = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + Vec< AlignedArray > M; + M.SetLength(npanels); + for (long panel = 0; panel < npanels; panel++) { + M[panel].SetLength(n*MAT_BLK_SZ); + double *panelp = &M[panel][0]; + + for (long r = 0; r < n*MAT_BLK_SZ; r++) panelp[r] = 0; + } + + if (trans) { + // copy A transposed into panels + for (long i = 0; i < n; i++) { + const zz_p *row = &A[i][0]; + double *col = &M[i/MAT_BLK_SZ][i%MAT_BLK_SZ]; + for (long j = 0; j < n; j++) + col[j*MAT_BLK_SZ] = rep(row[j]); + } + } + else { + // copy A into panels + for (long jj = 0, panel = 0; jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + double *panelp = &M[panel][0]; + + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + const zz_p *ap = A[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + panelp[j-jj] = rep(ap[j-jj]); + } + } + } + + Vec bv; + if (bp) conv(bv, *bp); + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + + bool pivoting = false; + + long red_trigger = (MAX_DBL_INT-(p-1))/((p-1)*(p-1)); + long red_count = red_trigger; + + for (long kk = 0, kpanel = 0; kk < n; kk += MAT_BLK_SZ, kpanel++) { + long k_max = min(kk+MAT_BLK_SZ, n); + + bool cleanup = false; + + if (red_count-MAT_BLK_SZ < 0) { + red_count = red_trigger; + cleanup = true; + } + + red_count = red_count-MAT_BLK_SZ; + double * NTL_RESTRICT kpanelp = &M[kpanel][0]; + + if (cleanup) { + for (long r = 
kk*MAT_BLK_SZ; r < n*MAT_BLK_SZ; r++) + kpanelp[r] = rem((unsigned long)(long)kpanelp[r], p, red_struct); + } + + for (long k = kk; k < k_max; k++) { + + long pos = -1; + long pivot; + long pivot_inv; + + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + pivot = rem((unsigned long)(long)kpanelp[i*MAT_BLK_SZ+(k-kk)], p, red_struct); + if (pivot != 0 && !relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos == -1) { + clear(d); + return; + } + + double * NTL_RESTRICT y = &kpanelp[k*MAT_BLK_SZ]; + if (k != pos) { + // swap rows pos and k + double * NTL_RESTRICT x = &kpanelp[pos*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) _ntl_swap(x[j], y[j]); + + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + + if (bp) _ntl_swap(bv[pos], bv[k]); + } + + det = MulMod(det, pivot, p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); + for (long j = 0; j < MAT_BLK_SZ; j++) { + long t2 = rem((unsigned long)(long)y[j], p, red_struct); + y[j] = MulModPrecon(t2, t1, p, t1pinv); + } + + y[k-kk] = pivot_inv; + + if (bp) bv[k] = MulModPrecon(bv[k], t1, p, t1pinv); + } + + for (long i = kk; i < n; i++) { + if (i == k) continue; // skip row k + + double * NTL_RESTRICT x = &kpanelp[i*MAT_BLK_SZ]; + long t1 = rem((unsigned long)(long)x[k-kk], p, red_struct); + t1 = NegateMod(t1, p); + x[k-kk] = 0; + if (t1 == 0) continue; + + // add t1 * row k to row i + double ut1 = t1; + muladd_interval(x, y, ut1, MAT_BLK_SZ); + if (bp) + { + long t2 = MulMod(bv[k], t1, p); + bv[i] = AddMod(bv[i], t2, p); + } + } + } + + + // finished processing current kpanel + // next, reduce and apply to all other kpanels + + for (long r = kk*MAT_BLK_SZ; r < n*MAT_BLK_SZ; r++) + kpanelp[r] = rem((unsigned long)(long)kpanelp[r], p, red_struct); + + // special processing: subtract 1 off of diangonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] = SubMod((long)kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + + bool seq = double(npanels-(kpanel+1))*double(n)*double(MAT_BLK_SZ)*double(MAT_BLK_SZ) < PAR_THRESH; + + NTL_GEXEC_RANGE(seq, npanels-(kpanel+1), first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(red_struct) + NTL_IMPORT(kpanel) + NTL_IMPORT(kpanelp) + NTL_IMPORT(kk) + NTL_IMPORT(k_max) + + + AlignedArray buf_store; + buf_store.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + double *buf = &buf_store[0]; + + for (long index = first; index < last; index++) { + long jpanel = index + kpanel+1; + + double * NTL_RESTRICT jpanelp = &M[jpanel][0]; + + if (cleanup) { + for (long r = kk*MAT_BLK_SZ; r < n*MAT_BLK_SZ; r++) + jpanelp[r] = rem((unsigned long)(long)jpanelp[r], p, red_struct); + } + + // perform swaps + for (long k = kk; k < k_max; k++) { + long pos = P[k]; + if (pos != k) { + // swap rows pos and k + double * NTL_RESTRICT pos_p = &jpanelp[pos*MAT_BLK_SZ]; + double * NTL_RESTRICT k_p = &jpanelp[k*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) + _ntl_swap(pos_p[j], k_p[j]); + } + } + + // copy block number kpanel (the one on the diagonal) into buf + + for (long i = 0; i < (k_max-kk)*MAT_BLK_SZ; i++) + buf[i] = rem((unsigned long)(long)jpanelp[kk*MAT_BLK_SZ+i], p, red_struct); + + // jpanel += kpanel*buf + + muladd_all_by_32(kk, n, jpanelp, kpanelp, buf, k_max-kk); + } + + NTL_GEXEC_RANGE_END + + // special processing: add 1 back to the diangonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] = 
AddMod((long)kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + } + + if (bp) { + xp->SetLength(n); + zz_p *X = xp->elts(); + + for (long i = n-1; i >= 0; i--) { + long t1 = 0; + long start_panel = ((i+1)+MAT_BLK_SZ-1)/MAT_BLK_SZ; + for (long jj = MAT_BLK_SZ*start_panel, panel = start_panel; + jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + double *row = &M[panel][i*MAT_BLK_SZ]; + for (long j = jj; j < j_max; j++) { + long t0 = rem((unsigned long)(long)row[j-jj], p, red_struct); + long t2 = MulMod(rep(X[j]), t0, p); + t1 = AddMod(t1, t2, p); + } + } + X[i].LoopHole() = SubMod(bv[i], t1, p); + } + } + + d.LoopHole() = det; + +} + +#endif + + +static +void blk_tri_L(zz_p& d, const mat_zz_p& A, const vec_zz_p *bp, + vec_zz_p *xp, bool trans, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("tri: nonsquare matrix"); + + if (bp && bp->length() != n) + LogicError("tri: dimension mismatch"); + + if (bp && !xp) + LogicError("tri: bad args"); + + if (n == 0) { + set(d); + if (xp) xp->SetLength(0); + return; + } + + if (NTL_OVERFLOW(n, MAT_BLK_SZ, 0)) ResourceError("dimension too large"); + + long npanels = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + Vec< UniqueArray > M; + M.SetLength(npanels); + for (long panel = 0; panel < npanels; panel++) { + M[panel].SetLength(n*MAT_BLK_SZ); + unsigned long *panelp = &M[panel][0]; + + for (long r = 0; r < n*MAT_BLK_SZ; r++) panelp[r] = 0; + } + + if (trans) { + // copy A transposed into panels + for (long i = 0; i < n; i++) { + const zz_p *row = &A[i][0]; + unsigned long *col = &M[i/MAT_BLK_SZ][i%MAT_BLK_SZ]; + for (long j = 0; j < n; j++) + col[j*MAT_BLK_SZ] = rep(row[j]); + } + } + else { + // copy A into panels + for (long jj = 0, panel = 0; jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + unsigned long *panelp = &M[panel][0]; + + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + const zz_p *ap = A[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + panelp[j-jj] = rep(ap[j-jj]); + } + } + } + + Vec bv; + if (bp) conv(bv, *bp); + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + + bool pivoting = false; + + unsigned long ured_trigger = + (~(0UL)-cast_unsigned(p-1))/(cast_unsigned(p-1)*cast_unsigned(p-1)); + // NOTE: corner case at p == 2: need unsigned long to prevent overflow + + long red_trigger = min(cast_unsigned(NTL_MAX_LONG), ured_trigger); + + long red_count = red_trigger; + + for (long kk = 0, kpanel = 0; kk < n; kk += MAT_BLK_SZ, kpanel++) { + long k_max = min(kk+MAT_BLK_SZ, n); + + bool cleanup = false; + + if (red_count-MAT_BLK_SZ < 0) { + red_count = red_trigger; + cleanup = true; + } + + red_count = red_count-MAT_BLK_SZ; + unsigned long * NTL_RESTRICT kpanelp = &M[kpanel][0]; + + if (cleanup) { + for (long r = kk*MAT_BLK_SZ; r < n*MAT_BLK_SZ; r++) + kpanelp[r] = rem(kpanelp[r], p, red_struct); + } + + for (long k = kk; k < k_max; k++) { + + long pos = -1; + long pivot; + long pivot_inv; + + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + pivot = rem(kpanelp[i*MAT_BLK_SZ+(k-kk)], p, red_struct); + if (pivot != 0 && !relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos == -1) { + clear(d); + return; + } + + unsigned long * NTL_RESTRICT y = 
&kpanelp[k*MAT_BLK_SZ]; + if (k != pos) { + // swap rows pos and k + unsigned long * NTL_RESTRICT x = &kpanelp[pos*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) _ntl_swap(x[j], y[j]); + + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + + if (bp) _ntl_swap(bv[pos], bv[k]); + } + + det = MulMod(det, pivot, p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); + for (long j = 0; j < MAT_BLK_SZ; j++) { + long t2 = rem(y[j], p, red_struct); + y[j] = MulModPrecon(t2, t1, p, t1pinv); + } + + y[k-kk] = pivot_inv; + + if (bp) bv[k] = MulModPrecon(bv[k], t1, p, t1pinv); + } + + for (long i = kk; i < n; i++) { + if (i == k) continue; // skip row k + + unsigned long * NTL_RESTRICT x = &kpanelp[i*MAT_BLK_SZ]; + long t1 = rem(x[k-kk], p, red_struct); + t1 = NegateMod(t1, p); + x[k-kk] = 0; + if (t1 == 0) continue; + + // add t1 * row k to row i + unsigned long ut1 = t1; + muladd_interval(x, y, ut1, MAT_BLK_SZ); + if (bp) + { + long t2 = MulMod(bv[k], t1, p); + bv[i] = AddMod(bv[i], t2, p); + } + } + } + + + // finished processing current kpanel + // next, reduce and apply to all other kpanels + + for (long r = kk*MAT_BLK_SZ; r < n*MAT_BLK_SZ; r++) + kpanelp[r] = rem(kpanelp[r], p, red_struct); + + // special processing: subtract 1 off of diangonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] = SubMod((long)kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + + bool seq = double(npanels-(kpanel+1))*double(n)*double(MAT_BLK_SZ)*double(MAT_BLK_SZ) < PAR_THRESH; + NTL_GEXEC_RANGE(seq, npanels-(kpanel+1), first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(red_struct) + NTL_IMPORT(kpanel) + NTL_IMPORT(kpanelp) + NTL_IMPORT(kk) + NTL_IMPORT(k_max) + + + UniqueArray buf_store; + buf_store.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + unsigned long *buf = &buf_store[0]; + + for (long index = first; index < last; index++) { + long jpanel = index + kpanel+1; + + unsigned long * NTL_RESTRICT jpanelp = &M[jpanel][0]; + + if (cleanup) { + for (long r = kk*MAT_BLK_SZ; r < n*MAT_BLK_SZ; r++) + jpanelp[r] = rem(jpanelp[r], p, red_struct); + } + + // perform swaps + for (long k = kk; k < k_max; k++) { + long pos = P[k]; + if (pos != k) { + // swap rows pos and k + unsigned long * NTL_RESTRICT pos_p = &jpanelp[pos*MAT_BLK_SZ]; + unsigned long * NTL_RESTRICT k_p = &jpanelp[k*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) + _ntl_swap(pos_p[j], k_p[j]); + } + } + + // copy block number kpanel (the one on the diagonal) into buf + // here, we transpose it + + for (long k = kk; k < k_max; k++) + for (long j = 0; j < MAT_BLK_SZ; j++) + buf[j*MAT_BLK_SZ + (k-kk)] = + rem(jpanelp[k*MAT_BLK_SZ+j], p, red_struct); + + // jpanel += kpanel*buf + + muladd_all_by_32(kk, n, jpanelp, kpanelp, buf, k_max-kk); + } + + NTL_GEXEC_RANGE_END + + // special processing: add 1 back to the diangonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] = AddMod((long)kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + } + + if (bp) { + xp->SetLength(n); + zz_p *X = xp->elts(); + + for (long i = n-1; i >= 0; i--) { + long t1 = 0; + long start_panel = ((i+1)+MAT_BLK_SZ-1)/MAT_BLK_SZ; + for (long jj = MAT_BLK_SZ*start_panel, panel = start_panel; + jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + unsigned long *row = &M[panel][i*MAT_BLK_SZ]; + for (long j = jj; j < j_max; j++) { + long t0 = rem(row[j-jj], p, red_struct); + long t2 = MulMod(rep(X[j]), t0, p); + t1 = AddMod(t1, t2, p); + } + } + X[i].LoopHole() = 
SubMod(bv[i], t1, p); + } + } + + d.LoopHole() = det; + +} + + +static +void blk_tri_LL(zz_p& d, const mat_zz_p& A, const vec_zz_p *bp, + vec_zz_p *xp, bool trans, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("tri: nonsquare matrix"); + + if (bp && bp->length() != n) + LogicError("tri: dimension mismatch"); + + if (bp && !xp) + LogicError("tri: bad args"); + + if (n == 0) { + set(d); + if (xp) xp->SetLength(0); + return; + } + + if (NTL_OVERFLOW(n, MAT_BLK_SZ, 0)) ResourceError("dimension too large"); + + long npanels = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + Vec< UniqueArray > M; + M.SetLength(npanels); + for (long panel = 0; panel < npanels; panel++) { + M[panel].SetLength(n*MAT_BLK_SZ); + long *panelp = &M[panel][0]; + + for (long r = 0; r < n*MAT_BLK_SZ; r++) panelp[r] = 0; + } + + if (trans) { + // copy A transposed into panels + for (long i = 0; i < n; i++) { + const zz_p *row = &A[i][0]; + long *col = &M[i/MAT_BLK_SZ][i%MAT_BLK_SZ]; + for (long j = 0; j < n; j++) + col[j*MAT_BLK_SZ] = rep(row[j]); + } + } + else { + // copy A into panels + for (long jj = 0, panel = 0; jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + long *panelp = &M[panel][0]; + + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + const zz_p *ap = A[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + panelp[j-jj] = rep(ap[j-jj]); + } + } + } + + Vec bv; + if (bp) conv(bv, *bp); + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + + long det; + det = 1; + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_ll_reduce_struct ll_red_struct = zz_p::ll_red_struct(); + + + bool pivoting = false; + + for (long kk = 0, kpanel = 0; kk < n; kk += MAT_BLK_SZ, kpanel++) { + long k_max = min(kk+MAT_BLK_SZ, n); + + long * NTL_RESTRICT kpanelp = &M[kpanel][0]; + + for (long k = kk; k < k_max; k++) { + + long pos = -1; + long pivot; + long pivot_inv; + + for (long i = k; i < n; i++) { + // NOTE: by using InvModStatus, this code will work + // for prime-powers as well as primes + pivot = kpanelp[i*MAT_BLK_SZ+(k-kk)]; + if (pivot != 0 && !relaxed_InvModStatus(pivot_inv, pivot, p, relax)) { + pos = i; + break; + } + } + + if (pos == -1) { + clear(d); + return; + } + + long * NTL_RESTRICT y = &kpanelp[k*MAT_BLK_SZ]; + if (k != pos) { + // swap rows pos and k + long * NTL_RESTRICT x = &kpanelp[pos*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) _ntl_swap(x[j], y[j]); + + det = NegateMod(det, p); + P[k] = pos; + pivoting = true; + + if (bp) _ntl_swap(bv[pos], bv[k]); + } + + det = MulMod(det, pivot, p); + + { + // multiply row k by pivot_inv + long t1 = pivot_inv; + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); + for (long j = 0; j < MAT_BLK_SZ; j++) { + y[j] = MulModPrecon(y[j], t1, p, t1pinv); + } + + y[k-kk] = pivot_inv; + + if (bp) bv[k] = MulModPrecon(bv[k], t1, p, t1pinv); + } + + for (long i = kk; i < n; i++) { + if (i == k) continue; // skip row k + + long * NTL_RESTRICT x = &kpanelp[i*MAT_BLK_SZ]; + long t1 = x[k-kk]; + t1 = NegateMod(t1, p); + x[k-kk] = 0; + if (t1 == 0) continue; + + // add t1 * row k to row i + long ut1 = t1; + muladd_interval(x, y, ut1, MAT_BLK_SZ, p, pinv); + if (bp) + { + long t2 = MulMod(bv[k], t1, p); + bv[i] = AddMod(bv[i], t2, p); + } + } + } + + + // finished processing current kpanel + // next, reduce and apply to all other kpanels + + // special processing: subtract 1 off of diangonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] 
= SubMod((long)kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + + bool seq = double(npanels-(kpanel+1))*double(n)*double(MAT_BLK_SZ)*double(MAT_BLK_SZ) < PAR_THRESH; + NTL_GEXEC_RANGE(seq, npanels-(kpanel+1), first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(ll_red_struct) + NTL_IMPORT(kpanel) + NTL_IMPORT(kpanelp) + NTL_IMPORT(kk) + NTL_IMPORT(k_max) + + + UniqueArray buf_store; + buf_store.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + long *buf = &buf_store[0]; + + for (long index = first; index < last; index++) { + long jpanel = index + kpanel+1; + + long * NTL_RESTRICT jpanelp = &M[jpanel][0]; + + // perform swaps + for (long k = kk; k < k_max; k++) { + long pos = P[k]; + if (pos != k) { + // swap rows pos and k + long * NTL_RESTRICT pos_p = &jpanelp[pos*MAT_BLK_SZ]; + long * NTL_RESTRICT k_p = &jpanelp[k*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) + _ntl_swap(pos_p[j], k_p[j]); + } + } + + // copy block number kpanel (the one on the diagonal) into buf + // here, we transpose it + + for (long k = kk; k < k_max; k++) + for (long j = 0; j < MAT_BLK_SZ; j++) + buf[j*MAT_BLK_SZ + (k-kk)] = jpanelp[k*MAT_BLK_SZ+j]; + + // jpanel += kpanel*buf + + muladd_all_by_32(kk, n, jpanelp, kpanelp, buf, k_max-kk, p, ll_red_struct); + } + + NTL_GEXEC_RANGE_END + + // special processing: add 1 back to the diangonal + + for (long k = kk; k < k_max; k++) + kpanelp[k*MAT_BLK_SZ+(k-kk)] = AddMod((long)kpanelp[k*MAT_BLK_SZ+(k-kk)], 1, p); + + } + + if (bp) { + xp->SetLength(n); + zz_p *X = xp->elts(); + + for (long i = n-1; i >= 0; i--) { + long t1 = 0; + long start_panel = ((i+1)+MAT_BLK_SZ-1)/MAT_BLK_SZ; + for (long jj = MAT_BLK_SZ*start_panel, panel = start_panel; + jj < n; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, n); + long *row = &M[panel][i*MAT_BLK_SZ]; + for (long j = jj; j < j_max; j++) { + long t0 = row[j-jj]; + long t2 = MulMod(rep(X[j]), t0, p); + t1 = AddMod(t1, t2, p); + } + } + X[i].LoopHole() = SubMod(bv[i], t1, p); + } + } + + d.LoopHole() = det; + +} + + + +#endif + + + +static +void tri(zz_p& d, const mat_zz_p& A, const vec_zz_p *bp, + vec_zz_p *xp, bool trans, bool relax) +{ + long n = A.NumRows(); + + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + + if (bp && bp->length() != n) + LogicError("tri: dimension mismatch"); + + if (bp && !xp) + LogicError("tri: bad args"); + +#ifndef NTL_HAVE_LL_TYPE + + basic_tri(d, A, bp, xp, trans, relax); + +#else + + long p = zz_p::modulus(); + + if (n < 16) { + //cerr << "basic_tri\n"; + basic_tri(d, A, bp, xp, trans, relax); + } + else if (n/MAT_BLK_SZ < 4) { + long V = 64; + +#ifdef NTL_HAVE_AVX + if (p-1 <= MAX_DBL_INT && + V <= (MAX_DBL_INT-(p-1))/(p-1) && + V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { + + //cerr << "alt_tri_DD\n"; + alt_tri_DD(d, A, bp, xp, trans, relax); + } + else +#endif + if (cast_unsigned(V) <= (~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1) && + cast_unsigned(V)*cast_unsigned(p-1) <= (~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1)) { + + //cerr << "alt_tri_L\n"; + alt_tri_L(d, A, bp, xp, trans, relax); + + } + else { + + //cerr << "basic_tri\n"; + basic_tri(d, A, bp, xp, trans, relax); + } + } + else { + long V = 4*MAT_BLK_SZ; + +#ifdef NTL_HAVE_AVX + if (p-1 <= MAX_DBL_INT && + V <= (MAX_DBL_INT-(p-1))/(p-1) && + V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { + + //cerr << "blk_tri_DD\n"; + blk_tri_DD(d, A, bp, xp, trans, relax); + } + else +#endif + if (cast_unsigned(V) <= (~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1) && + cast_unsigned(V)*cast_unsigned(p-1) <= 
(~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1)) { + + //cerr << "blk_tri_L\n"; + blk_tri_L(d, A, bp, xp, trans, relax); + + } + else { + + //cerr << "blk_tri_LL\n"; + blk_tri_LL(d, A, bp, xp, trans, relax); + } + + } + +#endif + + + +} + + + +void relaxed_determinant(zz_p& d, const mat_zz_p& A, bool relax) +{ + tri(d, A, 0, 0, false, relax); +} + + +void relaxed_solve(zz_p& d, vec_zz_p& x, + const mat_zz_p& A, const vec_zz_p& b, bool relax) +{ + tri(d, A, &b, &x, true, relax); +} + +void relaxed_solve(zz_p& d, const mat_zz_p& A, vec_zz_p& x, const vec_zz_p& b, bool relax) +{ + tri(d, A, &b, &x, false, relax); +} + +// ****************************************************************** +// +// new image and kernel routines +// +// ****************************************************************** + + +static +long elim_basic(const mat_zz_p& A, mat_zz_p *im, mat_zz_p *ker, + long w, bool full) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (w < 0 || w > m) LogicError("elim: bad args"); + + // take care of corner cases + if (n == 0) { + if (im) im->SetDims(0, m); + if (ker) ker->SetDims(0, 0); + return 0; + } + + if (w == 0) { + if (im) { + if (full) + (*im) = A; + else + im->SetDims(0, m); + } + if (ker) ident(*ker, n); + return 0; + } + + Mat<long> M; + conv(M, A); + + Vec<long> P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + Vec<long> pcol; + pcol.SetLength(n); + // pcol[i] records pivot columns for row i + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + + bool pivoting = false; + + long r = 0; + + for (long k = 0; k < w; k++) { + long pos = -1; + long pivot_inv; + for (long i = r; i < n; i++) { + long pivot = M[i][k]; + if (pivot != 0) { + pivot_inv = InvMod(pivot, p); + pos = i; + break; + } + } + + if (pos == -1) + continue; + + if (r != pos) { + swap(M[pos], M[r]); + P[r] = pos; + pivoting = true; + } + + bool seq = double(n-r)*double(m-k) < PAR_THRESH; + + NTL_GEXEC_RANGE(seq, n-(r+1), first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(k) + NTL_IMPORT(r) + long * NTL_RESTRICT y = &M[r][0]; + + for (long ii = first; ii < last; ii++) { + long i = ii + r+1; + + long * NTL_RESTRICT x = &M[i][0]; + long t1 = x[k]; + t1 = MulMod(t1, pivot_inv, p); + t1 = NegateMod(t1, p); + x[k] = t1; + if (t1 == 0) continue; + + // add t1 * row r to row i + mulmod_precon_t t1pinv = PrepMulModPrecon(t1, p, pinv); + + for (long j = k+1; j < m; j++) { + long t2 = MulModPrecon(y[j], t1, p, t1pinv); + x[j] = AddMod(x[j], t2, p); + } + } + NTL_GEXEC_RANGE_END + + pcol[r] = k; + r++; + } + + if (im) { + mat_zz_p& Im = *im; + if (full) + Im.SetDims(n, m); + else + Im.SetDims(r, m); + + for (long i = 0; i < r; i++) { + long pc = pcol[i]; + for (long j = 0; j < pc; j++) Im[i][j].LoopHole() = 0; + for (long j = pc; j < m; j++) Im[i][j].LoopHole() = M[i][j]; + } + + if (full) { + for (long i = r; i < n; i++) { + for (long j = 0; j < w; j++) Im[i][j].LoopHole() = 0; + for (long j = w; j < m; j++) Im[i][j].LoopHole() = M[i][j]; + } + } + } + + if (ker) { + + if (n == r) { + mat_zz_p& Ker = *ker; + Ker.SetDims(n-r, n); + } + else { + Mat<long> colbuf; + colbuf.SetDims(r, n); + + for (long k = 0; k < r; k++) { + long pc = pcol[k]; + for (long i = k+1; i < n; i++) colbuf[k][i] = M[i][pc]; + } + + M.kill(); + + Mat<long> X; + X.SetDims(n-r, r); + + bool seq = double(n-r)*double(r)*double(r)/2 < PAR_THRESH; + NTL_GEXEC_RANGE(seq, n-r, first, last) + NTL_IMPORT(p) + NTL_IMPORT(r) + + for (long i = first; i < last; i++) { + long *Xi = &X[i][0]; + + for (long k = r-1; k 
>= 0; k--) { + long *cvecp = &colbuf[k][0]; + + long acc = cvecp[i+r]; + for (long j = k+1; j < r; j++) { + acc = AddMod( acc, MulMod(Xi[j], cvecp[j], p), p ); + } + Xi[k] = acc; + } + + } + + NTL_GEXEC_RANGE_END + + mat_zz_p& Ker = *ker; + Ker.SetDims(n-r, n); + for (long i = 0; i < n-r; i++) { + for (long j = 0; j < r; j++) Ker[i][j].LoopHole() = X[i][j]; + for (long j = r; j < n; j++) Ker[i][j].LoopHole() = 0; + Ker[i][r+i].LoopHole() = 1; + } + + if (pivoting) { + for (long i = 0; i < n-r; i++) { + zz_p *x = Ker[i].elts(); + + for (long k = n-1; k >= 0; k--) { + long pos = P[k]; + if (pos != k) swap(x[pos], x[k]); + } + } + } + } + } + + return r; +} + +#ifdef NTL_HAVE_LL_TYPE + + +#ifdef NTL_HAVE_AVX + + +static inline +void CopyBlock(double *dst_ptr, long dst_blk, const double *src_ptr, long src_blk, long src_limit) +{ + long src_row = src_blk*MAT_BLK_SZ; + long dst_row = dst_blk*MAT_BLK_SZ; + + long nrows = min(MAT_BLK_SZ, src_limit - src_row); + + for (long i = 0; i < nrows; i++) + for (long j = 0; j < MAT_BLK_SZ; j++) + dst_ptr[(dst_row + i)*MAT_BLK_SZ + j] = src_ptr[(src_row + i)*MAT_BLK_SZ + j]; + + for (long i = nrows; i < MAT_BLK_SZ; i++) + for (long j = 0; j < MAT_BLK_SZ; j++) + dst_ptr[(dst_row + i)*MAT_BLK_SZ + j] = 0; + +} + +static inline +void CopyBlock(double *dst_ptr, long dst_blk, const double *src_ptr, long src_blk) +{ + long src_row = src_blk*MAT_BLK_SZ; + long dst_row = dst_blk*MAT_BLK_SZ; + + long nrows = MAT_BLK_SZ; + + for (long i = 0; i < nrows; i++) + for (long j = 0; j < MAT_BLK_SZ; j++) + dst_ptr[(dst_row + i)*MAT_BLK_SZ + j] = src_ptr[(src_row + i)*MAT_BLK_SZ + j]; +} + +static inline +void SwapOneRow(double *panelp, long i, long pos) +{ + double * NTL_RESTRICT pos_p = &panelp[pos*MAT_BLK_SZ]; + double * NTL_RESTRICT i_p = &panelp[i*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) + _ntl_swap(pos_p[j], i_p[j]); +} + +static inline +void ApplySwaps(double *panelp, long start, long end, const Vec& P) +{ + for (long i = start; i < end; i++) { + long pos = P[i]; + if (pos != i) + SwapOneRow(panelp, i, pos); + } +} + + +static inline +void MulAddBlock(double *x, const double *y, const double *z) +{ + // x += y*z + muladd_all_by_32(0, MAT_BLK_SZ, x, y, z, MAT_BLK_SZ); +} + + +static +long elim_blk_DD(const mat_zz_p& A, mat_zz_p *im, mat_zz_p *ker, + long w, bool full) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (w < 0 || w > m) LogicError("elim: bad args"); + + // take care of corner cases + if (n == 0) { + if (im) im->SetDims(0, m); + if (ker) ker->SetDims(0, 0); + return 0; + } + + if (w == 0) { + if (im) { + if (full) + (*im) = A; + else + im->SetDims(0, m); + } + if (ker) ident(*ker, n); + return 0; + } + + if (NTL_OVERFLOW(n, MAT_BLK_SZ, 0)) ResourceError("dimension too large"); + if (NTL_OVERFLOW(m, MAT_BLK_SZ, 0)) ResourceError("dimension too large"); + + long npanels = (m+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + + Vec< AlignedArray > M; + M.SetLength(npanels); + for (long panel = 0; panel < npanels; panel++) { + M[panel].SetLength(n*MAT_BLK_SZ); + double *panelp = &M[panel][0]; + + for (long h = 0; h < n*MAT_BLK_SZ; h++) panelp[h] = 0; + } + + // copy A into panels + for (long jj = 0, panel = 0; jj < m; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, m); + double *panelp = &M[panel][0]; + + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + const zz_p *ap = A[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + panelp[j-jj] = rep(ap[j-jj]); + } + } + + AlignedArray aux_panel_store; + aux_panel_store.SetLength(n*MAT_BLK_SZ); + 
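+ // NOTE: aux_panel collects the elimination multipliers for the current
+ // block of up to MAT_BLK_SZ pivots (column r-rr of aux_panel holds the
+ // multipliers generated when row r supplied the pivot). Deferring the
+ // updates this way lets each trailing panel be corrected by one blocked
+ // product, muladd_all_by_32(rr, n, jpanelp, aux_panel, buf, r-rr),
+ // rather than by a rank-1 update per pivot.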
double * NTL_RESTRICT aux_panel = &aux_panel_store[0]; + + + AlignedArray buf_store1; + buf_store1.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + double *buf1 = &buf_store1[0]; + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + Vec pcol; + pcol.SetLength(n); + // pcol[i] records pivot columns for row i + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + bool pivoting = false; + + long red_trigger = (MAX_DBL_INT-(p-1))/((p-1)*(p-1)); + long red_count = red_trigger; + + long r = 0, rr = 0, k = 0, kk = 0; + long rpanel = 0, kpanel = 0; + + while (k < w) { + + if (r > rr && ker) { + // we have a panel from a previous iteration + // we store enough of it to facilitate the kernel + // computation later. At this point, we have + // r == rr+INV_BLK_SIZE, and it suffices to store + // rows [r..n) into M[rpanel], and this will not + // overwrite anything useful in M[rpanel] + + double *panelp = &M[rpanel][0]; + for (long h = r*MAT_BLK_SZ; h < n*MAT_BLK_SZ; h++) { + panelp[h] = aux_panel[h]; + } + + rpanel++; + } + + rr = r; + + for (long h = 0; h < n*MAT_BLK_SZ; h++) aux_panel[h] = 0; + + bool cleanup = false; + + if (red_count-MAT_BLK_SZ < 0) { + red_count = red_trigger; + cleanup = true; + } + + red_count = red_count-MAT_BLK_SZ; + + for (; r < rr+MAT_BLK_SZ && k < w; k++) { // panel incomplete + + if (k == kk+MAT_BLK_SZ) { // start new kpanel + kk = k; + kpanel++; + } + + double * NTL_RESTRICT kpanelp = &M[kpanel][0]; + + if (k == kk) { // a fresh kpanel -- special processing + + if (cleanup) { + for (long h = 0; h < n*MAT_BLK_SZ; h++) + kpanelp[h] = rem((unsigned long)(long)kpanelp[h], p, red_struct); + } + + if (r > rr) { + + + // apply current sequence of permutations + + ApplySwaps(kpanelp, rr, r, P); + + // clean aux_panel + for (long h = 0; h < n*MAT_BLK_SZ; h++) + aux_panel[h] = rem((unsigned long)(long)aux_panel[h], p, red_struct); + + // copy rows [rr..r) of kpanel into buf1 + for (long i = 0; i < (r-rr)*MAT_BLK_SZ; i++) + buf1[i] = rem((unsigned long)(long)kpanelp[rr*MAT_BLK_SZ+i], p, red_struct); + + // kpanel[rr..n) += aux_panel[rr..n)*buf1 + + muladd_all_by_32(rr, n, kpanelp, aux_panel, buf1, r-rr); + } + } + + long pos = -1; + long pivot; + long pivot_inv; + for (long i = r; i < n; i++) { + pivot = rem((unsigned long)(long)kpanelp[i*MAT_BLK_SZ+(k-kk)], p, red_struct); + kpanelp[i*MAT_BLK_SZ+(k-kk)] = pivot; + + if (pivot != 0) { + pivot_inv = InvMod(pivot, p); + pos = i; + break; + } + } + + if (pos == -1) { + continue; + } + + double * NTL_RESTRICT y = &kpanelp[r*MAT_BLK_SZ]; + double * NTL_RESTRICT y1 = &aux_panel[r*MAT_BLK_SZ]; + if (r != pos) { + // swap rows pos and r + double * NTL_RESTRICT x = &kpanelp[pos*MAT_BLK_SZ]; + double * NTL_RESTRICT x1 = &aux_panel[pos*MAT_BLK_SZ]; + + for (long j = k-kk; j < MAT_BLK_SZ; j++) _ntl_swap(x[j], y[j]); + for (long j = 0; j < r-rr; j++) _ntl_swap(x1[j], y1[j]); + + P[r] = pos; + pivoting = true; + } + + // clean up row r of kpanel and aux_panel + for (long j = k-kk; j < MAT_BLK_SZ; j++) + y[j] = rem((unsigned long)(long)y[j], p, red_struct); + for (long j = 0; j < r-rr; j++) + y1[j] = rem((unsigned long)(long)y1[j], p, red_struct); + + // clear column + for (long i = r+1; i < n; i++) { + double * NTL_RESTRICT x = &kpanelp[i*MAT_BLK_SZ]; + double * NTL_RESTRICT x1 = &aux_panel[i*MAT_BLK_SZ]; + long t1 = rem((unsigned long)(long)x[k-kk], p, red_struct); + t1 = MulMod(t1, pivot_inv, p); + t1 = NegateMod(t1, p); + x[k-kk] = 0; 
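+ // NOTE: t1 is the usual Gaussian elimination multiplier -x[k]/pivot mod p;
+ // the next line records it in aux_panel so that it can be replayed against
+ // the remaining panels, and reused in the kernel back-solve when ker != 0.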
+ x1[r-rr] = t1; + if (t1 == 0) continue; + + // add t1 * row r to row i + double ut1 = t1; + + for (long j = k-kk+1; j < MAT_BLK_SZ; j++) + x[j] += y[j]*ut1; + for (long j = 0; j < r-rr; j++) + x1[j] += y1[j]*ut1; + } + + pcol[r] = k; + r++; + } + + if (r > rr) { + + // we have a panel + + // clean it up + for (long h = 0; h < n*MAT_BLK_SZ; h++) + aux_panel[h] = rem((unsigned long)(long)aux_panel[h], p, red_struct); + + bool seq = + double(npanels-(kpanel+1))*double(n-rr)*double(r-rr)*double(MAT_BLK_SZ) < PAR_THRESH; + + // apply aux_panel to remaining panels: [kpanel+1..npanels) + NTL_GEXEC_RANGE(seq, npanels-(kpanel+1), first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(red_struct) + NTL_IMPORT(aux_panel) + NTL_IMPORT(rr) + NTL_IMPORT(r) + + + AlignedArray buf_store; + buf_store.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + double *buf = &buf_store[0]; + + + for (long index = first; index < last; index++) { + long jpanel = index + kpanel+1; + + double * NTL_RESTRICT jpanelp = &M[jpanel][0]; + + if (cleanup) { + for (long h = 0; h < n*MAT_BLK_SZ; h++) + jpanelp[h] = rem((unsigned long)(long)jpanelp[h], p, red_struct); + } + + // perform swaps + ApplySwaps(jpanelp, rr, r, P); + + // copy rows [rr..r) of jpanel into buf + for (long i = 0; i < (r-rr)*MAT_BLK_SZ; i++) + buf[i] = rem((unsigned long)(long)jpanelp[rr*MAT_BLK_SZ+i], p, red_struct); + + // jpanel[rr..n) += aux_panel[rr..n)*buf + + muladd_all_by_32(rr, n, jpanelp, aux_panel, buf, r-rr); + } + + NTL_GEXEC_RANGE_END + + } + + } + + if (im) { + mat_zz_p& Im = *im;; + if (full) + Im.SetDims(n, m); + else + Im.SetDims(r, m); + + for (long i = 0; i < r; i++) { + long pc = pcol[i]; + for (long j = 0; j < pc; j++) Im[i][j].LoopHole() = 0; + for (long j = pc; j < m; j++) { + double t0 = M[j/MAT_BLK_SZ][i*MAT_BLK_SZ+(j%MAT_BLK_SZ)]; + Im[i][j].LoopHole() = rem((unsigned long)(long)t0, p, red_struct); + } + } + + if (full) { + for (long i = r; i < n; i++) { + for (long j = 0; j < w; j++) Im[i][j].LoopHole() = 0; + for (long j = w; j < m; j++) { + double t0 = M[j/MAT_BLK_SZ][i*MAT_BLK_SZ+(j%MAT_BLK_SZ)]; + Im[i][j].LoopHole() = rem((unsigned long)(long)t0, p, red_struct); + } + } + } + } + + if (ker) { + mat_zz_p& Ker = *ker; + Ker.SetDims(n-r, n); + if (r < n) { + + long start_block = r/MAT_BLK_SZ; + long end_block = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + long vblocks = end_block-start_block; + long hblocks = (r+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + Vec< AlignedArray > kerbuf; + kerbuf.SetLength(vblocks); + for (long i = 0; i < vblocks; i++) + kerbuf[i].SetLength(hblocks*MAT_BLK_SZ*MAT_BLK_SZ); + + long colblocks = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + // if r > rr, we have a panel sitting in + // aux_panel, which may or may not be a full panel + + double *initial_panel = 0; + if (r > rr) { + initial_panel = aux_panel; + } + else { + initial_panel = &M[hblocks-1][0]; + } + + for (long vb = start_block; vb < end_block; vb++) + CopyBlock(&kerbuf[vb-start_block][0], hblocks-1, initial_panel, vb, n); + + for (long hb = hblocks-2; hb >= 0; hb--) { + + ApplySwaps(&M[hb][0], (hb+1)*MAT_BLK_SZ, r, P); + + for (long b = hb+1; b < end_block; b++) + CopyBlock(&M[hb][0], b-1, &M[hb][0], b, n); + } + + bool seq = double(n-r)*double(r)*double(r)/2 < PAR_THRESH; + + + NTL_GEXEC_RANGE(seq, end_block-start_block, first, last) + NTL_IMPORT(p) + NTL_IMPORT(red_struct) + NTL_IMPORT(hblocks) + + for (long index = first; index < last; index++) { + long vb = index + start_block; + double *kerbufp = &kerbuf[vb-start_block][0]; + + for (long hb = hblocks-2; hb >= 0; hb--) { + double 
*colbuf = &M[hb][0]; + double *acc = &kerbufp[hb*MAT_BLK_SZ*MAT_BLK_SZ]; + + CopyBlock(acc, 0, colbuf, vb-1); + + long red_trigger = (MAX_DBL_INT-(p-1))/((p-1)*(p-1)); + long red_count = red_trigger; + + for (long b = hb+1; b < hblocks; b++) { + + if (red_count-MAT_BLK_SZ < 0) { + red_count = red_trigger; + for (long h = 0; h < MAT_BLK_SZ*MAT_BLK_SZ; h++) + acc[h] = rem((unsigned long)(long)acc[h], p, red_struct); + + } + red_count = red_count-MAT_BLK_SZ; + + MulAddBlock(acc, &kerbufp[b*MAT_BLK_SZ*MAT_BLK_SZ], + &colbuf[(b-1)*MAT_BLK_SZ*MAT_BLK_SZ]); + } + + for (long h = 0; h < MAT_BLK_SZ*MAT_BLK_SZ; h++) + acc[h] = rem((unsigned long)(long)acc[h], p, red_struct); + } + } + + NTL_GEXEC_RANGE_END + + for (long i = r; i < n; i++) { + + double *kerbufp = &kerbuf[(i/MAT_BLK_SZ)-start_block][0]; + + for (long j = 0; j < r; j++) { + double t0 = + kerbufp[(j/MAT_BLK_SZ)*MAT_BLK_SZ*MAT_BLK_SZ+ + (i%MAT_BLK_SZ)*MAT_BLK_SZ+(j%MAT_BLK_SZ)]; + + Ker[i-r][j].LoopHole() = long(t0); + } + } + + for (long i = 0; i < n-r; i++) { + for (long j = 0; j < n-r; j++) { + Ker[i][j+r].LoopHole() = 0; + } + Ker[i][i+r].LoopHole() = 1; + } + + if (pivoting) { + for (long i = 0; i < n-r; i++) { + zz_p *x = Ker[i].elts(); + + for (long k = n-1; k >= 0; k--) { + long pos = P[k]; + if (pos != k) swap(x[pos], x[k]); + } + } + } + } + } + + return r; + +} + +#endif + + + +static inline +void CopyBlock(unsigned long *dst_ptr, long dst_blk, const unsigned long *src_ptr, long src_blk, long src_limit) +{ + long src_row = src_blk*MAT_BLK_SZ; + long dst_row = dst_blk*MAT_BLK_SZ; + + long nrows = min(MAT_BLK_SZ, src_limit - src_row); + + for (long i = 0; i < nrows; i++) + for (long j = 0; j < MAT_BLK_SZ; j++) + dst_ptr[(dst_row + i)*MAT_BLK_SZ + j] = src_ptr[(src_row + i)*MAT_BLK_SZ + j]; + + for (long i = nrows; i < MAT_BLK_SZ; i++) + for (long j = 0; j < MAT_BLK_SZ; j++) + dst_ptr[(dst_row + i)*MAT_BLK_SZ + j] = 0; + +} + +static inline +void CopyBlock(unsigned long *dst_ptr, long dst_blk, const unsigned long *src_ptr, long src_blk) +{ + long src_row = src_blk*MAT_BLK_SZ; + long dst_row = dst_blk*MAT_BLK_SZ; + + long nrows = MAT_BLK_SZ; + + for (long i = 0; i < nrows; i++) + for (long j = 0; j < MAT_BLK_SZ; j++) + dst_ptr[(dst_row + i)*MAT_BLK_SZ + j] = src_ptr[(src_row + i)*MAT_BLK_SZ + j]; +} + +static inline +void TransposeBlock(unsigned long *dst_ptr, long dst_blk) +{ + dst_ptr += dst_blk*MAT_BLK_SZ*MAT_BLK_SZ; + + for (long i = 0; i < MAT_BLK_SZ; i++) + for (long j = 0; j < i; j++) + _ntl_swap(dst_ptr[i*MAT_BLK_SZ+j], dst_ptr[i+j*MAT_BLK_SZ]); +} + +static inline +void SwapOneRow(unsigned long *panelp, long i, long pos) +{ + unsigned long * NTL_RESTRICT pos_p = &panelp[pos*MAT_BLK_SZ]; + unsigned long * NTL_RESTRICT i_p = &panelp[i*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) + _ntl_swap(pos_p[j], i_p[j]); +} + +static inline +void ApplySwaps(unsigned long *panelp, long start, long end, const Vec& P) +{ + for (long i = start; i < end; i++) { + long pos = P[i]; + if (pos != i) + SwapOneRow(panelp, i, pos); + } +} + + +static inline +void MulAddBlock(unsigned long *x, const unsigned long *y, const unsigned long *z) +{ + // x += y*z + + muladd_all_by_32(0, MAT_BLK_SZ, x, y, z, MAT_BLK_SZ); +} + + +static +long elim_blk_L(const mat_zz_p& A, mat_zz_p *im, mat_zz_p *ker, + long w, bool full) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (w < 0 || w > m) LogicError("elim: bad args"); + + // take care of corner cases + if (n == 0) { + if (im) im->SetDims(0, m); + if (ker) ker->SetDims(0, 0); + return 0; + } + 
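+ // w == 0: nothing to eliminate -- the image is A itself (if full) and
+ // the kernel is the identity; handled below before any panel setup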
+ if (w == 0) { + if (im) { + if (full) + (*im) = A; + else + im->SetDims(0, m); + } + if (ker) ident(*ker, n); + return 0; + } + + if (NTL_OVERFLOW(n, MAT_BLK_SZ, 0)) ResourceError("dimension too large"); + if (NTL_OVERFLOW(m, MAT_BLK_SZ, 0)) ResourceError("dimension too large"); + + long npanels = (m+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + + Vec< UniqueArray > M; + M.SetLength(npanels); + for (long panel = 0; panel < npanels; panel++) { + M[panel].SetLength(n*MAT_BLK_SZ); + unsigned long *panelp = &M[panel][0]; + + for (long h = 0; h < n*MAT_BLK_SZ; h++) panelp[h] = 0; + } + + // copy A into panels + for (long jj = 0, panel = 0; jj < m; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, m); + unsigned long *panelp = &M[panel][0]; + + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + const zz_p *ap = A[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + panelp[j-jj] = rep(ap[j-jj]); + } + } + + UniqueArray aux_panel_store; + aux_panel_store.SetLength(n*MAT_BLK_SZ); + unsigned long * NTL_RESTRICT aux_panel = &aux_panel_store[0]; + + + UniqueArray buf_store1; + buf_store1.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + unsigned long *buf1 = &buf_store1[0]; + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + Vec pcol; + pcol.SetLength(n); + // pcol[i] records pivot columns for row i + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_reduce_struct red_struct = zz_p::red_struct(); + + bool pivoting = false; + + unsigned long ured_trigger = + (~(0UL)-cast_unsigned(p-1))/(cast_unsigned(p-1)*cast_unsigned(p-1)); + // NOTE: corner case at p == 2: need unsigned long to prevent overflow + + long red_trigger = min(cast_unsigned(NTL_MAX_LONG), ured_trigger); + + long red_count = red_trigger; + + long r = 0, rr = 0, k = 0, kk = 0; + long rpanel = 0, kpanel = 0; + + while (k < w) { + + if (r > rr && ker) { + // we have a panel from a previous iteration + // we store enough of it to facilitate the kernel + // computation later. 
At this point, we have + // r == rr+INV_BLK_SIZE, and it suffices to store + // rows [r..n) into M[rpanel], and this will not + // overwrite anything useful in M[rpanel] + + unsigned long *panelp = &M[rpanel][0]; + for (long h = r*MAT_BLK_SZ; h < n*MAT_BLK_SZ; h++) { + panelp[h] = aux_panel[h]; + } + + rpanel++; + } + + rr = r; + + for (long h = 0; h < n*MAT_BLK_SZ; h++) aux_panel[h] = 0; + + bool cleanup = false; + + if (red_count-MAT_BLK_SZ < 0) { + red_count = red_trigger; + cleanup = true; + } + + red_count = red_count-MAT_BLK_SZ; + + for (; r < rr+MAT_BLK_SZ && k < w; k++) { // panel incomplete + + if (k == kk+MAT_BLK_SZ) { // start new kpanel + kk = k; + kpanel++; + } + + unsigned long * NTL_RESTRICT kpanelp = &M[kpanel][0]; + + if (k == kk) { // a fresh kpanel -- special processing + + if (cleanup) { + for (long h = 0; h < n*MAT_BLK_SZ; h++) + kpanelp[h] = rem(kpanelp[h], p, red_struct); + } + + if (r > rr) { + + + // apply current sequence of permutations + + ApplySwaps(kpanelp, rr, r, P); + + // clean aux_panel + for (long h = 0; h < n*MAT_BLK_SZ; h++) + aux_panel[h] = rem(aux_panel[h], p, red_struct); + + // copy rows [rr..r) of kpanel into buf1 + for (long i = 0; i < (r-rr)*MAT_BLK_SZ; i++) + buf1[i] = rem(kpanelp[rr*MAT_BLK_SZ+i], p, red_struct); + + TransposeBlock(buf1, 0); + + // kpanel[rr..n) += aux_panel[rr..n)*buf1 + + muladd_all_by_32(rr, n, kpanelp, aux_panel, buf1, r-rr); + } + } + + long pos = -1; + long pivot; + long pivot_inv; + for (long i = r; i < n; i++) { + pivot = rem(kpanelp[i*MAT_BLK_SZ+(k-kk)], p, red_struct); + kpanelp[i*MAT_BLK_SZ+(k-kk)] = pivot; + + if (pivot != 0) { + pivot_inv = InvMod(pivot, p); + pos = i; + break; + } + } + + if (pos == -1) { + continue; + } + + unsigned long * NTL_RESTRICT y = &kpanelp[r*MAT_BLK_SZ]; + unsigned long * NTL_RESTRICT y1 = &aux_panel[r*MAT_BLK_SZ]; + if (r != pos) { + // swap rows pos and r + unsigned long * NTL_RESTRICT x = &kpanelp[pos*MAT_BLK_SZ]; + unsigned long * NTL_RESTRICT x1 = &aux_panel[pos*MAT_BLK_SZ]; + + for (long j = k-kk; j < MAT_BLK_SZ; j++) _ntl_swap(x[j], y[j]); + for (long j = 0; j < r-rr; j++) _ntl_swap(x1[j], y1[j]); + + P[r] = pos; + pivoting = true; + } + + // clean up row r of kpanel and aux_panel + for (long j = k-kk; j < MAT_BLK_SZ; j++) + y[j] = rem(y[j], p, red_struct); + for (long j = 0; j < r-rr; j++) + y1[j] = rem(y1[j], p, red_struct); + + // clear column + for (long i = r+1; i < n; i++) { + unsigned long * NTL_RESTRICT x = &kpanelp[i*MAT_BLK_SZ]; + unsigned long * NTL_RESTRICT x1 = &aux_panel[i*MAT_BLK_SZ]; + long t1 = rem(x[k-kk], p, red_struct); + t1 = MulMod(t1, pivot_inv, p); + t1 = NegateMod(t1, p); + x[k-kk] = 0; + x1[r-rr] = t1; + if (t1 == 0) continue; + + // add t1 * row r to row i + unsigned long ut1 = t1; + + for (long j = k-kk+1; j < MAT_BLK_SZ; j++) + x[j] += y[j]*ut1; + for (long j = 0; j < r-rr; j++) + x1[j] += y1[j]*ut1; + } + + pcol[r] = k; + r++; + } + + if (r > rr) { + + // we have a panel + + // clean it up + for (long h = 0; h < n*MAT_BLK_SZ; h++) + aux_panel[h] = rem(aux_panel[h], p, red_struct); + + bool seq = + double(npanels-(kpanel+1))*double(n-rr)*double(r-rr)*double(MAT_BLK_SZ) < PAR_THRESH; + + // apply aux_panel to remaining panels: [kpanel+1..npanels) + NTL_GEXEC_RANGE(seq, npanels-(kpanel+1), first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(red_struct) + NTL_IMPORT(aux_panel) + NTL_IMPORT(rr) + NTL_IMPORT(r) + + + UniqueArray buf_store; + buf_store.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + unsigned long *buf = &buf_store[0]; + + + for (long index = 
first; index < last; index++) { + long jpanel = index + kpanel+1; + + unsigned long * NTL_RESTRICT jpanelp = &M[jpanel][0]; + + if (cleanup) { + for (long h = 0; h < n*MAT_BLK_SZ; h++) + jpanelp[h] = rem(jpanelp[h], p, red_struct); + } + + // perform swaps + ApplySwaps(jpanelp, rr, r, P); + + // copy rows [rr..r) of jpanel into buf + for (long i = 0; i < (r-rr)*MAT_BLK_SZ; i++) + buf[i] = rem(jpanelp[rr*MAT_BLK_SZ+i], p, red_struct); + + TransposeBlock(buf, 0); + + // jpanel[rr..n) += aux_panel[rr..n)*buf + + muladd_all_by_32(rr, n, jpanelp, aux_panel, buf, r-rr); + } + + NTL_GEXEC_RANGE_END + + } + + } + + if (im) { + mat_zz_p& Im = *im;; + if (full) + Im.SetDims(n, m); + else + Im.SetDims(r, m); + + for (long i = 0; i < r; i++) { + long pc = pcol[i]; + for (long j = 0; j < pc; j++) Im[i][j].LoopHole() = 0; + for (long j = pc; j < m; j++) { + unsigned long t0 = M[j/MAT_BLK_SZ][i*MAT_BLK_SZ+(j%MAT_BLK_SZ)]; + Im[i][j].LoopHole() = rem(t0, p, red_struct); + } + } + + if (full) { + for (long i = r; i < n; i++) { + for (long j = 0; j < w; j++) Im[i][j].LoopHole() = 0; + for (long j = w; j < m; j++) { + unsigned long t0 = M[j/MAT_BLK_SZ][i*MAT_BLK_SZ+(j%MAT_BLK_SZ)]; + Im[i][j].LoopHole() = rem(t0, p, red_struct); + } + } + } + } + + if (ker) { + mat_zz_p& Ker = *ker; + Ker.SetDims(n-r, n); + if (r < n) { + + long start_block = r/MAT_BLK_SZ; + long end_block = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + long vblocks = end_block-start_block; + long hblocks = (r+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + Vec< UniqueArray > kerbuf; + kerbuf.SetLength(vblocks); + for (long i = 0; i < vblocks; i++) + kerbuf[i].SetLength(hblocks*MAT_BLK_SZ*MAT_BLK_SZ); + + long colblocks = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + // if r > rr, we have a panel sitting in + // aux_panel, which may or may not be a full panel + + unsigned long *initial_panel = 0; + if (r > rr) { + initial_panel = aux_panel; + } + else { + initial_panel = &M[hblocks-1][0]; + } + + for (long vb = start_block; vb < end_block; vb++) + CopyBlock(&kerbuf[vb-start_block][0], hblocks-1, initial_panel, vb, n); + + for (long hb = hblocks-2; hb >= 0; hb--) { + + ApplySwaps(&M[hb][0], (hb+1)*MAT_BLK_SZ, r, P); + + for (long b = hb+1; b < end_block; b++) { + CopyBlock(&M[hb][0], b-1, &M[hb][0], b, n); + TransposeBlock(&M[hb][0], b-1); + } + } + + bool seq = double(n-r)*double(r)*double(r)/2 < PAR_THRESH; + + + NTL_GEXEC_RANGE(seq, end_block-start_block, first, last) + NTL_IMPORT(p) + NTL_IMPORT(red_struct) + NTL_IMPORT(hblocks) + + for (long index = first; index < last; index++) { + long vb = index + start_block; + unsigned long *kerbufp = &kerbuf[vb-start_block][0]; + + for (long hb = hblocks-2; hb >= 0; hb--) { + unsigned long *colbuf = &M[hb][0]; + unsigned long *acc = &kerbufp[hb*MAT_BLK_SZ*MAT_BLK_SZ]; + + CopyBlock(acc, 0, colbuf, vb-1); + TransposeBlock(acc, 0); + + + unsigned long ured_trigger = + (~(0UL)-cast_unsigned(p-1))/(cast_unsigned(p-1)*cast_unsigned(p-1)); + // NOTE: corner case at p == 2: need unsigned long to prevent overflow + + long red_trigger = min(cast_unsigned(NTL_MAX_LONG), ured_trigger); + long red_count = red_trigger; + + for (long b = hb+1; b < hblocks; b++) { + + if (red_count-MAT_BLK_SZ < 0) { + red_count = red_trigger; + for (long h = 0; h < MAT_BLK_SZ*MAT_BLK_SZ; h++) + acc[h] = rem(acc[h], p, red_struct); + + } + red_count = red_count-MAT_BLK_SZ; + + MulAddBlock(acc, &kerbufp[b*MAT_BLK_SZ*MAT_BLK_SZ], + &colbuf[(b-1)*MAT_BLK_SZ*MAT_BLK_SZ]); + } + + for (long h = 0; h < MAT_BLK_SZ*MAT_BLK_SZ; h++) + acc[h] = rem(acc[h], p, red_struct); + } + } + + 
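+ // at this point each block of kerbuf holds the back-substituted
+ // kernel coefficients for its row block, fully reduced mod p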
NTL_GEXEC_RANGE_END + + for (long i = r; i < n; i++) { + + unsigned long *kerbufp = &kerbuf[(i/MAT_BLK_SZ)-start_block][0]; + + for (long j = 0; j < r; j++) { + unsigned long t0 = + kerbufp[(j/MAT_BLK_SZ)*MAT_BLK_SZ*MAT_BLK_SZ+ + (i%MAT_BLK_SZ)*MAT_BLK_SZ+(j%MAT_BLK_SZ)]; + + Ker[i-r][j].LoopHole() = long(t0); + } + } + + for (long i = 0; i < n-r; i++) { + for (long j = 0; j < n-r; j++) { + Ker[i][j+r].LoopHole() = 0; + } + Ker[i][i+r].LoopHole() = 1; + } + + if (pivoting) { + for (long i = 0; i < n-r; i++) { + zz_p *x = Ker[i].elts(); + + for (long k = n-1; k >= 0; k--) { + long pos = P[k]; + if (pos != k) swap(x[pos], x[k]); + } + } + } + } + } + + return r; + +} + + +static inline +void CopyBlock(long *dst_ptr, long dst_blk, const long *src_ptr, long src_blk, long src_limit) +{ + long src_row = src_blk*MAT_BLK_SZ; + long dst_row = dst_blk*MAT_BLK_SZ; + + long nrows = min(MAT_BLK_SZ, src_limit - src_row); + + for (long i = 0; i < nrows; i++) + for (long j = 0; j < MAT_BLK_SZ; j++) + dst_ptr[(dst_row + i)*MAT_BLK_SZ + j] = src_ptr[(src_row + i)*MAT_BLK_SZ + j]; + + for (long i = nrows; i < MAT_BLK_SZ; i++) + for (long j = 0; j < MAT_BLK_SZ; j++) + dst_ptr[(dst_row + i)*MAT_BLK_SZ + j] = 0; + +} + +static inline +void CopyBlock(long *dst_ptr, long dst_blk, const long *src_ptr, long src_blk) +{ + long src_row = src_blk*MAT_BLK_SZ; + long dst_row = dst_blk*MAT_BLK_SZ; + + long nrows = MAT_BLK_SZ; + + for (long i = 0; i < nrows; i++) + for (long j = 0; j < MAT_BLK_SZ; j++) + dst_ptr[(dst_row + i)*MAT_BLK_SZ + j] = src_ptr[(src_row + i)*MAT_BLK_SZ + j]; +} + +static inline +void TransposeBlock(long *dst_ptr, long dst_blk) +{ + dst_ptr += dst_blk*MAT_BLK_SZ*MAT_BLK_SZ; + + for (long i = 0; i < MAT_BLK_SZ; i++) + for (long j = 0; j < i; j++) + _ntl_swap(dst_ptr[i*MAT_BLK_SZ+j], dst_ptr[i+j*MAT_BLK_SZ]); +} + +static inline +void SwapOneRow(long *panelp, long i, long pos) +{ + long * NTL_RESTRICT pos_p = &panelp[pos*MAT_BLK_SZ]; + long * NTL_RESTRICT i_p = &panelp[i*MAT_BLK_SZ]; + for (long j = 0; j < MAT_BLK_SZ; j++) + _ntl_swap(pos_p[j], i_p[j]); +} + +static inline +void ApplySwaps(long *panelp, long start, long end, const Vec& P) +{ + for (long i = start; i < end; i++) { + long pos = P[i]; + if (pos != i) + SwapOneRow(panelp, i, pos); + } +} + + +static inline +void MulAddBlock(long *x, const long *y, const long *z, + long p, sp_ll_reduce_struct ll_red_struct) +{ + // x += y*z + + muladd_all_by_32(0, MAT_BLK_SZ, x, y, z, MAT_BLK_SZ, p, ll_red_struct); +} + + + +static +long elim_blk_LL(const mat_zz_p& A, mat_zz_p *im, mat_zz_p *ker, + long w, bool full) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (w < 0 || w > m) LogicError("elim: bad args"); + + // take care of corner cases + if (n == 0) { + if (im) im->SetDims(0, m); + if (ker) ker->SetDims(0, 0); + return 0; + } + + if (w == 0) { + if (im) { + if (full) + (*im) = A; + else + im->SetDims(0, m); + } + if (ker) ident(*ker, n); + return 0; + } + + if (NTL_OVERFLOW(n, MAT_BLK_SZ, 0)) ResourceError("dimension too large"); + if (NTL_OVERFLOW(m, MAT_BLK_SZ, 0)) ResourceError("dimension too large"); + + long npanels = (m+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + + Vec< UniqueArray > M; + M.SetLength(npanels); + for (long panel = 0; panel < npanels; panel++) { + M[panel].SetLength(n*MAT_BLK_SZ); + long *panelp = &M[panel][0]; + + for (long h = 0; h < n*MAT_BLK_SZ; h++) panelp[h] = 0; + } + + // copy A into panels + for (long jj = 0, panel = 0; jj < m; jj += MAT_BLK_SZ, panel++) { + long j_max = min(jj+MAT_BLK_SZ, m); + long *panelp = &M[panel][0]; 
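+ // copy columns [jj..j_max) of A into this panel, one row at a time;
+ // entries are stored as their representatives rep(a) in [0..p)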
+ + for (long i = 0; i < n; i++, panelp += MAT_BLK_SZ) { + const zz_p *ap = A[i].elts() + jj; + + for (long j = jj; j < j_max; j++) + panelp[j-jj] = rep(ap[j-jj]); + } + } + + UniqueArray aux_panel_store; + aux_panel_store.SetLength(n*MAT_BLK_SZ); + long * NTL_RESTRICT aux_panel = &aux_panel_store[0]; + + + UniqueArray buf_store1; + buf_store1.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + long *buf1 = &buf_store1[0]; + + Vec P; + P.SetLength(n); + for (long k = 0; k < n; k++) P[k] = k; + // records swap operations + + Vec pcol; + pcol.SetLength(n); + // pcol[i] records pivot columns for row i + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + sp_ll_reduce_struct ll_red_struct = zz_p::ll_red_struct(); + + bool pivoting = false; + + long r = 0, rr = 0, k = 0, kk = 0; + long rpanel = 0, kpanel = 0; + + while (k < w) { + + if (r > rr && ker) { + // we have a panel from a previous iteration + // we store enough of it to facilitate the kernel + // computation later. At this point, we have + // r == rr+INV_BLK_SIZE, and it suffices to store + // rows [r..n) into M[rpanel], and this will not + // overwrite anything useful in M[rpanel] + + long *panelp = &M[rpanel][0]; + for (long h = r*MAT_BLK_SZ; h < n*MAT_BLK_SZ; h++) { + panelp[h] = aux_panel[h]; + } + + rpanel++; + } + + rr = r; + + for (long h = 0; h < n*MAT_BLK_SZ; h++) aux_panel[h] = 0; + + for (; r < rr+MAT_BLK_SZ && k < w; k++) { // panel incomplete + + if (k == kk+MAT_BLK_SZ) { // start new kpanel + kk = k; + kpanel++; + } + + long * NTL_RESTRICT kpanelp = &M[kpanel][0]; + + if (k == kk) { // a fresh kpanel -- special processing + + + if (r > rr) { + + + // apply current sequence of permutations + + ApplySwaps(kpanelp, rr, r, P); + + // copy rows [rr..r) of kpanel into buf1 + for (long i = 0; i < (r-rr)*MAT_BLK_SZ; i++) + buf1[i] = kpanelp[rr*MAT_BLK_SZ+i]; + + TransposeBlock(buf1, 0); + + // kpanel[rr..n) += aux_panel[rr..n)*buf1 + + muladd_all_by_32(rr, n, kpanelp, aux_panel, buf1, r-rr, p, ll_red_struct); + } + } + + long pos = -1; + long pivot; + long pivot_inv; + for (long i = r; i < n; i++) { + pivot = kpanelp[i*MAT_BLK_SZ+(k-kk)]; + kpanelp[i*MAT_BLK_SZ+(k-kk)] = pivot; + + if (pivot != 0) { + pivot_inv = InvMod(pivot, p); + pos = i; + break; + } + } + + if (pos == -1) { + continue; + } + + long * NTL_RESTRICT y = &kpanelp[r*MAT_BLK_SZ]; + long * NTL_RESTRICT y1 = &aux_panel[r*MAT_BLK_SZ]; + if (r != pos) { + // swap rows pos and r + long * NTL_RESTRICT x = &kpanelp[pos*MAT_BLK_SZ]; + long * NTL_RESTRICT x1 = &aux_panel[pos*MAT_BLK_SZ]; + + for (long j = k-kk; j < MAT_BLK_SZ; j++) _ntl_swap(x[j], y[j]); + for (long j = 0; j < r-rr; j++) _ntl_swap(x1[j], y1[j]); + + P[r] = pos; + pivoting = true; + } + + // clear column + for (long i = r+1; i < n; i++) { + long * NTL_RESTRICT x = &kpanelp[i*MAT_BLK_SZ]; + long * NTL_RESTRICT x1 = &aux_panel[i*MAT_BLK_SZ]; + long t1 = x[k-kk]; + t1 = MulMod(t1, pivot_inv, p); + t1 = NegateMod(t1, p); + x[k-kk] = 0; + x1[r-rr] = t1; + if (t1 == 0) continue; + + // add t1 * row r to row i + long ut1 = t1; + mulmod_precon_t ut1_pinv = PrepMulModPrecon(ut1, p, pinv); + + for (long j = k-kk+1; j < MAT_BLK_SZ; j++) + x[j] = AddMod(x[j], MulModPrecon(y[j], ut1, p, ut1_pinv), p); + for (long j = 0; j < r-rr; j++) + x1[j] = AddMod(x1[j], MulModPrecon(y1[j], ut1, p, ut1_pinv), p); + } + + pcol[r] = k; + r++; + } + + if (r > rr) { + + // we have a panel + + bool seq = + double(npanels-(kpanel+1))*double(n-rr)*double(r-rr)*double(MAT_BLK_SZ) < PAR_THRESH; + + // apply aux_panel to remaining panels: 
[kpanel+1..npanels) + NTL_GEXEC_RANGE(seq, npanels-(kpanel+1), first, last) + NTL_IMPORT(p) + NTL_IMPORT(n) + NTL_IMPORT(ll_red_struct) + NTL_IMPORT(aux_panel) + NTL_IMPORT(rr) + NTL_IMPORT(r) + + + UniqueArray buf_store; + buf_store.SetLength(MAT_BLK_SZ*MAT_BLK_SZ); + long *buf = &buf_store[0]; + + + for (long index = first; index < last; index++) { + long jpanel = index + kpanel+1; + + long * NTL_RESTRICT jpanelp = &M[jpanel][0]; + + // perform swaps + ApplySwaps(jpanelp, rr, r, P); + + // copy rows [rr..r) of jpanel into buf + for (long i = 0; i < (r-rr)*MAT_BLK_SZ; i++) + buf[i] = jpanelp[rr*MAT_BLK_SZ+i]; + + TransposeBlock(buf, 0); + + // jpanel[rr..n) += aux_panel[rr..n)*buf + + muladd_all_by_32(rr, n, jpanelp, aux_panel, buf, r-rr, p, ll_red_struct); + } + + NTL_GEXEC_RANGE_END + + } + + } + + if (im) { + mat_zz_p& Im = *im;; + if (full) + Im.SetDims(n, m); + else + Im.SetDims(r, m); + + for (long i = 0; i < r; i++) { + long pc = pcol[i]; + for (long j = 0; j < pc; j++) Im[i][j].LoopHole() = 0; + for (long j = pc; j < m; j++) { + long t0 = M[j/MAT_BLK_SZ][i*MAT_BLK_SZ+(j%MAT_BLK_SZ)]; + Im[i][j].LoopHole() = t0; + } + } + + if (full) { + for (long i = r; i < n; i++) { + for (long j = 0; j < w; j++) Im[i][j].LoopHole() = 0; + for (long j = w; j < m; j++) { + long t0 = M[j/MAT_BLK_SZ][i*MAT_BLK_SZ+(j%MAT_BLK_SZ)]; + Im[i][j].LoopHole() = t0; + } + } + } + } + + if (ker) { + mat_zz_p& Ker = *ker; + Ker.SetDims(n-r, n); + if (r < n) { + + long start_block = r/MAT_BLK_SZ; + long end_block = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + long vblocks = end_block-start_block; + long hblocks = (r+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + Vec< UniqueArray > kerbuf; + kerbuf.SetLength(vblocks); + for (long i = 0; i < vblocks; i++) + kerbuf[i].SetLength(hblocks*MAT_BLK_SZ*MAT_BLK_SZ); + + long colblocks = (n+MAT_BLK_SZ-1)/MAT_BLK_SZ; + + // if r > rr, we have a panel sitting in + // aux_panel, which may or may not be a full panel + + long *initial_panel = 0; + if (r > rr) { + initial_panel = aux_panel; + } + else { + initial_panel = &M[hblocks-1][0]; + } + + for (long vb = start_block; vb < end_block; vb++) + CopyBlock(&kerbuf[vb-start_block][0], hblocks-1, initial_panel, vb, n); + + for (long hb = hblocks-2; hb >= 0; hb--) { + + ApplySwaps(&M[hb][0], (hb+1)*MAT_BLK_SZ, r, P); + + for (long b = hb+1; b < end_block; b++) { + CopyBlock(&M[hb][0], b-1, &M[hb][0], b, n); + TransposeBlock(&M[hb][0], b-1); + } + } + + bool seq = double(n-r)*double(r)*double(r)/2 < PAR_THRESH; + + + NTL_GEXEC_RANGE(seq, end_block-start_block, first, last) + NTL_IMPORT(p) + NTL_IMPORT(ll_red_struct) + NTL_IMPORT(hblocks) + + for (long index = first; index < last; index++) { + long vb = index + start_block; + long *kerbufp = &kerbuf[vb-start_block][0]; + + for (long hb = hblocks-2; hb >= 0; hb--) { + long *colbuf = &M[hb][0]; + long *acc = &kerbufp[hb*MAT_BLK_SZ*MAT_BLK_SZ]; + + CopyBlock(acc, 0, colbuf, vb-1); + TransposeBlock(acc, 0); + + for (long b = hb+1; b < hblocks; b++) { + MulAddBlock(acc, &kerbufp[b*MAT_BLK_SZ*MAT_BLK_SZ], + &colbuf[(b-1)*MAT_BLK_SZ*MAT_BLK_SZ], p, ll_red_struct); + } + } + } + + NTL_GEXEC_RANGE_END + + for (long i = r; i < n; i++) { + + long *kerbufp = &kerbuf[(i/MAT_BLK_SZ)-start_block][0]; + + for (long j = 0; j < r; j++) { + long t0 = + kerbufp[(j/MAT_BLK_SZ)*MAT_BLK_SZ*MAT_BLK_SZ+ + (i%MAT_BLK_SZ)*MAT_BLK_SZ+(j%MAT_BLK_SZ)]; + + Ker[i-r][j].LoopHole() = long(t0); + } + } + + for (long i = 0; i < n-r; i++) { + for (long j = 0; j < n-r; j++) { + Ker[i][j+r].LoopHole() = 0; + } + Ker[i][i+r].LoopHole() = 1; + } + + 
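+ // undo the recorded row transpositions in reverse order so the
+ // kernel vectors refer to the original variable ordering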
if (pivoting) { + for (long i = 0; i < n-r; i++) { + zz_p *x = Ker[i].elts(); + + for (long k = n-1; k >= 0; k--) { + long pos = P[k]; + if (pos != k) swap(x[pos], x[k]); + } + } + } + } + } + + return r; + +} + + +#endif + + + +static +long elim(const mat_zz_p& A, mat_zz_p *im, mat_zz_p *ker, long w, bool full) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (w < 0 || w > m) LogicError("elim: bad args"); + +#ifndef NTL_HAVE_LL_TYPE + + return elim_basic(A, im, ker, w, full); + +#else + + long p = zz_p::modulus(); + + if (n/MAT_BLK_SZ < 4 || w/MAT_BLK_SZ < 4) { + return elim_basic(A, im, ker, w, full); + } + else { + long V = 4*MAT_BLK_SZ; + +#ifdef NTL_HAVE_AVX + if (p-1 <= MAX_DBL_INT && + V <= (MAX_DBL_INT-(p-1))/(p-1) && + V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { + + return elim_blk_DD(A, im, ker, w, full); + } + else +#endif + if (cast_unsigned(V) <= (~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1) && + cast_unsigned(V)*cast_unsigned(p-1) <= (~(0UL)-cast_unsigned(p-1))/cast_unsigned(p-1)) { + + return elim_blk_L(A, im, ker, w, full); + + } + else { + + return elim_blk_LL(A, im, ker, w, full); + } + + } + +#endif + + + +} + + +// ****************************************************************** +// +// High level interfaces +// +// ****************************************************************** + + + +long gauss(mat_zz_p& M, long w) +{ + return elim(M, &M, 0, w, true); +} + + +long gauss(mat_zz_p& M) +{ + return gauss(M, M.NumCols()); +} + +void image(mat_zz_p& X, const mat_zz_p& A) +{ + elim(A, &X, 0, A.NumCols(), false); +} + +void kernel(mat_zz_p& X, const mat_zz_p& A) +{ + elim(A, 0, &X, A.NumCols(), false); +} + + +// ****************************************************************** +// +// Operator/functional notation +// +// ****************************************************************** + + + + +mat_zz_p operator+(const mat_zz_p& a, const mat_zz_p& b) +{ + mat_zz_p res; + add(res, a, b); + NTL_OPT_RETURN(mat_zz_p, res); +} + +mat_zz_p operator*(const mat_zz_p& a, const mat_zz_p& b) +{ + mat_zz_p res; + mul_aux(res, a, b); + NTL_OPT_RETURN(mat_zz_p, res); +} + +mat_zz_p operator-(const mat_zz_p& a, const mat_zz_p& b) +{ + mat_zz_p res; + sub(res, a, b); + NTL_OPT_RETURN(mat_zz_p, res); +} + + +mat_zz_p operator-(const mat_zz_p& a) +{ + mat_zz_p res; + negate(res, a); + NTL_OPT_RETURN(mat_zz_p, res); +} + + +vec_zz_p operator*(const mat_zz_p& a, const vec_zz_p& b) +{ + vec_zz_p res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_zz_p, res); +} + +vec_zz_p operator*(const vec_zz_p& a, const mat_zz_p& b) +{ + vec_zz_p res; + mul(res, a, b); + NTL_OPT_RETURN(vec_zz_p, res); +} + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mat_lzz_pE.c b/thirdparty/linux/ntl/src/mat_lzz_pE.c new file mode 100644 index 0000000000..35f7421d2f --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_lzz_pE.c @@ -0,0 +1,884 @@ + +#include + +#include + +NTL_START_IMPL + + +void add(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix add: dimension mismatch"); + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + add(X(i,j), A(i,j), B(i,j)); +} + +void sub(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + if (B.NumRows() != n || B.NumCols() != m) + LogicError("matrix sub: dimension mismatch"); + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; 
j++) + sub(X(i,j), A(i,j), B(i,j)); +} + +void negate(mat_zz_pE& X, const mat_zz_pE& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + + X.SetDims(n, m); + + long i, j; + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + negate(X(i,j), A(i,j)); +} + +void mul_aux(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B) +{ + long n = A.NumRows(); + long l = A.NumCols(); + long m = B.NumCols(); + + if (l != B.NumRows()) + LogicError("matrix mul: dimension mismatch"); + + X.SetDims(n, m); + + long i, j, k; + zz_pX acc, tmp; + + for (i = 1; i <= n; i++) { + for (j = 1; j <= m; j++) { + clear(acc); + for(k = 1; k <= l; k++) { + mul(tmp, rep(A(i,k)), rep(B(k,j))); + add(acc, acc, tmp); + } + conv(X(i,j), acc); + } + } +} + + +void mul(mat_zz_pE& X, const mat_zz_pE& A, const mat_zz_pE& B) +{ + if (&X == &A || &X == &B) { + mat_zz_pE tmp; + mul_aux(tmp, A, B); + X = tmp; + } + else + mul_aux(X, A, B); +} + + +static +void mul_aux(vec_zz_pE& x, const mat_zz_pE& A, const vec_zz_pE& b) +{ + long n = A.NumRows(); + long l = A.NumCols(); + + if (l != b.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(n); + + long i, k; + zz_pX acc, tmp; + + for (i = 1; i <= n; i++) { + clear(acc); + for (k = 1; k <= l; k++) { + mul(tmp, rep(A(i,k)), rep(b(k))); + add(acc, acc, tmp); + } + conv(x(i), acc); + } +} + + +void mul(vec_zz_pE& x, const mat_zz_pE& A, const vec_zz_pE& b) +{ + if (&b == &x || A.position1(x) != -1) { + vec_zz_pE tmp; + mul_aux(tmp, A, b); + x = tmp; + } + else + mul_aux(x, A, b); +} + +static +void mul_aux(vec_zz_pE& x, const vec_zz_pE& a, const mat_zz_pE& B) +{ + long n = B.NumRows(); + long l = B.NumCols(); + + if (n != a.length()) + LogicError("matrix mul: dimension mismatch"); + + x.SetLength(l); + + long i, k; + zz_pX acc, tmp; + + for (i = 1; i <= l; i++) { + clear(acc); + for (k = 1; k <= n; k++) { + mul(tmp, rep(a(k)), rep(B(k,i))); + add(acc, acc, tmp); + } + conv(x(i), acc); + } +} + +void mul(vec_zz_pE& x, const vec_zz_pE& a, const mat_zz_pE& B) +{ + if (&a == &x) { + vec_zz_pE tmp; + mul_aux(tmp, a, B); + x = tmp; + } + else + mul_aux(x, a, B); + +} + + + +void ident(mat_zz_pE& X, long n) +{ + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + set(X(i, j)); + else + clear(X(i, j)); +} + + +void determinant(zz_pE& d, const mat_zz_pE& M_in) +{ + long k, n; + long i, j; + long pos; + zz_pX t1, t2; + zz_pX *x, *y; + + const zz_pXModulus& p = zz_pE::modulus(); + + n = M_in.NumRows(); + + if (M_in.NumCols() != n) + LogicError("determinant: nonsquare matrix"); + + if (n == 0) { + set(d); + return; + } + + + UniqueArray M_store; + M_store.SetLength(n); + vec_zz_pX *M = M_store.get(); + + for (i = 0; i < n; i++) { + M[i].SetLength(n); + for (j = 0; j < n; j++) { + M[i][j].rep.SetMaxLength(2*deg(p)-1); + M[i][j] = rep(M_in[i][j]); + } + } + + zz_pX det; + set(det); + + for (k = 0; k < n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) + pos = i; + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + negate(det, det); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + negate(t1, t1); + for (j = k+1; j < n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j < n; j++, 
x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + conv(d, det); +} + +long IsIdent(const mat_zz_pE& A, long n) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (!IsOne(A(i, j))) return 0; + } + + return 1; +} + + +void transpose(mat_zz_pE& X, const mat_zz_pE& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + + long i, j; + + if (&X == & A) { + if (n == m) + for (i = 1; i <= n; i++) + for (j = i+1; j <= n; j++) + swap(X(i, j), X(j, i)); + else { + mat_zz_pE tmp; + tmp.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + tmp(j, i) = A(i, j); + X.kill(); + X = tmp; + } + } + else { + X.SetDims(m, n); + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + X(j, i) = A(i, j); + } +} + + +static +void solve_impl(zz_pE& d, vec_zz_pE& X, const mat_zz_pE& A, const vec_zz_pE& b, bool trans) + +{ + long n = A.NumRows(); + if (A.NumCols() != n) + LogicError("solve: nonsquare matrix"); + + if (b.length() != n) + LogicError("solve: dimension mismatch"); + + if (n == 0) { + set(d); + X.SetLength(0); + return; + } + + long i, j, k, pos; + zz_pX t1, t2; + zz_pX *x, *y; + + const zz_pXModulus& p = zz_pE::modulus(); + + + UniqueArray M_store; + M_store.SetLength(n); + vec_zz_pX *M = M_store.get(); + + for (i = 0; i < n; i++) { + M[i].SetLength(n+1); + if (trans) { + for (j = 0; j < n; j++) { + M[i][j].rep.SetMaxLength(2*deg(p)-1); + M[i][j] = rep(A[j][i]); + } + } + else { + for (j = 0; j < n; j++) { + M[i][j].rep.SetMaxLength(2*deg(p)-1); + M[i][j] = rep(A[i][j]); + } + } + M[i][n].rep.SetMaxLength(2*deg(p)-1); + M[i][n] = rep(b[i]); + } + + zz_pX det; + set(det); + + for (k = 0; k < n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + negate(det, det); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + negate(t1, t1); + for (j = k+1; j <= n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j <= n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + X.SetLength(n); + for (i = n-1; i >= 0; i--) { + clear(t1); + for (j = i+1; j < n; j++) { + mul(t2, rep(X[j]), M[i][j]); + add(t1, t1, t2); + } + sub(t1, t1, M[i][n]); + conv(X[i], t1); + } + + conv(d, det); +} + +void solve(zz_pE& d, vec_zz_pE& x, const mat_zz_pE& A, const vec_zz_pE& b) +{ + solve_impl(d, x, A, b, true); +} + +void solve(zz_pE& d, const mat_zz_pE& A, vec_zz_pE& x, const vec_zz_pE& b) +{ + solve_impl(d, x, A, b, false); +} + +void inv(zz_pE& d, mat_zz_pE& X, const mat_zz_pE& A) +{ + long n = A.NumRows(); + if (A.NumCols() != n) + LogicError("inv: nonsquare matrix"); + + if (n == 0) { + set(d); + X.SetDims(0, 0); + return; + } + + long i, j, k, pos; + zz_pX t1, t2; + zz_pX *x, *y; + + const zz_pXModulus& p = zz_pE::modulus(); + + + UniqueArray M_store; + M_store.SetLength(n); + vec_zz_pX *M = M_store.get(); + + for (i = 0; i < n; i++) { + M[i].SetLength(2*n); + for (j = 0; j < n; j++) { + 
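+ // build the augmented matrix [A | I]: the left half copies A and
+ // the right half starts out as the identity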
M[i][j].rep.SetMaxLength(2*deg(p)-1); + M[i][j] = rep(A[i][j]); + M[i][n+j].rep.SetMaxLength(2*deg(p)-1); + clear(M[i][n+j]); + } + set(M[i][n+i]); + } + + zz_pX det; + set(det); + + for (k = 0; k < n; k++) { + pos = -1; + for (i = k; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + if (k != pos) { + swap(M[pos], M[k]); + negate(det, det); + } + + MulMod(det, det, M[k][k], p); + + // make M[k, k] == -1 mod p, and make row k reduced + + InvMod(t1, M[k][k], p); + negate(t1, t1); + for (j = k+1; j < 2*n; j++) { + rem(t2, M[k][j], p); + MulMod(M[k][j], t2, t1, p); + } + + for (i = k+1; i < n; i++) { + // M[i] = M[i] + M[k]*M[i,k] + + t1 = M[i][k]; // this is already reduced + + x = M[i].elts() + (k+1); + y = M[k].elts() + (k+1); + + for (j = k+1; j < 2*n; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(*x, *x, t2); + } + } + } + else { + clear(d); + return; + } + } + + X.SetDims(n, n); + for (k = 0; k < n; k++) { + for (i = n-1; i >= 0; i--) { + clear(t1); + for (j = i+1; j < n; j++) { + mul(t2, rep(X[j][k]), M[i][j]); + add(t1, t1, t2); + } + sub(t1, t1, M[i][n+k]); + conv(X[i][k], t1); + } + } + + conv(d, det); +} + + + +long gauss(mat_zz_pE& M_in, long w) +{ + long k, l; + long i, j; + long pos; + zz_pX t1, t2, t3; + zz_pX *x, *y; + + long n = M_in.NumRows(); + long m = M_in.NumCols(); + + if (w < 0 || w > m) + LogicError("gauss: bad args"); + + const zz_pXModulus& p = zz_pE::modulus(); + + + UniqueArray M_store; + M_store.SetLength(n); + vec_zz_pX *M = M_store.get(); + + for (i = 0; i < n; i++) { + M[i].SetLength(m); + for (j = 0; j < m; j++) { + M[i][j].rep.SetMaxLength(2*deg(p)-1); + M[i][j] = rep(M_in[i][j]); + } + } + + l = 0; + for (k = 0; k < w && l < n; k++) { + + pos = -1; + for (i = l; i < n; i++) { + rem(t1, M[i][k], p); + M[i][k] = t1; + if (pos == -1 && !IsZero(t1)) { + pos = i; + } + } + + if (pos != -1) { + swap(M[pos], M[l]); + + InvMod(t3, M[l][k], p); + negate(t3, t3); + + for (j = k+1; j < m; j++) { + rem(M[l][j], M[l][j], p); + } + + for (i = l+1; i < n; i++) { + // M[i] = M[i] + M[l]*M[i,k]*t3 + + MulMod(t1, M[i][k], t3, p); + + clear(M[i][k]); + + x = M[i].elts() + (k+1); + y = M[l].elts() + (k+1); + + for (j = k+1; j < m; j++, x++, y++) { + // *x = *x + (*y)*t1 + + mul(t2, *y, t1); + add(t2, t2, *x); + *x = t2; + } + } + + l++; + } + } + + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + conv(M_in[i][j], M[i][j]); + + return l; +} + +long gauss(mat_zz_pE& M) +{ + return gauss(M, M.NumCols()); +} + +void image(mat_zz_pE& X, const mat_zz_pE& A) +{ + mat_zz_pE M; + M = A; + long r = gauss(M); + M.SetDims(r, M.NumCols()); + X = M; +} + +void kernel(mat_zz_pE& X, const mat_zz_pE& A) +{ + long m = A.NumRows(); + long n = A.NumCols(); + + mat_zz_pE M; + long r; + + transpose(M, A); + r = gauss(M); + + X.SetDims(m-r, m); + + long i, j, k, s; + zz_pX t1, t2; + + zz_pE T3; + + vec_long D; + D.SetLength(m); + for (j = 0; j < m; j++) D[j] = -1; + + vec_zz_pE inverses; + inverses.SetLength(m); + + j = -1; + for (i = 0; i < r; i++) { + do { + j++; + } while (IsZero(M[i][j])); + + D[j] = i; + inv(inverses[j], M[i][j]); + } + + for (k = 0; k < m-r; k++) { + vec_zz_pE& v = X[k]; + long pos = 0; + for (j = m-1; j >= 0; j--) { + if (D[j] == -1) { + if (pos == k) + set(v[j]); + else + clear(v[j]); + pos++; + } + else { + i = D[j]; + + clear(t1); + + for (s = j+1; s < m; s++) { + mul(t2, rep(v[s]), rep(M[i][s])); + add(t1, t1, t2); + } + + conv(T3, t1); + mul(T3, T3, inverses[j]); + 
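+ // back-substitution step for the homogeneous system:
+ // v[j] = -(sum of the later terms) / M[i][j]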
negate(v[j], T3); + } + } + } +} + +void mul(mat_zz_pE& X, const mat_zz_pE& A, const zz_pE& b_in) +{ + zz_pE b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + +void mul(mat_zz_pE& X, const mat_zz_pE& A, const zz_p& b_in) +{ + NTL_zz_pRegister(b); + b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + +void mul(mat_zz_pE& X, const mat_zz_pE& A, long b_in) +{ + NTL_zz_pRegister(b); + b = b_in; + long n = A.NumRows(); + long m = A.NumCols(); + + X.SetDims(n, m); + + long i, j; + for (i = 0; i < n; i++) + for (j = 0; j < m; j++) + mul(X[i][j], A[i][j], b); +} + +void diag(mat_zz_pE& X, long n, const zz_pE& d_in) +{ + zz_pE d = d_in; + X.SetDims(n, n); + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i == j) + X(i, j) = d; + else + clear(X(i, j)); +} + +long IsDiag(const mat_zz_pE& A, long n, const zz_pE& d) +{ + if (A.NumRows() != n || A.NumCols() != n) + return 0; + + long i, j; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + if (i != j) { + if (!IsZero(A(i, j))) return 0; + } + else { + if (A(i, j) != d) return 0; + } + + return 1; +} + + +long IsZero(const mat_zz_pE& a) +{ + long n = a.NumRows(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +void clear(mat_zz_pE& x) +{ + long n = x.NumRows(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + + +mat_zz_pE operator+(const mat_zz_pE& a, const mat_zz_pE& b) +{ + mat_zz_pE res; + add(res, a, b); + NTL_OPT_RETURN(mat_zz_pE, res); +} + +mat_zz_pE operator*(const mat_zz_pE& a, const mat_zz_pE& b) +{ + mat_zz_pE res; + mul_aux(res, a, b); + NTL_OPT_RETURN(mat_zz_pE, res); +} + +mat_zz_pE operator-(const mat_zz_pE& a, const mat_zz_pE& b) +{ + mat_zz_pE res; + sub(res, a, b); + NTL_OPT_RETURN(mat_zz_pE, res); +} + + +mat_zz_pE operator-(const mat_zz_pE& a) +{ + mat_zz_pE res; + negate(res, a); + NTL_OPT_RETURN(mat_zz_pE, res); +} + + +vec_zz_pE operator*(const mat_zz_pE& a, const vec_zz_pE& b) +{ + vec_zz_pE res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_zz_pE, res); +} + +vec_zz_pE operator*(const vec_zz_pE& a, const mat_zz_pE& b) +{ + vec_zz_pE res; + mul_aux(res, a, b); + NTL_OPT_RETURN(vec_zz_pE, res); +} + +void inv(mat_zz_pE& X, const mat_zz_pE& A) +{ + zz_pE d; + inv(d, X, A); + if (d == 0) ArithmeticError("inv: non-invertible matrix"); +} + +void power(mat_zz_pE& X, const mat_zz_pE& A, const ZZ& e) +{ + if (A.NumRows() != A.NumCols()) LogicError("power: non-square matrix"); + + if (e == 0) { + ident(X, A.NumRows()); + return; + } + + mat_zz_pE T1, T2; + long i, k; + + k = NumBits(e); + T1 = A; + + for (i = k-2; i >= 0; i--) { + sqr(T2, T1); + if (bit(e, i)) + mul(T1, T2, A); + else + T1 = T2; + } + + if (e < 0) + inv(X, T1); + else + X = T1; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mat_lzz_pTest.c b/thirdparty/linux/ntl/src/mat_lzz_pTest.c new file mode 100644 index 0000000000..8c79ca8c78 --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_lzz_pTest.c @@ -0,0 +1,269 @@ + +#include + +NTL_CLIENT + + + +void FillRandom(Mat& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + for (long i = 0; i < n; i++) + for (long j = 0; j < m; j++) + random(A[i][j]); +} + +void FillRandom1(Mat& A) +{ + long n = A.NumRows(); + long m = A.NumCols(); + for (long j = 0; j < m; j++) { + if (j > 0 && 
RandomBnd(2)) { + for (long i = 0; i < n; i++) + A[i][j] = A[i][j-1]; + } + else { + for (long i = 0; i < n; i++) + random(A[i][j]); + } + } +} + +void FillRandom(Vec& A) +{ + long n = A.length(); + for (long i = 0; i < n; i++) + random(A[i]); +} + +long old_gauss(mat_zz_p& M, long w) +{ + using NTL_NAMESPACE::negate; + long k, l; + long i, j; + long pos; + zz_p t1, t2, t3; + zz_p *x, *y; + + long n = M.NumRows(); + long m = M.NumCols(); + + if (w < 0 || w > m) + LogicError("gauss: bad args"); + + long p = zz_p::modulus(); + mulmod_t pinv = zz_p::ModulusInverse(); + long T1, T2; + + l = 0; + for (k = 0; k < w && l < n; k++) { + + pos = -1; + for (i = l; i < n; i++) { + if (!IsZero(M[i][k])) { + pos = i; + break; + } + } + + if (pos != -1) { + swap(M[pos], M[l]); + + inv(t3, M[l][k]); + negate(t3, t3); + + for (i = l+1; i < n; i++) { + // M[i] = M[i] + M[l]*M[i,k]*t3 + + mul(t1, M[i][k], t3); + + T1 = rep(t1); + mulmod_precon_t T1pinv = PrepMulModPrecon(T1, p, pinv); + + clear(M[i][k]); + + x = M[i].elts() + (k+1); + y = M[l].elts() + (k+1); + + for (j = k+1; j < m; j++, x++, y++) { + // *x = *x + (*y)*t1 + + T2 = MulModPrecon(rep(*y), T1, p, T1pinv); + T2 = AddMod(T2, rep(*x), p); + (*x).LoopHole() = T2; + } + } + + l++; + } + } + + return l; +} + +long old_gauss(mat_zz_p& M) +{ + return old_gauss(M, M.NumCols()); +} + +void old_image(mat_zz_p& X, const mat_zz_p& A) +{ + mat_zz_p M; + M = A; + long r = old_gauss(M); + M.SetDims(r, M.NumCols()); + X = M; +} + +int main(int argc, char **argv) +{ + long iters = 100; + + +#if 1 + cerr << "testing multiplication"; + for (long cnt = 0; cnt < iters; cnt++) { + cerr << "."; + + long bnd = (cnt%2) ? 25 : 2000; + + long len = RandomBnd(NTL_SP_NBITS-3)+4; + long n = RandomBnd(bnd); + long l = RandomBnd(bnd); + long m = RandomBnd(bnd); + + long p = RandomPrime_long(len); + zz_p::init(p); + + Mat A, B, X; + + A.SetDims(n, l); + B.SetDims(l, m); + + FillRandom(A); + FillRandom(B); + + X.SetDims(n, m); + + vec_zz_p R; + + R.SetLength(m); + for (long i = 0; i < m; i++) random(R[i]); + + mul(X, A, B); + + if (X*R != A*(B*R)) + cerr << "*\n*\n*\n*\n*\n*********** oops " << len << " " << n << " " << l << " " + << m << "\n"; + } +#endif + +#if 1 + cerr << "\ntesting inversion"; + for (long cnt = 0; cnt < iters; cnt++) { + cerr << "."; + long bnd = (cnt%2) ? 25 : 1500; + + long len = RandomBnd(NTL_SP_NBITS-3)+4; + long n = RandomBnd(bnd); + + long p = RandomPrime_long(len); + zz_p::init(p); + + Mat A, X; + + A.SetDims(n, n); + + FillRandom(A); + + + vec_zz_p R; + + R.SetLength(n); + for (long i = 0; i < n; i++) random(R[i]); + + zz_p d; + + inv(d, X, A); + + if (d != 0) { + if (R != A*(X*R)) + cerr << "\n*\n*\n*\n*\n*********** oops " << len << " " << n << "\n"; + } + else { + cerr << "[singular]"; + } + } +#endif + +#if 1 + cerr << "\ntesting solve"; + for (long cnt = 0; cnt < iters; cnt++) { + cerr << "."; + long bnd = (cnt%2) ? 25 : 2000; + + long len = RandomBnd(NTL_SP_NBITS-3)+4; + long n = RandomBnd(bnd); + + long p = RandomPrime_long(len); + zz_p::init(p); + + Mat A; + + A.SetDims(n, n); + FillRandom(A); + + Vec x, b; + b.SetLength(n); + FillRandom(b); + + zz_p d; + + solve(d, A, x, b); + + if (d != 0) { + if (A*x != b) + cerr << "\n*\n*\n*\n*\n*********** oops " << len << " " << n << "\n"; + } + else { + cerr << "[singular]"; + } + } +#endif + +#if 1 + cerr << "\ntesting image and kernel"; + for (long cnt = 0; cnt < iters; cnt++) { + cerr << "."; + long bnd = (cnt%2) ? 
25 : 1500; + + long len = RandomBnd(NTL_SP_NBITS-3)+4; + long n = RandomBnd(bnd); + long m = RandomBnd(bnd); + + long p = RandomPrime_long(len); + zz_p::init(p); + + Mat A; + + A.SetDims(n, m); + FillRandom1(A); + + Mat im, im1, ker1; + + old_image(im, A); + image(im1, A); + kernel(ker1, A); + + + if (im != im1 || !IsZero(ker1*A) || im1.NumRows() + ker1.NumRows() != n) { + cerr << "\n*\n*\n*\n*\n*********** oops " << len << " " << n << m << "\n"; + } + } +#endif + + cerr << "\n"; + +} + diff --git a/thirdparty/linux/ntl/src/mat_poly_ZZ.c b/thirdparty/linux/ntl/src/mat_poly_ZZ.c new file mode 100644 index 0000000000..351fc131a0 --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_poly_ZZ.c @@ -0,0 +1,116 @@ +#include +#include +#include + +#include + +NTL_START_IMPL + +static +long CharPolyBound(const mat_ZZ& a) +// This bound is computed via interpolation +// through complex roots of unity. + +{ + long n = a.NumRows(); + long i; + ZZ res, t1, t2; + + set(res); + + for (i = 0; i < n; i++) { + InnerProduct(t1, a[i], a[i]); + abs(t2, a[i][i]); + mul(t2, t2, 2); + add(t2, t2, 1); + add(t1, t1, t2); + if (t1 > 1) { + SqrRoot(t1, t1); + add(t1, t1, 1); + } + mul(res, res, t1); + } + + return NumBits(res); +} + +void CharPoly(ZZX& gg, const mat_ZZ& a, long deterministic) +{ + long n = a.NumRows(); + if (a.NumCols() != n) + LogicError("CharPoly: nonsquare matrix"); + + if (n == 0) { + set(gg); + return; + } + + + if (n == 1) { + ZZ t; + SetX(gg); + negate(t, a(1, 1)); + SetCoeff(gg, 0, t); + return; + } + + long bound = 2 + CharPolyBound(a); + + zz_pBak bak; + bak.save(); + + ZZ_pBak bak1; + bak1.save(); + + ZZX g; + ZZ prod; + + clear(g); + set(prod); + + long i; + + long instable = 1; + + long gp_cnt = 0; + + for (i = 0; ; i++) { + if (NumBits(prod) > bound) + break; + + if (!deterministic && + !instable && bound > 1000 && NumBits(prod) < 0.25*bound) { + long plen = 90 + NumBits(max(bound, MaxBits(g))); + + ZZ P; + + GenPrime(P, plen, 90 + 2*NumBits(gp_cnt++)); + + ZZ_p::init(P); + mat_ZZ_p A; + ZZ_pX G; + conv(A, a); + CharPoly(G, A); + + if (CRT(g, prod, G)) + instable = 1; + else + break; + } + + zz_p::FFTInit(i); + + mat_zz_p A; + zz_pX G; + conv(A, a); + CharPoly(G, A); + instable = CRT(g, prod, G); + } + + gg = g; + + bak.restore(); + bak1.restore(); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mat_poly_ZZ_p.c b/thirdparty/linux/ntl/src/mat_poly_ZZ_p.c new file mode 100644 index 0000000000..160e4cd3dd --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_poly_ZZ_p.c @@ -0,0 +1,88 @@ +#include + +#include + +NTL_START_IMPL + + +void CharPoly(ZZ_pX& f, const mat_ZZ_p& M) +{ + long n = M.NumRows(); + if (M.NumCols() != n) + LogicError("CharPoly: nonsquare matrix"); + + if (n == 0) { + set(f); + return; + } + + ZZ_p t; + + if (n == 1) { + SetX(f); + negate(t, M(1, 1)); + SetCoeff(f, 0, t); + return; + } + + mat_ZZ_p H; + + H = M; + + long i, j, m; + ZZ_p u, t1; + + for (m = 2; m <= n-1; m++) { + i = m; + while (i <= n && IsZero(H(i, m-1))) + i++; + + if (i <= n) { + t = H(i, m-1); + if (i > m) { + swap(H(i), H(m)); + // swap columns i and m + for (j = 1; j <= n; j++) + swap(H(j, i), H(j, m)); + } + + for (i = m+1; i <= n; i++) { + div(u, H(i, m-1), t); + for (j = m; j <= n; j++) { + mul(t1, u, H(m, j)); + sub(H(i, j), H(i, j), t1); + } + + for (j = 1; j <= n; j++) { + mul(t1, u, H(j, i)); + add(H(j, m), H(j, m), t1); + } + } + } + } + + vec_ZZ_pX F; + F.SetLength(n+1); + ZZ_pX T; + T.SetMaxLength(n); + + set(F[0]); + for (m = 1; m <= n; m++) { + LeftShift(F[m], F[m-1], 1); + mul(T, F[m-1], 
H(m, m)); + sub(F[m], F[m], T); + + set(t); + for (i = 1; i <= m-1; i++) { + mul(t, t, H(m-i+1, m-i)); + mul(t1, t, H(m-i, m)); + mul(T, F[m-i-1], t1); + sub(F[m], F[m], T); + } + } + + f = F[n]; +} + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mat_poly_lzz_p.c b/thirdparty/linux/ntl/src/mat_poly_lzz_p.c new file mode 100644 index 0000000000..db4ef2261a --- /dev/null +++ b/thirdparty/linux/ntl/src/mat_poly_lzz_p.c @@ -0,0 +1,87 @@ +#include + +#include + +NTL_START_IMPL + + +void CharPoly(zz_pX& f, const mat_zz_p& M) +{ + long n = M.NumRows(); + if (M.NumCols() != n) + LogicError("CharPoly: nonsquare matrix"); + + if (n == 0) { + set(f); + return; + } + + zz_p t; + + if (n == 1) { + SetX(f); + negate(t, M(1, 1)); + SetCoeff(f, 0, t); + return; + } + + mat_zz_p H; + + H = M; + + long i, j, m; + zz_p u, t1; + + for (m = 2; m <= n-1; m++) { + i = m; + while (i <= n && IsZero(H(i, m-1))) + i++; + + if (i <= n) { + t = H(i, m-1); + if (i > m) { + swap(H(i), H(m)); + // swap columns i and m + for (j = 1; j <= n; j++) + swap(H(j, i), H(j, m)); + } + + for (i = m+1; i <= n; i++) { + div(u, H(i, m-1), t); + for (j = m; j <= n; j++) { + mul(t1, u, H(m, j)); + sub(H(i, j), H(i, j), t1); + } + + for (j = 1; j <= n; j++) { + mul(t1, u, H(j, i)); + add(H(j, m), H(j, m), t1); + } + } + } + } + + vec_zz_pX F; + F.SetLength(n+1); + zz_pX T; + T.SetMaxLength(n); + + set(F[0]); + for (m = 1; m <= n; m++) { + LeftShift(F[m], F[m-1], 1); + mul(T, F[m-1], H(m, m)); + sub(F[m], F[m], T); + + set(t); + for (i = 1; i <= m-1; i++) { + mul(t, t, H(m-i+1, m-i)); + mul(t1, t, H(m-i, m)); + mul(T, F[m-i-1], t1); + sub(F[m], F[m], T); + } + } + + f = F[n]; +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/mfile b/thirdparty/linux/ntl/src/mfile new file mode 100644 index 0000000000..eb9e4b2c19 --- /dev/null +++ b/thirdparty/linux/ntl/src/mfile @@ -0,0 +1,575 @@ +############################################################### +# +# First, choose a C++ compiler, and set compiler flags. +# This is done by setting the variables CXX and CXXFLAGS. +# +############################################################### + + + +CXX=@{CXX} +# A C++ compiler, e.g., g++, CC, xlC + + +CXXFLAGS=@{CXXFLAGS} +# Flags for the C++ compiler + +CXXAUTOFLAGS=@{CXXAUTOFLAGS} +# Flags for the C++ compiler, automatically generated by configuration script + + +AR=@{AR} +# command to make a library + +ARFLAGS=@{ARFLAGS} +# arguments for AR + +RANLIB=@{RANLIB} +# set to echo if you want to disable it completely + +LDFLAGS=@{LDFLAGS} +# libraries for linking C++ programs + +LDLIBS=@{LDLIBS} +# libraries for linking C++ programs + +CPPFLAGS=@{CPPFLAGS} +# arguments for the C preprocessor + +LIBTOOL=@{LIBTOOL} +# libtool command + +DEF_PREFIX=@{DEF_PREFIX} + +PREFIX=@{PREFIX} +LIBDIR=@{LIBDIR} +INCLUDEDIR=@{INCLUDEDIR} +DOCDIR=@{DOCDIR} +# where to install NTL + +############################################################### +# +# Second, if you want to use GMP (the GNU Multi-Precision library), +# define the variables GMP_OPT_INCDIR, GMP_OPT_LIBDIR, GMP_OPT_LIB below. +# You also will have to set either NTL_GMP_LIP or NTL_GMP_HACK +# in the config.h file. +# +# Using GMP can lead to significant performance gains on some +# platforms. You can obtain GMP from http://www.swox.com/gmp. +# Once you unpack it into a directory, just execute +# ./configure; make +# in that directory. 
+# +############################################################### + + +GMP_PREFIX=@{GMP_PREFIX} + +GMP_INCDIR=@{GMP_INCDIR} +# directory containing gmp.h if using GMP + +GMP_LIBDIR=@{GMP_LIBDIR} +# directory containing libgmp.a if using GMP + +GMP_OPT_INCDIR=@{GMPI}-I$(GMP_INCDIR) # GMPI +GMP_OPT_LIBDIR=@{GMPL}-L$(GMP_LIBDIR) # GMPL +GMP_OPT_LIB=@{GMP}-lgmp # GMP +# uncomment these if using GMP + + +############################################################### +# +# Third, if you want to use gf2x (a library for fast +# multiplication over GF(2)[X]), you need to +# define the variables GF2X_OPT_INCDIR, GF2X_OPT_LIBDIR, GF2X_OPT_LIB below. +# You also will have to set NTL_GF2X_LIB +# in the config.h file. +# +############################################################### + +GF2X_PREFIX=@{GF2X_PREFIX} + +GF2X_INCDIR=@{GF2X_INCDIR} +# directory containing gf2x.h if using gf2x + +GF2X_LIBDIR=@{GF2X_LIBDIR} +# directory containing libgf2x.a + +GF2X_OPT_INCDIR=@{GF2XI}-I$(GF2X_INCDIR) # GF2X +GF2X_OPT_LIBDIR=@{GF2XL}-L$(GF2X_LIBDIR) # GF2X +GF2X_OPT_LIB=@{GF2X}-lgf2x # GF2X +# uncomment these if using gf2x + + +############################################################### +# +# Fourth, if you do not want to run the wizard that automagically +# sets some performace related flags in config.h, set the flag below. +# +############################################################### + + +WIZARD=@{WIZARD} +# Set to off if you want to bypass the wizard; otherwise, set to on. + + +################################################################# +# +# That's it! You can ignore everything else in this file! +# +################################################################# + + +# object files + +O01=FFT.o FacVec.o GF2.o GF2E.o GF2EX.o GF2EXFactoring.o GF2X.o GF2X1.o +O02=$(O01) GF2XFactoring.o GF2XVec.o GetTime.o GetPID.o HNF.o ctools.o LLL.o +O03=$(O02) LLL_FP.o LLL_QP.o LLL_RR.o LLL_XD.o RR.o WordVector.o ZZ.o ZZVec.o +O04=$(O03) ZZX.o ZZX1.o ZZXCharPoly.o ZZXFactoring.o ZZ_p.o ZZ_pE.o ZZ_pEX.o +O05=$(O04) ZZ_pEXFactoring.o ZZ_pX.o ZZ_pX1.o ZZ_pXCharPoly.o ZZ_pXFactoring.o +O06=$(O05) fileio.o lip.o lzz_p.o lzz_pE.o lzz_pEX.o lzz_pEXFactoring.o +O07=$(O06) lzz_pX.o lzz_pX1.o lzz_pXCharPoly.o lzz_pXFactoring.o +O08=$(O07) mat_GF2.o mat_GF2E.o mat_RR.o mat_ZZ.o mat_ZZ_p.o +O09=$(O08) mat_ZZ_pE.o mat_lzz_p.o mat_lzz_pE.o mat_poly_ZZ.o +O10=$(O09) mat_poly_ZZ_p.o mat_poly_lzz_p.o +O11=$(O10) +O12=$(O11) +O13=$(O12) quad_float.o tools.o vec_GF2.o vec_GF2E.o +O14=$(O13) vec_RR.o vec_ZZ.o vec_ZZ_p.o vec_ZZ_pE.o +O15=$(O14) vec_lzz_p.o vec_lzz_pE.o +O16=$(O15) +O17=$(O16) +O18=$(O17) xdouble.o +O19=$(O18) G_LLL_FP.o G_LLL_QP.o G_LLL_XD.o G_LLL_RR.o thread.o BasicThreadPool.o + +OBJ=$(O19) + +# library source files + + +S01=FFT.c FacVec.c GF2.c GF2E.c GF2EX.c GF2EXFactoring.c GF2X.c GF2X1.c +S02=$(S01) GF2XFactoring.c GF2XVec.c HNF.c ctools.c LLL.c LLL_FP.c LLL_QP.c +S03=$(S02) LLL_RR.c LLL_XD.c RR.c WordVector.c ZZ.c ZZVec.c ZZX.c ZZX1.c +S04=$(S03) ZZXCharPoly.c ZZXFactoring.c ZZ_p.c ZZ_pE.c ZZ_pEX.c +S05=$(S04) ZZ_pEXFactoring.c ZZ_pX.c ZZ_pX1.c ZZ_pXCharPoly.c +S06=$(S05) ZZ_pXFactoring.c fileio.c lip.c lzz_p.c lzz_pE.c lzz_pEX.c +S07=$(S06) lzz_pEXFactoring.c lzz_pX.c lzz_pX1.c +S08=$(S07) lzz_pXCharPoly.c lzz_pXFactoring.c mat_GF2.c mat_GF2E.c +S09=$(S08) mat_RR.c mat_ZZ.c mat_ZZ_p.c mat_ZZ_pE.c mat_lzz_p.c mat_lzz_pE.c +S10=$(S09) mat_poly_ZZ.c mat_poly_ZZ_p.c mat_poly_lzz_p.c +S11=$(S10) +S12=$(S11) +S13=$(S12) quad_float.c tools.c vec_GF2.c vec_GF2E.c vec_RR.c +S14=$(S13) vec_ZZ.c vec_ZZ_p.c 
vec_ZZ_pE.c +S15=$(S14) vec_lzz_p.c vec_lzz_pE.c +S16=$(S15) +S17=$(S16) +S18=$(S17) xdouble.c +S19=$(S18) G_LLL_FP.c G_LLL_QP.c G_LLL_XD.c G_LLL_RR.c thread.c BasicThreadPool.c + +SRC = $(S19) + +# library source files that are header files + +SINC = c_lip_impl.h g_lip_impl.h + + + + + +# library header files + +IN01= FFT.h FacVec.h GF2.h GF2E.h GF2EX.h GF2EXFactoring.h GF2X.h +IN02=$(IN01) GF2XFactoring.h GF2XVec.h HNF.h ctools.h LLL.h +IN03=$(IN02) RR.h SPMM_ASM.h WordVector.h ZZ.h sp_arith.h ZZVec.h ZZX.h ZZXFactoring.h +IN04=$(IN03) ZZ_p.h ZZ_pE.h ZZ_pEX.h ZZ_pEXFactoring.h ZZ_pX.h ZZ_pXFactoring.h +IN05=$(IN04) fileio.h lip.h lzz_p.h lzz_pE.h lzz_pEX.h lzz_pEXFactoring.h +IN06=$(IN05) lzz_pX.h lzz_pXFactoring.h mat_GF2.h mat_GF2E.h mat_RR.h +IN07=$(IN06) mat_ZZ.h mat_ZZ_p.h mat_ZZ_pE.h mat_lzz_p.h mat_lzz_pE.h +IN08=$(IN07) mat_poly_ZZ.h mat_poly_ZZ_p.h mat_poly_lzz_p.h matrix.h +IN09=$(IN08) pair.h vector.h pair_GF2EX_long.h pair_GF2X_long.h +IN10=$(IN09) pair_ZZX_long.h pair_ZZ_pEX_long.h pair_ZZ_pX_long.h +IN11=$(IN10) pair_lzz_pEX_long.h pair_lzz_pX_long.h quad_float.h +IN12=$(IN11) tools.h vec_GF2.h vec_GF2E.h vec_GF2XVec.h vec_RR.h +IN13=$(IN12) vec_ZZ.h vec_ZZVec.h vec_ZZ_p.h vec_ZZ_pE.h vec_double.h +IN14=$(IN13) vec_long.h vec_lzz_p.h vec_lzz_pE.h vec_quad_float.h +IN15=$(IN14) vec_vec_GF2.h vec_vec_GF2E.h vec_vec_RR.h vec_vec_ZZ.h +IN16=$(IN15) vec_vec_ZZ_p.h vec_vec_ZZ_pE.h vec_vec_long.h vec_vec_lzz_p.h +IN17=$(IN16) vec_vec_lzz_pE.h vec_xdouble.h xdouble.h config.h version.h +IN18=$(IN17) def_config.h new.h vec_ulong.h vec_vec_ulong.h c_lip.h g_lip.h +IN19=$(IN18) SmartPtr.h Lazy.h LazyTable.h thread.h BasicThreadPool.h +INCL=$(IN19) + + + +# test data + +TD1=BerlekampTestIn BerlekampTestOut CanZassTestIn CanZassTestOut +TD2=$(TD1) ZZXFacTestIn ZZXFacTestOut MoreFacTestIn LLLTestIn LLLTestOut RRTestIn RRTestOut +TD3=$(TD2) MatrixTestIn MatrixTestOut CharPolyTestIn +TD4=$(TD3) CharPolyTestOut QuadTestIn QuadTestOut + +TD = $(TD4) + +# test source files + +TS1=QuickTest.c BerlekampTest.c CanZassTest.c ZZXFacTest.c MoreFacTest.c LLLTest.c +TS2=$(TS1) subset.c MatrixTest.c mat_lzz_pTest.c CharPolyTest.c RRTest.c QuadTest.c +TS3=$(TS2) GF2XTest.c GF2EXTest.c BitMatTest.c ZZ_pEXTest.c lzz_pEXTest.c Timing.c +TS4=$(TS3) ThreadTest.c ExceptionTest.c +TS = $(TS4) + +# scripts + +SCRIPTS1=MakeGetTime MakeGetPID MakeCheckFeature ResetFeatures CopyFeatures TestScript dosify unixify RemoveProg +SCRIPTS2=$(SCRIPTS1) configure DoConfig mfile cfile ppscript + +SCRIPTS=$(SCRIPTS2) + +# auxilliary source + +MD=MakeDesc.c MakeDescAux.c newnames.c gen_gmp_aux.c +GT=GetTime0.c GetTime1.c GetTime2.c GetTime3.c GetTime4.c GetTime5.c TestGetTime.c +GP=GetPID1.c GetPID2.c TestGetPID.c +CH=CheckCLZL.c CheckCLZLAux.c CheckLL.c CheckLLAux.c CheckAVX.c CheckFMA.c CheckCompile.c + +AUXPROGS = TestGetTime TestGetPID CheckFeature CheckCompile + + + +# documentation + + +D01=copying.txt BasicThreadPool.txt GF2.txt GF2E.txt GF2EX.txt GF2EXFactoring.txt GF2X.txt +D02=$(D01) GF2XFactoring.txt GF2XVec.txt HNF.txt Lazy.txt LazyTable.txt LLL.txt RR.txt SmartPtr.txt +D03=$(D02) ZZ.txt ZZVec.txt ZZX.txt ZZXFactoring.txt ZZ_p.txt ZZ_pE.txt +D04=$(D03) ZZ_pEX.txt ZZ_pEXFactoring.txt ZZ_pX.txt ZZ_pXFactoring.txt +D05=$(D04) conversions.txt flags.txt lzz_p.txt lzz_pE.txt lzz_pEX.txt +D06=$(D05) lzz_pEXFactoring.txt lzz_pX.txt lzz_pXFactoring.txt mat_GF2.txt +D07=$(D06) mat_GF2E.txt mat_RR.txt mat_ZZ.txt mat_ZZ_p.txt mat_ZZ_pE.txt +D08=$(D07) mat_lzz_p.txt mat_lzz_pE.txt mat_poly_ZZ.txt mat_poly_ZZ_p.txt +D09=$(D08) 
mat_poly_lzz_p.txt matrix.txt pair.txt vector.txt +D10=$(D09) quad_float.txt sedscript.txt tools.txt vec_GF2.txt +D11=$(D10) vec_GF2E.txt vec_RR.txt vec_ZZ.txt vec_ZZ_p.txt vec_ZZ_pE.txt +D12=$(D11) vec_lzz_p.txt vec_lzz_pE.txt xdouble.txt names.txt +D13=$(D12) tour-ack.html tour-intro.html tour-time.html tour-changes.html +D14=$(D13) tour-modules.html tour-unix.html tour-examples.html +D15=$(D14) tour-roadmap.html tour-win.html tour-impl.html tour-struct.html +D16=$(D15) tour.html tour-ex1.html tour-ex2.html tour-ex3.html tour-ex4.html +D17=$(D16) tour-ex5.html tour-ex6.html tour-ex7.html arrow1.gif arrow2.gif arrow3.gif +D18=$(D17) tour-gmp.html tour-gf2x.html tour-tips.html config.txt version.txt + +TX01=GF2.txt GF2E.txt GF2EX.txt GF2EXFactoring.txt GF2X.txt GF2XFactoring.txt +TX02=GF2XVec.txt HNF.txt Lazy.txt LazyTable.txt LLL.txt RR.txt SmartPtr.txt ZZ.txt ZZVec.txt ZZX.txt ZZXFactoring.txt +TX03=ZZ_p.txt ZZ_pE.txt ZZ_pEX.txt ZZ_pEXFactoring.txt ZZ_pX.txt ZZ_pXFactoring.txt +TX04=lzz_p.txt lzz_pE.txt lzz_pEX.txt lzz_pEXFactoring.txt lzz_pX.txt +TX05=lzz_pXFactoring.txt mat_GF2.txt mat_GF2E.txt mat_RR.txt mat_ZZ.txt mat_ZZ_p.txt +TX06=mat_ZZ_pE.txt mat_lzz_p.txt mat_lzz_pE.txt mat_poly_ZZ.txt mat_poly_ZZ_p.txt +TX07=mat_poly_lzz_p.txt matrix.txt pair.txt quad_float.txt tools.txt vec_GF2.txt +TX08=vec_GF2E.txt vec_RR.txt vec_ZZ.txt vec_ZZ_p.txt vec_ZZ_pE.txt vec_lzz_p.txt +TX09=vec_lzz_pE.txt vector.txt version.txt xdouble.txt BasicThreadPool.txt + +TXFILES=$(TX01) $(TX02) $(TX03) $(TX04) $(TX05) $(TX06) $(TX07) $(TX08) $(TX09) + +HT01=GF2.cpp.html GF2E.cpp.html GF2EX.cpp.html GF2EXFactoring.cpp.html GF2X.cpp.html GF2XFactoring.cpp.html +HT02=GF2XVec.cpp.html HNF.cpp.html Lazy.cpp.html LazyTable.cpp.html LLL.cpp.html RR.cpp.html SmartPtr.cpp.html ZZ.cpp.html ZZVec.cpp.html ZZX.cpp.html ZZXFactoring.cpp.html +HT03=ZZ_p.cpp.html ZZ_pE.cpp.html ZZ_pEX.cpp.html ZZ_pEXFactoring.cpp.html ZZ_pX.cpp.html ZZ_pXFactoring.cpp.html +HT04=lzz_p.cpp.html lzz_pE.cpp.html lzz_pEX.cpp.html lzz_pEXFactoring.cpp.html lzz_pX.cpp.html +HT05=lzz_pXFactoring.cpp.html mat_GF2.cpp.html mat_GF2E.cpp.html mat_RR.cpp.html mat_ZZ.cpp.html mat_ZZ_p.cpp.html +HT06=mat_ZZ_pE.cpp.html mat_lzz_p.cpp.html mat_lzz_pE.cpp.html mat_poly_ZZ.cpp.html mat_poly_ZZ_p.cpp.html +HT07=mat_poly_lzz_p.cpp.html matrix.cpp.html pair.cpp.html quad_float.cpp.html tools.cpp.html vec_GF2.cpp.html +HT08=vec_GF2E.cpp.html vec_RR.cpp.html vec_ZZ.cpp.html vec_ZZ_p.cpp.html vec_ZZ_pE.cpp.html vec_lzz_p.cpp.html +HT09=vec_lzz_pE.cpp.html vector.cpp.html version.cpp.html xdouble.cpp.html BasicThreadPool.cpp.html + +HTFILES=$(HT01) $(HT02) $(HT03) $(HT04) $(HT05) $(HT06) $(HT07) $(HT08) $(HT09) + + +DOC = $(D18) $(HTFILES) + + + +# test program executables + +PROG1=QuickTest BerlekampTest CanZassTest ZZXFacTest MoreFacTest LLLTest BitMatTest +PROG2=$(PROG1) MatrixTest mat_lzz_pTest CharPolyTest RRTest QuadTest +PROG3=$(PROG2) GF2XTest GF2EXTest subset ZZ_pEXTest lzz_pEXTest Timing ThreadTest +PROGS = $(PROG3) + +# things to save to a tar file + +SFI1=makefile $(SRC) $(SINC) $(SCRIPTS) $(MD) $(GT) $(GP) $(CH) $(TS) $(TD) mach_desc.win +SFI2=$(SFI1) MulTimeTest.c Poly1TimeTest.c Poly2TimeTest.c Poly3TimeTest.c GF2XTimeTest.c +SFI3=$(SFI2) InitSettings.c DispSettings.c WizardAux Wizard def_makefile +SFILES=$(SFI3) + + +################################################################# +# +# Rules for compiling the library +# +################################################################# + + +NTL_INCLUDE = -I../include -I. 
+# NTL needs this to find its include files + +COMPILE = $(CXX) $(NTL_INCLUDE) $(CPPFLAGS) $(CXXAUTOFLAGS) $(CXXFLAGS) -c + +LINK = $(CXX) $(NTL_INCLUDE) $(CPPFLAGS) $(CXXAUTOFLAGS) $(CXXFLAGS) $(LDFLAGS) + + + +# 'make all' does a complete make, including all setup. +# It also creates the file 'all', which means you should +# run 'make clobber' before running 'make' or 'make all' +# again. + +all: + make setup1 + make setup2 + make setup3 + make setup4 + make ntl.a + touch all + + +# setup1 generates the file ../include/NTL/mach_desc.h + +setup1: + $(COMPILE) MakeDescAux.c + $(LINK) -o MakeDesc MakeDesc.c MakeDescAux.o $(LDLIBS) + ./MakeDesc + mv mach_desc.h ../include/NTL/mach_desc.h + + +# setup2 does some dynamic checks for GetTime, GetPID, __builtin_clzl, and LL types + +setup2: + echo "*** CheckFeature log ***" > CheckFeature.log + sh MakeGetTime "$(LINK)" "$(LDLIBS)" + sh MakeGetPID "$(LINK)" "$(LDLIBS)" + sh MakeCheckFeature BUILTIN_CLZL "CheckCLZL.c CheckCLZLAux.c" "$(LINK)" "$(LDLIBS)" + sh MakeCheckFeature LL_TYPE "CheckLL.c CheckLLAux.c" "$(LINK)" "$(LDLIBS)" + sh MakeCheckFeature AVX "CheckAVX.c" "$(LINK)" "$(LDLIBS)" + sh MakeCheckFeature FMA "CheckFMA.c" "$(LINK)" "$(LDLIBS)" + +# setup3 generates the file ../include/NTL/gmp_aux.h +# The file ../include/NTL/gmp_aux.h is included in ../include/NTL/lip.h +# when NTL_GMP_LIP is set. +# When this flag is not set, an empty file is produced. + +setup3: + $(LINK) $(GMP_OPT_INCDIR) -o gen_gmp_aux gen_gmp_aux.c $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + ./gen_gmp_aux > ../include/NTL/gmp_aux.h + +# setup4 runs the wizard + +setup4: + sh Wizard $(WIZARD) + + +ntl.a: $(OBJ) +@{LSTAT} $(AR) $(ARFLAGS) ntl.a $(OBJ) #LSTAT +@{LSTAT} - $(RANLIB) ntl.a #LSTAT +@{LSHAR} $(LIBTOOL) --tag=CXX --mode=link $(LINK) -o libntl.la $(OBJ:.o=.lo) $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(GF2X_OPT_LIBDIR) $(GF2X_OPT_LIB) $(LDLIBS) -rpath $(LIBDIR) -version-info `cat VERSION_INFO` #LSHAR + +@{LSTAT}LCOMP= #LSTAT +@{LSHAR}LCOMP=$(LIBTOOL) --tag=CXX --mode=compile #LSHAR + +lip.o: lip.c g_lip_impl.h c_lip_impl.h + $(LCOMP) $(COMPILE) $(GMP_OPT_INCDIR) lip.c + +ctools.o: ctools.c + $(LCOMP) $(COMPILE) ctools.c + + +GetTime.o: GetTime.c + $(LCOMP) $(COMPILE) GetTime.c + +GetPID.o: GetPID.c + $(LCOMP) $(COMPILE) GetPID.c + +CheckCompile: CheckCompile.c + $(LINK) -o CheckCompile CheckCompile.c $(LDLIBS) + + +.c.o: + $(LCOMP) $(COMPILE) $(GF2X_OPT_INCDIR) $< + +.c: +@{LSTAT} $(LINK) -o $@ $< ntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(GF2X_OPT_LIBDIR) $(GF2X_OPT_LIB) $(LDLIBS) #LSTAT +@{LSHAR} $(LIBTOOL) --tag=CXX --mode=link $(LINK) -o $@ $< libntl.la #LSHAR + +################################################################# +# +# Rule for running tests +# make check runs a series of tests +# +################################################################# + +check: + sh RemoveProg $(PROGS) + make QuickTest + ./QuickTest + sh RemoveProg QuickTest + sh TestScript + +################################################################# +# +# Rule for installing +# make install just does a simple copy of the include file +# and library. The -p option is used to preserve file attributes. +# This helps avoid some problems (especially when copying ntl.a). +# Also, an attempt is made to make everything that is +# installed readable by everyone.
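+# For example, with the default PREFIX=/usr/local, 'make install' puts +# headers under /usr/local/include/NTL, documentation under +# /usr/local/share/doc/NTL, and the static library at +# /usr/local/lib/libntl.a.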
+# +# make uninstall removes these files +# +################################################################# + + + + +install: + mkdir -p -m 755 $(INCLUDEDIR) + rm -rf $(INCLUDEDIR)/NTL + mkdir -m 755 $(INCLUDEDIR)/NTL + cp -p ../include/NTL/*.h $(INCLUDEDIR)/NTL + - chmod -R a+r $(INCLUDEDIR)/NTL + mkdir -p -m 755 $(DOCDIR) + rm -rf $(DOCDIR)/NTL + mkdir -m 755 $(DOCDIR)/NTL + cp -p ../doc/*.txt $(DOCDIR)/NTL + cp -p ../doc/*.html $(DOCDIR)/NTL + cp -p ../doc/*.gif $(DOCDIR)/NTL + - chmod -R a+r $(DOCDIR)/NTL + mkdir -p -m 755 $(LIBDIR) +@{LSTAT} cp -p ntl.a $(LIBDIR)/libntl.a #LSTAT +@{LSTAT} - chmod a+r $(LIBDIR)/libntl.a #LSTAT +@{LSHAR} $(LIBTOOL) --mode=install cp -p libntl.la $(LIBDIR) #LSHAR + + +uninstall: +@{LSTAT} rm -f $(LIBDIR)/libntl.a #LSTAT +@{LSHAR} $(LIBTOOL) --mode=uninstall rm -f $(LIBDIR)/libntl.la #LSHAR + rm -rf $(INCLUDEDIR)/NTL + rm -rf $(DOCDIR)/NTL + +################################################################# +# +# Rules for cleaning up +# +# make clobber removes *everything* created by make, +# but it does not restore config.h to its default. +# +# make clean tidies up a bit +# +################################################################# + +clobber: + rm -f ntl.a mach_desc.h ../include/NTL/mach_desc.h GetTime.c GetPID.c + sh ResetFeatures '..' + rm -f ../include/NTL/gmp_aux.h + sh RemoveProg $(PROGS) MakeDesc $(AUXPROGS) gen_gmp_aux + rm -f *.o + rm -rf small + rm -f cfileout mfileout + rm -rf .libs *.lo libntl.la + rm -f all + +clean: + sh RemoveProg $(PROGS) MakeDesc $(AUXPROGS) gen_gmp_aux + rm -f *.o + rm -rf small +@{LSHAR} - $(LIBTOOL) --mode=clean rm -f libntl.la *.lo #LSHAR + +################################################################# +# +# Rules for making tar and zip files +# +# make ppdoc creates pretty-printed versions of some documentation +# - run before make package or make winpack +# +# make package creates a tar.gz file suitable for Unix +# +# make winpack creates a zip file suitable for Windows +# +################################################################# + +ppdoc: + sh ppscript "$(TXFILES)" + +ppclean: + rm -f ../doc/*.cpp + + +package: + ./configure --nowrite + cp mfileout def_makefile + cp cfileout ../include/NTL/def_config.h + sh unixify "$(SFILES) DIRNAME WINDIR VERSION_INFO NOTES" "$(INCL)" "$(DOC)" + rm -rf `cat DIRNAME` + rm -f `cat DIRNAME`.tar + rm -f `cat DIRNAME`.tar.gz + mv unix `cat DIRNAME` + chmod -R a+rX `cat DIRNAME` + tar -cvf `cat DIRNAME`.tar `cat DIRNAME` + gzip `cat DIRNAME`.tar + rm -rf `cat DIRNAME` + +winpack: + ./configure --nowrite NTL_GMP_LIP=off + cp mfileout def_makefile + cp cfileout ../include/NTL/def_config.h + sh dosify "$(SRC)" "$(INCL)" "$(DOC)" "$(TS)" "$(TD)" "$(SINC)" + rm -rf `cat WINDIR` + rm -f `cat WINDIR`.zip + mv dos `cat WINDIR` + chmod -R a+rX `cat WINDIR` + find ./`cat WINDIR` '!' 
-name '*.gif' -print | zip -l `cat WINDIR` -@ + find ./`cat WINDIR` -name '*.gif' -print | zip -u `cat WINDIR` -@ + rm -rf `cat WINDIR` + + +###################################################################### +# +# config wizard related stuff +# +###################################################################### + +WO1 = FFT.o GetTime.o GetPID.o ctools.o ZZ.o ZZVec.o ZZ_p.o ZZ_pX.o +WO2 = $(WO1) ZZ_pX1.o lip.o tools.o vec_ZZ.o vec_ZZ_p.o +WO3 = $(WO2) GF2.o WordVector.o vec_GF2.o GF2X.o GF2X1.o thread.o BasicThreadPool.o fileio.o + +WOBJ = $(WO3) + +@{LSHAR}wntl.a: LCOMP= #LSHAR +wntl.a: $(WOBJ) + $(AR) $(ARFLAGS) wntl.a $(WOBJ) + - $(RANLIB) wntl.a + +MulTimeTest: + $(LINK) -o MulTimeTest MulTimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + + +Poly1TimeTest: + $(LINK) -o Poly1TimeTest Poly1TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) +Poly2TimeTest: + $(LINK) -o Poly2TimeTest Poly2TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) +Poly3TimeTest: + $(LINK) -o Poly3TimeTest Poly3TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + + +GF2XTimeTest: + $(LINK) -o GF2XTimeTest GF2XTimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + +InitSettings: + $(LINK) -o InitSettings InitSettings.c $(LDLIBS) + + +DispSettings: + $(LINK) -o DispSettings DispSettings.c $(LDLIBS) + + + + diff --git a/thirdparty/linux/ntl/src/mfileout b/thirdparty/linux/ntl/src/mfileout new file mode 100644 index 0000000000..c3180d12a9 --- /dev/null +++ b/thirdparty/linux/ntl/src/mfileout @@ -0,0 +1,575 @@ +############################################################### +# +# First, choose a C++ compiler, and set compiler flags. +# This is done by setting the variables CXX and CXXFLAGS. +# +############################################################### + + + +CXX=g++ +# A C++ compiler, e.g., g++, CC, xlC + + +CXXFLAGS=-g -O2 +# Flags for the C++ compiler + +CXXAUTOFLAGS= -march=native +# Flags for the C++ compiler, automatically generated by configuration script + + +AR=ar +# command to make a library + +ARFLAGS=ruv +# arguments for AR + +RANLIB=ranlib +# set to echo if you want to disable it completely + +LDFLAGS= +# flags for linking C++ programs + +LDLIBS=-lm +# libraries for linking C++ programs + +CPPFLAGS= +# arguments for the C preprocessor + +LIBTOOL=libtool +# libtool command + +DEF_PREFIX=/usr/local + +PREFIX=$(DEF_PREFIX) +LIBDIR=$(PREFIX)/lib +INCLUDEDIR=$(PREFIX)/include +DOCDIR=$(PREFIX)/share/doc +# where to install NTL + +############################################################### +# +# Second, if you want to use GMP (the GNU Multi-Precision library), +# define the variables GMP_OPT_INCDIR, GMP_OPT_LIBDIR, GMP_OPT_LIB below. +# You also will have to set either NTL_GMP_LIP or NTL_GMP_HACK +# in the config.h file. +# +# Using GMP can lead to significant performance gains on some +# platforms. You can obtain GMP from http://www.swox.com/gmp. +# Once you unpack it into a directory, just execute +# ./configure; make +# in that directory.
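+# +# For example (illustrative values only), if GMP were installed under +# /opt/gmp, the settings below would become: +# +# GMP_PREFIX=/opt/gmp +# GMP_OPT_INCDIR=-I$(GMP_INCDIR) +# GMP_OPT_LIBDIR=-L$(GMP_LIBDIR) +# GMP_OPT_LIB=-lgmp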
+# +############################################################### + + +GMP_PREFIX=$(DEF_PREFIX) + +GMP_INCDIR=$(GMP_PREFIX)/include +# directory containing gmp.h if using GMP + +GMP_LIBDIR=$(GMP_PREFIX)/lib +# directory containing libgmp.a if using GMP + +GMP_OPT_INCDIR=# -I$(GMP_INCDIR) # GMPI +GMP_OPT_LIBDIR=# -L$(GMP_LIBDIR) # GMPL +GMP_OPT_LIB=-lgmp # GMP +# uncomment these if using GMP + + +############################################################### +# +# Third, if you want to use gf2x (a library for fast +# multiplication over GF(2)[X]), you need to +# define the variables GF2X_OPT_INCDIR, GF2X_OPT_LIBDIR, GF2X_OPT_LIB below. +# You also will have to set NTL_GF2X_LIB +# in the config.h file. +# +############################################################### + +GF2X_PREFIX=$(DEF_PREFIX) + +GF2X_INCDIR=$(GF2X_PREFIX)/include +# directory containing gf2x.h if using gf2x + +GF2X_LIBDIR=$(GF2X_PREFIX)/lib +# directory containing libgf2x.a + +GF2X_OPT_INCDIR=# -I$(GF2X_INCDIR) # GF2X +GF2X_OPT_LIBDIR=# -L$(GF2X_LIBDIR) # GF2X +GF2X_OPT_LIB=# -lgf2x # GF2X +# uncomment these if using gf2x + + +############################################################### +# +# Fourth, if you do not want to run the wizard that automagically +# sets some performance related flags in config.h, set the flag below. +# +############################################################### + + +WIZARD=on +# Set to off if you want to bypass the wizard; otherwise, set to on. + + +################################################################# +# +# That's it! You can ignore everything else in this file! +# +################################################################# + + +# object files + +O01=FFT.o FacVec.o GF2.o GF2E.o GF2EX.o GF2EXFactoring.o GF2X.o GF2X1.o +O02=$(O01) GF2XFactoring.o GF2XVec.o GetTime.o GetPID.o HNF.o ctools.o LLL.o +O03=$(O02) LLL_FP.o LLL_QP.o LLL_RR.o LLL_XD.o RR.o WordVector.o ZZ.o ZZVec.o +O04=$(O03) ZZX.o ZZX1.o ZZXCharPoly.o ZZXFactoring.o ZZ_p.o ZZ_pE.o ZZ_pEX.o +O05=$(O04) ZZ_pEXFactoring.o ZZ_pX.o ZZ_pX1.o ZZ_pXCharPoly.o ZZ_pXFactoring.o +O06=$(O05) fileio.o lip.o lzz_p.o lzz_pE.o lzz_pEX.o lzz_pEXFactoring.o +O07=$(O06) lzz_pX.o lzz_pX1.o lzz_pXCharPoly.o lzz_pXFactoring.o +O08=$(O07) mat_GF2.o mat_GF2E.o mat_RR.o mat_ZZ.o mat_ZZ_p.o +O09=$(O08) mat_ZZ_pE.o mat_lzz_p.o mat_lzz_pE.o mat_poly_ZZ.o +O10=$(O09) mat_poly_ZZ_p.o mat_poly_lzz_p.o +O11=$(O10) +O12=$(O11) +O13=$(O12) quad_float.o tools.o vec_GF2.o vec_GF2E.o +O14=$(O13) vec_RR.o vec_ZZ.o vec_ZZ_p.o vec_ZZ_pE.o +O15=$(O14) vec_lzz_p.o vec_lzz_pE.o +O16=$(O15) +O17=$(O16) +O18=$(O17) xdouble.o +O19=$(O18) G_LLL_FP.o G_LLL_QP.o G_LLL_XD.o G_LLL_RR.o thread.o BasicThreadPool.o + +OBJ=$(O19) + +# library source files + + +S01=FFT.c FacVec.c GF2.c GF2E.c GF2EX.c GF2EXFactoring.c GF2X.c GF2X1.c +S02=$(S01) GF2XFactoring.c GF2XVec.c HNF.c ctools.c LLL.c LLL_FP.c LLL_QP.c +S03=$(S02) LLL_RR.c LLL_XD.c RR.c WordVector.c ZZ.c ZZVec.c ZZX.c ZZX1.c +S04=$(S03) ZZXCharPoly.c ZZXFactoring.c ZZ_p.c ZZ_pE.c ZZ_pEX.c +S05=$(S04) ZZ_pEXFactoring.c ZZ_pX.c ZZ_pX1.c ZZ_pXCharPoly.c +S06=$(S05) ZZ_pXFactoring.c fileio.c lip.c lzz_p.c lzz_pE.c lzz_pEX.c +S07=$(S06) lzz_pEXFactoring.c lzz_pX.c lzz_pX1.c +S08=$(S07) lzz_pXCharPoly.c lzz_pXFactoring.c mat_GF2.c mat_GF2E.c +S09=$(S08) mat_RR.c mat_ZZ.c mat_ZZ_p.c mat_ZZ_pE.c mat_lzz_p.c mat_lzz_pE.c +S10=$(S09) mat_poly_ZZ.c mat_poly_ZZ_p.c mat_poly_lzz_p.c +S11=$(S10) +S12=$(S11) +S13=$(S12) quad_float.c tools.c vec_GF2.c vec_GF2E.c vec_RR.c +S14=$(S13) vec_ZZ.c vec_ZZ_p.c vec_ZZ_pE.c
+S15=$(S14) vec_lzz_p.c vec_lzz_pE.c +S16=$(S15) +S17=$(S16) +S18=$(S17) xdouble.c +S19=$(S18) G_LLL_FP.c G_LLL_QP.c G_LLL_XD.c G_LLL_RR.c thread.c BasicThreadPool.c + +SRC = $(S19) + +# library source files that are header files + +SINC = c_lip_impl.h g_lip_impl.h + + + + + +# library header files + +IN01= FFT.h FacVec.h GF2.h GF2E.h GF2EX.h GF2EXFactoring.h GF2X.h +IN02=$(IN01) GF2XFactoring.h GF2XVec.h HNF.h ctools.h LLL.h +IN03=$(IN02) RR.h SPMM_ASM.h WordVector.h ZZ.h sp_arith.h ZZVec.h ZZX.h ZZXFactoring.h +IN04=$(IN03) ZZ_p.h ZZ_pE.h ZZ_pEX.h ZZ_pEXFactoring.h ZZ_pX.h ZZ_pXFactoring.h +IN05=$(IN04) fileio.h lip.h lzz_p.h lzz_pE.h lzz_pEX.h lzz_pEXFactoring.h +IN06=$(IN05) lzz_pX.h lzz_pXFactoring.h mat_GF2.h mat_GF2E.h mat_RR.h +IN07=$(IN06) mat_ZZ.h mat_ZZ_p.h mat_ZZ_pE.h mat_lzz_p.h mat_lzz_pE.h +IN08=$(IN07) mat_poly_ZZ.h mat_poly_ZZ_p.h mat_poly_lzz_p.h matrix.h +IN09=$(IN08) pair.h vector.h pair_GF2EX_long.h pair_GF2X_long.h +IN10=$(IN09) pair_ZZX_long.h pair_ZZ_pEX_long.h pair_ZZ_pX_long.h +IN11=$(IN10) pair_lzz_pEX_long.h pair_lzz_pX_long.h quad_float.h +IN12=$(IN11) tools.h vec_GF2.h vec_GF2E.h vec_GF2XVec.h vec_RR.h +IN13=$(IN12) vec_ZZ.h vec_ZZVec.h vec_ZZ_p.h vec_ZZ_pE.h vec_double.h +IN14=$(IN13) vec_long.h vec_lzz_p.h vec_lzz_pE.h vec_quad_float.h +IN15=$(IN14) vec_vec_GF2.h vec_vec_GF2E.h vec_vec_RR.h vec_vec_ZZ.h +IN16=$(IN15) vec_vec_ZZ_p.h vec_vec_ZZ_pE.h vec_vec_long.h vec_vec_lzz_p.h +IN17=$(IN16) vec_vec_lzz_pE.h vec_xdouble.h xdouble.h config.h version.h +IN18=$(IN17) def_config.h new.h vec_ulong.h vec_vec_ulong.h c_lip.h g_lip.h +IN19=$(IN18) SmartPtr.h Lazy.h LazyTable.h thread.h BasicThreadPool.h +INCL=$(IN19) + + + +# test data + +TD1=BerlekampTestIn BerlekampTestOut CanZassTestIn CanZassTestOut +TD2=$(TD1) ZZXFacTestIn ZZXFacTestOut MoreFacTestIn LLLTestIn LLLTestOut RRTestIn RRTestOut +TD3=$(TD2) MatrixTestIn MatrixTestOut CharPolyTestIn +TD4=$(TD3) CharPolyTestOut QuadTestIn QuadTestOut + +TD = $(TD4) + +# test source files + +TS1=QuickTest.c BerlekampTest.c CanZassTest.c ZZXFacTest.c MoreFacTest.c LLLTest.c +TS2=$(TS1) subset.c MatrixTest.c mat_lzz_pTest.c CharPolyTest.c RRTest.c QuadTest.c +TS3=$(TS2) GF2XTest.c GF2EXTest.c BitMatTest.c ZZ_pEXTest.c lzz_pEXTest.c Timing.c +TS4=$(TS3) ThreadTest.c ExceptionTest.c +TS = $(TS4) + +# scripts + +SCRIPTS1=MakeGetTime MakeGetPID MakeCheckFeature ResetFeatures CopyFeatures TestScript dosify unixify RemoveProg +SCRIPTS2=$(SCRIPTS1) configure DoConfig mfile cfile ppscript + +SCRIPTS=$(SCRIPTS2) + +# auxiliary source + +MD=MakeDesc.c MakeDescAux.c newnames.c gen_gmp_aux.c +GT=GetTime0.c GetTime1.c GetTime2.c GetTime3.c GetTime4.c GetTime5.c TestGetTime.c +GP=GetPID1.c GetPID2.c TestGetPID.c +CH=CheckCLZL.c CheckCLZLAux.c CheckLL.c CheckLLAux.c CheckAVX.c CheckFMA.c CheckCompile.c + +AUXPROGS = TestGetTime TestGetPID CheckFeature CheckCompile + + + +# documentation + + +D01=copying.txt BasicThreadPool.txt GF2.txt GF2E.txt GF2EX.txt GF2EXFactoring.txt GF2X.txt +D02=$(D01) GF2XFactoring.txt GF2XVec.txt HNF.txt Lazy.txt LazyTable.txt LLL.txt RR.txt SmartPtr.txt +D03=$(D02) ZZ.txt ZZVec.txt ZZX.txt ZZXFactoring.txt ZZ_p.txt ZZ_pE.txt +D04=$(D03) ZZ_pEX.txt ZZ_pEXFactoring.txt ZZ_pX.txt ZZ_pXFactoring.txt +D05=$(D04) conversions.txt flags.txt lzz_p.txt lzz_pE.txt lzz_pEX.txt +D06=$(D05) lzz_pEXFactoring.txt lzz_pX.txt lzz_pXFactoring.txt mat_GF2.txt +D07=$(D06) mat_GF2E.txt mat_RR.txt mat_ZZ.txt mat_ZZ_p.txt mat_ZZ_pE.txt +D08=$(D07) mat_lzz_p.txt mat_lzz_pE.txt mat_poly_ZZ.txt mat_poly_ZZ_p.txt +D09=$(D08)
mat_poly_lzz_p.txt matrix.txt pair.txt vector.txt +D10=$(D09) quad_float.txt sedscript.txt tools.txt vec_GF2.txt +D11=$(D10) vec_GF2E.txt vec_RR.txt vec_ZZ.txt vec_ZZ_p.txt vec_ZZ_pE.txt +D12=$(D11) vec_lzz_p.txt vec_lzz_pE.txt xdouble.txt names.txt +D13=$(D12) tour-ack.html tour-intro.html tour-time.html tour-changes.html +D14=$(D13) tour-modules.html tour-unix.html tour-examples.html +D15=$(D14) tour-roadmap.html tour-win.html tour-impl.html tour-struct.html +D16=$(D15) tour.html tour-ex1.html tour-ex2.html tour-ex3.html tour-ex4.html +D17=$(D16) tour-ex5.html tour-ex6.html tour-ex7.html arrow1.gif arrow2.gif arrow3.gif +D18=$(D17) tour-gmp.html tour-gf2x.html tour-tips.html config.txt version.txt + +TX01=GF2.txt GF2E.txt GF2EX.txt GF2EXFactoring.txt GF2X.txt GF2XFactoring.txt +TX02=GF2XVec.txt HNF.txt Lazy.txt LazyTable.txt LLL.txt RR.txt SmartPtr.txt ZZ.txt ZZVec.txt ZZX.txt ZZXFactoring.txt +TX03=ZZ_p.txt ZZ_pE.txt ZZ_pEX.txt ZZ_pEXFactoring.txt ZZ_pX.txt ZZ_pXFactoring.txt +TX04=lzz_p.txt lzz_pE.txt lzz_pEX.txt lzz_pEXFactoring.txt lzz_pX.txt +TX05=lzz_pXFactoring.txt mat_GF2.txt mat_GF2E.txt mat_RR.txt mat_ZZ.txt mat_ZZ_p.txt +TX06=mat_ZZ_pE.txt mat_lzz_p.txt mat_lzz_pE.txt mat_poly_ZZ.txt mat_poly_ZZ_p.txt +TX07=mat_poly_lzz_p.txt matrix.txt pair.txt quad_float.txt tools.txt vec_GF2.txt +TX08=vec_GF2E.txt vec_RR.txt vec_ZZ.txt vec_ZZ_p.txt vec_ZZ_pE.txt vec_lzz_p.txt +TX09=vec_lzz_pE.txt vector.txt version.txt xdouble.txt BasicThreadPool.txt + +TXFILES=$(TX01) $(TX02) $(TX03) $(TX04) $(TX05) $(TX06) $(TX07) $(TX08) $(TX09) + +HT01=GF2.cpp.html GF2E.cpp.html GF2EX.cpp.html GF2EXFactoring.cpp.html GF2X.cpp.html GF2XFactoring.cpp.html +HT02=GF2XVec.cpp.html HNF.cpp.html Lazy.cpp.html LazyTable.cpp.html LLL.cpp.html RR.cpp.html SmartPtr.cpp.html ZZ.cpp.html ZZVec.cpp.html ZZX.cpp.html ZZXFactoring.cpp.html +HT03=ZZ_p.cpp.html ZZ_pE.cpp.html ZZ_pEX.cpp.html ZZ_pEXFactoring.cpp.html ZZ_pX.cpp.html ZZ_pXFactoring.cpp.html +HT04=lzz_p.cpp.html lzz_pE.cpp.html lzz_pEX.cpp.html lzz_pEXFactoring.cpp.html lzz_pX.cpp.html +HT05=lzz_pXFactoring.cpp.html mat_GF2.cpp.html mat_GF2E.cpp.html mat_RR.cpp.html mat_ZZ.cpp.html mat_ZZ_p.cpp.html +HT06=mat_ZZ_pE.cpp.html mat_lzz_p.cpp.html mat_lzz_pE.cpp.html mat_poly_ZZ.cpp.html mat_poly_ZZ_p.cpp.html +HT07=mat_poly_lzz_p.cpp.html matrix.cpp.html pair.cpp.html quad_float.cpp.html tools.cpp.html vec_GF2.cpp.html +HT08=vec_GF2E.cpp.html vec_RR.cpp.html vec_ZZ.cpp.html vec_ZZ_p.cpp.html vec_ZZ_pE.cpp.html vec_lzz_p.cpp.html +HT09=vec_lzz_pE.cpp.html vector.cpp.html version.cpp.html xdouble.cpp.html BasicThreadPool.cpp.html + +HTFILES=$(HT01) $(HT02) $(HT03) $(HT04) $(HT05) $(HT06) $(HT07) $(HT08) $(HT09) + + +DOC = $(D18) $(HTFILES) + + + +# test program executables + +PROG1=QuickTest BerlekampTest CanZassTest ZZXFacTest MoreFacTest LLLTest BitMatTest +PROG2=$(PROG1) MatrixTest mat_lzz_pTest CharPolyTest RRTest QuadTest +PROG3=$(PROG2) GF2XTest GF2EXTest subset ZZ_pEXTest lzz_pEXTest Timing ThreadTest +PROGS = $(PROG3) + +# things to save to a tar file + +SFI1=makefile $(SRC) $(SINC) $(SCRIPTS) $(MD) $(GT) $(GP) $(CH) $(TS) $(TD) mach_desc.win +SFI2=$(SFI1) MulTimeTest.c Poly1TimeTest.c Poly2TimeTest.c Poly3TimeTest.c GF2XTimeTest.c +SFI3=$(SFI2) InitSettings.c DispSettings.c WizardAux Wizard def_makefile +SFILES=$(SFI3) + + +################################################################# +# +# Rules for compiling the library +# +################################################################# + + +NTL_INCLUDE = -I../include -I. 
+# NTL needs this to find its include files + +COMPILE = $(CXX) $(NTL_INCLUDE) $(CPPFLAGS) $(CXXAUTOFLAGS) $(CXXFLAGS) -c + +LINK = $(CXX) $(NTL_INCLUDE) $(CPPFLAGS) $(CXXAUTOFLAGS) $(CXXFLAGS) $(LDFLAGS) + + + +# 'make all' does a complete make, including all setup. +# It also creates the file 'all', which means you should +# run 'make clobber' before running 'make' or 'make all' +# again. + +all: + make setup1 + make setup2 + make setup3 + make setup4 + make ntl.a + touch all + + +# setup1 generates the file ../include/NTL/mach_desc.h + +setup1: + $(COMPILE) MakeDescAux.c + $(LINK) -o MakeDesc MakeDesc.c MakeDescAux.o $(LDLIBS) + ./MakeDesc + mv mach_desc.h ../include/NTL/mach_desc.h + + +# setup2 does some dynamic checks for GetTime, GetPID, __builtin_clzl, and LL types + +setup2: + echo "*** CheckFeature log ***" > CheckFeature.log + sh MakeGetTime "$(LINK)" "$(LDLIBS)" + sh MakeGetPID "$(LINK)" "$(LDLIBS)" + sh MakeCheckFeature BUILTIN_CLZL "CheckCLZL.c CheckCLZLAux.c" "$(LINK)" "$(LDLIBS)" + sh MakeCheckFeature LL_TYPE "CheckLL.c CheckLLAux.c" "$(LINK)" "$(LDLIBS)" + sh MakeCheckFeature AVX "CheckAVX.c" "$(LINK)" "$(LDLIBS)" + sh MakeCheckFeature FMA "CheckFMA.c" "$(LINK)" "$(LDLIBS)" + +# setup3 generates the file ../include/NTL/gmp_aux.h +# The file ../include/NTL/gmp_aux.h is included in ../include/NTL/lip.h +# when NTL_GMP_LIP is set. +# When this flag is not set, an empty file is produced. + +setup3: + $(LINK) $(GMP_OPT_INCDIR) -o gen_gmp_aux gen_gmp_aux.c $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + ./gen_gmp_aux > ../include/NTL/gmp_aux.h + +# setup4 runs the wizard + +setup4: + sh Wizard $(WIZARD) + + +ntl.a: $(OBJ) + $(AR) $(ARFLAGS) ntl.a $(OBJ) #LSTAT + - $(RANLIB) ntl.a #LSTAT +# $(LIBTOOL) --tag=CXX --mode=link $(LINK) -o libntl.la $(OBJ:.o=.lo) $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(GF2X_OPT_LIBDIR) $(GF2X_OPT_LIB) $(LDLIBS) -rpath $(LIBDIR) -version-info `cat VERSION_INFO` #LSHAR + +LCOMP= #LSTAT +# LCOMP=$(LIBTOOL) --tag=CXX --mode=compile #LSHAR + +lip.o: lip.c g_lip_impl.h c_lip_impl.h + $(LCOMP) $(COMPILE) $(GMP_OPT_INCDIR) lip.c + +ctools.o: ctools.c + $(LCOMP) $(COMPILE) ctools.c + + +GetTime.o: GetTime.c + $(LCOMP) $(COMPILE) GetTime.c + +GetPID.o: GetPID.c + $(LCOMP) $(COMPILE) GetPID.c + +CheckCompile: CheckCompile.c + $(LINK) -o CheckCompile CheckCompile.c $(LDLIBS) + + +.c.o: + $(LCOMP) $(COMPILE) $(GF2X_OPT_INCDIR) $< + +.c: + $(LINK) -o $@ $< ntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(GF2X_OPT_LIBDIR) $(GF2X_OPT_LIB) $(LDLIBS) #LSTAT +# $(LIBTOOL) --tag=CXX --mode=link $(LINK) -o $@ $< libntl.la #LSHAR + +################################################################# +# +# Rule for running tests +# make check runs a series of tests +# +################################################################# + +check: + sh RemoveProg $(PROGS) + make QuickTest + ./QuickTest + sh RemoveProg QuickTest + sh TestScript + +################################################################# +# +# Rule for installing +# make install just does a simple copy of the include file +# and library. The -p option is used to preserve file attributes. +# This helps avoid some problems (especially when copying ntl.a). +# Also, an attempt is made to make everything that is +# installed readable by everyone.
+# +# make uninstall removes these files +# +################################################################# + + + + +install: + mkdir -p -m 755 $(INCLUDEDIR) + rm -rf $(INCLUDEDIR)/NTL + mkdir -m 755 $(INCLUDEDIR)/NTL + cp -p ../include/NTL/*.h $(INCLUDEDIR)/NTL + - chmod -R a+r $(INCLUDEDIR)/NTL + mkdir -p -m 755 $(DOCDIR) + rm -rf $(DOCDIR)/NTL + mkdir -m 755 $(DOCDIR)/NTL + cp -p ../doc/*.txt $(DOCDIR)/NTL + cp -p ../doc/*.html $(DOCDIR)/NTL + cp -p ../doc/*.gif $(DOCDIR)/NTL + - chmod -R a+r $(DOCDIR)/NTL + mkdir -p -m 755 $(LIBDIR) + cp -p ntl.a $(LIBDIR)/libntl.a #LSTAT + - chmod a+r $(LIBDIR)/libntl.a #LSTAT +# $(LIBTOOL) --mode=install cp -p libntl.la $(LIBDIR) #LSHAR + + +uninstall: + rm -f $(LIBDIR)/libntl.a #LSTAT +# $(LIBTOOL) --mode=uninstall rm -f $(LIBDIR)/libntl.la #LSHAR + rm -rf $(INCLUDEDIR)/NTL + rm -rf $(DOCDIR)/NTL + +################################################################# +# +# Rules for cleaning up +# +# make clobber removes *everything* created by make, +# but it does not restore config.h to its default. +# +# make clean tidies up a bit +# +################################################################# + +clobber: + rm -f ntl.a mach_desc.h ../include/NTL/mach_desc.h GetTime.c GetPID.c + sh ResetFeatures '..' + rm -f ../include/NTL/gmp_aux.h + sh RemoveProg $(PROGS) MakeDesc $(AUXPROGS) gen_gmp_aux + rm -f *.o + rm -rf small + rm -f cfileout mfileout + rm -rf .libs *.lo libntl.la + rm -f all + +clean: + sh RemoveProg $(PROGS) MakeDesc $(AUXPROGS) gen_gmp_aux + rm -f *.o + rm -rf small +# - $(LIBTOOL) --mode=clean rm -f libntl.la *.lo #LSHAR + +################################################################# +# +# Rules for making tar and zip files +# +# make ppdoc creates pretty-printed versions of some documentation +# - run before make package or make winpack +# +# make package creates a tar.gz file suitable for Unix +# +# make winpack creates a zip file suitable for Windows +# +################################################################# + +ppdoc: + sh ppscript "$(TXFILES)" + +ppclean: + rm -f ../doc/*.cpp + + +package: + ./configure --nowrite + cp mfileout def_makefile + cp cfileout ../include/NTL/def_config.h + sh unixify "$(SFILES) DIRNAME WINDIR VERSION_INFO NOTES" "$(INCL)" "$(DOC)" + rm -rf `cat DIRNAME` + rm -f `cat DIRNAME`.tar + rm -f `cat DIRNAME`.tar.gz + mv unix `cat DIRNAME` + chmod -R a+rX `cat DIRNAME` + tar -cvf `cat DIRNAME`.tar `cat DIRNAME` + gzip `cat DIRNAME`.tar + rm -rf `cat DIRNAME` + +winpack: + ./configure --nowrite NTL_GMP_LIP=off + cp mfileout def_makefile + cp cfileout ../include/NTL/def_config.h + sh dosify "$(SRC)" "$(INCL)" "$(DOC)" "$(TS)" "$(TD)" "$(SINC)" + rm -rf `cat WINDIR` + rm -f `cat WINDIR`.zip + mv dos `cat WINDIR` + chmod -R a+rX `cat WINDIR` + find ./`cat WINDIR` '!' 
-name '*.gif' -print | zip -l `cat WINDIR` -@ + find ./`cat WINDIR` -name '*.gif' -print | zip -u `cat WINDIR` -@ + rm -rf `cat WINDIR` + + +###################################################################### +# +# config wizard related stuff +# +###################################################################### + +WO1 = FFT.o GetTime.o GetPID.o ctools.o ZZ.o ZZVec.o ZZ_p.o ZZ_pX.o +WO2 = $(WO1) ZZ_pX1.o lip.o tools.o vec_ZZ.o vec_ZZ_p.o +WO3 = $(WO2) GF2.o WordVector.o vec_GF2.o GF2X.o GF2X1.o thread.o BasicThreadPool.o fileio.o + +WOBJ = $(WO3) + +# wntl.a: LCOMP= #LSHAR +wntl.a: $(WOBJ) + $(AR) $(ARFLAGS) wntl.a $(WOBJ) + - $(RANLIB) wntl.a + +MulTimeTest: + $(LINK) -o MulTimeTest MulTimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + + +Poly1TimeTest: + $(LINK) -o Poly1TimeTest Poly1TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) +Poly2TimeTest: + $(LINK) -o Poly2TimeTest Poly2TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) +Poly3TimeTest: + $(LINK) -o Poly3TimeTest Poly3TimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + + +GF2XTimeTest: + $(LINK) -o GF2XTimeTest GF2XTimeTest.c wntl.a $(GMP_OPT_LIBDIR) $(GMP_OPT_LIB) $(LDLIBS) + +InitSettings: + $(LINK) -o InitSettings InitSettings.c $(LDLIBS) + + +DispSettings: + $(LINK) -o DispSettings DispSettings.c $(LDLIBS) + + + + diff --git a/thirdparty/linux/ntl/src/newnames.c b/thirdparty/linux/ntl/src/newnames.c new file mode 100644 index 0000000000..7a6b8e7f1d --- /dev/null +++ b/thirdparty/linux/ntl/src/newnames.c @@ -0,0 +1,173 @@ + +/************************************************************************ + +This program can be compiled under either C or C++. +It copies its input to its output, substituting all old +NTL macro names by new NTL macro names. +This is intended to automate the transition from NTL 3.1 to 3.5. + +Each maximal length alphanumeric substring in the input +is looked up in a table, and if there is a match, the substring +is replaced. 
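+ +For example (an illustrative fragment, not taken from the NTL sources), +the input line + + bound = ZZ_RADIX - 1; + +is rewritten as + + bound = NTL_RADIX - 1; + +and, since matching is on maximal substrings, a token such as +ZZ_RADIXROOT is replaced as a whole by NTL_RADIXROOT, rather than +having its ZZ_RADIX prefix rewritten.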
+ +*************************************************************************/ + + +#include <stdio.h> +#include <string.h> + +#define NumNames (79) + +const char *names[NumNames][2] = { +{ "BB_HALF_MUL_CODE", "NTL_BB_HALF_MUL_CODE" }, +{ "BB_MUL_CODE", "NTL_BB_MUL_CODE" }, +{ "BB_REV_CODE", "NTL_BB_REV_CODE" }, +{ "BB_SQR_CODE", "NTL_BB_SQR_CODE" }, +{ "FFTFudge", "NTL_FFTFudge" }, +{ "FFTMaxRoot", "NTL_FFTMaxRoot" }, +{ "FFTMaxRootBnd", "NTL_FFTMaxRootBnd" }, +{ "QUAD_FLOAT_SPLIT", "NTL_QUAD_FLOAT_SPLIT" }, +{ "WV_NTL_RANGE_CHECK_CODE", "NTL_WV_RANGE_CHECK_CODE" }, +{ "WordVectorExpansionRatio", "NTL_WordVectorExpansionRatio" }, +{ "WordVectorInputBlock", "NTL_WordVectorInputBlock" }, +{ "WordVectorMinAlloc", "NTL_WordVectorMinAlloc" }, +{ "XD_BOUND", "NTL_XD_BOUND" }, +{ "XD_BOUND_INV", "NTL_XD_BOUND_INV" }, +{ "XD_HBOUND", "NTL_XD_HBOUND" }, +{ "XD_HBOUND_INV", "NTL_XD_HBOUND_INV" }, +{ "ZZ_ARITH_RIGHT_SHIFT", "NTL_ARITH_RIGHT_SHIFT" }, +{ "ZZ_BITS_PER_INT", "NTL_BITS_PER_INT" }, +{ "ZZ_BITS_PER_LONG", "NTL_BITS_PER_LONG" }, +{ "ZZ_DOUBLES_LOW_HIGH", "NTL_DOUBLES_LOW_HIGH" }, +{ "ZZ_DOUBLE_PRECISION", "NTL_DOUBLE_PRECISION" }, +{ "ZZ_EXT_DOUBLE", "NTL_EXT_DOUBLE" }, +{ "ZZ_FDOUBLE_PRECISION", "NTL_FDOUBLE_PRECISION" }, +{ "ZZ_FRADIX", "NTL_FRADIX" }, +{ "ZZ_FRADIX_INV", "NTL_FRADIX_INV" }, +{ "ZZ_FetchHiLo", "NTL_FetchHiLo" }, +{ "ZZ_FetchLo", "NTL_FetchLo" }, +{ "ZZ_HI_WD", "NTL_HI_WD" }, +{ "ZZ_LO_WD", "NTL_LO_WD" }, +{ "ZZ_MAX_INT", "NTL_MAX_INT" }, +{ "ZZ_MAX_LONG", "NTL_MAX_LONG" }, +{ "ZZ_MIN_INT", "NTL_MIN_INT" }, +{ "ZZ_MIN_LONG", "NTL_MIN_LONG" }, +{ "ZZ_NBITS", "NTL_NBITS" }, +{ "ZZ_NBITSH", "NTL_NBITSH" }, +{ "ZZ_NBITS_MAX", "NTL_NBITS_MAX" }, +{ "ZZ_NTL_SINGLE_MUL_OK", "NTL_SINGLE_MUL_OK" }, +{ "ZZ_PRIME_BND", "NTL_PRIME_BND" }, +{ "ZZ_RADIX", "NTL_RADIX" }, +{ "ZZ_RADIXM", "NTL_RADIXM" }, +{ "ZZ_RADIXROOT", "NTL_RADIXROOT" }, +{ "ZZ_RADIXROOTM", "NTL_RADIXROOTM" }, +{ "ZZ_pRegister", "NTL_ZZ_pRegister" }, +{ "ZZ_pX_BERMASS_CROSSOVER", "NTL_ZZ_pX_BERMASS_CROSSOVER" }, +{ "ZZ_pX_DIV_CROSSOVER", "NTL_ZZ_pX_DIV_CROSSOVER" }, +{ "ZZ_pX_FFT_CROSSOVER", "NTL_ZZ_pX_FFT_CROSSOVER" }, +{ "ZZ_pX_GCD_CROSSOVER", "NTL_ZZ_pX_GCD_CROSSOVER" }, +{ "ZZ_pX_HalfGCD_CROSSOVER", "NTL_ZZ_pX_HalfGCD_CROSSOVER" }, +{ "ZZ_pX_NEWTON_CROSSOVER", "NTL_ZZ_pX_NEWTON_CROSSOVER" }, +{ "ZZ_pX_TRACE_CROSSOVER", "NTL_ZZ_pX_TRACE_CROSSOVER" }, +{ "ntl_eq_matrix_decl", "NTL_eq_matrix_decl" }, +{ "ntl_eq_matrix_impl", "NTL_eq_matrix_impl" }, +{ "ntl_eq_vector_decl", "NTL_eq_vector_decl" }, +{ "ntl_eq_vector_impl", "NTL_eq_vector_impl" }, +{ "ntl_io_matrix_decl", "NTL_io_matrix_decl" }, +{ "ntl_io_matrix_impl", "NTL_io_matrix_impl" }, +{ "ntl_io_vector_decl", "NTL_io_vector_decl" }, +{ "ntl_io_vector_impl", "NTL_io_vector_impl" }, +{ "ntl_matrix_decl", "NTL_matrix_decl" }, +{ "ntl_matrix_impl", "NTL_matrix_impl" }, +{ "ntl_pair_decl", "NTL_pair_decl" }, +{ "ntl_pair_eq_decl", "NTL_pair_eq_decl" }, +{ "ntl_pair_eq_impl", "NTL_pair_eq_impl" }, +{ "ntl_pair_impl", "NTL_pair_impl" }, +{ "ntl_pair_io_decl", "NTL_pair_io_decl" }, +{ "ntl_pair_io_impl", "NTL_pair_io_impl" }, +{ "ntl_vector_decl", "NTL_vector_decl" }, +{ "ntl_vector_default", "NTL_vector_default" }, +{ "ntl_vector_impl", "NTL_vector_impl" }, +{ "ntl_vector_impl_plain", "NTL_vector_impl_plain" }, +{ "zz_pRegister", "NTL_zz_pRegister" }, +{ "zz_pX_BERMASS_CROSSOVER", "NTL_zz_pX_BERMASS_CROSSOVER" }, +{ "zz_pX_DIV_CROSSOVER", "NTL_zz_pX_DIV_CROSSOVER" }, +{ "zz_pX_GCD_CROSSOVER", "NTL_zz_pX_GCD_CROSSOVER" }, +{ "zz_pX_HalfGCD_CROSSOVER", "NTL_zz_pX_HalfGCD_CROSSOVER" }, +{ 
"zz_pX_MOD_CROSSOVER", "NTL_zz_pX_MOD_CROSSOVER" }, +{ "zz_pX_MUL_CROSSOVER", "NTL_zz_pX_MUL_CROSSOVER" }, +{ "zz_pX_NEWTON_CROSSOVER", "NTL_zz_pX_NEWTON_CROSSOVER" }, +{ "zz_pX_TRACE_CROSSOVER", "NTL_zz_pX_TRACE_CROSSOVER" }, +}; + + +void PrintName(const char *name) +{ + int i; + + i = 0; + while (i < NumNames && strcmp(name, names[i][0])) + i++; + + if (i >= NumNames) + printf("%s", name); + else + printf("%s", names[i][1]); +} + + +int IsAlphaNum(int c) +{ + return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c == '_') || (c >= '0' && c <= '9')); +} + +char buf[10000]; + + +int main() +{ + int c; + int state; + int len; + + state = 0; + len = 0; + + + do { + c = getchar(); + + switch (state) { + case 0: + if (IsAlphaNum(c)) { + buf[len] = c; + len++; + state = 1; + } + else { + if (c != EOF) putchar(c); + } + + break; + + case 1: + if (IsAlphaNum(c)) { + buf[len] = c; + len++; + } + else { + buf[len] = '\0'; + PrintName(buf); + len = 0; + + if (c != EOF) putchar(c); + state = 0; + } + + break; + } + } while (c != EOF); + + return 0; +} diff --git a/thirdparty/linux/ntl/src/ppscript b/thirdparty/linux/ntl/src/ppscript new file mode 100644 index 0000000000..502379224c --- /dev/null +++ b/thirdparty/linux/ntl/src/ppscript @@ -0,0 +1,14 @@ +#!/bin/bash + +VIM=$HOME/Applications/MacVim/mvim + +cd ../doc + +for i in $* +do + name=`basename $i .txt` + cp $name.txt $name.cpp + $VIM $name.cpp '+set nu!' '+let c_no_curly_error=1' '+syntax off' '+syntax on' '+TOhtml' '+:1,$s/.*@anchor{\(.*\)}.*/<a name="\1"><\/a>/' '+w' '+qa!' +done + + diff --git a/thirdparty/linux/ntl/src/quad_float.c b/thirdparty/linux/ntl/src/quad_float.c new file mode 100644 index 0000000000..c0ae7aebec --- /dev/null +++ b/thirdparty/linux/ntl/src/quad_float.c @@ -0,0 +1,951 @@ +/* +Copyright (C) 1997, 1998, 1999, 2000 Victor Shoup + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +***************************************************** + +The quad_float package is derived from the doubledouble package of +Keith Briggs. However, the version employed in NTL has been extensively +modified. Below, I attach the copyright notice from the original +doubledouble package, which is currently available at + + http://www.labs.bt.com/people/briggsk2 + +***************************************************** + +Copyright (C) 1997 Keith Martin Briggs + +Except where otherwise indicated, +this program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details.
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +*/ + + + +#ifdef __INTEL_COMPILER +#pragma float_control(precise,on) +#endif + +// NOTE: the above will force the Intel compiler to adhere to +// language standards, which it does not do by default + +#include <NTL/quad_float.h> +#include <NTL/RR.h> + +#include <float.h> + +#include <NTL/new.h> + +NTL_START_IMPL + +#if (NTL_EXT_DOUBLE && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) + +#if (!defined(NTL_X86_FIX) && !defined(NTL_NO_X86_FIX)) + +#define NTL_X86_FIX + +#endif + +#endif + + +#if (NTL_EXT_DOUBLE && !defined(NTL_X86_FIX)) + +#define DOUBLE volatile double + +#else + +#define DOUBLE double + +#endif + + +#ifdef NTL_X86_FIX + + +#define START_FIX \ + volatile unsigned short __old_cw, __new_cw; \ + asm volatile ("fnstcw %0":"=m" (__old_cw)); \ + __new_cw = (__old_cw & ~0x300) | 0x200; \ + asm volatile ("fldcw %0": :"m" (__new_cw)); + + +#define END_FIX asm volatile ("fldcw %0": :"m" (__old_cw)); + +#else + +#define START_FIX +#define END_FIX + +#endif + + +static +void normalize(quad_float& z, const double& xhi, const double& xlo) +{ +START_FIX + DOUBLE u, v; + + u = xhi + xlo; + v = xhi - u; + v = v + xlo; + + z.hi = u; + z.lo = v; +END_FIX +} + + + +#if (NTL_BITS_PER_LONG >= NTL_DOUBLE_PRECISION) + + +quad_float to_quad_float(long n) +{ + DOUBLE xhi, xlo; + + xhi = TrueDouble(n); + + // Because we are assuming 2's complement integer + // arithmetic, the following prevents long(xhi) from overflowing. + + if (n > 0) + xlo = TrueDouble(n+long(-xhi)); + else + xlo = TrueDouble(n-long(xhi)); + + // renormalize...just to be safe + + quad_float z; + normalize(z, xhi, xlo); + return z; +} + +quad_float to_quad_float(unsigned long n) +{ + DOUBLE xhi, xlo, t; + + const double bnd = double(1L << (NTL_BITS_PER_LONG-2))*4.0; + + xhi = TrueDouble(n); + + if (xhi >= bnd) + t = xhi - bnd; + else + t = xhi; + + // we use the "to_long" function here to be as portable as possible.
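+ // (Annotation, not in the original source: modulo 2^NTL_BITS_PER_LONG, + // n - (unsigned long)(t) equals the small rounding residual n - xhi, + // so it fits in a long and to_long recovers it portably.)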
+ long llo = to_long(n - (unsigned long)(t)); + xlo = TrueDouble(llo); + + quad_float z; + normalize(z, xhi, xlo); + return z; +} +#endif + + +NTL_CHEAP_THREAD_LOCAL +long quad_float::oprec = 10; + +void quad_float::SetOutputPrecision(long p) +{ + if (p < 1) p = 1; + + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("quad_float: output precision too big"); + + oprec = p; +} + + +quad_float operator +(const quad_float& x, const quad_float& y ) { +START_FIX + DOUBLE H, h, T, t, S, s, e, f; + DOUBLE t1; + + S = x.hi + y.hi; + T = x.lo + y.lo; + e = S - x.hi; + f = T - x.lo; + + t1 = S-e; + t1 = x.hi-t1; + s = y.hi-e; + s = s + t1; + + t1 = T-f; + t1 = x.lo-t1; + t = y.lo-f; + t = t + t1; + + + s = s + T; + H = S + s; + h = S - H; + h = h + s; + + h = h + t; + e = H + h; + f = H - e; + f = f + h; +END_FIX + return quad_float(e, f); +} + +quad_float& operator +=(quad_float& x, const quad_float& y ) { +START_FIX + DOUBLE H, h, T, t, S, s, e, f; + DOUBLE t1; + + S = x.hi + y.hi; + T = x.lo + y.lo; + e = S - x.hi; + f = T - x.lo; + + t1 = S-e; + t1 = x.hi-t1; + s = y.hi-e; + s = s + t1; + + t1 = T-f; + t1 = x.lo-t1; + t = y.lo-f; + t = t + t1; + + + s = s + T; + H = S + s; + h = S - H; + h = h + s; + + h = h + t; + e = H + h; + f = H - e; + f = f + h; + + x.hi = e; + x.lo = f; +END_FIX + return x; +} + +quad_float operator -(const quad_float& x, const quad_float& y ) { +START_FIX + DOUBLE H, h, T, t, S, s, e, f; + DOUBLE t1, yhi, ylo; + + yhi = -y.hi; + ylo = -y.lo; + + S = x.hi + yhi; + T = x.lo + ylo; + e = S - x.hi; + f = T - x.lo; + + t1 = S-e; + t1 = x.hi-t1; + s = yhi-e; + s = s + t1; + + t1 = T-f; + t1 = x.lo-t1; + t = ylo-f; + t = t + t1; + + + s = s + T; + H = S + s; + h = S - H; + h = h + s; + + h = h + t; + e = H + h; + f = H - e; + f = f + h; + +END_FIX + return quad_float(e, f); +} + +quad_float& operator -=(quad_float& x, const quad_float& y ) { +START_FIX + DOUBLE H, h, T, t, S, s, e, f; + DOUBLE t1, yhi, ylo; + + yhi = -y.hi; + ylo = -y.lo; + + S = x.hi + yhi; + T = x.lo + ylo; + e = S - x.hi; + f = T - x.lo; + + t1 = S-e; + t1 = x.hi-t1; + s = yhi-e; + s = s + t1; + + t1 = T-f; + t1 = x.lo-t1; + t = ylo-f; + t = t + t1; + + + s = s + T; + H = S + s; + h = S - H; + h = h + s; + + h = h + t; + e = H + h; + f = H - e; + f = f + h; + + x.hi = e; + x.lo = f; +END_FIX + return x; +} + +quad_float operator -(const quad_float& x) +{ +START_FIX + DOUBLE xhi, xlo, u, v; + + xhi = -x.hi; + xlo = -x.lo; + + // it is a good idea to renormalize here, just in case + // the rounding rule depends on sign, and thus we will + // maintain the "normal form" for quad_float's. + + u = xhi + xlo; + v = xhi - u; + v = v + xlo; + +END_FIX + return quad_float(u, v); +} + + + +#if (NTL_FMA_DETECTED) + +double quad_float_zero = 0; + +static inline +double Protect(double x) { return x + quad_float_zero; } + +#else + + +static inline +double Protect(double x) { return x; } + + +#endif + +// NOTE: this is really sick: some compilers will issue FMA +// (fused mul add) instructions which will break correctness. +// C99 standard is supposed to prevent this across separate +// statements, but C++ standard doesn't guarantee much at all. +// In any case, gcc does not even implement the C99 standard +// correctly. One could disable this by compiling with +// an appropriate flag: -mno-fma works for gcc, while -no-fma works +// for icc. icc and MSVC++ also support pragmas to do this: +// #pragma fp_contract(off). There is also a compiler flag for +// gcc: -ffp-contract=off, but -mno-fma seems more widely supported. 
+// These flags work for clang, as well. +// +// But in any case, I'd rather not mess with getting these flags right. +// By calling Protect(a*b), this has the effect of forcing the +// compiler to compute a*b + 0. Assuming the compiler otherwise +// does not perform any re-association, this should do the trick. +// There is a small performance penalty, but it should be reasonable. + + + +quad_float operator *(const quad_float& x,const quad_float& y ) { +START_FIX + DOUBLE hx, tx, hy, ty, C, c; + DOUBLE t1, t2; + + C = Protect(NTL_QUAD_FLOAT_SPLIT*x.hi); + hx = C-x.hi; + c = Protect(NTL_QUAD_FLOAT_SPLIT*y.hi); + hx = C-hx; + tx = x.hi-hx; + hy = c-y.hi; + C = Protect(x.hi*y.hi); + hy = c-hy; + ty = y.hi-hy; + + // c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(x.hi*y.lo+x.lo*y.hi); + + t1 = Protect(hx*hy); + t1 = t1-C; + t2 = Protect(hx*ty); + t1 = t1+t2; + t2 = Protect(tx*hy); + t1 = t1+t2; + t2 = Protect(tx*ty); + c = t1+t2; + t1 = Protect(x.hi*y.lo); + t2 = Protect(x.lo*y.hi); + t1 = t1+t2; + c = c + t1; + + + hx = C+c; + tx = C-hx; + tx = tx+c; + +END_FIX + return quad_float(hx, tx); +} + +quad_float& operator *=(quad_float& x,const quad_float& y ) { +START_FIX + DOUBLE hx, tx, hy, ty, C, c; + DOUBLE t1, t2; + + C = Protect(NTL_QUAD_FLOAT_SPLIT*x.hi); + hx = C-x.hi; + c = Protect(NTL_QUAD_FLOAT_SPLIT*y.hi); + hx = C-hx; + tx = x.hi-hx; + hy = c-y.hi; + C = Protect(x.hi*y.hi); + hy = c-hy; + ty = y.hi-hy; + + // c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(x.hi*y.lo+x.lo*y.hi); + + t1 = Protect(hx*hy); + t1 = t1-C; + t2 = Protect(hx*ty); + t1 = t1+t2; + t2 = Protect(tx*hy); + t1 = t1+t2; + t2 = Protect(tx*ty); + c = t1+t2; + t1 = Protect(x.hi*y.lo); + t2 = Protect(x.lo*y.hi); + t1 = t1+t2; + c = c + t1; + + + hx = C+c; + tx = C-hx; + tx = tx+c; + + x.hi = hx; + x.lo = tx; +END_FIX + return x; +} + +quad_float operator /(const quad_float& x, const quad_float& y ) { +START_FIX + DOUBLE hc, tc, hy, ty, C, c, U, u; + DOUBLE t1; + + C = x.hi/y.hi; + c = Protect(NTL_QUAD_FLOAT_SPLIT*C); + hc = c-C; + u = Protect(NTL_QUAD_FLOAT_SPLIT*y.hi); + hc = c-hc; + tc = C-hc; + hy = u-y.hi; + U = Protect(C * y.hi); + hy = u-hy; + ty = y.hi-hy; + + // u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty; + + u = Protect(hc*hy); + u = u-U; + t1 = Protect(hc*ty); + u = u+t1; + t1 = Protect(tc*hy); + u = u+t1; + t1 = Protect(tc*ty); + u = u+t1; + + // c = ((((x.hi-U)-u)+x.lo)-C*y.lo)/y.hi; + + c = x.hi-U; + c = c-u; + c = c+x.lo; + t1 = Protect(C*y.lo); + c = c - t1; + c = c/y.hi; + + hy = C+c; + ty = C-hy; + ty = ty+c; + +END_FIX + return quad_float(hy, ty); +} + +quad_float& operator /=(quad_float& x, const quad_float& y ) { +START_FIX + DOUBLE hc, tc, hy, ty, C, c, U, u; + DOUBLE t1; + + C = x.hi/y.hi; + c = Protect(NTL_QUAD_FLOAT_SPLIT*C); + hc = c-C; + u = Protect(NTL_QUAD_FLOAT_SPLIT*y.hi); + hc = c-hc; + tc = C-hc; + hy = u-y.hi; + U = Protect(C * y.hi); + hy = u-hy; + ty = y.hi-hy; + + // u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty; + + u = Protect(hc*hy); + u = u-U; + t1 = Protect(hc*ty); + u = u+t1; + t1 = Protect(tc*hy); + u = u+t1; + t1 = Protect(tc*ty); + u = u+t1; + + // c = ((((x.hi-U)-u)+x.lo)-C*y.lo)/y.hi; + + c = x.hi-U; + c = c-u; + c = c+x.lo; + t1 = Protect(C*y.lo); + c = c - t1; + c = c/y.hi; + + hy = C+c; + ty = C-hy; + ty = ty+c; + + x.hi = hy; + x.lo = ty; +END_FIX + return x; +} + + +quad_float sqrt(const quad_float& y) { + if (y.hi < 0.0) + ArithmeticError("quad_float: square root of negative number"); + if (y.hi == 0.0) return quad_float(0.0,0.0); + + double c; + c = sqrt(y.hi); + ForceToMem(&c); // This is fairly 
 paranoid, but it doesn't cost too much. + +START_FIX + + DOUBLE p,q,hx,tx,u,uu,cc; + DOUBLE t1; + + p = Protect(NTL_QUAD_FLOAT_SPLIT*c); + hx = (c-p); + hx = hx+p; + tx = c-hx; + p = Protect(hx*hx); + q = Protect(hx*tx); + q = q+q; + + u = p+q; + uu = p-u; + uu = uu+q; + t1 = Protect(tx*tx); + uu = uu+t1; + + + cc = y.hi-u; + cc = cc-uu; + cc = cc+y.lo; + t1 = c+c; + cc = cc/t1; + + hx = c+cc; + tx = c-hx; + tx = tx+cc; +END_FIX + return quad_float(hx, tx); +} + + + +void power(quad_float& z, const quad_float& a, long e) +{ + quad_float res, u; + unsigned long k; + + if (e < 0) + k = -((unsigned long) e); + else + k = e; + + res = 1.0; + u = a; + + while (k) { + if (k & 1) + res = res * u; + + k = k >> 1; + if (k) + u = u * u; + } + + if (e < 0) + z = 1.0/res; + else + z = res; +} + + +void power2(quad_float& z, long e) +{ + z.hi = _ntl_ldexp(1.0, e); + z.lo = 0; +} + + +long to_long(const quad_float& x) +{ + double fhi, flo; + + fhi = floor(x.hi); + + if (fhi == x.hi) + flo = floor(x.lo); + else + flo = 0; + + // the following code helps to prevent unnecessary integer overflow, + // and guarantees that to_long(to_quad_float(a)) == a, for all long a, + // provided long's are not too wide. + + if (fhi > 0) + return long(flo) - long(-fhi); + else + return long(fhi) + long(flo); +} + + + +// This version of ZZ to quad_float conversion relies on the +// precise rounding rules implemented by the ZZ to double conversion. + + +void conv(quad_float& z, const ZZ& a) +{ + double xhi, xlo; + + conv(xhi, a); + + if (!IsFinite(&xhi)) { + z.hi = xhi; + z.lo = 0; + return; + } + + NTL_ZZRegister(t); + + conv(t, xhi); + sub(t, a, t); + + conv(xlo, t); + + normalize(z, xhi, xlo); + + // The following is just paranoia. + if (fabs(z.hi) < NTL_FDOUBLE_PRECISION && z.lo != 0) + LogicError("internal error: ZZ to quad_float conversion"); +} + +void conv(ZZ& z, const quad_float& x) +{ + NTL_ZZRegister(t1); + NTL_ZZRegister(t2); + NTL_ZZRegister(t3); + + double fhi, flo; + + fhi = floor(x.hi); + + if (fhi == x.hi) { + flo = floor(x.lo); + + conv(t1, fhi); + conv(t2, flo); + + add(z, t1, t2); + } + else + conv(z, fhi); +} + + + +ostream& operator<<(ostream& s, const quad_float& a) +{ + quad_float aa = a; + + if (!IsFinite(&aa)) { + s << "NaN"; + return s; + } + + RRPush push; + RROutputPush opush; + + RR::SetPrecision(long(3.33*quad_float::oprec) + 10); + RR::SetOutputPrecision(quad_float::oprec); + + NTL_TLS_LOCAL(RR, t); + + conv(t, a); + s << t; + + return s; +} + +istream& operator>>(istream& s, quad_float& x) +{ + RRPush push; + RR::SetPrecision(4*NTL_DOUBLE_PRECISION); + + NTL_TLS_LOCAL(RR, t); + NTL_INPUT_CHECK_RET(s, s >> t); + conv(x, t); + + return s; +} + +void random(quad_float& x) +{ + RRPush push; + RR::SetPrecision(4*NTL_DOUBLE_PRECISION); + + NTL_TLS_LOCAL(RR, t); + random(t); + conv(x, t); +} + +quad_float random_quad_float() +{ + quad_float x; + random(x); + return x; +} + +long IsFinite(quad_float *x) +{ + return IsFinite(&x->hi) && IsFinite(&x->lo); +} + + +long PrecisionOK() +{ +START_FIX + long k; + DOUBLE l1 = (double)1; + DOUBLE lh = 1/(double)2; + DOUBLE epsilon; + DOUBLE fudge, oldfudge; + + epsilon = l1; + fudge = l1+l1; + + k = 0; + + do { + k++; + epsilon = epsilon * lh; + oldfudge = fudge; + fudge = l1 + epsilon; + } while (fudge > l1 && fudge < oldfudge); + +END_FIX + return k == NTL_DOUBLE_PRECISION; +} + +quad_float floor(const quad_float& x) +{ + double fhi = floor(x.hi); + + if (fhi != x.hi) + return quad_float(fhi, 0.0); + else { + double flo = floor(x.lo); + quad_float z; + 
 normalize(z, fhi, flo); + return z; + } +} + + +quad_float ceil(const quad_float& x) { + return -floor(-x); +} + +quad_float trunc(const quad_float& x) { + if (x>=0.0) return floor(x); else return -floor(-x); +} + + + +long compare(const quad_float& x, const quad_float& y) +{ + if (x.hi > y.hi) + return 1; + else if (x.hi < y.hi) + return -1; + else if (x.lo > y.lo) + return 1; + else if (x.lo < y.lo) + return -1; + else + return 0; +} + + +quad_float fabs(const quad_float& x) +{ if (x.hi>=0.0) return x; else return -x; } + +quad_float to_quad_float(const char *s) +{ + quad_float x; + + RRPush push; + RR::SetPrecision(4*NTL_DOUBLE_PRECISION); + + NTL_TLS_LOCAL(RR, t); + conv(t, s); + conv(x, t); + + return x; +} + + +quad_float ldexp(const quad_float& x, long exp) { // x*2^exp + double xhi, xlo; + quad_float z; + + xhi = _ntl_ldexp(x.hi, exp); + xlo = _ntl_ldexp(x.lo, exp); + + normalize(z, xhi, xlo); + return z; +} + + +quad_float exp(const quad_float& x) { // New version 97 Aug 05 +/* +! Calculate a quadruple-precision exponential +! Method: +! x x.log2(e) nint[x.log2(e)] + frac[x.log2(e)] +! e = 2 = 2 +! +! iy fy +! = 2 . 2 +! Then +! fy y.loge(2) +! 2 = e +! +! Now y.loge(2) will be less than 0.3466 in absolute value. +! This is halved and a Pade approximation is used to approximate e^x over +! the region (-0.1733, +0.1733). This approximation is then squared. +*/ + if (x.hi < DBL_MIN_10_EXP*2.302585092994045684017991) + return to_quad_float(0.0); + if (x.hi > DBL_MAX_10_EXP*2.302585092994045684017991) { + ResourceError("exp(quad_float): overflow"); + } + + static const quad_float Log2 = to_quad_float("0.6931471805599453094172321214581765680755"); + // GLOBAL (assumes C++11 thread-safe init) + + quad_float y,temp,ysq,sum1,sum2; + long iy; + y=x/Log2; + temp = floor(y+0.5); + iy = to_long(temp); + y=(y-temp)*Log2; + y=ldexp(y,-1L); + ysq=y*y; + sum1=y*((((ysq+3960.0)*ysq+2162160.0)*ysq+302702400.0)*ysq+8821612800.0); + sum2=(((90.0*ysq+110880.0)*ysq+30270240.0)*ysq+2075673600.0)*ysq+17643225600.0; +/* +! sum2 + sum1 2.sum1 +! Now approximation = ----------- = 1 + ----------- = 1 + 2.temp +! sum2 - sum1 sum2 - sum1 +! +! Then (1 + 2.temp)^2 = 4.temp.(1 + temp) + 1 +*/ + temp=sum1/(sum2-sum1); + y=temp*(temp+1); + y=ldexp(y,2L); + return ldexp(y+1,iy); +} + +quad_float log(const quad_float& t) { // Newton method. See Bailey, MPFUN + if (t.hi <= 0.0) { + ArithmeticError("log(quad_float): argument must be positive"); + } + double s1 = log(t.hi); + ForceToMem(&s1); // Again, this is fairly paranoid.
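+ // (Annotation, not in the original source: each Newton step + // s <- s + (t - exp(s))/exp(s) roughly doubles the number of correct + // digits, so a single step takes the double-precision seed s1 to full + // quad_float accuracy.)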
+ quad_float s; + s = s1; + quad_float e; + e=exp(s); + return s+(t-e)/e; // Newton step +} + +long operator> (const quad_float& x, const quad_float& y) { + return (x.hi> y.hi) || (x.hi==y.hi && x.lo> y.lo); } +long operator>=(const quad_float& x, const quad_float& y) { + return (x.hi>y.hi) || (x.hi==y.hi && x.lo>=y.lo); } +long operator< (const quad_float& x, const quad_float& y) { + return (x.hi< y.hi) || (x.hi==y.hi && x.lo< y.lo); } +long operator<=(const quad_float& x, const quad_float& y) { + return (x.hi<y.hi) || (x.hi==y.hi && x.lo<=y.lo); } + +NTL_CLIENT + +long SubsetSumSolution(const vec_ZZ& z) +{ + long n = z.length()-3; + long j; + + if (z(n+1) != 0) return 0; + if (z(n+2) != -1 && z(n+2) != 1) return 0; + for (j = 1; j <= n; j++) + if (z(j) != -1 && z(j) != 1) return 0; + + return 1; +} + + + +int main() +{ + RR::SetPrecision(150); + long n, b, size; + + cerr << "n: "; + cin >> n; + + cerr << "b: "; + cin >> b; + + cerr << "size: "; + cin >> size; + + cerr << "prune: "; + long prune; + cin >> prune; + + ZZ seed; + cerr << "seed: "; + cin >> seed; + + if (seed != 0) + SetSeed(seed); + + char alg; + cerr << "alg [fqQxr]: "; + cin >> alg; + + double TotalTime = 0; + long TotalSucc = 0; + + long iter; + + for (iter = 1; iter <= 20; iter++) { + vec_ZZ a; + a.SetLength(n); + + ZZ bound; + + LeftShift(bound, to_ZZ(1), b); + + long i; + for (i = 1; i <= n; i++) { + RandomBnd(a(i), bound); + a(i) += 1; + } + + ZZ S; + + do { + RandomLen(S, n+1); + } while (weight(S) != n/2+1); + + ZZ s; + clear(s); + for (i = 1; i <= n; i++) + if (bit(S, i-1)) + s += a(i); + + mat_ZZ B(INIT_SIZE, n+1, n+3); + + for (i = 1; i <= n; i++) { + B(i, i) = 2; + B(i, n+1) = a(i) * n; + B(i, n+3) = n; + } + + for (i = 1; i <= n; i++) + B(n+1, i) = 1; + + B(n+1, n+1) = s * n; + B(n+1, n+2) = 1; + B(n+1, n+3) = n; + B(n+1, n+3) *= n/2; + + swap(B(1), B(n+1)); + + for (i = 2; i <= n; i++) { + long j = RandomBnd(n-i+2) + i; + swap(B(i), B(j)); + } + + double t; + + LLLStatusInterval = 10; + + t = GetTime(); + switch (alg) { + case 'f': + BKZ_FP(B, 0.99, size, prune, SubsetSumSolution); + break; + case 'q': + BKZ_QP(B, 0.99, size, prune, SubsetSumSolution); + break; + case 'Q': + BKZ_QP1(B, 0.99, size, prune, SubsetSumSolution); + break; + case 'x': + BKZ_XD(B, 0.99, size, prune, SubsetSumSolution); + break; + case 'r': + BKZ_RR(B, 0.99, size, prune, SubsetSumSolution); + break; + default: + TerminalError("invalid algorithm"); + } + + + t = GetTime()-t; + + long succ = 0; + for (i = 1; i <= n+1; i++) + if (SubsetSumSolution(B(i))) + succ = 1; + + TotalTime += t; + TotalSucc += succ; + + if (succ) + cerr << "+"; + else + cerr << "-"; + } + + cerr << "\n"; + + cerr << "number of successes: " << TotalSucc << "\n"; + cerr << "average time: " << TotalTime/20 << "\n"; + + return 0; +} + + + diff --git a/thirdparty/linux/ntl/src/thread.c b/thirdparty/linux/ntl/src/thread.c new file mode 100644 index 0000000000..4509a1bd89 --- /dev/null +++ b/thirdparty/linux/ntl/src/thread.c @@ -0,0 +1,37 @@ + +#include <NTL/thread.h> + +#ifdef NTL_THREADS + +#include <thread> +#include <sstream> + +#endif + + + +NTL_START_IMPL + + +const string& CurrentThreadID() +{ + NTL_TLS_LOCAL(string, ID); + static NTL_CHEAP_THREAD_LOCAL bool initialized = false; + + if (!initialized) { +#ifdef NTL_THREADS + stringstream ss; + ss << this_thread::get_id(); + ID = ss.str(); +#else + ID = "0"; +#endif + initialized = true; + } + + return ID; +} + + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/tools.c b/thirdparty/linux/ntl/src/tools.c new file mode 100644 index 0000000000..eea4722d68 --- /dev/null +++ 
diff --git a/thirdparty/linux/ntl/src/tools.c b/thirdparty/linux/ntl/src/tools.c
new file mode 100644
index 0000000000..eea4722d68
--- /dev/null
+++ b/thirdparty/linux/ntl/src/tools.c
@@ -0,0 +1,152 @@
+
+#include <NTL/tools.h>
+
+#include <cstdlib>
+#include <cctype>
+
+#include <iostream>
+
+
+
+NTL_START_IMPL
+
+NTL_CHEAP_THREAD_LOCAL void (*ErrorCallback)() = 0;
+NTL_CHEAP_THREAD_LOCAL void (*ErrorMsgCallback)(const char *) = 0;
+
+
+void TerminalError(const char *s)
+{
+   if (ErrorMsgCallback)
+      (*ErrorMsgCallback)(s);
+   else
+      cerr << s << "\n";
+
+   if (ErrorCallback) (*ErrorCallback)();
+   abort();
+}
+
+
+// The following implementation of CharToIntVal is completely portable.
+
+long CharToIntVal(long a)
+{
+   switch (a) {
+      case '0': return 0;
+      case '1': return 1;
+      case '2': return 2;
+      case '3': return 3;
+      case '4': return 4;
+      case '5': return 5;
+      case '6': return 6;
+      case '7': return 7;
+      case '8': return 8;
+      case '9': return 9;
+
+      case 'A': return 10;
+      case 'B': return 11;
+      case 'C': return 12;
+      case 'D': return 13;
+      case 'E': return 14;
+      case 'F': return 15;
+
+      case 'a': return 10;
+      case 'b': return 11;
+      case 'c': return 12;
+      case 'd': return 13;
+      case 'e': return 14;
+      case 'f': return 15;
+
+      default: return -1;
+   }
+}
+
+// The following implementation of IntValToChar is completely portable.
+
+char IntValToChar(long a)
+{
+   switch (a) {
+      case 0: return '0';
+      case 1: return '1';
+      case 2: return '2';
+      case 3: return '3';
+      case 4: return '4';
+      case 5: return '5';
+      case 6: return '6';
+      case 7: return '7';
+      case 8: return '8';
+      case 9: return '9';
+
+      case 10: return 'a';
+      case 11: return 'b';
+      case 12: return 'c';
+      case 13: return 'd';
+      case 14: return 'e';
+      case 15: return 'f';
+
+      default: LogicError("IntValToChar: bad arg");
+   }
+
+   return 0;  // to suppress warnings
+}
+
+
+long IsWhiteSpace(long a)
+{
+   if (a > NTL_MAX_INT || a < NTL_MIN_INT)
+      return 0;
+
+   int b = (int) a;
+
+   if (isspace(b))
+      return 1;
+   else
+      return 0;
+}
+
+long SkipWhiteSpace(istream& s)
+{
+   long c;
+
+   c = s.peek();
+   while (IsWhiteSpace(c)) {
+      s.get();
+      c = s.peek();
+   }
+
+   if (c == EOF)
+      return 0;
+   else
+      return 1;
+}
+
+long IsEOFChar(long c)
+{
+   return c == EOF;
+}
+
+
+
+void PrintTime(ostream& s, double t)
+{
+   long hh, mm, ss;
+
+   ss = long(t + 0.5);
+
+   hh = ss/3600;
+   ss = ss - hh*3600;
+   mm = ss/60;
+   ss = ss - mm*60;
+
+   if (hh > 0)
+      s << hh << ":";
+
+   if (hh > 0 || mm > 0) {
+      if (hh > 0 && mm < 10) s << "0";
+      s << mm << ":";
+   }
+
+   if ((hh > 0 || mm > 0) && ss < 10) s << "0";
+   s << ss;
+}
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/unixify b/thirdparty/linux/ntl/src/unixify
new file mode 100644
index 0000000000..28262205bf
--- /dev/null
+++ b/thirdparty/linux/ntl/src/unixify
@@ -0,0 +1,30 @@
+
+rm -r unix
+mkdir unix
+mkdir unix/src
+mkdir unix/include
+mkdir unix/include/NTL
+mkdir unix/doc
+
+cp ../README unix/README
+
+for i in $1
+do
+   cp $i unix/src/$i
+done
+
+for i in $2
+do
+   cp ../include/NTL/$i unix/include/NTL/$i
+done
+
+
+
+for i in $3
+do
+   cp ../doc/$i unix/doc/$i
+done
+
+cp ../include/NTL/def_config.h unix/include/NTL/config.h
+cp def_makefile unix/src/makefile
+sh ResetFeatures unix
diff --git a/thirdparty/linux/ntl/src/vec_GF2.c b/thirdparty/linux/ntl/src/vec_GF2.c
new file mode 100644
index 0000000000..9d6001d96d
--- /dev/null
+++ b/thirdparty/linux/ntl/src/vec_GF2.c
@@ -0,0 +1,650 @@
+
+#include <NTL/vec_GF2.h>
+
+#include
+#include
+
+NTL_START_IMPL
+
+
+// FIXME: why do vec_GF2 and GF2X use different strategies for
+// keeping high order bits cleared?  I don't think it matters
+// much, but it is strange.
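
The vec_GF2 routines below all manipulate a vector of bits packed into machine words: bit i lives at position i % NTL_BITS_PER_LONG of word i / NTL_BITS_PER_LONG, and any bits past the logical length must be kept zero. A minimal standalone sketch of that shared addressing scheme, with BITS standing in for NTL_BITS_PER_LONG (get_bit/put_bit are illustrative names, not NTL API):

    #include <climits>
    #include <iostream>

    const long BITS = (long)(sizeof(unsigned long) * CHAR_BIT);

    bool get_bit(const unsigned long *rep, long i) {
        long q = i / BITS;        // word index
        long p = i - q * BITS;    // bit position within the word
        return (rep[q] >> p) & 1UL;
    }

    void put_bit(unsigned long *rep, long i, bool a) {
        long q = i / BITS;
        long p = i - q * BITS;
        if (a) rep[q] |=  (1UL << p);
        else   rep[q] &= ~(1UL << p);
    }

    int main() {
        unsigned long rep[2] = {0, 0};   // room for 2*BITS bits
        put_bit(rep, 3, true);
        put_bit(rep, BITS + 1, true);
        std::cout << get_bit(rep, 3) << get_bit(rep, 4)
                  << get_bit(rep, BITS + 1) << "\n";   // prints 101
    }
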
+ +void vec_GF2::SetLength(long n) +{ + long len = length(); + + if (n == len) return; + + if (n < 0) LogicError("negative length in vec_GF2::SetLength"); + + if (NTL_OVERFLOW(n, 1, 0)) + ResourceError("vec_GF2::SetLength: excessive length"); + + if (fixed()) LogicError("SetLength: can't change this vector's length"); + + long wdlen = (n+NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG; + + if (n < len) { + // have to clear bits n..len-1 + + long q = n/NTL_BITS_PER_LONG; + long p = n - q*NTL_BITS_PER_LONG; + + _ntl_ulong *x = rep.elts(); + + x[q] &= (1UL << p) - 1UL; + + long q1 = (len-1)/NTL_BITS_PER_LONG; + long i; + + for (i = q+1; i <= q1; i++) + x[i] = 0; + + _len = n; + + rep.QuickSetLength(wdlen); + + return; + } + + long maxlen = MaxLength(); + + if (n <= maxlen) { + _len = n; + rep.QuickSetLength(wdlen); + return; + } + + long alloc = rep.MaxLength(); + + if (wdlen <= alloc) { + _len = n; + _maxlen = (n << 1); + rep.QuickSetLength(wdlen); + return; + } + + // have to grow vector and initialize to zero + + rep.SetLength(wdlen); + + wdlen = rep.MaxLength(); // careful! rep.MaxLength() may exceed the + // old value of wdlen...this is due to + // the awkward semantics of WordVector. + + _ntl_ulong *x = rep.elts(); + + long i; + for (i = alloc; i < wdlen; i++) + x[i] = 0; + + _len = n; + _maxlen = (n << 1); + +} + +void vec_GF2::SetLength(long n, GF2 a) +{ + long old_len = length(); + SetLength(n); + + if (!IsZero(a) && old_len < n) { + long i; + for (i = old_len; i < n; i++) put(i, a); + } +} + + +vec_GF2& vec_GF2::operator=(const vec_GF2& a) +{ + if (this == &a) return *this; + + long n = a.length(); + + SetLength(n); + + long wdlen = (n+NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG; + + _ntl_ulong *x = rep.elts(); + const _ntl_ulong *y = a.rep.elts(); + + long i; + for (i = 0; i < wdlen; i++) + x[i] = y[i]; + + return *this; +} + +void vec_GF2::kill() +{ + if (fixed()) LogicError("can't kill this vec_GF2"); + rep.kill(); + _len = _maxlen = 0; +} + + +void vec_GF2::SetMaxLength(long n) +{ + long oldlen = length(); + if (n > oldlen) { + SetLength(n); + SetLength(oldlen); + } +} + +void vec_GF2::FixLength(long n) +{ + if (MaxLength() > 0 || fixed()) LogicError("can't fix this vector"); + + SetLength(n); + _maxlen |= 1; +} + +void vec_GF2::FixAtCurrentLength() +{ + if (fixed()) return; + if (length() != MaxLength()) + LogicError("FixAtCurrentLength: can't fix this vector"); + + _maxlen |= 1; +} + + +const GF2 vec_GF2::get(long i) const +{ + const vec_GF2& v = *this; + + if (i < 0 || i >= v.length()) + LogicError("vec_GF2: subscript out of range"); + + long q = i/NTL_BITS_PER_LONG; + long p = i - q*NTL_BITS_PER_LONG; + + if (v.rep[q] & (1UL << p)) + return to_GF2(1); + else + return to_GF2(0); +} + +ref_GF2 vec_GF2::operator[](long i) +{ + vec_GF2& v = *this; + + if (i < 0 || i >= v.length()) + LogicError("vec_GF2: subscript out of range"); + + long q = i/NTL_BITS_PER_LONG; + long p = i - q*NTL_BITS_PER_LONG; + return ref_GF2(INIT_LOOP_HOLE, &v.rep[q], p); +} + + + +static +void SetBit(vec_GF2& v, long i) +{ + if (i < 0 || i >= v.length()) + LogicError("vec_GF2: subscript out of range"); + + long q = i/NTL_BITS_PER_LONG; + long p = i - q*NTL_BITS_PER_LONG; + + v.rep[q] |= (1UL << p); +} + +static +void ClearBit(vec_GF2& v, long i) +{ + if (i < 0 || i >= v.length()) + LogicError("vec_GF2: subscript out of range"); + + long q = i/NTL_BITS_PER_LONG; + long p = i - q*NTL_BITS_PER_LONG; + + v.rep[q] &= ~(1UL << p); +} + +void vec_GF2::put(long i, GF2 a) +{ + if (a == 1) + SetBit(*this, i); + else + 
ClearBit(*this, i); +} + +void vec_GF2::swap(vec_GF2& y) +{ + long xf = fixed(); + long yf = y.fixed(); + + if (xf != yf || (xf && length() != y.length())) + LogicError("swap: can't swap these vec_GF2s"); + + rep.swap(y.rep); + _ntl_swap(_len, y._len); + _ntl_swap(_maxlen, y._maxlen); +} + + +void vec_GF2::append(GF2 a) +{ + long n = length(); + SetLength(n+1); + put(n, a); +} + +void vec_GF2::append(const vec_GF2& a) +{ + long a_len = a.length(); + long x_len = length(); + + if (a_len == 0) return; + if (x_len == 0) { + *this = a; + return; + } + + + SetLength(x_len + a_len); + // new bits are guaranteed zero + + + ShiftAdd(rep.elts(), a.rep.elts(), a.rep.length(), x_len); +} + + +long operator==(const vec_GF2& a, const vec_GF2& b) +{ + return a.length() == b.length() && a.rep == b.rep; +} + + + + + +istream & operator>>(istream& s, vec_GF2& a) +{ + NTL_ZZRegister(ival); + + long c; + if (!s) NTL_INPUT_ERROR(s, "bad vec_GF2 input"); + + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + + if (c != '[') { + NTL_INPUT_ERROR(s, "bad vec_GF2 input"); + } + + vec_GF2 ibuf; + + ibuf.SetLength(0); + + s.get(); + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + + while (c != ']' && c != EOF) { + if (!(s >> ival)) NTL_INPUT_ERROR(s, "bad vec_GF2 input"); + append(ibuf, to_GF2(ival)); + + c = s.peek(); + + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + } + + if (c == EOF) NTL_INPUT_ERROR(s, "bad vec_GF2 input"); + s.get(); + + a = ibuf; + return s; +} + + +ostream& operator<<(ostream& s, const vec_GF2& a) +{ + long i, n; + GF2 c; + + n = a.length(); + + s << '['; + + for (i = 0; i < n; i++) { + c = a.get(i); + if (c == 0) + s << "0"; + else + s << "1"; + if (i < n-1) s << " "; + } + + s << ']'; + + return s; +} + +// math operations: + +void mul(vec_GF2& x, const vec_GF2& a, GF2 b) +{ + x = a; + if (b == 0) + clear(x); +} + +void add(vec_GF2& x, const vec_GF2& a, const vec_GF2& b) +{ + long blen = a.length(); + + if (b.length() != blen) LogicError("vec_GF2 add: length mismatch"); + + x.SetLength(blen); + + long wlen = a.rep.length(); + long i; + + _ntl_ulong *xp = x.rep.elts(); + const _ntl_ulong *ap = a.rep.elts(); + const _ntl_ulong *bp = b.rep.elts(); + + for (i = 0; i < wlen; i++) + xp[i] = ap[i] ^ bp[i]; +} + +void clear(vec_GF2& x) +{ + long wlen = x.rep.length(); + long i; + _ntl_ulong *xp = x.rep.elts(); + + for (i = 0; i < wlen; i++) + xp[i] = 0; +} + + +long IsZero(const vec_GF2& x) +{ + long wlen = x.rep.length(); + long i; + const _ntl_ulong *xp = x.rep.elts(); + + for (i = 0; i < wlen; i++) + if (xp[i] != 0) return 0; + + return 1; +} + +vec_GF2 operator+(const vec_GF2& a, const vec_GF2& b) +{ + vec_GF2 res; + add(res, a, b); + NTL_OPT_RETURN(vec_GF2, res); +} + + +vec_GF2 operator-(const vec_GF2& a, const vec_GF2& b) +{ + vec_GF2 res; + add(res, a, b); + NTL_OPT_RETURN(vec_GF2, res); +} + +static +void ShiftToHigh(vec_GF2& x, const vec_GF2& a, long n) +// assumes 0 <= n < a.length() + +{ + long l = a.length(); + + x.SetLength(l); + + _ntl_ulong *xp = x.rep.elts(); + const _ntl_ulong *ap = a.rep.elts(); + + long wn = n/NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + long sa = a.rep.length(); + + long i; + + if (bn == 0) { + for (i = sa-1; i >= wn; i--) + xp[i] = ap[i-wn]; + for (i = wn-1; i >= 0; i--) + xp[i] = 0; + } + else { + for (i = sa-1; i >= wn+1; i--) + xp[i] = (ap[i-wn] << bn) | (ap[i-wn-1] >> (NTL_BITS_PER_LONG-bn)); + xp[wn] = ap[0] << bn; + for (i = wn-1; i >= 0; i--) + xp[i] = 0; + } + + long p = l 
% NTL_BITS_PER_LONG; + + if (p != 0) + xp[sa-1] &= (1UL << p) - 1UL; + +} + +static +void ShiftToLow(vec_GF2& x, const vec_GF2& a, long n) +// assumes 0 <= n < a.length() + +{ + long l = a.length(); + + x.SetLength(l); + + _ntl_ulong *xp = x.rep.elts(); + const _ntl_ulong *ap = a.rep.elts(); + + long wn = n/NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + long sa = a.rep.length(); + + long i; + + if (bn == 0) { + for (i = 0; i < sa-wn; i++) + xp[i] = ap[i+wn]; + } + else { + for (i = 0; i < sa-wn-1; i++) + xp[i] = (ap[i+wn] >> bn) | (ap[i+wn+1] << (NTL_BITS_PER_LONG - bn)); + + xp[sa-wn-1] = ap[sa-1] >> bn; + } + + for (i = sa-wn; i < sa; i++) + xp[i] = 0; +} + + + +void shift(vec_GF2& x, const vec_GF2& a, long n) +{ + long l = a.length(); + + if (n >= l || n <= -l) { + x.SetLength(l); + clear(x); + } + else if (n < 0) + ShiftToLow(x, a, -n); // |n| < l, so -n won't overflow! + else + ShiftToHigh(x, a, n); +} + + + + + +// This code is simply canibalized from GF2X.c... +// so much for "code re-use" and "modularity" + +static const _ntl_ulong revtab[256] = { + +0UL, 128UL, 64UL, 192UL, 32UL, 160UL, 96UL, 224UL, 16UL, 144UL, +80UL, 208UL, 48UL, 176UL, 112UL, 240UL, 8UL, 136UL, 72UL, 200UL, +40UL, 168UL, 104UL, 232UL, 24UL, 152UL, 88UL, 216UL, 56UL, 184UL, +120UL, 248UL, 4UL, 132UL, 68UL, 196UL, 36UL, 164UL, 100UL, 228UL, +20UL, 148UL, 84UL, 212UL, 52UL, 180UL, 116UL, 244UL, 12UL, 140UL, +76UL, 204UL, 44UL, 172UL, 108UL, 236UL, 28UL, 156UL, 92UL, 220UL, +60UL, 188UL, 124UL, 252UL, 2UL, 130UL, 66UL, 194UL, 34UL, 162UL, +98UL, 226UL, 18UL, 146UL, 82UL, 210UL, 50UL, 178UL, 114UL, 242UL, +10UL, 138UL, 74UL, 202UL, 42UL, 170UL, 106UL, 234UL, 26UL, 154UL, +90UL, 218UL, 58UL, 186UL, 122UL, 250UL, 6UL, 134UL, 70UL, 198UL, +38UL, 166UL, 102UL, 230UL, 22UL, 150UL, 86UL, 214UL, 54UL, 182UL, +118UL, 246UL, 14UL, 142UL, 78UL, 206UL, 46UL, 174UL, 110UL, 238UL, +30UL, 158UL, 94UL, 222UL, 62UL, 190UL, 126UL, 254UL, 1UL, 129UL, +65UL, 193UL, 33UL, 161UL, 97UL, 225UL, 17UL, 145UL, 81UL, 209UL, +49UL, 177UL, 113UL, 241UL, 9UL, 137UL, 73UL, 201UL, 41UL, 169UL, +105UL, 233UL, 25UL, 153UL, 89UL, 217UL, 57UL, 185UL, 121UL, 249UL, +5UL, 133UL, 69UL, 197UL, 37UL, 165UL, 101UL, 229UL, 21UL, 149UL, +85UL, 213UL, 53UL, 181UL, 117UL, 245UL, 13UL, 141UL, 77UL, 205UL, +45UL, 173UL, 109UL, 237UL, 29UL, 157UL, 93UL, 221UL, 61UL, 189UL, +125UL, 253UL, 3UL, 131UL, 67UL, 195UL, 35UL, 163UL, 99UL, 227UL, +19UL, 147UL, 83UL, 211UL, 51UL, 179UL, 115UL, 243UL, 11UL, 139UL, +75UL, 203UL, 43UL, 171UL, 107UL, 235UL, 27UL, 155UL, 91UL, 219UL, +59UL, 187UL, 123UL, 251UL, 7UL, 135UL, 71UL, 199UL, 39UL, 167UL, +103UL, 231UL, 23UL, 151UL, 87UL, 215UL, 55UL, 183UL, 119UL, 247UL, +15UL, 143UL, 79UL, 207UL, 47UL, 175UL, 111UL, 239UL, 31UL, 159UL, +95UL, 223UL, 63UL, 191UL, 127UL, 255UL }; + +static inline +_ntl_ulong rev1(_ntl_ulong a) +{ + return NTL_BB_REV_CODE; +} + + + +void reverse(vec_GF2& c, const vec_GF2& a) +// c = reverse of a + +{ + long n = a.length(); + + c = a; + + if (n <= 0) { + return; + } + + long wn = n/NTL_BITS_PER_LONG; + long bn = n - wn*NTL_BITS_PER_LONG; + + if (bn != 0) { + wn++; + bn = NTL_BITS_PER_LONG - bn; + } + + _ntl_ulong *cp = c.rep.elts(); + + long i; + + if (bn != 0) { + for (i = wn-1; i >= 1; i--) + cp[i] = (cp[i] << bn) | (cp[i-1] >> (NTL_BITS_PER_LONG-bn)); + cp[0] = cp[0] << bn; + } + + for (i = 0; i < wn/2; i++) { + _ntl_ulong t; t = cp[i]; cp[i] = cp[wn-1-i]; cp[wn-1-i] = t; + } + + for (i = 0; i < wn; i++) + cp[i] = rev1(cp[i]); +} + +static +long weight1(_ntl_ulong a) +{ + long res = 0; + 
while (a) { + if (a & 1) res ++; + a >>= 1; + } + return res; +} + +long weight(const vec_GF2& a) +{ + long wlen = a.rep.length(); + long res = 0; + long i; + for (i = 0; i < wlen; i++) + res += weight1(a.rep[i]); + + return res; +} + +void random(vec_GF2& x, long n) +{ + if (n < 0) LogicError("random: bad arg"); + + x.SetLength(n); + + long wl = x.rep.length(); + long i; + + for (i = 0; i < wl-1; i++) { + x.rep[i] = RandomWord(); + } + + if (n > 0) { + long pos = n % NTL_BITS_PER_LONG; + if (pos == 0) pos = NTL_BITS_PER_LONG; + x.rep[wl-1] = RandomBits_ulong(pos); + } +} + + + +void VectorCopy(vec_GF2& x, const vec_GF2& a, long n) +{ + if (n < 0) LogicError("VectorCopy: negative length"); + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in VectorCopy"); + + long m = min(n, a.length()); + + x.SetLength(n); + + long wn = (n + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG; + long wm = (m + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG; + + _ntl_ulong *xp = x.rep.elts(); + const _ntl_ulong *ap = a.rep.elts(); + + long i; + + for (i = 0; i < wm; i++) + xp[i] = ap[i]; + + for (i = wm; i < wn; i++) + xp[i] = 0; + + long p = n % NTL_BITS_PER_LONG; + if (p != 0) { + xp[wn-1] &= ((1UL << p) - 1UL); + } +} + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/vec_GF2E.c b/thirdparty/linux/ntl/src/vec_GF2E.c new file mode 100644 index 0000000000..ce2e33a43f --- /dev/null +++ b/thirdparty/linux/ntl/src/vec_GF2E.c @@ -0,0 +1,227 @@ + +#include + + +NTL_START_IMPL + +static +void BasicBlockConstruct(GF2E* x, long n, long d) +{ + long m, j; + + long i = 0; + + NTL_SCOPE(guard) { BlockDestroy(x, i); }; + + while (i < n) { + m = WV_BlockConstructAlloc(x[i]._GF2E__rep.xrep, d, n-i); + for (j = 1; j < m; j++) + WV_BlockConstructSet(x[i]._GF2E__rep.xrep, x[i+j]._GF2E__rep.xrep, j); + i += m; + } + + guard.relax(); +} + + +void BlockConstruct(GF2E* x, long n) +{ + if (n <= 0) return; + + if (!GF2EInfo) + LogicError("GF2E constructor called while modulus undefined"); + + long d = GF2E::WordLength(); + BasicBlockConstruct(x, n, d); +} + +void BlockConstructFromVec(GF2E* x, long n, const GF2E* y) +{ + if (n <= 0) return; + + long d = y->_GF2E__rep.xrep.MaxLength(); + BasicBlockConstruct(x, n, d); + + NTL_SCOPE(guard) { BlockDestroy(x, n); }; + + long i; + for (i = 0; i < n; i++) x[i] = y[i]; + + guard.relax(); +} + +void BlockConstructFromObj(GF2E* x, long n, const GF2E& y) +{ + if (n <= 0) return; + + if (!GF2EInfo) + LogicError("GF2E constructor called while modulus undefined"); + + long d = GF2E::WordLength(); + + BasicBlockConstruct(x, n, d); + + NTL_SCOPE(guard) { BlockDestroy(x, n); }; + + long i; + for (i = 0; i < n; i++) x[i] = y; + + guard.relax(); +} + + + + +void BlockDestroy(GF2E* x, long n) +{ + if (n <= 0) return; + + long i = 0; + long m; + + while (i < n) { + m = WV_BlockDestroy(x[i]._GF2E__rep.xrep); + i += m; + } +} + + + +void InnerProduct(GF2E& x, const vec_GF2E& a, const vec_GF2E& b) +{ + long n = min(a.length(), b.length()); + long i; + GF2X accum, t; + + clear(accum); + for (i = 0; i < n; i++) { + mul(t, rep(a[i]), rep(b[i])); + add(accum, accum, t); + } + + conv(x, accum); +} + +void InnerProduct(GF2E& x, const vec_GF2E& a, const vec_GF2E& b, + long offset) +{ + if (offset < 0) LogicError("InnerProduct: negative offset"); + if (NTL_OVERFLOW(offset, 1, 0)) ResourceError("InnerProduct: offset too big"); + + long n = min(a.length(), b.length()+offset); + long i; + GF2X accum, t; + + clear(accum); + for (i = offset; i < n; i++) { + mul(t, rep(a[i]), rep(b[i-offset])); + add(accum, accum, t); + } + 
+ conv(x, accum); +} + +void mul(vec_GF2E& x, const vec_GF2E& a, const GF2E& b_in) +{ + GF2E b = b_in; + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + +void mul(vec_GF2E& x, const vec_GF2E& a, GF2 b) +{ + x = a; + if (b == 0) + clear(x); +} + + +void add(vec_GF2E& x, const vec_GF2E& a, const vec_GF2E& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector add: dimension mismatch"); + + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + add(x[i], a[i], b[i]); +} + + +void clear(vec_GF2E& x) +{ + long n = x.length(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + + + +long IsZero(const vec_GF2E& a) +{ + long n = a.length(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +vec_GF2E operator+(const vec_GF2E& a, const vec_GF2E& b) +{ + vec_GF2E res; + add(res, a, b); + NTL_OPT_RETURN(vec_GF2E, res); +} + +vec_GF2E operator-(const vec_GF2E& a, const vec_GF2E& b) +{ + vec_GF2E res; + sub(res, a, b); + NTL_OPT_RETURN(vec_GF2E, res); +} + + +vec_GF2E operator-(const vec_GF2E& a) +{ + vec_GF2E res; + negate(res, a); + NTL_OPT_RETURN(vec_GF2E, res); +} + + +GF2E operator*(const vec_GF2E& a, const vec_GF2E& b) +{ + GF2E res; + InnerProduct(res, a, b); + return res; +} + + +void VectorCopy(vec_GF2E& x, const vec_GF2E& a, long n) +{ + if (n < 0) LogicError("VectorCopy: negative length"); + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in VectorCopy"); + + long m = min(n, a.length()); + + x.SetLength(n); + + long i; + + for (i = 0; i < m; i++) + x[i] = a[i]; + + for (i = m; i < n; i++) + clear(x[i]); +} + + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/vec_RR.c b/thirdparty/linux/ntl/src/vec_RR.c new file mode 100644 index 0000000000..1a15bf4e87 --- /dev/null +++ b/thirdparty/linux/ntl/src/vec_RR.c @@ -0,0 +1,143 @@ + +#include + + +NTL_START_IMPL + + +void InnerProduct(RR& xx, const vec_RR& a, const vec_RR& b) +{ + RR t1, x; + + long n = min(a.length(), b.length()); + long i; + + clear(x); + for (i = 1; i <= n; i++) { + mul(t1, a(i), b(i)); + add(x, x, t1); + } + + xx = x; +} + +void mul(vec_RR& x, const vec_RR& a, const RR& b_in) +{ + RR b = b_in; + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + +void mul(vec_RR& x, const vec_RR& a, double b_in) +{ + NTL_TLS_LOCAL(RR, b); + conv(b, b_in); + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + +void add(vec_RR& x, const vec_RR& a, const vec_RR& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector add: dimension mismatch"); + + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + add(x[i], a[i], b[i]); +} + +void sub(vec_RR& x, const vec_RR& a, const vec_RR& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector sub: dimension mismatch"); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + sub(x[i], a[i], b[i]); +} + +void clear(vec_RR& x) +{ + long n = x.length(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + +void negate(vec_RR& x, const vec_RR& a) +{ + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + negate(x[i], a[i]); +} + + +long IsZero(const vec_RR& a) +{ + long n = a.length(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +vec_RR operator+(const vec_RR& a, const vec_RR& b) +{ + vec_RR res; + add(res, a, b); + NTL_OPT_RETURN(vec_RR, res); +} + +vec_RR operator-(const vec_RR& a, const 
vec_RR& b) +{ + vec_RR res; + sub(res, a, b); + NTL_OPT_RETURN(vec_RR, res); +} + + +vec_RR operator-(const vec_RR& a) +{ + vec_RR res; + negate(res, a); + NTL_OPT_RETURN(vec_RR, res); +} + +RR operator*(const vec_RR& a, const vec_RR& b) +{ + RR res; + InnerProduct(res, a, b); + return res; +} + +void VectorCopy(vec_RR& x, const vec_RR& a, long n) +{ + if (n < 0) LogicError("VectorCopy: negative length"); + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in VectorCopy"); + + long m = min(n, a.length()); + + x.SetLength(n); + + long i; + + for (i = 0; i < m; i++) + x[i] = a[i]; + + for (i = m; i < n; i++) + clear(x[i]); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/vec_ZZ.c b/thirdparty/linux/ntl/src/vec_ZZ.c new file mode 100644 index 0000000000..3232aa3ecd --- /dev/null +++ b/thirdparty/linux/ntl/src/vec_ZZ.c @@ -0,0 +1,144 @@ + +#include + +NTL_START_IMPL + + +void InnerProduct(ZZ& xx, const vec_ZZ& a, const vec_ZZ& b) +{ + ZZ t1, x; + + long n = min(a.length(), b.length()); + long i; + + clear(x); + for (i = 1; i <= n; i++) { + mul(t1, a(i), b(i)); + add(x, x, t1); + } + + xx = x; +} + +void mul(vec_ZZ& x, const vec_ZZ& a, const ZZ& b_in) +{ + ZZ b = b_in; + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + +void mul(vec_ZZ& x, const vec_ZZ& a, long b) +{ + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + +void add(vec_ZZ& x, const vec_ZZ& a, const vec_ZZ& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector add: dimension mismatch"); + + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + add(x[i], a[i], b[i]); +} + +void sub(vec_ZZ& x, const vec_ZZ& a, const vec_ZZ& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector sub: dimension mismatch"); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + sub(x[i], a[i], b[i]); +} + +void clear(vec_ZZ& x) +{ + long n = x.length(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + +void negate(vec_ZZ& x, const vec_ZZ& a) +{ + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + negate(x[i], a[i]); +} + + + + +long IsZero(const vec_ZZ& a) +{ + long n = a.length(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +vec_ZZ operator+(const vec_ZZ& a, const vec_ZZ& b) +{ + vec_ZZ res; + add(res, a, b); + NTL_OPT_RETURN(vec_ZZ, res); +} + +vec_ZZ operator-(const vec_ZZ& a, const vec_ZZ& b) +{ + vec_ZZ res; + sub(res, a, b); + NTL_OPT_RETURN(vec_ZZ, res); +} + + +vec_ZZ operator-(const vec_ZZ& a) +{ + vec_ZZ res; + negate(res, a); + NTL_OPT_RETURN(vec_ZZ, res); +} + + +ZZ operator*(const vec_ZZ& a, const vec_ZZ& b) +{ + ZZ res; + InnerProduct(res, a, b); + NTL_OPT_RETURN(ZZ, res); +} + +void VectorCopy(vec_ZZ& x, const vec_ZZ& a, long n) +{ + if (n < 0) LogicError("VectorCopy: negative length"); + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in VectorCopy"); + + long m = min(n, a.length()); + + x.SetLength(n); + + long i; + + for (i = 0; i < m; i++) + x[i] = a[i]; + + for (i = m; i < n; i++) + clear(x[i]); +} + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/vec_ZZ_p.c b/thirdparty/linux/ntl/src/vec_ZZ_p.c new file mode 100644 index 0000000000..df767a5101 --- /dev/null +++ b/thirdparty/linux/ntl/src/vec_ZZ_p.c @@ -0,0 +1,249 @@ + + +#include + +NTL_START_IMPL + +static +void BasicBlockConstruct(ZZ_p* x, long n, long d) +{ + long m, j; + + long i = 0; + + NTL_SCOPE(guard) { BlockDestroy(x, i); }; + + while (i < n) 
{ + m = ZZ_BlockConstructAlloc(x[i]._ZZ_p__rep, d, n-i); + for (j = 1; j < m; j++) + ZZ_BlockConstructSet(x[i]._ZZ_p__rep, x[i+j]._ZZ_p__rep, j); + i += m; + } + + guard.relax(); +} + +void BlockConstruct(ZZ_p* x, long n) +{ + if (n <= 0) return; + + if (!ZZ_pInfo) + LogicError("ZZ_p constructor called while modulus undefined"); + + long d = ZZ_p::ModulusSize(); + + BasicBlockConstruct(x, n, d); +} + +void BlockConstructFromVec(ZZ_p* x, long n, const ZZ_p* y) +{ + if (n <= 0) return; + + long d = y->_ZZ_p__rep.MaxAlloc() - 1; + BasicBlockConstruct(x, n, d); + + NTL_SCOPE(guard) { BlockDestroy(x, n); }; + + long i; + for (i = 0; i < n; i++) x[i] = y[i]; + + guard.relax(); +} + +void BlockConstructFromObj(ZZ_p* x, long n, const ZZ_p& y) +{ + if (n <= 0) return; + + + if (!ZZ_pInfo) + LogicError("ZZ_p constructor called while modulus undefined"); + + long d = ZZ_p::ModulusSize(); + + BasicBlockConstruct(x, n, d); + + NTL_SCOPE(guard) { BlockDestroy(x, n); }; + + long i; + for (i = 0; i < n; i++) x[i] = y; + + guard.relax(); +} + + +void BlockDestroy(ZZ_p* x, long n) +{ + if (n <= 0) return; + + long i = 0; + long m; + + while (i < n) { + m = ZZ_BlockDestroy(x[i]._ZZ_p__rep); + i += m; + } +} + + + +void InnerProduct(ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b) +{ + long n = min(a.length(), b.length()); + long i; + NTL_ZZRegister(accum); + NTL_ZZRegister(t); + + clear(accum); + for (i = 0; i < n; i++) { + mul(t, rep(a[i]), rep(b[i])); + add(accum, accum, t); + } + + conv(x, accum); +} + +void InnerProduct(ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b, + long offset) +{ + if (offset < 0) LogicError("InnerProduct: negative offset"); + if (NTL_OVERFLOW(offset, 1, 0)) + ResourceError("InnerProduct: offset too big"); + + long n = min(a.length(), b.length()+offset); + long i; + NTL_ZZRegister(accum); + NTL_ZZRegister(t); + + clear(accum); + for (i = offset; i < n; i++) { + mul(t, rep(a[i]), rep(b[i-offset])); + add(accum, accum, t); + } + + conv(x, accum); +} + +void mul(vec_ZZ_p& x, const vec_ZZ_p& a, const ZZ_p& b_in) +{ + NTL_ZZ_pRegister(b); + b = b_in; + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + +void mul(vec_ZZ_p& x, const vec_ZZ_p& a, long b_in) +{ + NTL_ZZ_pRegister(b); + b = b_in; + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + + +void add(vec_ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector add: dimension mismatch"); + + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + add(x[i], a[i], b[i]); +} + +void sub(vec_ZZ_p& x, const vec_ZZ_p& a, const vec_ZZ_p& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector sub: dimension mismatch"); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + sub(x[i], a[i], b[i]); +} + +void clear(vec_ZZ_p& x) +{ + long n = x.length(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + +void negate(vec_ZZ_p& x, const vec_ZZ_p& a) +{ + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + negate(x[i], a[i]); +} + +long IsZero(const vec_ZZ_p& a) +{ + long n = a.length(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +vec_ZZ_p operator+(const vec_ZZ_p& a, const vec_ZZ_p& b) +{ + vec_ZZ_p res; + add(res, a, b); + NTL_OPT_RETURN(vec_ZZ_p, res); +} + +vec_ZZ_p operator-(const vec_ZZ_p& a, const vec_ZZ_p& b) +{ + vec_ZZ_p res; + sub(res, a, b); + NTL_OPT_RETURN(vec_ZZ_p, res); +} + + +vec_ZZ_p 
operator-(const vec_ZZ_p& a) +{ + vec_ZZ_p res; + negate(res, a); + NTL_OPT_RETURN(vec_ZZ_p, res); +} + + +ZZ_p operator*(const vec_ZZ_p& a, const vec_ZZ_p& b) +{ + ZZ_p res; + InnerProduct(res, a, b); + NTL_OPT_RETURN(ZZ_p, res); +} + + +void VectorCopy(vec_ZZ_p& x, const vec_ZZ_p& a, long n) +{ + if (n < 0) LogicError("VectorCopy: negative length"); + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in VectorCopy"); + + long m = min(n, a.length()); + + x.SetLength(n); + + long i; + + for (i = 0; i < m; i++) + x[i] = a[i]; + + for (i = m; i < n; i++) + clear(x[i]); +} + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/vec_ZZ_pE.c b/thirdparty/linux/ntl/src/vec_ZZ_pE.c new file mode 100644 index 0000000000..71a3e5049a --- /dev/null +++ b/thirdparty/linux/ntl/src/vec_ZZ_pE.c @@ -0,0 +1,179 @@ + +#include + + +NTL_START_IMPL + +void InnerProduct(ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b) +{ + long n = min(a.length(), b.length()); + long i; + ZZ_pX accum, t; + + clear(accum); + for (i = 0; i < n; i++) { + mul(t, rep(a[i]), rep(b[i])); + add(accum, accum, t); + } + + conv(x, accum); +} + +void InnerProduct(ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b, + long offset) +{ + if (offset < 0) LogicError("InnerProduct: negative offset"); + if (NTL_OVERFLOW(offset, 1, 0)) ResourceError("InnerProduct: offset too big"); + + long n = min(a.length(), b.length()+offset); + long i; + ZZ_pX accum, t; + + clear(accum); + for (i = offset; i < n; i++) { + mul(t, rep(a[i]), rep(b[i-offset])); + add(accum, accum, t); + } + + conv(x, accum); +} + +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const ZZ_pE& b_in) +{ + ZZ_pE b = b_in; + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, const ZZ_p& b_in) +{ + NTL_ZZ_pRegister(b); + b = b_in; + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + +void mul(vec_ZZ_pE& x, const vec_ZZ_pE& a, long b_in) +{ + NTL_ZZ_pRegister(b); + b = b_in; + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + + +void add(vec_ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector add: dimension mismatch"); + + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + add(x[i], a[i], b[i]); +} + +void sub(vec_ZZ_pE& x, const vec_ZZ_pE& a, const vec_ZZ_pE& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector sub: dimension mismatch"); + + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + sub(x[i], a[i], b[i]); +} + +void negate(vec_ZZ_pE& x, const vec_ZZ_pE& a) +{ + long n = a.length(); + + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + negate(x[i], a[i]); +} + + +void clear(vec_ZZ_pE& x) +{ + long n = x.length(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + + + +long IsZero(const vec_ZZ_pE& a) +{ + long n = a.length(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +vec_ZZ_pE operator+(const vec_ZZ_pE& a, const vec_ZZ_pE& b) +{ + vec_ZZ_pE res; + add(res, a, b); + NTL_OPT_RETURN(vec_ZZ_pE, res); +} + +vec_ZZ_pE operator-(const vec_ZZ_pE& a, const vec_ZZ_pE& b) +{ + vec_ZZ_pE res; + sub(res, a, b); + NTL_OPT_RETURN(vec_ZZ_pE, res); +} + + +vec_ZZ_pE operator-(const vec_ZZ_pE& a) +{ + vec_ZZ_pE res; + negate(res, a); + NTL_OPT_RETURN(vec_ZZ_pE, res); +} + + +ZZ_pE operator*(const vec_ZZ_pE& a, const vec_ZZ_pE& b) +{ + ZZ_pE res; + 
InnerProduct(res, a, b);
+   return res;
+}
+
+void VectorCopy(vec_ZZ_pE& x, const vec_ZZ_pE& a, long n)
+{
+   if (n < 0) LogicError("VectorCopy: negative length");
+   if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in VectorCopy");
+
+   long m = min(n, a.length());
+
+   x.SetLength(n);
+
+   long i;
+
+   for (i = 0; i < m; i++)
+      x[i] = a[i];
+
+   for (i = m; i < n; i++)
+      clear(x[i]);
+}
+
+
+
+NTL_END_IMPL
diff --git a/thirdparty/linux/ntl/src/vec_lzz_p.c b/thirdparty/linux/ntl/src/vec_lzz_p.c
new file mode 100644
index 0000000000..7d39f34367
--- /dev/null
+++ b/thirdparty/linux/ntl/src/vec_lzz_p.c
@@ -0,0 +1,316 @@
+
+#include <NTL/vec_lzz_p.h>
+
+NTL_START_IMPL
+
+
+// NOTE: the signature for this is in lzz_p.h
+void conv(vec_zz_p& x, const vec_ZZ& a)
+{
+   long i, n;
+
+   n = a.length();
+   x.SetLength(n);
+
+   VectorConv(n, x.elts(), a.elts());
+}
+
+// NOTE: the signature for this is in lzz_p.h
+void conv(vec_zz_p& x, const Vec<long>& a)
+{
+   long i, n;
+
+   n = a.length();
+   x.SetLength(n);
+
+   VectorConv(n, x.elts(), a.elts());
+}
+
+
+
+
+void InnerProduct(zz_p& x, const vec_zz_p& a, const vec_zz_p& b)
+{
+   long n = min(a.length(), b.length());
+   long i;
+
+   long accum, t;
+   long p = zz_p::modulus();
+   mulmod_t pinv = zz_p::ModulusInverse();
+
+   const zz_p *ap = a.elts();
+   const zz_p *bp = b.elts();
+
+   accum = 0;
+   for (i = 0; i < n; i++) {
+      t = MulMod(rep(ap[i]), rep(bp[i]), p, pinv);
+      accum = AddMod(accum, t, p);
+   }
+
+   x.LoopHole() = accum;
+}
+
+void InnerProduct(zz_p& x, const vec_zz_p& a, const vec_zz_p& b,
+                  long offset)
+{
+   if (offset < 0) LogicError("InnerProduct: negative offset");
+   if (NTL_OVERFLOW(offset, 1, 0)) ResourceError("InnerProduct: offset too big");
+
+   long n = min(a.length(), b.length()+offset);
+   long i;
+
+   long accum, t;
+   long p = zz_p::modulus();
+   mulmod_t pinv = zz_p::ModulusInverse();
+
+
+   const zz_p *ap = a.elts();
+   const zz_p *bp = b.elts();
+
+   accum = 0;
+   for (i = offset; i < n; i++) {
+      t = MulMod(rep(ap[i]), rep(bp[i-offset]), p, pinv);
+      accum = AddMod(accum, t, p);
+   }
+
+   x.LoopHole() = accum;
+}
+
+// CRT lifts gg (currently reduced modulo a) to a vector modulo a*p,
+// using the residues G modulo p; a is replaced by a*p, and the return
+// value is nonzero if any entry of gg changed.
+long CRT(vec_ZZ& gg, ZZ& a, const vec_zz_p& G)
+{
+   long n = gg.length();
+   if (G.length() != n) LogicError("CRT: vector length mismatch");
+
+   long p = zz_p::modulus();
+
+   ZZ new_a;
+   mul(new_a, a, p);
+
+   long a_inv;
+   a_inv = rem(a, p);
+   a_inv = InvMod(a_inv, p);
+
+   long p1;
+   p1 = p >> 1;
+
+   ZZ a1;
+   RightShift(a1, a, 1);
+
+   long p_odd = (p & 1);
+
+   long modified = 0;
+
+   long h;
+
+   ZZ g;
+   long i;
+   for (i = 0; i < n; i++) {
+      if (!CRTInRange(gg[i], a)) {
+         modified = 1;
+         rem(g, gg[i], a);
+         if (g > a1) sub(g, g, a);
+      }
+      else
+         g = gg[i];
+
+      h = rem(g, p);
+      h = SubMod(rep(G[i]), h, p);
+      h = MulMod(h, a_inv, p);
+      if (h > p1)
+         h = h - p;
+
+      if (h != 0) {
+         modified = 1;
+
+         if (!p_odd && g > 0 && (h == p1))
+            MulSubFrom(g, a, h);
+         else
+            MulAddTo(g, a, h);
+      }
+
+      gg[i] = g;
+   }
+
+   a = new_a;
+
+   return modified;
+}
+
+
+
+void mul(vec_zz_p& x, const vec_zz_p& a, zz_p b)
+{
+   long n = a.length();
+   x.SetLength(n);
+
+   long i;
+
+   if (n <= 1) {
+
+      for (i = 0; i < n; i++)
+         mul(x[i], a[i], b);
+
+   }
+   else {
+
+      long p = zz_p::modulus();
+      mulmod_t pinv = zz_p::ModulusInverse();
+      long bb = rep(b);
+      mulmod_precon_t bpinv = PrepMulModPrecon(bb, p, pinv);
+
+
+      const zz_p *ap = a.elts();
+      zz_p *xp = x.elts();
+
+      for (i = 0; i < n; i++)
+         xp[i].LoopHole() = MulModPrecon(rep(ap[i]), bb, p, bpinv);
+
+   }
+}
+
+void mul(vec_zz_p& x, const vec_zz_p& a, long b_in)
+{
+   zz_p b;
+   b = b_in;
+   mul(x, a, b);
+}
+
+
+
+void add(vec_zz_p& x, const vec_zz_p& a,
const vec_zz_p& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector add: dimension mismatch"); + + long p = zz_p::modulus(); + + x.SetLength(n); + + const zz_p *ap = a.elts(); + const zz_p *bp = b.elts(); + zz_p *xp = x.elts(); + + long i; + for (i = 0; i < n; i++) + xp[i].LoopHole() = AddMod(rep(ap[i]), rep(bp[i]), p); +} + +void sub(vec_zz_p& x, const vec_zz_p& a, const vec_zz_p& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector sub: dimension mismatch"); + + long p = zz_p::modulus(); + + x.SetLength(n); + + + const zz_p *ap = a.elts(); + const zz_p *bp = b.elts(); + zz_p *xp = x.elts(); + + long i; + for (i = 0; i < n; i++) + xp[i].LoopHole() = SubMod(rep(ap[i]), rep(bp[i]), p); +} + +void clear(vec_zz_p& x) +{ + long n = x.length(); + + + zz_p *xp = x.elts(); + + long i; + for (i = 0; i < n; i++) + clear(xp[i]); +} + +void negate(vec_zz_p& x, const vec_zz_p& a) +{ + long n = a.length(); + long p = zz_p::modulus(); + + x.SetLength(n); + + + const zz_p *ap = a.elts(); + zz_p *xp = x.elts(); + + + long i; + for (i = 0; i < n; i++) + xp[i].LoopHole() = NegateMod(rep(ap[i]), p); +} + + +long IsZero(const vec_zz_p& a) +{ + long n = a.length(); + + + const zz_p *ap = a.elts(); + + long i; + for (i = 0; i < n; i++) + if (!IsZero(ap[i])) + return 0; + + return 1; +} + +vec_zz_p operator+(const vec_zz_p& a, const vec_zz_p& b) +{ + vec_zz_p res; + add(res, a, b); + NTL_OPT_RETURN(vec_zz_p, res); +} + +vec_zz_p operator-(const vec_zz_p& a, const vec_zz_p& b) +{ + vec_zz_p res; + sub(res, a, b); + NTL_OPT_RETURN(vec_zz_p, res); +} + + +vec_zz_p operator-(const vec_zz_p& a) +{ + vec_zz_p res; + negate(res, a); + NTL_OPT_RETURN(vec_zz_p, res); +} + + +zz_p operator*(const vec_zz_p& a, const vec_zz_p& b) +{ + zz_p res; + InnerProduct(res, a, b); + return res; +} + + +void VectorCopy(vec_zz_p& x, const vec_zz_p& a, long n) +{ + if (n < 0) LogicError("VectorCopy: negative length"); + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in VectorCopy"); + + long m = min(n, a.length()); + + x.SetLength(n); + + + const zz_p *ap = a.elts(); + zz_p *xp = x.elts(); + + + long i; + + for (i = 0; i < m; i++) + xp[i] = ap[i]; + + for (i = m; i < n; i++) + clear(xp[i]); +} + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/vec_lzz_pE.c b/thirdparty/linux/ntl/src/vec_lzz_pE.c new file mode 100644 index 0000000000..005fa41f72 --- /dev/null +++ b/thirdparty/linux/ntl/src/vec_lzz_pE.c @@ -0,0 +1,178 @@ + +#include + +NTL_START_IMPL + +void InnerProduct(zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b) +{ + long n = min(a.length(), b.length()); + long i; + zz_pX accum, t; + + clear(accum); + for (i = 0; i < n; i++) { + mul(t, rep(a[i]), rep(b[i])); + add(accum, accum, t); + } + + conv(x, accum); +} + +void InnerProduct(zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b, + long offset) +{ + if (offset < 0) LogicError("InnerProduct: negative offset"); + if (NTL_OVERFLOW(offset, 1, 0)) ResourceError("InnerProduct: offset too big"); + + long n = min(a.length(), b.length()+offset); + long i; + zz_pX accum, t; + + clear(accum); + for (i = offset; i < n; i++) { + mul(t, rep(a[i]), rep(b[i-offset])); + add(accum, accum, t); + } + + conv(x, accum); +} + +void mul(vec_zz_pE& x, const vec_zz_pE& a, const zz_pE& b_in) +{ + zz_pE b = b_in; + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + +void mul(vec_zz_pE& x, const vec_zz_pE& a, const zz_p& b_in) +{ + NTL_zz_pRegister(b); + b = b_in; + long n = a.length(); + x.SetLength(n); + long i; + 
for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + +void mul(vec_zz_pE& x, const vec_zz_pE& a, long b_in) +{ + NTL_zz_pRegister(b); + b = b_in; + long n = a.length(); + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + mul(x[i], a[i], b); +} + + +void add(vec_zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector add: dimension mismatch"); + + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + add(x[i], a[i], b[i]); +} + +void sub(vec_zz_pE& x, const vec_zz_pE& a, const vec_zz_pE& b) +{ + long n = a.length(); + if (b.length() != n) LogicError("vector sub: dimension mismatch"); + + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + sub(x[i], a[i], b[i]); +} + +void negate(vec_zz_pE& x, const vec_zz_pE& a) +{ + long n = a.length(); + + x.SetLength(n); + long i; + for (i = 0; i < n; i++) + negate(x[i], a[i]); +} + + +void clear(vec_zz_pE& x) +{ + long n = x.length(); + long i; + for (i = 0; i < n; i++) + clear(x[i]); +} + + + +long IsZero(const vec_zz_pE& a) +{ + long n = a.length(); + long i; + + for (i = 0; i < n; i++) + if (!IsZero(a[i])) + return 0; + + return 1; +} + +vec_zz_pE operator+(const vec_zz_pE& a, const vec_zz_pE& b) +{ + vec_zz_pE res; + add(res, a, b); + NTL_OPT_RETURN(vec_zz_pE, res); +} + +vec_zz_pE operator-(const vec_zz_pE& a, const vec_zz_pE& b) +{ + vec_zz_pE res; + sub(res, a, b); + NTL_OPT_RETURN(vec_zz_pE, res); +} + + +vec_zz_pE operator-(const vec_zz_pE& a) +{ + vec_zz_pE res; + negate(res, a); + NTL_OPT_RETURN(vec_zz_pE, res); +} + + +zz_pE operator*(const vec_zz_pE& a, const vec_zz_pE& b) +{ + zz_pE res; + InnerProduct(res, a, b); + return res; +} + +void VectorCopy(vec_zz_pE& x, const vec_zz_pE& a, long n) +{ + if (n < 0) LogicError("VectorCopy: negative length"); + if (NTL_OVERFLOW(n, 1, 0)) ResourceError("overflow in VectorCopy"); + + long m = min(n, a.length()); + + x.SetLength(n); + + long i; + + for (i = 0; i < m; i++) + x[i] = a[i]; + + for (i = m; i < n; i++) + clear(x[i]); +} + + + +NTL_END_IMPL diff --git a/thirdparty/linux/ntl/src/xdouble.c b/thirdparty/linux/ntl/src/xdouble.c new file mode 100644 index 0000000000..0b90761a93 --- /dev/null +++ b/thirdparty/linux/ntl/src/xdouble.c @@ -0,0 +1,999 @@ + +#include +#include + + +#include + +NTL_START_IMPL + + + +NTL_CHEAP_THREAD_LOCAL +long xdouble::oprec = 10; + +void xdouble::SetOutputPrecision(long p) +{ + if (p < 1) p = 1; + + if (NTL_OVERFLOW(p, 1, 0)) + ResourceError("xdouble: output precision too big"); + + oprec = p; +} + +void xdouble::normalize() +{ + if (x == 0) + e = 0; + else if (x > 0) { + while (x < NTL_XD_HBOUND_INV) { x *= NTL_XD_BOUND; e--; } + while (x > NTL_XD_HBOUND) { x *= NTL_XD_BOUND_INV; e++; } + } + else { + while (x > -NTL_XD_HBOUND_INV) { x *= NTL_XD_BOUND; e--; } + while (x < -NTL_XD_HBOUND) { x *= NTL_XD_BOUND_INV; e++; } + } + + if (e >= NTL_OVFBND) + ResourceError("xdouble: overflow"); + + if (e <= -NTL_OVFBND) + ResourceError("xdouble: underflow"); +} + + + +xdouble to_xdouble(double a) +{ + if (a == 0 || a == 1 || (a > 0 && a >= NTL_XD_HBOUND_INV && a <= NTL_XD_HBOUND) + || (a < 0 && a <= -NTL_XD_HBOUND_INV && a >= -NTL_XD_HBOUND)) { + + return xdouble(a, 0); + + } + + if (!IsFinite(&a)) + ArithmeticError("double to xdouble conversion: non finite value"); + + xdouble z = xdouble(a, 0); + z.normalize(); + return z; +} + + +void conv(double& xx, const xdouble& a) +{ + double x; + long e; + + x = a.x; + e = a.e; + + while (e > 0) { x *= NTL_XD_BOUND; e--; } + while (e < 0) { x *= NTL_XD_BOUND_INV; e++; } 
+ + xx = x; +} + + + + +xdouble operator+(const xdouble& a, const xdouble& b) +{ + xdouble z; + + if (a.x == 0) + return b; + + if (b.x == 0) + return a; + + + if (a.e == b.e) { + z.x = a.x + b.x; + z.e = a.e; + z.normalize(); + return z; + } + else if (a.e > b.e) { + if (a.e > b.e+1) + return a; + + z.x = a.x + b.x*NTL_XD_BOUND_INV; + z.e = a.e; + z.normalize(); + return z; + } + else { + if (b.e > a.e+1) + return b; + + z.x = a.x*NTL_XD_BOUND_INV + b.x; + z.e = b.e; + z.normalize(); + return z; + } +} + + +xdouble operator-(const xdouble& a, const xdouble& b) +{ + xdouble z; + + if (a.x == 0) + return -b; + + if (b.x == 0) + return a; + + if (a.e == b.e) { + z.x = a.x - b.x; + z.e = a.e; + z.normalize(); + return z; + } + else if (a.e > b.e) { + if (a.e > b.e+1) + return a; + + z.x = a.x - b.x*NTL_XD_BOUND_INV; + z.e = a.e; + z.normalize(); + return z; + } + else { + if (b.e > a.e+1) + return -b; + + z.x = a.x*NTL_XD_BOUND_INV - b.x; + z.e = b.e; + z.normalize(); + return z; + } +} + +xdouble operator-(const xdouble& a) +{ + xdouble z; + z.x = -a.x; + z.e = a.e; + return z; +} + +xdouble operator*(const xdouble& a, const xdouble& b) +{ + xdouble z; + + z.e = a.e + b.e; + z.x = a.x * b.x; + z.normalize(); + return z; +} + +xdouble operator/(const xdouble& a, const xdouble& b) +{ + xdouble z; + + if (b.x == 0) ArithmeticError("xdouble division by 0"); + + z.e = a.e - b.e; + z.x = a.x / b.x; + z.normalize(); + return z; +} + + + +long compare(const xdouble& a, const xdouble& b) +{ + xdouble z = a - b; + + if (z.x < 0) + return -1; + else if (z.x == 0) + return 0; + else + return 1; +} + +long sign(const xdouble& z) +{ + if (z.x < 0) + return -1; + else if (z.x == 0) + return 0; + else + return 1; +} + + + +xdouble trunc(const xdouble& a) +{ + if (a.x >= 0) + return floor(a); + else + return ceil(a); +} + + +xdouble floor(const xdouble& aa) +{ + xdouble z; + + xdouble a = aa; + ForceToMem(&a.x); + + if (a.e == 0) { + z.x = floor(a.x); + z.e = 0; + z.normalize(); + return z; + } + else if (a.e > 0) { + return a; + } + else { + if (a.x < 0) + return to_xdouble(-1); + else + return to_xdouble(0); + } +} + +xdouble ceil(const xdouble& aa) +{ + xdouble z; + + xdouble a = aa; + ForceToMem(&a.x); + + if (a.e == 0) { + z.x = ceil(a.x); + z.e = 0; + z.normalize(); + return z; + } + else if (a.e > 0) { + return a; + } + else { + if (a.x < 0) + return to_xdouble(0); + else + return to_xdouble(1); + } +} + +xdouble to_xdouble(const ZZ& a) +{ + RRPush push; + RR::SetPrecision(NTL_DOUBLE_PRECISION); + + NTL_TLS_LOCAL(RR, t); + conv(t, a); + + double x; + conv(x, t.mantissa()); + + xdouble y, z, res; + + conv(y, x); + power2(z, t.exponent()); + + res = y*z; + + return res; +} + +void conv(ZZ& x, const xdouble& a) +{ + xdouble b = floor(a); + + RRPush push; + RR::SetPrecision(NTL_DOUBLE_PRECISION); + + NTL_TLS_LOCAL(RR, t); + conv(t, b); + conv(x, t); +} + + +xdouble fabs(const xdouble& a) +{ + xdouble z; + + z.e = a.e; + z.x = fabs(a.x); + return z; +} + +xdouble sqrt(const xdouble& a) +{ + if (a == 0) + return to_xdouble(0); + + if (a < 0) + ArithmeticError("xdouble: sqrt of negative number"); + + xdouble t; + + if (a.e & 1) { + t.e = (a.e - 1)/2; + t.x = sqrt(a.x * NTL_XD_BOUND); + } + else { + t.e = a.e/2; + t.x = sqrt(a.x); + } + + t.normalize(); + + return t; +} + + +void power(xdouble& z, const xdouble& a, const ZZ& e) +{ + xdouble b, res; + + b = a; + + res = 1; + long n = NumBits(e); + long i; + + for (i = n-1; i >= 0; i--) { + res = res * res; + if (bit(e, i)) + res = res * b; + } + + if (sign(e) 
< 0) + z = 1/res; + else + z = res; +} + + + + +void power(xdouble& z, const xdouble& a, long e) +{ + NTL_ZZRegister(E); + E = e; + power(z, a, E); +} + + + + + +void power2(xdouble& z, long e) +{ + long hb = NTL_XD_HBOUND_LOG; + long b = 2*hb; + + long q, r; + + q = e/b; + r = e%b; + + while (r >= hb) { + r -= b; + q++; + } + + while (r < -hb) { + r += b; + q--; + } + + if (q >= NTL_OVFBND) + ResourceError("xdouble: overflow"); + + if (q <= -NTL_OVFBND) + ResourceError("xdouble: underflow"); + + double x = _ntl_ldexp(1.0, r); + + z.x = x; + z.e = q; +} + + +void MulAdd(xdouble& z, const xdouble& a, const xdouble& b, const xdouble& c) +// z = a + b*c +{ + double x; + long e; + + e = b.e + c.e; + x = b.x * c.x; + + if (x == 0) { + z = a; + return; + } + + if (a.x == 0) { + z.e = e; + z.x = x; + z.normalize(); + return; + } + + + if (a.e == e) { + z.x = a.x + x; + z.e = e; + z.normalize(); + return; + } + else if (a.e > e) { + if (a.e > e+1) { + z = a; + return; + } + + z.x = a.x + x*NTL_XD_BOUND_INV; + z.e = a.e; + z.normalize(); + return; + } + else { + if (e > a.e+1) { + z.x = x; + z.e = e; + z.normalize(); + return; + } + + z.x = a.x*NTL_XD_BOUND_INV + x; + z.e = e; + z.normalize(); + return; + } +} + +void MulSub(xdouble& z, const xdouble& a, const xdouble& b, const xdouble& c) +// z = a - b*c +{ + double x; + long e; + + e = b.e + c.e; + x = b.x * c.x; + + if (x == 0) { + z = a; + return; + } + + if (a.x == 0) { + z.e = e; + z.x = -x; + z.normalize(); + return; + } + + + if (a.e == e) { + z.x = a.x - x; + z.e = e; + z.normalize(); + return; + } + else if (a.e > e) { + if (a.e > e+1) { + z = a; + return; + } + + z.x = a.x - x*NTL_XD_BOUND_INV; + z.e = a.e; + z.normalize(); + return; + } + else { + if (e > a.e+1) { + z.x = -x; + z.e = e; + z.normalize(); + return; + } + + z.x = a.x*NTL_XD_BOUND_INV - x; + z.e = e; + z.normalize(); + return; + } +} + +double log(const xdouble& a) +{ + static const double LogBound = log(NTL_XD_BOUND); // GLOBAL (assumes C++11 thread-safe init) + if (a.x <= 0) { + ArithmeticError("log(xdouble): argument must be positive"); + } + + return log(a.x) + a.e*LogBound; +} + +xdouble xexp(double x) +{ + const double LogBound = log(NTL_XD_BOUND); + + double y = x/LogBound; + double iy = floor(y+0.5); + + if (iy >= NTL_OVFBND) + ResourceError("xdouble: overflow"); + + if (iy <= -NTL_OVFBND) + ResourceError("xdouble: underflow"); + + + double fy = y - iy; + + xdouble res; + res.e = long(iy); + res.x = exp(fy*LogBound); + res.normalize(); + return res; +} + +/************** input / output routines **************/ + + +void ComputeLn2(RR&); +void ComputeLn10(RR&); + +long ComputeMax10Power() +{ + RRPush push; + RR::SetPrecision(NTL_BITS_PER_LONG); + + RR ln2, ln10; + ComputeLn2(ln2); + ComputeLn10(ln10); + + long k = to_long( to_RR(NTL_OVFBND/2) * ln2 / ln10 ); + return k; +} + + +xdouble PowerOf10(const ZZ& e) +{ + static NTL_CHEAP_THREAD_LOCAL long init = 0; + static NTL_CHEAP_THREAD_LOCAL long k = 0; + + NTL_TLS_LOCAL(xdouble, v10k); + + if (!init) { + k = ComputeMax10Power(); + RRPush push; + RR::SetPrecision(NTL_DOUBLE_PRECISION); + v10k = to_xdouble(power(to_RR(10), k)); + init = 1; + } + + ZZ e1; + long neg; + + if (e < 0) { + e1 = -e; + neg = 1; + } + else { + e1 = e; + neg = 0; + } + + long r; + ZZ q; + + r = DivRem(q, e1, k); + + RRPush push; + RR::SetPrecision(NTL_DOUBLE_PRECISION); + xdouble x1 = to_xdouble(power(to_RR(10), r)); + + xdouble x2 = power(v10k, q); + xdouble x3 = x1*x2; + + if (neg) x3 = 1/x3; + + return x3; +} + + + + +ostream& 
operator<<(ostream& s, const xdouble& a) +{ + if (a == 0) { + s << "0"; + return s; + } + + RRPush push; + long temp_p = long(log(fabs(log(fabs(a))) + 1.0)/log(2.0)) + 10; + RR::SetPrecision(temp_p); + + RR ln2, ln10, log_2_10; + ComputeLn2(ln2); + ComputeLn10(ln10); + log_2_10 = ln10/ln2; + ZZ log_10_a = to_ZZ( + (to_RR(a.e)*to_RR(2*NTL_XD_HBOUND_LOG) + log(fabs(a.x))/log(2.0))/log_2_10); + + + xdouble b; + long neg; + + if (a < 0) { + b = -a; + neg = 1; + } + else { + b = a; + neg = 0; + } + + ZZ k = xdouble::OutputPrecision() - log_10_a; + + xdouble c, d; + + c = PowerOf10(to_ZZ(xdouble::OutputPrecision())); + d = PowerOf10(log_10_a); + + b = b / d; + b = b * c; + + while (b < c) { + b = b * 10.0; + k++; + } + + while (b >= c) { + b = b / 10.0; + k--; + } + + b = b + 0.5; + k = -k; + + ZZ B; + conv(B, b); + + long bp_len = xdouble::OutputPrecision()+10; + + UniqueArray bp_store; + bp_store.SetLength(bp_len); + char *bp = bp_store.get(); + + long len, i; + + len = 0; + do { + if (len >= bp_len) LogicError("xdouble output: buffer overflow"); + bp[len] = IntValToChar(DivRem(B, B, 10)); + len++; + } while (B > 0); + + for (i = 0; i < len/2; i++) { + char tmp; + tmp = bp[i]; + bp[i] = bp[len-1-i]; + bp[len-1-i] = tmp; + } + + i = len-1; + while (bp[i] == '0') i--; + + k += (len-1-i); + len = i+1; + + bp[len] = '\0'; + + if (k > 3 || k < -len - 3) { + // use scientific notation + + if (neg) s << "-"; + s << "0." << bp << "e" << (k + len); + } + else { + long kk = to_long(k); + + if (kk >= 0) { + if (neg) s << "-"; + s << bp; + for (i = 0; i < kk; i++) + s << "0"; + } + else if (kk <= -len) { + if (neg) s << "-"; + s << "0."; + for (i = 0; i < -len-kk; i++) + s << "0"; + s << bp; + } + else { + if (neg) s << "-"; + for (i = 0; i < len+kk; i++) + s << bp[i]; + + s << "."; + + for (i = len+kk; i < len; i++) + s << bp[i]; + } + } + + return s; +} + +istream& operator>>(istream& s, xdouble& x) +{ + long c; + long cval; + long sign; + ZZ a, b; + + if (!s) NTL_INPUT_ERROR(s, "bad xdouble input"); + + c = s.peek(); + while (IsWhiteSpace(c)) { + s.get(); + c = s.peek(); + } + + if (c == '-') { + sign = -1; + s.get(); + c = s.peek(); + } + else + sign = 1; + + long got1 = 0; + long got_dot = 0; + long got2 = 0; + + a = 0; + b = 1; + + cval = CharToIntVal(c); + + if (cval >= 0 && cval <= 9) { + got1 = 1; + + while (cval >= 0 && cval <= 9) { + mul(a, a, 10); + add(a, a, cval); + s.get(); + c = s.peek(); + cval = CharToIntVal(c); + } + } + + if (c == '.') { + got_dot = 1; + + s.get(); + c = s.peek(); + cval = CharToIntVal(c); + + if (cval >= 0 && cval <= 9) { + got2 = 1; + + while (cval >= 0 && cval <= 9) { + mul(a, a, 10); + add(a, a, cval); + mul(b, b, 10); + s.get(); + c = s.peek(); + cval = CharToIntVal(c); + } + } + } + + if (got_dot && !got1 && !got2) NTL_INPUT_ERROR(s, "bad xdouble input"); + + ZZ e; + + long got_e = 0; + long e_sign; + + if (c == 'e' || c == 'E') { + got_e = 1; + + s.get(); + c = s.peek(); + + if (c == '-') { + e_sign = -1; + s.get(); + c = s.peek(); + } + else if (c == '+') { + e_sign = 1; + s.get(); + c = s.peek(); + } + else + e_sign = 1; + + cval = CharToIntVal(c); + + if (cval < 0 || cval > 9) NTL_INPUT_ERROR(s, "bad xdouble input"); + + e = 0; + while (cval >= 0 && cval <= 9) { + mul(e, e, 10); + add(e, e, cval); + s.get(); + c = s.peek(); + cval = CharToIntVal(c); + } + } + + if (!got1 && !got2 && !got_e) NTL_INPUT_ERROR(s, "bad xdouble input"); + + xdouble t1, t2, v; + + if (got1 || got2) { + conv(t1, a); + conv(t2, b); + v = t1/t2; + } + else + v = 1; + + if (sign < 0) + 
v = -v; + + if (got_e) { + if (e_sign < 0) negate(e, e); + t1 = PowerOf10(e); + v = v * t1; + } + + x = v; + return s; +} + + +xdouble to_xdouble(const char *s) +{ + long c; + long cval; + long sign; + ZZ a, b; + long i=0; + + if (!s) InputError("bad xdouble input"); + + c = s[i]; + while (IsWhiteSpace(c)) { + i++; + c = s[i]; + } + + if (c == '-') { + sign = -1; + i++; + c = s[i]; + } + else + sign = 1; + + long got1 = 0; + long got_dot = 0; + long got2 = 0; + + a = 0; + b = 1; + + cval = CharToIntVal(c); + + if (cval >= 0 && cval <= 9) { + got1 = 1; + + while (cval >= 0 && cval <= 9) { + mul(a, a, 10); + add(a, a, cval); + i++; + c = s[i]; + cval = CharToIntVal(c); + } + } + + if (c == '.') { + got_dot = 1; + + i++; + c = s[i]; + cval = CharToIntVal(c); + + if (cval >= 0 && cval <= 9) { + got2 = 1; + + while (cval >= 0 && cval <= 9) { + mul(a, a, 10); + add(a, a, cval); + mul(b, b, 10); + i++; + c = s[i]; + cval = CharToIntVal(c); + } + } + } + + if (got_dot && !got1 && !got2) InputError("bad xdouble input"); + + ZZ e; + + long got_e = 0; + long e_sign; + + if (c == 'e' || c == 'E') { + got_e = 1; + + i++; + c = s[i]; + + if (c == '-') { + e_sign = -1; + i++; + c = s[i]; + } + else if (c == '+') { + e_sign = 1; + i++; + c = s[i]; + } + else + e_sign = 1; + + cval = CharToIntVal(c); + + if (cval < 0 || cval > 9) InputError("bad xdouble input"); + + e = 0; + while (cval >= 0 && cval <= 9) { + mul(e, e, 10); + add(e, e, cval); + i++; + c = s[i]; + cval = CharToIntVal(c); + } + } + + if (!got1 && !got2 && !got_e) InputError("bad xdouble input"); + + xdouble t1, t2, v; + + if (got1 || got2) { + conv(t1, a); + conv(t2, b); + v = t1/t2; + } + else + v = 1; + + if (sign < 0) + v = -v; + + if (got_e) { + if (e_sign < 0) negate(e, e); + t1 = PowerOf10(e); + v = v * t1; + } + + return v; +} + +NTL_END_IMPL
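
The xdouble type implemented above represents a value as a double mantissa times a fixed power-of-two base raised to a long exponent, with normalize() keeping the mantissa in a bounded range so products and sums of wildly scaled values never overflow the underlying double. A toy standalone version of that idea, under illustrative constants (B = 2^128 here; NTL's actual NTL_XD_BOUND and the hi/lo thresholds differ, and none of these names are NTL API):

    #include <cmath>
    #include <iostream>

    // value = m * B^e, with |m| kept in [1/sqrt(B), sqrt(B)]
    struct xd {
        double m;   // mantissa
        long   e;   // exponent, in units of log2(B)
    };

    const double B      = std::ldexp(1.0, 128);   // 2^128
    const double B_INV  = 1.0 / B;
    const double HB     = std::ldexp(1.0, 64);    // sqrt(B)
    const double HB_INV = 1.0 / HB;

    void normalize(xd& z) {
        if (z.m == 0) { z.e = 0; return; }
        double a = std::fabs(z.m);
        while (a < HB_INV) { z.m *= B;     a *= B;     z.e--; }
        while (a > HB)     { z.m *= B_INV; a *= B_INV; z.e++; }
    }

    // exponents add, mantissas multiply, then renormalize (cf. operator* above)
    xd mul(const xd& a, const xd& b) {
        xd z;
        z.m = a.m * b.m;
        z.e = a.e + b.e;
        normalize(z);
        return z;
    }

    int main() {
        xd f{1.0, 0};
        for (long i = 1; i <= 100000; i++) {  // 100000! overflows double
            xd t{(double)i, 0};
            normalize(t);
            f = mul(f, t);
        }
        std::cout << f.m << " * (2^128)^" << f.e << "\n";
    }
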