diff options
Diffstat (limited to 'lib/gmp/mini-gmp.c')
-rw-r--r-- | lib/gmp/mini-gmp.c | 1810 |
1 files changed, 1120 insertions, 690 deletions
diff --git a/lib/gmp/mini-gmp.c b/lib/gmp/mini-gmp.c index f3b43fbe8..e2e686123 100644 --- a/lib/gmp/mini-gmp.c +++ b/lib/gmp/mini-gmp.c @@ -2,9 +2,7 @@ Contributed to the GNU project by Niels Möller -Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, -2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 -Free Software Foundation, Inc. +Copyright 1991-1997, 1999-2019 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -41,11 +39,15 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "mini-gmp.h" - +#if !defined(MINI_GMP_DONT_USE_FLOAT_H) +#include <float.h> +#endif + + /* Macros */ #define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT) -#define GMP_LIMB_MAX (~ (mp_limb_t) 0) +#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0) #define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1)) #define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2)) @@ -60,18 +62,34 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define GMP_MIN(a, b) ((a) < (b) ? (a) : (b)) #define GMP_MAX(a, b) ((a) > (b) ? (a) : (b)) +#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b))) + +#if defined(DBL_MANT_DIG) && FLT_RADIX == 2 +#define GMP_DBL_MANT_BITS DBL_MANT_DIG +#else +#define GMP_DBL_MANT_BITS (53) +#endif + +/* Return non-zero if xp,xsize and yp,ysize overlap. + If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no + overlap. If both these are false, there's an overlap. */ +#define GMP_MPN_OVERLAP_P(xp, xsize, yp, ysize) \ + ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp)) + #define gmp_assert_nocarry(x) do { \ - mp_limb_t __cy = x; \ + mp_limb_t __cy = (x); \ assert (__cy == 0); \ } while (0) #define gmp_clz(count, x) do { \ mp_limb_t __clz_x = (x); \ - unsigned __clz_c; \ - for (__clz_c = 0; \ - (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \ - __clz_c += 8) \ - __clz_x <<= 8; \ + unsigned __clz_c = 0; \ + int LOCAL_SHIFT_BITS = 8; \ + if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) \ + for (; \ + (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \ + __clz_c += 8) \ + { __clz_x <<= LOCAL_SHIFT_BITS; } \ for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++) \ __clz_x <<= 1; \ (count) = __clz_c; \ @@ -102,27 +120,42 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define gmp_umul_ppmm(w1, w0, u, v) \ do { \ - mp_limb_t __x0, __x1, __x2, __x3; \ - unsigned __ul, __vl, __uh, __vh; \ - mp_limb_t __u = (u), __v = (v); \ + int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; \ + if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS) \ + { \ + unsigned int __ww = (unsigned int) (u) * (v); \ + w0 = (mp_limb_t) __ww; \ + w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ + } \ + else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS) \ + { \ + unsigned long int __ww = (unsigned long int) (u) * (v); \ + w0 = (mp_limb_t) __ww; \ + w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ + } \ + else { \ + mp_limb_t __x0, __x1, __x2, __x3; \ + unsigned __ul, __vl, __uh, __vh; \ + mp_limb_t __u = (u), __v = (v); \ \ - __ul = __u & GMP_LLIMB_MASK; \ - __uh = __u >> (GMP_LIMB_BITS / 2); \ - __vl = __v & GMP_LLIMB_MASK; \ - __vh = __v >> (GMP_LIMB_BITS / 2); \ + __ul = __u & GMP_LLIMB_MASK; \ + __uh = __u >> (GMP_LIMB_BITS / 2); \ + __vl = __v & GMP_LLIMB_MASK; \ + __vh = __v >> (GMP_LIMB_BITS / 2); \ \ - __x0 = (mp_limb_t) __ul * __vl; \ - __x1 = (mp_limb_t) __ul * __vh; \ - __x2 = (mp_limb_t) __uh * __vl; \ - __x3 = (mp_limb_t) __uh * __vh; \ + __x0 = (mp_limb_t) __ul * __vl; \ + __x1 = (mp_limb_t) __ul * __vh; \ + __x2 = (mp_limb_t) __uh * __vl; \ + __x3 = (mp_limb_t) __uh * __vh; \ \ - __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \ - __x1 += __x2; /* but this indeed can */ \ - if (__x1 < __x2) /* did we get it? */ \ - __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \ + __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \ \ - (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \ - (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \ + (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \ + (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \ + } \ } while (0) #define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di) \ @@ -222,11 +255,13 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ } while (0) #define MPZ_SRCPTR_SWAP(x, y) \ do { \ - mpz_srcptr __mpz_srcptr_swap__tmp = (x); \ + mpz_srcptr __mpz_srcptr_swap__tmp = (x); \ (x) = (y); \ (y) = __mpz_srcptr_swap__tmp; \ } while (0) +const int mp_bits_per_limb = GMP_LIMB_BITS; + /* Memory allocation and other helper functions. */ static void @@ -251,20 +286,20 @@ gmp_default_alloc (size_t size) } static void * -gmp_default_realloc (void *old, size_t old_size, size_t new_size) +gmp_default_realloc (void *old, size_t unused_old_size, size_t new_size) { - mp_ptr p; + void * p; p = realloc (old, new_size); if (!p) - gmp_die("gmp_default_realoc: Virtual memory exhausted."); + gmp_die("gmp_default_realloc: Virtual memory exhausted."); return p; } static void -gmp_default_free (void *p, size_t size) +gmp_default_free (void *p, size_t unused_size) { free (p); } @@ -311,14 +346,14 @@ mp_set_memory_functions (void *(*alloc_func) (size_t), static mp_ptr gmp_xalloc_limbs (mp_size_t size) { - return gmp_xalloc (size * sizeof (mp_limb_t)); + return (mp_ptr) gmp_xalloc (size * sizeof (mp_limb_t)); } static mp_ptr gmp_xrealloc_limbs (mp_ptr old, mp_size_t size) { assert (size > 0); - return (*gmp_reallocate_func) (old, 0, size * sizeof (mp_limb_t)); + return (mp_ptr) (*gmp_reallocate_func) (old, 0, size * sizeof (mp_limb_t)); } @@ -335,19 +370,17 @@ mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n) void mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n) { - while (n-- > 0) + while (--n >= 0) d[n] = s[n]; } int mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n) { - for (; n > 0; n--) + while (--n >= 0) { - if (ap[n-1] < bp[n-1]) - return -1; - else if (ap[n-1] > bp[n-1]) - return 1; + if (ap[n] != bp[n]) + return ap[n] > bp[n] ? 1 : -1; } return 0; } @@ -355,10 +388,8 @@ mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n) static int mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) { - if (an > bn) - return 1; - else if (an < bn) - return -1; + if (an != bn) + return an < bn ? -1 : 1; else return mpn_cmp (ap, bp, an); } @@ -366,12 +397,23 @@ mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) static mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n) { - for (; n > 0 && xp[n-1] == 0; n--) - ; + while (n > 0 && xp[n-1] == 0) + --n; return n; } -#define mpn_zero_p(xp, n) (mpn_normalized_size ((xp), (n)) == 0) +int +mpn_zero_p(mp_srcptr rp, mp_size_t n) +{ + return mpn_normalized_size (rp, n) == 0; +} + +void +mpn_zero (mp_ptr rp, mp_size_t n) +{ + while (--n >= 0) + rp[n] = 0; +} mp_limb_t mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) @@ -379,14 +421,16 @@ mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) mp_size_t i; assert (n > 0); - - for (i = 0; i < n; i++) + i = 0; + do { mp_limb_t r = ap[i] + b; /* Carry out */ b = (r < b); rp[i] = r; } + while (++i < n); + return b; } @@ -429,14 +473,17 @@ mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) assert (n > 0); - for (i = 0; i < n; i++) + i = 0; + do { mp_limb_t a = ap[i]; /* Carry out */ - mp_limb_t cy = a < b;; + mp_limb_t cy = a < b; rp[i] = a - b; b = cy; } + while (++i < n); + return b; } @@ -551,23 +598,24 @@ mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) { assert (un >= vn); assert (vn >= 1); + assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un)); + assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn)); /* We first multiply by the low order limb. This result can be stored, not added, to rp. We also avoid a loop for zeroing this way. */ rp[un] = mpn_mul_1 (rp, up, un, vp[0]); - rp += 1, vp += 1, vn -= 1; /* Now accumulate the product of up[] and the next higher limb from vp[]. */ - while (vn >= 1) + while (--vn >= 1) { + rp += 1, vp += 1; rp[un] = mpn_addmul_1 (rp, up, un, vp[0]); - rp += 1, vp += 1, vn -= 1; } - return rp[un - 1]; + return rp[un]; } void @@ -587,7 +635,6 @@ mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) { mp_limb_t high_limb, low_limb; unsigned int tnc; - mp_size_t i; mp_limb_t retval; assert (n >= 1); @@ -602,7 +649,7 @@ mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) retval = low_limb >> tnc; high_limb = (low_limb << cnt); - for (i = n - 1; i != 0; i--) + while (--n != 0) { low_limb = *--up; *--rp = high_limb | (low_limb >> tnc); @@ -618,7 +665,6 @@ mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) { mp_limb_t high_limb, low_limb; unsigned int tnc; - mp_size_t i; mp_limb_t retval; assert (n >= 1); @@ -630,7 +676,7 @@ mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) retval = (high_limb << tnc); low_limb = high_limb >> cnt; - for (i = n - 1; i != 0; i--) + while (--n != 0) { high_limb = *up++; *rp++ = low_limb | (high_limb << tnc); @@ -641,60 +687,156 @@ mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) return retval; } +static mp_bitcnt_t +mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un, + mp_limb_t ux) +{ + unsigned cnt; + + assert (ux == 0 || ux == GMP_LIMB_MAX); + assert (0 <= i && i <= un ); + + while (limb == 0) + { + i++; + if (i == un) + return (ux == 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS); + limb = ux ^ up[i]; + } + gmp_ctz (cnt, limb); + return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt; +} + +mp_bitcnt_t +mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit) +{ + mp_size_t i; + i = bit / GMP_LIMB_BITS; + + return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)), + i, ptr, i, 0); +} + +mp_bitcnt_t +mpn_scan0 (mp_srcptr ptr, mp_bitcnt_t bit) +{ + mp_size_t i; + i = bit / GMP_LIMB_BITS; + + return mpn_common_scan (~ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)), + i, ptr, i, GMP_LIMB_MAX); +} + +void +mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n) +{ + while (--n >= 0) + *rp++ = ~ *up++; +} + +mp_limb_t +mpn_neg (mp_ptr rp, mp_srcptr up, mp_size_t n) +{ + while (*up == 0) + { + *rp = 0; + if (!--n) + return 0; + ++up; ++rp; + } + *rp = - *up; + mpn_com (++rp, ++up, --n); + return 1; +} + /* MPN division interface. */ + +/* The 3/2 inverse is defined as + + m = floor( (B^3-1) / (B u1 + u0)) - B +*/ mp_limb_t mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0) { - mp_limb_t r, p, m; - unsigned ul, uh; - unsigned ql, qh; + mp_limb_t r, m; - /* First, do a 2/1 inverse. */ - /* The inverse m is defined as floor( (B^2 - 1 - u1)/u1 ), so that 0 < - * B^2 - (B + m) u1 <= u1 */ - assert (u1 >= GMP_LIMB_HIGHBIT); + { + mp_limb_t p, ql; + unsigned ul, uh, qh; - ul = u1 & GMP_LLIMB_MASK; - uh = u1 >> (GMP_LIMB_BITS / 2); + /* For notation, let b denote the half-limb base, so that B = b^2. + Split u1 = b uh + ul. */ + ul = u1 & GMP_LLIMB_MASK; + uh = u1 >> (GMP_LIMB_BITS / 2); - qh = ~u1 / uh; - r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK; + /* Approximation of the high half of quotient. Differs from the 2/1 + inverse of the half limb uh, since we have already subtracted + u0. */ + qh = (u1 ^ GMP_LIMB_MAX) / uh; - p = (mp_limb_t) qh * ul; - /* Adjustment steps taken from udiv_qrnnd_c */ - if (r < p) - { - qh--; - r += u1; - if (r >= u1) /* i.e. we didn't get carry when adding to r */ - if (r < p) - { - qh--; - r += u1; - } - } - r -= p; + /* Adjust to get a half-limb 3/2 inverse, i.e., we want - /* Do a 3/2 division (with half limb size) */ - p = (r >> (GMP_LIMB_BITS / 2)) * qh + r; - ql = (p >> (GMP_LIMB_BITS / 2)) + 1; + qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u + = floor( (b (~u) + b-1) / u), - /* By the 3/2 method, we don't need the high half limb. */ - r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1; + and the remainder - if (r >= (p << (GMP_LIMB_BITS / 2))) - { - ql--; - r += u1; - } - m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql; - if (r >= u1) - { - m++; - r -= u1; - } + r = b (~u) + b-1 - qh (b uh + ul) + = b (~u - qh uh) + b-1 - qh ul + + Subtraction of qh ul may underflow, which implies adjustments. + But by normalization, 2 u >= B > qh ul, so we need to adjust by + at most 2. + */ + + r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK; + + p = (mp_limb_t) qh * ul; + /* Adjustment steps taken from udiv_qrnnd_c */ + if (r < p) + { + qh--; + r += u1; + if (r >= u1) /* i.e. we didn't get carry when adding to r */ + if (r < p) + { + qh--; + r += u1; + } + } + r -= p; + + /* Low half of the quotient is + + ql = floor ( (b r + b-1) / u1). + + This is a 3/2 division (on half-limbs), for which qh is a + suitable inverse. */ + + p = (r >> (GMP_LIMB_BITS / 2)) * qh + r; + /* Unlike full-limb 3/2, we can add 1 without overflow. For this to + work, it is essential that ql is a full mp_limb_t. */ + ql = (p >> (GMP_LIMB_BITS / 2)) + 1; + /* By the 3/2 trick, we don't need the high half limb. */ + r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1; + + if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2)))) + { + ql--; + r += u1; + } + m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql; + if (r >= u1) + { + m++; + r -= u1; + } + } + + /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a + 3/2 inverse. */ if (u0 > 0) { mp_limb_t th, tl; @@ -715,8 +857,7 @@ mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0) if (r < th) { m--; - if (r > u1 || (r == u1 && tl > u0)) - m--; + m -= ((r > u1) | ((r == u1) & (tl > u0))); } } @@ -807,7 +948,8 @@ mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, if (inv->shift > 0) { - tp = gmp_xalloc_limbs (nn); + /* Shift, reusing qp area if possible. In-place shift if qp == np. */ + tp = qp ? qp : gmp_xalloc_limbs (nn); r = mpn_lshift (tp, np, nn, inv->shift); np = tp; } @@ -816,7 +958,7 @@ mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, d = inv->d1; di = inv->di; - while (nn-- > 0) + while (--nn >= 0) { mp_limb_t q; @@ -824,44 +966,19 @@ mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, if (qp) qp[nn] = q; } - if (inv->shift > 0) + if ((inv->shift > 0) && (tp != qp)) gmp_free (tp); return r >> inv->shift; } -static mp_limb_t -mpn_div_qr_1 (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_limb_t d) -{ - assert (d > 0); - - /* Special case for powers of two. */ - if (d > 1 && (d & (d-1)) == 0) - { - unsigned shift; - mp_limb_t r = np[0] & (d-1); - gmp_ctz (shift, d); - if (qp) - mpn_rshift (qp, np, nn, shift); - - return r; - } - else - { - struct gmp_div_inverse inv; - mpn_div_qr_1_invert (&inv, d); - return mpn_div_qr_1_preinv (qp, np, nn, &inv); - } -} - static void -mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn, +mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, const struct gmp_div_inverse *inv) { unsigned shift; mp_size_t i; mp_limb_t d1, d0, di, r1, r0; - mp_ptr tp; assert (nn >= 2); shift = inv->shift; @@ -870,17 +987,14 @@ mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn, di = inv->di; if (shift > 0) - { - tp = gmp_xalloc_limbs (nn); - r1 = mpn_lshift (tp, np, nn, shift); - np = tp; - } + r1 = mpn_lshift (np, np, nn, shift); else r1 = 0; r0 = np[nn - 1]; - for (i = nn - 2; i >= 0; i--) + i = nn - 2; + do { mp_limb_t n0, q; n0 = np[i]; @@ -889,32 +1003,18 @@ mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn, if (qp) qp[i] = q; } + while (--i >= 0); if (shift > 0) { - assert ((r0 << (GMP_LIMB_BITS - shift)) == 0); + assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0); r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift)); r1 >>= shift; - - gmp_free (tp); } - rp[1] = r1; - rp[0] = r0; -} - -#if 0 -static void -mpn_div_qr_2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn, - mp_limb_t d1, mp_limb_t d0) -{ - struct gmp_div_inverse inv; - assert (nn >= 2); - - mpn_div_qr_2_invert (&inv, d1, d0); - mpn_div_qr_2_preinv (qp, rp, np, nn, &inv); + np[1] = r1; + np[0] = r0; } -#endif static void mpn_div_qr_pi1 (mp_ptr qp, @@ -941,7 +1041,8 @@ mpn_div_qr_pi1 (mp_ptr qp, * by <d1, d0, dp[dn-3], ..., dp[0] > */ - for (i = nn - dn; i >= 0; i--) + i = nn - dn; + do { mp_limb_t n0 = np[dn-1+i]; @@ -973,6 +1074,7 @@ mpn_div_qr_pi1 (mp_ptr qp, if (qp) qp[i] = q; } + while (--i >= 0); np[dn - 1] = n1; } @@ -988,7 +1090,7 @@ mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, if (dn == 1) np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv); else if (dn == 2) - mpn_div_qr_2_preinv (qp, np, np, nn, inv); + mpn_div_qr_2_preinv (qp, np, nn, inv); else { mp_limb_t nh; @@ -1090,7 +1192,7 @@ mpn_get_str_bits (unsigned char *sp, unsigned bits, mp_srcptr up, mp_size_t un) unsigned char mask; size_t sn, j; mp_size_t i; - int shift; + unsigned shift; sn = ((un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]) + bits - 1) / bits; @@ -1128,7 +1230,7 @@ mpn_limb_get_str (unsigned char *sp, mp_limb_t w, l = w << binv->shift; gmp_udiv_qrnnd_preinv (w, r, h, l, binv->d1, binv->di); - assert ( (r << (GMP_LIMB_BITS - binv->shift)) == 0); + assert ((r & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - binv->shift))) == 0); r >>= binv->shift; sp[i] = r; @@ -1231,26 +1333,29 @@ mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn, return rn; } +/* Result is usually normalized, except for all-zero input, in which + case a single zero limb is written at *RP, and 1 is returned. */ static mp_size_t mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn, mp_limb_t b, const struct mpn_base_info *info) { mp_size_t rn; mp_limb_t w; - unsigned first; unsigned k; size_t j; - first = 1 + (sn - 1) % info->exp; + assert (sn > 0); + + k = 1 + (sn - 1) % info->exp; j = 0; w = sp[j++]; - for (k = 1; k < first; k++) + while (--k != 0) w = w * b + sp[j++]; rp[0] = w; - for (rn = (w > 0); j < sn;) + for (rn = 1; j < sn;) { mp_limb_t cy; @@ -1293,13 +1398,15 @@ mpn_set_str (mp_ptr rp, const unsigned char *sp, size_t sn, int base) void mpz_init (mpz_t r) { - r->_mp_alloc = 1; + static const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0; + + r->_mp_alloc = 0; r->_mp_size = 0; - r->_mp_d = gmp_xalloc_limbs (1); + r->_mp_d = (mp_ptr) &dummy_limb; } /* The utility of this function is a bit limited, since many functions - assings the result variable using mpz_swap. */ + assigns the result variable using mpz_swap. */ void mpz_init2 (mpz_t r, mp_bitcnt_t bits) { @@ -1316,15 +1423,19 @@ mpz_init2 (mpz_t r, mp_bitcnt_t bits) void mpz_clear (mpz_t r) { - gmp_free (r->_mp_d); + if (r->_mp_alloc) + gmp_free (r->_mp_d); } -static void * +static mp_ptr mpz_realloc (mpz_t r, mp_size_t size) { size = GMP_MAX (size, 1); - r->_mp_d = gmp_xrealloc_limbs (r->_mp_d, size); + if (r->_mp_alloc) + r->_mp_d = gmp_xrealloc_limbs (r->_mp_d, size); + else + r->_mp_d = gmp_xalloc_limbs (size); r->_mp_alloc = size; if (GMP_ABS (r->_mp_size) > size) @@ -1345,9 +1456,15 @@ mpz_set_si (mpz_t r, signed long int x) if (x >= 0) mpz_set_ui (r, x); else /* (x < 0) */ + if (GMP_LIMB_BITS < GMP_ULONG_BITS) + { + mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x)); + mpz_neg (r, r); + } + else { r->_mp_size = -1; - r->_mp_d[0] = GMP_NEG_CAST (unsigned long int, x); + MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x); } } @@ -1357,7 +1474,16 @@ mpz_set_ui (mpz_t r, unsigned long int x) if (x > 0) { r->_mp_size = 1; - r->_mp_d[0] = x; + MPZ_REALLOC (r, 1)[0] = x; + if (GMP_LIMB_BITS < GMP_ULONG_BITS) + { + int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; + while (x >>= LOCAL_GMP_LIMB_BITS) + { + ++ r->_mp_size; + MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x; + } + } } else r->_mp_size = 0; @@ -1404,16 +1530,20 @@ mpz_init_set (mpz_t r, const mpz_t x) int mpz_fits_slong_p (const mpz_t u) { - mp_size_t us = u->_mp_size; + return (LONG_MAX + LONG_MIN == 0 || mpz_cmp_ui (u, LONG_MAX) <= 0) && + mpz_cmpabs_ui (u, GMP_NEG_CAST (unsigned long int, LONG_MIN)) <= 0; +} - if (us == 0) - return 1; - else if (us == 1) - return u->_mp_d[0] < GMP_LIMB_HIGHBIT; - else if (us == -1) - return u->_mp_d[0] <= GMP_LIMB_HIGHBIT; - else - return 0; +static int +mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un) +{ + int ulongsize = GMP_ULONG_BITS / GMP_LIMB_BITS; + mp_limb_t ulongrem = 0; + + if (GMP_ULONG_BITS % GMP_LIMB_BITS != 0) + ulongrem = (mp_limb_t) (ULONG_MAX >> GMP_LIMB_BITS * ulongsize) + 1; + + return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1); } int @@ -1421,25 +1551,36 @@ mpz_fits_ulong_p (const mpz_t u) { mp_size_t us = u->_mp_size; - return us == 0 || us == 1; + return us >= 0 && mpn_absfits_ulong_p (u->_mp_d, us); } long int mpz_get_si (const mpz_t u) { - mp_size_t us = u->_mp_size; + unsigned long r = mpz_get_ui (u); + unsigned long c = -LONG_MAX - LONG_MIN; - if (us > 0) - return (long) (u->_mp_d[0] & ~GMP_LIMB_HIGHBIT); - else if (us < 0) - return (long) (- u->_mp_d[0] | GMP_LIMB_HIGHBIT); + if (u->_mp_size < 0) + /* This expression is necessary to properly handle -LONG_MIN */ + return -(long) c - (long) ((r - c) & LONG_MAX); else - return 0; + return (long) (r & LONG_MAX); } unsigned long int mpz_get_ui (const mpz_t u) { + if (GMP_LIMB_BITS < GMP_ULONG_BITS) + { + int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; + unsigned long r = 0; + mp_size_t n = GMP_ABS (u->_mp_size); + n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS); + while (--n >= 0) + r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n]; + return r; + } + return u->_mp_size == 0 ? 0 : u->_mp_d[0]; } @@ -1458,6 +1599,56 @@ mpz_getlimbn (const mpz_t u, mp_size_t n) return 0; } +void +mpz_realloc2 (mpz_t x, mp_bitcnt_t n) +{ + mpz_realloc (x, 1 + (n - (n != 0)) / GMP_LIMB_BITS); +} + +mp_srcptr +mpz_limbs_read (mpz_srcptr x) +{ + return x->_mp_d; +} + +mp_ptr +mpz_limbs_modify (mpz_t x, mp_size_t n) +{ + assert (n > 0); + return MPZ_REALLOC (x, n); +} + +mp_ptr +mpz_limbs_write (mpz_t x, mp_size_t n) +{ + return mpz_limbs_modify (x, n); +} + +void +mpz_limbs_finish (mpz_t x, mp_size_t xs) +{ + mp_size_t xn; + xn = mpn_normalized_size (x->_mp_d, GMP_ABS (xs)); + x->_mp_size = xs < 0 ? -xn : xn; +} + +static mpz_srcptr +mpz_roinit_normal_n (mpz_t x, mp_srcptr xp, mp_size_t xs) +{ + x->_mp_alloc = 0; + x->_mp_d = (mp_ptr) xp; + x->_mp_size = xs; + return x; +} + +mpz_srcptr +mpz_roinit_n (mpz_t x, mp_srcptr xp, mp_size_t xs) +{ + mpz_roinit_normal_n (x, xp, xs); + mpz_limbs_finish (x, xs); + return x; +} + /* Conversions and comparison to double. */ void @@ -1472,26 +1663,22 @@ mpz_set_d (mpz_t r, double x) /* x != x is true when x is a NaN, and x == x * 0.5 is true when x is zero or infinity. */ - if (x == 0.0 || x != x || x == x * 0.5) + if (x != x || x == x * 0.5) { r->_mp_size = 0; return; } - if (x < 0.0) - { - x = - x; - sign = 1; - } - else - sign = 0; + sign = x < 0.0 ; + if (sign) + x = - x; if (x < 1.0) { r->_mp_size = 0; return; } - B = 2.0 * (double) GMP_LIMB_HIGHBIT; + B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1); Bi = 1.0 / B; for (rn = 1; x >= B; rn++) x *= Bi; @@ -1501,8 +1688,9 @@ mpz_set_d (mpz_t r, double x) f = (mp_limb_t) x; x -= f; assert (x < 1.0); - rp[rn-1] = f; - for (i = rn-1; i-- > 0; ) + i = rn-1; + rp[i] = f; + while (--i >= 0) { x = B * x; f = (mp_limb_t) x; @@ -1524,18 +1712,34 @@ mpz_init_set_d (mpz_t r, double x) double mpz_get_d (const mpz_t u) { + int m; + mp_limb_t l; mp_size_t un; double x; - double B = 2.0 * (double) GMP_LIMB_HIGHBIT; + double B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1); un = GMP_ABS (u->_mp_size); if (un == 0) return 0.0; - x = u->_mp_d[--un]; - while (un > 0) - x = B*x + u->_mp_d[--un]; + l = u->_mp_d[--un]; + gmp_clz (m, l); + m = m + GMP_DBL_MANT_BITS - GMP_LIMB_BITS; + if (m < 0) + l &= GMP_LIMB_MAX << -m; + + for (x = l; --un >= 0;) + { + x = B*x; + if (m > 0) { + l = u->_mp_d[un]; + m -= GMP_LIMB_BITS; + if (m < 0) + l &= GMP_LIMB_MAX << -m; + x += l; + } + } if (u->_mp_size < 0) x = -x; @@ -1557,7 +1761,7 @@ mpz_cmpabs_d (const mpz_t x, double d) { xn = GMP_ABS (xn); - B = 2.0 * (double) GMP_LIMB_HIGHBIT; + B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1); Bi = 1.0 / B; /* Scale d so it can be compared with the top limb. */ @@ -1608,14 +1812,7 @@ mpz_cmp_d (const mpz_t x, double d) int mpz_sgn (const mpz_t u) { - mp_size_t usize = u->_mp_size; - - if (usize > 0) - return 1; - else if (usize < 0) - return -1; - else - return 0; + return GMP_CMP (u->_mp_size, 0); } int @@ -1623,21 +1820,12 @@ mpz_cmp_si (const mpz_t u, long v) { mp_size_t usize = u->_mp_size; - if (usize < -1) - return -1; - else if (v >= 0) + if (v >= 0) return mpz_cmp_ui (u, v); else if (usize >= 0) return 1; - else /* usize == -1 */ - { - mp_limb_t ul = u->_mp_d[0]; - if ((mp_limb_t)GMP_NEG_CAST (unsigned long int, v) < ul) - return -1; - else if ( (mp_limb_t)GMP_NEG_CAST (unsigned long int, v) > ul) - return 1; - } - return 0; + else + return - mpz_cmpabs_ui (u, GMP_NEG_CAST (unsigned long int, v)); } int @@ -1645,19 +1833,10 @@ mpz_cmp_ui (const mpz_t u, unsigned long v) { mp_size_t usize = u->_mp_size; - if (usize > 1) - return 1; - else if (usize < 0) + if (usize < 0) return -1; else - { - mp_limb_t ul = (usize > 0) ? u->_mp_d[0] : 0; - if (ul > v) - return 1; - else if (ul < v) - return -1; - } - return 0; + return mpz_cmpabs_ui (u, v); } int @@ -1666,35 +1845,26 @@ mpz_cmp (const mpz_t a, const mpz_t b) mp_size_t asize = a->_mp_size; mp_size_t bsize = b->_mp_size; - if (asize > bsize) - return 1; - else if (asize < bsize) - return -1; - else if (asize > 0) + if (asize != bsize) + return (asize < bsize) ? -1 : 1; + else if (asize >= 0) return mpn_cmp (a->_mp_d, b->_mp_d, asize); - else if (asize < 0) - return -mpn_cmp (a->_mp_d, b->_mp_d, -asize); else - return 0; + return mpn_cmp (b->_mp_d, a->_mp_d, -asize); } int mpz_cmpabs_ui (const mpz_t u, unsigned long v) { mp_size_t un = GMP_ABS (u->_mp_size); - mp_limb_t ul; - if (un > 1) + if (! mpn_absfits_ulong_p (u->_mp_d, un)) return 1; - - ul = (un == 1) ? u->_mp_d[0] : 0; - - if (ul > v) - return 1; - else if (ul < v) - return -1; else - return 0; + { + unsigned long uu = mpz_get_ui (u); + return GMP_CMP(uu, v); + } } int @@ -1707,18 +1877,14 @@ mpz_cmpabs (const mpz_t u, const mpz_t v) void mpz_abs (mpz_t r, const mpz_t u) { - if (r != u) - mpz_set (r, u); - + mpz_set (r, u); r->_mp_size = GMP_ABS (r->_mp_size); } void mpz_neg (mpz_t r, const mpz_t u) { - if (r != u) - mpz_set (r, u); - + mpz_set (r, u); r->_mp_size = -r->_mp_size; } @@ -1733,80 +1899,28 @@ mpz_swap (mpz_t u, mpz_t v) /* MPZ addition and subtraction */ -/* Adds to the absolute value. Returns new size, but doesn't store it. */ -static mp_size_t -mpz_abs_add_ui (mpz_t r, const mpz_t a, unsigned long b) -{ - mp_size_t an; - mp_ptr rp; - mp_limb_t cy; - - an = GMP_ABS (a->_mp_size); - if (an == 0) - { - r->_mp_d[0] = b; - return b > 0; - } - - rp = MPZ_REALLOC (r, an + 1); - - cy = mpn_add_1 (rp, a->_mp_d, an, b); - rp[an] = cy; - an += (cy > 0); - - return an; -} - -/* Subtract from the absolute value. Returns new size, (or -1 on underflow), - but doesn't store it. */ -static mp_size_t -mpz_abs_sub_ui (mpz_t r, const mpz_t a, unsigned long b) -{ - mp_size_t an = GMP_ABS (a->_mp_size); - mp_ptr rp = MPZ_REALLOC (r, an); - - if (an == 0) - { - rp[0] = b; - return -(b > 0); - } - else if (an == 1 && a->_mp_d[0] < b) - { - rp[0] = b - a->_mp_d[0]; - return -1; - } - else - { - gmp_assert_nocarry (mpn_sub_1 (rp, a->_mp_d, an, b)); - return mpn_normalized_size (rp, an); - } -} void mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b) { - if (a->_mp_size >= 0) - r->_mp_size = mpz_abs_add_ui (r, a, b); - else - r->_mp_size = -mpz_abs_sub_ui (r, a, b); + mpz_t bb; + mpz_init_set_ui (bb, b); + mpz_add (r, a, bb); + mpz_clear (bb); } void mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b) { - if (a->_mp_size < 0) - r->_mp_size = -mpz_abs_add_ui (r, a, b); - else - r->_mp_size = mpz_abs_sub_ui (r, a, b); + mpz_ui_sub (r, b, a); + mpz_neg (r, r); } void mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b) { - if (b->_mp_size < 0) - r->_mp_size = mpz_abs_add_ui (r, b, a); - else - r->_mp_size = -mpz_abs_sub_ui (r, b, a); + mpz_neg (r, b); + mpz_add_ui (r, r, a); } static mp_size_t @@ -1814,20 +1928,21 @@ mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b) { mp_size_t an = GMP_ABS (a->_mp_size); mp_size_t bn = GMP_ABS (b->_mp_size); - mp_size_t rn; mp_ptr rp; mp_limb_t cy; - rn = GMP_MAX (an, bn); - rp = MPZ_REALLOC (r, rn + 1); - if (an >= bn) - cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn); - else - cy = mpn_add (rp, b->_mp_d, bn, a->_mp_d, an); + if (an < bn) + { + MPZ_SRCPTR_SWAP (a, b); + MP_SIZE_T_SWAP (an, bn); + } - rp[rn] = cy; + rp = MPZ_REALLOC (r, an + 1); + cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn); - return rn + (cy > 0); + rp[an] = cy; + + return an + cy; } static mp_size_t @@ -1892,37 +2007,17 @@ mpz_mul_si (mpz_t r, const mpz_t u, long int v) mpz_neg (r, r); } else - mpz_mul_ui (r, u, (unsigned long int) v); + mpz_mul_ui (r, u, v); } void mpz_mul_ui (mpz_t r, const mpz_t u, unsigned long int v) { - mp_size_t un; - mpz_t t; - mp_ptr tp; - mp_limb_t cy; - - un = GMP_ABS (u->_mp_size); - - if (un == 0 || v == 0) - { - r->_mp_size = 0; - return; - } - - mpz_init2 (t, (un + 1) * GMP_LIMB_BITS); - - tp = t->_mp_d; - cy = mpn_mul_1 (tp, u->_mp_d, un, v); - tp[un] = cy; - - t->_mp_size = un + (cy > 0); - if (u->_mp_size < 0) - t->_mp_size = - t->_mp_size; - - mpz_swap (r, t); - mpz_clear (t); + mpz_t vv; + mpz_init_set_ui (vv, v); + mpz_mul (r, u, vv); + mpz_clear (vv); + return; } void @@ -1933,8 +2028,8 @@ mpz_mul (mpz_t r, const mpz_t u, const mpz_t v) mpz_t t; mp_ptr tp; - un = GMP_ABS (u->_mp_size); - vn = GMP_ABS (v->_mp_size); + un = u->_mp_size; + vn = v->_mp_size; if (un == 0 || vn == 0) { @@ -1942,7 +2037,10 @@ mpz_mul (mpz_t r, const mpz_t u, const mpz_t v) return; } - sign = (u->_mp_size ^ v->_mp_size) < 0; + sign = (un ^ vn) < 0; + + un = GMP_ABS (un); + vn = GMP_ABS (vn); mpz_init2 (t, (un + vn) * GMP_LIMB_BITS); @@ -1989,12 +2087,51 @@ mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits) else mpn_copyd (rp + limbs, u->_mp_d, un); - while (limbs > 0) - rp[--limbs] = 0; + mpn_zero (rp, limbs); r->_mp_size = (u->_mp_size < 0) ? - rn : rn; } +void +mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v) +{ + mpz_t t; + mpz_init_set_ui (t, v); + mpz_mul (t, u, t); + mpz_add (r, r, t); + mpz_clear (t); +} + +void +mpz_submul_ui (mpz_t r, const mpz_t u, unsigned long int v) +{ + mpz_t t; + mpz_init_set_ui (t, v); + mpz_mul (t, u, t); + mpz_sub (r, r, t); + mpz_clear (t); +} + +void +mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v) +{ + mpz_t t; + mpz_init (t); + mpz_mul (t, u, v); + mpz_add (r, r, t); + mpz_clear (t); +} + +void +mpz_submul (mpz_t r, const mpz_t u, const mpz_t v) +{ + mpz_t t; + mpz_init (t); + mpz_mul (t, u, v); + mpz_sub (r, r, t); + mpz_clear (t); +} + /* MPZ division */ enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC }; @@ -2059,8 +2196,7 @@ mpz_div_qr (mpz_t q, mpz_t r, mp_size_t qn, rn; mpz_t tq, tr; - mpz_init (tr); - mpz_set (tr, n); + mpz_init_set (tr, n); np = tr->_mp_d; qn = nn - dn + 1; @@ -2170,10 +2306,7 @@ mpz_tdiv_r (mpz_t r, const mpz_t n, const mpz_t d) void mpz_mod (mpz_t r, const mpz_t n, const mpz_t d) { - if (d->_mp_size >= 0) - mpz_div_qr (NULL, r, n, d, GMP_DIV_FLOOR); - else - mpz_div_qr (NULL, r, n, d, GMP_DIV_CEIL); + mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? GMP_DIV_FLOOR : GMP_DIV_CEIL); } static void @@ -2183,7 +2316,7 @@ mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, mp_size_t un, qn; mp_size_t limb_cnt; mp_ptr qp; - mp_limb_t adjust; + int adjust; un = u->_mp_size; if (un == 0) @@ -2207,7 +2340,6 @@ mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, if (qn <= 0) qn = 0; - else { qp = MPZ_REALLOC (q, qn); @@ -2225,7 +2357,8 @@ mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, q->_mp_size = qn; - mpz_add_ui (q, q, adjust); + if (adjust) + mpz_add_ui (q, q, 1); if (un < 0) mpz_neg (q, q); } @@ -2260,16 +2393,9 @@ mpz_div_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bit_index, { /* Have to negate and sign extend. */ mp_size_t i; - mp_limb_t cy; - for (cy = 1, i = 0; i < un; i++) - { - mp_limb_t s = ~u->_mp_d[i] + cy; - cy = s < cy; - rp[i] = s; - } - assert (cy == 0); - for (; i < rn - 1; i++) + gmp_assert_nocarry (! mpn_neg (rp, u->_mp_d, un)); + for (i = un; i < rn - 1; i++) rp[i] = GMP_LIMB_MAX; rp[rn-1] = mask; @@ -2294,23 +2420,13 @@ mpz_div_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bit_index, if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */ { /* If r != 0, compute 2^{bit_count} - r. */ - mp_size_t i; - - for (i = 0; i < rn && rp[i] == 0; i++) - ; - if (i < rn) - { - /* r > 0, need to flip sign. */ - rp[i] = ~rp[i] + 1; - for (i++; i < rn; i++) - rp[i] = ~rp[i]; + mpn_neg (rp, rp, rn); - rp[rn-1] &= mask; + rp[rn-1] &= mask; - /* us is not used for anything else, so we can modify it - here to indicate flipped sign. */ - us = -us; - } + /* us is not used for anything else, so we can modify it + here to indicate flipped sign. */ + us = -us; } } rn = mpn_normalized_size (rp, rn); @@ -2365,60 +2481,42 @@ mpz_divisible_p (const mpz_t n, const mpz_t d) return mpz_div_qr (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0; } -static unsigned long -mpz_div_qr_ui (mpz_t q, mpz_t r, - const mpz_t n, unsigned long d, enum mpz_div_round_mode mode) +int +mpz_congruent_p (const mpz_t a, const mpz_t b, const mpz_t m) { - mp_size_t ns, qn; - mp_ptr qp; - mp_limb_t rl; - mp_size_t rs; + mpz_t t; + int res; - ns = n->_mp_size; - if (ns == 0) - { - if (q) - q->_mp_size = 0; - if (r) - r->_mp_size = 0; - return 0; - } + /* a == b (mod 0) iff a == b */ + if (mpz_sgn (m) == 0) + return (mpz_cmp (a, b) == 0); - qn = GMP_ABS (ns); - if (q) - qp = MPZ_REALLOC (q, qn); - else - qp = NULL; + mpz_init (t); + mpz_sub (t, a, b); + res = mpz_divisible_p (t, m); + mpz_clear (t); - rl = mpn_div_qr_1 (qp, n->_mp_d, qn, d); - assert (rl < d); + return res; +} - rs = rl > 0; - rs = (ns < 0) ? -rs : rs; +static unsigned long +mpz_div_qr_ui (mpz_t q, mpz_t r, + const mpz_t n, unsigned long d, enum mpz_div_round_mode mode) +{ + unsigned long ret; + mpz_t rr, dd; - if (rl > 0 && ( (mode == GMP_DIV_FLOOR && ns < 0) - || (mode == GMP_DIV_CEIL && ns >= 0))) - { - if (q) - gmp_assert_nocarry (mpn_add_1 (qp, qp, qn, 1)); - rl = d - rl; - rs = -rs; - } + mpz_init (rr); + mpz_init_set_ui (dd, d); + mpz_div_qr (q, rr, n, dd, mode); + mpz_clear (dd); + ret = mpz_get_ui (rr); if (r) - { - r->_mp_d[0] = rl; - r->_mp_size = rs; - } - if (q) - { - qn -= (qp[qn-1] == 0); - assert (qn == 0 || qp[qn-1] > 0); + mpz_swap (r, rr); + mpz_clear (rr); - q->_mp_size = (ns < 0) ? - qn : qn; - } - - return rl; + return ret; } unsigned long @@ -2557,53 +2655,31 @@ mpn_gcd_11 (mp_limb_t u, mp_limb_t v) unsigned long mpz_gcd_ui (mpz_t g, const mpz_t u, unsigned long v) { - mp_size_t un; + mpz_t t; + mpz_init_set_ui(t, v); + mpz_gcd (t, u, t); + if (v > 0) + v = mpz_get_ui (t); - if (v == 0) - { - if (g) - mpz_abs (g, u); - } - else - { - un = GMP_ABS (u->_mp_size); - if (un != 0) - v = mpn_gcd_11 (mpn_div_qr_1 (NULL, u->_mp_d, un, v), v); + if (g) + mpz_swap (t, g); - if (g) - mpz_set_ui (g, v); - } + mpz_clear (t); return v; } static mp_bitcnt_t -mpz_make_odd (mpz_t r, const mpz_t u) +mpz_make_odd (mpz_t r) { - mp_size_t un, rn, i; - mp_ptr rp; - unsigned shift; - - un = GMP_ABS (u->_mp_size); - assert (un > 0); - - for (i = 0; u->_mp_d[i] == 0; i++) - ; + mp_bitcnt_t shift; - gmp_ctz (shift, u->_mp_d[i]); + assert (r->_mp_size > 0); + /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */ + shift = mpn_common_scan (r->_mp_d[0], 0, r->_mp_d, 0, 0); + mpz_tdiv_q_2exp (r, r, shift); - rn = un - i; - rp = MPZ_REALLOC (r, rn); - if (shift > 0) - { - mpn_rshift (rp, u->_mp_d + i, rn, shift); - rn -= (rp[rn-1] == 0); - } - else - mpn_copyi (rp, u->_mp_d + i, rn); - - r->_mp_size = rn; - return i * GMP_LIMB_BITS + shift; + return shift; } void @@ -2626,8 +2702,10 @@ mpz_gcd (mpz_t g, const mpz_t u, const mpz_t v) mpz_init (tu); mpz_init (tv); - uz = mpz_make_odd (tu, u); - vz = mpz_make_odd (tv, v); + mpz_abs (tu, u); + uz = mpz_make_odd (tu); + mpz_abs (tv, v); + vz = mpz_make_odd (tv); gz = GMP_MIN (uz, vz); if (tu->_mp_size < tv->_mp_size) @@ -2643,7 +2721,7 @@ mpz_gcd (mpz_t g, const mpz_t u, const mpz_t v) { int c; - mpz_make_odd (tu, tu); + mpz_make_odd (tu); c = mpz_cmp (tu, tv); if (c == 0) { @@ -2680,7 +2758,7 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) signed long sign = mpz_sgn (v); mpz_abs (g, v); if (s) - mpz_set_ui (s, 0); + s->_mp_size = 0; if (t) mpz_set_si (t, sign); return; @@ -2694,7 +2772,7 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) if (s) mpz_set_si (s, sign); if (t) - mpz_set_ui (t, 0); + t->_mp_size = 0; return; } @@ -2705,8 +2783,10 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) mpz_init (t0); mpz_init (t1); - uz = mpz_make_odd (tu, u); - vz = mpz_make_odd (tv, v); + mpz_abs (tu, u); + uz = mpz_make_odd (tu); + mpz_abs (tv, v); + vz = mpz_make_odd (tv); gz = GMP_MIN (uz, vz); uz -= gz; @@ -2754,7 +2834,7 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) if (tu->_mp_size > 0) { mp_bitcnt_t shift; - shift = mpz_make_odd (tu, tu); + shift = mpz_make_odd (tu); mpz_mul_2exp (t0, t0, shift); mpz_mul_2exp (s0, s0, shift); power += shift; @@ -2777,7 +2857,7 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) mpz_add (t0, t0, t1); mpz_add (s0, s0, s1); - shift = mpz_make_odd (tv, tv); + shift = mpz_make_odd (tv); mpz_mul_2exp (t1, t1, shift); mpz_mul_2exp (s1, s1, shift); } @@ -2787,7 +2867,7 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) mpz_add (t1, t0, t1); mpz_add (s1, s0, s1); - shift = mpz_make_odd (tu, tu); + shift = mpz_make_odd (tu); mpz_mul_2exp (t0, t0, shift); mpz_mul_2exp (s0, s0, shift); } @@ -2817,8 +2897,9 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) mpz_sub (s0, s0, s1); mpz_add (t0, t0, t1); } - mpz_divexact_ui (s0, s0, 2); - mpz_divexact_ui (t0, t0, 2); + assert (mpz_even_p (t0) && mpz_even_p (s0)); + mpz_tdiv_q_2exp (s0, s0, 1); + mpz_tdiv_q_2exp (t0, t0, 1); } /* Arrange so that |s| < |u| / 2g */ @@ -2925,12 +3006,16 @@ mpz_pow_ui (mpz_t r, const mpz_t b, unsigned long e) mpz_t tr; mpz_init_set_ui (tr, 1); - for (bit = GMP_ULONG_HIGHBIT; bit > 0; bit >>= 1) + bit = GMP_ULONG_HIGHBIT; + do { mpz_mul (tr, tr, tr); if (e & bit) mpz_mul (tr, tr, b); + bit >>= 1; } + while (bit > 0); + mpz_swap (r, tr); mpz_clear (tr); } @@ -2939,6 +3024,7 @@ void mpz_ui_pow_ui (mpz_t r, unsigned long blimb, unsigned long e) { mpz_t b; + mpz_init_set_ui (b, blimb); mpz_pow_ui (r, b, e); mpz_clear (b); @@ -2986,7 +3072,7 @@ mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m) if (e->_mp_size < 0) { if (!mpz_invert (base, b, m)) - gmp_die ("mpz_powm: Negative exponent and non-invertibe base."); + gmp_die ("mpz_powm: Negative exponent and non-invertible base."); } else { @@ -3013,12 +3099,13 @@ mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m) } mpz_init_set_ui (tr, 1); - while (en-- > 0) + while (--en >= 0) { mp_limb_t w = e->_mp_d[en]; mp_limb_t bit; - for (bit = GMP_LIMB_HIGHBIT; bit > 0; bit >>= 1) + bit = GMP_LIMB_HIGHBIT; + do { mpz_mul (tr, tr, tr); if (w & bit) @@ -3028,7 +3115,9 @@ mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m) mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); } + bit >>= 1; } + while (bit > 0); } /* Final reduction */ @@ -3050,6 +3139,7 @@ void mpz_powm_ui (mpz_t r, const mpz_t b, unsigned long elimb, const mpz_t m) { mpz_t e; + mpz_init_set_ui (e, elimb); mpz_powm (r, b, e, m); mpz_clear (e); @@ -3063,20 +3153,21 @@ mpz_rootrem (mpz_t x, mpz_t r, const mpz_t y, unsigned long z) mpz_t t, u; sgn = y->_mp_size < 0; - if (sgn && (z & 1) == 0) + if ((~z & sgn) != 0) gmp_die ("mpz_rootrem: Negative argument, with even root."); if (z == 0) gmp_die ("mpz_rootrem: Zeroth root."); if (mpz_cmpabs_ui (y, 1) <= 0) { - mpz_set (x, y); + if (x) + mpz_set (x, y); if (r) r->_mp_size = 0; return; } - mpz_init (t); mpz_init (u); + mpz_init (t); mpz_setbit (t, mpz_sizeinbase (y, 2) / z + 1); if (z == 2) /* simplify sqrt loop: z-1 == 1 */ @@ -3109,7 +3200,8 @@ mpz_rootrem (mpz_t x, mpz_t r, const mpz_t y, unsigned long z) mpz_pow_ui (t, u, z); mpz_sub (r, y, t); } - mpz_swap (x, u); + if (x) + mpz_swap (x, u); mpz_clear (u); mpz_clear (t); } @@ -3141,19 +3233,69 @@ mpz_sqrt (mpz_t s, const mpz_t u) mpz_rootrem (s, NULL, u, 2); } +int +mpz_perfect_square_p (const mpz_t u) +{ + if (u->_mp_size <= 0) + return (u->_mp_size == 0); + else + return mpz_root (NULL, u, 2); +} + +int +mpn_perfect_square_p (mp_srcptr p, mp_size_t n) +{ + mpz_t t; + + assert (n > 0); + assert (p [n-1] != 0); + return mpz_root (NULL, mpz_roinit_normal_n (t, p, n), 2); +} + +mp_size_t +mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr p, mp_size_t n) +{ + mpz_t s, r, u; + mp_size_t res; + + assert (n > 0); + assert (p [n-1] != 0); + + mpz_init (r); + mpz_init (s); + mpz_rootrem (s, r, mpz_roinit_normal_n (u, p, n), 2); + + assert (s->_mp_size == (n+1)/2); + mpn_copyd (sp, s->_mp_d, s->_mp_size); + mpz_clear (s); + res = r->_mp_size; + if (rp) + mpn_copyd (rp, r->_mp_d, res); + mpz_clear (r); + return res; +} /* Combinatorics */ void +mpz_mfac_uiui (mpz_t x, unsigned long n, unsigned long m) +{ + mpz_set_ui (x, n + (n == 0)); + if (m + 1 < 2) return; + while (n > m + 1) + mpz_mul_ui (x, x, n -= m); +} + +void +mpz_2fac_ui (mpz_t x, unsigned long n) +{ + mpz_mfac_uiui (x, n, 2); +} + +void mpz_fac_ui (mpz_t x, unsigned long n) { - if (n < 2) { - mpz_set_ui (x, 1); - return; - } - mpz_set_ui (x, n); - for (;--n > 1;) - mpz_mul_ui (x, x, n); + mpz_mfac_uiui (x, n, 1); } void @@ -3161,25 +3303,299 @@ mpz_bin_uiui (mpz_t r, unsigned long n, unsigned long k) { mpz_t t; - if (k > n) { - r->_mp_size = 0; - return; - } - mpz_fac_ui (r, n); + mpz_set_ui (r, k <= n); + + if (k > (n >> 1)) + k = (k <= n) ? n - k : 0; + mpz_init (t); mpz_fac_ui (t, k); - mpz_divexact (r, r, t); - mpz_fac_ui (t, n - k); + + for (; k > 0; --k) + mpz_mul_ui (r, r, n--); + mpz_divexact (r, r, t); mpz_clear (t); } +/* Primality testing */ + +/* Computes Kronecker (a/b) with odd b, a!=0 and GCD(a,b) = 1 */ +/* Adapted from JACOBI_BASE_METHOD==4 in mpn/generic/jacbase.c */ +static int +gmp_jacobi_coprime (mp_limb_t a, mp_limb_t b) +{ + int c, bit = 0; + + assert (b & 1); + assert (a != 0); + /* assert (mpn_gcd_11 (a, b) == 1); */ + + /* Below, we represent a and b shifted right so that the least + significant one bit is implicit. */ + b >>= 1; + + gmp_ctz(c, a); + a >>= 1; + + do + { + a >>= c; + /* (2/b) = -1 if b = 3 or 5 mod 8 */ + bit ^= c & (b ^ (b >> 1)); + if (a < b) + { + bit ^= a & b; + a = b - a; + b -= a; + } + else + { + a -= b; + assert (a != 0); + } + + gmp_ctz(c, a); + ++c; + } + while (b > 0); + + return bit & 1 ? -1 : 1; +} + +static void +gmp_lucas_step_k_2k (mpz_t V, mpz_t Qk, const mpz_t n) +{ + mpz_mod (Qk, Qk, n); + /* V_{2k} <- V_k ^ 2 - 2Q^k */ + mpz_mul (V, V, V); + mpz_submul_ui (V, Qk, 2); + mpz_tdiv_r (V, V, n); + /* Q^{2k} = (Q^k)^2 */ + mpz_mul (Qk, Qk, Qk); +} + +/* Computes V_k, Q^k (mod n) for the Lucas' sequence */ +/* with P=1, Q=Q; k = (n>>b0)|1. */ +/* Requires an odd n > 4; b0 > 0; -2*Q must not overflow a long */ +/* Returns (U_k == 0) and sets V=V_k and Qk=Q^k. */ +static int +gmp_lucas_mod (mpz_t V, mpz_t Qk, long Q, + mp_bitcnt_t b0, const mpz_t n) +{ + mp_bitcnt_t bs; + mpz_t U; + int res; + + assert (b0 > 0); + assert (Q <= - (LONG_MIN / 2)); + assert (Q >= - (LONG_MAX / 2)); + assert (mpz_cmp_ui (n, 4) > 0); + assert (mpz_odd_p (n)); + + mpz_init_set_ui (U, 1); /* U1 = 1 */ + mpz_set_ui (V, 1); /* V1 = 1 */ + mpz_set_si (Qk, Q); + + for (bs = mpz_sizeinbase (n, 2) - 1; --bs >= b0;) + { + /* U_{2k} <- U_k * V_k */ + mpz_mul (U, U, V); + /* V_{2k} <- V_k ^ 2 - 2Q^k */ + /* Q^{2k} = (Q^k)^2 */ + gmp_lucas_step_k_2k (V, Qk, n); + + /* A step k->k+1 is performed if the bit in $n$ is 1 */ + /* mpz_tstbit(n,bs) or the the bit is 0 in $n$ but */ + /* should be 1 in $n+1$ (bs == b0) */ + if (b0 == bs || mpz_tstbit (n, bs)) + { + /* Q^{k+1} <- Q^k * Q */ + mpz_mul_si (Qk, Qk, Q); + /* U_{k+1} <- (U_k + V_k) / 2 */ + mpz_swap (U, V); /* Keep in V the old value of U_k */ + mpz_add (U, U, V); + /* We have to compute U/2, so we need an even value, */ + /* equivalent (mod n) */ + if (mpz_odd_p (U)) + mpz_add (U, U, n); + mpz_tdiv_q_2exp (U, U, 1); + /* V_{k+1} <-(D*U_k + V_k) / 2 = + U_{k+1} + (D-1)/2*U_k = U_{k+1} - 2Q*U_k */ + mpz_mul_si (V, V, -2*Q); + mpz_add (V, U, V); + mpz_tdiv_r (V, V, n); + } + mpz_tdiv_r (U, U, n); + } + + res = U->_mp_size == 0; + mpz_clear (U); + return res; +} + +/* Performs strong Lucas' test on x, with parameters suggested */ +/* for the BPSW test. Qk is only passed to recycle a variable. */ +/* Requires GCD (x,6) = 1.*/ +static int +gmp_stronglucas (const mpz_t x, mpz_t Qk) +{ + mp_bitcnt_t b0; + mpz_t V, n; + mp_limb_t maxD, D; /* The absolute value is stored. */ + long Q; + mp_limb_t tl; + + /* Test on the absolute value. */ + mpz_roinit_normal_n (n, x->_mp_d, GMP_ABS (x->_mp_size)); + + assert (mpz_odd_p (n)); + /* assert (mpz_gcd_ui (NULL, n, 6) == 1); */ + if (mpz_root (Qk, n, 2)) + return 0; /* A square is composite. */ + + /* Check Ds up to square root (in case, n is prime) + or avoid overflows */ + maxD = (Qk->_mp_size == 1) ? Qk->_mp_d [0] - 1 : GMP_LIMB_MAX; + + D = 3; + /* Search a D such that (D/n) = -1 in the sequence 5,-7,9,-11,.. */ + /* For those Ds we have (D/n) = (n/|D|) */ + do + { + if (D >= maxD) + return 1 + (D != GMP_LIMB_MAX); /* (1 + ! ~ D) */ + D += 2; + tl = mpz_tdiv_ui (n, D); + if (tl == 0) + return 0; + } + while (gmp_jacobi_coprime (tl, D) == 1); + + mpz_init (V); + + /* n-(D/n) = n+1 = d*2^{b0}, with d = (n>>b0) | 1 */ + b0 = mpz_scan0 (n, 0); + + /* D= P^2 - 4Q; P = 1; Q = (1-D)/4 */ + Q = (D & 2) ? (long) (D >> 2) + 1 : -(long) (D >> 2); + + if (! gmp_lucas_mod (V, Qk, Q, b0, n)) /* If Ud != 0 */ + while (V->_mp_size != 0 && --b0 != 0) /* while Vk != 0 */ + /* V <- V ^ 2 - 2Q^k */ + /* Q^{2k} = (Q^k)^2 */ + gmp_lucas_step_k_2k (V, Qk, n); + + mpz_clear (V); + return (b0 != 0); +} + +static int +gmp_millerrabin (const mpz_t n, const mpz_t nm1, mpz_t y, + const mpz_t q, mp_bitcnt_t k) +{ + assert (k > 0); + + /* Caller must initialize y to the base. */ + mpz_powm (y, y, q, n); + + if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0) + return 1; + + while (--k > 0) + { + mpz_powm_ui (y, y, 2, n); + if (mpz_cmp (y, nm1) == 0) + return 1; + /* y == 1 means that the previous y was a non-trivial square root + of 1 (mod n). y == 0 means that n is a power of the base. + In either case, n is not prime. */ + if (mpz_cmp_ui (y, 1) <= 0) + return 0; + } + return 0; +} + +/* This product is 0xc0cfd797, and fits in 32 bits. */ +#define GMP_PRIME_PRODUCT \ + (3UL*5UL*7UL*11UL*13UL*17UL*19UL*23UL*29UL) + +/* Bit (p+1)/2 is set, for each odd prime <= 61 */ +#define GMP_PRIME_MASK 0xc96996dcUL + +int +mpz_probab_prime_p (const mpz_t n, int reps) +{ + mpz_t nm1; + mpz_t q; + mpz_t y; + mp_bitcnt_t k; + int is_prime; + int j; + + /* Note that we use the absolute value of n only, for compatibility + with the real GMP. */ + if (mpz_even_p (n)) + return (mpz_cmpabs_ui (n, 2) == 0) ? 2 : 0; + + /* Above test excludes n == 0 */ + assert (n->_mp_size != 0); + + if (mpz_cmpabs_ui (n, 64) < 0) + return (GMP_PRIME_MASK >> (n->_mp_d[0] >> 1)) & 2; + + if (mpz_gcd_ui (NULL, n, GMP_PRIME_PRODUCT) != 1) + return 0; + + /* All prime factors are >= 31. */ + if (mpz_cmpabs_ui (n, 31*31) < 0) + return 2; + + mpz_init (nm1); + mpz_init (q); + + /* Find q and k, where q is odd and n = 1 + 2**k * q. */ + mpz_abs (nm1, n); + nm1->_mp_d[0] -= 1; + k = mpz_scan1 (nm1, 0); + mpz_tdiv_q_2exp (q, nm1, k); + + /* BPSW test */ + mpz_init_set_ui (y, 2); + is_prime = gmp_millerrabin (n, nm1, y, q, k) && gmp_stronglucas (n, y); + reps -= 24; /* skip the first 24 repetitions */ + + /* Use Miller-Rabin, with a deterministic sequence of bases, a[j] = + j^2 + j + 41 using Euler's polynomial. We potentially stop early, + if a[j] >= n - 1. Since n >= 31*31, this can happen only if reps > + 30 (a[30] == 971 > 31*31 == 961). */ + + for (j = 0; is_prime & (j < reps); j++) + { + mpz_set_ui (y, (unsigned long) j*j+j+41); + if (mpz_cmp (y, nm1) >= 0) + { + /* Don't try any further bases. This "early" break does not affect + the result for any reasonable reps value (<=5000 was tested) */ + assert (j >= 30); + break; + } + is_prime = gmp_millerrabin (n, nm1, y, q, k); + } + mpz_clear (nm1); + mpz_clear (q); + mpz_clear (y); + + return is_prime; +} + + /* Logical operations and bit manipulation. */ /* Numbers are treated as if represented in two's complement (and infinitely sign extended). For a negative values we get the two's - complement from -x = ~x + 1, where ~ is bitwise complementt. + complement from -x = ~x + 1, where ~ is bitwise complement. Negation transforms xxxx10...0 @@ -3223,9 +3639,9 @@ mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index) { /* d < 0. Check if any of the bits below is set: If so, our bit must be complemented. */ - if (shift > 0 && (w << (GMP_LIMB_BITS - shift)) > 0) + if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0) return bit ^ 1; - while (limb_index-- > 0) + while (--limb_index >= 0) if (d->_mp_d[limb_index] > 0) return bit ^ 1; } @@ -3290,7 +3706,7 @@ mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index) gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit)); - dn -= (dp[dn-1] == 0); + dn = mpn_normalized_size (dp, dn); d->_mp_size = (d->_mp_size < 0) ? - dn : dn; } @@ -3330,8 +3746,8 @@ mpz_combit (mpz_t d, mp_bitcnt_t bit_index) void mpz_com (mpz_t r, const mpz_t u) { - mpz_neg (r, u); - mpz_sub_ui (r, r, 1); + mpz_add_ui (r, u, 1); + mpz_neg (r, r); } void @@ -3368,12 +3784,13 @@ mpz_and (mpz_t r, const mpz_t u, const mpz_t v) /* If the smaller input is positive, higher limbs don't matter. */ rn = vx ? un : vn; - rp = MPZ_REALLOC (r, rn + rc); + rp = MPZ_REALLOC (r, rn + (mp_size_t) rc); up = u->_mp_d; vp = v->_mp_d; - for (i = 0; i < vn; i++) + i = 0; + do { ul = (up[i] ^ ux) + uc; uc = ul < uc; @@ -3385,6 +3802,7 @@ mpz_and (mpz_t r, const mpz_t u, const mpz_t v) rc = rl < rc; rp[i] = rl; } + while (++i < vn); assert (vc == 0); for (; i < rn; i++) @@ -3439,12 +3857,13 @@ mpz_ior (mpz_t r, const mpz_t u, const mpz_t v) don't matter. */ rn = vx ? vn : un; - rp = MPZ_REALLOC (r, rn + rc); + rp = MPZ_REALLOC (r, rn + (mp_size_t) rc); up = u->_mp_d; vp = v->_mp_d; - for (i = 0; i < vn; i++) + i = 0; + do { ul = (up[i] ^ ux) + uc; uc = ul < uc; @@ -3456,6 +3875,7 @@ mpz_ior (mpz_t r, const mpz_t u, const mpz_t v) rc = rl < rc; rp[i] = rl; } + while (++i < vn); assert (vc == 0); for (; i < rn; i++) @@ -3506,12 +3926,13 @@ mpz_xor (mpz_t r, const mpz_t u, const mpz_t v) vx = -vc; rx = -rc; - rp = MPZ_REALLOC (r, un + rc); + rp = MPZ_REALLOC (r, un + (mp_size_t) rc); up = u->_mp_d; vp = v->_mp_d; - for (i = 0; i < vn; i++) + i = 0; + do { ul = (up[i] ^ ux) + uc; uc = ul < uc; @@ -3523,6 +3944,7 @@ mpz_xor (mpz_t r, const mpz_t u, const mpz_t v) rc = rl < rc; rp[i] = rl; } + while (++i < vn); assert (vc == 0); for (; i < un; i++) @@ -3548,32 +3970,45 @@ gmp_popcount_limb (mp_limb_t x) unsigned c; /* Do 16 bits at a time, to avoid limb-sized constants. */ - for (c = 0; x > 0; x >>= 16) + int LOCAL_SHIFT_BITS = 16; + for (c = 0; x > 0;) { - unsigned w = ((x >> 1) & 0x5555) + (x & 0x5555); + unsigned w = x - ((x >> 1) & 0x5555); w = ((w >> 2) & 0x3333) + (w & 0x3333); - w = ((w >> 4) & 0x0f0f) + (w & 0x0f0f); - w = (w >> 8) + (w & 0x00ff); + w = (w >> 4) + w; + w = ((w >> 8) & 0x000f) + (w & 0x000f); c += w; + if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) + x >>= LOCAL_SHIFT_BITS; + else + x = 0; } return c; } mp_bitcnt_t -mpz_popcount (const mpz_t u) +mpn_popcount (mp_srcptr p, mp_size_t n) { - mp_size_t un, i; + mp_size_t i; mp_bitcnt_t c; + for (c = 0, i = 0; i < n; i++) + c += gmp_popcount_limb (p[i]); + + return c; +} + +mp_bitcnt_t +mpz_popcount (const mpz_t u) +{ + mp_size_t un; + un = u->_mp_size; if (un < 0) return ~(mp_bitcnt_t) 0; - for (c = 0, i = 0; i < un; i++) - c += gmp_popcount_limb (u->_mp_d[i]); - - return c; + return mpn_popcount (u->_mp_d, un); } mp_bitcnt_t @@ -3590,16 +4025,13 @@ mpz_hamdist (const mpz_t u, const mpz_t v) if ( (un ^ vn) < 0) return ~(mp_bitcnt_t) 0; - if (un < 0) + comp = - (uc = vc = (un < 0)); + if (uc) { assert (vn < 0); un = -un; vn = -vn; - uc = vc = 1; - comp = - (mp_limb_t) 1; } - else - uc = vc = comp = 0; up = u->_mp_d; vp = v->_mp_d; @@ -3635,10 +4067,8 @@ mpz_scan1 (const mpz_t u, mp_bitcnt_t starting_bit) { mp_ptr up; mp_size_t us, un, i; - mp_limb_t limb, ux, uc; - unsigned cnt; + mp_limb_t limb, ux; - up = u->_mp_d; us = u->_mp_size; un = GMP_ABS (us); i = starting_bit / GMP_LIMB_BITS; @@ -3648,36 +4078,24 @@ mpz_scan1 (const mpz_t u, mp_bitcnt_t starting_bit) if (i >= un) return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit); - if (us < 0) - { - ux = GMP_LIMB_MAX; - uc = mpn_zero_p (up, i); - } - else - ux = uc = 0; - - limb = (ux ^ up[i]) + uc; - uc = limb < uc; - - /* Mask to 0 all bits before starting_bit, thus ignoring them. */ - limb &= (GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS)); + up = u->_mp_d; + ux = 0; + limb = up[i]; - while (limb == 0) + if (starting_bit != 0) { - i++; - if (i == un) + if (us < 0) { - assert (uc == 0); - /* For the u > 0 case, this can happen only for the first - masked limb. For the u < 0 case, it happens when the - highest limbs of the absolute value are all ones. */ - return (us >= 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS); + ux = mpn_zero_p (up, i); + limb = ~ limb + ux; + ux = - (mp_limb_t) (limb >= ux); } - limb = (ux ^ up[i]) + uc; - uc = limb < uc; + + /* Mask to 0 all bits before starting_bit, thus ignoring them. */ + limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS); } - gmp_ctz (cnt, limb); - return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt; + + return mpn_common_scan (limb, i, up, un, ux); } mp_bitcnt_t @@ -3685,46 +4103,28 @@ mpz_scan0 (const mpz_t u, mp_bitcnt_t starting_bit) { mp_ptr up; mp_size_t us, un, i; - mp_limb_t limb, ux, uc; - unsigned cnt; + mp_limb_t limb, ux; - up = u->_mp_d; us = u->_mp_size; + ux = - (mp_limb_t) (us >= 0); un = GMP_ABS (us); i = starting_bit / GMP_LIMB_BITS; /* When past end, there's an immediate 0 bit for u>=0, or no 0 bits for u<0. Notice this test picks up all cases of u==0 too. */ if (i >= un) - return (us >= 0 ? starting_bit : ~(mp_bitcnt_t) 0); + return (ux ? starting_bit : ~(mp_bitcnt_t) 0); - if (us < 0) - { - ux = GMP_LIMB_MAX; - uc = mpn_zero_p (up, i); - } - else - ux = uc = 0; + up = u->_mp_d; + limb = up[i] ^ ux; - limb = (ux ^ up[i]) + uc; - uc = limb < uc; + if (ux == 0) + limb -= mpn_zero_p (up, i); /* limb = ~(~limb + zero_p) */ - /* Mask to 1 all bits before starting_bit, thus ignoring them. */ - limb |= ((mp_limb_t) 1 << (starting_bit % GMP_LIMB_BITS)) - 1; + /* Mask all bits before starting_bit, thus ignoring them. */ + limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS); - while (limb == GMP_LIMB_MAX) - { - i++; - if (i == un) - { - assert (uc == 0); - return (us >= 0 ? un * GMP_LIMB_BITS : ~(mp_bitcnt_t) 0); - } - limb = (ux ^ up[i]) + uc; - uc = limb < uc; - } - gmp_ctz (cnt, ~limb); - return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt; + return mpn_common_scan (limb, i, up, un, ux); } @@ -3741,7 +4141,7 @@ mpz_sizeinbase (const mpz_t u, int base) size_t ndigits; assert (base >= 2); - assert (base <= 36); + assert (base <= 62); un = GMP_ABS (u->_mp_size); if (un == 0) @@ -3770,11 +4170,15 @@ mpz_sizeinbase (const mpz_t u, int base) mpn_copyi (tp, up, un); mpn_div_qr_1_invert (&bi, base); - for (ndigits = 0; un > 0; ndigits++) + ndigits = 0; + do { + ndigits++; mpn_div_qr_1_preinv (tp, tp, un, &bi); un -= (tp[un-1] == 0); } + while (un > 0); + gmp_free (tp); return ndigits; } @@ -3787,23 +4191,26 @@ mpz_get_str (char *sp, int base, const mpz_t u) mp_size_t un; size_t i, sn; - if (base >= 0) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + if (base > 1) { - digits = "0123456789abcdefghijklmnopqrstuvwxyz"; + if (base <= 36) + digits = "0123456789abcdefghijklmnopqrstuvwxyz"; + else if (base > 62) + return NULL; } + else if (base >= -1) + base = 10; else { base = -base; - digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (base > 36) + return NULL; } - if (base <= 1) - base = 10; - if (base > 36) - return NULL; sn = 1 + mpz_sizeinbase (u, base); if (!sp) - sp = gmp_xalloc (1 + sn); + sp = (char *) gmp_xalloc (1 + sn); un = GMP_ABS (u->_mp_size); @@ -3847,41 +4254,34 @@ mpz_get_str (char *sp, int base, const mpz_t u) int mpz_set_str (mpz_t r, const char *sp, int base) { - unsigned bits; + unsigned bits, value_of_a; mp_size_t rn, alloc; mp_ptr rp; - size_t sn; size_t dn; int sign; unsigned char *dp; - assert (base == 0 || (base >= 2 && base <= 36)); + assert (base == 0 || (base >= 2 && base <= 62)); while (isspace( (unsigned char) *sp)) sp++; - if (*sp == '-') - { - sign = 1; - sp++; - } - else - sign = 0; + sign = (*sp == '-'); + sp += sign; if (base == 0) { - if (*sp == '0') + if (sp[0] == '0') { - sp++; - if (*sp == 'x' || *sp == 'X') + if (sp[1] == 'x' || sp[1] == 'X') { base = 16; - sp++; + sp += 2; } - else if (*sp == 'b' || *sp == 'B') + else if (sp[1] == 'b' || sp[1] == 'B') { base = 2; - sp++; + sp += 2; } else base = 8; @@ -3890,25 +4290,30 @@ mpz_set_str (mpz_t r, const char *sp, int base) base = 10; } - sn = strlen (sp); - dp = gmp_xalloc (sn + (sn == 0)); + if (!*sp) + { + r->_mp_size = 0; + return -1; + } + dp = (unsigned char *) gmp_xalloc (strlen (sp)); + value_of_a = (base > 36) ? 36 : 10; for (dn = 0; *sp; sp++) { unsigned digit; if (isspace ((unsigned char) *sp)) continue; - if (*sp >= '0' && *sp <= '9') + else if (*sp >= '0' && *sp <= '9') digit = *sp - '0'; else if (*sp >= 'a' && *sp <= 'z') - digit = *sp - 'a' + 10; + digit = *sp - 'a' + value_of_a; else if (*sp >= 'A' && *sp <= 'Z') digit = *sp - 'A' + 10; else digit = base; /* fail */ - if (digit >= base) + if (digit >= (unsigned) base) { gmp_free (dp); r->_mp_size = 0; @@ -3918,11 +4323,17 @@ mpz_set_str (mpz_t r, const char *sp, int base) dp[dn++] = digit; } + if (!dn) + { + gmp_free (dp); + r->_mp_size = 0; + return -1; + } bits = mpn_base_power_of_two_p (base); if (bits > 0) { - alloc = (sn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS; + alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS; rp = MPZ_REALLOC (r, alloc); rn = mpn_set_str_bits (rp, dp, dn, bits); } @@ -3930,9 +4341,12 @@ mpz_set_str (mpz_t r, const char *sp, int base) { struct mpn_base_info info; mpn_get_base_info (&info, base); - alloc = (sn + info.exp - 1) / info.exp; + alloc = (dn + info.exp - 1) / info.exp; rp = MPZ_REALLOC (r, alloc); rn = mpn_set_str_other (rp, dp, dn, base, &info); + /* Normalization, needed for all-zero input. */ + assert (rn > 0); + rn -= rp[rn-1] == 0; } assert (rn <= alloc); gmp_free (dp); @@ -3966,14 +4380,9 @@ mpz_out_str (FILE *stream, int base, const mpz_t x) static int gmp_detect_endian (void) { - static const int i = 1; + static const int i = 2; const unsigned char *p = (const unsigned char *) &i; - if (*p == 1) - /* Little endian */ - return -1; - else - /* Big endian */ - return 1; + return 1 - *p; } /* Import and export. Does not support nails. */ @@ -4036,29 +4445,22 @@ mpz_import (mpz_t r, size_t count, int order, size_t size, int endian, } } } - if (bytes > 0) + assert (i + (bytes > 0) == rn); + if (limb != 0) rp[i++] = limb; - assert (i == rn); + else + i = mpn_normalized_size (rp, i); - r->_mp_size = mpn_normalized_size (rp, i); + r->_mp_size = i; } void * mpz_export (void *r, size_t *countp, int order, size_t size, int endian, size_t nails, const mpz_t u) { - unsigned char *p; - ptrdiff_t word_step; - size_t count, k; + size_t count; mp_size_t un; - /* The current (partial) limb. */ - mp_limb_t limb; - /* The number of bytes left to to in this limb. */ - size_t bytes; - /* The index where the limb was read. */ - mp_size_t i; - if (nails != 0) gmp_die ("mpz_import: Nails not supported."); @@ -4066,62 +4468,90 @@ mpz_export (void *r, size_t *countp, int order, size_t size, int endian, assert (endian >= -1 && endian <= 1); assert (size > 0 || u->_mp_size == 0); - un = GMP_ABS (u->_mp_size); - if (un == 0) - { - if (countp) - *countp = 0; - return r; - } + un = u->_mp_size; + count = 0; + if (un != 0) + { + size_t k; + unsigned char *p; + ptrdiff_t word_step; + /* The current (partial) limb. */ + mp_limb_t limb; + /* The number of bytes left to to in this limb. */ + size_t bytes; + /* The index where the limb was read. */ + mp_size_t i; - /* Count bytes in top limb. */ - for (limb = u->_mp_d[un-1], k = 0; limb > 0; k++, limb >>= CHAR_BIT) - ; + un = GMP_ABS (un); - assert (k > 0); + /* Count bytes in top limb. */ + limb = u->_mp_d[un-1]; + assert (limb != 0); - count = (k + (un-1) * sizeof (mp_limb_t) + size - 1) / size; + k = (GMP_LIMB_BITS <= CHAR_BIT); + if (!k) + { + do { + int LOCAL_CHAR_BIT = CHAR_BIT; + k++; limb >>= LOCAL_CHAR_BIT; + } while (limb != 0); + } + /* else limb = 0; */ - if (!r) - r = gmp_xalloc (count * size); + count = (k + (un-1) * sizeof (mp_limb_t) + size - 1) / size; - if (endian == 0) - endian = gmp_detect_endian (); + if (!r) + r = gmp_xalloc (count * size); - p = (unsigned char *) r; + if (endian == 0) + endian = gmp_detect_endian (); - word_step = (order != endian) ? 2 * size : 0; + p = (unsigned char *) r; - /* Process bytes from the least significant end, so point p at the - least significant word. */ - if (order == 1) - { - p += size * (count - 1); - word_step = - word_step; - } + word_step = (order != endian) ? 2 * size : 0; - /* And at least significant byte of that word. */ - if (endian == 1) - p += (size - 1); + /* Process bytes from the least significant end, so point p at the + least significant word. */ + if (order == 1) + { + p += size * (count - 1); + word_step = - word_step; + } - for (bytes = 0, i = 0, k = 0; k < count; k++, p += word_step) - { - size_t j; - for (j = 0; j < size; j++, p -= (ptrdiff_t) endian) - { - if (bytes == 0) - { - if (i < un) - limb = u->_mp_d[i++]; - bytes = sizeof (mp_limb_t); - } - *p = limb; - limb >>= CHAR_BIT; - bytes--; - } - } - assert (i == un); - assert (k == count); + /* And at least significant byte of that word. */ + if (endian == 1) + p += (size - 1); + + for (bytes = 0, i = 0, k = 0; k < count; k++, p += word_step) + { + size_t j; + for (j = 0; j < size; ++j, p -= (ptrdiff_t) endian) + { + if (sizeof (mp_limb_t) == 1) + { + if (i < un) + *p = u->_mp_d[i++]; + else + *p = 0; + } + else + { + int LOCAL_CHAR_BIT = CHAR_BIT; + if (bytes == 0) + { + if (i < un) + limb = u->_mp_d[i++]; + bytes = sizeof (mp_limb_t); + } + *p = limb; + limb >>= LOCAL_CHAR_BIT; + bytes--; + } + } + } + assert (i == un); + assert (k == count); + } if (countp) *countp = count; |