]>
git.saurik.com Git - apple/security.git/blob - OSX/libsecurity_cryptkit/lib/CurveParamDocs/giants.c
1 /**************************************************************
5 * Library for large-integer arithmetic.
7 * The large-gcd implementation is due to J. P. Buhler.
8 * Special mod routines use ideas of R. McIntosh.
9 * Contributions from G. Woltman, A. Powell acknowledged.
12 * 18 Jul 99 REC Added routine fer_mod(), for use when Fermat
13 giant itself is available.
14 * 17 Jul 99 REC Fixed sign bug in fermatmod()
15 * 17 Apr 99 REC Fixed various comment/line wraps
16 * 25 Mar 99 REC G. Woltman/A. Powell fixes Karat. routines
17 * 05 Mar 99 REC Moved invaux, binvaux giants to stack
18 * 05 Mar 99 REC Moved gread/gwrite giants to stack
19 * 05 Mar 99 REC No static on cur_den, cur_recip (A. Powell)
20 * 28 Feb 99 REC Error detection added atop newgiant().
21 * 27 Feb 99 REC Reduced wasted work in addsignal().
22 * 27 Feb 99 REC Reduced wasted work in FFTmulg().
23 * 19 Feb 99 REC Generalized iaddg() per R. Mcintosh.
24 * 2 Feb 99 REC Fixed comments.
25 * 6 Dec 98 REC Fixed yet another Karatsuba glitch.
26 * 1 Dec 98 REC Fixed errant case of addg().
27 * 28 Nov 98 REC Installed A. Powell's (correct) variant of
29 * 15 May 98 REC Modified gwrite() to handle huge integers.
30 * 13 May 98 REC Changed to static stack declarations
31 * 11 May 98 REC Installed Karatsuba multiply, to handle
32 * medregion 'twixt grammar- and FFT-multiply.
33 * 1 May 98 JF gshifts now handle bits < 0 correctly.
34 * 30 Apr 98 JF 68k assembler code removed,
35 * stack giant size now invariant and based
36 * on first call of newgiant(),
37 * stack memory leaks fixed.
38 * 29 Apr 98 JF function prototyes cleaned up,
39 * GCD no longer uses personal stack,
40 * optimized shifts for bits%16 == 0.
41 * 27 Apr 98 JF scratch giants now replaced with stack
42 * 20 Apr 98 JF grammarsquareg fixed for asize == 0.
43 * scratch giants now static.
44 * 29 Jan 98 JF Corrected out-of-range errors in
45 * mersennemod and fermatmod.
46 * 23 Dec 97 REC Sped up divide routines via split-shift.
47 * 18 Nov 97 JF Improved mersennemod, fermatmod.
48 * 9 Nov 97 JF Sped up grammarsquareg.
49 * 20 May 97 RDW Fixed Win32 compiler warnings.
50 * 18 May 97 REC Installed new, fast divide.
51 * 17 May 97 REC Reduced memory for FFT multiply.
52 * 26 Apr 97 REC Creation.
54 * c. 1997,1998 Perfectly Scientific, Inc.
55 * All Rights Reserved.
57 **************************************************************/
69 /* Compiler options. */
72 #pragma warning( disable : 4127 4706 ) /* disable conditional is constant warning */
76 /* Global variables. */
79 int mulmode
= AUTO_MUL
;
82 static int cur_stack_size
= 0;
83 static int cur_stack_elem
= 0;
84 static int stack_glen
= 0;
88 int current_max_size
= 0,
91 int checkFFTerror
= 0;
93 static giant u0
=NULL
, u1
=NULL
, v0
=NULL
, v1
=NULL
;
94 static double *z
= NULL
,
97 /* stack handling functions. */
98 static giant
popg(void);
99 static void pushg(int);
102 /* Private function prototypes. */
105 double gfloor(double);
106 int radixdiv(int, int, giant
);
107 void columnwrite(FILE *, short *, char *, short, int);
109 void normal_addg(giant
, giant
);
110 void normal_subg(giant
, giant
);
111 void reverse_subg(giant
, giant
);
112 int binvaux(giant
, giant
);
113 int invaux(giant
, giant
);
114 int allzeros(int, int, giant
);
115 void auxmulg(giant a
, giant b
);
116 void karatmulg(giant a
, giant b
);
117 void karatsquareg(giant b
);
118 void grammarmulg(giant a
, giant b
);
119 void grammarsquareg(giant b
);
122 void addsignal(giant
, double *, int);
123 void FFTsquareg(giant x
);
124 void FFTmulg(giant y
, giant x
);
125 void scramble_real();
126 void fft_real_to_hermitian(double *z
, int n
);
127 void fftinv_hermitian_to_real(double *z
, int n
);
128 void mul_hermitian(double *a
, double *b
, int n
);
129 void square_hermitian(double *b
, int n
);
130 void giant_to_double(giant x
, int sizex
, double *z
, int L
);
131 void gswap(giant
*, giant
*);
132 void onestep(giant
, giant
, gmatrix
);
133 void mulvM(gmatrix
, giant
, giant
);
134 void mulmM(gmatrix
, gmatrix
);
135 void writeM(gmatrix
);
136 static void punch(giant
, gmatrix
);
137 static void dotproduct(giant
, giant
, giant
, giant
);
138 void fix(giant
*, giant
*);
139 void hgcd(int, giant
, giant
, gmatrix
);
140 void shgcd(int, int, gmatrix
);
144 /**************************************************************
148 **************************************************************/
151 /**************************************************************
153 * Initialization and utility functions
155 **************************************************************/
179 sinCos
= (double *)malloc(sizeof(double)*(1+(n
>>2)));
180 for (j
=0;j
<=(n
>>2);j
++)
182 sinCos
[j
] = sin(e
*j
);
192 int seg
= n
/(cur_run
>>2);
196 case 0: return(sinCos
[n
]);
197 case 1: return(sinCos
[(cur_run
>>1)-n
]);
198 case 2: return(-sinCos
[n
-(cur_run
>>1)]);
199 case 3: return(-sinCos
[cur_run
-n
]);
210 int quart
= (cur_run
>>2);
213 return(s_sin(n
+quart
));
214 return(-s_sin(n
-quart
));
232 if (current_max_size
<= 0) current_max_size
= MAX_SHORTS
;
234 if (cur_stack_size
== 0) {
235 /* Initialize the stack if we're just starting out.
236 * Note that all stack giants will be whatever current_max_size is
237 * when newgiant() is first called. */
238 cur_stack_size
= STACK_GROW
;
239 stack
= (giant
*) malloc (cur_stack_size
* sizeof(giant
));
240 for(i
= 0; i
< STACK_GROW
; i
++)
242 if (stack_glen
== 0) stack_glen
= current_max_size
;
243 } else if (cur_stack_elem
>= cur_stack_size
) {
244 /* Expand the stack if we need to. */
246 cur_stack_size
+= STACK_GROW
;
247 stack
= (giant
*) realloc (stack
,cur_stack_size
* sizeof(giant
));
248 for (; i
< cur_stack_size
; i
++)
250 } else if (cur_stack_elem
< cur_stack_size
- 2*STACK_GROW
) {
251 /* Prune the stack if it's too big. Disabled, so the stack can only expand */
252 /* cur_stack_size -= STACK_GROW;
253 for (i = cur_stack_size - STACK_GROW; i < cur_stack_size; i++)
255 stack = (giant *) realloc (stack,cur_stack_size * sizeof(giant)); */
258 /* Malloc our giant. */
259 if (stack
[cur_stack_elem
] == NULL
)
260 stack
[cur_stack_elem
] = malloc(stack_glen
*sizeof(short)+sizeof(int));
261 stack
[cur_stack_elem
]->sign
= 0;
263 return(stack
[cur_stack_elem
++]);
274 if (cur_stack_elem
< 0) cur_stack_elem
= 0;
286 if (numshorts
> MAX_SHORTS
) {
287 fprintf(stderr
, "Requested giant too big.\n");
291 numshorts
= MAX_SHORTS
;
292 size
= numshorts
*sizeof(short)+sizeof(int);
293 thegiant
= (giant
)malloc(size
);
296 if (newmin(2*numshorts
,MAX_SHORTS
) > current_max_size
)
297 current_max_size
= newmin(2*numshorts
,MAX_SHORTS
);
299 /* If newgiant() is being called for the first time, set the
300 * size of the stack giants. */
301 if (stack_glen
== 0) stack_glen
= current_max_size
;
311 /* Allocates space for a gmatrix struct, but does not
312 * allocate space for the giants. */
314 return((gmatrix
) malloc (4*sizeof(giant
)));
322 int b
= 16, c
= 1<<15, w
;
326 w
= n
->n
[abs(n
->sign
) - 1];
332 return (16*(abs(n
->sign
)-1) + b
);
342 int i
= abs(pos
)>>4, c
= 1 << (pos
&15);
344 return ((n
->n
[i
]) & c
);
353 return((g
->sign
==1)&&(g
->n
[0]==1));
360 /* Returns TR if thegiant == 0. */
363 int length
= abs(thegiant
->sign
);
364 register unsigned short * numpointer
= thegiant
->n
;
368 for(count
= 0; count
<length
; ++count
,++numpointer
)
370 if (*numpointer
!= 0 )
383 /* destgiant becomes equal to srcgiant. */
385 int numbytes
= sizeof(int) + abs(srcgiant
->sign
)*sizeof(short);
387 memcpy((char *)destgiant
,(char *)srcgiant
,numbytes
);
396 /* The giant g becomes set to the integer value i. */
398 unsigned int j
= abs(i
);
406 g
->n
[0] = (unsigned short)(j
& 0xFFFF);
410 g
->n
[1] = (unsigned short)j
;
418 g
->sign
= -(g
->sign
);
426 /* Calculate the value of an int-sized giant NOT exceeding 31 bits. */
428 register int size
= abs(x
->sign
);
429 register int sign
= (x
->sign
< 0) ? -1 : 1;
436 return sign
* x
->n
[0];
438 return sign
* (x
->n
[0]+((x
->n
[1])<<16));
440 fprintf(stderr
,"Giant too large for gtoi\n");
451 /* Returns the sign of g. */
463 /* Returns -1,0,1 if a<b, a=b, a>b, respectively. */
466 int size
= abs(a
->sign
);
468 if(isZero(a
)) size
= 0;
470 if (isZero(b
)) return(0); else return(-gsign(b
));
473 if (b
->sign
== 0) return(gsign(a
));
474 if (gsign(a
)!=gsign(b
)) return(gsign(a
));
475 if (size
>abs(b
->sign
)) return(gsign(a
));
476 if (size
<abs(b
->sign
)) return(-gsign(a
));
480 if (a
->n
[size
] > b
->n
[size
]) return(gsign(a
));
481 if (a
->n
[size
] < b
->n
[size
]) return(-gsign(a
));
493 /* Returns -1,0,1 if a<b, a=b, a>b, respectively. */
495 int sa
= a
->sign
, j
, sb
= b
->sign
, va
, vb
, sgn
;
503 sa
= -sa
; /* Take absolute value of sa. */
510 for(j
= sa
-1; j
>= 0; j
--)
533 /**************************************************************
535 * Private I/O Functions
537 **************************************************************/
546 /* Divides giant of arbitrary base by divisor.
547 * Returns remainder. Used by idivg and gread. */
550 int finalsize
= abs(thegiant
->sign
);
552 unsigned short *digitpointer
=&thegiant
->n
[j
];
553 unsigned int num
,rem
=0;
557 error
= DIVIDEBYZERO
;
563 num
=rem
*base
+ *digitpointer
;
564 *digitpointer
= (unsigned short)(num
/divisor
);
567 if (*digitpointer
== 0)
577 if ((divisor
<0) ^ (thegiant
->sign
<0))
578 finalsize
=-finalsize
;
579 thegiant
->sign
=finalsize
;
592 /* Used by gwriteln. */
597 sprintf(outstring
,format
,arg
);
598 for (i
=0; outstring
[i
]!=0; ++i
)
602 if (*column
>= COLUMNWIDTH
)
604 fputs("\\\n",filepointer
);
608 fputc(outstring
[i
],filepointer
);
620 /* Outputs thegiant to filepointer. Output is terminated by a newline. */
624 unsigned short *numpointer
;
625 giant garbagegiant
, basetengrand
;
627 basetengrand
= popg();
628 garbagegiant
= popg();
630 if (isZero(thegiant
))
632 fputs("0",filepointer
);
636 numpointer
= basetengrand
->n
;
637 gtog(thegiant
,garbagegiant
);
639 basetengrand
->sign
= 0;
642 *numpointer
= (unsigned short)idivg(10000,garbagegiant
);
644 if (++basetengrand
->sign
>= current_max_size
)
649 } while (!isZero(garbagegiant
));
653 i
= basetengrand
->sign
-1;
655 if (thegiant
->sign
<0 && basetengrand
->n
[i
]!=0)
656 columnwrite(filepointer
,&column
,"-",0, newlines
);
657 columnwrite(filepointer
,&column
,"%d",basetengrand
->n
[i
],newlines
);
661 columnwrite(filepointer
,&column
,"%04d",basetengrand
->n
[i
],newlines
);
675 gwrite(theg
, filepointer
, 1);
676 fputc('\n',filepointer
);
687 int isneg
,size
,backslash
=FA
,numdigits
=0;
688 unsigned short *numpointer
;
689 giant basetenthousand
;
690 static char *inbuf
= NULL
;
692 basetenthousand
= popg();
694 inbuf
= (char*)malloc(MAX_DIGITS
);
696 currentchar
= (char)fgetc(filepointer
);
697 if (currentchar
=='-')
704 if (currentchar
!='+')
705 ungetc(currentchar
,filepointer
);
710 currentchar
= (char)fgetc(filepointer
);
711 if ((currentchar
>='0') && (currentchar
<='9'))
713 inbuf
[numdigits
]=currentchar
;
714 if(++numdigits
==MAX_DIGITS
)
720 if (currentchar
=='\\')
723 } while(((currentchar
!=' ') && (currentchar
!='\n') &&
724 (currentchar
!='\t')) || (backslash
) );
730 inbuf
[numdigits
] = 0;
734 basetenthousand
->n
[size
] = (unsigned short)strtol(&inbuf
[numdigits
],NULL
,10);
736 } while (numdigits
>0);
738 basetenthousand
->sign
= size
;
740 numpointer
= theg
->n
;
743 *numpointer
= (unsigned short)
744 radixdiv(10000,1<<(8*sizeof(short)),basetenthousand
);
746 if (++theg
->sign
>= current_max_size
)
751 } while (!isZero(basetenthousand
));
754 theg
->sign
= -theg
->sign
;
761 /**************************************************************
763 * Private Math Functions
765 **************************************************************/
783 /* g becomes the absolute value of g. */
794 /* Giant g becomes g + (int)i. */
796 int w
,j
=0,carry
= 0, size
= abs(g
->sign
);
803 else if(g
->sign
< 0) {
815 g
->n
[j
] = (unsigned short) (w
& 65535L);
818 } while ((carry
!=0) && (j
<size
));
823 g
->n
[size
] = (unsigned short)carry
;
828 /* New subtract routines.
829 The basic subtract "subg()" uses the following logic table:
831 a b if(b > a) if(a > b)
833 + + b := b - a b := -(a - b)
834 - + b := b + (-a) N.A.
835 + - N.A. b := -((-b) + a)
836 - - b := (-a) - (-b) b := -((-b) - (-a))
838 The basic addition routine "addg()" uses:
840 a b if(b > -a) if(-a > b)
843 - + b := b - (-a) b := -((-a) - b)
844 + - b := a - (-b) b := -((-b) - a)
845 - - N.A. b := -((-b) + (-a))
847 In this way, internal routines "normal_addg," "normal_subg,"
848 and "reverse_subg;" each of which assumes non-negative
849 operands and a non-negative result, are now used for greater
858 /* b := a + b, both a,b assumed non-negative. */
861 int asize
= a
->sign
, bsize
= b
->sign
;
864 unsigned short *aptr
= a
->n
, *bptr
= b
->n
;
868 for (j
=0; j
<asize
; j
++)
870 k
= *aptr
++ + *bptr
+ carry
;
877 *bptr
++ = (unsigned short)k
;
879 for (j
=asize
; j
<bsize
; j
++)
888 *bptr
++ = (unsigned short)k
;
893 for (j
=0; j
<bsize
; j
++)
895 k
= *aptr
++ + *bptr
+ carry
;
902 *bptr
++ = (unsigned short)k
;
904 for (j
=bsize
; j
<asize
; j
++)
913 *bptr
++ = (unsigned short)k
;
929 /* b := b - a; requires b, a non-negative and b >= a. */
931 int j
, size
= b
->sign
;
938 for (j
=0; j
<a
->sign
; ++j
)
940 k
+= 0xffff - a
->n
[j
] + b
->n
[j
];
941 b
->n
[j
] = (unsigned short)(k
& 0xffff);
944 for (j
=a
->sign
; j
<size
; ++j
)
946 k
+= 0xffff + b
->n
[j
];
947 b
->n
[j
] = (unsigned short)(k
& 0xffff);
951 if (b
->n
[0] == 0xffff)
956 while ((size
-- > 0) && (b
->n
[size
]==0));
958 b
->sign
= (b
->n
[size
]==0) ? 0 : size
+1;
967 /* b := a - b; requires b, a non-negative and a >= b. */
969 int j
, size
= a
->sign
;
973 for (j
=0; j
<b
->sign
; ++j
)
975 k
+= 0xffff - b
->n
[j
] + a
->n
[j
];
976 b
->n
[j
] = (unsigned short)(k
& 0xffff);
979 for (j
=b
->sign
; j
<size
; ++j
)
981 k
+= 0xffff + a
->n
[j
];
982 b
->n
[j
] = (unsigned short)(k
& 0xffff);
986 b
->sign
= size
; /* REC, 21 Apr 1996. */
987 if (b
->n
[0] == 0xffff)
992 while (!b
->n
[--size
]);
1002 /* b := b + a, any signs any result. */
1004 int asgn
= a
->sign
, bsgn
= b
->sign
;
1013 if ((asgn
< 0) == (bsgn
< 0))
1030 if (gcompg(b
,a
) >= 0)
1057 /* b := b - a, any signs, any result. */
1059 int asgn
= a
->sign
, bsgn
= b
->sign
;
1069 if ((asgn
< 0) != (bsgn
< 0))
1085 if (gcompg(b
,a
) >= 0)
1096 if (gcompg(b
,a
) >= 0)
1113 /* Returns the number of trailing zero bits in g. */
1115 register int numshorts
= abs(g
->sign
), j
, bcount
=0;
1116 register unsigned short gshort
, c
;
1118 for (j
=0;j
<numshorts
;j
++)
1122 for (bcount
=0;bcount
<16; bcount
++)
1131 return(bcount
+ 16*j
);
1140 /* u becomes greatest power of two not exceeding u/v. */
1142 int diff
= bitlen(u
) - bitlen(v
);
1152 gshiftleft(diff
,scratch7
);
1153 if (gcompg(u
,scratch7
) < 0)
1173 /* Binary inverse method. Returns zero if no inverse exists,
1174 * in which case x becomes GCD(x,p). */
1177 giant scratch7
, u0
, u1
, v0
, v1
;
1213 if(x
->sign
<0) addg(p
, x
);
1231 return(binvaux(p
,x
));
1242 return(invaux(p
,x
));
1250 /* Returns zero if no inverse exists, in which case x becomes
1254 giant scratch7
, u0
, u1
, v0
, v1
;
1256 if ((x
->sign
==1)&&(x
->n
[0]==1))
1289 if ((v0
->sign
!=1)||(v0
->n
[0]!=1))
1330 while (bitval(x
, ++k
) == 0);
1335 gshiftleft(q
-k
, u0
);
1345 if (!gcompg(v1
,x
)) {
1366 /* Classical Euclid GCD. v becomes gcd(a, v). */
1370 v
->sign
= abs(v
->sign
);
1377 u
->sign
= abs(u
->sign
);
1396 if (bitlen(y
)<= GCDLIMIT
)
1407 /* Binary form of the gcd. b becomes the gcd of a,b. */
1409 int k
= isZero(b
), m
= isZero(a
);
1434 /* Now neither a nor b is zero. */
1436 u
->sign
= abs(a
->sign
);
1438 v
->sign
= abs(b
->sign
);
1439 k
= numtrailzeros(u
);
1440 m
= numtrailzeros(v
);
1457 m
= numtrailzeros(t
);
1483 /* g becomes m^n, NO mod performed. */
1485 giant scratch2
= popg();
1508 if (n
->n
[n
->sign
-1] == 0)
1510 fprintf(stderr
,"%d %d tilt",n
->sign
, (int)(n
->n
[n
->sign
-1]));
1523 /* r becomes the steady-state reciprocal
1524 * 2^(2b)/d, where b = bit-length of d-1. */
1529 if (isZero(d
) || (d
->sign
< 0))
1545 gshiftright(b
, tmp
);
1547 gshiftright(b
, tmp
);
1550 if (gcompg(r
, tmp2
) <= 0)
1555 gshiftleft(2*b
, tmp
);
1560 while (tmp
->sign
< 0)
1574 /* n := n/d, where r is the precalculated
1575 * steady-state reciprocal of d. */
1577 int s
= 2*(bitlen(r
)-1), sign
= gsign(n
);
1580 if (isZero(d
) || (d
->sign
< 0))
1588 n
->sign
= abs(n
->sign
);
1594 gshiftright(s
, tmp
);
1598 if (gcompg(n
,d
) >= 0)
1603 if (gcompg(n
,d
) < 0)
1618 /* This is the fastest mod of the present collection.
1619 * n := n % d, where r is the precalculated
1620 * steady-state reciprocal of d. */
1623 int s
= (bitlen(r
)-1), sign
= n
->sign
;
1626 if (isZero(d
) || (d
->sign
< 0))
1634 n
->sign
= abs(n
->sign
);
1637 gtog(n
, tmp
); gshiftright(s
-1, tmp
);
1639 gshiftright(s
+1, tmp
);
1642 if (gcompg(n
,d
) >= 0)
1644 if (gcompg(n
,d
) < 0)
1666 int s
= 2*(bitlen(r
)-1), sign
= n
->sign
;
1669 if (isZero(d
) || (d
->sign
< 0))
1677 n
->sign
= abs(n
->sign
);
1682 gshiftright(s
, tmp
);
1685 if (gcompg(n
,d
) >= 0)
1687 if (gcompg(n
,d
) < 0)
1707 /* n becomes n%d. n is arbitrary, but the denominator d must be positive! */
1709 if (cur_recip
== NULL
) {
1710 cur_recip
= newgiant(current_max_size
);
1711 cur_den
= newgiant(current_max_size
);
1713 make_recip(d
, cur_recip
);
1714 } else if (gcompg(d
, cur_den
)) {
1716 make_recip(d
, cur_recip
);
1718 modg_via_recip(d
, cur_recip
, n
);
1730 /* a becomes (a*b) (mod 2^q-k) where q % 16 == 0 and k is "small" (0 < k < 65535).
1731 * Returns 0 if unsuccessful, otherwise 1. */
1733 giant carry
, kk
, scratch
;
1735 int asize
= abs(a
->sign
), bsize
= abs(b
->sign
);
1736 unsigned short *aptr
,*bptr
,*destptr
;
1740 if ((q
% 16) || (k
<= 0) || (k
>= 65535)) {
1748 for (i
=0; i
<asize
+bsize
; i
++) scratch
->n
[i
]=0;
1753 for (i
= 0; i
< bsize
; i
++) {
1758 if (kpower
>= 1) itog (kpower
,kk
);
1759 for (j
= 1; j
< kpower
; k
++) smulg(kpower
,kk
);
1764 for (j
= 0; j
< bsize
; b
++) {
1766 smulg(*aptr
++,scratch
);
1767 smulg(mult
,scratch
);
1768 iaddg(*destptr
,scratch
);
1769 addg(carry
,scratch
);
1770 *destptr
++ = scratch
->n
[0];
1771 gshiftright(scratch
,16);
1772 gtog(scratch
,carry
);
1773 if (destptr
- scratch
->n
>= words
) {
1783 unsigned int prod
,carry
=0;
1784 int asize
= abs(a
->sign
), bsize
= abs(b
->sign
);
1785 unsigned short *aptr
,*bptr
,*destptr
;
1786 unsigned short mult
;
1789 giant scratch
= popg(), scratch2
= popg(), scratch3
= popg();
1790 short *carryptr
= scratch
->n
;
1791 int kpower
,kpowerlimit
, curk
;
1793 if ((q
% 16) || (k
<= 0) || (k
>= 65535)) {
1799 for (i
=0; i
<asize
+bsize
; i
++) scratch
->n
[i
]=0;
1804 for (i
=0; i
<bsize
; ++i
)
1811 destptr
= scratch
->n
+ i
;
1815 } else if (kpower
<= kpowerlimit
) {
1818 for (j
= 1; j
< kpower
; j
++) curk
*= k
;
1821 for (j
= 1; j
< kpower
; j
++) smulg(k
,scratch
);
1825 for (j
= 0; j
< asize
; j
++) {
1827 prod
= *aptr
++ * mult
+ *destptr
+ carry
;
1828 *destptr
++ = (unsigned short)(prod
& 0xFFFF);
1830 } else if (kpower
< kpowerlimit
) {
1831 prod
= kcur
* *aptr
++;
1838 prod
+= *destptr
+ carry
;
1839 carry
= prod
>> 16 + temp
;
1840 *destptr
++ = (unsigned short)(prod
& 0xFFFF);
1842 gtog(scratch
,scratch3
);
1843 smulg(*aptr
++,scratch3
);
1844 smulg(mult
,scratch3
);
1845 iaddg(*destptr
,scratch3
);
1846 addg(scratch3
,scratch2
);
1847 *destptr
++ = scratch2
->n
[0];
1848 memmove(scratch2
->n
,scratch2
->n
+1,2*(scratch2
->size
-1));
1851 if (destptr
- scratch
->n
> words
) {
1855 } else if (kpower
< kpowerlimit
) {
1858 } else if (kpower
== kpowerlimit
) {
1860 for (j
= 1; j
< kpower
; j
++) smulg(k
,scratch
);
1861 itog(carry
,scratch2
);
1872 /* Next, deal with the carry term. Needs to be improved to
1873 handle overflow carry cases. */
1874 if (kpower
<= kpowerlimit
) {
1875 iaddg(carry
,scratch
);
1877 addg(scratch2
,scratch
);
1879 while(scratch
->sign
> q
)
1880 gtog(scratch
,scratch2
)
1883 scratch
->sign
= destptr
- scratch
->n
;
1886 scratch
->sign
*= gsign(a
)*gsign(b
);
1899 /* theg becomes theg/divisor. Returns remainder. */
1901 int base
= 1<<(8*sizeof(short));
1903 n
= radixdiv(base
,divisor
,theg
);
1913 /* n becomes n/d. n is arbitrary, but the denominator d must be positive! */
1915 if (cur_recip
== NULL
) {
1916 cur_recip
= newgiant(current_max_size
);
1917 cur_den
= newgiant(current_max_size
);
1919 make_recip(d
, cur_recip
);
1920 } else if (gcompg(d
, cur_den
)) {
1922 make_recip(d
, cur_recip
);
1924 divg_via_recip(d
, cur_recip
, n
);
1934 /* x becomes x^n (mod g). */
1936 giant scratch2
= popg();
1963 /* x becomes x^n (mod g). */
1966 giant scratch2
= popg();
1974 if (bitval(n
, pos
++))
1994 /* x becomes x^n (mod 2^q+1). */
1996 giant scratch2
= popg();
2011 fermatmod(q
, scratch2
);
2024 /* x becomes x^n (mod 2^q+1). */
2027 giant scratch2
= popg();
2035 if (bitval(n
, pos
++))
2043 fermatmod(q
, scratch2
);
2055 /* x becomes x^n (mod 2^q-1). */
2057 giant scratch2
= popg();
2072 mersennemod(q
, scratch2
);
2085 /* x becomes x^n (mod 2^q-1). */
2088 giant scratch2
= popg();
2096 if (bitval(n
, pos
++))
2104 mersennemod(q
, scratch2
);
2115 /* shift g left bits bits. Equivalent to g = g*2^bits. */
2117 int rem
= bits
&15, crem
= 16-rem
, words
= bits
>>4;
2118 int size
= abs(g
->sign
), j
, k
, sign
= gsign(g
);
2119 unsigned short carry
, dat
;
2126 gshiftright(-bits
,g
);
2129 if (size
+words
+1 > current_max_size
) {
2134 memmove(g
->n
+ words
, g
->n
, size
* sizeof(short));
2135 for (j
= 0; j
< words
; j
++) g
->n
[j
] = 0;
2136 g
->sign
+= (g
->sign
< 0)?(-words
):(words
);
2140 for (j
=size
-1; j
>=0; j
--) {
2142 g
->n
[k
--] = (unsigned short)((dat
>> crem
) | carry
);
2143 carry
= (unsigned short)(dat
<< rem
);
2153 g
->sign
= sign
*(k
+1);
2163 /* shift g right bits bits. Equivalent to g = g/2^bits. */
2165 register int j
,size
=abs(g
->sign
);
2166 register unsigned int carry
;
2167 int words
= bits
>> 4;
2168 int remain
= bits
& 15, cremain
= (16-remain
);
2175 gshiftleft(-bits
,g
);
2178 if (words
>= size
) {
2183 memmove(g
->n
,g
->n
+ words
,(size
- words
) * sizeof(short));
2184 g
->sign
+= (g
->sign
< 0)?(words
):(-words
);
2190 for(j
=0;j
<size
-1;++j
)
2192 carry
= g
->n
[j
+words
+1] << cremain
;
2193 g
->n
[j
] = (unsigned short)((g
->n
[j
+words
] >> remain
) | carry
);
2195 g
->n
[size
-1] = (unsigned short)(g
->n
[size
-1+words
] >> remain
);
2198 if (g
->n
[size
-1] == 0)
2215 /* dest becomes lowermost n bits of src. Equivalent to dest = src % 2^n. */
2217 register int words
= n
>> 4, numbytes
= words
*sizeof(short);
2218 register int bits
= n
& 15;
2222 if (words
>= abs(src
->sign
))
2226 memcpy((char *)(dest
->n
), (char *)(src
->n
), numbytes
);
2229 dest
->n
[words
] = (unsigned short)(src
->n
[words
] & ((1<<bits
)-1));
2232 while ((dest
->n
[words
-1] == 0) && (words
> 0))
2237 dest
->sign
= -words
;
2258 return((int)(!(g
->n
[shorts
] & ((1<<bits
)-1))));
2267 /* negate g. g is mod 2^n+1. */
2269 register int shorts
= n
>>4,
2273 register unsigned carry
,temp
;
2275 for (temp
=(unsigned)shorts
; (int)temp
>g
->sign
-1; --temp
)
2279 if (g
->n
[shorts
] & mask
)
2280 { /* if high bit is set, -g = 1. */
2285 if (allzeros(shorts
,bits
,g
))
2286 return; /* if g=0, -g = 0. */
2290 g
->n
[i
] = (unsigned short)(~(g
->n
[i
+1]));
2292 g
->n
[shorts
] ^= mask
-1;
2298 temp
= g
->n
[i
]+carry
;
2299 g
->n
[i
++] = (unsigned short)(temp
& 0xffff);
2302 while(!g
->n
[shorts
])
2315 /* g := g (mod 2^n - 1) */
2318 giant scratch3
= popg(), scratch4
= popg();
2320 if ((the_sign
= gsign(g
)) < 0) absg(g
);
2321 while (bitlen(g
) > n
) {
2323 gshiftright(n
,scratch3
);
2325 gshiftleft(n
,scratch3
);
2329 if ((s
= gsign(g
)) < 0) absg(g
);
2331 gshiftleft(n
,scratch3
);
2333 subg(scratch4
,scratch3
);
2334 if(gcompg(g
,scratch3
) >= 0) subg(scratch3
,g
);
2352 /* g := g (mod 2^n + 1), */
2355 giant scratch3
= popg();
2357 if ((the_sign
= gsign(g
)) < 0) absg(g
);
2358 while (bitlen(g
) > n
) {
2360 gshiftright(n
,scratch3
);
2362 gshiftleft(n
,scratch3
);
2365 if((bitlen(g
) < n
) && (the_sign
* (g
->sign
) >= 0)) goto leave
;
2367 if ((s
= gsign(g
)) < 0) absg(g
);
2369 gshiftleft(n
,scratch3
);
2371 if(gcompg(g
,scratch3
) >= 0) subg(scratch3
,g
);
2372 if (s
* the_sign
< 0) {
2388 /* Same as fermatmod(), except modulus = 2^n should be passed
2389 if available (i.e. if already allocated and set). */
2392 giant scratch3
= popg();
2394 if ((the_sign
= gsign(g
)) < 0) absg(g
);
2395 while (bitlen(g
) > n
) {
2397 gshiftright(n
,scratch3
);
2399 gshiftleft(n
,scratch3
);
2402 if((bitlen(g
) < n
) && (the_sign
* (g
->sign
) >= 0)) goto leave
;
2404 if ((s
= gsign(g
)) < 0) absg(g
);
2405 if(gcompg(g
,modulus
) >= 0) subg(modulus
,g
);
2406 if (s
* the_sign
< 0) {
2421 /* g becomes g * i. */
2423 unsigned short carry
= 0;
2424 int size
= abs(g
->sign
);
2425 register int j
,k
,mul
= abs(i
);
2426 unsigned short *digit
= g
->n
;
2428 for (j
=0; j
<size
; ++j
)
2430 k
= *digit
* mul
+ carry
;
2431 carry
= (unsigned short)(k
>>16);
2432 *digit
= (unsigned short)(k
& 0xffff);
2437 if (++j
>= current_max_size
)
2445 if ((g
->sign
>0) ^ (i
>0))
2456 /* b becomes b^2. */
2467 /* b becomes a*b. */
2478 /* Optimized general multiply, b becomes a*b. Modes are:
2479 * AUTO_MUL: switch according to empirical speed criteria.
2480 * GRAMMAR_MUL: force grammar-school algorithm.
2481 * KARAT_MUL: force Karatsuba divide-conquer method.
2482 * FFT_MUL: force floating point FFT method. */
2485 int square
= (a
==b
);
2491 if (square
) grammarsquareg(b
);
2492 else grammarmulg(a
,b
);
2501 if (square
) karatsquareg(b
);
2502 else karatmulg(a
,b
);
2505 sizea
= abs(a
->sign
);
2506 sizeb
= abs(b
->sign
);
2507 if((sizea
> KARAT_BREAK
) && (sizea
<= FFT_BREAK
) &&
2508 (sizeb
> KARAT_BREAK
) && (sizeb
<= FFT_BREAK
)){
2509 if(square
) karatsquareg(b
);
2510 else karatmulg(a
,b
);
2513 grammartime
= (float)sizea
;
2514 grammartime
*= (float)sizeb
;
2515 if (grammartime
< FFT_BREAK
* FFT_BREAK
)
2517 if (square
) grammarsquareg(b
);
2518 else grammarmulg(a
,b
);
2522 if (square
) FFTsquareg(b
);
2532 int s
= x
->sign
, sg
= 1;
2539 while(x
->n
[s
] == 0) {
2546 /* Next, improved Karatsuba routines from A. Powell,
2547 improvements by G. Woltman. */
2550 karatmulg(giant x
, giant y
)
2551 /* y becomes x*y. */
2553 int s
= abs(x
->sign
), t
= abs(y
->sign
), w
, bits
,
2554 sg
= gsign(x
)*gsign(y
);
2555 giant a
, b
, c
, d
, e
, f
;
2557 if((s
<= KARAT_BREAK
) || (t
<= KARAT_BREAK
)) {
2561 w
= (s
+ t
+ 2)/4; bits
= 16*w
;
2562 a
= popg(); b
= popg(); c
= popg();
2563 d
= popg(); e
= popg(); f
= popg();
2564 gtog(x
,a
); absg(a
); if (w
<= s
) {a
->sign
= w
; justg(a
);}
2566 gshiftright(bits
, b
);
2567 gtog(y
,c
); absg(c
); if (w
<= t
) {c
->sign
= w
; justg(c
);}
2569 gshiftright(bits
,d
);
2570 gtog(a
,e
); normal_addg(b
,e
); /* e := (a + b) */
2571 gtog(c
,f
); normal_addg(d
,f
); /* f := (c + d) */
2572 karatmulg(e
,f
); /* f := (a + b)(c + d) */
2573 karatmulg(c
,a
); /* a := a c */
2574 karatmulg(d
,b
); /* b := b d */
2576 /* f := (a + b)(c + d) - a c */
2578 /* f := (a + b)(c + d) - a c - b d */
2579 gshiftleft(bits
, b
);
2581 gshiftleft(bits
, b
);
2583 gtog(b
, y
); y
->sign
*= sg
;
2590 karatsquareg(giant x
)
2591 /* x becomes x^2. */
2593 int s
= abs(x
->sign
), w
, bits
;
2596 if(s
<= KARAT_BREAK
) {
2600 w
= (s
+1)/2; bits
= 16*w
;
2601 a
= popg(); b
= popg(); c
= popg();
2602 gtog(x
, a
); a
->sign
= w
; justg(a
);
2603 gtog(x
, b
); absg(b
);
2604 gshiftright(bits
, b
);
2605 gtog(a
,c
); normal_addg(b
,c
);
2611 gshiftleft(bits
, b
);
2613 gshiftleft(bits
, b
);
2626 /* b becomes a*b. */
2629 unsigned int prod
,carry
=0;
2630 int asize
= abs(a
->sign
), bsize
= abs(b
->sign
);
2631 unsigned short *aptr
,*bptr
,*destptr
;
2632 unsigned short mult
;
2633 giant scratch
= popg();
2635 for (i
=0; i
<asize
+bsize
; ++i
)
2641 for (i
=0; i
<bsize
; ++i
)
2648 destptr
= &(scratch
->n
[i
]);
2649 for (j
=0; j
<asize
; ++j
)
2651 prod
= *(aptr
++) * mult
+ *destptr
+ carry
;
2652 *(destptr
++) = (unsigned short)(prod
& 0xffff);
2655 *destptr
= (unsigned short)carry
;
2661 scratch
->sign
= gsign(a
)*gsign(b
)*bsize
;
2673 unsigned int cur_term
;
2674 unsigned int prod
, carry
=0, temp
;
2675 int asize
= abs(a
->sign
), max
= asize
* 2 - 1;
2676 unsigned short *ptr
= a
->n
, *ptr1
, *ptr2
;
2690 scratch
->n
[0] = temp
;
2693 for (cur_term
= 1; cur_term
< max
; cur_term
++) {
2695 if (cur_term
<= asize
) {
2698 ptr1
+= cur_term
- asize
;
2701 prod
= carry
& 0xFFFF;
2703 while(ptr1
< ptr2
) {
2704 temp
= *ptr1
++ * *ptr2
--;
2705 prod
+= (temp
<< 1) & 0xFFFF;
2706 carry
+= (temp
>> 15);
2711 prod
+= temp
& 0xFFFF;
2712 carry
+= (temp
>> 16);
2714 carry
+= prod
>> 16;
2715 scratch
->n
[cur_term
] = (unsigned short) (prod
);
2718 scratch
->n
[cur_term
] = carry
;
2719 scratch
->sign
= cur_term
+1;
2720 } else scratch
->sign
= cur_term
;
2727 /**************************************************************
2729 * FFT multiply Functions
2731 **************************************************************/
2740 /* Returns least power of two greater than n. */
2761 register int j
, k
, m
, car
, last
;
2762 register double f
, g
,err
;
2768 f
= gfloor(z
[j
]+0.5);
2769 if(f
!= 0.0) last
= j
;
2772 err
= fabs(f
- z
[j
]);
2773 if (err
> maxFFTerror
)
2780 g
= gfloor(f
*TWOM16
);
2781 z
[j
+k
] += f
-g
*TWO16
;
2787 for(j
=0;j
< last
+ 1;j
++)
2789 m
= (int)(z
[j
]+car
);
2790 x
->n
[j
] = (unsigned short)(m
& 0xffff);
2794 x
->n
[j
] = (unsigned short)car
;
2798 while(!(x
->n
[j
])) --j
;
2809 int j
,size
= abs(x
->sign
);
2817 L
= lpt(size
+size
, &j
);
2818 if(!z
) z
= (double *)malloc(MAX_SHORTS
* sizeof(double));
2819 giant_to_double(x
, size
, z
, L
);
2820 fft_real_to_hermitian(z
, L
);
2821 square_hermitian(z
, L
);
2822 fftinv_hermitian_to_real(z
, L
);
2824 x
->sign
= abs(x
->sign
);
2834 /* x becomes y*x. */
2835 int lambda
, sizex
= abs(x
->sign
), sizey
= abs(y
->sign
);
2836 int finalsign
= gsign(x
)*gsign(y
);
2839 if ((sizex
<=4)||(sizey
<=4))
2844 L
= lpt(sizex
+sizey
, &lambda
);
2845 if(!z
) z
= (double *)malloc(MAX_SHORTS
* sizeof(double));
2846 if(!z2
) z2
= (double *)malloc(MAX_SHORTS
* sizeof(double));
2848 giant_to_double(x
, sizex
, z
, L
);
2849 giant_to_double(y
, sizey
, z2
, L
);
2850 fft_real_to_hermitian(z
, L
);
2851 fft_real_to_hermitian(z2
, L
);
2852 mul_hermitian(z2
, z
, L
);
2853 fftinv_hermitian_to_real(z
, L
);
2855 x
->sign
= finalsign
*abs(x
->sign
);
2866 register double tmp
;
2868 for (i
=0,j
=0;i
<n
-1;i
++)
2888 fft_real_to_hermitian(
2892 /* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
2893 * This is a decimation-in-time, split-radix algorithm.
2896 register double cc1
, ss1
, cc3
, ss3
;
2897 register int is
, id
, i0
, i1
, i2
, i3
, i4
, i5
, i6
, i7
, i8
,
2898 a
, a3
, b
, b3
, nminus
= n
-1, dil
, expand
;
2899 register double *x
, e
;
2901 double t1
, t2
, t3
, t4
, t5
, t6
;
2902 register int n2
, n4
, n8
, i
, j
;
2906 scramble_real(z
, n
);
2907 x
= z
-1; /* FORTRAN compatibility. */
2912 for (i0
=is
;i0
<=n
;i0
+=id
)
2933 for (i
=is
;i
<n
;i
+=id
)
2949 t1
= (x
[i3
]+x
[i4
])*SQRTHALF
;
2950 t2
= (x
[i3
]-x
[i4
])*SQRTHALF
;
2952 x
[i3
] = -x
[i2
] - t1
;
2963 a3
= (a
+(a
<<1))&nminus
;
2981 i5
= i
+ n4
- j
+ 2;
2985 t1
= x
[i3
]*cc1
+ x
[i7
]*ss1
;
2986 t2
= x
[i7
]*cc1
- x
[i3
]*ss1
;
2987 t3
= x
[i4
]*cc3
+ x
[i8
]*ss3
;
2988 t4
= x
[i8
]*cc3
- x
[i4
]*ss3
;
2997 x
[i7
] = -x
[i2
] - t3
;
3015 fftinv_hermitian_to_real(
3019 /* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
3020 * This is a decimation-in-frequency, split-radix algorithm.
3023 register double cc1
, ss1
, cc3
, ss3
;
3024 register int is
, id
, i0
, i1
, i2
, i3
, i4
, i5
, i6
, i7
, i8
,
3025 a
, a3
, b
, b3
, nminus
= n
-1, dil
, expand
;
3026 register double *x
, e
;
3028 double t1
, t2
, t3
, t4
, t5
;
3029 int n2
, n4
, n8
, i
, j
;
3053 x
[i3
] = t1
- 2.0*x
[i4
];
3054 x
[i4
] = t1
+ 2.0*x
[i4
];
3061 t1
= (x
[i2
]-x
[i1
])*SQRTHALF
;
3062 t2
= (x
[i4
]+x
[i3
])*SQRTHALF
;
3064 x
[i2
] = x
[i4
]-x
[i3
];
3065 x
[i3
] = -2.0*(t2
+t1
);
3066 x
[i4
] = 2.0*(t1
-t2
);
3075 a3
= (a
+(a
<<1))&nminus
;
3102 x
[i6
] = x
[i8
] - x
[i3
];
3104 x
[i2
] = x
[i4
] - x
[i7
];
3109 x
[i3
] = t5
*cc1
+ t4
*ss1
;
3110 x
[i7
] = -t4
*cc1
+ t5
*ss1
;
3111 x
[i4
] = t1
*cc3
- t2
*ss3
;
3112 x
[i8
] = t2
*cc3
+ t1
*ss3
;
3123 for (i0
=is
;i0
<=n
;i0
+=id
)
3133 scramble_real(z
, n
);
3149 register int k
, half
= n
>>1;
3150 register double aa
, bb
, am
, bm
;
3154 for (k
=1;k
<half
;k
++)
3160 b
[k
] = aa
*bb
- am
*bm
;
3161 b
[n
-k
] = aa
*bm
+ am
*bb
;
3172 register int k
, half
= n
>>1;
3173 register double c
, d
;
3177 for (k
=1;k
<half
;k
++)
3198 for (j
=sizex
;j
<L
;j
++)
3202 for (j
=0;j
<sizex
;j
++)
3229 /* Do one step of the euclidean algorithm and modify
3230 * the matrix A accordingly. */
3252 /* Multiply vector by Matrix; changes x,y. */
3254 giant s0
= popg(), s1
= popg();
3258 dotproduct(x
,y
,A
->ul
,s0
);
3259 dotproduct(x
,y
,A
->ll
,s1
);
3272 /* Multiply matrix by Matrix; changes second matrix. */
3283 dotproduct(A
->ur
,A
->ul
,B
->ll
,s0
);
3284 dotproduct(A
->ur
,A
->ul
,B
->lr
,s1
);
3285 dotproduct(A
->ll
,A
->lr
,B
->ul
,s2
);
3286 dotproduct(A
->ll
,A
->lr
,B
->ur
,s3
);
3317 /* Multiply the matrix A on the left by [0,1,1,-q]. */
3323 gswap(&A
->ul
,&A
->ll
);
3328 gswap(&A
->ur
,&A
->lr
);
3343 /* Replace last argument with the dot product of two 2-vectors. */
3361 /* A giant gcd. Modifies its arguments. */
3363 giant x
= popg(), y
= popg();
3364 gmatrix A
= newgmatrix();
3366 gtog(xx
,x
); gtog(yy
,y
);
3370 if (bitlen(y
) <= GCDLIMIT
)
3384 if (bitlen(y
) <= GCDLIMIT
)
3401 /* Insure that x > y >= 0. */
3407 if( gcompg(*p
,*q
) < 0 )
3419 /* hgcd(n,x,y,A) chops n bits off x and y and computes th
3420 * 2 by 2 matrix A such that A[x y] is the pair of terms
3421 * in the remainder sequence starting with x,y that is
3422 * half the size of x. Note that the argument A is modified
3423 * but that the arguments xx and yy are left unchanged.
3437 if (bitlen(x
) <= INTLIMIT
)
3439 shgcd(gtoi(x
),gtoi(y
),A
);
3443 gmatrix B
= newgmatrix();
3444 int m
= bitlen(x
)/2;
3450 negg(x
); negg(A
->ul
); negg(A
->ur
);
3454 negg(y
); negg(A
->ll
); negg(A
->lr
);
3456 if (gcompg(x
,y
) < 0)
3459 gswap(&A
->ul
,&A
->ll
);
3460 gswap(&A
->ur
,&A
->lr
);
3491 * Do a half gcd on the integers a and b, putting the result in A
3492 * It is fairly easy to use the 2 by 2 matrix description of the
3493 * extended Euclidean algorithm to prove that the quantity q*t
3497 register int q
, t
, start
= x
;
3498 int Aul
= 1, Aur
= 0, All
= 0, Alr
= 1;
3500 while (y
!= 0 && y
> start
/y
)