]> git.saurik.com Git - apple/security.git/blob - OSX/libsecurity_cryptkit/lib/CurveParamDocs/giants.c
Security-59754.80.3.tar.gz
[apple/security.git] / OSX / libsecurity_cryptkit / lib / CurveParamDocs / giants.c
1 /**************************************************************
2 *
3 * giants.c
4 *
5 * Library for large-integer arithmetic.
6 *
7 * The large-gcd implementation is due to J. P. Buhler.
8 * Special mod routines use ideas of R. McIntosh.
9 * Contributions from G. Woltman, A. Powell acknowledged.
10 *
11 * Updates:
12 * 18 Jul 99 REC Added routine fer_mod(), for use when Fermat
13 giant itself is available.
14 * 17 Jul 99 REC Fixed sign bug in fermatmod()
15 * 17 Apr 99 REC Fixed various comment/line wraps
16 * 25 Mar 99 REC G. Woltman/A. Powell fixes Karat. routines
17 * 05 Mar 99 REC Moved invaux, binvaux giants to stack
18 * 05 Mar 99 REC Moved gread/gwrite giants to stack
19 * 05 Mar 99 REC No static on cur_den, cur_recip (A. Powell)
20 * 28 Feb 99 REC Error detection added atop newgiant().
21 * 27 Feb 99 REC Reduced wasted work in addsignal().
22 * 27 Feb 99 REC Reduced wasted work in FFTmulg().
23 * 19 Feb 99 REC Generalized iaddg() per R. Mcintosh.
24 * 2 Feb 99 REC Fixed comments.
25 * 6 Dec 98 REC Fixed yet another Karatsuba glitch.
26 * 1 Dec 98 REC Fixed errant case of addg().
27 * 28 Nov 98 REC Installed A. Powell's (correct) variant of
28 Karatsuba multiply.
29 * 15 May 98 REC Modified gwrite() to handle huge integers.
30 * 13 May 98 REC Changed to static stack declarations
31 * 11 May 98 REC Installed Karatsuba multiply, to handle
32 * medregion 'twixt grammar- and FFT-multiply.
33 * 1 May 98 JF gshifts now handle bits < 0 correctly.
34 * 30 Apr 98 JF 68k assembler code removed,
35 * stack giant size now invariant and based
36 * on first call of newgiant(),
37 * stack memory leaks fixed.
38 * 29 Apr 98 JF function prototyes cleaned up,
39 * GCD no longer uses personal stack,
40 * optimized shifts for bits%16 == 0.
41 * 27 Apr 98 JF scratch giants now replaced with stack
42 * 20 Apr 98 JF grammarsquareg fixed for asize == 0.
43 * scratch giants now static.
44 * 29 Jan 98 JF Corrected out-of-range errors in
45 * mersennemod and fermatmod.
46 * 23 Dec 97 REC Sped up divide routines via split-shift.
47 * 18 Nov 97 JF Improved mersennemod, fermatmod.
48 * 9 Nov 97 JF Sped up grammarsquareg.
49 * 20 May 97 RDW Fixed Win32 compiler warnings.
50 * 18 May 97 REC Installed new, fast divide.
51 * 17 May 97 REC Reduced memory for FFT multiply.
52 * 26 Apr 97 REC Creation.
53 *
54 * c. 1997,1998 Perfectly Scientific, Inc.
55 * All Rights Reserved.
56 *
57 **************************************************************/
58
59
60 /* Include Files. */
61
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <math.h>
66 #include "giants.h"
67
68
69 /* Compiler options. */
70
71 #ifdef _WIN32
72 #pragma warning( disable : 4127 4706 ) /* disable conditional is constant warning */
73 #endif
74
75
76 /* Global variables. */
77
78 int error = 0;
79 int mulmode = AUTO_MUL;
80 int cur_prec = 0;
81 int cur_shift = 0;
82 static int cur_stack_size = 0;
83 static int cur_stack_elem = 0;
84 static int stack_glen = 0;
85 static giant *stack;
86 giant cur_den = NULL,
87 cur_recip = NULL;
88 int current_max_size = 0,
89 cur_run = 0;
90 double * sinCos=NULL;
91 int checkFFTerror = 0;
92 double maxFFTerror;
93 static giant u0=NULL, u1=NULL, v0=NULL, v1=NULL;
94 static double *z = NULL,
95 *z2 = NULL;
96
97 /* stack handling functions. */
98 static giant popg(void);
99 static void pushg(int);
100
101
102 /* Private function prototypes. */
103
104 int gerr(void);
105 double gfloor(double);
106 int radixdiv(int, int, giant);
107 void columnwrite(FILE *, short *, char *, short, int);
108
109 void normal_addg(giant, giant);
110 void normal_subg(giant, giant);
111 void reverse_subg(giant, giant);
112 int binvaux(giant, giant);
113 int invaux(giant, giant);
114 int allzeros(int, int, giant);
115 void auxmulg(giant a, giant b);
116 void karatmulg(giant a, giant b);
117 void karatsquareg(giant b);
118 void grammarmulg(giant a, giant b);
119 void grammarsquareg(giant b);
120
121 int lpt(int, int *);
122 void addsignal(giant, double *, int);
123 void FFTsquareg(giant x);
124 void FFTmulg(giant y, giant x);
125 void scramble_real();
126 void fft_real_to_hermitian(double *z, int n);
127 void fftinv_hermitian_to_real(double *z, int n);
128 void mul_hermitian(double *a, double *b, int n);
129 void square_hermitian(double *b, int n);
130 void giant_to_double(giant x, int sizex, double *z, int L);
131 void gswap(giant *, giant *);
132 void onestep(giant, giant, gmatrix);
133 void mulvM(gmatrix, giant, giant);
134 void mulmM(gmatrix, gmatrix);
135 void writeM(gmatrix);
136 static void punch(giant, gmatrix);
137 static void dotproduct(giant, giant, giant, giant);
138 void fix(giant *, giant *);
139 void hgcd(int, giant, giant, gmatrix);
140 void shgcd(int, int, gmatrix);
141
142
143
144 /**************************************************************
145 *
146 * Functions
147 *
148 **************************************************************/
149
150
151 /**************************************************************
152 *
153 * Initialization and utility functions
154 *
155 **************************************************************/
156
157 double
158 gfloor(
159 double f
160 )
161 {
162 return floor(f);
163 }
164
165
166 void
167 init_sinCos(
168 int n
169 )
170 {
171 int j;
172 double e = TWOPI/n;
173
174 if (n<=cur_run)
175 return;
176 cur_run = n;
177 if (sinCos)
178 free(sinCos);
179 sinCos = (double *)malloc(sizeof(double)*(1+(n>>2)));
180 for (j=0;j<=(n>>2);j++)
181 {
182 sinCos[j] = sin(e*j);
183 }
184 }
185
186
187 double
188 s_sin(
189 int n
190 )
191 {
192 int seg = n/(cur_run>>2);
193
194 switch (seg)
195 {
196 case 0: return(sinCos[n]);
197 case 1: return(sinCos[(cur_run>>1)-n]);
198 case 2: return(-sinCos[n-(cur_run>>1)]);
199 case 3: return(-sinCos[cur_run-n]);
200 }
201 return 0;
202 }
203
204
205 double
206 s_cos(
207 int n
208 )
209 {
210 int quart = (cur_run>>2);
211
212 if (n < quart)
213 return(s_sin(n+quart));
214 return(-s_sin(n-quart));
215 }
216
217
218 int
219 gerr(void)
220 {
221 return(error);
222 }
223
224
225 giant
226 popg (
227 void
228 )
229 {
230 int i;
231
232 if (current_max_size <= 0) current_max_size = MAX_SHORTS;
233
234 if (cur_stack_size == 0) {
235 /* Initialize the stack if we're just starting out.
236 * Note that all stack giants will be whatever current_max_size is
237 * when newgiant() is first called. */
238 cur_stack_size = STACK_GROW;
239 stack = (giant *) malloc (cur_stack_size * sizeof(giant));
240 for(i = 0; i < STACK_GROW; i++)
241 stack[i] = NULL;
242 if (stack_glen == 0) stack_glen = current_max_size;
243 } else if (cur_stack_elem >= cur_stack_size) {
244 /* Expand the stack if we need to. */
245 i = cur_stack_size;
246 cur_stack_size += STACK_GROW;
247 stack = (giant *) realloc (stack,cur_stack_size * sizeof(giant));
248 for (; i < cur_stack_size; i++)
249 stack[i] = NULL;
250 } else if (cur_stack_elem < cur_stack_size - 2*STACK_GROW) {
251 /* Prune the stack if it's too big. Disabled, so the stack can only expand */
252 /* cur_stack_size -= STACK_GROW;
253 for (i = cur_stack_size - STACK_GROW; i < cur_stack_size; i++)
254 free(stack[i]);
255 stack = (giant *) realloc (stack,cur_stack_size * sizeof(giant)); */
256 }
257
258 /* Malloc our giant. */
259 if (stack[cur_stack_elem] == NULL)
260 stack[cur_stack_elem] = malloc(stack_glen*sizeof(short)+sizeof(int));
261 stack[cur_stack_elem]->sign = 0;
262
263 return(stack[cur_stack_elem++]);
264 }
265
266
267 void
268 pushg (
269 int a
270 )
271 {
272 if (a < 0) return;
273 cur_stack_elem -= a;
274 if (cur_stack_elem < 0) cur_stack_elem = 0;
275 }
276
277
278 giant
279 newgiant(
280 int numshorts
281 )
282 {
283 int size;
284 giant thegiant;
285
286 if (numshorts > MAX_SHORTS) {
287 fprintf(stderr, "Requested giant too big.\n");
288 fflush(stderr);
289 }
290 if (numshorts<=0)
291 numshorts = MAX_SHORTS;
292 size = numshorts*sizeof(short)+sizeof(int);
293 thegiant = (giant)malloc(size);
294 thegiant->sign = 0;
295
296 if (newmin(2*numshorts,MAX_SHORTS) > current_max_size)
297 current_max_size = newmin(2*numshorts,MAX_SHORTS);
298
299 /* If newgiant() is being called for the first time, set the
300 * size of the stack giants. */
301 if (stack_glen == 0) stack_glen = current_max_size;
302
303 return(thegiant);
304 }
305
306
307 gmatrix
308 newgmatrix(
309 void
310 )
311 /* Allocates space for a gmatrix struct, but does not
312 * allocate space for the giants. */
313 {
314 return((gmatrix) malloc (4*sizeof(giant)));
315 }
316
317 int
318 bitlen(
319 giant n
320 )
321 {
322 int b = 16, c = 1<<15, w;
323
324 if (isZero(n))
325 return(0);
326 w = n->n[abs(n->sign) - 1];
327 while ((w&c) == 0)
328 {
329 b--;
330 c >>= 1;
331 }
332 return (16*(abs(n->sign)-1) + b);
333 }
334
335
336 int
337 bitval(
338 giant n,
339 int pos
340 )
341 {
342 int i = abs(pos)>>4, c = 1 << (pos&15);
343
344 return ((n->n[i]) & c);
345 }
346
347
348 int
349 isone(
350 giant g
351 )
352 {
353 return((g->sign==1)&&(g->n[0]==1));
354 }
355
356
357 int isZero(
358 giant thegiant
359 )
360 /* Returns TR if thegiant == 0. */
361 {
362 register int count;
363 int length = abs(thegiant->sign);
364 register unsigned short * numpointer = thegiant->n;
365
366 if (length)
367 {
368 for(count = 0; count<length; ++count,++numpointer)
369 {
370 if (*numpointer != 0 )
371 return(FA);
372 }
373 }
374 return(TR);
375 }
376
377
378 void
379 gtog(
380 giant srcgiant,
381 giant destgiant
382 )
383 /* destgiant becomes equal to srcgiant. */
384 {
385 int numbytes = sizeof(int) + abs(srcgiant->sign)*sizeof(short);
386
387 memcpy((char *)destgiant,(char *)srcgiant,numbytes);
388 }
389
390
391 void
392 itog(
393 int i,
394 giant g
395 )
396 /* The giant g becomes set to the integer value i. */
397 {
398 unsigned int j = abs(i);
399
400 if (i==0)
401 {
402 g->sign = 0;
403 g->n[0] = 0;
404 return;
405 }
406 g->n[0] = (unsigned short)(j & 0xFFFF);
407 j >>= 16;
408 if (j)
409 {
410 g->n[1] = (unsigned short)j;
411 g->sign = 2;
412 }
413 else
414 {
415 g->sign = 1;
416 }
417 if (i<0)
418 g->sign = -(g->sign);
419 }
420
421
422 signed int
423 gtoi(
424 giant x
425 )
426 /* Calculate the value of an int-sized giant NOT exceeding 31 bits. */
427 {
428 register int size = abs(x->sign);
429 register int sign = (x->sign < 0) ? -1 : 1;
430
431 switch(size)
432 {
433 case 0:
434 break;
435 case 1:
436 return sign * x->n[0];
437 case 2:
438 return sign * (x->n[0]+((x->n[1])<<16));
439 default:
440 fprintf(stderr,"Giant too large for gtoi\n");
441 break;
442 }
443 return 0;
444 }
445
446
447 int
448 gsign(
449 giant g
450 )
451 /* Returns the sign of g. */
452 {
453 if (isZero(g))
454 return(0);
455 if (g->sign >0)
456 return(1);
457 return(-1);
458 }
459
460
461 #if 0
462 int gcompg(a,b)
463 /* Returns -1,0,1 if a<b, a=b, a>b, respectively. */
464 giant a,b;
465 {
466 int size = abs(a->sign);
467
468 if(isZero(a)) size = 0;
469 if (size == 0) {
470 if (isZero(b)) return(0); else return(-gsign(b));
471 }
472
473 if (b->sign == 0) return(gsign(a));
474 if (gsign(a)!=gsign(b)) return(gsign(a));
475 if (size>abs(b->sign)) return(gsign(a));
476 if (size<abs(b->sign)) return(-gsign(a));
477
478 do {
479 --size;
480 if (a->n[size] > b->n[size]) return(gsign(a));
481 if (a->n[size] < b->n[size]) return(-gsign(a));
482 } while(size>0);
483
484 return(0);
485 }
486 #else
487
488 int
489 gcompg(
490 giant a,
491 giant b
492 )
493 /* Returns -1,0,1 if a<b, a=b, a>b, respectively. */
494 {
495 int sa = a->sign, j, sb = b->sign, va, vb, sgn;
496
497 if(sa > sb)
498 return(1);
499 if(sa < sb)
500 return(-1);
501 if(sa < 0)
502 {
503 sa = -sa; /* Take absolute value of sa. */
504 sgn = -1;
505 }
506 else
507 {
508 sgn = 1;
509 }
510 for(j = sa-1; j >= 0; j--)
511 {
512 va = a->n[j];
513 vb = b->n[j];
514 if (va > vb)
515 return(sgn);
516 if (va < vb)
517 return(-sgn);
518 }
519 return(0);
520 }
521 #endif
522
523
524 void
525 setmulmode(
526 int mode
527 )
528 {
529 mulmode = mode;
530 }
531
532
533 /**************************************************************
534 *
535 * Private I/O Functions
536 *
537 **************************************************************/
538
539
540 int
541 radixdiv(
542 int base,
543 int divisor,
544 giant thegiant
545 )
546 /* Divides giant of arbitrary base by divisor.
547 * Returns remainder. Used by idivg and gread. */
548 {
549 int first = TR;
550 int finalsize = abs(thegiant->sign);
551 int j = finalsize-1;
552 unsigned short *digitpointer=&thegiant->n[j];
553 unsigned int num,rem=0;
554
555 if (divisor == 0)
556 {
557 error = DIVIDEBYZERO;
558 exit(error);
559 }
560
561 while (j>=0)
562 {
563 num=rem*base + *digitpointer;
564 *digitpointer = (unsigned short)(num/divisor);
565 if (first)
566 {
567 if (*digitpointer == 0)
568 --finalsize;
569 else
570 first = FA;
571 }
572 rem = num % divisor;
573 --digitpointer;
574 --j;
575 }
576
577 if ((divisor<0) ^ (thegiant->sign<0))
578 finalsize=-finalsize;
579 thegiant->sign=finalsize;
580 return(rem);
581 }
582
583
584 void
585 columnwrite(
586 FILE *filepointer,
587 short *column,
588 char *format,
589 short arg,
590 int newlines
591 )
592 /* Used by gwriteln. */
593 {
594 char outstring[10];
595 short i;
596
597 sprintf(outstring,format,arg);
598 for (i=0; outstring[i]!=0; ++i)
599 {
600 if (newlines)
601 {
602 if (*column >= COLUMNWIDTH)
603 {
604 fputs("\\\n",filepointer);
605 *column = 0;
606 }
607 }
608 fputc(outstring[i],filepointer);
609 ++*column;
610 }
611 }
612
613
614 void
615 gwrite(
616 giant thegiant,
617 FILE *filepointer,
618 int newlines
619 )
620 /* Outputs thegiant to filepointer. Output is terminated by a newline. */
621 {
622 short column;
623 unsigned int i;
624 unsigned short *numpointer;
625 giant garbagegiant, basetengrand;
626
627 basetengrand = popg();
628 garbagegiant = popg();
629
630 if (isZero(thegiant))
631 {
632 fputs("0",filepointer);
633 }
634 else
635 {
636 numpointer = basetengrand->n;
637 gtog(thegiant,garbagegiant);
638
639 basetengrand->sign = 0;
640 do
641 {
642 *numpointer = (unsigned short)idivg(10000,garbagegiant);
643 ++numpointer;
644 if (++basetengrand->sign >= current_max_size)
645 {
646 error = OVFLOW;
647 exit(error);
648 }
649 } while (!isZero(garbagegiant));
650
651 if (!error)
652 {
653 i = basetengrand->sign-1;
654 column = 0;
655 if (thegiant->sign<0 && basetengrand->n[i]!=0)
656 columnwrite(filepointer,&column,"-",0, newlines);
657 columnwrite(filepointer,&column,"%d",basetengrand->n[i],newlines);
658 for( ; i>0; )
659 {
660 --i;
661 columnwrite(filepointer,&column,"%04d",basetengrand->n[i],newlines);
662 }
663 }
664 }
665 pushg(2);
666 }
667
668
669 void
670 gwriteln(
671 giant theg,
672 FILE *filepointer
673 )
674 {
675 gwrite(theg, filepointer, 1);
676 fputc('\n',filepointer);
677 }
678
679
680 void
681 gread(
682 giant theg,
683 FILE *filepointer
684 )
685 {
686 char currentchar;
687 int isneg,size,backslash=FA,numdigits=0;
688 unsigned short *numpointer;
689 giant basetenthousand;
690 static char *inbuf = NULL;
691
692 basetenthousand = popg();
693 if (inbuf == NULL)
694 inbuf = (char*)malloc(MAX_DIGITS);
695
696 currentchar = (char)fgetc(filepointer);
697 if (currentchar=='-')
698 {
699 isneg=TR;
700 }
701 else
702 {
703 isneg=FA;
704 if (currentchar!='+')
705 ungetc(currentchar,filepointer);
706 }
707
708 do
709 {
710 currentchar = (char)fgetc(filepointer);
711 if ((currentchar>='0') && (currentchar<='9'))
712 {
713 inbuf[numdigits]=currentchar;
714 if(++numdigits==MAX_DIGITS)
715 break;
716 backslash=FA;
717 }
718 else
719 {
720 if (currentchar=='\\')
721 backslash=TR;
722 }
723 } while(((currentchar!=' ') && (currentchar!='\n') &&
724 (currentchar!='\t')) || (backslash) );
725 if (numdigits)
726 {
727 size = 0;
728 do
729 {
730 inbuf[numdigits] = 0;
731 numdigits-=4;
732 if (numdigits<0)
733 numdigits=0;
734 basetenthousand->n[size] = (unsigned short)strtol(&inbuf[numdigits],NULL,10);
735 ++size;
736 } while (numdigits>0);
737
738 basetenthousand->sign = size;
739 theg->sign = 0;
740 numpointer = theg->n;
741 do
742 {
743 *numpointer = (unsigned short)
744 radixdiv(10000,1<<(8*sizeof(short)),basetenthousand);
745 ++numpointer;
746 if (++theg->sign >= current_max_size)
747 {
748 error = OVFLOW;
749 exit(error);
750 }
751 } while (!isZero(basetenthousand));
752
753 if (isneg)
754 theg->sign = -theg->sign;
755 }
756 pushg(1);
757 }
758
759
760
761 /**************************************************************
762 *
763 * Private Math Functions
764 *
765 **************************************************************/
766
767
768 void
769 negg(
770 giant g
771 )
772 /* g becomes -g. */
773 {
774 g->sign = -g->sign;
775 }
776
777
778 void
779 absg(
780 giant g
781 )
782 {
783 /* g becomes the absolute value of g. */
784 if (g->sign <0)
785 g->sign=-g->sign;
786 }
787
788
789 void
790 iaddg(
791 int i,
792 giant g
793 )
794 /* Giant g becomes g + (int)i. */
795 {
796 int w,j=0,carry = 0, size = abs(g->sign);
797 giant tmp;
798
799 if (isZero(g))
800 {
801 itog(i,g);
802 }
803 else if(g->sign < 0) {
804 tmp = popg();
805 itog(i, tmp);
806 addg(tmp, g);
807 pushg(1);
808 return;
809 }
810 else
811 {
812 w = g->n[0]+i;
813 do
814 {
815 g->n[j] = (unsigned short) (w & 65535L);
816 carry = w >> 16;
817 w = g->n[++j]+carry;
818 } while ((carry!=0) && (j<size));
819 }
820 if (carry)
821 {
822 ++g->sign;
823 g->n[size] = (unsigned short)carry;
824 }
825 }
826
827
828 /* New subtract routines.
829 The basic subtract "subg()" uses the following logic table:
830
831 a b if(b > a) if(a > b)
832
833 + + b := b - a b := -(a - b)
834 - + b := b + (-a) N.A.
835 + - N.A. b := -((-b) + a)
836 - - b := (-a) - (-b) b := -((-b) - (-a))
837
838 The basic addition routine "addg()" uses:
839
840 a b if(b > -a) if(-a > b)
841
842 + + b := b + a N.A.
843 - + b := b - (-a) b := -((-a) - b)
844 + - b := a - (-b) b := -((-b) - a)
845 - - N.A. b := -((-b) + (-a))
846
847 In this way, internal routines "normal_addg," "normal_subg,"
848 and "reverse_subg;" each of which assumes non-negative
849 operands and a non-negative result, are now used for greater
850 efficiency.
851 */
852
853 void
854 normal_addg(
855 giant a,
856 giant b
857 )
858 /* b := a + b, both a,b assumed non-negative. */
859 {
860 int carry = 0;
861 int asize = a->sign, bsize = b->sign;
862 long k;
863 int j=0;
864 unsigned short *aptr = a->n, *bptr = b->n;
865
866 if (asize < bsize)
867 {
868 for (j=0; j<asize; j++)
869 {
870 k = *aptr++ + *bptr + carry;
871 carry = 0;
872 if (k >= 65536L)
873 {
874 k -= 65536L;
875 ++carry;
876 }
877 *bptr++ = (unsigned short)k;
878 }
879 for (j=asize; j<bsize; j++)
880 {
881 k = *bptr + carry;
882 carry = 0;
883 if (k >= 65536L)
884 {
885 k -= 65536L;
886 ++carry;
887 }
888 *bptr++ = (unsigned short)k;
889 }
890 }
891 else
892 {
893 for (j=0; j<bsize; j++)
894 {
895 k = *aptr++ + *bptr + carry;
896 carry = 0;
897 if (k >= 65536L)
898 {
899 k -= 65536L;
900 ++carry;
901 }
902 *bptr++ = (unsigned short)k;
903 }
904 for (j=bsize; j<asize; j++)
905 {
906 k = *aptr++ + carry;
907 carry = 0;
908 if (k >= 65536L)
909 {
910 k -= 65536L;
911 ++carry;
912 }
913 *bptr++ = (unsigned short)k;
914 }
915 }
916 if (carry)
917 {
918 *bptr = 1; ++j;
919 }
920 b->sign = j;
921 }
922
923
924 void
925 normal_subg(
926 giant a,
927 giant b
928 )
929 /* b := b - a; requires b, a non-negative and b >= a. */
930 {
931 int j, size = b->sign;
932 unsigned int k;
933
934 if (a->sign == 0)
935 return;
936
937 k = 0;
938 for (j=0; j<a->sign; ++j)
939 {
940 k += 0xffff - a->n[j] + b->n[j];
941 b->n[j] = (unsigned short)(k & 0xffff);
942 k >>= 16;
943 }
944 for (j=a->sign; j<size; ++j)
945 {
946 k += 0xffff + b->n[j];
947 b->n[j] = (unsigned short)(k & 0xffff);
948 k >>= 16;
949 }
950
951 if (b->n[0] == 0xffff)
952 iaddg(1,b);
953 else
954 ++b->n[0];
955
956 while ((size-- > 0) && (b->n[size]==0));
957
958 b->sign = (b->n[size]==0) ? 0 : size+1;
959 }
960
961
962 void
963 reverse_subg(
964 giant a,
965 giant b
966 )
967 /* b := a - b; requires b, a non-negative and a >= b. */
968 {
969 int j, size = a->sign;
970 unsigned int k;
971
972 k = 0;
973 for (j=0; j<b->sign; ++j)
974 {
975 k += 0xffff - b->n[j] + a->n[j];
976 b->n[j] = (unsigned short)(k & 0xffff);
977 k >>= 16;
978 }
979 for (j=b->sign; j<size; ++j)
980 {
981 k += 0xffff + a->n[j];
982 b->n[j] = (unsigned short)(k & 0xffff);
983 k >>= 16;
984 }
985
986 b->sign = size; /* REC, 21 Apr 1996. */
987 if (b->n[0] == 0xffff)
988 iaddg(1,b);
989 else
990 ++b->n[0];
991
992 while (!b->n[--size]);
993
994 b->sign = size+1;
995 }
996
997 void
998 addg(
999 giant a,
1000 giant b
1001 )
1002 /* b := b + a, any signs any result. */
1003 {
1004 int asgn = a->sign, bsgn = b->sign;
1005
1006 if (asgn == 0)
1007 return;
1008 if (bsgn == 0)
1009 {
1010 gtog(a,b);
1011 return;
1012 }
1013 if ((asgn < 0) == (bsgn < 0))
1014 {
1015 if (bsgn > 0)
1016 {
1017 normal_addg(a,b);
1018 return;
1019 }
1020 absg(b);
1021 if(a != b) absg(a);
1022 normal_addg(a,b);
1023 negg(b);
1024 if(a != b) negg(a);
1025 return;
1026 }
1027 if(bsgn > 0)
1028 {
1029 negg(a);
1030 if (gcompg(b,a) >= 0)
1031 {
1032 normal_subg(a,b);
1033 negg(a);
1034 return;
1035 }
1036 reverse_subg(a,b);
1037 negg(a);
1038 negg(b);
1039 return;
1040 }
1041 negg(b);
1042 if(gcompg(b,a) < 0)
1043 {
1044 reverse_subg(a,b);
1045 return;
1046 }
1047 normal_subg(a,b);
1048 negg(b);
1049 return;
1050 }
1051
1052 void
1053 subg(
1054 giant a,
1055 giant b
1056 )
1057 /* b := b - a, any signs, any result. */
1058 {
1059 int asgn = a->sign, bsgn = b->sign;
1060
1061 if (asgn == 0)
1062 return;
1063 if (bsgn == 0)
1064 {
1065 gtog(a,b);
1066 negg(b);
1067 return;
1068 }
1069 if ((asgn < 0) != (bsgn < 0))
1070 {
1071 if (bsgn > 0)
1072 {
1073 negg(a);
1074 normal_addg(a,b);
1075 negg(a);
1076 return;
1077 }
1078 negg(b);
1079 normal_addg(a,b);
1080 negg(b);
1081 return;
1082 }
1083 if (bsgn > 0)
1084 {
1085 if (gcompg(b,a) >= 0)
1086 {
1087 normal_subg(a,b);
1088 return;
1089 }
1090 reverse_subg(a,b);
1091 negg(b);
1092 return;
1093 }
1094 negg(a);
1095 negg(b);
1096 if (gcompg(b,a) >= 0)
1097 {
1098 normal_subg(a,b);
1099 negg(a);
1100 negg(b);
1101 return;
1102 }
1103 reverse_subg(a,b);
1104 negg(a);
1105 return;
1106 }
1107
1108
1109 int
1110 numtrailzeros(
1111 giant g
1112 )
1113 /* Returns the number of trailing zero bits in g. */
1114 {
1115 register int numshorts = abs(g->sign), j, bcount=0;
1116 register unsigned short gshort, c;
1117
1118 for (j=0;j<numshorts;j++)
1119 {
1120 gshort = g->n[j];
1121 c = 1;
1122 for (bcount=0;bcount<16; bcount++)
1123 {
1124 if (c & gshort)
1125 break;
1126 c <<= 1;
1127 }
1128 if (bcount<16)
1129 break;
1130 }
1131 return(bcount + 16*j);
1132 }
1133
1134
1135 void
1136 bdivg(
1137 giant v,
1138 giant u
1139 )
1140 /* u becomes greatest power of two not exceeding u/v. */
1141 {
1142 int diff = bitlen(u) - bitlen(v);
1143 giant scratch7;
1144
1145 if (diff<0)
1146 {
1147 itog(0,u);
1148 return;
1149 }
1150 scratch7 = popg();
1151 gtog(v, scratch7);
1152 gshiftleft(diff,scratch7);
1153 if (gcompg(u,scratch7) < 0)
1154 diff--;
1155 if (diff<0)
1156 {
1157 itog(0,u);
1158 pushg(1);
1159 return;
1160 }
1161 itog(1,u);
1162 gshiftleft(diff,u);
1163
1164 pushg(1);
1165 }
1166
1167
1168 int
1169 binvaux(
1170 giant p,
1171 giant x
1172 )
1173 /* Binary inverse method. Returns zero if no inverse exists,
1174 * in which case x becomes GCD(x,p). */
1175 {
1176
1177 giant scratch7, u0, u1, v0, v1;
1178
1179 if (isone(x))
1180 return(1);
1181 u0 = popg();
1182 u1 = popg();
1183 v0 = popg();
1184 v1 = popg();
1185 itog(1, v0);
1186 gtog(x, v1);
1187 itog(0,x);
1188 gtog(p, u1);
1189
1190 scratch7 = popg();
1191 while(!isZero(v1))
1192 {
1193 gtog(u1, u0);
1194 bdivg(v1, u0);
1195 gtog(x, scratch7);
1196 gtog(v0, x);
1197 mulg(u0, v0);
1198 subg(v0,scratch7);
1199 gtog(scratch7, v0);
1200
1201 gtog(u1, scratch7);
1202 gtog(v1, u1);
1203 mulg(u0, v1);
1204 subg(v1,scratch7);
1205 gtog(scratch7, v1);
1206 }
1207
1208 pushg(1);
1209
1210 if (!isone(u1))
1211 {
1212 gtog(u1,x);
1213 if(x->sign<0) addg(p, x);
1214 pushg(4);
1215 return(0);
1216 }
1217 if(x->sign<0)
1218 addg(p, x);
1219 pushg(4);
1220 return(1);
1221 }
1222
1223
1224 int
1225 binvg(
1226 giant p,
1227 giant x
1228 )
1229 {
1230 modg(p, x);
1231 return(binvaux(p,x));
1232 }
1233
1234
1235 int
1236 invg(
1237 giant p,
1238 giant x
1239 )
1240 {
1241 modg(p, x);
1242 return(invaux(p,x));
1243 }
1244
1245 int
1246 invaux(
1247 giant p,
1248 giant x
1249 )
1250 /* Returns zero if no inverse exists, in which case x becomes
1251 * GCD(x,p). */
1252 {
1253
1254 giant scratch7, u0, u1, v0, v1;
1255
1256 if ((x->sign==1)&&(x->n[0]==1))
1257 return(1);
1258
1259 u0 = popg();
1260 u1 = popg();
1261 v0 = popg();
1262 v1 = popg();
1263
1264 itog(1,u1);
1265 gtog(p, v0);
1266 gtog(x, v1);
1267 itog(0,x);
1268
1269 scratch7 = popg();
1270 while (!isZero(v1))
1271 {
1272 gtog(v0, u0);
1273 divg(v1, u0);
1274 gtog(u0, scratch7);
1275 mulg(v1, scratch7);
1276 subg(v0, scratch7);
1277 negg(scratch7);
1278 gtog(v1, v0);
1279 gtog(scratch7, v1);
1280 gtog(u1, scratch7);
1281 mulg(u0, scratch7);
1282 subg(x, scratch7);
1283 negg(scratch7);
1284 gtog(u1,x);
1285 gtog(scratch7, u1);
1286 }
1287 pushg(1);
1288
1289 if ((v0->sign!=1)||(v0->n[0]!=1))
1290 {
1291 gtog(v0,x);
1292 pushg(4);
1293 return(0);
1294 }
1295 if(x->sign<0)
1296 addg(p, x);
1297 pushg(4);
1298 return(1);
1299 }
1300
1301
1302 int
1303 mersenneinvg(
1304 int q,
1305 giant x
1306 )
1307 {
1308 int k;
1309 giant u0, u1, v1;
1310
1311 u0 = popg();
1312 u1 = popg();
1313 v1 = popg();
1314
1315 itog(1, u0);
1316 itog(0, u1);
1317 itog(1, v1);
1318 gshiftleft(q, v1);
1319 subg(u0, v1);
1320 mersennemod(q, x);
1321 while (1)
1322 {
1323 k = -1;
1324 if (isZero(x))
1325 {
1326 gtog(v1, x);
1327 pushg(3);
1328 return(0);
1329 }
1330 while (bitval(x, ++k) == 0);
1331
1332 gshiftright(k, x);
1333 if (k)
1334 {
1335 gshiftleft(q-k, u0);
1336 mersennemod(q, u0);
1337 }
1338 if (isone(x))
1339 break;
1340 addg(u1, u0);
1341 mersennemod(q, u0);
1342 negg(u1);
1343 addg(u0, u1);
1344 mersennemod(q, u1);
1345 if (!gcompg(v1,x)) {
1346 pushg(3);
1347 return(0);
1348 }
1349 addg(v1, x);
1350 negg(v1);
1351 addg(x, v1);
1352 mersennemod(q, v1);
1353 }
1354 gtog(u0, x);
1355 mersennemod(q,x);
1356 pushg(3);
1357 return(1);
1358 }
1359
1360
1361 void
1362 cgcdg(
1363 giant a,
1364 giant v
1365 )
1366 /* Classical Euclid GCD. v becomes gcd(a, v). */
1367 {
1368 giant u, r;
1369
1370 v->sign = abs(v->sign);
1371 if (isZero(a))
1372 return;
1373
1374 u = popg();
1375 r = popg();
1376 gtog(a, u);
1377 u->sign = abs(u->sign);
1378 while (!isZero(v))
1379 {
1380 gtog(u, r);
1381 modg(v, r);
1382 gtog(v, u);
1383 gtog(r, v);
1384 }
1385 gtog(u,v);
1386 pushg(2);
1387 }
1388
1389
1390 void
1391 gcdg(
1392 giant x,
1393 giant y
1394 )
1395 {
1396 if (bitlen(y)<= GCDLIMIT)
1397 bgcdg(x,y);
1398 else
1399 ggcd(x,y);
1400 }
1401
1402 void
1403 bgcdg(
1404 giant a,
1405 giant b
1406 )
1407 /* Binary form of the gcd. b becomes the gcd of a,b. */
1408 {
1409 int k = isZero(b), m = isZero(a);
1410 giant u, v, t;
1411
1412 if (k || m)
1413 {
1414 if (m)
1415 {
1416 if (k)
1417 itog(1,b);
1418 return;
1419 }
1420 if (k)
1421 {
1422 if (m)
1423 itog(1,b);
1424 else
1425 gtog(a,b);
1426 return;
1427 }
1428 }
1429
1430 u = popg();
1431 v = popg();
1432 t = popg();
1433
1434 /* Now neither a nor b is zero. */
1435 gtog(a, u);
1436 u->sign = abs(a->sign);
1437 gtog(b, v);
1438 v->sign = abs(b->sign);
1439 k = numtrailzeros(u);
1440 m = numtrailzeros(v);
1441 if (k>m)
1442 k = m;
1443 gshiftright(k,u);
1444 gshiftright(k,v);
1445 if (u->n[0] & 1)
1446 {
1447 gtog(v, t);
1448 negg(t);
1449 }
1450 else
1451 {
1452 gtog(u,t);
1453 }
1454
1455 while (!isZero(t))
1456 {
1457 m = numtrailzeros(t);
1458 gshiftright(m, t);
1459 if (t->sign > 0)
1460 {
1461 gtog(t, u);
1462 subg(v,t);
1463 }
1464 else
1465 {
1466 gtog(t, v);
1467 negg(v);
1468 addg(u,t);
1469 }
1470 }
1471 gtog(u,b);
1472 gshiftleft(k, b);
1473 pushg(3);
1474 }
1475
1476
1477 void
1478 powerg(
1479 int m,
1480 int n,
1481 giant g
1482 )
1483 /* g becomes m^n, NO mod performed. */
1484 {
1485 giant scratch2 = popg();
1486
1487 itog(1, g);
1488 itog(m, scratch2);
1489 while (n)
1490 {
1491 if (n & 1)
1492 mulg(scratch2, g);
1493 n >>= 1;
1494 if (n)
1495 squareg(scratch2);
1496 }
1497 pushg(1);
1498 }
1499
1500 #if 0
1501 void
1502 jtest(
1503 giant n
1504 )
1505 {
1506 if (n->sign)
1507 {
1508 if (n->n[n->sign-1] == 0)
1509 {
1510 fprintf(stderr,"%d %d tilt",n->sign, (int)(n->n[n->sign-1]));
1511 exit(7);
1512 }
1513 }
1514 }
1515 #endif
1516
1517
1518 void
1519 make_recip(
1520 giant d,
1521 giant r
1522 )
1523 /* r becomes the steady-state reciprocal
1524 * 2^(2b)/d, where b = bit-length of d-1. */
1525 {
1526 int b;
1527 giant tmp, tmp2;
1528
1529 if (isZero(d) || (d->sign < 0))
1530 {
1531 exit(SIGN);
1532 }
1533 tmp = popg();
1534 tmp2 = popg();
1535 itog(1, r);
1536 subg(r, d);
1537 b = bitlen(d);
1538 addg(r, d);
1539 gshiftleft(b, r);
1540 gtog(r, tmp2);
1541 while (1)
1542 {
1543 gtog(r, tmp);
1544 squareg(tmp);
1545 gshiftright(b, tmp);
1546 mulg(d, tmp);
1547 gshiftright(b, tmp);
1548 addg(r, r);
1549 subg(tmp, r);
1550 if (gcompg(r, tmp2) <= 0)
1551 break;
1552 gtog(r, tmp2);
1553 }
1554 itog(1, tmp);
1555 gshiftleft(2*b, tmp);
1556 gtog(r, tmp2);
1557 mulg(d, tmp2);
1558 subg(tmp2, tmp);
1559 itog(1, tmp2);
1560 while (tmp->sign < 0)
1561 {
1562 subg(tmp2, r);
1563 addg(d, tmp);
1564 }
1565 pushg(2);
1566 }
1567
1568 void
1569 divg_via_recip(
1570 giant d,
1571 giant r,
1572 giant n
1573 )
1574 /* n := n/d, where r is the precalculated
1575 * steady-state reciprocal of d. */
1576 {
1577 int s = 2*(bitlen(r)-1), sign = gsign(n);
1578 giant tmp, tmp2;
1579
1580 if (isZero(d) || (d->sign < 0))
1581 {
1582 exit(SIGN);
1583 }
1584
1585 tmp = popg();
1586 tmp2 = popg();
1587
1588 n->sign = abs(n->sign);
1589 itog(0, tmp2);
1590 while (1)
1591 {
1592 gtog(n, tmp);
1593 mulg(r, tmp);
1594 gshiftright(s, tmp);
1595 addg(tmp, tmp2);
1596 mulg(d, tmp);
1597 subg(tmp, n);
1598 if (gcompg(n,d) >= 0)
1599 {
1600 subg(d,n);
1601 iaddg(1, tmp2);
1602 }
1603 if (gcompg(n,d) < 0)
1604 break;
1605 }
1606 gtog(tmp2, n);
1607 n->sign *= sign;
1608 pushg(2);
1609 }
1610
1611 #if 1
1612 void
1613 modg_via_recip(
1614 giant d,
1615 giant r,
1616 giant n
1617 )
1618 /* This is the fastest mod of the present collection.
1619 * n := n % d, where r is the precalculated
1620 * steady-state reciprocal of d. */
1621
1622 {
1623 int s = (bitlen(r)-1), sign = n->sign;
1624 giant tmp, tmp2;
1625
1626 if (isZero(d) || (d->sign < 0))
1627 {
1628 exit(SIGN);
1629 }
1630
1631 tmp = popg();
1632 tmp2 = popg();
1633
1634 n->sign = abs(n->sign);
1635 while (1)
1636 {
1637 gtog(n, tmp); gshiftright(s-1, tmp);
1638 mulg(r, tmp);
1639 gshiftright(s+1, tmp);
1640 mulg(d, tmp);
1641 subg(tmp, n);
1642 if (gcompg(n,d) >= 0)
1643 subg(d,n);
1644 if (gcompg(n,d) < 0)
1645 break;
1646 }
1647 if (sign >= 0)
1648 goto done;
1649 if (isZero(n))
1650 goto done;
1651 negg(n);
1652 addg(d,n);
1653 done:
1654 pushg(2);
1655 return;
1656 }
1657
1658 #else
1659 void
1660 modg_via_recip(
1661 giant d,
1662 giant r,
1663 giant n
1664 )
1665 {
1666 int s = 2*(bitlen(r)-1), sign = n->sign;
1667 giant tmp, tmp2;
1668
1669 if (isZero(d) || (d->sign < 0))
1670 {
1671 exit(SIGN);
1672 }
1673
1674 tmp = popg();
1675 tmp2 = popg()
1676
1677 n->sign = abs(n->sign);
1678 while (1)
1679 {
1680 gtog(n, tmp);
1681 mulg(r, tmp);
1682 gshiftright(s, tmp);
1683 mulg(d, tmp);
1684 subg(tmp, n);
1685 if (gcompg(n,d) >= 0)
1686 subg(d,n);
1687 if (gcompg(n,d) < 0)
1688 break;
1689 }
1690 if (sign >= 0)
1691 goto done;
1692 if (isZero(n))
1693 goto done;
1694 negg(n);
1695 addg(d,n);
1696 done:
1697 pushg(2);
1698 return;
1699 }
1700 #endif
1701
1702 void
1703 modg(
1704 giant d,
1705 giant n
1706 )
1707 /* n becomes n%d. n is arbitrary, but the denominator d must be positive! */
1708 {
1709 if (cur_recip == NULL) {
1710 cur_recip = newgiant(current_max_size);
1711 cur_den = newgiant(current_max_size);
1712 gtog(d, cur_den);
1713 make_recip(d, cur_recip);
1714 } else if (gcompg(d, cur_den)) {
1715 gtog(d, cur_den);
1716 make_recip(d, cur_recip);
1717 }
1718 modg_via_recip(d, cur_recip, n);
1719 }
1720
1721
1722 #if 0
1723 int
1724 feemulmod (
1725 giant a,
1726 giant b,
1727 int q,
1728 int k
1729 )
1730 /* a becomes (a*b) (mod 2^q-k) where q % 16 == 0 and k is "small" (0 < k < 65535).
1731 * Returns 0 if unsuccessful, otherwise 1. */
1732 {
1733 giant carry, kk, scratch;
1734 int i, j;
1735 int asize = abs(a->sign), bsize = abs(b->sign);
1736 unsigned short *aptr,*bptr,*destptr;
1737 unsigned int words;
1738 int kpower, curk;
1739
1740 if ((q % 16) || (k <= 0) || (k >= 65535)) {
1741 return (0);
1742 }
1743
1744 carry = popg();
1745 kk = popg();
1746 scratch = popg();
1747
1748 for (i=0; i<asize+bsize; i++) scratch->n[i]=0;
1749
1750 words = q >> 4;
1751
1752 bptr = b->n;
1753 for (i = 0; i < bsize; i++) {
1754 mult = *bptr++;
1755 if (mult) {
1756 kpower = i/words;
1757
1758 if (kpower >= 1) itog (kpower,kk);
1759 for (j = 1; j < kpower; k++) smulg(kpower,kk);
1760
1761 itog(0,carry);
1762
1763 aptr = a->n;
1764 for (j = 0; j < bsize; b++) {
1765 gtog(kk,scratch);
1766 smulg(*aptr++,scratch);
1767 smulg(mult,scratch);
1768 iaddg(*destptr,scratch);
1769 addg(carry,scratch);
1770 *destptr++ = scratch->n[0];
1771 gshiftright(scratch,16);
1772 gtog(scratch,carry);
1773 if (destptr - scratch->n >= words) {
1774 smulg(k, carry);
1775 smulg(k, kk);
1776 destptr -= words;
1777 }
1778 }
1779 }
1780 }
1781
1782 int i,j,m;
1783 unsigned int prod,carry=0;
1784 int asize = abs(a->sign), bsize = abs(b->sign);
1785 unsigned short *aptr,*bptr,*destptr;
1786 unsigned short mult;
1787 int words, excess;
1788 int temp;
1789 giant scratch = popg(), scratch2 = popg(), scratch3 = popg();
1790 short *carryptr = scratch->n;
1791 int kpower,kpowerlimit, curk;
1792
1793 if ((q % 16) || (k <= 0) || (k >= 65535)) {
1794 return (0);
1795 }
1796
1797 scratch
1798
1799 for (i=0; i<asize+bsize; i++) scratch->n[i]=0;
1800
1801 words = q >> 4;
1802
1803 bptr = b->n;
1804 for (i=0; i<bsize; ++i)
1805 {
1806 mult = *bptr++;
1807 if (mult)
1808 {
1809 kpower = i/words;
1810 aptr = a->n;
1811 destptr = scratch->n + i;
1812
1813 if (kpower == 0) {
1814 carry = 0;
1815 } else if (kpower <= kpowerlimit) {
1816 carry = 0;
1817 curk = k;
1818 for (j = 1; j < kpower; j++) curk *= k;
1819 } else {
1820 itog (k,scratch);
1821 for (j = 1; j < kpower; j++) smulg(k,scratch);
1822 itog(0,scratch2);
1823 }
1824
1825 for (j = 0; j < asize; j++) {
1826 if(kpower == 0) {
1827 prod = *aptr++ * mult + *destptr + carry;
1828 *destptr++ = (unsigned short)(prod & 0xFFFF);
1829 carry = prod >> 16;
1830 } else if (kpower < kpowerlimit) {
1831 prod = kcur * *aptr++;
1832 temp = prod >> 16;
1833 prod &= 0xFFFF;
1834 temp *= mult;
1835 prod *= mult;
1836 temp += prod >> 16;
1837 prod &= 0xFFFF;
1838 prod += *destptr + carry;
1839 carry = prod >> 16 + temp;
1840 *destptr++ = (unsigned short)(prod & 0xFFFF);
1841 } else {
1842 gtog(scratch,scratch3);
1843 smulg(*aptr++,scratch3);
1844 smulg(mult,scratch3);
1845 iaddg(*destptr,scratch3);
1846 addg(scratch3,scratch2);
1847 *destptr++ = scratch2->n[0];
1848 memmove(scratch2->n,scratch2->n+1,2*(scratch2->size-1));
1849 scratch2->sign--;
1850 }
1851 if (destptr - scratch->n > words) {
1852 if (kpower == 0) {
1853 curk = k;
1854 carry *= k;
1855 } else if (kpower < kpowerlimit) {
1856 curk *= k;
1857 carry *= curk;
1858 } else if (kpower == kpowerlimit) {
1859 itog (k,scratch);
1860 for (j = 1; j < kpower; j++) smulg(k,scratch);
1861 itog(carry,scratch2);
1862 smulg(k,scratch2);
1863 } else {
1864 smulg(k,scratch);
1865 smulg(k,scratch2);
1866 }
1867 kpower++;
1868 destptr -= words;
1869 }
1870 }
1871
1872 /* Next, deal with the carry term. Needs to be improved to
1873 handle overflow carry cases. */
1874 if (kpower <= kpowerlimit) {
1875 iaddg(carry,scratch);
1876 } else {
1877 addg(scratch2,scratch);
1878 }
1879 while(scratch->sign > q)
1880 gtog(scratch,scratch2)
1881 }
1882 }
1883 scratch->sign = destptr - scratch->n;
1884 if (!carry)
1885 --(scratch->sign);
1886 scratch->sign *= gsign(a)*gsign(b);
1887 gtog(scratch,a);
1888 pushg(3);
1889 return (1);
1890 }
1891 #endif
1892
1893 int
1894 idivg(
1895 int divisor,
1896 giant theg
1897 )
1898 {
1899 /* theg becomes theg/divisor. Returns remainder. */
1900 int n;
1901 int base = 1<<(8*sizeof(short));
1902
1903 n = radixdiv(base,divisor,theg);
1904 return(n);
1905 }
1906
1907
1908 void
1909 divg(
1910 giant d,
1911 giant n
1912 )
1913 /* n becomes n/d. n is arbitrary, but the denominator d must be positive! */
1914 {
1915 if (cur_recip == NULL) {
1916 cur_recip = newgiant(current_max_size);
1917 cur_den = newgiant(current_max_size);
1918 gtog(d, cur_den);
1919 make_recip(d, cur_recip);
1920 } else if (gcompg(d, cur_den)) {
1921 gtog(d, cur_den);
1922 make_recip(d, cur_recip);
1923 }
1924 divg_via_recip(d, cur_recip, n);
1925 }
1926
1927
1928 void
1929 powermod(
1930 giant x,
1931 int n,
1932 giant g
1933 )
1934 /* x becomes x^n (mod g). */
1935 {
1936 giant scratch2 = popg();
1937 gtog(x, scratch2);
1938 itog(1, x);
1939 while (n)
1940 {
1941 if (n & 1)
1942 {
1943 mulg(scratch2, x);
1944 modg(g, x);
1945 }
1946 n >>= 1;
1947 if (n)
1948 {
1949 squareg(scratch2);
1950 modg(g, scratch2);
1951 }
1952 }
1953 pushg(1);
1954 }
1955
1956
1957 void
1958 powermodg(
1959 giant x,
1960 giant n,
1961 giant g
1962 )
1963 /* x becomes x^n (mod g). */
1964 {
1965 int len, pos;
1966 giant scratch2 = popg();
1967
1968 gtog(x, scratch2);
1969 itog(1, x);
1970 len = bitlen(n);
1971 pos = 0;
1972 while (1)
1973 {
1974 if (bitval(n, pos++))
1975 {
1976 mulg(scratch2, x);
1977 modg(g, x);
1978 }
1979 if (pos>=len)
1980 break;
1981 squareg(scratch2);
1982 modg(g, scratch2);
1983 }
1984 pushg(1);
1985 }
1986
1987
1988 void
1989 fermatpowermod(
1990 giant x,
1991 int n,
1992 int q
1993 )
1994 /* x becomes x^n (mod 2^q+1). */
1995 {
1996 giant scratch2 = popg();
1997
1998 gtog(x, scratch2);
1999 itog(1, x);
2000 while (n)
2001 {
2002 if (n & 1)
2003 {
2004 mulg(scratch2, x);
2005 fermatmod(q, x);
2006 }
2007 n >>= 1;
2008 if (n)
2009 {
2010 squareg(scratch2);
2011 fermatmod(q, scratch2);
2012 }
2013 }
2014 pushg(1);
2015 }
2016
2017
2018 void
2019 fermatpowermodg(
2020 giant x,
2021 giant n,
2022 int q
2023 )
2024 /* x becomes x^n (mod 2^q+1). */
2025 {
2026 int len, pos;
2027 giant scratch2 = popg();
2028
2029 gtog(x, scratch2);
2030 itog(1, x);
2031 len = bitlen(n);
2032 pos = 0;
2033 while (1)
2034 {
2035 if (bitval(n, pos++))
2036 {
2037 mulg(scratch2, x);
2038 fermatmod(q, x);
2039 }
2040 if (pos>=len)
2041 break;
2042 squareg(scratch2);
2043 fermatmod(q, scratch2);
2044 }
2045 pushg(1);
2046 }
2047
2048
2049 void
2050 mersennepowermod(
2051 giant x,
2052 int n,
2053 int q
2054 )
2055 /* x becomes x^n (mod 2^q-1). */
2056 {
2057 giant scratch2 = popg();
2058
2059 gtog(x, scratch2);
2060 itog(1, x);
2061 while (n)
2062 {
2063 if (n & 1)
2064 {
2065 mulg(scratch2, x);
2066 mersennemod(q, x);
2067 }
2068 n >>= 1;
2069 if (n)
2070 {
2071 squareg(scratch2);
2072 mersennemod(q, scratch2);
2073 }
2074 }
2075 pushg(1);
2076 }
2077
2078
2079 void
2080 mersennepowermodg(
2081 giant x,
2082 giant n,
2083 int q
2084 )
2085 /* x becomes x^n (mod 2^q-1). */
2086 {
2087 int len, pos;
2088 giant scratch2 = popg();
2089
2090 gtog(x, scratch2);
2091 itog(1, x);
2092 len = bitlen(n);
2093 pos = 0;
2094 while (1)
2095 {
2096 if (bitval(n, pos++))
2097 {
2098 mulg(scratch2, x);
2099 mersennemod(q, x);
2100 }
2101 if (pos>=len)
2102 break;
2103 squareg(scratch2);
2104 mersennemod(q, scratch2);
2105 }
2106 pushg(1);
2107 }
2108
2109
2110 void
2111 gshiftleft(
2112 int bits,
2113 giant g
2114 )
2115 /* shift g left bits bits. Equivalent to g = g*2^bits. */
2116 {
2117 int rem = bits&15, crem = 16-rem, words = bits>>4;
2118 int size = abs(g->sign), j, k, sign = gsign(g);
2119 unsigned short carry, dat;
2120
2121 if (!bits)
2122 return;
2123 if (!size)
2124 return;
2125 if (bits < 0) {
2126 gshiftright(-bits,g);
2127 return;
2128 }
2129 if (size+words+1 > current_max_size) {
2130 error = OVFLOW;
2131 exit(error);
2132 }
2133 if (rem == 0) {
2134 memmove(g->n + words, g->n, size * sizeof(short));
2135 for (j = 0; j < words; j++) g->n[j] = 0;
2136 g->sign += (g->sign < 0)?(-words):(words);
2137 } else {
2138 k = size+words;
2139 carry = 0;
2140 for (j=size-1; j>=0; j--) {
2141 dat = g->n[j];
2142 g->n[k--] = (unsigned short)((dat >> crem) | carry);
2143 carry = (unsigned short)(dat << rem);
2144 }
2145 do {
2146 g->n[k--] = carry;
2147 carry = 0;
2148 } while(k>=0);
2149
2150 k = size+words;
2151 if (g->n[k] == 0)
2152 --k;
2153 g->sign = sign*(k+1);
2154 }
2155 }
2156
2157
2158 void
2159 gshiftright(
2160 int bits,
2161 giant g
2162 )
2163 /* shift g right bits bits. Equivalent to g = g/2^bits. */
2164 {
2165 register int j,size=abs(g->sign);
2166 register unsigned int carry;
2167 int words = bits >> 4;
2168 int remain = bits & 15, cremain = (16-remain);
2169
2170 if (bits==0)
2171 return;
2172 if (isZero(g))
2173 return;
2174 if (bits < 0) {
2175 gshiftleft(-bits,g);
2176 return;
2177 }
2178 if (words >= size) {
2179 g->sign = 0;
2180 return;
2181 }
2182 if (remain == 0) {
2183 memmove(g->n,g->n + words,(size - words) * sizeof(short));
2184 g->sign += (g->sign < 0)?(words):(-words);
2185 } else {
2186 size -= words;
2187
2188 if (size)
2189 {
2190 for(j=0;j<size-1;++j)
2191 {
2192 carry = g->n[j+words+1] << cremain;
2193 g->n[j] = (unsigned short)((g->n[j+words] >> remain ) | carry);
2194 }
2195 g->n[size-1] = (unsigned short)(g->n[size-1+words] >> remain);
2196 }
2197
2198 if (g->n[size-1] == 0)
2199 --size;
2200
2201 if (g->sign > 0)
2202 g->sign = size;
2203 else
2204 g->sign = -size;
2205 }
2206 }
2207
2208
2209 void
2210 extractbits(
2211 int n,
2212 giant src,
2213 giant dest
2214 )
2215 /* dest becomes lowermost n bits of src. Equivalent to dest = src % 2^n. */
2216 {
2217 register int words = n >> 4, numbytes = words*sizeof(short);
2218 register int bits = n & 15;
2219
2220 if (n<=0)
2221 return;
2222 if (words >= abs(src->sign))
2223 gtog(src,dest);
2224 else
2225 {
2226 memcpy((char *)(dest->n), (char *)(src->n), numbytes);
2227 if (bits)
2228 {
2229 dest->n[words] = (unsigned short)(src->n[words] & ((1<<bits)-1));
2230 ++words;
2231 }
2232 while ((dest->n[words-1] == 0) && (words > 0))
2233 {
2234 --words;
2235 }
2236 if (src->sign<0)
2237 dest->sign = -words;
2238 else
2239 dest->sign = words;
2240 }
2241 }
2242
2243
2244 int
2245 allzeros(
2246 int shorts,
2247 int bits,
2248 giant g
2249 )
2250 {
2251 int i=shorts;
2252
2253 while (i>0)
2254 {
2255 if (g->n[--i])
2256 return(0);
2257 }
2258 return((int)(!(g->n[shorts] & ((1<<bits)-1))));
2259 }
2260
2261
2262 void
2263 fermatnegate(
2264 int n,
2265 giant g
2266 )
2267 /* negate g. g is mod 2^n+1. */
2268 {
2269 register int shorts = n>>4,
2270 bits = n & 15,
2271 i = shorts,
2272 mask = 1<<bits;
2273 register unsigned carry,temp;
2274
2275 for (temp=(unsigned)shorts; (int)temp>g->sign-1; --temp)
2276 {
2277 g->n[temp] = 0;
2278 }
2279 if (g->n[shorts] & mask)
2280 { /* if high bit is set, -g = 1. */
2281 g->sign = 1;
2282 g->n[0] = 1;
2283 return;
2284 }
2285 if (allzeros(shorts,bits,g))
2286 return; /* if g=0, -g = 0. */
2287
2288 while (i>0)
2289 { --i;
2290 g->n[i] = (unsigned short)(~(g->n[i+1]));
2291 }
2292 g->n[shorts] ^= mask-1;
2293
2294 carry = 2;
2295 i = 0;
2296 while (carry)
2297 {
2298 temp = g->n[i]+carry;
2299 g->n[i++] = (unsigned short)(temp & 0xffff);
2300 carry = temp>>16;
2301 }
2302 while(!g->n[shorts])
2303 {
2304 --shorts;
2305 }
2306 g->sign = shorts+1;
2307 }
2308
2309
2310 void
2311 mersennemod (
2312 int n,
2313 giant g
2314 )
2315 /* g := g (mod 2^n - 1) */
2316 {
2317 int the_sign, s;
2318 giant scratch3 = popg(), scratch4 = popg();
2319
2320 if ((the_sign = gsign(g)) < 0) absg(g);
2321 while (bitlen(g) > n) {
2322 gtog(g,scratch3);
2323 gshiftright(n,scratch3);
2324 addg(scratch3,g);
2325 gshiftleft(n,scratch3);
2326 subg(scratch3,g);
2327 }
2328 if(!isZero(g)) {
2329 if ((s = gsign(g)) < 0) absg(g);
2330 itog(1,scratch3);
2331 gshiftleft(n,scratch3);
2332 itog(1,scratch4);
2333 subg(scratch4,scratch3);
2334 if(gcompg(g,scratch3) >= 0) subg(scratch3,g);
2335 if (s < 0) {
2336 g->sign = -g->sign;
2337 addg(scratch3,g);
2338 }
2339 if (the_sign < 0) {
2340 g->sign = -g->sign;
2341 addg(scratch3,g);
2342 }
2343 }
2344 pushg(2);
2345 }
2346
2347 void
2348 fermatmod (
2349 int n,
2350 giant g
2351 )
2352 /* g := g (mod 2^n + 1), */
2353 {
2354 int the_sign, s;
2355 giant scratch3 = popg();
2356
2357 if ((the_sign = gsign(g)) < 0) absg(g);
2358 while (bitlen(g) > n) {
2359 gtog(g,scratch3);
2360 gshiftright(n,scratch3);
2361 subg(scratch3,g);
2362 gshiftleft(n,scratch3);
2363 subg(scratch3,g);
2364 }
2365 if((bitlen(g) < n) && (the_sign * (g->sign) >= 0)) goto leave;
2366 if(!isZero(g)) {
2367 if ((s = gsign(g)) < 0) absg(g);
2368 itog(1,scratch3);
2369 gshiftleft(n,scratch3);
2370 iaddg(1,scratch3);
2371 if(gcompg(g,scratch3) >= 0) subg(scratch3,g);
2372 if (s * the_sign < 0) {
2373 g->sign = -g->sign;
2374 addg(scratch3,g);
2375 }
2376 }
2377 leave:
2378 pushg(1);
2379
2380 }
2381
2382 void
2383 fer_mod (
2384 int n,
2385 giant g,
2386 giant modulus
2387 )
2388 /* Same as fermatmod(), except modulus = 2^n should be passed
2389 if available (i.e. if already allocated and set). */
2390 {
2391 int the_sign, s;
2392 giant scratch3 = popg();
2393
2394 if ((the_sign = gsign(g)) < 0) absg(g);
2395 while (bitlen(g) > n) {
2396 gtog(g,scratch3);
2397 gshiftright(n,scratch3);
2398 subg(scratch3,g);
2399 gshiftleft(n,scratch3);
2400 subg(scratch3,g);
2401 }
2402 if((bitlen(g) < n) && (the_sign * (g->sign) >= 0)) goto leave;
2403 if(!isZero(g)) {
2404 if ((s = gsign(g)) < 0) absg(g);
2405 if(gcompg(g,modulus) >= 0) subg(modulus,g);
2406 if (s * the_sign < 0) {
2407 g->sign = -g->sign;
2408 addg(modulus,g);
2409 }
2410 }
2411 leave:
2412 pushg(1);
2413 }
2414
2415
2416 void
2417 smulg(
2418 unsigned short i,
2419 giant g
2420 )
2421 /* g becomes g * i. */
2422 {
2423 unsigned short carry = 0;
2424 int size = abs(g->sign);
2425 register int j,k,mul = abs(i);
2426 unsigned short *digit = g->n;
2427
2428 for (j=0; j<size; ++j)
2429 {
2430 k = *digit * mul + carry;
2431 carry = (unsigned short)(k>>16);
2432 *digit = (unsigned short)(k & 0xffff);
2433 ++digit;
2434 }
2435 if (carry)
2436 {
2437 if (++j >= current_max_size)
2438 {
2439 error = OVFLOW;
2440 exit(error);
2441 }
2442 *digit = carry;
2443 }
2444
2445 if ((g->sign>0) ^ (i>0))
2446 g->sign = -j;
2447 else
2448 g->sign = j;
2449 }
2450
2451
2452 void
2453 squareg(
2454 giant b
2455 )
2456 /* b becomes b^2. */
2457 {
2458 auxmulg(b,b);
2459 }
2460
2461
2462 void
2463 mulg(
2464 giant a,
2465 giant b
2466 )
2467 /* b becomes a*b. */
2468 {
2469 auxmulg(a,b);
2470 }
2471
2472
2473 void
2474 auxmulg(
2475 giant a,
2476 giant b
2477 )
2478 /* Optimized general multiply, b becomes a*b. Modes are:
2479 * AUTO_MUL: switch according to empirical speed criteria.
2480 * GRAMMAR_MUL: force grammar-school algorithm.
2481 * KARAT_MUL: force Karatsuba divide-conquer method.
2482 * FFT_MUL: force floating point FFT method. */
2483 {
2484 float grammartime;
2485 int square = (a==b);
2486 int sizea, sizeb;
2487
2488 switch (mulmode)
2489 {
2490 case GRAMMAR_MUL:
2491 if (square) grammarsquareg(b);
2492 else grammarmulg(a,b);
2493 break;
2494 case FFT_MUL:
2495 if (square)
2496 FFTsquareg(b);
2497 else
2498 FFTmulg(a,b);
2499 break;
2500 case KARAT_MUL:
2501 if (square) karatsquareg(b);
2502 else karatmulg(a,b);
2503 break;
2504 case AUTO_MUL:
2505 sizea = abs(a->sign);
2506 sizeb = abs(b->sign);
2507 if((sizea > KARAT_BREAK) && (sizea <= FFT_BREAK) &&
2508 (sizeb > KARAT_BREAK) && (sizeb <= FFT_BREAK)){
2509 if(square) karatsquareg(b);
2510 else karatmulg(a,b);
2511
2512 } else {
2513 grammartime = (float)sizea;
2514 grammartime *= (float)sizeb;
2515 if (grammartime < FFT_BREAK * FFT_BREAK)
2516 {
2517 if (square) grammarsquareg(b);
2518 else grammarmulg(a,b);
2519 }
2520 else
2521 {
2522 if (square) FFTsquareg(b);
2523 else FFTmulg(a,b);
2524 }
2525 }
2526 break;
2527 }
2528 }
2529
2530 void
2531 justg(giant x) {
2532 int s = x->sign, sg = 1;
2533
2534 if(s<0) {
2535 sg = -1;
2536 s = -s;
2537 }
2538 --s;
2539 while(x->n[s] == 0) {
2540 --s;
2541 if(s < 0) break;
2542 }
2543 x->sign = sg*(s+1);
2544 }
2545
2546 /* Next, improved Karatsuba routines from A. Powell,
2547 improvements by G. Woltman. */
2548
2549 void
2550 karatmulg(giant x, giant y)
2551 /* y becomes x*y. */
2552 {
2553 int s = abs(x->sign), t = abs(y->sign), w, bits,
2554 sg = gsign(x)*gsign(y);
2555 giant a, b, c, d, e, f;
2556
2557 if((s <= KARAT_BREAK) || (t <= KARAT_BREAK)) {
2558 grammarmulg(x,y);
2559 return;
2560 }
2561 w = (s + t + 2)/4; bits = 16*w;
2562 a = popg(); b = popg(); c = popg();
2563 d = popg(); e = popg(); f = popg();
2564 gtog(x,a); absg(a); if (w <= s) {a->sign = w; justg(a);}
2565 gtog(x,b); absg(b);
2566 gshiftright(bits, b);
2567 gtog(y,c); absg(c); if (w <= t) {c->sign = w; justg(c);}
2568 gtog(y,d); absg(d);
2569 gshiftright(bits,d);
2570 gtog(a,e); normal_addg(b,e); /* e := (a + b) */
2571 gtog(c,f); normal_addg(d,f); /* f := (c + d) */
2572 karatmulg(e,f); /* f := (a + b)(c + d) */
2573 karatmulg(c,a); /* a := a c */
2574 karatmulg(d,b); /* b := b d */
2575 normal_subg(a,f);
2576 /* f := (a + b)(c + d) - a c */
2577 normal_subg(b,f);
2578 /* f := (a + b)(c + d) - a c - b d */
2579 gshiftleft(bits, b);
2580 normal_addg(f, b);
2581 gshiftleft(bits, b);
2582 normal_addg(a, b);
2583 gtog(b, y); y->sign *= sg;
2584 pushg(6);
2585
2586 return;
2587 }
2588
2589 void
2590 karatsquareg(giant x)
2591 /* x becomes x^2. */
2592 {
2593 int s = abs(x->sign), w, bits;
2594 giant a, b, c;
2595
2596 if(s <= KARAT_BREAK) {
2597 grammarsquareg(x);
2598 return;
2599 }
2600 w = (s+1)/2; bits = 16*w;
2601 a = popg(); b = popg(); c = popg();
2602 gtog(x, a); a->sign = w; justg(a);
2603 gtog(x, b); absg(b);
2604 gshiftright(bits, b);
2605 gtog(a,c); normal_addg(b,c);
2606 karatsquareg(c);
2607 karatsquareg(a);
2608 karatsquareg(b);
2609 normal_subg(b, c);
2610 normal_subg(a, c);
2611 gshiftleft(bits, b);
2612 normal_addg(c,b);
2613 gshiftleft(bits, b);
2614 normal_addg(a, b);
2615 gtog(b, x);
2616 pushg(3);
2617
2618 return;
2619 }
2620
2621 void
2622 grammarmulg(
2623 giant a,
2624 giant b
2625 )
2626 /* b becomes a*b. */
2627 {
2628 int i,j;
2629 unsigned int prod,carry=0;
2630 int asize = abs(a->sign), bsize = abs(b->sign);
2631 unsigned short *aptr,*bptr,*destptr;
2632 unsigned short mult;
2633 giant scratch = popg();
2634
2635 for (i=0; i<asize+bsize; ++i)
2636 {
2637 scratch->n[i]=0;
2638 }
2639
2640 bptr = &(b->n[0]);
2641 for (i=0; i<bsize; ++i)
2642 {
2643 mult = *(bptr++);
2644 if (mult)
2645 {
2646 carry = 0;
2647 aptr = &(a->n[0]);
2648 destptr = &(scratch->n[i]);
2649 for (j=0; j<asize; ++j)
2650 {
2651 prod = *(aptr++) * mult + *destptr + carry;
2652 *(destptr++) = (unsigned short)(prod & 0xffff);
2653 carry = prod >> 16;
2654 }
2655 *destptr = (unsigned short)carry;
2656 }
2657 }
2658 bsize+=asize;
2659 if (!carry)
2660 --bsize;
2661 scratch->sign = gsign(a)*gsign(b)*bsize;
2662 gtog(scratch,b);
2663 pushg(1);
2664 }
2665
2666
2667 void
2668 grammarsquareg (
2669 giant a
2670 )
2671 /* a := a^2. */
2672 {
2673 unsigned int cur_term;
2674 unsigned int prod, carry=0, temp;
2675 int asize = abs(a->sign), max = asize * 2 - 1;
2676 unsigned short *ptr = a->n, *ptr1, *ptr2;
2677 giant scratch;
2678
2679 if(asize == 0) {
2680 itog(0,a);
2681 return;
2682 }
2683
2684 scratch = popg();
2685
2686 asize--;
2687
2688 temp = *ptr;
2689 temp *= temp;
2690 scratch->n[0] = temp;
2691 carry = temp >> 16;
2692
2693 for (cur_term = 1; cur_term < max; cur_term++) {
2694 ptr1 = ptr2 = ptr;
2695 if (cur_term <= asize) {
2696 ptr2 += cur_term;
2697 } else {
2698 ptr1 += cur_term - asize;
2699 ptr2 += asize;
2700 }
2701 prod = carry & 0xFFFF;
2702 carry >>= 16;
2703 while(ptr1 < ptr2) {
2704 temp = *ptr1++ * *ptr2--;
2705 prod += (temp << 1) & 0xFFFF;
2706 carry += (temp >> 15);
2707 }
2708 if (ptr1 == ptr2) {
2709 temp = *ptr1;
2710 temp *= temp;
2711 prod += temp & 0xFFFF;
2712 carry += (temp >> 16);
2713 }
2714 carry += prod >> 16;
2715 scratch->n[cur_term] = (unsigned short) (prod);
2716 }
2717 if (carry) {
2718 scratch->n[cur_term] = carry;
2719 scratch->sign = cur_term+1;
2720 } else scratch->sign = cur_term;
2721
2722 gtog(scratch,a);
2723 pushg(1);
2724 }
2725
2726
2727 /**************************************************************
2728 *
2729 * FFT multiply Functions
2730 *
2731 **************************************************************/
2732
2733 int initL = 0;
2734
2735 int
2736 lpt(
2737 int n,
2738 int *lambda
2739 )
2740 /* Returns least power of two greater than n. */
2741 {
2742 register int i = 1;
2743
2744 *lambda = 0;
2745 while (i<n)
2746 {
2747 i<<=1;
2748 ++(*lambda);
2749 }
2750 return(i);
2751 }
2752
2753
2754 void
2755 addsignal(
2756 giant x,
2757 double *z,
2758 int n
2759 )
2760 {
2761 register int j, k, m, car, last;
2762 register double f, g,err;
2763
2764 maxFFTerror = 0;
2765 last = 0;
2766 for (j=0;j<n;j++)
2767 {
2768 f = gfloor(z[j]+0.5);
2769 if(f != 0.0) last = j;
2770 if (checkFFTerror)
2771 {
2772 err = fabs(f - z[j]);
2773 if (err > maxFFTerror)
2774 maxFFTerror = err;
2775 }
2776 z[j] =0;
2777 k = 0;
2778 do
2779 {
2780 g = gfloor(f*TWOM16);
2781 z[j+k] += f-g*TWO16;
2782 ++k;
2783 f=g;
2784 } while(f != 0.0);
2785 }
2786 car = 0;
2787 for(j=0;j < last + 1;j++)
2788 {
2789 m = (int)(z[j]+car);
2790 x->n[j] = (unsigned short)(m & 0xffff);
2791 car = (m>>16);
2792 }
2793 if (car)
2794 x->n[j] = (unsigned short)car;
2795 else
2796 --j;
2797
2798 while(!(x->n[j])) --j;
2799
2800 x->sign = j+1;
2801 }
2802
2803
2804 void
2805 FFTsquareg(
2806 giant x
2807 )
2808 {
2809 int j,size = abs(x->sign);
2810 register int L;
2811
2812 if (size<4)
2813 {
2814 grammarmulg(x,x);
2815 return;
2816 }
2817 L = lpt(size+size, &j);
2818 if(!z) z = (double *)malloc(MAX_SHORTS * sizeof(double));
2819 giant_to_double(x, size, z, L);
2820 fft_real_to_hermitian(z, L);
2821 square_hermitian(z, L);
2822 fftinv_hermitian_to_real(z, L);
2823 addsignal(x,z,L);
2824 x->sign = abs(x->sign);
2825 }
2826
2827
2828 void
2829 FFTmulg(
2830 giant y,
2831 giant x
2832 )
2833 {
2834 /* x becomes y*x. */
2835 int lambda, sizex = abs(x->sign), sizey = abs(y->sign);
2836 int finalsign = gsign(x)*gsign(y);
2837 register int L;
2838
2839 if ((sizex<=4)||(sizey<=4))
2840 {
2841 grammarmulg(y,x);
2842 return;
2843 }
2844 L = lpt(sizex+sizey, &lambda);
2845 if(!z) z = (double *)malloc(MAX_SHORTS * sizeof(double));
2846 if(!z2) z2 = (double *)malloc(MAX_SHORTS * sizeof(double));
2847
2848 giant_to_double(x, sizex, z, L);
2849 giant_to_double(y, sizey, z2, L);
2850 fft_real_to_hermitian(z, L);
2851 fft_real_to_hermitian(z2, L);
2852 mul_hermitian(z2, z, L);
2853 fftinv_hermitian_to_real(z, L);
2854 addsignal(x,z,L);
2855 x->sign = finalsign*abs(x->sign);
2856 }
2857
2858
2859 void
2860 scramble_real(
2861 double *x,
2862 int n
2863 )
2864 {
2865 register int i,j,k;
2866 register double tmp;
2867
2868 for (i=0,j=0;i<n-1;i++)
2869 {
2870 if (i<j)
2871 {
2872 tmp = x[j];
2873 x[j]=x[i];
2874 x[i]=tmp;
2875 }
2876 k = n/2;
2877 while (k<=j)
2878 {
2879 j -= k;
2880 k>>=1;
2881 }
2882 j += k;
2883 }
2884 }
2885
2886
2887 void
2888 fft_real_to_hermitian(
2889 double *z,
2890 int n
2891 )
2892 /* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
2893 * This is a decimation-in-time, split-radix algorithm.
2894 */
2895 {
2896 register double cc1, ss1, cc3, ss3;
2897 register int is, id, i0, i1, i2, i3, i4, i5, i6, i7, i8,
2898 a, a3, b, b3, nminus = n-1, dil, expand;
2899 register double *x, e;
2900 int nn = n>>1;
2901 double t1, t2, t3, t4, t5, t6;
2902 register int n2, n4, n8, i, j;
2903
2904 init_sinCos(n);
2905 expand = cur_run/n;
2906 scramble_real(z, n);
2907 x = z-1; /* FORTRAN compatibility. */
2908 is = 1;
2909 id = 4;
2910 do
2911 {
2912 for (i0=is;i0<=n;i0+=id)
2913 {
2914 i1 = i0+1;
2915 e = x[i0];
2916 x[i0] = e + x[i1];
2917 x[i1] = e - x[i1];
2918 }
2919 is = (id<<1)-1;
2920 id <<= 2;
2921 } while(is<n);
2922
2923 n2 = 2;
2924 while(nn>>=1)
2925 {
2926 n2 <<= 1;
2927 n4 = n2>>2;
2928 n8 = n2>>3;
2929 is = 0;
2930 id = n2<<1;
2931 do
2932 {
2933 for (i=is;i<n;i+=id)
2934 {
2935 i1 = i+1;
2936 i2 = i1 + n4;
2937 i3 = i2 + n4;
2938 i4 = i3 + n4;
2939 t1 = x[i4]+x[i3];
2940 x[i4] -= x[i3];
2941 x[i3] = x[i1] - t1;
2942 x[i1] += t1;
2943 if (n4==1)
2944 continue;
2945 i1 += n8;
2946 i2 += n8;
2947 i3 += n8;
2948 i4 += n8;
2949 t1 = (x[i3]+x[i4])*SQRTHALF;
2950 t2 = (x[i3]-x[i4])*SQRTHALF;
2951 x[i4] = x[i2] - t1;
2952 x[i3] = -x[i2] - t1;
2953 x[i2] = x[i1] - t2;
2954 x[i1] += t2;
2955 }
2956 is = (id<<1) - n2;
2957 id <<= 2;
2958 } while(is<n);
2959 dil = n/n2;
2960 a = dil;
2961 for (j=2;j<=n8;j++)
2962 {
2963 a3 = (a+(a<<1))&nminus;
2964 b = a*expand;
2965 b3 = a3*expand;
2966 cc1 = s_cos(b);
2967 ss1 = s_sin(b);
2968 cc3 = s_cos(b3);
2969 ss3 = s_sin(b3);
2970 a = (a+dil)&nminus;
2971 is = 0;
2972 id = n2<<1;
2973 do
2974 {
2975 for(i=is;i<n;i+=id)
2976 {
2977 i1 = i+j;
2978 i2 = i1 + n4;
2979 i3 = i2 + n4;
2980 i4 = i3 + n4;
2981 i5 = i + n4 - j + 2;
2982 i6 = i5 + n4;
2983 i7 = i6 + n4;
2984 i8 = i7 + n4;
2985 t1 = x[i3]*cc1 + x[i7]*ss1;
2986 t2 = x[i7]*cc1 - x[i3]*ss1;
2987 t3 = x[i4]*cc3 + x[i8]*ss3;
2988 t4 = x[i8]*cc3 - x[i4]*ss3;
2989 t5 = t1 + t3;
2990 t6 = t2 + t4;
2991 t3 = t1 - t3;
2992 t4 = t2 - t4;
2993 t2 = x[i6] + t6;
2994 x[i3] = t6 - x[i6];
2995 x[i8] = t2;
2996 t2 = x[i2] - t3;
2997 x[i7] = -x[i2] - t3;
2998 x[i4] = t2;
2999 t1 = x[i1] + t5;
3000 x[i6] = x[i1] - t5;
3001 x[i1] = t1;
3002 t1 = x[i5] + t4;
3003 x[i5] -= t4;
3004 x[i2] = t1;
3005 }
3006 is = (id<<1) - n2;
3007 id <<= 2;
3008 } while(is<n);
3009 }
3010 }
3011 }
3012
3013
3014 void
3015 fftinv_hermitian_to_real(
3016 double *z,
3017 int n
3018 )
3019 /* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
3020 * This is a decimation-in-frequency, split-radix algorithm.
3021 */
3022 {
3023 register double cc1, ss1, cc3, ss3;
3024 register int is, id, i0, i1, i2, i3, i4, i5, i6, i7, i8,
3025 a, a3, b, b3, nminus = n-1, dil, expand;
3026 register double *x, e;
3027 int nn = n>>1;
3028 double t1, t2, t3, t4, t5;
3029 int n2, n4, n8, i, j;
3030
3031 init_sinCos(n);
3032 expand = cur_run/n;
3033 x = z-1;
3034 n2 = n<<1;
3035 while(nn >>= 1)
3036 {
3037 is = 0;
3038 id = n2;
3039 n2 >>= 1;
3040 n4 = n2>>2;
3041 n8 = n4>>1;
3042 do
3043 {
3044 for(i=is;i<n;i+=id)
3045 {
3046 i1 = i+1;
3047 i2 = i1 + n4;
3048 i3 = i2 + n4;
3049 i4 = i3 + n4;
3050 t1 = x[i1] - x[i3];
3051 x[i1] += x[i3];
3052 x[i2] += x[i2];
3053 x[i3] = t1 - 2.0*x[i4];
3054 x[i4] = t1 + 2.0*x[i4];
3055 if (n4==1)
3056 continue;
3057 i1 += n8;
3058 i2 += n8;
3059 i3 += n8;
3060 i4 += n8;
3061 t1 = (x[i2]-x[i1])*SQRTHALF;
3062 t2 = (x[i4]+x[i3])*SQRTHALF;
3063 x[i1] += x[i2];
3064 x[i2] = x[i4]-x[i3];
3065 x[i3] = -2.0*(t2+t1);
3066 x[i4] = 2.0*(t1-t2);
3067 }
3068 is = (id<<1) - n2;
3069 id <<= 2;
3070 } while (is<n-1);
3071 dil = n/n2;
3072 a = dil;
3073 for (j=2;j<=n8;j++)
3074 {
3075 a3 = (a+(a<<1))&nminus;
3076 b = a*expand;
3077 b3 = a3*expand;
3078 cc1 = s_cos(b);
3079 ss1 = s_sin(b);
3080 cc3 = s_cos(b3);
3081 ss3 = s_sin(b3);
3082 a = (a+dil)&nminus;
3083 is = 0;
3084 id = n2<<1;
3085 do
3086 {
3087 for(i=is;i<n;i+=id)
3088 {
3089 i1 = i+j;
3090 i2 = i1+n4;
3091 i3 = i2+n4;
3092 i4 = i3+n4;
3093 i5 = i+n4-j+2;
3094 i6 = i5+n4;
3095 i7 = i6+n4;
3096 i8 = i7+n4;
3097 t1 = x[i1] - x[i6];
3098 x[i1] += x[i6];
3099 t2 = x[i5] - x[i2];
3100 x[i5] += x[i2];
3101 t3 = x[i8] + x[i3];
3102 x[i6] = x[i8] - x[i3];
3103 t4 = x[i4] + x[i7];
3104 x[i2] = x[i4] - x[i7];
3105 t5 = t1 - t4;
3106 t1 += t4;
3107 t4 = t2 - t3;
3108 t2 += t3;
3109 x[i3] = t5*cc1 + t4*ss1;
3110 x[i7] = -t4*cc1 + t5*ss1;
3111 x[i4] = t1*cc3 - t2*ss3;
3112 x[i8] = t2*cc3 + t1*ss3;
3113 }
3114 is = (id<<1) - n2;
3115 id <<= 2;
3116 } while(is<n-1);
3117 }
3118 }
3119 is = 1;
3120 id = 4;
3121 do
3122 {
3123 for (i0=is;i0<=n;i0+=id)
3124 {
3125 i1 = i0+1;
3126 e = x[i0];
3127 x[i0] = e + x[i1];
3128 x[i1] = e - x[i1];
3129 }
3130 is = (id<<1) - 1;
3131 id <<= 2;
3132 } while(is<n);
3133 scramble_real(z, n);
3134 e = 1/(double)n;
3135 for (i=0;i<n;i++)
3136 {
3137 z[i] *= e;
3138 }
3139 }
3140
3141
3142 void
3143 mul_hermitian(
3144 double *a,
3145 double *b,
3146 int n
3147 )
3148 {
3149 register int k, half = n>>1;
3150 register double aa, bb, am, bm;
3151
3152 b[0] *= a[0];
3153 b[half] *= a[half];
3154 for (k=1;k<half;k++)
3155 {
3156 aa = a[k];
3157 bb = b[k];
3158 am = a[n-k];
3159 bm = b[n-k];
3160 b[k] = aa*bb - am*bm;
3161 b[n-k] = aa*bm + am*bb;
3162 }
3163 }
3164
3165
3166 void
3167 square_hermitian(
3168 double *b,
3169 int n
3170 )
3171 {
3172 register int k, half = n>>1;
3173 register double c, d;
3174
3175 b[0] *= b[0];
3176 b[half] *= b[half];
3177 for (k=1;k<half;k++)
3178 {
3179 c = b[k];
3180 d = b[n-k];
3181 b[n-k] = 2.0*c*d;
3182 b[k] = (c+d)*(c-d);
3183 }
3184 }
3185
3186
3187 void
3188 giant_to_double
3189 (
3190 giant x,
3191 int sizex,
3192 double *z,
3193 int L
3194 )
3195 {
3196 register int j;
3197
3198 for (j=sizex;j<L;j++)
3199 {
3200 z[j]=0.0;
3201 }
3202 for (j=0;j<sizex;j++)
3203 {
3204 z[j] = x->n[j];
3205 }
3206 }
3207
3208
3209 void
3210 gswap(
3211 giant *p,
3212 giant *q
3213 )
3214 {
3215 giant t;
3216
3217 t = *p;
3218 *p = *q;
3219 *q = t;
3220 }
3221
3222
3223 void
3224 onestep(
3225 giant x,
3226 giant y,
3227 gmatrix A
3228 )
3229 /* Do one step of the euclidean algorithm and modify
3230 * the matrix A accordingly. */
3231 {
3232 giant s4 = popg();
3233
3234 gtog(x,s4);
3235 gtog(y,x);
3236 gtog(s4,y);
3237 divg(x,s4);
3238 punch(s4,A);
3239 mulg(x,s4);
3240 subg(s4,y);
3241
3242 pushg(1);
3243 }
3244
3245
3246 void
3247 mulvM(
3248 gmatrix A,
3249 giant x,
3250 giant y
3251 )
3252 /* Multiply vector by Matrix; changes x,y. */
3253 {
3254 giant s0 = popg(), s1 = popg();
3255
3256 gtog(A->ur,s0);
3257 gtog( A->lr,s1);
3258 dotproduct(x,y,A->ul,s0);
3259 dotproduct(x,y,A->ll,s1);
3260 gtog(s0,x);
3261 gtog(s1,y);
3262
3263 pushg(2);
3264 }
3265
3266
3267 void
3268 mulmM(
3269 gmatrix A,
3270 gmatrix B
3271 )
3272 /* Multiply matrix by Matrix; changes second matrix. */
3273 {
3274 giant s0 = popg();
3275 giant s1 = popg();
3276 giant s2 = popg();
3277 giant s3 = popg();
3278
3279 gtog(B->ul,s0);
3280 gtog(B->ur,s1);
3281 gtog(B->ll,s2);
3282 gtog(B->lr,s3);
3283 dotproduct(A->ur,A->ul,B->ll,s0);
3284 dotproduct(A->ur,A->ul,B->lr,s1);
3285 dotproduct(A->ll,A->lr,B->ul,s2);
3286 dotproduct(A->ll,A->lr,B->ur,s3);
3287 gtog(s0,B->ul);
3288 gtog(s1,B->ur);
3289 gtog(s2,B->ll);
3290 gtog(s3,B->lr);
3291
3292 pushg(4);
3293 }
3294
3295
3296 void
3297 writeM(
3298 gmatrix A
3299 )
3300 {
3301 printf(" ul:");
3302 gout(A->ul);
3303 printf(" ur:");
3304 gout(A->ur);
3305 printf(" ll:");
3306 gout(A->ll);
3307 printf(" lr:");
3308 gout(A->lr);
3309 }
3310
3311
3312 void
3313 punch(
3314 giant q,
3315 gmatrix A
3316 )
3317 /* Multiply the matrix A on the left by [0,1,1,-q]. */
3318 {
3319 giant s0 = popg();
3320
3321 gtog(A->ll,s0);
3322 mulg(q,A->ll);
3323 gswap(&A->ul,&A->ll);
3324 subg(A->ul,A->ll);
3325 gtog(s0,A->ul);
3326 gtog(A->lr,s0);
3327 mulg(q,A->lr);
3328 gswap(&A->ur,&A->lr);
3329 subg(A->ur,A->lr);
3330 gtog(s0,A->ur);
3331
3332 pushg(1);
3333 }
3334
3335
3336 static void
3337 dotproduct(
3338 giant a,
3339 giant b,
3340 giant c,
3341 giant d
3342 )
3343 /* Replace last argument with the dot product of two 2-vectors. */
3344 {
3345 giant s4 = popg();
3346
3347 gtog(c,s4);
3348 mulg(a, s4);
3349 mulg(b,d);
3350 addg(s4,d);
3351
3352 pushg(1);
3353 }
3354
3355
3356 void
3357 ggcd(
3358 giant xx,
3359 giant yy
3360 )
3361 /* A giant gcd. Modifies its arguments. */
3362 {
3363 giant x = popg(), y = popg();
3364 gmatrix A = newgmatrix();
3365
3366 gtog(xx,x); gtog(yy,y);
3367 for(;;)
3368 {
3369 fix(&x,&y);
3370 if (bitlen(y) <= GCDLIMIT )
3371 break;
3372 A->ul = popg();
3373 A->ur = popg();
3374 A->ll = popg();
3375 A->lr = popg();
3376 itog(1,A->ul);
3377 itog(0,A->ur);
3378 itog(0,A->ll);
3379 itog(1,A->lr);
3380 hgcd(0,x,y,A);
3381 mulvM(A,x,y);
3382 pushg(4);
3383 fix(&x,&y);
3384 if (bitlen(y) <= GCDLIMIT )
3385 break;
3386 modg(y,x);
3387 gswap(&x,&y);
3388 }
3389 bgcdg(x,y);
3390 gtog(y,yy);
3391 pushg(2);
3392 free(A);
3393 }
3394
3395
3396 void
3397 fix(
3398 giant *p,
3399 giant *q
3400 )
3401 /* Insure that x > y >= 0. */
3402 {
3403 if( gsign(*p) < 0 )
3404 negg(*p);
3405 if( gsign(*q) < 0 )
3406 negg(*q);
3407 if( gcompg(*p,*q) < 0 )
3408 gswap(p,q);
3409 }
3410
3411
3412 void
3413 hgcd(
3414 int n,
3415 giant xx,
3416 giant yy,
3417 gmatrix A
3418 )
3419 /* hgcd(n,x,y,A) chops n bits off x and y and computes th
3420 * 2 by 2 matrix A such that A[x y] is the pair of terms
3421 * in the remainder sequence starting with x,y that is
3422 * half the size of x. Note that the argument A is modified
3423 * but that the arguments xx and yy are left unchanged.
3424 */
3425 {
3426 giant x, y;
3427
3428 if (isZero(yy))
3429 return;
3430
3431 x = popg();
3432 y = popg();
3433 gtog(xx,x);
3434 gtog(yy,y);
3435 gshiftright(n,x);
3436 gshiftright(n,y);
3437 if (bitlen(x) <= INTLIMIT )
3438 {
3439 shgcd(gtoi(x),gtoi(y),A);
3440 }
3441 else
3442 {
3443 gmatrix B = newgmatrix();
3444 int m = bitlen(x)/2;
3445
3446 hgcd(m,x,y,A);
3447 mulvM(A,x,y);
3448 if (gsign(x) < 0)
3449 {
3450 negg(x); negg(A->ul); negg(A->ur);
3451 }
3452 if (gsign(y) < 0)
3453 {
3454 negg(y); negg(A->ll); negg(A->lr);
3455 }
3456 if (gcompg(x,y) < 0)
3457 {
3458 gswap(&x,&y);
3459 gswap(&A->ul,&A->ll);
3460 gswap(&A->ur,&A->lr);
3461 }
3462 if (!isZero(y))
3463 {
3464 onestep(x,y,A);
3465 m /= 2;
3466 B->ul = popg();
3467 B->ur = popg();
3468 B->ll = popg();
3469 B->lr = popg();
3470 itog(1,B->ul);
3471 itog(0,B->ur);
3472 itog(0,B->ll);
3473 itog(1,B->lr);
3474 hgcd(m,x,y,B);
3475 mulmM(B,A);
3476 pushg(4);
3477 }
3478 free(B);
3479 }
3480 pushg(2);
3481 }
3482
3483
3484 void
3485 shgcd(
3486 register int x,
3487 register int y,
3488 gmatrix A
3489 )
3490 /*
3491 * Do a half gcd on the integers a and b, putting the result in A
3492 * It is fairly easy to use the 2 by 2 matrix description of the
3493 * extended Euclidean algorithm to prove that the quantity q*t
3494 * never overflows.
3495 */
3496 {
3497 register int q, t, start = x;
3498 int Aul = 1, Aur = 0, All = 0, Alr = 1;
3499
3500 while (y != 0 && y > start/y)
3501 {
3502 q = x/y;
3503 t = y;
3504 y = x%y;
3505 x = t;
3506 t = All;
3507 All = Aul-q*t;
3508 Aul = t;
3509 t = Alr;
3510 Alr = Aur-q*t;
3511 Aur = t;
3512 }
3513 itog(Aul,A->ul);
3514 itog(Aur,A->ur);
3515 itog(All,A->ll);
3516 itog(Alr,A->lr);
3517 }