]>
Commit | Line | Data |
---|---|---|
b1ab9ed8 A |
1 | /* Copyright (c) 1998 Apple Computer, Inc. All rights reserved. |
2 | * | |
3 | * NOTICE: USE OF THE MATERIALS ACCOMPANYING THIS NOTICE IS SUBJECT | |
4 | * TO THE TERMS OF THE SIGNED "FAST ELLIPTIC ENCRYPTION (FEE) REFERENCE | |
5 | * SOURCE CODE EVALUATION AGREEMENT" BETWEEN APPLE COMPUTER, INC. AND THE | |
6 | * ORIGINAL LICENSEE THAT OBTAINED THESE MATERIALS FROM APPLE COMPUTER, | |
7 | * INC. ANY USE OF THESE MATERIALS NOT PERMITTED BY SUCH AGREEMENT WILL | |
8 | * EXPOSE YOU TO LIABILITY. | |
9 | *************************************************************************** | |
10 | ||
11 | giantFFT.c | |
12 | Library for large-integer arithmetic via FFT. Currently unused | |
13 | in CryptKit. | |
14 | ||
15 | R. E. Crandall, Scientific Computation Group, NeXT Computer, Inc. | |
16 | ||
17 | Revision History | |
18 | ---------------- | |
19 | 19 Jan 1998 Doug Mitchell at Apple | |
20 | Split off from NSGiantIntegers.c. | |
21 | ||
22 | */ | |
23 | ||
24 | /* | |
25 | * FIXME - make sure platform-specific math lib has floor(), fmod(), | |
26 | * sin(), pow() | |
27 | */ | |
28 | #include <math.h> | |
29 | #include "NSGiantIntegers.h" | |
30 | ||
31 | #define AUTO_MUL 0 | |
32 | #define GRAMMAR_MUL 1 | |
33 | #define FFT_MUL 2 | |
34 | ||
35 | #define TWOPI (double)(2*3.1415926535897932384626433) | |
36 | #define SQRT2 (double)(1.414213562373095048801688724209) | |
37 | #define SQRTHALF (double)(0.707106781186547524400844362104) | |
38 | #define TWO16 (double)(65536.0) | |
39 | #define TWOM16 (double)(0.0000152587890625) | |
40 | #define BREAK_SHORTS 400 // Number of shorts at which FFT breaks over. | |
41 | ||
42 | static int lpt(int n, int *lambda); | |
43 | static void mul_hermitian(double *a, double *b, int n) ; | |
44 | static void square_hermitian(double *b, int n); | |
45 | static void addsignal(giant x, double *zs, int n); | |
46 | static void scramble_real(double *x, int n); | |
47 | static void fft_real_to_hermitian(double *zs, int n); | |
48 | static void fftinv_hermitian_to_real(double *zs, int n); | |
49 | static void GiantFFTSquare(giant gx); | |
50 | static void GiantFFTMul(giant,giant); | |
51 | static void giant_to_double(giant x, int sizex, double *zs, int L); | |
52 | ||
53 | static int mulmode = AUTO_MUL; | |
54 | ||
55 | void mulg(giant a, giant b) { /* b becomes a*b. */ | |
56 | PROF_START; | |
57 | INCR_MULGS; | |
58 | GiantAuxMul(a,b); | |
59 | #if FEE_DEBUG | |
60 | (void)bitlen(b); // XXX | |
61 | #endif FEE_DEBUG | |
62 | PROF_END(mulgTime); | |
63 | PROF_INCR(numMulg); | |
64 | } | |
65 | ||
66 | static void GiantAuxMul(giant a, giant b) { | |
67 | /* Optimized general multiply, b becomes a*b. Modes are: | |
68 | AUTO_MUL: switch according to empirical speed criteria. | |
69 | GRAMMAR_MUL: force grammar-school algorithm. | |
70 | FFT_MUL: force floating point FFT method. | |
71 | */ | |
72 | int square = (a==b); | |
73 | ||
74 | if (isZero(b)) return; | |
75 | if (isZero(a)) { | |
76 | gtog(a, b); | |
77 | return; | |
78 | } | |
79 | switch(mulmode) { | |
80 | case GRAMMAR_MUL: | |
81 | GiantGrammarMul(a,b); | |
82 | break; | |
83 | case FFT_MUL: | |
84 | if (square) { | |
85 | GiantFFTSquare(b); | |
86 | } | |
87 | else { | |
88 | GiantFFTMul(a,b); | |
89 | } | |
90 | break; | |
91 | case AUTO_MUL: { | |
92 | int sizea, sizeb; | |
93 | float grammartime; | |
94 | sizea = abs(a->sign); | |
95 | sizeb = abs(b->sign); | |
96 | grammartime = sizea; grammartime *= sizeb; | |
97 | if(grammartime < BREAK_SHORTS*BREAK_SHORTS) { | |
98 | GiantGrammarMul(a,b); | |
99 | } | |
100 | else { | |
101 | if (square) GiantFFTSquare(b); | |
102 | else GiantFFTMul(a,b); | |
103 | } | |
104 | break; | |
105 | } | |
106 | } | |
107 | } | |
108 | ||
109 | /***************** Commence FFT multiply routines ****************/ | |
110 | ||
111 | static int CurrentRun = 0; | |
112 | double *sincos = NULL; | |
113 | static void init_sincos(int n) { | |
114 | int j; | |
115 | double e = TWOPI/n; | |
116 | ||
117 | if (n <= CurrentRun) return; | |
118 | CurrentRun = n; | |
119 | if (sincos) free(sincos); | |
120 | sincos = (double *)malloc(sizeof(double)*(1+(n>>2))); | |
121 | for(j=0;j<=(n>>2);j++) { | |
122 | sincos[j] = sin(e*j); | |
123 | } | |
124 | } | |
125 | ||
126 | static double s_sin(int n) { | |
127 | int seg = n/(CurrentRun>>2); | |
128 | ||
129 | switch(seg) { | |
130 | case 0: return(sincos[n]); | |
131 | case 1: return(sincos[(CurrentRun>>1)-n]); | |
132 | case 2: return(-sincos[n-(CurrentRun>>1)]); | |
133 | case 3: | |
134 | default: return(-sincos[CurrentRun-n]); | |
135 | } | |
136 | } | |
137 | ||
138 | static double s_cos(int n) { | |
139 | int quart = (CurrentRun>>2); | |
140 | ||
141 | if (n < quart) return(s_sin(n+quart)); | |
142 | return(-s_sin(n-quart)); | |
143 | } | |
144 | ||
145 | ||
146 | static int lpt(int n, int *lambda) { | |
147 | /* returns least power of two greater than n */ | |
148 | register int i = 1; | |
149 | ||
150 | *lambda = 0; | |
151 | while(i<n) { | |
152 | i<<=1; | |
153 | ++(*lambda); | |
154 | } | |
155 | return(i); | |
156 | } | |
157 | ||
158 | static void addsignal(giant x, double *zs, int n) { | |
159 | register int j, k, m, car; | |
160 | register double f, g; | |
161 | /*double err, maxerr = 0.0;*/ | |
162 | ||
163 | for(j=0;j<n;j++) { | |
164 | f = floor(zs[j]+0.5); | |
165 | ||
166 | /* err = fabs(zs[j]-f); | |
167 | if(err>maxerr) maxerr = err; | |
168 | */ | |
169 | ||
170 | zs[j] =0; | |
171 | k = 0; | |
172 | do{ | |
173 | g = floor(f*TWOM16); | |
174 | zs[j+k] += f-g*TWO16; | |
175 | ++k; | |
176 | f=g; | |
177 | } while(f != 0.0); | |
178 | } | |
179 | car = 0; | |
180 | for(j=0;j<n;j++) { | |
181 | m = zs[j]+car; | |
182 | x->n[j] = m & 0xffff; | |
183 | car = (m>>16); | |
184 | } | |
185 | if(car) x->n[j] = car; | |
186 | else --j; | |
187 | while(!(x->n[j])) --j; | |
188 | x->sign = j+1; | |
189 | if (abs(x->sign) > x->capacity) NSGiantRaise("addsignal overflow"); | |
190 | } | |
191 | ||
192 | static void GiantFFTSquare(giant gx) { | |
193 | int j,size = abs(gx->sign); | |
194 | register int L; | |
195 | ||
196 | if(size<4) { GiantGrammarMul(gx,gx); return; } | |
197 | L = lpt(size+size, &j); | |
198 | { | |
199 | //was...double doubles[L]; | |
200 | //is... | |
201 | double *doubles = malloc(sizeof(double) * L); | |
202 | // end | |
203 | giant_to_double(gx, size, doubles, L); | |
204 | fft_real_to_hermitian(doubles, L); | |
205 | square_hermitian(doubles, L); | |
206 | fftinv_hermitian_to_real(doubles, L); | |
207 | addsignal(gx, doubles, L); | |
208 | // new | |
209 | free(doubles); | |
210 | } | |
211 | gx->sign = abs(gx->sign); | |
212 | bitlen(gx); // XXX | |
213 | if (abs(gx->sign) > gx->capacity) NSGiantRaise("GiantFFTSquare overflow"); | |
214 | } | |
215 | ||
216 | static void GiantFFTMul(giant y, giant x) { /* x becomes y*x. */ | |
217 | int lambda, size, sizex = abs(x->sign), sizey = abs(y->sign); | |
218 | int finalsign = gsign(x)*gsign(y); | |
219 | register int L; | |
220 | ||
221 | if((sizex<=4)||(sizey<=4)) { GiantGrammarMul(y,x); return; } | |
222 | size = sizex; if(size<sizey) size=sizey; | |
223 | L = lpt(size+size, &lambda); | |
224 | { | |
225 | //double doubles1[L]; | |
226 | //double doubles2[L]; | |
227 | double *doubles1 = malloc(sizeof(double) * L); | |
228 | double *doubles2 = malloc(sizeof(double) * L); | |
229 | ||
230 | giant_to_double(x, sizex, doubles1, L); | |
231 | giant_to_double(y, sizey, doubles2, L); | |
232 | fft_real_to_hermitian(doubles1, L); | |
233 | fft_real_to_hermitian(doubles2, L); | |
234 | mul_hermitian(doubles2, doubles1, L); | |
235 | fftinv_hermitian_to_real(doubles1, L); | |
236 | addsignal(x, doubles1, L); | |
237 | ||
238 | free(doubles1); | |
239 | free(doubles2); | |
240 | } | |
241 | x->sign = finalsign*abs(x->sign); | |
242 | bitlen(x); // XXX | |
243 | if (abs(x->sign) > x->capacity) NSGiantRaise("GiantFFTMul overflow"); | |
244 | } | |
245 | ||
246 | static void scramble_real(double *x, int n) { | |
247 | register int i,j,k; | |
248 | register double tmp; | |
249 | ||
250 | for(i=0,j=0;i<n-1;i++) { | |
251 | if(i<j) { | |
252 | tmp = x[j]; | |
253 | x[j]=x[i]; | |
254 | x[i]=tmp; | |
255 | } | |
256 | k = n/2; | |
257 | while(k<=j) { | |
258 | j -= k; | |
259 | k>>=1; | |
260 | } | |
261 | j += k; | |
262 | } | |
263 | } | |
264 | ||
265 | static void fft_real_to_hermitian(double *zs, int n) { | |
266 | /* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]). | |
267 | This is a decimation-in-time, split-radix algorithm. | |
268 | */ | |
269 | register double cc1, ss1, cc3, ss3; | |
270 | register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8, | |
271 | a, a3, b, b3, nminus = n-1, dil, expand; | |
272 | register double *x, e; | |
273 | int nn = n>>1; | |
274 | double t1, t2, t3, t4, t5, t6; | |
275 | register int n2, n4, n8, i, j; | |
276 | ||
277 | init_sincos(n); | |
278 | expand = CurrentRun/n; | |
279 | scramble_real(zs, n); | |
280 | x = zs-1; /* FORTRAN compatibility. */ | |
281 | is = 1; | |
282 | iD = 4; | |
283 | do{ | |
284 | for(i0=is;i0<=n;i0+=iD) { | |
285 | i1 = i0+1; | |
286 | e = x[i0]; | |
287 | x[i0] = e + x[i1]; | |
288 | x[i1] = e - x[i1]; | |
289 | } | |
290 | is = (iD<<1)-1; | |
291 | iD <<= 2; | |
292 | } while(is<n); | |
293 | n2 = 2; | |
294 | while(nn>>=1) { | |
295 | n2 <<= 1; | |
296 | n4 = n2>>2; | |
297 | n8 = n2>>3; | |
298 | is = 0; | |
299 | iD = n2<<1; | |
300 | do { | |
301 | for(i=is;i<n;i+=iD) { | |
302 | i1 = i+1; | |
303 | i2 = i1 + n4; | |
304 | i3 = i2 + n4; | |
305 | i4 = i3 + n4; | |
306 | t1 = x[i4]+x[i3]; | |
307 | x[i4] -= x[i3]; | |
308 | x[i3] = x[i1] - t1; | |
309 | x[i1] += t1; | |
310 | if(n4==1) continue; | |
311 | i1 += n8; | |
312 | i2 += n8; | |
313 | i3 += n8; | |
314 | i4 += n8; | |
315 | t1 = (x[i3]+x[i4])*SQRTHALF; | |
316 | t2 = (x[i3]-x[i4])*SQRTHALF; | |
317 | x[i4] = x[i2] - t1; | |
318 | x[i3] = -x[i2] - t1; | |
319 | x[i2] = x[i1] - t2; | |
320 | x[i1] += t2; | |
321 | } | |
322 | is = (iD<<1) - n2; | |
323 | iD <<= 2; | |
324 | } while(is<n); | |
325 | dil = n/n2; | |
326 | a = dil; | |
327 | for(j=2;j<=n8;j++) { | |
328 | a3 = (a+(a<<1))&nminus; | |
329 | b = a*expand; | |
330 | b3 = a3*expand; | |
331 | cc1 = s_cos(b); | |
332 | ss1 = s_sin(b); | |
333 | cc3 = s_cos(b3); | |
334 | ss3 = s_sin(b3); | |
335 | a = (a+dil)&nminus; | |
336 | is = 0; | |
337 | iD = n2<<1; | |
338 | do { | |
339 | for(i=is;i<n;i+=iD) { | |
340 | i1 = i+j; | |
341 | i2 = i1 + n4; | |
342 | i3 = i2 + n4; | |
343 | i4 = i3 + n4; | |
344 | i5 = i + n4 - j + 2; | |
345 | i6 = i5 + n4; | |
346 | i7 = i6 + n4; | |
347 | i8 = i7 + n4; | |
348 | t1 = x[i3]*cc1 + x[i7]*ss1; | |
349 | t2 = x[i7]*cc1 - x[i3]*ss1; | |
350 | t3 = x[i4]*cc3 + x[i8]*ss3; | |
351 | t4 = x[i8]*cc3 - x[i4]*ss3; | |
352 | t5 = t1 + t3; | |
353 | t6 = t2 + t4; | |
354 | t3 = t1 - t3; | |
355 | t4 = t2 - t4; | |
356 | t2 = x[i6] + t6; | |
357 | x[i3] = t6 - x[i6]; | |
358 | x[i8] = t2; | |
359 | t2 = x[i2] - t3; | |
360 | x[i7] = -x[i2] - t3; | |
361 | x[i4] = t2; | |
362 | t1 = x[i1] + t5; | |
363 | x[i6] = x[i1] - t5; | |
364 | x[i1] = t1; | |
365 | t1 = x[i5] + t4; | |
366 | x[i5] -= t4; | |
367 | x[i2] = t1; | |
368 | } | |
369 | is = (iD<<1) - n2; | |
370 | iD <<= 2; | |
371 | } while(is<n); | |
372 | } | |
373 | } | |
374 | } | |
375 | ||
376 | static void fftinv_hermitian_to_real(double *zs, int n) { | |
377 | /* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]). | |
378 | This is a decimation-in-frequency, split-radix algorithm. | |
379 | */ | |
380 | register double cc1, ss1, cc3, ss3; | |
381 | register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8, | |
382 | a, a3, b, b3, nminus = n-1, dil, expand; | |
383 | register double *x, e; | |
384 | int nn = n>>1; | |
385 | double t1, t2, t3, t4, t5; | |
386 | int n2, n4, n8, i, j; | |
387 | ||
388 | init_sincos(n); | |
389 | expand = CurrentRun/n; | |
390 | x = zs-1; | |
391 | n2 = n<<1; | |
392 | while(nn >>= 1) { | |
393 | is = 0; | |
394 | iD = n2; | |
395 | n2 >>= 1; | |
396 | n4 = n2>>2; | |
397 | n8 = n4>>1; | |
398 | do { | |
399 | for(i=is;i<n;i+=iD) { | |
400 | i1 = i+1; | |
401 | i2 = i1 + n4; | |
402 | i3 = i2 + n4; | |
403 | i4 = i3 + n4; | |
404 | t1 = x[i1] - x[i3]; | |
405 | x[i1] += x[i3]; | |
406 | x[i2] += x[i2]; | |
407 | x[i3] = t1 - 2.0*x[i4]; | |
408 | x[i4] = t1 + 2.0*x[i4]; | |
409 | if(n4==1) continue; | |
410 | i1 += n8; | |
411 | i2 += n8; | |
412 | i3 += n8; | |
413 | i4 += n8; | |
414 | t1 = (x[i2]-x[i1])*SQRTHALF; | |
415 | t2 = (x[i4]+x[i3])*SQRTHALF; | |
416 | x[i1] += x[i2]; | |
417 | x[i2] = x[i4]-x[i3]; | |
418 | x[i3] = -2.0*(t2+t1); | |
419 | x[i4] = 2.0*(t1-t2); | |
420 | } | |
421 | is = (iD<<1) - n2; | |
422 | iD <<= 2; | |
423 | } while(is<n-1); | |
424 | dil = n/n2; | |
425 | a = dil; | |
426 | for(j=2;j<=n8;j++) { | |
427 | a3 = (a+(a<<1))&nminus; | |
428 | b = a*expand; | |
429 | b3 = a3*expand; | |
430 | cc1 = s_cos(b); | |
431 | ss1 = s_sin(b); | |
432 | cc3 = s_cos(b3); | |
433 | ss3 = s_sin(b3); | |
434 | a = (a+dil)&nminus; | |
435 | is = 0; | |
436 | iD = n2<<1; | |
437 | do { | |
438 | for(i=is;i<n;i+=iD) { | |
439 | i1 = i+j; | |
440 | i2 = i1+n4; | |
441 | i3 = i2+n4; | |
442 | i4 = i3+n4; | |
443 | i5 = i+n4-j+2; | |
444 | i6 = i5+n4; | |
445 | i7 = i6+n4; | |
446 | i8 = i7+n4; | |
447 | t1 = x[i1] - x[i6]; | |
448 | x[i1] += x[i6]; | |
449 | t2 = x[i5] - x[i2]; | |
450 | x[i5] += x[i2]; | |
451 | t3 = x[i8] + x[i3]; | |
452 | x[i6] = x[i8] - x[i3]; | |
453 | t4 = x[i4] + x[i7]; | |
454 | x[i2] = x[i4] - x[i7]; | |
455 | t5 = t1 - t4; | |
456 | t1 += t4; | |
457 | t4 = t2 - t3; | |
458 | t2 += t3; | |
459 | x[i3] = t5*cc1 + t4*ss1; | |
460 | x[i7] = -t4*cc1 + t5*ss1; | |
461 | x[i4] = t1*cc3 - t2*ss3; | |
462 | x[i8] = t2*cc3 + t1*ss3; | |
463 | } | |
464 | is = (iD<<1) - n2; | |
465 | iD <<= 2; | |
466 | } while(is<n-1); | |
467 | } | |
468 | } | |
469 | is = 1; | |
470 | iD = 4; | |
471 | do { | |
472 | for(i0=is;i0<=n;i0+=iD){ | |
473 | i1 = i0+1; | |
474 | e = x[i0]; | |
475 | x[i0] = e + x[i1]; | |
476 | x[i1] = e - x[i1]; | |
477 | } | |
478 | is = (iD<<1) - 1; | |
479 | iD <<= 2; | |
480 | } while(is<n); | |
481 | scramble_real(zs, n); | |
482 | e = 1/(double)n; | |
483 | for(i=0;i<n;i++) zs[i] *= e; | |
484 | } | |
485 | ||
486 | ||
487 | static void mul_hermitian(double *a, double *b, int n) { | |
488 | register int k, half = n>>1; | |
489 | register double aa, bb, am, bm; | |
490 | ||
491 | b[0] *= a[0]; | |
492 | b[half] *= a[half]; | |
493 | for(k=1;k<half;k++) { | |
494 | aa = a[k]; bb = b[k]; | |
495 | am = a[n-k]; bm = b[n-k]; | |
496 | b[k] = aa*bb - am*bm; | |
497 | b[n-k] = aa*bm + am*bb; | |
498 | } | |
499 | } | |
500 | ||
501 | static void square_hermitian(double *b, int n) { | |
502 | register int k, half = n>>1; | |
503 | register double c, d; | |
504 | ||
505 | b[0] *= b[0]; | |
506 | b[half] *= b[half]; | |
507 | for(k=1;k<half;k++) { | |
508 | c = b[k]; d = b[n-k]; | |
509 | b[n-k] = 2.0*c*d; | |
510 | b[k] = (c+d)*(c-d); | |
511 | } | |
512 | } | |
513 | ||
514 | static void giant_to_double(giant x, int sizex, double *zs, int L) { | |
515 | register int j; | |
516 | for(j=sizex;j<L;j++) zs[j]=0.0; | |
517 | for(j=0;j<sizex;j++) { | |
518 | zs[j] = x->n[j]; | |
519 | } | |
520 | } |