]> git.saurik.com Git - apple/security.git/blame - libsecurity_cryptkit/lib/giantFFT.c
Security-55471.14.18.tar.gz
[apple/security.git] / libsecurity_cryptkit / lib / giantFFT.c
CommitLineData
b1ab9ed8
A
1/* Copyright (c) 1998 Apple Computer, Inc. All rights reserved.
2 *
3 * NOTICE: USE OF THE MATERIALS ACCOMPANYING THIS NOTICE IS SUBJECT
4 * TO THE TERMS OF THE SIGNED "FAST ELLIPTIC ENCRYPTION (FEE) REFERENCE
5 * SOURCE CODE EVALUATION AGREEMENT" BETWEEN APPLE COMPUTER, INC. AND THE
6 * ORIGINAL LICENSEE THAT OBTAINED THESE MATERIALS FROM APPLE COMPUTER,
7 * INC. ANY USE OF THESE MATERIALS NOT PERMITTED BY SUCH AGREEMENT WILL
8 * EXPOSE YOU TO LIABILITY.
9 ***************************************************************************
10
11 giantFFT.c
12 Library for large-integer arithmetic via FFT. Currently unused
13 in CryptKit.
14
15 R. E. Crandall, Scientific Computation Group, NeXT Computer, Inc.
16
17 Revision History
18 ----------------
19 19 Jan 1998 Doug Mitchell at Apple
20 Split off from NSGiantIntegers.c.
21
22*/
23
24/*
25 * FIXME - make sure platform-specific math lib has floor(), fmod(),
26 * sin(), pow()
27 */
28#include <math.h>
29#include "NSGiantIntegers.h"
30
31#define AUTO_MUL 0
32#define GRAMMAR_MUL 1
33#define FFT_MUL 2
34
35#define TWOPI (double)(2*3.1415926535897932384626433)
36#define SQRT2 (double)(1.414213562373095048801688724209)
37#define SQRTHALF (double)(0.707106781186547524400844362104)
38#define TWO16 (double)(65536.0)
39#define TWOM16 (double)(0.0000152587890625)
40#define BREAK_SHORTS 400 // Number of shorts at which FFT breaks over.
41
42static int lpt(int n, int *lambda);
43static void mul_hermitian(double *a, double *b, int n) ;
44static void square_hermitian(double *b, int n);
45static void addsignal(giant x, double *zs, int n);
46static void scramble_real(double *x, int n);
47static void fft_real_to_hermitian(double *zs, int n);
48static void fftinv_hermitian_to_real(double *zs, int n);
49static void GiantFFTSquare(giant gx);
50static void GiantFFTMul(giant,giant);
51static void giant_to_double(giant x, int sizex, double *zs, int L);
52
53static int mulmode = AUTO_MUL;
54
55void mulg(giant a, giant b) { /* b becomes a*b. */
56 PROF_START;
57 INCR_MULGS;
58 GiantAuxMul(a,b);
59 #if FEE_DEBUG
60 (void)bitlen(b); // XXX
61 #endif FEE_DEBUG
62 PROF_END(mulgTime);
63 PROF_INCR(numMulg);
64}
65
66static void GiantAuxMul(giant a, giant b) {
67/* Optimized general multiply, b becomes a*b. Modes are:
68 AUTO_MUL: switch according to empirical speed criteria.
69 GRAMMAR_MUL: force grammar-school algorithm.
70 FFT_MUL: force floating point FFT method.
71*/
72 int square = (a==b);
73
74 if (isZero(b)) return;
75 if (isZero(a)) {
76 gtog(a, b);
77 return;
78 }
79 switch(mulmode) {
80 case GRAMMAR_MUL:
81 GiantGrammarMul(a,b);
82 break;
83 case FFT_MUL:
84 if (square) {
85 GiantFFTSquare(b);
86 }
87 else {
88 GiantFFTMul(a,b);
89 }
90 break;
91 case AUTO_MUL: {
92 int sizea, sizeb;
93 float grammartime;
94 sizea = abs(a->sign);
95 sizeb = abs(b->sign);
96 grammartime = sizea; grammartime *= sizeb;
97 if(grammartime < BREAK_SHORTS*BREAK_SHORTS) {
98 GiantGrammarMul(a,b);
99 }
100 else {
101 if (square) GiantFFTSquare(b);
102 else GiantFFTMul(a,b);
103 }
104 break;
105 }
106 }
107}
108
109/***************** Commence FFT multiply routines ****************/
110
111static int CurrentRun = 0;
112double *sincos = NULL;
113static void init_sincos(int n) {
114 int j;
115 double e = TWOPI/n;
116
117 if (n <= CurrentRun) return;
118 CurrentRun = n;
119 if (sincos) free(sincos);
120 sincos = (double *)malloc(sizeof(double)*(1+(n>>2)));
121 for(j=0;j<=(n>>2);j++) {
122 sincos[j] = sin(e*j);
123 }
124}
125
126static double s_sin(int n) {
127 int seg = n/(CurrentRun>>2);
128
129 switch(seg) {
130 case 0: return(sincos[n]);
131 case 1: return(sincos[(CurrentRun>>1)-n]);
132 case 2: return(-sincos[n-(CurrentRun>>1)]);
133 case 3:
134 default: return(-sincos[CurrentRun-n]);
135 }
136}
137
138static double s_cos(int n) {
139 int quart = (CurrentRun>>2);
140
141 if (n < quart) return(s_sin(n+quart));
142 return(-s_sin(n-quart));
143}
144
145
146static int lpt(int n, int *lambda) {
147/* returns least power of two greater than n */
148 register int i = 1;
149
150 *lambda = 0;
151 while(i<n) {
152 i<<=1;
153 ++(*lambda);
154 }
155 return(i);
156}
157
158static void addsignal(giant x, double *zs, int n) {
159 register int j, k, m, car;
160 register double f, g;
161 /*double err, maxerr = 0.0;*/
162
163 for(j=0;j<n;j++) {
164 f = floor(zs[j]+0.5);
165
166 /* err = fabs(zs[j]-f);
167 if(err>maxerr) maxerr = err;
168 */
169
170 zs[j] =0;
171 k = 0;
172 do{
173 g = floor(f*TWOM16);
174 zs[j+k] += f-g*TWO16;
175 ++k;
176 f=g;
177 } while(f != 0.0);
178 }
179 car = 0;
180 for(j=0;j<n;j++) {
181 m = zs[j]+car;
182 x->n[j] = m & 0xffff;
183 car = (m>>16);
184 }
185 if(car) x->n[j] = car;
186 else --j;
187 while(!(x->n[j])) --j;
188 x->sign = j+1;
189 if (abs(x->sign) > x->capacity) NSGiantRaise("addsignal overflow");
190}
191
192static void GiantFFTSquare(giant gx) {
193 int j,size = abs(gx->sign);
194 register int L;
195
196 if(size<4) { GiantGrammarMul(gx,gx); return; }
197 L = lpt(size+size, &j);
198 {
199 //was...double doubles[L];
200 //is...
201 double *doubles = malloc(sizeof(double) * L);
202 // end
203 giant_to_double(gx, size, doubles, L);
204 fft_real_to_hermitian(doubles, L);
205 square_hermitian(doubles, L);
206 fftinv_hermitian_to_real(doubles, L);
207 addsignal(gx, doubles, L);
208 // new
209 free(doubles);
210 }
211 gx->sign = abs(gx->sign);
212 bitlen(gx); // XXX
213 if (abs(gx->sign) > gx->capacity) NSGiantRaise("GiantFFTSquare overflow");
214}
215
216static void GiantFFTMul(giant y, giant x) { /* x becomes y*x. */
217 int lambda, size, sizex = abs(x->sign), sizey = abs(y->sign);
218 int finalsign = gsign(x)*gsign(y);
219 register int L;
220
221 if((sizex<=4)||(sizey<=4)) { GiantGrammarMul(y,x); return; }
222 size = sizex; if(size<sizey) size=sizey;
223 L = lpt(size+size, &lambda);
224 {
225 //double doubles1[L];
226 //double doubles2[L];
227 double *doubles1 = malloc(sizeof(double) * L);
228 double *doubles2 = malloc(sizeof(double) * L);
229
230 giant_to_double(x, sizex, doubles1, L);
231 giant_to_double(y, sizey, doubles2, L);
232 fft_real_to_hermitian(doubles1, L);
233 fft_real_to_hermitian(doubles2, L);
234 mul_hermitian(doubles2, doubles1, L);
235 fftinv_hermitian_to_real(doubles1, L);
236 addsignal(x, doubles1, L);
237
238 free(doubles1);
239 free(doubles2);
240 }
241 x->sign = finalsign*abs(x->sign);
242 bitlen(x); // XXX
243 if (abs(x->sign) > x->capacity) NSGiantRaise("GiantFFTMul overflow");
244}
245
246static void scramble_real(double *x, int n) {
247 register int i,j,k;
248 register double tmp;
249
250 for(i=0,j=0;i<n-1;i++) {
251 if(i<j) {
252 tmp = x[j];
253 x[j]=x[i];
254 x[i]=tmp;
255 }
256 k = n/2;
257 while(k<=j) {
258 j -= k;
259 k>>=1;
260 }
261 j += k;
262 }
263}
264
265static void fft_real_to_hermitian(double *zs, int n) {
266/* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
267 This is a decimation-in-time, split-radix algorithm.
268 */
269 register double cc1, ss1, cc3, ss3;
270 register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
271 a, a3, b, b3, nminus = n-1, dil, expand;
272 register double *x, e;
273 int nn = n>>1;
274 double t1, t2, t3, t4, t5, t6;
275 register int n2, n4, n8, i, j;
276
277 init_sincos(n);
278 expand = CurrentRun/n;
279 scramble_real(zs, n);
280 x = zs-1; /* FORTRAN compatibility. */
281 is = 1;
282 iD = 4;
283 do{
284 for(i0=is;i0<=n;i0+=iD) {
285 i1 = i0+1;
286 e = x[i0];
287 x[i0] = e + x[i1];
288 x[i1] = e - x[i1];
289 }
290 is = (iD<<1)-1;
291 iD <<= 2;
292 } while(is<n);
293 n2 = 2;
294 while(nn>>=1) {
295 n2 <<= 1;
296 n4 = n2>>2;
297 n8 = n2>>3;
298 is = 0;
299 iD = n2<<1;
300 do {
301 for(i=is;i<n;i+=iD) {
302 i1 = i+1;
303 i2 = i1 + n4;
304 i3 = i2 + n4;
305 i4 = i3 + n4;
306 t1 = x[i4]+x[i3];
307 x[i4] -= x[i3];
308 x[i3] = x[i1] - t1;
309 x[i1] += t1;
310 if(n4==1) continue;
311 i1 += n8;
312 i2 += n8;
313 i3 += n8;
314 i4 += n8;
315 t1 = (x[i3]+x[i4])*SQRTHALF;
316 t2 = (x[i3]-x[i4])*SQRTHALF;
317 x[i4] = x[i2] - t1;
318 x[i3] = -x[i2] - t1;
319 x[i2] = x[i1] - t2;
320 x[i1] += t2;
321 }
322 is = (iD<<1) - n2;
323 iD <<= 2;
324 } while(is<n);
325 dil = n/n2;
326 a = dil;
327 for(j=2;j<=n8;j++) {
328 a3 = (a+(a<<1))&nminus;
329 b = a*expand;
330 b3 = a3*expand;
331 cc1 = s_cos(b);
332 ss1 = s_sin(b);
333 cc3 = s_cos(b3);
334 ss3 = s_sin(b3);
335 a = (a+dil)&nminus;
336 is = 0;
337 iD = n2<<1;
338 do {
339 for(i=is;i<n;i+=iD) {
340 i1 = i+j;
341 i2 = i1 + n4;
342 i3 = i2 + n4;
343 i4 = i3 + n4;
344 i5 = i + n4 - j + 2;
345 i6 = i5 + n4;
346 i7 = i6 + n4;
347 i8 = i7 + n4;
348 t1 = x[i3]*cc1 + x[i7]*ss1;
349 t2 = x[i7]*cc1 - x[i3]*ss1;
350 t3 = x[i4]*cc3 + x[i8]*ss3;
351 t4 = x[i8]*cc3 - x[i4]*ss3;
352 t5 = t1 + t3;
353 t6 = t2 + t4;
354 t3 = t1 - t3;
355 t4 = t2 - t4;
356 t2 = x[i6] + t6;
357 x[i3] = t6 - x[i6];
358 x[i8] = t2;
359 t2 = x[i2] - t3;
360 x[i7] = -x[i2] - t3;
361 x[i4] = t2;
362 t1 = x[i1] + t5;
363 x[i6] = x[i1] - t5;
364 x[i1] = t1;
365 t1 = x[i5] + t4;
366 x[i5] -= t4;
367 x[i2] = t1;
368 }
369 is = (iD<<1) - n2;
370 iD <<= 2;
371 } while(is<n);
372 }
373 }
374}
375
376static void fftinv_hermitian_to_real(double *zs, int n) {
377/* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
378 This is a decimation-in-frequency, split-radix algorithm.
379 */
380 register double cc1, ss1, cc3, ss3;
381 register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
382 a, a3, b, b3, nminus = n-1, dil, expand;
383 register double *x, e;
384 int nn = n>>1;
385 double t1, t2, t3, t4, t5;
386 int n2, n4, n8, i, j;
387
388 init_sincos(n);
389 expand = CurrentRun/n;
390 x = zs-1;
391 n2 = n<<1;
392 while(nn >>= 1) {
393 is = 0;
394 iD = n2;
395 n2 >>= 1;
396 n4 = n2>>2;
397 n8 = n4>>1;
398 do {
399 for(i=is;i<n;i+=iD) {
400 i1 = i+1;
401 i2 = i1 + n4;
402 i3 = i2 + n4;
403 i4 = i3 + n4;
404 t1 = x[i1] - x[i3];
405 x[i1] += x[i3];
406 x[i2] += x[i2];
407 x[i3] = t1 - 2.0*x[i4];
408 x[i4] = t1 + 2.0*x[i4];
409 if(n4==1) continue;
410 i1 += n8;
411 i2 += n8;
412 i3 += n8;
413 i4 += n8;
414 t1 = (x[i2]-x[i1])*SQRTHALF;
415 t2 = (x[i4]+x[i3])*SQRTHALF;
416 x[i1] += x[i2];
417 x[i2] = x[i4]-x[i3];
418 x[i3] = -2.0*(t2+t1);
419 x[i4] = 2.0*(t1-t2);
420 }
421 is = (iD<<1) - n2;
422 iD <<= 2;
423 } while(is<n-1);
424 dil = n/n2;
425 a = dil;
426 for(j=2;j<=n8;j++) {
427 a3 = (a+(a<<1))&nminus;
428 b = a*expand;
429 b3 = a3*expand;
430 cc1 = s_cos(b);
431 ss1 = s_sin(b);
432 cc3 = s_cos(b3);
433 ss3 = s_sin(b3);
434 a = (a+dil)&nminus;
435 is = 0;
436 iD = n2<<1;
437 do {
438 for(i=is;i<n;i+=iD) {
439 i1 = i+j;
440 i2 = i1+n4;
441 i3 = i2+n4;
442 i4 = i3+n4;
443 i5 = i+n4-j+2;
444 i6 = i5+n4;
445 i7 = i6+n4;
446 i8 = i7+n4;
447 t1 = x[i1] - x[i6];
448 x[i1] += x[i6];
449 t2 = x[i5] - x[i2];
450 x[i5] += x[i2];
451 t3 = x[i8] + x[i3];
452 x[i6] = x[i8] - x[i3];
453 t4 = x[i4] + x[i7];
454 x[i2] = x[i4] - x[i7];
455 t5 = t1 - t4;
456 t1 += t4;
457 t4 = t2 - t3;
458 t2 += t3;
459 x[i3] = t5*cc1 + t4*ss1;
460 x[i7] = -t4*cc1 + t5*ss1;
461 x[i4] = t1*cc3 - t2*ss3;
462 x[i8] = t2*cc3 + t1*ss3;
463 }
464 is = (iD<<1) - n2;
465 iD <<= 2;
466 } while(is<n-1);
467 }
468 }
469 is = 1;
470 iD = 4;
471 do {
472 for(i0=is;i0<=n;i0+=iD){
473 i1 = i0+1;
474 e = x[i0];
475 x[i0] = e + x[i1];
476 x[i1] = e - x[i1];
477 }
478 is = (iD<<1) - 1;
479 iD <<= 2;
480 } while(is<n);
481 scramble_real(zs, n);
482 e = 1/(double)n;
483 for(i=0;i<n;i++) zs[i] *= e;
484}
485
486
487static void mul_hermitian(double *a, double *b, int n) {
488 register int k, half = n>>1;
489 register double aa, bb, am, bm;
490
491 b[0] *= a[0];
492 b[half] *= a[half];
493 for(k=1;k<half;k++) {
494 aa = a[k]; bb = b[k];
495 am = a[n-k]; bm = b[n-k];
496 b[k] = aa*bb - am*bm;
497 b[n-k] = aa*bm + am*bb;
498 }
499}
500
501static void square_hermitian(double *b, int n) {
502 register int k, half = n>>1;
503 register double c, d;
504
505 b[0] *= b[0];
506 b[half] *= b[half];
507 for(k=1;k<half;k++) {
508 c = b[k]; d = b[n-k];
509 b[n-k] = 2.0*c*d;
510 b[k] = (c+d)*(c-d);
511 }
512}
513
514static void giant_to_double(giant x, int sizex, double *zs, int L) {
515 register int j;
516 for(j=sizex;j<L;j++) zs[j]=0.0;
517 for(j=0;j<sizex;j++) {
518 zs[j] = x->n[j];
519 }
520}