]> git.saurik.com Git - apple/security.git/blob - OSX/libsecurity_cryptkit/lib/giantFFT.c
Security-58286.1.32.tar.gz
[apple/security.git] / OSX / libsecurity_cryptkit / lib / giantFFT.c
1 /* Copyright (c) 1998,2011,2014 Apple Inc. All Rights Reserved.
2 *
3 * NOTICE: USE OF THE MATERIALS ACCOMPANYING THIS NOTICE IS SUBJECT
4 * TO THE TERMS OF THE SIGNED "FAST ELLIPTIC ENCRYPTION (FEE) REFERENCE
5 * SOURCE CODE EVALUATION AGREEMENT" BETWEEN APPLE, INC. AND THE
6 * ORIGINAL LICENSEE THAT OBTAINED THESE MATERIALS FROM APPLE,
7 * INC. ANY USE OF THESE MATERIALS NOT PERMITTED BY SUCH AGREEMENT WILL
8 * EXPOSE YOU TO LIABILITY.
9 ***************************************************************************
10
11 giantFFT.c
12 Library for large-integer arithmetic via FFT. Currently unused
13 in CryptKit.
14
15
16 Revision History
17 ----------------
18 19 Jan 1998 at Apple
19 Split off from NSGiantIntegers.c.
20
21 */
22
23 /*
24 * FIXME - make sure platform-specific math lib has floor(), fmod(),
25 * sin(), pow()
26 */
27 #include <math.h>
28 #include "NSGiantIntegers.h"
29
30 #define AUTO_MUL 0
31 #define GRAMMAR_MUL 1
32 #define FFT_MUL 2
33
34 #define TWOPI (double)(2*3.1415926535897932384626433)
35 #define SQRT2 (double)(1.414213562373095048801688724209)
36 #define SQRTHALF (double)(0.707106781186547524400844362104)
37 #define TWO16 (double)(65536.0)
38 #define TWOM16 (double)(0.0000152587890625)
39 #define BREAK_SHORTS 400 // Number of shorts at which FFT breaks over.
40
41 static int lpt(int n, int *lambda);
42 static void mul_hermitian(double *a, double *b, int n) ;
43 static void square_hermitian(double *b, int n);
44 static void addsignal(giant x, double *zs, int n);
45 static void scramble_real(double *x, int n);
46 static void fft_real_to_hermitian(double *zs, int n);
47 static void fftinv_hermitian_to_real(double *zs, int n);
48 static void GiantFFTSquare(giant gx);
49 static void GiantFFTMul(giant,giant);
50 static void giant_to_double(giant x, int sizex, double *zs, int L);
51
52 static int mulmode = AUTO_MUL;
53
54 void mulg(giant a, giant b) { /* b becomes a*b. */
55 PROF_START;
56 INCR_MULGS;
57 GiantAuxMul(a,b);
58 #if FEE_DEBUG
59 (void)bitlen(b); // XXX
60 #endif FEE_DEBUG
61 PROF_END(mulgTime);
62 PROF_INCR(numMulg);
63 }
64
65 static void GiantAuxMul(giant a, giant b) {
66 /* Optimized general multiply, b becomes a*b. Modes are:
67 AUTO_MUL: switch according to empirical speed criteria.
68 GRAMMAR_MUL: force grammar-school algorithm.
69 FFT_MUL: force floating point FFT method.
70 */
71 int square = (a==b);
72
73 if (isZero(b)) return;
74 if (isZero(a)) {
75 gtog(a, b);
76 return;
77 }
78 switch(mulmode) {
79 case GRAMMAR_MUL:
80 GiantGrammarMul(a,b);
81 break;
82 case FFT_MUL:
83 if (square) {
84 GiantFFTSquare(b);
85 }
86 else {
87 GiantFFTMul(a,b);
88 }
89 break;
90 case AUTO_MUL: {
91 int sizea, sizeb;
92 float grammartime;
93 sizea = abs(a->sign);
94 sizeb = abs(b->sign);
95 grammartime = sizea; grammartime *= sizeb;
96 if(grammartime < BREAK_SHORTS*BREAK_SHORTS) {
97 GiantGrammarMul(a,b);
98 }
99 else {
100 if (square) GiantFFTSquare(b);
101 else GiantFFTMul(a,b);
102 }
103 break;
104 }
105 }
106 }
107
108 /***************** Commence FFT multiply routines ****************/
109
110 static int CurrentRun = 0;
111 double *sincos = NULL;
112 static void init_sincos(int n) {
113 int j;
114 double e = TWOPI/n;
115
116 if (n <= CurrentRun) return;
117 CurrentRun = n;
118 if (sincos) free(sincos);
119 sincos = (double *)malloc(sizeof(double)*(1+(n>>2)));
120 for(j=0;j<=(n>>2);j++) {
121 sincos[j] = sin(e*j);
122 }
123 }
124
125 static double s_sin(int n) {
126 int seg = n/(CurrentRun>>2);
127
128 switch(seg) {
129 case 0: return(sincos[n]);
130 case 1: return(sincos[(CurrentRun>>1)-n]);
131 case 2: return(-sincos[n-(CurrentRun>>1)]);
132 case 3:
133 default: return(-sincos[CurrentRun-n]);
134 }
135 }
136
137 static double s_cos(int n) {
138 int quart = (CurrentRun>>2);
139
140 if (n < quart) return(s_sin(n+quart));
141 return(-s_sin(n-quart));
142 }
143
144
145 static int lpt(int n, int *lambda) {
146 /* returns least power of two greater than n */
147 register int i = 1;
148
149 *lambda = 0;
150 while(i<n) {
151 i<<=1;
152 ++(*lambda);
153 }
154 return(i);
155 }
156
157 static void addsignal(giant x, double *zs, int n) {
158 register int j, k, m, car;
159 register double f, g;
160 /*double err, maxerr = 0.0;*/
161
162 for(j=0;j<n;j++) {
163 f = floor(zs[j]+0.5);
164
165 /* err = fabs(zs[j]-f);
166 if(err>maxerr) maxerr = err;
167 */
168
169 zs[j] =0;
170 k = 0;
171 do{
172 g = floor(f*TWOM16);
173 zs[j+k] += f-g*TWO16;
174 ++k;
175 f=g;
176 } while(f != 0.0);
177 }
178 car = 0;
179 for(j=0;j<n;j++) {
180 m = zs[j]+car;
181 x->n[j] = m & 0xffff;
182 car = (m>>16);
183 }
184 if(car) x->n[j] = car;
185 else --j;
186 while(!(x->n[j])) --j;
187 x->sign = j+1;
188 if (abs(x->sign) > x->capacity) NSGiantRaise("addsignal overflow");
189 }
190
191 static void GiantFFTSquare(giant gx) {
192 int j,size = abs(gx->sign);
193 register int L;
194
195 if(size<4) { GiantGrammarMul(gx,gx); return; }
196 L = lpt(size+size, &j);
197 {
198 //was...double doubles[L];
199 //is...
200 double *doubles = malloc(sizeof(double) * L);
201 // end
202 giant_to_double(gx, size, doubles, L);
203 fft_real_to_hermitian(doubles, L);
204 square_hermitian(doubles, L);
205 fftinv_hermitian_to_real(doubles, L);
206 addsignal(gx, doubles, L);
207 // new
208 free(doubles);
209 }
210 gx->sign = abs(gx->sign);
211 bitlen(gx); // XXX
212 if (abs(gx->sign) > gx->capacity) NSGiantRaise("GiantFFTSquare overflow");
213 }
214
215 static void GiantFFTMul(giant y, giant x) { /* x becomes y*x. */
216 int lambda, size, sizex = abs(x->sign), sizey = abs(y->sign);
217 int finalsign = gsign(x)*gsign(y);
218 register int L;
219
220 if((sizex<=4)||(sizey<=4)) { GiantGrammarMul(y,x); return; }
221 size = sizex; if(size<sizey) size=sizey;
222 L = lpt(size+size, &lambda);
223 {
224 //double doubles1[L];
225 //double doubles2[L];
226 double *doubles1 = malloc(sizeof(double) * L);
227 double *doubles2 = malloc(sizeof(double) * L);
228
229 giant_to_double(x, sizex, doubles1, L);
230 giant_to_double(y, sizey, doubles2, L);
231 fft_real_to_hermitian(doubles1, L);
232 fft_real_to_hermitian(doubles2, L);
233 mul_hermitian(doubles2, doubles1, L);
234 fftinv_hermitian_to_real(doubles1, L);
235 addsignal(x, doubles1, L);
236
237 free(doubles1);
238 free(doubles2);
239 }
240 x->sign = finalsign*abs(x->sign);
241 bitlen(x); // XXX
242 if (abs(x->sign) > x->capacity) NSGiantRaise("GiantFFTMul overflow");
243 }
244
245 static void scramble_real(double *x, int n) {
246 register int i,j,k;
247 register double tmp;
248
249 for(i=0,j=0;i<n-1;i++) {
250 if(i<j) {
251 tmp = x[j];
252 x[j]=x[i];
253 x[i]=tmp;
254 }
255 k = n/2;
256 while(k<=j) {
257 j -= k;
258 k>>=1;
259 }
260 j += k;
261 }
262 }
263
264 static void fft_real_to_hermitian(double *zs, int n) {
265 /* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
266 This is a decimation-in-time, split-radix algorithm.
267 */
268 register double cc1, ss1, cc3, ss3;
269 register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
270 a, a3, b, b3, nminus = n-1, dil, expand;
271 register double *x, e;
272 int nn = n>>1;
273 double t1, t2, t3, t4, t5, t6;
274 register int n2, n4, n8, i, j;
275
276 init_sincos(n);
277 expand = CurrentRun/n;
278 scramble_real(zs, n);
279 x = zs-1; /* FORTRAN compatibility. */
280 is = 1;
281 iD = 4;
282 do{
283 for(i0=is;i0<=n;i0+=iD) {
284 i1 = i0+1;
285 e = x[i0];
286 x[i0] = e + x[i1];
287 x[i1] = e - x[i1];
288 }
289 is = (iD<<1)-1;
290 iD <<= 2;
291 } while(is<n);
292 n2 = 2;
293 while(nn>>=1) {
294 n2 <<= 1;
295 n4 = n2>>2;
296 n8 = n2>>3;
297 is = 0;
298 iD = n2<<1;
299 do {
300 for(i=is;i<n;i+=iD) {
301 i1 = i+1;
302 i2 = i1 + n4;
303 i3 = i2 + n4;
304 i4 = i3 + n4;
305 t1 = x[i4]+x[i3];
306 x[i4] -= x[i3];
307 x[i3] = x[i1] - t1;
308 x[i1] += t1;
309 if(n4==1) continue;
310 i1 += n8;
311 i2 += n8;
312 i3 += n8;
313 i4 += n8;
314 t1 = (x[i3]+x[i4])*SQRTHALF;
315 t2 = (x[i3]-x[i4])*SQRTHALF;
316 x[i4] = x[i2] - t1;
317 x[i3] = -x[i2] - t1;
318 x[i2] = x[i1] - t2;
319 x[i1] += t2;
320 }
321 is = (iD<<1) - n2;
322 iD <<= 2;
323 } while(is<n);
324 dil = n/n2;
325 a = dil;
326 for(j=2;j<=n8;j++) {
327 a3 = (a+(a<<1))&nminus;
328 b = a*expand;
329 b3 = a3*expand;
330 cc1 = s_cos(b);
331 ss1 = s_sin(b);
332 cc3 = s_cos(b3);
333 ss3 = s_sin(b3);
334 a = (a+dil)&nminus;
335 is = 0;
336 iD = n2<<1;
337 do {
338 for(i=is;i<n;i+=iD) {
339 i1 = i+j;
340 i2 = i1 + n4;
341 i3 = i2 + n4;
342 i4 = i3 + n4;
343 i5 = i + n4 - j + 2;
344 i6 = i5 + n4;
345 i7 = i6 + n4;
346 i8 = i7 + n4;
347 t1 = x[i3]*cc1 + x[i7]*ss1;
348 t2 = x[i7]*cc1 - x[i3]*ss1;
349 t3 = x[i4]*cc3 + x[i8]*ss3;
350 t4 = x[i8]*cc3 - x[i4]*ss3;
351 t5 = t1 + t3;
352 t6 = t2 + t4;
353 t3 = t1 - t3;
354 t4 = t2 - t4;
355 t2 = x[i6] + t6;
356 x[i3] = t6 - x[i6];
357 x[i8] = t2;
358 t2 = x[i2] - t3;
359 x[i7] = -x[i2] - t3;
360 x[i4] = t2;
361 t1 = x[i1] + t5;
362 x[i6] = x[i1] - t5;
363 x[i1] = t1;
364 t1 = x[i5] + t4;
365 x[i5] -= t4;
366 x[i2] = t1;
367 }
368 is = (iD<<1) - n2;
369 iD <<= 2;
370 } while(is<n);
371 }
372 }
373 }
374
375 static void fftinv_hermitian_to_real(double *zs, int n) {
376 /* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
377 This is a decimation-in-frequency, split-radix algorithm.
378 */
379 register double cc1, ss1, cc3, ss3;
380 register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
381 a, a3, b, b3, nminus = n-1, dil, expand;
382 register double *x, e;
383 int nn = n>>1;
384 double t1, t2, t3, t4, t5;
385 int n2, n4, n8, i, j;
386
387 init_sincos(n);
388 expand = CurrentRun/n;
389 x = zs-1;
390 n2 = n<<1;
391 while(nn >>= 1) {
392 is = 0;
393 iD = n2;
394 n2 >>= 1;
395 n4 = n2>>2;
396 n8 = n4>>1;
397 do {
398 for(i=is;i<n;i+=iD) {
399 i1 = i+1;
400 i2 = i1 + n4;
401 i3 = i2 + n4;
402 i4 = i3 + n4;
403 t1 = x[i1] - x[i3];
404 x[i1] += x[i3];
405 x[i2] += x[i2];
406 x[i3] = t1 - 2.0*x[i4];
407 x[i4] = t1 + 2.0*x[i4];
408 if(n4==1) continue;
409 i1 += n8;
410 i2 += n8;
411 i3 += n8;
412 i4 += n8;
413 t1 = (x[i2]-x[i1])*SQRTHALF;
414 t2 = (x[i4]+x[i3])*SQRTHALF;
415 x[i1] += x[i2];
416 x[i2] = x[i4]-x[i3];
417 x[i3] = -2.0*(t2+t1);
418 x[i4] = 2.0*(t1-t2);
419 }
420 is = (iD<<1) - n2;
421 iD <<= 2;
422 } while(is<n-1);
423 dil = n/n2;
424 a = dil;
425 for(j=2;j<=n8;j++) {
426 a3 = (a+(a<<1))&nminus;
427 b = a*expand;
428 b3 = a3*expand;
429 cc1 = s_cos(b);
430 ss1 = s_sin(b);
431 cc3 = s_cos(b3);
432 ss3 = s_sin(b3);
433 a = (a+dil)&nminus;
434 is = 0;
435 iD = n2<<1;
436 do {
437 for(i=is;i<n;i+=iD) {
438 i1 = i+j;
439 i2 = i1+n4;
440 i3 = i2+n4;
441 i4 = i3+n4;
442 i5 = i+n4-j+2;
443 i6 = i5+n4;
444 i7 = i6+n4;
445 i8 = i7+n4;
446 t1 = x[i1] - x[i6];
447 x[i1] += x[i6];
448 t2 = x[i5] - x[i2];
449 x[i5] += x[i2];
450 t3 = x[i8] + x[i3];
451 x[i6] = x[i8] - x[i3];
452 t4 = x[i4] + x[i7];
453 x[i2] = x[i4] - x[i7];
454 t5 = t1 - t4;
455 t1 += t4;
456 t4 = t2 - t3;
457 t2 += t3;
458 x[i3] = t5*cc1 + t4*ss1;
459 x[i7] = -t4*cc1 + t5*ss1;
460 x[i4] = t1*cc3 - t2*ss3;
461 x[i8] = t2*cc3 + t1*ss3;
462 }
463 is = (iD<<1) - n2;
464 iD <<= 2;
465 } while(is<n-1);
466 }
467 }
468 is = 1;
469 iD = 4;
470 do {
471 for(i0=is;i0<=n;i0+=iD){
472 i1 = i0+1;
473 e = x[i0];
474 x[i0] = e + x[i1];
475 x[i1] = e - x[i1];
476 }
477 is = (iD<<1) - 1;
478 iD <<= 2;
479 } while(is<n);
480 scramble_real(zs, n);
481 e = 1/(double)n;
482 for(i=0;i<n;i++) zs[i] *= e;
483 }
484
485
486 static void mul_hermitian(double *a, double *b, int n) {
487 register int k, half = n>>1;
488 register double aa, bb, am, bm;
489
490 b[0] *= a[0];
491 b[half] *= a[half];
492 for(k=1;k<half;k++) {
493 aa = a[k]; bb = b[k];
494 am = a[n-k]; bm = b[n-k];
495 b[k] = aa*bb - am*bm;
496 b[n-k] = aa*bm + am*bb;
497 }
498 }
499
500 static void square_hermitian(double *b, int n) {
501 register int k, half = n>>1;
502 register double c, d;
503
504 b[0] *= b[0];
505 b[half] *= b[half];
506 for(k=1;k<half;k++) {
507 c = b[k]; d = b[n-k];
508 b[n-k] = 2.0*c*d;
509 b[k] = (c+d)*(c-d);
510 }
511 }
512
513 static void giant_to_double(giant x, int sizex, double *zs, int L) {
514 register int j;
515 for(j=sizex;j<L;j++) zs[j]=0.0;
516 for(j=0;j<sizex;j++) {
517 zs[j] = x->n[j];
518 }
519 }