libsecurity_cryptkit/lib/giantFFT.c

   1 /* Copyright (c) 1998 Apple Computer, Inc.  All rights reserved.
   2  *
   3  * NOTICE: USE OF THE MATERIALS ACCOMPANYING THIS NOTICE IS SUBJECT
   4  * TO THE TERMS OF THE SIGNED "FAST ELLIPTIC ENCRYPTION (FEE) REFERENCE
   5  * SOURCE CODE EVALUATION AGREEMENT" BETWEEN APPLE COMPUTER, INC. AND THE
   6  * ORIGINAL LICENSEE THAT OBTAINED THESE MATERIALS FROM APPLE COMPUTER,
   7  * INC.  ANY USE OF THESE MATERIALS NOT PERMITTED BY SUCH AGREEMENT WILL
   8  * EXPOSE YOU TO LIABILITY.
   9  ***************************************************************************
  10
  11    giantFFT.c
  12    Library for large-integer arithmetic via FFT. Currently unused
  13    in CryptKit.
  14
  15    R. E. Crandall, Scientific Computation Group, NeXT Computer, Inc.
  16
  17  Revision History
  18  ----------------
  19  19 Jan 1998    Doug Mitchell at Apple
  20         Split off from NSGiantIntegers.c.
  21
  22 */
  23
  24 /*
  25  * FIXME - make sure platform-specific math lib has floor(), fmod(),
  26  *         sin(), pow()
  27  */
  28 #include <math.h>
  29 #include "NSGiantIntegers.h"
  30
  31 #define AUTO_MUL        0
  32 #define GRAMMAR_MUL     1
  33 #define FFT_MUL         2
  34
  35 #define TWOPI           (double)(2*3.1415926535897932384626433)
  36 #define SQRT2           (double)(1.414213562373095048801688724209)
  37 #define SQRTHALF        (double)(0.707106781186547524400844362104)
  38 #define TWO16           (double)(65536.0)
  39 #define TWOM16          (double)(0.0000152587890625)
  40 #define BREAK_SHORTS    400    // Number of shorts at which FFT breaks over.
  41
  42 static int lpt(int n, int *lambda);
  43 static void mul_hermitian(double *a, double *b, int n) ;
  44 static void square_hermitian(double *b, int n);
  45 static void addsignal(giant x, double *zs, int n);
  46 static void scramble_real(double *x, int n);
  47 static void fft_real_to_hermitian(double *zs, int n);
  48 static void fftinv_hermitian_to_real(double *zs, int n);
  49 static void GiantFFTSquare(giant gx);
  50 static void GiantFFTMul(giant,giant);
  51 static void giant_to_double(giant x, int sizex, double *zs, int L);
  52
  53 static int mulmode = AUTO_MUL;
  54
  55 void mulg(giant a, giant b) { /* b becomes a*b. */
  56         PROF_START;
  57         INCR_MULGS;
  58         GiantAuxMul(a,b);
  59         #if     FEE_DEBUG
  60         (void)bitlen(b); // XXX
  61         #endif  FEE_DEBUG
  62         PROF_END(mulgTime);
  63         PROF_INCR(numMulg);
  64 }
  65
  66 static void GiantAuxMul(giant a, giant b) {
  67 /* Optimized general multiply, b becomes a*b. Modes are:
  68    AUTO_MUL: switch according to empirical speed criteria.
  69    GRAMMAR_MUL: force grammar-school algorithm.
  70    FFT_MUL: force floating point FFT method.
  71 */
  72     int square = (a==b);
  73
  74     if (isZero(b)) return;
  75     if (isZero(a)) {
  76         gtog(a, b);
  77         return;
  78     }
  79     switch(mulmode) {
  80     case GRAMMAR_MUL:
  81         GiantGrammarMul(a,b);
  82         break;
  83     case FFT_MUL:
  84         if (square) {
  85             GiantFFTSquare(b);
  86         }
  87         else {
  88             GiantFFTMul(a,b);
  89         }
  90         break;
  91     case AUTO_MUL: {
  92         int sizea, sizeb;
  93         float grammartime;
  94         sizea = abs(a->sign);
  95         sizeb = abs(b->sign);
  96         grammartime = sizea; grammartime *= sizeb;
  97         if(grammartime < BREAK_SHORTS*BREAK_SHORTS) {
  98                 GiantGrammarMul(a,b);
  99         }
 100         else {
 101             if (square) GiantFFTSquare(b);
 102             else GiantFFTMul(a,b);
 103         }
 104         break;
 105       }
 106    }
 107 }
 108
 109 /***************** Commence FFT multiply routines ****************/
 110
 111 static int CurrentRun = 0;
 112 double *sincos = NULL;
 113 static void init_sincos(int n) {
 114     int j;
 115     double e = TWOPI/n;
 116
 117     if (n <= CurrentRun) return;
 118     CurrentRun = n;
 119     if (sincos) free(sincos);
 120     sincos = (double *)malloc(sizeof(double)*(1+(n>>2)));
 121     for(j=0;j<=(n>>2);j++) {
 122         sincos[j] = sin(e*j);
 123     }
 124 }
 125
 126 static double s_sin(int n) {
 127     int seg = n/(CurrentRun>>2);
 128
 129     switch(seg) {
 130     case 0: return(sincos[n]);
 131     case 1: return(sincos[(CurrentRun>>1)-n]);
 132     case 2: return(-sincos[n-(CurrentRun>>1)]);
 133     case 3:
 134     default: return(-sincos[CurrentRun-n]);
 135     }
 136 }
 137
 138 static double s_cos(int n) {
 139     int quart = (CurrentRun>>2);
 140
 141     if (n < quart) return(s_sin(n+quart));
 142     return(-s_sin(n-quart));
 143 }
 144
 145
 146 static int lpt(int n, int *lambda) {
 147 /* returns least power of two greater than n */
 148     register int i = 1;
 149
 150     *lambda = 0;
 151     while(i<n) {
 152         i<<=1;
 153         ++(*lambda);
 154     }
 155     return(i);
 156 }
 157
 158 static void addsignal(giant x, double *zs, int n) {
 159    register int j, k, m, car;
 160    register double f, g;
 161    /*double  err,  maxerr = 0.0;*/
 162
 163    for(j=0;j<n;j++) {
 164         f = floor(zs[j]+0.5);
 165
 166         /* err = fabs(zs[j]-f);
 167         if(err>maxerr) maxerr = err;
 168         */
 169
 170         zs[j] =0;
 171         k = 0;
 172         do{
 173            g = floor(f*TWOM16);
 174            zs[j+k] += f-g*TWO16;
 175            ++k;
 176            f=g;
 177         } while(f != 0.0);
 178    }
 179    car = 0;
 180    for(j=0;j<n;j++) {
 181         m = zs[j]+car;
 182         x->n[j] = m & 0xffff;
 183         car = (m>>16);
 184    }
 185    if(car) x->n[j] = car;
 186       else --j;
 187    while(!(x->n[j])) --j;
 188    x->sign = j+1;
 189    if (abs(x->sign) > x->capacity) NSGiantRaise("addsignal overflow");
 190 }
 191
 192 static void GiantFFTSquare(giant gx) {
 193     int j,size = abs(gx->sign);
 194     register int L;
 195
 196     if(size<4) { GiantGrammarMul(gx,gx); return; }
 197     L = lpt(size+size, &j);
 198     {
 199         //was...double doubles[L];
 200         //is...
 201         double *doubles = malloc(sizeof(double) * L);
 202         // end
 203         giant_to_double(gx, size, doubles, L);
 204         fft_real_to_hermitian(doubles, L);
 205         square_hermitian(doubles, L);
 206         fftinv_hermitian_to_real(doubles, L);
 207         addsignal(gx, doubles, L);
 208         // new
 209         free(doubles);
 210     }
 211     gx->sign = abs(gx->sign);
 212     bitlen(gx); // XXX
 213     if (abs(gx->sign) > gx->capacity) NSGiantRaise("GiantFFTSquare overflow");
 214 }
 215
 216 static void GiantFFTMul(giant y, giant x) { /* x becomes y*x. */
 217     int lambda, size, sizex = abs(x->sign), sizey = abs(y->sign);
 218     int finalsign = gsign(x)*gsign(y);
 219     register int L;
 220
 221     if((sizex<=4)||(sizey<=4)) { GiantGrammarMul(y,x); return; }
 222     size = sizex; if(size<sizey) size=sizey;
 223     L = lpt(size+size, &lambda);
 224     {
 225         //double doubles1[L];
 226         //double doubles2[L];
 227         double *doubles1 = malloc(sizeof(double) * L);
 228         double *doubles2 = malloc(sizeof(double) * L);
 229
 230         giant_to_double(x, sizex, doubles1, L);
 231         giant_to_double(y, sizey, doubles2, L);
 232         fft_real_to_hermitian(doubles1, L);
 233         fft_real_to_hermitian(doubles2, L);
 234         mul_hermitian(doubles2, doubles1, L);
 235         fftinv_hermitian_to_real(doubles1, L);
 236         addsignal(x, doubles1, L);
 237
 238         free(doubles1);
 239         free(doubles2);
 240     }
 241     x->sign = finalsign*abs(x->sign);
 242     bitlen(x); // XXX
 243     if (abs(x->sign) > x->capacity) NSGiantRaise("GiantFFTMul overflow");
 244 }
 245
 246 static void scramble_real(double *x, int n) {
 247     register int i,j,k;
 248     register double tmp;
 249
 250     for(i=0,j=0;i<n-1;i++) {
 251         if(i<j) {
 252             tmp = x[j];
 253             x[j]=x[i];
 254             x[i]=tmp;
 255         }
 256         k = n/2;
 257         while(k<=j) {
 258             j -= k;
 259             k>>=1;
 260         }
 261         j += k;
 262     }
 263 }
 264
 265 static void fft_real_to_hermitian(double *zs, int n) {
 266 /* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
 267    This is a decimation-in-time, split-radix algorithm.
 268  */
 269         register double cc1, ss1, cc3, ss3;
 270         register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
 271                      a, a3, b, b3, nminus = n-1, dil, expand;
 272         register double *x, e;
 273         int nn = n>>1;
 274         double t1, t2, t3, t4, t5, t6;
 275         register int n2, n4, n8, i, j;
 276
 277         init_sincos(n);
 278         expand = CurrentRun/n;
 279         scramble_real(zs, n);
 280         x = zs-1;  /* FORTRAN compatibility. */
 281         is = 1;
 282         iD = 4;
 283         do{
 284            for(i0=is;i0<=n;i0+=iD) {
 285                 i1 = i0+1;
 286                 e = x[i0];
 287                 x[i0] = e + x[i1];
 288                 x[i1] = e - x[i1];
 289            }
 290            is = (iD<<1)-1;
 291            iD <<= 2;
 292         } while(is<n);
 293         n2 = 2;
 294         while(nn>>=1) {
 295                 n2 <<= 1;
 296                 n4 = n2>>2;
 297                 n8 = n2>>3;
 298                 is = 0;
 299                 iD = n2<<1;
 300                 do {
 301                         for(i=is;i<n;i+=iD) {
 302                                 i1 = i+1;
 303                                 i2 = i1 + n4;
 304                                 i3 = i2 + n4;
 305                                 i4 = i3 + n4;
 306                                 t1 = x[i4]+x[i3];
 307                                 x[i4] -= x[i3];
 308                                 x[i3] = x[i1] - t1;
 309                                 x[i1] += t1;
 310                                 if(n4==1) continue;
 311                                 i1 += n8;
 312                                 i2 += n8;
 313                                 i3 += n8;
 314                                 i4 += n8;
 315                                 t1 = (x[i3]+x[i4])*SQRTHALF;
 316                                 t2 = (x[i3]-x[i4])*SQRTHALF;
 317                                 x[i4] = x[i2] - t1;
 318                                 x[i3] = -x[i2] - t1;
 319                                 x[i2] = x[i1] - t2;
 320                                 x[i1] += t2;
 321                         }
 322                         is = (iD<<1) - n2;
 323                         iD <<= 2;
 324                 } while(is<n);
 325                 dil = n/n2;
 326                 a = dil;
 327                 for(j=2;j<=n8;j++) {
 328                         a3 = (a+(a<<1))&nminus;
 329                         b = a*expand;
 330                         b3 = a3*expand;
 331                         cc1 = s_cos(b);
 332                         ss1 = s_sin(b);
 333                         cc3 = s_cos(b3);
 334                         ss3 = s_sin(b3);
 335                         a = (a+dil)&nminus;
 336                         is = 0;
 337                         iD = n2<<1;
 338                         do {
 339                                 for(i=is;i<n;i+=iD) {
 340                                         i1 = i+j;
 341                                         i2 = i1 + n4;
 342                                         i3 = i2 + n4;
 343                                         i4 = i3 + n4;
 344                                         i5 = i + n4 - j + 2;
 345                                         i6 = i5 + n4;
 346                                         i7 = i6 + n4;
 347                                         i8 = i7 + n4;
 348                                         t1 = x[i3]*cc1 + x[i7]*ss1;
 349                                         t2 = x[i7]*cc1 - x[i3]*ss1;
 350                                         t3 = x[i4]*cc3 + x[i8]*ss3;
 351                                         t4 = x[i8]*cc3 - x[i4]*ss3;
 352                                         t5 = t1 + t3;
 353                                         t6 = t2 + t4;
 354                                         t3 = t1 - t3;
 355                                         t4 = t2 - t4;
 356                                         t2 = x[i6] + t6;
 357                                         x[i3] = t6 - x[i6];
 358                                         x[i8] = t2;
 359                                         t2 = x[i2] - t3;
 360                                         x[i7] = -x[i2] - t3;
 361                                         x[i4] = t2;
 362                                         t1 = x[i1] + t5;
 363                                         x[i6] = x[i1] - t5;
 364                                         x[i1] = t1;
 365                                         t1 = x[i5] + t4;
 366                                         x[i5] -= t4;
 367                                         x[i2] = t1;
 368                                 }
 369                                 is = (iD<<1) - n2;
 370                                 iD <<= 2;
 371                         } while(is<n);
 372                 }
 373         }
 374 }
 375
 376 static void fftinv_hermitian_to_real(double *zs, int n) {
 377 /* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
 378    This is a decimation-in-frequency, split-radix algorithm.
 379  */
 380         register double cc1, ss1, cc3, ss3;
 381         register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
 382                  a, a3, b, b3, nminus = n-1, dil, expand;
 383         register double *x, e;
 384         int nn = n>>1;
 385         double t1, t2, t3, t4, t5;
 386         int n2, n4, n8, i, j;
 387
 388         init_sincos(n);
 389         expand = CurrentRun/n;
 390         x = zs-1;
 391         n2 = n<<1;
 392         while(nn >>= 1) {
 393                 is = 0;
 394                 iD = n2;
 395                 n2 >>= 1;
 396                 n4 = n2>>2;
 397                 n8 = n4>>1;
 398                 do {
 399                         for(i=is;i<n;i+=iD) {
 400                                 i1 = i+1;
 401                                 i2 = i1 + n4;
 402                                 i3 = i2 + n4;
 403                                 i4 = i3 + n4;
 404                                 t1 = x[i1] - x[i3];
 405                                 x[i1] += x[i3];
 406                                 x[i2] += x[i2];
 407                                 x[i3] = t1 - 2.0*x[i4];
 408                                 x[i4] = t1 + 2.0*x[i4];
 409                                 if(n4==1) continue;
 410                                 i1 += n8;
 411                                 i2 += n8;
 412                                 i3 += n8;
 413                                 i4 += n8;
 414                                 t1 = (x[i2]-x[i1])*SQRTHALF;
 415                                 t2 = (x[i4]+x[i3])*SQRTHALF;
 416                                 x[i1] += x[i2];
 417                                 x[i2] = x[i4]-x[i3];
 418                                 x[i3] = -2.0*(t2+t1);
 419                                 x[i4] = 2.0*(t1-t2);
 420                         }
 421                         is = (iD<<1) - n2;
 422                         iD <<= 2;
 423                 } while(is<n-1);
 424                 dil = n/n2;
 425                 a = dil;
 426                 for(j=2;j<=n8;j++) {
 427                         a3 = (a+(a<<1))&nminus;
 428                         b = a*expand;
 429                         b3 = a3*expand;
 430                         cc1 = s_cos(b);
 431                         ss1 = s_sin(b);
 432                         cc3 = s_cos(b3);
 433                         ss3 = s_sin(b3);
 434                         a = (a+dil)&nminus;
 435                         is = 0;
 436                         iD = n2<<1;
 437                         do {
 438                            for(i=is;i<n;i+=iD) {
 439                                 i1 = i+j;
 440                                 i2 = i1+n4;
 441                                 i3 = i2+n4;
 442                                 i4 = i3+n4;
 443                                 i5 = i+n4-j+2;
 444                                 i6 = i5+n4;
 445                                 i7 = i6+n4;
 446                                 i8 = i7+n4;
 447                                 t1 = x[i1] - x[i6];
 448                                 x[i1] += x[i6];
 449                                 t2 = x[i5] - x[i2];
 450                                 x[i5] += x[i2];
 451                                 t3 = x[i8] + x[i3];
 452                                 x[i6] = x[i8] - x[i3];
 453                                 t4 = x[i4] + x[i7];
 454                                 x[i2] = x[i4] - x[i7];
 455                                 t5 = t1 - t4;
 456                                 t1 += t4;
 457                                 t4 = t2 - t3;
 458                                 t2 += t3;
 459                                 x[i3] = t5*cc1 + t4*ss1;
 460                                 x[i7] = -t4*cc1 + t5*ss1;
 461                                 x[i4] = t1*cc3 - t2*ss3;
 462                                 x[i8] = t2*cc3 + t1*ss3;
 463                            }
 464                            is = (iD<<1) - n2;
 465                            iD <<= 2;
 466                         } while(is<n-1);
 467                 }
 468         }
 469         is = 1;
 470         iD = 4;
 471         do {
 472           for(i0=is;i0<=n;i0+=iD){
 473                 i1 = i0+1;
 474                 e = x[i0];
 475                 x[i0] = e + x[i1];
 476                 x[i1] = e - x[i1];
 477           }
 478           is = (iD<<1) - 1;
 479           iD <<= 2;
 480         } while(is<n);
 481         scramble_real(zs, n);
 482         e = 1/(double)n;
 483         for(i=0;i<n;i++) zs[i] *= e;
 484 }
 485
 486
 487 static void mul_hermitian(double *a, double *b, int n) {
 488         register int k, half = n>>1;
 489         register double aa, bb, am, bm;
 490
 491         b[0] *= a[0];
 492         b[half] *= a[half];
 493         for(k=1;k<half;k++) {
 494                 aa = a[k]; bb = b[k];
 495                 am = a[n-k]; bm = b[n-k];
 496                 b[k] = aa*bb - am*bm;
 497                 b[n-k] = aa*bm + am*bb;
 498         }
 499 }
 500
 501 static void square_hermitian(double *b, int n) {
 502         register int k, half = n>>1;
 503         register double c, d;
 504
 505         b[0] *= b[0];
 506         b[half] *= b[half];
 507         for(k=1;k<half;k++) {
 508                 c = b[k]; d = b[n-k];
 509                 b[n-k] = 2.0*c*d;
 510                 b[k] = (c+d)*(c-d);
 511         }
 512 }
 513
 514 static void giant_to_double(giant x, int sizex, double *zs, int L) {
 515         register int j;
 516         for(j=sizex;j<L;j++) zs[j]=0.0;
 517         for(j=0;j<sizex;j++) {
 518                  zs[j] = x->n[j];
 519         }
 520 }