OSX/libsecurity_cryptkit/lib/giantFFT.c

   1 /* Copyright (c) 1998,2011,2014 Apple Inc.  All Rights Reserved.
   2  *
   3  * NOTICE: USE OF THE MATERIALS ACCOMPANYING THIS NOTICE IS SUBJECT
   4  * TO THE TERMS OF THE SIGNED "FAST ELLIPTIC ENCRYPTION (FEE) REFERENCE
   5  * SOURCE CODE EVALUATION AGREEMENT" BETWEEN APPLE, INC. AND THE
   6  * ORIGINAL LICENSEE THAT OBTAINED THESE MATERIALS FROM APPLE,
   7  * INC.  ANY USE OF THESE MATERIALS NOT PERMITTED BY SUCH AGREEMENT WILL
   8  * EXPOSE YOU TO LIABILITY.
   9  ***************************************************************************
  10
  11    giantFFT.c
  12    Library for large-integer arithmetic via FFT. Currently unused
  13    in CryptKit.
  14
  15
  16  Revision History
  17  ----------------
  18  19 Jan 1998 at Apple
  19         Split off from NSGiantIntegers.c.
  20
  21 */
  22
  23 /*
  24  * FIXME - make sure platform-specific math lib has floor(), fmod(),
  25  *         sin(), pow()
  26  */
  27 #include <math.h>
  28 #include "NSGiantIntegers.h"
  29
  30 #define AUTO_MUL        0
  31 #define GRAMMAR_MUL     1
  32 #define FFT_MUL         2
  33
  34 #define TWOPI           (double)(2*3.1415926535897932384626433)
  35 #define SQRT2           (double)(1.414213562373095048801688724209)
  36 #define SQRTHALF        (double)(0.707106781186547524400844362104)
  37 #define TWO16           (double)(65536.0)
  38 #define TWOM16          (double)(0.0000152587890625)
  39 #define BREAK_SHORTS    400    // Number of shorts at which FFT breaks over.
  40
  41 static int lpt(int n, int *lambda);
  42 static void mul_hermitian(double *a, double *b, int n) ;
  43 static void square_hermitian(double *b, int n);
  44 static void addsignal(giant x, double *zs, int n);
  45 static void scramble_real(double *x, int n);
  46 static void fft_real_to_hermitian(double *zs, int n);
  47 static void fftinv_hermitian_to_real(double *zs, int n);
  48 static void GiantFFTSquare(giant gx);
  49 static void GiantFFTMul(giant,giant);
  50 static void giant_to_double(giant x, int sizex, double *zs, int L);
  51
  52 static int mulmode = AUTO_MUL;
  53
  54 void mulg(giant a, giant b) { /* b becomes a*b. */
  55         PROF_START;
  56         INCR_MULGS;
  57         GiantAuxMul(a,b);
  58         #if     FEE_DEBUG
  59         (void)bitlen(b); // XXX
  60         #endif  FEE_DEBUG
  61         PROF_END(mulgTime);
  62         PROF_INCR(numMulg);
  63 }
  64
  65 static void GiantAuxMul(giant a, giant b) {
  66 /* Optimized general multiply, b becomes a*b. Modes are:
  67    AUTO_MUL: switch according to empirical speed criteria.
  68    GRAMMAR_MUL: force grammar-school algorithm.
  69    FFT_MUL: force floating point FFT method.
  70 */
  71     int square = (a==b);
  72
  73     if (isZero(b)) return;
  74     if (isZero(a)) {
  75         gtog(a, b);
  76         return;
  77     }
  78     switch(mulmode) {
  79     case GRAMMAR_MUL:
  80         GiantGrammarMul(a,b);
  81         break;
  82     case FFT_MUL:
  83         if (square) {
  84             GiantFFTSquare(b);
  85         }
  86         else {
  87             GiantFFTMul(a,b);
  88         }
  89         break;
  90     case AUTO_MUL: {
  91         int sizea, sizeb;
  92         float grammartime;
  93         sizea = abs(a->sign);
  94         sizeb = abs(b->sign);
  95         grammartime = sizea; grammartime *= sizeb;
  96         if(grammartime < BREAK_SHORTS*BREAK_SHORTS) {
  97                 GiantGrammarMul(a,b);
  98         }
  99         else {
 100             if (square) GiantFFTSquare(b);
 101             else GiantFFTMul(a,b);
 102         }
 103         break;
 104       }
 105    }
 106 }
 107
 108 /***************** Commence FFT multiply routines ****************/
 109
 110 static int CurrentRun = 0;
 111 double *sincos = NULL;
 112 static void init_sincos(int n) {
 113     int j;
 114     double e = TWOPI/n;
 115
 116     if (n <= CurrentRun) return;
 117     CurrentRun = n;
 118     if (sincos) free(sincos);
 119     sincos = (double *)malloc(sizeof(double)*(1+(n>>2)));
 120     for(j=0;j<=(n>>2);j++) {
 121         sincos[j] = sin(e*j);
 122     }
 123 }
 124
 125 static double s_sin(int n) {
 126     int seg = n/(CurrentRun>>2);
 127
 128     switch(seg) {
 129     case 0: return(sincos[n]);
 130     case 1: return(sincos[(CurrentRun>>1)-n]);
 131     case 2: return(-sincos[n-(CurrentRun>>1)]);
 132     case 3:
 133     default: return(-sincos[CurrentRun-n]);
 134     }
 135 }
 136
 137 static double s_cos(int n) {
 138     int quart = (CurrentRun>>2);
 139
 140     if (n < quart) return(s_sin(n+quart));
 141     return(-s_sin(n-quart));
 142 }
 143
 144
 145 static int lpt(int n, int *lambda) {
 146 /* returns least power of two greater than n */
 147     register int i = 1;
 148
 149     *lambda = 0;
 150     while(i<n) {
 151         i<<=1;
 152         ++(*lambda);
 153     }
 154     return(i);
 155 }
 156
 157 static void addsignal(giant x, double *zs, int n) {
 158    register int j, k, m, car;
 159    register double f, g;
 160    /*double  err,  maxerr = 0.0;*/
 161
 162    for(j=0;j<n;j++) {
 163         f = floor(zs[j]+0.5);
 164
 165         /* err = fabs(zs[j]-f);
 166         if(err>maxerr) maxerr = err;
 167         */
 168
 169         zs[j] =0;
 170         k = 0;
 171         do{
 172            g = floor(f*TWOM16);
 173            zs[j+k] += f-g*TWO16;
 174            ++k;
 175            f=g;
 176         } while(f != 0.0);
 177    }
 178    car = 0;
 179    for(j=0;j<n;j++) {
 180         m = zs[j]+car;
 181         x->n[j] = m & 0xffff;
 182         car = (m>>16);
 183    }
 184    if(car) x->n[j] = car;
 185       else --j;
 186    while(!(x->n[j])) --j;
 187    x->sign = j+1;
 188    if (abs(x->sign) > x->capacity) NSGiantRaise("addsignal overflow");
 189 }
 190
 191 static void GiantFFTSquare(giant gx) {
 192     int j,size = abs(gx->sign);
 193     register int L;
 194
 195     if(size<4) { GiantGrammarMul(gx,gx); return; }
 196     L = lpt(size+size, &j);
 197     {
 198         //was...double doubles[L];
 199         //is...
 200         double *doubles = malloc(sizeof(double) * L);
 201         // end
 202         giant_to_double(gx, size, doubles, L);
 203         fft_real_to_hermitian(doubles, L);
 204         square_hermitian(doubles, L);
 205         fftinv_hermitian_to_real(doubles, L);
 206         addsignal(gx, doubles, L);
 207         // new
 208         free(doubles);
 209     }
 210     gx->sign = abs(gx->sign);
 211     bitlen(gx); // XXX
 212     if (abs(gx->sign) > gx->capacity) NSGiantRaise("GiantFFTSquare overflow");
 213 }
 214
 215 static void GiantFFTMul(giant y, giant x) { /* x becomes y*x. */
 216     int lambda, size, sizex = abs(x->sign), sizey = abs(y->sign);
 217     int finalsign = gsign(x)*gsign(y);
 218     register int L;
 219
 220     if((sizex<=4)||(sizey<=4)) { GiantGrammarMul(y,x); return; }
 221     size = sizex; if(size<sizey) size=sizey;
 222     L = lpt(size+size, &lambda);
 223     {
 224         //double doubles1[L];
 225         //double doubles2[L];
 226         double *doubles1 = malloc(sizeof(double) * L);
 227         double *doubles2 = malloc(sizeof(double) * L);
 228
 229         giant_to_double(x, sizex, doubles1, L);
 230         giant_to_double(y, sizey, doubles2, L);
 231         fft_real_to_hermitian(doubles1, L);
 232         fft_real_to_hermitian(doubles2, L);
 233         mul_hermitian(doubles2, doubles1, L);
 234         fftinv_hermitian_to_real(doubles1, L);
 235         addsignal(x, doubles1, L);
 236
 237         free(doubles1);
 238         free(doubles2);
 239     }
 240     x->sign = finalsign*abs(x->sign);
 241     bitlen(x); // XXX
 242     if (abs(x->sign) > x->capacity) NSGiantRaise("GiantFFTMul overflow");
 243 }
 244
 245 static void scramble_real(double *x, int n) {
 246     register int i,j,k;
 247     register double tmp;
 248
 249     for(i=0,j=0;i<n-1;i++) {
 250         if(i<j) {
 251             tmp = x[j];
 252             x[j]=x[i];
 253             x[i]=tmp;
 254         }
 255         k = n/2;
 256         while(k<=j) {
 257             j -= k;
 258             k>>=1;
 259         }
 260         j += k;
 261     }
 262 }
 263
 264 static void fft_real_to_hermitian(double *zs, int n) {
 265 /* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
 266    This is a decimation-in-time, split-radix algorithm.
 267  */
 268         register double cc1, ss1, cc3, ss3;
 269         register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
 270                      a, a3, b, b3, nminus = n-1, dil, expand;
 271         register double *x, e;
 272         int nn = n>>1;
 273         double t1, t2, t3, t4, t5, t6;
 274         register int n2, n4, n8, i, j;
 275
 276         init_sincos(n);
 277         expand = CurrentRun/n;
 278         scramble_real(zs, n);
 279         x = zs-1;  /* FORTRAN compatibility. */
 280         is = 1;
 281         iD = 4;
 282         do{
 283            for(i0=is;i0<=n;i0+=iD) {
 284                 i1 = i0+1;
 285                 e = x[i0];
 286                 x[i0] = e + x[i1];
 287                 x[i1] = e - x[i1];
 288            }
 289            is = (iD<<1)-1;
 290            iD <<= 2;
 291         } while(is<n);
 292         n2 = 2;
 293         while(nn>>=1) {
 294                 n2 <<= 1;
 295                 n4 = n2>>2;
 296                 n8 = n2>>3;
 297                 is = 0;
 298                 iD = n2<<1;
 299                 do {
 300                         for(i=is;i<n;i+=iD) {
 301                                 i1 = i+1;
 302                                 i2 = i1 + n4;
 303                                 i3 = i2 + n4;
 304                                 i4 = i3 + n4;
 305                                 t1 = x[i4]+x[i3];
 306                                 x[i4] -= x[i3];
 307                                 x[i3] = x[i1] - t1;
 308                                 x[i1] += t1;
 309                                 if(n4==1) continue;
 310                                 i1 += n8;
 311                                 i2 += n8;
 312                                 i3 += n8;
 313                                 i4 += n8;
 314                                 t1 = (x[i3]+x[i4])*SQRTHALF;
 315                                 t2 = (x[i3]-x[i4])*SQRTHALF;
 316                                 x[i4] = x[i2] - t1;
 317                                 x[i3] = -x[i2] - t1;
 318                                 x[i2] = x[i1] - t2;
 319                                 x[i1] += t2;
 320                         }
 321                         is = (iD<<1) - n2;
 322                         iD <<= 2;
 323                 } while(is<n);
 324                 dil = n/n2;
 325                 a = dil;
 326                 for(j=2;j<=n8;j++) {
 327                         a3 = (a+(a<<1))&nminus;
 328                         b = a*expand;
 329                         b3 = a3*expand;
 330                         cc1 = s_cos(b);
 331                         ss1 = s_sin(b);
 332                         cc3 = s_cos(b3);
 333                         ss3 = s_sin(b3);
 334                         a = (a+dil)&nminus;
 335                         is = 0;
 336                         iD = n2<<1;
 337                         do {
 338                                 for(i=is;i<n;i+=iD) {
 339                                         i1 = i+j;
 340                                         i2 = i1 + n4;
 341                                         i3 = i2 + n4;
 342                                         i4 = i3 + n4;
 343                                         i5 = i + n4 - j + 2;
 344                                         i6 = i5 + n4;
 345                                         i7 = i6 + n4;
 346                                         i8 = i7 + n4;
 347                                         t1 = x[i3]*cc1 + x[i7]*ss1;
 348                                         t2 = x[i7]*cc1 - x[i3]*ss1;
 349                                         t3 = x[i4]*cc3 + x[i8]*ss3;
 350                                         t4 = x[i8]*cc3 - x[i4]*ss3;
 351                                         t5 = t1 + t3;
 352                                         t6 = t2 + t4;
 353                                         t3 = t1 - t3;
 354                                         t4 = t2 - t4;
 355                                         t2 = x[i6] + t6;
 356                                         x[i3] = t6 - x[i6];
 357                                         x[i8] = t2;
 358                                         t2 = x[i2] - t3;
 359                                         x[i7] = -x[i2] - t3;
 360                                         x[i4] = t2;
 361                                         t1 = x[i1] + t5;
 362                                         x[i6] = x[i1] - t5;
 363                                         x[i1] = t1;
 364                                         t1 = x[i5] + t4;
 365                                         x[i5] -= t4;
 366                                         x[i2] = t1;
 367                                 }
 368                                 is = (iD<<1) - n2;
 369                                 iD <<= 2;
 370                         } while(is<n);
 371                 }
 372         }
 373 }
 374
 375 static void fftinv_hermitian_to_real(double *zs, int n) {
 376 /* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
 377    This is a decimation-in-frequency, split-radix algorithm.
 378  */
 379         register double cc1, ss1, cc3, ss3;
 380         register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8,
 381                  a, a3, b, b3, nminus = n-1, dil, expand;
 382         register double *x, e;
 383         int nn = n>>1;
 384         double t1, t2, t3, t4, t5;
 385         int n2, n4, n8, i, j;
 386
 387         init_sincos(n);
 388         expand = CurrentRun/n;
 389         x = zs-1;
 390         n2 = n<<1;
 391         while(nn >>= 1) {
 392                 is = 0;
 393                 iD = n2;
 394                 n2 >>= 1;
 395                 n4 = n2>>2;
 396                 n8 = n4>>1;
 397                 do {
 398                         for(i=is;i<n;i+=iD) {
 399                                 i1 = i+1;
 400                                 i2 = i1 + n4;
 401                                 i3 = i2 + n4;
 402                                 i4 = i3 + n4;
 403                                 t1 = x[i1] - x[i3];
 404                                 x[i1] += x[i3];
 405                                 x[i2] += x[i2];
 406                                 x[i3] = t1 - 2.0*x[i4];
 407                                 x[i4] = t1 + 2.0*x[i4];
 408                                 if(n4==1) continue;
 409                                 i1 += n8;
 410                                 i2 += n8;
 411                                 i3 += n8;
 412                                 i4 += n8;
 413                                 t1 = (x[i2]-x[i1])*SQRTHALF;
 414                                 t2 = (x[i4]+x[i3])*SQRTHALF;
 415                                 x[i1] += x[i2];
 416                                 x[i2] = x[i4]-x[i3];
 417                                 x[i3] = -2.0*(t2+t1);
 418                                 x[i4] = 2.0*(t1-t2);
 419                         }
 420                         is = (iD<<1) - n2;
 421                         iD <<= 2;
 422                 } while(is<n-1);
 423                 dil = n/n2;
 424                 a = dil;
 425                 for(j=2;j<=n8;j++) {
 426                         a3 = (a+(a<<1))&nminus;
 427                         b = a*expand;
 428                         b3 = a3*expand;
 429                         cc1 = s_cos(b);
 430                         ss1 = s_sin(b);
 431                         cc3 = s_cos(b3);
 432                         ss3 = s_sin(b3);
 433                         a = (a+dil)&nminus;
 434                         is = 0;
 435                         iD = n2<<1;
 436                         do {
 437                            for(i=is;i<n;i+=iD) {
 438                                 i1 = i+j;
 439                                 i2 = i1+n4;
 440                                 i3 = i2+n4;
 441                                 i4 = i3+n4;
 442                                 i5 = i+n4-j+2;
 443                                 i6 = i5+n4;
 444                                 i7 = i6+n4;
 445                                 i8 = i7+n4;
 446                                 t1 = x[i1] - x[i6];
 447                                 x[i1] += x[i6];
 448                                 t2 = x[i5] - x[i2];
 449                                 x[i5] += x[i2];
 450                                 t3 = x[i8] + x[i3];
 451                                 x[i6] = x[i8] - x[i3];
 452                                 t4 = x[i4] + x[i7];
 453                                 x[i2] = x[i4] - x[i7];
 454                                 t5 = t1 - t4;
 455                                 t1 += t4;
 456                                 t4 = t2 - t3;
 457                                 t2 += t3;
 458                                 x[i3] = t5*cc1 + t4*ss1;
 459                                 x[i7] = -t4*cc1 + t5*ss1;
 460                                 x[i4] = t1*cc3 - t2*ss3;
 461                                 x[i8] = t2*cc3 + t1*ss3;
 462                            }
 463                            is = (iD<<1) - n2;
 464                            iD <<= 2;
 465                         } while(is<n-1);
 466                 }
 467         }
 468         is = 1;
 469         iD = 4;
 470         do {
 471           for(i0=is;i0<=n;i0+=iD){
 472                 i1 = i0+1;
 473                 e = x[i0];
 474                 x[i0] = e + x[i1];
 475                 x[i1] = e - x[i1];
 476           }
 477           is = (iD<<1) - 1;
 478           iD <<= 2;
 479         } while(is<n);
 480         scramble_real(zs, n);
 481         e = 1/(double)n;
 482         for(i=0;i<n;i++) zs[i] *= e;
 483 }
 484
 485
 486 static void mul_hermitian(double *a, double *b, int n) {
 487         register int k, half = n>>1;
 488         register double aa, bb, am, bm;
 489
 490         b[0] *= a[0];
 491         b[half] *= a[half];
 492         for(k=1;k<half;k++) {
 493                 aa = a[k]; bb = b[k];
 494                 am = a[n-k]; bm = b[n-k];
 495                 b[k] = aa*bb - am*bm;
 496                 b[n-k] = aa*bm + am*bb;
 497         }
 498 }
 499
 500 static void square_hermitian(double *b, int n) {
 501         register int k, half = n>>1;
 502         register double c, d;
 503
 504         b[0] *= b[0];
 505         b[half] *= b[half];
 506         for(k=1;k<half;k++) {
 507                 c = b[k]; d = b[n-k];
 508                 b[n-k] = 2.0*c*d;
 509                 b[k] = (c+d)*(c-d);
 510         }
 511 }
 512
 513 static void giant_to_double(giant x, int sizex, double *zs, int L) {
 514         register int j;
 515         for(j=sizex;j<L;j++) zs[j]=0.0;
 516         for(j=0;j<sizex;j++) {
 517                  zs[j] = x->n[j];
 518         }
 519 }